1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team, 2001
8 * ---------------------------------------------------------------------------*/
12 // Turn off inlining when debugging - it obfuscates things
21 #include "RetainerProfile.h"
22 #include "RetainerSet.h"
29 #include "Profiling.h"
31 #include "BlockAlloc.h"
36 Note: what to change in order to plug-in a new retainer profiling scheme?
37 (1) type retainer in ../includes/StgRetainerProf.h
38 (2) retainer function R(), i.e., getRetainerFrom()
39 (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
40 in RetainerSet.h, if needed.
41 (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
44 /* -----------------------------------------------------------------------------
46 * -------------------------------------------------------------------------- */
// Per-run statistics for the retainer profiler.
// NOTE(review): this extract omits some neighbouring declarations.
48 static nat retainerGeneration; // generation
50 static nat numObjectVisited; // total number of objects visited
51 static nat timesAnyObjectVisited; // number of times any objects are visited
54 The rs field in the profile header of any object points to its retainer
55 set in an indirect way: if flip is 0, it points to the retainer set;
56 if flip is 1, it points to the next byte after the retainer set (even
57 for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
58 pointer. See retainerSetOf().
// flip selects the interpretation of the rs field in each profiling header
// (see the comment above on the rs encoding).  Toggled between profiling runs.
61 StgWord flip = 0; // flip bit
62 // must be 0 if DEBUG_RETAINER is on (for static closures)
// Mark *c's retainer set as NULL under the current flip encoding.
64 #define setRetainerSetToNull(c) \
65 (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)
// Forward declarations: the traversal routines are mutually recursive.
67 static void retainStack(StgClosure *, retainer, StgPtr, StgPtr);
68 static void retainClosure(StgClosure *, StgClosure *, retainer);
70 static void belongToHeap(StgPtr p);
75 cStackSize records how many times retainStack() has been invoked recursively,
76 that is, the number of activation records for retainStack() on the C stack.
77 maxCStackSize records its max value.
79 cStackSize <= maxCStackSize
// C-stack depth bookkeeping (see invariant comment above) and sanity-check
// cost accumulators used to cross-check the profiler against a linear sweep.
81 static nat cStackSize, maxCStackSize;
83 static nat sumOfNewCost; // sum of the cost of each object, computed
84 // when the object is first visited
85 static nat sumOfNewCostExtra; // for those objects not visited during
86 // retainer profiling, e.g., MUT_VAR
87 static nat costArray[N_CLOSURE_TYPES];
// Non-static: presumably read by the linear-sweep checking code in another
// file — TODO confirm against the rest of the RTS.
89 nat sumOfCostLinear; // sum of the costs of all object, computed
90 // when linearly traversing the heap after
92 nat costArrayLinear[N_CLOSURE_TYPES];
95 /* -----------------------------------------------------------------------------
96 * Retainer stack - header
98 * Although the retainer stack implementation could be separated *
99 * from the retainer profiling engine, there does not seem to be
100 * any advantage in doing that; retainer stack is an integral part
101 * of the retainer profiling engine and cannot be used elsewhere at
103 * -------------------------------------------------------------------------- */
113 // fixed layout or layout specified by a field in the closure
118 // See StgClosureInfo in InfoTables.h
119 #if SIZEOF_VOID_P == 8
156 firstStack points to the first block group.
157 currentStack points to the block group currently being used.
158 currentStack->free == stackLimit.
159 stackTop points to the topmost byte in the stack of currentStack.
160 Unless the whole stack is empty, stackTop must point to the topmost
161 object (or byte) in the whole stack. Thus, it is only when the whole stack
162 is empty that stackTop == stackLimit (not during the execution of push()
164 stackBottom == currentStack->start.
165 stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
167 When a current stack becomes empty, stackTop is set to point to
168 the topmost element on the previous block group so as to satisfy
169 the invariants described above.
// Global state of the traverse stack (invariants documented above:
// stackBottom/stackTop/stackLimit all refer to currentStack).
171 static bdescr *firstStack = NULL;
172 static bdescr *currentStack;
173 static stackElement *stackBottom, *stackTop, *stackLimit;
176 currentStackBoundary is used to mark the current stack chunk.
177 If stackTop == currentStackBoundary, it means that the current stack chunk
178 is empty. It is the responsibility of the user to keep currentStackBoundary
179 valid all the time if it is to be employed.
181 static stackElement *currentStackBoundary;
184 stackSize records the current size of the stack.
185 maxStackSize records its high water mark.
187 stackSize <= maxStackSize
189 stackSize is just an estimate measure of the depth of the graph. The reason
190 is that some heap objects have only a single child and may not result
191 in a new element being pushed onto the stack. Therefore, at the end of
192 retainer profiling, maxStackSize + maxCStackSize is some value no greater
193 than the actual depth of the graph.
// stackSize/maxStackSize track traverse-stack depth, DEBUG builds only.
// NOTE(review): the matching #endif is outside this extract.
195 #ifdef DEBUG_RETAINER
196 static int stackSize, maxStackSize;
199 // number of blocks allocated for one stack
200 #define BLOCKS_IN_STACK 1
202 /* -----------------------------------------------------------------------------
203 * Add a new block group to the stack.
205 * currentStack->link == s.
206 * -------------------------------------------------------------------------- */
// Make bd the active traverse-stack block group: the stack grows downward,
// so stackTop/stackLimit start at the block's end and stackBottom at its start.
208 newStackBlock( bdescr *bd )
211 stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
212 stackBottom = (stackElement *)bd->start;
213 stackLimit = (stackElement *)stackTop;
214 bd->free = (StgPtr)stackLimit;
217 /* -----------------------------------------------------------------------------
218 * Return to the previous block group.
220 * s->link == currentStack.
221 * -------------------------------------------------------------------------- */
// Switch back to a previous block group bd; bd->free records where its
// topmost element was left, so stackTop resumes there.
223 returnToOldStack( bdescr *bd )
226 stackTop = (stackElement *)bd->free;
227 stackBottom = (stackElement *)bd->start;
228 stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
229 bd->free = (StgPtr)stackLimit;
232 /* -----------------------------------------------------------------------------
233 * Initializes the traverse stack.
234 * -------------------------------------------------------------------------- */
// (Re)create the traverse stack: free any leftover chain from a previous
// run, then allocate a fresh group and make it the active block.
236 initializeTraverseStack( void )
238 if (firstStack != NULL) {
239 freeChain(firstStack);
242 firstStack = allocGroup(BLOCKS_IN_STACK);
243 firstStack->link = NULL;
244 firstStack->u.back = NULL;
246 newStackBlock(firstStack);
249 /* -----------------------------------------------------------------------------
250 * Frees all the block groups in the traverse stack.
253 * -------------------------------------------------------------------------- */
// Release every block group in the traverse stack chain.
255 closeTraverseStack( void )
257 freeChain(firstStack);
261 /* -----------------------------------------------------------------------------
262 * Returns rtsTrue if the whole stack is empty.
263 * -------------------------------------------------------------------------- */
// The whole stack is empty only when we are on the first block group AND
// stackTop has retreated all the way to stackLimit.
264 static INLINE rtsBool
265 isEmptyRetainerStack( void )
267 return (firstStack == currentStack) && stackTop == stackLimit;
270 /* -----------------------------------------------------------------------------
271 * Returns size of stack
272 * -------------------------------------------------------------------------- */
// Count the blocks held by the traverse stack by walking the chain.
// NOTE(review): the accumulation statement is outside this extract.
275 retainerStackBlocks( void )
280 for (bd = firstStack; bd != NULL; bd = bd->link)
287 /* -----------------------------------------------------------------------------
288 * Returns rtsTrue if stackTop is at the stack boundary of the current stack,
289 * i.e., if the current stack chunk is empty.
290 * -------------------------------------------------------------------------- */
291 static INLINE rtsBool
// NOTE(review): the function-name line (isOnBoundary) is outside this extract.
294 return stackTop == currentStackBoundary;
297 /* -----------------------------------------------------------------------------
298 * Initializes *info from ptrs and payload.
300 * payload[] begins with ptrs pointers followed by non-pointers.
301 * -------------------------------------------------------------------------- */
// Initialize *info to iterate over a fixed pointers-first payload:
// position starts at 0, with ptrs pointers beginning at payload.
303 init_ptrs( stackPos *info, nat ptrs, StgPtr payload )
305 info->type = posTypePtrs;
306 info->next.ptrs.pos = 0;
307 info->next.ptrs.ptrs = ptrs;
308 info->next.ptrs.payload = payload;
311 /* -----------------------------------------------------------------------------
312 * Find the next object from *info.
313 * -------------------------------------------------------------------------- */
// Return the next pointer in the payload and advance the cursor.
// NOTE(review): the exhausted-case return (NULL) is outside this extract.
314 static INLINE StgClosure *
315 find_ptrs( stackPos *info )
317 if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
318 return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
324 /* -----------------------------------------------------------------------------
325 * Initializes *info from SRT information stored in *infoTable.
326 * -------------------------------------------------------------------------- */
// Initialize *info from a function info table's SRT.  An srt_bitmap of
// all-ones is the sentinel for a large (out-of-line bitmap) SRT.
328 init_srt_fun( stackPos *info, StgFunInfoTable *infoTable )
330 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
331 info->type = posTypeLargeSRT;
332 info->next.large_srt.srt = (StgLargeSRT *)GET_FUN_SRT(infoTable);
333 info->next.large_srt.offset = 0;
// else: small SRT, walked via the inline bitmap.
335 info->type = posTypeSRT;
336 info->next.srt.srt = (StgClosure **)GET_FUN_SRT(infoTable);
337 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
// Same as init_srt_fun but for thunk info tables (GET_SRT vs GET_FUN_SRT).
342 init_srt_thunk( stackPos *info, StgThunkInfoTable *infoTable )
344 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
345 info->type = posTypeLargeSRT;
346 info->next.large_srt.srt = (StgLargeSRT *)GET_SRT(infoTable);
347 info->next.large_srt.offset = 0;
349 info->type = posTypeSRT;
350 info->next.srt.srt = (StgClosure **)GET_SRT(infoTable);
351 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
355 /* -----------------------------------------------------------------------------
356 * Find the next object from *info.
357 * -------------------------------------------------------------------------- */
// Return the next closure referenced by the SRT recorded in *info, or
// exhaust the bitmap.  Handles both the small inline-bitmap case
// (posTypeSRT) and the large out-of-line bitmap case (posTypeLargeSRT).
// NOTE(review): several branch/return lines are outside this extract.
358 static INLINE StgClosure *
359 find_srt( stackPos *info )
364 if (info->type == posTypeSRT) {
// Small SRT bitmap: a 1 bit means the corresponding SRT entry is live.
366 bitmap = info->next.srt.srt_bitmap;
367 while (bitmap != 0) {
368 if ((bitmap & 1) != 0) {
369 #ifdef ENABLE_WIN32_DLL_SUPPORT
// On Win32 DLLs, a tagged (odd) entry is an indirection to the closure.
371 if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
372 c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
374 c = *(info->next.srt.srt);
376 c = *(info->next.srt.srt);
// Save the advanced cursor before returning this entry.
378 bitmap = bitmap >> 1;
379 info->next.srt.srt++;
380 info->next.srt.srt_bitmap = bitmap;
383 bitmap = bitmap >> 1;
384 info->next.srt.srt++;
386 // bitmap is now zero...
// Large SRT: resume scanning from the saved word offset.
391 nat i = info->next.large_srt.offset;
394 // Follow the pattern from GC.c:scavenge_large_srt_bitmap().
395 bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
396 bitmap = bitmap >> (i % BITS_IN(StgWord));
397 while (i < info->next.large_srt.srt->l.size) {
398 if ((bitmap & 1) != 0) {
399 c = ((StgClosure **)info->next.large_srt.srt->srt)[i];
401 info->next.large_srt.offset = i;
// Refill the bitmap word when crossing a word boundary.
405 if (i % BITS_IN(W_) == 0) {
406 bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
408 bitmap = bitmap >> 1;
411 // reached the end of this bitmap.
412 info->next.large_srt.offset = i;
417 /* -----------------------------------------------------------------------------
418 * push() pushes a stackElement representing the next child of *c
419 * onto the traverse stack. If *c has no child, *first_child is set
420 * to NULL and nothing is pushed onto the stack. If *c has only one
421 * child, *c_child is set to that child and nothing is pushed onto
422 * the stack. If *c has more than two children, *first_child is set
423 * to the first child and a stackElement representing the second
424 * child is pushed onto the stack.
427 * *c_child_r is the most recent retainer of *c's children.
428 * *c is not any of TSO, AP, PAP, AP_STACK, which means that
429 * there cannot be any stack objects.
430 * Note: SRTs are considered to be children as well.
431 * -------------------------------------------------------------------------- */
// NOTE(review): many case labels and intermediate statements are outside
// this extract; comments below annotate only the visible lines.
433 push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
436 bdescr *nbd; // Next Block Descriptor
438 #ifdef DEBUG_RETAINER
439 // debugBelch("push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
// Stack-carrying closures are handled by retainStack(), never pushed here.
442 ASSERT(get_itbl(c)->type != TSO);
443 ASSERT(get_itbl(c)->type != AP_STACK);
450 se.c_child_r = c_child_r;
// Dispatch on closure type to locate the first child and, where needed,
// record iteration state in se.info for the remaining children.
453 switch (get_itbl(c)->type) {
460 case SE_CAF_BLACKHOLE:
465 // one child (fixed), no SRT
467 *first_child = ((StgMutVar *)c)->var;
470 *first_child = ((StgSelector *)c)->selectee;
473 case IND_OLDGEN_PERM:
475 *first_child = ((StgInd *)c)->indirectee;
479 *first_child = c->payload[0];
482 // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
483 // of the next child. We do not write a separate initialization code.
484 // Also we do not have to initialize info.type;
486 // two children (fixed), no SRT
487 // need to push a stackElement, but nothing to store in se.info
489 *first_child = c->payload[0]; // return the first pointer
490 // se.info.type = posTypeStep;
491 // se.info.next.step = 2; // 2 = second
494 // three children (fixed), no SRT
495 // need to push a stackElement
497 // head must be TSO and the head of a linked list of TSOs.
498 // Should it be a child? Seems to be yes.
499 *first_child = (StgClosure *)((StgMVar *)c)->head;
500 // se.info.type = posTypeStep;
501 se.info.next.step = 2; // 2 = second
504 // three children (fixed), no SRT
506 *first_child = ((StgWeak *)c)->key;
507 // se.info.type = posTypeStep;
508 se.info.next.step = 2;
511 // layout.payload.ptrs, no SRT
516 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
518 *first_child = find_ptrs(&se.info);
519 if (*first_child == NULL)
523 // StgMutArrPtr.ptrs, no SRT
525 case MUT_ARR_PTRS_FROZEN:
526 case MUT_ARR_PTRS_FROZEN0:
527 init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
528 (StgPtr)(((StgMutArrPtrs *)c)->payload));
529 *first_child = find_ptrs(&se.info);
530 if (*first_child == NULL)
534 // layout.payload.ptrs, SRT
535 case FUN: // *c is a heap object.
537 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
538 *first_child = find_ptrs(&se.info);
539 if (*first_child == NULL)
540 // no child from ptrs, so check SRT
546 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
547 (StgPtr)((StgThunk *)c)->payload);
548 *first_child = find_ptrs(&se.info);
549 if (*first_child == NULL)
550 // no child from ptrs, so check SRT
554 // 1 fixed child, SRT
557 *first_child = c->payload[0];
558 ASSERT(*first_child != NULL);
559 init_srt_fun(&se.info, get_fun_itbl(c));
564 *first_child = ((StgThunk *)c)->payload[0];
565 ASSERT(*first_child != NULL);
566 init_srt_thunk(&se.info, get_thunk_itbl(c));
569 case FUN_STATIC: // *c is a heap object.
570 ASSERT(get_itbl(c)->srt_bitmap != 0);
574 init_srt_fun(&se.info, get_fun_itbl(c));
575 *first_child = find_srt(&se.info);
576 if (*first_child == NULL)
582 ASSERT(get_itbl(c)->srt_bitmap != 0);
586 init_srt_thunk(&se.info, get_thunk_itbl(c));
587 *first_child = find_srt(&se.info);
588 if (*first_child == NULL)
599 case CONSTR_CHARLIKE:
600 case CONSTR_NOCAF_STATIC:
621 barf("Invalid object *c in push()");
// Stack grows downward: overflow when decrementing stackTop would pass
// stackBottom, in which case we move on to the next block group.
625 if (stackTop - 1 < stackBottom) {
626 #ifdef DEBUG_RETAINER
627 // debugBelch("push() to the next stack.\n");
629 // currentStack->free is updated when the active stack is switched
630 // to the next stack.
631 currentStack->free = (StgPtr)stackTop;
// Reuse a previously-allocated successor block if one exists.
633 if (currentStack->link == NULL) {
634 nbd = allocGroup(BLOCKS_IN_STACK);
636 nbd->u.back = currentStack;
637 currentStack->link = nbd;
639 nbd = currentStack->link;
644 // adjust stackTop (actual push)
646 // If the size of stackElement was huge, we would better replace the
647 // following statement by either a memcpy() call or a switch statement
648 // on the type of the element. Currently, the size of stackElement is
649 // small enough (5 words) that this direct assignment seems to be enough.
652 #ifdef DEBUG_RETAINER
654 if (stackSize > maxStackSize) maxStackSize = stackSize;
655 // ASSERT(stackSize >= 0);
656 // debugBelch("stackSize = %d\n", stackSize);
660 /* -----------------------------------------------------------------------------
661 * popOff() and popOffReal(): Pop a stackElement off the traverse stack.
663 * stackTop cannot be equal to stackLimit unless the whole stack is
664 * empty, in which case popOff() is not allowed.
666 * You can think of popOffReal() as a part of popOff() which is
667 * executed at the end of popOff() if necessary. Since popOff() is
668 * likely to be executed quite often while popOffReal() is not, we
669 * separate popOffReal() from popOff(), which is declared as an
670 * INLINE function (for the sake of execution speed). popOffReal()
671 * is called only within popOff() and nowhere else.
672 * -------------------------------------------------------------------------- */
// Slow path of popOff(): the current block group is exhausted, so either
// report the whole stack empty or switch back to the previous block group.
// NOTE(review): the function header line is outside this extract.
676 bdescr *pbd; // Previous Block Descriptor
678 #ifdef DEBUG_RETAINER
679 // debugBelch("pop() to the previous stack.\n");
682 ASSERT(stackTop + 1 == stackLimit);
683 ASSERT(stackBottom == (stackElement *)currentStack->start);
685 if (firstStack == currentStack) {
686 // The stack is completely empty.
688 ASSERT(stackTop == stackLimit);
689 #ifdef DEBUG_RETAINER
691 if (stackSize > maxStackSize) maxStackSize = stackSize;
693 ASSERT(stackSize >= 0);
694 debugBelch("stackSize = %d\n", stackSize);
700 // currentStack->free is updated when the active stack is switched back
701 // to the previous stack.
702 currentStack->free = (StgPtr)stackLimit;
704 // find the previous block descriptor
705 pbd = currentStack->u.back;
708 returnToOldStack(pbd);
710 #ifdef DEBUG_RETAINER
712 if (stackSize > maxStackSize) maxStackSize = stackSize;
714 ASSERT(stackSize >= 0);
715 debugBelch("stackSize = %d\n", stackSize);
// Fast path of popOff(): bump stackTop within the current block group;
// delegate to popOffReal() when the group is exhausted.
// NOTE(review): the function header line is outside this extract.
722 #ifdef DEBUG_RETAINER
723 // debugBelch("\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
726 ASSERT(stackTop != stackLimit);
727 ASSERT(!isEmptyRetainerStack());
729 // <= (instead of <) is wrong!
730 if (stackTop + 1 < stackLimit) {
732 #ifdef DEBUG_RETAINER
734 if (stackSize > maxStackSize) maxStackSize = stackSize;
736 ASSERT(stackSize >= 0);
737 debugBelch("stackSize = %d\n", stackSize);
746 /* -----------------------------------------------------------------------------
747 * Finds the next object to be considered for retainer profiling and store
749 * Test if the topmost stack element indicates that more objects are left,
750 * and if so, retrieve the first object and store its pointer to *c. Also,
751 * set *cp and *r appropriately, both of which are stored in the stack element.
752 * The topmost stack element then is overwritten so as for it to now denote
754 * If the topmost stack element indicates no more objects are left, pop
755 * off the stack element until either an object can be retrieved or
756 * the current stack chunk becomes empty, indicated by rtsTrue returned by
757 * isOnBoundary(), in which case *c is set to NULL.
759 * It is okay to call this function even when the current stack chunk
761 * -------------------------------------------------------------------------- */
// Retrieve the next (child, parent, retainer) triple from the topmost
// stack element; sets *c to NULL when the current chunk is depleted.
// NOTE(review): many case labels, popOff() calls, and goto/return lines
// are outside this extract; comments annotate only the visible lines.
763 pop( StgClosure **c, StgClosure **cp, retainer *r )
767 #ifdef DEBUG_RETAINER
768 // debugBelch("pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
772 if (isOnBoundary()) { // if the current stack chunk is depleted
// Mirror of push(): dispatch on the type of the closure whose children
// are being enumerated (se->c).
779 switch (get_itbl(se->c)->type) {
780 // two children (fixed), no SRT
781 // nothing in se.info
783 *c = se->c->payload[1];
789 // three children (fixed), no SRT
790 // need to push a stackElement
// MVAR: step 2 yields tail, step 3 yields value.
792 if (se->info.next.step == 2) {
793 *c = (StgClosure *)((StgMVar *)se->c)->tail;
794 se->info.next.step++; // move to the next step
797 *c = ((StgMVar *)se->c)->value;
804 // three children (fixed), no SRT
// WEAK: step 2 yields value, step 3 yields finalizer.
806 if (se->info.next.step == 2) {
807 *c = ((StgWeak *)se->c)->value;
808 se->info.next.step++;
811 *c = ((StgWeak *)se->c)->finalizer;
822 // StgMutArrPtr.ptrs, no SRT
824 case MUT_ARR_PTRS_FROZEN:
825 case MUT_ARR_PTRS_FROZEN0:
826 *c = find_ptrs(&se->info);
835 // layout.payload.ptrs, SRT
836 case FUN: // always a heap object
// First exhaust the pointer payload, then fall through to the SRT.
838 if (se->info.type == posTypePtrs) {
839 *c = find_ptrs(&se->info);
845 init_srt_fun(&se->info, get_fun_itbl(se->c));
851 if (se->info.type == posTypePtrs) {
852 *c = find_ptrs(&se->info);
858 init_srt_thunk(&se->info, get_thunk_itbl(se->c));
874 *c = find_srt(&se->info);
883 // no child (fixed), no SRT
889 case SE_CAF_BLACKHOLE:
891 // one child (fixed), no SRT
895 case IND_OLDGEN_PERM:
905 case CONSTR_CHARLIKE:
906 case CONSTR_NOCAF_STATIC:
927 barf("Invalid object *c in pop()");
933 /* -----------------------------------------------------------------------------
934 * RETAINER PROFILING ENGINE
935 * -------------------------------------------------------------------------- */
// One-time setup: reset the retainer-set store and the generation counter.
938 initRetainerProfiling( void )
940 initializeAllRetainerSet();
941 retainerGeneration = 0;
944 /* -----------------------------------------------------------------------------
945 * This function must be called before f-closing prof_file.
946 * -------------------------------------------------------------------------- */
// Final output pass; under SECOND_APPROACH dumps every retainer set.
// Must be called before prof_file is closed (see comment above).
948 endRetainerProfiling( void )
950 #ifdef SECOND_APPROACH
951 outputAllRetainerSet(prof_file);
955 /* -----------------------------------------------------------------------------
956 * Returns the actual pointer to the retainer set of the closure *c.
957 * It may adjust RSET(c) subject to flip.
959 * RSET(c) is initialized to NULL if its current value does not
962 * Even though this function has side effects, they CAN be ignored because
963 * subsequent calls to retainerSetOf() always result in the same return value
964 * and retainerSetOf() is the only way to retrieve retainerSet of a given
966 * We have to perform an XOR (^) operation each time a closure is examined.
967 * The reason is that we do not know when a closure is visited last.
968 * -------------------------------------------------------------------------- */
// Ensure c's retainer-set field conforms to the current flip encoding,
// resetting it to (encoded) NULL if it is stale from a previous run.
970 maybeInitRetainerSet( StgClosure *c )
972 if (!isRetainerSetFieldValid(c)) {
973 setRetainerSetToNull(c);
977 /* -----------------------------------------------------------------------------
978 * Returns rtsTrue if *c is a retainer.
979 * -------------------------------------------------------------------------- */
// Classify closure types into retainers / non-retainers / invalid.
// NOTE(review): most case labels and the rtsTrue/rtsFalse returns are
// outside this extract; only the visible labels are annotated.
980 static INLINE rtsBool
981 isRetainer( StgClosure *c )
983 switch (get_itbl(c)->type) {
987 // TSOs MUST be retainers: they constitute the set of roots.
994 case MUT_ARR_PTRS_FROZEN:
995 case MUT_ARR_PTRS_FROZEN0:
997 // thunks are retainers.
1004 case THUNK_SELECTOR:
1008 // Static thunks, or CAFS, are obviously retainers.
1011 // WEAK objects are roots; there is separate code in which traversing
1012 // begins from WEAK objects.
1034 // partial applications
1040 case SE_CAF_BLACKHOLE:
1043 case IND_OLDGEN_PERM:
1057 // IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
1059 // CONSTR_INTLIKE, CONSTR_CHARLIKE, and CONSTR_NOCAF_STATIC
1060 // cannot be *c, *cp, *r in the retainer profiling loop.
1061 case CONSTR_INTLIKE:
1062 case CONSTR_CHARLIKE:
1063 case CONSTR_NOCAF_STATIC:
1064 // Stack objects are invalid because they are never treated as
1065 // legal objects during retainer profiling.
1083 case INVALID_OBJECT:
1085 barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
1090 /* -----------------------------------------------------------------------------
1091 * Returns the retainer function value for the closure *c, i.e., R(*c).
1092 * This function does NOT return the retainer(s) of *c.
1094 * *c must be a retainer.
1096 * Depending on the definition of this function, the maintenance of retainer
1097 * sets can be made easier. If most retainer sets are likely to be created
1098 * again across garbage collections, refreshAllRetainerSet() in
1099 * RetainerSet.c can simply do nothing.
1100 * If this is not the case, we can free all the retainer sets and
1101 * re-initialize the hash table.
1102 * See refreshAllRetainerSet() in RetainerSet.c.
1103 * -------------------------------------------------------------------------- */
// R(*c): map a retainer closure to its retainer identity, selected at
// build time from one of three schemes (info table / CCS / cost centre).
1104 static INLINE retainer
1105 getRetainerFrom( StgClosure *c )
1107 ASSERT(isRetainer(c));
1109 #if defined(RETAINER_SCHEME_INFO)
1110 // Retainer scheme 1: retainer = info table
1112 #elif defined(RETAINER_SCHEME_CCS)
1113 // Retainer scheme 2: retainer = cost centre stack
1114 return c->header.prof.ccs;
1115 #elif defined(RETAINER_SCHEME_CC)
1116 // Retainer scheme 3: retainer = cost centre
1117 return c->header.prof.ccs->cc;
1121 /* -----------------------------------------------------------------------------
1122 * Associates the retainer set *s with the closure *c, that is, *s becomes
1123 * the retainer set of *c.
1127 * -------------------------------------------------------------------------- */
// Record s as the retainer set of c, encoded with the current flip bit
// (see the rs-field encoding comment near the top of the file).
1129 associate( StgClosure *c, RetainerSet *s )
1131 // StgWord has the same size as pointers, so the following type
1133 RSET(c) = (RetainerSet *)((StgWord)s | flip);
1136 /* -----------------------------------------------------------------------------
1137 Call retainClosure for each of the closures covered by a large bitmap.
1138 -------------------------------------------------------------------------- */
// Retain each pointer in a stack region described by a large bitmap.
// For stack-frame bitmaps a 0 bit marks a pointer word (cf. GC.c).
1141 retain_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, nat size,
1142 StgClosure *c, retainer c_child_r)
1148 bitmap = large_bitmap->bitmap[b];
1149 for (i = 0; i < size; ) {
1150 if ((bitmap & 1) == 0) {
1151 retainClosure((StgClosure *)*p, c, c_child_r);
// Refill the bitmap word at each word boundary.
1155 if (i % BITS_IN(W_) == 0) {
1157 bitmap = large_bitmap->bitmap[b];
1159 bitmap = bitmap >> 1;
// Retain each pointer (0 bit) in a small-bitmap stack region; returns
// the updated scan pointer past the region.
1164 static INLINE StgPtr
1165 retain_small_bitmap (StgPtr p, nat size, StgWord bitmap,
1166 StgClosure *c, retainer c_child_r)
1169 if ((bitmap & 1) == 0) {
1170 retainClosure((StgClosure *)*p, c, c_child_r);
1173 bitmap = bitmap >> 1;
1179 /* -----------------------------------------------------------------------------
1180 * Call retainClosure for each of the closures in an SRT.
1181 * ------------------------------------------------------------------------- */
// Retain every closure marked live (1 bit) in a large SRT bitmap.
// Note the polarity differs from stack bitmaps: in SRTs a 1 bit means
// the entry is present.
1184 retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r)
1191 p = (StgClosure **)srt->srt;
1193 bitmap = srt->l.bitmap[b];
1194 for (i = 0; i < size; ) {
1195 if ((bitmap & 1) != 0) {
1196 retainClosure((StgClosure *)*p, c, c_child_r);
1200 if (i % BITS_IN(W_) == 0) {
1202 bitmap = srt->l.bitmap[b];
1204 bitmap = bitmap >> 1;
// Retain every closure in an SRT; an all-ones bitmap is the sentinel for
// a large out-of-line SRT bitmap (same convention as init_srt_fun).
1210 retainSRT (StgClosure **srt, nat srt_bitmap, StgClosure *c, retainer c_child_r)
1215 bitmap = srt_bitmap;
1218 if (bitmap == (StgHalfWord)(-1)) {
1219 retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r );
1223 while (bitmap != 0) {
1224 if ((bitmap & 1) != 0) {
1225 #ifdef ENABLE_WIN32_DLL_SUPPORT
// Win32 DLL: an odd (tagged) SRT entry is an indirection; untag first.
1226 if ( (unsigned long)(*srt) & 0x1 ) {
1227 retainClosure(*stgCast(StgClosure**,(stgCast(unsigned long, *srt) & ~0x1)),
1230 retainClosure(*srt,c,c_child_r);
1233 retainClosure(*srt,c,c_child_r);
1237 bitmap = bitmap >> 1;
1241 /* -----------------------------------------------------------------------------
1242 * Process all the objects in the stack chunk from stackStart to stackEnd
1243 * with *c and *c_child_r being their parent and their most recent retainer,
1244 * respectively. Treat stackOptionalFun as another child of *c if it is
1247 * *c is one of the following: TSO, AP_STACK.
1248 * If *c is TSO, c == c_child_r.
1249 * stackStart < stackEnd.
1250 * RSET(c) and RSET(c_child_r) are valid, i.e., their
1251 * interpretation conforms to the current value of flip (even when they
1252 * are interpreted to be NULL).
1253 * If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete,
1254 * or ThreadKilled, which means that its stack is ready to process.
1256 * This code was almost plagiarized from GC.c! For each pointer,
1257 * retainClosure() is invoked instead of evacuate().
1258 * -------------------------------------------------------------------------- */
// Walk the frames of a stack chunk [stackStart, stackEnd), invoking
// retainClosure() on every pointer (mirrors GC.c's scavenge_stack).
// NOTE(review): case labels, braces, and some statements are outside
// this extract; comments annotate only the visible lines.
1260 retainStack( StgClosure *c, retainer c_child_r,
1261 StgPtr stackStart, StgPtr stackEnd )
1263 stackElement *oldStackBoundary;
1265 StgRetInfoTable *info;
1269 #ifdef DEBUG_RETAINER
1271 if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
1275 Each invocation of retainStack() creates a new virtual
1276 stack. Since all such stacks share a single common stack, we
1277 record the current currentStackBoundary, which will be restored
1280 oldStackBoundary = currentStackBoundary;
1281 currentStackBoundary = stackTop;
1283 #ifdef DEBUG_RETAINER
1284 // debugBelch("retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
// Precondition: a TSO's stack is only processed in a runnable state.
1287 ASSERT(get_itbl(c)->type != TSO ||
1288 (((StgTSO *)c)->what_next != ThreadRelocated &&
1289 ((StgTSO *)c)->what_next != ThreadComplete &&
1290 ((StgTSO *)c)->what_next != ThreadKilled));
// Frame-by-frame scan, dispatching on the return info table type.
1293 while (p < stackEnd) {
1294 info = get_ret_itbl((StgClosure *)p);
1296 switch(info->i.type) {
1299 retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
1300 p += sizeofW(StgUpdateFrame);
// Small-bitmap frames: payload described by the inline bitmap, then SRT.
1307 bitmap = BITMAP_BITS(info->i.layout.bitmap);
1308 size = BITMAP_SIZE(info->i.layout.bitmap);
1310 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1313 retainSRT((StgClosure **)GET_SRT(info), info->i.srt_bitmap, c, c_child_r);
// BCO frame: the BCO itself is a child, then its own bitmap.
1320 retainClosure((StgClosure *)*p, c, c_child_r);
1323 size = BCO_BITMAP_SIZE(bco);
1324 retain_large_bitmap(p, BCO_BITMAP(bco), size, c, c_child_r);
1329 // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
1332 size = GET_LARGE_BITMAP(&info->i)->size;
1334 retain_large_bitmap(p, GET_LARGE_BITMAP(&info->i),
1335 size, c, c_child_r);
1337 // and don't forget to follow the SRT
1340 // Dynamic bitmap: the mask is stored on the stack
1343 dyn = ((StgRetDyn *)p)->liveness;
1345 // traverse the bitmap first
1346 bitmap = RET_DYN_LIVENESS(dyn);
1347 p = (P_)&((StgRetDyn *)p)->payload[0];
1348 size = RET_DYN_BITMAP_SIZE;
1349 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1351 // skip over the non-ptr words
1352 p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;
1354 // follow the ptr words
1355 for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
1356 retainClosure((StgClosure *)*p, c, c_child_r);
// RET_FUN frame: retain the function, then its payload per fun_type.
1363 StgRetFun *ret_fun = (StgRetFun *)p;
1364 StgFunInfoTable *fun_info;
1366 retainClosure(ret_fun->fun, c, c_child_r);
1367 fun_info = get_fun_itbl(ret_fun->fun);
1369 p = (P_)&ret_fun->payload;
1370 switch (fun_info->f.fun_type) {
1372 bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
1373 size = BITMAP_SIZE(fun_info->f.b.bitmap);
1374 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1377 size = GET_FUN_LARGE_BITMAP(fun_info)->size;
1378 retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
1379 size, c, c_child_r);
// Default: argument pattern determined by the standard arg bitmaps.
1383 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
1384 size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
1385 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1392 barf("Invalid object found in retainStack(): %d",
1393 (int)(info->i.type));
1397 // restore currentStackBoundary
1398 currentStackBoundary = oldStackBoundary;
1399 #ifdef DEBUG_RETAINER
1400 // debugBelch("retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
1403 #ifdef DEBUG_RETAINER
1408 /* ----------------------------------------------------------------------------
1409 * Call retainClosure for each of the children of a PAP/AP
1410 * ------------------------------------------------------------------------- */
// Retain the function and the argument payload of a PAP/AP, using the
// function's bitmap (small, large, BCO, or standard arg pattern).
// NOTE(review): case labels and the final return are outside this extract.
1412 static INLINE StgPtr
1413 retain_PAP_payload (StgClosure *pap, retainer c_child_r, StgClosure *fun,
1414 StgClosure** payload, StgWord n_args)
1418 StgFunInfoTable *fun_info;
1420 retainClosure(fun, pap, c_child_r);
1421 fun_info = get_fun_itbl(fun);
// A PAP's function can never itself be a PAP.
1422 ASSERT(fun_info->i.type != PAP);
1424 p = (StgPtr)payload;
1426 switch (fun_info->f.fun_type) {
1428 bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
1429 p = retain_small_bitmap(p, n_args, bitmap,
1433 retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
1434 n_args, pap, c_child_r);
1438 retain_large_bitmap((StgPtr)payload, BCO_BITMAP(fun),
1439 n_args, pap, c_child_r);
1443 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
1444 p = retain_small_bitmap(p, n_args, bitmap, pap, c_child_r);
1450 /* -----------------------------------------------------------------------------
1451 * Compute the retainer set of *c0 and all its descendants by traversing.
1452 * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
1454 * c0 = cp0 = r0 holds only for root objects.
1455 * RSET(cp0) and RSET(r0) are valid, i.e., their
1456 * interpretation conforms to the current value of flip (even when they
1457 * are interpreted to be NULL).
1458 * However, RSET(c0) may be corrupt, i.e., it may not conform to
1459 * the current value of flip. If it does not, during the execution
1460 * of this function, RSET(c0) must be initialized as well as all
1463 * stackTop must be the same at the beginning and the exit of this function.
1464 * *c0 can be TSO (as well as AP_STACK).
1465 * -------------------------------------------------------------------------- */
// NOTE(review): this listing is elided (embedded original line numbers jump);
// the goto labels ("loop", "inner_loop"), braces, and several case labels
// referenced below are among the missing lines.
1467 retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
1469 // c = Current closure
1470 // cp = Current closure's Parent
1471 // r = current closure's most recent Retainer
1472 // c_child_r = current closure's children's most recent retainer
1473 // first_child = first child of c
1474 StgClosure *c, *cp, *first_child;
1475 RetainerSet *s, *retainerSetOfc;
1476 retainer r, c_child_r;
1479 #ifdef DEBUG_RETAINER
1480 // StgPtr oldStackTop;
1483 #ifdef DEBUG_RETAINER
1484 // oldStackTop = stackTop;
1485 // debugBelch("retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
1488 // (c, cp, r) = (c0, cp0, r0)
1495 //debugBelch("loop");
1496 // pop to (c, cp, r);
1500 #ifdef DEBUG_RETAINER
1501 // debugBelch("retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
1506 //debugBelch("inner_loop");
1509 // c = current closure under consideration,
1510 // cp = current closure's parent,
1511 // r = current closure's most recent retainer
1513 // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
1514 // RSET(cp) and RSET(r) are valid.
1515 // RSET(c) is valid only if c has been visited before.
1517 // Loop invariants (on the relation between c, cp, and r)
1518 // if cp is not a retainer, r belongs to RSET(cp).
1519 // if cp is a retainer, r == cp.
1521 typeOfc = get_itbl(c)->type;
// Debug-only accounting: on the first visit, charge this closure's cost
// to its closure-type bucket.
1523 #ifdef DEBUG_RETAINER
1526 case CONSTR_INTLIKE:
1527 case CONSTR_CHARLIKE:
1528 case CONSTR_NOCAF_STATIC:
1534 if (retainerSetOf(c) == NULL) { // first visit?
1535 costArray[typeOfc] += cost(c);
1536 sumOfNewCost += cost(c);
// Finished/killed threads are ignored for retainer profiling; relocated
// threads are followed through their link field instead.
1545 if (((StgTSO *)c)->what_next == ThreadComplete ||
1546 ((StgTSO *)c)->what_next == ThreadKilled) {
1547 #ifdef DEBUG_RETAINER
1548 debugBelch("ThreadComplete or ThreadKilled encountered in retainClosure()\n");
1552 if (((StgTSO *)c)->what_next == ThreadRelocated) {
1553 #ifdef DEBUG_RETAINER
1554 debugBelch("ThreadRelocated encountered in retainClosure()\n");
1556 c = (StgClosure *)((StgTSO *)c)->link;
1562 // We just skip IND_STATIC, so its retainer set is never computed.
1563 c = ((StgIndStatic *)c)->indirectee;
1565 case CONSTR_INTLIKE:
1566 case CONSTR_CHARLIKE:
1567 // static objects with no pointers out, so goto loop.
1568 case CONSTR_NOCAF_STATIC:
1569 // It is not just enough not to compute the retainer set for *c; it is
1570 // mandatory because CONSTR_NOCAF_STATIC are not reachable from
1571 // scavenged_static_objects, the list from which is assumed to traverse
1572 // all static objects after major garbage collections.
1576 if (get_itbl(c)->srt_bitmap == 0) {
1577 // No need to compute the retainer set; no dynamic objects
1578 // are reachable from *c.
1580 // Static objects: if we traverse all the live closures,
1581 // including static closures, during each heap census then
1582 // we will observe that some static closures appear and
1583 // disappear. eg. a closure may contain a pointer to a
1584 // static function 'f' which is not otherwise reachable
1585 // (it doesn't indirectly point to any CAFs, so it doesn't
1586 // appear in any SRTs), so we would find 'f' during
1587 // traversal. However on the next sweep there may be no
1588 // closures pointing to 'f'.
1590 // We must therefore ignore static closures whose SRT is
1591 // empty, because these are exactly the closures that may
1592 // "appear". A closure with a non-empty SRT, and which is
1593 // still required, will always be reachable.
1595 // But what about CONSTR_STATIC? Surely these may be able
1596 // to appear, and they don't have SRTs, so we can't
1597 // check. So for now, we're calling
1598 // resetStaticObjectForRetainerProfiling() from the
1599 // garbage collector to reset the retainer sets in all the
1600 // reachable static objects.
1607 // The above objects are ignored in computing the average number of times
1608 // an object is visited.
1609 timesAnyObjectVisited++;
1611 // If this is the first visit to c, initialize its retainer set.
1612 maybeInitRetainerSet(c);
1613 retainerSetOfc = retainerSetOf(c);
1616 // isRetainer(cp) == rtsTrue => s == NULL
1617 // isRetainer(cp) == rtsFalse => s == cp.retainer
1621 s = retainerSetOf(cp);
1623 // (c, cp, r, s) is available.
1625 // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
1626 if (retainerSetOfc == NULL) {
1627 // This is the first visit to *c.
1631 associate(c, singleton(r));
1633 // s is actually the retainer set of *c!
1636 // compute c_child_r
// c's children are retained by c itself if c is a retainer,
// otherwise by c's own most recent retainer r.
1637 c_child_r = isRetainer(c) ? getRetainerFrom(c) : r;
1639 // This is not the first visit to *c.
1640 if (isMember(r, retainerSetOfc))
1641 goto loop; // no need to process child
1644 associate(c, addElement(r, retainerSetOfc));
1646 // s is not NULL and cp is not a retainer. This means that
1647 // each time *cp is visited, so is *c. Thus, if s has
1648 // exactly one more element in its retainer set than c, s
1649 // is also the new retainer set for *c.
1650 if (s->num == retainerSetOfc->num + 1) {
1653 // Otherwise, just add R_r to the current retainer set of *c.
1655 associate(c, addElement(r, retainerSetOfc));
1660 goto loop; // no need to process child
1662 // compute c_child_r
1666 // now, RSET() of all of *c, *cp, and *r is valid.
1667 // (c, c_child_r) are available.
1671 // Special case closures: we process these all in one go rather
1672 // than attempting to save the current position, because doing so
// TSO: retain everything reachable from the thread's stack.
1676 retainStack(c, c_child_r,
1678 ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
1683 StgPAP *pap = (StgPAP *)c;
1684 retain_PAP_payload(c, c_child_r, pap->fun, pap->payload, pap->n_args);
1690 StgAP *ap = (StgAP *)c;
1691 retain_PAP_payload(c, c_child_r, ap->fun, ap->payload, ap->n_args);
// AP_STACK: retain the function, then the captured stack chunk.
1696 retainClosure(((StgAP_STACK *)c)->fun, c, c_child_r);
1697 retainStack(c, c_child_r,
1698 (StgPtr)((StgAP_STACK *)c)->payload,
1699 (StgPtr)((StgAP_STACK *)c)->payload +
1700 ((StgAP_STACK *)c)->size);
// Ordinary closures: push remaining children on the traverse stack and
// descend into the first child.
1704 push(c, c_child_r, &first_child);
1706 // If first_child is null, c has no child.
1707 // If first_child is not null, the top stack element points to the next
1708 // object. push() may or may not push a stackElement on the stack.
1709 if (first_child == NULL)
1712 // (c, cp, r) = (first_child, c, c_child_r)
1719 /* -----------------------------------------------------------------------------
1720 * Compute the retainer set for every object reachable from *tl.
1721 * -------------------------------------------------------------------------- */
1723 retainRoot( StgClosure **tl )
1725 // We no longer assume that only TSOs and WEAKs are roots; any closure can
// Each root starts a fresh traversal: the traverse stack must be empty,
// and currentStackBoundary marks where this root's traversal begins.
1728 ASSERT(isEmptyRetainerStack());
1729 currentStackBoundary = stackTop;
// A root that is itself a retainer starts out retained by itself;
// otherwise CCS_SYSTEM is used as its initial retainer.
1731 if (isRetainer(*tl)) {
1732 retainClosure(*tl, *tl, getRetainerFrom(*tl));
1734 retainClosure(*tl, *tl, CCS_SYSTEM);
1737 // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
1738 // *tl might be a TSO which is ThreadComplete, in which
1739 // case we ignore it for the purposes of retainer profiling.
1742 /* -----------------------------------------------------------------------------
1743 * Compute the retainer set for each of the objects in the heap.
1744 * -------------------------------------------------------------------------- */
1746 computeRetainerSet( void )
1753 #ifdef DEBUG_RETAINER
1754 RetainerSet tmpRetainerSet;
1757 GetRoots(retainRoot); // for scheduler roots
1759 // This function is called after a major GC, when key, value, and finalizer
1760 // all are guaranteed to be valid, or reachable.
1762 // The following code assumes that WEAK objects are considered to be roots
1763 // for retainer profiling.
1764 for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
1765 // retainRoot((StgClosure *)weak);
// NOTE(review): &weak is the address of the local iteration variable, not
// of a field in the weak-pointer list; retainRoot dereferences it to reach
// the StgWeak closure. Verify this is intended (cf. the commented-out call
// above, which passed the closure directly).
1766 retainRoot((StgClosure **)&weak);
1768 // Consider roots from the stable ptr table.
1769 markStablePtrTable(retainRoot);
1771 // The following code resets the rs field of each unvisited mutable
1772 // object (computing sumOfNewCostExtra and updating costArray[] when
1773 // debugging retainer profiler).
1774 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1775 ASSERT(g != 0 || (generations[g].mut_list == NULL));
1777 // Traversing through mut_list is necessary
1778 // because we can find MUT_VAR objects which have not been
1779 // visited during retainer profiling.
1780 for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
1781 for (ml = bd->start; ml < bd->free; ml++) {
1783 maybeInitRetainerSet((StgClosure *)*ml);
1784 rtl = retainerSetOf((StgClosure *)*ml);
1786 #ifdef DEBUG_RETAINER
1788 // first visit to *ml
1789 // This is a violation of the interface rule!
1790 RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);
1792 switch (get_itbl((StgClosure *)ml)->type) {
1796 case CONSTR_INTLIKE:
1797 case CONSTR_CHARLIKE:
1798 case CONSTR_NOCAF_STATIC:
1802 barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
1806 costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
1807 sumOfNewCostExtra += cost((StgClosure *)ml);
1817 /* -----------------------------------------------------------------------------
1818 * Traverse all static objects for which we compute retainer sets,
1819 * and reset their rs fields to NULL, which is accomplished by
1820 * invoking maybeInitRetainerSet(). This function must be called
1821 * before zeroing all objects reachable from scavenged_static_objects
1822 * in the case of major garbage collections. See GarbageCollect() in
1825 * The mut_once_list of the oldest generation must also be traversed?
1826 * Why? Because if the evacuation of an object pointed to by a static
1827 * indirection object fails, it is put back to the mut_once_list of
1828 * the oldest generation.
1829 * However, this is not necessary because any static indirection objects
1830 * are just traversed through to reach dynamic objects. In other words,
1831 * they are not taken into consideration in computing retainer sets.
1832 * -------------------------------------------------------------------------- */
1834 resetStaticObjectForRetainerProfiling( void )
1836 #ifdef DEBUG_RETAINER
1841 #ifdef DEBUG_RETAINER
// Walk the scavenged_static_objects list, following each object's
// type-specific static link field.
1844 p = scavenged_static_objects;
1845 while (p != END_OF_STATIC_LIST) {
1846 #ifdef DEBUG_RETAINER
1849 switch (get_itbl(p)->type) {
1851 // Since we do not compute the retainer set of any
1852 // IND_STATIC object, we don't have to reset its retainer
1854 p = (StgClosure*)*IND_STATIC_LINK(p);
1857 maybeInitRetainerSet(p);
1858 p = (StgClosure*)*THUNK_STATIC_LINK(p);
1861 maybeInitRetainerSet(p);
1862 p = (StgClosure*)*FUN_STATIC_LINK(p);
1865 maybeInitRetainerSet(p);
1866 p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
// NOTE(review): the "%s" specifier is paired with get_itbl(p)->type, which
// by its other uses in this file is an integer closure type — looks like a
// format-string mismatch; should probably be "%d" (or print a type name).
1869 barf("resetStaticObjectForRetainerProfiling: %p (%s)",
1870 p, get_itbl(p)->type);
1874 #ifdef DEBUG_RETAINER
1875 // debugBelch("count in scavenged_static_objects = %d\n", count);
1879 /* -----------------------------------------------------------------------------
1880 * Perform retainer profiling.
1881 * N is the oldest generation being profiled, where the generations are
1882 * numbered starting at 0.
1885 * This function should be called only immediately after major garbage
1887 * ------------------------------------------------------------------------- */
1889 retainerProfile(void)
1891 #ifdef DEBUG_RETAINER
1893 nat totalHeapSize; // total raw heap size (computed by linear scanning)
1896 #ifdef DEBUG_RETAINER
1897 debugBelch(" < retainerProfile() invoked : %d>\n", retainerGeneration);
1902 // We haven't flipped the bit yet.
// Debug pre-pass: linearly scan the heap and record per-type costs so the
// traversal's results can be cross-checked afterwards.
1903 #ifdef DEBUG_RETAINER
1904 debugBelch("Before traversing:\n");
1905 sumOfCostLinear = 0;
1906 for (i = 0;i < N_CLOSURE_TYPES; i++)
1907 costArrayLinear[i] = 0;
1908 totalHeapSize = checkHeapSanityForRetainerProfiling();
1910 debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
1912 debugBelch("costArrayLinear[] = ");
1913 for (i = 0;i < N_CLOSURE_TYPES; i++)
1914 debugBelch("[%u:%u] ", i, costArrayLinear[i]);
1918 ASSERT(sumOfCostLinear == totalHeapSize);
1921 #define pcostArrayLinear(index) \
1922 if (costArrayLinear[index] > 0) \
1923 debugBelch("costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
1924 pcostArrayLinear(THUNK_STATIC);
1925 pcostArrayLinear(FUN_STATIC);
1926 pcostArrayLinear(CONSTR_STATIC);
1927 pcostArrayLinear(CONSTR_NOCAF_STATIC);
1928 pcostArrayLinear(CONSTR_INTLIKE);
1929 pcostArrayLinear(CONSTR_CHARLIKE);
1933 // Now we flip the flip bit.
1936 #ifdef DEBUG_RETAINER
// Reset traversal statistics for this profiling run.
1942 numObjectVisited = 0;
1943 timesAnyObjectVisited = 0;
1945 #ifdef DEBUG_RETAINER
1946 debugBelch("During traversing:\n");
1948 sumOfNewCostExtra = 0;
1949 for (i = 0;i < N_CLOSURE_TYPES; i++)
1954 We initialize the traverse stack each time the retainer profiling is
1955 performed (because the traverse stack size varies on each retainer profiling
1956 and this operation is not costly anyhow). However, we just refresh the
1959 initializeTraverseStack();
1960 #ifdef DEBUG_RETAINER
1961 initializeAllRetainerSet();
1963 refreshAllRetainerSet();
1965 computeRetainerSet();
// Debug post-pass: re-scan linearly and compare against the traversal.
1967 #ifdef DEBUG_RETAINER
1968 debugBelch("After traversing:\n");
1969 sumOfCostLinear = 0;
1970 for (i = 0;i < N_CLOSURE_TYPES; i++)
1971 costArrayLinear[i] = 0;
1972 totalHeapSize = checkHeapSanityForRetainerProfiling();
1974 debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
1975 ASSERT(sumOfCostLinear == totalHeapSize);
1977 // now, compare the two results
1980 costArray[] must be exactly the same as costArrayLinear[].
1982 1) Dead weak pointers, whose type is CONSTR. These objects are not
1983 reachable from any roots.
1985 debugBelch("Comparison:\n");
1986 debugBelch("\tcostArrayLinear[] (must be empty) = ");
1987 for (i = 0;i < N_CLOSURE_TYPES; i++)
1988 if (costArray[i] != costArrayLinear[i])
1989 // nothing should be printed except MUT_VAR after major GCs
1990 debugBelch("[%u:%u] ", i, costArrayLinear[i]);
1993 debugBelch("\tsumOfNewCost = %u\n", sumOfNewCost);
1994 debugBelch("\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
1995 debugBelch("\tcostArray[] (must be empty) = ");
1996 for (i = 0;i < N_CLOSURE_TYPES; i++)
1997 if (costArray[i] != costArrayLinear[i])
1998 // nothing should be printed except MUT_VAR after major GCs
1999 debugBelch("[%u:%u] ", i, costArray[i]);
2002 // only for major garbage collection
2003 ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
2007 closeTraverseStack();
2008 #ifdef DEBUG_RETAINER
2009 closeAllRetainerSet();
2011 // Note that there is no post-processing for the retainer sets.
2013 retainerGeneration++;
2016 retainerGeneration - 1, // retainerGeneration has just been incremented!
2017 #ifdef DEBUG_RETAINER
2018 maxCStackSize, maxStackSize,
// Average number of visits per object over this run.
2020 (double)timesAnyObjectVisited / numObjectVisited);
2023 /* -----------------------------------------------------------------------------
2025 * -------------------------------------------------------------------------- */
2027 #ifdef DEBUG_RETAINER
// A pointer "looks like" a closure pointer if it is a static closure, or
// heap-allocated in a block not marked BF_FREE, and its first word is not
// the 0xaaaaaaaa fill pattern.
2029 #define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
2030 ((HEAP_ALLOCED(r) && ((Bdescr((P_)r)->flags & BF_FREE) == 0)))) && \
2031 ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa))
// Sanity-check a single heap closure and return its size in words,
// dispatching on the closure type. Also reports closures whose rs field
// does not conform to the current flip bit (i.e. unvisited objects).
2034 sanityCheckHeapClosure( StgClosure *c )
2038 ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
2039 ASSERT(!closure_STATIC(c));
2040 ASSERT(LOOKS_LIKE_PTR(c));
// Low bit of RSET(c) disagreeing with flip means c was never visited.
2042 if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
// Dead weak pointers (identified by their profiling strings) are the one
// expected kind of unvisited object; charge their cost and carry on.
2043 if (get_itbl(c)->type == CONSTR &&
2044 !strcmp(get_itbl(c)->prof.closure_type, "DEAD_WEAK") &&
2045 !strcmp(get_itbl(c)->prof.closure_desc, "DEAD_WEAK")) {
2046 debugBelch("\tUnvisited dead weak pointer object found: c = %p\n", c);
2047 costArray[get_itbl(c)->type] += cost(c);
2048 sumOfNewCost += cost(c);
2051 "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
2052 flip, c, get_itbl(c)->type,
2053 get_itbl(c)->prof.closure_type, get_itbl(c)->prof.closure_desc,
2056 // debugBelch("sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
// Size computation per closure type (case labels partly elided here).
2060 switch (info->type) {
2062 return tso_sizeW((StgTSO *)c);
2070 return stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);
2073 return sizeofW(StgMVar);
2076 case MUT_ARR_PTRS_FROZEN:
2077 case MUT_ARR_PTRS_FROZEN0:
2078 return mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);
2082 return pap_sizeW((StgPAP *)c);
2085 return ap_stack_sizeW((StgAP_STACK *)c);
2088 return arr_words_sizeW((StgArrWords *)c);
2107 case SE_CAF_BLACKHOLE:
2110 case IND_OLDGEN_PERM:
2113 return sizeW_fromITBL(info);
2115 case THUNK_SELECTOR:
2116 return sizeofW(StgHeader) + MIN_UPD_SIZE;
2125 case CONSTR_INTLIKE:
2126 case CONSTR_CHARLIKE:
2127 case CONSTR_NOCAF_STATIC:
2144 case INVALID_OBJECT:
2146 barf("Invalid object in sanityCheckHeapClosure(): %d",
// Linearly scan a chain of blocks, sanity-checking each closure and
// accumulating per-type costs into costArrayLinear / sumOfCostLinear.
2153 heapCheck( bdescr *bd )
// NOTE(review): costSum/size are 'static' — their reset (if any) is in
// lines elided from this listing; confirm costSum is zeroed per call.
2156 static nat costSum, size;
2159 while (bd != NULL) {
2161 while (p < bd->free) {
2162 size = sanityCheckHeapClosure((StgClosure *)p);
2163 sumOfCostLinear += size;
2164 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2166 // no need for slop check; I think slops are not used currently.
2168 ASSERT(p == bd->free);
2169 costSum += bd->free - bd->start;
// Sanity-check the small-object allocation area: first the partially
// filled nursery block (up to alloc_Hp), then the rest of
// small_alloc_list, accumulating costs as in heapCheck().
2177 smallObjectPoolCheck(void)
// NOTE(review): costSum/size are 'static'; the reset is in elided lines.
2181 static nat costSum, size;
2183 bd = small_alloc_list;
// Scan the current allocation block up to the allocation pointer.
2191 while (p < alloc_Hp) {
2192 size = sanityCheckHeapClosure((StgClosure *)p);
2193 sumOfCostLinear += size;
2194 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2197 ASSERT(p == alloc_Hp);
2198 costSum += alloc_Hp - bd->start;
// Scan the remaining fully-filled blocks in the list.
2201 while (bd != NULL) {
2203 while (p < bd->free) {
2204 size = sanityCheckHeapClosure((StgClosure *)p);
2205 sumOfCostLinear += size;
2206 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2209 ASSERT(p == bd->free);
2210 costSum += bd->free - bd->start;
// Sanity-check a chain of large objects: each block holds a single
// closure at bd->start, whose size is computed by sanityCheckHeapClosure().
2218 chainCheck(bdescr *bd)
2223 while (bd != NULL) {
2224 // bd->free - bd->start is not an accurate measurement of the
2225 // object size. Actually it is always zero, so we compute its
2227 size = sanityCheckHeapClosure((StgClosure *)bd->start);
2228 sumOfCostLinear += size;
2229 costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size;
// Walk the whole heap (all generations/steps, plus the small-object pool
// and large-object chains) and return the accumulated cost, used by the
// DEBUG_RETAINER cross-check in retainerProfile().
2238 checkHeapSanityForRetainerProfiling( void )
2243 debugBelch("START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
// Single-generation (two-space) collector: check the to-space and the
// large-object chain directly.
2244 if (RtsFlags.GcFlags.generations == 1) {
2245 costSum += heapCheck(g0s0->to_blocks);
2246 debugBelch("heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2247 costSum += chainCheck(g0s0->large_objects);
2248 debugBelch("chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2250 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
2251 for (s = 0; s < generations[g].n_steps; s++) {
2253 After all live objects have been scavenged, the garbage
2254 collector may create some objects in
2255 scheduleFinalizers(). These objects are created through
2256 allocate(), so the small object pool or the large object
2257 pool of the g0s0 may not be empty.
2259 if (g == 0 && s == 0) {
2260 costSum += smallObjectPoolCheck();
2261 debugBelch("smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2262 costSum += chainCheck(generations[g].steps[s].large_objects);
2263 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2265 costSum += heapCheck(generations[g].steps[s].blocks);
2266 debugBelch("heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2267 costSum += chainCheck(generations[g].steps[s].large_objects);
2268 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
// Debugging aid: scan every word in every generation/step (ordinary and
// large blocks) for a word equal to p, then walk backwards from the hit to
// the nearest info pointer and report the enclosing location.
2277 findPointer(StgPtr p)
2283 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2284 for (s = 0; s < generations[g].n_steps; s++) {
2285 // if (g == 0 && s == 0) continue;
2286 bd = generations[g].steps[s].blocks;
2287 for (; bd; bd = bd->link) {
2288 for (q = bd->start; q < bd->free; q++) {
2289 if (*q == (StgWord)p) {
// Back up to the start of the enclosing closure (its info pointer).
2291 while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
2292 debugBelch("Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
2297 bd = generations[g].steps[s].large_objects;
2298 for (; bd; bd = bd->link) {
// Large blocks hold one closure; cost() gives its extent in words.
2299 e = bd->start + cost((StgClosure *)bd->start);
2300 for (q = bd->start; q < e; q++) {
2301 if (*q == (StgWord)p) {
2303 while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
2304 debugBelch("Found in gen[%d], large_objects: %p\n", g, r);
// Debugging aid: report which generation/step (or large-object chain)
// contains the address p, by range-checking it against every block.
2314 belongToHeap(StgPtr p)
2319 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2320 for (s = 0; s < generations[g].n_steps; s++) {
2321 // if (g == 0 && s == 0) continue;
2322 bd = generations[g].steps[s].blocks;
2323 for (; bd; bd = bd->link) {
2324 if (bd->start <= p && p < bd->free) {
2325 debugBelch("Belongs to gen[%d], step[%d]", g, s);
2329 bd = generations[g].steps[s].large_objects;
2330 for (; bd; bd = bd->link) {
// For large objects the valid range ends at start + closure size.
2331 if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
2332 debugBelch("Found in gen[%d], large_objects: %p\n", g, bd->start);
2339 #endif /* DEBUG_RETAINER */
2341 #endif /* PROFILING */