1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team, 2001
8 * ---------------------------------------------------------------------------*/
12 // Turn off inlining when debugging - it obfuscates things
19 #include "PosixSource.h"
23 #include "RetainerProfile.h"
24 #include "RetainerSet.h"
28 #include "sm/Sanity.h"
29 #include "Profiling.h"
33 #include "sm/Storage.h" // for END_OF_STATIC_LIST
36 Note: what to change in order to plug-in a new retainer profiling scheme?
37 (1) type retainer in ../includes/StgRetainerProf.h
38 (2) retainer function R(), i.e., getRetainerFrom()
39 (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
40 in RetainerSet.h, if needed.
41 (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
44 /* -----------------------------------------------------------------------------
46 * -------------------------------------------------------------------------- */
48 static nat retainerGeneration; // generation
50 static nat numObjectVisited; // total number of objects visited
51 static nat timesAnyObjectVisited; // number of times any objects are visited
54 The rs field in the profile header of any object points to its retainer
55 set in an indirect way: if flip is 0, it points to the retainer set;
56 if flip is 1, it points to the next byte after the retainer set (even
57 for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
58 pointer. See retainerSetOf().
61 StgWord flip = 0; // flip bit
62 // must be 0 if DEBUG_RETAINER is on (for static closures)
64 #define setRetainerSetToNull(c) \
65 (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)
67 static void retainStack(StgClosure *, retainer, StgPtr, StgPtr);
68 static void retainClosure(StgClosure *, StgClosure *, retainer);
70 static void belongToHeap(StgPtr p);
75 cStackSize records how many times retainStack() has been invoked recursively,
76 that is, the number of activation records for retainStack() on the C stack.
77 maxCStackSize records its max value.
79 cStackSize <= maxCStackSize
81 static nat cStackSize, maxCStackSize;
83 static nat sumOfNewCost; // sum of the cost of each object, computed
84 // when the object is first visited
85 static nat sumOfNewCostExtra; // for those objects not visited during
86 // retainer profiling, e.g., MUT_VAR
87 static nat costArray[N_CLOSURE_TYPES];
89 nat sumOfCostLinear; // sum of the costs of all object, computed
90 // when linearly traversing the heap after
92 nat costArrayLinear[N_CLOSURE_TYPES];
95 /* -----------------------------------------------------------------------------
96 * Retainer stack - header
98 * Although the retainer stack implementation could be separated *
99 * from the retainer profiling engine, there does not seem to be
100 * any advantage in doing that; retainer stack is an integral part
101 * of retainer profiling engine and cannot be used elsewhere at
103 * -------------------------------------------------------------------------- */
113 // fixed layout or layout specified by a field in the closure
118 // See StgClosureInfo in InfoTables.h
119 #if SIZEOF_VOID_P == 8
156 firstStack points to the first block group.
157 currentStack points to the block group currently being used.
158 currentStack->free == stackLimit.
159 stackTop points to the topmost byte in the stack of currentStack.
160 Unless the whole stack is empty, stackTop must point to the topmost
161 object (or byte) in the whole stack. Thus, it is only when the whole stack
162 is empty that stackTop == stackLimit (not during the execution of push()
164 stackBottom == currentStack->start.
165 stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
167 When a current stack becomes empty, stackTop is set to point to
168 the topmost element on the previous block group so as to satisfy
169 the invariants described above.
171 static bdescr *firstStack = NULL;
172 static bdescr *currentStack;
173 static stackElement *stackBottom, *stackTop, *stackLimit;
176 currentStackBoundary is used to mark the current stack chunk.
177 If stackTop == currentStackBoundary, it means that the current stack chunk
178 is empty. It is the responsibility of the user to keep currentStackBoundary
179 valid all the time if it is to be employed.
181 static stackElement *currentStackBoundary;
184 stackSize records the current size of the stack.
185 maxStackSize records its high water mark.
187 stackSize <= maxStackSize
189 stackSize is just an estimate measure of the depth of the graph. The reason
190 is that some heap objects have only a single child and may not result
191 in a new element being pushed onto the stack. Therefore, at the end of
192 retainer profiling, maxStackSize + maxCStackSize is some value no greater
193 than the actual depth of the graph.
195 #ifdef DEBUG_RETAINER
196 static int stackSize, maxStackSize;
199 // number of blocks allocated for one stack
200 #define BLOCKS_IN_STACK 1
202 /* -----------------------------------------------------------------------------
203 * Add a new block group to the stack.
205 * currentStack->link == s.
206 * -------------------------------------------------------------------------- */
// Make *bd the active traverse-stack chunk.  The stack grows downward, so
// an empty chunk has stackTop == stackLimit, both pointing one past the end
// of the block group's memory.
208 newStackBlock( bdescr *bd )
211 stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
212 stackBottom = (stackElement *)bd->start;
213 stackLimit = (stackElement *)stackTop;
// bd->free is set to the limit; it is updated to the real stackTop when we
// switch away from this chunk (see push()).
214 bd->free = (StgPtr)stackLimit;
217 /* -----------------------------------------------------------------------------
218 * Return to the previous block group.
220 * s->link == currentStack.
221 * -------------------------------------------------------------------------- */
// Reactivate the previous stack chunk *bd.  stackTop resumes at bd->free,
// which holds the topmost element of this chunk as saved when we switched
// away from it.
223 returnToOldStack( bdescr *bd )
226 stackTop = (stackElement *)bd->free;
227 stackBottom = (stackElement *)bd->start;
228 stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
229 bd->free = (StgPtr)stackLimit;
232 /* -----------------------------------------------------------------------------
233 * Initializes the traverse stack.
234 * -------------------------------------------------------------------------- */
// (Re)create the traverse stack: free any previous chain of chunks, then
// allocate a fresh group of BLOCKS_IN_STACK blocks and make it the only
// (and current) chunk.
236 initializeTraverseStack( void )
238 if (firstStack != NULL) {
239 freeChain(firstStack);
242 firstStack = allocGroup(BLOCKS_IN_STACK);
243 firstStack->link = NULL;
// u.back links chunks into a backward chain; the first chunk has no
// predecessor.
244 firstStack->u.back = NULL;
246 newStackBlock(firstStack);
249 /* -----------------------------------------------------------------------------
250 * Frees all the block groups in the traverse stack.
253 * -------------------------------------------------------------------------- */
// Release every block group in the traverse stack chain.
255 closeTraverseStack( void )
257 freeChain(firstStack);
261 /* -----------------------------------------------------------------------------
262 * Returns rtsTrue if the whole stack is empty.
263 * -------------------------------------------------------------------------- */
// The whole stack is empty exactly when we are back in the first chunk and
// that chunk itself is empty (stackTop == stackLimit).
264 static INLINE rtsBool
265 isEmptyRetainerStack( void )
267 return (firstStack == currentStack) && stackTop == stackLimit;
270 /* -----------------------------------------------------------------------------
271 * Returns size of stack
272 * -------------------------------------------------------------------------- */
// Walk the whole chunk chain starting at firstStack.
// NOTE(review): the per-chunk accumulation is outside the visible lines --
// presumably it sums bd->blocks to report the stack's size; confirm against
// the full source.
275 retainerStackBlocks( void )
280 for (bd = firstStack; bd != NULL; bd = bd->link)
287 /* -----------------------------------------------------------------------------
288 * Returns rtsTrue if stackTop is at the stack boundary of the current stack,
289 * i.e., if the current stack chunk is empty.
290 * -------------------------------------------------------------------------- */
// True iff the current virtual stack chunk (delimited by
// currentStackBoundary) is empty, i.e. pop() has nothing left to deliver
// for the current retainStack() invocation.
291 static INLINE rtsBool
294 return stackTop == currentStackBoundary;
297 /* -----------------------------------------------------------------------------
298 * Initializes *info from ptrs and payload.
300 * payload[] begins with ptrs pointers followed by non-pointers.
301 * -------------------------------------------------------------------------- */
// Set up *info to iterate over the 'ptrs' pointer fields starting at
// 'payload'; find_ptrs() consumes this position record.
303 init_ptrs( stackPos *info, nat ptrs, StgPtr payload )
305 info->type = posTypePtrs;
306 info->next.ptrs.pos = 0;
307 info->next.ptrs.ptrs = ptrs;
308 info->next.ptrs.payload = payload;
311 /* -----------------------------------------------------------------------------
312 * Find the next object from *info.
313 * -------------------------------------------------------------------------- */
// Return the next pointer field recorded in *info, advancing the cursor.
// The exhausted case (all 'ptrs' fields consumed) returns NULL in the
// branch outside the visible lines.
314 static INLINE StgClosure *
315 find_ptrs( stackPos *info )
317 if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
318 return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
324 /* -----------------------------------------------------------------------------
325 * Initializes *info from SRT information stored in *infoTable.
326 * -------------------------------------------------------------------------- */
// Set up *info from the SRT of a function info table.  An srt_bitmap of
// all ones ((StgHalfWord)(-1)) marks a large SRT with an out-of-line
// bitmap; otherwise the bitmap itself enumerates the SRT entries.
328 init_srt_fun( stackPos *info, StgFunInfoTable *infoTable )
330 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
331 info->type = posTypeLargeSRT;
332 info->next.large_srt.srt = (StgLargeSRT *)GET_FUN_SRT(infoTable);
333 info->next.large_srt.offset = 0;
335 info->type = posTypeSRT;
336 info->next.srt.srt = (StgClosure **)GET_FUN_SRT(infoTable);
337 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
// Thunk counterpart of init_srt_fun(): set up *info from the SRT of a
// thunk info table, distinguishing large (out-of-line bitmap) and small
// SRTs by the all-ones srt_bitmap sentinel.
342 init_srt_thunk( stackPos *info, StgThunkInfoTable *infoTable )
344 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
345 info->type = posTypeLargeSRT;
346 info->next.large_srt.srt = (StgLargeSRT *)GET_SRT(infoTable);
347 info->next.large_srt.offset = 0;
349 info->type = posTypeSRT;
350 info->next.srt.srt = (StgClosure **)GET_SRT(infoTable);
351 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
355 /* -----------------------------------------------------------------------------
356 * Find the next object from *info.
357 * -------------------------------------------------------------------------- */
// Return the next closure recorded in the SRT position *info, advancing
// the cursor, for both the small-bitmap (posTypeSRT) and large-bitmap
// (posTypeLargeSRT) representations.  The exhausted/NULL paths fall
// outside the visible lines.
358 static INLINE StgClosure *
359 find_srt( stackPos *info )
364 if (info->type == posTypeSRT) {
// Small SRT: scan the bitmap one bit at a time; a set bit means the
// corresponding SRT slot holds a closure to return.
366 bitmap = info->next.srt.srt_bitmap;
367 while (bitmap != 0) {
368 if ((bitmap & 1) != 0) {
// NOTE(review): on Windows PIC builds an SRT entry with its low bit set
// appears to be an indirection that must be untagged and dereferenced --
// confirm against the full source.
369 #if defined(__PIC__) && defined(mingw32_HOST_OS)
370 if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
371 c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
373 c = *(info->next.srt.srt);
375 c = *(info->next.srt.srt);
// Save the advanced cursor before returning c.
377 bitmap = bitmap >> 1;
378 info->next.srt.srt++;
379 info->next.srt.srt_bitmap = bitmap;
382 bitmap = bitmap >> 1;
383 info->next.srt.srt++;
385 // bitmap is now zero...
// Large SRT: 'i' indexes bits across the multi-word bitmap.
390 nat i = info->next.large_srt.offset;
393 // Follow the pattern from GC.c:scavenge_large_srt_bitmap().
394 bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
395 bitmap = bitmap >> (i % BITS_IN(StgWord));
396 while (i < info->next.large_srt.srt->l.size) {
397 if ((bitmap & 1) != 0) {
398 c = ((StgClosure **)info->next.large_srt.srt->srt)[i];
// Remember where to resume on the next call.
400 info->next.large_srt.offset = i;
// Refill the bitmap word at each word boundary.
404 if (i % BITS_IN(W_) == 0) {
405 bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
407 bitmap = bitmap >> 1;
410 // reached the end of this bitmap.
411 info->next.large_srt.offset = i;
416 /* -----------------------------------------------------------------------------
417 * push() pushes a stackElement representing the next child of *c
418 * onto the traverse stack. If *c has no child, *first_child is set
419 * to NULL and nothing is pushed onto the stack. If *c has only one
420 * child, *c_child is set to that child and nothing is pushed onto
421 * the stack. If *c has more than two children, *first_child is set
422 * to the first child and a stackElement representing the second
423 * child is pushed onto the stack.
426 * *c_child_r is the most recent retainer of *c's children.
427 * *c is not any of TSO, AP, PAP, AP_STACK, which means that
428 * there cannot be any stack objects.
429 * Note: SRTs are considered to be children as well.
430 * -------------------------------------------------------------------------- */
// See the header comment above: deliver *c's first child via *first_child
// and, when *c has further children, push a stackElement recording where
// to resume.  The closure-type switch classifies *c by how its children
// are laid out (fixed fields, ptrs array, SRT, or a combination).
432 push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
435 bdescr *nbd; // Next Block Descriptor
437 #ifdef DEBUG_RETAINER
438 // debugBelch("push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
// Stack-carrying closures must go through retainStack()/retain_PAP_payload()
// instead of push().
441 ASSERT(get_itbl(c)->type != TSO);
442 ASSERT(get_itbl(c)->type != AP_STACK);
449 se.c_child_r = c_child_r;
452 switch (get_itbl(c)->type) {
460 // one child (fixed), no SRT
463 *first_child = ((StgMutVar *)c)->var;
466 *first_child = ((StgSelector *)c)->selectee;
470 *first_child = ((StgInd *)c)->indirectee;
474 *first_child = c->payload[0];
477 // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
478 // of the next child. We do not write a separate initialization code.
479 // Also we do not have to initialize info.type;
481 // two children (fixed), no SRT
482 // need to push a stackElement, but nothing to store in se.info
484 *first_child = c->payload[0]; // return the first pointer
485 // se.info.type = posTypeStep;
486 // se.info.next.step = 2; // 2 = second
489 // three children (fixed), no SRT
490 // need to push a stackElement
493 // head must be TSO and the head of a linked list of TSOs.
494 // Should it be a child? Seems to be yes.
495 *first_child = (StgClosure *)((StgMVar *)c)->head;
496 // se.info.type = posTypeStep;
497 se.info.next.step = 2; // 2 = second
500 // three children (fixed), no SRT
502 *first_child = ((StgWeak *)c)->key;
503 // se.info.type = posTypeStep;
504 se.info.next.step = 2;
507 // layout.payload.ptrs, no SRT
513 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
515 *first_child = find_ptrs(&se.info);
516 if (*first_child == NULL)
520 // StgMutArrPtr.ptrs, no SRT
521 case MUT_ARR_PTRS_CLEAN:
522 case MUT_ARR_PTRS_DIRTY:
523 case MUT_ARR_PTRS_FROZEN:
524 case MUT_ARR_PTRS_FROZEN0:
525 init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
526 (StgPtr)(((StgMutArrPtrs *)c)->payload));
527 *first_child = find_ptrs(&se.info);
528 if (*first_child == NULL)
532 // layout.payload.ptrs, SRT
533 case FUN: // *c is a heap object.
535 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
536 *first_child = find_ptrs(&se.info);
537 if (*first_child == NULL)
538 // no child from ptrs, so check SRT
544 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
545 (StgPtr)((StgThunk *)c)->payload);
546 *first_child = find_ptrs(&se.info);
547 if (*first_child == NULL)
548 // no child from ptrs, so check SRT
552 // 1 fixed child, SRT
555 *first_child = c->payload[0];
556 ASSERT(*first_child != NULL);
557 init_srt_fun(&se.info, get_fun_itbl(c));
562 *first_child = ((StgThunk *)c)->payload[0];
563 ASSERT(*first_child != NULL);
564 init_srt_thunk(&se.info, get_thunk_itbl(c));
567 case FUN_STATIC: // *c is a heap object.
568 ASSERT(get_itbl(c)->srt_bitmap != 0);
572 init_srt_fun(&se.info, get_fun_itbl(c));
573 *first_child = find_srt(&se.info);
574 if (*first_child == NULL)
580 ASSERT(get_itbl(c)->srt_bitmap != 0);
584 init_srt_thunk(&se.info, get_thunk_itbl(c));
585 *first_child = find_srt(&se.info);
586 if (*first_child == NULL)
591 *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
592 se.info.next.step = 0; // entry no.
602 case CONSTR_NOCAF_STATIC:
606 case UNDERFLOW_FRAME:
616 barf("Invalid object *c in push()");
// The new element does not fit in the current chunk: switch to the next
// chunk, allocating one first if none has been linked in yet.
620 if (stackTop - 1 < stackBottom) {
621 #ifdef DEBUG_RETAINER
622 // debugBelch("push() to the next stack.\n");
624 // currentStack->free is updated when the active stack is switched
625 // to the next stack.
626 currentStack->free = (StgPtr)stackTop;
628 if (currentStack->link == NULL) {
629 nbd = allocGroup(BLOCKS_IN_STACK);
631 nbd->u.back = currentStack;
632 currentStack->link = nbd;
634 nbd = currentStack->link;
639 // adjust stackTop (actual push)
641 // If the size of stackElement was huge, we would better replace the
642 // following statement by either a memcpy() call or a switch statement
643 // on the type of the element. Currently, the size of stackElement is
644 // small enough (5 words) that this direct assignment seems to be enough.
646 // ToDo: The line below leads to the warning:
647 // warning: 'se.info.type' may be used uninitialized in this function
648 // This is caused by the fact that there are execution paths through the
649 // large switch statement above where some cases do not initialize this
650 // field. Is this really harmless? Can we avoid the warning?
653 #ifdef DEBUG_RETAINER
655 if (stackSize > maxStackSize) maxStackSize = stackSize;
656 // ASSERT(stackSize >= 0);
657 // debugBelch("stackSize = %d\n", stackSize);
661 /* -----------------------------------------------------------------------------
662 * popOff() and popOffReal(): Pop a stackElement off the traverse stack.
664 * stackTop cannot be equal to stackLimit unless the whole stack is
665 * empty, in which case popOff() is not allowed.
667 * You can think of popOffReal() as a part of popOff() which is
668 * executed at the end of popOff() if necessary. Since popOff() is
669 * likely to be executed quite often while popOffReal() is not, we
670 * separate popOffReal() from popOff(), which is declared as an
671 * INLINE function (for the sake of execution speed). popOffReal()
672 * is called only within popOff() and nowhere else.
673 * -------------------------------------------------------------------------- */
// Slow path of popOff() (signature outside the visible lines): the current
// chunk has just been emptied, so either the whole stack is now empty or we
// must switch back to the previous chunk via returnToOldStack().
677 bdescr *pbd; // Previous Block Descriptor
679 #ifdef DEBUG_RETAINER
680 // debugBelch("pop() to the previous stack.\n");
// We only get here when the last element of the chunk was popped.
683 ASSERT(stackTop + 1 == stackLimit);
684 ASSERT(stackBottom == (stackElement *)currentStack->start);
686 if (firstStack == currentStack) {
687 // The stack is completely empty.
689 ASSERT(stackTop == stackLimit);
690 #ifdef DEBUG_RETAINER
692 if (stackSize > maxStackSize) maxStackSize = stackSize;
694 ASSERT(stackSize >= 0);
695 debugBelch("stackSize = %d\n", stackSize);
701 // currentStack->free is updated when the active stack is switched back
702 // to the previous stack.
703 currentStack->free = (StgPtr)stackLimit;
705 // find the previous block descriptor
706 pbd = currentStack->u.back;
709 returnToOldStack(pbd);
711 #ifdef DEBUG_RETAINER
713 if (stackSize > maxStackSize) maxStackSize = stackSize;
715 ASSERT(stackSize >= 0);
716 debugBelch("stackSize = %d\n", stackSize);
// Fast path of the pop (signature outside the visible lines): when more
// elements remain in this chunk, just bump stackTop; otherwise fall through
// to popOffReal() to switch chunks.
723 #ifdef DEBUG_RETAINER
724 // debugBelch("\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
727 ASSERT(stackTop != stackLimit);
728 ASSERT(!isEmptyRetainerStack());
730 // <= (instead of <) is wrong!
731 if (stackTop + 1 < stackLimit) {
733 #ifdef DEBUG_RETAINER
735 if (stackSize > maxStackSize) maxStackSize = stackSize;
737 ASSERT(stackSize >= 0);
738 debugBelch("stackSize = %d\n", stackSize);
747 /* -----------------------------------------------------------------------------
748 * Finds the next object to be considered for retainer profiling and store
750 * Test if the topmost stack element indicates that more objects are left,
751 * and if so, retrieve the first object and store its pointer to *c. Also,
752 * set *cp and *r appropriately, both of which are stored in the stack element.
753 * The topmost stack element then is overwritten so as for it to now denote
755 * If the topmost stack element indicates no more objects are left, pop
756 * off the stack element until either an object can be retrieved or
757 * the current stack chunk becomes empty, indicated by rtsTrue returned by
758 * isOnBoundary(), in which case *c is set to NULL.
760 * It is okay to call this function even when the current stack chunk
762 * -------------------------------------------------------------------------- */
// See the header comment above: retrieve the next pending child from the
// topmost stack element into *c (with its parent in *cp and retainer in
// *r), popping exhausted elements; *c is set to NULL when the current
// chunk is depleted.
764 pop( StgClosure **c, StgClosure **cp, retainer *r )
768 #ifdef DEBUG_RETAINER
769 // debugBelch("pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
773 if (isOnBoundary()) { // if the current stack chunk is depleted
780 switch (get_itbl(se->c)->type) {
781 // two children (fixed), no SRT
782 // nothing in se.info
784 *c = se->c->payload[1];
790 // three children (fixed), no SRT
791 // need to push a stackElement
// se.info.next.step counts which fixed child comes next (2 = second,
// 3 = third); see the matching cases in push().
794 if (se->info.next.step == 2) {
795 *c = (StgClosure *)((StgMVar *)se->c)->tail;
796 se->info.next.step++; // move to the next step
799 *c = ((StgMVar *)se->c)->value;
806 // three children (fixed), no SRT
808 if (se->info.next.step == 2) {
809 *c = ((StgWeak *)se->c)->value;
810 se->info.next.step++;
813 *c = ((StgWeak *)se->c)->finalizer;
821 // These are pretty complicated: we have N entries, each
822 // of which contains 3 fields that we want to follow. So
823 // we divide the step counter: the 2 low bits indicate
824 // which field, and the rest of the bits indicate the
825 // entry number (starting from zero).
827 nat entry_no = se->info.next.step >> 2;
828 nat field_no = se->info.next.step & 3;
829 if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
834 entry = &((StgTRecChunk *)se->c)->entries[entry_no];
836 *c = (StgClosure *)entry->tvar;
837 } else if (field_no == 1) {
838 *c = entry->expected_value;
840 *c = entry->new_value;
844 se->info.next.step++;
853 // StgMutArrPtr.ptrs, no SRT
854 case MUT_ARR_PTRS_CLEAN:
855 case MUT_ARR_PTRS_DIRTY:
856 case MUT_ARR_PTRS_FROZEN:
857 case MUT_ARR_PTRS_FROZEN0:
858 *c = find_ptrs(&se->info);
867 // layout.payload.ptrs, SRT
868 case FUN: // always a heap object
// Pointer fields first; once exhausted, switch the position record over
// to the SRT and continue from there.
870 if (se->info.type == posTypePtrs) {
871 *c = find_ptrs(&se->info);
877 init_srt_fun(&se->info, get_fun_itbl(se->c));
883 if (se->info.type == posTypePtrs) {
884 *c = find_ptrs(&se->info);
890 init_srt_thunk(&se->info, get_thunk_itbl(se->c));
906 *c = find_srt(&se->info);
915 // no child (fixed), no SRT
919 // one child (fixed), no SRT
932 case CONSTR_NOCAF_STATIC:
937 case UNDERFLOW_FRAME:
946 barf("Invalid object *c in pop()");
952 /* -----------------------------------------------------------------------------
953 * RETAINER PROFILING ENGINE
954 * -------------------------------------------------------------------------- */
// One-time setup: initialize the retainer-set machinery and reset the
// generation counter.
957 initRetainerProfiling( void )
959 initializeAllRetainerSet();
960 retainerGeneration = 0;
963 /* -----------------------------------------------------------------------------
964 * This function must be called before f-closing prof_file.
965 * -------------------------------------------------------------------------- */
// Final output pass; must run before prof_file is closed (see header
// comment above).  Only emits anything under SECOND_APPROACH.
967 endRetainerProfiling( void )
969 #ifdef SECOND_APPROACH
970 outputAllRetainerSet(prof_file);
974 /* -----------------------------------------------------------------------------
975 * Returns the actual pointer to the retainer set of the closure *c.
976 * It may adjust RSET(c) subject to flip.
978 * RSET(c) is initialized to NULL if its current value does not
981 * Even though this function has side effects, they CAN be ignored because
982 * subsequent calls to retainerSetOf() always result in the same return value
983 * and retainerSetOf() is the only way to retrieve retainerSet of a given
985 * We have to perform an XOR (^) operation each time a closure is examined.
986 * The reason is that we do not know when a closure is visited last.
987 * -------------------------------------------------------------------------- */
// If *c's retainer-set field does not conform to the current flip bit,
// reset it to the (flip-encoded) NULL retainer set.
989 maybeInitRetainerSet( StgClosure *c )
991 if (!isRetainerSetFieldValid(c)) {
992 setRetainerSetToNull(c);
996 /* -----------------------------------------------------------------------------
997 * Returns rtsTrue if *c is a retainer.
998 * -------------------------------------------------------------------------- */
// Classify closure types into retainers (roots, mutable objects, thunks,
// partial applications, ...) and non-retainers; invalid/stack types barf.
// The rtsTrue/rtsFalse returns fall outside the visible lines.
999 static INLINE rtsBool
1000 isRetainer( StgClosure *c )
1002 switch (get_itbl(c)->type) {
1006 // TSOs MUST be retainers: they constitute the set of roots.
1016 case MUT_ARR_PTRS_CLEAN:
1017 case MUT_ARR_PTRS_DIRTY:
1018 case MUT_ARR_PTRS_FROZEN:
1019 case MUT_ARR_PTRS_FROZEN0:
1021 // thunks are retainers.
1028 case THUNK_SELECTOR:
1032 // Static thunks, or CAFS, are obviously retainers.
1035 // WEAK objects are roots; there is separate code in which traversing
1036 // begins from WEAK objects.
1058 // partial applications
1062 // IND_STATIC used to be an error, but at the moment it can happen
1063 // as isAlive doesn't look through IND_STATIC as it ignores static
1064 // closures. See trac #3956 for a program that hit this error.
1081 // CONSTR_NOCAF_STATIC
1082 // cannot be *c, *cp, *r in the retainer profiling loop.
1083 case CONSTR_NOCAF_STATIC:
1084 // Stack objects are invalid because they are never treated as
1085 // legal objects during retainer profiling.
1088 case UNDERFLOW_FRAME:
1096 case INVALID_OBJECT:
1098 barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
1103 /* -----------------------------------------------------------------------------
1104 * Returns the retainer function value for the closure *c, i.e., R(*c).
1105 * This function does NOT return the retainer(s) of *c.
1107 * *c must be a retainer.
1109 * Depending on the definition of this function, the maintenance of retainer
1110 * sets can be made easier. If most retainer sets are likely to be created
1111 * again across garbage collections, refreshAllRetainerSet() in
1112 * RetainerSet.c can simply do nothing.
1113 * If this is not the case, we can free all the retainer sets and
1114 * re-initialize the hash table.
1115 * See refreshAllRetainerSet() in RetainerSet.c.
1116 * -------------------------------------------------------------------------- */
// The retainer function R(*c): map a retainer closure to its retainer
// identity, selected at compile time by the RETAINER_SCHEME_* macros
// (info table, cost-centre stack, or cost centre).
1117 static INLINE retainer
1118 getRetainerFrom( StgClosure *c )
1120 ASSERT(isRetainer(c));
1122 #if defined(RETAINER_SCHEME_INFO)
1123 // Retainer scheme 1: retainer = info table
1125 #elif defined(RETAINER_SCHEME_CCS)
1126 // Retainer scheme 2: retainer = cost centre stack
1127 return c->header.prof.ccs;
1128 #elif defined(RETAINER_SCHEME_CC)
1129 // Retainer scheme 3: retainer = cost centre
1130 return c->header.prof.ccs->cc;
1134 /* -----------------------------------------------------------------------------
1135 * Associates the retainer set *s with the closure *c, that is, *s becomes
1136 * the retainer set of *c.
1140 * -------------------------------------------------------------------------- */
// Record *s as the retainer set of *c, encoding the current flip bit into
// the stored pointer (see the comment on 'flip' above).
1142 associate( StgClosure *c, RetainerSet *s )
1144 // StgWord has the same size as pointers, so the following type
1146 RSET(c) = (RetainerSet *)((StgWord)s | flip);
1149 /* -----------------------------------------------------------------------------
1150 Call retainClosure for each of the closures covered by a large bitmap.
1151 -------------------------------------------------------------------------- */
// Retain every pointer word described by a large (multi-word) stack
// bitmap: a clear bit marks a pointer, which is passed to retainClosure()
// with parent *c and retainer c_child_r.
1154 retain_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, nat size,
1155 StgClosure *c, retainer c_child_r)
1161 bitmap = large_bitmap->bitmap[b];
1162 for (i = 0; i < size; ) {
1163 if ((bitmap & 1) == 0) {
1164 retainClosure((StgClosure *)*p, c, c_child_r);
// Refill the bitmap word at each word boundary.
1168 if (i % BITS_IN(W_) == 0) {
1170 bitmap = large_bitmap->bitmap[b];
1172 bitmap = bitmap >> 1;
// Retain pointer words described by a single-word bitmap (clear bit =
// pointer), returning the advanced stack pointer so the caller can
// continue past the frame's payload.
1177 static INLINE StgPtr
1178 retain_small_bitmap (StgPtr p, nat size, StgWord bitmap,
1179 StgClosure *c, retainer c_child_r)
1182 if ((bitmap & 1) == 0) {
1183 retainClosure((StgClosure *)*p, c, c_child_r);
1186 bitmap = bitmap >> 1;
1192 /* -----------------------------------------------------------------------------
1193 * Call retainClosure for each of the closures in an SRT.
1194 * ------------------------------------------------------------------------- */
// Retain every closure in a large SRT.  Note the polarity differs from
// stack bitmaps: here a SET bit marks an SRT entry to follow.
1197 retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r)
1204 p = (StgClosure **)srt->srt;
1206 bitmap = srt->l.bitmap[b];
1207 for (i = 0; i < size; ) {
1208 if ((bitmap & 1) != 0) {
1209 retainClosure((StgClosure *)*p, c, c_child_r);
// Refill the bitmap word at each word boundary.
1213 if (i % BITS_IN(W_) == 0) {
1215 bitmap = srt->l.bitmap[b];
1217 bitmap = bitmap >> 1;
// Retain all closures in an SRT.  An all-ones bitmap marks a large SRT
// (delegated to retain_large_srt_bitmap); otherwise scan the small bitmap,
// following each entry whose bit is set.
1223 retainSRT (StgClosure **srt, nat srt_bitmap, StgClosure *c, retainer c_child_r)
1228 bitmap = srt_bitmap;
1231 if (bitmap == (StgHalfWord)(-1)) {
1232 retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r );
1236 while (bitmap != 0) {
1237 if ((bitmap & 1) != 0) {
// NOTE(review): as in find_srt(), on Windows PIC builds a low-bit-tagged
// SRT entry appears to be an indirection that is untagged before being
// followed -- confirm against the full source.
1238 #if defined(__PIC__) && defined(mingw32_HOST_OS)
1239 if ( (unsigned long)(*srt) & 0x1 ) {
1240 retainClosure(* (StgClosure**) ((unsigned long) (*srt) & ~0x1),
1243 retainClosure(*srt,c,c_child_r);
1246 retainClosure(*srt,c,c_child_r);
1250 bitmap = bitmap >> 1;
1254 /* -----------------------------------------------------------------------------
1255 * Process all the objects in the stack chunk from stackStart to stackEnd
1256 * with *c and *c_child_r being their parent and their most recent retainer,
1257 * respectively. Treat stackOptionalFun as another child of *c if it is
1260 * *c is one of the following: TSO, AP_STACK.
1261 * If *c is TSO, c == c_child_r.
1262 * stackStart < stackEnd.
1263 * RSET(c) and RSET(c_child_r) are valid, i.e., their
1264 * interpretation conforms to the current value of flip (even when they
1265 * are interpreted to be NULL).
1266 * If *c is TSO, its state is not ThreadComplete,or ThreadKilled,
1267 * which means that its stack is ready to process.
1269 * This code was almost plagiarized from GC.c! For each pointer,
1270 * retainClosure() is invoked instead of evacuate().
1271 * -------------------------------------------------------------------------- */
// See the header comment above: walk the stack chunk [stackStart,
// stackEnd), retaining every pointer in each frame; frame layout is
// decoded exactly as in GC.c's scavenge_stack.
1273 retainStack( StgClosure *c, retainer c_child_r,
1274 StgPtr stackStart, StgPtr stackEnd )
1276 stackElement *oldStackBoundary;
1278 StgRetInfoTable *info;
1282 #ifdef DEBUG_RETAINER
1284 if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
1288 Each invocation of retainStack() creates a new virtual
1289 stack. Since all such stacks share a single common stack, we
1290 record the current currentStackBoundary, which will be restored
1293 oldStackBoundary = currentStackBoundary;
1294 currentStackBoundary = stackTop;
1296 #ifdef DEBUG_RETAINER
1297 // debugBelch("retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
1300 ASSERT(get_itbl(c)->type == STACK);
// Decode one frame per iteration, advancing p by the frame's size.
1303 while (p < stackEnd) {
1304 info = get_ret_itbl((StgClosure *)p);
1306 switch(info->i.type) {
1309 retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
1310 p += sizeofW(StgUpdateFrame);
1313 case UNDERFLOW_FRAME:
1316 case CATCH_STM_FRAME:
1317 case CATCH_RETRY_FRAME:
1318 case ATOMICALLY_FRAME:
// Small-bitmap frames: payload described by the info table's bitmap.
1320 bitmap = BITMAP_BITS(info->i.layout.bitmap);
1321 size = BITMAP_SIZE(info->i.layout.bitmap);
1323 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1326 retainSRT((StgClosure **)GET_SRT(info), info->i.srt_bitmap, c, c_child_r);
1333 retainClosure((StgClosure *)*p, c, c_child_r);
1336 size = BCO_BITMAP_SIZE(bco);
1337 retain_large_bitmap(p, BCO_BITMAP(bco), size, c, c_child_r);
1342 // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
1344 size = GET_LARGE_BITMAP(&info->i)->size;
1346 retain_large_bitmap(p, GET_LARGE_BITMAP(&info->i),
1347 size, c, c_child_r);
1349 // and don't forget to follow the SRT
1352 // Dynamic bitmap: the mask is stored on the stack
1355 dyn = ((StgRetDyn *)p)->liveness;
1357 // traverse the bitmap first
1358 bitmap = RET_DYN_LIVENESS(dyn);
1359 p = (P_)&((StgRetDyn *)p)->payload[0];
1360 size = RET_DYN_BITMAP_SIZE;
1361 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1363 // skip over the non-ptr words
1364 p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;
1366 // follow the ptr words
1367 for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
1368 retainClosure((StgClosure *)*p, c, c_child_r);
// RET_FUN frames: the function closure determines the argument layout.
1375 StgRetFun *ret_fun = (StgRetFun *)p;
1376 StgFunInfoTable *fun_info;
1378 retainClosure(ret_fun->fun, c, c_child_r);
1379 fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun));
1381 p = (P_)&ret_fun->payload;
1382 switch (fun_info->f.fun_type) {
1384 bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
1385 size = BITMAP_SIZE(fun_info->f.b.bitmap);
1386 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1389 size = GET_FUN_LARGE_BITMAP(fun_info)->size;
1390 retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
1391 size, c, c_child_r);
// Default: one of the standard argument patterns from stg_arg_bitmaps.
1395 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
1396 size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
1397 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1404 barf("Invalid object found in retainStack(): %d",
1405 (int)(info->i.type));
1409 // restore currentStackBoundary
1410 currentStackBoundary = oldStackBoundary;
1411 #ifdef DEBUG_RETAINER
1412 // debugBelch("retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
1415 #ifdef DEBUG_RETAINER
1420 /* ----------------------------------------------------------------------------
1421 * Call retainClosure for each of the children of a PAP/AP
1422 * ------------------------------------------------------------------------- */
1424 static INLINE StgPtr
1425 retain_PAP_payload (StgClosure *pap, /* NOT tagged */
1426 retainer c_child_r, /* NOT tagged */
1427 StgClosure *fun, /* tagged */
1428 StgClosure** payload, StgWord n_args)
1432 StgFunInfoTable *fun_info;
1434 retainClosure(fun, pap, c_child_r);
1435 fun = UNTAG_CLOSURE(fun);
1436 fun_info = get_fun_itbl(fun);
1437 ASSERT(fun_info->i.type != PAP);
1439 p = (StgPtr)payload;
1441 switch (fun_info->f.fun_type) {
1443 bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
1444 p = retain_small_bitmap(p, n_args, bitmap,
1448 retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
1449 n_args, pap, c_child_r);
1453 retain_large_bitmap((StgPtr)payload, BCO_BITMAP(fun),
1454 n_args, pap, c_child_r);
1458 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
1459 p = retain_small_bitmap(p, n_args, bitmap, pap, c_child_r);
1465 /* -----------------------------------------------------------------------------
1466 * Compute the retainer set of *c0 and all its descendants by traversing.
1467 * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
1469 * c0 = cp0 = r0 holds only for root objects.
1470 * RSET(cp0) and RSET(r0) are valid, i.e., their
1471 * interpretation conforms to the current value of flip (even when they
1472 * are interpreted to be NULL).
1473 * However, RSET(c0) may be corrupt, i.e., it may not conform to
1474 * the current value of flip. If it does not, during the execution
1475 * of this function, RSET(c0) must be initialized as well as all
1478 * stackTop must be the same at the beginning and the exit of this function.
1479 * *c0 can be TSO (as well as AP_STACK).
1480 * -------------------------------------------------------------------------- */
1482 retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
1484 // c = Current closure (possibly tagged)
1485 // cp = Current closure's Parent (NOT tagged)
1486 // r = current closures' most recent Retainer (NOT tagged)
1487 // c_child_r = current closure's children's most recent retainer
1488 // first_child = first child of c
1489 StgClosure *c, *cp, *first_child;
1490 RetainerSet *s, *retainerSetOfc;
1491 retainer r, c_child_r;
1494 #ifdef DEBUG_RETAINER
1495 // StgPtr oldStackTop;
1498 #ifdef DEBUG_RETAINER
1499 // oldStackTop = stackTop;
1500 // debugBelch("retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
1503 // (c, cp, r) = (c0, cp0, r0)
1510 //debugBelch("loop");
1511 // pop to (c, cp, r);
1515 #ifdef DEBUG_RETAINER
1516 // debugBelch("retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
1521 //debugBelch("inner_loop");
1524 c = UNTAG_CLOSURE(c);
1526 // c = current closure under consideration,
1527 // cp = current closure's parent,
1528 // r = current closure's most recent retainer
1530 // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
1531 // RSET(cp) and RSET(r) are valid.
1532 // RSET(c) is valid only if c has been visited before.
1534 // Loop invariants (on the relation between c, cp, and r)
1535 // if cp is not a retainer, r belongs to RSET(cp).
1536 // if cp is a retainer, r == cp.
1538 typeOfc = get_itbl(c)->type;
1540 #ifdef DEBUG_RETAINER
1543 case CONSTR_NOCAF_STATIC:
1549 if (retainerSetOf(c) == NULL) { // first visit?
1550 costArray[typeOfc] += cost(c);
1551 sumOfNewCost += cost(c);
1560 if (((StgTSO *)c)->what_next == ThreadComplete ||
1561 ((StgTSO *)c)->what_next == ThreadKilled) {
1562 #ifdef DEBUG_RETAINER
1563 debugBelch("ThreadComplete or ThreadKilled encountered in retainClosure()\n");
1570 // We just skip IND_STATIC, so its retainer set is never computed.
1571 c = ((StgIndStatic *)c)->indirectee;
1573 // static objects with no pointers out, so goto loop.
1574 case CONSTR_NOCAF_STATIC:
1575 // It is not just enough not to compute the retainer set for *c; it is
1576 // mandatory because CONSTR_NOCAF_STATIC are not reachable from
1577 // scavenged_static_objects, the list from which is assumed to traverse
1578 // all static objects after major garbage collections.
1582 if (get_itbl(c)->srt_bitmap == 0) {
1583 // No need to compute the retainer set; no dynamic objects
1584 // are reachable from *c.
1586 // Static objects: if we traverse all the live closures,
1587 // including static closures, during each heap census then
1588 // we will observe that some static closures appear and
1589 // disappear. eg. a closure may contain a pointer to a
1590 // static function 'f' which is not otherwise reachable
1591 // (it doesn't indirectly point to any CAFs, so it doesn't
1592 // appear in any SRTs), so we would find 'f' during
1593 // traversal. However on the next sweep there may be no
1594 // closures pointing to 'f'.
1596 // We must therefore ignore static closures whose SRT is
1597 // empty, because these are exactly the closures that may
1598 // "appear". A closure with a non-empty SRT, and which is
1599 // still required, will always be reachable.
1601 // But what about CONSTR_STATIC? Surely these may be able
1602 // to appear, and they don't have SRTs, so we can't
1603 // check. So for now, we're calling
1604 // resetStaticObjectForRetainerProfiling() from the
1605 // garbage collector to reset the retainer sets in all the
1606 // reachable static objects.
1613 // The above objects are ignored in computing the average number of times
1614 // an object is visited.
1615 timesAnyObjectVisited++;
1617 // If this is the first visit to c, initialize its retainer set.
1618 maybeInitRetainerSet(c);
1619 retainerSetOfc = retainerSetOf(c);
1622 // isRetainer(cp) == rtsTrue => s == NULL
1623 // isRetainer(cp) == rtsFalse => s == cp.retainer
1627 s = retainerSetOf(cp);
1629 // (c, cp, r, s) is available.
1631 // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
1632 if (retainerSetOfc == NULL) {
1633 // This is the first visit to *c.
1637 associate(c, singleton(r));
1639 // s is actually the retainer set of *c!
1642 // compute c_child_r
1643 c_child_r = isRetainer(c) ? getRetainerFrom(c) : r;
1645 // This is not the first visit to *c.
1646 if (isMember(r, retainerSetOfc))
1647 goto loop; // no need to process child
1650 associate(c, addElement(r, retainerSetOfc));
1652 // s is not NULL and cp is not a retainer. This means that
1653 // each time *cp is visited, so is *c. Thus, if s has
1654 // exactly one more element in its retainer set than c, s
1655 // is also the new retainer set for *c.
1656 if (s->num == retainerSetOfc->num + 1) {
1659 // Otherwise, just add R_r to the current retainer set of *c.
1661 associate(c, addElement(r, retainerSetOfc));
1666 goto loop; // no need to process child
1668 // compute c_child_r
1672 // now, RSET() of all of *c, *cp, and *r is valid.
1673 // (c, c_child_r) are available.
1677 // Special case closures: we process these all in one go rather
1678 // than attempting to save the current position, because doing so
1682 retainStack(c, c_child_r,
1683 ((StgStack *)c)->sp,
1684 ((StgStack *)c)->stack + ((StgStack *)c)->stack_size);
1689 StgTSO *tso = (StgTSO *)c;
1691 retainClosure(tso->stackobj, c, c_child_r);
1692 retainClosure(tso->blocked_exceptions, c, c_child_r);
1693 retainClosure(tso->bq, c, c_child_r);
1694 retainClosure(tso->trec, c, c_child_r);
1695 if ( tso->why_blocked == BlockedOnMVar
1696 || tso->why_blocked == BlockedOnBlackHole
1697 || tso->why_blocked == BlockedOnMsgThrowTo
1699 retainClosure(tso->block_info.closure, c, c_child_r);
1706 StgPAP *pap = (StgPAP *)c;
1707 retain_PAP_payload(c, c_child_r, pap->fun, pap->payload, pap->n_args);
1713 StgAP *ap = (StgAP *)c;
1714 retain_PAP_payload(c, c_child_r, ap->fun, ap->payload, ap->n_args);
1719 retainClosure(((StgAP_STACK *)c)->fun, c, c_child_r);
1720 retainStack(c, c_child_r,
1721 (StgPtr)((StgAP_STACK *)c)->payload,
1722 (StgPtr)((StgAP_STACK *)c)->payload +
1723 ((StgAP_STACK *)c)->size);
1727 push(c, c_child_r, &first_child);
1729 // If first_child is null, c has no child.
1730 // If first_child is not null, the top stack element points to the next
1731 // object. push() may or may not push a stackElement on the stack.
1732 if (first_child == NULL)
1735 // (c, cp, r) = (first_child, c, c_child_r)
1742 /* -----------------------------------------------------------------------------
1743 * Compute the retainer set for every object reachable from *tl.
1744 * -------------------------------------------------------------------------- */
1746 retainRoot(void *user STG_UNUSED, StgClosure **tl)
1750 // We no longer assume that only TSOs and WEAKs are roots; any closure can
1753 ASSERT(isEmptyRetainerStack());
1754 currentStackBoundary = stackTop;
1756 c = UNTAG_CLOSURE(*tl);
1757 if (c != &stg_END_TSO_QUEUE_closure && isRetainer(c)) {
1758 retainClosure(c, c, getRetainerFrom(c));
1760 retainClosure(c, c, CCS_SYSTEM);
1763 // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
1764 // *tl might be a TSO which is ThreadComplete, in which
1765 // case we ignore it for the purposes of retainer profiling.
1768 /* -----------------------------------------------------------------------------
1769 * Compute the retainer set for each of the objects in the heap.
1770 * -------------------------------------------------------------------------- */
1772 computeRetainerSet( void )
1779 #ifdef DEBUG_RETAINER
1780 RetainerSet tmpRetainerSet;
1783 markCapabilities(retainRoot, NULL); // for scheduler roots
1785 // This function is called after a major GC, when key, value, and finalizer
1786 // all are guaranteed to be valid, or reachable.
1788 // The following code assumes that WEAK objects are considered to be roots
1789 // for retainer profilng.
1790 for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
1791 // retainRoot((StgClosure *)weak);
1792 retainRoot(NULL, (StgClosure **)&weak);
1794 // Consider roots from the stable ptr table.
1795 markStablePtrTable(retainRoot, NULL);
1797 // The following code resets the rs field of each unvisited mutable
1798 // object (computing sumOfNewCostExtra and updating costArray[] when
1799 // debugging retainer profiler).
1800 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1801 // NOT TRUE: even G0 has a block on its mutable list
1802 // ASSERT(g != 0 || (generations[g].mut_list == NULL));
1804 // Traversing through mut_list is necessary
1805 // because we can find MUT_VAR objects which have not been
1806 // visited during retainer profiling.
1807 for (n = 0; n < n_capabilities; n++) {
1808 for (bd = capabilities[n].mut_lists[g]; bd != NULL; bd = bd->link) {
1809 for (ml = bd->start; ml < bd->free; ml++) {
1811 maybeInitRetainerSet((StgClosure *)*ml);
1812 rtl = retainerSetOf((StgClosure *)*ml);
1814 #ifdef DEBUG_RETAINER
1816 // first visit to *ml
1817 // This is a violation of the interface rule!
1818 RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);
1820 switch (get_itbl((StgClosure *)ml)->type) {
1824 case CONSTR_NOCAF_STATIC:
1828 barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
1832 costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
1833 sumOfNewCostExtra += cost((StgClosure *)ml);
1844 /* -----------------------------------------------------------------------------
1845 * Traverse all static objects for which we compute retainer sets,
1846 * and reset their rs fields to NULL, which is accomplished by
1847 * invoking maybeInitRetainerSet(). This function must be called
1848 * before zeroing all objects reachable from scavenged_static_objects
1849 * in the case of major garbage collections. See GarbageCollect() in
1852 * The mut_once_list of the oldest generation must also be traversed?
1853 * Why? Because if the evacuation of an object pointed to by a static
1854 * indirection object fails, it is put back to the mut_once_list of
1855 * the oldest generation.
1856 * However, this is not necessary because any static indirection objects
1857 * are just traversed through to reach dynamic objects. In other words,
1858 * they are not taken into consideration in computing retainer sets.
1859 * -------------------------------------------------------------------------- */
1861 resetStaticObjectForRetainerProfiling( StgClosure *static_objects )
1863 #ifdef DEBUG_RETAINER
1868 #ifdef DEBUG_RETAINER
1872 while (p != END_OF_STATIC_LIST) {
1873 #ifdef DEBUG_RETAINER
1876 switch (get_itbl(p)->type) {
1878 // Since we do not compute the retainer set of any
1879 // IND_STATIC object, we don't have to reset its retainer
1881 p = (StgClosure*)*IND_STATIC_LINK(p);
1884 maybeInitRetainerSet(p);
1885 p = (StgClosure*)*THUNK_STATIC_LINK(p);
1888 maybeInitRetainerSet(p);
1889 p = (StgClosure*)*FUN_STATIC_LINK(p);
1892 maybeInitRetainerSet(p);
1893 p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
1896 barf("resetStaticObjectForRetainerProfiling: %p (%s)",
1897 p, get_itbl(p)->type);
1901 #ifdef DEBUG_RETAINER
1902 // debugBelch("count in scavenged_static_objects = %d\n", count);
1906 /* -----------------------------------------------------------------------------
1907 * Perform retainer profiling.
1908 * N is the oldest generation being profiled, where the generations are
1909 * numbered starting at 0.
1912 * This function should be called only immediately after major garbage
1914 * ------------------------------------------------------------------------- */
1916 retainerProfile(void)
1918 #ifdef DEBUG_RETAINER
1920 nat totalHeapSize; // total raw heap size (computed by linear scanning)
1923 #ifdef DEBUG_RETAINER
1924 debugBelch(" < retainerProfile() invoked : %d>\n", retainerGeneration);
1929 // We haven't flipped the bit yet.
1930 #ifdef DEBUG_RETAINER
1931 debugBelch("Before traversing:\n");
1932 sumOfCostLinear = 0;
1933 for (i = 0;i < N_CLOSURE_TYPES; i++)
1934 costArrayLinear[i] = 0;
1935 totalHeapSize = checkHeapSanityForRetainerProfiling();
1937 debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
1939 debugBelch("costArrayLinear[] = ");
1940 for (i = 0;i < N_CLOSURE_TYPES; i++)
1941 debugBelch("[%u:%u] ", i, costArrayLinear[i]);
1945 ASSERT(sumOfCostLinear == totalHeapSize);
1948 #define pcostArrayLinear(index) \
1949 if (costArrayLinear[index] > 0) \
1950 debugBelch("costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
1951 pcostArrayLinear(THUNK_STATIC);
1952 pcostArrayLinear(FUN_STATIC);
1953 pcostArrayLinear(CONSTR_STATIC);
1954 pcostArrayLinear(CONSTR_NOCAF_STATIC);
1958 // Now we flips flip.
1961 #ifdef DEBUG_RETAINER
1967 numObjectVisited = 0;
1968 timesAnyObjectVisited = 0;
1970 #ifdef DEBUG_RETAINER
1971 debugBelch("During traversing:\n");
1973 sumOfNewCostExtra = 0;
1974 for (i = 0;i < N_CLOSURE_TYPES; i++)
1979 We initialize the traverse stack each time the retainer profiling is
1980 performed (because the traverse stack size varies on each retainer profiling
1981 and this operation is not costly anyhow). However, we just refresh the
1984 initializeTraverseStack();
1985 #ifdef DEBUG_RETAINER
1986 initializeAllRetainerSet();
1988 refreshAllRetainerSet();
1990 computeRetainerSet();
1992 #ifdef DEBUG_RETAINER
1993 debugBelch("After traversing:\n");
1994 sumOfCostLinear = 0;
1995 for (i = 0;i < N_CLOSURE_TYPES; i++)
1996 costArrayLinear[i] = 0;
1997 totalHeapSize = checkHeapSanityForRetainerProfiling();
1999 debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
2000 ASSERT(sumOfCostLinear == totalHeapSize);
2002 // now, compare the two results
2005 costArray[] must be exactly the same as costArrayLinear[].
2007 1) Dead weak pointers, whose type is CONSTR. These objects are not
2008 reachable from any roots.
2010 debugBelch("Comparison:\n");
2011 debugBelch("\tcostArrayLinear[] (must be empty) = ");
2012 for (i = 0;i < N_CLOSURE_TYPES; i++)
2013 if (costArray[i] != costArrayLinear[i])
2014 // nothing should be printed except MUT_VAR after major GCs
2015 debugBelch("[%u:%u] ", i, costArrayLinear[i]);
2018 debugBelch("\tsumOfNewCost = %u\n", sumOfNewCost);
2019 debugBelch("\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
2020 debugBelch("\tcostArray[] (must be empty) = ");
2021 for (i = 0;i < N_CLOSURE_TYPES; i++)
2022 if (costArray[i] != costArrayLinear[i])
2023 // nothing should be printed except MUT_VAR after major GCs
2024 debugBelch("[%u:%u] ", i, costArray[i]);
2027 // only for major garbage collection
2028 ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
2032 closeTraverseStack();
2033 #ifdef DEBUG_RETAINER
2034 closeAllRetainerSet();
2036 // Note that there is no post-processing for the retainer sets.
2038 retainerGeneration++;
2041 retainerGeneration - 1, // retainerGeneration has just been incremented!
2042 #ifdef DEBUG_RETAINER
2043 maxCStackSize, maxStackSize,
2045 (double)timesAnyObjectVisited / numObjectVisited);
2048 /* -----------------------------------------------------------------------------
2050 * -------------------------------------------------------------------------- */
2052 #ifdef DEBUG_RETAINER
2054 #define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
2055 ((HEAP_ALLOCED(r) && ((Bdescr((P_)r)->flags & BF_FREE) == 0)))) && \
2056 ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa))
2059 sanityCheckHeapClosure( StgClosure *c )
2063 ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
2064 ASSERT(!closure_STATIC(c));
2065 ASSERT(LOOKS_LIKE_PTR(c));
2067 if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
2068 if (get_itbl(c)->type == CONSTR &&
2069 !strcmp(GET_PROF_TYPE(get_itbl(c)), "DEAD_WEAK") &&
2070 !strcmp(GET_PROF_DESC(get_itbl(c)), "DEAD_WEAK")) {
2071 debugBelch("\tUnvisited dead weak pointer object found: c = %p\n", c);
2072 costArray[get_itbl(c)->type] += cost(c);
2073 sumOfNewCost += cost(c);
2076 "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
2077 flip, c, get_itbl(c)->type,
2078 get_itbl(c)->prof.closure_type, GET_PROF_DESC(get_itbl(c)),
2081 // debugBelch("sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
2084 return closure_sizeW(c);
2088 heapCheck( bdescr *bd )
2091 static nat costSum, size;
2094 while (bd != NULL) {
2096 while (p < bd->free) {
2097 size = sanityCheckHeapClosure((StgClosure *)p);
2098 sumOfCostLinear += size;
2099 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2101 // no need for slop check; I think slops are not used currently.
2103 ASSERT(p == bd->free);
2104 costSum += bd->free - bd->start;
2112 smallObjectPoolCheck(void)
2116 static nat costSum, size;
2126 while (p < alloc_Hp) {
2127 size = sanityCheckHeapClosure((StgClosure *)p);
2128 sumOfCostLinear += size;
2129 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2132 ASSERT(p == alloc_Hp);
2133 costSum += alloc_Hp - bd->start;
2136 while (bd != NULL) {
2138 while (p < bd->free) {
2139 size = sanityCheckHeapClosure((StgClosure *)p);
2140 sumOfCostLinear += size;
2141 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2144 ASSERT(p == bd->free);
2145 costSum += bd->free - bd->start;
2153 chainCheck(bdescr *bd)
2158 while (bd != NULL) {
2159 // bd->free - bd->start is not an accurate measurement of the
2160 // object size. Actually it is always zero, so we compute its
2162 size = sanityCheckHeapClosure((StgClosure *)bd->start);
2163 sumOfCostLinear += size;
2164 costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size;
2173 checkHeapSanityForRetainerProfiling( void )
2178 debugBelch("START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2179 if (RtsFlags.GcFlags.generations == 1) {
2180 costSum += heapCheck(g0s0->to_blocks);
2181 debugBelch("heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2182 costSum += chainCheck(g0s0->large_objects);
2183 debugBelch("chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2185 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
2186 for (s = 0; s < generations[g].n_steps; s++) {
2188 After all live objects have been scavenged, the garbage
2189 collector may create some objects in
2190 scheduleFinalizers(). These objects are created throught
2191 allocate(), so the small object pool or the large object
2192 pool of the g0s0 may not be empty.
2194 if (g == 0 && s == 0) {
2195 costSum += smallObjectPoolCheck();
2196 debugBelch("smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2197 costSum += chainCheck(generations[g].steps[s].large_objects);
2198 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2200 costSum += heapCheck(generations[g].steps[s].blocks);
2201 debugBelch("heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2202 costSum += chainCheck(generations[g].steps[s].large_objects);
2203 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2212 findPointer(StgPtr p)
2218 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2219 for (s = 0; s < generations[g].n_steps; s++) {
2220 // if (g == 0 && s == 0) continue;
2221 bd = generations[g].steps[s].blocks;
2222 for (; bd; bd = bd->link) {
2223 for (q = bd->start; q < bd->free; q++) {
2224 if (*q == (StgWord)p) {
2226 while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
2227 debugBelch("Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
2232 bd = generations[g].steps[s].large_objects;
2233 for (; bd; bd = bd->link) {
2234 e = bd->start + cost((StgClosure *)bd->start);
2235 for (q = bd->start; q < e; q++) {
2236 if (*q == (StgWord)p) {
2238 while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
2239 debugBelch("Found in gen[%d], large_objects: %p\n", g, r);
2249 belongToHeap(StgPtr p)
2254 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2255 for (s = 0; s < generations[g].n_steps; s++) {
2256 // if (g == 0 && s == 0) continue;
2257 bd = generations[g].steps[s].blocks;
2258 for (; bd; bd = bd->link) {
2259 if (bd->start <= p && p < bd->free) {
2260 debugBelch("Belongs to gen[%d], step[%d]", g, s);
2264 bd = generations[g].steps[s].large_objects;
2265 for (; bd; bd = bd->link) {
2266 if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
2267 debugBelch("Found in gen[%d], large_objects: %p\n", g, bd->start);
2274 #endif /* DEBUG_RETAINER */
2276 #endif /* PROFILING */