1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team, 2001
8 * ---------------------------------------------------------------------------*/
12 // Turn off inlining when debugging - it obfuscates things
21 #include "RetainerProfile.h"
22 #include "RetainerSet.h"
30 #include "Profiling.h"
36 Note: what to change in order to plug-in a new retainer profiling scheme?
37 (1) type retainer in ../includes/StgRetainerProf.h
38 (2) retainer function R(), i.e., getRetainerFrom()
39 (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
40 in RetainerSet.h, if needed.
41 (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
44 /* -----------------------------------------------------------------------------
46 * -------------------------------------------------------------------------- */
48 static nat retainerGeneration; // generation
50 static nat numObjectVisited; // total number of objects visited
51 static nat timesAnyObjectVisited; // number of times any objects are visited
54 The rs field in the profile header of any object points to its retainer
55 set in an indirect way: if flip is 0, it points to the retainer set;
56 if flip is 1, it points to the next byte after the retainer set (even
57 for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
58 pointer. See retainerSetOf().
61 StgWord flip = 0; // flip bit
62 // must be 0 if DEBUG_RETAINER is on (for static closures)
64 #define setRetainerSetToNull(c) \
65 (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)
67 static void retainStack(StgClosure *, retainer, StgPtr, StgPtr);
68 static void retainClosure(StgClosure *, StgClosure *, retainer);
70 static void belongToHeap(StgPtr p);
75 cStackSize records how many times retainStack() has been invoked recursively,
76 that is, the number of activation records for retainStack() on the C stack.
77 maxCStackSize records its max value.
79 cStackSize <= maxCStackSize
81 static nat cStackSize, maxCStackSize;
// Global cost accounting for the retainer profiler.  The *Linear
// counters are filled by a separate linear heap sweep and compared
// against the traversal-time counters as a sanity check.
83 static nat sumOfNewCost; // sum of the cost of each object, computed
84 // when the object is first visited
85 static nat sumOfNewCostExtra; // for those objects not visited during
86 // retainer profiling, e.g., MUT_VAR
87 static nat costArray[N_CLOSURE_TYPES];
89 nat sumOfCostLinear; // sum of the costs of all objects, computed
90 // when linearly traversing the heap after
92 nat costArrayLinear[N_CLOSURE_TYPES];
95 /* -----------------------------------------------------------------------------
96 * Retainer stack - header
98 * Although the retainer stack implementation could be separated
99 * from the retainer profiling engine, there does not seem to be
100 * any advantage in doing that; retainer stack is an integral part
101 * of the retainer profiling engine and cannot be used elsewhere at
103 * -------------------------------------------------------------------------- */
113 // fixed layout or layout specified by a field in the closure
118 // See StgClosureInfo in InfoTables.h
119 #if SIZEOF_VOID_P == 8
156 firstStack points to the first block group.
157 currentStack points to the block group currently being used.
158 currentStack->free == stackLimit.
159 stackTop points to the topmost byte in the stack of currentStack.
160 Unless the whole stack is empty, stackTop must point to the topmost
161 object (or byte) in the whole stack. Thus, it is only when the whole stack
162 is empty that stackTop == stackLimit (not during the execution of push()
164 stackBottom == currentStack->start.
165 stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
167 When a current stack becomes empty, stackTop is set to point to
168 the topmost element on the previous block group so as to satisfy
169 the invariants described above.
171 static bdescr *firstStack = NULL;
172 static bdescr *currentStack;
173 static stackElement *stackBottom, *stackTop, *stackLimit;
176 currentStackBoundary is used to mark the current stack chunk.
177 If stackTop == currentStackBoundary, it means that the current stack chunk
178 is empty. It is the responsibility of the user to keep currentStackBoundary
179 valid all the time if it is to be employed.
181 static stackElement *currentStackBoundary;
184 stackSize records the current size of the stack.
185 maxStackSize records its high water mark.
187 stackSize <= maxStackSize
189 stackSize is just an estimate measure of the depth of the graph. The reason
190 is that some heap objects have only a single child and may not result
191 in a new element being pushed onto the stack. Therefore, at the end of
192 retainer profiling, maxStackSize + maxCStackSize is some value no greater
193 than the actual depth of the graph.
195 #ifdef DEBUG_RETAINER
196 static int stackSize, maxStackSize;
199 // number of blocks allocated for one stack
200 #define BLOCKS_IN_STACK 1
202 /* -----------------------------------------------------------------------------
203 * Add a new block group to the stack.
205 * currentStack->link == s.
206 * -------------------------------------------------------------------------- */
// Make block group `bd` the active chunk of the traverse stack and
// reset the stack-boundary variables for it: an empty chunk has
// stackTop == stackLimit at the block's high end.
// NOTE(review): fragment — the return type, braces, and (presumably)
// the `currentStack = bd;` assignment are not visible in this view.
208 newStackBlock( bdescr *bd )
211     stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
212     stackBottom = (stackElement *)bd->start;
213     stackLimit = (stackElement *)stackTop;
214     bd->free = (StgPtr)stackLimit;
217 /* -----------------------------------------------------------------------------
218 * Return to the previous block group.
220 * s->link == currentStack.
221 * -------------------------------------------------------------------------- */
// Switch back to the previous block group `bd`: restore stackTop from
// bd->free (saved when we left this chunk) and recompute the
// bottom/limit boundaries.
// NOTE(review): fragment — return type, braces, and any
// `currentStack = bd;` assignment are not visible in this view.
223 returnToOldStack( bdescr *bd )
226     stackTop = (stackElement *)bd->free;
227     stackBottom = (stackElement *)bd->start;
228     stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
229     bd->free = (StgPtr)stackLimit;
232 /* -----------------------------------------------------------------------------
233 * Initializes the traverse stack.
234 * -------------------------------------------------------------------------- */
// (Re)initialize the traverse stack: free any leftover chain from a
// previous run, then allocate a fresh first block group and make it
// the active stack chunk via newStackBlock().
236 initializeTraverseStack( void )
238     if (firstStack != NULL) {
239 	freeChain(firstStack);
242     firstStack = allocGroup(BLOCKS_IN_STACK);
243     firstStack->link = NULL;
244     firstStack->u.back = NULL;
246     newStackBlock(firstStack);
249 /* -----------------------------------------------------------------------------
250 * Frees all the block groups in the traverse stack.
253 * -------------------------------------------------------------------------- */
// Release every block group in the traverse stack chain.
// NOTE(review): fragment — the original presumably also resets
// firstStack to NULL; that line is not visible here.
255 closeTraverseStack( void )
257     freeChain(firstStack);
261 /* -----------------------------------------------------------------------------
262 * Returns rtsTrue if the whole stack is empty.
263 * -------------------------------------------------------------------------- */
264 static INLINE rtsBool
// True iff the whole traverse stack is empty: we are on the first
// block group and stackTop has climbed back up to stackLimit.
265 isEmptyRetainerStack( void )
267     return (firstStack == currentStack) && stackTop == stackLimit;
270 /* -----------------------------------------------------------------------------
271 * Returns size of stack
272 * -------------------------------------------------------------------------- */
// Total number of blocks used by the traverse stack, summed over every
// block group in the chain.
// NOTE(review): fragment — return type, accumulator variable, and the
// loop body are not visible in this view.
275 retainerStackBlocks( void )
280     for (bd = firstStack; bd != NULL; bd = bd->link)
287 /* -----------------------------------------------------------------------------
288 * Returns rtsTrue if stackTop is at the stack boundary of the current stack,
289 * i.e., if the current stack chunk is empty.
290 * -------------------------------------------------------------------------- */
291 static INLINE rtsBool
// Returns rtsTrue when the current stack chunk is empty, i.e. stackTop
// has reached the caller-maintained currentStackBoundary.
// NOTE(review): fragment — the function-name line (isOnBoundary, per
// the header comment above) is missing from this view.
294     return stackTop == currentStackBoundary;
297 /* -----------------------------------------------------------------------------
298 * Initializes *info from ptrs and payload.
300 * payload[] begins with ptrs pointers followed by non-pointers.
301 * -------------------------------------------------------------------------- */
// Initialize *info to walk an array of `ptrs` pointers starting at
// `payload`, beginning at index 0.  Consumed by find_ptrs().
303 init_ptrs( stackPos *info, nat ptrs, StgPtr payload )
305     info->type = posTypePtrs;
306     info->next.ptrs.pos = 0;
307     info->next.ptrs.ptrs = ptrs;
308     info->next.ptrs.payload = payload;
311 /* -----------------------------------------------------------------------------
312 * Find the next object from *info.
313 * -------------------------------------------------------------------------- */
314 static INLINE StgClosure *
// Yield the next pointer recorded by init_ptrs(), advancing the
// position; exhaustion is signalled by returning NULL.
315 find_ptrs( stackPos *info )
317     if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
318 	return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
// NOTE(review): fragment — the else branch returning NULL when the
// pointers are exhausted is not visible in this view.
324 /* -----------------------------------------------------------------------------
325 * Initializes *info from SRT information stored in *infoTable.
326 * -------------------------------------------------------------------------- */
// Initialize *info from the SRT of a function info table.  An
// srt_bitmap of all ones ((StgHalfWord)(-1)) marks a "large" SRT with
// an out-of-line bitmap; otherwise the half-word itself is the bitmap.
328 init_srt_fun( stackPos *info, StgFunInfoTable *infoTable )
330     if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
331 	info->type = posTypeLargeSRT;
332 	info->next.large_srt.srt = (StgLargeSRT *)GET_FUN_SRT(infoTable);
333 	info->next.large_srt.offset = 0;
335 	info->type = posTypeSRT;
336 	info->next.srt.srt = (StgClosure **)GET_FUN_SRT(infoTable);
337 	info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
// Same as init_srt_fun(), but for a thunk info table (uses GET_SRT
// instead of GET_FUN_SRT).
342 init_srt_thunk( stackPos *info, StgThunkInfoTable *infoTable )
344     if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
345 	info->type = posTypeLargeSRT;
346 	info->next.large_srt.srt = (StgLargeSRT *)GET_SRT(infoTable);
347 	info->next.large_srt.offset = 0;
349 	info->type = posTypeSRT;
350 	info->next.srt.srt = (StgClosure **)GET_SRT(infoTable);
351 	info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
355 /* -----------------------------------------------------------------------------
356 * Find the next object from *info.
357 * -------------------------------------------------------------------------- */
358 static INLINE StgClosure *
// Yield the next SRT entry recorded by init_srt_fun()/init_srt_thunk(),
// advancing the saved position.  Small SRTs walk an inline bitmap;
// large SRTs index an out-of-line bitmap word array.
// NOTE(review): fragment — braces, the NULL-return paths, and several
// intervening lines are missing from this view.
359 find_srt( stackPos *info )
364     if (info->type == posTypeSRT) {
// Small SRT: a set bit means the corresponding SRT slot is live.
366 	bitmap = info->next.srt.srt_bitmap;
367 	while (bitmap != 0) {
368 	    if ((bitmap & 1) != 0) {
369 #ifdef ENABLE_WIN32_DLL_SUPPORT
// On Win32 DLLs, a tagged (low-bit-set) entry is an indirection
// through the import table and must be untagged before dereferencing.
371 		if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
372 		    c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
374 		    c = *(info->next.srt.srt);
376 		c = *(info->next.srt.srt);
378 		bitmap = bitmap >> 1;
379 		info->next.srt.srt++;
380 		info->next.srt.srt_bitmap = bitmap;
383 	    bitmap = bitmap >> 1;
384 	    info->next.srt.srt++;
386 	// bitmap is now zero...
391 	nat i = info->next.large_srt.offset;
394 	// Follow the pattern from GC.c:scavenge_large_srt_bitmap().
395 	bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
396 	bitmap = bitmap >> (i % BITS_IN(StgWord));
397 	while (i < info->next.large_srt.srt->l.size) {
398 	    if ((bitmap & 1) != 0) {
399 		c = ((StgClosure **)info->next.large_srt.srt->srt)[i];
401 		info->next.large_srt.offset = i;
// Reload the bitmap word at each word boundary.
405 	    if (i % BITS_IN(W_) == 0) {
406 		bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
408 		bitmap = bitmap >> 1;
411 	// reached the end of this bitmap.
412 	info->next.large_srt.offset = i;
417 /* -----------------------------------------------------------------------------
418 * push() pushes a stackElement representing the next child of *c
419 * onto the traverse stack. If *c has no child, *first_child is set
420 * to NULL and nothing is pushed onto the stack. If *c has only one
421 * child, *first_child is set to that child and nothing is pushed onto
422 * the stack. If *c has more than two children, *first_child is set
423 * to the first child and a stackElement representing the second
424 * child is pushed onto the stack.
427 * *c_child_r is the most recent retainer of *c's children.
428 * *c is not any of TSO, AP, PAP, AP_STACK, which means that
429 * there cannot be any stack objects.
430 * Note: SRTs are considered to be children as well.
431 * -------------------------------------------------------------------------- */
// Push a stackElement representing the next child of *c onto the
// traverse stack; see the contract in the header comment above.
// NOTE(review): large fragment — the return type, `stackElement se;`
// declaration, many case labels, `break`s, and the final
// `*stackTop = se;` store are missing from this view.
433 push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
436     bdescr *nbd;      // Next Block Descriptor
438 #ifdef DEBUG_RETAINER
439     // debugBelch("push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
442     ASSERT(get_itbl(c)->type != TSO);
443     ASSERT(get_itbl(c)->type != AP_STACK);
450     se.c_child_r = c_child_r;
453     switch (get_itbl(c)->type) {
460     case SE_CAF_BLACKHOLE:
465 	// one child (fixed), no SRT
468 	*first_child = ((StgMutVar *)c)->var;
471 	*first_child = ((StgSelector *)c)->selectee;
474     case IND_OLDGEN_PERM:
476 	*first_child = ((StgInd *)c)->indirectee;
480 	*first_child = c->payload[0];
483 	// For CONSTR_2_0 and MVAR, we use se.info.step to record the position
484 	// of the next child. We do not write a separate initialization code.
485 	// Also we do not have to initialize info.type;
487 	// two children (fixed), no SRT
488 	// need to push a stackElement, but nothing to store in se.info
490 	*first_child = c->payload[0]; // return the first pointer
491 	// se.info.type = posTypeStep;
492 	// se.info.next.step = 2; // 2 = second
495 	// three children (fixed), no SRT
496 	// need to push a stackElement
498 	// head must be TSO and the head of a linked list of TSOs.
499 	// Should it be a child? Seems to be yes.
500 	*first_child = (StgClosure *)((StgMVar *)c)->head;
501 	// se.info.type = posTypeStep;
502 	se.info.next.step = 2; // 2 = second
505 	// three children (fixed), no SRT
507 	*first_child = ((StgWeak *)c)->key;
508 	// se.info.type = posTypeStep;
509 	se.info.next.step = 2;
512 	// layout.payload.ptrs, no SRT
517 	init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
519 	*first_child = find_ptrs(&se.info);
520 	if (*first_child == NULL)
524 	// StgMutArrPtr.ptrs, no SRT
525     case MUT_ARR_PTRS_CLEAN:
526     case MUT_ARR_PTRS_DIRTY:
527     case MUT_ARR_PTRS_FROZEN:
528     case MUT_ARR_PTRS_FROZEN0:
529 	init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
530 		  (StgPtr)(((StgMutArrPtrs *)c)->payload));
531 	*first_child = find_ptrs(&se.info);
532 	if (*first_child == NULL)
536 	// layout.payload.ptrs, SRT
537     case FUN:           // *c is a heap object.
539 	init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
540 	*first_child = find_ptrs(&se.info);
541 	if (*first_child == NULL)
542 	    // no child from ptrs, so check SRT
548 	init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
549 		  (StgPtr)((StgThunk *)c)->payload);
550 	*first_child = find_ptrs(&se.info);
551 	if (*first_child == NULL)
552 	    // no child from ptrs, so check SRT
556 	// 1 fixed child, SRT
559 	*first_child = c->payload[0];
560 	ASSERT(*first_child != NULL);
561 	init_srt_fun(&se.info, get_fun_itbl(c));
566 	*first_child = ((StgThunk *)c)->payload[0];
567 	ASSERT(*first_child != NULL);
568 	init_srt_thunk(&se.info, get_thunk_itbl(c));
571     case FUN_STATIC:      // *c is a heap object.
572 	ASSERT(get_itbl(c)->srt_bitmap != 0);
576 	init_srt_fun(&se.info, get_fun_itbl(c));
577 	*first_child = find_srt(&se.info);
578 	if (*first_child == NULL)
584 	ASSERT(get_itbl(c)->srt_bitmap != 0);
588 	init_srt_thunk(&se.info, get_thunk_itbl(c));
589 	*first_child = find_srt(&se.info);
590 	if (*first_child == NULL)
594     case TVAR_WATCH_QUEUE:
595 	*first_child = (StgClosure *)((StgTVarWatchQueue *)c)->closure;
596 	se.info.next.step = 2; // 2 = second
599 	*first_child = (StgClosure *)((StgTVar *)c)->current_value;
602 	*first_child = (StgClosure *)((StgTRecHeader *)c)->enclosing_trec;
605 	*first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
606 	se.info.next.step = 0;  // entry no.
615     case CONSTR_NOCAF_STATIC:
636 	barf("Invalid object *c in push()");
// Overflow into the next block group when this chunk is full.
640     if (stackTop - 1 < stackBottom) {
641 #ifdef DEBUG_RETAINER
642 	// debugBelch("push() to the next stack.\n");
644 	// currentStack->free is updated when the active stack is switched
645 	// to the next stack.
646 	currentStack->free = (StgPtr)stackTop;
648 	if (currentStack->link == NULL) {
649 	    nbd = allocGroup(BLOCKS_IN_STACK);
651 	    nbd->u.back = currentStack;
652 	    currentStack->link = nbd;
654 	    nbd = currentStack->link;
659     // adjust stackTop (actual push)
661     // If the size of stackElement was huge, we would better replace the
662     // following statement by either a memcpy() call or a switch statement
663     // on the type of the element. Currently, the size of stackElement is
664     // small enough (5 words) that this direct assignment seems to be enough.
666     // ToDo: The line below leads to the warning:
667     //    warning: 'se.info.type' may be used uninitialized in this function
668     // This is caused by the fact that there are execution paths through the
669     // large switch statement above where some cases do not initialize this
670     // field. Is this really harmless? Can we avoid the warning?
673 #ifdef DEBUG_RETAINER
675     if (stackSize > maxStackSize) maxStackSize = stackSize;
676     // ASSERT(stackSize >= 0);
677     // debugBelch("stackSize = %d\n", stackSize);
681 /* -----------------------------------------------------------------------------
682 * popOff() and popOffReal(): Pop a stackElement off the traverse stack.
684 * stackTop cannot be equal to stackLimit unless the whole stack is
685 * empty, in which case popOff() is not allowed.
687 * You can think of popOffReal() as a part of popOff() which is
688 * executed at the end of popOff() if necessary. Since popOff() is
689 * likely to be executed quite often while popOffReal() is not, we
690 * separate popOffReal() from popOff(), which is declared as an
691 * INLINE function (for the sake of execution speed). popOffReal()
692 * is called only within popOff() and nowhere else.
693 * -------------------------------------------------------------------------- */
// Bodies of popOffReal() (slow path: the current chunk is exhausted and
// we return to the previous block group) and popOff() (fast path:
// decrement stackTop within the current chunk).  See header comment.
// NOTE(review): fragment — function headers, braces, the `return`
// statements, and the stackTop adjustments are missing from this view.
697     bdescr *pbd;    // Previous Block Descriptor
699 #ifdef DEBUG_RETAINER
700     // debugBelch("pop() to the previous stack.\n");
703     ASSERT(stackTop + 1 == stackLimit);
704     ASSERT(stackBottom == (stackElement *)currentStack->start);
706     if (firstStack == currentStack) {
707 	// The stack is completely empty.
709 	ASSERT(stackTop == stackLimit);
710 #ifdef DEBUG_RETAINER
712 	if (stackSize > maxStackSize) maxStackSize = stackSize;
714 	ASSERT(stackSize >= 0);
715 	debugBelch("stackSize = %d\n", stackSize);
721     // currentStack->free is updated when the active stack is switched back
722     // to the previous stack.
723     currentStack->free = (StgPtr)stackLimit;
725     // find the previous block descriptor
726     pbd = currentStack->u.back;
729     returnToOldStack(pbd);
731 #ifdef DEBUG_RETAINER
733     if (stackSize > maxStackSize) maxStackSize = stackSize;
735     ASSERT(stackSize >= 0);
736     debugBelch("stackSize = %d\n", stackSize);
743 #ifdef DEBUG_RETAINER
744     // debugBelch("\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
747     ASSERT(stackTop != stackLimit);
748     ASSERT(!isEmptyRetainerStack());
750     // <= (instead of <) is wrong!
751     if (stackTop + 1 < stackLimit) {
753 #ifdef DEBUG_RETAINER
755 	if (stackSize > maxStackSize) maxStackSize = stackSize;
757 	ASSERT(stackSize >= 0);
758 	debugBelch("stackSize = %d\n", stackSize);
767 /* -----------------------------------------------------------------------------
768 * Finds the next object to be considered for retainer profiling and store
770 * Test if the topmost stack element indicates that more objects are left,
771 * and if so, retrieve the first object and store its pointer to *c. Also,
772 * set *cp and *r appropriately, both of which are stored in the stack element.
773 * The topmost stack element then is overwritten so as for it to now denote
775 * If the topmost stack element indicates no more objects are left, pop
776 * off the stack element until either an object can be retrieved or
777 * the current stack chunk becomes empty, indicated by rtsTrue returned by
778 * isOnBoundary(), in which case *c is set to NULL.
780 * It is okay to call this function even when the current stack chunk
782 * -------------------------------------------------------------------------- */
// Retrieve the next child (*c), its parent (*cp), and retainer (*r)
// from the topmost stack element; see the header comment above.
// NOTE(review): large fragment — the `stackElement *se;` declaration,
// the enclosing retry loop, many case labels, `popOff()` calls, and
// `return`s are missing from this view.
784 pop( StgClosure **c, StgClosure **cp, retainer *r )
788 #ifdef DEBUG_RETAINER
789     // debugBelch("pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
793     if (isOnBoundary()) {     // if the current stack chunk is depleted
800 	switch (get_itbl(se->c)->type) {
801 	    // two children (fixed), no SRT
802 	    // nothing in se.info
804 	    *c = se->c->payload[1];
810 	    // three children (fixed), no SRT
811 	    // need to push a stackElement
813 	    if (se->info.next.step == 2) {
814 		*c = (StgClosure *)((StgMVar *)se->c)->tail;
815 		se->info.next.step++;             // move to the next step
818 		*c = ((StgMVar *)se->c)->value;
825 	    // three children (fixed), no SRT
827 	    if (se->info.next.step == 2) {
828 		*c = ((StgWeak *)se->c)->value;
829 		se->info.next.step++;
832 		*c = ((StgWeak *)se->c)->finalizer;
839 	case TVAR_WATCH_QUEUE:
840 	    if (se->info.next.step == 2) {
841 		*c = (StgClosure *)((StgTVarWatchQueue *)se->c)->next_queue_entry;
842 		se->info.next.step++;             // move to the next step
845 		*c = (StgClosure *)((StgTVarWatchQueue *)se->c)->prev_queue_entry;
853 	    *c = (StgClosure *)((StgTVar *)se->c)->first_watch_queue_entry;
860 	    *c = (StgClosure *)((StgTRecHeader *)se->c)->current_chunk;
867 	    // These are pretty complicated: we have N entries, each
868 	    // of which contains 3 fields that we want to follow.  So
869 	    // we divide the step counter: the 2 low bits indicate
870 	    // which field, and the rest of the bits indicate the
871 	    // entry number (starting from zero).
872 	    nat entry_no = se->info.next.step >> 2;
873 	    nat field_no = se->info.next.step & 3;
874 	    if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
879 	    TRecEntry *entry = &((StgTRecChunk *)se->c)->entries[entry_no];
881 		*c = (StgClosure *)entry->tvar;
882 	    } else if (field_no == 1) {
883 		*c = entry->expected_value;
885 		*c = entry->new_value;
889 	    se->info.next.step++;
897 	    // StgMutArrPtr.ptrs, no SRT
898 	case MUT_ARR_PTRS_CLEAN:
899 	case MUT_ARR_PTRS_DIRTY:
900 	case MUT_ARR_PTRS_FROZEN:
901 	case MUT_ARR_PTRS_FROZEN0:
902 	    *c = find_ptrs(&se->info);
911 	    // layout.payload.ptrs, SRT
912 	case FUN:         // always a heap object
914 	    if (se->info.type == posTypePtrs) {
915 		*c = find_ptrs(&se->info);
921 		init_srt_fun(&se->info, get_fun_itbl(se->c));
927 	    if (se->info.type == posTypePtrs) {
928 		*c = find_ptrs(&se->info);
934 		init_srt_thunk(&se->info, get_thunk_itbl(se->c));
950 	    *c = find_srt(&se->info);
959 	    // no child (fixed), no SRT
965 	case SE_CAF_BLACKHOLE:
967 	    // one child (fixed), no SRT
972 	case IND_OLDGEN_PERM:
981 	case CONSTR_NOCAF_STATIC:
1000 	case INVALID_OBJECT:
1002 	    barf("Invalid object *c in pop()");
1008 /* -----------------------------------------------------------------------------
1009 * RETAINER PROFILING ENGINE
1010 * -------------------------------------------------------------------------- */
// One-time setup for retainer profiling: initialize the retainer-set
// machinery and reset the generation counter.
1013 initRetainerProfiling( void )
1015     initializeAllRetainerSet();
1016     retainerGeneration = 0;
1019 /* -----------------------------------------------------------------------------
1020 * This function must be called before f-closing prof_file.
1021 * -------------------------------------------------------------------------- */
// Final dump of all retainer sets to the profiling file; must run
// before prof_file is closed (see header comment above).
1023 endRetainerProfiling( void )
1025 #ifdef SECOND_APPROACH
1026     outputAllRetainerSet(prof_file);
1030 /* -----------------------------------------------------------------------------
1031 * Returns the actual pointer to the retainer set of the closure *c.
1032 * It may adjust RSET(c) subject to flip.
1034 * RSET(c) is initialized to NULL if its current value does not
1037 * Even though this function has side effects, they CAN be ignored because
1038 * subsequent calls to retainerSetOf() always result in the same return value
1039 * and retainerSetOf() is the only way to retrieve retainerSet of a given
1041 * We have to perform an XOR (^) operation each time a closure is examined.
1042 * The reason is that we do not know when a closure is visited last.
1043 * -------------------------------------------------------------------------- */
// Reset c's retainer-set field to the flip-tagged NULL if its current
// value does not conform to the current value of `flip`.
1045 maybeInitRetainerSet( StgClosure *c )
1047     if (!isRetainerSetFieldValid(c)) {
1048 	setRetainerSetToNull(c);
1052 /* -----------------------------------------------------------------------------
1053 * Returns rtsTrue if *c is a retainer.
1054 * -------------------------------------------------------------------------- */
1055 static INLINE rtsBool
// Classify *c as retainer / non-retainer by closure type; see the
// header comment above.
// NOTE(review): fragment — most case labels and the rtsTrue/rtsFalse
// returns between the visible lines are missing from this view.
1056 isRetainer( StgClosure *c )
1058     switch (get_itbl(c)->type) {
1062 	// TSOs MUST be retainers: they constitute the set of roots.
1069     case MUT_ARR_PTRS_CLEAN:
1070     case MUT_ARR_PTRS_DIRTY:
1071     case MUT_ARR_PTRS_FROZEN:
1072     case MUT_ARR_PTRS_FROZEN0:
1074 	// thunks are retainers.
1081     case THUNK_SELECTOR:
1085 	// Static thunks, or CAFS, are obviously retainers.
1088 	// WEAK objects are roots; there is separate code in which traversing
1089 	// begins from WEAK objects.
1092 	// Since the other mutvar-type things are retainers, seems
1093 	// like the right thing to do:
1115 	// partial applications
1121     case SE_CAF_BLACKHOLE:
1124     case IND_OLDGEN_PERM:
1134     case TVAR_WATCH_QUEUE:
1142 	// IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
1144 	// CONSTR_NOCAF_STATIC
1145 	// cannot be *c, *cp, *r in the retainer profiling loop.
1146     case CONSTR_NOCAF_STATIC:
1147 	// Stack objects are invalid because they are never treated as
1148 	// legal objects during retainer profiling.
1166     case INVALID_OBJECT:
1168 	barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
1173 /* -----------------------------------------------------------------------------
1174 * Returns the retainer function value for the closure *c, i.e., R(*c).
1175 * This function does NOT return the retainer(s) of *c.
1177 * *c must be a retainer.
1179 * Depending on the definition of this function, the maintenance of retainer
1180 * sets can be made easier. If most retainer sets are likely to be created
1181 * again across garbage collections, refreshAllRetainerSet() in
1182 * RetainerSet.c can simply do nothing.
1183 * If this is not the case, we can free all the retainer sets and
1184 * re-initialize the hash table.
1185 * See refreshAllRetainerSet() in RetainerSet.c.
1186 * -------------------------------------------------------------------------- */
1187 static INLINE retainer
// The retainer function R(*c): maps a retainer closure to its retainer
// identity under the compile-time-selected scheme (see header comment).
// NOTE(review): fragment — the return statement of scheme 1 and the
// closing #endif are not visible in this view.
1188 getRetainerFrom( StgClosure *c )
1190     ASSERT(isRetainer(c));
1192 #if defined(RETAINER_SCHEME_INFO)
1193     // Retainer scheme 1: retainer = info table
1195 #elif defined(RETAINER_SCHEME_CCS)
1196     // Retainer scheme 2: retainer = cost centre stack
1197     return c->header.prof.ccs;
1198 #elif defined(RETAINER_SCHEME_CC)
1199     // Retainer scheme 3: retainer = cost centre
1200     return c->header.prof.ccs->cc;
1204 /* -----------------------------------------------------------------------------
1205 * Associates the retainer set *s with the closure *c, that is, *s becomes
1206 * the retainer set of *c.
1210 * -------------------------------------------------------------------------- */
// Store s as the retainer set of *c, tagging the pointer with the
// current value of `flip` (see the comment above retainerSetOf()).
1212 associate( StgClosure *c, RetainerSet *s )
1214     // StgWord has the same size as pointers, so the following type
1216     RSET(c) = (RetainerSet *)((StgWord)s | flip);
1219 /* -----------------------------------------------------------------------------
1220 Call retainClosure for each of the closures covered by a large bitmap.
1221 -------------------------------------------------------------------------- */
// Call retainClosure for each pointer word described by a large stack
// bitmap: a 0 bit marks a pointer, a 1 bit a non-pointer (stack-frame
// bitmap convention).
// NOTE(review): fragment — declarations of b/i/bitmap, the p/i/b
// increments, and braces are missing from this view.
1224 retain_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, nat size,
1225 		     StgClosure *c, retainer c_child_r)
1231     bitmap = large_bitmap->bitmap[b];
1232     for (i = 0; i < size; ) {
1233 	if ((bitmap & 1) == 0) {
1234 	    retainClosure((StgClosure *)*p, c, c_child_r);
1238 	if (i % BITS_IN(W_) == 0) {
1240 	    bitmap = large_bitmap->bitmap[b];
1242 	    bitmap = bitmap >> 1;
1247 static INLINE StgPtr
// Small-bitmap variant of retain_large_bitmap(): 0 bits are pointers.
// Returns the advanced stack pointer.
// NOTE(review): fragment — the enclosing loop, p increment, and return
// statement are missing from this view.
1248 retain_small_bitmap (StgPtr p, nat size, StgWord bitmap,
1249 		     StgClosure *c, retainer c_child_r)
1252 	if ((bitmap & 1) == 0) {
1253 	    retainClosure((StgClosure *)*p, c, c_child_r);
1256 	bitmap = bitmap >> 1;
1262 /* -----------------------------------------------------------------------------
1263 * Call retainClosure for each of the closures in an SRT.
1264 * ------------------------------------------------------------------------- */
// Call retainClosure for every live entry of a large SRT.  Unlike the
// stack bitmaps above, SRT bitmaps mark live entries with a 1 bit.
// NOTE(review): fragment — declarations of i/b/p/size/bitmap and the
// increments are missing from this view.
1267 retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r)
1274     p = (StgClosure **)srt->srt;
1276     bitmap = srt->l.bitmap[b];
1277     for (i = 0; i < size; ) {
1278 	if ((bitmap & 1) != 0) {
1279 	    retainClosure((StgClosure *)*p, c, c_child_r);
1283 	if (i % BITS_IN(W_) == 0) {
1285 	    bitmap = srt->l.bitmap[b];
1287 	    bitmap = bitmap >> 1;
// Call retainClosure for each closure in an SRT.  An all-ones bitmap
// delegates to retain_large_srt_bitmap(); otherwise walk the inline
// bitmap, where a 1 bit marks a live entry.
// NOTE(review): fragment — braces, the srt pointer increment, and the
// early return after the large-SRT case are missing from this view.
1293 retainSRT (StgClosure **srt, nat srt_bitmap, StgClosure *c, retainer c_child_r)
1298     bitmap = srt_bitmap;
1301     if (bitmap == (StgHalfWord)(-1)) {
1302 	retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r );
1306     while (bitmap != 0) {
1307 	if ((bitmap & 1) != 0) {
1308 #ifdef ENABLE_WIN32_DLL_SUPPORT
// Untag Win32-DLL import-table indirections before following them.
1309 	    if ( (unsigned long)(*srt) & 0x1 ) {
1310 		retainClosure(*stgCast(StgClosure**,(stgCast(unsigned long, *srt) & ~0x1)),
1313 		retainClosure(*srt,c,c_child_r);
1316 	    retainClosure(*srt,c,c_child_r);
1320 	bitmap = bitmap >> 1;
1324 /* -----------------------------------------------------------------------------
1325 * Process all the objects in the stack chunk from stackStart to stackEnd
1326 * with *c and *c_child_r being their parent and their most recent retainer,
1327 * respectively. Treat stackOptionalFun as another child of *c if it is
1330 * *c is one of the following: TSO, AP_STACK.
1331 * If *c is TSO, c == c_child_r.
1332 * stackStart < stackEnd.
1333 * RSET(c) and RSET(c_child_r) are valid, i.e., their
1334 * interpretation conforms to the current value of flip (even when they
1335 * are interpreted to be NULL).
1336 * If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete,
1337 * or ThreadKilled, which means that its stack is ready to process.
1339 * This code was almost plagiarized from GC.c! For each pointer,
1340 * retainClosure() is invoked instead of evacuate().
1341 * -------------------------------------------------------------------------- */
// Process every stack frame between stackStart and stackEnd, invoking
// retainClosure() on each pointer (see the contract in the header
// comment above).  Establishes a new virtual stack chunk by saving and
// later restoring currentStackBoundary.
// NOTE(review): large fragment — return type, declarations of
// p/bitmap/size, many case labels, `continue`s, braces, and the
// cStackSize bookkeeping are missing from this view.
1343 retainStack( StgClosure *c, retainer c_child_r,
1344 	     StgPtr stackStart, StgPtr stackEnd )
1346     stackElement *oldStackBoundary;
1348     StgRetInfoTable *info;
1352 #ifdef DEBUG_RETAINER
1354     if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
1358       Each invocation of retainStack() creates a new virtual
1359       stack. Since all such stacks share a single common stack, we
1360       record the current currentStackBoundary, which will be restored
1363     oldStackBoundary = currentStackBoundary;
1364     currentStackBoundary = stackTop;
1366 #ifdef DEBUG_RETAINER
1367     // debugBelch("retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
1370     ASSERT(get_itbl(c)->type != TSO ||
1371 	   (((StgTSO *)c)->what_next != ThreadRelocated &&
1372 	    ((StgTSO *)c)->what_next != ThreadComplete &&
1373 	    ((StgTSO *)c)->what_next != ThreadKilled));
1376     while (p < stackEnd) {
1377 	info = get_ret_itbl((StgClosure *)p);
1379 	switch(info->i.type) {
1382 	    retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
1383 	    p += sizeofW(StgUpdateFrame);
1388 	case CATCH_STM_FRAME:
1389 	case CATCH_RETRY_FRAME:
1390 	case ATOMICALLY_FRAME:
1393 	    bitmap = BITMAP_BITS(info->i.layout.bitmap);
1394 	    size   = BITMAP_SIZE(info->i.layout.bitmap);
1396 	    p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1399 	    retainSRT((StgClosure **)GET_SRT(info), info->i.srt_bitmap, c, c_child_r);
1406 	    retainClosure((StgClosure *)*p, c, c_child_r);
1409 	    size = BCO_BITMAP_SIZE(bco);
1410 	    retain_large_bitmap(p, BCO_BITMAP(bco), size, c, c_child_r);
1415 	    // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
1418 	    size = GET_LARGE_BITMAP(&info->i)->size;
1420 	    retain_large_bitmap(p, GET_LARGE_BITMAP(&info->i),
1421 				size, c, c_child_r);
1423 	    // and don't forget to follow the SRT
1426 	    // Dynamic bitmap: the mask is stored on the stack
1429 	    dyn = ((StgRetDyn *)p)->liveness;
1431 	    // traverse the bitmap first
1432 	    bitmap = RET_DYN_LIVENESS(dyn);
1433 	    p      = (P_)&((StgRetDyn *)p)->payload[0];
1434 	    size   = RET_DYN_BITMAP_SIZE;
1435 	    p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1437 	    // skip over the non-ptr words
1438 	    p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;
1440 	    // follow the ptr words
1441 	    for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
1442 		retainClosure((StgClosure *)*p, c, c_child_r);
1449 	    StgRetFun *ret_fun = (StgRetFun *)p;
1450 	    StgFunInfoTable *fun_info;
1452 	    retainClosure(ret_fun->fun, c, c_child_r);
1453 	    fun_info = get_fun_itbl(ret_fun->fun);
1455 	    p = (P_)&ret_fun->payload;
1456 	    switch (fun_info->f.fun_type) {
1458 		bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
1459 		size = BITMAP_SIZE(fun_info->f.b.bitmap);
1460 		p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1463 		size = GET_FUN_LARGE_BITMAP(fun_info)->size;
1464 		retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
1465 				    size, c, c_child_r);
1469 		bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
1470 		size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
1471 		p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1478 	    barf("Invalid object found in retainStack(): %d",
1479 		 (int)(info->i.type));
1483     // restore currentStackBoundary
1484     currentStackBoundary = oldStackBoundary;
1485 #ifdef DEBUG_RETAINER
1486     // debugBelch("retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
1489 #ifdef DEBUG_RETAINER
1494 /* ----------------------------------------------------------------------------
1495 * Call retainClosure for each of the children of a PAP/AP
1496 * ------------------------------------------------------------------------- */
/*
 * Call retainClosure() for the function closure of a PAP/AP and for each
 * pointer in its argument payload, dispatching on the function's bitmap
 * kind (small bitmap, large bitmap, BCO bitmap, or one of the standard
 * stg_arg_bitmaps).  Returns the pointer just past the scanned payload.
 * NOTE(review): this excerpt is elided — case labels, the default arm,
 * and the closing brace are not visible here.
 */
1498 static INLINE StgPtr
1499 retain_PAP_payload (StgClosure *pap, retainer c_child_r, StgClosure *fun,
1500 StgClosure** payload, StgWord n_args)
1504 StgFunInfoTable *fun_info;
1506 retainClosure(fun, pap, c_child_r);
1507 fun_info = get_fun_itbl(fun);
// A PAP's function must itself be evaluated, never another PAP.
1508 ASSERT(fun_info->i.type != PAP);
1510 p = (StgPtr)payload;
1512 switch (fun_info->f.fun_type) {
// Small bitmap: walk n_args words guided by the per-function bitmap.
1514 bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
1515 p = retain_small_bitmap(p, n_args, bitmap,
// Large bitmap: bitmap lives in a separate table attached to the itbl.
1519 retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
1520 n_args, pap, c_child_r);
// BCO: the bitmap is stored in the byte-code object itself.
1524 retain_large_bitmap((StgPtr)payload, BCO_BITMAP(fun),
1525 n_args, pap, c_child_r);
// Default: one of the RTS's precomputed standard argument bitmaps.
1529 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
1530 p = retain_small_bitmap(p, n_args, bitmap, pap, c_child_r);
1536 /* -----------------------------------------------------------------------------
1537 * Compute the retainer set of *c0 and all its descendants by traversing.
1538 * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
1540 * c0 = cp0 = r0 holds only for root objects.
1541 * RSET(cp0) and RSET(r0) are valid, i.e., their
1542 * interpretation conforms to the current value of flip (even when they
1543 * are interpreted to be NULL).
1544 * However, RSET(c0) may be corrupt, i.e., it may not conform to
1545 * the current value of flip. If it does not, during the execution
1546 * of this function, RSET(c0) must be initialized as well as all
1549 * stackTop must be the same at the beginning and the exit of this function.
1550 * *c0 can be TSO (as well as AP_STACK).
1551 * -------------------------------------------------------------------------- */
/*
 * Core traversal: computes the retainer set of *c0 and everything
 * reachable from it, using an explicit traverse stack (push/pop) rather
 * than C recursion for ordinary children.  TSOs, PAPs, APs and
 * AP_STACKs are handled specially and processed in one go.
 * NOTE(review): this excerpt is elided — the function head's return
 * type, several case labels, gotos, and the closing brace are missing.
 */
1553 retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
1555 // c = Current closure
1556 // cp = Current closure's Parent
1557 // r = current closures' most recent Retainer
1558 // c_child_r = current closure's children's most recent retainer
1559 // first_child = first child of c
1560 StgClosure *c, *cp, *first_child;
1561 RetainerSet *s, *retainerSetOfc;
1562 retainer r, c_child_r;
1565 #ifdef DEBUG_RETAINER
1566 // StgPtr oldStackTop;
1569 #ifdef DEBUG_RETAINER
1570 // oldStackTop = stackTop;
1571 // debugBelch("retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
1574 // (c, cp, r) = (c0, cp0, r0)
1581 //debugBelch("loop");
1582 // pop to (c, cp, r);
1586 #ifdef DEBUG_RETAINER
1587 // debugBelch("retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
1592 //debugBelch("inner_loop");
1595 // c = current closure under consideration,
1596 // cp = current closure's parent,
1597 // r = current closure's most recent retainer
1599 // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
1600 // RSET(cp) and RSET(r) are valid.
1601 // RSET(c) is valid only if c has been visited before.
1603 // Loop invariants (on the relation between c, cp, and r)
1604 // if cp is not a retainer, r belongs to RSET(cp).
1605 // if cp is a retainer, r == cp.
1607 typeOfc = get_itbl(c)->type;
1609 #ifdef DEBUG_RETAINER
1612 case CONSTR_NOCAF_STATIC:
// Debug-only cost accounting on first visit (elided context above).
1618 if (retainerSetOf(c) == NULL) { // first visit?
1619 costArray[typeOfc] += cost(c);
1620 sumOfNewCost += cost(c);
// Finished or killed threads carry no useful retainer information.
1629 if (((StgTSO *)c)->what_next == ThreadComplete ||
1630 ((StgTSO *)c)->what_next == ThreadKilled) {
1631 #ifdef DEBUG_RETAINER
1632 debugBelch("ThreadComplete or ThreadKilled encountered in retainClosure()\n");
// A relocated TSO is followed through its link to the live copy.
1636 if (((StgTSO *)c)->what_next == ThreadRelocated) {
1637 #ifdef DEBUG_RETAINER
1638 debugBelch("ThreadRelocated encountered in retainClosure()\n");
1640 c = (StgClosure *)((StgTSO *)c)->link;
1646 // We just skip IND_STATIC, so its retainer set is never computed.
1647 c = ((StgIndStatic *)c)->indirectee;
1649 // static objects with no pointers out, so goto loop.
1650 case CONSTR_NOCAF_STATIC:
1651 // It is not just enough not to compute the retainer set for *c; it is
1652 // mandatory because CONSTR_NOCAF_STATIC are not reachable from
1653 // scavenged_static_objects, the list from which is assumed to traverse
1654 // all static objects after major garbage collections.
1658 if (get_itbl(c)->srt_bitmap == 0) {
1659 // No need to compute the retainer set; no dynamic objects
1660 // are reachable from *c.
1662 // Static objects: if we traverse all the live closures,
1663 // including static closures, during each heap census then
1664 // we will observe that some static closures appear and
1665 // disappear. eg. a closure may contain a pointer to a
1666 // static function 'f' which is not otherwise reachable
1667 // (it doesn't indirectly point to any CAFs, so it doesn't
1668 // appear in any SRTs), so we would find 'f' during
1669 // traversal. However on the next sweep there may be no
1670 // closures pointing to 'f'.
1672 // We must therefore ignore static closures whose SRT is
1673 // empty, because these are exactly the closures that may
1674 // "appear". A closure with a non-empty SRT, and which is
1675 // still required, will always be reachable.
1677 // But what about CONSTR_STATIC? Surely these may be able
1678 // to appear, and they don't have SRTs, so we can't
1679 // check. So for now, we're calling
1680 // resetStaticObjectForRetainerProfiling() from the
1681 // garbage collector to reset the retainer sets in all the
1682 // reachable static objects.
1689 // The above objects are ignored in computing the average number of times
1690 // an object is visited.
1691 timesAnyObjectVisited++;
1693 // If this is the first visit to c, initialize its retainer set.
1694 maybeInitRetainerSet(c);
1695 retainerSetOfc = retainerSetOf(c);
1698 // isRetainer(cp) == rtsTrue => s == NULL
1699 // isRetainer(cp) == rtsFalse => s == cp.retainer
1703 s = retainerSetOf(cp);
1705 // (c, cp, r, s) is available.
1707 // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
1708 if (retainerSetOfc == NULL) {
1709 // This is the first visit to *c.
1713 associate(c, singleton(r));
1715 // s is actually the retainer set of *c!
1718 // compute c_child_r
1719 c_child_r = isRetainer(c) ? getRetainerFrom(c) : r;
1721 // This is not the first visit to *c.
1722 if (isMember(r, retainerSetOfc))
1723 goto loop; // no need to process child
1726 associate(c, addElement(r, retainerSetOfc));
1728 // s is not NULL and cp is not a retainer. This means that
1729 // each time *cp is visited, so is *c. Thus, if s has
1730 // exactly one more element in its retainer set than c, s
1731 // is also the new retainer set for *c.
1732 if (s->num == retainerSetOfc->num + 1) {
1735 // Otherwise, just add R_r to the current retainer set of *c.
1737 associate(c, addElement(r, retainerSetOfc))
1742 goto loop; // no need to process child
1744 // compute c_child_r
1748 // now, RSET() of all of *c, *cp, and *r is valid.
1749 // (c, c_child_r) are available.
1753 // Special case closures: we process these all in one go rather
1754 // than attempting to save the current position, because doing so
// TSO: traverse the thread's whole stack immediately.
1758 retainStack(c, c_child_r,
1760 ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
// PAP: traverse function + partially-applied arguments.
1765 StgPAP *pap = (StgPAP *)c;
1766 retain_PAP_payload(c, c_child_r, pap->fun, pap->payload, pap->n_args);
// AP: same payload layout as PAP.
1772 StgAP *ap = (StgAP *)c;
1773 retain_PAP_payload(c, c_child_r, ap->fun, ap->payload, ap->n_args);
// AP_STACK: traverse the function, then the captured stack chunk.
1778 retainClosure(((StgAP_STACK *)c)->fun, c, c_child_r);
1779 retainStack(c, c_child_r,
1780 (StgPtr)((StgAP_STACK *)c)->payload,
1781 (StgPtr)((StgAP_STACK *)c)->payload +
1782 ((StgAP_STACK *)c)->size);
1786 push(c, c_child_r, &first_child);
1788 // If first_child is null, c has no child.
1789 // If first_child is not null, the top stack element points to the next
1790 // object. push() may or may not push a stackElement on the stack.
1791 if (first_child == NULL)
1794 // (c, cp, r) = (first_child, c, c_child_r)
1801 /* -----------------------------------------------------------------------------
1802 * Compute the retainer set for every object reachable from *tl.
1803 * -------------------------------------------------------------------------- */
/*
 * Root callback: compute the retainer set of everything reachable from
 * the root *tl.  If the root is itself a retainer, seed the traversal
 * with its own retainer; otherwise attribute it to CCS_SYSTEM.
 * NOTE(review): elided excerpt — return type, braces and the tail of
 * the function are not visible.
 */
1805 retainRoot( StgClosure **tl )
1807 // We no longer assume that only TSOs and WEAKs are roots; any closure can
// Each root traversal starts with an empty traverse stack; the current
// stack top becomes the boundary for this traversal.
1810 ASSERT(isEmptyRetainerStack());
1811 currentStackBoundary = stackTop;
1813 if (*tl != &stg_END_TSO_QUEUE_closure && isRetainer(*tl)) {
1814 retainClosure(*tl, *tl, getRetainerFrom(*tl));
1816 retainClosure(*tl, *tl, CCS_SYSTEM);
1819 // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
1820 // *tl might be a TSO which is ThreadComplete, in which
1821 // case we ignore it for the purposes of retainer profiling.
1824 /* -----------------------------------------------------------------------------
1825 * Compute the retainer set for each of the objects in the heap.
1826 * -------------------------------------------------------------------------- */
/*
 * Compute retainer sets for every object reachable from the roots:
 * scheduler roots, the weak pointer list, and the stable pointer table.
 * Afterwards, reset the rs field of unvisited mutable-list objects so
 * their retainer-set words conform to the current flip value.
 * NOTE(review): elided excerpt — variable declarations, braces, and
 * several case labels are not visible.
 */
1828 computeRetainerSet( void )
1835 #ifdef DEBUG_RETAINER
1836 RetainerSet tmpRetainerSet;
1839 GetRoots(retainRoot); // for scheduler roots
1841 // This function is called after a major GC, when key, value, and finalizer
1842 // all are guaranteed to be valid, or reachable.
1844 // The following code assumes that WEAK objects are considered to be roots
1845 // for retainer profiling.
1846 for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
1847 // retainRoot((StgClosure *)weak);
1848 retainRoot((StgClosure **)&weak);
1850 // Consider roots from the stable ptr table.
1851 markStablePtrTable(retainRoot);
1853 // The following code resets the rs field of each unvisited mutable
1854 // object (computing sumOfNewCostExtra and updating costArray[] when
1855 // debugging retainer profiler).
1856 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1857 // NOT TRUE: even G0 has a block on its mutable list
1858 // ASSERT(g != 0 || (generations[g].mut_list == NULL));
1860 // Traversing through mut_list is necessary
1861 // because we can find MUT_VAR objects which have not been
1862 // visited during retainer profiling.
1863 for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
1864 for (ml = bd->start; ml < bd->free; ml++) {
1866 maybeInitRetainerSet((StgClosure *)*ml);
1867 rtl = retainerSetOf((StgClosure *)*ml);
1869 #ifdef DEBUG_RETAINER
1871 // first visit to *ml
1872 // This is a violation of the interface rule!
1873 RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);
1875 switch (get_itbl((StgClosure *)ml)->type) {
1879 case CONSTR_NOCAF_STATIC:
1883 barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
1887 costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
1888 sumOfNewCostExtra += cost((StgClosure *)ml);
1898 /* -----------------------------------------------------------------------------
1899 * Traverse all static objects for which we compute retainer sets,
1900 * and reset their rs fields to NULL, which is accomplished by
1901 * invoking maybeInitRetainerSet(). This function must be called
1902 * before zeroing all objects reachable from scavenged_static_objects
1903 in the case of major garbage collections. See GarbageCollect() in
1906 * The mut_once_list of the oldest generation must also be traversed?
1907 * Why? Because if the evacuation of an object pointed to by a static
1908 * indirection object fails, it is put back to the mut_once_list of
1909 * the oldest generation.
1910 * However, this is not necessary because any static indirection objects
1911 * are just traversed through to reach dynamic objects. In other words,
1912 * they are not taken into consideration in computing retainer sets.
1913 * -------------------------------------------------------------------------- */
/*
 * Walk scavenged_static_objects and reset the rs field of each static
 * object we compute retainer sets for (THUNK/FUN/CONSTR statics) via
 * maybeInitRetainerSet(); IND_STATIC objects are merely stepped over.
 * NOTE(review): elided excerpt — case labels and braces are missing.
 */
1915 resetStaticObjectForRetainerProfiling( void )
1917 #ifdef DEBUG_RETAINER
1922 #ifdef DEBUG_RETAINER
1925 p = scavenged_static_objects;
1926 while (p != END_OF_STATIC_LIST) {
1927 #ifdef DEBUG_RETAINER
1930 switch (get_itbl(p)->type) {
1932 // Since we do not compute the retainer set of any
1933 // IND_STATIC object, we don't have to reset its retainer
1935 p = (StgClosure*)*IND_STATIC_LINK(p);
// THUNK_STATIC: reset rs, follow static link to next object.
1938 maybeInitRetainerSet(p);
1939 p = (StgClosure*)*THUNK_STATIC_LINK(p);
// FUN_STATIC: same treatment.
1942 maybeInitRetainerSet(p);
1943 p = (StgClosure*)*FUN_STATIC_LINK(p);
// CONSTR_STATIC (presumably — label elided): generic static link.
1946 maybeInitRetainerSet(p);
1947 p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
1950 barf("resetStaticObjectForRetainerProfiling: %p (%s)",
1951 p, get_itbl(p)->type);
1955 #ifdef DEBUG_RETAINER
1956 // debugBelch("count in scavenged_static_objects = %d\n", count);
1960 /* -----------------------------------------------------------------------------
1961 * Perform retainer profiling.
1962 * N is the oldest generation being profiled, where the generations are
1963 * numbered starting at 0.
1966 * This function should be called only immediately after major garbage
1968 * ------------------------------------------------------------------------- */
/*
 * Top-level entry for one retainer-profiling pass: optional heap sanity
 * check, flip the rs-interpretation bit, (re)initialize the traverse
 * stack and retainer sets, run computeRetainerSet(), verify the costs
 * against a linear heap scan (DEBUG_RETAINER only), then tear down and
 * bump retainerGeneration.
 * NOTE(review): elided excerpt — declarations, braces, the flip
 * statement itself, and the final profiling-report call are missing.
 */
1970 retainerProfile(void)
1972 #ifdef DEBUG_RETAINER
1974 nat totalHeapSize; // total raw heap size (computed by linear scanning)
1977 #ifdef DEBUG_RETAINER
1978 debugBelch(" < retainerProfile() invoked : %d>\n", retainerGeneration);
1983 // We haven't flipped the bit yet.
1984 #ifdef DEBUG_RETAINER
1985 debugBelch("Before traversing:\n");
1986 sumOfCostLinear = 0;
1987 for (i = 0;i < N_CLOSURE_TYPES; i++)
1988 costArrayLinear[i] = 0;
1989 totalHeapSize = checkHeapSanityForRetainerProfiling();
1991 debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
1993 debugBelch("costArrayLinear[] = ");
1994 for (i = 0;i < N_CLOSURE_TYPES; i++)
1995 debugBelch("[%u:%u] ", i, costArrayLinear[i]);
1999 ASSERT(sumOfCostLinear == totalHeapSize);
// Debug helper: print a costArrayLinear entry only when non-zero.
2002 #define pcostArrayLinear(index) \
2003 if (costArrayLinear[index] > 0) \
2004 debugBelch("costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
2005 pcostArrayLinear(THUNK_STATIC);
2006 pcostArrayLinear(FUN_STATIC);
2007 pcostArrayLinear(CONSTR_STATIC);
2008 pcostArrayLinear(CONSTR_NOCAF_STATIC);
2012 // Now we flip the flip bit.
2015 #ifdef DEBUG_RETAINER
2021 numObjectVisited = 0;
2022 timesAnyObjectVisited = 0;
2024 #ifdef DEBUG_RETAINER
2025 debugBelch("During traversing:\n");
2027 sumOfNewCostExtra = 0;
2028 for (i = 0;i < N_CLOSURE_TYPES; i++)
2033 We initialize the traverse stack each time the retainer profiling is
2034 performed (because the traverse stack size varies on each retainer profiling
2035 and this operation is not costly anyhow). However, we just refresh the
2038 initializeTraverseStack();
2039 #ifdef DEBUG_RETAINER
2040 initializeAllRetainerSet();
2042 refreshAllRetainerSet();
2044 computeRetainerSet();
2046 #ifdef DEBUG_RETAINER
2047 debugBelch("After traversing:\n");
2048 sumOfCostLinear = 0;
2049 for (i = 0;i < N_CLOSURE_TYPES; i++)
2050 costArrayLinear[i] = 0;
2051 totalHeapSize = checkHeapSanityForRetainerProfiling();
2053 debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
2054 ASSERT(sumOfCostLinear == totalHeapSize);
2056 // now, compare the two results
2059 costArray[] must be exactly the same as costArrayLinear[].
2061 1) Dead weak pointers, whose type is CONSTR. These objects are not
2062 reachable from any roots.
2064 debugBelch("Comparison:\n");
2065 debugBelch("\tcostArrayLinear[] (must be empty) = ");
2066 for (i = 0;i < N_CLOSURE_TYPES; i++)
2067 if (costArray[i] != costArrayLinear[i])
2068 // nothing should be printed except MUT_VAR after major GCs
2069 debugBelch("[%u:%u] ", i, costArrayLinear[i]);
2072 debugBelch("\tsumOfNewCost = %u\n", sumOfNewCost);
2073 debugBelch("\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
2074 debugBelch("\tcostArray[] (must be empty) = ");
2075 for (i = 0;i < N_CLOSURE_TYPES; i++)
2076 if (costArray[i] != costArrayLinear[i])
2077 // nothing should be printed except MUT_VAR after major GCs
2078 debugBelch("[%u:%u] ", i, costArray[i]);
2081 // only for major garbage collection
2082 ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
2086 closeTraverseStack();
2087 #ifdef DEBUG_RETAINER
2088 closeAllRetainerSet();
2090 // Note that there is no post-processing for the retainer sets.
2092 retainerGeneration++;
2095 retainerGeneration - 1, // retainerGeneration has just been incremented!
2096 #ifdef DEBUG_RETAINER
2097 maxCStackSize, maxStackSize,
2099 (double)timesAnyObjectVisited / numObjectVisited);
2102 /* -----------------------------------------------------------------------------
2104 * -------------------------------------------------------------------------- */
2106 #ifdef DEBUG_RETAINER
/*
 * LOOKS_LIKE_PTR(r): debug heuristic — r is a static closure, or a
 * heap-allocated address in a non-free block, and its first word is
 * not the 0xaaaaaaaa free-fill pattern.
 */
2108 #define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
2109 ((HEAP_ALLOCED(r) && ((Bdescr((P_)r)->flags & BF_FREE) == 0)))) && \
2110 ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa))
/*
 * Sanity-check one dynamic heap closure and return its size in words.
 * A closure whose rs word does not conform to the current flip value
 * was not visited by the traversal; a dead weak pointer (CONSTR named
 * DEAD_WEAK) is tolerated and accounted, anything else is reported.
 * NOTE(review): elided excerpt — return type, braces and the error
 * branch's debugBelch head are missing.
 */
2113 sanityCheckHeapClosure( StgClosure *c )
2117 ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
2118 ASSERT(!closure_STATIC(c));
2119 ASSERT(LOOKS_LIKE_PTR(c));
// rs low bit disagreeing with flip => object was not visited this pass.
2121 if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
2122 if (get_itbl(c)->type == CONSTR &&
2123 !strcmp(get_itbl(c)->prof.closure_type, "DEAD_WEAK") &&
2124 !strcmp(get_itbl(c)->prof.closure_desc, "DEAD_WEAK")) {
2125 debugBelch("\tUnvisited dead weak pointer object found: c = %p\n", c);
2126 costArray[get_itbl(c)->type] += cost(c);
2127 sumOfNewCost += cost(c);
2130 "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
2131 flip, c, get_itbl(c)->type,
2132 get_itbl(c)->prof.closure_type, get_itbl(c)->prof.closure_desc,
2135 // debugBelch("sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
2138 return closure_sizeW(c);
/*
 * Linearly scan a chain of blocks, sanity-checking each closure and
 * accumulating its size into sumOfCostLinear / costArrayLinear.
 * Returns (presumably — tail elided) the total words scanned.
 */
2142 heapCheck( bdescr *bd )
2145 static nat costSum, size;
2148 while (bd != NULL) {
2150 while (p < bd->free) {
2151 size = sanityCheckHeapClosure((StgClosure *)p);
2152 sumOfCostLinear += size;
2153 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2155 // no need for slop check; I think slops are not used currently.
2157 ASSERT(p == bd->free);
2158 costSum += bd->free - bd->start;
/*
 * Scan the small-object allocation area: first the partially-filled
 * nursery block up to alloc_Hp, then the rest of small_alloc_list,
 * sanity-checking every closure and accumulating linear costs.
 * NOTE(review): elided excerpt — declarations, braces, and the early
 * part handling the current alloc block are missing.
 */
2166 smallObjectPoolCheck(void)
2170 static nat costSum, size;
2172 bd = small_alloc_list;
// Current allocation block: scan only up to the allocation pointer.
2180 while (p < alloc_Hp) {
2181 size = sanityCheckHeapClosure((StgClosure *)p);
2182 sumOfCostLinear += size;
2183 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2186 ASSERT(p == alloc_Hp);
2187 costSum += alloc_Hp - bd->start;
// Remaining blocks in the list are fully used: scan up to bd->free.
2190 while (bd != NULL) {
2192 while (p < bd->free) {
2193 size = sanityCheckHeapClosure((StgClosure *)p);
2194 sumOfCostLinear += size;
2195 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2198 ASSERT(p == bd->free);
2199 costSum += bd->free - bd->start;
/*
 * Scan a chain of large-object blocks — one closure per block, located
 * at bd->start — sanity-checking each and accumulating linear costs.
 */
2207 chainCheck(bdescr *bd)
2212 while (bd != NULL) {
2213 // bd->free - bd->start is not an accurate measurement of the
2214 // object size. Actually it is always zero, so we compute its
2216 size = sanityCheckHeapClosure((StgClosure *)bd->start);
2217 sumOfCostLinear += size;
2218 costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size;
/*
 * Full linear heap scan used to cross-check the traversal's cost
 * accounting: every block list and large-object chain of every
 * generation/step is checked, plus the small-object pool for g0s0.
 * Returns (presumably — tail elided) the total words scanned.
 */
2227 checkHeapSanityForRetainerProfiling( void )
2232 debugBelch("START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2233 if (RtsFlags.GcFlags.generations == 1) {
2234 costSum += heapCheck(g0s0->to_blocks);
2235 debugBelch("heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2236 costSum += chainCheck(g0s0->large_objects);
2237 debugBelch("chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2239 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
2240 for (s = 0; s < generations[g].n_steps; s++) {
2242 After all live objects have been scavenged, the garbage
2243 collector may create some objects in
2244 scheduleFinalizers(). These objects are created through
2245 allocate(), so the small object pool or the large object
2246 pool of the g0s0 may not be empty.
2248 if (g == 0 && s == 0) {
2249 costSum += smallObjectPoolCheck();
2250 debugBelch("smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2251 costSum += chainCheck(generations[g].steps[s].large_objects);
2252 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2254 costSum += heapCheck(generations[g].steps[s].blocks);
2255 debugBelch("heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2256 costSum += chainCheck(generations[g].steps[s].large_objects);
2257 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
/*
 * Debug aid: search every generation/step (ordinary blocks and large
 * objects) for a heap word containing the value p, then walk backwards
 * from the hit to something that looks like an info pointer and report
 * the enclosing location via debugBelch().
 */
2266 findPointer(StgPtr p)
2272 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2273 for (s = 0; s < generations[g].n_steps; s++) {
2274 // if (g == 0 && s == 0) continue;
2275 bd = generations[g].steps[s].blocks;
2276 for (; bd; bd = bd->link) {
2277 for (q = bd->start; q < bd->free; q++) {
2278 if (*q == (StgWord)p) {
// Back up to the closest preceding plausible info pointer.
2280 while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
2281 debugBelch("Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
2286 bd = generations[g].steps[s].large_objects;
2287 for (; bd; bd = bd->link) {
2288 e = bd->start + cost((StgClosure *)bd->start);
2289 for (q = bd->start; q < e; q++) {
2290 if (*q == (StgWord)p) {
2292 while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
2293 debugBelch("Found in gen[%d], large_objects: %p\n", g, r);
/*
 * Debug aid: report which generation/step's block list or large-object
 * chain contains the address p, via debugBelch().
 */
2303 belongToHeap(StgPtr p)
2308 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2309 for (s = 0; s < generations[g].n_steps; s++) {
2310 // if (g == 0 && s == 0) continue;
2311 bd = generations[g].steps[s].blocks;
2312 for (; bd; bd = bd->link) {
2313 if (bd->start <= p && p < bd->free) {
2314 debugBelch("Belongs to gen[%d], step[%d]", g, s);
2318 bd = generations[g].steps[s].large_objects;
2319 for (; bd; bd = bd->link) {
2320 if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
2321 debugBelch("Found in gen[%d], large_objects: %p\n", g, bd->start);
2328 #endif /* DEBUG_RETAINER */
2330 #endif /* PROFILING */