1 /* -----------------------------------------------------------------------------
2 * $Id: RetainerProfile.c,v 1.11 2004/08/13 13:10:28 simonmar Exp $
4 * (c) The GHC Team, 2001
9 * ---------------------------------------------------------------------------*/
13 // Turn off inlining when debugging - it obfuscates things
24 #include "RetainerProfile.h"
25 #include "RetainerSet.h"
32 #include "Profiling.h"
34 #include "BlockAlloc.h"
39 Note: what to change in order to plug-in a new retainer profiling scheme?
40 (1) type retainer in ../includes/StgRetainerProf.h
41 (2) retainer function R(), i.e., getRetainerFrom()
42 (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
43 in RetainerSet.h, if needed.
44 (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
47 /* -----------------------------------------------------------------------------
49 * -------------------------------------------------------------------------- */
51 static nat retainerGeneration; // generation
53 static nat numObjectVisited; // total number of objects visited
54 static nat timesAnyObjectVisited; // number of times any objects are visited
57 The rs field in the profile header of any object points to its retainer
58 set in an indirect way: if flip is 0, it points to the retainer set;
59 if flip is 1, it points to the next byte after the retainer set (even
60 for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
61 pointer. See retainerSetOf().
64 StgWord flip = 0; // flip bit
65 // must be 0 if DEBUG_RETAINER is on (for static closures)
67 #define setRetainerSetToNull(c) \
68 (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)
70 static void retainStack(StgClosure *, retainer, StgPtr, StgPtr);
71 static void retainClosure(StgClosure *, StgClosure *, retainer);
73 static void belongToHeap(StgPtr p);
78 cStackSize records how many times retainStack() has been invoked recursively,
79 that is, the number of activation records for retainStack() on the C stack.
80 maxCStackSize records its max value.
82 cStackSize <= maxCStackSize
84 static nat cStackSize, maxCStackSize;
86 static nat sumOfNewCost; // sum of the cost of each object, computed
87 // when the object is first visited
88 static nat sumOfNewCostExtra; // for those objects not visited during
89 // retainer profiling, e.g., MUT_VAR
90 static nat costArray[N_CLOSURE_TYPES];
92 nat sumOfCostLinear; // sum of the costs of all object, computed
93 // when linearly traversing the heap after
95 nat costArrayLinear[N_CLOSURE_TYPES];
98 /* -----------------------------------------------------------------------------
99 * Retainer stack - header
101 * Although the retainer stack implementation could be separated *
102 * from the retainer profiling engine, there does not seem to be
103 * any advantage in doing that; retainer stack is an integral part
104 * of retainer profiling engine and cannot be use elsewhere at
106 * -------------------------------------------------------------------------- */
116 // fixed layout or layout specified by a field in the closure
121 // See StgClosureInfo in InfoTables.h
122 #if SIZEOF_VOID_P == 8
159 firstStack points to the first block group.
160 currentStack points to the block group currently being used.
161 currentStack->free == stackLimit.
162 stackTop points to the topmost byte in the stack of currentStack.
163 Unless the whole stack is empty, stackTop must point to the topmost
164 object (or byte) in the whole stack. Thus, it is only when the whole stack
165 is empty that stackTop == stackLimit (not during the execution of push()
167 stackBottom == currentStack->start.
168 stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
170 When a current stack becomes empty, stackTop is set to point to
171 the topmost element on the previous block group so as to satisfy
172 the invariants described above.
174 static bdescr *firstStack = NULL;
175 static bdescr *currentStack;
176 static stackElement *stackBottom, *stackTop, *stackLimit;
179 currentStackBoundary is used to mark the current stack chunk.
180 If stackTop == currentStackBoundary, it means that the current stack chunk
181 is empty. It is the responsibility of the user to keep currentStackBoundary
182 valid all the time if it is to be employed.
184 static stackElement *currentStackBoundary;
187 stackSize records the current size of the stack.
188 maxStackSize records its high water mark.
190 stackSize <= maxStackSize
192 stackSize is just an estimate measure of the depth of the graph. The reason
193 is that some heap objects have only a single child and may not result
194 in a new element being pushed onto the stack. Therefore, at the end of
195 retainer profiling, maxStackSize + maxCStackSize is some value no greater
196 than the actual depth of the graph.
198 #ifdef DEBUG_RETAINER
199 static int stackSize, maxStackSize;
202 // number of blocks allocated for one stack
203 #define BLOCKS_IN_STACK 1
205 /* -----------------------------------------------------------------------------
206 * Add a new block group to the stack.
208 * currentStack->link == s.
209 * -------------------------------------------------------------------------- */
211 newStackBlock( bdescr *bd )
214 stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
215 stackBottom = (stackElement *)bd->start;
216 stackLimit = (stackElement *)stackTop;
217 bd->free = (StgPtr)stackLimit;
220 /* -----------------------------------------------------------------------------
221 * Return to the previous block group.
223 * s->link == currentStack.
224 * -------------------------------------------------------------------------- */
226 returnToOldStack( bdescr *bd )
229 stackTop = (stackElement *)bd->free;
230 stackBottom = (stackElement *)bd->start;
231 stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
232 bd->free = (StgPtr)stackLimit;
235 /* -----------------------------------------------------------------------------
236 * Initializes the traverse stack.
237 * -------------------------------------------------------------------------- */
239 initializeTraverseStack( void )
241 if (firstStack != NULL) {
242 freeChain(firstStack);
245 firstStack = allocGroup(BLOCKS_IN_STACK);
246 firstStack->link = NULL;
247 firstStack->u.back = NULL;
249 newStackBlock(firstStack);
252 /* -----------------------------------------------------------------------------
253 * Frees all the block groups in the traverse stack.
256 * -------------------------------------------------------------------------- */
258 closeTraverseStack( void )
260 freeChain(firstStack);
264 /* -----------------------------------------------------------------------------
265 * Returns rtsTrue if the whole stack is empty.
266 * -------------------------------------------------------------------------- */
267 static INLINE rtsBool
268 isEmptyRetainerStack( void )
270 return (firstStack == currentStack) && stackTop == stackLimit;
273 /* -----------------------------------------------------------------------------
274 * Returns size of stack
275 * -------------------------------------------------------------------------- */
278 retainerStackBlocks( void )
283 for (bd = firstStack; bd != NULL; bd = bd->link)
290 /* -----------------------------------------------------------------------------
291 * Returns rtsTrue if stackTop is at the stack boundary of the current stack,
292 * i.e., if the current stack chunk is empty.
293 * -------------------------------------------------------------------------- */
294 static INLINE rtsBool
297 return stackTop == currentStackBoundary;
300 /* -----------------------------------------------------------------------------
301 * Initializes *info from ptrs and payload.
303 * payload[] begins with ptrs pointers followed by non-pointers.
304 * -------------------------------------------------------------------------- */
306 init_ptrs( stackPos *info, nat ptrs, StgPtr payload )
308 info->type = posTypePtrs;
309 info->next.ptrs.pos = 0;
310 info->next.ptrs.ptrs = ptrs;
311 info->next.ptrs.payload = payload;
314 /* -----------------------------------------------------------------------------
315 * Find the next object from *info.
316 * -------------------------------------------------------------------------- */
317 static INLINE StgClosure *
318 find_ptrs( stackPos *info )
320 if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
321 return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
327 /* -----------------------------------------------------------------------------
328 * Initializes *info from SRT information stored in *infoTable.
329 * -------------------------------------------------------------------------- */
331 init_srt_fun( stackPos *info, StgFunInfoTable *infoTable )
333 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
334 info->type = posTypeLargeSRT;
335 info->next.large_srt.srt = (StgLargeSRT *)infoTable->f.srt;
336 info->next.large_srt.offset = 0;
338 info->type = posTypeSRT;
339 info->next.srt.srt = (StgClosure **)(infoTable->f.srt);
340 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
345 init_srt_thunk( stackPos *info, StgThunkInfoTable *infoTable )
347 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
348 info->type = posTypeLargeSRT;
349 info->next.large_srt.srt = (StgLargeSRT *)infoTable->srt;
350 info->next.large_srt.offset = 0;
352 info->type = posTypeSRT;
353 info->next.srt.srt = (StgClosure **)(infoTable->srt);
354 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
358 /* -----------------------------------------------------------------------------
359 * Find the next object from *info.
360 * -------------------------------------------------------------------------- */
361 static INLINE StgClosure *
362 find_srt( stackPos *info )
367 if (info->type == posTypeSRT) {
369 bitmap = info->next.srt.srt_bitmap;
370 while (bitmap != 0) {
371 if ((bitmap & 1) != 0) {
372 #ifdef ENABLE_WIN32_DLL_SUPPORT
374 if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
375 c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
377 c = *(info->next.srt.srt);
379 c = *(info->next.srt.srt);
381 bitmap = bitmap >> 1;
382 info->next.srt.srt++;
383 info->next.srt.srt_bitmap = bitmap;
386 bitmap = bitmap >> 1;
387 info->next.srt.srt++;
389 // bitmap is now zero...
394 nat i = info->next.large_srt.offset;
397 // Follow the pattern from GC.c:scavenge_large_srt_bitmap().
398 bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
399 bitmap = bitmap >> (i % BITS_IN(StgWord));
400 while (i < info->next.large_srt.srt->l.size) {
401 if ((bitmap & 1) != 0) {
402 c = ((StgClosure **)info->next.large_srt.srt->srt)[i];
404 info->next.large_srt.offset = i;
408 if (i % BITS_IN(W_) == 0) {
409 bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
411 bitmap = bitmap >> 1;
414 // reached the end of this bitmap.
415 info->next.large_srt.offset = i;
420 /* -----------------------------------------------------------------------------
421 * push() pushes a stackElement representing the next child of *c
422 * onto the traverse stack. If *c has no child, *first_child is set
423 * to NULL and nothing is pushed onto the stack. If *c has only one
424 * child, *c_chlid is set to that child and nothing is pushed onto
425 * the stack. If *c has more than two children, *first_child is set
426 * to the first child and a stackElement representing the second
427 * child is pushed onto the stack.
430 * *c_child_r is the most recent retainer of *c's children.
431 * *c is not any of TSO, AP, PAP, AP_STACK, which means that
432 * there cannot be any stack objects.
433 * Note: SRTs are considered to be children as well.
434 * -------------------------------------------------------------------------- */
436 push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
439 bdescr *nbd; // Next Block Descriptor
441 #ifdef DEBUG_RETAINER
442 // fprintf(stderr, "push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
445 ASSERT(get_itbl(c)->type != TSO);
446 ASSERT(get_itbl(c)->type != AP_STACK);
453 se.c_child_r = c_child_r;
456 switch (get_itbl(c)->type) {
463 case SE_CAF_BLACKHOLE:
468 // one child (fixed), no SRT
471 *first_child = ((StgMutVar *)c)->var;
474 // blocking_queue must be TSO and the head of a linked list of TSOs.
475 // Shoule it be a child? Seems to be yes.
476 *first_child = (StgClosure *)((StgBlockingQueue *)c)->blocking_queue;
479 *first_child = ((StgSelector *)c)->selectee;
482 case IND_OLDGEN_PERM:
484 *first_child = ((StgIndOldGen *)c)->indirectee;
488 *first_child = c->payload[0];
491 // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
492 // of the next child. We do not write a separate initialization code.
493 // Also we do not have to initialize info.type;
495 // two children (fixed), no SRT
496 // need to push a stackElement, but nothing to store in se.info
498 *first_child = c->payload[0]; // return the first pointer
499 // se.info.type = posTypeStep;
500 // se.info.next.step = 2; // 2 = second
503 // three children (fixed), no SRT
504 // need to push a stackElement
506 // head must be TSO and the head of a linked list of TSOs.
507 // Shoule it be a child? Seems to be yes.
508 *first_child = (StgClosure *)((StgMVar *)c)->head;
509 // se.info.type = posTypeStep;
510 se.info.next.step = 2; // 2 = second
513 // three children (fixed), no SRT
515 *first_child = ((StgWeak *)c)->key;
516 // se.info.type = posTypeStep;
517 se.info.next.step = 2;
520 // layout.payload.ptrs, no SRT
526 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
528 *first_child = find_ptrs(&se.info);
529 if (*first_child == NULL)
533 // StgMutArrPtr.ptrs, no SRT
535 case MUT_ARR_PTRS_FROZEN:
536 init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
537 (StgPtr)(((StgMutArrPtrs *)c)->payload));
538 *first_child = find_ptrs(&se.info);
539 if (*first_child == NULL)
543 // layout.payload.ptrs, SRT
544 case FUN: // *c is a heap object.
546 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
547 *first_child = find_ptrs(&se.info);
548 if (*first_child == NULL)
549 // no child from ptrs, so check SRT
555 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
556 *first_child = find_ptrs(&se.info);
557 if (*first_child == NULL)
558 // no child from ptrs, so check SRT
562 // 1 fixed child, SRT
565 *first_child = c->payload[0];
566 ASSERT(*first_child != NULL);
567 init_srt_fun(&se.info, get_fun_itbl(c));
572 *first_child = c->payload[0];
573 ASSERT(*first_child != NULL);
574 init_srt_thunk(&se.info, get_thunk_itbl(c));
577 case FUN_STATIC: // *c is a heap object.
578 ASSERT(get_itbl(c)->srt_bitmap != 0);
582 init_srt_fun(&se.info, get_fun_itbl(c));
583 *first_child = find_srt(&se.info);
584 if (*first_child == NULL)
590 ASSERT(get_itbl(c)->srt_bitmap != 0);
594 init_srt_thunk(&se.info, get_thunk_itbl(c));
595 *first_child = find_srt(&se.info);
596 if (*first_child == NULL)
607 case CONSTR_CHARLIKE:
608 case CONSTR_NOCAF_STATIC:
629 barf("Invalid object *c in push()");
633 if (stackTop - 1 < stackBottom) {
634 #ifdef DEBUG_RETAINER
635 // fprintf(stderr, "push() to the next stack.\n");
637 // currentStack->free is updated when the active stack is switched
638 // to the next stack.
639 currentStack->free = (StgPtr)stackTop;
641 if (currentStack->link == NULL) {
642 nbd = allocGroup(BLOCKS_IN_STACK);
644 nbd->u.back = currentStack;
645 currentStack->link = nbd;
647 nbd = currentStack->link;
652 // adjust stackTop (acutal push)
654 // If the size of stackElement was huge, we would better replace the
655 // following statement by either a memcpy() call or a switch statement
656 // on the type of the element. Currently, the size of stackElement is
657 // small enough (5 words) that this direct assignment seems to be enough.
660 #ifdef DEBUG_RETAINER
662 if (stackSize > maxStackSize) maxStackSize = stackSize;
663 // ASSERT(stackSize >= 0);
664 // fprintf(stderr, "stackSize = %d\n", stackSize);
668 /* -----------------------------------------------------------------------------
669 * popOff() and popOffReal(): Pop a stackElement off the traverse stack.
671 * stackTop cannot be equal to stackLimit unless the whole stack is
672 * empty, in which case popOff() is not allowed.
674 * You can think of popOffReal() as a part of popOff() which is
675 * executed at the end of popOff() in necessary. Since popOff() is
676 * likely to be executed quite often while popOffReal() is not, we
677 * separate popOffReal() from popOff(), which is declared as an
678 * INLINE function (for the sake of execution speed). popOffReal()
679 * is called only within popOff() and nowhere else.
680 * -------------------------------------------------------------------------- */
684 bdescr *pbd; // Previous Block Descriptor
686 #ifdef DEBUG_RETAINER
687 // fprintf(stderr, "pop() to the previous stack.\n");
690 ASSERT(stackTop + 1 == stackLimit);
691 ASSERT(stackBottom == (stackElement *)currentStack->start);
693 if (firstStack == currentStack) {
694 // The stack is completely empty.
696 ASSERT(stackTop == stackLimit);
697 #ifdef DEBUG_RETAINER
699 if (stackSize > maxStackSize) maxStackSize = stackSize;
701 ASSERT(stackSize >= 0);
702 fprintf(stderr, "stackSize = %d\n", stackSize);
708 // currentStack->free is updated when the active stack is switched back
709 // to the previous stack.
710 currentStack->free = (StgPtr)stackLimit;
712 // find the previous block descriptor
713 pbd = currentStack->u.back;
716 returnToOldStack(pbd);
718 #ifdef DEBUG_RETAINER
720 if (stackSize > maxStackSize) maxStackSize = stackSize;
722 ASSERT(stackSize >= 0);
723 fprintf(stderr, "stackSize = %d\n", stackSize);
730 #ifdef DEBUG_RETAINER
731 // fprintf(stderr, "\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
734 ASSERT(stackTop != stackLimit);
735 ASSERT(!isEmptyRetainerStack());
737 // <= (instead of <) is wrong!
738 if (stackTop + 1 < stackLimit) {
740 #ifdef DEBUG_RETAINER
742 if (stackSize > maxStackSize) maxStackSize = stackSize;
744 ASSERT(stackSize >= 0);
745 fprintf(stderr, "stackSize = %d\n", stackSize);
754 /* -----------------------------------------------------------------------------
755 * Finds the next object to be considered for retainer profiling and store
757 * Test if the topmost stack element indicates that more objects are left,
758 * and if so, retrieve the first object and store its pointer to *c. Also,
759 * set *cp and *r appropriately, both of which are stored in the stack element.
760 * The topmost stack element then is overwritten so as for it to now denote
762 * If the topmost stack element indicates no more objects are left, pop
763 * off the stack element until either an object can be retrieved or
764 * the current stack chunk becomes empty, indicated by rtsTrue returned by
765 * isOnBoundary(), in which case *c is set to NULL.
767 * It is okay to call this function even when the current stack chunk
769 * -------------------------------------------------------------------------- */
771 pop( StgClosure **c, StgClosure **cp, retainer *r )
775 #ifdef DEBUG_RETAINER
776 // fprintf(stderr, "pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
780 if (isOnBoundary()) { // if the current stack chunk is depleted
787 switch (get_itbl(se->c)->type) {
788 // two children (fixed), no SRT
789 // nothing in se.info
791 *c = se->c->payload[1];
797 // three children (fixed), no SRT
798 // need to push a stackElement
800 if (se->info.next.step == 2) {
801 *c = (StgClosure *)((StgMVar *)se->c)->tail;
802 se->info.next.step++; // move to the next step
805 *c = ((StgMVar *)se->c)->value;
812 // three children (fixed), no SRT
814 if (se->info.next.step == 2) {
815 *c = ((StgWeak *)se->c)->value;
816 se->info.next.step++;
819 *c = ((StgWeak *)se->c)->finalizer;
831 // StgMutArrPtr.ptrs, no SRT
833 case MUT_ARR_PTRS_FROZEN:
834 *c = find_ptrs(&se->info);
843 // layout.payload.ptrs, SRT
844 case FUN: // always a heap object
846 if (se->info.type == posTypePtrs) {
847 *c = find_ptrs(&se->info);
853 init_srt_fun(&se->info, get_fun_itbl(se->c));
859 if (se->info.type == posTypePtrs) {
860 *c = find_ptrs(&se->info);
866 init_srt_thunk(&se->info, get_thunk_itbl(se->c));
882 *c = find_srt(&se->info);
891 // no child (fixed), no SRT
897 case SE_CAF_BLACKHOLE:
899 // one child (fixed), no SRT
905 case IND_OLDGEN_PERM:
915 case CONSTR_CHARLIKE:
916 case CONSTR_NOCAF_STATIC:
937 barf("Invalid object *c in pop()");
943 /* -----------------------------------------------------------------------------
944 * RETAINER PROFILING ENGINE
945 * -------------------------------------------------------------------------- */
948 initRetainerProfiling( void )
950 initializeAllRetainerSet();
951 retainerGeneration = 0;
954 /* -----------------------------------------------------------------------------
955 * This function must be called before f-closing prof_file.
956 * -------------------------------------------------------------------------- */
958 endRetainerProfiling( void )
960 #ifdef SECOND_APPROACH
961 outputAllRetainerSet(prof_file);
965 /* -----------------------------------------------------------------------------
966 * Returns the actual pointer to the retainer set of the closure *c.
967 * It may adjust RSET(c) subject to flip.
969 * RSET(c) is initialized to NULL if its current value does not
972 * Even though this function has side effects, they CAN be ignored because
973 * subsequent calls to retainerSetOf() always result in the same return value
974 * and retainerSetOf() is the only way to retrieve retainerSet of a given
976 * We have to perform an XOR (^) operation each time a closure is examined.
977 * The reason is that we do not know when a closure is visited last.
978 * -------------------------------------------------------------------------- */
980 maybeInitRetainerSet( StgClosure *c )
982 if (!isRetainerSetFieldValid(c)) {
983 setRetainerSetToNull(c);
987 /* -----------------------------------------------------------------------------
988 * Returns rtsTrue if *c is a retainer.
989 * -------------------------------------------------------------------------- */
990 static INLINE rtsBool
991 isRetainer( StgClosure *c )
993 switch (get_itbl(c)->type) {
997 // TSOs MUST be retainers: they constitute the set of roots.
1005 case MUT_ARR_PTRS_FROZEN:
1007 // thunks are retainers.
1014 case THUNK_SELECTOR:
1018 // Static thunks, or CAFS, are obviously retainers.
1021 // WEAK objects are roots; there is separate code in which traversing
1022 // begins from WEAK objects.
1044 // partial applications
1050 case SE_CAF_BLACKHOLE:
1054 case IND_OLDGEN_PERM:
1069 // IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
1071 // CONSTR_INTLIKE, CONSTR_CHARLIKE, and CONSTR_NOCAF_STATIC
1072 // cannot be *c, *cp, *r in the retainer profiling loop.
1073 case CONSTR_INTLIKE:
1074 case CONSTR_CHARLIKE:
1075 case CONSTR_NOCAF_STATIC:
1076 // Stack objects are invalid because they are never treated as
1077 // legal objects during retainer profiling.
1095 case INVALID_OBJECT:
1097 barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
1102 /* -----------------------------------------------------------------------------
1103 * Returns the retainer function value for the closure *c, i.e., R(*c).
1104 * This function does NOT return the retainer(s) of *c.
1106 * *c must be a retainer.
1108 * Depending on the definition of this function, the maintenance of retainer
1109 * sets can be made easier. If most retainer sets are likely to be created
1110 * again across garbage collections, refreshAllRetainerSet() in
1111 * RetainerSet.c can simply do nothing.
1112 * If this is not the case, we can free all the retainer sets and
1113 * re-initialize the hash table.
1114 * See refreshAllRetainerSet() in RetainerSet.c.
1115 * -------------------------------------------------------------------------- */
1116 static INLINE retainer
1117 getRetainerFrom( StgClosure *c )
1119 ASSERT(isRetainer(c));
1121 #if defined(RETAINER_SCHEME_INFO)
1122 // Retainer scheme 1: retainer = info table
1124 #elif defined(RETAINER_SCHEME_CCS)
1125 // Retainer scheme 2: retainer = cost centre stack
1126 return c->header.prof.ccs;
1127 #elif defined(RETAINER_SCHEME_CC)
1128 // Retainer scheme 3: retainer = cost centre
1129 return c->header.prof.ccs->cc;
1133 /* -----------------------------------------------------------------------------
1134 * Associates the retainer set *s with the closure *c, that is, *s becomes
1135 * the retainer set of *c.
1139 * -------------------------------------------------------------------------- */
1141 associate( StgClosure *c, RetainerSet *s )
1143 // StgWord has the same size as pointers, so the following type
1145 RSET(c) = (RetainerSet *)((StgWord)s | flip);
1148 /* -----------------------------------------------------------------------------
1149 Call retainClosure for each of the closures covered by a large bitmap.
1150 -------------------------------------------------------------------------- */
1153 retain_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, nat size,
1154 StgClosure *c, retainer c_child_r)
1160 bitmap = large_bitmap->bitmap[b];
1161 for (i = 0; i < size; ) {
1162 if ((bitmap & 1) == 0) {
1163 retainClosure((StgClosure *)*p, c, c_child_r);
1167 if (i % BITS_IN(W_) == 0) {
1169 bitmap = large_bitmap->bitmap[b];
1171 bitmap = bitmap >> 1;
1176 static INLINE StgPtr
1177 retain_small_bitmap (StgPtr p, nat size, StgWord bitmap,
1178 StgClosure *c, retainer c_child_r)
1181 if ((bitmap & 1) == 0) {
1182 retainClosure((StgClosure *)*p, c, c_child_r);
1185 bitmap = bitmap >> 1;
1191 /* -----------------------------------------------------------------------------
1192 * Call retainClosure for each of the closures in an SRT.
1193 * ------------------------------------------------------------------------- */
1196 retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r)
1203 p = (StgClosure **)srt->srt;
1205 bitmap = srt->l.bitmap[b];
1206 for (i = 0; i < size; ) {
1207 if ((bitmap & 1) != 0) {
1208 retainClosure((StgClosure *)*p, c, c_child_r);
1212 if (i % BITS_IN(W_) == 0) {
1214 bitmap = srt->l.bitmap[b];
1216 bitmap = bitmap >> 1;
1222 retainSRT (StgClosure **srt, nat srt_bitmap, StgClosure *c, retainer c_child_r)
1227 bitmap = srt_bitmap;
1230 if (bitmap == (StgHalfWord)(-1)) {
1231 retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r );
1235 while (bitmap != 0) {
1236 if ((bitmap & 1) != 0) {
1237 #ifdef ENABLE_WIN32_DLL_SUPPORT
1238 if ( (unsigned long)(*srt) & 0x1 ) {
1239 retainClosure(*stgCast(StgClosure**,(stgCast(unsigned long, *srt) & ~0x1)),
1242 retainClosure(*srt,c,c_child_r);
1245 retainClosure(*srt,c,c_child_r);
1249 bitmap = bitmap >> 1;
1253 /* -----------------------------------------------------------------------------
1254 * Process all the objects in the stack chunk from stackStart to stackEnd
1255 * with *c and *c_child_r being their parent and their most recent retainer,
1256 * respectively. Treat stackOptionalFun as another child of *c if it is
1259 * *c is one of the following: TSO, AP_STACK.
1260 * If *c is TSO, c == c_child_r.
1261 * stackStart < stackEnd.
1262 * RSET(c) and RSET(c_child_r) are valid, i.e., their
1263 * interpretation conforms to the current value of flip (even when they
1264 * are interpreted to be NULL).
1265 * If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete,
1266 * or ThreadKilled, which means that its stack is ready to process.
1268 * This code was almost plagiarzied from GC.c! For each pointer,
1269 * retainClosure() is invoked instead of evacuate().
1270 * -------------------------------------------------------------------------- */
1272 retainStack( StgClosure *c, retainer c_child_r,
1273 StgPtr stackStart, StgPtr stackEnd )
1275 stackElement *oldStackBoundary;
1277 StgRetInfoTable *info;
1281 #ifdef DEBUG_RETAINER
1283 if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
1287 Each invocation of retainStack() creates a new virtual
1288 stack. Since all such stacks share a single common stack, we
1289 record the current currentStackBoundary, which will be restored
1292 oldStackBoundary = currentStackBoundary;
1293 currentStackBoundary = stackTop;
1295 #ifdef DEBUG_RETAINER
1296 // fprintf(stderr, "retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
1299 ASSERT(get_itbl(c)->type != TSO ||
1300 (((StgTSO *)c)->what_next != ThreadRelocated &&
1301 ((StgTSO *)c)->what_next != ThreadComplete &&
1302 ((StgTSO *)c)->what_next != ThreadKilled));
1305 while (p < stackEnd) {
1306 info = get_ret_itbl((StgClosure *)p);
1308 switch(info->i.type) {
1311 retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
1312 p += sizeofW(StgUpdateFrame);
1319 bitmap = BITMAP_BITS(info->i.layout.bitmap);
1320 size = BITMAP_SIZE(info->i.layout.bitmap);
1322 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1325 retainSRT((StgClosure **)info->srt, info->i.srt_bitmap, c, c_child_r);
1332 retainClosure((StgClosure *)*p, c, c_child_r);
1335 size = BCO_BITMAP_SIZE(bco);
1336 retain_large_bitmap(p, BCO_BITMAP(bco), size, c, c_child_r);
1341 // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
1344 size = info->i.layout.large_bitmap->size;
1346 retain_large_bitmap(p, info->i.layout.large_bitmap,
1347 size, c, c_child_r);
1349 // and don't forget to follow the SRT
1352 // Dynamic bitmap: the mask is stored on the stack
1355 dyn = ((StgRetDyn *)p)->liveness;
1357 // traverse the bitmap first
1358 bitmap = RET_DYN_LIVENESS(dyn);
1359 p = (P_)&((StgRetDyn *)p)->payload[0];
1360 size = RET_DYN_BITMAP_SIZE;
1361 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1363 // skip over the non-ptr words
1364 p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;
1366 // follow the ptr words
1367 for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
1368 retainClosure((StgClosure *)*p, c, c_child_r);
1375 StgRetFun *ret_fun = (StgRetFun *)p;
1376 StgFunInfoTable *fun_info;
1378 retainClosure(ret_fun->fun, c, c_child_r);
1379 fun_info = get_fun_itbl(ret_fun->fun);
1381 p = (P_)&ret_fun->payload;
1382 switch (fun_info->f.fun_type) {
1384 bitmap = BITMAP_BITS(fun_info->f.bitmap);
1385 size = BITMAP_SIZE(fun_info->f.bitmap);
1386 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1389 size = ((StgLargeBitmap *)fun_info->f.bitmap)->size;
1390 retain_large_bitmap(p, (StgLargeBitmap *)fun_info->f.bitmap,
1391 size, c, c_child_r);
1395 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
1396 size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
1397 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1404 barf("Invalid object found in retainStack(): %d",
1405 (int)(info->i.type));
1409 // restore currentStackBoundary
1410 currentStackBoundary = oldStackBoundary;
1411 #ifdef DEBUG_RETAINER
1412 // fprintf(stderr, "retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
1415 #ifdef DEBUG_RETAINER
1420 /* ----------------------------------------------------------------------------
1421 * Call retainClosure for each of the children of a PAP/AP
1422 * ------------------------------------------------------------------------- */
1424 static INLINE StgPtr
1425 retain_PAP (StgPAP *pap, retainer c_child_r)
1428 StgWord bitmap, size;
1429 StgFunInfoTable *fun_info;
1431 retainClosure(pap->fun, (StgClosure *)pap, c_child_r);
1432 fun_info = get_fun_itbl(pap->fun);
1433 ASSERT(fun_info->i.type != PAP);
1435 p = (StgPtr)pap->payload;
1438 switch (fun_info->f.fun_type) {
1440 bitmap = BITMAP_BITS(fun_info->f.bitmap);
1441 p = retain_small_bitmap(p, pap->n_args, bitmap,
1442 (StgClosure *)pap, c_child_r);
1445 retain_large_bitmap(p, (StgLargeBitmap *)fun_info->f.bitmap,
1446 size, (StgClosure *)pap, c_child_r);
1450 retain_large_bitmap((StgPtr)pap->payload, BCO_BITMAP(pap->fun),
1451 size, (StgClosure *)pap, c_child_r);
1455 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
1456 p = retain_small_bitmap(p, pap->n_args, bitmap,
1457 (StgClosure *)pap, c_child_r);
1463 /* -----------------------------------------------------------------------------
1464 * Compute the retainer set of *c0 and all its desecents by traversing.
1465 * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
1467 * c0 = cp0 = r0 holds only for root objects.
1468 * RSET(cp0) and RSET(r0) are valid, i.e., their
1469 * interpretation conforms to the current value of flip (even when they
1470 * are interpreted to be NULL).
1471 * However, RSET(c0) may be corrupt, i.e., it may not conform to
1472 * the current value of flip. If it does not, during the execution
1473 * of this function, RSET(c0) must be initialized as well as all
1476 * stackTop must be the same at the beginning and the exit of this function.
1477 * *c0 can be TSO (as well as AP_STACK).
1478 * -------------------------------------------------------------------------- */
1480 retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
1482 // c = Current closure
1483 // cp = Current closure's Parent
1484 // r = current closures' most recent Retainer
1485 // c_child_r = current closure's children's most recent retainer
1486 // first_child = first child of c
1487 StgClosure *c, *cp, *first_child;
1488 RetainerSet *s, *retainerSetOfc;
1489 retainer r, c_child_r;
1492 #ifdef DEBUG_RETAINER
1493 // StgPtr oldStackTop;
1496 #ifdef DEBUG_RETAINER
1497 // oldStackTop = stackTop;
1498 // fprintf(stderr, "retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
1501 // (c, cp, r) = (c0, cp0, r0)
1508 //fprintf(stderr, "loop");
1509 // pop to (c, cp, r);
1513 #ifdef DEBUG_RETAINER
1514 // fprintf(stderr, "retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
1519 //fprintf(stderr, "inner_loop");
1522 // c = current closure under consideration,
1523 // cp = current closure's parent,
1524 // r = current closure's most recent retainer
1526 // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
1527 // RSET(cp) and RSET(r) are valid.
1528 // RSET(c) is valid only if c has been visited before.
1530 // Loop invariants (on the relation between c, cp, and r)
1531 // if cp is not a retainer, r belongs to RSET(cp).
1532 // if cp is a retainer, r == cp.
1534 typeOfc = get_itbl(c)->type;
1536 #ifdef DEBUG_RETAINER
1539 case CONSTR_INTLIKE:
1540 case CONSTR_CHARLIKE:
1541 case CONSTR_NOCAF_STATIC:
1547 if (retainerSetOf(c) == NULL) { // first visit?
1548 costArray[typeOfc] += cost(c);
1549 sumOfNewCost += cost(c);
1558 if (((StgTSO *)c)->what_next == ThreadComplete ||
1559 ((StgTSO *)c)->what_next == ThreadKilled) {
1560 #ifdef DEBUG_RETAINER
1561 fprintf(stderr, "ThreadComplete or ThreadKilled encountered in retainClosure()\n");
1565 if (((StgTSO *)c)->what_next == ThreadRelocated) {
1566 #ifdef DEBUG_RETAINER
1567 fprintf(stderr, "ThreadRelocated encountered in retainClosure()\n");
1569 c = (StgClosure *)((StgTSO *)c)->link;
1575 // We just skip IND_STATIC, so its retainer set is never computed.
1576 c = ((StgIndStatic *)c)->indirectee;
1578 case CONSTR_INTLIKE:
1579 case CONSTR_CHARLIKE:
1580 // static objects with no pointers out, so goto loop.
1581 case CONSTR_NOCAF_STATIC:
1582 // It is not just enough not to compute the retainer set for *c; it is
1583 // mandatory because CONSTR_NOCAF_STATIC are not reachable from
1584 // scavenged_static_objects, the list from which is assumed to traverse
1585 // all static objects after major garbage collections.
1589 if (get_itbl(c)->srt_bitmap == 0) {
1590 // No need to compute the retainer set; no dynamic objects
1591 // are reachable from *c.
1593 // Static objects: if we traverse all the live closures,
1594 // including static closures, during each heap census then
1595 // we will observe that some static closures appear and
1596 // disappear. eg. a closure may contain a pointer to a
1597 // static function 'f' which is not otherwise reachable
1598 // (it doesn't indirectly point to any CAFs, so it doesn't
1599 // appear in any SRTs), so we would find 'f' during
1600 // traversal. However on the next sweep there may be no
1601 // closures pointing to 'f'.
1603 // We must therefore ignore static closures whose SRT is
1604 // empty, because these are exactly the closures that may
1605 // "appear". A closure with a non-empty SRT, and which is
1606 // still required, will always be reachable.
1608 // But what about CONSTR_STATIC? Surely these may be able
1609 // to appear, and they don't have SRTs, so we can't
1610 // check. So for now, we're calling
1611 // resetStaticObjectForRetainerProfiling() from the
1612 // garbage collector to reset the retainer sets in all the
1613 // reachable static objects.
1620 // The above objects are ignored in computing the average number of times
1621 // an object is visited.
1622 timesAnyObjectVisited++;
1624 // If this is the first visit to c, initialize its retainer set.
1625 maybeInitRetainerSet(c);
1626 retainerSetOfc = retainerSetOf(c);
1629 // isRetainer(cp) == rtsTrue => s == NULL
1630 // isRetainer(cp) == rtsFalse => s == cp.retainer
1634 s = retainerSetOf(cp);
1636 // (c, cp, r, s) is available.
1638 // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
1639 if (retainerSetOfc == NULL) {
1640 // This is the first visit to *c.
1644 associate(c, singleton(r));
1646 // s is actually the retainer set of *c!
1649 // compute c_child_r
1650 c_child_r = isRetainer(c) ? getRetainerFrom(c) : r;
1652 // This is not the first visit to *c.
1653 if (isMember(r, retainerSetOfc))
1654 goto loop; // no need to process child
1657 associate(c, addElement(r, retainerSetOfc));
1659 // s is not NULL and cp is not a retainer. This means that
1660 // each time *cp is visited, so is *c. Thus, if s has
1661 // exactly one more element in its retainer set than c, s
1662 // is also the new retainer set for *c.
1663 if (s->num == retainerSetOfc->num + 1) {
1666 // Otherwise, just add R_r to the current retainer set of *c.
1668 associate(c, addElement(r, retainerSetOfc));
1673 goto loop; // no need to process child
1675 // compute c_child_r
1679 // now, RSET() of all of *c, *cp, and *r is valid.
1680 // (c, c_child_r) are available.
1684 // Special case closures: we process these all in one go rather
1685 // than attempting to save the current position, because doing so
1689 retainStack(c, c_child_r,
1691 ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
1696 retain_PAP((StgPAP *)c, c_child_r);
1700 retainClosure(((StgAP_STACK *)c)->fun, c, c_child_r);
1701 retainStack(c, c_child_r,
1702 (StgPtr)((StgAP_STACK *)c)->payload,
1703 (StgPtr)((StgAP_STACK *)c)->payload +
1704 ((StgAP_STACK *)c)->size);
1708 push(c, c_child_r, &first_child);
1710 // If first_child is null, c has no child.
1711 // If first_child is not null, the top stack element points to the next
1712 // object. push() may or may not push a stackElement on the stack.
1713 if (first_child == NULL)
1716 // (c, cp, r) = (first_child, c, c_child_r)
1723 /* -----------------------------------------------------------------------------
1724 * Compute the retainer set for every object reachable from *tl.
1725 * -------------------------------------------------------------------------- */
1727 retainRoot( StgClosure **tl )
1729 // We no longer assume that only TSOs and WEAKs are roots; any closure can
1732 ASSERT(isEmptyRetainerStack());
1733 currentStackBoundary = stackTop;
1735 if (isRetainer(*tl)) {
1736 retainClosure(*tl, *tl, getRetainerFrom(*tl));
1738 retainClosure(*tl, *tl, CCS_SYSTEM);
1741 // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
1742 // *tl might be a TSO which is ThreadComplete, in which
1743 // case we ignore it for the purposes of retainer profiling.
1746 /* -----------------------------------------------------------------------------
1747 * Compute the retainer set for each of the objects in the heap.
1748 * -------------------------------------------------------------------------- */
1750 computeRetainerSet( void )
1756 #ifdef DEBUG_RETAINER
1757 RetainerSet tmpRetainerSet;
1760 GetRoots(retainRoot); // for scheduler roots
1762 // This function is called after a major GC, when key, value, and finalizer
1763 // all are guaranteed to be valid, or reachable.
1765 // The following code assumes that WEAK objects are considered to be roots
1766 // for retainer profilng.
1767 for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
1768 // retainRoot((StgClosure *)weak);
1769 retainRoot((StgClosure **)&weak);
1771 // Consider roots from the stable ptr table.
1772 markStablePtrTable(retainRoot);
1774 // The following code resets the rs field of each unvisited mutable
1775 // object (computing sumOfNewCostExtra and updating costArray[] when
1776 // debugging retainer profiler).
1777 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1779 (generations[g].mut_list == END_MUT_LIST &&
1780 generations[g].mut_once_list == END_MUT_LIST));
1783 // I think traversing through mut_list is unnecessary.
1784 // Think about removing this part.
1785 for (ml = generations[g].mut_list; ml != END_MUT_LIST;
1786 ml = ml->mut_link) {
1788 maybeInitRetainerSet((StgClosure *)ml);
1789 rtl = retainerSetOf((StgClosure *)ml);
1791 #ifdef DEBUG_RETAINER
1793 // first visit to *ml
1794 // This is a violation of the interface rule!
1795 RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);
1797 switch (get_itbl((StgClosure *)ml)->type) {
1801 case CONSTR_INTLIKE:
1802 case CONSTR_CHARLIKE:
1803 case CONSTR_NOCAF_STATIC:
1807 barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
1811 costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
1812 sumOfNewCostExtra += cost((StgClosure *)ml);
1819 // Traversing through mut_once_list is, in contrast, necessary
1820 // because we can find MUT_VAR objects which have not been
1821 // visited during retainer profiling.
1822 for (ml = generations[g].mut_once_list; ml != END_MUT_LIST;
1823 ml = ml->mut_link) {
1825 maybeInitRetainerSet((StgClosure *)ml);
1826 rtl = retainerSetOf((StgClosure *)ml);
1827 #ifdef DEBUG_RETAINER
1829 // first visit to *ml
1830 // This is a violation of the interface rule!
1831 RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);
1833 switch (get_itbl((StgClosure *)ml)->type) {
1837 case CONSTR_INTLIKE:
1838 case CONSTR_CHARLIKE:
1839 case CONSTR_NOCAF_STATIC:
1843 barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
1847 costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
1848 sumOfNewCostExtra += cost((StgClosure *)ml);
1857 /* -----------------------------------------------------------------------------
1858 * Traverse all static objects for which we compute retainer sets,
1859 * and reset their rs fields to NULL, which is accomplished by
1860 * invoking maybeInitRetainerSet(). This function must be called
1861 * before zeroing all objects reachable from scavenged_static_objects
1862 * in the case of major gabage collections. See GarbageCollect() in
1865 * The mut_once_list of the oldest generation must also be traversed?
1866 * Why? Because if the evacuation of an object pointed to by a static
1867 * indirection object fails, it is put back to the mut_once_list of
1868 * the oldest generation.
1869 * However, this is not necessary because any static indirection objects
1870 * are just traversed through to reach dynamic objects. In other words,
1871 * they are not taken into consideration in computing retainer sets.
1872 * -------------------------------------------------------------------------- */
1874 resetStaticObjectForRetainerProfiling( void )
1876 #ifdef DEBUG_RETAINER
1881 #ifdef DEBUG_RETAINER
1884 p = scavenged_static_objects;
1885 while (p != END_OF_STATIC_LIST) {
1886 #ifdef DEBUG_RETAINER
1889 switch (get_itbl(p)->type) {
1891 // Since we do not compute the retainer set of any
1892 // IND_STATIC object, we don't have to reset its retainer
1894 p = IND_STATIC_LINK(p);
1897 maybeInitRetainerSet(p);
1898 p = THUNK_STATIC_LINK(p);
1901 maybeInitRetainerSet(p);
1902 p = FUN_STATIC_LINK(p);
1905 maybeInitRetainerSet(p);
1906 p = STATIC_LINK(get_itbl(p), p);
1909 barf("resetStaticObjectForRetainerProfiling: %p (%s)",
1910 p, get_itbl(p)->type);
1914 #ifdef DEBUG_RETAINER
1915 // fprintf(stderr, "count in scavenged_static_objects = %d\n", count);
1919 /* -----------------------------------------------------------------------------
1920 * Perform retainer profiling.
1921 * N is the oldest generation being profilied, where the generations are
1922 * numbered starting at 0.
1925 * This function should be called only immediately after major garbage
1927 * ------------------------------------------------------------------------- */
1929 retainerProfile(void)
1931 #ifdef DEBUG_RETAINER
1933 nat totalHeapSize; // total raw heap size (computed by linear scanning)
1936 #ifdef DEBUG_RETAINER
1937 fprintf(stderr, " < retainerProfile() invoked : %d>\n", retainerGeneration);
1942 // We haven't flipped the bit yet.
1943 #ifdef DEBUG_RETAINER
1944 fprintf(stderr, "Before traversing:\n");
1945 sumOfCostLinear = 0;
1946 for (i = 0;i < N_CLOSURE_TYPES; i++)
1947 costArrayLinear[i] = 0;
1948 totalHeapSize = checkHeapSanityForRetainerProfiling();
1950 fprintf(stderr, "\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
1952 fprintf(stderr, "costArrayLinear[] = ");
1953 for (i = 0;i < N_CLOSURE_TYPES; i++)
1954 fprintf(stderr, "[%u:%u] ", i, costArrayLinear[i]);
1955 fprintf(stderr, "\n");
1958 ASSERT(sumOfCostLinear == totalHeapSize);
1961 #define pcostArrayLinear(index) \
1962 if (costArrayLinear[index] > 0) \
1963 fprintf(stderr, "costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
1964 pcostArrayLinear(THUNK_STATIC);
1965 pcostArrayLinear(FUN_STATIC);
1966 pcostArrayLinear(CONSTR_STATIC);
1967 pcostArrayLinear(CONSTR_NOCAF_STATIC);
1968 pcostArrayLinear(CONSTR_INTLIKE);
1969 pcostArrayLinear(CONSTR_CHARLIKE);
1973 // Now we flips flip.
1976 #ifdef DEBUG_RETAINER
1982 numObjectVisited = 0;
1983 timesAnyObjectVisited = 0;
1985 #ifdef DEBUG_RETAINER
1986 fprintf(stderr, "During traversing:\n");
1988 sumOfNewCostExtra = 0;
1989 for (i = 0;i < N_CLOSURE_TYPES; i++)
1994 We initialize the traverse stack each time the retainer profiling is
1995 performed (because the traverse stack size varies on each retainer profiling
1996 and this operation is not costly anyhow). However, we just refresh the
1999 initializeTraverseStack();
2000 #ifdef DEBUG_RETAINER
2001 initializeAllRetainerSet();
2003 refreshAllRetainerSet();
2005 computeRetainerSet();
2007 #ifdef DEBUG_RETAINER
2008 fprintf(stderr, "After traversing:\n");
2009 sumOfCostLinear = 0;
2010 for (i = 0;i < N_CLOSURE_TYPES; i++)
2011 costArrayLinear[i] = 0;
2012 totalHeapSize = checkHeapSanityForRetainerProfiling();
2014 fprintf(stderr, "\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
2015 ASSERT(sumOfCostLinear == totalHeapSize);
2017 // now, compare the two results
2020 costArray[] must be exactly the same as costArrayLinear[].
2022 1) Dead weak pointers, whose type is CONSTR. These objects are not
2023 reachable from any roots.
2025 fprintf(stderr, "Comparison:\n");
2026 fprintf(stderr, "\tcostArrayLinear[] (must be empty) = ");
2027 for (i = 0;i < N_CLOSURE_TYPES; i++)
2028 if (costArray[i] != costArrayLinear[i])
2029 // nothing should be printed except MUT_VAR after major GCs
2030 fprintf(stderr, "[%u:%u] ", i, costArrayLinear[i]);
2031 fprintf(stderr, "\n");
2033 fprintf(stderr, "\tsumOfNewCost = %u\n", sumOfNewCost);
2034 fprintf(stderr, "\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
2035 fprintf(stderr, "\tcostArray[] (must be empty) = ");
2036 for (i = 0;i < N_CLOSURE_TYPES; i++)
2037 if (costArray[i] != costArrayLinear[i])
2038 // nothing should be printed except MUT_VAR after major GCs
2039 fprintf(stderr, "[%u:%u] ", i, costArray[i]);
2040 fprintf(stderr, "\n");
2042 // only for major garbage collection
2043 ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
2047 closeTraverseStack();
2048 #ifdef DEBUG_RETAINER
2049 closeAllRetainerSet();
2051 // Note that there is no post-processing for the retainer sets.
2053 retainerGeneration++;
2056 retainerGeneration - 1, // retainerGeneration has just been incremented!
2057 #ifdef DEBUG_RETAINER
2058 maxCStackSize, maxStackSize,
2060 (double)timesAnyObjectVisited / numObjectVisited);
2063 /* -----------------------------------------------------------------------------
2065 * -------------------------------------------------------------------------- */
2067 #ifdef DEBUG_RETAINER
2069 #define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
2070 ((HEAP_ALLOCED(r) && Bdescr((P_)r)->free != (void *)-1))) && \
2071 ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa))
2074 sanityCheckHeapClosure( StgClosure *c )
2078 ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
2079 ASSERT(!closure_STATIC(c));
2080 ASSERT(LOOKS_LIKE_PTR(c));
2082 if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
2083 if (get_itbl(c)->type == CONSTR &&
2084 !strcmp(get_itbl(c)->prof.closure_type, "DEAD_WEAK") &&
2085 !strcmp(get_itbl(c)->prof.closure_desc, "DEAD_WEAK")) {
2086 fprintf(stderr, "\tUnvisited dead weak pointer object found: c = %p\n", c);
2087 costArray[get_itbl(c)->type] += cost(c);
2088 sumOfNewCost += cost(c);
2091 "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
2092 flip, c, get_itbl(c)->type,
2093 get_itbl(c)->prof.closure_type, get_itbl(c)->prof.closure_desc,
2096 // fprintf(stderr, "sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
2100 switch (info->type) {
2102 return tso_sizeW((StgTSO *)c);
2110 return stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);
2113 return sizeofW(StgMVar);
2116 case MUT_ARR_PTRS_FROZEN:
2117 return mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);
2121 return pap_sizeW((StgPAP *)c);
2124 return ap_stack_sizeW((StgAP_STACK *)c);
2127 return arr_words_sizeW((StgArrWords *)c);
2147 case SE_CAF_BLACKHOLE:
2151 case IND_OLDGEN_PERM:
2155 return sizeW_fromITBL(info);
2157 case THUNK_SELECTOR:
2158 return sizeofW(StgHeader) + MIN_UPD_SIZE;
2167 case CONSTR_INTLIKE:
2168 case CONSTR_CHARLIKE:
2169 case CONSTR_NOCAF_STATIC:
2186 case INVALID_OBJECT:
2188 barf("Invalid object in sanityCheckHeapClosure(): %d",
2195 heapCheck( bdescr *bd )
2198 static nat costSum, size;
2201 while (bd != NULL) {
2203 while (p < bd->free) {
2204 size = sanityCheckHeapClosure((StgClosure *)p);
2205 sumOfCostLinear += size;
2206 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2208 // no need for slop check; I think slops are not used currently.
2210 ASSERT(p == bd->free);
2211 costSum += bd->free - bd->start;
2219 smallObjectPoolCheck(void)
2223 static nat costSum, size;
2225 bd = small_alloc_list;
2233 while (p < alloc_Hp) {
2234 size = sanityCheckHeapClosure((StgClosure *)p);
2235 sumOfCostLinear += size;
2236 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2239 ASSERT(p == alloc_Hp);
2240 costSum += alloc_Hp - bd->start;
2243 while (bd != NULL) {
2245 while (p < bd->free) {
2246 size = sanityCheckHeapClosure((StgClosure *)p);
2247 sumOfCostLinear += size;
2248 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2251 ASSERT(p == bd->free);
2252 costSum += bd->free - bd->start;
2260 chainCheck(bdescr *bd)
2265 while (bd != NULL) {
2266 // bd->free - bd->start is not an accurate measurement of the
2267 // object size. Actually it is always zero, so we compute its
2269 size = sanityCheckHeapClosure((StgClosure *)bd->start);
2270 sumOfCostLinear += size;
2271 costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size;
2280 checkHeapSanityForRetainerProfiling( void )
2285 fprintf(stderr, "START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2286 if (RtsFlags.GcFlags.generations == 1) {
2287 costSum += heapCheck(g0s0->to_blocks);
2288 fprintf(stderr, "heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2289 costSum += chainCheck(g0s0->large_objects);
2290 fprintf(stderr, "chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2292 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
2293 for (s = 0; s < generations[g].n_steps; s++) {
2295 After all live objects have been scavenged, the garbage
2296 collector may create some objects in
2297 scheduleFinalizers(). These objects are created throught
2298 allocate(), so the small object pool or the large object
2299 pool of the g0s0 may not be empty.
2301 if (g == 0 && s == 0) {
2302 costSum += smallObjectPoolCheck();
2303 fprintf(stderr, "smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2304 costSum += chainCheck(generations[g].steps[s].large_objects);
2305 fprintf(stderr, "chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2307 costSum += heapCheck(generations[g].steps[s].blocks);
2308 fprintf(stderr, "heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2309 costSum += chainCheck(generations[g].steps[s].large_objects);
2310 fprintf(stderr, "chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2319 findPointer(StgPtr p)
2325 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2326 for (s = 0; s < generations[g].n_steps; s++) {
2327 // if (g == 0 && s == 0) continue;
2328 bd = generations[g].steps[s].blocks;
2329 for (; bd; bd = bd->link) {
2330 for (q = bd->start; q < bd->free; q++) {
2331 if (*q == (StgWord)p) {
2333 while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
2334 fprintf(stderr, "Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
2339 bd = generations[g].steps[s].large_objects;
2340 for (; bd; bd = bd->link) {
2341 e = bd->start + cost((StgClosure *)bd->start);
2342 for (q = bd->start; q < e; q++) {
2343 if (*q == (StgWord)p) {
2345 while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
2346 fprintf(stderr, "Found in gen[%d], large_objects: %p\n", g, r);
2356 belongToHeap(StgPtr p)
2361 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2362 for (s = 0; s < generations[g].n_steps; s++) {
2363 // if (g == 0 && s == 0) continue;
2364 bd = generations[g].steps[s].blocks;
2365 for (; bd; bd = bd->link) {
2366 if (bd->start <= p && p < bd->free) {
2367 fprintf(stderr, "Belongs to gen[%d], step[%d]", g, s);
2371 bd = generations[g].steps[s].large_objects;
2372 for (; bd; bd = bd->link) {
2373 if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
2374 fprintf(stderr, "Found in gen[%d], large_objects: %p\n", g, bd->start);
2381 #endif // DEBUG_RETAINER
2383 #endif /* PROFILING */