1 /* -----------------------------------------------------------------------------
2 * $Id: RetainerProfile.c,v 1.10 2003/05/16 14:39:29 simonmar Exp $
4 * (c) The GHC Team, 2001
9 * ---------------------------------------------------------------------------*/
13 // Turn off inlining when debugging - it obfuscates things
24 #include "RetainerProfile.h"
25 #include "RetainerSet.h"
29 #include "StoragePriv.h"
33 #include "StablePriv.h"
34 #include "Profiling.h"
36 #include "BlockAlloc.h"
41 Note: what to change in order to plug-in a new retainer profiling scheme?
42 (1) type retainer in ../includes/StgRetainerProf.h
43 (2) retainer function R(), i.e., getRetainerFrom()
44 (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
45 in RetainerSet.h, if needed.
46 (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
49 /* -----------------------------------------------------------------------------
51 * -------------------------------------------------------------------------- */
53 static nat retainerGeneration; // generation
55 static nat numObjectVisited; // total number of objects visited
56 static nat timesAnyObjectVisited; // number of times any objects are visited
59 The rs field in the profile header of any object points to its retainer
60 set in an indirect way: if flip is 0, it points to the retainer set;
61 if flip is 1, it points to the next byte after the retainer set (even
62 for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
63 pointer. See retainerSetOf().
66 StgWord flip = 0; // flip bit
67 // must be 0 if DEBUG_RETAINER is on (for static closures)
69 #define setRetainerSetToNull(c) \
70 (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)
72 static void retainStack(StgClosure *, retainer, StgPtr, StgPtr);
73 static void retainClosure(StgClosure *, StgClosure *, retainer);
75 static void belongToHeap(StgPtr p);
80 cStackSize records how many times retainStack() has been invoked recursively,
81 that is, the number of activation records for retainStack() on the C stack.
82 maxCStackSize records its max value.
84 cStackSize <= maxCStackSize
86 static nat cStackSize, maxCStackSize;
88 static nat sumOfNewCost; // sum of the cost of each object, computed
89 // when the object is first visited
90 static nat sumOfNewCostExtra; // for those objects not visited during
91 // retainer profiling, e.g., MUT_VAR
92 static nat costArray[N_CLOSURE_TYPES];
94 nat sumOfCostLinear; // sum of the costs of all object, computed
95 // when linearly traversing the heap after
97 nat costArrayLinear[N_CLOSURE_TYPES];
100 /* -----------------------------------------------------------------------------
101 * Retainer stack - header
103 * Although the retainer stack implementation could be separated *
104 * from the retainer profiling engine, there does not seem to be
105 * any advantage in doing that; retainer stack is an integral part
106 * of retainer profiling engine and cannot be use elsewhere at
108 * -------------------------------------------------------------------------- */
118 // fixed layout or layout specified by a field in the closure
123 // See StgClosureInfo in InfoTables.h
124 #if SIZEOF_VOID_P == 8
161 firstStack points to the first block group.
162 currentStack points to the block group currently being used.
163 currentStack->free == stackLimit.
164 stackTop points to the topmost byte in the stack of currentStack.
165 Unless the whole stack is empty, stackTop must point to the topmost
166 object (or byte) in the whole stack. Thus, it is only when the whole stack
167 is empty that stackTop == stackLimit (not during the execution of push()
169 stackBottom == currentStack->start.
170 stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
172 When a current stack becomes empty, stackTop is set to point to
173 the topmost element on the previous block group so as to satisfy
174 the invariants described above.
176 static bdescr *firstStack = NULL;
177 static bdescr *currentStack;
178 static stackElement *stackBottom, *stackTop, *stackLimit;
181 currentStackBoundary is used to mark the current stack chunk.
182 If stackTop == currentStackBoundary, it means that the current stack chunk
183 is empty. It is the responsibility of the user to keep currentStackBoundary
184 valid all the time if it is to be employed.
186 static stackElement *currentStackBoundary;
189 stackSize records the current size of the stack.
190 maxStackSize records its high water mark.
192 stackSize <= maxStackSize
194 stackSize is just an estimate measure of the depth of the graph. The reason
195 is that some heap objects have only a single child and may not result
196 in a new element being pushed onto the stack. Therefore, at the end of
197 retainer profiling, maxStackSize + maxCStackSize is some value no greater
198 than the actual depth of the graph.
200 #ifdef DEBUG_RETAINER
201 static int stackSize, maxStackSize;
204 // number of blocks allocated for one stack
205 #define BLOCKS_IN_STACK 1
207 /* -----------------------------------------------------------------------------
208 * Add a new block group to the stack.
210 * currentStack->link == s.
211 * -------------------------------------------------------------------------- */
213 newStackBlock( bdescr *bd )
216 stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
217 stackBottom = (stackElement *)bd->start;
218 stackLimit = (stackElement *)stackTop;
219 bd->free = (StgPtr)stackLimit;
222 /* -----------------------------------------------------------------------------
223 * Return to the previous block group.
225 * s->link == currentStack.
226 * -------------------------------------------------------------------------- */
228 returnToOldStack( bdescr *bd )
231 stackTop = (stackElement *)bd->free;
232 stackBottom = (stackElement *)bd->start;
233 stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
234 bd->free = (StgPtr)stackLimit;
237 /* -----------------------------------------------------------------------------
238 * Initializes the traverse stack.
239 * -------------------------------------------------------------------------- */
241 initializeTraverseStack( void )
243 if (firstStack != NULL) {
244 freeChain(firstStack);
247 firstStack = allocGroup(BLOCKS_IN_STACK);
248 firstStack->link = NULL;
249 firstStack->u.back = NULL;
251 newStackBlock(firstStack);
254 /* -----------------------------------------------------------------------------
255 * Frees all the block groups in the traverse stack.
258 * -------------------------------------------------------------------------- */
260 closeTraverseStack( void )
262 freeChain(firstStack);
266 /* -----------------------------------------------------------------------------
267 * Returns rtsTrue if the whole stack is empty.
268 * -------------------------------------------------------------------------- */
269 static INLINE rtsBool
270 isEmptyRetainerStack( void )
272 return (firstStack == currentStack) && stackTop == stackLimit;
275 /* -----------------------------------------------------------------------------
276 * Returns size of stack
277 * -------------------------------------------------------------------------- */
280 retainerStackBlocks( void )
285 for (bd = firstStack; bd != NULL; bd = bd->link)
292 /* -----------------------------------------------------------------------------
293 * Returns rtsTrue if stackTop is at the stack boundary of the current stack,
294 * i.e., if the current stack chunk is empty.
295 * -------------------------------------------------------------------------- */
296 static INLINE rtsBool
299 return stackTop == currentStackBoundary;
302 /* -----------------------------------------------------------------------------
303 * Initializes *info from ptrs and payload.
305 * payload[] begins with ptrs pointers followed by non-pointers.
306 * -------------------------------------------------------------------------- */
308 init_ptrs( stackPos *info, nat ptrs, StgPtr payload )
310 info->type = posTypePtrs;
311 info->next.ptrs.pos = 0;
312 info->next.ptrs.ptrs = ptrs;
313 info->next.ptrs.payload = payload;
316 /* -----------------------------------------------------------------------------
317 * Find the next object from *info.
318 * -------------------------------------------------------------------------- */
319 static INLINE StgClosure *
320 find_ptrs( stackPos *info )
322 if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
323 return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
329 /* -----------------------------------------------------------------------------
330 * Initializes *info from SRT information stored in *infoTable.
331 * -------------------------------------------------------------------------- */
333 init_srt_fun( stackPos *info, StgFunInfoTable *infoTable )
335 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
336 info->type = posTypeLargeSRT;
337 info->next.large_srt.srt = (StgLargeSRT *)infoTable->srt;
338 info->next.large_srt.offset = 0;
340 info->type = posTypeSRT;
341 info->next.srt.srt = (StgClosure **)(infoTable->srt);
342 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
347 init_srt_thunk( stackPos *info, StgThunkInfoTable *infoTable )
349 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
350 info->type = posTypeLargeSRT;
351 info->next.large_srt.srt = (StgLargeSRT *)infoTable->srt;
352 info->next.large_srt.offset = 0;
354 info->type = posTypeSRT;
355 info->next.srt.srt = (StgClosure **)(infoTable->srt);
356 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
360 /* -----------------------------------------------------------------------------
361 * Find the next object from *info.
362 * -------------------------------------------------------------------------- */
363 static INLINE StgClosure *
364 find_srt( stackPos *info )
369 if (info->type == posTypeSRT) {
371 bitmap = info->next.srt.srt_bitmap;
372 while (bitmap != 0) {
373 if ((bitmap & 1) != 0) {
374 #ifdef ENABLE_WIN32_DLL_SUPPORT
376 if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
377 c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
379 c = *(info->next.srt.srt);
381 c = *(info->next.srt.srt);
383 bitmap = bitmap >> 1;
384 info->next.srt.srt++;
385 info->next.srt.srt_bitmap = bitmap;
388 bitmap = bitmap >> 1;
389 info->next.srt.srt++;
391 // bitmap is now zero...
396 nat i = info->next.large_srt.offset;
399 // Follow the pattern from GC.c:scavenge_large_srt_bitmap().
400 bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
401 bitmap = bitmap >> (i % BITS_IN(StgWord));
402 while (i < info->next.large_srt.srt->l.size) {
403 if ((bitmap & 1) != 0) {
404 c = ((StgClosure **)info->next.large_srt.srt->srt)[i];
406 info->next.large_srt.offset = i;
410 if (i % BITS_IN(W_) == 0) {
411 bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
413 bitmap = bitmap >> 1;
416 // reached the end of this bitmap.
417 info->next.large_srt.offset = i;
422 /* -----------------------------------------------------------------------------
423 * push() pushes a stackElement representing the next child of *c
424 * onto the traverse stack. If *c has no child, *first_child is set
425 * to NULL and nothing is pushed onto the stack. If *c has only one
426 * child, *c_chlid is set to that child and nothing is pushed onto
427 * the stack. If *c has more than two children, *first_child is set
428 * to the first child and a stackElement representing the second
429 * child is pushed onto the stack.
432 * *c_child_r is the most recent retainer of *c's children.
433 * *c is not any of TSO, AP, PAP, AP_STACK, which means that
434 * there cannot be any stack objects.
435 * Note: SRTs are considered to be children as well.
436 * -------------------------------------------------------------------------- */
438 push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
441 bdescr *nbd; // Next Block Descriptor
443 #ifdef DEBUG_RETAINER
444 // fprintf(stderr, "push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
447 ASSERT(get_itbl(c)->type != TSO);
448 ASSERT(get_itbl(c)->type != AP_STACK);
455 se.c_child_r = c_child_r;
458 switch (get_itbl(c)->type) {
465 case SE_CAF_BLACKHOLE:
470 // one child (fixed), no SRT
473 *first_child = ((StgMutVar *)c)->var;
476 // blocking_queue must be TSO and the head of a linked list of TSOs.
477 // Shoule it be a child? Seems to be yes.
478 *first_child = (StgClosure *)((StgBlockingQueue *)c)->blocking_queue;
481 *first_child = ((StgSelector *)c)->selectee;
484 case IND_OLDGEN_PERM:
486 *first_child = ((StgIndOldGen *)c)->indirectee;
490 *first_child = c->payload[0];
493 // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
494 // of the next child. We do not write a separate initialization code.
495 // Also we do not have to initialize info.type;
497 // two children (fixed), no SRT
498 // need to push a stackElement, but nothing to store in se.info
500 *first_child = c->payload[0]; // return the first pointer
501 // se.info.type = posTypeStep;
502 // se.info.next.step = 2; // 2 = second
505 // three children (fixed), no SRT
506 // need to push a stackElement
508 // head must be TSO and the head of a linked list of TSOs.
509 // Shoule it be a child? Seems to be yes.
510 *first_child = (StgClosure *)((StgMVar *)c)->head;
511 // se.info.type = posTypeStep;
512 se.info.next.step = 2; // 2 = second
515 // three children (fixed), no SRT
517 *first_child = ((StgWeak *)c)->key;
518 // se.info.type = posTypeStep;
519 se.info.next.step = 2;
522 // layout.payload.ptrs, no SRT
528 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
530 *first_child = find_ptrs(&se.info);
531 if (*first_child == NULL)
535 // StgMutArrPtr.ptrs, no SRT
537 case MUT_ARR_PTRS_FROZEN:
538 init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
539 (StgPtr)(((StgMutArrPtrs *)c)->payload));
540 *first_child = find_ptrs(&se.info);
541 if (*first_child == NULL)
545 // layout.payload.ptrs, SRT
546 case FUN: // *c is a heap object.
548 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
549 *first_child = find_ptrs(&se.info);
550 if (*first_child == NULL)
551 // no child from ptrs, so check SRT
557 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
558 *first_child = find_ptrs(&se.info);
559 if (*first_child == NULL)
560 // no child from ptrs, so check SRT
564 // 1 fixed child, SRT
567 *first_child = c->payload[0];
568 ASSERT(*first_child != NULL);
569 init_srt_fun(&se.info, get_fun_itbl(c));
574 *first_child = c->payload[0];
575 ASSERT(*first_child != NULL);
576 init_srt_thunk(&se.info, get_thunk_itbl(c));
579 case FUN_STATIC: // *c is a heap object.
580 ASSERT(get_itbl(c)->srt_bitmap != 0);
584 init_srt_fun(&se.info, get_fun_itbl(c));
585 *first_child = find_srt(&se.info);
586 if (*first_child == NULL)
592 ASSERT(get_itbl(c)->srt_bitmap != 0);
596 init_srt_thunk(&se.info, get_thunk_itbl(c));
597 *first_child = find_srt(&se.info);
598 if (*first_child == NULL)
609 case CONSTR_CHARLIKE:
610 case CONSTR_NOCAF_STATIC:
631 barf("Invalid object *c in push()");
635 if (stackTop - 1 < stackBottom) {
636 #ifdef DEBUG_RETAINER
637 // fprintf(stderr, "push() to the next stack.\n");
639 // currentStack->free is updated when the active stack is switched
640 // to the next stack.
641 currentStack->free = (StgPtr)stackTop;
643 if (currentStack->link == NULL) {
644 nbd = allocGroup(BLOCKS_IN_STACK);
646 nbd->u.back = currentStack;
647 currentStack->link = nbd;
649 nbd = currentStack->link;
654 // adjust stackTop (acutal push)
656 // If the size of stackElement was huge, we would better replace the
657 // following statement by either a memcpy() call or a switch statement
658 // on the type of the element. Currently, the size of stackElement is
659 // small enough (5 words) that this direct assignment seems to be enough.
662 #ifdef DEBUG_RETAINER
664 if (stackSize > maxStackSize) maxStackSize = stackSize;
665 // ASSERT(stackSize >= 0);
666 // fprintf(stderr, "stackSize = %d\n", stackSize);
670 /* -----------------------------------------------------------------------------
671 * popOff() and popOffReal(): Pop a stackElement off the traverse stack.
673 * stackTop cannot be equal to stackLimit unless the whole stack is
674 * empty, in which case popOff() is not allowed.
676 * You can think of popOffReal() as a part of popOff() which is
677 * executed at the end of popOff() in necessary. Since popOff() is
678 * likely to be executed quite often while popOffReal() is not, we
679 * separate popOffReal() from popOff(), which is declared as an
680 * INLINE function (for the sake of execution speed). popOffReal()
681 * is called only within popOff() and nowhere else.
682 * -------------------------------------------------------------------------- */
686 bdescr *pbd; // Previous Block Descriptor
688 #ifdef DEBUG_RETAINER
689 // fprintf(stderr, "pop() to the previous stack.\n");
692 ASSERT(stackTop + 1 == stackLimit);
693 ASSERT(stackBottom == (stackElement *)currentStack->start);
695 if (firstStack == currentStack) {
696 // The stack is completely empty.
698 ASSERT(stackTop == stackLimit);
699 #ifdef DEBUG_RETAINER
701 if (stackSize > maxStackSize) maxStackSize = stackSize;
703 ASSERT(stackSize >= 0);
704 fprintf(stderr, "stackSize = %d\n", stackSize);
710 // currentStack->free is updated when the active stack is switched back
711 // to the previous stack.
712 currentStack->free = (StgPtr)stackLimit;
714 // find the previous block descriptor
715 pbd = currentStack->u.back;
718 returnToOldStack(pbd);
720 #ifdef DEBUG_RETAINER
722 if (stackSize > maxStackSize) maxStackSize = stackSize;
724 ASSERT(stackSize >= 0);
725 fprintf(stderr, "stackSize = %d\n", stackSize);
732 #ifdef DEBUG_RETAINER
733 // fprintf(stderr, "\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
736 ASSERT(stackTop != stackLimit);
737 ASSERT(!isEmptyRetainerStack());
739 // <= (instead of <) is wrong!
740 if (stackTop + 1 < stackLimit) {
742 #ifdef DEBUG_RETAINER
744 if (stackSize > maxStackSize) maxStackSize = stackSize;
746 ASSERT(stackSize >= 0);
747 fprintf(stderr, "stackSize = %d\n", stackSize);
756 /* -----------------------------------------------------------------------------
757 * Finds the next object to be considered for retainer profiling and store
759 * Test if the topmost stack element indicates that more objects are left,
760 * and if so, retrieve the first object and store its pointer to *c. Also,
761 * set *cp and *r appropriately, both of which are stored in the stack element.
762 * The topmost stack element then is overwritten so as for it to now denote
764 * If the topmost stack element indicates no more objects are left, pop
765 * off the stack element until either an object can be retrieved or
766 * the current stack chunk becomes empty, indicated by rtsTrue returned by
767 * isOnBoundary(), in which case *c is set to NULL.
769 * It is okay to call this function even when the current stack chunk
771 * -------------------------------------------------------------------------- */
773 pop( StgClosure **c, StgClosure **cp, retainer *r )
777 #ifdef DEBUG_RETAINER
778 // fprintf(stderr, "pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
782 if (isOnBoundary()) { // if the current stack chunk is depleted
789 switch (get_itbl(se->c)->type) {
790 // two children (fixed), no SRT
791 // nothing in se.info
793 *c = se->c->payload[1];
799 // three children (fixed), no SRT
800 // need to push a stackElement
802 if (se->info.next.step == 2) {
803 *c = (StgClosure *)((StgMVar *)se->c)->tail;
804 se->info.next.step++; // move to the next step
807 *c = ((StgMVar *)se->c)->value;
814 // three children (fixed), no SRT
816 if (se->info.next.step == 2) {
817 *c = ((StgWeak *)se->c)->value;
818 se->info.next.step++;
821 *c = ((StgWeak *)se->c)->finalizer;
833 // StgMutArrPtr.ptrs, no SRT
835 case MUT_ARR_PTRS_FROZEN:
836 *c = find_ptrs(&se->info);
845 // layout.payload.ptrs, SRT
846 case FUN: // always a heap object
848 if (se->info.type == posTypePtrs) {
849 *c = find_ptrs(&se->info);
855 init_srt_fun(&se->info, get_fun_itbl(se->c));
861 if (se->info.type == posTypePtrs) {
862 *c = find_ptrs(&se->info);
868 init_srt_thunk(&se->info, get_thunk_itbl(se->c));
884 *c = find_srt(&se->info);
893 // no child (fixed), no SRT
899 case SE_CAF_BLACKHOLE:
901 // one child (fixed), no SRT
907 case IND_OLDGEN_PERM:
917 case CONSTR_CHARLIKE:
918 case CONSTR_NOCAF_STATIC:
939 barf("Invalid object *c in pop()");
945 /* -----------------------------------------------------------------------------
946 * RETAINER PROFILING ENGINE
947 * -------------------------------------------------------------------------- */
950 initRetainerProfiling( void )
952 initializeAllRetainerSet();
953 retainerGeneration = 0;
956 /* -----------------------------------------------------------------------------
957 * This function must be called before f-closing prof_file.
958 * -------------------------------------------------------------------------- */
960 endRetainerProfiling( void )
962 #ifdef SECOND_APPROACH
963 outputAllRetainerSet(prof_file);
967 /* -----------------------------------------------------------------------------
968 * Returns the actual pointer to the retainer set of the closure *c.
969 * It may adjust RSET(c) subject to flip.
971 * RSET(c) is initialized to NULL if its current value does not
974 * Even though this function has side effects, they CAN be ignored because
975 * subsequent calls to retainerSetOf() always result in the same return value
976 * and retainerSetOf() is the only way to retrieve retainerSet of a given
978 * We have to perform an XOR (^) operation each time a closure is examined.
979 * The reason is that we do not know when a closure is visited last.
980 * -------------------------------------------------------------------------- */
982 maybeInitRetainerSet( StgClosure *c )
984 if (!isRetainerSetFieldValid(c)) {
985 setRetainerSetToNull(c);
989 /* -----------------------------------------------------------------------------
990 * Returns rtsTrue if *c is a retainer.
991 * -------------------------------------------------------------------------- */
992 static INLINE rtsBool
993 isRetainer( StgClosure *c )
995 switch (get_itbl(c)->type) {
999 // TSOs MUST be retainers: they constitute the set of roots.
1007 case MUT_ARR_PTRS_FROZEN:
1009 // thunks are retainers.
1016 case THUNK_SELECTOR:
1020 // Static thunks, or CAFS, are obviously retainers.
1023 // WEAK objects are roots; there is separate code in which traversing
1024 // begins from WEAK objects.
1046 // partial applications
1052 case SE_CAF_BLACKHOLE:
1056 case IND_OLDGEN_PERM:
1071 // IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
1073 // CONSTR_INTLIKE, CONSTR_CHARLIKE, and CONSTR_NOCAF_STATIC
1074 // cannot be *c, *cp, *r in the retainer profiling loop.
1075 case CONSTR_INTLIKE:
1076 case CONSTR_CHARLIKE:
1077 case CONSTR_NOCAF_STATIC:
1078 // Stack objects are invalid because they are never treated as
1079 // legal objects during retainer profiling.
1097 case INVALID_OBJECT:
1099 barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
1104 /* -----------------------------------------------------------------------------
1105 * Returns the retainer function value for the closure *c, i.e., R(*c).
1106 * This function does NOT return the retainer(s) of *c.
1108 * *c must be a retainer.
1110 * Depending on the definition of this function, the maintenance of retainer
1111 * sets can be made easier. If most retainer sets are likely to be created
1112 * again across garbage collections, refreshAllRetainerSet() in
1113 * RetainerSet.c can simply do nothing.
1114 * If this is not the case, we can free all the retainer sets and
1115 * re-initialize the hash table.
1116 * See refreshAllRetainerSet() in RetainerSet.c.
1117 * -------------------------------------------------------------------------- */
1118 static INLINE retainer
1119 getRetainerFrom( StgClosure *c )
1121 ASSERT(isRetainer(c));
1123 #if defined(RETAINER_SCHEME_INFO)
1124 // Retainer scheme 1: retainer = info table
1126 #elif defined(RETAINER_SCHEME_CCS)
1127 // Retainer scheme 2: retainer = cost centre stack
1128 return c->header.prof.ccs;
1129 #elif defined(RETAINER_SCHEME_CC)
1130 // Retainer scheme 3: retainer = cost centre
1131 return c->header.prof.ccs->cc;
1135 /* -----------------------------------------------------------------------------
1136 * Associates the retainer set *s with the closure *c, that is, *s becomes
1137 * the retainer set of *c.
1141 * -------------------------------------------------------------------------- */
1143 associate( StgClosure *c, RetainerSet *s )
1145 // StgWord has the same size as pointers, so the following type
1147 RSET(c) = (RetainerSet *)((StgWord)s | flip);
1150 /* -----------------------------------------------------------------------------
1151 Call retainClosure for each of the closures covered by a large bitmap.
1152 -------------------------------------------------------------------------- */
1155 retain_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, nat size,
1156 StgClosure *c, retainer c_child_r)
1162 bitmap = large_bitmap->bitmap[b];
1163 for (i = 0; i < size; ) {
1164 if ((bitmap & 1) == 0) {
1165 retainClosure((StgClosure *)*p, c, c_child_r);
1169 if (i % BITS_IN(W_) == 0) {
1171 bitmap = large_bitmap->bitmap[b];
1173 bitmap = bitmap >> 1;
1178 static INLINE StgPtr
1179 retain_small_bitmap (StgPtr p, nat size, StgWord bitmap,
1180 StgClosure *c, retainer c_child_r)
1183 if ((bitmap & 1) == 0) {
1184 retainClosure((StgClosure *)*p, c, c_child_r);
1187 bitmap = bitmap >> 1;
1193 /* -----------------------------------------------------------------------------
1194 * Call retainClosure for each of the closures in an SRT.
1195 * ------------------------------------------------------------------------- */
1198 retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r)
1205 p = (StgClosure **)srt->srt;
1207 bitmap = srt->l.bitmap[b];
1208 for (i = 0; i < size; ) {
1209 if ((bitmap & 1) != 0) {
1210 retainClosure((StgClosure *)*p, c, c_child_r);
1214 if (i % BITS_IN(W_) == 0) {
1216 bitmap = srt->l.bitmap[b];
1218 bitmap = bitmap >> 1;
1224 retainSRT (StgClosure **srt, nat srt_bitmap, StgClosure *c, retainer c_child_r)
1229 bitmap = srt_bitmap;
1232 if (bitmap == (StgHalfWord)(-1)) {
1233 retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r );
1237 while (bitmap != 0) {
1238 if ((bitmap & 1) != 0) {
1239 #ifdef ENABLE_WIN32_DLL_SUPPORT
1240 if ( (unsigned long)(*srt) & 0x1 ) {
1241 retainClosure(*stgCast(StgClosure**,(stgCast(unsigned long, *srt) & ~0x1)),
1244 retainClosure(*srt,c,c_child_r);
1247 retainClosure(*srt,c,c_child_r);
1251 bitmap = bitmap >> 1;
1255 /* -----------------------------------------------------------------------------
1256 * Process all the objects in the stack chunk from stackStart to stackEnd
1257 * with *c and *c_child_r being their parent and their most recent retainer,
1258 * respectively. Treat stackOptionalFun as another child of *c if it is
1261 * *c is one of the following: TSO, AP_STACK.
1262 * If *c is TSO, c == c_child_r.
1263 * stackStart < stackEnd.
1264 * RSET(c) and RSET(c_child_r) are valid, i.e., their
1265 * interpretation conforms to the current value of flip (even when they
1266 * are interpreted to be NULL).
1267 * If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete,
1268 * or ThreadKilled, which means that its stack is ready to process.
1270 * This code was almost plagiarzied from GC.c! For each pointer,
1271 * retainClosure() is invoked instead of evacuate().
1272 * -------------------------------------------------------------------------- */
1274 retainStack( StgClosure *c, retainer c_child_r,
1275 StgPtr stackStart, StgPtr stackEnd )
1277 stackElement *oldStackBoundary;
1279 StgRetInfoTable *info;
1283 #ifdef DEBUG_RETAINER
1285 if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
1289 Each invocation of retainStack() creates a new virtual
1290 stack. Since all such stacks share a single common stack, we
1291 record the current currentStackBoundary, which will be restored
1294 oldStackBoundary = currentStackBoundary;
1295 currentStackBoundary = stackTop;
1297 #ifdef DEBUG_RETAINER
1298 // fprintf(stderr, "retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
1301 ASSERT(get_itbl(c)->type != TSO ||
1302 (((StgTSO *)c)->what_next != ThreadRelocated &&
1303 ((StgTSO *)c)->what_next != ThreadComplete &&
1304 ((StgTSO *)c)->what_next != ThreadKilled));
1307 while (p < stackEnd) {
1308 info = get_ret_itbl((StgClosure *)p);
1310 switch(info->i.type) {
1313 retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
1314 p += sizeofW(StgUpdateFrame);
1321 bitmap = BITMAP_BITS(info->i.layout.bitmap);
1322 size = BITMAP_SIZE(info->i.layout.bitmap);
1324 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1327 retainSRT((StgClosure **)info->srt, info->i.srt_bitmap, c, c_child_r);
1334 retainClosure((StgClosure *)*p, c, c_child_r);
1337 size = BCO_BITMAP_SIZE(bco);
1338 retain_large_bitmap(p, BCO_BITMAP(bco), size, c, c_child_r);
1343 // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
1346 size = info->i.layout.large_bitmap->size;
1348 retain_large_bitmap(p, info->i.layout.large_bitmap,
1349 size, c, c_child_r);
1351 // and don't forget to follow the SRT
1354 // Dynamic bitmap: the mask is stored on the stack
1357 dyn = ((StgRetDyn *)p)->liveness;
1359 // traverse the bitmap first
1360 bitmap = GET_LIVENESS(dyn);
1361 p = (P_)&((StgRetDyn *)p)->payload[0];
1362 size = RET_DYN_BITMAP_SIZE;
1363 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1365 // skip over the non-ptr words
1366 p += GET_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;
1368 // follow the ptr words
1369 for (size = GET_PTRS(dyn); size > 0; size--) {
1370 retainClosure((StgClosure *)*p, c, c_child_r);
1377 StgRetFun *ret_fun = (StgRetFun *)p;
1378 StgFunInfoTable *fun_info;
1380 retainClosure(ret_fun->fun, c, c_child_r);
1381 fun_info = get_fun_itbl(ret_fun->fun);
1383 p = (P_)&ret_fun->payload;
1384 switch (fun_info->fun_type) {
1386 bitmap = BITMAP_BITS(fun_info->bitmap);
1387 size = BITMAP_SIZE(fun_info->bitmap);
1388 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1391 size = ((StgLargeBitmap *)fun_info->bitmap)->size;
1392 retain_large_bitmap(p, (StgLargeBitmap *)fun_info->bitmap,
1393 size, c, c_child_r);
1397 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->fun_type]);
1398 size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->fun_type]);
1399 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1406 barf("Invalid object found in retainStack(): %d",
1407 (int)(info->i.type));
1411 // restore currentStackBoundary
1412 currentStackBoundary = oldStackBoundary;
1413 #ifdef DEBUG_RETAINER
1414 // fprintf(stderr, "retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
1417 #ifdef DEBUG_RETAINER
1422 /* ----------------------------------------------------------------------------
1423 * Call retainClosure for each of the children of a PAP/AP
1424 * ------------------------------------------------------------------------- */
1426 static INLINE StgPtr
1427 retain_PAP (StgPAP *pap, retainer c_child_r)
1430 StgWord bitmap, size;
1431 StgFunInfoTable *fun_info;
1433 retainClosure(pap->fun, (StgClosure *)pap, c_child_r);
1434 fun_info = get_fun_itbl(pap->fun);
1435 ASSERT(fun_info->i.type != PAP);
1437 p = (StgPtr)pap->payload;
1440 switch (fun_info->fun_type) {
1442 bitmap = BITMAP_BITS(fun_info->bitmap);
1443 p = retain_small_bitmap(p, pap->n_args, bitmap,
1444 (StgClosure *)pap, c_child_r);
1447 retain_large_bitmap(p, (StgLargeBitmap *)fun_info->bitmap,
1448 size, (StgClosure *)pap, c_child_r);
1452 retain_large_bitmap((StgPtr)pap->payload, BCO_BITMAP(pap->fun),
1453 size, (StgClosure *)pap, c_child_r);
1457 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->fun_type]);
1458 p = retain_small_bitmap(p, pap->n_args, bitmap,
1459 (StgClosure *)pap, c_child_r);
1465 /* -----------------------------------------------------------------------------
1466 * Compute the retainer set of *c0 and all its desecents by traversing.
1467 * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
1469 * c0 = cp0 = r0 holds only for root objects.
1470 * RSET(cp0) and RSET(r0) are valid, i.e., their
1471 * interpretation conforms to the current value of flip (even when they
1472 * are interpreted to be NULL).
1473 * However, RSET(c0) may be corrupt, i.e., it may not conform to
1474 * the current value of flip. If it does not, during the execution
1475 * of this function, RSET(c0) must be initialized as well as all
1478 * stackTop must be the same at the beginning and the exit of this function.
1479 * *c0 can be TSO (as well as AP_STACK).
1480 * -------------------------------------------------------------------------- */
1482 retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
1484 // c = Current closure
1485 // cp = Current closure's Parent
1486 // r = current closures' most recent Retainer
1487 // c_child_r = current closure's children's most recent retainer
1488 // first_child = first child of c
1489 StgClosure *c, *cp, *first_child;
1490 RetainerSet *s, *retainerSetOfc;
1491 retainer r, c_child_r;
1494 #ifdef DEBUG_RETAINER
1495 // StgPtr oldStackTop;
1498 #ifdef DEBUG_RETAINER
1499 // oldStackTop = stackTop;
1500 // fprintf(stderr, "retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
1503 // (c, cp, r) = (c0, cp0, r0)
1510 //fprintf(stderr, "loop");
1511 // pop to (c, cp, r);
1515 #ifdef DEBUG_RETAINER
1516 // fprintf(stderr, "retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
1521 //fprintf(stderr, "inner_loop");
1524 // c = current closure under consideration,
1525 // cp = current closure's parent,
1526 // r = current closure's most recent retainer
1528 // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
1529 // RSET(cp) and RSET(r) are valid.
1530 // RSET(c) is valid only if c has been visited before.
1532 // Loop invariants (on the relation between c, cp, and r)
1533 // if cp is not a retainer, r belongs to RSET(cp).
1534 // if cp is a retainer, r == cp.
1536 typeOfc = get_itbl(c)->type;
1538 #ifdef DEBUG_RETAINER
1541 case CONSTR_INTLIKE:
1542 case CONSTR_CHARLIKE:
1543 case CONSTR_NOCAF_STATIC:
1549 if (retainerSetOf(c) == NULL) { // first visit?
1550 costArray[typeOfc] += cost(c);
1551 sumOfNewCost += cost(c);
1560 if (((StgTSO *)c)->what_next == ThreadComplete ||
1561 ((StgTSO *)c)->what_next == ThreadKilled) {
1562 #ifdef DEBUG_RETAINER
1563 fprintf(stderr, "ThreadComplete or ThreadKilled encountered in retainClosure()\n");
1567 if (((StgTSO *)c)->what_next == ThreadRelocated) {
1568 #ifdef DEBUG_RETAINER
1569 fprintf(stderr, "ThreadRelocated encountered in retainClosure()\n");
1571 c = (StgClosure *)((StgTSO *)c)->link;
1577 // We just skip IND_STATIC, so its retainer set is never computed.
1578 c = ((StgIndStatic *)c)->indirectee;
1580 case CONSTR_INTLIKE:
1581 case CONSTR_CHARLIKE:
1582 // static objects with no pointers out, so goto loop.
1583 case CONSTR_NOCAF_STATIC:
1584 // It is not just enough not to compute the retainer set for *c; it is
1585 // mandatory because CONSTR_NOCAF_STATIC are not reachable from
1586 // scavenged_static_objects, the list from which is assumed to traverse
1587 // all static objects after major garbage collections.
1591 if (get_itbl(c)->srt_bitmap == 0) {
1592 // No need to compute the retainer set; no dynamic objects
1593 // are reachable from *c.
1595 // Static objects: if we traverse all the live closures,
1596 // including static closures, during each heap census then
1597 // we will observe that some static closures appear and
1598 // disappear. eg. a closure may contain a pointer to a
1599 // static function 'f' which is not otherwise reachable
1600 // (it doesn't indirectly point to any CAFs, so it doesn't
1601 // appear in any SRTs), so we would find 'f' during
1602 // traversal. However on the next sweep there may be no
1603 // closures pointing to 'f'.
1605 // We must therefore ignore static closures whose SRT is
1606 // empty, because these are exactly the closures that may
1607 // "appear". A closure with a non-empty SRT, and which is
1608 // still required, will always be reachable.
1610 // But what about CONSTR_STATIC? Surely these may be able
1611 // to appear, and they don't have SRTs, so we can't
1612 // check. So for now, we're calling
1613 // resetStaticObjectForRetainerProfiling() from the
1614 // garbage collector to reset the retainer sets in all the
1615 // reachable static objects.
1622 // The above objects are ignored in computing the average number of times
1623 // an object is visited.
1624 timesAnyObjectVisited++;
1626 // If this is the first visit to c, initialize its retainer set.
1627 maybeInitRetainerSet(c);
1628 retainerSetOfc = retainerSetOf(c);
1631 // isRetainer(cp) == rtsTrue => s == NULL
1632 // isRetainer(cp) == rtsFalse => s == cp.retainer
1636 s = retainerSetOf(cp);
1638 // (c, cp, r, s) is available.
1640 // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
1641 if (retainerSetOfc == NULL) {
1642 // This is the first visit to *c.
1646 associate(c, singleton(r));
1648 // s is actually the retainer set of *c!
1651 // compute c_child_r
1652 c_child_r = isRetainer(c) ? getRetainerFrom(c) : r;
1654 // This is not the first visit to *c.
1655 if (isMember(r, retainerSetOfc))
1656 goto loop; // no need to process child
1659 associate(c, addElement(r, retainerSetOfc));
1661 // s is not NULL and cp is not a retainer. This means that
1662 // each time *cp is visited, so is *c. Thus, if s has
1663 // exactly one more element in its retainer set than c, s
1664 // is also the new retainer set for *c.
1665 if (s->num == retainerSetOfc->num + 1) {
1668 // Otherwise, just add R_r to the current retainer set of *c.
1670 associate(c, addElement(r, retainerSetOfc));
1675 goto loop; // no need to process child
1677 // compute c_child_r
1681 // now, RSET() of all of *c, *cp, and *r is valid.
1682 // (c, c_child_r) are available.
1686 // Special case closures: we process these all in one go rather
1687 // than attempting to save the current position, because doing so
1691 retainStack(c, c_child_r,
1693 ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
1698 retain_PAP((StgPAP *)c, c_child_r);
1702 retainClosure(((StgAP_STACK *)c)->fun, c, c_child_r);
1703 retainStack(c, c_child_r,
1704 (StgPtr)((StgAP_STACK *)c)->payload,
1705 (StgPtr)((StgAP_STACK *)c)->payload +
1706 ((StgAP_STACK *)c)->size);
1710 push(c, c_child_r, &first_child);
1712 // If first_child is null, c has no child.
1713 // If first_child is not null, the top stack element points to the next
1714 // object. push() may or may not push a stackElement on the stack.
1715 if (first_child == NULL)
1718 // (c, cp, r) = (first_child, c, c_child_r)
1725 /* -----------------------------------------------------------------------------
1726 * Compute the retainer set for every object reachable from *tl.
1727 * -------------------------------------------------------------------------- */
1729 retainRoot( StgClosure **tl )
1731 // We no longer assume that only TSOs and WEAKs are roots; any closure can
1734 ASSERT(isEmptyRetainerStack());
1735 currentStackBoundary = stackTop;
1737 if (isRetainer(*tl)) {
1738 retainClosure(*tl, *tl, getRetainerFrom(*tl));
1740 retainClosure(*tl, *tl, CCS_SYSTEM);
1743 // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
1744 // *tl might be a TSO which is ThreadComplete, in which
1745 // case we ignore it for the purposes of retainer profiling.
1748 /* -----------------------------------------------------------------------------
1749 * Compute the retainer set for each of the objects in the heap.
1750 * -------------------------------------------------------------------------- */
1752 computeRetainerSet( void )
1758 #ifdef DEBUG_RETAINER
1759 RetainerSet tmpRetainerSet;
1762 GetRoots(retainRoot); // for scheduler roots
1764 // This function is called after a major GC, when key, value, and finalizer
1765 // all are guaranteed to be valid, or reachable.
1767 // The following code assumes that WEAK objects are considered to be roots
1768 // for retainer profilng.
1769 for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
1770 // retainRoot((StgClosure *)weak);
1771 retainRoot((StgClosure **)&weak);
1773 // Consider roots from the stable ptr table.
1774 markStablePtrTable(retainRoot);
1776 // The following code resets the rs field of each unvisited mutable
1777 // object (computing sumOfNewCostExtra and updating costArray[] when
1778 // debugging retainer profiler).
1779 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1781 (generations[g].mut_list == END_MUT_LIST &&
1782 generations[g].mut_once_list == END_MUT_LIST));
1785 // I think traversing through mut_list is unnecessary.
1786 // Think about removing this part.
1787 for (ml = generations[g].mut_list; ml != END_MUT_LIST;
1788 ml = ml->mut_link) {
1790 maybeInitRetainerSet((StgClosure *)ml);
1791 rtl = retainerSetOf((StgClosure *)ml);
1793 #ifdef DEBUG_RETAINER
1795 // first visit to *ml
1796 // This is a violation of the interface rule!
1797 RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);
1799 switch (get_itbl((StgClosure *)ml)->type) {
1803 case CONSTR_INTLIKE:
1804 case CONSTR_CHARLIKE:
1805 case CONSTR_NOCAF_STATIC:
1809 barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
1813 costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
1814 sumOfNewCostExtra += cost((StgClosure *)ml);
1821 // Traversing through mut_once_list is, in contrast, necessary
1822 // because we can find MUT_VAR objects which have not been
1823 // visited during retainer profiling.
1824 for (ml = generations[g].mut_once_list; ml != END_MUT_LIST;
1825 ml = ml->mut_link) {
1827 maybeInitRetainerSet((StgClosure *)ml);
1828 rtl = retainerSetOf((StgClosure *)ml);
1829 #ifdef DEBUG_RETAINER
1831 // first visit to *ml
1832 // This is a violation of the interface rule!
1833 RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);
1835 switch (get_itbl((StgClosure *)ml)->type) {
1839 case CONSTR_INTLIKE:
1840 case CONSTR_CHARLIKE:
1841 case CONSTR_NOCAF_STATIC:
1845 barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
1849 costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
1850 sumOfNewCostExtra += cost((StgClosure *)ml);
1859 /* -----------------------------------------------------------------------------
1860 * Traverse all static objects for which we compute retainer sets,
1861 * and reset their rs fields to NULL, which is accomplished by
1862 * invoking maybeInitRetainerSet(). This function must be called
1863 * before zeroing all objects reachable from scavenged_static_objects
1864 * in the case of major gabage collections. See GarbageCollect() in
1867 * The mut_once_list of the oldest generation must also be traversed?
1868 * Why? Because if the evacuation of an object pointed to by a static
1869 * indirection object fails, it is put back to the mut_once_list of
1870 * the oldest generation.
1871 * However, this is not necessary because any static indirection objects
1872 * are just traversed through to reach dynamic objects. In other words,
1873 * they are not taken into consideration in computing retainer sets.
1874 * -------------------------------------------------------------------------- */
1876 resetStaticObjectForRetainerProfiling( void )
1878 #ifdef DEBUG_RETAINER
1883 #ifdef DEBUG_RETAINER
1886 p = scavenged_static_objects;
1887 while (p != END_OF_STATIC_LIST) {
1888 #ifdef DEBUG_RETAINER
1891 switch (get_itbl(p)->type) {
1893 // Since we do not compute the retainer set of any
1894 // IND_STATIC object, we don't have to reset its retainer
1896 p = IND_STATIC_LINK(p);
1899 maybeInitRetainerSet(p);
1900 p = THUNK_STATIC_LINK(p);
1903 maybeInitRetainerSet(p);
1904 p = FUN_STATIC_LINK(p);
1907 maybeInitRetainerSet(p);
1908 p = STATIC_LINK(get_itbl(p), p);
1911 barf("resetStaticObjectForRetainerProfiling: %p (%s)",
1912 p, get_itbl(p)->type);
1916 #ifdef DEBUG_RETAINER
1917 // fprintf(stderr, "count in scavenged_static_objects = %d\n", count);
1921 /* -----------------------------------------------------------------------------
1922 * Perform retainer profiling.
1923 * N is the oldest generation being profilied, where the generations are
1924 * numbered starting at 0.
1927 * This function should be called only immediately after major garbage
1929 * ------------------------------------------------------------------------- */
1931 retainerProfile(void)
1933 #ifdef DEBUG_RETAINER
1935 nat totalHeapSize; // total raw heap size (computed by linear scanning)
1938 #ifdef DEBUG_RETAINER
1939 fprintf(stderr, " < retainerProfile() invoked : %d>\n", retainerGeneration);
1944 // We haven't flipped the bit yet.
1945 #ifdef DEBUG_RETAINER
1946 fprintf(stderr, "Before traversing:\n");
1947 sumOfCostLinear = 0;
1948 for (i = 0;i < N_CLOSURE_TYPES; i++)
1949 costArrayLinear[i] = 0;
1950 totalHeapSize = checkHeapSanityForRetainerProfiling();
1952 fprintf(stderr, "\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
1954 fprintf(stderr, "costArrayLinear[] = ");
1955 for (i = 0;i < N_CLOSURE_TYPES; i++)
1956 fprintf(stderr, "[%u:%u] ", i, costArrayLinear[i]);
1957 fprintf(stderr, "\n");
1960 ASSERT(sumOfCostLinear == totalHeapSize);
1963 #define pcostArrayLinear(index) \
1964 if (costArrayLinear[index] > 0) \
1965 fprintf(stderr, "costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
1966 pcostArrayLinear(THUNK_STATIC);
1967 pcostArrayLinear(FUN_STATIC);
1968 pcostArrayLinear(CONSTR_STATIC);
1969 pcostArrayLinear(CONSTR_NOCAF_STATIC);
1970 pcostArrayLinear(CONSTR_INTLIKE);
1971 pcostArrayLinear(CONSTR_CHARLIKE);
1975 // Now we flips flip.
1978 #ifdef DEBUG_RETAINER
1984 numObjectVisited = 0;
1985 timesAnyObjectVisited = 0;
1987 #ifdef DEBUG_RETAINER
1988 fprintf(stderr, "During traversing:\n");
1990 sumOfNewCostExtra = 0;
1991 for (i = 0;i < N_CLOSURE_TYPES; i++)
1996 We initialize the traverse stack each time the retainer profiling is
1997 performed (because the traverse stack size varies on each retainer profiling
1998 and this operation is not costly anyhow). However, we just refresh the
2001 initializeTraverseStack();
2002 #ifdef DEBUG_RETAINER
2003 initializeAllRetainerSet();
2005 refreshAllRetainerSet();
2007 computeRetainerSet();
2009 #ifdef DEBUG_RETAINER
2010 fprintf(stderr, "After traversing:\n");
2011 sumOfCostLinear = 0;
2012 for (i = 0;i < N_CLOSURE_TYPES; i++)
2013 costArrayLinear[i] = 0;
2014 totalHeapSize = checkHeapSanityForRetainerProfiling();
2016 fprintf(stderr, "\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
2017 ASSERT(sumOfCostLinear == totalHeapSize);
2019 // now, compare the two results
2022 costArray[] must be exactly the same as costArrayLinear[].
2024 1) Dead weak pointers, whose type is CONSTR. These objects are not
2025 reachable from any roots.
2027 fprintf(stderr, "Comparison:\n");
2028 fprintf(stderr, "\tcostArrayLinear[] (must be empty) = ");
2029 for (i = 0;i < N_CLOSURE_TYPES; i++)
2030 if (costArray[i] != costArrayLinear[i])
2031 // nothing should be printed except MUT_VAR after major GCs
2032 fprintf(stderr, "[%u:%u] ", i, costArrayLinear[i]);
2033 fprintf(stderr, "\n");
2035 fprintf(stderr, "\tsumOfNewCost = %u\n", sumOfNewCost);
2036 fprintf(stderr, "\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
2037 fprintf(stderr, "\tcostArray[] (must be empty) = ");
2038 for (i = 0;i < N_CLOSURE_TYPES; i++)
2039 if (costArray[i] != costArrayLinear[i])
2040 // nothing should be printed except MUT_VAR after major GCs
2041 fprintf(stderr, "[%u:%u] ", i, costArray[i]);
2042 fprintf(stderr, "\n");
2044 // only for major garbage collection
2045 ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
2049 closeTraverseStack();
2050 #ifdef DEBUG_RETAINER
2051 closeAllRetainerSet();
2053 // Note that there is no post-processing for the retainer sets.
2055 retainerGeneration++;
2058 retainerGeneration - 1, // retainerGeneration has just been incremented!
2059 #ifdef DEBUG_RETAINER
2060 maxCStackSize, maxStackSize,
2062 (double)timesAnyObjectVisited / numObjectVisited);
2065 /* -----------------------------------------------------------------------------
2067 * -------------------------------------------------------------------------- */
2069 #ifdef DEBUG_RETAINER
2071 #define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
2072 ((HEAP_ALLOCED(r) && Bdescr((P_)r)->free != (void *)-1))) && \
2073 ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa))
2076 sanityCheckHeapClosure( StgClosure *c )
2080 ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
2081 ASSERT(!closure_STATIC(c));
2082 ASSERT(LOOKS_LIKE_PTR(c));
2084 if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
2085 if (get_itbl(c)->type == CONSTR &&
2086 !strcmp(get_itbl(c)->prof.closure_type, "DEAD_WEAK") &&
2087 !strcmp(get_itbl(c)->prof.closure_desc, "DEAD_WEAK")) {
2088 fprintf(stderr, "\tUnvisited dead weak pointer object found: c = %p\n", c);
2089 costArray[get_itbl(c)->type] += cost(c);
2090 sumOfNewCost += cost(c);
2093 "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
2094 flip, c, get_itbl(c)->type,
2095 get_itbl(c)->prof.closure_type, get_itbl(c)->prof.closure_desc,
2098 // fprintf(stderr, "sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
2102 switch (info->type) {
2104 return tso_sizeW((StgTSO *)c);
2112 return stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);
2115 return sizeofW(StgMVar);
2118 case MUT_ARR_PTRS_FROZEN:
2119 return mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);
2123 return pap_sizeW((StgPAP *)c);
2126 return ap_stack_sizeW((StgAP_STACK *)c);
2129 return arr_words_sizeW((StgArrWords *)c);
2149 case SE_CAF_BLACKHOLE:
2153 case IND_OLDGEN_PERM:
2157 return sizeW_fromITBL(info);
2159 case THUNK_SELECTOR:
2160 return sizeofW(StgHeader) + MIN_UPD_SIZE;
2169 case CONSTR_INTLIKE:
2170 case CONSTR_CHARLIKE:
2171 case CONSTR_NOCAF_STATIC:
2188 case INVALID_OBJECT:
2190 barf("Invalid object in sanityCheckHeapClosure(): %d",
2197 heapCheck( bdescr *bd )
2200 static nat costSum, size;
2203 while (bd != NULL) {
2205 while (p < bd->free) {
2206 size = sanityCheckHeapClosure((StgClosure *)p);
2207 sumOfCostLinear += size;
2208 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2210 // no need for slop check; I think slops are not used currently.
2212 ASSERT(p == bd->free);
2213 costSum += bd->free - bd->start;
2221 smallObjectPoolCheck(void)
2225 static nat costSum, size;
2227 bd = small_alloc_list;
2235 while (p < alloc_Hp) {
2236 size = sanityCheckHeapClosure((StgClosure *)p);
2237 sumOfCostLinear += size;
2238 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2241 ASSERT(p == alloc_Hp);
2242 costSum += alloc_Hp - bd->start;
2245 while (bd != NULL) {
2247 while (p < bd->free) {
2248 size = sanityCheckHeapClosure((StgClosure *)p);
2249 sumOfCostLinear += size;
2250 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2253 ASSERT(p == bd->free);
2254 costSum += bd->free - bd->start;
2262 chainCheck(bdescr *bd)
2267 while (bd != NULL) {
2268 // bd->free - bd->start is not an accurate measurement of the
2269 // object size. Actually it is always zero, so we compute its
2271 size = sanityCheckHeapClosure((StgClosure *)bd->start);
2272 sumOfCostLinear += size;
2273 costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size;
2282 checkHeapSanityForRetainerProfiling( void )
2287 fprintf(stderr, "START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2288 if (RtsFlags.GcFlags.generations == 1) {
2289 costSum += heapCheck(g0s0->to_blocks);
2290 fprintf(stderr, "heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2291 costSum += chainCheck(g0s0->large_objects);
2292 fprintf(stderr, "chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2294 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
2295 for (s = 0; s < generations[g].n_steps; s++) {
2297 After all live objects have been scavenged, the garbage
2298 collector may create some objects in
2299 scheduleFinalizers(). These objects are created throught
2300 allocate(), so the small object pool or the large object
2301 pool of the g0s0 may not be empty.
2303 if (g == 0 && s == 0) {
2304 costSum += smallObjectPoolCheck();
2305 fprintf(stderr, "smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2306 costSum += chainCheck(generations[g].steps[s].large_objects);
2307 fprintf(stderr, "chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2309 costSum += heapCheck(generations[g].steps[s].blocks);
2310 fprintf(stderr, "heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2311 costSum += chainCheck(generations[g].steps[s].large_objects);
2312 fprintf(stderr, "chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2321 findPointer(StgPtr p)
2327 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2328 for (s = 0; s < generations[g].n_steps; s++) {
2329 // if (g == 0 && s == 0) continue;
2330 bd = generations[g].steps[s].blocks;
2331 for (; bd; bd = bd->link) {
2332 for (q = bd->start; q < bd->free; q++) {
2333 if (*q == (StgWord)p) {
2335 while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
2336 fprintf(stderr, "Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
2341 bd = generations[g].steps[s].large_objects;
2342 for (; bd; bd = bd->link) {
2343 e = bd->start + cost((StgClosure *)bd->start);
2344 for (q = bd->start; q < e; q++) {
2345 if (*q == (StgWord)p) {
2347 while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
2348 fprintf(stderr, "Found in gen[%d], large_objects: %p\n", g, r);
2358 belongToHeap(StgPtr p)
2363 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2364 for (s = 0; s < generations[g].n_steps; s++) {
2365 // if (g == 0 && s == 0) continue;
2366 bd = generations[g].steps[s].blocks;
2367 for (; bd; bd = bd->link) {
2368 if (bd->start <= p && p < bd->free) {
2369 fprintf(stderr, "Belongs to gen[%d], step[%d]", g, s);
2373 bd = generations[g].steps[s].large_objects;
2374 for (; bd; bd = bd->link) {
2375 if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
2376 fprintf(stderr, "Found in gen[%d], large_objects: %p\n", g, bd->start);
2383 #endif // DEBUG_RETAINER
2385 #endif /* PROFILING */