1 /* -----------------------------------------------------------------------------
2 * $Id: RetainerProfile.c,v 1.1 2001/11/22 14:25:12 simonmar Exp $
4 * (c) The GHC Team, 2001
9 * ---------------------------------------------------------------------------*/
15 #include "RetainerProfile.h"
16 #include "RetainerSet.h"
20 #include "StoragePriv.h"
24 #include "Profiling.h"
26 #include "BlockAlloc.h"
28 #include "Proftimer.h"
32 Note: what to change in order to plug-in a new retainer profiling scheme?
33 (1) type retainer in ../includes/StgRetainerProf.h
34 (2) retainer function R(), i.e., getRetainerFrom()
35 (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
36 in RetainerSet.h, if needed.
37 (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
40 /* -----------------------------------------------------------------------------
42 * -------------------------------------------------------------------------- */
44 static nat retainerGeneration; // generation
46 static nat numObjectVisited; // total number of objects visited
47 static nat timesAnyObjectVisited; // number of times any objects are visited
50 The rs field in the profile header of any object points to its retainer
51 set in an indirect way: if flip is 0, it points to the retainer set;
52 if flip is 1, it points to the next byte after the retainer set (even
53 for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
54 pointer. See retainerSetOf().
57 // extract the retainer set field from c
58 #define RSET(c) ((c)->header.prof.hp.rs)
60 static StgWord flip = 0; // flip bit
61 // must be 0 if DEBUG_RETAINER is on (for static closures)
63 #define isRetainerSetFieldValid(c) \
64 ((((StgWord)(c)->header.prof.hp.rs & 1) ^ flip) == 0)
66 #define setRetainerSetToNull(c) \
67 (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)
69 static void retainStack(StgClosure *, StgClosure *, StgClosure *, StgPtr, StgPtr);
70 static void retainClosure(StgClosure *, StgClosure *, StgClosure *);
72 static void belongToHeap(StgPtr p);
77 cStackSize records how many times retainStack() has been invoked recursively,
78 that is, the number of activation records for retainStack() on the C stack.
79 maxCStackSize records its max value.
81 cStackSize <= maxCStackSize
83 static nat cStackSize, maxCStackSize;
85 static nat sumOfNewCost; // sum of the cost of each object, computed
86 // when the object is first visited
87 static nat sumOfNewCostExtra; // for those objects not visited during
88 // retainer profiling, e.g., MUT_VAR
89 static nat costArray[N_CLOSURE_TYPES];
91 nat sumOfCostLinear; // sum of the costs of all object, computed
92 // when linearly traversing the heap after
94 nat costArrayLinear[N_CLOSURE_TYPES];
97 /* -----------------------------------------------------------------------------
98 * Retainer stack - header
100 * Although the retainer stack implementation could be separated *
101 * from the retainer profiling engine, there does not seem to be
102 * any advantage in doing that; retainer stack is an integral part
103 * of retainer profiling engine and cannot be use elsewhere at
105 * -------------------------------------------------------------------------- */
114 // fixed layout or layout specified by a field in the closure
119 // See StgClosureInfo in InfoTables.h
120 #if SIZEOF_VOID_P == 8
133 StgClosure **srt_end;
144 StgClosure *c_child_r;
150 firstStack points to the first block group.
151 currentStack points to the block group currently being used.
152 currentStack->free == stackLimit.
153 stackTop points to the topmost byte in the stack of currentStack.
154 Unless the whole stack is empty, stackTop must point to the topmost
155 object (or byte) in the whole stack. Thus, it is only when the whole stack
156 is empty that stackTop == stackLimit (not during the execution of push()
158 stackBottom == currentStack->start.
159 stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
161 When a current stack becomes empty, stackTop is set to point to
162 the topmost element on the previous block group so as to satisfy
163 the invariants described above.
165 bdescr *firstStack = NULL;
166 static bdescr *currentStack;
167 static stackElement *stackBottom, *stackTop, *stackLimit;
170 currentStackBoundary is used to mark the current stack chunk.
171 If stackTop == currentStackBoundary, it means that the current stack chunk
172 is empty. It is the responsibility of the user to keep currentStackBoundary
173 valid all the time if it is to be employed.
175 static stackElement *currentStackBoundary;
178 stackSize records the current size of the stack.
179 maxStackSize records its high water mark.
181 stackSize <= maxStackSize
183 stackSize is just an estimate measure of the depth of the graph. The reason
184 is that some heap objects have only a single child and may not result
185 in a new element being pushed onto the stack. Therefore, at the end of
186 retainer profiling, maxStackSize + maxCStackSize is some value no greater
187 than the actual depth of the graph.
189 #ifdef DEBUG_RETAINER
190 static int stackSize, maxStackSize;
193 // number of blocks allocated for one stack
194 #define BLOCKS_IN_STACK 1
196 /* -----------------------------------------------------------------------------
197 * Add a new block group to the stack.
199 * currentStack->link == s.
200 * -------------------------------------------------------------------------- */
202 newStackBlock( bdescr *bd )
205 stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
206 stackBottom = (stackElement *)bd->start;
207 stackLimit = (stackElement *)stackTop;
208 bd->free = (StgPtr)stackLimit;
211 /* -----------------------------------------------------------------------------
212 * Return to the previous block group.
214 * s->link == currentStack.
215 * -------------------------------------------------------------------------- */
217 returnToOldStack( bdescr *bd )
220 stackTop = (stackElement *)bd->free;
221 stackBottom = (stackElement *)bd->start;
222 stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
223 bd->free = (StgPtr)stackLimit;
226 /* -----------------------------------------------------------------------------
227 * Initializes the traverse stack.
228 * -------------------------------------------------------------------------- */
230 initializeTraverseStack( void )
232 if (firstStack != NULL) {
233 freeChain(firstStack);
236 firstStack = allocGroup(BLOCKS_IN_STACK);
237 firstStack->link = NULL;
238 firstStack->u.back = NULL;
240 newStackBlock(firstStack);
243 /* -----------------------------------------------------------------------------
244 * Frees all the block groups in the traverse stack.
247 * -------------------------------------------------------------------------- */
249 closeTraverseStack( void )
251 freeChain(firstStack);
255 /* -----------------------------------------------------------------------------
256 * Returns rtsTrue if the whole stack is empty.
257 * -------------------------------------------------------------------------- */
258 static inline rtsBool
259 isEmptyRetainerStack( void )
261 return (firstStack == currentStack) && stackTop == stackLimit;
264 /* -----------------------------------------------------------------------------
265 * Returns rtsTrue if stackTop is at the stack boundary of the current stack,
266 * i.e., if the current stack chunk is empty.
267 * -------------------------------------------------------------------------- */
268 static inline rtsBool
271 return stackTop == currentStackBoundary;
274 /* -----------------------------------------------------------------------------
275 * Initializes *info from ptrs and payload.
277 * payload[] begins with ptrs pointers followed by non-pointers.
278 * -------------------------------------------------------------------------- */
280 init_ptrs( stackPos *info, nat ptrs, StgPtr payload )
282 info->type = posTypePtrs;
283 info->next.ptrs.pos = 0;
284 info->next.ptrs.ptrs = ptrs;
285 info->next.ptrs.payload = payload;
288 /* -----------------------------------------------------------------------------
289 * Find the next object from *info.
290 * -------------------------------------------------------------------------- */
291 static inline StgClosure *
292 find_ptrs( stackPos *info )
294 if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
295 return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
301 /* -----------------------------------------------------------------------------
302 * Initializes *info from SRT information stored in *infoTable.
303 * -------------------------------------------------------------------------- */
305 init_srt( stackPos *info, StgInfoTable *infoTable )
307 info->type = posTypeSRT;
308 info->next.srt.srt = (StgClosure **)(infoTable->srt);
309 info->next.srt.srt_end = info->next.srt.srt + infoTable->srt_len;
312 /* -----------------------------------------------------------------------------
313 * Find the next object from *info.
314 * -------------------------------------------------------------------------- */
315 static inline StgClosure *
316 find_srt( stackPos *info )
320 if (info->next.srt.srt < info->next.srt.srt_end) {
321 // See scavenge_srt() in GC.c for details.
322 #ifdef ENABLE_WIN32_DLL_SUPPORT
323 if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
324 c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
326 c = *(info->next.srt.srt);
328 c = *(info->next.srt.srt);
330 info->next.srt.srt++;
337 /* -----------------------------------------------------------------------------
338 * push() pushes a stackElement representing the next child of *c
339 * onto the traverse stack. If *c has no child, *first_child is set
340 * to NULL and nothing is pushed onto the stack. If *c has only one
341 * child, *c_chlid is set to that child and nothing is pushed onto
342 * the stack. If *c has more than two children, *first_child is set
343 * to the first child and a stackElement representing the second
344 * child is pushed onto the stack.
347 * *c_child_r is the most recent retainer of *c's children.
348 * *c is not any of TSO, PAP, or AP_UPD, which means that
349 * there cannot be any stack objects.
350 * Note: SRTs are considered to be children as well.
351 * -------------------------------------------------------------------------- */
353 push( StgClosure *c, StgClosure *c_child_r, StgClosure **first_child )
356 bdescr *nbd; // Next Block Descriptor
358 #ifdef DEBUG_RETAINER
359 // fprintf(stderr, "push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
362 ASSERT(get_itbl(c)->type != TSO);
363 ASSERT(get_itbl(c)->type != PAP);
364 ASSERT(get_itbl(c)->type != AP_UPD);
371 se.c_child_r = c_child_r;
374 switch (get_itbl(c)->type) {
381 case SE_CAF_BLACKHOLE:
386 // one child (fixed), no SRT
389 *first_child = ((StgMutVar *)c)->var;
392 // blocking_queue must be TSO and the head of a linked list of TSOs.
393 // Shoule it be a child? Seems to be yes.
394 *first_child = (StgClosure *)((StgBlockingQueue *)c)->blocking_queue;
397 *first_child = ((StgSelector *)c)->selectee;
400 case IND_OLDGEN_PERM:
402 *first_child = ((StgIndOldGen *)c)->indirectee;
406 *first_child = c->payload[0];
409 // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
410 // of the next child. We do not write a separate initialization code.
411 // Also we do not have to initialize info.type;
413 // two children (fixed), no SRT
414 // need to push a stackElement, but nothing to store in se.info
416 *first_child = c->payload[0]; // return the first pointer
417 // se.info.type = posTypeStep;
418 // se.info.next.step = 2; // 2 = second
421 // three children (fixed), no SRT
422 // need to push a stackElement
424 // head must be TSO and the head of a linked list of TSOs.
425 // Shoule it be a child? Seems to be yes.
426 *first_child = (StgClosure *)((StgMVar *)c)->head;
427 // se.info.type = posTypeStep;
428 se.info.next.step = 2; // 2 = second
431 // three children (fixed), no SRT
433 *first_child = ((StgWeak *)c)->key;
434 // se.info.type = posTypeStep;
435 se.info.next.step = 2;
438 // layout.payload.ptrs, no SRT
444 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
446 *first_child = find_ptrs(&se.info);
447 if (*first_child == NULL)
451 // StgMutArrPtr.ptrs, no SRT
453 case MUT_ARR_PTRS_FROZEN:
454 init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
455 (StgPtr)(((StgMutArrPtrs *)c)->payload));
456 *first_child = find_ptrs(&se.info);
457 if (*first_child == NULL)
461 // layout.payload.ptrs, SRT
462 case FUN: // *c is a heap object.
466 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
467 *first_child = find_ptrs(&se.info);
468 if (*first_child == NULL)
469 // no child from ptrs, so check SRT
473 // 1 fixed child, SRT
478 *first_child = c->payload[0];
479 ASSERT(*first_child != NULL);
480 init_srt(&se.info, get_itbl(c));
485 case FUN_STATIC: // *c is a heap object.
486 ASSERT(get_itbl(c)->srt_len != 0);
492 init_srt(&se.info, get_itbl(c));
493 *first_child = find_srt(&se.info);
494 if (*first_child == NULL)
504 case CONSTR_CHARLIKE:
505 case CONSTR_NOCAF_STATIC:
527 barf("Invalid object *c in push()");
531 if (stackTop - 1 < stackBottom) {
532 #ifdef DEBUG_RETAINER
533 // fprintf(stderr, "push() to the next stack.\n");
535 // currentStack->free is updated when the active stack is switched
536 // to the next stack.
537 currentStack->free = (StgPtr)stackTop;
539 if (currentStack->link == NULL) {
540 nbd = allocGroup(BLOCKS_IN_STACK);
542 nbd->u.back = currentStack;
543 currentStack->link = nbd;
545 nbd = currentStack->link;
550 // adjust stackTop (acutal push)
552 // If the size of stackElement was huge, we would better replace the
553 // following statement by either a memcpy() call or a switch statement
554 // on the type of the element. Currently, the size of stackElement is
555 // small enough (5 words) that this direct assignment seems to be enough.
558 #ifdef DEBUG_RETAINER
560 if (stackSize > maxStackSize) maxStackSize = stackSize;
561 // ASSERT(stackSize >= 0);
562 // fprintf(stderr, "stackSize = %d\n", stackSize);
566 /* -----------------------------------------------------------------------------
567 * popOff() and popOffReal(): Pop a stackElement off the traverse stack.
569 * stackTop cannot be equal to stackLimit unless the whole stack is
570 * empty, in which case popOff() is not allowed.
572 * You can think of popOffReal() as a part of popOff() which is
573 * executed at the end of popOff() in necessary. Since popOff() is
574 * likely to be executed quite often while popOffReal() is not, we
575 * separate popOffReal() from popOff(), which is declared as an
576 * inline function (for the sake of execution speed). popOffReal()
577 * is called only within popOff() and nowhere else.
578 * -------------------------------------------------------------------------- */
582 bdescr *pbd; // Previous Block Descriptor
584 #ifdef DEBUG_RETAINER
585 // fprintf(stderr, "pop() to the previous stack.\n");
588 ASSERT(stackTop + 1 == stackLimit);
589 ASSERT(stackBottom == (stackElement *)currentStack->start);
591 if (firstStack == currentStack) {
592 // The stack is completely empty.
594 ASSERT(stackTop == stackLimit);
595 #ifdef DEBUG_RETAINER
597 if (stackSize > maxStackSize) maxStackSize = stackSize;
599 ASSERT(stackSize >= 0);
600 fprintf(stderr, "stackSize = %d\n", stackSize);
606 // currentStack->free is updated when the active stack is switched back
607 // to the previous stack.
608 currentStack->free = (StgPtr)stackLimit;
610 // find the previous block descriptor
611 pbd = currentStack->u.back;
614 returnToOldStack(pbd);
616 #ifdef DEBUG_RETAINER
618 if (stackSize > maxStackSize) maxStackSize = stackSize;
620 ASSERT(stackSize >= 0);
621 fprintf(stderr, "stackSize = %d\n", stackSize);
628 #ifdef DEBUG_RETAINER
629 // fprintf(stderr, "\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
632 ASSERT(stackTop != stackLimit);
633 ASSERT(!isEmptyRetainerStack());
635 // <= (instead of <) is wrong!
636 if (stackTop + 1 < stackLimit) {
638 #ifdef DEBUG_RETAINER
640 if (stackSize > maxStackSize) maxStackSize = stackSize;
642 ASSERT(stackSize >= 0);
643 fprintf(stderr, "stackSize = %d\n", stackSize);
652 /* -----------------------------------------------------------------------------
653 * Finds the next object to be considered for retainer profiling and store
655 * Test if the topmost stack element indicates that more objects are left,
656 * and if so, retrieve the first object and store its pointer to *c. Also,
657 * set *cp and *r appropriately, both of which are stored in the stack element.
658 * The topmost stack element then is overwritten so as for it to now denote
660 * If the topmost stack element indicates no more objects are left, pop
661 * off the stack element until either an object can be retrieved or
662 * the current stack chunk becomes empty, indicated by rtsTrue returned by
663 * isOnBoundary(), in which case *c is set to NULL.
665 * It is okay to call this function even when the current stack chunk
667 * -------------------------------------------------------------------------- */
669 pop( StgClosure **c, StgClosure **cp, StgClosure **r )
673 #ifdef DEBUG_RETAINER
674 // fprintf(stderr, "pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
678 if (isOnBoundary()) { // if the current stack chunk is depleted
685 switch (get_itbl(se->c)->type) {
686 // two children (fixed), no SRT
687 // nothing in se.info
689 *c = se->c->payload[1];
695 // three children (fixed), no SRT
696 // need to push a stackElement
698 if (se->info.next.step == 2) {
699 *c = (StgClosure *)((StgMVar *)se->c)->tail;
700 se->info.next.step++; // move to the next step
703 *c = ((StgMVar *)se->c)->value;
710 // three children (fixed), no SRT
712 if (se->info.next.step == 2) {
713 *c = ((StgWeak *)se->c)->value;
714 se->info.next.step++;
717 *c = ((StgWeak *)se->c)->finalizer;
729 // StgMutArrPtr.ptrs, no SRT
731 case MUT_ARR_PTRS_FROZEN:
732 *c = find_ptrs(&se->info);
741 // layout.payload.ptrs, SRT
742 case FUN: // always a heap object
746 if (se->info.type == posTypePtrs) {
747 *c = find_ptrs(&se->info);
753 init_srt(&se->info, get_itbl(se->c));
768 *c = find_srt(&se->info);
777 // no child (fixed), no SRT
783 case SE_CAF_BLACKHOLE:
785 // one child (fixed), no SRT
791 case IND_OLDGEN_PERM:
800 case CONSTR_CHARLIKE:
801 case CONSTR_NOCAF_STATIC:
823 barf("Invalid object *c in pop()");
829 /* -----------------------------------------------------------------------------
830 * RETAINER PROFILING ENGINE
831 * -------------------------------------------------------------------------- */
834 initRetainerProfiling( void )
836 initializeAllRetainerSet();
837 retainerGeneration = 0;
840 /* -----------------------------------------------------------------------------
841 * This function must be called before f-closing prof_file.
842 * -------------------------------------------------------------------------- */
844 endRetainerProfiling( void )
846 #ifdef SECOND_APPROACH
847 outputAllRetainerSet(prof_file);
851 /* -----------------------------------------------------------------------------
852 * Returns the actual pointer to the retainer set of the closure *c.
853 * It may adjust RSET(c) subject to flip.
855 * RSET(c) is initialized to NULL if its current value does not
858 * Even though this function has side effects, they CAN be ignored because
859 * subsequent calls to retainerSetOf() always result in the same return value
860 * and retainerSetOf() is the only way to retrieve retainerSet of a given
862 * We have to perform an XOR (^) operation each time a closure is examined.
863 * The reason is that we do not know when a closure is visited last.
864 * -------------------------------------------------------------------------- */
866 maybeInitRetainerSet( StgClosure *c )
868 if (!isRetainerSetFieldValid(c)) {
869 setRetainerSetToNull(c);
873 static inline RetainerSet *
874 retainerSetOf( StgClosure *c )
876 ASSERT( isRetainerSetFieldValid(c) );
877 // StgWord has the same size as pointers, so the following type
879 return (RetainerSet *)((StgWord)RSET(c) ^ flip);
882 /* -----------------------------------------------------------------------------
883 * Returns the cost of the closure *c, e.g., the amount of heap memory
884 * allocated to *c. Static objects cost 0.
885 * The cost includes even the words allocated for profiling purpose.
887 * -------------------------------------------------------------------------- */
889 cost( StgClosure *c )
894 switch (info->type) {
896 return tso_sizeW((StgTSO *)c);
904 return stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);
913 return sizeofW(StgMVar);
916 case MUT_ARR_PTRS_FROZEN:
917 return mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);
921 return pap_sizeW((StgPAP *)c);
924 return arr_words_sizeW((StgArrWords *)c);
944 case SE_CAF_BLACKHOLE:
948 case IND_OLDGEN_PERM:
952 return sizeW_fromITBL(info);
955 return sizeofW(StgHeader) + MIN_UPD_SIZE;
960 // IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
962 // CONSTR_INTLIKE, CONSTR_CHARLIKE, and CONSTR_NOCAF_STATIC
963 // cannot be *c, *cp, *r in the retainer profiling loop.
965 case CONSTR_CHARLIKE:
966 case CONSTR_NOCAF_STATIC:
967 // Stack objects are invalid because they are never treated as
968 // legal objects during retainer profiling.
989 barf("Invalid object in cost(): %d", get_itbl(c)->type);
993 /* -----------------------------------------------------------------------------
994 * Returns the pure cost of the closure *c, i.e., the size of memory
995 * allocated for this object without profiling.
997 * costPure() subtracts the overhead incurred by profiling for all types
998 * of objects except TSO. Even though the overhead in the TSO object
999 * itself is taken into account, the additional costs due to larger
1000 * stack objects (with unnecessary retainer profiling fields) is not
1001 * considered. Still, costPure() should result in an accurate estimate
1002 * of heap use because stacks in TSO objects are allocated in large blocks.
1003 * If we get rid of the (currently unused) retainer profiling field in
1004 * all stack objects, the result will be accurate.
1005 * ------------------------------------------------------------------------- */
1007 costPure( StgClosure *c )
1011 if (!closureSatisfiesConstraints(c)) {
1017 ASSERT(cst == 0 || cst - sizeofW(StgProfHeader) > 0);
1020 return cst - sizeofW(StgProfHeader);
1026 /* -----------------------------------------------------------------------------
1027 * Returns rtsTrue if *c is a retainer.
1028 * -------------------------------------------------------------------------- */
1029 static inline rtsBool
1030 isRetainer( StgClosure *c )
1032 if (get_itbl(c)->prof.closure_desc != NULL && !strcmp(get_itbl(c)->prof.closure_desc,"PCS")) { return rtsTrue; }
1034 switch (get_itbl(c)->type) {
1038 // TSOs MUST be retainers: they constitute the set of roots.
1046 case MUT_ARR_PTRS_FROZEN:
1048 // thunks are retainers.
1055 case THUNK_SELECTOR:
1058 // Static thunks, or CAFS, are obviously retainers.
1061 // WEAK objects are roots; there is separate code in which traversing
1062 // begins from WEAK objects.
1084 // partial applications
1090 case SE_CAF_BLACKHOLE:
1094 case IND_OLDGEN_PERM:
1109 // IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
1111 // CONSTR_INTLIKE, CONSTR_CHARLIKE, and CONSTR_NOCAF_STATIC
1112 // cannot be *c, *cp, *r in the retainer profiling loop.
1113 case CONSTR_INTLIKE:
1114 case CONSTR_CHARLIKE:
1115 case CONSTR_NOCAF_STATIC:
1116 // Stack objects are invalid because they are never treated as
1117 // legal objects during retainer profiling.
1136 case INVALID_OBJECT:
1138 barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
1143 /* -----------------------------------------------------------------------------
1144 * Returns the retainer function value for the closure *c, i.e., R(*c).
1145 * This function does NOT return the retainer(s) of *c.
1147 * *c must be a retainer.
1149 * Depending on the definition of this function, the maintenance of retainer
1150 * sets can be made easier. If most retainer sets are likely to be created
1151 * again across garbage collections, refreshAllRetainerSet() in
1152 * RetainerSet.c can simply set the cost field of each retainer set.
1153 * If this is not the case, we can free all the retainer sets and
1154 * re-initialize the hash table.
1155 * See refreshAllRetainerSet() in RetainerSet.c.
1156 * -------------------------------------------------------------------------- */
1157 static inline retainer
1158 getRetainerFrom( StgClosure *c )
1160 ASSERT(isRetainer(c));
1162 #if defined(RETAINER_SCHEME_INFO)
1163 // Retainer scheme 1: retainer = info table
1165 #elif defined(RETAINER_SCHEME_CCS)
1166 // Retainer scheme 2: retainer = cost centre stack
1167 return c->header.prof.ccs;
1168 #elif defined(RETAINER_SCHEME_CC)
1169 // Retainer scheme 3: retainer = cost centre
1170 return c->header.prof.ccs->cc;
1174 /* -----------------------------------------------------------------------------
1175 * Associates the retainer set *s with the closure *c, that is, *s becomes
1176 * the retainer set of *c.
1180 * -------------------------------------------------------------------------- */
1182 associate( StgClosure *c, RetainerSet *rsOfc, RetainerSet *s )
1186 cost_c = costPure(c); // not cost(c)
1187 if (rsOfc != NULL) {
1188 ASSERT(rsOfc->cost >= cost_c);
1189 rsOfc->cost -= cost_c;
1191 // StgWord has the same size as pointers, so the following type
1193 RSET(c) = (RetainerSet *)((StgWord)s | flip);
1197 /* -----------------------------------------------------------------------------
1198 * Process all the objects in the stack chunk from stackStart to stackEnd
1199 * with *c and *c_child_r being their parent and their most recent retainer,
1200 * respectively. Treat stackOptionalFun as another child of *c if it is
1203 * *c is one of the following: TSO, PAP, and AP_UPD.
1204 * If *c is AP_UPD or PAP, stackOptionalFun is not NULL. Otherwise,
1206 * If *c is TSO, c == c_child_r.
1207 * stackStart < stackEnd.
1208 * RSET(c) and RSET(c_child_r) are valid, i.e., their
1209 * interpretation conforms to the current value of flip (even when they
1210 * are interpreted to be NULL).
1211 * If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete,
1212 * or ThreadKilled, which means that its stack is ready to process.
1214 * This code was almost plagiarzied from GC.c! For each pointer,
1215 * retainClosure() is invoked instead of evacuate().
1216 * -------------------------------------------------------------------------- */
1218 retainStack( StgClosure *c, StgClosure *c_child_r,
1219 StgClosure *stackOptionalFun, StgPtr stackStart,
1222 stackElement *oldStackBoundary;
1227 #ifdef DEBUG_RETAINER
1229 if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
1233 Each invocation of retainStack() creates a new virtual
1234 stack. Since all such stacks share a single common stack, we
1235 record the current currentStackBoundary, which will be restored
1238 oldStackBoundary = currentStackBoundary;
1239 currentStackBoundary = stackTop;
1241 #ifdef DEBUG_RETAINER
1242 // fprintf(stderr, "retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
1245 if (stackOptionalFun != NULL) {
1246 ASSERT(get_itbl(c)->type == AP_UPD || get_itbl(c)->type == PAP);
1247 retainClosure(stackOptionalFun, c, c_child_r);
1249 ASSERT(get_itbl(c)->type == TSO);
1250 ASSERT(((StgTSO *)c)->what_next != ThreadRelocated &&
1251 ((StgTSO *)c)->what_next != ThreadComplete &&
1252 ((StgTSO *)c)->what_next != ThreadKilled);
1256 while (p < stackEnd) {
1261 // The correctness of retainer profiling is subject to the
1262 // correctness of the two macros IS_ARG_TAG() and
1263 // LOOKS_LIKE_GHC_INFO(). Since LOOKS_LIKE_GHC_INFO() is a bit
1264 // precarious macro, so I believe that the current
1265 // implementation may not be quite safe. Also, scavenge_stack()
1266 // in GC.c also exploits this macro in order to identify shallow
1267 // pointers. I am not sure whether scavenge_stack() takes
1268 // further measurements to discern real shallow pointers.
1270 // I think this can be a serious problem if a stack chunk
1271 // contains some word which looks like a pointer but is
1272 // actually, say, a word constituting a floating number.
1275 // skip tagged words
1276 if (IS_ARG_TAG((StgWord)q)) {
1277 p += 1 + ARG_SIZE(q);
1281 // check if *p is a shallow closure pointer
1282 if (!LOOKS_LIKE_GHC_INFO(q)) {
1283 retainClosure((StgClosure *)q, c, c_child_r);
1288 // regular stack objects
1289 info = get_itbl((StgClosure *)p);
1290 switch(info->type) {
1292 bitmap = ((StgRetDyn *)p)->liveness;
1293 p = ((StgRetDyn *)p)->payload;
1296 // FUN and FUN_STATIC keep only their info pointer.
1303 retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
1304 p += sizeofW(StgUpdateFrame);
1313 bitmap = info->layout.bitmap;
1316 while (bitmap != 0) {
1317 if ((bitmap & 1) == 0)
1318 retainClosure((StgClosure *)*p, c, c_child_r);
1320 bitmap = bitmap >> 1;
1324 StgClosure **srt, **srt_end;
1326 srt = (StgClosure **)(info->srt);
1327 srt_end = srt + info->srt_len;
1328 for (; srt < srt_end; srt++) {
1329 // See scavenge_srt() in GC.c for details.
1330 #ifdef ENABLE_WIN32_DLL_SUPPORT
1331 if ((unsigned long)(*srt) & 0x1)
1332 retainClosure(*(StgClosure **)(((unsigned long)*srt & ~0x1)), c, c_child_r);
1334 retainClosure(*srt, c, c_child_r);
1336 retainClosure(*srt, c, c_child_r);
1346 StgLargeBitmap *large_bitmap;
1349 large_bitmap = info->layout.large_bitmap;
1352 for (i = 0; i < large_bitmap->size; i++) {
1353 bitmap = large_bitmap->bitmap[i];
1354 q = p + sizeofW(StgWord) * 8;
1355 while (bitmap != 0) {
1356 if ((bitmap & 1) == 0)
1357 retainClosure((StgClosure *)*p, c, c_child_r);
1359 bitmap = bitmap >> 1;
1361 if (i + 1 < large_bitmap->size) {
1363 retainClosure((StgClosure *)*p, c, c_child_r);
1371 barf("Invalid object found in retainStack(): %d",
1376 // restore currentStackBoundary
1377 currentStackBoundary = oldStackBoundary;
1378 #ifdef DEBUG_RETAINER
1379 // fprintf(stderr, "retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
1382 #ifdef DEBUG_RETAINER
1387 /* -----------------------------------------------------------------------------
1388 * Compute the retainer set of *c0 and all its desecents by traversing.
1389 * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
1391 * c0 = cp0 = r0 holds only for root objects.
1392 * RSET(cp0) and RSET(r0) are valid, i.e., their
1393 * interpretation conforms to the current value of flip (even when they
1394 * are interpreted to be NULL).
1395 * However, RSET(c0) may be corrupt, i.e., it may not conform to
1396 * the current value of flip. If it does not, during the execution
1397 * of this function, RSET(c0) must be initialized as well as all
1400 * stackTop must be the same at the beginning and the exit of this function.
1401 * *c0 can be TSO (as well as PAP and AP_UPD).
1402 * -------------------------------------------------------------------------- */
1404 retainClosure( StgClosure *c0, StgClosure *cp0, StgClosure *r0 )
1406 // c = Current closure
1407 // cp = Current closure's Parent
1408 // r = current closures' most recent Retainer
1409 // c_child_r = current closure's children's most recent retainer
1410 // first_child = first child of c
1411 StgClosure *c, *cp, *r, *c_child_r, *first_child;
1412 RetainerSet *s, *retainerSetOfc;
1416 #ifdef DEBUG_RETAINER
1417 // StgPtr oldStackTop;
1420 #ifdef DEBUG_RETAINER
1421 // oldStackTop = stackTop;
1422 // fprintf(stderr, "retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
1425 // (c, cp, r) = (c0, cp0, r0)
1432 //fprintf(stderr, "loop");
1433 // pop to (c, cp, r);
1437 #ifdef DEBUG_RETAINER
1438 // fprintf(stderr, "retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
1443 //fprintf(stderr, "inner_loop");
1446 // c = current closure under consideration,
1447 // cp = current closure's parent,
1448 // r = current closure's most recent retainer
1450 // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
1451 // RSET(cp) and RSET(r) are valid.
1452 // RSET(c) is valid only if c has been visited before.
1454 // Loop invariants (on the relation between c, cp, and r)
1455 // if cp is not a retainer, r belongs to RSET(cp).
1456 // if cp is a retainer, r == cp.
1458 typeOfc = get_itbl(c)->type;
1460 #ifdef DEBUG_RETAINER
1463 case CONSTR_INTLIKE:
1464 case CONSTR_CHARLIKE:
1465 case CONSTR_NOCAF_STATIC:
1471 if (retainerSetOf(c) == NULL) { // first visit?
1472 costArray[typeOfc] += cost(c);
1473 sumOfNewCost += cost(c);
1482 if (((StgTSO *)c)->what_next == ThreadComplete ||
1483 ((StgTSO *)c)->what_next == ThreadKilled) {
1484 #ifdef DEBUG_RETAINER
1485 fprintf(stderr, "ThreadComplete or ThreadKilled encountered in retainClosure()\n");
1489 if (((StgTSO *)c)->what_next == ThreadRelocated) {
1490 #ifdef DEBUG_RETAINER
1491 fprintf(stderr, "ThreadRelocated encountered in retainClosure()\n");
1493 c = (StgClosure *)((StgTSO *)c)->link;
1499 // We just skip IND_STATIC, so its retainer set is never computed.
1500 c = ((StgIndStatic *)c)->indirectee;
1502 case CONSTR_INTLIKE:
1503 case CONSTR_CHARLIKE:
1504 // static objects with no pointers out, so goto loop.
1505 case CONSTR_NOCAF_STATIC:
1506 // It is not just enough not to compute the retainer set for *c; it is
1507 // mandatory because CONSTR_NOCAF_STATIC are not reachable from
1508 // scavenged_static_objects, the list from which is assumed to traverse
1509 // all static objects after major garbage collections.
1513 if (get_itbl(c)->srt_len == 0) {
1514 // No need to compute the retainer set; no dynamic objects
1515 // are reachable from *c.
1517 // Static objects: if we traverse all the live closures,
1518 // including static closures, during each heap census then
1519 // we will observe that some static closures appear and
1520 // disappear. eg. a closure may contain a pointer to a
1521 // static function 'f' which is not otherwise reachable
1522 // (it doesn't indirectly point to any CAFs, so it doesn't
1523 // appear in any SRTs), so we would find 'f' during
1524 // traversal. However on the next sweep there may be no
1525 // closures pointing to 'f'.
1527 // We must therefore ignore static closures whose SRT is
1528 // empty, because these are exactly the closures that may
1529 // "appear". A closure with a non-empty SRT, and which is
1530 // still required, will always be reachable.
1532 // But what about CONSTR_STATIC? Surely these may be able
1533 // to appear, and they don't have SRTs, so we can't
1534 // check. So for now, we're calling
1535 // resetStaticObjectForRetainerProfiling() from the
1536 // garbage collector to reset the retainer sets in all the
1537 // reachable static objects.
1544 // The above objects are ignored in computing the average number of times
1545 // an object is visited.
1546 timesAnyObjectVisited++;
1548 // If this is the first visit to c, initialize its retainer set.
1549 maybeInitRetainerSet(c);
1550 retainerSetOfc = retainerSetOf(c);
1553 // isRetainer(cp) == rtsTrue => s == NULL
1554 // isRetainer(cp) == rtsFalse => s == cp.retainer
1558 s = retainerSetOf(cp);
1560 // (c, cp, r, s) is available.
1561 R_r = getRetainerFrom(r);
1563 // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
1564 if (retainerSetOfc == NULL) {
1565 // This is the first visit to *c.
1569 associate(c, NULL, singleton(R_r));
1571 // s is actually the retainer set of *c!
1572 associate(c, NULL, s);
1574 // compute c_child_r
1575 c_child_r = isRetainer(c) ? c : r;
1577 // This is not the first visit to *c.
1578 if (isMember(R_r, retainerSetOfc))
1579 goto loop; // no need to process child
1582 associate(c, retainerSetOfc, addElement(R_r, retainerSetOfc));
1584 // s is not NULL and cp is not a retainer. This means that
1585 // each time *cp is visited, so is *c. Thus, if s has
1586 // exactly one more element in its retainer set than c, s
1587 // is also the new retainer set for *c.
1588 if (s->num == retainerSetOfc->num + 1) {
1589 associate(c, retainerSetOfc, s);
1591 // Otherwise, just add R_r to the current retainer set of *c.
1593 associate(c, retainerSetOfc, addElement(R_r, retainerSetOfc));
1598 goto loop; // no need to process child
1600 // compute c_child_r
1604 // now, RSET() of all of *c, *cp, and *r is valid.
1605 // (c, c_child_r) are available.
1609 if (typeOfc == TSO) {
1610 retainStack(c, c_child_r,
1613 ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
1616 } else if (typeOfc == PAP) {
1617 retainStack(c, c_child_r,
1619 (StgPtr)((StgPAP *)c)->payload,
1620 (StgPtr)((StgPAP *)c)->payload + ((StgPAP *)c)->n_args);
1623 } else if (typeOfc == AP_UPD) {
1624 retainStack(c, c_child_r,
1625 ((StgAP_UPD *)c)->fun,
1626 (StgPtr)((StgAP_UPD *)c)->payload,
1627 (StgPtr)((StgAP_UPD *)c)->payload +
1628 ((StgAP_UPD *)c)->n_args);
1633 push(c, c_child_r, &first_child);
1635 // If first_child is null, c has no child.
1636 // If first_child is not null, the top stack element points to the next
1637 // object. push() may or may not push a stackElement on the stack.
1638 if (first_child == NULL)
1641 // (c, cp, r) = (first_child, c, c_child_r)
1648 /* -----------------------------------------------------------------------------
1649 * Compute the retainer set for every object reachable from *tl.
1650 * -------------------------------------------------------------------------- */
1652 retainRoot( StgClosure **tl )
1654 // We no longer assume that only TSOs and WEAKs are roots; any closure can
1657 ASSERT(isEmptyRetainerStack());
1658 currentStackBoundary = stackTop;
1660 retainClosure(*tl, *tl, *tl);
1662 // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
1663 // *tl might be a TSO which is ThreadComplete, in which
1664 // case we ignore it for the purposes of retainer profiling.
1667 /* -----------------------------------------------------------------------------
1668 * Compute the retainer set for each of the objects in the heap.
1669 * -------------------------------------------------------------------------- */
1671 computeRetainerSet( void )
1677 #ifdef DEBUG_RETAINER
1678 RetainerSet tmpRetainerSet;
1681 GetRoots(retainRoot); // for scheduler roots
1683 // This function is called after a major GC, when key, value, and finalizer
1684 // all are guaranteed to be valid, or reachable.
1686 // The following code assumes that WEAK objects are considered to be roots
1687 // for retainer profilng.
1688 for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
1689 // retainRoot((StgClosure *)weak);
1690 retainRoot((StgClosure **)&weak);
1692 // The following code resets the rs field of each unvisited mutable
1693 // object (computing sumOfNewCostExtra and updating costArray[] when
1694 // debugging retainer profiler).
1695 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1697 (generations[g].mut_list == END_MUT_LIST &&
1698 generations[g].mut_once_list == END_MUT_LIST));
1701 // I think traversing through mut_list is unnecessary.
1702 // Think about removing this part.
1703 for (ml = generations[g].mut_list; ml != END_MUT_LIST;
1704 ml = ml->mut_link) {
1706 maybeInitRetainerSet((StgClosure *)ml);
1707 rtl = retainerSetOf((StgClosure *)ml);
1709 #ifdef DEBUG_RETAINER
1711 // first visit to *ml
1712 // This is a violation of the interface rule!
1713 RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);
1715 switch (get_itbl((StgClosure *)ml)->type) {
1719 case CONSTR_INTLIKE:
1720 case CONSTR_CHARLIKE:
1721 case CONSTR_NOCAF_STATIC:
1725 barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
1729 costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
1730 sumOfNewCostExtra += cost((StgClosure *)ml);
1737 // Traversing through mut_once_list is, in contrast, necessary
1738 // because we can find MUT_VAR objects which have not been
1739 // visited during retainer profiling.
1740 for (ml = generations[g].mut_once_list; ml != END_MUT_LIST;
1741 ml = ml->mut_link) {
1743 maybeInitRetainerSet((StgClosure *)ml);
1744 rtl = retainerSetOf((StgClosure *)ml);
1745 #ifdef DEBUG_RETAINER
1747 // first visit to *ml
1748 // This is a violation of the interface rule!
1749 RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);
1751 switch (get_itbl((StgClosure *)ml)->type) {
1755 case CONSTR_INTLIKE:
1756 case CONSTR_CHARLIKE:
1757 case CONSTR_NOCAF_STATIC:
1761 barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
1765 costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
1766 sumOfNewCostExtra += cost((StgClosure *)ml);
1775 /* -----------------------------------------------------------------------------
1776 * Traverse all static objects for which we compute retainer sets,
1777 * and reset their rs fields to NULL, which is accomplished by
1778 * invoking maybeInitRetainerSet(). This function must be called
1779 * before zeroing all objects reachable from scavenged_static_objects
1780 * in the case of major gabage collections. See GarbageCollect() in
1783 * The mut_once_list of the oldest generation must also be traversed?
1784 * Why? Because if the evacuation of an object pointed to by a static
1785 * indirection object fails, it is put back to the mut_once_list of
1786 * the oldest generation.
1787 * However, this is not necessary because any static indirection objects
1788 * are just traversed through to reach dynamic objects. In other words,
1789 * they are not taken into consideration in computing retainer sets.
1790 * -------------------------------------------------------------------------- */
1792 resetStaticObjectForRetainerProfiling( void )
1794 #ifdef DEBUG_RETAINER
1799 #ifdef DEBUG_RETAINER
1802 p = scavenged_static_objects;
1803 while (p != END_OF_STATIC_LIST) {
1804 #ifdef DEBUG_RETAINER
1807 switch (get_itbl(p)->type) {
1809 // Since we do not compute the retainer set of any
1810 // IND_STATIC object, we don't have to reset its retainer
1812 p = IND_STATIC_LINK(p);
1815 maybeInitRetainerSet(p);
1816 p = THUNK_STATIC_LINK(p);
1819 maybeInitRetainerSet(p);
1820 p = FUN_STATIC_LINK(p);
1823 maybeInitRetainerSet(p);
1824 p = STATIC_LINK(get_itbl(p), p);
1827 barf("resetStaticObjectForRetainerProfiling: %p (%s)",
1828 p, get_itbl(p)->type);
1832 #ifdef DEBUG_RETAINER
1833 // fprintf(stderr, "count in scavenged_static_objects = %d\n", count);
1837 /* -----------------------------------------------------------------------------
1838 * Perform retainer profiling.
1839 * N is the oldest generation being profilied, where the generations are
1840 * numbered starting at 0.
1843 * This function should be called only immediately after major garbage
1845 * ------------------------------------------------------------------------- */
1847 retainerProfile(void)
1849 nat allCost, numSet;
1850 #ifdef DEBUG_RETAINER
1852 nat totalHeapSize; // total raw heap size (computed by linear scanning)
1855 #ifdef DEBUG_RETAINER
1856 fprintf(stderr, " < retainerProfile() invoked : %d>\n", retainerGeneration);
1861 // We haven't flipped the bit yet.
1862 #ifdef DEBUG_RETAINER
1863 fprintf(stderr, "Before traversing:\n");
1864 sumOfCostLinear = 0;
1865 for (i = 0;i < N_CLOSURE_TYPES; i++)
1866 costArrayLinear[i] = 0;
1867 totalHeapSize = checkHeapSanityForRetainerProfiling();
1869 fprintf(stderr, "\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
1871 fprintf(stderr, "costArrayLinear[] = ");
1872 for (i = 0;i < N_CLOSURE_TYPES; i++)
1873 fprintf(stderr, "[%u:%u] ", i, costArrayLinear[i]);
1874 fprintf(stderr, "\n");
1877 ASSERT(sumOfCostLinear == totalHeapSize);
1880 #define pcostArrayLinear(index) \
1881 if (costArrayLinear[index] > 0) \
1882 fprintf(stderr, "costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
1883 pcostArrayLinear(THUNK_STATIC);
1884 pcostArrayLinear(FUN_STATIC);
1885 pcostArrayLinear(CONSTR_STATIC);
1886 pcostArrayLinear(CONSTR_NOCAF_STATIC);
1887 pcostArrayLinear(CONSTR_INTLIKE);
1888 pcostArrayLinear(CONSTR_CHARLIKE);
1892 // Now we flips flip.
1895 #ifdef DEBUG_RETAINER
1901 numObjectVisited = 0;
1902 timesAnyObjectVisited = 0;
1904 #ifdef DEBUG_RETAINER
1905 fprintf(stderr, "During traversing:\n");
1907 sumOfNewCostExtra = 0;
1908 for (i = 0;i < N_CLOSURE_TYPES; i++)
1913 We initialize the traverse stack each time the retainer profiling is
1914 performed (because the traverse stack size varies on each retainer profiling
1915 and this operation is not costly anyhow). However, we just refresh the
1918 initializeTraverseStack();
1919 #ifdef DEBUG_RETAINER
1920 initializeAllRetainerSet();
1922 refreshAllRetainerSet();
1924 computeRetainerSet();
1926 outputRetainerSet(hp_file, &allCost, &numSet);
1928 #ifdef DEBUG_RETAINER
1929 fprintf(stderr, "After traversing:\n");
1930 sumOfCostLinear = 0;
1931 for (i = 0;i < N_CLOSURE_TYPES; i++)
1932 costArrayLinear[i] = 0;
1933 totalHeapSize = checkHeapSanityForRetainerProfiling();
1935 fprintf(stderr, "\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
1936 ASSERT(sumOfCostLinear == totalHeapSize);
1938 // now, compare the two results
1941 costArray[] must be exactly the same as costArrayLinear[].
1943 1) Dead weak pointers, whose type is CONSTR. These objects are not
1944 reachable from any roots.
1946 fprintf(stderr, "Comparison:\n");
1947 fprintf(stderr, "\tcostArrayLinear[] (must be empty) = ");
1948 for (i = 0;i < N_CLOSURE_TYPES; i++)
1949 if (costArray[i] != costArrayLinear[i])
1950 // nothing should be printed except MUT_VAR after major GCs
1951 fprintf(stderr, "[%u:%u] ", i, costArrayLinear[i]);
1952 fprintf(stderr, "\n");
1954 fprintf(stderr, "\tsumOfNewCost = %u\n", sumOfNewCost);
1955 fprintf(stderr, "\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
1956 fprintf(stderr, "\tcostArray[] (must be empty) = ");
1957 for (i = 0;i < N_CLOSURE_TYPES; i++)
1958 if (costArray[i] != costArrayLinear[i])
1959 // nothing should be printed except MUT_VAR after major GCs
1960 fprintf(stderr, "[%u:%u] ", i, costArray[i]);
1961 fprintf(stderr, "\n");
1963 // only for major garbage collection
1964 ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
1968 closeTraverseStack();
1969 #ifdef DEBUG_RETAINER
1970 closeAllRetainerSet();
1972 // Note that there is no post-processing for the retainer sets.
1974 retainerGeneration++;
1977 retainerGeneration - 1, // retainerGeneration has just been incremented!
1978 #ifdef DEBUG_RETAINER
1979 maxCStackSize, maxStackSize,
1981 (double)timesAnyObjectVisited / numObjectVisited,
1985 /* -----------------------------------------------------------------------------
1987 * -------------------------------------------------------------------------- */
1989 #ifdef DEBUG_RETAINER
1991 #define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
1992 ((HEAP_ALLOCED(r) && Bdescr((P_)r)->free != (void *)-1))) && \
1993 ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa))
1996 sanityCheckHeapClosure( StgClosure *c )
2000 ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
2001 ASSERT(!closure_STATIC(c));
2002 ASSERT(LOOKS_LIKE_PTR(c));
2004 if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
2005 if (get_itbl(c)->type == CONSTR &&
2006 !strcmp(get_itbl(c)->prof.closure_type, "DEAD_WEAK") &&
2007 !strcmp(get_itbl(c)->prof.closure_desc, "DEAD_WEAK")) {
2008 fprintf(stderr, "\tUnvisited dead weak pointer object found: c = %p\n", c);
2009 costArray[get_itbl(c)->type] += cost(c);
2010 sumOfNewCost += cost(c);
2013 "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
2014 flip, c, get_itbl(c)->type,
2015 get_itbl(c)->prof.closure_type, get_itbl(c)->prof.closure_desc,
2018 // fprintf(stderr, "sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
2022 switch (info->type) {
2024 return tso_sizeW((StgTSO *)c);
2032 return stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);
2035 return sizeofW(StgMVar);
2038 case MUT_ARR_PTRS_FROZEN:
2039 return mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);
2043 return pap_sizeW((StgPAP *)c);
2046 return arr_words_sizeW((StgArrWords *)c);
2066 case SE_CAF_BLACKHOLE:
2070 case IND_OLDGEN_PERM:
2074 return sizeW_fromITBL(info);
2076 case THUNK_SELECTOR:
2077 return sizeofW(StgHeader) + MIN_UPD_SIZE;
2086 case CONSTR_INTLIKE:
2087 case CONSTR_CHARLIKE:
2088 case CONSTR_NOCAF_STATIC:
2106 case INVALID_OBJECT:
2108 barf("Invalid object in sanityCheckHeapClosure(): %d",
2115 heapCheck( bdescr *bd )
2118 static nat costSum, size;
2121 while (bd != NULL) {
2123 while (p < bd->free) {
2124 size = sanityCheckHeapClosure((StgClosure *)p);
2125 sumOfCostLinear += size;
2126 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2128 // no need for slop check; I think slops are not used currently.
2130 ASSERT(p == bd->free);
2131 costSum += bd->free - bd->start;
2139 smallObjectPoolCheck(void)
2143 static nat costSum, size;
2145 bd = small_alloc_list;
2153 while (p < alloc_Hp) {
2154 size = sanityCheckHeapClosure((StgClosure *)p);
2155 sumOfCostLinear += size;
2156 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2159 ASSERT(p == alloc_Hp);
2160 costSum += alloc_Hp - bd->start;
2163 while (bd != NULL) {
2165 while (p < bd->free) {
2166 size = sanityCheckHeapClosure((StgClosure *)p);
2167 sumOfCostLinear += size;
2168 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2171 ASSERT(p == bd->free);
2172 costSum += bd->free - bd->start;
2180 chainCheck(bdescr *bd)
2185 while (bd != NULL) {
2186 // bd->free - bd->start is not an accurate measurement of the
2187 // object size. Actually it is always zero, so we compute its
2189 size = sanityCheckHeapClosure((StgClosure *)bd->start);
2190 sumOfCostLinear += size;
2191 costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size;
2200 checkHeapSanityForRetainerProfiling( void )
2205 fprintf(stderr, "START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2206 if (RtsFlags.GcFlags.generations == 1) {
2207 costSum += heapCheck(g0s0->to_blocks);
2208 fprintf(stderr, "heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2209 costSum += chainCheck(g0s0->large_objects);
2210 fprintf(stderr, "chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2212 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
2213 for (s = 0; s < generations[g].n_steps; s++) {
2215 After all live objects have been scavenged, the garbage
2216 collector may create some objects in
2217 scheduleFinalizers(). These objects are created throught
2218 allocate(), so the small object pool or the large object
2219 pool of the g0s0 may not be empty.
2221 if (g == 0 && s == 0) {
2222 costSum += smallObjectPoolCheck();
2223 fprintf(stderr, "smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2224 costSum += chainCheck(generations[g].steps[s].large_objects);
2225 fprintf(stderr, "chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2227 costSum += heapCheck(generations[g].steps[s].blocks);
2228 fprintf(stderr, "heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2229 costSum += chainCheck(generations[g].steps[s].large_objects);
2230 fprintf(stderr, "chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2239 findPointer(StgPtr p)
2245 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2246 for (s = 0; s < generations[g].n_steps; s++) {
2247 // if (g == 0 && s == 0) continue;
2248 bd = generations[g].steps[s].blocks;
2249 for (; bd; bd = bd->link) {
2250 for (q = bd->start; q < bd->free; q++) {
2251 if (*q == (StgWord)p) {
2253 while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
2254 fprintf(stderr, "Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
2259 bd = generations[g].steps[s].large_objects;
2260 for (; bd; bd = bd->link) {
2261 e = bd->start + cost((StgClosure *)bd->start);
2262 for (q = bd->start; q < e; q++) {
2263 if (*q == (StgWord)p) {
2265 while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
2266 fprintf(stderr, "Found in gen[%d], large_objects: %p\n", g, r);
2276 belongToHeap(StgPtr p)
2281 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2282 for (s = 0; s < generations[g].n_steps; s++) {
2283 // if (g == 0 && s == 0) continue;
2284 bd = generations[g].steps[s].blocks;
2285 for (; bd; bd = bd->link) {
2286 if (bd->start <= p && p < bd->free) {
2287 fprintf(stderr, "Belongs to gen[%d], step[%d]", g, s);
2291 bd = generations[g].steps[s].large_objects;
2292 for (; bd; bd = bd->link) {
2293 if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
2294 fprintf(stderr, "Found in gen[%d], large_objects: %p\n", g, bd->start);
2301 #endif // DEBUG_RETAINER
2303 #endif /* PROFILING */