1 /* -----------------------------------------------------------------------------
2 * $Id: LdvProfile.c,v 1.1 2001/11/22 14:25:12 simonmar Exp $
4 * (c) The GHC Team, 2001
7 * Lag/Drag/Void profiling.
9 * ---------------------------------------------------------------------------*/
15 #include "LdvProfile.h"
18 #include "Proftimer.h"
19 #include "Profiling.h"
26 ldvTime stores the current LDV time, that is, the current era. It
27 is one larger than the number of times LDV profiling has been
29 ldvTime - 1 == the number of time LDV profiling was executed
30 == the number of censuses made so far.
32 ldvTime must be no longer than LDV_SHIFT (15 or 30) bits.
34 LDV profiling is turned off if ldvTime is 0.
35 LDV profiling is turned on if ldvTime is > 0.
36 ldvTime is initialized to 1 in initLdvProfiling().
37 If LDV profiling is not performed, ldvTime must remain 0 (e.g., when we
38 are doing retainer profiling).
39 ldvTime is set to 1 in initLdvProfiling().
40 ldvTime is set back to 0 in shutdownHaskell().
41 In the meanwhile, ldvTime increments.
45 // ldvTimeSave is set in LdvCensusKillAll(), and stores the final number of
46 // times that LDV profiling was proformed.
47 static nat ldvTimeSave;
49 // gi[] stores the statistics obtained at each heap census.
50 // gi[0] is not used. See initLdvProfiling().
53 #define giINCREMENT 32 // allocation unit for gi[]
54 static nat giLength; // current length of gi[]
56 // giMax is initialized to 2^LDV_SHIFT in initLdvProfiling().
57 // When ldvTime reaches giMax, the profiling stops because a closure can
58 // store only up to (giMax - 1) as its creation or last use time.
61 /* --------------------------------------------------------------------------
62 * Fills in the slop when a *dynamic* closure changes its type.
63 * First calls LDV_recordDead() to declare the closure is dead, and then
67 * 1) blackholing, UPD_BH_UPDATABLE() and UPD_BH_SINGLE_ENTRY (in
68 * includes/StgMacros.h), threadLazyBlackHole() and
69 * threadSqueezeStack() (in GC.c).
70 * 2) updating with indirection closures, updateWithIndirection()
71 * and updateWithPermIndirection() (in Storage.h).
73 * LDV_recordDead_FILL_SLOP_DYNAMIC() is not called on 'inherently used'
74 * closures such as TSO. It is not called on PAP because PAP is not updatable.
75 * ----------------------------------------------------------------------- */
77 LDV_recordDead_FILL_SLOP_DYNAMIC( StgClosure *p )
80 StgInfoTable *inf = get_itbl((p));
92 nw = inf->layout.payload.ptrs + inf->layout.payload.nptrs;
93 if (nw < MIN_UPD_SIZE)
97 nw = sizeofW(StgPAP) - sizeofW(StgHeader) + ((StgPAP *)p)->n_args;
102 case SE_CAF_BLACKHOLE:
103 nw = inf->layout.payload.ptrs + inf->layout.payload.nptrs;
106 barf("Unexpected closure type %u in LDV_recordDead_FILL_SLOP_DYNAMIC()", inf->type);
109 LDV_recordDead((StgClosure *)(p), nw + sizeofW(StgHeader));
110 for (i = 0; i < nw; i++) {
111 ((StgClosure *)(p))->payload[i] = 0;
116 /* --------------------------------------------------------------------------
117 * Initialize gi[ldvTime].
118 * ----------------------------------------------------------------------- */
120 giInitForCurrentEra(void)
122 gi[ldvTime].notUsed = 0;
123 gi[ldvTime].inherentlyUsed = 0;
124 gi[ldvTime].used = 0;
126 gi[ldvTime].voidNew = 0;
127 gi[ldvTime].dragNew = 0;
130 /* --------------------------------------------------------------------------
131 * Increases ldvTime by 1 and initialize gi[ldvTime].
132 * Reallocates gi[] and increases its size if needed.
133 * ----------------------------------------------------------------------- */
135 incrementLdvTime( void )
139 if (ldvTime == giMax) {
141 "Lag/Drag/Void profiling limit %u reached. "
142 "Please increase the profiling interval with -L option.\n",
144 barf("Current profiling interval = %f seconds",
145 (float)RtsFlags.ProfFlags.profileInterval / 1000.0 );
148 if (ldvTime % giINCREMENT == 0) {
149 gi = stgReallocBytes(gi, sizeof(LdvGenInfo) * (giLength + giINCREMENT),
151 giLength += giINCREMENT;
154 // What a stupid bug I struggled against for such a long time! I
155 // placed giInitForCurrentEra() before the above rellocation part,
156 // and it cost me three hours!
157 giInitForCurrentEra();
160 /* --------------------------------------------------------------------------
161 * Initialization code for LDV profiling.
162 * ----------------------------------------------------------------------- */
164 initLdvProfiling( void )
168 gi = stgMallocBytes(sizeof(LdvGenInfo) * giINCREMENT, "initLdvProfiling");
169 giLength = giINCREMENT;
171 ldvTime = 1; // turn on LDV profiling.
172 giInitForCurrentEra();
174 // giMax = 2^LDV_SHIFT
176 for (p = 0; p < LDV_SHIFT; p++)
180 /* --------------------------------------------------------------------------
181 * This function must be called before f-closing prof_file.
182 * Still hp_file is open; see endHeapProfiling() in ProfHeap.c.
183 * ----------------------------------------------------------------------- */
185 endLdvProfiling( void )
188 int sumVoidNew, sumDragNew;
190 // Now we compute voidTotal and dragTotal of each LdvGenInfo structure.
193 for (t = 0; t < ldvTimeSave; t++) {
194 sumVoidNew += gi[t].voidNew;
195 sumDragNew += gi[t].dragNew;
196 gi[t].voidTotal = sumVoidNew;
197 gi[t].dragTotal = sumDragNew;
200 // t = 0 is wrong (because ldvTime == 0 indicates LDV profiling is
202 for (t = 1;t < ldvTimeSave; t++) {
203 fprintf(hp_file, "MARK %f\n", gi[t].time);
204 fprintf(hp_file, "BEGIN_SAMPLE %f\n", gi[t].time);
205 fprintf(hp_file, "VOID\t%u\n", gi[t].voidTotal * sizeof(StgWord));
206 fprintf(hp_file, "LAG\t%u\n", (gi[t].notUsed - gi[t].voidTotal) * sizeof(StgWord));
207 fprintf(hp_file, "USE\t%u\n", (gi[t].used - gi[t].dragTotal) * sizeof(StgWord));
208 fprintf(hp_file, "INHERENT_USE\t%u\n", gi[t].inherentlyUsed * sizeof(StgWord));
209 fprintf(hp_file, "DRAG\t%u\n", gi[t].dragTotal * sizeof(StgWord));
210 fprintf(hp_file, "END_SAMPLE %f\n", gi[t].time);
214 /* --------------------------------------------------------------------------
215 * Print the statistics.
216 * This function is called after each retainer profiling.
217 * ----------------------------------------------------------------------- */
223 /* --------------------------------------------------------------------------
224 * This function is eventually called on every object in the heap
225 * during a census. Any census is initiated immediately after a major
226 * garbage collection, and we exploit this fact in the implementation.
227 * If c is an 'inherently used' closure, gi[ldvTime].inherentlyUsed is
228 * updated. If c is an ordinary closure, either gi[ldvTime].notUsed or
229 * gi[ldvTime].used is updated.
230 * ----------------------------------------------------------------------- */
232 processHeapClosure(StgClosure *c)
240 ((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) <= ldvTime &&
241 ((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) > 0
244 ((LDVW(c) & LDV_STATE_MASK) == LDV_STATE_CREATE) ||
246 (LDVW(c) & LDV_LAST_MASK) <= ldvTime &&
247 (LDVW(c) & LDV_LAST_MASK) > 0
251 switch (info->type) {
253 'inherently used' cases: add to gi[ldvTime].inherentlyUsed
257 size = tso_sizeW((StgTSO *)c);
258 goto inherently_used;
261 size = sizeofW(StgMVar);
262 goto inherently_used;
265 case MUT_ARR_PTRS_FROZEN:
266 size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);
267 goto inherently_used;
270 size = arr_words_sizeW((StgArrWords *)c);
271 goto inherently_used;
279 size = sizeW_fromITBL(info);
280 goto inherently_used;
283 ordinary cases: add to gi[ldvTime].notUsed if c is not being used.
284 add to gi[ldvTime].used if c is being used.
287 size = stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);
296 size = sizeofW(StgHeader) + MIN_UPD_SIZE;
301 size = pap_sizeW((StgPAP *)c);
322 case SE_CAF_BLACKHOLE:
323 size = sizeW_fromITBL(info);
327 size = sizeofW(StgInd);
330 case IND_OLDGEN_PERM:
331 size = sizeofW(StgIndOldGen);
337 case IND: // IND cannot appear after major GCs.
338 case IND_OLDGEN: // IND_OLDGEN cannot appear major GCs.
339 case EVACUATED: // EVACUATED is encountered only during GCs.
346 case CONSTR_CHARLIKE:
347 case CONSTR_NOCAF_STATIC:
367 barf("Invalid object in processHeapClosure(): %d", info->type);
373 We can compute either gi[ldvTime].notUsed or gi[ldvTime].used; the other
374 can be computed from the total sum of costs.
375 At the moment, we choose to compute gi[ldvTime].notUsed, which seems to
376 be smaller than gi[ldvTime].used.
379 // ignore closures that don't satisfy our constraints.
380 if (closureSatisfiesConstraints(c)) {
381 if ((LDVW(c) & LDV_STATE_MASK) == LDV_STATE_CREATE)
382 gi[ldvTime].notUsed += size - sizeofW(StgProfHeader);
384 gi[ldvTime].used += size - sizeofW(StgProfHeader);
389 // ignore closures that don't satisfy our constraints.
390 if (closureSatisfiesConstraints(c)) {
391 gi[ldvTime].inherentlyUsed += size - sizeofW(StgProfHeader);
396 /* --------------------------------------------------------------------------
397 * Calls processHeapClosure() on every closure in the heap blocks
398 * begining at bd during a census.
399 * ----------------------------------------------------------------------- */
401 processHeap( bdescr *bd )
408 while (p < bd->free) {
409 size = processHeapClosure((StgClosure *)p);
411 while (p < bd->free && !*p) // skip slop
414 ASSERT(p == bd->free);
419 /* --------------------------------------------------------------------------
420 * Calls processHeapClosure() on every closure in the small object pool
422 * ----------------------------------------------------------------------- */
424 processSmallObjectPool( void )
430 bd = small_alloc_list;
437 while (p < alloc_Hp) {
438 size = processHeapClosure((StgClosure *)p);
440 while (p < alloc_Hp && !*p) // skip slop
443 ASSERT(p == alloc_Hp);
448 while (p < bd->free) {
449 size = processHeapClosure((StgClosure *)p);
451 while (p < bd->free && !*p) // skip slop
454 ASSERT(p == bd->free);
459 /* --------------------------------------------------------------------------
460 * Calls processHeapClosure() on every (large) closure in the object
461 * chain beginning at bd during a census.
462 * ----------------------------------------------------------------------- */
464 processChain( bdescr *bd )
467 // bd->free - bd->start is not an accurate measurement of the
468 // object size. Actually it is always zero, so we compute its
470 processHeapClosure((StgClosure *)bd->start);
475 /* --------------------------------------------------------------------------
476 * Starts a census for LDV profiling.
478 * Any call to LdvCensus() is preceded by a major garbage collection.
479 * ----------------------------------------------------------------------- */
485 // ldvTime == 0 means that LDV profiling is currently turned off.
491 // Todo: when we perform LDV profiling, the Haskell mutator time seems to
492 // be affected by -S or -s runtime option. For instance, the
493 // following two options should result in nearly same
494 // profiling outputs, but the second run (without -Sstderr
495 // option) spends almost twice as long in the Haskell
496 // mutator as the first run:
498 // 1) +RTS -Sstderr -hL -RTS
501 // This is quite a subtle bug because this wierd phenomenon is not
502 // observed in retainer profiling, yet mut_user_time_during_LDV() is
503 // completely orthogonal to mut_user_time_during_RP(). However, the
504 // overall shapes of the resultant graphs are almost the same.
506 gi[ldvTime].time = mut_user_time_during_LDV();
507 if (RtsFlags.GcFlags.generations == 1) {
509 // Todo: support LDV for two-space garbage collection.
511 barf("Lag/Drag/Void profiling not supported with -G1");
513 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
514 for (s = 0; s < generations[g].n_steps; s++) {
515 if (g == 0 && s == 0) {
516 // after a major GC, the nursery must be empty,
517 // and no need to call processNursery().
518 ASSERT(MainCapability.r.rNursery->start ==
519 MainCapability.r.rNursery->free);
520 processSmallObjectPool();
521 processChain(generations[g].steps[s].large_objects);
523 processHeap(generations[g].steps[s].blocks);
524 processChain(generations[g].steps[s].large_objects);
528 outputLdvSet(); // output to hp_file
529 stat_endLDV(); // output to prof_file
534 /* --------------------------------------------------------------------------
535 * This function is called eventually on every object destroyed during
536 * a garbage collection, whether it is a major garbage collection or
537 * not. If c is an 'inherently used' closure, nothing happens. If c
538 * is an ordinary closure, LDV_recordDead() is called on c with its
539 * proper size which excludes the profiling header portion in the
540 * closure. Returns the size of the closure, including the profiling
541 * header portion, so that the caller can find the next closure.
542 * ----------------------------------------------------------------------- */
544 processHeapClosureForDead( StgClosure *c )
551 if (info->type != EVACUATED) {
552 ASSERT(((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) <= ldvTime &&
553 ((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) > 0);
554 ASSERT(((LDVW(c) & LDV_STATE_MASK) == LDV_STATE_CREATE) ||
556 (LDVW(c) & LDV_LAST_MASK) <= ldvTime &&
557 (LDVW(c) & LDV_LAST_MASK) > 0
561 switch (info->type) {
563 'inherently used' cases: do nothing.
567 size = tso_sizeW((StgTSO *)c);
571 size = sizeofW(StgMVar);
575 case MUT_ARR_PTRS_FROZEN:
576 size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);
580 size = arr_words_sizeW((StgArrWords *)c);
589 size = sizeW_fromITBL(info);
593 ordinary cases: call LDV_recordDead().
597 size = stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);
606 size = sizeofW(StgHeader) + MIN_UPD_SIZE;
611 size = pap_sizeW((StgPAP *)c);
632 case SE_CAF_BLACKHOLE:
633 size = sizeW_fromITBL(info);
637 size = sizeofW(StgInd);
640 case IND_OLDGEN_PERM:
641 size = sizeofW(StgIndOldGen);
647 // Why can we ignore IND/IND_OLDGEN closures? We assume that
648 // any census is preceded by a major garbage collection, which
649 // IND/IND_OLDGEN closures cannot survive. Therefore, it is no
650 // use considering IND/IND_OLDGEN closures in the meanwhile
651 // because they will perish before the next census at any
654 size = sizeofW(StgInd);
658 size = sizeofW(StgIndOldGen);
662 // The size of the evacuated closure is currently stored in
663 // the LDV field. See SET_EVACUAEE_FOR_LDV() in
664 // includes/StgLdvProf.h.
676 case CONSTR_CHARLIKE:
677 case CONSTR_NOCAF_STATIC:
697 barf("Invalid object in processHeapClosureForDead(): %d", info->type);
701 // Found a dead closure: record its size
702 LDV_recordDead(c, size);
706 /* --------------------------------------------------------------------------
707 * Calls processHeapClosureForDead() on every *dead* closures in the
708 * heap blocks starting at bd.
709 * ----------------------------------------------------------------------- */
711 processHeapForDead( bdescr *bd )
717 while (p < bd->free) {
718 p += processHeapClosureForDead((StgClosure *)p);
719 while (p < bd->free && !*p) // skip slop
722 ASSERT(p == bd->free);
727 /* --------------------------------------------------------------------------
728 * Calls processHeapClosureForDead() on every *dead* closures in the nursery.
729 * ----------------------------------------------------------------------- */
731 processNurseryForDead( void )
736 bd = MainCapability.r.rNursery;
737 while (bd->start < bd->free) {
739 bdLimit = bd->start + BLOCK_SIZE_W;
740 while (p < bd->free && p < bdLimit) {
741 p += processHeapClosureForDead((StgClosure *)p);
742 while (p < bd->free && p < bdLimit && !*p) // skip slop
751 /* --------------------------------------------------------------------------
752 * Calls processHeapClosureForDead() on every *dead* closures in the
754 * ----------------------------------------------------------------------- */
756 processSmallObjectPoolForDead( void )
761 bd = small_alloc_list;
768 while (p < alloc_Hp) {
769 p += processHeapClosureForDead((StgClosure *)p);
770 while (p < alloc_Hp && !*p) // skip slop
773 ASSERT(p == alloc_Hp);
778 while (p < bd->free) {
779 p += processHeapClosureForDead((StgClosure *)p);
780 while (p < bd->free && !*p) // skip slop
783 ASSERT(p == bd->free);
788 /* --------------------------------------------------------------------------
789 * Calls processHeapClosureForDead() on every *dead* closures in the closure
791 * ----------------------------------------------------------------------- */
793 processChainForDead( bdescr *bd )
795 // Any object still in the chain is dead!
797 processHeapClosureForDead((StgClosure *)bd->start);
802 /* --------------------------------------------------------------------------
803 * Start a census for *dead* closures, and calls
804 * processHeapClosureForDead() on every closure which died in the
805 * current garbage collection. This function is called from a garbage
806 * collector right before tidying up, when all dead closures are still
807 * stored in the heap and easy to identify. Generations 0 through N
808 * have just beed garbage collected.
809 * ----------------------------------------------------------------------- */
811 LdvCensusForDead( nat N )
815 // ldvTime == 0 means that LDV profiling is currently turned off.
819 if (RtsFlags.GcFlags.generations == 1) {
821 // Todo: support LDV for two-space garbage collection.
823 barf("Lag/Drag/Void profiling not supported with -G1");
825 for (g = 0; g <= N; g++)
826 for (s = 0; s < generations[g].n_steps; s++) {
827 if (g == 0 && s == 0) {
828 processSmallObjectPoolForDead();
829 processNurseryForDead();
830 processChainForDead(generations[g].steps[s].large_objects);
832 processHeapForDead(generations[g].steps[s].blocks);
833 processChainForDead(generations[g].steps[s].large_objects);
839 /* --------------------------------------------------------------------------
840 * Regard any closure in the current heap as dead or moribund and update
841 * LDV statistics accordingly.
842 * Called from shutdownHaskell() in RtsStartup.c.
843 * Also, stops LDV profiling by resetting ldvTime to 0.
844 * ----------------------------------------------------------------------- */
846 LdvCensusKillAll( void )
848 LdvCensusForDead(RtsFlags.GcFlags.generations - 1);
850 // record the time when LDV profiling stops.
851 ldvTimeSave = ldvTime;
853 // and, stops LDV profiling.
857 #endif /* PROFILING */