1 /* -----------------------------------------------------------------------------
2 * $Id: ProfHeap.c,v 1.39 2002/11/01 11:05:46 simonmar Exp $
4 * (c) The GHC Team, 1998-2000
6 * Support for heap profiling
8 * ---------------------------------------------------------------------------*/
10 #if defined(DEBUG) && !defined(PROFILING)
11 #define DEBUG_HEAP_PROF
13 #undef DEBUG_HEAP_PROF
16 #if defined(PROFILING) || defined(DEBUG_HEAP_PROF)
18 #include "PosixSource.h"
22 #include "Profiling.h"
28 #include "RetainerProfile.h"
29 #include "LdvProfile.h"
35 /* -----------------------------------------------------------------------------
36 * era stores the current time period. It is the same as the
37 * number of censuses that have been performed.
40 * era must be no longer than LDV_SHIFT (15 or 30) bits.
42 * era is initialized to 1 in initHeapProfiling().
44 * max_era is initialized to 2^LDV_SHIFT in initHeapProfiling().
45 * When era reaches max_era, the profiling stops because a closure can
46 * store only up to (max_era - 1) as its creation or last use time.
47 * -------------------------------------------------------------------------- */
51 /* -----------------------------------------------------------------------------
54 * For most heap profiles each closure identity gets a simple count
55 * of live words in the heap at each census. However, if we're
56 * selecting by biography, then we have to keep the various
57 * lag/drag/void counters for each identity.
58 * -------------------------------------------------------------------------- */
// Per-identity counter record. Elsewhere in this file the LDV tallies below are
// reached as ctr->c.ldv.*, next to ctr->c.resid and ctr->identity.
59 typedef struct _counter {
64 int prim; // total size of 'inherently used' closures
65 int not_used; // total size of 'never used' closures
66 int used; // total size of 'used at least once' closures
67 int void_total; // current total size of 'destroyed without being used' closures
68 int drag_total; // current total size of 'used at least once and waiting to die'
// Link to the next counter; new counters are pushed onto a census's `ctrs`
// list through this field.
71 struct _counter *next;
// Resets the LDV (biographical) tallies of `ctr` to zero.
75 initLDVCtr( counter *ctr )
78 ctr->c.ldv.not_used = 0;
80 ctr->c.ldv.void_total = 0;
81 ctr->c.ldv.drag_total = 0;
85 double time; // the time in MUT time when the census is made
90 // for LDV profiling, when just displaying by LDV
// Array of census records; the rest of this file indexes it by era
// (censuses[era], censuses[t]).
98 static Census *censuses = NULL;
// Element count used when sizing the `censuses` allocation.
99 static nat n_censuses = 0;
// Forward declarations for helpers defined later in this file.
102 static void aggregateCensusInfo( void );
105 static void dumpCensus( Census *census );
107 /* -----------------------------------------------------------------------------
108 Closure Type Profiling;
110 PROBABLY TOTALLY OUT OF DATE -- ToDo (SDM)
111 -------------------------------------------------------------------------- */
113 #ifdef DEBUG_HEAP_PROF
// Printable closure-type names. closureIdentity() indexes this table with
// get_itbl(p)->type, so the entry order must follow the closure type numbering
// (NOTE(review): confirm the table is in sync with the closure type list; the
// section header above already flags it as probably out of date).
114 static char *type_names[] = {
120 , "CONSTR_NOCAF_STATIC"
158 , "MUT_ARR_PTRS_FROZEN"
172 #endif /* DEBUG_HEAP_PROF */
174 /* -----------------------------------------------------------------------------
175 * Find the "closure identity", which is a unique pointer representing
176 * the band to which this closure's heap space is attributed in the
178 * ------------------------------------------------------------------------- */
// Maps closure `p` to the pointer under which its heap space is tallied.
// Callers cast the result to StgWord and use it as a hash-table key.
// The kind of pointer returned depends on RtsFlags.ProfFlags.doHeapProfile.
180 closureIdentity( StgClosure *p )
182 switch (RtsFlags.ProfFlags.doHeapProfile) {
// key: the closure's cost-centre stack
186 return p->header.prof.ccs;
// key: the module field of that stack's cost centre
188 return p->header.prof.ccs->cc->module;
// key: the closure description string from the info table
190 return get_itbl(p)->prof.closure_desc;
// key: the closure type string from the info table
192 return get_itbl(p)->prof.closure_type;
193 case HEAP_BY_RETAINER:
194 // AFAIK, the only closures in the heap which might not have a
195 // valid retainer set are DEAD_WEAK closures.
// NOTE(review): confirm what is returned when the retainer set field is not
// valid; heapCensusChain() skips closures whose identity is NULL.
196 if (isRetainerSetFieldValid(p))
197 return retainerSetOf(p);
202 case HEAP_BY_INFOPTR:
// key: the closure's info pointer
203 return (void *)((StgClosure *)p)->header.info;
204 case HEAP_BY_CLOSURE_TYPE:
// NOTE(review): the index is not range-checked against the size of
// type_names[] — confirm every closure type has an entry.
205 return type_names[get_itbl(p)->type];
// NOTE(review): presumably the arm for an unhandled profile mode — confirm.
209 barf("closureIdentity");
213 /* --------------------------------------------------------------------------
214 * Profiling type predicates
215 * ----------------------------------------------------------------------- */
// True when the profile is displayed by LDV or a biography selector has been
// supplied; either one requires LDV bookkeeping.
217 static inline rtsBool
218 doingLDVProfiling( void )
220 return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
221 || RtsFlags.ProfFlags.bioSelector != NULL);
// True when the profile is displayed by retainer set or a retainer selector
// has been supplied.
224 static inline rtsBool
225 doingRetainerProfiling( void )
227 return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER
228 || RtsFlags.ProfFlags.retainerSelector != NULL);
232 // Processes a closure 'c' being destroyed whose size is 'size'.
233 // Make sure that LDV_recordDead() is not invoked on 'inherently used' closures
234 // such as TSO; they should not be involved in computing dragNew or voidNew.
236 // Even though era is checked in both LdvCensusForDead() and
237 // LdvCensusKillAll(), we still need to make sure that era is > 0 because
238 // LDV_recordDead() may be called from elsewhere in the runtime system. E.g.,
239 // when a thunk is replaced by an indirection object.
// c    - the closure being destroyed
// size - its size (presumably in words; dumpCensus() multiplies the tallies by
//        sizeof(W_) when printing); the profiling header is subtracted before
//        any tally is touched
243 LDV_recordDead( StgClosure *c, nat size )
// Do nothing unless an era is active and the closure passes the selectors.
249 if (era > 0 && closureSatisfiesConstraints(c)) {
250 size -= sizeofW(StgProfHeader);
// Closure is still in the CREATE state: its space is accounted as void.
251 if ((LDVW((c)) & LDV_STATE_MASK) == LDV_STATE_CREATE) {
// t = the era stored in the creation field of the LDV word
252 t = (LDVW((c)) & LDV_CREATE_MASK) >> LDV_SHIFT;
// Without a biography selector only the census-wide totals are kept.
254 if (RtsFlags.ProfFlags.bioSelector == NULL) {
// +size at era t, -size at the current era; aggregateCensusInfo() later turns
// these deltas into running totals.
255 censuses[t].void_total += (int)size;
256 censuses[era].void_total -= (int)size;
// With a biography selector the same deltas are recorded per closure identity.
258 id = closureIdentity(c);
259 ctr = lookupHashTable(censuses[t].hash, (StgWord)id);
// NOTE(review): the NULL check is an ASSERT only; confirm a counter for `id`
// always exists in census t when assertions are compiled out.
260 ASSERT( ctr != NULL );
261 ctr->c.ldv.void_total += (int)size;
262 ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
// Create a counter in the current era's arena, register it in the hash table
// and push it onto the era's counter list.
264 ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
266 insertHashTable(censuses[era].hash, (StgWord)id, ctr);
268 ctr->next = censuses[era].ctrs;
269 censuses[era].ctrs = ctr;
271 ctr->c.ldv.void_total -= (int)size;
// Otherwise the space is accounted as drag.
// t = presumably the last-use era held in the low bits of the LDV word; the
// positive delta goes to census t+1.
275 t = LDVW((c)) & LDV_LAST_MASK;
277 if (RtsFlags.ProfFlags.bioSelector == NULL) {
// NOTE(review): these two updates omit the (int) cast applied to `size` by
// every other tally update in this function; `size` is a nat — confirm the
// implicit conversion is intended.
278 censuses[t+1].drag_total += size;
279 censuses[era].drag_total -= size;
282 id = closureIdentity(c);
283 ctr = lookupHashTable(censuses[t+1].hash, (StgWord)id);
284 ASSERT( ctr != NULL );
285 ctr->c.ldv.drag_total += (int)size;
286 ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
// Same counter creation sequence as in the void case above.
288 ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
290 insertHashTable(censuses[era].hash, (StgWord)id, ctr);
292 ctr->next = censuses[era].ctrs;
293 censuses[era].ctrs = ctr;
295 ctr->c.ldv.drag_total -= (int)size;
303 /* --------------------------------------------------------------------------
304 * Initialize censuses[era];
305 * ----------------------------------------------------------------------- */
// Prepares a census record for use: gives it a fresh hash table and arena and
// zeroes its totals.
307 initEra(Census *census)
309 census->hash = allocHashTable();
311 census->arena = newArena();
313 census->not_used = 0;
316 census->void_total = 0;
317 census->drag_total = 0;
320 /* --------------------------------------------------------------------------
321 * Increases era by 1 and initializes censuses[era].
322 * Reallocates censuses[] and increases its size if needed.
323 * ----------------------------------------------------------------------- */
328 if (doingLDVProfiling()) {
331 if (era == max_era) {
332 barf("maximum number of censuses reached; use +RTS -i to reduce");
335 if (era == n_censuses) {
337 censuses = stgReallocBytes(censuses, sizeof(Census) * n_censuses,
343 initEra( &censuses[era] );
346 /* -----------------------------------------------------------------------------
347 * DEBUG heap profiling, by info table
348 * -------------------------------------------------------------------------- */
350 #ifdef DEBUG_HEAP_PROF
353 void initProfiling1( void )
357 void initProfiling2( void )
362 void endProfiling( void )
366 #endif /* DEBUG_HEAP_PROF */
368 /* --------------------------------------------------------------------------
369 * Initialize the heap profiler
370 * ----------------------------------------------------------------------- */
// Sets up heap profiling: allocates the census array, initialises the first
// census and writes the header records (JOB, DATE, units, an empty first
// sample) to hp_file.
372 initHeapProfiling(void)
// Nothing to set up when no heap profile was requested.
374 if (! RtsFlags.ProfFlags.doHeapProfile) {
// LDV and retainer profiling are rejected as a combination.
379 if (doingLDVProfiling() && doingRetainerProfiling()) {
380 prog_belch("cannot mix -hb and -hr");
385 // we only count eras if we're doing LDV profiling. Otherwise era
388 if (doingLDVProfiling()) {
396 { // max_era = 2^LDV_SHIFT
399 for (p = 0; p < LDV_SHIFT; p++)
404 censuses = stgMallocBytes(sizeof(Census) * n_censuses, "initHeapProfiling");
406 initEra( &censuses[era] );
// JOB record: the program name, then the program arguments and the RTS
// arguments, all inside one double-quoted string.
// NOTE(review): arguments are written verbatim with %s; an argument that
// contains a double quote or newline would break the quoted JOB field —
// confirm whether consumers of the .hp file tolerate that.
408 fprintf(hp_file, "JOB \"%s", prog_argv[0]);
413 for(count = 1; count < prog_argc; count++)
414 fprintf(hp_file, " %s", prog_argv[count]);
415 fprintf(hp_file, " +RTS ");
416 for(count = 0; count < rts_argc; count++)
417 fprintf(hp_file, "%s ", rts_argv[count]);
418 fprintf(hp_file, "\n");
420 #endif /* PROFILING */
// Closes the quoted JOB string.
422 fprintf(hp_file, "\"\n" );
424 fprintf(hp_file, "DATE \"%s\"\n", time_str());
426 fprintf(hp_file, "SAMPLE_UNIT \"seconds\"\n");
427 fprintf(hp_file, "VALUE_UNIT \"bytes\"\n");
// An empty sample at time zero.
429 fprintf(hp_file, "BEGIN_SAMPLE 0.00\n");
430 fprintf(hp_file, "END_SAMPLE 0.00\n");
432 #ifdef DEBUG_HEAP_PROF
433 DEBUG_LoadSymbols(prog_argv[0]);
437 if (doingRetainerProfiling()) {
438 initRetainerProfiling();
// Finishes heap profiling: shuts down retainer profiling if active, emits the
// deferred LDV censuses, and writes a final empty sample stamped with the
// current mutator time.
446 endHeapProfiling(void)
450 if (! RtsFlags.ProfFlags.doHeapProfile) {
455 if (doingRetainerProfiling()) {
456 endRetainerProfiling();
// LDV censuses are only printed here: the per-era deltas are first folded into
// totals, then eras 1 .. era-1 are dumped in order.
461 if (doingLDVProfiling()) {
464 aggregateCensusInfo();
465 for (t = 1; t < era; t++) {
466 dumpCensus( &censuses[t] );
471 seconds = mut_user_time();
472 fprintf(hp_file, "BEGIN_SAMPLE %0.2f\n", seconds);
473 fprintf(hp_file, "END_SAMPLE %0.2f\n", seconds);
// Writes a textual form of cost-centre stack `ccs` to `fp`, formatted into a
// local buffer sized from `max_length` and ending in "..." when the stack does
// not fit.
// fp         - destination stream
// ccs        - stack to print; walked through prevStack links
// max_length - buffer budget in characters (dumpCensus() passes 25)
481 fprint_ccs(FILE *fp, CostCentreStack *ccs, nat max_length)
// NOTE(review): variable-length array sized by the caller's argument; also,
// the `max_length-3` / `max_length-4` arithmetic below presumably assumes
// max_length >= 4 — confirm for all callers.
483 char buf[max_length+1];
488 // MAIN on its own gets printed as "MAIN", otherwise we ignore MAIN.
489 if (ccs == CCS_MAIN) {
494 fprintf(fp, "(%d)", ccs->ccsID);
496 // keep printing components of the stack until we run out of space
497 // in the buffer. If we run out of space, end with "...".
498 for (; ccs != NULL && ccs != CCS_MAIN; ccs = ccs->prevStack) {
500 // CAF cost centres print as M.CAF, but we leave the module
501 // name out of all the others to save space.
502 if (!strcmp(ccs->cc->label,"CAF")) {
504 written = snprintf(buf+next_offset,
505 (int)max_length-3-(int)next_offset,
506 "%s.CAF", ccs->cc->module);
// NOTE(review): this sprintf has no length bound, unlike the snprintf form
// just above; presumably it is the fallback for platforms without snprintf.
// A long module name can write past the end of buf before the length check
// further down runs.
508 written = sprintf(buf+next_offset,
509 "%s.CAF", ccs->cc->module);
512 if (ccs->prevStack != NULL && ccs->prevStack != CCS_MAIN) {
518 written = snprintf(buf+next_offset,
519 (int)max_length-3-(int)next_offset,
520 template, ccs->cc->label);
// NOTE(review): same unbounded-sprintf concern as for the CAF case.
522 written = sprintf(buf+next_offset,
523 template, ccs->cc->label);
// Out of room: overwrite the tail of the buffer with "...".
527 if (next_offset+written >= max_length-4) {
528 sprintf(buf+max_length-4, "...");
531 next_offset += written;
534 fprintf(fp, "%s", buf);
// Tests whether `str` is equal to one of the comma-separated fragments of
// `sel`. Returns rtsFalse once `sel` is exhausted without a match.
// str - string to look for (callers in this file pass closure descriptions,
//       type names, cost-centre labels and literals such as "lag")
// sel - selector list, fragments separated by ','
// The visible code only reads through both pointers.
539 strMatchesSelector( char* str, char* sel )
542 // fprintf(stderr, "str_matches_selector %s %s\n", str, sel);
544 // Compare str against wherever we've got to in sel.
546 while (*p != '\0' && *sel != ',' && *sel != '\0' && *p == *sel) {
549 // Match if all of str used and have reached the end of a sel fragment.
550 if (*p == '\0' && (*sel == ',' || *sel == '\0'))
553 // No match. Advance sel to the start of the next elem.
554 while (*sel != ',' && *sel != '\0') sel++;
555 if (*sel == ',') sel++;
557 /* Run out of sel ?? */
558 if (*sel == '\0') return rtsFalse;
562 /* -----------------------------------------------------------------------------
563 * Figure out whether a closure should be counted in this census, by
564 * testing against all the specified constraints.
565 * -------------------------------------------------------------------------- */
// Decides whether closure `p` is counted, by checking it against each selector
// that has been set in RtsFlags.ProfFlags. The closure is rejected as soon as
// a description or type selector fails to match.
567 closureSatisfiesConstraints( StgClosure* p )
569 #ifdef DEBUG_HEAP_PROF
574 // The CCS has a selected field to indicate whether this closure is
575 // deselected by not being mentioned in the module, CC, or CCS
577 if (!p->header.prof.ccs->selected) {
// Description selector: the info table's closure_desc must match a fragment.
581 if (RtsFlags.ProfFlags.descrSelector) {
582 b = strMatchesSelector( (get_itbl((StgClosure *)p))->prof.closure_desc,
583 RtsFlags.ProfFlags.descrSelector );
584 if (!b) return rtsFalse;
// Type selector: the info table's closure_type must match a fragment.
586 if (RtsFlags.ProfFlags.typeSelector) {
587 b = strMatchesSelector( (get_itbl((StgClosure *)p))->prof.closure_type,
588 RtsFlags.ProfFlags.typeSelector );
589 if (!b) return rtsFalse;
// Retainer selector: accept as soon as any element of the closure's retainer
// set has a cost-centre label that matches.
591 if (RtsFlags.ProfFlags.retainerSelector) {
// NOTE(review): closureIdentity() guards retainerSetOf() with
// isRetainerSetFieldValid(); confirm an equivalent guard protects this call.
594 rs = retainerSetOf((StgClosure *)p);
596 for (i = 0; i < rs->num; i++) {
597 b = strMatchesSelector( rs->element[i]->cc->label,
598 RtsFlags.ProfFlags.retainerSelector );
599 if (b) return rtsTrue;
605 #endif /* PROFILING */
608 /* -----------------------------------------------------------------------------
609 * Aggregate the heap census info for biographical profiling
610 * -------------------------------------------------------------------------- */
// Converts the per-era void/drag deltas recorded by LDV_recordDead() into
// running totals for eras 1 .. era-1. When displaying by LDV this is done on
// the census-wide totals; otherwise it is done per closure identity, using a
// temporary hash table (`acc`) of accumulating counters.
613 aggregateCensusInfo( void )
617 counter *c, *d, *ctrs;
// Nothing to aggregate unless LDV bookkeeping is active.
620 if (!doingLDVProfiling()) return;
622 // Aggregate the LDV counters when displaying by biography.
623 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
// Running sums carried from one era to the next.
// NOTE(review): both must be zero before the loop starts — confirm.
624 int void_total, drag_total;
626 // Now we compute void_total and drag_total for each census
629 for (t = 1; t < era; t++) { // note: start at 1, not 0
// Add this era's delta to the running sum, then store the sum back.
630 void_total += censuses[t].void_total;
631 drag_total += censuses[t].drag_total;
632 censuses[t].void_total = void_total;
633 censuses[t].drag_total = drag_total;
634 ASSERT( censuses[t].void_total <= censuses[t].not_used );
635 ASSERT( censuses[t].drag_total <= censuses[t].used );
641 // otherwise... we're doing a heap profile that is restricted to
642 // some combination of lag, drag, void or use. We've kept all the
643 // census info for all censuses so far, but we still need to
644 // aggregate the counters forwards.
// acc maps a closure identity to its accumulating counter.
647 acc = allocHashTable();
650 for (t = 1; t < era; t++) {
652 // first look through all the counters we're aggregating
653 for (c = ctrs; c != NULL; c = c->next) {
654 // if one of the totals is non-zero, then this closure
655 // type must be present in the heap at this census time...
656 d = lookupHashTable(censuses[t].hash, (StgWord)c->identity);
659 // if this closure identity isn't present in the
660 // census for this time period, then our running
661 // totals *must* be zero.
662 ASSERT(c->c.ldv.void_total == 0 && c->c.ldv.drag_total == 0);
664 // fprintCCS(stderr,c->identity);
665 // fprintf(stderr," census=%d void_total=%d drag_total=%d\n",
666 // t, c->c.ldv.void_total, c->c.ldv.drag_total);
// Fold the running totals into this era's counter, then copy the result back
// so it carries forward to the next era.
668 d->c.ldv.void_total += c->c.ldv.void_total;
669 d->c.ldv.drag_total += c->c.ldv.drag_total;
670 c->c.ldv.void_total = d->c.ldv.void_total;
671 c->c.ldv.drag_total = d->c.ldv.drag_total;
673 ASSERT( c->c.ldv.void_total >= 0 );
674 ASSERT( c->c.ldv.drag_total >= 0 );
678 // now look through the counters in this census to find new ones
679 for (c = censuses[t].ctrs; c != NULL; c = c->next) {
680 d = lookupHashTable(acc, (StgWord)c->identity);
// Start an accumulating counter for this identity, seeded from this era.
682 d = arenaAlloc( arena, sizeof(counter) );
684 insertHashTable( acc, (StgWord)c->identity, d );
685 d->identity = c->identity;
688 d->c.ldv.void_total = c->c.ldv.void_total;
689 d->c.ldv.drag_total = c->c.ldv.drag_total;
691 ASSERT( c->c.ldv.void_total >= 0 );
692 ASSERT( c->c.ldv.drag_total >= 0 );
// Drop the table itself; NULL is passed as the element destructor.
696 freeHashTable(acc, NULL);
701 /* -----------------------------------------------------------------------------
702 * Print out the results of a heap census.
703 * -------------------------------------------------------------------------- */
// Writes one census to hp_file as a BEGIN_SAMPLE ... END_SAMPLE group.
// When displaying by LDV, five fixed bands are printed from the census-wide
// totals. Otherwise one line is printed per counter with a non-zero count.
// Tallies are multiplied by sizeof(W_) on output.
705 dumpCensus( Census *census )
710 fprintf(hp_file, "BEGIN_SAMPLE %0.2f\n", census->time);
713 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
// NOTE(review): each argument below is an int multiplied by sizeof(W_), so it
// has type size_t, yet the conversion is %u (unsigned int). Where size_t is
// wider than unsigned int this is a format/argument mismatch — confirm and
// cast or change the conversion.
714 fprintf(hp_file, "VOID\t%u\n", census->void_total * sizeof(W_));
715 fprintf(hp_file, "LAG\t%u\n",
716 (census->not_used - census->void_total) * sizeof(W_));
717 fprintf(hp_file, "USE\t%u\n",
718 (census->used - census->drag_total) * sizeof(W_));
719 fprintf(hp_file, "INHERENT_USE\t%u\n",
720 census->prim * sizeof(W_));
721 fprintf(hp_file, "DRAG\t%u\n", census->drag_total *
723 fprintf(hp_file, "END_SAMPLE %0.2f\n", census->time);
728 for (ctr = census->ctrs; ctr != NULL; ctr = ctr->next) {
// With a biography selector, the count is the sum of the bands it names.
731 if (RtsFlags.ProfFlags.bioSelector != NULL) {
733 if (strMatchesSelector("lag", RtsFlags.ProfFlags.bioSelector))
734 count += ctr->c.ldv.not_used - ctr->c.ldv.void_total;
735 if (strMatchesSelector("drag", RtsFlags.ProfFlags.bioSelector))
736 count += ctr->c.ldv.drag_total;
737 if (strMatchesSelector("void", RtsFlags.ProfFlags.bioSelector))
738 count += ctr->c.ldv.void_total;
739 if (strMatchesSelector("use", RtsFlags.ProfFlags.bioSelector))
740 count += ctr->c.ldv.used - ctr->c.ldv.drag_total;
// Without one, the count is the plain residency tally.
744 count = ctr->c.resid;
747 ASSERT( count >= 0 );
// Bands with nothing to report are left out of the sample.
749 if (count == 0) continue;
// Print the band name; how the identity pointer is interpreted depends on the
// profile mode.
751 #ifdef DEBUG_HEAP_PROF
752 switch (RtsFlags.ProfFlags.doHeapProfile) {
753 case HEAP_BY_INFOPTR:
754 fprintf(hp_file, "%s", lookupGHCName(ctr->identity));
756 case HEAP_BY_CLOSURE_TYPE:
757 fprintf(hp_file, "%s", (char *)ctr->identity);
763 switch (RtsFlags.ProfFlags.doHeapProfile) {
765 fprint_ccs(hp_file, (CostCentreStack *)ctr->identity, 25);
770 fprintf(hp_file, "%s", (char *)ctr->identity);
772 case HEAP_BY_RETAINER:
774 RetainerSet *rs = (RetainerSet *)ctr->identity;
776 // it might be the distinguished retainer set rs_MANY:
777 if (rs == &rs_MANY) {
778 fprintf(hp_file, "MANY");
782 // Mark this retainer set by negating its id, because it
783 // has appeared in at least one census. We print the
784 // values of all such retainer sets into the log file at
785 // the end. A retainer set may exist but not feature in
786 // any censuses if it arose as the intermediate retainer
787 // set for some closure during retainer set calculation.
791 // report in the unit of bytes: * sizeof(StgWord)
792 printRetainerSetShort(hp_file, rs);
796 barf("dumpCensus; doHeapProfile");
// NOTE(review): `count * sizeof(W_)` has type size_t but is printed with %d —
// same format/argument mismatch as for the LDV bands above.
800 fprintf(hp_file, "\t%d\n", count * sizeof(W_));
803 fprintf(hp_file, "END_SAMPLE %0.2f\n", census->time);
806 /* -----------------------------------------------------------------------------
807 * Code to perform a heap census.
808 * -------------------------------------------------------------------------- */
// Walks a chain of block descriptors and adds every closure found to `census`.
// For each closure its size is computed from the closure type, and closures
// that pass closureSatisfiesConstraints() are tallied. When displaying by LDV
// the census-wide totals are updated; otherwise the counter for the closure's
// identity is updated, and created first if needed.
// census - the census record being filled in
// bd     - head of the block chain; followed through bd->link
810 heapCensusChain( Census *census, bdescr *bd )
820 for (; bd != NULL; bd = bd->link) {
// Scan closures up to the block's free pointer.
822 while (p < bd->free) {
823 info = get_itbl((StgClosure *)p);
// Work out the closure's size according to its type.
826 switch (info->type) {
832 case IND_OLDGEN_PERM:
834 case SE_CAF_BLACKHOLE:
839 case CONSTR_CHARLIKE:
853 size = sizeW_fromITBL(info);
864 size = sizeW_fromITBL(info);
867 case THUNK_1_0: /* ToDo - shouldn't be here */
868 case THUNK_0_1: /* " ditto " */
870 size = sizeofW(StgHeader) + MIN_UPD_SIZE;
875 size = pap_sizeW((StgPAP *)p);
880 size = arr_words_sizeW(stgCast(StgArrWords*,p));
884 case MUT_ARR_PTRS_FROZEN:
886 size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
891 size = tso_sizeW((StgTSO *)p);
900 #ifdef DEBUG_HEAP_PROF
903 // subtract the profiling overhead
904 real_size = size - sizeofW(StgProfHeader);
907 if (closureSatisfiesConstraints((StgClosure*)p)) {
// Display by LDV: only the three census-wide totals are updated. A closure
// still in the CREATE state is counted as not_used.
909 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
911 census->prim += real_size;
912 else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
913 census->not_used += real_size;
915 census->used += real_size;
// Any other display mode: tally under the closure's identity.
919 identity = closureIdentity((StgClosure *)p);
// Closures with no identity are left out of the tally.
921 if (identity != NULL) {
922 ctr = lookupHashTable( census->hash, (StgWord)identity );
// Add to the counter found for this identity.
925 if (RtsFlags.ProfFlags.bioSelector != NULL) {
927 ctr->c.ldv.prim += real_size;
928 else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
929 ctr->c.ldv.not_used += real_size;
931 ctr->c.ldv.used += real_size;
935 ctr->c.resid += real_size;
// Create a counter for this identity in the census arena, register it in the
// hash table, and seed it with this closure's size.
938 ctr = arenaAlloc( census->arena, sizeof(counter) );
940 insertHashTable( census->hash, (StgWord)identity, ctr );
941 ctr->identity = identity;
942 ctr->next = census->ctrs;
946 if (RtsFlags.ProfFlags.bioSelector != NULL) {
948 ctr->c.ldv.prim = real_size;
949 else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
950 ctr->c.ldv.not_used = real_size;
952 ctr->c.ldv.used = real_size;
956 ctr->c.resid = real_size;
974 census = &censuses[era];
975 census->time = mut_user_time();
977 // calculate retainer sets if necessary
979 if (doingRetainerProfiling()) {
985 stat_startHeapCensus();
988 // Traverse the heap, collecting the census info
990 // First the small_alloc_list: we have to fix the free pointer at
991 // the end by calling tidyAllocatedLists() first.
993 heapCensusChain( census, small_alloc_list );
995 // Now traverse the heap in each generation/step.
996 if (RtsFlags.GcFlags.generations == 1) {
997 heapCensusChain( census, g0s0->to_blocks );
999 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1000 for (s = 0; s < generations[g].n_steps; s++) {
1001 heapCensusChain( census, generations[g].steps[s].blocks );
1002 // Are we interested in large objects? might be
1003 // confusing to include the stack in a heap profile.
1004 // heapCensusChain( census, generations[g].steps[s].large_objects );
1009 // dump out the census info
1011 // We can't generate any info for LDV profiling until
1012 // the end of the run...
1013 if (!doingLDVProfiling())
1014 dumpCensus( census );
1016 dumpCensus( census );
1020 // free our storage, unless we're keeping all the census info for
1021 // future restriction by biography.
1023 if (RtsFlags.ProfFlags.bioSelector == NULL)
1026 freeHashTable( census->hash, NULL/* don't free the elements */ );
1027 arenaFree( census->arena );
1028 census->hash = NULL;
1029 census->arena = NULL;
1032 // we're into the next time period now
1036 stat_endHeapCensus();
1040 #endif /* PROFILING || DEBUG_HEAP_PROF */