1 /* -----------------------------------------------------------------------------
2 * $Id: Profiling.c,v 1.22 2001/08/14 13:40:09 sewardj Exp $
4 * (c) The GHC Team, 1998-2000
6 * Support for profiling
8 * ---------------------------------------------------------------------------*/
12 #include "PosixSource.h"
16 #include "Profiling.h"
18 #include "Proftimer.h"
23 * Global variables used to assign unique IDs to cc's, ccs's, and
31 /* Table sizes from old profiling system. Not sure if we'll need
34 nat time_intervals = 0;
35 nat earlier_ticks = 0;
42 /* Are we time-profiling?
44 rtsBool time_profiling = rtsFalse;
46 /* figures for the profiling report.
48 static lnat total_alloc, total_prof_ticks;
50 /* Globals for opening the profiling log file(s)
52 static char *prof_filename; /* prof report file name = <program>.prof */
55 static char *hp_filename; /* heap profile (hp2ps style) log file */
58 /* The Current Cost Centre Stack (for attributing costs)
60 CostCentreStack *CCCS;
62 /* Linked lists to keep track of cc's and ccs's that haven't
63 * been declared in the log file yet
66 CostCentreStack *CCS_LIST;
69 * Built-in cost centres and cost-centre stacks:
71 * MAIN is the root of the cost-centre stack tree. If there are
72 * no _scc_s in the program, all costs will be attributed
75 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
76 * RTS operations apart from garbage collection are attributed
79 * GC is the storage manager / garbage collector.
81 * OVERHEAD gets all costs generated by the profiling system
82 * itself. These are costs that would not be incurred
83 * during non-profiled execution of the program.
85 * SUBSUMED is the one-and-only CCS placed on top-level functions.
86 * It indicates that all costs are to be attributed to the
87 * enclosing cost centre stack. SUBSUMED never accumulates
88 * any costs. The is_caf flag is set on the subsumed cost
91 * DONT_CARE is a placeholder cost-centre we assign to static
92 * constructors. It should *never* accumulate any costs.
95 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", CC_IS_BORING, );
96 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", CC_IS_BORING, );
97 CC_DECLARE(CC_GC, "GC", "GC", CC_IS_BORING, );
98 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_IS_CAF, );
99 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", CC_IS_CAF, );
100 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_IS_BORING, );
102 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
103 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
104 CCS_DECLARE(CCS_GC, CC_GC, );
105 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
106 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, );
107 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
110 * Uniques for the XML log-file format
115 #define HEAP_OBJ_UQ 4
116 #define TIME_UPD_UQ 5
117 #define HEAP_UPD_UQ 6
123 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
124 CostCentreStack *new_ccs );
125 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
126 static void count_ticks ( CostCentreStack *ccs );
127 static void inherit_costs ( CostCentreStack *ccs );
128 static void reportCCS ( CostCentreStack *ccs, nat indent );
129 static void DecCCS ( CostCentreStack *ccs );
130 static void DecBackEdge ( CostCentreStack *ccs,
131 CostCentreStack *oldccs );
132 static CostCentreStack * CheckLoop ( CostCentreStack *ccs, CostCentre *cc );
133 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
134 static CostCentreStack * ActualPush ( CostCentreStack *, CostCentre * );
135 static CostCentreStack * IsInIndexTable ( IndexTable *, CostCentre * );
136 static IndexTable * AddToIndexTable ( IndexTable *, CostCentreStack *,
137 CostCentre *, unsigned int );
142 static void printCCS ( CostCentreStack *ccs );
144 static void initTimeProfiling ( void );
145 static void initProfilingLogFile( void );
147 static void reportCCS_XML ( CostCentreStack *ccs );
149 /* -----------------------------------------------------------------------------
150 Initialise the profiling environment
151 -------------------------------------------------------------------------- */
154 initProfiling1 (void)
156 /* for the benefit of allocate()... */
159 /* Initialize counters for IDs */
164 /* Initialize Declaration lists to NULL */
168 /* Register all the cost centres / stacks in the program
169 * CC_MAIN gets link = 0, all others have non-zero link.
171 REGISTER_CC(CC_MAIN);
172 REGISTER_CC(CC_SYSTEM);
174 REGISTER_CC(CC_OVERHEAD);
175 REGISTER_CC(CC_SUBSUMED);
176 REGISTER_CC(CC_DONT_CARE);
177 REGISTER_CCS(CCS_MAIN);
178 REGISTER_CCS(CCS_SYSTEM);
179 REGISTER_CCS(CCS_GC);
180 REGISTER_CCS(CCS_OVERHEAD);
181 REGISTER_CCS(CCS_SUBSUMED);
182 REGISTER_CCS(CCS_DONT_CARE);
186 /* cost centres are registered by the per-module
187 * initialisation code now...
192 initProfiling2 (void)
194 CostCentreStack *ccs, *next;
198 /* Set up the log file, and dump the header and cost centre
199 * information into it. */
200 initProfilingLogFile();
202 /* find all the "special" cost centre stacks, and make them children
205 ASSERT(CCS_MAIN->prevStack == 0);
206 CCS_MAIN->root = CC_MAIN;
208 for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
209 next = ccs->prevStack;
211 ActualPush_(CCS_MAIN,ccs->cc,ccs);
216 if (RtsFlags.CcFlags.doCostCentres) {
220 if (RtsFlags.ProfFlags.doHeapProfile) {
226 initProfilingLogFile(void)
228 /* Initialise the log file name */
229 prof_filename = stgMallocBytes(strlen(prog_argv[0]) + 6, "initProfiling");
230 sprintf(prof_filename, "%s.prof", prog_argv[0]);
232 /* open the log file */
233 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
234 fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
235 RtsFlags.CcFlags.doCostCentres = 0;
239 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
240 /* dump the time, and the profiling interval */
241 fprintf(prof_file, "\"%s\"\n", time_str());
242 fprintf(prof_file, "\"%d ms\"\n", TICK_MILLISECS);
244 /* declare all the cost centres */
247 for (cc = CC_LIST; cc != NULL; cc = cc->link) {
248 fprintf(prof_file, "%d %d \"%s\" \"%s\"\n",
249 CC_UQ, cc->ccID, cc->label, cc->module);
254 if (RtsFlags.ProfFlags.doHeapProfile) {
255 /* Initialise the log file name */
256 hp_filename = stgMallocBytes(strlen(prog_argv[0]) + 6, "initProfiling");
257 sprintf(hp_filename, "%s.hp", prog_argv[0]);
259 /* open the log file */
260 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
261 fprintf(stderr, "Can't open profiling report file %s\n",
263 RtsFlags.ProfFlags.doHeapProfile = 0;
270 initTimeProfiling(void)
272 time_profiling = rtsTrue;
279 endProfiling ( void )
281 if (RtsFlags.CcFlags.doCostCentres) {
284 if (RtsFlags.ProfFlags.doHeapProfile) {
289 /* -----------------------------------------------------------------------------
290 Set cost centre stack when entering a function.
291 -------------------------------------------------------------------------- */
292 rtsBool entering_PAP;
295 EnterFunCCS ( CostCentreStack *cccs, CostCentreStack *ccsfn )
297 /* PAP_entry has already set CCCS for us */
299 entering_PAP = rtsFalse;
303 if (ccsfn->root->is_caf == CC_IS_CAF) {
304 return AppendCCS(cccs,ccsfn);
310 /* -----------------------------------------------------------------------------
311 Cost-centre stack manipulation
312 -------------------------------------------------------------------------- */
315 CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
317 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
318 #define PushCostCentre _PushCostCentre
321 fprintf(stderr,"Pushing %s on ", cc->label);
323 fprintf(stderr,"\n"));
324 return PushCostCentre(ccs,cc);
329 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
331 CostCentreStack *temp_ccs;
333 if (ccs == EMPTY_STACK)
334 return ActualPush(ccs,cc);
339 /* check if we've already memoized this stack */
340 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
342 if (temp_ccs != EMPTY_STACK)
345 temp_ccs = CheckLoop(ccs,cc);
346 if (temp_ccs != NULL) {
347 /* we have recursed to an older CCS. Mark this in
348 * the index table, and emit a "back edge" into the
351 ccs->indexTable = AddToIndexTable(ccs->indexTable,temp_ccs,cc,1);
352 DecBackEdge(temp_ccs,ccs);
355 return ActualPush(ccs,cc);
362 static CostCentreStack *
363 CheckLoop ( CostCentreStack *ccs, CostCentre *cc )
365 while (ccs != EMPTY_STACK) {
368 ccs = ccs->prevStack;
373 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
376 CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
378 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
379 #define AppendCCS _AppendCCS
383 fprintf(stderr,"Appending ");
385 fprintf(stderr," to ");
387 fprintf(stderr,"\n");});
388 return AppendCCS(ccs1,ccs2);
393 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
395 CostCentreStack *ccs = NULL;
401 if (ccs2->cc->is_caf == CC_IS_CAF) {
405 if (ccs2->prevStack != NULL) {
406 ccs = AppendCCS(ccs1, ccs2->prevStack);
409 return PushCostCentre(ccs,ccs2->cc);
412 static CostCentreStack *
413 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
415 CostCentreStack *new_ccs;
417 /* allocate space for a new CostCentreStack */
418 new_ccs = (CostCentreStack *) stgMallocBytes(sizeof(CostCentreStack), "Error allocating space for CostCentreStack");
420 return ActualPush_(ccs, cc, new_ccs);
423 static CostCentreStack *
424 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
426 /* assign values to each member of the structure */
427 ASSIGN_CCS_ID(new_ccs->ccsID);
430 new_ccs->prevStack = ccs;
432 new_ccs->indexTable = EMPTY_TABLE;
434 /* Initialise the various _scc_ counters to zero
436 new_ccs->scc_count = 0;
438 /* Initialize all other stats here. There should be a quick way
439 * that's easily used elsewhere too
441 new_ccs->time_ticks = 0;
442 new_ccs->mem_alloc = 0;
443 new_ccs->inherited_ticks = 0;
444 new_ccs->inherited_alloc = 0;
446 new_ccs->root = ccs->root;
448 /* update the memoization table for the parent stack */
449 if (ccs != EMPTY_STACK)
450 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc,
451 0/*not a back edge*/);
453 /* make sure this CC is declared at the next heap/time sample */
456 /* return a pointer to the new stack */
461 static CostCentreStack *
462 IsInIndexTable(IndexTable *it, CostCentre *cc)
464 while (it!=EMPTY_TABLE)
472 /* otherwise we never found it so return EMPTY_TABLE */
478 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs,
479 CostCentre *cc, unsigned int back_edge)
483 new_it = stgMallocBytes(sizeof(IndexTable), "AddToIndexTable");
486 new_it->ccs = new_ccs;
488 new_it->back_edge = back_edge;
494 DecCCS(CostCentreStack *ccs)
496 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
497 if (ccs->prevStack == EMPTY_STACK)
498 fprintf(prof_file, "%d %d 1 %d\n", CCS_UQ,
499 ccs->ccsID, ccs->cc->ccID);
501 fprintf(prof_file, "%d %d 2 %d %d\n", CCS_UQ,
502 ccs->ccsID, ccs->cc->ccID, ccs->prevStack->ccsID);
507 DecBackEdge( CostCentreStack *ccs, CostCentreStack *oldccs )
509 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
510 if (ccs->prevStack == EMPTY_STACK)
511 fprintf(prof_file, "%d %d 1 %d\n", CCS_UQ,
512 ccs->ccsID, ccs->cc->ccID);
514 fprintf(prof_file, "%d %d 2 %d %d\n", CCS_UQ,
515 ccs->ccsID, ccs->cc->ccID, oldccs->ccsID);
519 /* -----------------------------------------------------------------------------
520 Generating a time & allocation profiling report.
521 -------------------------------------------------------------------------- */
523 /* -----------------------------------------------------------------------------
524 Generating the aggregated per-cost-centre time/alloc report.
525 -------------------------------------------------------------------------- */
527 static CostCentre *sorted_cc_list;
530 aggregate_cc_costs( CostCentreStack *ccs )
534 ccs->cc->mem_alloc += ccs->mem_alloc;
535 ccs->cc->time_ticks += ccs->time_ticks;
537 for (i = ccs->indexTable; i != 0; i = i->next) {
539 aggregate_cc_costs(i->ccs);
545 insert_cc_in_sorted_list( CostCentre *new_cc )
547 CostCentre **prev, *cc;
549 prev = &sorted_cc_list;
550 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
551 if (new_cc->time_ticks > cc->time_ticks) {
564 report_per_cc_costs( void )
566 CostCentre *cc, *next;
568 aggregate_cc_costs(CCS_MAIN);
569 sorted_cc_list = NULL;
571 for (cc = CC_LIST; cc != NULL; cc = next) {
573 if (cc->time_ticks > total_prof_ticks/100
574 || cc->mem_alloc > total_alloc/100
575 || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
576 insert_cc_in_sorted_list(cc);
580 fprintf(prof_file, "%-20s %-10s", "COST CENTRE", "MODULE");
581 fprintf(prof_file, "%6s %6s", "%time", "%alloc");
582 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
583 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
585 fprintf(prof_file, "\n\n");
587 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
588 fprintf(prof_file, "%-20s %-10s", cc->label, cc->module);
589 fprintf(prof_file, "%6.1f %6.1f",
590 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
591 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
595 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
596 fprintf(prof_file, " %5ld %9ld", cc->time_ticks, cc->mem_alloc);
598 fprintf(prof_file, "\n");
601 fprintf(prof_file,"\n\n");
604 /* -----------------------------------------------------------------------------
605 Generate the cost-centre-stack time/alloc report
606 -------------------------------------------------------------------------- */
609 fprint_header( void )
611 fprintf(prof_file, "%-24s %-10s individual inherited\n", "", "");
613 fprintf(prof_file, "%-24s %-10s", "COST CENTRE", "MODULE");
614 fprintf(prof_file, "%8s %5s %5s %5s %5s", "entries", "%time", "%alloc", "%time", "%alloc");
616 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
617 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
618 #if defined(PROFILING_DETAIL_COUNTS)
619 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
620 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
624 fprintf(prof_file, "\n\n");
628 report_ccs_profiling( void )
631 char temp[128]; /* sigh: magic constant */
635 total_prof_ticks = 0;
637 count_ticks(CCS_MAIN);
639 switch (RtsFlags.CcFlags.doCostCentres) {
642 case COST_CENTRES_XML:
648 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
649 time_str(), "Final");
651 fprintf(prof_file, "\n\t ");
652 fprintf(prof_file, " %s", prog_argv[0]);
653 fprintf(prof_file, " +RTS");
654 for (count = 0; rts_argv[count]; count++)
655 fprintf(prof_file, " %s", rts_argv[count]);
656 fprintf(prof_file, " -RTS");
657 for (count = 1; prog_argv[count]; count++)
658 fprintf(prof_file, " %s", prog_argv[count]);
659 fprintf(prof_file, "\n\n");
661 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
662 total_prof_ticks / (StgFloat) TICK_FREQUENCY,
663 total_prof_ticks, TICK_MILLISECS);
665 fprintf(prof_file, "\ttotal alloc = %11s bytes",
666 ullong_format_string((ullong) total_alloc * sizeof(W_),
667 temp, rtsTrue/*commas*/));
668 /* ToDo: 64-bit error! */
670 #if defined(PROFILING_DETAIL_COUNTS)
671 fprintf(prof_file, " (%lu closures)", total_allocs);
673 fprintf(prof_file, " (excludes profiling overheads)\n\n");
675 report_per_cc_costs();
677 inherit_costs(CCS_MAIN);
680 reportCCS(pruneCCSTree(CCS_MAIN), 0);
686 reportCCS(CostCentreStack *ccs, nat indent)
693 /* Only print cost centres with non 0 data ! */
695 if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
696 ! ccs_to_ignore(ccs))
697 /* force printing of *all* cost centres if -P -P */
700 fprintf(prof_file, "%-*s%-*s %-10s",
701 indent, "", 24-indent, cc->label, cc->module);
703 fprintf(prof_file, "%8ld %5.1f %5.1f %5.1f %5.1f",
705 total_prof_ticks == 0 ? 0.0 : (ccs->time_ticks / (StgFloat) total_prof_ticks * 100),
706 total_alloc == 0 ? 0.0 : (ccs->mem_alloc / (StgFloat) total_alloc * 100),
707 total_prof_ticks == 0 ? 0.0 : (ccs->inherited_ticks / (StgFloat) total_prof_ticks * 100),
708 total_alloc == 0 ? 0.0 : (ccs->inherited_alloc / (StgFloat) total_alloc * 100)
711 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
712 fprintf(prof_file, " %5ld %9ld", ccs->time_ticks, ccs->mem_alloc*sizeof(W_));
713 #if defined(PROFILING_DETAIL_COUNTS)
714 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
715 ccs->mem_allocs, ccs->thunk_count,
716 ccs->function_count, ccs->pap_count,
717 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
718 ccs->caffun_subsumed);
721 fprintf(prof_file, "\n");
724 for (i = ccs->indexTable; i != 0; i = i->next) {
726 reportCCS(i->ccs, indent+1);
732 /* Traverse the cost centre stack tree and accumulate
736 count_ticks(CostCentreStack *ccs)
740 if (!ccs_to_ignore(ccs)) {
741 total_alloc += ccs->mem_alloc;
742 total_prof_ticks += ccs->time_ticks;
744 for (i = ccs->indexTable; i != NULL; i = i->next)
750 /* Traverse the cost centre stack tree and inherit ticks & allocs.
753 inherit_costs(CostCentreStack *ccs)
757 if (ccs_to_ignore(ccs)) { return; }
759 ccs->inherited_ticks += ccs->time_ticks;
760 ccs->inherited_alloc += ccs->mem_alloc;
762 for (i = ccs->indexTable; i != NULL; i = i->next)
764 inherit_costs(i->ccs);
765 ccs->inherited_ticks += i->ccs->inherited_ticks;
766 ccs->inherited_alloc += i->ccs->inherited_alloc;
772 /* return rtsTrue if it is one of the ones that
773 * should not be reported normally (because it confuses
777 ccs_to_ignore (CostCentreStack *ccs)
779 if ( ccs == CCS_OVERHEAD
780 || ccs == CCS_DONT_CARE
782 || ccs == CCS_SYSTEM) {
789 static CostCentreStack *
790 pruneCCSTree( CostCentreStack *ccs )
792 CostCentreStack *ccs1;
793 IndexTable *i, **prev;
795 prev = &ccs->indexTable;
796 for (i = ccs->indexTable; i != 0; i = i->next) {
797 if (i->back_edge) { continue; }
799 ccs1 = pruneCCSTree(i->ccs);
807 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
808 /* force printing of *all* cost centres if -P -P */ )
810 || ( ccs->indexTable != 0 )
811 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
819 /* -----------------------------------------------------------------------------
820 Generate the XML time/allocation profile
821 -------------------------------------------------------------------------- */
824 gen_XML_logfile( void )
826 fprintf(prof_file, "%d %lu", TIME_UPD_UQ, total_prof_ticks);
828 reportCCS_XML(pruneCCSTree(CCS_MAIN));
830 fprintf(prof_file, " 0\n");
836 reportCCS_XML(CostCentreStack *ccs)
841 if (ccs_to_ignore(ccs)) { return; }
845 fprintf(prof_file, " 1 %d %lu %lu %lu",
846 ccs->ccsID, ccs->scc_count, ccs->time_ticks, ccs->mem_alloc);
848 for (i = ccs->indexTable; i != 0; i = i->next) {
850 reportCCS_XML(i->ccs);
856 print_ccs (FILE *fp, CostCentreStack *ccs)
859 fprintf(fp, "Cost-Centre Stack: ");
864 print_ccs(fp, ccs->prevStack);
865 fprintf(fp, "->[%s,%s]", ccs->cc->label, ccs->cc->module);
867 fprintf(fp, "[%s,%s]", ccs->cc->label, ccs->cc->module);
878 printCCS ( CostCentreStack *ccs )
881 for (; ccs; ccs = ccs->prevStack ) {
882 fprintf(stderr,ccs->cc->label);
883 if (ccs->prevStack) {
891 #endif /* PROFILING */