1 /* -----------------------------------------------------------------------------
2 * $Id: Profiling.c,v 1.16 2000/03/08 17:48:24 simonmar Exp $
4 * (c) The GHC Team, 1998-2000
6 * Support for profiling
8 * ---------------------------------------------------------------------------*/
17 #include "Proftimer.h"
22 * Global variables used to assign unique IDs to cc's, ccs's, and
30 /* Table sizes from old profiling system. Not sure if we'll need
33 nat time_intervals = 0;
34 nat earlier_ticks = 0;
41 /* Are we time-profiling?
43 rtsBool time_profiling = rtsFalse;
45 /* figures for the profiling report.
47 static lnat total_alloc, total_prof_ticks;
49 /* Globals for opening the profiling log file
51 static char *prof_filename; /* prof report file name = <program>.prof */
54 /* The Current Cost Centre Stack (for attributing costs)
56 CostCentreStack *CCCS;
58 /* Linked lists to keep track of cc's and ccs's that haven't
59 * been declared in the log file yet
62 CostCentreStack *CCS_LIST;
65 * Built-in cost centres and cost-centre stacks:
67 * MAIN is the root of the cost-centre stack tree. If there are
68 * no _scc_s in the program, all costs will be attributed
71 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
72 * RTS operations apart from garbage collection are attributed
75 * GC is the storage manager / garbage collector.
77 * OVERHEAD gets all costs generated by the profiling system
78 * itself. These are costs that would not be incurred
79 * during non-profiled execution of the program.
81 * SUBSUMED is the one-and-only CCS placed on top-level functions.
82 * It indicates that all costs are to be attributed to the
83 * enclosing cost centre stack. SUBSUMED never accumulates
86 * DONT_CARE is a placeholder cost-centre we assign to static
87 * constructors. It should *never* accumulate any costs.
90 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", CC_IS_BORING,);
91 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", CC_IS_BORING,);
92 CC_DECLARE(CC_GC, "GC", "GC", CC_IS_BORING,);
93 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_IS_CAF,);
94 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", CC_IS_SUBSUMED,);
95 CC_DECLARE(CC_DONTZuCARE,"DONT_CARE", "MAIN", CC_IS_BORING,);
97 CCS_DECLARE(CCS_MAIN, CC_MAIN, CC_IS_BORING, );
98 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, CC_IS_BORING, );
99 CCS_DECLARE(CCS_GC, CC_GC, CC_IS_BORING, );
100 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, CC_IS_CAF, );
101 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, CC_IS_SUBSUMED, );
102 CCS_DECLARE(CCS_DONTZuCARE, CC_DONTZuCARE, CC_IS_BORING, );
105 * Uniques for the XML log-file format
110 #define HEAP_OBJ_UQ 4
111 #define TIME_UPD_UQ 5
112 #define HEAP_UPD_UQ 6
118 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
119 CostCentreStack *new_ccs );
121 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
122 static void count_ticks ( CostCentreStack *ccs );
123 static void reportCCS ( CostCentreStack *ccs, nat indent );
124 static void DecCCS ( CostCentreStack *ccs );
125 static void DecBackEdge ( CostCentreStack *ccs, CostCentreStack *oldccs );
126 static CostCentreStack *CheckLoop ( CostCentreStack *ccs, CostCentre *cc );
127 static CostCentreStack *pruneCCSTree ( CostCentreStack *ccs );
129 static CostCentreStack *ActualPush ( CostCentreStack *, CostCentre * );
130 static CostCentreStack *IsInIndexTable ( IndexTable *, CostCentre * );
131 static IndexTable *AddToIndexTable ( IndexTable *, CostCentreStack *,
132 CostCentre *, unsigned int );
135 static void printCCS ( CostCentreStack *ccs );
137 static void initTimeProfiling ( void );
138 static void initProfilingLogFile( void );
140 static void reportCCS_XML ( CostCentreStack *ccs );
142 /* -----------------------------------------------------------------------------
143 Initialise the profiling environment
144 -------------------------------------------------------------------------- */
147 initProfiling1 (void)
149 /* for the benefit of allocate()... */
152 /* Initialize counters for IDs */
157 /* Initialize Declaration lists to NULL */
161 /* Register all the cost centres / stacks in the program
162 * CC_MAIN gets link = 0, all others have non-zero link.
164 REGISTER_CC(CC_MAIN);
165 REGISTER_CC(CC_SYSTEM);
167 REGISTER_CC(CC_OVERHEAD);
168 REGISTER_CC(CC_SUBSUMED);
169 REGISTER_CC(CC_DONTZuCARE);
170 REGISTER_CCS(CCS_MAIN);
171 REGISTER_CCS(CCS_SYSTEM);
172 REGISTER_CCS(CCS_GC);
173 REGISTER_CCS(CCS_OVERHEAD);
174 REGISTER_CCS(CCS_SUBSUMED);
175 REGISTER_CCS(CCS_DONTZuCARE);
179 /* cost centres are registered by the per-module
180 * initialisation code now...
185 initProfiling2 (void)
187 CostCentreStack *ccs, *next;
191 /* Set up the log file, and dump the header and cost centre
192 * information into it. */
193 initProfilingLogFile();
195 /* find all the "special" cost centre stacks, and make them children
198 ASSERT(CCS_MAIN->prevStack == 0);
199 CCS_MAIN->root = CC_MAIN;
201 for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
202 next = ccs->prevStack;
204 ActualPush_(CCS_MAIN,ccs->cc,ccs);
209 if (RtsFlags.CcFlags.doCostCentres) {
213 if (RtsFlags.ProfFlags.doHeapProfile) {
219 initProfilingLogFile(void)
221 /* Initialise the log file name */
222 prof_filename = stgMallocBytes(strlen(prog_argv[0]) + 6, "initProfiling");
223 sprintf(prof_filename, "%s.prof", prog_argv[0]);
225 /* open the log file */
226 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
227 fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
228 RtsFlags.CcFlags.doCostCentres = 0;
232 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
233 /* dump the time, and the profiling interval */
234 fprintf(prof_file, "\"%s\"\n", time_str());
235 fprintf(prof_file, "\"%d ms\"\n", TICK_MILLISECS);
237 /* declare all the cost centres */
240 for (cc = CC_LIST; cc != NULL; cc = cc->link) {
241 fprintf(prof_file, "%d %d \"%s\" \"%s\"\n",
242 CC_UQ, cc->ccID, cc->label, cc->module);
249 initTimeProfiling(void)
251 time_profiling = rtsTrue;
258 endProfiling ( void )
260 if (RtsFlags.CcFlags.doCostCentres) {
263 if (RtsFlags.ProfFlags.doHeapProfile) {
268 /* -----------------------------------------------------------------------------
269 Set cost centre stack when entering a function.
270 -------------------------------------------------------------------------- */
271 rtsBool entering_PAP;
274 EnterFunCCS ( CostCentreStack *cccs, CostCentreStack *ccsfn )
276 /* PAP_entry has already set CCCS for us */
278 entering_PAP = rtsFalse;
282 if (ccsfn->root->is_subsumed == CC_IS_CAF
283 || ccsfn->root->is_subsumed == CC_IS_SUBSUMED) {
284 return AppendCCS(cccs,ccsfn);
290 /* -----------------------------------------------------------------------------
291 Cost-centre stack manipulation
292 -------------------------------------------------------------------------- */
295 CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
297 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
298 #define PushCostCentre _PushCostCentre
301 fprintf(stderr,"Pushing %s on ", cc->label);
303 fprintf(stderr,"\n"));
304 return PushCostCentre(ccs,cc);
309 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
311 CostCentreStack *temp_ccs;
313 if (ccs == EMPTY_STACK)
314 return ActualPush(ccs,cc);
319 /* check if we've already memoized this stack */
320 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
322 if (temp_ccs != EMPTY_STACK)
325 temp_ccs = CheckLoop(ccs,cc);
326 if (temp_ccs != NULL) {
327 /* we have recursed to an older CCS. Mark this in
328 * the index table, and emit a "back edge" into the
331 ccs->indexTable = AddToIndexTable(ccs->indexTable,temp_ccs,cc,1);
332 DecBackEdge(temp_ccs,ccs);
335 return ActualPush(ccs,cc);
342 static CostCentreStack *
343 CheckLoop ( CostCentreStack *ccs, CostCentre *cc )
345 while (ccs != EMPTY_STACK) {
348 ccs = ccs->prevStack;
353 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
356 CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
358 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
359 #define AppendCCS _AppendCCS
363 fprintf(stderr,"Appending ");
365 fprintf(stderr," to ");
367 fprintf(stderr,"\n");});
368 return AppendCCS(ccs1,ccs2);
373 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
375 CostCentreStack *ccs = NULL;
381 if (ccs2->cc->is_subsumed != CC_IS_BORING) {
385 if (ccs2->prevStack != NULL) {
386 ccs = AppendCCS(ccs1, ccs2->prevStack);
389 return PushCostCentre(ccs,ccs2->cc);
392 static CostCentreStack *
393 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
395 CostCentreStack *new_ccs;
397 /* allocate space for a new CostCentreStack */
398 new_ccs = (CostCentreStack *) stgMallocBytes(sizeof(CostCentreStack), "Error allocating space for CostCentreStack");
400 return ActualPush_(ccs, cc, new_ccs);
403 static CostCentreStack *
404 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
406 /* assign values to each member of the structure */
407 ASSIGN_CCS_ID(new_ccs->ccsID);
410 new_ccs->prevStack = ccs;
412 new_ccs->indexTable = EMPTY_TABLE;
414 /* Initialise the various _scc_ counters to zero
416 new_ccs->scc_count = 0;
417 new_ccs->sub_scc_count = 0;
418 new_ccs->sub_cafcc_count = 0;
420 /* Initialize all other stats here. There should be a quick way
421 * that's easily used elsewhere too
423 new_ccs->time_ticks = 0;
424 new_ccs->mem_alloc = 0;
426 new_ccs->root = ccs->root;
428 /* update the memoization table for the parent stack */
429 if (ccs != EMPTY_STACK)
430 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc,
431 0/*not a back edge*/);
433 /* make sure this CC is declared at the next heap/time sample */
436 /* return a pointer to the new stack */
441 static CostCentreStack *
442 IsInIndexTable(IndexTable *it, CostCentre *cc)
444 while (it!=EMPTY_TABLE)
452 /* otherwise we never found it so return EMPTY_TABLE */
458 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs,
459 CostCentre *cc, unsigned int back_edge)
463 new_it = stgMallocBytes(sizeof(IndexTable), "AddToIndexTable");
466 new_it->ccs = new_ccs;
468 new_it->back_edge = back_edge;
474 print_ccs (FILE *fp, CostCentreStack *ccs)
477 fprintf(fp, "Cost-Centre Stack: ");
482 print_ccs(fp, ccs->prevStack);
483 fprintf(fp, "->[%s,%s]", ccs->cc->label, ccs->cc->module);
485 fprintf(fp, "[%s,%s]", ccs->cc->label, ccs->cc->module);
495 DecCCS(CostCentreStack *ccs)
497 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
498 if (ccs->prevStack == EMPTY_STACK)
499 fprintf(prof_file, "%d %d 1 %d\n", CCS_UQ,
500 ccs->ccsID, ccs->cc->ccID);
502 fprintf(prof_file, "%d %d 2 %d %d\n", CCS_UQ,
503 ccs->ccsID, ccs->cc->ccID, ccs->prevStack->ccsID);
508 DecBackEdge( CostCentreStack *ccs, CostCentreStack *oldccs )
510 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
511 if (ccs->prevStack == EMPTY_STACK)
512 fprintf(prof_file, "%d %d 1 %d\n", CCS_UQ,
513 ccs->ccsID, ccs->cc->ccID);
515 fprintf(prof_file, "%d %d 2 %d %d\n", CCS_UQ,
516 ccs->ccsID, ccs->cc->ccID, oldccs->ccsID);
520 /* -----------------------------------------------------------------------------
521 Generating a time & allocation profiling report.
522 -------------------------------------------------------------------------- */
524 /* -----------------------------------------------------------------------------
525 Generating the aggregated per-cost-centre time/alloc report.
526 -------------------------------------------------------------------------- */
528 static CostCentre *sorted_cc_list;
531 aggregate_cc_costs( CostCentreStack *ccs )
535 ccs->cc->mem_alloc += ccs->mem_alloc;
536 ccs->cc->time_ticks += ccs->time_ticks;
538 for (i = ccs->indexTable; i != 0; i = i->next) {
540 aggregate_cc_costs(i->ccs);
546 insert_cc_in_sorted_list( CostCentre *new_cc )
548 CostCentre **prev, *cc;
550 prev = &sorted_cc_list;
551 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
552 if (new_cc->time_ticks > cc->time_ticks) {
565 report_per_cc_costs( void )
567 CostCentre *cc, *next;
569 aggregate_cc_costs(CCS_MAIN);
570 sorted_cc_list = NULL;
572 for (cc = CC_LIST; cc != NULL; cc = next) {
574 if (cc->time_ticks > total_prof_ticks/100
575 || cc->mem_alloc > total_alloc/100) {
576 insert_cc_in_sorted_list(cc);
580 fprintf(prof_file, "%-20s %-10s", "COST CENTRE", "MODULE");
581 fprintf(prof_file, "%6s %6s", "%time", "%alloc");
582 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
583 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
585 fprintf(prof_file, "\n\n");
587 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
588 fprintf(prof_file, "%-20s %-10s", cc->label, cc->module);
589 fprintf(prof_file, "%6.1f %6.1f",
590 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
591 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
595 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
596 fprintf(prof_file, " %5ld %9ld", cc->time_ticks, cc->mem_alloc);
598 fprintf(prof_file, "\n");
601 fprintf(prof_file,"\n\n");
604 /* -----------------------------------------------------------------------------
605 Generate the cost-centre-stack time/alloc report
606 -------------------------------------------------------------------------- */
609 fprint_header( void )
611 fprintf(prof_file, "%-24s %-10s", "COST CENTRE", "MODULE");
613 fprintf(prof_file, "%8s %5s %5s %8s %5s", "scc", "%time", "%alloc", "inner", "cafs");
615 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
616 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
617 #if defined(PROFILING_DETAIL_COUNTS)
618 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
619 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
623 fprintf(prof_file, "\n\n");
627 report_ccs_profiling( void )
630 char temp[128]; /* sigh: magic constant */
634 total_prof_ticks = 0;
636 count_ticks(CCS_MAIN);
638 switch (RtsFlags.CcFlags.doCostCentres) {
641 case COST_CENTRES_XML:
647 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
648 time_str(), "Final");
650 fprintf(prof_file, "\n\t ");
651 fprintf(prof_file, " %s", prog_argv[0]);
652 fprintf(prof_file, " +RTS");
653 for (count = 0; rts_argv[count]; count++)
654 fprintf(prof_file, " %s", rts_argv[count]);
655 fprintf(prof_file, " -RTS");
656 for (count = 1; prog_argv[count]; count++)
657 fprintf(prof_file, " %s", prog_argv[count]);
658 fprintf(prof_file, "\n\n");
660 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
661 total_prof_ticks / (StgFloat) TICK_FREQUENCY,
662 total_prof_ticks, TICK_MILLISECS);
664 fprintf(prof_file, "\ttotal alloc = %11s bytes",
665 ullong_format_string((ullong) total_alloc * sizeof(W_),
666 temp, rtsTrue/*commas*/));
667 /* ToDo: 64-bit error! */
669 #if defined(PROFILING_DETAIL_COUNTS)
670 fprintf(prof_file, " (%lu closures)", total_allocs);
672 fprintf(prof_file, " (excludes profiling overheads)\n\n");
674 report_per_cc_costs();
677 reportCCS(pruneCCSTree(CCS_MAIN), 0);
683 reportCCS(CostCentreStack *ccs, nat indent)
690 /* Only print cost centres with non 0 data ! */
692 if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
693 ! ccs_to_ignore(ccs))
694 /* force printing of *all* cost centres if -P -P */
697 fprintf(prof_file, "%-*s%-*s %-10s",
698 indent, "", 24-indent, cc->label, cc->module);
700 fprintf(prof_file, "%8ld %5.1f %5.1f %8ld %5ld",
702 total_prof_ticks == 0 ? 0.0 : (ccs->time_ticks / (StgFloat) total_prof_ticks * 100),
703 total_alloc == 0 ? 0.0 : (ccs->mem_alloc / (StgFloat) total_alloc * 100),
704 ccs->sub_scc_count, ccs->sub_cafcc_count);
706 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
707 fprintf(prof_file, " %5ld %9ld", ccs->time_ticks, ccs->mem_alloc*sizeof(W_));
708 #if defined(PROFILING_DETAIL_COUNTS)
709 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
710 ccs->mem_allocs, ccs->thunk_count,
711 ccs->function_count, ccs->pap_count,
712 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
713 ccs->caffun_subsumed);
716 fprintf(prof_file, "\n");
719 for (i = ccs->indexTable; i != 0; i = i->next) {
721 reportCCS(i->ccs, indent+1);
726 /* Traverse the cost centre stack tree and accumulate
730 count_ticks(CostCentreStack *ccs)
734 if (!ccs_to_ignore(ccs)) {
735 total_alloc += ccs->mem_alloc;
736 total_prof_ticks += ccs->time_ticks;
738 for (i = ccs->indexTable; i != NULL; i = i->next)
744 /* return rtsTrue if it is one of the ones that
745 * should not be reported normally (because it confuses
749 ccs_to_ignore (CostCentreStack *ccs)
751 if ( ccs == CCS_OVERHEAD
752 || ccs == CCS_DONTZuCARE
754 || ccs == CCS_SYSTEM) {
761 static CostCentreStack *
762 pruneCCSTree( CostCentreStack *ccs )
764 CostCentreStack *ccs1;
765 IndexTable *i, **prev;
767 prev = &ccs->indexTable;
768 for (i = ccs->indexTable; i != 0; i = i->next) {
769 if (i->back_edge) { continue; }
771 ccs1 = pruneCCSTree(i->ccs);
779 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
780 /* force printing of *all* cost centres if -P -P */ )
782 || ( ccs->indexTable != 0 )
783 || ( (ccs->scc_count || ccs->sub_scc_count ||
784 ccs->time_ticks || ccs->mem_alloc
785 || (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE
786 && (ccs->sub_cafcc_count
787 #if defined(PROFILING_DETAIL_COUNTS)
788 || cc->thunk_count || cc->function_count || cc->pap_count
797 /* -----------------------------------------------------------------------------
798 Generate the XML time/allocation profile
799 -------------------------------------------------------------------------- */
802 gen_XML_logfile( void )
804 fprintf(prof_file, "%d %lu", TIME_UPD_UQ, total_prof_ticks);
806 reportCCS_XML(pruneCCSTree(CCS_MAIN));
808 fprintf(prof_file, " 0\n");
814 reportCCS_XML(CostCentreStack *ccs)
821 fprintf(prof_file, " 1 %d %lu %lu %lu",
822 ccs->ccsID, ccs->scc_count, ccs->time_ticks, ccs->mem_alloc);
824 for (i = ccs->indexTable; i != 0; i = i->next) {
826 reportCCS_XML(i->ccs);
833 printCCS ( CostCentreStack *ccs )
836 for (; ccs; ccs = ccs->prevStack ) {
837 fprintf(stderr,ccs->cc->label);
838 if (ccs->prevStack) {
846 #endif /* PROFILING */