1 /* -----------------------------------------------------------------------------
2 * $Id: Profiling.c,v 1.15 2000/03/07 11:53:12 simonmar Exp $
4 * (c) The GHC Team, 1998-2000
6 * Support for profiling
8 * ---------------------------------------------------------------------------*/
17 #include "StgStartup.h"
19 #include "Proftimer.h"
24 * Global variables used to assign unique IDs to cc's, ccs's, and
32 /* Table sizes from old profiling system. Not sure if we'll need
35 nat time_intervals = 0;
36 nat earlier_ticks = 0;
43 /* Are we time-profiling?
45 rtsBool time_profiling = rtsFalse;
47 /* figures for the profiling report.
49 static lnat total_alloc, total_prof_ticks;
51 /* Globals for opening the profiling log file
53 static char *prof_filename; /* prof report file name = <program>.prof */
56 /* The Current Cost Centre Stack (for attributing costs)
58 CostCentreStack *CCCS;
60 /* Linked lists to keep track of cc's and ccs's that haven't
61 * been declared in the log file yet
64 CostCentreStack *CCS_LIST;
67 * Built-in cost centres and cost-centre stacks:
69 * MAIN is the root of the cost-centre stack tree. If there are
70 * no _scc_s in the program, all costs will be attributed
73 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
74 * RTS operations apart from garbage collection are attributed
77 * GC is the storage manager / garbage collector.
79 * OVERHEAD gets all costs generated by the profiling system
80 * itself. These are costs that would not be incurred
81 * during non-profiled execution of the program.
83 * SUBSUMED is the one-and-only CCS placed on top-level functions.
84 * It indicates that all costs are to be attributed to the
85 * enclosing cost centre stack. SUBSUMED never accumulates
88 * DONT_CARE is a placeholder cost-centre we assign to static
89 * constructors. It should *never* accumulate any costs.
92 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "MAIN", CC_IS_BORING,);
93 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", "MAIN", CC_IS_BORING,);
94 CC_DECLARE(CC_GC, "GC", "GC", "GC", CC_IS_BORING,);
95 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "PROFILING", CC_IS_CAF,);
96 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", "MAIN", CC_IS_SUBSUMED,);
97 CC_DECLARE(CC_DONTZuCARE,"DONT_CARE", "MAIN", "MAIN", CC_IS_BORING,);
99 CCS_DECLARE(CCS_MAIN, CC_MAIN, CC_IS_BORING, );
100 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, CC_IS_BORING, );
101 CCS_DECLARE(CCS_GC, CC_GC, CC_IS_BORING, );
102 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, CC_IS_CAF, );
103 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, CC_IS_SUBSUMED, );
104 CCS_DECLARE(CCS_DONTZuCARE, CC_DONTZuCARE, CC_IS_BORING, );
107 * Uniques for the XML log-file format
112 #define HEAP_OBJ_UQ 4
113 #define TIME_UPD_UQ 5
114 #define HEAP_UPD_UQ 6
120 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
121 CostCentreStack *new_ccs );
123 static void registerCostCentres ( void );
124 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
125 static void count_ticks ( CostCentreStack *ccs );
126 static void reportCCS ( CostCentreStack *ccs, nat indent );
127 static void DecCCS ( CostCentreStack *ccs );
128 static void DecBackEdge ( CostCentreStack *ccs, CostCentreStack *oldccs );
129 static CostCentreStack *CheckLoop ( CostCentreStack *ccs, CostCentre *cc );
130 static CostCentreStack *pruneCCSTree ( CostCentreStack *ccs );
132 static CostCentreStack *ActualPush ( CostCentreStack *, CostCentre * );
133 static CostCentreStack *IsInIndexTable ( IndexTable *, CostCentre * );
134 static IndexTable *AddToIndexTable ( IndexTable *, CostCentreStack *,
135 CostCentre *, unsigned int );
138 static void printCCS ( CostCentreStack *ccs );
140 static void initTimeProfiling ( void );
141 static void initProfilingLogFile( void );
143 static void reportCCS_XML ( CostCentreStack *ccs );
145 /* -----------------------------------------------------------------------------
146 Initialise the profiling environment
147 -------------------------------------------------------------------------- */
152 CostCentreStack *ccs, *next;
154 /* for the benefit of allocate()... */
157 /* Initialize counters for IDs */
162 /* Initialize Declaration lists to NULL */
166 /* Register all the cost centres / stacks in the program
167 * CC_MAIN gets link = 0, all others have non-zero link.
169 REGISTER_CC(CC_MAIN);
170 REGISTER_CC(CC_SYSTEM);
172 REGISTER_CC(CC_OVERHEAD);
173 REGISTER_CC(CC_SUBSUMED);
174 REGISTER_CC(CC_DONTZuCARE);
175 REGISTER_CCS(CCS_MAIN);
176 REGISTER_CCS(CCS_SYSTEM);
177 REGISTER_CCS(CCS_GC);
178 REGISTER_CCS(CCS_OVERHEAD);
179 REGISTER_CCS(CCS_SUBSUMED);
180 REGISTER_CCS(CCS_DONTZuCARE);
183 registerCostCentres();
186 /* Set up the log file, and dump the header and cost centre
187 * information into it.
189 initProfilingLogFile();
191 /* find all the "special" cost centre stacks, and make them children
194 ASSERT(CCS_MAIN->prevStack == 0);
195 CCS_MAIN->root = CC_MAIN;
197 for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
198 next = ccs->prevStack;
200 ActualPush_(CCS_MAIN,ccs->cc,ccs);
205 if (RtsFlags.CcFlags.doCostCentres) {
209 if (RtsFlags.ProfFlags.doHeapProfile) {
215 initProfilingLogFile(void)
217 /* Initialise the log file name */
218 prof_filename = stgMallocBytes(strlen(prog_argv[0]) + 6, "initProfiling");
219 sprintf(prof_filename, "%s.prof", prog_argv[0]);
221 /* open the log file */
222 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
223 fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
224 RtsFlags.CcFlags.doCostCentres = 0;
228 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
229 /* dump the time, and the profiling interval */
230 fprintf(prof_file, "\"%s\"\n", time_str());
231 fprintf(prof_file, "\"%d ms\"\n", TICK_MILLISECS);
233 /* declare all the cost centres */
236 for (cc = CC_LIST; cc != NULL; cc = cc->link) {
237 fprintf(prof_file, "%d %d \"%s\" \"%s\" \"%s\"\n",
238 CC_UQ, cc->ccID, cc->label, cc->module, cc->group);
245 initTimeProfiling(void)
247 time_profiling = rtsTrue;
254 endProfiling ( void )
256 if (RtsFlags.CcFlags.doCostCentres) {
259 if (RtsFlags.ProfFlags.doHeapProfile) {
264 /* -----------------------------------------------------------------------------
265 Register Cost Centres
267 At the moment, this process just supplies a unique integer to each
268 statically declared cost centre and cost centre stack in the
271 The code generator inserts a small function "reg<moddule>" in each
272 module which registers any cost centres from that module and calls
273 the registration functions in each of the modules it imports. So,
274 if we call "regMain", each reachable module in the program will be
277 The reg* functions are compiled in the same way as STG code,
278 i.e. without normal C call/return conventions. Hence we must use
279 StgRun to call this stuff.
280 -------------------------------------------------------------------------- */
282 /* The registration functions use an explicit stack...
284 #define REGISTER_STACK_SIZE (BLOCK_SIZE * 4)
288 registerCostCentres ( void )
290 /* this storage will be reclaimed by the garbage collector,
293 register_stack = (F_ *)allocate(REGISTER_STACK_SIZE / sizeof(W_));
295 StgRun((StgFunPtr)stg_register, &MainRegTable);
299 /* -----------------------------------------------------------------------------
300 Set cost centre stack when entering a function. Here we implement
303 "if CCSfn is an initial segment of CCCS,
304 then set CCCS to CCSfn,
305 else append CCSfn to CCCS"
306 -------------------------------------------------------------------------- */
307 rtsBool entering_PAP;
310 EnterFunCCS ( CostCentreStack *cccs, CostCentreStack *ccsfn )
312 /* PAP_entry has already set CCCS for us */
314 entering_PAP = rtsFalse;
318 if (cccs->root == ccsfn->root) {
321 return AppendCCS(cccs,ccsfn);
325 /* -----------------------------------------------------------------------------
326 Cost-centre stack manipulation
327 -------------------------------------------------------------------------- */
330 CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
332 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
333 #define PushCostCentre _PushCostCentre
336 fprintf(stderr,"Pushing %s on ", cc->label);
338 fprintf(stderr,"\n"));
339 return PushCostCentre(ccs,cc);
344 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
346 CostCentreStack *temp_ccs;
348 if (ccs == EMPTY_STACK)
349 return ActualPush(ccs,cc);
354 /* check if we've already memoized this stack */
355 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
357 if (temp_ccs != EMPTY_STACK)
360 temp_ccs = CheckLoop(ccs,cc);
361 if (temp_ccs != NULL) {
362 /* we have recursed to an older CCS. Mark this in
363 * the index table, and emit a "back edge" into the
366 ccs->indexTable = AddToIndexTable(ccs->indexTable,temp_ccs,cc,1);
367 DecBackEdge(temp_ccs,ccs);
370 return ActualPush(ccs,cc);
377 static CostCentreStack *
378 CheckLoop ( CostCentreStack *ccs, CostCentre *cc )
380 while (ccs != EMPTY_STACK) {
383 ccs = ccs->prevStack;
388 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
391 CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
393 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
394 #define AppendCCS _AppendCCS
398 fprintf(stderr,"Appending ");
400 fprintf(stderr," to ");
402 fprintf(stderr,"\n");});
403 return AppendCCS(ccs1,ccs2);
408 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
410 CostCentreStack *ccs = NULL;
416 if (ccs2->cc->is_subsumed != CC_IS_BORING) {
420 if (ccs2->prevStack != NULL) {
421 ccs = AppendCCS(ccs1, ccs2->prevStack);
424 return PushCostCentre(ccs,ccs2->cc);
427 static CostCentreStack *
428 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
430 CostCentreStack *new_ccs;
432 /* allocate space for a new CostCentreStack */
433 new_ccs = (CostCentreStack *) stgMallocBytes(sizeof(CostCentreStack), "Error allocating space for CostCentreStack");
435 return ActualPush_(ccs, cc, new_ccs);
438 static CostCentreStack *
439 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
441 /* assign values to each member of the structure */
442 ASSIGN_CCS_ID(new_ccs->ccsID);
445 new_ccs->prevStack = ccs;
447 new_ccs->indexTable = EMPTY_TABLE;
449 /* Initialise the various _scc_ counters to zero
451 new_ccs->scc_count = 0;
452 new_ccs->sub_scc_count = 0;
453 new_ccs->sub_cafcc_count = 0;
455 /* Initialize all other stats here. There should be a quick way
456 * that's easily used elsewhere too
458 new_ccs->time_ticks = 0;
459 new_ccs->mem_alloc = 0;
461 new_ccs->root = ccs->root;
463 /* update the memoization table for the parent stack */
464 if (ccs != EMPTY_STACK)
465 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc,
466 0/*not a back edge*/);
468 /* make sure this CC is declared at the next heap/time sample */
471 /* return a pointer to the new stack */
476 static CostCentreStack *
477 IsInIndexTable(IndexTable *it, CostCentre *cc)
479 while (it!=EMPTY_TABLE)
487 /* otherwise we never found it so return EMPTY_TABLE */
493 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs,
494 CostCentre *cc, unsigned int back_edge)
498 new_it = stgMallocBytes(sizeof(IndexTable), "AddToIndexTable");
501 new_it->ccs = new_ccs;
503 new_it->back_edge = back_edge;
509 print_ccs (FILE *fp, CostCentreStack *ccs)
512 fprintf(fp, "Cost-Centre Stack: ");
517 print_ccs(fp, ccs->prevStack);
518 fprintf(fp, "->[%s,%s,%s]",
519 ccs->cc->label, ccs->cc->module, ccs->cc->group);
521 fprintf(fp, "[%s,%s,%s]",
522 ccs->cc->label, ccs->cc->module, ccs->cc->group);
532 DecCCS(CostCentreStack *ccs)
534 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
535 if (ccs->prevStack == EMPTY_STACK)
536 fprintf(prof_file, "%d %d 1 %d\n", CCS_UQ,
537 ccs->ccsID, ccs->cc->ccID);
539 fprintf(prof_file, "%d %d 2 %d %d\n", CCS_UQ,
540 ccs->ccsID, ccs->cc->ccID, ccs->prevStack->ccsID);
545 DecBackEdge( CostCentreStack *ccs, CostCentreStack *oldccs )
547 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
548 if (ccs->prevStack == EMPTY_STACK)
549 fprintf(prof_file, "%d %d 1 %d\n", CCS_UQ,
550 ccs->ccsID, ccs->cc->ccID);
552 fprintf(prof_file, "%d %d 2 %d %d\n", CCS_UQ,
553 ccs->ccsID, ccs->cc->ccID, oldccs->ccsID);
557 /* -----------------------------------------------------------------------------
558 Generating a time & allocation profiling report.
559 -------------------------------------------------------------------------- */
561 /* -----------------------------------------------------------------------------
562 Generating the aggregated per-cost-centre time/alloc report.
563 -------------------------------------------------------------------------- */
565 static CostCentre *sorted_cc_list;
568 aggregate_cc_costs( CostCentreStack *ccs )
572 ccs->cc->mem_alloc += ccs->mem_alloc;
573 ccs->cc->time_ticks += ccs->time_ticks;
575 for (i = ccs->indexTable; i != 0; i = i->next) {
577 aggregate_cc_costs(i->ccs);
583 insert_cc_in_sorted_list( CostCentre *new_cc )
585 CostCentre **prev, *cc;
587 prev = &sorted_cc_list;
588 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
589 if (new_cc->time_ticks > cc->time_ticks) {
602 report_per_cc_costs( void )
604 CostCentre *cc, *next;
606 aggregate_cc_costs(CCS_MAIN);
607 sorted_cc_list = NULL;
609 for (cc = CC_LIST; cc != NULL; cc = next) {
611 if (cc->time_ticks > total_prof_ticks/100
612 || cc->mem_alloc > total_alloc/100) {
613 insert_cc_in_sorted_list(cc);
617 fprintf(prof_file, "%-20s %-10s", "COST CENTRE", "MODULE");
618 fprintf(prof_file, "%6s %6s", "%time", "%alloc");
619 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
620 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
622 fprintf(prof_file, "\n\n");
624 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
625 fprintf(prof_file, "%-20s %-10s", cc->label, cc->module);
626 fprintf(prof_file, "%6.1f %6.1f",
627 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
628 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
632 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
633 fprintf(prof_file, " %5ld %9ld", cc->time_ticks, cc->mem_alloc);
635 fprintf(prof_file, "\n");
638 fprintf(prof_file,"\n\n");
641 /* -----------------------------------------------------------------------------
642 Generate the cost-centre-stack time/alloc report
643 -------------------------------------------------------------------------- */
646 fprint_header( void )
648 fprintf(prof_file, "%-24s %-10s", "COST CENTRE", "MODULE");
651 do_groups = have_interesting_groups(Registered_CC);
652 if (do_groups) fprintf(prof_file, " %-11.11s", "GROUP");
655 fprintf(prof_file, "%8s %5s %5s %8s %5s", "scc", "%time", "%alloc", "inner", "cafs");
657 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
658 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
659 #if defined(PROFILING_DETAIL_COUNTS)
660 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
661 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
665 fprintf(prof_file, "\n\n");
669 report_ccs_profiling( void )
672 char temp[128]; /* sigh: magic constant */
674 rtsBool do_groups = rtsFalse;
679 total_prof_ticks = 0;
681 count_ticks(CCS_MAIN);
683 switch (RtsFlags.CcFlags.doCostCentres) {
686 case COST_CENTRES_XML:
692 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
693 time_str(), "Final");
695 fprintf(prof_file, "\n\t ");
696 fprintf(prof_file, " %s", prog_argv[0]);
697 fprintf(prof_file, " +RTS");
698 for (count = 0; rts_argv[count]; count++)
699 fprintf(prof_file, " %s", rts_argv[count]);
700 fprintf(prof_file, " -RTS");
701 for (count = 1; prog_argv[count]; count++)
702 fprintf(prof_file, " %s", prog_argv[count]);
703 fprintf(prof_file, "\n\n");
705 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
706 total_prof_ticks / (StgFloat) TICK_FREQUENCY,
707 total_prof_ticks, TICK_MILLISECS);
709 fprintf(prof_file, "\ttotal alloc = %11s bytes",
710 ullong_format_string((ullong) total_alloc * sizeof(W_),
711 temp, rtsTrue/*commas*/));
712 /* ToDo: 64-bit error! */
714 #if defined(PROFILING_DETAIL_COUNTS)
715 fprintf(prof_file, " (%lu closures)", total_allocs);
717 fprintf(prof_file, " (excludes profiling overheads)\n\n");
719 report_per_cc_costs();
722 reportCCS(pruneCCSTree(CCS_MAIN), 0);
728 reportCCS(CostCentreStack *ccs, nat indent)
735 /* Only print cost centres with non 0 data ! */
737 if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
738 ! ccs_to_ignore(ccs))
739 /* force printing of *all* cost centres if -P -P */
742 fprintf(prof_file, "%-*s%-*s %-10s",
743 indent, "", 24-indent, cc->label, cc->module);
746 if (do_groups) fprintf(prof_file, " %-11.11s",cc->group);
749 fprintf(prof_file, "%8ld %5.1f %5.1f %8ld %5ld",
751 total_prof_ticks == 0 ? 0.0 : (ccs->time_ticks / (StgFloat) total_prof_ticks * 100),
752 total_alloc == 0 ? 0.0 : (ccs->mem_alloc / (StgFloat) total_alloc * 100),
753 ccs->sub_scc_count, ccs->sub_cafcc_count);
755 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
756 fprintf(prof_file, " %5ld %9ld", ccs->time_ticks, ccs->mem_alloc*sizeof(W_));
757 #if defined(PROFILING_DETAIL_COUNTS)
758 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
759 ccs->mem_allocs, ccs->thunk_count,
760 ccs->function_count, ccs->pap_count,
761 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
762 ccs->caffun_subsumed);
765 fprintf(prof_file, "\n");
768 for (i = ccs->indexTable; i != 0; i = i->next) {
770 reportCCS(i->ccs, indent+1);
775 /* Traverse the cost centre stack tree and accumulate
779 count_ticks(CostCentreStack *ccs)
783 if (!ccs_to_ignore(ccs)) {
784 total_alloc += ccs->mem_alloc;
785 total_prof_ticks += ccs->time_ticks;
787 for (i = ccs->indexTable; i != NULL; i = i->next)
793 /* return rtsTrue if it is one of the ones that
794 * should not be reported normally (because it confuses
798 ccs_to_ignore (CostCentreStack *ccs)
800 if ( ccs == CCS_OVERHEAD
801 || ccs == CCS_DONTZuCARE
803 || ccs == CCS_SYSTEM) {
810 static CostCentreStack *
811 pruneCCSTree( CostCentreStack *ccs )
813 CostCentreStack *ccs1;
814 IndexTable *i, **prev;
816 prev = &ccs->indexTable;
817 for (i = ccs->indexTable; i != 0; i = i->next) {
818 if (i->back_edge) { continue; }
820 ccs1 = pruneCCSTree(i->ccs);
828 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
829 /* force printing of *all* cost centres if -P -P */ )
831 || ( ccs->indexTable != 0 )
832 || ( (ccs->scc_count || ccs->sub_scc_count ||
833 ccs->time_ticks || ccs->mem_alloc
834 || (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE
835 && (ccs->sub_cafcc_count
836 #if defined(PROFILING_DETAIL_COUNTS)
837 || cc->thunk_count || cc->function_count || cc->pap_count
846 /* -----------------------------------------------------------------------------
847 Generate the XML time/allocation profile
848 -------------------------------------------------------------------------- */
851 gen_XML_logfile( void )
853 fprintf(prof_file, "%d %lu", TIME_UPD_UQ, total_prof_ticks);
855 reportCCS_XML(pruneCCSTree(CCS_MAIN));
857 fprintf(prof_file, " 0\n");
863 reportCCS_XML(CostCentreStack *ccs)
870 fprintf(prof_file, " 1 %d %lu %lu %lu",
871 ccs->ccsID, ccs->scc_count, ccs->time_ticks, ccs->mem_alloc);
873 for (i = ccs->indexTable; i != 0; i = i->next) {
875 reportCCS_XML(i->ccs);
882 printCCS ( CostCentreStack *ccs )
885 for (; ccs; ccs = ccs->prevStack ) {
886 fprintf(stderr,ccs->cc->label);
887 if (ccs->prevStack) {
895 #endif /* PROFILING */