1 /* -----------------------------------------------------------------------------
2 * $Id: Profiling.c,v 1.9 1999/09/15 13:45:18 simonmar Exp $
4 * (c) The GHC Team, 1998-1999
6 * Support for profiling
8 * ---------------------------------------------------------------------------*/
17 #include "StgStartup.h"
19 #include "Proftimer.h"
24 * Global variables used to assign unique IDs to cc's, ccs's, and
32 /* Table sizes from old profiling system. Not sure if we'll need
35 nat time_intervals = 0;
36 nat earlier_ticks = 0;
43 /* Are we time-profiling?
45 rtsBool time_profiling = rtsFalse;
47 /* figures for the profiling report.
49 static lnat total_alloc, total_prof_ticks;
51 /* Globals for opening the profiling log file
53 static char *prof_filename; /* prof report file name = <program>.prof */
54 static FILE *prof_file;
56 /* The Current Cost Centre Stack (for attributing costs)
58 CostCentreStack *CCCS;
60 /* Linked lists to keep track of cc's and ccs's that haven't
61 * been declared in the log file yet
64 CostCentreStack *CCS_LIST;
65 CCSDecList *New_CCS_LIST;
68 * Built-in cost centres and cost-centre stacks:
70 * MAIN is the root of the cost-centre stack tree. If there are
71 * no _scc_s in the program, all costs will be attributed
74 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
75 * RTS operations apart from garbage collection are attributed
78 * GC is the storage manager / garbage collector.
80 * OVERHEAD gets all costs generated by the profiling system
81 * itself. These are costs that would not be incurred
82 * during non-profiled execution of the program.
84 * SUBSUMED is the one-and-only CCS placed on top-level functions.
85 * It indicates that all costs are to be attributed to the
86 * enclosing cost centre stack. SUBSUMED never accumulates
89 * DONT_CARE is a placeholder cost-centre we assign to static
90 * constructors. It should *never* accumulate any costs.
93 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "MAIN", CC_IS_BORING,);
94 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", "MAIN", CC_IS_BORING,);
95 CC_DECLARE(CC_GC, "GC", "GC", "GC", CC_IS_BORING,);
96 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "PROFILING", CC_IS_CAF,);
97 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", "MAIN", CC_IS_SUBSUMED,);
98 CC_DECLARE(CC_DONTZuCARE,"DONT_CARE", "MAIN", "MAIN", CC_IS_BORING,);
100 CCS_DECLARE(CCS_MAIN, CC_MAIN, CC_IS_BORING, );
101 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, CC_IS_BORING, );
102 CCS_DECLARE(CCS_GC, CC_GC, CC_IS_BORING, );
103 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, CC_IS_CAF, );
104 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, CC_IS_SUBSUMED, );
105 CCS_DECLARE(CCS_DONTZuCARE, CC_DONTZuCARE, CC_IS_BORING, );
111 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
112 CostCentreStack *new_ccs );
114 static void registerCostCentres ( void );
115 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
116 static void count_ticks ( CostCentreStack *ccs );
117 static void reportCCS ( CostCentreStack *ccs, nat indent );
118 static void DecCCS ( CostCentreStack *ccs );
119 static CostCentreStack *pruneCCSTree ( CostCentreStack *ccs );
121 static void printCCS ( CostCentreStack *ccs );
123 static void initTimeProfiling ( void );
125 /* -----------------------------------------------------------------------------
126 Initialise the profiling environment
127 -------------------------------------------------------------------------- */
132 CostCentreStack *ccs, *next;
134 /* for the benefit of allocate()... */
137 /* Initialize counters for IDs */
142 /* Initialize Declaration lists to NULL */
146 /* Register all the cost centres / stacks in the program
147 * CC_MAIN gets link = 0, all others have non-zero link.
149 REGISTER_CC(CC_MAIN);
150 REGISTER_CC(CC_SYSTEM);
152 REGISTER_CC(CC_OVERHEAD);
153 REGISTER_CC(CC_SUBSUMED);
154 REGISTER_CC(CC_DONTZuCARE);
155 REGISTER_CCS(CCS_MAIN);
156 REGISTER_CCS(CCS_SYSTEM);
157 REGISTER_CCS(CCS_GC);
158 REGISTER_CCS(CCS_OVERHEAD);
159 REGISTER_CCS(CCS_SUBSUMED);
160 REGISTER_CCS(CCS_DONTZuCARE);
163 registerCostCentres();
166 /* find all the "special" cost centre stacks, and make them children
169 ASSERT(CCS_MAIN->prevStack == 0);
170 CCS_MAIN->root = CC_MAIN;
171 for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
172 next = ccs->prevStack;
174 ActualPush_(CCS_MAIN,ccs->cc,ccs);
179 if (RtsFlags.CcFlags.doCostCentres) {
183 if (RtsFlags.ProfFlags.doHeapProfile) {
189 initTimeProfiling(void)
191 time_profiling = rtsTrue;
193 /* Initialise the log file name */
194 prof_filename = stgMallocBytes(strlen(prog_argv[0]) + 6, "initProfiling");
195 sprintf(prof_filename, "%s.prof", prog_argv[0]);
202 endProfiling ( void )
204 if (RtsFlags.CcFlags.doCostCentres) {
207 if (RtsFlags.ProfFlags.doHeapProfile) {
212 /* -----------------------------------------------------------------------------
213 Register Cost Centres
215 At the moment, this process just supplies a unique integer to each
216 statically declared cost centre and cost centre stack in the
219 The code generator inserts a small function "reg<moddule>" in each
220 module which registers any cost centres from that module and calls
221 the registration functions in each of the modules it imports. So,
222 if we call "regMain", each reachable module in the program will be
225 The reg* functions are compiled in the same way as STG code,
226 i.e. without normal C call/return conventions. Hence we must use
227 StgRun to call this stuff.
228 -------------------------------------------------------------------------- */
230 /* The registration functions use an explicit stack...
232 #define REGISTER_STACK_SIZE (BLOCK_SIZE * 4)
236 registerCostCentres ( void )
238 /* this storage will be reclaimed by the garbage collector,
241 register_stack = (F_ *)allocate(REGISTER_STACK_SIZE / sizeof(W_));
243 StgRun((StgFunPtr)stg_register);
247 /* -----------------------------------------------------------------------------
248 Set cost centre stack when entering a function. Here we implement
251 "if CCSfn is an initial segment of CCCS,
252 then set CCCS to CCSfn,
253 else append CCSfn to CCCS"
254 -------------------------------------------------------------------------- */
255 rtsBool entering_PAP;
258 EnterFunCCS ( CostCentreStack *cccs, CostCentreStack *ccsfn )
260 /* PAP_entry has already set CCCS for us */
262 entering_PAP = rtsFalse;
266 if (cccs->root == ccsfn->root) {
269 return AppendCCS(cccs,ccsfn);
273 /* -----------------------------------------------------------------------------
274 Cost-centre stack manipulation
275 -------------------------------------------------------------------------- */
278 CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
280 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
281 #define PushCostCentre _PushCostCentre
284 fprintf(stderr,"Pushing %s on ", cc->label);
286 fprintf(stderr,"\n"));
287 return PushCostCentre(ccs,cc);
292 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
294 CostCentreStack *temp_ccs;
296 if (ccs == EMPTY_STACK)
297 return ActualPush(ccs,cc);
302 /* check if we've already memoized this stack */
303 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
305 if (temp_ccs != EMPTY_STACK)
308 /* remove the CC to avoid loops */
309 ccs = RemoveCC(ccs,cc);
310 /* have a different stack now, need to check the memo table again */
311 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
312 if (temp_ccs != EMPTY_STACK)
315 return ActualPush(ccs,cc);
321 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
324 CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
326 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
327 #define AppendCCS _AppendCCS
331 fprintf(stderr,"Appending ");
333 fprintf(stderr," to ");
335 fprintf(stderr,"\n");});
336 return AppendCCS(ccs1,ccs2);
341 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
343 CostCentreStack *ccs;
345 /* Optimisation: if we attempt to append a CCS to itself, we're
346 * going to end up with the same ccs after a great deal of pushing
347 * and removing of cost centres. Furthermore, we'll generate a lot
348 * of intermediate CCSs which would not otherwise be generated. So:
349 * let's cope with this common case first.
355 if (ccs2->cc->is_subsumed != CC_IS_BORING) {
359 ASSERT(ccs2->prevStack != NULL);
360 ccs = AppendCCS(ccs1, ccs2->prevStack);
361 return PushCostCentre(ccs,ccs2->cc);
365 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
367 CostCentreStack *new_ccs;
369 /* allocate space for a new CostCentreStack */
370 new_ccs = (CostCentreStack *) stgMallocBytes(sizeof(CostCentreStack), "Error allocating space for CostCentreStack");
372 return ActualPush_(ccs, cc, new_ccs);
375 static CostCentreStack *
376 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
378 /* assign values to each member of the structure */
379 ASSIGN_CCS_ID(new_ccs->ccsID);
382 new_ccs->prevStack = ccs;
384 new_ccs->indexTable = EMPTY_TABLE;
386 /* Initialise the various _scc_ counters to zero
388 new_ccs->scc_count = 0;
389 new_ccs->sub_scc_count = 0;
390 new_ccs->sub_cafcc_count = 0;
392 /* Initialize all other stats here. There should be a quick way
393 * that's easily used elsewhere too
395 new_ccs->time_ticks = 0;
396 new_ccs->mem_alloc = 0;
398 new_ccs->root = ccs->root;
400 /* update the memoization table for the parent stack */
401 if (ccs != EMPTY_STACK)
402 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc);
404 /* make sure this CC is declared at the next heap/time sample */
407 /* return a pointer to the new stack */
413 RemoveCC(CostCentreStack *ccs, CostCentre *cc)
415 CostCentreStack *del_ccs;
417 if (ccs == EMPTY_STACK) {
421 return ccs->prevStack;
424 del_ccs = RemoveCC(ccs->prevStack, cc);
426 if (del_ccs == EMPTY_STACK)
429 return PushCostCentre(del_ccs,ccs->cc);
437 IsInIndexTable(IndexTable *it, CostCentre *cc)
439 while (it!=EMPTY_TABLE)
447 /* otherwise we never found it so return EMPTY_TABLE */
453 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs, CostCentre *cc)
457 new_it = stgMallocBytes(sizeof(IndexTable), "AddToIndexTable");
460 new_it->ccs = new_ccs;
467 print_ccs (FILE *fp, CostCentreStack *ccs)
470 fprintf(fp, "Cost-Centre Stack: ");
475 print_ccs(fp, ccs->prevStack);
476 fprintf(fp, "->[%s,%s,%s]",
477 ccs->cc->label, ccs->cc->module, ccs->cc->group);
479 fprintf(fp, "[%s,%s,%s]",
480 ccs->cc->label, ccs->cc->module, ccs->cc->group);
490 DecCCS(CostCentreStack *ccs)
492 CCSDecList *temp_list;
495 (CCSDecList *) stgMallocBytes(sizeof(CCSDecList),
496 "Error allocating space for CCSDecList");
497 temp_list->ccs = ccs;
498 temp_list->nextList = New_CCS_LIST;
500 New_CCS_LIST = temp_list;
503 /* -----------------------------------------------------------------------------
504 Generating a time & allocation profiling report.
505 -------------------------------------------------------------------------- */
507 static FILE *prof_file;
509 /* -----------------------------------------------------------------------------
510 Generating the aggregated per-cost-centre time/alloc report.
511 -------------------------------------------------------------------------- */
513 static CostCentre *sorted_cc_list;
516 aggregate_cc_costs( CostCentreStack *ccs )
520 ccs->cc->mem_alloc += ccs->mem_alloc;
521 ccs->cc->time_ticks += ccs->time_ticks;
523 for (i = ccs->indexTable; i != 0; i = i->next) {
524 aggregate_cc_costs(i->ccs);
529 insert_cc_in_sorted_list( CostCentre *new_cc )
531 CostCentre **prev, *cc;
533 prev = &sorted_cc_list;
534 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
535 if (new_cc->time_ticks > cc->time_ticks) {
548 report_per_cc_costs( void )
550 CostCentre *cc, *next;
552 aggregate_cc_costs(CCS_MAIN);
553 sorted_cc_list = NULL;
555 for (cc = CC_LIST; cc != NULL; cc = next) {
557 if (cc->time_ticks > total_prof_ticks/100
558 || cc->mem_alloc > total_alloc/100) {
559 insert_cc_in_sorted_list(cc);
563 fprintf(prof_file, "%-20s %-10s", "COST CENTRE", "MODULE");
564 fprintf(prof_file, "%6s %6s", "%time", "%alloc");
565 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
566 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
568 fprintf(prof_file, "\n\n");
570 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
571 fprintf(prof_file, "%-20s %-10s", cc->label, cc->module);
572 fprintf(prof_file, "%6.1f %6.1f",
573 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
574 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
578 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
579 fprintf(prof_file, " %5ld %9ld", cc->time_ticks, cc->mem_alloc);
581 fprintf(prof_file, "\n");
584 fprintf(prof_file,"\n\n");
587 /* -----------------------------------------------------------------------------
588 Generate the cost-centre-stack time/alloc report
589 -------------------------------------------------------------------------- */
592 fprint_header( void )
594 fprintf(prof_file, "%-24s %-10s", "COST CENTRE", "MODULE");
597 do_groups = have_interesting_groups(Registered_CC);
598 if (do_groups) fprintf(prof_file, " %-11.11s", "GROUP");
601 fprintf(prof_file, "%8s %5s %5s %8s %5s", "scc", "%time", "%alloc", "inner", "cafs");
603 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
604 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
605 #if defined(PROFILING_DETAIL_COUNTS)
606 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
607 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
611 fprintf(prof_file, "\n\n");
615 report_ccs_profiling( void )
618 char temp[128]; /* sigh: magic constant */
620 rtsBool do_groups = rtsFalse;
623 if (!RtsFlags.CcFlags.doCostCentres)
628 total_prof_ticks = 0;
630 count_ticks(CCS_MAIN);
632 /* open profiling output file */
633 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
634 fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
637 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
638 time_str(), "Final");
640 fprintf(prof_file, "\n\t ");
641 fprintf(prof_file, " %s", prog_argv[0]);
642 fprintf(prof_file, " +RTS");
643 for (count = 0; rts_argv[count]; count++)
644 fprintf(prof_file, " %s", rts_argv[count]);
645 fprintf(prof_file, " -RTS");
646 for (count = 1; prog_argv[count]; count++)
647 fprintf(prof_file, " %s", prog_argv[count]);
648 fprintf(prof_file, "\n\n");
650 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
651 total_prof_ticks / (StgFloat) TICK_FREQUENCY,
652 total_prof_ticks, TICK_MILLISECS);
654 fprintf(prof_file, "\ttotal alloc = %11s bytes",
655 ullong_format_string((ullong) total_alloc * sizeof(W_),
656 temp, rtsTrue/*commas*/));
657 /* ToDo: 64-bit error! */
659 #if defined(PROFILING_DETAIL_COUNTS)
660 fprintf(prof_file, " (%lu closures)", total_allocs);
662 fprintf(prof_file, " (excludes profiling overheads)\n\n");
664 report_per_cc_costs();
667 reportCCS(pruneCCSTree(CCS_MAIN), 0);
673 reportCCS(CostCentreStack *ccs, nat indent)
680 /* Only print cost centres with non 0 data ! */
682 if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
683 ! ccs_to_ignore(ccs))
684 /* force printing of *all* cost centres if -P -P */
687 fprintf(prof_file, "%-*s%-*s %-10s",
688 indent, "", 24-indent, cc->label, cc->module);
691 if (do_groups) fprintf(prof_file, " %-11.11s",cc->group);
694 fprintf(prof_file, "%8ld %5.1f %5.1f %8ld %5ld",
696 total_prof_ticks == 0 ? 0.0 : (ccs->time_ticks / (StgFloat) total_prof_ticks * 100),
697 total_alloc == 0 ? 0.0 : (ccs->mem_alloc / (StgFloat) total_alloc * 100),
698 ccs->sub_scc_count, ccs->sub_cafcc_count);
700 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
701 fprintf(prof_file, " %5ld %9ld", ccs->time_ticks, ccs->mem_alloc*sizeof(W_));
702 #if defined(PROFILING_DETAIL_COUNTS)
703 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
704 ccs->mem_allocs, ccs->thunk_count,
705 ccs->function_count, ccs->pap_count,
706 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
707 ccs->caffun_subsumed);
710 fprintf(prof_file, "\n");
713 for (i = ccs->indexTable; i != 0; i = i->next) {
714 reportCCS(i->ccs, indent+1);
718 /* Traverse the cost centre stack tree and accumulate
722 count_ticks(CostCentreStack *ccs)
726 if (!ccs_to_ignore(ccs)) {
727 total_alloc += ccs->mem_alloc;
728 total_prof_ticks += ccs->time_ticks;
730 for (i = ccs->indexTable; i != NULL; i = i->next)
734 /* return rtsTrue if it is one of the ones that
735 * should not be reported normally (because it confuses
739 ccs_to_ignore (CostCentreStack *ccs)
741 if ( ccs == CCS_OVERHEAD
742 || ccs == CCS_DONTZuCARE
744 || ccs == CCS_SYSTEM) {
751 static CostCentreStack *
752 pruneCCSTree( CostCentreStack *ccs )
754 CostCentreStack *ccs1;
755 IndexTable *i, **prev;
757 prev = &ccs->indexTable;
758 for (i = ccs->indexTable; i != 0; i = i->next) {
759 ccs1 = pruneCCSTree(i->ccs);
767 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
768 /* force printing of *all* cost centres if -P -P */ )
770 || ( ccs->indexTable != 0 )
771 || ( (ccs->scc_count || ccs->sub_scc_count ||
772 ccs->time_ticks || ccs->mem_alloc
773 || (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE
774 && (ccs->sub_cafcc_count
775 #if defined(PROFILING_DETAIL_COUNTS)
776 || cc->thunk_count || cc->function_count || cc->pap_count
787 printCCS ( CostCentreStack *ccs )
790 for (; ccs; ccs = ccs->prevStack ) {
791 fprintf(stderr,ccs->cc->label);
792 if (ccs->prevStack) {
800 #endif /* PROFILING */