1 /* -----------------------------------------------------------------------------
2 * $Id: Profiling.c,v 1.11 1999/11/04 10:15:50 simonmar Exp $
4 * (c) The GHC Team, 1998-1999
6 * Support for profiling
8 * ---------------------------------------------------------------------------*/
17 #include "StgStartup.h"
19 #include "Proftimer.h"
24 * Global variables used to assign unique IDs to cc's, ccs's, and
32 /* Table sizes from old profiling system. Not sure if we'll need
35 nat time_intervals = 0;
36 nat earlier_ticks = 0;
43 /* Are we time-profiling?
45 rtsBool time_profiling = rtsFalse;
47 /* figures for the profiling report.
49 static lnat total_alloc, total_prof_ticks;
51 /* Globals for opening the profiling log file
53 static char *prof_filename; /* prof report file name = <program>.prof */
54 static FILE *prof_file;
56 /* The Current Cost Centre Stack (for attributing costs)
58 CostCentreStack *CCCS;
60 /* Linked lists to keep track of cc's and ccs's that haven't
61 * been declared in the log file yet
64 CostCentreStack *CCS_LIST;
65 CCSDecList *New_CCS_LIST;
68 * Built-in cost centres and cost-centre stacks:
70 * MAIN is the root of the cost-centre stack tree. If there are
71 * no _scc_s in the program, all costs will be attributed
74 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
75 * RTS operations apart from garbage collection are attributed
78 * GC is the storage manager / garbage collector.
80 * OVERHEAD gets all costs generated by the profiling system
81 * itself. These are costs that would not be incurred
82 * during non-profiled execution of the program.
84 * SUBSUMED is the one-and-only CCS placed on top-level functions.
85 * It indicates that all costs are to be attributed to the
86 * enclosing cost centre stack. SUBSUMED never accumulates
89 * DONT_CARE is a placeholder cost-centre we assign to static
90 * constructors. It should *never* accumulate any costs.
93 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "MAIN", CC_IS_BORING,);
94 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", "MAIN", CC_IS_BORING,);
95 CC_DECLARE(CC_GC, "GC", "GC", "GC", CC_IS_BORING,);
96 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "PROFILING", CC_IS_CAF,);
97 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", "MAIN", CC_IS_SUBSUMED,);
98 CC_DECLARE(CC_DONTZuCARE,"DONT_CARE", "MAIN", "MAIN", CC_IS_BORING,);
100 CCS_DECLARE(CCS_MAIN, CC_MAIN, CC_IS_BORING, );
101 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, CC_IS_BORING, );
102 CCS_DECLARE(CCS_GC, CC_GC, CC_IS_BORING, );
103 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, CC_IS_CAF, );
104 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, CC_IS_SUBSUMED, );
105 CCS_DECLARE(CCS_DONTZuCARE, CC_DONTZuCARE, CC_IS_BORING, );
111 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
112 CostCentreStack *new_ccs );
114 static void registerCostCentres ( void );
115 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
116 static void count_ticks ( CostCentreStack *ccs );
117 static void reportCCS ( CostCentreStack *ccs, nat indent );
118 static void DecCCS ( CostCentreStack *ccs );
119 static CostCentreStack *pruneCCSTree ( CostCentreStack *ccs );
121 static void printCCS ( CostCentreStack *ccs );
123 static void initTimeProfiling ( void );
125 /* -----------------------------------------------------------------------------
126 Initialise the profiling environment
127 -------------------------------------------------------------------------- */
132 CostCentreStack *ccs, *next;
134 /* for the benefit of allocate()... */
137 /* Initialize counters for IDs */
142 /* Initialize Declaration lists to NULL */
146 /* Register all the cost centres / stacks in the program
147 * CC_MAIN gets link = 0, all others have non-zero link.
149 REGISTER_CC(CC_MAIN);
150 REGISTER_CC(CC_SYSTEM);
152 REGISTER_CC(CC_OVERHEAD);
153 REGISTER_CC(CC_SUBSUMED);
154 REGISTER_CC(CC_DONTZuCARE);
155 REGISTER_CCS(CCS_MAIN);
156 REGISTER_CCS(CCS_SYSTEM);
157 REGISTER_CCS(CCS_GC);
158 REGISTER_CCS(CCS_OVERHEAD);
159 REGISTER_CCS(CCS_SUBSUMED);
160 REGISTER_CCS(CCS_DONTZuCARE);
163 registerCostCentres();
166 /* find all the "special" cost centre stacks, and make them children
169 ASSERT(CCS_MAIN->prevStack == 0);
170 CCS_MAIN->root = CC_MAIN;
171 for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
172 next = ccs->prevStack;
174 ActualPush_(CCS_MAIN,ccs->cc,ccs);
179 if (RtsFlags.CcFlags.doCostCentres) {
183 if (RtsFlags.ProfFlags.doHeapProfile) {
189 initTimeProfiling(void)
191 time_profiling = rtsTrue;
193 /* Initialise the log file name */
194 prof_filename = stgMallocBytes(strlen(prog_argv[0]) + 6, "initProfiling");
195 sprintf(prof_filename, "%s.prof", prog_argv[0]);
202 endProfiling ( void )
204 if (RtsFlags.CcFlags.doCostCentres) {
207 if (RtsFlags.ProfFlags.doHeapProfile) {
212 /* -----------------------------------------------------------------------------
213 Register Cost Centres
215 At the moment, this process just supplies a unique integer to each
216 statically declared cost centre and cost centre stack in the
219 The code generator inserts a small function "reg<moddule>" in each
220 module which registers any cost centres from that module and calls
221 the registration functions in each of the modules it imports. So,
222 if we call "regMain", each reachable module in the program will be
225 The reg* functions are compiled in the same way as STG code,
226 i.e. without normal C call/return conventions. Hence we must use
227 StgRun to call this stuff.
228 -------------------------------------------------------------------------- */
230 /* The registration functions use an explicit stack...
232 #define REGISTER_STACK_SIZE (BLOCK_SIZE * 4)
236 registerCostCentres ( void )
238 /* this storage will be reclaimed by the garbage collector,
241 register_stack = (F_ *)allocate(REGISTER_STACK_SIZE / sizeof(W_));
243 StgRun((StgFunPtr)stg_register, &MainRegTable);
247 /* -----------------------------------------------------------------------------
248 Set cost centre stack when entering a function. Here we implement
251 "if CCSfn is an initial segment of CCCS,
252 then set CCCS to CCSfn,
253 else append CCSfn to CCCS"
254 -------------------------------------------------------------------------- */
255 rtsBool entering_PAP;
258 EnterFunCCS ( CostCentreStack *cccs, CostCentreStack *ccsfn )
260 /* PAP_entry has already set CCCS for us */
262 entering_PAP = rtsFalse;
266 if (cccs->root == ccsfn->root) {
269 return AppendCCS(cccs,ccsfn);
273 /* -----------------------------------------------------------------------------
274 Cost-centre stack manipulation
275 -------------------------------------------------------------------------- */
278 CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
280 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
281 #define PushCostCentre _PushCostCentre
284 fprintf(stderr,"Pushing %s on ", cc->label);
286 fprintf(stderr,"\n"));
287 return PushCostCentre(ccs,cc);
292 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
294 CostCentreStack *temp_ccs;
296 if (ccs == EMPTY_STACK)
297 return ActualPush(ccs,cc);
302 /* check if we've already memoized this stack */
303 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
305 if (temp_ccs != EMPTY_STACK)
308 /* remove the CC to avoid loops */
309 ccs = RemoveCC(ccs,cc);
310 /* have a different stack now, need to check the memo table again */
311 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
312 if (temp_ccs != EMPTY_STACK)
315 return ActualPush(ccs,cc);
321 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
324 CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
326 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
327 #define AppendCCS _AppendCCS
331 fprintf(stderr,"Appending ");
333 fprintf(stderr," to ");
335 fprintf(stderr,"\n");});
336 return AppendCCS(ccs1,ccs2);
341 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
343 CostCentreStack *ccs;
345 /* Optimisation: if we attempt to append a CCS to itself, we're
346 * going to end up with the same ccs after a great deal of pushing
347 * and removing of cost centres. Furthermore, we'll generate a lot
348 * of intermediate CCSs which would not otherwise be generated. So:
349 * let's cope with this common case first.
355 if (ccs2->cc->is_subsumed != CC_IS_BORING) {
359 if (ccs2->prevStack != NULL) {
360 ccs = AppendCCS(ccs1, ccs2->prevStack);
363 return PushCostCentre(ccs,ccs2->cc);
367 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
369 CostCentreStack *new_ccs;
371 /* allocate space for a new CostCentreStack */
372 new_ccs = (CostCentreStack *) stgMallocBytes(sizeof(CostCentreStack), "Error allocating space for CostCentreStack");
374 return ActualPush_(ccs, cc, new_ccs);
377 static CostCentreStack *
378 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
380 /* assign values to each member of the structure */
381 ASSIGN_CCS_ID(new_ccs->ccsID);
384 new_ccs->prevStack = ccs;
386 new_ccs->indexTable = EMPTY_TABLE;
388 /* Initialise the various _scc_ counters to zero
390 new_ccs->scc_count = 0;
391 new_ccs->sub_scc_count = 0;
392 new_ccs->sub_cafcc_count = 0;
394 /* Initialize all other stats here. There should be a quick way
395 * that's easily used elsewhere too
397 new_ccs->time_ticks = 0;
398 new_ccs->mem_alloc = 0;
400 new_ccs->root = ccs->root;
402 /* update the memoization table for the parent stack */
403 if (ccs != EMPTY_STACK)
404 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc);
406 /* make sure this CC is declared at the next heap/time sample */
409 /* return a pointer to the new stack */
415 RemoveCC(CostCentreStack *ccs, CostCentre *cc)
417 CostCentreStack *del_ccs;
419 if (ccs == EMPTY_STACK) {
423 return ccs->prevStack;
426 del_ccs = RemoveCC(ccs->prevStack, cc);
428 if (del_ccs == EMPTY_STACK)
431 return PushCostCentre(del_ccs,ccs->cc);
439 IsInIndexTable(IndexTable *it, CostCentre *cc)
441 while (it!=EMPTY_TABLE)
449 /* otherwise we never found it so return EMPTY_TABLE */
455 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs, CostCentre *cc)
459 new_it = stgMallocBytes(sizeof(IndexTable), "AddToIndexTable");
462 new_it->ccs = new_ccs;
469 print_ccs (FILE *fp, CostCentreStack *ccs)
472 fprintf(fp, "Cost-Centre Stack: ");
477 print_ccs(fp, ccs->prevStack);
478 fprintf(fp, "->[%s,%s,%s]",
479 ccs->cc->label, ccs->cc->module, ccs->cc->group);
481 fprintf(fp, "[%s,%s,%s]",
482 ccs->cc->label, ccs->cc->module, ccs->cc->group);
492 DecCCS(CostCentreStack *ccs)
494 CCSDecList *temp_list;
497 (CCSDecList *) stgMallocBytes(sizeof(CCSDecList),
498 "Error allocating space for CCSDecList");
499 temp_list->ccs = ccs;
500 temp_list->nextList = New_CCS_LIST;
502 New_CCS_LIST = temp_list;
505 /* -----------------------------------------------------------------------------
506 Generating a time & allocation profiling report.
507 -------------------------------------------------------------------------- */
509 static FILE *prof_file;
511 /* -----------------------------------------------------------------------------
512 Generating the aggregated per-cost-centre time/alloc report.
513 -------------------------------------------------------------------------- */
515 static CostCentre *sorted_cc_list;
518 aggregate_cc_costs( CostCentreStack *ccs )
522 ccs->cc->mem_alloc += ccs->mem_alloc;
523 ccs->cc->time_ticks += ccs->time_ticks;
525 for (i = ccs->indexTable; i != 0; i = i->next) {
526 aggregate_cc_costs(i->ccs);
531 insert_cc_in_sorted_list( CostCentre *new_cc )
533 CostCentre **prev, *cc;
535 prev = &sorted_cc_list;
536 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
537 if (new_cc->time_ticks > cc->time_ticks) {
550 report_per_cc_costs( void )
552 CostCentre *cc, *next;
554 aggregate_cc_costs(CCS_MAIN);
555 sorted_cc_list = NULL;
557 for (cc = CC_LIST; cc != NULL; cc = next) {
559 if (cc->time_ticks > total_prof_ticks/100
560 || cc->mem_alloc > total_alloc/100) {
561 insert_cc_in_sorted_list(cc);
565 fprintf(prof_file, "%-20s %-10s", "COST CENTRE", "MODULE");
566 fprintf(prof_file, "%6s %6s", "%time", "%alloc");
567 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
568 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
570 fprintf(prof_file, "\n\n");
572 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
573 fprintf(prof_file, "%-20s %-10s", cc->label, cc->module);
574 fprintf(prof_file, "%6.1f %6.1f",
575 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
576 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
580 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
581 fprintf(prof_file, " %5ld %9ld", cc->time_ticks, cc->mem_alloc);
583 fprintf(prof_file, "\n");
586 fprintf(prof_file,"\n\n");
589 /* -----------------------------------------------------------------------------
590 Generate the cost-centre-stack time/alloc report
591 -------------------------------------------------------------------------- */
594 fprint_header( void )
596 fprintf(prof_file, "%-24s %-10s", "COST CENTRE", "MODULE");
599 do_groups = have_interesting_groups(Registered_CC);
600 if (do_groups) fprintf(prof_file, " %-11.11s", "GROUP");
603 fprintf(prof_file, "%8s %5s %5s %8s %5s", "scc", "%time", "%alloc", "inner", "cafs");
605 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
606 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
607 #if defined(PROFILING_DETAIL_COUNTS)
608 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
609 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
613 fprintf(prof_file, "\n\n");
617 report_ccs_profiling( void )
620 char temp[128]; /* sigh: magic constant */
622 rtsBool do_groups = rtsFalse;
625 if (!RtsFlags.CcFlags.doCostCentres)
630 total_prof_ticks = 0;
632 count_ticks(CCS_MAIN);
634 /* open profiling output file */
635 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
636 fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
639 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
640 time_str(), "Final");
642 fprintf(prof_file, "\n\t ");
643 fprintf(prof_file, " %s", prog_argv[0]);
644 fprintf(prof_file, " +RTS");
645 for (count = 0; rts_argv[count]; count++)
646 fprintf(prof_file, " %s", rts_argv[count]);
647 fprintf(prof_file, " -RTS");
648 for (count = 1; prog_argv[count]; count++)
649 fprintf(prof_file, " %s", prog_argv[count]);
650 fprintf(prof_file, "\n\n");
652 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
653 total_prof_ticks / (StgFloat) TICK_FREQUENCY,
654 total_prof_ticks, TICK_MILLISECS);
656 fprintf(prof_file, "\ttotal alloc = %11s bytes",
657 ullong_format_string((ullong) total_alloc * sizeof(W_),
658 temp, rtsTrue/*commas*/));
659 /* ToDo: 64-bit error! */
661 #if defined(PROFILING_DETAIL_COUNTS)
662 fprintf(prof_file, " (%lu closures)", total_allocs);
664 fprintf(prof_file, " (excludes profiling overheads)\n\n");
666 report_per_cc_costs();
669 reportCCS(pruneCCSTree(CCS_MAIN), 0);
675 reportCCS(CostCentreStack *ccs, nat indent)
682 /* Only print cost centres with non 0 data ! */
684 if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
685 ! ccs_to_ignore(ccs))
686 /* force printing of *all* cost centres if -P -P */
689 fprintf(prof_file, "%-*s%-*s %-10s",
690 indent, "", 24-indent, cc->label, cc->module);
693 if (do_groups) fprintf(prof_file, " %-11.11s",cc->group);
696 fprintf(prof_file, "%8ld %5.1f %5.1f %8ld %5ld",
698 total_prof_ticks == 0 ? 0.0 : (ccs->time_ticks / (StgFloat) total_prof_ticks * 100),
699 total_alloc == 0 ? 0.0 : (ccs->mem_alloc / (StgFloat) total_alloc * 100),
700 ccs->sub_scc_count, ccs->sub_cafcc_count);
702 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
703 fprintf(prof_file, " %5ld %9ld", ccs->time_ticks, ccs->mem_alloc*sizeof(W_));
704 #if defined(PROFILING_DETAIL_COUNTS)
705 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
706 ccs->mem_allocs, ccs->thunk_count,
707 ccs->function_count, ccs->pap_count,
708 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
709 ccs->caffun_subsumed);
712 fprintf(prof_file, "\n");
715 for (i = ccs->indexTable; i != 0; i = i->next) {
716 reportCCS(i->ccs, indent+1);
720 /* Traverse the cost centre stack tree and accumulate
724 count_ticks(CostCentreStack *ccs)
728 if (!ccs_to_ignore(ccs)) {
729 total_alloc += ccs->mem_alloc;
730 total_prof_ticks += ccs->time_ticks;
732 for (i = ccs->indexTable; i != NULL; i = i->next)
736 /* return rtsTrue if it is one of the ones that
737 * should not be reported normally (because it confuses
741 ccs_to_ignore (CostCentreStack *ccs)
743 if ( ccs == CCS_OVERHEAD
744 || ccs == CCS_DONTZuCARE
746 || ccs == CCS_SYSTEM) {
753 static CostCentreStack *
754 pruneCCSTree( CostCentreStack *ccs )
756 CostCentreStack *ccs1;
757 IndexTable *i, **prev;
759 prev = &ccs->indexTable;
760 for (i = ccs->indexTable; i != 0; i = i->next) {
761 ccs1 = pruneCCSTree(i->ccs);
769 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
770 /* force printing of *all* cost centres if -P -P */ )
772 || ( ccs->indexTable != 0 )
773 || ( (ccs->scc_count || ccs->sub_scc_count ||
774 ccs->time_ticks || ccs->mem_alloc
775 || (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE
776 && (ccs->sub_cafcc_count
777 #if defined(PROFILING_DETAIL_COUNTS)
778 || cc->thunk_count || cc->function_count || cc->pap_count
789 printCCS ( CostCentreStack *ccs )
792 for (; ccs; ccs = ccs->prevStack ) {
793 fprintf(stderr,ccs->cc->label);
794 if (ccs->prevStack) {
802 #endif /* PROFILING */