1 /* -----------------------------------------------------------------------------
2 * $Id: Profiling.c,v 1.7 1999/06/29 13:04:40 panne Exp $
4 * (c) The GHC Team, 1998-1999
6 * Support for profiling
8 * ---------------------------------------------------------------------------*/
17 #include "StgStartup.h"
19 #include "Proftimer.h"
23 * Global variables used to assign unique IDs to cc's, ccs's, and
31 /* Table sizes from old profiling system. Not sure if we'll need
34 nat time_intervals = 0;
35 nat earlier_ticks = 0;
42 /* Are we time-profiling?
44 rtsBool time_profiling = rtsFalse;
46 /* figures for the profiling report.
48 static lnat total_alloc, total_ticks;
50 /* Globals for opening the profiling log file
52 static char *prof_filename; /* prof report file name = <program>.prof */
53 static FILE *prof_file;
55 /* The Current Cost Centre Stack (for attributing costs)
57 CostCentreStack *CCCS;
59 /* Linked lists to keep track of cc's and ccs's that haven't
60 * been declared in the log file yet
63 CostCentreStack *CCS_LIST;
64 CCSDecList *New_CCS_LIST;
67 * Built-in cost centres and cost-centre stacks:
69 * MAIN is the root of the cost-centre stack tree. If there are
70 * no _scc_s in the program, all costs will be attributed
73 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
74 * RTS operations apart from garbage collection are attributed
77 * GC is the storage manager / garbage collector.
79 * OVERHEAD gets all costs generated by the profiling system
80 * itself. These are costs that would not be incurred
81 * during non-profiled execution of the program.
83 * SUBSUMED is the one-and-only CCS placed on top-level functions.
84 * It indicates that all costs are to be attributed to the
85 * enclosing cost centre stack. SUBSUMED never accumulates
88 * DONT_CARE is a placeholder cost-centre we assign to static
89 * constructors. It should *never* accumulate any costs.
92 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "MAIN", CC_IS_BORING,);
93 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", "MAIN", CC_IS_BORING,);
94 CC_DECLARE(CC_GC, "GC", "GC", "GC", CC_IS_BORING,);
95 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "PROFILING", CC_IS_CAF,);
96 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", "MAIN", CC_IS_SUBSUMED,);
97 CC_DECLARE(CC_DONTZuCARE,"DONT_CARE", "MAIN", "MAIN", CC_IS_BORING,);
99 CCS_DECLARE(CCS_MAIN, CC_MAIN, CC_IS_BORING, );
100 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, CC_IS_BORING, );
101 CCS_DECLARE(CCS_GC, CC_GC, CC_IS_BORING, );
102 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, CC_IS_CAF, );
103 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, CC_IS_SUBSUMED, );
104 CCS_DECLARE(CCS_DONTZuCARE, CC_DONTZuCARE, CC_IS_BORING, );
110 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
111 CostCentreStack *new_ccs );
113 static void registerCostCentres ( void );
114 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
115 static void count_ticks ( CostCentreStack *ccs );
116 static void reportCCS ( CostCentreStack *ccs, nat indent );
117 static void DecCCS ( CostCentreStack *ccs );
118 static CostCentreStack *pruneCCSTree ( CostCentreStack *ccs );
120 static void printCCS ( CostCentreStack *ccs );
123 /* -----------------------------------------------------------------------------
124 Initialise the profiling environment
125 -------------------------------------------------------------------------- */
130 CostCentreStack *ccs, *next;
132 /* for the benefit of allocate()... */
135 /* Initialize counters for IDs */
140 /* Initialize Declaration lists to NULL */
144 /* Register all the cost centres / stacks in the program
145 * CC_MAIN gets link = 0, all others have non-zero link.
147 REGISTER_CC(CC_MAIN);
148 REGISTER_CC(CC_SYSTEM);
150 REGISTER_CC(CC_OVERHEAD);
151 REGISTER_CC(CC_SUBSUMED);
152 REGISTER_CC(CC_DONTZuCARE);
153 REGISTER_CCS(CCS_MAIN);
154 REGISTER_CCS(CCS_SYSTEM);
155 REGISTER_CCS(CCS_GC);
156 REGISTER_CCS(CCS_OVERHEAD);
157 REGISTER_CCS(CCS_SUBSUMED);
158 REGISTER_CCS(CCS_DONTZuCARE);
161 registerCostCentres();
164 if (!RtsFlags.CcFlags.doCostCentres)
167 time_profiling = rtsTrue;
169 /* Initialise the log file name */
170 prof_filename = stgMallocBytes(strlen(prog_argv[0]) + 6, "initProfiling");
171 sprintf(prof_filename, "%s.prof", prog_argv[0]);
173 /* find all the "special" cost centre stacks, and make them children
176 ASSERT(CCS_MAIN->prevStack == 0);
177 CCS_MAIN->root = CC_MAIN;
178 for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
179 next = ccs->prevStack;
181 ActualPush_(CCS_MAIN,ccs->cc,ccs);
186 /* profiling is the only client of the VTALRM system at the moment,
187 * so just install the profiling tick handler. */
188 install_vtalrm_handler(handleProfTick);
193 endProfiling ( void )
199 heapCensus ( bdescr *bd UNUSED )
204 /* -----------------------------------------------------------------------------
205 Register Cost Centres
207 At the moment, this process just supplies a unique integer to each
208 statically declared cost centre and cost centre stack in the
211 The code generator inserts a small function "reg<moddule>" in each
212 module which registers any cost centres from that module and calls
213 the registration functions in each of the modules it imports. So,
214 if we call "regMain", each reachable module in the program will be
217 The reg* functions are compiled in the same way as STG code,
218 i.e. without normal C call/return conventions. Hence we must use
219 StgRun to call this stuff.
220 -------------------------------------------------------------------------- */
222 /* The registration functions use an explicit stack...
224 #define REGISTER_STACK_SIZE (BLOCK_SIZE * 4)
228 registerCostCentres ( void )
230 /* this storage will be reclaimed by the garbage collector,
233 register_stack = (F_ *)allocate(REGISTER_STACK_SIZE / sizeof(W_));
235 StgRun((StgFunPtr)stg_register);
239 /* -----------------------------------------------------------------------------
240 Set cost centre stack when entering a function. Here we implement
243 "if CCSfn is an initial segment of CCCS,
244 then set CCCS to CCSfn,
245 else append CCSfn to CCCS"
246 -------------------------------------------------------------------------- */
247 rtsBool entering_PAP;
250 EnterFunCCS ( CostCentreStack *cccs, CostCentreStack *ccsfn )
252 /* PAP_entry has already set CCCS for us */
254 entering_PAP = rtsFalse;
258 if (cccs->root == ccsfn->root) {
261 return AppendCCS(cccs,ccsfn);
265 /* -----------------------------------------------------------------------------
266 Cost-centre stack manipulation
267 -------------------------------------------------------------------------- */
270 CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
272 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
273 #define PushCostCentre _PushCostCentre
276 fprintf(stderr,"Pushing %s on ", cc->label);
278 fprintf(stderr,"\n"));
279 return PushCostCentre(ccs,cc);
284 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
286 CostCentreStack *temp_ccs;
288 if (ccs == EMPTY_STACK)
289 return ActualPush(ccs,cc);
294 /* check if we've already memoized this stack */
295 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
297 if (temp_ccs != EMPTY_STACK)
300 /* remove the CC to avoid loops */
301 ccs = RemoveCC(ccs,cc);
302 /* have a different stack now, need to check the memo table again */
303 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
304 if (temp_ccs != EMPTY_STACK)
307 return ActualPush(ccs,cc);
313 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
316 CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
318 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
319 #define AppendCCS _AppendCCS
323 fprintf(stderr,"Appending ");
325 fprintf(stderr," to ");
327 fprintf(stderr,"\n");});
328 return AppendCCS(ccs1,ccs2);
333 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
335 CostCentreStack *ccs;
337 /* Optimisation: if we attempt to append a CCS to itself, we're
338 * going to end up with the same ccs after a great deal of pushing
339 * and removing of cost centres. Furthermore, we'll generate a lot
340 * of intermediate CCSs which would not otherwise be generated. So:
341 * let's cope with this common case first.
347 if (ccs2->cc->is_subsumed != CC_IS_BORING) {
351 ASSERT(ccs2->prevStack != NULL);
352 ccs = AppendCCS(ccs1, ccs2->prevStack);
353 return PushCostCentre(ccs,ccs2->cc);
357 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
359 CostCentreStack *new_ccs;
361 /* allocate space for a new CostCentreStack */
362 new_ccs = (CostCentreStack *) stgMallocBytes(sizeof(CostCentreStack), "Error allocating space for CostCentreStack");
364 return ActualPush_(ccs, cc, new_ccs);
367 static CostCentreStack *
368 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
370 /* assign values to each member of the structure */
371 ASSIGN_CCS_ID(new_ccs->ccsID);
374 new_ccs->prevStack = ccs;
376 new_ccs->indexTable = EMPTY_TABLE;
378 /* Initialise the various _scc_ counters to zero
380 new_ccs->scc_count = 0;
381 new_ccs->sub_scc_count = 0;
382 new_ccs->sub_cafcc_count = 0;
384 /* Initialize all other stats here. There should be a quick way
385 * that's easily used elsewhere too
387 new_ccs->time_ticks = 0;
388 new_ccs->mem_alloc = 0;
390 new_ccs->root = ccs->root;
392 /* update the memoization table for the parent stack */
393 if (ccs != EMPTY_STACK)
394 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc);
396 /* make sure this CC is declared at the next heap/time sample */
399 /* return a pointer to the new stack */
405 RemoveCC(CostCentreStack *ccs, CostCentre *cc)
407 CostCentreStack *del_ccs;
409 if (ccs == EMPTY_STACK) {
413 return ccs->prevStack;
416 del_ccs = RemoveCC(ccs->prevStack, cc);
418 if (del_ccs == EMPTY_STACK)
421 return PushCostCentre(del_ccs,ccs->cc);
429 IsInIndexTable(IndexTable *it, CostCentre *cc)
431 while (it!=EMPTY_TABLE)
439 /* otherwise we never found it so return EMPTY_TABLE */
445 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs, CostCentre *cc)
449 new_it = stgMallocBytes(sizeof(IndexTable), "AddToIndexTable");
452 new_it->ccs = new_ccs;
459 print_ccs (FILE *fp, CostCentreStack *ccs)
462 fprintf(fp, "Cost-Centre Stack: ");
467 print_ccs(fp, ccs->prevStack);
468 fprintf(fp, "->[%s,%s,%s]",
469 ccs->cc->label, ccs->cc->module, ccs->cc->group);
471 fprintf(fp, "[%s,%s,%s]",
472 ccs->cc->label, ccs->cc->module, ccs->cc->group);
482 DecCCS(CostCentreStack *ccs)
484 CCSDecList *temp_list;
487 (CCSDecList *) stgMallocBytes(sizeof(CCSDecList),
488 "Error allocating space for CCSDecList");
489 temp_list->ccs = ccs;
490 temp_list->nextList = New_CCS_LIST;
492 New_CCS_LIST = temp_list;
495 /* -----------------------------------------------------------------------------
496 Generating a time & allocation profiling report.
497 -------------------------------------------------------------------------- */
499 static FILE *prof_file;
502 report_ccs_profiling( void )
505 char temp[128]; /* sigh: magic constant */
507 rtsBool do_groups = rtsFalse;
510 if (!RtsFlags.CcFlags.doCostCentres)
517 count_ticks(CCS_MAIN);
519 /* open profiling output file */
520 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
521 fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
524 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
525 time_str(), "Final");
527 fprintf(prof_file, "\n\t ");
528 fprintf(prof_file, " %s", prog_argv[0]);
529 fprintf(prof_file, " +RTS");
530 for (count = 0; rts_argv[count]; count++)
531 fprintf(prof_file, " %s", rts_argv[count]);
532 fprintf(prof_file, " -RTS");
533 for (count = 1; prog_argv[count]; count++)
534 fprintf(prof_file, " %s", prog_argv[count]);
535 fprintf(prof_file, "\n\n");
537 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
538 total_ticks / (StgFloat) TICK_FREQUENCY,
539 total_ticks, TICK_MILLISECS);
541 fprintf(prof_file, "\ttotal alloc = %11s bytes",
542 ullong_format_string((ullong) total_alloc * sizeof(W_),
543 temp, rtsTrue/*commas*/));
544 /* ToDo: 64-bit error! */
546 #if defined(PROFILING_DETAIL_COUNTS)
547 fprintf(prof_file, " (%lu closures)", total_allocs);
549 fprintf(prof_file, " (excludes profiling overheads)\n\n");
551 fprintf(prof_file, "%-24s %-10s", "COST CENTRE", "MODULE");
554 do_groups = have_interesting_groups(Registered_CC);
555 if (do_groups) fprintf(prof_file, " %-11.11s", "GROUP");
558 fprintf(prof_file, "%8s %5s %5s %8s %5s", "scc", "%time", "%alloc", "inner", "cafs");
560 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
561 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
562 #if defined(PROFILING_DETAIL_COUNTS)
563 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
564 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
567 fprintf(prof_file, "\n\n");
569 reportCCS(pruneCCSTree(CCS_MAIN), 0);
575 reportCCS(CostCentreStack *ccs, nat indent)
581 ASSERT(cc == CC_MAIN || cc->link != 0);
583 /* Only print cost centres with non 0 data ! */
585 if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
586 ! ccs_to_ignore(ccs))
587 /* force printing of *all* cost centres if -P -P */
590 fprintf(prof_file, "%-*s%-*s %-10s",
591 indent, "", 24-indent, cc->label, cc->module);
594 if (do_groups) fprintf(prof_file, " %-11.11s",cc->group);
597 fprintf(prof_file, "%8ld %4.1f %4.1f %8ld %5ld",
599 total_ticks == 0 ? 0.0 : (ccs->time_ticks / (StgFloat) total_ticks * 100),
600 total_alloc == 0 ? 0.0 : (ccs->mem_alloc / (StgFloat) total_alloc * 100),
601 ccs->sub_scc_count, ccs->sub_cafcc_count);
603 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
604 fprintf(prof_file, " %5ld %9ld", ccs->time_ticks, ccs->mem_alloc*sizeof(W_));
605 #if defined(PROFILING_DETAIL_COUNTS)
606 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
607 ccs->mem_allocs, ccs->thunk_count,
608 ccs->function_count, ccs->pap_count,
609 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
610 ccs->caffun_subsumed);
613 fprintf(prof_file, "\n");
616 for (i = ccs->indexTable; i != 0; i = i->next) {
617 reportCCS(i->ccs, indent+1);
621 /* Traverse the cost centre stack tree and accumulate
625 count_ticks(CostCentreStack *ccs)
629 if (!ccs_to_ignore(ccs)) {
630 total_alloc += ccs->mem_alloc;
631 total_ticks += ccs->time_ticks;
633 for (i = ccs->indexTable; i != NULL; i = i->next)
637 /* return rtsTrue if it is one of the ones that
638 * should not be reported normally (because it confuses
642 ccs_to_ignore (CostCentreStack *ccs)
644 if ( ccs == CCS_OVERHEAD
645 || ccs == CCS_DONTZuCARE
647 || ccs == CCS_SYSTEM) {
654 static CostCentreStack *
655 pruneCCSTree( CostCentreStack *ccs )
657 CostCentreStack *ccs1;
658 IndexTable *i, **prev;
660 prev = &ccs->indexTable;
661 for (i = ccs->indexTable; i != 0; i = i->next) {
662 ccs1 = pruneCCSTree(i->ccs);
670 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
671 /* force printing of *all* cost centres if -P -P */ )
673 || ( ccs->indexTable != 0 )
674 || ( (ccs->scc_count || ccs->sub_scc_count ||
675 ccs->time_ticks || ccs->mem_alloc
676 || (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE
677 && (ccs->sub_cafcc_count
678 #if defined(PROFILING_DETAIL_COUNTS)
679 || cc->thunk_count || cc->function_count || cc->pap_count
690 printCCS ( CostCentreStack *ccs )
693 for (; ccs; ccs = ccs->prevStack ) {
694 fprintf(stderr,ccs->cc->label);
695 if (ccs->prevStack) {
703 #endif /* PROFILING */