1 /* -----------------------------------------------------------------------------
2 * $Id: Profiling.c,v 1.5 1999/04/08 15:43:45 simonm Exp $
4 * (c) The GHC Team, 1998-1999
6 * Support for profiling
8 * ---------------------------------------------------------------------------*/
17 #include "StgStartup.h"
19 #include "Proftimer.h"
23 * Global variables used to assign unique IDs to cc's, ccs's, and
31 /* Table sizes from old profiling system. Not sure if we'll need
34 nat time_intervals = 0;
35 nat earlier_ticks = 0;
42 /* Are we time-profiling?
44 rtsBool time_profiling = rtsFalse;
46 /* figures for the profiling report.
48 static lnat total_alloc, total_ticks;
50 /* Globals for opening the profiling log file
52 static char *prof_filename; /* prof report file name = <program>.prof */
53 static FILE *prof_file;
55 /* The Current Cost Centre Stack (for attributing costs)
57 CostCentreStack *CCCS;
59 /* Linked lists to keep track of cc's and ccs's that haven't
60 * been declared in the log file yet
63 CostCentreStack *CCS_LIST;
64 CCSDecList *New_CCS_LIST;
67 * Built-in cost centres and cost-centre stacks:
69 * MAIN is the root of the cost-centre stack tree. If there are
70 * no _scc_s in the program, all costs will be attributed
73 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
74 * RTS operations apart from garbage collection are attributed
77 * GC is the storage manager / garbage collector.
79 * OVERHEAD gets all costs generated by the profiling system
80 * itself. These are costs that would not be incurred
81 * during non-profiled execution of the program.
83 * SUBSUMED is the one-and-only CCS placed on top-level functions.
84 * It indicates that all costs are to be attributed to the
85 * enclosing cost centre stack. SUBSUMED never accumulates
88 * DONT_CARE is a placeholder cost-centre we assign to static
89 * constructors. It should *never* accumulate any costs.
92 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "MAIN", CC_IS_BORING,);
93 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", "MAIN", CC_IS_BORING,);
94 CC_DECLARE(CC_GC, "GC", "GC", "GC", CC_IS_BORING,);
95 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "PROFILING", CC_IS_CAF,);
96 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", "MAIN", CC_IS_SUBSUMED,);
97 CC_DECLARE(CC_DONTZuCARE,"DONT_CARE", "MAIN", "MAIN", CC_IS_BORING,);
99 CCS_DECLARE(CCS_MAIN, CC_MAIN, CC_IS_BORING, );
100 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, CC_IS_BORING, );
101 CCS_DECLARE(CCS_GC, CC_GC, CC_IS_BORING, );
102 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, CC_IS_CAF, );
103 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, CC_IS_SUBSUMED, );
104 CCS_DECLARE(CCS_DONTZuCARE, CC_DONTZuCARE, CC_IS_BORING, );
110 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
111 CostCentreStack *new_ccs );
113 static void registerCostCentres ( void );
114 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
115 static void count_ticks ( CostCentreStack *ccs );
116 static void reportCCS ( CostCentreStack *ccs, nat indent );
117 static void DecCCS ( CostCentreStack *ccs );
118 static CostCentreStack *pruneCCSTree ( CostCentreStack *ccs );
120 static void printCCS ( CostCentreStack *ccs );
123 /* -----------------------------------------------------------------------------
124 Initialise the profiling environment
125 -------------------------------------------------------------------------- */
130 CostCentreStack *ccs, *next;
132 /* for the benefit of allocate()... */
135 if (!RtsFlags.CcFlags.doCostCentres)
138 time_profiling = rtsTrue;
140 /* Initialise the log file name */
141 prof_filename = stgMallocBytes(strlen(prog_argv[0]) + 6, "initProfiling");
142 sprintf(prof_filename, "%s.prof", prog_argv[0]);
144 /* Initialize counters for IDs */
149 /* Initialize Declaration lists to NULL */
153 /* Register all the cost centres / stacks in the program
154 * CC_MAIN gets link = 0, all others have non-zero link.
156 REGISTER_CC(CC_MAIN);
157 REGISTER_CC(CC_SYSTEM);
159 REGISTER_CC(CC_OVERHEAD);
160 REGISTER_CC(CC_SUBSUMED);
161 REGISTER_CC(CC_DONTZuCARE);
162 REGISTER_CCS(CCS_MAIN);
163 REGISTER_CCS(CCS_SYSTEM);
164 REGISTER_CCS(CCS_GC);
165 REGISTER_CCS(CCS_OVERHEAD);
166 REGISTER_CCS(CCS_SUBSUMED);
167 REGISTER_CCS(CCS_DONTZuCARE);
170 registerCostCentres();
172 /* find all the "special" cost centre stacks, and make them children
175 ASSERT(CCS_MAIN->prevStack == 0);
176 for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
177 next = ccs->prevStack;
179 ActualPush_(CCS_MAIN,ccs->cc,ccs);
183 /* profiling is the only client of the VTALRM system at the moment,
184 * so just install the profiling tick handler. */
185 install_vtalrm_handler(handleProfTick);
190 endProfiling ( void )
196 heapCensus ( bdescr *bd )
201 /* -----------------------------------------------------------------------------
202 Register Cost Centres
204 At the moment, this process just supplies a unique integer to each
205 statically declared cost centre and cost centre stack in the
208 The code generator inserts a small function "reg<moddule>" in each
209 module which registers any cost centres from that module and calls
210 the registration functions in each of the modules it imports. So,
211 if we call "regMain", each reachable module in the program will be
214 The reg* functions are compiled in the same way as STG code,
215 i.e. without normal C call/return conventions. Hence we must use
216 StgRun to call this stuff.
217 -------------------------------------------------------------------------- */
219 /* The registration functions use an explicit stack...
221 #define REGISTER_STACK_SIZE (BLOCK_SIZE * 4)
225 registerCostCentres ( void )
227 /* this storage will be reclaimed by the garbage collector,
230 register_stack = (F_ *)allocate(REGISTER_STACK_SIZE / sizeof(W_));
232 StgRun((StgFunPtr)stg_register);
236 /* -----------------------------------------------------------------------------
237 Cost-centre stack manipulation
238 -------------------------------------------------------------------------- */
241 CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
243 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
244 #define PushCostCentre _PushCostCentre
247 fprintf(stderr,"Pushing %s on ", cc->label);
249 fprintf(stderr,"\n"));
250 return PushCostCentre(ccs,cc);
255 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
257 CostCentreStack *temp_ccs;
259 if (ccs == EMPTY_STACK)
260 return ActualPush(ccs,cc);
265 /* check if we've already memoized this stack */
266 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
268 if (temp_ccs != EMPTY_STACK)
271 /* remove the CC to avoid loops */
272 ccs = RemoveCC(ccs,cc);
273 /* have a different stack now, need to check the memo table again */
274 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
275 if (temp_ccs != EMPTY_STACK)
278 return ActualPush(ccs,cc);
284 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
287 CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
289 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
290 #define AppendCCS _AppendCCS
292 CostCentreStack *ccs;
295 fprintf(stderr,"Appending ");
297 fprintf(stderr," to ");
299 fprintf(stderr,"\n");});
300 return AppendCCS(ccs1,ccs2);
305 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
307 CostCentreStack *ccs;
309 /* Optimisation: if we attempt to append a CCS to itself, we're
310 * going to end up with the same ccs after a great deal of pushing
311 * and removing of cost centres. Furthermore, we'll generate a lot
312 * of intermediate CCSs which would not otherwise be generated. So:
313 * let's cope with this common case first.
319 if (ccs2->cc->is_subsumed != CC_IS_BORING) {
323 ASSERT(ccs2->prevStack != NULL);
324 ccs = AppendCCS(ccs1, ccs2->prevStack);
325 return PushCostCentre(ccs,ccs2->cc);
329 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
331 CostCentreStack *new_ccs;
333 /* allocate space for a new CostCentreStack */
334 new_ccs = (CostCentreStack *) stgMallocBytes(sizeof(CostCentreStack), "Error allocating space for CostCentreStack");
336 return ActualPush_(ccs, cc, new_ccs);
339 static CostCentreStack *
340 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
342 /* assign values to each member of the structure */
343 ASSIGN_CCS_ID(new_ccs->ccsID);
346 new_ccs->prevStack = ccs;
348 new_ccs->indexTable = EMPTY_TABLE;
350 /* Initialise the various _scc_ counters to zero
352 new_ccs->scc_count = 0;
353 new_ccs->sub_scc_count = 0;
354 new_ccs->sub_cafcc_count = 0;
356 /* Initialize all other stats here. There should be a quick way
357 * that's easily used elsewhere too
359 new_ccs->time_ticks = 0;
360 new_ccs->mem_alloc = 0;
362 /* stacks are subsumed if either:
363 - the top cost centre is boring, and the rest of the CCS is subsumed
364 - the top cost centre is subsumed.
366 if (cc->is_subsumed == CC_IS_BORING) {
367 new_ccs->is_subsumed = ccs->is_subsumed;
369 new_ccs->is_subsumed = cc->is_subsumed;
372 /* update the memoization table for the parent stack */
373 if (ccs != EMPTY_STACK)
374 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc);
376 /* make sure this CC is declared at the next heap/time sample */
379 /* return a pointer to the new stack */
385 RemoveCC(CostCentreStack *ccs, CostCentre *cc)
387 CostCentreStack *del_ccs;
389 if (ccs == EMPTY_STACK) {
393 return ccs->prevStack;
396 del_ccs = RemoveCC(ccs->prevStack, cc);
398 if (del_ccs == EMPTY_STACK)
401 return PushCostCentre(del_ccs,ccs->cc);
409 IsInIndexTable(IndexTable *it, CostCentre *cc)
411 while (it!=EMPTY_TABLE)
419 /* otherwise we never found it so return EMPTY_TABLE */
425 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs, CostCentre *cc)
429 new_it = stgMallocBytes(sizeof(IndexTable), "AddToIndexTable");
432 new_it->ccs = new_ccs;
439 print_ccs (FILE *fp, CostCentreStack *ccs)
442 fprintf(fp, "Cost-Centre Stack: ");
447 print_ccs(fp, ccs->prevStack);
448 fprintf(fp, "->[%s,%s,%s]",
449 ccs->cc->label, ccs->cc->module, ccs->cc->group);
451 fprintf(fp, "[%s,%s,%s]",
452 ccs->cc->label, ccs->cc->module, ccs->cc->group);
462 DecCCS(CostCentreStack *ccs)
464 CCSDecList *temp_list;
467 (CCSDecList *) stgMallocBytes(sizeof(CCSDecList),
468 "Error allocating space for CCSDecList");
469 temp_list->ccs = ccs;
470 temp_list->nextList = New_CCS_LIST;
472 New_CCS_LIST = temp_list;
475 /* -----------------------------------------------------------------------------
476 Generating a time & allocation profiling report.
477 -------------------------------------------------------------------------- */
479 static FILE *prof_file;
482 report_ccs_profiling( void )
485 char temp[128]; /* sigh: magic constant */
486 rtsBool do_groups = rtsFalse;
488 if (!RtsFlags.CcFlags.doCostCentres)
495 count_ticks(CCS_MAIN);
497 /* open profiling output file */
498 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
499 fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
502 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
503 time_str(), "Final");
505 fprintf(prof_file, "\n\t ");
506 fprintf(prof_file, " %s", prog_argv[0]);
507 fprintf(prof_file, " +RTS");
508 for (count = 0; rts_argv[count]; count++)
509 fprintf(prof_file, " %s", rts_argv[count]);
510 fprintf(prof_file, " -RTS");
511 for (count = 1; prog_argv[count]; count++)
512 fprintf(prof_file, " %s", prog_argv[count]);
513 fprintf(prof_file, "\n\n");
515 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
516 total_ticks / (StgFloat) TICK_FREQUENCY,
517 total_ticks, TICK_MILLISECS);
519 fprintf(prof_file, "\ttotal alloc = %11s bytes",
520 ullong_format_string((ullong) total_alloc * sizeof(W_),
521 temp, rtsTrue/*commas*/));
522 /* ToDo: 64-bit error! */
524 #if defined(PROFILING_DETAIL_COUNTS)
525 fprintf(prof_file, " (%lu closures)", total_allocs);
527 fprintf(prof_file, " (excludes profiling overheads)\n\n");
529 fprintf(prof_file, "%-24s %-10s", "COST CENTRE", "MODULE");
532 do_groups = have_interesting_groups(Registered_CC);
533 if (do_groups) fprintf(prof_file, " %-11.11s", "GROUP");
536 fprintf(prof_file, "%8s %5s %5s %8s %5s", "scc", "%time", "%alloc", "inner", "cafs");
538 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
539 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
540 #if defined(PROFILING_DETAIL_COUNTS)
541 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
542 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
545 fprintf(prof_file, "\n\n");
547 reportCCS(pruneCCSTree(CCS_MAIN), 0);
553 reportCCS(CostCentreStack *ccs, nat indent)
559 ASSERT(cc == CC_MAIN || cc->link != 0);
561 /* Only print cost centres with non 0 data ! */
563 if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
564 ! ccs_to_ignore(ccs))
565 /* force printing of *all* cost centres if -P -P */
568 fprintf(prof_file, "%-*s%-*s %-10s",
569 indent, "", 24-indent, cc->label, cc->module);
572 if (do_groups) fprintf(prof_file, " %-11.11s",cc->group);
575 fprintf(prof_file, "%8ld %4.1f %4.1f %8ld %5ld",
577 total_ticks == 0 ? 0.0 : (ccs->time_ticks / (StgFloat) total_ticks * 100),
578 total_alloc == 0 ? 0.0 : (ccs->mem_alloc / (StgFloat) total_alloc * 100),
579 ccs->sub_scc_count, ccs->sub_cafcc_count);
581 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
582 fprintf(prof_file, " %5ld %9ld", ccs->time_ticks, ccs->mem_alloc*sizeof(W_));
583 #if defined(PROFILING_DETAIL_COUNTS)
584 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
585 ccs->mem_allocs, ccs->thunk_count,
586 ccs->function_count, ccs->pap_count,
587 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
588 ccs->caffun_subsumed);
591 fprintf(prof_file, "\n");
594 for (i = ccs->indexTable; i != 0; i = i->next) {
595 reportCCS(i->ccs, indent+1);
599 /* Traverse the cost centre stack tree and accumulate
603 count_ticks(CostCentreStack *ccs)
607 if (!ccs_to_ignore(ccs)) {
608 total_alloc += ccs->mem_alloc;
609 total_ticks += ccs->time_ticks;
611 for (i = ccs->indexTable; i != NULL; i = i->next)
615 /* return rtsTrue if it is one of the ones that
616 * should not be reported normally (because it confuses
620 ccs_to_ignore (CostCentreStack *ccs)
622 if ( ccs == CCS_OVERHEAD
623 || ccs == CCS_DONTZuCARE
625 || ccs == CCS_SYSTEM) {
632 static CostCentreStack *
633 pruneCCSTree( CostCentreStack *ccs )
635 CostCentreStack *ccs1;
636 IndexTable *i, **prev;
638 prev = &ccs->indexTable;
639 for (i = ccs->indexTable; i != 0; i = i->next) {
640 ccs1 = pruneCCSTree(i->ccs);
648 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
649 /* force printing of *all* cost centres if -P -P */ )
651 || ( ccs->indexTable != 0 )
652 || ( (ccs->scc_count || ccs->sub_scc_count ||
653 ccs->time_ticks || ccs->mem_alloc
654 || (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE
655 && (ccs->sub_cafcc_count
656 #if defined(PROFILING_DETAIL_COUNTS)
657 || cc->thunk_count || cc->function_count || cc->pap_count
668 printCCS ( CostCentreStack *ccs )
671 for (; ccs; ccs = ccs->prevStack ) {
672 fprintf(stderr,ccs->cc->label);
673 if (ccs->prevStack) {
681 #endif /* PROFILING */