1 /* -----------------------------------------------------------------------------
2 * $Id: Profiling.c,v 1.4 1999/03/25 13:14:06 simonm Exp $
4 * (c) The GHC Team, 1998-1999
6 * Support for profiling
8 * ---------------------------------------------------------------------------*/
17 #include "StgStartup.h"
19 #include "Proftimer.h"
23 * Global variables used to assign unique IDs to cc's, ccs's, and
31 /* Table sizes from old profiling system. Not sure if we'll need
34 nat time_intervals = 0;
35 nat earlier_ticks = 0;
42 /* Are we time-profiling?
44 rtsBool time_profiling = rtsFalse;
46 /* figures for the profiling report.
48 static lnat total_alloc, total_ticks;
50 /* Globals for opening the profiling log file
52 static char *prof_filename; /* prof report file name = <program>.prof */
53 static FILE *prof_file;
55 /* The Current Cost Centre Stack (for attributing costs)
57 CostCentreStack *CCCS;
59 /* Linked lists to keep track of cc's and ccs's that haven't
60 * been declared in the log file yet
63 CostCentreStack *CCS_LIST;
64 CCSDecList *New_CCS_LIST;
67 * Built-in cost centres and cost-centre stacks:
69 * MAIN is the root of the cost-centre stack tree. If there are
70 * no _scc_s in the program, all costs will be attributed
73 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
74 * RTS operations apart from garbage collection are attributed
77 * GC is the storage manager / garbage collector.
79 * OVERHEAD gets all costs generated by the profiling system
80 * itself. These are costs that would not be incurred
81 * during non-profiled execution of the program.
83 * SUBSUMED is the one-and-only CCS placed on top-level functions.
84 * It indicates that all costs are to be attributed to the
85 * enclosing cost centre stack. SUBSUMED never accumulates
88 * DONT_CARE is a placeholder cost-centre we assign to static
89 * constructors. It should *never* accumulate any costs.
92 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "MAIN", CC_IS_BORING,);
93 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", "MAIN", CC_IS_BORING,);
94 CC_DECLARE(CC_GC, "GC", "GC", "GC", CC_IS_BORING,);
95 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "PROFILING", CC_IS_CAF,);
96 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", "MAIN", CC_IS_SUBSUMED,);
97 CC_DECLARE(CC_DONTZuCARE,"DONT_CARE", "MAIN", "MAIN", CC_IS_BORING,);
99 CCS_DECLARE(CCS_MAIN, CC_MAIN, CC_IS_BORING, );
100 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, CC_IS_BORING, );
101 CCS_DECLARE(CCS_GC, CC_GC, CC_IS_BORING, );
102 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, CC_IS_CAF, );
103 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, CC_IS_SUBSUMED, );
104 CCS_DECLARE(CCS_DONTZuCARE, CC_DONTZuCARE, CC_IS_BORING, );
110 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
111 CostCentreStack *new_ccs );
113 static void registerCostCentres ( void );
114 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
115 static void count_ticks ( CostCentreStack *ccs );
116 static void reportCCS ( CostCentreStack *ccs, nat indent );
117 static void DecCCS ( CostCentreStack *ccs );
118 static CostCentreStack *pruneCCSTree ( CostCentreStack *ccs );
120 static void printCCS ( CostCentreStack *ccs );
123 /* -----------------------------------------------------------------------------
124 Initialise the profiling environment
125 -------------------------------------------------------------------------- */
130 CostCentreStack *ccs, *next;
132 /* for the benefit of allocate()... */
135 if (!RtsFlags.CcFlags.doCostCentres)
138 time_profiling = rtsTrue;
140 /* Initialise the log file name */
141 prof_filename = stgMallocBytes(strlen(prog_argv[0]) + 6, "initProfiling");
142 sprintf(prof_filename, "%s.prof", prog_argv[0]);
144 /* Initialize counters for IDs */
149 /* Initialize Declaration lists to NULL */
153 /* Register all the cost centres / stacks in the program
154 * CC_MAIN gets link = 0, all others have non-zero link.
156 REGISTER_CC(CC_MAIN);
157 REGISTER_CC(CC_SYSTEM);
159 REGISTER_CC(CC_OVERHEAD);
160 REGISTER_CC(CC_SUBSUMED);
161 REGISTER_CC(CC_DONTZuCARE);
162 REGISTER_CCS(CCS_MAIN);
163 REGISTER_CCS(CCS_SYSTEM);
164 REGISTER_CCS(CCS_GC);
165 REGISTER_CCS(CCS_OVERHEAD);
166 REGISTER_CCS(CCS_SUBSUMED);
167 REGISTER_CCS(CCS_DONTZuCARE);
170 registerCostCentres();
172 /* find all the "special" cost centre stacks, and make them children
175 ASSERT(CCS_MAIN->prevStack == 0);
176 for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
177 next = ccs->prevStack;
179 ActualPush_(CCS_MAIN,ccs->cc,ccs);
183 /* profiling is the only client of the VTALRM system at the moment,
184 * so just install the profiling tick handler. */
185 install_vtalrm_handler(handleProfTick);
190 endProfiling ( void )
196 heapCensus ( bdescr *bd )
201 /* -----------------------------------------------------------------------------
202 Register Cost Centres
204 At the moment, this process just supplies a unique integer to each
205 statically declared cost centre and cost centre stack in the
208 The code generator inserts a small function "reg<moddule>" in each
209 module which registers any cost centres from that module and calls
210 the registration functions in each of the modules it imports. So,
211 if we call "regMain", each reachable module in the program will be
214 The reg* functions are compiled in the same way as STG code,
215 i.e. without normal C call/return conventions. Hence we must use
216 StgRun to call this stuff.
217 -------------------------------------------------------------------------- */
219 /* The registration functions use an explicit stack...
221 #define REGISTER_STACK_SIZE (BLOCK_SIZE * 4)
225 registerCostCentres ( void )
227 /* this storage will be reclaimed by the garbage collector,
230 register_stack = (F_ *)allocate(REGISTER_STACK_SIZE / sizeof(W_));
232 StgRun((StgFunPtr)stg_register);
236 /* -----------------------------------------------------------------------------
237 Cost-centre stack manipulation
238 -------------------------------------------------------------------------- */
241 CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
243 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
244 #define PushCostCentre _PushCostCentre
247 fprintf(stderr,"Pushing %s on ", cc->label);
249 fprintf(stderr,"\n"));
250 return PushCostCentre(ccs,cc);
255 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
257 CostCentreStack *temp_ccs;
259 if (ccs == EMPTY_STACK)
260 return ActualPush(ccs,cc);
265 /* check if we've already memoized this stack */
266 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
268 if (temp_ccs != EMPTY_STACK)
271 /* remove the CC to avoid loops */
272 ccs = RemoveCC(ccs,cc);
273 /* have a different stack now, need to check the memo table again */
274 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
275 if (temp_ccs != EMPTY_STACK)
278 return ActualPush(ccs,cc);
284 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
287 CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
289 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
290 #define AppendCCS _AppendCCS
292 CostCentreStack *ccs;
294 fprintf(stderr,"Appending ");
296 fprintf(stderr," to ");
298 fprintf(stderr,"\n"));
299 return AppendCCS(ccs1,ccs2);
304 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
306 CostCentreStack *ccs;
308 /* Optimisation: if we attempt to append a CCS to itself, we're
309 * going to end up with the same ccs after a great deal of pushing
310 * and removing of cost centres. Furthermore, we'll generate a lot
311 * of intermediate CCSs which would not otherwise be generated. So:
312 * let's cope with this common case first.
318 if (ccs2->cc->is_subsumed != CC_IS_BORING) {
322 ASSERT(ccs2->prevStack != NULL);
323 ccs = AppendCCS(ccs1, ccs2->prevStack);
324 return PushCostCentre(ccs,ccs2->cc);
328 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
330 CostCentreStack *new_ccs;
332 /* allocate space for a new CostCentreStack */
333 new_ccs = (CostCentreStack *) stgMallocBytes(sizeof(CostCentreStack), "Error allocating space for CostCentreStack");
335 return ActualPush_(ccs, cc, new_ccs);
338 static CostCentreStack *
339 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
341 /* assign values to each member of the structure */
342 ASSIGN_CCS_ID(new_ccs->ccsID);
345 new_ccs->prevStack = ccs;
347 new_ccs->indexTable = EMPTY_TABLE;
349 /* Initialise the various _scc_ counters to zero
351 new_ccs->scc_count = 0;
352 new_ccs->sub_scc_count = 0;
353 new_ccs->sub_cafcc_count = 0;
355 /* Initialize all other stats here. There should be a quick way
356 * that's easily used elsewhere too
358 new_ccs->time_ticks = 0;
359 new_ccs->mem_alloc = 0;
361 /* stacks are subsumed if either:
362 - the top cost centre is boring, and the rest of the CCS is subsumed
363 - the top cost centre is subsumed.
365 if (cc->is_subsumed == CC_IS_BORING) {
366 new_ccs->is_subsumed = ccs->is_subsumed;
368 new_ccs->is_subsumed = cc->is_subsumed;
371 /* update the memoization table for the parent stack */
372 if (ccs != EMPTY_STACK)
373 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc);
375 /* make sure this CC is declared at the next heap/time sample */
378 /* return a pointer to the new stack */
384 RemoveCC(CostCentreStack *ccs, CostCentre *cc)
386 CostCentreStack *del_ccs;
388 if (ccs == EMPTY_STACK) {
392 return ccs->prevStack;
395 del_ccs = RemoveCC(ccs->prevStack, cc);
397 if (del_ccs == EMPTY_STACK)
400 return PushCostCentre(del_ccs,ccs->cc);
408 IsInIndexTable(IndexTable *it, CostCentre *cc)
410 while (it!=EMPTY_TABLE)
418 /* otherwise we never found it so return EMPTY_TABLE */
424 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs, CostCentre *cc)
428 new_it = stgMallocBytes(sizeof(IndexTable), "AddToIndexTable");
431 new_it->ccs = new_ccs;
438 print_ccs (FILE *fp, CostCentreStack *ccs)
441 fprintf(fp, "Cost-Centre Stack: ");
446 print_ccs(fp, ccs->prevStack);
447 fprintf(fp, "->[%s,%s,%s]",
448 ccs->cc->label, ccs->cc->module, ccs->cc->group);
450 fprintf(fp, "[%s,%s,%s]",
451 ccs->cc->label, ccs->cc->module, ccs->cc->group);
461 DecCCS(CostCentreStack *ccs)
463 CCSDecList *temp_list;
466 (CCSDecList *) stgMallocBytes(sizeof(CCSDecList),
467 "Error allocating space for CCSDecList");
468 temp_list->ccs = ccs;
469 temp_list->nextList = New_CCS_LIST;
471 New_CCS_LIST = temp_list;
474 /* -----------------------------------------------------------------------------
475 Generating a time & allocation profiling report.
476 -------------------------------------------------------------------------- */
478 static FILE *prof_file;
481 report_ccs_profiling( void )
484 char temp[128]; /* sigh: magic constant */
485 rtsBool do_groups = rtsFalse;
487 if (!RtsFlags.CcFlags.doCostCentres)
494 count_ticks(CCS_MAIN);
496 /* open profiling output file */
497 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
498 fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
501 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
502 time_str(), "Final");
504 fprintf(prof_file, "\n\t ");
505 fprintf(prof_file, " %s", prog_argv[0]);
506 fprintf(prof_file, " +RTS");
507 for (count = 0; rts_argv[count]; count++)
508 fprintf(prof_file, " %s", rts_argv[count]);
509 fprintf(prof_file, " -RTS");
510 for (count = 1; prog_argv[count]; count++)
511 fprintf(prof_file, " %s", prog_argv[count]);
512 fprintf(prof_file, "\n\n");
514 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
515 total_ticks / (StgFloat) TICK_FREQUENCY,
516 total_ticks, TICK_MILLISECS);
518 fprintf(prof_file, "\ttotal alloc = %11s bytes",
519 ullong_format_string((ullong) total_alloc * sizeof(W_),
520 temp, rtsTrue/*commas*/));
521 /* ToDo: 64-bit error! */
523 #if defined(PROFILING_DETAIL_COUNTS)
524 fprintf(prof_file, " (%lu closures)", total_allocs);
526 fprintf(prof_file, " (excludes profiling overheads)\n\n");
528 fprintf(prof_file, "%-24s %-10s", "COST CENTRE", "MODULE");
531 do_groups = have_interesting_groups(Registered_CC);
532 if (do_groups) fprintf(prof_file, " %-11.11s", "GROUP");
535 fprintf(prof_file, "%8s %5s %5s %8s %5s", "scc", "%time", "%alloc", "inner", "cafs");
537 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
538 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
539 #if defined(PROFILING_DETAIL_COUNTS)
540 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
541 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
544 fprintf(prof_file, "\n\n");
546 reportCCS(pruneCCSTree(CCS_MAIN), 0);
552 reportCCS(CostCentreStack *ccs, nat indent)
558 ASSERT(cc == CC_MAIN || cc->link != 0);
560 /* Only print cost centres with non 0 data ! */
562 if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
563 ! ccs_to_ignore(ccs))
564 /* force printing of *all* cost centres if -P -P */
567 fprintf(prof_file, "%-*s%-*s %-10s",
568 indent, "", 24-indent, cc->label, cc->module);
571 if (do_groups) fprintf(prof_file, " %-11.11s",cc->group);
574 fprintf(prof_file, "%8ld %4.1f %4.1f %8ld %5ld",
576 total_ticks == 0 ? 0.0 : (ccs->time_ticks / (StgFloat) total_ticks * 100),
577 total_alloc == 0 ? 0.0 : (ccs->mem_alloc / (StgFloat) total_alloc * 100),
578 ccs->sub_scc_count, ccs->sub_cafcc_count);
580 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
581 fprintf(prof_file, " %5ld %9ld", ccs->time_ticks, ccs->mem_alloc*sizeof(W_));
582 #if defined(PROFILING_DETAIL_COUNTS)
583 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
584 ccs->mem_allocs, ccs->thunk_count,
585 ccs->function_count, ccs->pap_count,
586 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
587 ccs->caffun_subsumed);
590 fprintf(prof_file, "\n");
593 for (i = ccs->indexTable; i != 0; i = i->next) {
594 reportCCS(i->ccs, indent+1);
598 /* Traverse the cost centre stack tree and accumulate
602 count_ticks(CostCentreStack *ccs)
606 if (!ccs_to_ignore(ccs)) {
607 total_alloc += ccs->mem_alloc;
608 total_ticks += ccs->time_ticks;
610 for (i = ccs->indexTable; i != NULL; i = i->next)
614 /* return rtsTrue if it is one of the ones that
615 * should not be reported normally (because it confuses
619 ccs_to_ignore (CostCentreStack *ccs)
621 if ( ccs == CCS_OVERHEAD
622 || ccs == CCS_DONTZuCARE
624 || ccs == CCS_SYSTEM) {
631 static CostCentreStack *
632 pruneCCSTree( CostCentreStack *ccs )
634 CostCentreStack *ccs1;
635 IndexTable *i, **prev;
637 prev = &ccs->indexTable;
638 for (i = ccs->indexTable; i != 0; i = i->next) {
639 ccs1 = pruneCCSTree(i->ccs);
647 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
648 /* force printing of *all* cost centres if -P -P */ )
650 || ( ccs->indexTable != 0 )
651 || ( (ccs->scc_count || ccs->sub_scc_count ||
652 ccs->time_ticks || ccs->mem_alloc
653 || (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE
654 && (ccs->sub_cafcc_count
655 #if defined(PROFILING_DETAIL_COUNTS)
656 || cc->thunk_count || cc->function_count || cc->pap_count
667 printCCS ( CostCentreStack *ccs )
670 for (; ccs; ccs = ccs->prevStack ) {
671 fprintf(stderr,ccs->cc->label);
672 if (ccs->prevStack) {
680 #endif /* PROFILING */