1 /* -----------------------------------------------------------------------------
2 * $Id: Profiling.c,v 1.6 1999/04/23 09:47:32 simonm Exp $
4 * (c) The GHC Team, 1998-1999
6 * Support for profiling
8 * ---------------------------------------------------------------------------*/
17 #include "StgStartup.h"
19 #include "Proftimer.h"
23 * Global variables used to assign unique IDs to cc's, ccs's, and
31 /* Table sizes from old profiling system. Not sure if we'll need
34 nat time_intervals = 0;
35 nat earlier_ticks = 0;
42 /* Are we time-profiling?
44 rtsBool time_profiling = rtsFalse;
46 /* figures for the profiling report.
48 static lnat total_alloc, total_ticks;
50 /* Globals for opening the profiling log file
52 static char *prof_filename; /* prof report file name = <program>.prof */
53 static FILE *prof_file;
55 /* The Current Cost Centre Stack (for attributing costs)
57 CostCentreStack *CCCS;
59 /* Linked lists to keep track of cc's and ccs's that haven't
60 * been declared in the log file yet
63 CostCentreStack *CCS_LIST;
64 CCSDecList *New_CCS_LIST;
67 * Built-in cost centres and cost-centre stacks:
69 * MAIN is the root of the cost-centre stack tree. If there are
70 * no _scc_s in the program, all costs will be attributed
73 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
74 * RTS operations apart from garbage collection are attributed
77 * GC is the storage manager / garbage collector.
79 * OVERHEAD gets all costs generated by the profiling system
80 * itself. These are costs that would not be incurred
81 * during non-profiled execution of the program.
83 * SUBSUMED is the one-and-only CCS placed on top-level functions.
84 * It indicates that all costs are to be attributed to the
85 * enclosing cost centre stack. SUBSUMED never accumulates
88 * DONT_CARE is a placeholder cost-centre we assign to static
89 * constructors. It should *never* accumulate any costs.
92 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "MAIN", CC_IS_BORING,);
93 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", "MAIN", CC_IS_BORING,);
94 CC_DECLARE(CC_GC, "GC", "GC", "GC", CC_IS_BORING,);
95 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "PROFILING", CC_IS_CAF,);
96 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", "MAIN", CC_IS_SUBSUMED,);
97 CC_DECLARE(CC_DONTZuCARE,"DONT_CARE", "MAIN", "MAIN", CC_IS_BORING,);
99 CCS_DECLARE(CCS_MAIN, CC_MAIN, CC_IS_BORING, );
100 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, CC_IS_BORING, );
101 CCS_DECLARE(CCS_GC, CC_GC, CC_IS_BORING, );
102 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, CC_IS_CAF, );
103 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, CC_IS_SUBSUMED, );
104 CCS_DECLARE(CCS_DONTZuCARE, CC_DONTZuCARE, CC_IS_BORING, );
110 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
111 CostCentreStack *new_ccs );
113 static void registerCostCentres ( void );
114 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
115 static void count_ticks ( CostCentreStack *ccs );
116 static void reportCCS ( CostCentreStack *ccs, nat indent );
117 static void DecCCS ( CostCentreStack *ccs );
118 static CostCentreStack *pruneCCSTree ( CostCentreStack *ccs );
120 static void printCCS ( CostCentreStack *ccs );
123 /* -----------------------------------------------------------------------------
124 Initialise the profiling environment
125 -------------------------------------------------------------------------- */
130 CostCentreStack *ccs, *next;
132 /* for the benefit of allocate()... */
135 /* Initialize counters for IDs */
140 /* Initialize Declaration lists to NULL */
144 /* Register all the cost centres / stacks in the program
145 * CC_MAIN gets link = 0, all others have non-zero link.
147 REGISTER_CC(CC_MAIN);
148 REGISTER_CC(CC_SYSTEM);
150 REGISTER_CC(CC_OVERHEAD);
151 REGISTER_CC(CC_SUBSUMED);
152 REGISTER_CC(CC_DONTZuCARE);
153 REGISTER_CCS(CCS_MAIN);
154 REGISTER_CCS(CCS_SYSTEM);
155 REGISTER_CCS(CCS_GC);
156 REGISTER_CCS(CCS_OVERHEAD);
157 REGISTER_CCS(CCS_SUBSUMED);
158 REGISTER_CCS(CCS_DONTZuCARE);
161 registerCostCentres();
164 if (!RtsFlags.CcFlags.doCostCentres)
167 time_profiling = rtsTrue;
169 /* Initialise the log file name */
170 prof_filename = stgMallocBytes(strlen(prog_argv[0]) + 6, "initProfiling");
171 sprintf(prof_filename, "%s.prof", prog_argv[0]);
173 /* find all the "special" cost centre stacks, and make them children
176 ASSERT(CCS_MAIN->prevStack == 0);
177 CCS_MAIN->root = CC_MAIN;
178 for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
179 next = ccs->prevStack;
181 ActualPush_(CCS_MAIN,ccs->cc,ccs);
186 /* profiling is the only client of the VTALRM system at the moment,
187 * so just install the profiling tick handler. */
188 install_vtalrm_handler(handleProfTick);
193 endProfiling ( void )
199 heapCensus ( bdescr *bd UNUSED )
204 /* -----------------------------------------------------------------------------
205 Register Cost Centres
207 At the moment, this process just supplies a unique integer to each
208 statically declared cost centre and cost centre stack in the
211 The code generator inserts a small function "reg<moddule>" in each
212 module which registers any cost centres from that module and calls
213 the registration functions in each of the modules it imports. So,
214 if we call "regMain", each reachable module in the program will be
217 The reg* functions are compiled in the same way as STG code,
218 i.e. without normal C call/return conventions. Hence we must use
219 StgRun to call this stuff.
220 -------------------------------------------------------------------------- */
222 /* The registration functions use an explicit stack...
224 #define REGISTER_STACK_SIZE (BLOCK_SIZE * 4)
228 registerCostCentres ( void )
230 /* this storage will be reclaimed by the garbage collector,
233 register_stack = (F_ *)allocate(REGISTER_STACK_SIZE / sizeof(W_));
235 StgRun((StgFunPtr)stg_register);
239 /* -----------------------------------------------------------------------------
240 Set cost centre stack when entering a function. Here we implement
243 "if CCSfn is an initial segment of CCCS,
244 then set CCCS to CCSfn,
245 else append CCSfn to CCCS"
246 -------------------------------------------------------------------------- */
247 rtsBool entering_PAP;
250 EnterFunCCS ( CostCentreStack *cccs, CostCentreStack *ccsfn )
252 /* PAP_entry has already set CCCS for us */
254 entering_PAP = rtsFalse;
258 if (cccs->root == ccsfn->root) {
261 return AppendCCS(cccs,ccsfn);
265 /* -----------------------------------------------------------------------------
266 Cost-centre stack manipulation
267 -------------------------------------------------------------------------- */
270 CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
272 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
273 #define PushCostCentre _PushCostCentre
276 fprintf(stderr,"Pushing %s on ", cc->label);
278 fprintf(stderr,"\n"));
279 return PushCostCentre(ccs,cc);
284 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
286 CostCentreStack *temp_ccs;
288 if (ccs == EMPTY_STACK)
289 return ActualPush(ccs,cc);
294 /* check if we've already memoized this stack */
295 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
297 if (temp_ccs != EMPTY_STACK)
300 /* remove the CC to avoid loops */
301 ccs = RemoveCC(ccs,cc);
302 /* have a different stack now, need to check the memo table again */
303 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
304 if (temp_ccs != EMPTY_STACK)
307 return ActualPush(ccs,cc);
313 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
316 CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
318 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
319 #define AppendCCS _AppendCCS
323 fprintf(stderr,"Appending ");
325 fprintf(stderr," to ");
327 fprintf(stderr,"\n");});
328 return AppendCCS(ccs1,ccs2);
333 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
335 CostCentreStack *ccs;
337 /* Optimisation: if we attempt to append a CCS to itself, we're
338 * going to end up with the same ccs after a great deal of pushing
339 * and removing of cost centres. Furthermore, we'll generate a lot
340 * of intermediate CCSs which would not otherwise be generated. So:
341 * let's cope with this common case first.
347 if (ccs2->cc->is_subsumed != CC_IS_BORING) {
351 ASSERT(ccs2->prevStack != NULL);
352 ccs = AppendCCS(ccs1, ccs2->prevStack);
353 return PushCostCentre(ccs,ccs2->cc);
357 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
359 CostCentreStack *new_ccs;
361 /* allocate space for a new CostCentreStack */
362 new_ccs = (CostCentreStack *) stgMallocBytes(sizeof(CostCentreStack), "Error allocating space for CostCentreStack");
364 return ActualPush_(ccs, cc, new_ccs);
367 static CostCentreStack *
368 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
370 /* assign values to each member of the structure */
371 ASSIGN_CCS_ID(new_ccs->ccsID);
374 new_ccs->prevStack = ccs;
376 new_ccs->indexTable = EMPTY_TABLE;
378 /* Initialise the various _scc_ counters to zero
380 new_ccs->scc_count = 0;
381 new_ccs->sub_scc_count = 0;
382 new_ccs->sub_cafcc_count = 0;
384 /* Initialize all other stats here. There should be a quick way
385 * that's easily used elsewhere too
387 new_ccs->time_ticks = 0;
388 new_ccs->mem_alloc = 0;
390 new_ccs->root = ccs->root;
392 /* update the memoization table for the parent stack */
393 if (ccs != EMPTY_STACK)
394 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc);
396 /* make sure this CC is declared at the next heap/time sample */
399 /* return a pointer to the new stack */
405 RemoveCC(CostCentreStack *ccs, CostCentre *cc)
407 CostCentreStack *del_ccs;
409 if (ccs == EMPTY_STACK) {
413 return ccs->prevStack;
416 del_ccs = RemoveCC(ccs->prevStack, cc);
418 if (del_ccs == EMPTY_STACK)
421 return PushCostCentre(del_ccs,ccs->cc);
429 IsInIndexTable(IndexTable *it, CostCentre *cc)
431 while (it!=EMPTY_TABLE)
439 /* otherwise we never found it so return EMPTY_TABLE */
445 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs, CostCentre *cc)
449 new_it = stgMallocBytes(sizeof(IndexTable), "AddToIndexTable");
452 new_it->ccs = new_ccs;
459 print_ccs (FILE *fp, CostCentreStack *ccs)
462 fprintf(fp, "Cost-Centre Stack: ");
467 print_ccs(fp, ccs->prevStack);
468 fprintf(fp, "->[%s,%s,%s]",
469 ccs->cc->label, ccs->cc->module, ccs->cc->group);
471 fprintf(fp, "[%s,%s,%s]",
472 ccs->cc->label, ccs->cc->module, ccs->cc->group);
482 DecCCS(CostCentreStack *ccs)
484 CCSDecList *temp_list;
487 (CCSDecList *) stgMallocBytes(sizeof(CCSDecList),
488 "Error allocating space for CCSDecList");
489 temp_list->ccs = ccs;
490 temp_list->nextList = New_CCS_LIST;
492 New_CCS_LIST = temp_list;
495 /* -----------------------------------------------------------------------------
496 Generating a time & allocation profiling report.
497 -------------------------------------------------------------------------- */
499 static FILE *prof_file;
502 report_ccs_profiling( void )
505 char temp[128]; /* sigh: magic constant */
506 rtsBool do_groups = rtsFalse;
508 if (!RtsFlags.CcFlags.doCostCentres)
515 count_ticks(CCS_MAIN);
517 /* open profiling output file */
518 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
519 fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
522 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
523 time_str(), "Final");
525 fprintf(prof_file, "\n\t ");
526 fprintf(prof_file, " %s", prog_argv[0]);
527 fprintf(prof_file, " +RTS");
528 for (count = 0; rts_argv[count]; count++)
529 fprintf(prof_file, " %s", rts_argv[count]);
530 fprintf(prof_file, " -RTS");
531 for (count = 1; prog_argv[count]; count++)
532 fprintf(prof_file, " %s", prog_argv[count]);
533 fprintf(prof_file, "\n\n");
535 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
536 total_ticks / (StgFloat) TICK_FREQUENCY,
537 total_ticks, TICK_MILLISECS);
539 fprintf(prof_file, "\ttotal alloc = %11s bytes",
540 ullong_format_string((ullong) total_alloc * sizeof(W_),
541 temp, rtsTrue/*commas*/));
542 /* ToDo: 64-bit error! */
544 #if defined(PROFILING_DETAIL_COUNTS)
545 fprintf(prof_file, " (%lu closures)", total_allocs);
547 fprintf(prof_file, " (excludes profiling overheads)\n\n");
549 fprintf(prof_file, "%-24s %-10s", "COST CENTRE", "MODULE");
552 do_groups = have_interesting_groups(Registered_CC);
553 if (do_groups) fprintf(prof_file, " %-11.11s", "GROUP");
556 fprintf(prof_file, "%8s %5s %5s %8s %5s", "scc", "%time", "%alloc", "inner", "cafs");
558 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
559 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
560 #if defined(PROFILING_DETAIL_COUNTS)
561 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
562 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
565 fprintf(prof_file, "\n\n");
567 reportCCS(pruneCCSTree(CCS_MAIN), 0);
573 reportCCS(CostCentreStack *ccs, nat indent)
579 ASSERT(cc == CC_MAIN || cc->link != 0);
581 /* Only print cost centres with non 0 data ! */
583 if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
584 ! ccs_to_ignore(ccs))
585 /* force printing of *all* cost centres if -P -P */
588 fprintf(prof_file, "%-*s%-*s %-10s",
589 indent, "", 24-indent, cc->label, cc->module);
592 if (do_groups) fprintf(prof_file, " %-11.11s",cc->group);
595 fprintf(prof_file, "%8ld %4.1f %4.1f %8ld %5ld",
597 total_ticks == 0 ? 0.0 : (ccs->time_ticks / (StgFloat) total_ticks * 100),
598 total_alloc == 0 ? 0.0 : (ccs->mem_alloc / (StgFloat) total_alloc * 100),
599 ccs->sub_scc_count, ccs->sub_cafcc_count);
601 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
602 fprintf(prof_file, " %5ld %9ld", ccs->time_ticks, ccs->mem_alloc*sizeof(W_));
603 #if defined(PROFILING_DETAIL_COUNTS)
604 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
605 ccs->mem_allocs, ccs->thunk_count,
606 ccs->function_count, ccs->pap_count,
607 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
608 ccs->caffun_subsumed);
611 fprintf(prof_file, "\n");
614 for (i = ccs->indexTable; i != 0; i = i->next) {
615 reportCCS(i->ccs, indent+1);
619 /* Traverse the cost centre stack tree and accumulate
623 count_ticks(CostCentreStack *ccs)
627 if (!ccs_to_ignore(ccs)) {
628 total_alloc += ccs->mem_alloc;
629 total_ticks += ccs->time_ticks;
631 for (i = ccs->indexTable; i != NULL; i = i->next)
635 /* return rtsTrue if it is one of the ones that
636 * should not be reported normally (because it confuses
640 ccs_to_ignore (CostCentreStack *ccs)
642 if ( ccs == CCS_OVERHEAD
643 || ccs == CCS_DONTZuCARE
645 || ccs == CCS_SYSTEM) {
652 static CostCentreStack *
653 pruneCCSTree( CostCentreStack *ccs )
655 CostCentreStack *ccs1;
656 IndexTable *i, **prev;
658 prev = &ccs->indexTable;
659 for (i = ccs->indexTable; i != 0; i = i->next) {
660 ccs1 = pruneCCSTree(i->ccs);
668 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
669 /* force printing of *all* cost centres if -P -P */ )
671 || ( ccs->indexTable != 0 )
672 || ( (ccs->scc_count || ccs->sub_scc_count ||
673 ccs->time_ticks || ccs->mem_alloc
674 || (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE
675 && (ccs->sub_cafcc_count
676 #if defined(PROFILING_DETAIL_COUNTS)
677 || cc->thunk_count || cc->function_count || cc->pap_count
688 printCCS ( CostCentreStack *ccs )
691 for (; ccs; ccs = ccs->prevStack ) {
692 fprintf(stderr,ccs->cc->label);
693 if (ccs->prevStack) {
701 #endif /* PROFILING */