1 /* -----------------------------------------------------------------------------
2 * $Id: Profiling.c,v 1.8 1999/08/25 16:11:49 simonmar Exp $
4 * (c) The GHC Team, 1998-1999
6 * Support for profiling
8 * ---------------------------------------------------------------------------*/
17 #include "StgStartup.h"
19 #include "Proftimer.h"
23 * Global variables used to assign unique IDs to cc's, ccs's, and
31 /* Table sizes from old profiling system. Not sure if we'll need
34 nat time_intervals = 0;
35 nat earlier_ticks = 0;
42 /* Are we time-profiling?
44 rtsBool time_profiling = rtsFalse;
46 /* figures for the profiling report.
48 static lnat total_alloc, total_prof_ticks;
50 /* Globals for opening the profiling log file
52 static char *prof_filename; /* prof report file name = <program>.prof */
53 static FILE *prof_file;
55 /* The Current Cost Centre Stack (for attributing costs)
57 CostCentreStack *CCCS;
59 /* Linked lists to keep track of cc's and ccs's that haven't
60 * been declared in the log file yet
63 CostCentreStack *CCS_LIST;
64 CCSDecList *New_CCS_LIST;
67 * Built-in cost centres and cost-centre stacks:
69 * MAIN is the root of the cost-centre stack tree. If there are
70 * no _scc_s in the program, all costs will be attributed
73 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
74 * RTS operations apart from garbage collection are attributed
77 * GC is the storage manager / garbage collector.
79 * OVERHEAD gets all costs generated by the profiling system
80 * itself. These are costs that would not be incurred
81 * during non-profiled execution of the program.
83 * SUBSUMED is the one-and-only CCS placed on top-level functions.
84 * It indicates that all costs are to be attributed to the
85 * enclosing cost centre stack. SUBSUMED never accumulates
88 * DONT_CARE is a placeholder cost-centre we assign to static
89 * constructors. It should *never* accumulate any costs.
92 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "MAIN", CC_IS_BORING,);
93 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", "MAIN", CC_IS_BORING,);
94 CC_DECLARE(CC_GC, "GC", "GC", "GC", CC_IS_BORING,);
95 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "PROFILING", CC_IS_CAF,);
96 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", "MAIN", CC_IS_SUBSUMED,);
97 CC_DECLARE(CC_DONTZuCARE,"DONT_CARE", "MAIN", "MAIN", CC_IS_BORING,);
99 CCS_DECLARE(CCS_MAIN, CC_MAIN, CC_IS_BORING, );
100 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, CC_IS_BORING, );
101 CCS_DECLARE(CCS_GC, CC_GC, CC_IS_BORING, );
102 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, CC_IS_CAF, );
103 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, CC_IS_SUBSUMED, );
104 CCS_DECLARE(CCS_DONTZuCARE, CC_DONTZuCARE, CC_IS_BORING, );
110 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
111 CostCentreStack *new_ccs );
113 static void registerCostCentres ( void );
114 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
115 static void count_ticks ( CostCentreStack *ccs );
116 static void reportCCS ( CostCentreStack *ccs, nat indent );
117 static void DecCCS ( CostCentreStack *ccs );
118 static CostCentreStack *pruneCCSTree ( CostCentreStack *ccs );
120 static void printCCS ( CostCentreStack *ccs );
123 /* -----------------------------------------------------------------------------
124 Initialise the profiling environment
125 -------------------------------------------------------------------------- */
130 CostCentreStack *ccs, *next;
132 /* for the benefit of allocate()... */
135 /* Initialize counters for IDs */
140 /* Initialize Declaration lists to NULL */
144 /* Register all the cost centres / stacks in the program
145 * CC_MAIN gets link = 0, all others have non-zero link.
147 REGISTER_CC(CC_MAIN);
148 REGISTER_CC(CC_SYSTEM);
150 REGISTER_CC(CC_OVERHEAD);
151 REGISTER_CC(CC_SUBSUMED);
152 REGISTER_CC(CC_DONTZuCARE);
153 REGISTER_CCS(CCS_MAIN);
154 REGISTER_CCS(CCS_SYSTEM);
155 REGISTER_CCS(CCS_GC);
156 REGISTER_CCS(CCS_OVERHEAD);
157 REGISTER_CCS(CCS_SUBSUMED);
158 REGISTER_CCS(CCS_DONTZuCARE);
161 registerCostCentres();
164 if (!RtsFlags.CcFlags.doCostCentres)
167 time_profiling = rtsTrue;
169 /* Initialise the log file name */
170 prof_filename = stgMallocBytes(strlen(prog_argv[0]) + 6, "initProfiling");
171 sprintf(prof_filename, "%s.prof", prog_argv[0]);
173 /* find all the "special" cost centre stacks, and make them children
176 ASSERT(CCS_MAIN->prevStack == 0);
177 CCS_MAIN->root = CC_MAIN;
178 for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
179 next = ccs->prevStack;
181 ActualPush_(CCS_MAIN,ccs->cc,ccs);
191 endProfiling ( void )
197 heapCensus ( bdescr *bd STG_UNUSED )
202 /* -----------------------------------------------------------------------------
203 Register Cost Centres
205 At the moment, this process just supplies a unique integer to each
206 statically declared cost centre and cost centre stack in the
209 The code generator inserts a small function "reg<moddule>" in each
210 module which registers any cost centres from that module and calls
211 the registration functions in each of the modules it imports. So,
212 if we call "regMain", each reachable module in the program will be
215 The reg* functions are compiled in the same way as STG code,
216 i.e. without normal C call/return conventions. Hence we must use
217 StgRun to call this stuff.
218 -------------------------------------------------------------------------- */
220 /* The registration functions use an explicit stack...
222 #define REGISTER_STACK_SIZE (BLOCK_SIZE * 4)
226 registerCostCentres ( void )
228 /* this storage will be reclaimed by the garbage collector,
231 register_stack = (F_ *)allocate(REGISTER_STACK_SIZE / sizeof(W_));
233 StgRun((StgFunPtr)stg_register);
237 /* -----------------------------------------------------------------------------
238 Set cost centre stack when entering a function. Here we implement
241 "if CCSfn is an initial segment of CCCS,
242 then set CCCS to CCSfn,
243 else append CCSfn to CCCS"
244 -------------------------------------------------------------------------- */
245 rtsBool entering_PAP;
248 EnterFunCCS ( CostCentreStack *cccs, CostCentreStack *ccsfn )
250 /* PAP_entry has already set CCCS for us */
252 entering_PAP = rtsFalse;
256 if (cccs->root == ccsfn->root) {
259 return AppendCCS(cccs,ccsfn);
263 /* -----------------------------------------------------------------------------
264 Cost-centre stack manipulation
265 -------------------------------------------------------------------------- */
268 CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
270 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
271 #define PushCostCentre _PushCostCentre
274 fprintf(stderr,"Pushing %s on ", cc->label);
276 fprintf(stderr,"\n"));
277 return PushCostCentre(ccs,cc);
282 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
284 CostCentreStack *temp_ccs;
286 if (ccs == EMPTY_STACK)
287 return ActualPush(ccs,cc);
292 /* check if we've already memoized this stack */
293 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
295 if (temp_ccs != EMPTY_STACK)
298 /* remove the CC to avoid loops */
299 ccs = RemoveCC(ccs,cc);
300 /* have a different stack now, need to check the memo table again */
301 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
302 if (temp_ccs != EMPTY_STACK)
305 return ActualPush(ccs,cc);
311 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
314 CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
316 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
317 #define AppendCCS _AppendCCS
321 fprintf(stderr,"Appending ");
323 fprintf(stderr," to ");
325 fprintf(stderr,"\n");});
326 return AppendCCS(ccs1,ccs2);
331 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
333 CostCentreStack *ccs;
335 /* Optimisation: if we attempt to append a CCS to itself, we're
336 * going to end up with the same ccs after a great deal of pushing
337 * and removing of cost centres. Furthermore, we'll generate a lot
338 * of intermediate CCSs which would not otherwise be generated. So:
339 * let's cope with this common case first.
345 if (ccs2->cc->is_subsumed != CC_IS_BORING) {
349 ASSERT(ccs2->prevStack != NULL);
350 ccs = AppendCCS(ccs1, ccs2->prevStack);
351 return PushCostCentre(ccs,ccs2->cc);
355 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
357 CostCentreStack *new_ccs;
359 /* allocate space for a new CostCentreStack */
360 new_ccs = (CostCentreStack *) stgMallocBytes(sizeof(CostCentreStack), "Error allocating space for CostCentreStack");
362 return ActualPush_(ccs, cc, new_ccs);
365 static CostCentreStack *
366 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
368 /* assign values to each member of the structure */
369 ASSIGN_CCS_ID(new_ccs->ccsID);
372 new_ccs->prevStack = ccs;
374 new_ccs->indexTable = EMPTY_TABLE;
376 /* Initialise the various _scc_ counters to zero
378 new_ccs->scc_count = 0;
379 new_ccs->sub_scc_count = 0;
380 new_ccs->sub_cafcc_count = 0;
382 /* Initialize all other stats here. There should be a quick way
383 * that's easily used elsewhere too
385 new_ccs->time_ticks = 0;
386 new_ccs->mem_alloc = 0;
388 new_ccs->root = ccs->root;
390 /* update the memoization table for the parent stack */
391 if (ccs != EMPTY_STACK)
392 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc);
394 /* make sure this CC is declared at the next heap/time sample */
397 /* return a pointer to the new stack */
403 RemoveCC(CostCentreStack *ccs, CostCentre *cc)
405 CostCentreStack *del_ccs;
407 if (ccs == EMPTY_STACK) {
411 return ccs->prevStack;
414 del_ccs = RemoveCC(ccs->prevStack, cc);
416 if (del_ccs == EMPTY_STACK)
419 return PushCostCentre(del_ccs,ccs->cc);
427 IsInIndexTable(IndexTable *it, CostCentre *cc)
429 while (it!=EMPTY_TABLE)
437 /* otherwise we never found it so return EMPTY_TABLE */
443 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs, CostCentre *cc)
447 new_it = stgMallocBytes(sizeof(IndexTable), "AddToIndexTable");
450 new_it->ccs = new_ccs;
457 print_ccs (FILE *fp, CostCentreStack *ccs)
460 fprintf(fp, "Cost-Centre Stack: ");
465 print_ccs(fp, ccs->prevStack);
466 fprintf(fp, "->[%s,%s,%s]",
467 ccs->cc->label, ccs->cc->module, ccs->cc->group);
469 fprintf(fp, "[%s,%s,%s]",
470 ccs->cc->label, ccs->cc->module, ccs->cc->group);
480 DecCCS(CostCentreStack *ccs)
482 CCSDecList *temp_list;
485 (CCSDecList *) stgMallocBytes(sizeof(CCSDecList),
486 "Error allocating space for CCSDecList");
487 temp_list->ccs = ccs;
488 temp_list->nextList = New_CCS_LIST;
490 New_CCS_LIST = temp_list;
493 /* -----------------------------------------------------------------------------
494 Generating a time & allocation profiling report.
495 -------------------------------------------------------------------------- */
497 static FILE *prof_file;
500 report_ccs_profiling( void )
503 char temp[128]; /* sigh: magic constant */
505 rtsBool do_groups = rtsFalse;
508 if (!RtsFlags.CcFlags.doCostCentres)
513 total_prof_ticks = 0;
515 count_ticks(CCS_MAIN);
517 /* open profiling output file */
518 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
519 fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
522 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
523 time_str(), "Final");
525 fprintf(prof_file, "\n\t ");
526 fprintf(prof_file, " %s", prog_argv[0]);
527 fprintf(prof_file, " +RTS");
528 for (count = 0; rts_argv[count]; count++)
529 fprintf(prof_file, " %s", rts_argv[count]);
530 fprintf(prof_file, " -RTS");
531 for (count = 1; prog_argv[count]; count++)
532 fprintf(prof_file, " %s", prog_argv[count]);
533 fprintf(prof_file, "\n\n");
535 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
536 total_prof_ticks / (StgFloat) TICK_FREQUENCY,
537 total_prof_ticks, TICK_MILLISECS);
539 fprintf(prof_file, "\ttotal alloc = %11s bytes",
540 ullong_format_string((ullong) total_alloc * sizeof(W_),
541 temp, rtsTrue/*commas*/));
542 /* ToDo: 64-bit error! */
544 #if defined(PROFILING_DETAIL_COUNTS)
545 fprintf(prof_file, " (%lu closures)", total_allocs);
547 fprintf(prof_file, " (excludes profiling overheads)\n\n");
549 fprintf(prof_file, "%-24s %-10s", "COST CENTRE", "MODULE");
552 do_groups = have_interesting_groups(Registered_CC);
553 if (do_groups) fprintf(prof_file, " %-11.11s", "GROUP");
556 fprintf(prof_file, "%8s %5s %5s %8s %5s", "scc", "%time", "%alloc", "inner", "cafs");
558 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
559 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
560 #if defined(PROFILING_DETAIL_COUNTS)
561 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
562 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
565 fprintf(prof_file, "\n\n");
567 reportCCS(pruneCCSTree(CCS_MAIN), 0);
573 reportCCS(CostCentreStack *ccs, nat indent)
579 ASSERT(cc == CC_MAIN || cc->link != 0);
581 /* Only print cost centres with non 0 data ! */
583 if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
584 ! ccs_to_ignore(ccs))
585 /* force printing of *all* cost centres if -P -P */
588 fprintf(prof_file, "%-*s%-*s %-10s",
589 indent, "", 24-indent, cc->label, cc->module);
592 if (do_groups) fprintf(prof_file, " %-11.11s",cc->group);
595 fprintf(prof_file, "%8ld %4.1f %4.1f %8ld %5ld",
597 total_prof_ticks == 0 ? 0.0 : (ccs->time_ticks / (StgFloat) total_prof_ticks * 100),
598 total_alloc == 0 ? 0.0 : (ccs->mem_alloc / (StgFloat) total_alloc * 100),
599 ccs->sub_scc_count, ccs->sub_cafcc_count);
601 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
602 fprintf(prof_file, " %5ld %9ld", ccs->time_ticks, ccs->mem_alloc*sizeof(W_));
603 #if defined(PROFILING_DETAIL_COUNTS)
604 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
605 ccs->mem_allocs, ccs->thunk_count,
606 ccs->function_count, ccs->pap_count,
607 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
608 ccs->caffun_subsumed);
611 fprintf(prof_file, "\n");
614 for (i = ccs->indexTable; i != 0; i = i->next) {
615 reportCCS(i->ccs, indent+1);
619 /* Traverse the cost centre stack tree and accumulate
623 count_ticks(CostCentreStack *ccs)
627 if (!ccs_to_ignore(ccs)) {
628 total_alloc += ccs->mem_alloc;
629 total_prof_ticks += ccs->time_ticks;
631 for (i = ccs->indexTable; i != NULL; i = i->next)
635 /* return rtsTrue if it is one of the ones that
636 * should not be reported normally (because it confuses
640 ccs_to_ignore (CostCentreStack *ccs)
642 if ( ccs == CCS_OVERHEAD
643 || ccs == CCS_DONTZuCARE
645 || ccs == CCS_SYSTEM) {
652 static CostCentreStack *
653 pruneCCSTree( CostCentreStack *ccs )
655 CostCentreStack *ccs1;
656 IndexTable *i, **prev;
658 prev = &ccs->indexTable;
659 for (i = ccs->indexTable; i != 0; i = i->next) {
660 ccs1 = pruneCCSTree(i->ccs);
668 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
669 /* force printing of *all* cost centres if -P -P */ )
671 || ( ccs->indexTable != 0 )
672 || ( (ccs->scc_count || ccs->sub_scc_count ||
673 ccs->time_ticks || ccs->mem_alloc
674 || (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE
675 && (ccs->sub_cafcc_count
676 #if defined(PROFILING_DETAIL_COUNTS)
677 || cc->thunk_count || cc->function_count || cc->pap_count
688 printCCS ( CostCentreStack *ccs )
691 for (; ccs; ccs = ccs->prevStack ) {
692 fprintf(stderr,ccs->cc->label);
693 if (ccs->prevStack) {
701 #endif /* PROFILING */