1 /* -----------------------------------------------------------------------------
2 * $Id: Profiling.c,v 1.13 2000/02/29 16:58:09 simonmar Exp $
4 * (c) The GHC Team, 1998-1999
6 * Support for profiling
8 * ---------------------------------------------------------------------------*/
17 #include "StgStartup.h"
19 #include "Proftimer.h"
24 * Global variables used to assign unique IDs to cc's, ccs's, and
32 /* Table sizes from old profiling system. Not sure if we'll need
35 nat time_intervals = 0;
36 nat earlier_ticks = 0;
43 /* Are we time-profiling?
45 rtsBool time_profiling = rtsFalse;
47 /* figures for the profiling report.
49 static lnat total_alloc, total_prof_ticks;
51 /* Globals for opening the profiling log file
53 static char *prof_filename; /* prof report file name = <program>.prof */
54 static FILE *prof_file;
56 /* The Current Cost Centre Stack (for attributing costs)
58 CostCentreStack *CCCS;
60 /* Linked lists to keep track of cc's and ccs's that haven't
61 * been declared in the log file yet
64 CostCentreStack *CCS_LIST;
67 * Built-in cost centres and cost-centre stacks:
69 * MAIN is the root of the cost-centre stack tree. If there are
70 * no _scc_s in the program, all costs will be attributed
73 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
74 * RTS operations apart from garbage collection are attributed
77 * GC is the storage manager / garbage collector.
79 * OVERHEAD gets all costs generated by the profiling system
80 * itself. These are costs that would not be incurred
81 * during non-profiled execution of the program.
83 * SUBSUMED is the one-and-only CCS placed on top-level functions.
84 * It indicates that all costs are to be attributed to the
85 * enclosing cost centre stack. SUBSUMED never accumulates
88 * DONT_CARE is a placeholder cost-centre we assign to static
89 * constructors. It should *never* accumulate any costs.
92 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "MAIN", CC_IS_BORING,);
93 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", "MAIN", CC_IS_BORING,);
94 CC_DECLARE(CC_GC, "GC", "GC", "GC", CC_IS_BORING,);
95 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "PROFILING", CC_IS_CAF,);
96 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", "MAIN", CC_IS_SUBSUMED,);
97 CC_DECLARE(CC_DONTZuCARE,"DONT_CARE", "MAIN", "MAIN", CC_IS_BORING,);
99 CCS_DECLARE(CCS_MAIN, CC_MAIN, CC_IS_BORING, );
100 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, CC_IS_BORING, );
101 CCS_DECLARE(CCS_GC, CC_GC, CC_IS_BORING, );
102 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, CC_IS_CAF, );
103 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, CC_IS_SUBSUMED, );
104 CCS_DECLARE(CCS_DONTZuCARE, CC_DONTZuCARE, CC_IS_BORING, );
107 * Uniques for the XML log-file format
112 #define HEAP_OBJ_UQ 4
113 #define TIME_UPD_UQ 5
114 #define HEAP_UPD_UQ 6
120 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
121 CostCentreStack *new_ccs );
123 static void registerCostCentres ( void );
124 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
125 static void count_ticks ( CostCentreStack *ccs );
126 static void reportCCS ( CostCentreStack *ccs, nat indent );
127 static void DecCCS ( CostCentreStack *ccs );
128 static void DecBackEdge ( CostCentreStack *ccs, CostCentreStack *oldccs );
129 static CostCentreStack *CheckLoop ( CostCentreStack *ccs, CostCentre *cc );
130 static CostCentreStack *pruneCCSTree ( CostCentreStack *ccs );
132 static CostCentreStack *ActualPush ( CostCentreStack *, CostCentre * );
133 static CostCentreStack *IsInIndexTable ( IndexTable *, CostCentre * );
134 static IndexTable *AddToIndexTable ( IndexTable *, CostCentreStack *,
135 CostCentre *, unsigned int );
138 static void printCCS ( CostCentreStack *ccs );
140 static void initTimeProfiling ( void );
141 static void initProfilingLogFile( void );
143 static void reportCCS_XML ( CostCentreStack *ccs );
145 /* -----------------------------------------------------------------------------
146 Initialise the profiling environment
147 -------------------------------------------------------------------------- */
152 CostCentreStack *ccs, *next;
154 /* for the benefit of allocate()... */
157 /* Initialize counters for IDs */
162 /* Initialize Declaration lists to NULL */
166 /* Register all the cost centres / stacks in the program
167 * CC_MAIN gets link = 0, all others have non-zero link.
169 REGISTER_CC(CC_MAIN);
170 REGISTER_CC(CC_SYSTEM);
172 REGISTER_CC(CC_OVERHEAD);
173 REGISTER_CC(CC_SUBSUMED);
174 REGISTER_CC(CC_DONTZuCARE);
175 REGISTER_CCS(CCS_MAIN);
176 REGISTER_CCS(CCS_SYSTEM);
177 REGISTER_CCS(CCS_GC);
178 REGISTER_CCS(CCS_OVERHEAD);
179 REGISTER_CCS(CCS_SUBSUMED);
180 REGISTER_CCS(CCS_DONTZuCARE);
183 registerCostCentres();
186 /* Set up the log file, and dump the header and cost centre
187 * information into it.
189 initProfilingLogFile();
191 /* find all the "special" cost centre stacks, and make them children
194 ASSERT(CCS_MAIN->prevStack == 0);
195 CCS_MAIN->root = CC_MAIN;
197 for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
198 next = ccs->prevStack;
200 ActualPush_(CCS_MAIN,ccs->cc,ccs);
205 if (RtsFlags.CcFlags.doCostCentres) {
209 if (RtsFlags.ProfFlags.doHeapProfile) {
215 initProfilingLogFile(void)
217 /* Initialise the log file name */
218 prof_filename = stgMallocBytes(strlen(prog_argv[0]) + 6, "initProfiling");
219 sprintf(prof_filename, "%s.prof", prog_argv[0]);
221 /* open the log file */
222 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
223 fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
224 RtsFlags.CcFlags.doCostCentres = 0;
228 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
229 /* dump the time, and the profiling interval */
230 fprintf(prof_file, "\"%s\"\n", time_str());
231 fprintf(prof_file, "\"%d ms\"\n", TICK_MILLISECS);
233 /* declare all the cost centres */
236 for (cc = CC_LIST; cc != NULL; cc = cc->link) {
237 fprintf(prof_file, "%d %d \"%s\" \"%s\" \"%s\"\n",
238 CC_UQ, cc->ccID, cc->label, cc->module, cc->group);
245 initTimeProfiling(void)
247 time_profiling = rtsTrue;
254 endProfiling ( void )
256 if (RtsFlags.CcFlags.doCostCentres) {
259 if (RtsFlags.ProfFlags.doHeapProfile) {
264 /* -----------------------------------------------------------------------------
265 Register Cost Centres
267 At the moment, this process just supplies a unique integer to each
268 statically declared cost centre and cost centre stack in the
271 The code generator inserts a small function "reg<moddule>" in each
272 module which registers any cost centres from that module and calls
273 the registration functions in each of the modules it imports. So,
274 if we call "regMain", each reachable module in the program will be
277 The reg* functions are compiled in the same way as STG code,
278 i.e. without normal C call/return conventions. Hence we must use
279 StgRun to call this stuff.
280 -------------------------------------------------------------------------- */
282 /* The registration functions use an explicit stack...
284 #define REGISTER_STACK_SIZE (BLOCK_SIZE * 4)
288 registerCostCentres ( void )
290 /* this storage will be reclaimed by the garbage collector,
293 register_stack = (F_ *)allocate(REGISTER_STACK_SIZE / sizeof(W_));
295 StgRun((StgFunPtr)stg_register, &MainRegTable);
299 /* -----------------------------------------------------------------------------
300 Set cost centre stack when entering a function. Here we implement
303 "if CCSfn is an initial segment of CCCS,
304 then set CCCS to CCSfn,
305 else append CCSfn to CCCS"
306 -------------------------------------------------------------------------- */
307 rtsBool entering_PAP;
310 EnterFunCCS ( CostCentreStack *cccs, CostCentreStack *ccsfn )
312 /* PAP_entry has already set CCCS for us */
314 entering_PAP = rtsFalse;
318 if (cccs->root == ccsfn->root) {
321 return AppendCCS(cccs,ccsfn);
325 /* -----------------------------------------------------------------------------
326 Cost-centre stack manipulation
327 -------------------------------------------------------------------------- */
330 CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
332 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
333 #define PushCostCentre _PushCostCentre
336 fprintf(stderr,"Pushing %s on ", cc->label);
338 fprintf(stderr,"\n"));
339 return PushCostCentre(ccs,cc);
344 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
346 CostCentreStack *temp_ccs;
348 if (ccs == EMPTY_STACK)
349 return ActualPush(ccs,cc);
354 /* check if we've already memoized this stack */
355 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
357 if (temp_ccs != EMPTY_STACK)
360 temp_ccs = CheckLoop(ccs,cc);
361 if (temp_ccs != NULL) {
362 /* we have recursed to an older CCS. Mark this in
363 * the index table, and emit a "back edge" into the
366 ccs->indexTable = AddToIndexTable(ccs->indexTable,temp_ccs,cc,1);
367 DecBackEdge(temp_ccs,ccs);
370 return ActualPush(ccs,cc);
377 static CostCentreStack *
378 CheckLoop ( CostCentreStack *ccs, CostCentre *cc )
380 while (ccs != EMPTY_STACK) {
383 ccs = ccs->prevStack;
388 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
391 CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
393 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
394 #define AppendCCS _AppendCCS
398 fprintf(stderr,"Appending ");
400 fprintf(stderr," to ");
402 fprintf(stderr,"\n");});
403 return AppendCCS(ccs1,ccs2);
408 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
410 CostCentreStack *ccs = NULL;
416 if (ccs2->cc->is_subsumed != CC_IS_BORING) {
420 if (ccs2->prevStack != NULL) {
421 ccs = AppendCCS(ccs1, ccs2->prevStack);
424 return PushCostCentre(ccs,ccs2->cc);
427 static CostCentreStack *
428 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
430 CostCentreStack *new_ccs;
432 /* allocate space for a new CostCentreStack */
433 new_ccs = (CostCentreStack *) stgMallocBytes(sizeof(CostCentreStack), "Error allocating space for CostCentreStack");
435 return ActualPush_(ccs, cc, new_ccs);
438 static CostCentreStack *
439 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
441 /* assign values to each member of the structure */
442 ASSIGN_CCS_ID(new_ccs->ccsID);
445 new_ccs->prevStack = ccs;
447 new_ccs->indexTable = EMPTY_TABLE;
449 /* Initialise the various _scc_ counters to zero
451 new_ccs->scc_count = 0;
452 new_ccs->sub_scc_count = 0;
453 new_ccs->sub_cafcc_count = 0;
455 /* Initialize all other stats here. There should be a quick way
456 * that's easily used elsewhere too
458 new_ccs->time_ticks = 0;
459 new_ccs->mem_alloc = 0;
461 new_ccs->root = ccs->root;
462 new_ccs->emitted = 0;
464 /* update the memoization table for the parent stack */
465 if (ccs != EMPTY_STACK)
466 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc,
467 0/*not a back edge*/);
469 /* make sure this CC is declared at the next heap/time sample */
472 /* return a pointer to the new stack */
477 static CostCentreStack *
478 IsInIndexTable(IndexTable *it, CostCentre *cc)
480 while (it!=EMPTY_TABLE)
488 /* otherwise we never found it so return EMPTY_TABLE */
494 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs,
495 CostCentre *cc, unsigned int back_edge)
499 new_it = stgMallocBytes(sizeof(IndexTable), "AddToIndexTable");
502 new_it->ccs = new_ccs;
504 new_it->back_edge = back_edge;
510 print_ccs (FILE *fp, CostCentreStack *ccs)
513 fprintf(fp, "Cost-Centre Stack: ");
518 print_ccs(fp, ccs->prevStack);
519 fprintf(fp, "->[%s,%s,%s]",
520 ccs->cc->label, ccs->cc->module, ccs->cc->group);
522 fprintf(fp, "[%s,%s,%s]",
523 ccs->cc->label, ccs->cc->module, ccs->cc->group);
533 DecCCS(CostCentreStack *ccs)
535 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
536 if (ccs->prevStack == EMPTY_STACK)
537 fprintf(prof_file, "%d %d 1 %d\n", CCS_UQ,
538 ccs->ccsID, ccs->cc->ccID);
540 fprintf(prof_file, "%d %d 2 %d %d\n", CCS_UQ,
541 ccs->ccsID, ccs->cc->ccID, ccs->prevStack->ccsID);
546 DecBackEdge( CostCentreStack *ccs, CostCentreStack *oldccs )
548 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
549 if (ccs->prevStack == EMPTY_STACK)
550 fprintf(prof_file, "%d %d 1 %d\n", CCS_UQ,
551 ccs->ccsID, ccs->cc->ccID);
553 fprintf(prof_file, "%d %d 2 %d %d\n", CCS_UQ,
554 ccs->ccsID, ccs->cc->ccID, oldccs->ccsID);
558 /* -----------------------------------------------------------------------------
559 Generating a time & allocation profiling report.
560 -------------------------------------------------------------------------- */
562 /* -----------------------------------------------------------------------------
563 Generating the aggregated per-cost-centre time/alloc report.
564 -------------------------------------------------------------------------- */
566 static CostCentre *sorted_cc_list;
569 aggregate_cc_costs( CostCentreStack *ccs )
573 ccs->cc->mem_alloc += ccs->mem_alloc;
574 ccs->cc->time_ticks += ccs->time_ticks;
576 for (i = ccs->indexTable; i != 0; i = i->next) {
578 aggregate_cc_costs(i->ccs);
584 insert_cc_in_sorted_list( CostCentre *new_cc )
586 CostCentre **prev, *cc;
588 prev = &sorted_cc_list;
589 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
590 if (new_cc->time_ticks > cc->time_ticks) {
603 report_per_cc_costs( void )
605 CostCentre *cc, *next;
607 aggregate_cc_costs(CCS_MAIN);
608 sorted_cc_list = NULL;
610 for (cc = CC_LIST; cc != NULL; cc = next) {
612 if (cc->time_ticks > total_prof_ticks/100
613 || cc->mem_alloc > total_alloc/100) {
614 insert_cc_in_sorted_list(cc);
618 fprintf(prof_file, "%-20s %-10s", "COST CENTRE", "MODULE");
619 fprintf(prof_file, "%6s %6s", "%time", "%alloc");
620 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
621 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
623 fprintf(prof_file, "\n\n");
625 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
626 fprintf(prof_file, "%-20s %-10s", cc->label, cc->module);
627 fprintf(prof_file, "%6.1f %6.1f",
628 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
629 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
633 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
634 fprintf(prof_file, " %5ld %9ld", cc->time_ticks, cc->mem_alloc);
636 fprintf(prof_file, "\n");
639 fprintf(prof_file,"\n\n");
642 /* -----------------------------------------------------------------------------
643 Generate the cost-centre-stack time/alloc report
644 -------------------------------------------------------------------------- */
647 fprint_header( void )
649 fprintf(prof_file, "%-24s %-10s", "COST CENTRE", "MODULE");
652 do_groups = have_interesting_groups(Registered_CC);
653 if (do_groups) fprintf(prof_file, " %-11.11s", "GROUP");
656 fprintf(prof_file, "%8s %5s %5s %8s %5s", "scc", "%time", "%alloc", "inner", "cafs");
658 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
659 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
660 #if defined(PROFILING_DETAIL_COUNTS)
661 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
662 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
666 fprintf(prof_file, "\n\n");
670 report_ccs_profiling( void )
673 char temp[128]; /* sigh: magic constant */
675 rtsBool do_groups = rtsFalse;
680 total_prof_ticks = 0;
682 count_ticks(CCS_MAIN);
684 switch (RtsFlags.CcFlags.doCostCentres) {
687 case COST_CENTRES_XML:
693 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
694 time_str(), "Final");
696 fprintf(prof_file, "\n\t ");
697 fprintf(prof_file, " %s", prog_argv[0]);
698 fprintf(prof_file, " +RTS");
699 for (count = 0; rts_argv[count]; count++)
700 fprintf(prof_file, " %s", rts_argv[count]);
701 fprintf(prof_file, " -RTS");
702 for (count = 1; prog_argv[count]; count++)
703 fprintf(prof_file, " %s", prog_argv[count]);
704 fprintf(prof_file, "\n\n");
706 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
707 total_prof_ticks / (StgFloat) TICK_FREQUENCY,
708 total_prof_ticks, TICK_MILLISECS);
710 fprintf(prof_file, "\ttotal alloc = %11s bytes",
711 ullong_format_string((ullong) total_alloc * sizeof(W_),
712 temp, rtsTrue/*commas*/));
713 /* ToDo: 64-bit error! */
715 #if defined(PROFILING_DETAIL_COUNTS)
716 fprintf(prof_file, " (%lu closures)", total_allocs);
718 fprintf(prof_file, " (excludes profiling overheads)\n\n");
720 report_per_cc_costs();
723 reportCCS(pruneCCSTree(CCS_MAIN), 0);
729 reportCCS(CostCentreStack *ccs, nat indent)
736 /* Only print cost centres with non 0 data ! */
738 if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
739 ! ccs_to_ignore(ccs))
740 /* force printing of *all* cost centres if -P -P */
743 fprintf(prof_file, "%-*s%-*s %-10s",
744 indent, "", 24-indent, cc->label, cc->module);
747 if (do_groups) fprintf(prof_file, " %-11.11s",cc->group);
750 fprintf(prof_file, "%8ld %5.1f %5.1f %8ld %5ld",
752 total_prof_ticks == 0 ? 0.0 : (ccs->time_ticks / (StgFloat) total_prof_ticks * 100),
753 total_alloc == 0 ? 0.0 : (ccs->mem_alloc / (StgFloat) total_alloc * 100),
754 ccs->sub_scc_count, ccs->sub_cafcc_count);
756 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
757 fprintf(prof_file, " %5ld %9ld", ccs->time_ticks, ccs->mem_alloc*sizeof(W_));
758 #if defined(PROFILING_DETAIL_COUNTS)
759 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
760 ccs->mem_allocs, ccs->thunk_count,
761 ccs->function_count, ccs->pap_count,
762 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
763 ccs->caffun_subsumed);
766 fprintf(prof_file, "\n");
769 for (i = ccs->indexTable; i != 0; i = i->next) {
771 reportCCS(i->ccs, indent+1);
776 /* Traverse the cost centre stack tree and accumulate
780 count_ticks(CostCentreStack *ccs)
784 if (!ccs_to_ignore(ccs)) {
785 total_alloc += ccs->mem_alloc;
786 total_prof_ticks += ccs->time_ticks;
788 for (i = ccs->indexTable; i != NULL; i = i->next)
794 /* return rtsTrue if it is one of the ones that
795 * should not be reported normally (because it confuses
799 ccs_to_ignore (CostCentreStack *ccs)
801 if ( ccs == CCS_OVERHEAD
802 || ccs == CCS_DONTZuCARE
804 || ccs == CCS_SYSTEM) {
811 static CostCentreStack *
812 pruneCCSTree( CostCentreStack *ccs )
814 CostCentreStack *ccs1;
815 IndexTable *i, **prev;
817 prev = &ccs->indexTable;
818 for (i = ccs->indexTable; i != 0; i = i->next) {
819 if (i->back_edge) { continue; }
821 ccs1 = pruneCCSTree(i->ccs);
829 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
830 /* force printing of *all* cost centres if -P -P */ )
832 || ( ccs->indexTable != 0 )
833 || ( (ccs->scc_count || ccs->sub_scc_count ||
834 ccs->time_ticks || ccs->mem_alloc
835 || (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE
836 && (ccs->sub_cafcc_count
837 #if defined(PROFILING_DETAIL_COUNTS)
838 || cc->thunk_count || cc->function_count || cc->pap_count
847 /* -----------------------------------------------------------------------------
848 Generate the XML time/allocation profile
849 -------------------------------------------------------------------------- */
852 gen_XML_logfile( void )
854 fprintf(prof_file, "%d %lu", TIME_UPD_UQ, total_prof_ticks);
856 reportCCS_XML(pruneCCSTree(CCS_MAIN));
858 fprintf(prof_file, " 0\n");
864 reportCCS_XML(CostCentreStack *ccs)
871 fprintf(prof_file, " 1 %d %lu %lu %lu",
872 ccs->ccsID, ccs->scc_count, ccs->time_ticks, ccs->mem_alloc);
874 for (i = ccs->indexTable; i != 0; i = i->next) {
876 reportCCS_XML(i->ccs);
883 printCCS ( CostCentreStack *ccs )
886 for (; ccs; ccs = ccs->prevStack ) {
887 fprintf(stderr,ccs->cc->label);
888 if (ccs->prevStack) {
896 #endif /* PROFILING */