1 /* -----------------------------------------------------------------------------
2 * $Id: Profiling.c,v 1.12 2000/02/17 17:19:42 simonmar Exp $
4 * (c) The GHC Team, 1998-1999
6 * Support for profiling
8 * ---------------------------------------------------------------------------*/
17 #include "StgStartup.h"
19 #include "Proftimer.h"
24 * Global variables used to assign unique IDs to cc's, ccs's, and
32 /* Table sizes from old profiling system. Not sure if we'll need
35 nat time_intervals = 0;
36 nat earlier_ticks = 0;
43 /* Are we time-profiling?
45 rtsBool time_profiling = rtsFalse;
47 /* figures for the profiling report.
49 static lnat total_alloc, total_prof_ticks;
51 /* Globals for opening the profiling log file
53 static char *prof_filename; /* prof report file name = <program>.prof */
54 static FILE *prof_file;
56 /* The Current Cost Centre Stack (for attributing costs)
58 CostCentreStack *CCCS;
60 /* Linked lists to keep track of cc's and ccs's that haven't
61 * been declared in the log file yet
64 CostCentreStack *CCS_LIST;
67 * Built-in cost centres and cost-centre stacks:
69 * MAIN is the root of the cost-centre stack tree. If there are
70 * no _scc_s in the program, all costs will be attributed
73 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
74 * RTS operations apart from garbage collection are attributed
77 * GC is the storage manager / garbage collector.
79 * OVERHEAD gets all costs generated by the profiling system
80 * itself. These are costs that would not be incurred
81 * during non-profiled execution of the program.
83 * SUBSUMED is the one-and-only CCS placed on top-level functions.
84 * It indicates that all costs are to be attributed to the
85 * enclosing cost centre stack. SUBSUMED never accumulates
88 * DONT_CARE is a placeholder cost-centre we assign to static
89 * constructors. It should *never* accumulate any costs.
92 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "MAIN", CC_IS_BORING,);
93 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", "MAIN", CC_IS_BORING,);
94 CC_DECLARE(CC_GC, "GC", "GC", "GC", CC_IS_BORING,);
95 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "PROFILING", CC_IS_CAF,);
96 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", "MAIN", CC_IS_SUBSUMED,);
97 CC_DECLARE(CC_DONTZuCARE,"DONT_CARE", "MAIN", "MAIN", CC_IS_BORING,);
99 CCS_DECLARE(CCS_MAIN, CC_MAIN, CC_IS_BORING, );
100 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, CC_IS_BORING, );
101 CCS_DECLARE(CCS_GC, CC_GC, CC_IS_BORING, );
102 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, CC_IS_CAF, );
103 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, CC_IS_SUBSUMED, );
104 CCS_DECLARE(CCS_DONTZuCARE, CC_DONTZuCARE, CC_IS_BORING, );
107 * Uniques for the XML log-file format
112 #define HEAP_OBJ_UQ 4
113 #define TIME_UPD_UQ 5
114 #define HEAP_UPD_UQ 6
120 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
121 CostCentreStack *new_ccs );
123 static void registerCostCentres ( void );
124 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
125 static void count_ticks ( CostCentreStack *ccs );
126 static void reportCCS ( CostCentreStack *ccs, nat indent );
127 static void DecCCS ( CostCentreStack *ccs );
128 static CostCentreStack *pruneCCSTree ( CostCentreStack *ccs );
130 static void printCCS ( CostCentreStack *ccs );
132 static void initTimeProfiling ( void );
133 static void initProfilingLogFile( void );
135 static void reportCCS_XML ( CostCentreStack *ccs );
137 /* -----------------------------------------------------------------------------
138 Initialise the profiling environment
139 -------------------------------------------------------------------------- */
144 CostCentreStack *ccs, *next;
146 /* for the benefit of allocate()... */
149 /* Initialize counters for IDs */
154 /* Initialize Declaration lists to NULL */
158 /* Register all the cost centres / stacks in the program
159 * CC_MAIN gets link = 0, all others have non-zero link.
161 REGISTER_CC(CC_MAIN);
162 REGISTER_CC(CC_SYSTEM);
164 REGISTER_CC(CC_OVERHEAD);
165 REGISTER_CC(CC_SUBSUMED);
166 REGISTER_CC(CC_DONTZuCARE);
167 REGISTER_CCS(CCS_MAIN);
168 REGISTER_CCS(CCS_SYSTEM);
169 REGISTER_CCS(CCS_GC);
170 REGISTER_CCS(CCS_OVERHEAD);
171 REGISTER_CCS(CCS_SUBSUMED);
172 REGISTER_CCS(CCS_DONTZuCARE);
175 registerCostCentres();
178 /* Set up the log file, and dump the header and cost centre
179 * information into it.
181 initProfilingLogFile();
183 /* find all the "special" cost centre stacks, and make them children
186 ASSERT(CCS_MAIN->prevStack == 0);
187 CCS_MAIN->root = CC_MAIN;
189 for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
190 next = ccs->prevStack;
192 ActualPush_(CCS_MAIN,ccs->cc,ccs);
197 if (RtsFlags.CcFlags.doCostCentres) {
201 if (RtsFlags.ProfFlags.doHeapProfile) {
207 initProfilingLogFile(void)
209 /* Initialise the log file name */
210 prof_filename = stgMallocBytes(strlen(prog_argv[0]) + 6, "initProfiling");
211 sprintf(prof_filename, "%s.prof", prog_argv[0]);
213 /* open the log file */
214 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
215 fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
216 RtsFlags.CcFlags.doCostCentres = 0;
220 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
221 /* dump the time, and the profiling interval */
222 fprintf(prof_file, "\"%s\"\n", time_str());
223 fprintf(prof_file, "\"%d ms\"\n", TICK_MILLISECS);
225 /* declare all the cost centres */
228 for (cc = CC_LIST; cc != NULL; cc = cc->link) {
229 fprintf(prof_file, "%d %d \"%s\" \"%s\" \"%s\"\n",
230 CC_UQ, cc->ccID, cc->label, cc->module, cc->group);
237 initTimeProfiling(void)
239 time_profiling = rtsTrue;
246 endProfiling ( void )
248 if (RtsFlags.CcFlags.doCostCentres) {
251 if (RtsFlags.ProfFlags.doHeapProfile) {
256 /* -----------------------------------------------------------------------------
257 Register Cost Centres
259 At the moment, this process just supplies a unique integer to each
260 statically declared cost centre and cost centre stack in the
263 The code generator inserts a small function "reg<moddule>" in each
264 module which registers any cost centres from that module and calls
265 the registration functions in each of the modules it imports. So,
266 if we call "regMain", each reachable module in the program will be
269 The reg* functions are compiled in the same way as STG code,
270 i.e. without normal C call/return conventions. Hence we must use
271 StgRun to call this stuff.
272 -------------------------------------------------------------------------- */
274 /* The registration functions use an explicit stack...
276 #define REGISTER_STACK_SIZE (BLOCK_SIZE * 4)
280 registerCostCentres ( void )
282 /* this storage will be reclaimed by the garbage collector,
285 register_stack = (F_ *)allocate(REGISTER_STACK_SIZE / sizeof(W_));
287 StgRun((StgFunPtr)stg_register, &MainRegTable);
291 /* -----------------------------------------------------------------------------
292 Set cost centre stack when entering a function. Here we implement
295 "if CCSfn is an initial segment of CCCS,
296 then set CCCS to CCSfn,
297 else append CCSfn to CCCS"
298 -------------------------------------------------------------------------- */
299 rtsBool entering_PAP;
302 EnterFunCCS ( CostCentreStack *cccs, CostCentreStack *ccsfn )
304 /* PAP_entry has already set CCCS for us */
306 entering_PAP = rtsFalse;
310 if (cccs->root == ccsfn->root) {
313 return AppendCCS(cccs,ccsfn);
317 /* -----------------------------------------------------------------------------
318 Cost-centre stack manipulation
319 -------------------------------------------------------------------------- */
322 CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
324 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
325 #define PushCostCentre _PushCostCentre
328 fprintf(stderr,"Pushing %s on ", cc->label);
330 fprintf(stderr,"\n"));
331 return PushCostCentre(ccs,cc);
336 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
338 CostCentreStack *temp_ccs;
340 if (ccs == EMPTY_STACK)
341 return ActualPush(ccs,cc);
346 /* check if we've already memoized this stack */
347 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
349 if (temp_ccs != EMPTY_STACK)
352 /* remove the CC to avoid loops */
353 ccs = RemoveCC(ccs,cc);
354 /* have a different stack now, need to check the memo table again */
355 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
356 if (temp_ccs != EMPTY_STACK)
359 return ActualPush(ccs,cc);
365 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
368 CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
370 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
371 #define AppendCCS _AppendCCS
375 fprintf(stderr,"Appending ");
377 fprintf(stderr," to ");
379 fprintf(stderr,"\n");});
380 return AppendCCS(ccs1,ccs2);
385 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
387 CostCentreStack *ccs = NULL;
389 /* Optimisation: if we attempt to append a CCS to itself, we're
390 * going to end up with the same ccs after a great deal of pushing
391 * and removing of cost centres. Furthermore, we'll generate a lot
392 * of intermediate CCSs which would not otherwise be generated. So:
393 * let's cope with this common case first.
399 if (ccs2->cc->is_subsumed != CC_IS_BORING) {
403 if (ccs2->prevStack != NULL) {
404 ccs = AppendCCS(ccs1, ccs2->prevStack);
407 return PushCostCentre(ccs,ccs2->cc);
411 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
413 CostCentreStack *new_ccs;
415 /* allocate space for a new CostCentreStack */
416 new_ccs = (CostCentreStack *) stgMallocBytes(sizeof(CostCentreStack), "Error allocating space for CostCentreStack");
418 return ActualPush_(ccs, cc, new_ccs);
421 static CostCentreStack *
422 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
424 /* assign values to each member of the structure */
425 ASSIGN_CCS_ID(new_ccs->ccsID);
428 new_ccs->prevStack = ccs;
430 new_ccs->indexTable = EMPTY_TABLE;
432 /* Initialise the various _scc_ counters to zero
434 new_ccs->scc_count = 0;
435 new_ccs->sub_scc_count = 0;
436 new_ccs->sub_cafcc_count = 0;
438 /* Initialize all other stats here. There should be a quick way
439 * that's easily used elsewhere too
441 new_ccs->time_ticks = 0;
442 new_ccs->mem_alloc = 0;
444 new_ccs->root = ccs->root;
446 /* update the memoization table for the parent stack */
447 if (ccs != EMPTY_STACK)
448 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc);
450 /* make sure this CC is declared at the next heap/time sample */
453 /* return a pointer to the new stack */
459 RemoveCC(CostCentreStack *ccs, CostCentre *cc)
461 CostCentreStack *del_ccs;
463 if (ccs == EMPTY_STACK) {
467 return ccs->prevStack;
470 del_ccs = RemoveCC(ccs->prevStack, cc);
472 if (del_ccs == EMPTY_STACK)
475 return PushCostCentre(del_ccs,ccs->cc);
483 IsInIndexTable(IndexTable *it, CostCentre *cc)
485 while (it!=EMPTY_TABLE)
493 /* otherwise we never found it so return EMPTY_TABLE */
499 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs, CostCentre *cc)
503 new_it = stgMallocBytes(sizeof(IndexTable), "AddToIndexTable");
506 new_it->ccs = new_ccs;
513 print_ccs (FILE *fp, CostCentreStack *ccs)
516 fprintf(fp, "Cost-Centre Stack: ");
521 print_ccs(fp, ccs->prevStack);
522 fprintf(fp, "->[%s,%s,%s]",
523 ccs->cc->label, ccs->cc->module, ccs->cc->group);
525 fprintf(fp, "[%s,%s,%s]",
526 ccs->cc->label, ccs->cc->module, ccs->cc->group);
536 DecCCS(CostCentreStack *ccs)
538 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
539 if (ccs->prevStack == EMPTY_STACK)
540 fprintf(prof_file, "%d %d 1 %d\n", CCS_UQ,
541 ccs->ccsID, ccs->cc->ccID);
543 fprintf(prof_file, "%d %d 2 %d %d\n", CCS_UQ,
544 ccs->ccsID, ccs->cc->ccID, ccs->prevStack->ccsID);
548 /* -----------------------------------------------------------------------------
549 Generating a time & allocation profiling report.
550 -------------------------------------------------------------------------- */
552 /* -----------------------------------------------------------------------------
553 Generating the aggregated per-cost-centre time/alloc report.
554 -------------------------------------------------------------------------- */
556 static CostCentre *sorted_cc_list;
559 aggregate_cc_costs( CostCentreStack *ccs )
563 ccs->cc->mem_alloc += ccs->mem_alloc;
564 ccs->cc->time_ticks += ccs->time_ticks;
566 for (i = ccs->indexTable; i != 0; i = i->next) {
567 aggregate_cc_costs(i->ccs);
572 insert_cc_in_sorted_list( CostCentre *new_cc )
574 CostCentre **prev, *cc;
576 prev = &sorted_cc_list;
577 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
578 if (new_cc->time_ticks > cc->time_ticks) {
591 report_per_cc_costs( void )
593 CostCentre *cc, *next;
595 aggregate_cc_costs(CCS_MAIN);
596 sorted_cc_list = NULL;
598 for (cc = CC_LIST; cc != NULL; cc = next) {
600 if (cc->time_ticks > total_prof_ticks/100
601 || cc->mem_alloc > total_alloc/100) {
602 insert_cc_in_sorted_list(cc);
606 fprintf(prof_file, "%-20s %-10s", "COST CENTRE", "MODULE");
607 fprintf(prof_file, "%6s %6s", "%time", "%alloc");
608 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
609 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
611 fprintf(prof_file, "\n\n");
613 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
614 fprintf(prof_file, "%-20s %-10s", cc->label, cc->module);
615 fprintf(prof_file, "%6.1f %6.1f",
616 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
617 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
621 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
622 fprintf(prof_file, " %5ld %9ld", cc->time_ticks, cc->mem_alloc);
624 fprintf(prof_file, "\n");
627 fprintf(prof_file,"\n\n");
630 /* -----------------------------------------------------------------------------
631 Generate the cost-centre-stack time/alloc report
632 -------------------------------------------------------------------------- */
635 fprint_header( void )
637 fprintf(prof_file, "%-24s %-10s", "COST CENTRE", "MODULE");
640 do_groups = have_interesting_groups(Registered_CC);
641 if (do_groups) fprintf(prof_file, " %-11.11s", "GROUP");
644 fprintf(prof_file, "%8s %5s %5s %8s %5s", "scc", "%time", "%alloc", "inner", "cafs");
646 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
647 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
648 #if defined(PROFILING_DETAIL_COUNTS)
649 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
650 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
654 fprintf(prof_file, "\n\n");
658 report_ccs_profiling( void )
661 char temp[128]; /* sigh: magic constant */
663 rtsBool do_groups = rtsFalse;
668 total_prof_ticks = 0;
670 count_ticks(CCS_MAIN);
672 switch (RtsFlags.CcFlags.doCostCentres) {
675 case COST_CENTRES_XML:
681 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
682 time_str(), "Final");
684 fprintf(prof_file, "\n\t ");
685 fprintf(prof_file, " %s", prog_argv[0]);
686 fprintf(prof_file, " +RTS");
687 for (count = 0; rts_argv[count]; count++)
688 fprintf(prof_file, " %s", rts_argv[count]);
689 fprintf(prof_file, " -RTS");
690 for (count = 1; prog_argv[count]; count++)
691 fprintf(prof_file, " %s", prog_argv[count]);
692 fprintf(prof_file, "\n\n");
694 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
695 total_prof_ticks / (StgFloat) TICK_FREQUENCY,
696 total_prof_ticks, TICK_MILLISECS);
698 fprintf(prof_file, "\ttotal alloc = %11s bytes",
699 ullong_format_string((ullong) total_alloc * sizeof(W_),
700 temp, rtsTrue/*commas*/));
701 /* ToDo: 64-bit error! */
703 #if defined(PROFILING_DETAIL_COUNTS)
704 fprintf(prof_file, " (%lu closures)", total_allocs);
706 fprintf(prof_file, " (excludes profiling overheads)\n\n");
708 report_per_cc_costs();
711 reportCCS(pruneCCSTree(CCS_MAIN), 0);
717 reportCCS(CostCentreStack *ccs, nat indent)
724 /* Only print cost centres with non 0 data ! */
726 if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
727 ! ccs_to_ignore(ccs))
728 /* force printing of *all* cost centres if -P -P */
731 fprintf(prof_file, "%-*s%-*s %-10s",
732 indent, "", 24-indent, cc->label, cc->module);
735 if (do_groups) fprintf(prof_file, " %-11.11s",cc->group);
738 fprintf(prof_file, "%8ld %5.1f %5.1f %8ld %5ld",
740 total_prof_ticks == 0 ? 0.0 : (ccs->time_ticks / (StgFloat) total_prof_ticks * 100),
741 total_alloc == 0 ? 0.0 : (ccs->mem_alloc / (StgFloat) total_alloc * 100),
742 ccs->sub_scc_count, ccs->sub_cafcc_count);
744 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
745 fprintf(prof_file, " %5ld %9ld", ccs->time_ticks, ccs->mem_alloc*sizeof(W_));
746 #if defined(PROFILING_DETAIL_COUNTS)
747 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
748 ccs->mem_allocs, ccs->thunk_count,
749 ccs->function_count, ccs->pap_count,
750 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
751 ccs->caffun_subsumed);
754 fprintf(prof_file, "\n");
757 for (i = ccs->indexTable; i != 0; i = i->next) {
758 reportCCS(i->ccs, indent+1);
762 /* Traverse the cost centre stack tree and accumulate
766 count_ticks(CostCentreStack *ccs)
770 if (!ccs_to_ignore(ccs)) {
771 total_alloc += ccs->mem_alloc;
772 total_prof_ticks += ccs->time_ticks;
774 for (i = ccs->indexTable; i != NULL; i = i->next)
778 /* return rtsTrue if it is one of the ones that
779 * should not be reported normally (because it confuses
783 ccs_to_ignore (CostCentreStack *ccs)
785 if ( ccs == CCS_OVERHEAD
786 || ccs == CCS_DONTZuCARE
788 || ccs == CCS_SYSTEM) {
795 static CostCentreStack *
796 pruneCCSTree( CostCentreStack *ccs )
798 CostCentreStack *ccs1;
799 IndexTable *i, **prev;
801 prev = &ccs->indexTable;
802 for (i = ccs->indexTable; i != 0; i = i->next) {
803 ccs1 = pruneCCSTree(i->ccs);
811 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
812 /* force printing of *all* cost centres if -P -P */ )
814 || ( ccs->indexTable != 0 )
815 || ( (ccs->scc_count || ccs->sub_scc_count ||
816 ccs->time_ticks || ccs->mem_alloc
817 || (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE
818 && (ccs->sub_cafcc_count
819 #if defined(PROFILING_DETAIL_COUNTS)
820 || cc->thunk_count || cc->function_count || cc->pap_count
829 /* -----------------------------------------------------------------------------
830 Generate the XML time/allocation profile
831 -------------------------------------------------------------------------- */
834 gen_XML_logfile( void )
836 fprintf(prof_file, "%d %lu", TIME_UPD_UQ, total_prof_ticks);
838 reportCCS_XML(pruneCCSTree(CCS_MAIN));
840 fprintf(prof_file, " 0\n");
846 reportCCS_XML(CostCentreStack *ccs)
853 fprintf(prof_file, " 1 %d %lu %lu %lu",
854 ccs->ccsID, ccs->scc_count, ccs->time_ticks, ccs->mem_alloc);
856 for (i = ccs->indexTable; i != 0; i = i->next) {
857 reportCCS_XML(i->ccs);
863 printCCS ( CostCentreStack *ccs )
866 for (; ccs; ccs = ccs->prevStack ) {
867 fprintf(stderr,ccs->cc->label);
868 if (ccs->prevStack) {
876 #endif /* PROFILING */