1 /* -----------------------------------------------------------------------------
2 * $Id: Profiling.c,v 1.24 2001/10/18 14:41:01 simonmar Exp $
4 * (c) The GHC Team, 1998-2000
6 * Support for profiling
8 * ---------------------------------------------------------------------------*/
12 #include "PosixSource.h"
16 #include "Profiling.h"
18 #include "Proftimer.h"
24 * Profiling allocation arena.
29 * Global variables used to assign unique IDs to cc's, ccs's, and
37 /* Table sizes from old profiling system. Not sure if we'll need
40 nat time_intervals = 0;
41 nat earlier_ticks = 0;
48 /* Are we time-profiling?
50 rtsBool time_profiling = rtsFalse;
52 /* figures for the profiling report.
54 static lnat total_alloc, total_prof_ticks;
56 /* Globals for opening the profiling log file(s)
58 static char *prof_filename; /* prof report file name = <program>.prof */
61 static char *hp_filename; /* heap profile (hp2ps style) log file */
64 /* The Current Cost Centre Stack (for attributing costs)
66 CostCentreStack *CCCS;
68 /* Linked lists to keep track of cc's and ccs's that haven't
69 * been declared in the log file yet
72 CostCentreStack *CCS_LIST;
75 * Built-in cost centres and cost-centre stacks:
77 * MAIN is the root of the cost-centre stack tree. If there are
78 * no _scc_s in the program, all costs will be attributed
81 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
82 * RTS operations apart from garbage collection are attributed
85 * GC is the storage manager / garbage collector.
87 * OVERHEAD gets all costs generated by the profiling system
88 * itself. These are costs that would not be incurred
89 * during non-profiled execution of the program.
91 * SUBSUMED is the one-and-only CCS placed on top-level functions.
92 * It indicates that all costs are to be attributed to the
93 * enclosing cost centre stack. SUBSUMED never accumulates
94 * any costs. The is_caf flag is set on the subsumed cost
97 * DONT_CARE is a placeholder cost-centre we assign to static
98 * constructors. It should *never* accumulate any costs.
101 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", CC_IS_BORING, );
102 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", CC_IS_BORING, );
103 CC_DECLARE(CC_GC, "GC", "GC", CC_IS_BORING, );
104 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_IS_CAF, );
105 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", CC_IS_CAF, );
106 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_IS_BORING, );
108 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
109 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
110 CCS_DECLARE(CCS_GC, CC_GC, );
111 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
112 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, );
113 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
116 * Uniques for the XML log-file format
121 #define HEAP_OBJ_UQ 4
122 #define TIME_UPD_UQ 5
123 #define HEAP_UPD_UQ 6
129 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
130 CostCentreStack *new_ccs );
131 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
132 static void count_ticks ( CostCentreStack *ccs );
133 static void inherit_costs ( CostCentreStack *ccs );
134 static void reportCCS ( CostCentreStack *ccs, nat indent );
135 static void DecCCS ( CostCentreStack *ccs );
136 static void DecBackEdge ( CostCentreStack *ccs,
137 CostCentreStack *oldccs );
138 static CostCentreStack * CheckLoop ( CostCentreStack *ccs, CostCentre *cc );
139 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
140 static CostCentreStack * ActualPush ( CostCentreStack *, CostCentre * );
141 static CostCentreStack * IsInIndexTable ( IndexTable *, CostCentre * );
142 static IndexTable * AddToIndexTable ( IndexTable *, CostCentreStack *,
143 CostCentre *, unsigned int );
148 static void printCCS ( CostCentreStack *ccs );
150 static void initTimeProfiling ( void );
151 static void initProfilingLogFile( void );
153 static void reportCCS_XML ( CostCentreStack *ccs );
155 /* -----------------------------------------------------------------------------
156 Initialise the profiling environment
157 -------------------------------------------------------------------------- */
160 initProfiling1 (void)
162 // initialise our arena
163 prof_arena = newArena();
165 /* for the benefit of allocate()... */
168 /* Initialize counters for IDs */
173 /* Initialize Declaration lists to NULL */
177 /* Register all the cost centres / stacks in the program
178 * CC_MAIN gets link = 0, all others have non-zero link.
180 REGISTER_CC(CC_MAIN);
181 REGISTER_CC(CC_SYSTEM);
183 REGISTER_CC(CC_OVERHEAD);
184 REGISTER_CC(CC_SUBSUMED);
185 REGISTER_CC(CC_DONT_CARE);
186 REGISTER_CCS(CCS_MAIN);
187 REGISTER_CCS(CCS_SYSTEM);
188 REGISTER_CCS(CCS_GC);
189 REGISTER_CCS(CCS_OVERHEAD);
190 REGISTER_CCS(CCS_SUBSUMED);
191 REGISTER_CCS(CCS_DONT_CARE);
195 /* cost centres are registered by the per-module
196 * initialisation code now...
201 initProfiling2 (void)
203 CostCentreStack *ccs, *next;
207 /* Set up the log file, and dump the header and cost centre
208 * information into it. */
209 initProfilingLogFile();
211 /* find all the "special" cost centre stacks, and make them children
214 ASSERT(CCS_MAIN->prevStack == 0);
215 CCS_MAIN->root = CC_MAIN;
217 for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
218 next = ccs->prevStack;
220 ActualPush_(CCS_MAIN,ccs->cc,ccs);
225 if (RtsFlags.CcFlags.doCostCentres) {
229 if (RtsFlags.ProfFlags.doHeapProfile) {
235 initProfilingLogFile(void)
237 /* Initialise the log file name */
238 prof_filename = arenaAlloc(prof_arena, strlen(prog_argv[0]) + 6);
239 sprintf(prof_filename, "%s.prof", prog_argv[0]);
241 /* open the log file */
242 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
243 fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
244 RtsFlags.CcFlags.doCostCentres = 0;
248 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
249 /* dump the time, and the profiling interval */
250 fprintf(prof_file, "\"%s\"\n", time_str());
251 fprintf(prof_file, "\"%d ms\"\n", TICK_MILLISECS);
253 /* declare all the cost centres */
256 for (cc = CC_LIST; cc != NULL; cc = cc->link) {
257 fprintf(prof_file, "%d %d \"%s\" \"%s\"\n",
258 CC_UQ, cc->ccID, cc->label, cc->module);
263 if (RtsFlags.ProfFlags.doHeapProfile) {
264 /* Initialise the log file name */
265 hp_filename = arenaAlloc(prof_arena, strlen(prog_argv[0]) + 6);
266 sprintf(hp_filename, "%s.hp", prog_argv[0]);
268 /* open the log file */
269 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
270 fprintf(stderr, "Can't open profiling report file %s\n",
272 RtsFlags.ProfFlags.doHeapProfile = 0;
279 initTimeProfiling(void)
281 time_profiling = rtsTrue;
288 endProfiling ( void )
290 if (RtsFlags.CcFlags.doCostCentres) {
293 if (RtsFlags.ProfFlags.doHeapProfile) {
298 /* -----------------------------------------------------------------------------
299 Set cost centre stack when entering a function.
300 -------------------------------------------------------------------------- */
301 rtsBool entering_PAP;
304 EnterFunCCS ( CostCentreStack *cccs, CostCentreStack *ccsfn )
306 /* PAP_entry has already set CCCS for us */
308 entering_PAP = rtsFalse;
312 if (ccsfn->root->is_caf == CC_IS_CAF) {
313 return AppendCCS(cccs,ccsfn);
319 /* -----------------------------------------------------------------------------
320 Cost-centre stack manipulation
321 -------------------------------------------------------------------------- */
324 CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
326 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
327 #define PushCostCentre _PushCostCentre
330 fprintf(stderr,"Pushing %s on ", cc->label);
332 fprintf(stderr,"\n"));
333 return PushCostCentre(ccs,cc);
338 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
340 CostCentreStack *temp_ccs;
342 if (ccs == EMPTY_STACK)
343 return ActualPush(ccs,cc);
348 /* check if we've already memoized this stack */
349 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
351 if (temp_ccs != EMPTY_STACK)
354 temp_ccs = CheckLoop(ccs,cc);
355 if (temp_ccs != NULL) {
356 /* we have recursed to an older CCS. Mark this in
357 * the index table, and emit a "back edge" into the
360 ccs->indexTable = AddToIndexTable(ccs->indexTable,temp_ccs,cc,1);
361 DecBackEdge(temp_ccs,ccs);
364 return ActualPush(ccs,cc);
371 static CostCentreStack *
372 CheckLoop ( CostCentreStack *ccs, CostCentre *cc )
374 while (ccs != EMPTY_STACK) {
377 ccs = ccs->prevStack;
382 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
385 CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
387 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
388 #define AppendCCS _AppendCCS
392 fprintf(stderr,"Appending ");
394 fprintf(stderr," to ");
396 fprintf(stderr,"\n");});
397 return AppendCCS(ccs1,ccs2);
402 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
404 CostCentreStack *ccs = NULL;
410 if (ccs2->cc->is_caf == CC_IS_CAF) {
414 if (ccs2->prevStack != NULL) {
415 ccs = AppendCCS(ccs1, ccs2->prevStack);
418 return PushCostCentre(ccs,ccs2->cc);
421 static CostCentreStack *
422 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
424 CostCentreStack *new_ccs;
426 /* allocate space for a new CostCentreStack */
427 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
429 return ActualPush_(ccs, cc, new_ccs);
432 static CostCentreStack *
433 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
435 /* assign values to each member of the structure */
436 ASSIGN_CCS_ID(new_ccs->ccsID);
439 new_ccs->prevStack = ccs;
441 new_ccs->indexTable = EMPTY_TABLE;
443 /* Initialise the various _scc_ counters to zero
445 new_ccs->scc_count = 0;
447 /* Initialize all other stats here. There should be a quick way
448 * that's easily used elsewhere too
450 new_ccs->time_ticks = 0;
451 new_ccs->mem_alloc = 0;
452 new_ccs->inherited_ticks = 0;
453 new_ccs->inherited_alloc = 0;
455 new_ccs->root = ccs->root;
457 /* update the memoization table for the parent stack */
458 if (ccs != EMPTY_STACK)
459 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc,
460 0/*not a back edge*/);
462 /* make sure this CC is declared at the next heap/time sample */
465 /* return a pointer to the new stack */
470 static CostCentreStack *
471 IsInIndexTable(IndexTable *it, CostCentre *cc)
473 while (it!=EMPTY_TABLE)
481 /* otherwise we never found it so return EMPTY_TABLE */
487 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs,
488 CostCentre *cc, unsigned int back_edge)
492 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
495 new_it->ccs = new_ccs;
497 new_it->back_edge = back_edge;
503 DecCCS(CostCentreStack *ccs)
505 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
506 if (ccs->prevStack == EMPTY_STACK)
507 fprintf(prof_file, "%d %d 1 %d\n", CCS_UQ,
508 ccs->ccsID, ccs->cc->ccID);
510 fprintf(prof_file, "%d %d 2 %d %d\n", CCS_UQ,
511 ccs->ccsID, ccs->cc->ccID, ccs->prevStack->ccsID);
516 DecBackEdge( CostCentreStack *ccs, CostCentreStack *oldccs )
518 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
519 if (ccs->prevStack == EMPTY_STACK)
520 fprintf(prof_file, "%d %d 1 %d\n", CCS_UQ,
521 ccs->ccsID, ccs->cc->ccID);
523 fprintf(prof_file, "%d %d 2 %d %d\n", CCS_UQ,
524 ccs->ccsID, ccs->cc->ccID, oldccs->ccsID);
528 /* -----------------------------------------------------------------------------
529 Generating a time & allocation profiling report.
530 -------------------------------------------------------------------------- */
532 /* -----------------------------------------------------------------------------
533 Generating the aggregated per-cost-centre time/alloc report.
534 -------------------------------------------------------------------------- */
536 static CostCentre *sorted_cc_list;
539 aggregate_cc_costs( CostCentreStack *ccs )
543 ccs->cc->mem_alloc += ccs->mem_alloc;
544 ccs->cc->time_ticks += ccs->time_ticks;
546 for (i = ccs->indexTable; i != 0; i = i->next) {
548 aggregate_cc_costs(i->ccs);
554 insert_cc_in_sorted_list( CostCentre *new_cc )
556 CostCentre **prev, *cc;
558 prev = &sorted_cc_list;
559 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
560 if (new_cc->time_ticks > cc->time_ticks) {
573 report_per_cc_costs( void )
575 CostCentre *cc, *next;
577 aggregate_cc_costs(CCS_MAIN);
578 sorted_cc_list = NULL;
580 for (cc = CC_LIST; cc != NULL; cc = next) {
582 if (cc->time_ticks > total_prof_ticks/100
583 || cc->mem_alloc > total_alloc/100
584 || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
585 insert_cc_in_sorted_list(cc);
589 fprintf(prof_file, "%-20s %-10s", "COST CENTRE", "MODULE");
590 fprintf(prof_file, "%6s %6s", "%time", "%alloc");
591 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
592 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
594 fprintf(prof_file, "\n\n");
596 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
597 fprintf(prof_file, "%-20s %-10s", cc->label, cc->module);
598 fprintf(prof_file, "%6.1f %6.1f",
599 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
600 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
604 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
605 fprintf(prof_file, " %5ld %9lld", cc->time_ticks, cc->mem_alloc);
607 fprintf(prof_file, "\n");
610 fprintf(prof_file,"\n\n");
613 /* -----------------------------------------------------------------------------
614 Generate the cost-centre-stack time/alloc report
615 -------------------------------------------------------------------------- */
618 fprint_header( void )
620 fprintf(prof_file, "%-24s %-10s individual inherited\n", "", "");
622 fprintf(prof_file, "%-24s %-10s", "COST CENTRE", "MODULE");
623 fprintf(prof_file, "%8s %5s %5s %5s %5s", "entries", "%time", "%alloc", "%time", "%alloc");
625 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
626 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
627 #if defined(PROFILING_DETAIL_COUNTS)
628 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
629 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
633 fprintf(prof_file, "\n\n");
637 report_ccs_profiling( void )
640 char temp[128]; /* sigh: magic constant */
644 total_prof_ticks = 0;
646 count_ticks(CCS_MAIN);
648 switch (RtsFlags.CcFlags.doCostCentres) {
651 case COST_CENTRES_XML:
657 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
658 time_str(), "Final");
660 fprintf(prof_file, "\n\t ");
661 fprintf(prof_file, " %s", prog_argv[0]);
662 fprintf(prof_file, " +RTS");
663 for (count = 0; rts_argv[count]; count++)
664 fprintf(prof_file, " %s", rts_argv[count]);
665 fprintf(prof_file, " -RTS");
666 for (count = 1; prog_argv[count]; count++)
667 fprintf(prof_file, " %s", prog_argv[count]);
668 fprintf(prof_file, "\n\n");
670 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
671 total_prof_ticks / (StgFloat) TICK_FREQUENCY,
672 total_prof_ticks, TICK_MILLISECS);
674 fprintf(prof_file, "\ttotal alloc = %11s bytes",
675 ullong_format_string((ullong) total_alloc * sizeof(W_),
676 temp, rtsTrue/*commas*/));
677 /* ToDo: 64-bit error! */
679 #if defined(PROFILING_DETAIL_COUNTS)
680 fprintf(prof_file, " (%lu closures)", total_allocs);
682 fprintf(prof_file, " (excludes profiling overheads)\n\n");
684 report_per_cc_costs();
686 inherit_costs(CCS_MAIN);
689 reportCCS(pruneCCSTree(CCS_MAIN), 0);
695 reportCCS(CostCentreStack *ccs, nat indent)
702 /* Only print cost centres with non 0 data ! */
704 if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
705 ! ccs_to_ignore(ccs))
706 /* force printing of *all* cost centres if -P -P */
709 fprintf(prof_file, "%-*s%-*s %-10s",
710 indent, "", 24-indent, cc->label, cc->module);
712 fprintf(prof_file, "%8lld %5.1f %5.1f %5.1f %5.1f",
714 total_prof_ticks == 0 ? 0.0 : (ccs->time_ticks / (StgFloat) total_prof_ticks * 100),
715 total_alloc == 0 ? 0.0 : (ccs->mem_alloc / (StgFloat) total_alloc * 100),
716 total_prof_ticks == 0 ? 0.0 : (ccs->inherited_ticks / (StgFloat) total_prof_ticks * 100),
717 total_alloc == 0 ? 0.0 : (ccs->inherited_alloc / (StgFloat) total_alloc * 100)
720 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
721 fprintf(prof_file, " %5ld %9lld", ccs->time_ticks, ccs->mem_alloc*sizeof(W_));
722 #if defined(PROFILING_DETAIL_COUNTS)
723 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
724 ccs->mem_allocs, ccs->thunk_count,
725 ccs->function_count, ccs->pap_count,
726 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
727 ccs->caffun_subsumed);
730 fprintf(prof_file, "\n");
733 for (i = ccs->indexTable; i != 0; i = i->next) {
735 reportCCS(i->ccs, indent+1);
741 /* Traverse the cost centre stack tree and accumulate
745 count_ticks(CostCentreStack *ccs)
749 if (!ccs_to_ignore(ccs)) {
750 total_alloc += ccs->mem_alloc;
751 total_prof_ticks += ccs->time_ticks;
753 for (i = ccs->indexTable; i != NULL; i = i->next)
759 /* Traverse the cost centre stack tree and inherit ticks & allocs.
762 inherit_costs(CostCentreStack *ccs)
766 if (ccs_to_ignore(ccs)) { return; }
768 ccs->inherited_ticks += ccs->time_ticks;
769 ccs->inherited_alloc += ccs->mem_alloc;
771 for (i = ccs->indexTable; i != NULL; i = i->next)
773 inherit_costs(i->ccs);
774 ccs->inherited_ticks += i->ccs->inherited_ticks;
775 ccs->inherited_alloc += i->ccs->inherited_alloc;
781 /* return rtsTrue if it is one of the ones that
782 * should not be reported normally (because it confuses
786 ccs_to_ignore (CostCentreStack *ccs)
788 if ( ccs == CCS_OVERHEAD
789 || ccs == CCS_DONT_CARE
791 || ccs == CCS_SYSTEM) {
798 static CostCentreStack *
799 pruneCCSTree( CostCentreStack *ccs )
801 CostCentreStack *ccs1;
802 IndexTable *i, **prev;
804 prev = &ccs->indexTable;
805 for (i = ccs->indexTable; i != 0; i = i->next) {
806 if (i->back_edge) { continue; }
808 ccs1 = pruneCCSTree(i->ccs);
816 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
817 /* force printing of *all* cost centres if -P -P */ )
819 || ( ccs->indexTable != 0 )
820 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
828 /* -----------------------------------------------------------------------------
829 Generate the XML time/allocation profile
830 -------------------------------------------------------------------------- */
833 gen_XML_logfile( void )
835 fprintf(prof_file, "%d %lu", TIME_UPD_UQ, total_prof_ticks);
837 reportCCS_XML(pruneCCSTree(CCS_MAIN));
839 fprintf(prof_file, " 0\n");
845 reportCCS_XML(CostCentreStack *ccs)
850 if (ccs_to_ignore(ccs)) { return; }
854 fprintf(prof_file, " 1 %d %llu %lu %llu",
855 ccs->ccsID, ccs->scc_count, ccs->time_ticks, ccs->mem_alloc);
857 for (i = ccs->indexTable; i != 0; i = i->next) {
859 reportCCS_XML(i->ccs);
865 print_ccs (FILE *fp, CostCentreStack *ccs)
868 fprintf(fp, "Cost-Centre Stack: ");
873 print_ccs(fp, ccs->prevStack);
874 fprintf(fp, "->[%s,%s]", ccs->cc->label, ccs->cc->module);
876 fprintf(fp, "[%s,%s]", ccs->cc->label, ccs->cc->module);
887 printCCS ( CostCentreStack *ccs )
890 for (; ccs; ccs = ccs->prevStack ) {
891 fprintf(stderr,ccs->cc->label);
892 if (ccs->prevStack) {
900 #endif /* PROFILING */