1 /* -----------------------------------------------------------------------------
2 * $Id: Profiling.c,v 1.2 1998/12/02 13:28:35 simonm Exp $
4 * (c) The GHC Team, 1998
6 * Support for profiling
8 * ---------------------------------------------------------------------------*/
17 #include "StgStartup.h"
19 #include "Proftimer.h"
23 * Global variables used to assign unique IDs to cc's, ccs's, and
31 /* Table sizes from old profiling system. Not sure if we'll need
34 nat time_intervals = 0;
35 nat earlier_ticks = 0;
42 /* Are we time-profiling?
44 rtsBool time_profiling = rtsFalse;
46 /* figures for the profiling report.
48 static lnat total_alloc, total_ticks;
50 /* Globals for opening the profiling log file
52 static char *prof_filename; /* prof report file name = <program>.prof */
53 static FILE *prof_file;
55 /* The Current Cost Centre Stack (for attributing costs)
57 CostCentreStack *CCCS;
59 /* Linked lists to keep track of cc's and ccs's that haven't
60 * been declared in the log file yet
63 CostCentreStack *CCS_LIST;
64 CCSDecList *New_CCS_LIST;
67 * Built-in cost centres and cost-centre stacks:
69 * MAIN is the root of the cost-centre stack tree. If there are
70 * no _scc_s in the program, all costs will be attributed
73 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
74 * RTS operations apart from garbage collection are attributed
77 * GC is the storage manager / garbage collector.
79 * OVERHEAD gets all costs generated by the profiling system
80 * itself. These are costs that would not be incurred
81 * during non-profiled execution of the program.
83 * SUBSUMED is the one-and-only CCS placed on top-level functions.
84 * It indicates that all costs are to be attributed to the
85 * enclosing cost centre stack. SUBSUMED never accumulates
88 * DONT_CARE is a placeholder cost-centre we assign to static
89 * constructors. It should *never* accumulate any costs.
92 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "MAIN", CC_IS_BORING,);
93 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", "MAIN", CC_IS_BORING,);
94 CC_DECLARE(CC_GC, "GC", "GC", "GC", CC_IS_BORING,);
95 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "PROFILING", CC_IS_CAF,);
96 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", "MAIN", CC_IS_SUBSUMED,);
97 CC_DECLARE(CC_DONTZuCARE,"DONT_CARE", "MAIN", "MAIN", CC_IS_BORING,);
99 CCS_DECLARE(CCS_MAIN, CC_MAIN, CC_IS_BORING, );
100 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, CC_IS_BORING, );
101 CCS_DECLARE(CCS_GC, CC_GC, CC_IS_BORING, );
102 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, CC_IS_CAF, );
103 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, CC_IS_SUBSUMED, );
104 CCS_DECLARE(CCS_DONTZuCARE, CC_DONTZuCARE, CC_IS_BORING, );
110 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
111 CostCentreStack *new_ccs );
113 static void registerCostCentres ( void );
114 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
115 static void count_ticks ( CostCentreStack *ccs );
116 static void reportCCS ( CostCentreStack *ccs, nat indent );
117 static void DecCCS ( CostCentreStack *ccs );
119 /* -----------------------------------------------------------------------------
120 Initialise the profiling environment
121 -------------------------------------------------------------------------- */
126 CostCentreStack *ccs, *next;
128 /* for the benefit of allocate()... */
131 if (!RtsFlags.CcFlags.doCostCentres)
134 time_profiling = rtsTrue;
136 /* Initialise the log file name */
137 prof_filename = stgMallocBytes(strlen(prog_argv[0]) + 6, "initProfiling");
138 sprintf(prof_filename, "%s.prof", prog_argv[0]);
140 /* Initialize counters for IDs */
145 /* Initialize Declaration lists to NULL */
149 /* Register all the cost centres / stacks in the program
150 * CC_MAIN gets link = 0, all others have non-zero link.
152 REGISTER_CC(CC_MAIN);
153 REGISTER_CC(CC_SYSTEM);
155 REGISTER_CC(CC_OVERHEAD);
156 REGISTER_CC(CC_SUBSUMED);
157 REGISTER_CC(CC_DONTZuCARE);
158 REGISTER_CCS(CCS_MAIN);
159 REGISTER_CCS(CCS_SYSTEM);
160 REGISTER_CCS(CCS_GC);
161 REGISTER_CCS(CCS_OVERHEAD);
162 REGISTER_CCS(CCS_SUBSUMED);
163 REGISTER_CCS(CCS_DONTZuCARE);
166 registerCostCentres();
168 /* find all the "special" cost centre stacks, and make them children
171 ASSERT(CCS_MAIN->prevStack == 0);
172 for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
173 next = ccs->prevStack;
175 ActualPush_(CCS_MAIN,ccs->cc,ccs);
179 /* profiling is the only client of the VTALRM system at the moment,
180 * so just install the profiling tick handler. */
181 install_vtalrm_handler(handleProfTick);
186 endProfiling ( void )
192 heapCensus ( bdescr *bd )
197 /* -----------------------------------------------------------------------------
198 Register Cost Centres
200 At the moment, this process just supplies a unique integer to each
201 statically declared cost centre and cost centre stack in the
204 The code generator inserts a small function "reg<moddule>" in each
205 module which registers any cost centres from that module and calls
206 the registration functions in each of the modules it imports. So,
207 if we call "regMain", each reachable module in the program will be
210 The reg* functions are compiled in the same way as STG code,
211 i.e. without normal C call/return conventions. Hence we must use
212 StgRun to call this stuff.
213 -------------------------------------------------------------------------- */
215 /* The registration functions use an explicit stack...
217 #define REGISTER_STACK_SIZE (BLOCK_SIZE * 4)
221 registerCostCentres ( void )
223 /* this storage will be reclaimed by the garbage collector,
226 register_stack = (F_ *)allocate(REGISTER_STACK_SIZE / sizeof(W_));
228 StgRun((StgFunPtr)stg_register);
232 /* -----------------------------------------------------------------------------
233 Cost-centre stack manipulation
234 -------------------------------------------------------------------------- */
237 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
239 CostCentreStack *temp_ccs;
241 if (ccs == EMPTY_STACK)
242 return ActualPush(ccs,cc);
247 /* check if we've already memoized this stack */
248 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
250 if (temp_ccs != EMPTY_STACK)
253 /* remove the CC to avoid loops */
254 ccs = RemoveCC(ccs,cc);
255 /* have a different stack now, need to check the memo table again */
256 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
257 if (temp_ccs != EMPTY_STACK)
260 return ActualPush(ccs,cc);
268 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
270 CostCentreStack *new_ccs;
272 /* allocate space for a new CostCentreStack */
273 new_ccs = (CostCentreStack *) stgMallocBytes(sizeof(CostCentreStack), "Error allocating space for CostCentreStack");
275 return ActualPush_(ccs, cc, new_ccs);
278 static CostCentreStack *
279 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
281 /* assign values to each member of the structure */
282 ASSIGN_CCS_ID(new_ccs->ccsID);
285 new_ccs->prevStack = ccs;
287 new_ccs->indexTable = EMPTY_TABLE;
289 /* Initialise the various _scc_ counters to zero
291 new_ccs->scc_count = 0;
292 new_ccs->sub_scc_count = 0;
293 new_ccs->sub_cafcc_count = 0;
294 new_ccs->sub_dictcc_count = 0;
296 /* Initialize all other stats here. There should be a quick way
297 * that's easily used elsewhere too
299 new_ccs->time_ticks = 0;
300 new_ccs->mem_alloc = 0;
302 /* stacks are subsumed only if their top CostCentres are subsumed */
303 new_ccs->is_subsumed = cc->is_subsumed;
305 /* update the memoization table for the parent stack */
306 if (ccs != EMPTY_STACK)
307 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc);
309 /* make sure this CC is decalred at the next heap/time sample */
312 /* return a pointer to the new stack */
318 RemoveCC(CostCentreStack *ccs, CostCentre *cc)
320 CostCentreStack *del_ccs;
322 if (ccs == EMPTY_STACK) {
326 return ccs->prevStack;
329 del_ccs = RemoveCC(ccs->prevStack, cc);
331 if (del_ccs == EMPTY_STACK)
334 return PushCostCentre(del_ccs,ccs->cc);
342 IsInIndexTable(IndexTable *it, CostCentre *cc)
344 while (it!=EMPTY_TABLE)
352 /* otherwise we never found it so return EMPTY_TABLE */
358 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs, CostCentre *cc)
362 new_it = stgMallocBytes(sizeof(IndexTable), "AddToIndexTable");
365 new_it->ccs = new_ccs;
372 print_ccs (FILE *fp, CostCentreStack *ccs)
375 fprintf(fp, "Cost-Centre Stack: ");
380 print_ccs(fp, ccs->prevStack);
381 fprintf(fp, "->[%s,%s,%s]",
382 ccs->cc->label, ccs->cc->module, ccs->cc->group);
384 fprintf(fp, "[%s,%s,%s]",
385 ccs->cc->label, ccs->cc->module, ccs->cc->group);
395 DecCCS(CostCentreStack *ccs)
397 CCSDecList *temp_list;
400 (CCSDecList *) stgMallocBytes(sizeof(CCSDecList),
401 "Error allocating space for CCSDecList");
402 temp_list->ccs = ccs;
403 temp_list->nextList = New_CCS_LIST;
405 New_CCS_LIST = temp_list;
408 /* -----------------------------------------------------------------------------
409 Generating a time & allocation profiling report.
410 -------------------------------------------------------------------------- */
412 static FILE *prof_file;
415 report_ccs_profiling( void )
418 char temp[128]; /* sigh: magic constant */
419 rtsBool do_groups = rtsFalse;
421 if (!RtsFlags.CcFlags.doCostCentres)
428 count_ticks(CCS_MAIN);
430 /* open profiling output file */
431 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
432 fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
435 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
436 time_str(), "Final");
438 fprintf(prof_file, "\n\t ");
439 fprintf(prof_file, " %s", prog_argv[0]);
440 fprintf(prof_file, " +RTS");
441 for (count = 0; rts_argv[count]; count++)
442 fprintf(prof_file, " %s", rts_argv[count]);
443 fprintf(prof_file, " -RTS");
444 for (count = 1; prog_argv[count]; count++)
445 fprintf(prof_file, " %s", prog_argv[count]);
446 fprintf(prof_file, "\n\n");
448 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
449 total_ticks / (StgFloat) TICK_FREQUENCY,
450 total_ticks, TICK_MILLISECS);
452 fprintf(prof_file, "\ttotal alloc = %11s bytes",
453 ullong_format_string((ullong) total_alloc * sizeof(W_),
454 temp, rtsTrue/*commas*/));
455 /* ToDo: 64-bit error! */
457 #if defined(PROFILING_DETAIL_COUNTS)
458 fprintf(prof_file, " (%lu closures)", total_allocs);
460 fprintf(prof_file, " (excludes profiling overheads)\n\n");
462 fprintf(prof_file, "%-24s %-10s", "COST CENTRE", "MODULE");
465 do_groups = have_interesting_groups(Registered_CC);
466 if (do_groups) fprintf(prof_file, " %-11.11s", "GROUP");
469 fprintf(prof_file, "%8s %5s %5s %8s %5s %5s", "scc", "%time", "%alloc", "inner", "cafs", "dicts");
471 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
472 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
473 #if defined(PROFILING_DETAIL_COUNTS)
474 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
475 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
478 fprintf(prof_file, "\n\n");
480 reportCCS(CCS_MAIN, 0);
486 reportCCS(CostCentreStack *ccs, nat indent)
492 ASSERT(cc == CC_MAIN || cc->link != 0);
494 /* Only print cost centres with non 0 data ! */
496 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
497 /* force printing of *all* cost centres if -P -P */ )
499 || ( ccs->indexTable != 0 )
500 || ( ! ccs_to_ignore(ccs)
501 && (ccs->scc_count || ccs->sub_scc_count ||
502 ccs->time_ticks || ccs->mem_alloc
503 || (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE
504 && (ccs->sub_cafcc_count || ccs->sub_dictcc_count
505 #if defined(PROFILING_DETAIL_COUNTS)
506 || cc->thunk_count || cc->function_count || cc->pap_count
509 fprintf(prof_file, "%-*s%-*s %-10s",
510 indent, "", 24-indent, cc->label, cc->module);
513 if (do_groups) fprintf(prof_file, " %-11.11s",cc->group);
516 fprintf(prof_file, "%8ld %4.1f %4.1f %8ld %5ld %5ld",
518 total_ticks == 0 ? 0.0 : (ccs->time_ticks / (StgFloat) total_ticks * 100),
519 total_alloc == 0 ? 0.0 : (ccs->mem_alloc / (StgFloat) total_alloc * 100),
520 ccs->sub_scc_count, ccs->sub_cafcc_count, ccs->sub_dictcc_count);
522 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
523 fprintf(prof_file, " %5ld %9ld", ccs->time_ticks, ccs->mem_alloc*sizeof(W_));
524 #if defined(PROFILING_DETAIL_COUNTS)
525 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
526 ccs->mem_allocs, ccs->thunk_count,
527 ccs->function_count, ccs->pap_count,
528 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
529 ccs->caffun_subsumed);
532 fprintf(prof_file, "\n");
535 for (i = ccs->indexTable; i != 0; i = i->next) {
536 reportCCS(i->ccs, indent+1);
540 /* Traverse the cost centre stack tree and accumulate
544 count_ticks(CostCentreStack *ccs)
548 if (!ccs_to_ignore(ccs)) {
549 total_alloc += ccs->mem_alloc;
550 total_ticks += ccs->time_ticks;
552 for (i = ccs->indexTable; i != NULL; i = i->next)
556 /* return rtsTrue if it is one of the ones that
557 * should not be reported normally (because it confuses
561 ccs_to_ignore (CostCentreStack *ccs)
563 if ( ccs == CCS_OVERHEAD
564 || ccs == CCS_DONTZuCARE
566 || ccs == CCS_SYSTEM) {
573 #endif /* PROFILING */