1 \section[CostCentre.lc]{Code for Cost Centre Profiling}
7 Only have cost centres if @PROFILING@ defined (by the driver),
11 #if defined(PROFILING) || defined(PAR)
12 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "MAIN", CC_IS_BORING,/*not static*/);
13 CC_DECLARE(CC_GC, "GC", "GC", "GC", CC_IS_BORING,/*not static*/);
16 CC_DECLARE(CC_MSG, "MSG", "MSG", "MSG", CC_IS_BORING,/*not static*/);
17 CC_DECLARE(CC_IDLE, "IDLE", "IDLE", "IDLE", CC_IS_BORING,/*not static*/);
22 CostCentre CCC; /* _not_ initialised */
24 #endif /* defined(PROFILING) || defined(PAR) */
27 The rest is for real cost centres (not thread activities).
30 #if defined(PROFILING) || defined(PAR)
32 %************************************************************************
34 \subsection{Initial Cost Centres}
36 %************************************************************************
38 Cost centres which are always required:
40 #if defined(PROFILING)
42 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "PROFILING", CC_IS_CAF, /*not static*/);
43 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", "MAIN", CC_IS_SUBSUMED, /*not static*/);
44 CC_DECLARE(CC_DONTZuCARE,"DONT_CARE", "MAIN", "MAIN", CC_IS_BORING, /*not static*/);
48 The list of registered cost centres, initially empty:
50 CostCentre Registered_CC = REGISTERED_END;
54 %************************************************************************
56 \subsection{Profiling RTS Arguments}
58 %************************************************************************
61 I_ dump_intervals = 0;
63 /* And for the report ... */
64 static char prof_filename[STATS_FILENAME_MAXLEN]; /* prof report file name = <program>.prof */
65 static char **prog_argv_save;
66 static char **rts_argv_save;
68 /* And the serial report ... */
69 static char serial_filename[STATS_FILENAME_MAXLEN]; /* serial time profile file name = <program>.time */
70 static FILE *serial_file = NULL; /* serial time profile file */
73 init_cc_profiling(rts_argc, rts_argv, prog_argv)
75 char *rts_argv[], *prog_argv[];
79 prog_argv_save = prog_argv;
80 rts_argv_save = rts_argv;
83 sprintf(prof_filename, PROF_FILENAME_FMT_GUM, prog_argv[0], thisPE);
85 sprintf(prof_filename, PROF_FILENAME_FMT, prog_argv[0]);
88 /* Now perform any work to initialise profiling ... */
90 if (RTSflags.CcFlags.doCostCentres
92 || RTSflags.ProfFlags.doHeapProfile
98 /* set dump_intervals: if heap profiling only dump every 10 intervals */
100 dump_intervals = (RTSflags.ProfFlags.doHeapProfile) ? 10 : 1;
105 if (RTSflags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
106 /* produce serial time profile */
109 sprintf(serial_filename, TIME_FILENAME_FMT_GUM, prog_argv[0], thisPE);
111 sprintf(serial_filename, TIME_FILENAME_FMT, prog_argv[0]);
113 if ( (serial_file = fopen(serial_filename,"w")) == NULL ) {
114 fprintf(stderr, "Can't open serial time log file %s\n", serial_filename);
118 fprintf(serial_file, "JOB \"%s", prog_argv[0]);
119 fprintf(serial_file, " +RTS -P -i%4.2f -RTS",
120 interval_ticks/(StgFloat)TICK_FREQUENCY);
121 for(arg = 1; prog_argv[arg]; arg++)
122 fprintf(serial_file, " %s", prog_argv[arg]);
123 fprintf(serial_file, "\"\n");
124 fprintf(serial_file, "DATE \"%s\"\n", time_str());
126 fprintf(serial_file, "SAMPLE_UNIT \"seconds\"\n");
128 fprintf(serial_file, "VALUE_UNIT \"percentage time\"\n");
130 fprintf(serial_file, "VALUE_UNIT \"time ticks\"\n");
133 /* output initial 0 sample */
134 fprintf(serial_file, "BEGIN_SAMPLE 0.00\n");
135 fprintf(serial_file, "END_SAMPLE 0.00\n");
139 #if defined(PROFILING)
140 if (heap_profile_init(prog_argv))
148 Registering the cost centres is done after heap allocated as we use
149 the area to hold the stack of modules still to register.
152 extern P_ heap_space; /* pointer to the heap space */
153 StgFunPtr * register_stack; /* stack of register routines -- heap area used */
155 EXTFUN(startCcRegisteringWorld);
160 REGISTER_CC(CC_MAIN); /* register cost centre CC_MAIN */
161 REGISTER_CC(CC_GC); /* register cost centre CC_GC */
164 REGISTER_CC(CC_MSG); /* register cost centre CC_MSG */
165 REGISTER_CC(CC_IDLE); /* register cost centre CC_MSG */
168 #if defined(PROFILING)
169 REGISTER_CC(CC_OVERHEAD); /* register cost centre CC_OVERHEAD */
170 REGISTER_CC(CC_DONTZuCARE); /* register cost centre CC_DONT_CARE Right??? ToDo */
173 SET_CCC_RTS(CC_MAIN,0,1); /* without the sub_scc_count++ */
175 #if defined(PROFILING)
176 /* always register -- if we do not, we get warnings (WDP 94/12) */
177 /* if (RTSflags.CcFlags.doCostCentres || RTSflags.ProfFlags.doHeapProfile) */
179 register_stack = (StgFunPtr *) heap_space;
180 miniInterpret((StgFunPtr) startCcRegisteringWorld);
185 %************************************************************************
187 \subsection{Cost Centre Profiling Report}
189 %************************************************************************
192 static I_ dump_interval = 0;
195 cc_to_ignore (CostCentre cc)
196 /* return rtsTrue if it is one of the ones that
197 should not be reported normally (because it confuses
201 # if !defined(PROFILING)
202 /* in parallel land, everything is interesting (not ignorable) */
206 if ( cc == CC_OVERHEAD || cc == CC_DONTZuCARE || cc == CC_GC ) {
211 # endif /* PROFILING */
215 have_interesting_groups(CostCentre cc)
217 char* interesting_group = NULL;
219 for (; cc != REGISTERED_END; cc = cc->registered) {
220 if (! cc_to_ignore(cc) && strcmp(cc->module,cc->group) != 0) {
221 if (interesting_group && strcmp(cc->group, interesting_group) != 0) {
224 interesting_group = cc->group;
232 report_cc_profiling(final)
238 char temp[128]; /* sigh: magic constant */
239 W_ total_ticks, ignored_ticks;
240 W_ total_alloc = 0, total_allocs = 0;
241 rtsBool do_groups = rtsFalse;
243 I_ final_ticks; /*No. ticks in last sample*/
246 if (!RTSflags.CcFlags.doCostCentres)
250 /* To avoid inconsistency, initialise the tick variables
251 after having blocked out VTALRM */
259 StgFloat seconds = (previous_ticks + current_ticks) / (StgFloat) TICK_FREQUENCY;
262 fprintf(serial_file, "BEGIN_SAMPLE %0.2f\n", seconds);
264 /*In the parallel world we're particularly interested in the last sample*/
265 for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) {
266 if (! cc_to_ignore(cc))
267 final_ticks += cc->time_ticks;
270 for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) {
271 if (cc->time_ticks != 0 && ! cc_to_ignore(cc))
272 fprintf(serial_file, " %0.11s:%0.16s %3ld\n",
273 cc->module, cc->label, cc->time_ticks*100 / final_ticks);
276 /* In the sequential world, ignore partial sample at end of execution */
277 fprintf(serial_file, "END_SAMPLE %0.2f\n", seconds);
282 /* output serial profile sample */
284 fprintf(serial_file, "BEGIN_SAMPLE %0.2f\n", seconds);
286 for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) {
287 ASSERT_IS_REGISTERED(cc, 0);
288 if (cc->time_ticks != 0 && !cc_to_ignore(cc)) {
290 /* Print _percentages_ in the parallel world */
291 fprintf(serial_file, " %0.11s:%0.16s %3ld\n",
292 cc->module, cc->label, cc->time_ticks * 100/TICK_FREQUENCY);
294 fprintf(serial_file, " %0.11s:%0.16s %3ld\n",
295 cc->module, cc->label, cc->time_ticks);
300 fprintf(serial_file, "END_SAMPLE %0.2f\n", seconds);
305 for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) {
306 ASSERT_IS_REGISTERED(cc, 0);
307 cc->prev_ticks += cc->time_ticks;
310 if ( cc_to_ignore(cc) ) { /* reporting these just confuses users... */
311 ignored_ticks += cc->prev_ticks;
313 total_ticks += cc->prev_ticks;
314 total_alloc += cc->mem_alloc;
315 #if defined(PROFILING_DETAIL_COUNTS)
316 total_allocs += cc->mem_allocs;
321 if (total_ticks + ignored_ticks != current_ticks + previous_ticks)
322 fprintf(stderr, "Warning: Cost Centre tick inconsistency: total=%ld, ignored=%ld, current=%ld, previous=%ld\n", total_ticks, ignored_ticks, current_ticks, previous_ticks);
324 unblockVtAlrmSignal();
326 /* return if no cc profile required */
327 if (!final && ++dump_interval < dump_intervals)
330 /* reset dump_interval -- dump again after dump_intervals */
333 /* sort cost centres */
334 cc_sort(&Registered_CC, RTSflags.CcFlags.sortBy);
336 /* open profiling output file */
337 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
338 fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
341 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n", time_str(),
342 final ? "Final" : "PARTIAL");
344 fprintf(prof_file, "\n\t ");
345 fprintf(prof_file, " %s", prog_argv_save[0]);
346 fprintf(prof_file, " +RTS");
347 for (count = 0; rts_argv_save[count]; count++)
348 fprintf(prof_file, " %s", rts_argv_save[count]);
349 fprintf(prof_file, " -RTS");
350 for (count = 1; prog_argv_save[count]; count++)
351 fprintf(prof_file, " %s", prog_argv_save[count]);
352 fprintf(prof_file, "\n\n");
355 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
356 total_ticks / (StgFloat) TICK_FREQUENCY, total_ticks, TICK_MILLISECS);
357 fprintf(prof_file, "\ttotal alloc = %11s bytes",
358 ullong_format_string((ullong) total_alloc * sizeof(W_), temp, rtsTrue/*commas*/));
359 /* ToDo: 64-bit error! */
361 #if defined(PROFILING_DETAIL_COUNTS)
362 fprintf(prof_file, " (%lu closures)", total_allocs);
364 fprintf(prof_file, " (excludes profiling overheads)\n\n");
367 fprintf(prof_file, "%-16.16s %-11.11s", "COST CENTRE", "MODULE");
369 do_groups = have_interesting_groups(Registered_CC);
370 if (do_groups) fprintf(prof_file, " %-11.11s", "GROUP");
372 fprintf(prof_file, "%8s %6s %6s %8s %5s %5s", "scc", "%time", "%alloc", "inner", "cafs", "dicts");
374 if (RTSflags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
375 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
376 #if defined(PROFILING_DETAIL_COUNTS)
377 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
378 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
381 fprintf(prof_file, "\n\n");
383 for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) {
384 ASSERT_IS_REGISTERED(cc, 0);
386 /* Only print cost centres with non 0 data ! */
388 if ( (RTSflags.CcFlags.doCostCentres >= COST_CENTRES_ALL
389 /* force printing of *all* cost centres if -P -P */ )
391 || ( ! cc_to_ignore(cc)
392 && (cc->scc_count || cc->sub_scc_count || cc->prev_ticks || cc->mem_alloc
393 || (RTSflags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE
394 && (cc->sub_cafcc_count || cc->sub_dictcc_count
395 #if defined(PROFILING_DETAIL_COUNTS)
396 || cc->thunk_count || cc->function_count || cc->pap_count
399 fprintf(prof_file, "%-16.16s %-11.11s", cc->label, cc->module);
400 if (do_groups) fprintf(prof_file, " %-11.11s",cc->group);
402 fprintf(prof_file, "%8ld %5.1f %5.1f %8ld %5ld %5ld",
404 total_ticks == 0 ? 0.0 : (cc->prev_ticks / (StgFloat) total_ticks * 100),
405 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat) total_alloc * 100),
406 cc->sub_scc_count, cc->sub_cafcc_count, cc->sub_dictcc_count);
408 if (RTSflags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
409 fprintf(prof_file, " %5ld %9ld", cc->prev_ticks, cc->mem_alloc*sizeof(W_));
410 #if defined(PROFILING_DETAIL_COUNTS)
411 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
412 cc->mem_allocs, cc->thunk_count,
413 cc->function_count, cc->pap_count,
414 cc->subsumed_fun_count, cc->subsumed_caf_count,
415 cc->caffun_subsumed);
418 fprintf(prof_file, "\n");
427 %************************************************************************
429 \subsection{Miscellaneous profiling routines}
431 %************************************************************************
433 Routine to sort the list of registered cost centres. Uses a simple
434 insertion sort. First we need the different comparison routines.
439 cc_lt_label(CostCentre cc1, CostCentre cc2)
443 cmp = strcmp(cc1->group, cc2->group);
446 return 1; /* group < */
448 return 0; /* group > */
450 cmp = strcmp(cc1->module, cc2->module);
453 return 1; /* mod < */
455 return 0; /* mod > */
457 return (strcmp(cc1->label, cc2->label) < 0); /* cmp labels */
461 cc_gt_time(CostCentre cc1, CostCentre cc2)
463 /* ToDo: normal then caf then dict (instead of scc at top) */
465 if (cc1->scc_count && ! cc2->scc_count) /* scc counts at top */
467 if (cc2->scc_count && ! cc1->scc_count) /* scc counts at top */
470 if (cc1->prev_ticks > cc2->prev_ticks) /* time greater */
472 else if (cc1->prev_ticks < cc2->prev_ticks) /* time less */
475 if (cc1->mem_alloc > cc2->mem_alloc) /* time equal; alloc greater */
477 else if (cc1->mem_alloc < cc2->mem_alloc) /* time equal; alloc less */
480 return (cc_lt_label(cc1, cc2)); /* all data equal: cmp labels */
484 cc_gt_alloc(CostCentre cc1, CostCentre cc2)
486 /* ToDo: normal then caf then dict (instead of scc at top) */
488 if (cc1->scc_count && ! cc2->scc_count) /* scc counts at top */
490 if (cc2->scc_count && ! cc1->scc_count) /* scc counts at top */
493 if (cc1->mem_alloc > cc2->mem_alloc) /* alloc greater */
495 else if (cc1->mem_alloc < cc2->mem_alloc) /* alloc less */
498 if (cc1->prev_ticks > cc2->prev_ticks) /* alloc equal; time greater */
500 else if (cc1->prev_ticks < cc2->prev_ticks) /* alloc equal; time less */
503 return (cc_lt_label(cc1, cc2)); /* all data equal: cmp labels */
507 cc_sort(CostCentre *sort, char sort_on)
510 CostCentre sorted, insert, *search, insert_rest;
523 abort(); /* "can't happen" */
526 sorted = REGISTERED_END;
529 while (insert != REGISTERED_END) {
531 /* set search to the address of cc required to follow insert */
533 while (*search != REGISTERED_END && (cc_lt)(*search,insert)) {
534 search = &((*search)->registered);
537 /* place insert at *search and go to next insert */
538 insert_rest = insert->registered;
539 insert->registered = *search;
541 insert = insert_rest;
549 #endif /* PROFILING || PAR */