1 \section[CostCentre.lc]{Code for Cost Centre Profiling}
7 Only have cost centres if @PROFILING@ defined (by the driver),
11 #if defined(PROFILING) || defined(PAR)
12 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "MAIN", CC_IS_BORING,/*not static*/);
13 CC_DECLARE(CC_GC, "GC", "GC", "GC", CC_IS_BORING,/*not static*/);
16 CC_DECLARE(CC_MSG, "MSG", "MSG", "MSG", CC_IS_BORING,/*not static*/);
17 CC_DECLARE(CC_IDLE, "IDLE", "IDLE", "IDLE", CC_IS_BORING,/*not static*/);
21 The current cost centre. It is initially set to "MAIN" by main.
22 We have to be careful when doing so, as an initial @SET_CCC(CC_MAIN)@
23 would try to increment some @sub_scc_count@ of the @CCC@ (nothing!).
26 CostCentre CCC; /* _not_ initialised */
28 #endif /* defined(PROFILING) || defined(PAR) */
31 The rest is for real cost centres (not thread activities).
34 #if defined(PROFILING) || defined(PAR)
36 %************************************************************************
38 \subsection{Initial Cost Centres}
40 %************************************************************************
42 Cost centres which are always required:
44 #if defined(PROFILING)
46 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "MAIN", CC_IS_CAF,/*not static*/);
47 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", "MAIN", CC_IS_SUBSUMED,/*not static*/);
48 CC_DECLARE(CC_DONTZuCARE,"DONT_CARE", "MAIN", "MAIN", CC_IS_BORING,/*not static*/);
52 The list of registered cost centres, initially empty:
54 CostCentre Registered_CC = REGISTERED_END;
58 %************************************************************************
60 \subsection{Profiling RTS Arguments}
62 %************************************************************************
65 I_ dump_intervals = 0;
67 /* And for the report ... */
68 static char prof_filename[STATS_FILENAME_MAXLEN]; /* prof report file name = <program>.prof */
69 static char **prog_argv_save;
70 static char **rts_argv_save;
72 /* And the serial report ... */
73 static char serial_filename[STATS_FILENAME_MAXLEN]; /* serial time profile file name = <program>.time */
74 static FILE *serial_file = NULL; /* serial time profile file */
77 init_cc_profiling(rts_argc, rts_argv, prog_argv)
79 char *rts_argv[], *prog_argv[];
86 char *select_descr = 0;
87 char *select_type = 0;
88 char *select_kind = 0;
92 prog_argv_save = prog_argv;
93 rts_argv_save = rts_argv;
96 sprintf(prof_filename, PROF_FILENAME_FMT_GUM, prog_argv[0], thisPE);
98 sprintf(prof_filename, PROF_FILENAME_FMT, prog_argv[0]);
101 /* Now perform any work to initialise profiling ... */
103 if (RTSflags.CcFlags.doCostCentres
105 || RTSflags.ProfFlags.doHeapProfile
111 /* set dump_intervals: if heap profiling only dump every 10 intervals */
113 dump_intervals = (RTSflags.ProfFlags.doHeapProfile) ? 10 : 1;
118 if (RTSflags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
119 /* produce serial time profile */
122 sprintf(serial_filename, TIME_FILENAME_FMT_GUM, prog_argv[0], thisPE);
124 sprintf(serial_filename, TIME_FILENAME_FMT, prog_argv[0]);
126 if ( (serial_file = fopen(serial_filename,"w")) == NULL ) {
127 fprintf(stderr, "Can't open serial time log file %s\n", serial_filename);
131 fprintf(serial_file, "JOB \"%s", prog_argv[0]);
132 fprintf(serial_file, " +RTS -P -i%4.2f -RTS",
133 interval_ticks/(StgFloat)TICK_FREQUENCY);
134 for(arg = 1; prog_argv[arg]; arg++)
135 fprintf(serial_file, " %s", prog_argv[arg]);
136 fprintf(serial_file, "\"\n");
137 fprintf(serial_file, "DATE \"%s\"\n", time_str());
139 fprintf(serial_file, "SAMPLE_UNIT \"seconds\"\n");
141 fprintf(serial_file, "VALUE_UNIT \"percentage time\"\n");
143 fprintf(serial_file, "VALUE_UNIT \"time ticks\"\n");
146 /* output initial 0 sample */
147 fprintf(serial_file, "BEGIN_SAMPLE 0.00\n");
148 fprintf(serial_file, "END_SAMPLE 0.00\n");
152 #if defined(PROFILING)
153 if (heap_profile_init(select_cc, select_mod, select_grp,
154 select_descr, select_type, select_kind,
163 Registering the cost centres is done after heap allocated as we use
164 the area to hold the stack of modules still to register.
167 extern P_ heap_space; /* pointer to the heap space */
168 StgFunPtr * register_stack; /* stack of register routines -- heap area used */
170 EXTFUN(startCcRegisteringWorld);
175 REGISTER_CC(CC_MAIN); /* register cost centre CC_MAIN */
176 REGISTER_CC(CC_GC); /* register cost centre CC_GC */
179 REGISTER_CC(CC_MSG); /* register cost centre CC_MSG */
180 REGISTER_CC(CC_IDLE); /* register cost centre CC_MSG */
183 #if defined(PROFILING)
184 REGISTER_CC(CC_OVERHEAD); /* register cost centre CC_OVERHEAD */
185 REGISTER_CC(CC_DONTZuCARE); /* register cost centre CC_DONT_CARE Right??? ToDo */
188 /* as per SET_CCC macro, without the sub_scc_count++ bit */
189 CCC = (CostCentre)STATIC_CC_REF(CC_MAIN);
192 #if defined(PROFILING)
193 /* always register -- if we do not, we get warnings (WDP 94/12) */
194 /* if (RTSflags.CcFlags.doCostCentres || RTSflags.ProfFlags.doHeapProfile) */
196 register_stack = (StgFunPtr *) heap_space;
197 miniInterpret((StgFunPtr) startCcRegisteringWorld);
202 %************************************************************************
204 \subsection{Cost Centre Profiling Report}
206 %************************************************************************
209 static I_ dump_interval = 0;
212 cc_to_ignore (CostCentre cc)
213 /* return rtsTrue if it is one of the ones that
214 should not be reported normally (because it confuses
218 # if !defined(PROFILING)
219 /* in parallel land, everything is interesting (not ignorable) */
223 if ( cc == CC_OVERHEAD || cc == CC_DONTZuCARE || cc == CC_GC ) {
228 # endif /* PROFILING */
232 report_cc_profiling(final)
238 char temp[128]; /* sigh: magic constant */
239 W_ total_ticks = 0, total_alloc = 0, total_allocs = 0;
240 W_ ignored_ticks = 0, ignored_alloc = 0, ignored_allocs = 0;
242 I_ final_ticks = 0; /*No. ticks in last sample*/
245 if (!RTSflags.CcFlags.doCostCentres)
251 StgFloat seconds = (previous_ticks + current_ticks) / (StgFloat) TICK_FREQUENCY;
254 fprintf(serial_file, "BEGIN_SAMPLE %0.2f\n", seconds);
256 /*In the parallel world we're particularly interested in the last sample*/
257 for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) {
258 if (! cc_to_ignore(cc))
259 final_ticks += cc->time_ticks;
262 for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) {
263 if (cc->time_ticks != 0 && ! cc_to_ignore(cc))
264 fprintf(serial_file, " %0.11s:%0.16s %3ld\n",
265 cc->module, cc->label, cc->time_ticks*100 / final_ticks);
268 /* In the sequential world, ignore partial sample at end of execution */
269 fprintf(serial_file, "END_SAMPLE %0.2f\n", seconds);
274 /* output serial profile sample */
276 fprintf(serial_file, "BEGIN_SAMPLE %0.2f\n", seconds);
278 for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) {
279 ASSERT_IS_REGISTERED(cc, 0);
280 if (cc->time_ticks != 0 && !cc_to_ignore(cc)) {
282 /* Print _percentages_ in the parallel world */
283 fprintf(serial_file, " %0.11s:%0.16s %3ld\n",
284 cc->module, cc->label, cc->time_ticks * 100/TICK_FREQUENCY);
286 fprintf(serial_file, " %0.11s:%0.16s %3ld\n",
287 cc->module, cc->label, cc->time_ticks);
292 fprintf(serial_file, "END_SAMPLE %0.2f\n", seconds);
297 for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) {
298 ASSERT_IS_REGISTERED(cc, 0);
299 cc->prev_ticks += cc->time_ticks;
302 if ( cc_to_ignore(cc) ) { /* reporting these just confuses users... */
303 ignored_ticks += cc->prev_ticks;
304 ignored_alloc += cc->mem_alloc;
305 ignored_allocs += cc->mem_allocs;
307 total_ticks += cc->prev_ticks;
308 total_alloc += cc->mem_alloc;
309 total_allocs += cc->mem_allocs;
313 if (total_ticks + ignored_ticks != current_ticks + previous_ticks)
314 fprintf(stderr, "Warning: Cost Centre tick inconsistency: total=%ld, ignored=%ld, current=%ld, previous=%ld\n", total_ticks, ignored_ticks, current_ticks, previous_ticks);
316 unblockVtAlrmSignal();
318 /* return if no cc profile required */
319 if (!final && ++dump_interval < dump_intervals)
322 /* reset dump_interval -- dump again after dump_intervals */
325 /* sort cost centres */
326 cc_sort(&Registered_CC, RTSflags.CcFlags.sortBy);
328 /* open profiling output file */
329 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
330 fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
333 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n", time_str(),
334 final ? "Final" : "PARTIAL");
336 fprintf(prof_file, "\n\t ");
337 fprintf(prof_file, " %s", prog_argv_save[0]);
338 fprintf(prof_file, " +RTS");
339 for (count = 0; rts_argv_save[count]; count++)
340 fprintf(prof_file, " %s", rts_argv_save[count]);
341 fprintf(prof_file, " -RTS");
342 for (count = 1; prog_argv_save[count]; count++)
343 fprintf(prof_file, " %s", prog_argv_save[count]);
344 fprintf(prof_file, "\n\n");
346 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
347 total_ticks / (StgFloat) TICK_FREQUENCY, total_ticks, TICK_MILLISECS);
348 fprintf(prof_file, "\ttotal alloc = %11s bytes (%lu closures) (excludes profiling overheads)\n",
349 ullong_format_string((ullong) total_alloc * sizeof(W_), temp, rtsTrue/*commas*/), total_allocs);
350 /* ToDo: 64-bit error! */
351 fprintf(prof_file, "\n");
353 fprintf(prof_file, "%-16.16s %-11.11s", "COST CENTRE", "MODULE");
355 fprintf(prof_file, " %-11.11s", "GROUP");
357 fprintf(prof_file, " %5s %5s %6s %6s", "scc", "subcc", "%time", "%alloc");
359 if (RTSflags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE)
360 fprintf(prof_file, " %11s %13s %8s %8s %8s (%5s %8s)", "cafcc", "thunks", "funcs", "PAPs", "closures", "ticks", "bytes");
361 fprintf(prof_file, "\n\n");
363 for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) {
364 ASSERT_IS_REGISTERED(cc, 0);
366 /* Only print cost centres with non 0 data ! */
368 if ( (RTSflags.CcFlags.doCostCentres >= COST_CENTRES_ALL
369 /* force printing of *all* cost centres if -P -P */ )
371 || ( ! cc_to_ignore(cc)
372 && (cc->scc_count || cc->sub_scc_count || cc->prev_ticks || cc->mem_alloc
373 || (RTSflags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE
374 && (cc->thunk_count || cc->function_count || cc->pap_count
375 || cc->cafcc_count || cc->sub_cafcc_count))))
378 fprintf(prof_file, "%-16.16s %-11.11s", cc->label, cc->module);
380 fprintf(prof_file, " %-11.11s",cc->group);
382 fprintf(prof_file, " %5ld %5ld %5.1f %5.1f",
383 cc->scc_count, cc->sub_scc_count,
384 total_ticks == 0 ? 0.0 : (cc->prev_ticks / (StgFloat) total_ticks * 100),
385 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat) total_alloc * 100));
387 if (RTSflags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE)
388 fprintf(prof_file, " %8ld %-8ld %8ld %8ld %8ld %8ld (%5ld %8ld)",
389 cc->cafcc_count, cc->sub_cafcc_count,
390 cc->thunk_count, cc->function_count, cc->pap_count,
392 cc->prev_ticks, cc->mem_alloc*sizeof(W_));
393 fprintf(prof_file, "\n");
402 %************************************************************************
404 \subsection{Miscellaneous profiling routines}
406 %************************************************************************
408 Routine to sort the list of registered cost centres. Uses a simple
409 insertion sort. First we need the different comparison routines.
414 cc_lt_label(CostCentre cc1, CostCentre cc2)
418 cmp = strcmp(cc1->group, cc2->group);
421 return 1; /* group < */
423 return 0; /* group > */
425 cmp = strcmp(cc1->module, cc2->module);
428 return 1; /* mod < */
430 return 0; /* mod > */
432 return (strcmp(cc1->label, cc2->label) < 0); /* cmp labels */
436 cc_gt_time(CostCentre cc1, CostCentre cc2)
438 /* ToDo: normal then caf then dict (instead of scc at top) */
440 if (cc1->scc_count && ! cc2->scc_count) /* scc counts at top */
442 if (cc2->scc_count && ! cc1->scc_count) /* scc counts at top */
445 if (cc1->prev_ticks > cc2->prev_ticks) /* time greater */
447 else if (cc1->prev_ticks < cc2->prev_ticks) /* time less */
450 if (cc1->mem_alloc > cc2->mem_alloc) /* time equal; alloc greater */
452 else if (cc1->mem_alloc < cc2->mem_alloc) /* time equal; alloc less */
455 if (cc1->thunk_count > cc2->thunk_count) /* time & alloc equal: cmp enters */
457 else if (cc1->thunk_count < cc2->thunk_count)
460 return (cc_lt_label(cc1, cc2)); /* all data equal: cmp labels */
464 cc_gt_alloc(CostCentre cc1, CostCentre cc2)
466 /* ToDo: normal then caf then dict (instead of scc at top) */
468 if (cc1->scc_count && ! cc2->scc_count) /* scc counts at top */
470 if (cc2->scc_count && ! cc1->scc_count) /* scc counts at top */
473 if (cc1->mem_alloc > cc2->mem_alloc) /* alloc greater */
475 else if (cc1->mem_alloc < cc2->mem_alloc) /* alloc less */
478 if (cc1->prev_ticks > cc2->prev_ticks) /* alloc equal; time greater */
480 else if (cc1->prev_ticks < cc2->prev_ticks) /* alloc equal; time less */
483 if (cc1->thunk_count > cc2->thunk_count) /* alloc & time: cmp enters */
485 else if (cc1->thunk_count < cc2->thunk_count)
488 return (cc_lt_label(cc1, cc2)); /* all data equal: cmp labels */
492 cc_sort(CostCentre *sort, char sort_on)
495 CostCentre sorted, insert, *search, insert_rest;
508 abort(); /* "can't happen" */
511 sorted = REGISTERED_END;
514 while (insert != REGISTERED_END) {
516 /* set search to the address of cc required to follow insert */
518 while (*search != REGISTERED_END && (cc_lt)(*search,insert)) {
519 search = &((*search)->registered);
522 /* place insert at *search and go to next insert */
523 insert_rest = insert->registered;
524 insert->registered = *search;
526 insert = insert_rest;
534 #endif /* PROFILING || PAR */