1 \section[CostCentre.lc]{Code for Cost Centre Profiling}
7 Only have cost centres if @USE_COST_CENTRES@ defined (by the driver),
8 or if running CONCURRENT.
11 #if defined(USE_COST_CENTRES) || defined(CONCURRENT)
12 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "MAIN", CC_IS_BORING,/*not static*/);
13 CC_DECLARE(CC_GC, "GC", "GC", "GC", CC_IS_BORING,/*not static*/);
16 CC_DECLARE(CC_MSG, "MSG", "MSG", "MSG", CC_IS_BORING,/*not static*/);
17 CC_DECLARE(CC_IDLE, "IDLE", "IDLE", "IDLE", CC_IS_BORING,/*not static*/);
21 The current cost centre. It is initially set to "MAIN" by main.
22 We have to be careful when doing so, as an initial @SET_CCC(CC_MAIN)@
23 would try to increment some @sub_scc_count@ of the @CCC@ (nothing!).
26 CostCentre CCC; /* _not_ initialised */
28 #endif /* defined(USE_COST_CENTRES) || defined(CONCURRENT) */
31 The rest is for real cost centres (not thread activities).
34 #if defined(USE_COST_CENTRES) || defined(GUM)
36 %************************************************************************
38 \subsection[initial-cost-centres]{Initial Cost Centres}
40 %************************************************************************
42 Cost centres which are always required:
44 #if defined(USE_COST_CENTRES)
46 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "MAIN", CC_IS_CAF,/*not static*/);
47 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", "MAIN", CC_IS_SUBSUMED,/*not static*/);
48 CC_DECLARE(CC_DONTZuCARE,"DONT_CARE", "MAIN", "MAIN", CC_IS_BORING,/*not static*/);
52 The list of registered cost centres, initially empty:
54 CostCentre Registered_CC = REGISTERED_END;
57 %************************************************************************
59 \subsection[profiling-arguments]{Profiling RTS Arguments}
61 %************************************************************************
64 I_ cc_profiling = 0; /* 0 => not "cc_profiling"
65 >1 => do serial time profile
66 (other magic meanings, too, apparently) WDP 94/07
68 char cc_profiling_sort = SORTCC_TIME;
69 I_ dump_intervals = 0;
71 /* And for the report ... */
72 static char prof_filename[STATS_FILENAME_MAXLEN]; /* prof report file name = <program>.prof */
73 static char **prog_argv_save;
74 static char **rts_argv_save;
76 /* And the serial report ... */
77 static char serial_filename[STATS_FILENAME_MAXLEN]; /* serial time profile file name = <program>.time */
78 static FILE *serial_file = NULL; /* serial time profile file */
81 init_cc_profiling(rts_argc, rts_argv, prog_argv)
83 char *rts_argv[], *prog_argv[];
85 I_ arg, ch, error = 0;
90 char *select_descr = 0;
91 char *select_type = 0;
92 char *select_kind = 0;
96 prog_argv_save = prog_argv;
97 rts_argv_save = rts_argv;
100 sprintf(prof_filename, PROF_FILENAME_FMT_GUM, prog_argv[0], thisPE);
102 sprintf(prof_filename, PROF_FILENAME_FMT, prog_argv[0]);
105 for (arg = 0; arg < rts_argc; arg++) {
106 if (rts_argv[arg][0] == '-') {
107 switch (rts_argv[arg][1]) {
108 case 'P': /* detailed cost centre profiling (time/alloc) */
110 case 'p': /* cost centre profiling (time/alloc) */
112 for (ch = 2; rts_argv[arg][ch]; ch++) {
113 switch (rts_argv[arg][2]) {
117 cc_profiling_sort = rts_argv[arg][ch];
120 fprintf(stderr, "Invalid profiling sort option %s\n", rts_argv[arg]);
125 #if defined(USE_COST_CENTRES)
126 case 'h': /* serial heap profile */
127 switch (rts_argv[arg][2]) {
130 prof_req = HEAP_BY_CC;
133 prof_req = HEAP_BY_MOD;
136 prof_req = HEAP_BY_GRP;
139 prof_req = HEAP_BY_DESCR;
142 prof_req = HEAP_BY_TYPE;
145 prof_req = HEAP_BY_TIME;
146 if (rts_argv[arg][3]) {
147 char *start_str = strchr(rts_argv[arg]+3, ',');
149 if (start_str) *start_str = '\0';
151 if ((intervals = decode(rts_argv[arg]+3)) != 0) {
152 time_intervals = (hash_t) intervals;
153 /* ToDo: and what if it *is* zero intervals??? */
156 earlier_ticks = (I_)((atof(start_str + 1) * TICK_FREQUENCY));
161 fprintf(stderr, "Invalid heap profile option: %s\n",
167 case 'z': /* size of index tables */
168 switch (rts_argv[arg][2]) {
170 max_cc_no = (hash_t) decode(rts_argv[arg]+3);
171 if (max_cc_no == 0) {
172 fprintf(stderr, "Bad number of cost centres %s\n", rts_argv[arg]);
177 max_mod_no = (hash_t) decode(rts_argv[arg]+3);
178 if (max_mod_no == 0) {
179 fprintf(stderr, "Bad number of modules %s\n", rts_argv[arg]);
184 max_grp_no = (hash_t) decode(rts_argv[arg]+3);
185 if (max_grp_no == 0) {
186 fprintf(stderr, "Bad number of groups %s\n", rts_argv[arg]);
191 max_descr_no = (hash_t) decode(rts_argv[arg]+3);
192 if (max_descr_no == 0) {
193 fprintf(stderr, "Bad number of closure descriptions %s\n", rts_argv[arg]);
198 max_type_no = (hash_t) decode(rts_argv[arg]+3);
199 if (max_type_no == 0) {
200 fprintf(stderr, "Bad number of type descriptions %s\n", rts_argv[arg]);
205 fprintf(stderr, "Invalid index table size option: %s\n",
211 case 'c': /* cost centre label select */
212 case 'm': /* cost centre module select */
213 case 'g': /* cost centre group select */
214 case 'd': /* closure descr select */
215 case 'y': /* closure type select */
216 case 'k': /* closure kind select */
217 left = strchr(rts_argv[arg], '{');
218 right = strrchr(rts_argv[arg], '}');
219 if (! left || ! right ||
220 strrchr(rts_argv[arg], '{') != left ||
221 strchr(rts_argv[arg], '}') != right) {
222 fprintf(stderr, "Invalid heap profiling selection bracketing\n %s\n", rts_argv[arg]);
226 switch (rts_argv[arg][1]) {
227 case 'c': /* cost centre label select */
228 select_cc = left + 1;
230 case 'm': /* cost centre module select */
231 select_mod = left + 1;
233 case 'g': /* cost centre group select */
234 select_grp = left + 1;
236 case 'd': /* closure descr select */
237 select_descr = left + 1;
239 case 't': /* closure type select */
240 select_type = left + 1;
242 case 'k': /* closure kind select */
243 select_kind = left + 1;
249 case 'a': /* closure age select */
250 select_age = decode(rts_argv[arg]+2);
252 #endif /* defined(USE_COST_CENTRES) */
254 case 'i': /* serial profiling -- initial timer interval */
255 interval_ticks = (I_) ((atof(rts_argv[arg]+2) * TICK_FREQUENCY));
256 if (interval_ticks <= 0)
264 /* Now perform any work to initialise profiling ... */
266 if (cc_profiling || prof_req != HEAP_NO_PROFILING) {
270 /* set dump_intervals: if heap profiling only dump every 10 intervals */
271 if (prof_req == HEAP_NO_PROFILING) {
277 if (cc_profiling > 1) {
278 /* produce serial time profile */
281 sprintf(serial_filename, TIME_FILENAME_FMT_GUM, prog_argv[0], thisPE);
283 sprintf(serial_filename, TIME_FILENAME_FMT, prog_argv[0]);
285 if ( (serial_file = fopen(serial_filename,"w")) == NULL ) {
286 fprintf(stderr, "Can't open serial time log file %s\n", serial_filename);
290 fprintf(serial_file, "JOB \"%s", prog_argv[0]);
291 fprintf(serial_file, " +RTS -P -i%4.2f -RTS",
292 interval_ticks/(StgFloat)TICK_FREQUENCY);
293 for(arg = 1; prog_argv[arg]; arg++)
294 fprintf(serial_file, " %s", prog_argv[arg]);
295 fprintf(serial_file, "\"\n");
296 fprintf(serial_file, "DATE \"%s\"\n", time_str());
298 fprintf(serial_file, "SAMPLE_UNIT \"seconds\"\n");
299 fprintf(serial_file, "VALUE_UNIT \"time ticks\"\n");
301 /* output initial 0 sample */
302 fprintf(serial_file, "BEGIN_SAMPLE 0.00\n");
303 fprintf(serial_file, "END_SAMPLE 0.00\n");
307 #if defined(USE_COST_CENTRES)
308 if (heap_profile_init(prof_req, select_cc, select_mod, select_grp,
309 select_descr, select_type, select_kind,
310 select_age, prog_argv))
318 Registering the cost centres is done after heap allocated as we use
319 the area to hold the stack of modules still to register.
322 extern P_ heap_space; /* pointer to the heap space */
323 StgFunPtr * register_stack; /* stack of register routines -- heap area used */
324 extern I_ heap_profiling_req;
326 EXTFUN(startCcRegisteringWorld);
331 REGISTER_CC(CC_MAIN); /* register cost centre CC_MAIN */
332 REGISTER_CC(CC_GC); /* register cost centre CC_GC */
335 REGISTER_CC(CC_MSG); /* register cost centre CC_MSG */
336 REGISTER_CC(CC_IDLE); /* register cost centre CC_MSG */
339 #if defined(USE_COST_CENTRES)
340 REGISTER_CC(CC_OVERHEAD); /* register cost centre CC_OVERHEAD */
341 REGISTER_CC(CC_DONTZuCARE); /* register cost centre CC_DONT_CARE Right??? ToDo */
344 /* as per SET_CCC macro, without the sub_scc_count++ bit */
345 CCC = (CostCentre)STATIC_CC_REF(CC_MAIN);
348 #if defined(USE_COST_CENTRES)
349 /* always register -- if we do not, we get warnings (WDP 94/12) */
350 /* if (cc_profiling || heap_profiling_req != HEAP_NO_PROFILING) */
352 register_stack = (StgFunPtr *) heap_space;
353 miniInterpret((StgFunPtr) startCcRegisteringWorld);
358 %************************************************************************
360 \subsection[cost-centre-profiling]{Cost Centre Profiling Report}
362 %************************************************************************
366 static I_ dump_interval = 0;
369 report_cc_profiling(final)
376 W_ total_ticks = 0, total_alloc = 0, total_allocs = 0;
384 StgFloat seconds = (previous_ticks + current_ticks) / (StgFloat) TICK_FREQUENCY;
387 /* ignore partial sample at end of execution */
389 /* output final 0 sample */
390 fprintf(serial_file, "BEGIN_SAMPLE %0.2f\n", seconds);
391 fprintf(serial_file, "END_SAMPLE %0.2f\n", seconds);
396 /* output serail profile sample */
398 fprintf(serial_file, "BEGIN_SAMPLE %0.2f\n", seconds);
400 for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) {
401 ASSERT_IS_REGISTERED(cc, 0);
402 if (cc->time_ticks) {
403 fprintf(serial_file, " %0.11s:%0.16s %3ld\n",
404 cc->module, cc->label, cc->time_ticks);
408 fprintf(serial_file, "END_SAMPLE %0.2f\n", seconds);
413 for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) {
414 ASSERT_IS_REGISTERED(cc, 0);
415 cc->prev_ticks += cc->time_ticks;
418 total_ticks += cc->prev_ticks;
419 total_alloc += cc->mem_alloc;
420 total_allocs += cc->mem_allocs;
423 if (total_ticks != current_ticks + previous_ticks)
424 fprintf(stderr, "Warning: Cost Centre tick inconsistency: total=%ld, current=%ld, previous=%ld\n", total_ticks, current_ticks, previous_ticks);
426 unblockVtAlrmSignal();
428 /* return if no cc profile required */
429 if (!final && ++dump_interval < dump_intervals)
432 /* reset dump_interval -- dump again after dump_intervals */
435 /* sort cost centres */
436 cc_sort(&Registered_CC, cc_profiling_sort);
438 /* open profiling output file */
439 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
440 fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
443 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n", time_str(),
444 final ? "Final" : "PARTIAL");
446 fprintf(prof_file, "\n\t ");
447 fprintf(prof_file, " %s", prog_argv_save[0]);
448 fprintf(prof_file, " +RTS");
449 for (count = 0; rts_argv_save[count]; count++)
450 fprintf(prof_file, " %s", rts_argv_save[count]);
451 fprintf(prof_file, " -RTS");
452 for (count = 1; prog_argv_save[count]; count++)
453 fprintf(prof_file, " %s", prog_argv_save[count]);
454 fprintf(prof_file, "\n\n");
456 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
457 total_ticks / (StgFloat) TICK_FREQUENCY, total_ticks, TICK_MILLISECS);
458 fprintf(prof_file, "\ttotal alloc = %11s bytes (%lu closures) (excludes profiling overheads)\n",
459 ullong_format_string((ullong) total_alloc * sizeof(W_), temp, rtsTrue/*commas*/), total_allocs);
460 /* ToDo: 64-bit error! */
461 fprintf(prof_file, "\n");
463 fprintf(prof_file, "%-16.16s %-11.11s", "COST CENTRE", "MODULE");
465 fprintf(prof_file, " %-11.11s", "GROUP");
467 fprintf(prof_file, " %5s %5s %6s %6s", "scc", "subcc", "%time", "%alloc");
469 if (cc_profiling > 1)
470 fprintf(prof_file, " %11s %13s %8s %8s %8s (%5s %8s)", "cafcc", "thunks", "funcs", "PAPs", "closures", "ticks", "bytes");
471 fprintf(prof_file, "\n\n");
473 for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) {
474 ASSERT_IS_REGISTERED(cc, 0);
476 /* Only print cost centres with non 0 data ! */
478 if (cc->scc_count || cc->sub_scc_count || cc->prev_ticks || cc->mem_alloc
480 && (cc->thunk_count || cc->function_count || cc->pap_count
481 || cc->cafcc_count || cc->sub_cafcc_count))
483 /* print all cost centres if -P -P */ )
486 fprintf(prof_file, "%-16.16s %-11.11s", cc->label, cc->module);
488 fprintf(prof_file, " %-11.11s",cc->group);
490 fprintf(prof_file, " %5ld %5ld %5.1f %5.1f",
491 cc->scc_count, cc->sub_scc_count,
492 total_ticks == 0 ? 0.0 : (cc->prev_ticks / (StgFloat) total_ticks * 100),
493 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat) total_alloc * 100));
495 if (cc_profiling > 1)
496 fprintf(prof_file, " %8ld %-8ld %8ld %8ld %8ld %8ld (%5ld %8ld)",
497 cc->cafcc_count, cc->sub_cafcc_count,
498 cc->thunk_count, cc->function_count, cc->pap_count,
500 cc->prev_ticks, cc->mem_alloc*sizeof(W_));
501 fprintf(prof_file, "\n");
510 %************************************************************************
512 \subsection[profiling-misc]{Miscellanious Profiling Routines}
514 %************************************************************************
516 Routine to sort the list of registered cost centres. Uses a simple
517 insertion sort. First we need the different comparison routines.
522 cc_lt_label(cc1, cc2)
527 cmp = strcmp(cc1->group, cc2->group);
530 return 1; /* group < */
532 return 0; /* group > */
534 cmp = strcmp(cc1->module, cc2->module);
537 return 1; /* mod < */
539 return 0; /* mod > */
541 return (strcmp(cc1->label, cc2->label) < 0); /* cmp labels */
548 /* ToDo: normal then caf then dict (instead of scc at top) */
550 if (cc1->scc_count && ! cc2->scc_count) /* scc counts at top */
552 if (cc2->scc_count && ! cc1->scc_count) /* scc counts at top */
555 if (cc1->prev_ticks > cc2->prev_ticks) /* time greater */
557 else if (cc1->prev_ticks < cc2->prev_ticks) /* time less */
560 if (cc1->mem_alloc > cc2->mem_alloc) /* time equal; alloc greater */
562 else if (cc1->mem_alloc < cc2->mem_alloc) /* time equal; alloc less */
565 if (cc1->thunk_count > cc2->thunk_count) /* time & alloc equal: cmp enters */
567 else if (cc1->thunk_count < cc2->thunk_count)
570 return (cc_lt_label(cc1, cc2)); /* all data equal: cmp labels */
574 cc_gt_alloc(cc1, cc2)
577 /* ToDo: normal then caf then dict (instead of scc at top) */
579 if (cc1->scc_count && ! cc2->scc_count) /* scc counts at top */
581 if (cc2->scc_count && ! cc1->scc_count) /* scc counts at top */
584 if (cc1->mem_alloc > cc2->mem_alloc) /* alloc greater */
586 else if (cc1->mem_alloc < cc2->mem_alloc) /* alloc less */
589 if (cc1->prev_ticks > cc2->prev_ticks) /* alloc equal; time greater */
591 else if (cc1->prev_ticks < cc2->prev_ticks) /* alloc equal; time less */
594 if (cc1->thunk_count > cc2->thunk_count) /* alloc & time: cmp enters */
596 else if (cc1->thunk_count < cc2->thunk_count)
599 return (cc_lt_label(cc1, cc2)); /* all data equal: cmp labels */
604 cc_sort(CostCentre *sort, char sort_on)
607 cc_sort(sort, sort_on)
613 CostCentre sorted, insert, *search, insert_rest;
626 abort(); /* "can't happen" */
629 sorted = REGISTERED_END;
632 while (insert != REGISTERED_END) {
634 /* set search to the address of cc required to follow insert */
636 while (*search != REGISTERED_END && (cc_lt)(*search,insert)) {
637 search = &((*search)->registered);
640 /* place insert at *search and go to next insert */
641 insert_rest = insert->registered;
642 insert->registered = *search;
644 insert = insert_rest;
652 #endif /* USE_COST_CENTRES || GUM */