\section[CostCentre.lc]{Code for Cost Centre Profiling} \begin{code} #include "rtsdefs.h" \end{code} Only have cost centres if @USE_COST_CENTRES@ defined (by the driver), or if running CONCURRENT. \begin{code} #if defined(USE_COST_CENTRES) || defined(CONCURRENT) CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "MAIN", CC_IS_BORING,/*not static*/); CC_DECLARE(CC_GC, "GC", "GC", "GC", CC_IS_BORING,/*not static*/); # ifdef GUM CC_DECLARE(CC_MSG, "MSG", "MSG", "MSG", CC_IS_BORING,/*not static*/); CC_DECLARE(CC_IDLE, "IDLE", "IDLE", "IDLE", CC_IS_BORING,/*not static*/); # endif \end{code} The current cost centre. It is initially set to "MAIN" by main. We have to be careful when doing so, as an initial @SET_CCC(CC_MAIN)@ would try to increment some @sub_scc_count@ of the @CCC@ (nothing!). \begin{code} CostCentre CCC; /* _not_ initialised */ #endif /* defined(USE_COST_CENTRES) || defined(CONCURRENT) */ \end{code} The rest is for real cost centres (not thread activities). \begin{code} #if defined(USE_COST_CENTRES) || defined(GUM) \end{code} %************************************************************************ %* * \subsection[initial-cost-centres]{Initial Cost Centres} %* * %************************************************************************ Cost centres which are always required: \begin{code} #if defined(USE_COST_CENTRES) CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "MAIN", CC_IS_CAF,/*not static*/); CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", "MAIN", CC_IS_SUBSUMED,/*not static*/); CC_DECLARE(CC_DONTZuCARE,"DONT_CARE", "MAIN", "MAIN", CC_IS_BORING,/*not static*/); #endif \end{code} The list of registered cost centres, initially empty: \begin{code} CostCentre Registered_CC = REGISTERED_END; \end{code} %************************************************************************ %* * \subsection[profiling-arguments]{Profiling RTS Arguments} %* * %************************************************************************ \begin{code} I_ cc_profiling = 0; /* 0 => not "cc_profiling" >1 => do serial time profile (other magic meanings, too, apparently) WDP 94/07 */ char cc_profiling_sort = SORTCC_TIME; I_ dump_intervals = 0; /* And for the report ... */ static char prof_filename[STATS_FILENAME_MAXLEN]; /* prof report file name = .prof */ static char **prog_argv_save; static char **rts_argv_save; /* And the serial report ... */ static char serial_filename[STATS_FILENAME_MAXLEN]; /* serial time profile file name = .time */ static FILE *serial_file = NULL; /* serial time profile file */ I_ init_cc_profiling(rts_argc, rts_argv, prog_argv) I_ rts_argc; char *rts_argv[], *prog_argv[]; { I_ arg, ch, error = 0; I_ prof_req = 0; char *select_cc = 0; char *select_mod = 0; char *select_grp = 0; char *select_descr = 0; char *select_type = 0; char *select_kind = 0; I_ select_age = 0; char *left, *right; prog_argv_save = prog_argv; rts_argv_save = rts_argv; #ifdef GUM sprintf(prof_filename, PROF_FILENAME_FMT_GUM, prog_argv[0], thisPE); #else sprintf(prof_filename, PROF_FILENAME_FMT, prog_argv[0]); #endif for (arg = 0; arg < rts_argc; arg++) { if (rts_argv[arg][0] == '-') { switch (rts_argv[arg][1]) { case 'P': /* detailed cost centre profiling (time/alloc) */ cc_profiling++; case 'p': /* cost centre profiling (time/alloc) */ cc_profiling++; for (ch = 2; rts_argv[arg][ch]; ch++) { switch (rts_argv[arg][2]) { case SORTCC_LABEL: case SORTCC_TIME: case SORTCC_ALLOC: cc_profiling_sort = rts_argv[arg][ch]; break; default: fprintf(stderr, "Invalid profiling sort option %s\n", rts_argv[arg]); error = 1; }} break; #if defined(USE_COST_CENTRES) case 'h': /* serial heap profile */ switch (rts_argv[arg][2]) { case '\0': case CCchar: prof_req = HEAP_BY_CC; break; case MODchar: prof_req = HEAP_BY_MOD; break; case GRPchar: prof_req = HEAP_BY_GRP; break; case DESCRchar: prof_req = HEAP_BY_DESCR; break; case TYPEchar: prof_req = HEAP_BY_TYPE; break; case TIMEchar: prof_req = HEAP_BY_TIME; if (rts_argv[arg][3]) { char *start_str = strchr(rts_argv[arg]+3, ','); I_ intervals; if (start_str) *start_str = '\0'; if ((intervals = decode(rts_argv[arg]+3)) != 0) { time_intervals = (hash_t) intervals; /* ToDo: and what if it *is* zero intervals??? */ } if (start_str) { earlier_ticks = (I_)((atof(start_str + 1) * TICK_FREQUENCY)); } } break; default: fprintf(stderr, "Invalid heap profile option: %s\n", rts_argv[arg]); error = 1; } break; case 'z': /* size of index tables */ switch (rts_argv[arg][2]) { case CCchar: max_cc_no = (hash_t) decode(rts_argv[arg]+3); if (max_cc_no == 0) { fprintf(stderr, "Bad number of cost centres %s\n", rts_argv[arg]); error = 1; } break; case MODchar: max_mod_no = (hash_t) decode(rts_argv[arg]+3); if (max_mod_no == 0) { fprintf(stderr, "Bad number of modules %s\n", rts_argv[arg]); error = 1; } break; case GRPchar: max_grp_no = (hash_t) decode(rts_argv[arg]+3); if (max_grp_no == 0) { fprintf(stderr, "Bad number of groups %s\n", rts_argv[arg]); error = 1; } break; case DESCRchar: max_descr_no = (hash_t) decode(rts_argv[arg]+3); if (max_descr_no == 0) { fprintf(stderr, "Bad number of closure descriptions %s\n", rts_argv[arg]); error = 1; } break; case TYPEchar: max_type_no = (hash_t) decode(rts_argv[arg]+3); if (max_type_no == 0) { fprintf(stderr, "Bad number of type descriptions %s\n", rts_argv[arg]); error = 1; } break; default: fprintf(stderr, "Invalid index table size option: %s\n", rts_argv[arg]); error = 1; } break; case 'c': /* cost centre label select */ case 'm': /* cost centre module select */ case 'g': /* cost centre group select */ case 'd': /* closure descr select */ case 'y': /* closure type select */ case 'k': /* closure kind select */ left = strchr(rts_argv[arg], '{'); right = strrchr(rts_argv[arg], '}'); if (! left || ! right || strrchr(rts_argv[arg], '{') != left || strchr(rts_argv[arg], '}') != right) { fprintf(stderr, "Invalid heap profiling selection bracketing\n %s\n", rts_argv[arg]); error = 1; } else { *right = '\0'; switch (rts_argv[arg][1]) { case 'c': /* cost centre label select */ select_cc = left + 1; break; case 'm': /* cost centre module select */ select_mod = left + 1; break; case 'g': /* cost centre group select */ select_grp = left + 1; break; case 'd': /* closure descr select */ select_descr = left + 1; break; case 't': /* closure type select */ select_type = left + 1; break; case 'k': /* closure kind select */ select_kind = left + 1; break; } } break; case 'a': /* closure age select */ select_age = decode(rts_argv[arg]+2); #endif /* defined(USE_COST_CENTRES) */ case 'i': /* serial profiling -- initial timer interval */ interval_ticks = (I_) ((atof(rts_argv[arg]+2) * TICK_FREQUENCY)); if (interval_ticks <= 0) interval_ticks = 1; break; } } } if (error) return 1; /* Now perform any work to initialise profiling ... */ if (cc_profiling || prof_req != HEAP_NO_PROFILING) { time_profiling++; /* set dump_intervals: if heap profiling only dump every 10 intervals */ if (prof_req == HEAP_NO_PROFILING) { dump_intervals = 1; } else { dump_intervals = 10; } if (cc_profiling > 1) { /* produce serial time profile */ #ifdef GUM sprintf(serial_filename, TIME_FILENAME_FMT_GUM, prog_argv[0], thisPE); #else sprintf(serial_filename, TIME_FILENAME_FMT, prog_argv[0]); #endif if ( (serial_file = fopen(serial_filename,"w")) == NULL ) { fprintf(stderr, "Can't open serial time log file %s\n", serial_filename); return 1; } fprintf(serial_file, "JOB \"%s", prog_argv[0]); fprintf(serial_file, " +RTS -P -i%4.2f -RTS", interval_ticks/(StgFloat)TICK_FREQUENCY); for(arg = 1; prog_argv[arg]; arg++) fprintf(serial_file, " %s", prog_argv[arg]); fprintf(serial_file, "\"\n"); fprintf(serial_file, "DATE \"%s\"\n", time_str()); fprintf(serial_file, "SAMPLE_UNIT \"seconds\"\n"); fprintf(serial_file, "VALUE_UNIT \"time ticks\"\n"); /* output initial 0 sample */ fprintf(serial_file, "BEGIN_SAMPLE 0.00\n"); fprintf(serial_file, "END_SAMPLE 0.00\n"); } } #if defined(USE_COST_CENTRES) if (heap_profile_init(prof_req, select_cc, select_mod, select_grp, select_descr, select_type, select_kind, select_age, prog_argv)) return 1; #endif return 0; } \end{code} Registering the cost centres is done after heap allocated as we use the area to hold the stack of modules still to register. \begin{code} extern P_ heap_space; /* pointer to the heap space */ StgFunPtr * register_stack; /* stack of register routines -- heap area used */ extern I_ heap_profiling_req; EXTFUN(startCcRegisteringWorld); void cc_register() { REGISTER_CC(CC_MAIN); /* register cost centre CC_MAIN */ REGISTER_CC(CC_GC); /* register cost centre CC_GC */ #if defined(GUM) REGISTER_CC(CC_MSG); /* register cost centre CC_MSG */ REGISTER_CC(CC_IDLE); /* register cost centre CC_MSG */ #endif #if defined(USE_COST_CENTRES) REGISTER_CC(CC_OVERHEAD); /* register cost centre CC_OVERHEAD */ REGISTER_CC(CC_DONTZuCARE); /* register cost centre CC_DONT_CARE Right??? ToDo */ #endif /* as per SET_CCC macro, without the sub_scc_count++ bit */ CCC = (CostCentre)STATIC_CC_REF(CC_MAIN); CCC->scc_count++; #if defined(USE_COST_CENTRES) /* always register -- if we do not, we get warnings (WDP 94/12) */ /* if (cc_profiling || heap_profiling_req != HEAP_NO_PROFILING) */ register_stack = (StgFunPtr *) heap_space; miniInterpret((StgFunPtr) startCcRegisteringWorld); #endif } \end{code} %************************************************************************ %* * \subsection[cost-centre-profiling]{Cost Centre Profiling Report} %* * %************************************************************************ \begin{code} static I_ dump_interval = 0; void report_cc_profiling(final) I_ final; { FILE *prof_file; CostCentre cc; I_ count; char temp[32]; W_ total_ticks = 0, total_alloc = 0, total_allocs = 0; if (!cc_profiling) return; blockVtAlrmSignal(); if (serial_file) { StgFloat seconds = (previous_ticks + current_ticks) / (StgFloat) TICK_FREQUENCY; if (final) { /* ignore partial sample at end of execution */ /* output final 0 sample */ fprintf(serial_file, "BEGIN_SAMPLE %0.2f\n", seconds); fprintf(serial_file, "END_SAMPLE %0.2f\n", seconds); fclose(serial_file); serial_file = NULL; } else { /* output serail profile sample */ fprintf(serial_file, "BEGIN_SAMPLE %0.2f\n", seconds); for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) { ASSERT_IS_REGISTERED(cc, 0); if (cc->time_ticks) { fprintf(serial_file, " %0.11s:%0.16s %3ld\n", cc->module, cc->label, cc->time_ticks); } } fprintf(serial_file, "END_SAMPLE %0.2f\n", seconds); fflush(serial_file); } } for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) { ASSERT_IS_REGISTERED(cc, 0); cc->prev_ticks += cc->time_ticks; cc->time_ticks = 0; total_ticks += cc->prev_ticks; total_alloc += cc->mem_alloc; total_allocs += cc->mem_allocs; } if (total_ticks != current_ticks + previous_ticks) fprintf(stderr, "Warning: Cost Centre tick inconsistency: total=%ld, current=%ld, previous=%ld\n", total_ticks, current_ticks, previous_ticks); unblockVtAlrmSignal(); /* return if no cc profile required */ if (!final && ++dump_interval < dump_intervals) return; /* reset dump_interval -- dump again after dump_intervals */ dump_interval = 0; /* sort cost centres */ cc_sort(&Registered_CC, cc_profiling_sort); /* open profiling output file */ if ((prof_file = fopen(prof_filename, "w")) == NULL) { fprintf(stderr, "Can't open profiling report file %s\n", prof_filename); return; } fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n", time_str(), final ? "Final" : "PARTIAL"); fprintf(prof_file, "\n\t "); fprintf(prof_file, " %s", prog_argv_save[0]); fprintf(prof_file, " +RTS"); for (count = 0; rts_argv_save[count]; count++) fprintf(prof_file, " %s", rts_argv_save[count]); fprintf(prof_file, " -RTS"); for (count = 1; prog_argv_save[count]; count++) fprintf(prof_file, " %s", prog_argv_save[count]); fprintf(prof_file, "\n\n"); fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n", total_ticks / (StgFloat) TICK_FREQUENCY, total_ticks, TICK_MILLISECS); fprintf(prof_file, "\ttotal alloc = %11s bytes (%lu closures) (excludes profiling overheads)\n", ullong_format_string((ullong) total_alloc * sizeof(W_), temp, rtsTrue/*commas*/), total_allocs); /* ToDo: 64-bit error! */ fprintf(prof_file, "\n"); fprintf(prof_file, "%-16.16s %-11.11s", "COST CENTRE", "MODULE"); /* ToDo:group fprintf(prof_file, " %-11.11s", "GROUP"); */ fprintf(prof_file, " %5s %5s %6s %6s", "scc", "subcc", "%time", "%alloc"); if (cc_profiling > 1) fprintf(prof_file, " %11s %13s %8s %8s %8s (%5s %8s)", "cafcc", "thunks", "funcs", "PAPs", "closures", "ticks", "bytes"); fprintf(prof_file, "\n\n"); for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) { ASSERT_IS_REGISTERED(cc, 0); /* Only print cost centres with non 0 data ! */ if (cc->scc_count || cc->sub_scc_count || cc->prev_ticks || cc->mem_alloc || (cc_profiling > 1 && (cc->thunk_count || cc->function_count || cc->pap_count || cc->cafcc_count || cc->sub_cafcc_count)) || (cc_profiling > 2 /* print all cost centres if -P -P */ ) ) { fprintf(prof_file, "%-16.16s %-11.11s", cc->label, cc->module); /* ToDo:group fprintf(prof_file, " %-11.11s",cc->group); */ fprintf(prof_file, " %5ld %5ld %5.1f %5.1f", cc->scc_count, cc->sub_scc_count, total_ticks == 0 ? 0.0 : (cc->prev_ticks / (StgFloat) total_ticks * 100), total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat) total_alloc * 100)); if (cc_profiling > 1) fprintf(prof_file, " %8ld %-8ld %8ld %8ld %8ld %8ld (%5ld %8ld)", cc->cafcc_count, cc->sub_cafcc_count, cc->thunk_count, cc->function_count, cc->pap_count, cc->mem_allocs, cc->prev_ticks, cc->mem_alloc*sizeof(W_)); fprintf(prof_file, "\n"); } } fclose(prof_file); } \end{code} %************************************************************************ %* * \subsection[profiling-misc]{Miscellanious Profiling Routines} %* * %************************************************************************ Routine to sort the list of registered cost centres. Uses a simple insertion sort. First we need the different comparison routines. \begin{code} static I_ cc_lt_label(cc1, cc2) CostCentre cc1, cc2; { I_ cmp; cmp = strcmp(cc1->group, cc2->group); if (cmp< 0) return 1; /* group < */ else if (cmp > 0) return 0; /* group > */ cmp = strcmp(cc1->module, cc2->module); if (cmp < 0) return 1; /* mod < */ else if (cmp > 0) return 0; /* mod > */ return (strcmp(cc1->label, cc2->label) < 0); /* cmp labels */ } static I_ cc_gt_time(cc1, cc2) CostCentre cc1, cc2; { /* ToDo: normal then caf then dict (instead of scc at top) */ if (cc1->scc_count && ! cc2->scc_count) /* scc counts at top */ return 1; if (cc2->scc_count && ! cc1->scc_count) /* scc counts at top */ return 0; if (cc1->prev_ticks > cc2->prev_ticks) /* time greater */ return 1; else if (cc1->prev_ticks < cc2->prev_ticks) /* time less */ return 0; if (cc1->mem_alloc > cc2->mem_alloc) /* time equal; alloc greater */ return 1; else if (cc1->mem_alloc < cc2->mem_alloc) /* time equal; alloc less */ return 0; if (cc1->thunk_count > cc2->thunk_count) /* time & alloc equal: cmp enters */ return 1; else if (cc1->thunk_count < cc2->thunk_count) return 0; return (cc_lt_label(cc1, cc2)); /* all data equal: cmp labels */ } static I_ cc_gt_alloc(cc1, cc2) CostCentre cc1, cc2; { /* ToDo: normal then caf then dict (instead of scc at top) */ if (cc1->scc_count && ! cc2->scc_count) /* scc counts at top */ return 1; if (cc2->scc_count && ! cc1->scc_count) /* scc counts at top */ return 0; if (cc1->mem_alloc > cc2->mem_alloc) /* alloc greater */ return 1; else if (cc1->mem_alloc < cc2->mem_alloc) /* alloc less */ return 0; if (cc1->prev_ticks > cc2->prev_ticks) /* alloc equal; time greater */ return 1; else if (cc1->prev_ticks < cc2->prev_ticks) /* alloc equal; time less */ return 0; if (cc1->thunk_count > cc2->thunk_count) /* alloc & time: cmp enters */ return 1; else if (cc1->thunk_count < cc2->thunk_count) return 0; return (cc_lt_label(cc1, cc2)); /* all data equal: cmp labels */ } #ifdef __STDC__ void cc_sort(CostCentre *sort, char sort_on) #else void cc_sort(sort, sort_on) CostCentre *sort; char sort_on; #endif { I_ (*cc_lt)(); CostCentre sorted, insert, *search, insert_rest; switch (sort_on) { case SORTCC_LABEL: cc_lt = cc_lt_label; break; case SORTCC_TIME: cc_lt = cc_gt_time; break; case SORTCC_ALLOC: cc_lt = cc_gt_alloc; break; default: abort(); /* "can't happen" */ } sorted = REGISTERED_END; insert = *sort; while (insert != REGISTERED_END) { /* set search to the address of cc required to follow insert */ search = &sorted; while (*search != REGISTERED_END && (cc_lt)(*search,insert)) { search = &((*search)->registered); } /* place insert at *search and go to next insert */ insert_rest = insert->registered; insert->registered = *search; *search = insert; insert = insert_rest; } *sort = sorted; } \end{code} \begin{code} #endif /* USE_COST_CENTRES || GUM */ \end{code}