-\section[CostCentre.lc]{Code for Cost Centre Profiling}
-
-\begin{code}
-#include "rtsdefs.h"
-\end{code}
-
-Only have cost centres if @PROFILING@ defined (by the driver),
-or if running PAR.
-
-\begin{code}
-#if defined(PROFILING) || defined(PAR)
-CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "MAIN", CC_IS_BORING,/*not static*/);
-CC_DECLARE(CC_GC, "GC", "GC", "GC", CC_IS_BORING,/*not static*/);
-
-# ifdef PAR
-CC_DECLARE(CC_MSG, "MSG", "MSG", "MSG", CC_IS_BORING,/*not static*/);
-CC_DECLARE(CC_IDLE, "IDLE", "IDLE", "IDLE", CC_IS_BORING,/*not static*/);
-# endif
-\end{code}
-
-\begin{code}
-CostCentre CCC; /* _not_ initialised */
-
-#endif /* defined(PROFILING) || defined(PAR) */
-\end{code}
-
-The rest is for real cost centres (not thread activities).
-
-\begin{code}
-#if defined(PROFILING) || defined(PAR)
-\end{code}
-%************************************************************************
-%* *
-\subsection{Initial Cost Centres}
-%* *
-%************************************************************************
-
-Cost centres which are always required:
-\begin{code}
-#if defined(PROFILING)
-
-CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "PROFILING", CC_IS_CAF, /*not static*/);
-CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", "MAIN", CC_IS_SUBSUMED, /*not static*/);
-CC_DECLARE(CC_DONTZuCARE,"DONT_CARE", "MAIN", "MAIN", CC_IS_BORING, /*not static*/);
-#endif
-\end{code}
-
-The list of registered cost centres, initially empty:
-\begin{code}
-CostCentre Registered_CC = REGISTERED_END;
-\end{code}
-
-
-%************************************************************************
-%* *
-\subsection{Profiling RTS Arguments}
-%* *
-%************************************************************************
-
-\begin{code}
-I_ dump_intervals = 0;
-
-/* And for the report ... */
-static char prof_filename[STATS_FILENAME_MAXLEN]; /* prof report file name = <program>.prof */
-static char **prog_argv_save;
-static char **rts_argv_save;
-
-/* And the serial report ... */
-static char serial_filename[STATS_FILENAME_MAXLEN]; /* serial time profile file name = <program>.time */
-static FILE *serial_file = NULL; /* serial time profile file */
-
-I_
-init_cc_profiling(rts_argc, rts_argv, prog_argv)
- I_ rts_argc;
- char *rts_argv[], *prog_argv[];
-{
- I_ arg, ch;
-
- prog_argv_save = prog_argv;
- rts_argv_save = rts_argv;
-
-#ifdef PAR
- sprintf(prof_filename, PROF_FILENAME_FMT_GUM, prog_argv[0], thisPE);
-#else
- sprintf(prof_filename, PROF_FILENAME_FMT, prog_argv[0]);
-#endif
-
- /* Now perform any work to initialise profiling ... */
-
- if (RTSflags.CcFlags.doCostCentres
-#ifdef PROFILING
- || RTSflags.ProfFlags.doHeapProfile
-#endif
- ) {
-
- time_profiling++;
-
- /* set dump_intervals: if heap profiling only dump every 10 intervals */
-#ifdef PROFILING
- dump_intervals = (RTSflags.ProfFlags.doHeapProfile) ? 10 : 1;
-#else
- dump_intervals = 1;
-#endif
-
- if (RTSflags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
- /* produce serial time profile */
-
-#ifdef PAR
- sprintf(serial_filename, TIME_FILENAME_FMT_GUM, prog_argv[0], thisPE);
-#else
- sprintf(serial_filename, TIME_FILENAME_FMT, prog_argv[0]);
-#endif
- if ( (serial_file = fopen(serial_filename,"w")) == NULL ) {
- fprintf(stderr, "Can't open serial time log file %s\n", serial_filename);
- return 1;
- }
-
- fprintf(serial_file, "JOB \"%s", prog_argv[0]);
- fprintf(serial_file, " +RTS -P -i%4.2f -RTS",
- interval_ticks/(StgFloat)TICK_FREQUENCY);
- for(arg = 1; prog_argv[arg]; arg++)
- fprintf(serial_file, " %s", prog_argv[arg]);
- fprintf(serial_file, "\"\n");
- fprintf(serial_file, "DATE \"%s\"\n", time_str());
-
- fprintf(serial_file, "SAMPLE_UNIT \"seconds\"\n");
-#ifdef PAR
- fprintf(serial_file, "VALUE_UNIT \"percentage time\"\n");
-#else
- fprintf(serial_file, "VALUE_UNIT \"time ticks\"\n");
-#endif
-
- /* output initial 0 sample */
- fprintf(serial_file, "BEGIN_SAMPLE 0.00\n");
- fprintf(serial_file, "END_SAMPLE 0.00\n");
- }
- }
-
-#if defined(PROFILING)
- if (heap_profile_init(prog_argv))
- return 1;
-#endif
-
- return 0;
-}
-\end{code}
-
-Registering the cost centres is done after heap allocated as we use
-the area to hold the stack of modules still to register.
-
-\begin{code}
-extern P_ heap_space; /* pointer to the heap space */
-StgFunPtr * register_stack; /* stack of register routines -- heap area used */
-
-EXTFUN(startCcRegisteringWorld);
-
-void
-cc_register()
-{
- REGISTER_CC(CC_MAIN); /* register cost centre CC_MAIN */
- REGISTER_CC(CC_GC); /* register cost centre CC_GC */
-
-#if defined(PAR)
- REGISTER_CC(CC_MSG); /* register cost centre CC_MSG */
- REGISTER_CC(CC_IDLE); /* register cost centre CC_MSG */
-#endif
-
-#if defined(PROFILING)
- REGISTER_CC(CC_OVERHEAD); /* register cost centre CC_OVERHEAD */
- REGISTER_CC(CC_DONTZuCARE); /* register cost centre CC_DONT_CARE Right??? ToDo */
-#endif
-
- SET_CCC_RTS(CC_MAIN,0,1); /* without the sub_scc_count++ */
-
-#if defined(PROFILING)
-/* always register -- if we do not, we get warnings (WDP 94/12) */
-/* if (RTSflags.CcFlags.doCostCentres || RTSflags.ProfFlags.doHeapProfile) */
-
- register_stack = (StgFunPtr *) heap_space;
- miniInterpret((StgFunPtr) startCcRegisteringWorld);
-#endif
-}
-\end{code}
-
-%************************************************************************
-%* *
-\subsection{Cost Centre Profiling Report}
-%* *
-%************************************************************************
-
-\begin{code}
-static I_ dump_interval = 0;
-
-rtsBool
-cc_to_ignore (CostCentre cc)
- /* return rtsTrue if it is one of the ones that
- should not be reported normally (because it confuses
- the users)
- */
-{
-# if !defined(PROFILING)
- /* in parallel land, everything is interesting (not ignorable) */
- return rtsFalse;
-
-# else
- if ( cc == CC_OVERHEAD || cc == CC_DONTZuCARE || cc == CC_GC ) {
- return rtsTrue;
- } else {
- return rtsFalse;
- }
-# endif /* PROFILING */
-}
-
-rtsBool
-have_interesting_groups(CostCentre cc)
-{
- char* interesting_group = NULL;
-
- for (; cc != REGISTERED_END; cc = cc->registered) {
- if (! cc_to_ignore(cc) && strcmp(cc->module,cc->group) != 0) {
- if (interesting_group && strcmp(cc->group, interesting_group) != 0) {
- return(rtsTrue);
- } else {
- interesting_group = cc->group;
- }
- }
- }
- return(rtsFalse);
-}
-
-void
-report_cc_profiling(final)
- I_ final;
-{
- FILE *prof_file;
- CostCentre cc;
- I_ count;
- char temp[128]; /* sigh: magic constant */
- W_ total_ticks, ignored_ticks;
- W_ total_alloc = 0, total_allocs = 0;
- rtsBool do_groups = rtsFalse;
-#ifdef PAR
- I_ final_ticks; /*No. ticks in last sample*/
-#endif
-
- if (!RTSflags.CcFlags.doCostCentres)
- return;
-
- blockVtAlrmSignal();
- /* To avoid inconsistency, initialise the tick variables
- after having blocked out VTALRM */
- total_ticks = 0;
- ignored_ticks = 0;
-#ifdef PAR
- final_ticks = 0;
-#endif
-
- if (serial_file) {
- StgFloat seconds = (previous_ticks + current_ticks) / (StgFloat) TICK_FREQUENCY;
-
- if (final) {
- fprintf(serial_file, "BEGIN_SAMPLE %0.2f\n", seconds);
-#ifdef PAR
- /*In the parallel world we're particularly interested in the last sample*/
- for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) {
- if (! cc_to_ignore(cc))
- final_ticks += cc->time_ticks;
- }
-
- for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) {
- if (cc->time_ticks != 0 && ! cc_to_ignore(cc))
- fprintf(serial_file, " %s:%s %3ld\n",
- cc->module, cc->label, cc->time_ticks*100 / final_ticks);
- }
-#endif
- /* In the sequential world, ignore partial sample at end of execution */
- fprintf(serial_file, "END_SAMPLE %0.2f\n", seconds);
- fclose(serial_file);
- serial_file = NULL;
-
- } else {
- /* output serial profile sample */
-
- fprintf(serial_file, "BEGIN_SAMPLE %0.2f\n", seconds);
-
- for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) {
- ASSERT_IS_REGISTERED(cc, 0);
- if (cc->time_ticks != 0 && !cc_to_ignore(cc)) {
-#ifdef PAR
- /* Print _percentages_ in the parallel world */
- fprintf(serial_file, " %s:%s %3ld\n",
- cc->module, cc->label, cc->time_ticks * 100/TICK_FREQUENCY);
-#else
- fprintf(serial_file, " %s:%s %3ld\n",
- cc->module, cc->label, cc->time_ticks);
-#endif
- }
- }
-
- fprintf(serial_file, "END_SAMPLE %0.2f\n", seconds);
- fflush(serial_file);
- }
- }
-
- for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) {
- ASSERT_IS_REGISTERED(cc, 0);
- cc->prev_ticks += cc->time_ticks;
- cc->time_ticks = 0;
-
- if ( cc_to_ignore(cc) ) { /* reporting these just confuses users... */
- ignored_ticks += cc->prev_ticks;
- } else {
- total_ticks += cc->prev_ticks;
- total_alloc += cc->mem_alloc;
-#if defined(PROFILING_DETAIL_COUNTS)
- total_allocs += cc->mem_allocs;
-#endif
- }
- }
-
- if (total_ticks + ignored_ticks != current_ticks + previous_ticks)
- fprintf(stderr, "Warning: Cost Centre tick inconsistency: total=%ld, ignored=%ld, current=%ld, previous=%ld\n", total_ticks, ignored_ticks, current_ticks, previous_ticks);
-
- unblockVtAlrmSignal();
-
- /* return if no cc profile required */
- if (!final && ++dump_interval < dump_intervals)
- return;
-
- /* reset dump_interval -- dump again after dump_intervals */
- dump_interval = 0;
-
- /* sort cost centres */
- cc_sort(&Registered_CC, RTSflags.CcFlags.sortBy);
-
- /* open profiling output file */
- if ((prof_file = fopen(prof_filename, "w")) == NULL) {
- fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
- return;
- }
- fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n", time_str(),
- final ? "Final" : "PARTIAL");
-
- fprintf(prof_file, "\n\t ");
- fprintf(prof_file, " %s", prog_argv_save[0]);
- fprintf(prof_file, " +RTS");
- for (count = 0; rts_argv_save[count]; count++)
- fprintf(prof_file, " %s", rts_argv_save[count]);
- fprintf(prof_file, " -RTS");
- for (count = 1; prog_argv_save[count]; count++)
- fprintf(prof_file, " %s", prog_argv_save[count]);
- fprintf(prof_file, "\n\n");
-
-
- fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
- total_ticks / (StgFloat) TICK_FREQUENCY, total_ticks, TICK_MILLISECS);
- fprintf(prof_file, "\ttotal alloc = %11s bytes",
- ullong_format_string((ullong) total_alloc * sizeof(W_), temp, rtsTrue/*commas*/));
- /* ToDo: 64-bit error! */
-
-#if defined(PROFILING_DETAIL_COUNTS)
- fprintf(prof_file, " (%lu closures)", total_allocs);
-#endif
- fprintf(prof_file, " (excludes profiling overheads)\n\n");
-
-
- fprintf(prof_file, "%-16s %-11s", "COST CENTRE", "MODULE");
-
- do_groups = have_interesting_groups(Registered_CC);
- if (do_groups) fprintf(prof_file, " %-11.11s", "GROUP");
-
- fprintf(prof_file, "%8s %6s %6s %8s %5s %5s", "scc", "%time", "%alloc", "inner", "cafs", "dicts");
-
- if (RTSflags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
- fprintf(prof_file, " %5s %9s", "ticks", "bytes");
-#if defined(PROFILING_DETAIL_COUNTS)
- fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
- "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
-#endif
- }
- fprintf(prof_file, "\n\n");
-
- for (cc = Registered_CC; cc != REGISTERED_END; cc = cc->registered) {
- ASSERT_IS_REGISTERED(cc, 0);
-
- /* Only print cost centres with non 0 data ! */
-
- if ( (RTSflags.CcFlags.doCostCentres >= COST_CENTRES_ALL
- /* force printing of *all* cost centres if -P -P */ )
-
- || ( ! cc_to_ignore(cc)
- && (cc->scc_count || cc->sub_scc_count || cc->prev_ticks || cc->mem_alloc
- || (RTSflags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE
- && (cc->sub_cafcc_count || cc->sub_dictcc_count
-#if defined(PROFILING_DETAIL_COUNTS)
- || cc->thunk_count || cc->function_count || cc->pap_count
-#endif
- ))))) {
- fprintf(prof_file, "%-16s %-11s", cc->label, cc->module);
- if (do_groups) fprintf(prof_file, " %-11.11s",cc->group);
-
- fprintf(prof_file, "%8ld %5.1f %5.1f %8ld %5ld %5ld",
- cc->scc_count,
- total_ticks == 0 ? 0.0 : (cc->prev_ticks / (StgFloat) total_ticks * 100),
- total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat) total_alloc * 100),
- cc->sub_scc_count, cc->sub_cafcc_count, cc->sub_dictcc_count);
-
- if (RTSflags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
- fprintf(prof_file, " %5ld %9ld", cc->prev_ticks, cc->mem_alloc*sizeof(W_));
-#if defined(PROFILING_DETAIL_COUNTS)
- fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
- cc->mem_allocs, cc->thunk_count,
- cc->function_count, cc->pap_count,
- cc->subsumed_fun_count, cc->subsumed_caf_count,
- cc->caffun_subsumed);
-#endif
- }
- fprintf(prof_file, "\n");
- }
- }
-
- fclose(prof_file);
-}
-
-\end{code}
-
-%************************************************************************
-%* *
-\subsection{Miscellaneous profiling routines}
-%* *
-%************************************************************************
-
-Routine to sort the list of registered cost centres. Uses a simple
-insertion sort. First we need the different comparison routines.
-
-\begin{code}
-
-static I_
-cc_lt_label(CostCentre cc1, CostCentre cc2)
-{
- I_ cmp;
-
- cmp = strcmp(cc1->group, cc2->group);
-
- if (cmp< 0)
- return 1; /* group < */
- else if (cmp > 0)
- return 0; /* group > */
-
- cmp = strcmp(cc1->module, cc2->module);
-
- if (cmp < 0)
- return 1; /* mod < */
- else if (cmp > 0)
- return 0; /* mod > */
-
- return (strcmp(cc1->label, cc2->label) < 0); /* cmp labels */
-}
-
-static I_
-cc_gt_time(CostCentre cc1, CostCentre cc2)
-{
- /* ToDo: normal then caf then dict (instead of scc at top) */
-
- if (cc1->scc_count && ! cc2->scc_count) /* scc counts at top */
- return 1;
- if (cc2->scc_count && ! cc1->scc_count) /* scc counts at top */
- return 0;
-
- if (cc1->prev_ticks > cc2->prev_ticks) /* time greater */
- return 1;
- else if (cc1->prev_ticks < cc2->prev_ticks) /* time less */
- return 0;
-
- if (cc1->mem_alloc > cc2->mem_alloc) /* time equal; alloc greater */
- return 1;
- else if (cc1->mem_alloc < cc2->mem_alloc) /* time equal; alloc less */
- return 0;
-
- return (cc_lt_label(cc1, cc2)); /* all data equal: cmp labels */
-}
-
-static I_
-cc_gt_alloc(CostCentre cc1, CostCentre cc2)
-{
- /* ToDo: normal then caf then dict (instead of scc at top) */
-
- if (cc1->scc_count && ! cc2->scc_count) /* scc counts at top */
- return 1;
- if (cc2->scc_count && ! cc1->scc_count) /* scc counts at top */
- return 0;
-
- if (cc1->mem_alloc > cc2->mem_alloc) /* alloc greater */
- return 1;
- else if (cc1->mem_alloc < cc2->mem_alloc) /* alloc less */
- return 0;
-
- if (cc1->prev_ticks > cc2->prev_ticks) /* alloc equal; time greater */
- return 1;
- else if (cc1->prev_ticks < cc2->prev_ticks) /* alloc equal; time less */
- return 0;
-
- return (cc_lt_label(cc1, cc2)); /* all data equal: cmp labels */
-}
-
-void
-cc_sort(CostCentre *sort, char sort_on)
-{
- I_ (*cc_lt)();
- CostCentre sorted, insert, *search, insert_rest;
-
- switch (sort_on) {
- case SORTCC_LABEL:
- cc_lt = cc_lt_label;
- break;
- case SORTCC_TIME:
- cc_lt = cc_gt_time;
- break;
- case SORTCC_ALLOC:
- cc_lt = cc_gt_alloc;
- break;
- default:
- abort(); /* "can't happen" */
- }
-
- sorted = REGISTERED_END;
- insert = *sort;
-
- while (insert != REGISTERED_END) {
-
- /* set search to the address of cc required to follow insert */
- search = &sorted;
- while (*search != REGISTERED_END && (cc_lt)(*search,insert)) {
- search = &((*search)->registered);
- }
-
- /* place insert at *search and go to next insert */
- insert_rest = insert->registered;
- insert->registered = *search;
- *search = insert;
- insert = insert_rest;
- }
-
- *sort = sorted;
-}
-\end{code}
-
-\begin{code}
-#endif /* PROFILING || PAR */
-\end{code}