/* -----------------------------------------------------------------------------
- * $Id: Profiling.c,v 1.2 1998/12/02 13:28:35 simonm Exp $
+ * $Id: Profiling.c,v 1.9 1999/09/15 13:45:18 simonmar Exp $
*
- * (c) The GHC Team, 1998
+ * (c) The GHC Team, 1998-1999
*
* Support for profiling
*
#include "Storage.h"
#include "Proftimer.h"
#include "Itimer.h"
+#include "ProfHeap.h"
/*
* Global variables used to assign unique IDs to cc's, ccs's, and
/* figures for the profiling report.
*/
-static lnat total_alloc, total_ticks;
+static lnat total_alloc, total_prof_ticks;
/* Globals for opening the profiling log file
*/
static void count_ticks ( CostCentreStack *ccs );
static void reportCCS ( CostCentreStack *ccs, nat indent );
static void DecCCS ( CostCentreStack *ccs );
+static CostCentreStack *pruneCCSTree ( CostCentreStack *ccs );
+#ifdef DEBUG
+static void printCCS ( CostCentreStack *ccs );
+#endif
+static void initTimeProfiling ( void );
/* -----------------------------------------------------------------------------
Initialise the profiling environment
/* for the benefit of allocate()... */
CCCS = CCS_SYSTEM;
- if (!RtsFlags.CcFlags.doCostCentres)
- return;
-
- time_profiling = rtsTrue;
-
- /* Initialise the log file name */
- prof_filename = stgMallocBytes(strlen(prog_argv[0]) + 6, "initProfiling");
- sprintf(prof_filename, "%s.prof", prog_argv[0]);
-
/* Initialize counters for IDs */
CC_ID = 0;
CCS_ID = 0;
CCCS = CCS_OVERHEAD;
registerCostCentres();
+ CCCS = CCS_SYSTEM;
/* find all the "special" cost centre stacks, and make them children
* of CCS_MAIN.
*/
ASSERT(CCS_MAIN->prevStack == 0);
+ CCS_MAIN->root = CC_MAIN;
for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
next = ccs->prevStack;
ccs->prevStack = 0;
ActualPush_(CCS_MAIN,ccs->cc,ccs);
+ ccs->root = ccs->cc;
ccs = next;
}
- /* profiling is the only client of the VTALRM system at the moment,
- * so just install the profiling tick handler. */
- install_vtalrm_handler(handleProfTick);
+ if (RtsFlags.CcFlags.doCostCentres) {
+ initTimeProfiling();
+ }
+
+ if (RtsFlags.ProfFlags.doHeapProfile) {
+ initHeapProfiling();
+ }
+}
+
+void
+initTimeProfiling(void)
+{
+ time_profiling = rtsTrue;
+
+ /* Initialise the log file name */
+ prof_filename = stgMallocBytes(strlen(prog_argv[0]) + 6, "initProfiling");
+ sprintf(prof_filename, "%s.prof", prog_argv[0]);
+
+ /* Start ticking */
startProfTimer();
};
void
endProfiling ( void )
{
- stopProfTimer();
-}
-
-void
-heapCensus ( bdescr *bd )
-{
- /* nothing yet */
+ if (RtsFlags.CcFlags.doCostCentres) {
+ stopProfTimer();
+ }
+ if (RtsFlags.ProfFlags.doHeapProfile) {
+ endHeapProfiling();
+ }
}
/* -----------------------------------------------------------------------------
/* -----------------------------------------------------------------------------
+ Set cost centre stack when entering a function. Here we implement
+ the rule
+
+ "if CCSfn is an initial segment of CCCS,
+ then set CCCS to CCSfn,
+ else append CCSfn to CCCS"
+ -------------------------------------------------------------------------- */
+rtsBool entering_PAP;
+
+CostCentreStack *
+EnterFunCCS ( CostCentreStack *cccs, CostCentreStack *ccsfn )
+{
+ /* PAP_entry has already set CCCS for us */
+ if (entering_PAP) {
+ entering_PAP = rtsFalse;
+ return CCCS;
+ }
+
+ if (cccs->root == ccsfn->root) {
+ return ccsfn;
+ } else {
+ return AppendCCS(cccs,ccsfn);
+ }
+}
+
+/* -----------------------------------------------------------------------------
Cost-centre stack manipulation
-------------------------------------------------------------------------- */
+#ifdef DEBUG
+CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
+CostCentreStack *
+PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
+#define PushCostCentre _PushCostCentre
+{
+ IF_DEBUG(prof,
+ fprintf(stderr,"Pushing %s on ", cc->label);
+ printCCS(ccs);
+ fprintf(stderr,"\n"));
+ return PushCostCentre(ccs,cc);
+}
+#endif
+
CostCentreStack *
PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
{
}
}
+/* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
+
+#ifdef DEBUG
+CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
+CostCentreStack *
+AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
+#define AppendCCS _AppendCCS
+{
+ IF_DEBUG(prof,
+ if (ccs1 != ccs2) {
+ fprintf(stderr,"Appending ");
+ printCCS(ccs1);
+ fprintf(stderr," to ");
+ printCCS(ccs2);
+ fprintf(stderr,"\n");});
+ return AppendCCS(ccs1,ccs2);
+}
+#endif
+
+CostCentreStack *
+AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
+{
+ CostCentreStack *ccs;
+
+ /* Optimisation: if we attempt to append a CCS to itself, we're
+ * going to end up with the same ccs after a great deal of pushing
+ * and removing of cost centres. Furthermore, we'll generate a lot
+ * of intermediate CCSs which would not otherwise be generated. So:
+ * let's cope with this common case first.
+ */
+ if (ccs1 == ccs2) {
+ return ccs1;
+ }
+
+ if (ccs2->cc->is_subsumed != CC_IS_BORING) {
+ return ccs1;
+ }
+
+ ASSERT(ccs2->prevStack != NULL);
+ ccs = AppendCCS(ccs1, ccs2->prevStack);
+ return PushCostCentre(ccs,ccs2->cc);
+}
CostCentreStack *
ActualPush ( CostCentreStack *ccs, CostCentre *cc )
new_ccs->scc_count = 0;
new_ccs->sub_scc_count = 0;
new_ccs->sub_cafcc_count = 0;
- new_ccs->sub_dictcc_count = 0;
/* Initialize all other stats here. There should be a quick way
* that's easily used elsewhere too
new_ccs->time_ticks = 0;
new_ccs->mem_alloc = 0;
- /* stacks are subsumed only if their top CostCentres are subsumed */
- new_ccs->is_subsumed = cc->is_subsumed;
-
+ new_ccs->root = ccs->root;
+
/* update the memoization table for the parent stack */
if (ccs != EMPTY_STACK)
ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc);
- /* make sure this CC is decalred at the next heap/time sample */
+ /* make sure this CC is declared at the next heap/time sample */
DecCCS(new_ccs);
/* return a pointer to the new stack */
static FILE *prof_file;
+/* -----------------------------------------------------------------------------
+ Generating the aggregated per-cost-centre time/alloc report.
+ -------------------------------------------------------------------------- */
+
+static CostCentre *sorted_cc_list;
+
+static void
+aggregate_cc_costs( CostCentreStack *ccs )
+{
+ IndexTable *i;
+
+ ccs->cc->mem_alloc += ccs->mem_alloc;
+ ccs->cc->time_ticks += ccs->time_ticks;
+
+ for (i = ccs->indexTable; i != 0; i = i->next) {
+ aggregate_cc_costs(i->ccs);
+ }
+}
+
+static void
+insert_cc_in_sorted_list( CostCentre *new_cc )
+{
+ CostCentre **prev, *cc;
+
+ prev = &sorted_cc_list;
+ for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
+ if (new_cc->time_ticks > cc->time_ticks) {
+ new_cc->link = cc;
+ *prev = new_cc;
+ return;
+ } else {
+ prev = &(cc->link);
+ }
+ }
+ new_cc->link = NULL;
+ *prev = new_cc;
+}
+
+static void
+report_per_cc_costs( void )
+{
+ CostCentre *cc, *next;
+
+ aggregate_cc_costs(CCS_MAIN);
+ sorted_cc_list = NULL;
+
+ for (cc = CC_LIST; cc != NULL; cc = next) {
+ next = cc->link;
+ if (cc->time_ticks > total_prof_ticks/100
+ || cc->mem_alloc > total_alloc/100) {
+ insert_cc_in_sorted_list(cc);
+ }
+ }
+
+ fprintf(prof_file, "%-20s %-10s", "COST CENTRE", "MODULE");
+ fprintf(prof_file, "%6s %6s", "%time", "%alloc");
+ if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
+ fprintf(prof_file, " %5s %9s", "ticks", "bytes");
+ }
+ fprintf(prof_file, "\n\n");
+
+ for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
+ fprintf(prof_file, "%-20s %-10s", cc->label, cc->module);
+ fprintf(prof_file, "%6.1f %6.1f",
+ total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
+ total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
+ total_alloc * 100)
+ );
+
+ if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
+ fprintf(prof_file, " %5ld %9ld", cc->time_ticks, cc->mem_alloc);
+ }
+ fprintf(prof_file, "\n");
+ }
+
+ fprintf(prof_file,"\n\n");
+}
+
+/* -----------------------------------------------------------------------------
+ Generate the cost-centre-stack time/alloc report
+ -------------------------------------------------------------------------- */
+
+static void
+fprint_header( void )
+{
+ fprintf(prof_file, "%-24s %-10s", "COST CENTRE", "MODULE");
+
+#ifdef NOT_YET
+ do_groups = have_interesting_groups(Registered_CC);
+ if (do_groups) fprintf(prof_file, " %-11.11s", "GROUP");
+#endif
+
+ fprintf(prof_file, "%8s %5s %5s %8s %5s", "scc", "%time", "%alloc", "inner", "cafs");
+
+ if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
+ fprintf(prof_file, " %5s %9s", "ticks", "bytes");
+#if defined(PROFILING_DETAIL_COUNTS)
+ fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
+ "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
+#endif
+ }
+
+ fprintf(prof_file, "\n\n");
+}
+
void
report_ccs_profiling( void )
{
nat count;
char temp[128]; /* sigh: magic constant */
+#ifdef NOT_YET
rtsBool do_groups = rtsFalse;
+#endif
if (!RtsFlags.CcFlags.doCostCentres)
return;
stopProfTimer();
- total_ticks = 0;
+ total_prof_ticks = 0;
total_alloc = 0;
count_ticks(CCS_MAIN);
fprintf(prof_file, "\n\n");
fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
- total_ticks / (StgFloat) TICK_FREQUENCY,
- total_ticks, TICK_MILLISECS);
+ total_prof_ticks / (StgFloat) TICK_FREQUENCY,
+ total_prof_ticks, TICK_MILLISECS);
fprintf(prof_file, "\ttotal alloc = %11s bytes",
ullong_format_string((ullong) total_alloc * sizeof(W_),
#endif
fprintf(prof_file, " (excludes profiling overheads)\n\n");
- fprintf(prof_file, "%-24s %-10s", "COST CENTRE", "MODULE");
-
-#ifdef NOT_YET
- do_groups = have_interesting_groups(Registered_CC);
- if (do_groups) fprintf(prof_file, " %-11.11s", "GROUP");
-#endif
+ report_per_cc_costs();
- fprintf(prof_file, "%8s %5s %5s %8s %5s %5s", "scc", "%time", "%alloc", "inner", "cafs", "dicts");
-
- if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
- fprintf(prof_file, " %5s %9s", "ticks", "bytes");
-#if defined(PROFILING_DETAIL_COUNTS)
- fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
- "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
-#endif
- }
- fprintf(prof_file, "\n\n");
-
- reportCCS(CCS_MAIN, 0);
+ fprint_header();
+ reportCCS(pruneCCSTree(CCS_MAIN), 0);
fclose(prof_file);
}
IndexTable *i;
cc = ccs->cc;
- ASSERT(cc == CC_MAIN || cc->link != 0);
/* Only print cost centres with non 0 data ! */
- if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
- /* force printing of *all* cost centres if -P -P */ )
-
- || ( ccs->indexTable != 0 )
- || ( ! ccs_to_ignore(ccs)
- && (ccs->scc_count || ccs->sub_scc_count ||
- ccs->time_ticks || ccs->mem_alloc
- || (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE
- && (ccs->sub_cafcc_count || ccs->sub_dictcc_count
-#if defined(PROFILING_DETAIL_COUNTS)
- || cc->thunk_count || cc->function_count || cc->pap_count
-#endif
- ))))) {
+ if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
+ ! ccs_to_ignore(ccs))
+ /* force printing of *all* cost centres if -P -P */
+ {
+
fprintf(prof_file, "%-*s%-*s %-10s",
indent, "", 24-indent, cc->label, cc->module);
if (do_groups) fprintf(prof_file, " %-11.11s",cc->group);
#endif
- fprintf(prof_file, "%8ld %4.1f %4.1f %8ld %5ld %5ld",
+ fprintf(prof_file, "%8ld %5.1f %5.1f %8ld %5ld",
ccs->scc_count,
- total_ticks == 0 ? 0.0 : (ccs->time_ticks / (StgFloat) total_ticks * 100),
+ total_prof_ticks == 0 ? 0.0 : (ccs->time_ticks / (StgFloat) total_prof_ticks * 100),
total_alloc == 0 ? 0.0 : (ccs->mem_alloc / (StgFloat) total_alloc * 100),
- ccs->sub_scc_count, ccs->sub_cafcc_count, ccs->sub_dictcc_count);
+ ccs->sub_scc_count, ccs->sub_cafcc_count);
if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
fprintf(prof_file, " %5ld %9ld", ccs->time_ticks, ccs->mem_alloc*sizeof(W_));
if (!ccs_to_ignore(ccs)) {
total_alloc += ccs->mem_alloc;
- total_ticks += ccs->time_ticks;
+ total_prof_ticks += ccs->time_ticks;
}
for (i = ccs->indexTable; i != NULL; i = i->next)
count_ticks(i->ccs);
}
}
+static CostCentreStack *
+pruneCCSTree( CostCentreStack *ccs )
+{
+ CostCentreStack *ccs1;
+ IndexTable *i, **prev;
+
+ prev = &ccs->indexTable;
+ for (i = ccs->indexTable; i != 0; i = i->next) {
+ ccs1 = pruneCCSTree(i->ccs);
+ if (ccs1 == NULL) {
+ *prev = i->next;
+ } else {
+ prev = &(i->next);
+ }
+ }
+
+ if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
+ /* force printing of *all* cost centres if -P -P */ )
+
+ || ( ccs->indexTable != 0 )
+ || ( (ccs->scc_count || ccs->sub_scc_count ||
+ ccs->time_ticks || ccs->mem_alloc
+ || (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE
+ && (ccs->sub_cafcc_count
+#if defined(PROFILING_DETAIL_COUNTS)
+ || cc->thunk_count || cc->function_count || cc->pap_count
+#endif
+ ))))) {
+ return ccs;
+ } else {
+ return NULL;
+ }
+}
+
+#ifdef DEBUG
+static void
+printCCS ( CostCentreStack *ccs )
+{
+ fprintf(stderr,"<");
+ for (; ccs; ccs = ccs->prevStack ) {
+ fprintf(stderr,ccs->cc->label);
+ if (ccs->prevStack) {
+ fprintf(stderr,",");
+ }
+ }
+ fprintf(stderr,">");
+}
+#endif
+
#endif /* PROFILING */