From 8db56c8606e6c0e89a87d34c3f67124f1e8b988e Mon Sep 17 00:00:00 2001 From: "simonmar@microsoft.com" Date: Tue, 20 Nov 2007 13:36:35 +0000 Subject: [PATCH] improvements to PAPI support - major (multithreaded) GC is measured separately from minor GC - events to measure can now be specified on the command line, e.g prog +RTS -a+PAPI_TOT_CYC --- includes/RtsFlags.h | 9 ++- rts/Papi.c | 177 ++++++++++++++++++++++++++++----------------------- rts/Papi.h | 8 ++- rts/RtsFlags.c | 9 +++ rts/RtsStartup.c | 8 +-- rts/Stats.c | 6 +- rts/sm/GC.c | 4 +- 7 files changed, 128 insertions(+), 93 deletions(-) diff --git a/includes/RtsFlags.h b/includes/RtsFlags.h index bc36ebd..902e6c0 100644 --- a/includes/RtsFlags.h +++ b/includes/RtsFlags.h @@ -304,11 +304,13 @@ struct TRACE_FLAGS { rtsBool timestamp; /* add timestamps to traces */ }; -/* Put them together: */ - #ifdef USE_PAPI +#define MAX_PAPI_USER_EVENTS 8 + struct PAPI_FLAGS { nat eventType; /* The type of events to count */ + nat numUserEvents; + char * userEvents[MAX_PAPI_USER_EVENTS]; }; #define PAPI_FLAG_CACHE_L1 1 @@ -316,9 +318,12 @@ struct PAPI_FLAGS { #define PAPI_FLAG_BRANCH 3 #define PAPI_FLAG_STALLS 4 #define PAPI_FLAG_CB_EVENTS 5 +#define PAPI_USER_EVENTS 6 #endif +/* Put them together: */ + typedef struct _RTS_FLAGS { /* The first portion of RTS_FLAGS is invariant. */ struct GC_FLAGS GcFlags; diff --git a/rts/Papi.c b/rts/Papi.c index d442718..d95e26c 100644 --- a/rts/Papi.c +++ b/rts/Papi.c @@ -30,7 +30,7 @@ static Mutex papi_counter_mutex; struct _papi_events { int event_code; - char * event_name; + const char * event_name; }; /* Beware, these counters are Opteron specific @@ -75,45 +75,54 @@ int papi_error; /* Arbitrary, to avoid using malloc */ #define MAX_PAPI_EVENTS 10 -int n_papi_events = 0; +static nat n_papi_events = 0; /* Events counted during GC and Mutator execution */ /* There's a trailing comma, do all C compilers accept that? 
*/ static struct _papi_events papi_events[MAX_PAPI_EVENTS]; long_long MutatorCounters[MAX_PAPI_EVENTS]; -long_long GCCounters[MAX_PAPI_EVENTS]; +long_long GC0Counters[MAX_PAPI_EVENTS]; +long_long GC1Counters[MAX_PAPI_EVENTS]; long_long start_mutator_cycles; -long_long start_gc_cycles; long_long mutator_cycles; -long_long gc_cycles; +long_long start_gc_cycles; +long_long gc0_cycles; +long_long gc1_cycles; static long_long papi_counter(long_long values[],int event); static void papi_add_events(int EventSet); +static nat max_hardware_counters = 2; + /* If you want to add events to count, extend the * init_countable_events and the papi_report function. * Be aware that your processor can count a limited number * of events simultaneously, you can turn on multiplexing * to increase that number, though. */ +static void papi_add_event(const char *name, int code) +{ + if (n_papi_events >= max_hardware_counters) { + errorBelch("too many PAPI events for this CPU (max: %d)", + max_hardware_counters); + stg_exit(EXIT_FAILURE); + } + papi_events[n_papi_events].event_code = code; + papi_events[n_papi_events].event_name = name; + n_papi_events++; +} + static void init_countable_events(void) { -#define PAPI_ADD_EVENT(EVENT) \ - { \ - if (n_papi_events >= MAX_PAPI_EVENTS) { \ - barf("too many PAPI events"); \ - } \ - papi_events[n_papi_events].event_code = EVENT; \ - papi_events[n_papi_events].event_name = #EVENT; \ - n_papi_events++; \ - } + max_hardware_counters = PAPI_num_counters(); + +#define PAPI_ADD_EVENT(EVENT) papi_add_event(#EVENT,EVENT) - PAPI_ADD_EVENT(PAPI_TOT_INS); if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) { PAPI_ADD_EVENT(FR_BR); PAPI_ADD_EVENT(FR_BR_MIS); @@ -133,8 +142,35 @@ init_countable_events(void) PAPI_ADD_EVENT(DC_L2_REFILL_MOES); PAPI_ADD_EVENT(DC_SYS_REFILL_MOES); PAPI_ADD_EVENT(FR_BR_MIS); + } else if (RtsFlags.PapiFlags.eventType==PAPI_USER_EVENTS) { + nat i; + char *name; + int code; + for (i = 0; i < RtsFlags.PapiFlags.numUserEvents; i++) { + 
name = RtsFlags.PapiFlags.userEvents[i]; + PAPI_CHECK(PAPI_event_name_to_code(name, &code)) + papi_add_event(name, code); + } } else { - PAPI_ADD_EVENT(PAPI_STL_ICY); + // PAPI_ADD_EVENT(PAPI_L1_DCA); // L1 data cache accesses + // PAPI_ADD_EVENT(PAPI_L1_ICR); // L1 instruction cache reads + // PAPI_ADD_EVENT(PAPI_L1_ICM); // L1 instruction cache misses + // PAPI_ADD_EVENT(PAPI_L1_STM); // L1 store misses + // PAPI_ADD_EVENT(PAPI_L1_DCM); // L1 data cache misses + // PAPI_ADD_EVENT(PAPI_L1_LDM); // L1 load misses + // PAPI_ADD_EVENT(PAPI_L2_TCM); // L2 cache misses + // PAPI_ADD_EVENT(PAPI_L2_STM); // L2 store misses + // PAPI_ADD_EVENT(PAPI_L2_DCW); // L2 data cache writes + // PAPI_ADD_EVENT(PAPI_L2_DCR); // L2 data cache reads + // PAPI_ADD_EVENT(PAPI_L2_TCW); // L2 cache writes + // PAPI_ADD_EVENT(PAPI_L2_TCR); // L2 cache reads + // PAPI_ADD_EVENT(PAPI_CA_CLN); // exclusive access to clean cache line + // PAPI_ADD_EVENT(PAPI_TLB_DM); // TLB misses + PAPI_ADD_EVENT(PAPI_TOT_INS); // Total instructions + PAPI_ADD_EVENT(PAPI_TOT_CYC); // Total cycles + // PAPI_ADD_EVENT(PAPI_CA_SHR); // exclusive access to shared cache line + // PAPI_ADD_EVENT(PAPI_RES_STL); // Cycles stalled on any resource + } // We might also consider: @@ -143,90 +179,58 @@ init_countable_events(void) }; -static char temp[BIG_STRING_LEN]; - -static void -papi_mut_cycles(void) -{ - ullong_format_string(mutator_cycles,temp,rtsTrue/*commas*/); - statsPrintf(" (MUT_CYCLES) : %s\n",temp); -} - static void -papi_gc_cycles(void) +papi_report_event(const char *name, ullong value) { - ullong_format_string(gc_cycles,temp,rtsTrue/*commas*/); - statsPrintf(" (GC_CYCLES) : %s\n",temp); + static char temp[BIG_STRING_LEN]; + ullong_format_string(value,temp,rtsTrue/*commas*/); + statsPrintf(" %15s %15s\n", name, temp); } /* This function reports counters for GC and mutator */ static void -papi_report(long_long PapiCounters[]) +papi_report(long_long counters[]) { - -/* Report the value of a counter */ 
-#define PAPI_REPORT(EVENTSET,EVENT) \ - { \ - ullong_format_string(papi_counter(EVENTSET,EVENT),temp,rtsTrue/*commas*/); \ - statsPrintf(" (" #EVENT ") : %s\n",temp); \ - } + nat i; /* Report the value of a counter as a percentage of another counter */ #define PAPI_REPORT_PCT(EVENTSET,EVENT,EVENTTOT) \ - statsPrintf(" (" #EVENT ") %% of (" #EVENTTOT ") : %.1f%%\n", \ - papi_counter(EVENTSET,EVENT)*100.0/papi_counter(EVENTSET,EVENTTOT)) + statsPrintf(" " #EVENT " %% of " #EVENTTOT " : %.1f%%\n", \ + papi_counter(EVENTSET,EVENT)*100.0/papi_counter(EVENTSET,EVENTTOT)) - /* I need to improve formatting aesthetics */ - PAPI_REPORT(PapiCounters,PAPI_TOT_INS); - - if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) { - PAPI_REPORT(PapiCounters,FR_BR); - PAPI_REPORT(PapiCounters,FR_BR_MIS); - PAPI_REPORT_PCT(PapiCounters,FR_BR_MIS,FR_BR); - PAPI_REPORT_PCT(PapiCounters,FR_BR_MISCOMPARE,FR_BR); + for (i = 0; i < n_papi_events; i++) + { + papi_report_event(papi_events[i].event_name, counters[i]); } - else if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_STALLS) { - PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS); - PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS_BR); - //PAPI_REPORT_PCT(PapiCounters,FR_DISPATCH_STALLS_BR,PAPI_TOT_CYC); - PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS_FULL_LS); - //PAPI_REPORT_PCT(PapiCounters,FR_DISPATCH_STALLS_FULL_LS,PAPI_TOT_CYC); + if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) { + PAPI_REPORT_PCT(counters,FR_BR_MIS,FR_BR); + PAPI_REPORT_PCT(counters,FR_BR_MISCOMPARE,FR_BR); } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) { - PAPI_REPORT(PapiCounters,PAPI_L1_DCA); - PAPI_REPORT(PapiCounters,PAPI_L1_DCM); - PAPI_REPORT_PCT(PapiCounters,PAPI_L1_DCM,PAPI_L1_DCA); + PAPI_REPORT_PCT(counters,PAPI_L1_DCM,PAPI_L1_DCA); } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) { - PAPI_REPORT(PapiCounters,PAPI_L2_DCA); - PAPI_REPORT(PapiCounters,PAPI_L2_DCM); - PAPI_REPORT_PCT(PapiCounters,PAPI_L2_DCM,PAPI_L2_DCA); + 
PAPI_REPORT_PCT(counters,PAPI_L2_DCM,PAPI_L2_DCA); } - - else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CB_EVENTS) { - PAPI_REPORT(PapiCounters,DC_L2_REFILL_MOES); - PAPI_REPORT(PapiCounters,DC_SYS_REFILL_MOES); - PAPI_REPORT(PapiCounters,FR_BR_MIS); - } - - else { - PAPI_REPORT(PapiCounters,PAPI_STL_ICY); - } } void papi_stats_report (void) { - statsPrintf(" -- CPU Mutator counters --\n"); - papi_mut_cycles(); + statsPrintf(" Mutator CPU counters\n"); + papi_report_event("CYCLES", mutator_cycles); papi_report(MutatorCounters); - statsPrintf("\n -- CPU GC counters --\n"); - papi_gc_cycles(); - papi_report(GCCounters); + statsPrintf("\n GC(0) CPU counters\n"); + papi_report_event("CYCLES", gc0_cycles); + papi_report(GC0Counters); + + statsPrintf("\n GC(1) CPU counters\n"); + papi_report_event("CYCLES", gc1_cycles); + papi_report(GC1Counters); } void @@ -273,7 +277,7 @@ papi_init (void) static long_long papi_counter(long_long values[],int event) { - int i; + nat i; for(i=0;i= MAX_PAPI_USER_EVENTS) { + errorBelch("maximum number of PAPI events reached"); + stg_exit(EXIT_FAILURE); + } + RtsFlags.PapiFlags.eventType = PAPI_USER_EVENTS; + RtsFlags.PapiFlags.userEvents[RtsFlags.PapiFlags.numUserEvents++] = rts_argv[arg] + 3; + break; default: bad_option( rts_argv[arg] ); } diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c index d1025a3..070275b 100644 --- a/rts/RtsStartup.c +++ b/rts/RtsStartup.c @@ -167,10 +167,6 @@ hs_init(int *argc, char **argv[]) argv++; argc--; #endif -#ifdef USE_PAPI - papi_init(); -#endif - /* Set the RTS flags to default values. 
*/ initRtsFlagsDefaults(); @@ -185,6 +181,10 @@ hs_init(int *argc, char **argv[]) setProgArgv(*argc,*argv); } +#ifdef USE_PAPI + papi_init(); +#endif + /* initTracing must be after setupRtsFlags() */ initTracing(); diff --git a/rts/Stats.c b/rts/Stats.c index 1127b81..4bb9c82 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -347,7 +347,11 @@ stat_endGC (lnat alloc, lnat live, lnat copied, lnat gen) #if USE_PAPI if(papi_is_reporting) { /* Switch to counting mutator events */ - papi_stop_gc_count(); + if (gen == 0) { + papi_stop_gc0_count(); + } else { + papi_stop_gc1_count(); + } papi_start_mutator_count(); } #endif diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 0dac20c..a07086e 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -1024,14 +1024,14 @@ gc_thread_mainloop (void) if (gct->papi_events == -1) { papi_init_eventset(&gct->papi_events); } - papi_thread_start_gc_count(gct->papi_events); + papi_thread_start_gc1_count(gct->papi_events); #endif gc_thread_work(); #ifdef USE_PAPI // count events in this thread towards the GC totals - papi_thread_stop_gc_count(gct->papi_events); + papi_thread_stop_gc1_count(gct->papi_events); #endif } } -- 1.7.10.4