X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2FPapi.c;h=d95e26c8e7d09cdcaaea262a5ac77af9ef12bf47;hb=66579ff945831c5fc9a17c58c722ff01f2268d76;hp=78cf9b10523db7e9c33bedf73fdb1efb58b3cf57;hpb=9e5fe6be620eaf03a86f1321bef603ca43699a3c;p=ghc-hetmet.git diff --git a/rts/Papi.c b/rts/Papi.c index 78cf9b1..d95e26c 100644 --- a/rts/Papi.c +++ b/rts/Papi.c @@ -14,38 +14,25 @@ #ifdef USE_PAPI /* ugly */ +#include + #include "Papi.h" #include "Rts.h" #include "RtsUtils.h" #include "Stats.h" #include "RtsFlags.h" +#include "OSThreads.h" +// used to protect the aggregated counters +#ifdef THREADED_RTS +static Mutex papi_counter_mutex; +#endif struct _papi_events { int event_code; - char * event_name; + const char * event_name; }; -#define PAPI_ADD_EVENT(EVENT) \ - { \ - ASSERT(n_papi_events= max_hardware_counters) { + errorBelch("too many PAPI events for this CPU (max: %d)", + max_hardware_counters); + stg_exit(EXIT_FAILURE); + } + papi_events[n_papi_events].event_code = code; + papi_events[n_papi_events].event_name = name; + n_papi_events++; +} + static void init_countable_events(void) { - PAPI_ADD_EVENT(PAPI_TOT_INS); - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) { + max_hardware_counters = PAPI_num_counters(); + +#define PAPI_ADD_EVENT(EVENT) papi_add_event(#EVENT,EVENT) + + if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) { PAPI_ADD_EVENT(FR_BR); PAPI_ADD_EVENT(FR_BR_MIS); /* Docs are wrong? Opteron does not count indirect branch misses exclusively */ PAPI_ADD_EVENT(FR_BR_MISCOMPARE); - } - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_STALLS) { + } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_STALLS) { PAPI_ADD_EVENT(FR_DISPATCH_STALLS); PAPI_ADD_EVENT(FR_DISPATCH_STALLS_BR); PAPI_ADD_EVENT(FR_DISPATCH_STALLS_FULL_LS); - } - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) { + } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) { PAPI_ADD_EVENT(PAPI_L1_DCA); PAPI_ADD_EVENT(PAPI_L1_DCM); - } - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) { + } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) { PAPI_ADD_EVENT(PAPI_L2_DCA); PAPI_ADD_EVENT(PAPI_L2_DCM); - } - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_CB_EVENTS) { + } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CB_EVENTS) { PAPI_ADD_EVENT(DC_L2_REFILL_MOES); PAPI_ADD_EVENT(DC_SYS_REFILL_MOES); PAPI_ADD_EVENT(FR_BR_MIS); + } else if (RtsFlags.PapiFlags.eventType==PAPI_USER_EVENTS) { + nat i; + char *name; + int code; + for (i = 0; i < RtsFlags.PapiFlags.numUserEvents; i++) { + name = RtsFlags.PapiFlags.userEvents[i]; + PAPI_CHECK(PAPI_event_name_to_code(name, &code)) + papi_add_event(name, code); + } + } else { + // PAPI_ADD_EVENT(PAPI_L1_DCA); // L1 data cache accesses + // PAPI_ADD_EVENT(PAPI_L1_ICR); // L1 instruction cache reads + // PAPI_ADD_EVENT(PAPI_L1_ICM); // L1 instruction cache misses + // PAPI_ADD_EVENT(PAPI_L1_STM); // L1 store misses + // PAPI_ADD_EVENT(PAPI_L1_DCM); // L1 data cache misses + // PAPI_ADD_EVENT(PAPI_L1_LDM); // L1 load misses + // PAPI_ADD_EVENT(PAPI_L2_TCM); // L2 cache misses + // PAPI_ADD_EVENT(PAPI_L2_STM); // L2 store misses + // PAPI_ADD_EVENT(PAPI_L2_DCW); // L2 data cache writes + // PAPI_ADD_EVENT(PAPI_L2_DCR); // L2 data cache reads + // PAPI_ADD_EVENT(PAPI_L2_TCW); // L2 cache writes + // PAPI_ADD_EVENT(PAPI_L2_TCR); // L2 cache reads + // PAPI_ADD_EVENT(PAPI_CA_CLN); // exclusive access to clean cache line + // PAPI_ADD_EVENT(PAPI_TLB_DM); // TLB misses + PAPI_ADD_EVENT(PAPI_TOT_INS); // Total instructions + PAPI_ADD_EVENT(PAPI_TOT_CYC); // Total instructions + // PAPI_ADD_EVENT(PAPI_CA_SHR); // exclusive access to shared cache line + // PAPI_ADD_EVENT(PAPI_RES_STL); // Cycles stalled on any resource + } -}; - -static char temp[BIG_STRING_LEN]; + // We might also consider: + // PAPI_BR_MSP Conditional branch instructions mispredicted + // PAPI_RES_STL Cycles stalled on any resource +}; -void -papi_mut_cycles() -{ - ullong_format_string(mutator_cycles,temp,rtsTrue/*commas*/); - statsPrintf(" (MUT_CYCLES) : %s\n",temp); -} -void -papi_gc_cycles() +static void +papi_report_event(const char *name, ullong value) { - ullong_format_string(gc_cycles,temp,rtsTrue/*commas*/); - statsPrintf(" (GC_CYCLES) : %s\n",temp); + static char temp[BIG_STRING_LEN]; + ullong_format_string(value,temp,rtsTrue/*commas*/); + statsPrintf(" %15s %15s\n", name, temp); } /* This function reports counters for GC and mutator */ -void -papi_report(long_long PapiCounters[]) +static void +papi_report(long_long counters[]) { + nat i; - /* I need to improve formatting aesthetics */ - PAPI_REPORT(PapiCounters,PAPI_TOT_INS); - - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) { - PAPI_REPORT(PapiCounters,FR_BR); - PAPI_REPORT(PapiCounters,FR_BR_MIS); - PAPI_REPORT_PCT(PapiCounters,FR_BR_MIS,FR_BR); - PAPI_REPORT_PCT(PapiCounters,FR_BR_MISCOMPARE,FR_BR); - } +/* Report the value of a counter as a percentage of another counter */ +#define PAPI_REPORT_PCT(EVENTSET,EVENT,EVENTTOT) \ + statsPrintf(" " #EVENT " %% of " #EVENTTOT " : %.1f%%\n", \ + papi_counter(EVENTSET,EVENT)*100.0/papi_counter(EVENTSET,EVENTTOT)) - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_STALLS) { - PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS); - PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS_BR); - //PAPI_REPORT_PCT(PapiCounters,FR_DISPATCH_STALLS_BR,PAPI_TOT_CYC); - PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS_FULL_LS); - //PAPI_REPORT_PCT(PapiCounters,FR_DISPATCH_STALLS_FULL_LS,PAPI_TOT_CYC); + for (i = 0; i < n_papi_events; i++) + { + papi_report_event(papi_events[i].event_name, counters[i]); } - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) { - PAPI_REPORT(PapiCounters,PAPI_L1_DCA); - PAPI_REPORT(PapiCounters,PAPI_L1_DCM); - PAPI_REPORT_PCT(PapiCounters,PAPI_L1_DCM,PAPI_L1_DCA); + if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) { + PAPI_REPORT_PCT(counters,FR_BR_MIS,FR_BR); + PAPI_REPORT_PCT(counters,FR_BR_MISCOMPARE,FR_BR); } - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) { - PAPI_REPORT(PapiCounters,PAPI_L2_DCA); - PAPI_REPORT(PapiCounters,PAPI_L2_DCM); - PAPI_REPORT_PCT(PapiCounters,PAPI_L2_DCM,PAPI_L2_DCA); + else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) { + PAPI_REPORT_PCT(counters,PAPI_L1_DCM,PAPI_L1_DCA); } - if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_CB_EVENTS) { - PAPI_REPORT(PapiCounters,DC_L2_REFILL_MOES); - PAPI_REPORT(PapiCounters,DC_SYS_REFILL_MOES); - PAPI_REPORT(PapiCounters,FR_BR_MIS); + else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) { + PAPI_REPORT_PCT(counters,PAPI_L2_DCM,PAPI_L2_DCA); } - } - +void +papi_stats_report (void) +{ + statsPrintf(" Mutator CPU counters\n"); + papi_report_event("CYCLES", mutator_cycles); + papi_report(MutatorCounters); + + statsPrintf("\n GC(0) CPU counters\n"); + papi_report_event("CYCLES", gc0_cycles); + papi_report(GC0Counters); + + statsPrintf("\n GC(1) CPU counters\n"); + papi_report_event("CYCLES", gc1_cycles); + papi_report(GC1Counters); +} + +void +papi_init_eventset (int *event_set) +{ + PAPI_register_thread(); + PAPI_CHECK( PAPI_create_eventset(event_set)); + papi_add_events(*event_set); +} void -papi_init_eventsets(void) +papi_init (void) { + /* Initialise the performance tracking library */ + int ver; + if ((ver = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) { + if (ver > 0) { + errorBelch("PAPI_library_init: wrong version: %x", ver); + stg_exit(EXIT_FAILURE); + } else { + sysErrorBelch("PAPI_library_init"); + stg_exit(EXIT_FAILURE); + } + } - init_countable_events(); +#ifdef THREADED_RTS + { + int err; + if ((err = PAPI_thread_init(osThreadId)) < 0) { + barf("PAPI_thread_init: %d",err); + } - /* One event set for the mutator and another for the GC */ - PAPI_CHECK( PAPI_create_eventset(&MutatorEvents)); - PAPI_CHECK( PAPI_create_eventset(&GCEvents)); + initMutex(&papi_counter_mutex); + } +#endif - /* Both sets contain the same events */ - papi_add_events(MutatorEvents); - papi_add_events(GCEvents); + init_countable_events(); + papi_init_eventset(&MutatorEvents); + papi_init_eventset(&GCEvents); } /* Extract the value corresponding to an event */ -long_long +static long_long papi_counter(long_long values[],int event) { - int i; - for(i=0;i