X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2FPapi.c;h=62f5d0d3963bd1bf0eb3bbb67b4c0a39381f0780;hb=cd47700887365ca2a6af17d03e731efce65cf2ac;hp=8ce3cc2381134a8e083961c8af3d038e82218af1;hpb=fe07f054d7ae5e10b14d5fed730fe4424dabd587;p=ghc-hetmet.git diff --git a/rts/Papi.c b/rts/Papi.c index 8ce3cc2..62f5d0d 100644 --- a/rts/Papi.c +++ b/rts/Papi.c @@ -1,57 +1,72 @@ - - -#include "Papi.h" +/* ----------------------------------------------------------------------------- + * (c) The GHC Team 2006 + * + * Initialization and use of the PAPI performance monitoring library + * + * + * For adding events or add your processor counters modify + * + * init_countable_events + * papi_report + * + * ---------------------------------------------------------------------------*/ + + +#ifdef USE_PAPI /* ugly */ + +#include +/* The posix symbols get defined in a header included from papi.h. + * undefind them here to allow redefinition in PosixSource.h */ +#undef _POSIX_SOURCE +#undef _POSIX_C_SOURCE +#undef _XOPEN_SOURCE + +#include "PosixSource.h" #include "Rts.h" + #include "RtsUtils.h" #include "Stats.h" +#include "Papi.h" - -/* These constants specify which events to keep track of. - * Probably it is better to count one set of events at a time. - * The reason is that processors have limited counters and - * multiplexing is not enabled (yet). - */ -#define PAPI_COUNT_BRANCHES 0 -/* The one below is Opteron specific. - */ -#define PAPI_COUNT_STALLS 0 -#define PAPI_COUNT_DCACHE1_MISSES 1 -#define PAPI_COUNT_DCACHE2_MISSES 0 +// used to protect the aggregated counters +#ifdef THREADED_RTS +static Mutex papi_counter_mutex; +#endif struct _papi_events { int event_code; - char * event_name; + const char * event_name; }; -#define PAPI_ADD_EVENT(EVENT) { EVENT, #EVENT } - -/* Beware, these counters are Opteron specific */ +/* Beware, these counters are Opteron specific + * I obtained the numbers using the papi_avail + * and papi_native_avail utilities. + * This is certainly not the official PAPI way + * of doing things. + */ #define FR_BR 0x40000040 #define FR_BR_MIS 0x40000041 #define FR_BR_MISCOMPARE 0x40000048 #define DC_ACCESS 0x40000019 #define DC_MISS 0x4000001a +#define FR_DISPATCH_STALLS 0x40000054 #define FR_DISPATCH_STALLS_BR 0x40000055 +#define FR_DISPATCH_STALLS_FULL_REORDER 0x40000058 +#define FR_DISPATCH_STALLS_FULL_RESERVATION 0x40000059 #define FR_DISPATCH_STALLS_FULL_LS 0x4000005b - -/* Report the value of a counter */ -#define PAPI_REPORT(EVENTSET,EVENT) \ - { \ - ullong_format_string(papi_counter(EVENTSET,EVENT),temp,rtsTrue/*commas*/); \ - statsPrintf(" (" #EVENT ") : %s\n",temp); \ - } - -/* Report the value of a counter as a percentage of another counter */ -#define PAPI_REPORT_PCT(EVENTSET,EVENT,EVENTTOT) \ - statsPrintf(" (" #EVENT ") %% of (" #EVENTTOT ") : %.1f%%\n", \ - papi_counter(EVENTSET,EVENT)*100.0/papi_counter(EVENTSET,EVENTTOT)) - -/* Number of counted events, computed from size of papi_events */ -#define N_PAPI_EVENTS ((int)(sizeof(papi_events)/sizeof(struct _papi_events))) +#define DC_L2_REFILL_MOES 0x40001e1b +#define DC_SYS_REFILL_MOES 0x40001e1c /* This is bad, it should be in a header */ #define BIG_STRING_LEN 512 + +#define PAPI_CHECK(CALL) \ + if((papi_error=(CALL)) != PAPI_OK) { \ + debugBelch("PAPI function failed in module %s at line %d with error code %d\n", \ + __FILE__,__LINE__,papi_error); \ + } + /* While PAPI reporting is going on this flag is on */ int papi_is_reporting; @@ -62,45 +77,223 @@ int GCEvents = PAPI_NULL; int papi_error; +/* Arbitrary, to avoid using malloc */ +#define MAX_PAPI_EVENTS 10 +static char papiNativeEventNames[MAX_PAPI_EVENTS][PAPI_MAX_STR_LEN]; + +static nat n_papi_events = 0; -/* If you want to add events to count, extend the - * papi_events array and the papi_report function. - */ /* Events counted during GC and Mutator execution */ /* There's a trailing comma, do all C compilers accept that? */ -static struct _papi_events papi_events[] = { - PAPI_ADD_EVENT(PAPI_TOT_CYC), -#if PAPI_COUNT_BRANCHES - PAPI_ADD_EVENT(FR_BR), - PAPI_ADD_EVENT(FR_BR_MIS), - /* Docs are wrong? Opteron does not count indirect branch misses apparently */ - PAPI_ADD_EVENT(FR_BR_MISCOMPARE), -#endif -#if PAPI_COUNT_STALLS - PAPI_ADD_EVENT(FR_DISPATCH_STALLS_BR), - PAPI_ADD_EVENT(FR_DISPATCH_STALLS_FULL_LS), -#endif -#if PAPI_COUNT_DCACHE1_MISSES - PAPI_ADD_EVENT(PAPI_L1_DCA), - PAPI_ADD_EVENT(PAPI_L1_DCM), -#endif -#if PAPI_COUNT_DCACHE2_MISSES - PAPI_ADD_EVENT(PAPI_L2_DCA), - PAPI_ADD_EVENT(PAPI_L2_DCM), -#endif +static struct _papi_events papi_events[MAX_PAPI_EVENTS]; +long_long MutatorCounters[MAX_PAPI_EVENTS]; +long_long GC0Counters[MAX_PAPI_EVENTS]; +long_long GC1Counters[MAX_PAPI_EVENTS]; + +long_long start_mutator_cycles; +long_long mutator_cycles = 0; +long_long start_gc_cycles; +long_long gc0_cycles = 0; +long_long gc1_cycles = 0; + + + +static long_long papi_counter(long_long values[],int event); +static void papi_add_events(int EventSet); + +static nat max_hardware_counters = 2; + +/* If you want to add events to count, extend the + * init_countable_events and the papi_report function. + * Be aware that your processor can count a limited number + * of events simultaneously, you can turn on multiplexing + * to increase that number, though. + */ +static void papi_add_event(const char *name, int code) +{ + if (n_papi_events >= max_hardware_counters) { + errorBelch("too many PAPI events for this CPU (max: %d)", + max_hardware_counters); + stg_exit(EXIT_FAILURE); + } + papi_events[n_papi_events].event_code = code; + papi_events[n_papi_events].event_name = name; + n_papi_events++; +} + +static void +init_countable_events(void) +{ + max_hardware_counters = PAPI_num_counters(); + +#define PAPI_ADD_EVENT(EVENT) papi_add_event(#EVENT,EVENT) + + if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) { + PAPI_ADD_EVENT(FR_BR); + PAPI_ADD_EVENT(FR_BR_MIS); + /* Docs are wrong? Opteron does not count indirect branch misses exclusively */ + PAPI_ADD_EVENT(FR_BR_MISCOMPARE); + } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_STALLS) { + PAPI_ADD_EVENT(FR_DISPATCH_STALLS); + PAPI_ADD_EVENT(FR_DISPATCH_STALLS_BR); + PAPI_ADD_EVENT(FR_DISPATCH_STALLS_FULL_LS); + } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) { + PAPI_ADD_EVENT(PAPI_L1_DCA); + PAPI_ADD_EVENT(PAPI_L1_DCM); + } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) { + PAPI_ADD_EVENT(PAPI_L2_DCA); + PAPI_ADD_EVENT(PAPI_L2_DCM); + } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CB_EVENTS) { + PAPI_ADD_EVENT(DC_L2_REFILL_MOES); + PAPI_ADD_EVENT(DC_SYS_REFILL_MOES); + PAPI_ADD_EVENT(FR_BR_MIS); + } else if (RtsFlags.PapiFlags.eventType==PAPI_USER_EVENTS) { + nat i; + char *name; + char *asciiEventCode; + int code; + for (i = 0; i < RtsFlags.PapiFlags.numUserEvents; i++) { + if(RtsFlags.PapiFlags.userEventsKind[i] == PAPI_PRESET_EVENT_KIND) { + name = RtsFlags.PapiFlags.userEvents[i]; + PAPI_CHECK(PAPI_event_name_to_code(name, &code)) + } + else { // PAPI_NATIVE_EVENT_KIND + asciiEventCode = RtsFlags.PapiFlags.userEvents[i]; + name = papiNativeEventNames[i]; + code = strtol(asciiEventCode, NULL, 16 /* hex number expected */); + PAPI_CHECK(PAPI_event_code_to_name(code, name)) + } + papi_add_event(name, code); + } + } else { + // PAPI_ADD_EVENT(PAPI_L1_DCA); // L1 data cache accesses + // PAPI_ADD_EVENT(PAPI_L1_ICR); // L1 instruction cache reads + // PAPI_ADD_EVENT(PAPI_L1_ICM); // L1 instruction cache misses + // PAPI_ADD_EVENT(PAPI_L1_STM); // L1 store misses + // PAPI_ADD_EVENT(PAPI_L1_DCM); // L1 data cache misses + // PAPI_ADD_EVENT(PAPI_L1_LDM); // L1 load misses + // PAPI_ADD_EVENT(PAPI_L2_TCM); // L2 cache misses + // PAPI_ADD_EVENT(PAPI_L2_STM); // L2 store misses + // PAPI_ADD_EVENT(PAPI_L2_DCW); // L2 data cache writes + // PAPI_ADD_EVENT(PAPI_L2_DCR); // L2 data cache reads + // PAPI_ADD_EVENT(PAPI_L2_TCW); // L2 cache writes + // PAPI_ADD_EVENT(PAPI_L2_TCR); // L2 cache reads + // PAPI_ADD_EVENT(PAPI_CA_CLN); // exclusive access to clean cache line + // PAPI_ADD_EVENT(PAPI_TLB_DM); // TLB misses + PAPI_ADD_EVENT(PAPI_TOT_INS); // Total instructions + PAPI_ADD_EVENT(PAPI_TOT_CYC); // Total instructions + // PAPI_ADD_EVENT(PAPI_CA_SHR); // exclusive access to shared cache line + // PAPI_ADD_EVENT(PAPI_RES_STL); // Cycles stalled on any resource + + } + + // We might also consider: + // PAPI_BR_MSP Conditional branch instructions mispredicted + // PAPI_RES_STL Cycles stalled on any resource }; -long_long MutatorCounters[N_PAPI_EVENTS]; -long_long GCCounters[N_PAPI_EVENTS]; +static void +papi_report_event(const char *name, StgWord64 value) +{ + static char temp[BIG_STRING_LEN]; + showStgWord64(value,temp,rtsTrue/*commas*/); + statsPrintf(" %15s %15s\n", name, temp); +} + +/* This function reports counters for GC and mutator */ +static void +papi_report(long_long counters[]) +{ + nat i; + +/* Report the value of a counter as a percentage of another counter */ +#define PAPI_REPORT_PCT(EVENTSET,EVENT,EVENTTOT) \ + statsPrintf(" " #EVENT " %% of " #EVENTTOT " : %.1f%%\n", \ + papi_counter(EVENTSET,EVENT)*100.0/papi_counter(EVENTSET,EVENTTOT)) + + for (i = 0; i < n_papi_events; i++) + { + papi_report_event(papi_events[i].event_name, counters[i]); + } + + if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) { + PAPI_REPORT_PCT(counters,FR_BR_MIS,FR_BR); + PAPI_REPORT_PCT(counters,FR_BR_MISCOMPARE,FR_BR); + } + + else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) { + PAPI_REPORT_PCT(counters,PAPI_L1_DCM,PAPI_L1_DCA); + } + + else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) { + PAPI_REPORT_PCT(counters,PAPI_L2_DCM,PAPI_L2_DCA); + } +} + +void +papi_stats_report (void) +{ + statsPrintf(" Mutator CPU counters\n"); + papi_report_event("CYCLES", mutator_cycles); + papi_report(MutatorCounters); + + statsPrintf("\n GC(0) CPU counters\n"); + papi_report_event("CYCLES", gc0_cycles); + papi_report(GC0Counters); + + statsPrintf("\n GC(1) CPU counters\n"); + papi_report_event("CYCLES", gc1_cycles); + papi_report(GC1Counters); +} + +void +papi_init_eventset (int *event_set) +{ + PAPI_register_thread(); + PAPI_CHECK( PAPI_create_eventset(event_set)); + papi_add_events(*event_set); +} + +void +papi_init (void) +{ + /* Initialise the performance tracking library */ + int ver; + if ((ver = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) { + if (ver > 0) { + errorBelch("PAPI_library_init: wrong version: %x", ver); + stg_exit(EXIT_FAILURE); + } else { + sysErrorBelch("PAPI_library_init"); + stg_exit(EXIT_FAILURE); + } + } + +#ifdef THREADED_RTS + { + int err; + if ((err = PAPI_thread_init(osThreadId)) < 0) { + barf("PAPI_thread_init: %d",err); + } + + initMutex(&papi_counter_mutex); + } +#endif + + init_countable_events(); + + papi_init_eventset(&MutatorEvents); + papi_init_eventset(&GCEvents); +} /* Extract the value corresponding to an event */ -long_long +static long_long papi_counter(long_long values[],int event) { - int i; - for(i=0;i