From fe07f054d7ae5e10b14d5fed730fe4424dabd587 Mon Sep 17 00:00:00 2001 From: "mrchebas@gmail.com" Date: Wed, 8 Nov 2006 17:14:52 +0000 Subject: [PATCH] Addition of PAPI to RTS This patch still requires the addition of the USE_PAPI define to compile with PAPI. Also, programs must be compiled and linked with the appropriate library flags for papi. --- rts/Papi.c | 197 +++++++++++++++++++++++++++++++++++++++++++++++++++ rts/Papi.h | 37 ++++++++++ rts/RtsStartup.c | 24 ++++++- rts/Stats.c | 58 +++++++++++++-- rts/Stats.h | 5 ++ rts/package.conf.in | 3 + rts/posix/GetTime.c | 21 +++++- 7 files changed, 339 insertions(+), 6 deletions(-) create mode 100644 rts/Papi.c create mode 100644 rts/Papi.h diff --git a/rts/Papi.c b/rts/Papi.c new file mode 100644 index 0000000..8ce3cc2 --- /dev/null +++ b/rts/Papi.c @@ -0,0 +1,197 @@ + + +#include "Papi.h" +#include "Rts.h" +#include "RtsUtils.h" +#include "Stats.h" + + +/* These constants specify which events to keep track of. + * Probably it is better to count one set of events at a time. + * The reason is that processors have limited counters and + * multiplexing is not enabled (yet). + */ +#define PAPI_COUNT_BRANCHES 0 +/* The one below is Opteron specific. 
+ */ +#define PAPI_COUNT_STALLS 0 +#define PAPI_COUNT_DCACHE1_MISSES 1 +#define PAPI_COUNT_DCACHE2_MISSES 0 + +struct _papi_events { + int event_code; + char * event_name; +}; + +#define PAPI_ADD_EVENT(EVENT) { EVENT, #EVENT } + +/* Beware, these counters are Opteron specific */ +#define FR_BR 0x40000040 +#define FR_BR_MIS 0x40000041 +#define FR_BR_MISCOMPARE 0x40000048 +#define DC_ACCESS 0x40000019 +#define DC_MISS 0x4000001a +#define FR_DISPATCH_STALLS_BR 0x40000055 +#define FR_DISPATCH_STALLS_FULL_LS 0x4000005b + +/* Report the value of a counter */ +#define PAPI_REPORT(EVENTSET,EVENT) \ + { \ + ullong_format_string(papi_counter(EVENTSET,EVENT),temp,rtsTrue/*commas*/); \ + statsPrintf(" (" #EVENT ") : %s\n",temp); \ + } + +/* Report the value of a counter as a percentage of another counter */ +#define PAPI_REPORT_PCT(EVENTSET,EVENT,EVENTTOT) \ + statsPrintf(" (" #EVENT ") %% of (" #EVENTTOT ") : %.1f%%\n", \ + papi_counter(EVENTSET,EVENT)*100.0/papi_counter(EVENTSET,EVENTTOT)) + +/* Number of counted events, computed from size of papi_events */ +#define N_PAPI_EVENTS ((int)(sizeof(papi_events)/sizeof(struct _papi_events))) + +/* This is bad, it should be in a header */ +#define BIG_STRING_LEN 512 + +/* Nonzero while PAPI event counting is active: set in stat_endInit, cleared in stat_startExit; stat_startGC/stat_endGC only switch event sets while it is set */ +int papi_is_reporting; + +/* Event sets and counter arrays for GC and mutator */ + +int MutatorEvents = PAPI_NULL; +int GCEvents = PAPI_NULL; + +int papi_error; + + +/* If you want to add events to count, extend the + * papi_events array and the papi_report function. + */ + +/* Events counted during GC and Mutator execution */ +/* There's a trailing comma, do all C compilers accept that? */ +static struct _papi_events papi_events[] = { + PAPI_ADD_EVENT(PAPI_TOT_CYC), +#if PAPI_COUNT_BRANCHES + PAPI_ADD_EVENT(FR_BR), + PAPI_ADD_EVENT(FR_BR_MIS), + /* Docs are wrong? 
Opteron does not count indirect branch misses apparently */ + PAPI_ADD_EVENT(FR_BR_MISCOMPARE), +#endif +#if PAPI_COUNT_STALLS + PAPI_ADD_EVENT(FR_DISPATCH_STALLS_BR), + PAPI_ADD_EVENT(FR_DISPATCH_STALLS_FULL_LS), +#endif +#if PAPI_COUNT_DCACHE1_MISSES + PAPI_ADD_EVENT(PAPI_L1_DCA), + PAPI_ADD_EVENT(PAPI_L1_DCM), +#endif +#if PAPI_COUNT_DCACHE2_MISSES + PAPI_ADD_EVENT(PAPI_L2_DCA), + PAPI_ADD_EVENT(PAPI_L2_DCM), +#endif +}; + +long_long MutatorCounters[N_PAPI_EVENTS]; +long_long GCCounters[N_PAPI_EVENTS]; + + +/* Extract the value corresponding to an event */ +long_long +papi_counter(long_long values[],int event) +{ + int i; + for(i=0;i + + + +#define PAPI_CHECK(CALL) \ + if((papi_error=(CALL)) != PAPI_OK) { \ + debugBelch("PAPI function failed in module %s at line %d with error code %d\n", \ + __FILE__,__LINE__,papi_error); \ + } + +/* Check the error value of a PAPI call, reporting an error, if needed */ +extern int papi_error; + +/* While PAPI reporting is going on this flag is on */ +extern int papi_is_reporting; + +/* Event sets and counter arrays for GC and mutator */ + +extern int MutatorEvents; +extern int GCEvents; + +extern long_long MutatorCounters[]; +extern long_long GCCounters[]; + +long_long papi_counter(long_long values[],int event); +void papi_report(long_long PapiCounters[]); +void papi_add_events(int EventSet); + +void papi_init_eventsets(void); +void papi_start_mutator_count(void); +void papi_stop_mutator_count(void); +void papi_start_gc_count(void); +void papi_stop_gc_count(void); + + diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c index 3e9fd2a..f2a0dd7 100644 --- a/rts/RtsStartup.c +++ b/rts/RtsStartup.c @@ -6,7 +6,9 @@ * * ---------------------------------------------------------------------------*/ -#include "PosixSource.h" +// PAPI uses caddr_t, which is not POSIX +// #include "PosixSource.h" + #include "Rts.h" #include "RtsAPI.h" #include "RtsUtils.h" @@ -67,6 +69,10 @@ #include #endif +#if USE_PAPI +#include "Papi.h" +#endif + // Count 
of how many outstanding hs_init()s there have been. static int hs_init_count = 0; @@ -152,7 +158,23 @@ hs_init(int *argc, char **argv[]) argv++; argc--; #endif + /* Initialise the performance tracking library */ +#ifdef USE_PAPI + /* Must fix to abort gracefully */ + if(PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) + exit(1); +#ifdef THREADED_RTS + { + int err; + if ((err = PAPI_thread_init(osThreadId)) < 0) { + barf("PAPI_thread_init: %d",err); + } + } +#endif +#endif + /* Set the RTS flags to default values. */ + initRtsFlagsDefaults(); /* Call the user hook to reset defaults, if present */ diff --git a/rts/Stats.c b/rts/Stats.c index aabe2590..9f12b6d 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -17,6 +17,10 @@ #include "Profiling.h" #include "GetTime.h" +#if USE_PAPI +#include "Papi.h" +#endif + /* huh? */ #define BIG_STRING_LEN 512 @@ -64,9 +68,6 @@ static lnat GC_start_faults = 0, GC_end_faults = 0; static Ticks *GC_coll_times; -static void statsPrintf( char *s, ... ) - GNUC3_ATTRIBUTE(format (printf, 1, 2)); - static void statsFlush( void ); static void statsClose( void ); @@ -170,6 +171,18 @@ stat_endInit(void) } else { InitElapsedTime = elapsed - ElapsedTimeStart; } +#if USE_PAPI + papi_init_eventsets(); + + /* We start counting events for the mutator + * when garbage collection starts + * we switch to the GC event set. 
*/ + papi_start_mutator_count(); + + /* Enable event-set switching in stat_startGC/stat_endGC; the flag is cleared in stat_startExit so the last GC is not counted */ + papi_is_reporting = 1; + +#endif } /* ----------------------------------------------------------------------------- @@ -192,6 +205,16 @@ stat_startExit(void) MutUserTime = user - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime; if (MutUserTime < 0) { MutUserTime = 0; } + +#if USE_PAPI + /* We stop counting mutator events here; + * GC events are not being counted at this point. */ + papi_stop_mutator_count(); + + /* This flag is needed, because GC is run once more after this function */ + papi_is_reporting = 0; + +#endif } void @@ -249,6 +272,15 @@ stat_startGC(void) GC_start_faults = getPageFaults(); } } + +#if USE_PAPI + if(papi_is_reporting) { + /* Switch to counting GC events */ + papi_stop_mutator_count(); + papi_start_gc_count(); + } +#endif + } /* ----------------------------------------------------------------------------- @@ -316,6 +348,14 @@ stat_endGC (lnat alloc, lnat live, lnat copied, debugBelch("\b\b\b \b\b\b"); rub_bell = 0; } + +#if USE_PAPI + if(papi_is_reporting) { + /* Switch to counting mutator events */ + papi_stop_gc_count(); + papi_start_mutator_count(); + } +#endif } /* ----------------------------------------------------------------------------- @@ -517,6 +557,16 @@ stat_exit(int alloc) TICK_TO_DBL(time - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime) * 100 / TICK_TO_DBL(etime)); +#if USE_PAPI + /* PAPI reporting, should put somewhere else? 
+ * Note that the cycles are counted _after_ the initialization of the RTS -- AR */ + + statsPrintf(" -- CPU Mutator counters --\n"); + papi_report(MutatorCounters); + + statsPrintf("\n -- CPU GC counters --\n"); + papi_report(GCCounters); +#endif } if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) { @@ -606,7 +656,7 @@ extern HsInt64 getAllocations( void ) Dumping stuff in the stats file, or via the debug message interface -------------------------------------------------------------------------- */ -static void +void statsPrintf( char *s, ... ) { FILE *sf = RtsFlags.GcFlags.statsFile; diff --git a/rts/Stats.h b/rts/Stats.h index 9de6b71..f7a14a1 100644 --- a/rts/Stats.h +++ b/rts/Stats.h @@ -54,4 +54,9 @@ HsInt64 getAllocations( void ); Ticks stat_getElapsedGCTime(void); Ticks stat_getElapsedTime(void); +/* Only exported for Papi.c */ +void statsPrintf( char *s, ... ) + GNUC3_ATTRIBUTE(format (printf, 1, 2)); + + #endif /* STATS_H */ diff --git a/rts/package.conf.in b/rts/package.conf.in index 4cb9843..2b2229b 100644 --- a/rts/package.conf.in +++ b/rts/package.conf.in @@ -53,6 +53,9 @@ extra-libraries: "m" /* for ldexp() */ ,"mingwex" # endif #endif +#if USE_PAPI + , "papi" +#endif #ifdef INSTALLING include-dirs: INCLUDE_DIR diff --git a/rts/posix/GetTime.c b/rts/posix/GetTime.c index a2d9a31..db7378d 100644 --- a/rts/posix/GetTime.c +++ b/rts/posix/GetTime.c @@ -32,6 +32,10 @@ # include #endif +#ifdef USE_PAPI +# include +#endif + #if ! ((defined(HAVE_GETRUSAGE) && !irix_HOST_OS) || defined(HAVE_TIMES)) #error No implementation for getProcessCPUTime() available. 
#endif @@ -68,9 +72,17 @@ void getProcessTimes(Ticks *user, Ticks *elapsed) Ticks getProcessCPUTime(void) { +#if !defined(THREADED_RTS) && USE_PAPI + long long usec; + if ((usec = PAPI_get_virt_usec()) < 0) { + barf("PAPI_get_virt_usec: %lld", usec); + } + return ((usec * TICKS_PER_SECOND) / 1000000); +#else Ticks user, elapsed; getProcessTimes(&user,&elapsed); return user; +#endif } Ticks getProcessElapsedTime(void) @@ -115,7 +127,14 @@ void getProcessTimes(Ticks *user, Ticks *elapsed) Ticks getThreadCPUTime(void) { -#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_THREAD_CPUTIME_ID) +#if USE_PAPI + long long usec; + if ((usec = PAPI_get_virt_usec()) < 0) { + barf("PAPI_get_virt_usec: %lld", usec); + } + return ((usec * TICKS_PER_SECOND) / 1000000); + +#elif defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_THREAD_CPUTIME_ID) // clock_gettime() gives us per-thread CPU time. It isn't // reliable on Linux, but it's the best we have. struct timespec ts; -- 1.7.10.4