1 /* -----------------------------------------------------------------------------
2 * (c) The GHC Team 2006
4 * Initialization and use of the PAPI performance monitoring library
7 * To add events, or counters specific to your processor, modify
9 * init_countable_events
12 * ---------------------------------------------------------------------------*/
15 #ifdef USE_PAPI /* ugly */
24 #include "OSThreads.h"
26 // used to protect the aggregated counters
// (Every papi_*_count routine visible in this file acquires this lock around
// its PAPI calls and its updates to MutatorCounters / GCCounters and the
// cycle totals.)
28 static Mutex papi_counter_mutex;
36 /* Beware: these event codes are Opteron-specific.
37 * I obtained the numbers using the papi_avail
38 * and papi_native_avail utilities.
39 * This is certainly not the official PAPI way of doing it. */
/* Raw native event codes.  PAPI_ADD_EVENT stores them in
 * papi_events[].event_code, and papi_add_events later hands them to
 * PAPI_add_event.
 * DC_ACCESS, DC_MISS, FR_DISPATCH_STALLS_FULL_REORDER and
 * FR_DISPATCH_STALLS_FULL_RESERVATION are defined but not referenced in the
 * visible part of this file. */
42 #define FR_BR 0x40000040
43 #define FR_BR_MIS 0x40000041
44 #define FR_BR_MISCOMPARE 0x40000048
45 #define DC_ACCESS 0x40000019
46 #define DC_MISS 0x4000001a
47 #define FR_DISPATCH_STALLS 0x40000054
48 #define FR_DISPATCH_STALLS_BR 0x40000055
49 #define FR_DISPATCH_STALLS_FULL_REORDER 0x40000058
50 #define FR_DISPATCH_STALLS_FULL_RESERVATION 0x40000059
51 #define FR_DISPATCH_STALLS_FULL_LS 0x4000005b
52 #define DC_L2_REFILL_MOES 0x40001e1b
53 #define DC_SYS_REFILL_MOES 0x40001e1c
55 /* This is bad, it should be in a header */
/* Size, in chars, of the static scratch buffer `temp` that the reporting
 * code passes to ullong_format_string before printing. */
56 #define BIG_STRING_LEN 512
/* PAPI_CHECK(CALL): evaluate CALL once, store its result in papi_error
 * (declared outside the lines shown here), and report through debugBelch,
 * with __FILE__ and __LINE__, when the result is not PAPI_OK.  Nothing in
 * the visible expansion aborts or returns, so execution continues after a
 * failed call.
 * NOTE(review): the expansion begins with a bare `if` rather than a
 * `do { ... } while (0)` wrapper, which is hazardous when the macro is used
 * as the body of an if/else.  The tail of the macro is not visible here;
 * confirm before relying on this. */
59 #define PAPI_CHECK(CALL) \
60 if((papi_error=(CALL)) != PAPI_OK) { \
61 debugBelch("PAPI function failed in module %s at line %d with error code %d\n", \
62 __FILE__,__LINE__,papi_error); \
65 /* While PAPI reporting is going on this flag is on */
66 int papi_is_reporting;
68 /* Event sets and counter arrays for GC and mutator */
/* Both handles start out as PAPI_NULL; the init code passes their addresses
 * to papi_init_eventset, which calls PAPI_create_eventset on them. */
70 int MutatorEvents = PAPI_NULL;
71 int GCEvents = PAPI_NULL;
75 /* Arbitrary, to avoid using malloc */
/* Capacity of papi_events[] and of the two counter arrays below. */
76 #define MAX_PAPI_EVENTS 10
/* Index of the next free papi_events[] slot (PAPI_ADD_EVENT writes there);
 * also the loop bound used when scanning the table. */
78 int n_papi_events = 0;
81 /* Events counted during GC and Mutator execution */
82 /* There's a trailing comma, do all C compilers accept that? */
/* NOTE(review): the declaration below has no initializer, so no trailing
 * comma is visible; the remark above looks stale -- confirm. */
83 static struct _papi_events papi_events[MAX_PAPI_EVENTS];
/* Accumulation targets passed to PAPI_accum by the stop routines;
 * presumably indexed in parallel with papi_events[] -- confirm against
 * papi_counter. */
84 long_long MutatorCounters[MAX_PAPI_EVENTS];
85 long_long GCCounters[MAX_PAPI_EVENTS];
/* PAPI_cycles() stamps taken when mutator / GC counting starts, and the
 * running total of mutator cycles. */
87 long_long start_mutator_cycles;
88 long_long start_gc_cycles;
89 long_long mutator_cycles;
/* Forward declarations of file-local helpers defined further down. */
94 static long_long papi_counter(long_long values[],int event);
95 static void papi_add_events(int EventSet);
97 /* If you want to add events to count, extend both
98 * init_countable_events and the papi_report function.
99 * Be aware that your processor can count only a limited number
100 * of events simultaneously; you can turn on multiplexing
101 * to increase that number, though. */
/* Fill papi_events[] with the events to count.
 *
 * PAPI_TOT_INS is always registered first; the remaining events depend on
 * RtsFlags.PapiFlags.eventType (branch, stalls, L1 cache, L2 cache or "CB"
 * events).  PAPI_ADD_EVENT calls barf() when the table already holds
 * MAX_PAPI_EVENTS entries, and otherwise records both the event code and its
 * spelling (#EVENT), which papi_add_events uses in its diagnostics.
 *
 * NOTE(review): PAPI_STL_ICY is listed after the CB branch, but the
 * surrounding braces and the tail of PAPI_ADD_EVENT are not visible here, so
 * which branch it belongs to needs confirming. */
104 init_countable_events(void)
106 #define PAPI_ADD_EVENT(EVENT) \
108 if (n_papi_events >= MAX_PAPI_EVENTS) { \
109 barf("too many PAPI events"); \
111 papi_events[n_papi_events].event_code = EVENT; \
112 papi_events[n_papi_events].event_name = #EVENT; \
116 PAPI_ADD_EVENT(PAPI_TOT_INS);
117 if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) {
118 PAPI_ADD_EVENT(FR_BR);
119 PAPI_ADD_EVENT(FR_BR_MIS);
120 /* Docs are wrong? Opteron does not count indirect branch misses exclusively */
121 PAPI_ADD_EVENT(FR_BR_MISCOMPARE);
122 } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_STALLS) {
123 PAPI_ADD_EVENT(FR_DISPATCH_STALLS);
124 PAPI_ADD_EVENT(FR_DISPATCH_STALLS_BR);
125 PAPI_ADD_EVENT(FR_DISPATCH_STALLS_FULL_LS);
126 } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) {
127 PAPI_ADD_EVENT(PAPI_L1_DCA);
128 PAPI_ADD_EVENT(PAPI_L1_DCM);
129 } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) {
130 PAPI_ADD_EVENT(PAPI_L2_DCA);
131 PAPI_ADD_EVENT(PAPI_L2_DCM);
132 } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CB_EVENTS) {
133 PAPI_ADD_EVENT(DC_L2_REFILL_MOES);
134 PAPI_ADD_EVENT(DC_SYS_REFILL_MOES);
135 PAPI_ADD_EVENT(FR_BR_MIS);
137 PAPI_ADD_EVENT(PAPI_STL_ICY);
140 // We might also consider:
141 // PAPI_BR_MSP Conditional branch instructions mispredicted
142 // PAPI_RES_STL Cycles stalled on any resource
/* Scratch buffer shared by the reporting code in this file:
 * ullong_format_string writes the formatted number into it and statsPrintf
 * then prints it. */
146 static char temp[BIG_STRING_LEN];
/* Print the accumulated mutator cycle count.  (The header of the enclosing
 * function is not visible here.) */
151 ullong_format_string(mutator_cycles,temp,rtsTrue/*commas*/);
152 statsPrintf(" (MUT_CYCLES) : %s\n",temp);
/* Print the accumulated GC cycle count.  (Enclosing function header likewise
 * not visible.) */
158 ullong_format_string(gc_cycles,temp,rtsTrue/*commas*/);
159 statsPrintf(" (GC_CYCLES) : %s\n",temp);
162 /* This function reports counters for GC and mutator */
/* PapiCounters is the counter array to print; papi_stats_report calls this
 * once with MutatorCounters and once with GCCounters.  PAPI_TOT_INS is always
 * printed; the remaining lines are selected by RtsFlags.PapiFlags.eventType
 * and name the same events that init_countable_events registers for that
 * type.
 *
 * PAPI_REPORT formats one counter into `temp` with thousands separators and
 * prints it.  PAPI_REPORT_PCT prints one counter as a percentage of another.
 *
 * NOTE(review): PAPI_REPORT_PCT divides by the EVENTTOT counter with no zero
 * check, and FR_BR_MISCOMPARE is reported only as a percentage, never as a
 * raw count. */
164 papi_report(long_long PapiCounters[])
167 /* Report the value of a counter */
168 #define PAPI_REPORT(EVENTSET,EVENT) \
170 ullong_format_string(papi_counter(EVENTSET,EVENT),temp,rtsTrue/*commas*/); \
171 statsPrintf(" (" #EVENT ") : %s\n",temp); \
174 /* Report the value of a counter as a percentage of another counter */
175 #define PAPI_REPORT_PCT(EVENTSET,EVENT,EVENTTOT) \
176 statsPrintf(" (" #EVENT ") %% of (" #EVENTTOT ") : %.1f%%\n", \
177 papi_counter(EVENTSET,EVENT)*100.0/papi_counter(EVENTSET,EVENTTOT))
179 /* I need to improve formatting aesthetics */
180 PAPI_REPORT(PapiCounters,PAPI_TOT_INS);
182 if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) {
183 PAPI_REPORT(PapiCounters,FR_BR);
184 PAPI_REPORT(PapiCounters,FR_BR_MIS);
185 PAPI_REPORT_PCT(PapiCounters,FR_BR_MIS,FR_BR);
186 PAPI_REPORT_PCT(PapiCounters,FR_BR_MISCOMPARE,FR_BR);
189 else if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_STALLS) {
190 PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS);
191 PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS_BR);
192 //PAPI_REPORT_PCT(PapiCounters,FR_DISPATCH_STALLS_BR,PAPI_TOT_CYC);
193 PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS_FULL_LS);
194 //PAPI_REPORT_PCT(PapiCounters,FR_DISPATCH_STALLS_FULL_LS,PAPI_TOT_CYC);
197 else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) {
198 PAPI_REPORT(PapiCounters,PAPI_L1_DCA);
199 PAPI_REPORT(PapiCounters,PAPI_L1_DCM);
200 PAPI_REPORT_PCT(PapiCounters,PAPI_L1_DCM,PAPI_L1_DCA);
203 else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) {
204 PAPI_REPORT(PapiCounters,PAPI_L2_DCA);
205 PAPI_REPORT(PapiCounters,PAPI_L2_DCM);
206 PAPI_REPORT_PCT(PapiCounters,PAPI_L2_DCM,PAPI_L2_DCA);
209 else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CB_EVENTS) {
210 PAPI_REPORT(PapiCounters,DC_L2_REFILL_MOES);
211 PAPI_REPORT(PapiCounters,DC_SYS_REFILL_MOES);
212 PAPI_REPORT(PapiCounters,FR_BR_MIS);
216 PAPI_REPORT(PapiCounters,PAPI_STL_ICY);
/* Print both counter tables through statsPrintf: the mutator totals first,
 * then the GC totals, each under its own heading. */
221 papi_stats_report (void)
223 statsPrintf(" -- CPU Mutator counters --\n");
225 papi_report(MutatorCounters);
227 statsPrintf("\n -- CPU GC counters --\n");
229 papi_report(GCCounters);
/* Register the calling thread with PAPI, create a new event set in
 * *event_set, and add every event recorded in papi_events[] to it.
 * The result of PAPI_register_thread is not checked; a failure of
 * PAPI_create_eventset is only logged by PAPI_CHECK. */
233 papi_init_eventset (int *event_set)
235 PAPI_register_thread();
236 PAPI_CHECK( PAPI_create_eventset(event_set));
237 papi_add_events(*event_set);
243 /* Initialise the performance tracking library */
/* Steps visible here, in order:
 *   1. PAPI_library_init(PAPI_VER_CURRENT).  If the result is not
 *      PAPI_VER_CURRENT, an error is printed (a version-mismatch message or
 *      a system error message) and the program exits via
 *      stg_exit(EXIT_FAILURE).  The test that chooses between the two
 *      messages is not visible.
 *   2. PAPI_thread_init(osThreadId); a negative result is fatal (barf).
 *   3. Initialise papi_counter_mutex.
 *   4. Build the event table with init_countable_events.
 *   5. Create one event set for the mutator and one for the GC.
 * The function header and local declarations are outside the lines shown. */
245 if ((ver = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) {
247 errorBelch("PAPI_library_init: wrong version: %x", ver);
248 stg_exit(EXIT_FAILURE);
250 sysErrorBelch("PAPI_library_init");
251 stg_exit(EXIT_FAILURE);
258 if ((err = PAPI_thread_init(osThreadId)) < 0) {
259 barf("PAPI_thread_init: %d",err);
262 initMutex(&papi_counter_mutex);
266 init_countable_events();
268 papi_init_eventset(&MutatorEvents);
269 papi_init_eventset(&GCEvents);
272 /* Extract the value corresponding to an event */
/* Linear search of papi_events[0 .. n_papi_events) for an entry whose
 * event_code equals `event`.  If no entry matches, a diagnostic is printed
 * with debugBelch.
 * NOTE(review): the return statements are not visible here -- presumably
 * values[i] on a match; confirm what is returned on a miss. */
274 papi_counter(long_long values[],int event)
277 for(i=0;i<n_papi_events;i++) {
278 if(papi_events[i].event_code==event) {
282 /* Passed a wrong event? */
283 debugBelch("Event %d is not part of event set\n",event);
287 /* Add the events of papi_events into an event set */
/* Calls PAPI_add_event(EventSet, event_code) for each registered event.
 * The failure branch reports the event's name and the PAPI error code
 * through debugBelch; as far as the visible lines show, the loop then
 * carries on with the next event rather than aborting. */
289 papi_add_events(int EventSet)
292 for(i=0;i<n_papi_events;i++) {
293 if((papi_error=PAPI_add_event(EventSet,
294 papi_events[i].event_code))
296 debugBelch("Failed adding %s to event set with error code %d\n",
297 papi_events[i].event_name,papi_error);
301 /* We should be using elapsed cycles
302 * to be consistent with the time metric chosen in Stats.c (elapsed time).
303 * This is an approximation of the cycles that the program spends.
304 * Note that the counters, in contrast, are virtual and user-space. */
/* Alias for the cycle source: every cycle stamp taken by the start/stop
 * routines in this file goes through PAPI_cycles(), i.e. PAPI_get_virt_cyc. */
306 #define PAPI_cycles PAPI_get_virt_cyc
/* While holding papi_counter_mutex: start counting on the MutatorEvents
 * event set (a failure is only logged by PAPI_CHECK) and record the current
 * PAPI_cycles() value in start_mutator_cycles. */
309 papi_start_mutator_count(void)
311 ACQUIRE_LOCK(&papi_counter_mutex);
312 PAPI_CHECK( PAPI_start(MutatorEvents));
313 start_mutator_cycles = PAPI_cycles();
314 RELEASE_LOCK(&papi_counter_mutex);
/* While holding papi_counter_mutex: add the cycles since
 * start_mutator_cycles to mutator_cycles, pass MutatorCounters to PAPI_accum
 * for the MutatorEvents set, then stop that set (NULL is passed as the
 * values argument of PAPI_stop).  PAPI failures are only logged.
 * NOTE(review): the cycle sample is taken before PAPI_accum/PAPI_stop here,
 * but after them in papi_stop_gc_count -- confirm whether the difference is
 * intended. */
318 papi_stop_mutator_count(void)
320 ACQUIRE_LOCK(&papi_counter_mutex);
321 mutator_cycles += PAPI_cycles() - start_mutator_cycles;
322 PAPI_CHECK( PAPI_accum(MutatorEvents,MutatorCounters));
323 PAPI_CHECK( PAPI_stop(MutatorEvents,NULL));
324 RELEASE_LOCK(&papi_counter_mutex);
/* While holding papi_counter_mutex: start counting on the GCEvents event set
 * (a failure is only logged by PAPI_CHECK) and record the current
 * PAPI_cycles() value in start_gc_cycles. */
328 papi_start_gc_count(void)
330 ACQUIRE_LOCK(&papi_counter_mutex);
331 PAPI_CHECK( PAPI_start(GCEvents));
332 start_gc_cycles = PAPI_cycles();
333 RELEASE_LOCK(&papi_counter_mutex);
/* While holding papi_counter_mutex: pass GCCounters to PAPI_accum for the
 * GCEvents set, stop that set (NULL values argument), then add the cycles
 * since start_gc_cycles to gc_cycles.  PAPI failures are only logged.
 * gc_cycles is declared outside the lines shown here. */
337 papi_stop_gc_count(void)
339 ACQUIRE_LOCK(&papi_counter_mutex);
340 PAPI_CHECK( PAPI_accum(GCEvents,GCCounters));
341 PAPI_CHECK( PAPI_stop(GCEvents,NULL));
342 gc_cycles += PAPI_cycles() - start_gc_cycles;
343 RELEASE_LOCK(&papi_counter_mutex);
/* While holding papi_counter_mutex: start counting on the caller-supplied
 * event set.  Unlike papi_start_gc_count, no cycle stamp is recorded.
 * A PAPI_start failure is only logged by PAPI_CHECK. */
348 papi_thread_start_gc_count(int event_set)
350 ACQUIRE_LOCK(&papi_counter_mutex);
351 PAPI_CHECK( PAPI_start(event_set));
352 RELEASE_LOCK(&papi_counter_mutex);
/* While holding papi_counter_mutex: pass the shared GCCounters array to
 * PAPI_accum for the caller-supplied event set, then stop that set (NULL
 * values argument).  gc_cycles is not updated here.  PAPI failures are only
 * logged by PAPI_CHECK. */
356 papi_thread_stop_gc_count(int event_set)
358 ACQUIRE_LOCK(&papi_counter_mutex);
359 PAPI_CHECK( PAPI_accum(event_set,GCCounters));
360 PAPI_CHECK( PAPI_stop(event_set,NULL));
361 RELEASE_LOCK(&papi_counter_mutex);
364 #endif /* USE_PAPI */