2 % (c) The GRASP/AQUA Project, Glasgow University, 1995 - 1996
5 % Time-stamp: <Sun Oct 19 1997 23:39:59 Stardate: [-30]0119.72 hwloidl>
7 %************************************************************************
9 \section[GranSim.lc]{Granularity Simulator Routines}
11 %************************************************************************
13 Macros for dealing with the new and improved GA field for simulating
14 parallel execution. Based on @CONCURRENT@ package. The GA field now
15 contains a mask, where the n-th bit stands for the n-th processor,
16 where this data can be found. In case of multiple copies, several bits
17 are set. The total number of processors is bounded by @MAX_PROC@,
18 which should be <= the length of a word in bits. -- HWL
21 #if defined(GRAN) || defined(PAR)
24 #define NON_POSIX_SOURCE /* gettimeofday */
29 /* qaStaH nuq Sovpu' ngoqvam ghItlhpu'bogh nuv 'e' vItul */
30 # if defined(HAVE_GETCLOCK)
31 # if defined(HAVE_SYS_TIMERS_H)
33 # include <sys/timers.h>
36 # if defined(HAVE_GETTIMEOFDAY)
37 # if defined(HAVE_SYS_TIME_H)
38 # include <sys/time.h>
49 %****************************************************************
51 \subsection[GranSim-data-types]{Basic data types and set-up variables for GranSim}
53 %****************************************************************
57 /* See GranSim.lh for the definition of the enum gran_event_types */
58 char *gran_event_names[] = {
60 "STEALING", "STOLEN", "STOLEN(Q)",
61 "FETCH", "REPLY", "BLOCK", "RESUME", "RESUME(Q)",
62 "SCHEDULE", "DESCHEDULE",
64 "SPARK", "SPARKAT", "USED", "PRUNED", "EXPORTED", "ACQUIRED",
67 "SYSTEM_START", "SYSTEM_END", /* only for debugging */
72 char *proc_status_names[] = {
73 "Idle", "Sparking", "Starting", "Fetching", "Fishing", "Busy",
77 #define RAND_MAX 0x7fffffff /* 2^31-1 = 0x80000000 - 1 (see lrand48(3) */
79 unsigned CurrentProc = 0;
80 rtsBool IgnoreEvents = rtsFalse; /* HACK only for testing */
82 #if 0 && (defined(GCap) || defined(GCgn))
83 closq ex_RBH_q = NULL;
88 The following variables control the behaviour of GrAnSim. In general, there
89 is one RTS option for enabling each of these features. In getting the
90 desired setup of GranSim the following questions have to be answered:
92 \item {\em Which scheduling algorithm} to use (@RTSflags.GranFlags.DoFairSchedule@)?
93 Currently only unfair scheduling is supported.
94 \item What to do when remote data is fetched (@RTSflags.GranFlags.DoReScheduleOnFetch@)?
95 Either block and wait for the
96 data or reschedule and do some other work.
97 Thus, if this variable is true, asynchronous communication is
98 modelled. Block on fetch mainly makes sense for incremental fetching.
100 There is also a simplified fetch variant available
101 (@RTSflags.GranFlags.SimplifiedFetch@). This variant does not use events to model
102 communication. It is faster but the results will be less accurate.
103 \item How aggressive to be in getting work after a reschedule on fetch
104 (@RTSflags.GranFlags.FetchStrategy@)?
105 This is determined by the so-called {\em fetching
106 strategy\/}. Currently, there are four possibilities:
108 \item Only run a runnable thread.
109 \item Turn a spark into a thread, if necessary.
110 \item Steal a remote spark, if necessary.
111 \item Steal a runnable thread from another processor, if necessary.
113 The variable @RTSflags.GranFlags.FetchStrategy@ determines how far to go in this list
114 when rescheduling on a fetch.
115 \item Should sparks or threads be stolen first when looking for work
116 (@RTSflags.GranFlags.DoStealThreadsFirst@)?
117 The default is to steal sparks first (much cheaper).
118 \item Should the RTS use a lazy thread creation scheme
119 (@RTSflags.GranFlags.DoAlwaysCreateThreads@)? By default yes i.e.\ sparks are only
120 turned into threads when work is needed. Also note, that sparks
121 can be discarded by the RTS (this is done in the case of an overflow
122 of the spark pool). Setting @RTSflags.GranFlags.DoAlwaysCreateThreads@ to @True@ forces
123 the creation of threads at the next possibility (i.e.\ when new work
124 is demanded the next time).
125 \item Should data be fetched closure-by-closure or in packets
126 (@RTSflags.GranFlags.DoGUMMFetching@)? The default strategy is a GRIP-like incremental
127 (i.e.\ closure-by-closure) strategy. This makes sense in a
128 low-latency setting but is bad in a high-latency system. Setting
129 @RTSflags.GranFlags.DoGUMMFetching@ to @True@ enables bulk (packet) fetching. Other
130 parameters determine the size of the packets (@pack_buffer_size@) and the number of
131 thunks that should be put into one packet (@RTSflags.GranFlags.ThunksToPack@).
132 \item If there is no other possibility to find work, should runnable threads
133 be moved to an idle processor (@RTSflags.GranFlags.DoThreadMigration@)? In any case, the
134 RTS tried to get sparks (either local or remote ones) first. Thread
135 migration is very expensive, since a whole TSO has to be transferred
136 and probably data locality becomes worse in the process. Note, that
137 the closure, which will be evaluated next by that TSO is not
138 transferred together with the TSO (that might block another thread).
139 \item Should the RTS distinguish between sparks created by local nodes and
140 stolen sparks (@RTSflags.GranFlags.PreferSparksOfLocalNodes@)? The idea is to improve
141 data locality by preferring sparks of local nodes (it is more likely
142 that the data for those sparks is already on the local processor).
143 However, such a distinction also imposes an overhead on the spark
144 queue management, and typically a large number of sparks are
145 generated during execution. By default this variable is set to @False@.
146 \item Should the RTS use granularity control mechanisms? The idea of a
147 granularity control mechanism is to make use of granularity
148 information provided via annotation of the @par@ construct in order
149 to prefer bigger threads when either turning a spark into a thread or
150 when choosing the next thread to schedule. Currently, three such
151 mechanisms are implemented:
153 \item Cut-off: The granularity information is interpreted as a
154 priority. If a threshold priority is given to the RTS, then
155 only those sparks with a higher priority than the threshold
156 are actually created. Other sparks are immediately discarded.
157 This is similar to a usual cut-off mechanism often used in
158 parallel programs, where parallelism is only created if the
159 input data is lage enough. With this option, the choice is
160 hidden in the RTS and only the threshold value has to be
161 provided as a parameter to the runtime system.
162 \item Priority Sparking: This mechanism keeps priorities for sparks
163 and chooses the spark with the highest priority when turning
164 a spark into a thread. After that the priority information is
165 discarded. The overhead of this mechanism comes from
166 maintaining a sorted spark queue.
167 \item Priority Scheduling: This mechanism keeps the granularity
168 information for threads, to. Thus, on each reschedule the
169 largest thread is chosen. This mechanism has a higher
170 overhead, as the thread queue is sorted, too.
177 /* Do we need to reschedule following a fetch? */
178 rtsBool NeedToReSchedule = rtsFalse;
179 TIME TimeOfNextEvent, EndOfTimeSlice; /* checked from the threaded world! */
180 /* I_ avoidedCS=0; */ /* Unused!! ToDo: Remake libraries and nuke this var */
182 /* For internal use (event statistics) only */
183 char *event_names[] =
184 { "STARTTHREAD", "CONTINUETHREAD", "RESUMETHREAD",
185 "MOVESPARK", "MOVETHREAD", "FINDWORK",
186 "FETCHNODE", "FETCHREPLY",
187 "GLOBALBLOCK", "UNBLOCKTHREAD"
190 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
192 I_ event_counts[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
195 I_ tot_low_pri_sparks = 0;
197 I_ rs_sp_count=0, rs_t_count=0, ntimes_total=0, fl_total=0, no_of_steals=0;
199 /* Variables for gathering packet and queue statistics */
200 I_ tot_packets = 0, tot_packet_size = 0, tot_cuts = 0, tot_thunks = 0;
201 I_ tot_sq_len = 0, tot_sq_probes = 0, tot_sparks = 0, withered_sparks = 0;
202 I_ tot_add_threads = 0, tot_tq_len = 0, non_end_add_threads = 0;
205 # if defined(GRAN_COUNT)
206 /* Count the number of updates that are done. Mainly for testing, but
207 could be useful for other purposes, too. */
208 I_ nUPDs = 0, nUPDs_old = 0, nUPDs_new = 0, nUPDs_BQ = 0, nPAPs = 0,
213 I_ HandleFetchRequest(P_, PROC, P_);
214 /* void HandleFetchRequest(P_, PROC, P_); changed for GUMMFeching */
215 static I_ blockFetch(P_ tso, PROC proc, P_ bh);
220 %****************************************************************
222 \subsection[global-address-op]{Global Address Operations}
224 %****************************************************************
226 These functions perform operations on the global-address (ga) part
227 of a closure. The ga is the only new field (1 word) in a closure introduced
228 by GrAnSim. It serves as a bitmask, indicating on which processor
229 the closure is residing. Since threads are described by Thread State
230 Object (TSO), which is nothing but another kind of closure, this
231 scheme allows gives placement information about threads.
233 A ga is just a bitmask, so the operations on them are mainly bitmask
234 manipulating functions. Note, that there are important macros like PROCS,
235 IS_LOCAL_TO etc. They are defined in @GrAnSim.lh@.
237 NOTE: In GrAnSim-light we don't maintain placement information. This
238 allows to simulate an arbitrary number of processors. The price we have
239 to be is the lack of costing any communication properly. In short,
240 GrAnSim-light is meant to reveal the maximal parallelism in a program.
241 From an implementation point of view the important thing is:
242 {\em GrAnSim-light does not maintain global-addresses}.
247 /* ga_to_proc returns the first processor marked in the bitmask ga.
248 Normally only one bit in ga should be set. But for PLCs all bits
249 are set. That shouldn't hurt since we only need IS_LOCAL_TO for PLCs */
255 for (i = 0; i < MAX_PROC && !IS_LOCAL_TO(ga, i); i++);
259 /* NB: This takes a *node* rather than just a ga as input */
262 { return (ga_to_proc(PROCS(node))); } /* Access the GA field of the node */
268 for(i=0, any_idle=rtsFalse;
269 !any_idle && i<RTSflags.GranFlags.proc;
270 any_idle = any_idle || IS_IDLE(i), i++)
278 i<RTSflags.GranFlags.proc;
279 j += IS_IDLE(i)?1:0, i++)
286 %****************************************************************
288 \subsection[event-queue]{The Global Event Queue}
290 %****************************************************************
292 The following routines implement an ADT of an event-queue (FIFO).
293 ToDo: Put that in an own file(?)
298 /* Pointer to the global event queue; events are currently malloc'ed */
299 eventq EventHd = NULL;
304 static eventq entry = NULL;
308 fprintf(stderr,"No next event. This may be caused by a circular data dependency in the program.\n");
315 # if defined(GRAN_CHECK) && defined(GRAN)
316 if (RTSflags.GranFlags.debug & 0x20) { /* count events */
318 event_counts[EVENT_TYPE(EventHd)]++;
323 EventHd = EVENT_NEXT(EventHd);
327 /* When getting the time of the next event we ignore CONTINUETHREAD events:
328 we don't want to be interrupted before the end of the current time slice
329 unless there is something important to handle.
332 get_time_of_next_event()
334 eventq event = EventHd;
336 while (event != NULL && EVENT_TYPE(event)==CONTINUETHREAD) {
337 event = EVENT_NEXT(event);
342 return (EVENT_TIME(event));
345 /* ToDo: replace malloc/free with a free list */
348 insert_event(newentry)
351 EVTTYPE evttype = EVENT_TYPE(newentry);
354 /* if(evttype >= CONTINUETHREAD1) evttype = CONTINUETHREAD; */
356 /* Search the queue and insert at the right point:
357 FINDWORK before everything, CONTINUETHREAD after everything.
359 This ensures that we find any available work after all threads have
360 executed the current cycle. This level of detail would normally be
361 irrelevant, but matters for ridiculously low latencies...
364 /* Changed the ordering: Now FINDWORK comes after everything but
365 CONTINUETHREAD. This makes sure that a MOVESPARK comes before a
366 FINDWORK. This is important when a GranSimSparkAt happens and
367 DoAlwaysCreateThreads is turned on. Also important if a GC occurs
368 when trying to build a new thread (see much_spark) -- HWL 02/96 */
373 for (event = EventHd, prev=&EventHd;
375 prev = &(EVENT_NEXT(event)), event = EVENT_NEXT(event)) {
377 case FINDWORK: if ( EVENT_TIME(event) < EVENT_TIME(newentry) ||
378 ( (EVENT_TIME(event) == EVENT_TIME(newentry)) &&
379 (EVENT_TYPE(event) != CONTINUETHREAD) ) )
383 case CONTINUETHREAD: if ( EVENT_TIME(event) <= EVENT_TIME(newentry) )
387 default: if ( EVENT_TIME(event) < EVENT_TIME(newentry) ||
388 ((EVENT_TIME(event) == EVENT_TIME(newentry)) &&
389 (EVENT_TYPE(event) == EVENT_TYPE(newentry))) )
394 /* Insert newentry here (i.e. before event) */
396 EVENT_NEXT(newentry) = event;
405 new_event(proc,creator,time,evttype,tso,node,spark)
412 eventq newentry = (eventq) stgMallocBytes(sizeof(struct event), "new_event");
414 EVENT_PROC(newentry) = proc;
415 EVENT_CREATOR(newentry) = creator;
416 EVENT_TIME(newentry) = time;
417 EVENT_TYPE(newentry) = evttype;
418 EVENT_TSO(newentry) = tso;
419 EVENT_NODE(newentry) = node;
420 EVENT_SPARK(newentry) = spark;
421 EVENT_GC_INFO(newentry) = 0;
422 EVENT_NEXT(newentry) = NULL;
424 insert_event(newentry);
428 prepend_event(eventq event) /* put event at beginning of EventQueue */
429 { /* only used for GC! */
430 EVENT_NEXT(event) = EventHd;
435 grab_event() /* undo prepend_event i.e. get the event */
436 { /* at the head of EventQ but don't free anything */
437 eventq event = EventHd;
439 if(EventHd == NULL) {
440 fprintf(stderr,"No next event (in grab_event). This may be caused by a circular data dependency in the program.\n");
444 EventHd = EVENT_NEXT(EventHd);
449 traverse_eventq_for_gc()
451 eventq event = EventHd;
453 P_ closure, tso, buffer, bufptr;
456 /* Traverse eventq and replace every FETCHREPLY by a FETCHNODE for the
457 orig closure (root of packed graph). This means that a graph, which is
458 between processors at the time of GC is fetched again at the time when
459 it would have arrived, had there been no GC. Slightly inaccurate but
461 This is only needed for GUM style fetchng. */
462 if (!RTSflags.GranFlags.DoGUMMFetching)
465 for(event = EventHd; event!=NULL; event=EVENT_NEXT(event)) {
466 if (EVENT_TYPE(event)==FETCHREPLY) {
467 buffer = EVENT_NODE(event);
468 ASSERT(buffer[PACK_FLAG_LOCN]==MAGIC_PACK_FLAG); /* It's a pack buffer */
469 bufsize = buffer[PACK_SIZE_LOCN];
470 closure= (P_)buffer[PACK_HDR_SIZE];
471 tso = (P_)buffer[PACK_TSO_LOCN];
472 proc = EVENT_PROC(event);
473 creator = EVENT_CREATOR(event); /* similar to unpacking */
474 for (bufptr=buffer+PACK_HDR_SIZE; bufptr<(buffer+bufsize);
476 if ( (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_SPEC_RBH_TYPE) ||
477 (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_GEN_RBH_TYPE) ) {
478 convertFromRBH((P_)*bufptr);
482 EVENT_TYPE(event) = FETCHNODE;
483 EVENT_PROC(event) = creator;
484 EVENT_CREATOR(event) = proc;
485 EVENT_NODE(event) = closure;
486 EVENT_TSO(event) = tso;
487 EVENT_GC_INFO(event) = 0;
497 char str_tso[16], str_node[16];
499 sprintf(str_tso,((EVENT_TSO(event)==PrelBase_Z91Z93_closure) ? "______" : "%#6lx"),
501 sprintf(str_node,((EVENT_NODE(event)==PrelBase_Z91Z93_closure) ? "______" : "%#6lx"),
505 fprintf(stderr,"Evt: NIL\n");
507 fprintf(stderr,"Evt: %s (%u), PE %u [%u], Time %lu, TSO %s (%x), node %s\n",
508 event_names[EVENT_TYPE(event)],EVENT_TYPE(event),
509 EVENT_PROC(event), EVENT_CREATOR(event), EVENT_TIME(event),
510 str_tso, TSO_ID(EVENT_TSO(event)), str_node
511 /*, EVENT_SPARK(event), EVENT_NEXT(event)*/ );
521 fprintf(stderr,"Event Queue with root at %x:\n",hd);
522 for (x=hd; x!=NULL; x=EVENT_NEXT(x)) {
533 sprintf(str,((SPARK_NODE(spark)==PrelBase_Z91Z93_closure) ? "______" : "%#6lx"),
534 (W_) SPARK_NODE(spark));
537 fprintf(stderr,"Spark: NIL\n");
539 fprintf(stderr,"Spark: Node %8s, Name %#6lx, Exported %5s, Prev %#6x, Next %#6x\n",
540 str, SPARK_NAME(spark),
541 ((SPARK_EXPORTED(spark))?"True":"False"),
542 SPARK_PREV(spark), SPARK_NEXT(spark) );
551 fprintf(stderr,"Spark Queue with root at %x:\n",hd);
552 for (x=hd; x!=NULL; x=SPARK_NEXT(x)) {
561 %****************************************************************************
563 \subsection[entry-points]{Routines directly called from Haskell world}
565 %****************************************************************************
567 The @GranSim...@ routines in here are directly called via macros from the
570 First some auxiliary routines.
574 /* Take the current thread off the thread queue and thereby activate the */
575 /* next thread. It's assumed that the next ReSchedule after this uses */
576 /* NEW_THREAD as param. */
577 /* This fct is called from GranSimBlock and GranSimFetch */
580 ActivateNextThread (PROC proc)
582 ASSERT(RunnableThreadsHd[proc]!=PrelBase_Z91Z93_closure);
584 RunnableThreadsHd[proc] = TSO_LINK(RunnableThreadsHd[proc]);
585 if(RunnableThreadsHd[proc]==PrelBase_Z91Z93_closure) {
587 RunnableThreadsTl[proc] = PrelBase_Z91Z93_closure;
589 CurrentTime[proc] += RTSflags.GranFlags.gran_threadcontextswitchtime;
590 if (RTSflags.GranFlags.granSimStats &&
591 (!RTSflags.GranFlags.Light || (RTSflags.GranFlags.debug & 0x20000)))
592 DumpRawGranEvent(proc,0,GR_SCHEDULE,RunnableThreadsHd[proc],
593 PrelBase_Z91Z93_closure,0);
598 Now the main stg-called routines:
601 /* ------------------------------------------------------------------------ */
602 /* The following GranSim... fcts are stg-called from the threaded world. */
603 /* ------------------------------------------------------------------------ */
605 /* Called from HEAP_CHK -- NB: node and liveness are junk here now.
606 They are left temporarily to avoid complete recompilation.
610 GranSimAllocate(n,node,liveness)
615 TSO_ALLOCS(CurrentTSO) += n;
616 ++TSO_BASICBLOCKS(CurrentTSO);
618 if (RTSflags.GranFlags.granSimStats_Heap) {
619 DumpRawGranEvent(CurrentProc,0,GR_ALLOC,CurrentTSO,
620 PrelBase_Z91Z93_closure,n);
623 TSO_EXECTIME(CurrentTSO) += RTSflags.GranFlags.gran_heapalloc_cost;
624 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_heapalloc_cost;
628 Subtract the values added above, if a heap check fails and
632 GranSimUnallocate(n,node,liveness)
637 TSO_ALLOCS(CurrentTSO) -= n;
638 --TSO_BASICBLOCKS(CurrentTSO);
640 TSO_EXECTIME(CurrentTSO) -= RTSflags.GranFlags.gran_heapalloc_cost;
641 CurrentTime[CurrentProc] -= RTSflags.GranFlags.gran_heapalloc_cost;
644 /* NB: We now inline this code via GRAN_EXEC rather than calling this fct */
646 GranSimExec(ariths,branches,loads,stores,floats)
647 W_ ariths,branches,loads,stores,floats;
649 W_ cost = RTSflags.GranFlags.gran_arith_cost*ariths +
650 RTSflags.GranFlags.gran_branch_cost*branches +
651 RTSflags.GranFlags.gran_load_cost * loads +
652 RTSflags.GranFlags.gran_store_cost*stores +
653 RTSflags.GranFlags.gran_float_cost*floats;
655 TSO_EXECTIME(CurrentTSO) += cost;
656 CurrentTime[CurrentProc] += cost;
661 Fetch the node if it isn't local
662 -- result indicates whether fetch has been done.
664 This is GRIP-style single item fetching.
667 /* This function in Threads.lc is only needed for SimplifiedFetch */
668 FetchNode PROTO((P_ node,PROC CurrentProc));
671 GranSimFetch(node /* , liveness_mask */ )
673 /* I_ liveness_mask; */
675 if (RTSflags.GranFlags.Light) {
676 /* Always reschedule in GrAnSim-Light to prevent one TSO from
678 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
679 CONTINUETHREAD,CurrentTSO,node,NULL);
681 NeedToReSchedule = rtsFalse;
685 /* Note: once a node has been fetched, this test will be passed */
686 if(!IS_LOCAL_TO(PROCS(node),CurrentProc))
688 /* Add mpacktime to the remote PE for the reply */
690 PROC p = where_is(node);
694 if ( ( RTSflags.GranFlags.debug & 0x40 ) &&
696 fprintf(stderr,"GranSimFetch: Trying to fetch from own processor%u\n", p);
697 # endif /* GRAN_CHECK */
699 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
700 /* NB: Fetch is counted on arrival (FETCHREPLY) */
702 if (RTSflags.GranFlags.SimplifiedFetch)
704 FetchNode(node,CurrentProc);
705 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime+
706 RTSflags.GranFlags.gran_fetchtime+
707 RTSflags.GranFlags.gran_munpacktime;
711 fetchtime = STG_MAX(CurrentTime[CurrentProc],CurrentTime[p]) +
712 RTSflags.GranFlags.gran_latency;
714 new_event(p,CurrentProc,fetchtime,FETCHNODE,CurrentTSO,node,NULL);
715 if (!RTSflags.GranFlags.DoReScheduleOnFetch)
716 MAKE_FETCHING(CurrentProc);
717 ++OutstandingFetches[CurrentProc];
719 if (fetchtime<TimeOfNextEvent)
720 TimeOfNextEvent = fetchtime;
723 TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[CurrentProc];
725 if (RTSflags.GranFlags.DoReScheduleOnFetch)
727 /* Remove CurrentTSO from the queue
728 -- assumes head of queue == CurrentTSO */
729 if(!RTSflags.GranFlags.DoFairSchedule)
731 if(RTSflags.GranFlags.granSimStats)
732 DumpRawGranEvent(CurrentProc,p,GR_FETCH,CurrentTSO,
735 ActivateNextThread(CurrentProc);
737 # if defined(GRAN_CHECK)
738 if (RTSflags.GranFlags.debug & 0x10) {
739 if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
740 fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
741 CurrentTSO,CurrentTime[CurrentProc]);
744 TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
748 TSO_LINK(CurrentTSO) = PrelBase_Z91Z93_closure;
749 /* CurrentTSO = PrelBase_Z91Z93_closure; */
751 /* ThreadQueueHd is now the next TSO to schedule or NULL */
752 /* CurrentTSO is pointed to by the FETCHNODE event */
754 else /* fair scheduling currently not supported -- HWL */
756 fprintf(stderr,"Reschedule-on-fetch is not yet compatible with fair scheduling\n");
760 else /* !RTSflags.GranFlags.DoReScheduleOnFetch */
762 /* Note: CurrentProc is still busy as it's blocked on fetch */
763 if(RTSflags.GranFlags.granSimStats)
764 DumpRawGranEvent(CurrentProc,p,GR_FETCH,CurrentTSO,node,0);
766 # if defined(GRAN_CHECK)
767 if (RTSflags.GranFlags.debug & 0x04)
768 BlockedOnFetch[CurrentProc] = CurrentTSO; /*- rtsTrue; -*/
769 if (RTSflags.GranFlags.debug & 0x10) {
770 if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
771 fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
772 CurrentTSO,CurrentTime[CurrentProc]);
775 TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
777 CurrentTSO = PrelBase_Z91Z93_closure;
781 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
783 /* Rescheduling is necessary */
784 NeedToReSchedule = rtsTrue;
793 GranSimSpark(local,node)
797 /* ++SparksAvail; Nope; do that in add_to_spark_queue */
798 if(RTSflags.GranFlags.granSimStats_Sparks)
799 DumpRawGranEvent(CurrentProc,(PROC)0,SP_SPARK,PrelBase_Z91Z93_closure,node,
800 spark_queue_len(CurrentProc,ADVISORY_POOL)-1);
802 /* Force the PE to take notice of the spark */
803 if(RTSflags.GranFlags.DoAlwaysCreateThreads) {
804 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
805 FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
806 if (CurrentTime[CurrentProc]<TimeOfNextEvent)
807 TimeOfNextEvent = CurrentTime[CurrentProc];
811 ++TSO_LOCALSPARKS(CurrentTSO);
813 ++TSO_GLOBALSPARKS(CurrentTSO);
817 GranSimSparkAt(spark,where,identifier)
819 P_ where; /* This should be a node; alternatively could be a GA */
822 PROC p = where_is(where);
823 GranSimSparkAtAbs(spark,p,identifier);
827 GranSimSparkAtAbs(spark,proc,identifier)
834 if ( spark == (sparkq)NULL) /* Note: Granularity control might have */
835 return; /* turned a spark into a NULL. */
837 /* ++SparksAvail; Nope; do that in add_to_spark_queue */
838 if(RTSflags.GranFlags.granSimStats_Sparks)
839 DumpRawGranEvent(proc,0,SP_SPARKAT,PrelBase_Z91Z93_closure,SPARK_NODE(spark),
840 spark_queue_len(proc,ADVISORY_POOL));
842 if (proc!=CurrentProc) {
843 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
844 exporttime = (CurrentTime[proc] > CurrentTime[CurrentProc]?
845 CurrentTime[proc]: CurrentTime[CurrentProc])
846 + RTSflags.GranFlags.gran_latency;
848 exporttime = CurrentTime[CurrentProc];
851 if ( RTSflags.GranFlags.Light )
852 /* Need CurrentTSO in event field to associate costs with creating
853 spark even in a GrAnSim Light setup */
854 new_event(proc,CurrentProc,exporttime,
855 MOVESPARK,CurrentTSO,PrelBase_Z91Z93_closure,spark);
857 new_event(proc,CurrentProc,exporttime,
858 MOVESPARK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,spark);
859 /* Bit of a hack to treat placed sparks the same as stolen sparks */
860 ++OutstandingFishes[proc];
862 /* Force the PE to take notice of the spark (FINDWORK is put after a
863 MOVESPARK into the sparkq!) */
864 if(RTSflags.GranFlags.DoAlwaysCreateThreads) {
865 new_event(CurrentProc,CurrentProc,exporttime+1,
866 FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
869 if (exporttime<TimeOfNextEvent)
870 TimeOfNextEvent = exporttime;
872 if (proc!=CurrentProc) {
873 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
874 ++TSO_GLOBALSPARKS(CurrentTSO);
876 ++TSO_LOCALSPARKS(CurrentTSO);
880 /* This function handles local and global blocking */
881 /* It's called either from threaded code (RBH_entry, BH_entry etc) or */
882 /* from blockFetch when trying to fetch an BH or RBH */
885 GranSimBlock(P_ tso, PROC proc, P_ node)
887 PROC node_proc = where_is(node);
889 ASSERT(tso==RunnableThreadsHd[proc]);
891 if(RTSflags.GranFlags.granSimStats)
892 DumpRawGranEvent(proc,node_proc,GR_BLOCK,tso,node,0);
894 ++TSO_BLOCKCOUNT(tso);
895 /* Distinction between local and global block is made in blockFetch */
896 TSO_BLOCKEDAT(tso) = CurrentTime[proc];
898 CurrentTime[proc] += RTSflags.GranFlags.gran_threadqueuetime;
899 ActivateNextThread(proc);
900 TSO_LINK(tso) = PrelBase_Z91Z93_closure; /* not really necessary; only for testing */
907 %****************************************************************************
909 \subsection[GrAnSim-profile]{Writing profiling info for GrAnSim}
911 %****************************************************************************
913 Event dumping routines.
918 * If you're not using GNUC and you're on a 32-bit machine, you're
919 * probably out of luck here. However, since CONCURRENT currently
920 * requires GNUC, I'm not too worried about it. --JSM
925 static ullong startTime = 0;
930 # ifdef HAVE_GETCLOCK
933 if (getclock(TIMEOFDAY, &tv) != 0) {
935 fprintf(stderr, "Clock failed\n");
938 return tv.tv_sec * LL(1000) + tv.tv_nsec / LL(1000000) - startTime;
940 # ifdef HAVE_GETTIMEOFDAY
943 if (gettimeofday(&tv, NULL) != 0) {
945 fprintf(stderr, "Clock failed\n");
948 return tv.tv_sec * LL(1000) + tv.tv_usec / LL(1000) - startTime;
951 if ((t = time(NULL)) == (time_t) -1) {
953 fprintf(stderr, "Clock failed\n");
963 #if defined(GRAN) || defined(PAR)
966 DumpGranEvent(name, tso)
967 enum gran_event_types name;
970 DumpRawGranEvent(CURRENT_PROC, (PROC)0, name, tso, PrelBase_Z91Z93_closure, 0);
974 DumpRawGranEvent(proc, p, name, tso, node, len)
975 PROC proc, p; /* proc ... where it happens; p ... where node lives */
976 enum gran_event_types name;
981 char time_string[500], node_str[16]; /*ToDo: kill magic constants */
982 ullong_format_string(TIME_ON_PROC(proc), time_string, rtsFalse/*no commas!*/);
984 if (RTSflags.GranFlags.granSimStats_suppressed)
988 id = tso == NULL ? -1 : TSO_ID(tso);
989 if (node==PrelBase_Z91Z93_closure)
990 strcpy(node_str,"________"); /* "PrelBase_Z91Z93_closure"); */
992 sprintf(node_str,"0x%-6lx",node);
994 if (name > GR_EVENT_MAX)
997 if(GRANSIMSTATS_BINARY)
998 /* ToDo: fix code for writing binary GrAnSim statistics */
1004 abort(); /* die please: a single word */
1005 /* doesn't represent long long times */
1006 grputw(TIME_ON_PROC(proc));
1014 abort(); /* die please: a single word */
1015 /* doesn't represent long long times */
1016 grputw(TIME_ON_PROC(proc)); /* this line is bound to */
1017 grputw(id); /* do the wrong thing */
1022 abort(); /* die please: a single word */
1023 /* doesn't represent long long times */
1024 grputw(TIME_ON_PROC(proc));
1031 /* fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n", */
1032 /* using spark queue length as optional argument ^^^^^^^^^ */
1033 fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[SN %u]\n",
1034 /* using spark name as optional argument ^^^^^^ */
1035 proc,time_string,gran_event_names[name],
1043 fprintf(gr_file, "PE %2u [%s]: %-9s\t%lx \t%s\t(from %2u)\n",
1044 proc, time_string, gran_event_names[name],
1051 fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx \n",
1052 proc,time_string,gran_event_names[name],id);
1055 fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t \t(by %2u)\n",
1056 proc,time_string,gran_event_names[name],id,p);
1059 fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t \tallocating %u words\n",
1060 proc,time_string,gran_event_names[name],id,len);
1063 fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n",
1064 proc,time_string,gran_event_names[name],id,node_str,len);
1070 /* Only needed for special dynamic spark labelling support */
1072 DumpStartEventAt(time, proc, p, name, tso, node, len)
1074 PROC proc, p; /* proc ... where it happens; p ... where node lives */
1075 enum gran_event_types name;
1080 char time_string[500], node_str[16]; /*ToDo: kill magic constants */
1081 ullong_format_string(time, time_string, rtsFalse/*no commas!*/);
1082 /* ^^^^ only important change to DumpRawGranEvent */
1083 if (RTSflags.GranFlags.granSimStats_suppressed)
1086 id = tso == NULL ? -1 : TSO_ID(tso);
1087 if (node==PrelBase_Z91Z93_closure)
1088 strcpy(node_str,"________"); /* "Z91Z93_closure"); */
1090 sprintf(node_str,"0x%-6lx",node);
1092 if (name > GR_EVENT_MAX)
1093 name = GR_EVENT_MAX;
1095 if(GRANSIMSTATS_BINARY)
1096 /* ToDo: fix code for writing binary GrAnSim statistics */
1102 abort(); /* die please: a single word */
1103 /* doesn't represent long long times */
1104 grputw(TIME_ON_PROC(proc));
1108 fprintf(stderr,"Error in DumpStartEventAt: event %s is not a START event\n",
1109 gran_event_names[name]);
1115 /* fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n", */
1116 /* using spark queue length as optional argument ^^^^^^^^^ */
1117 fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[SN %u]\n",
1118 /* using spark name as optional argument ^^^^^^ */
1119 proc,time_string,gran_event_names[name],
1123 fprintf(stderr,"Error in DumpStartEventAt: event %s is not a START event\n",
1124 gran_event_names[name]);
1130 DumpGranInfo(proc, tso, mandatory_thread)
1133 rtsBool mandatory_thread;
1135 char time_string[500]; /* ToDo: kill magic constant */
1136 ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
1139 if (RTSflags.GranFlags.granSimStats_suppressed)
1143 if (GRANSIMSTATS_BINARY) {
1146 abort(); /* die please: a single word doesn't represent long long times */
1147 grputw(CURRENT_TIME); /* this line is bound to fail */
1148 grputw(TSO_ID(tso));
1163 grputw(TSO_SPARKNAME(tso));
1164 grputw(TSO_STARTEDAT(tso));
1165 grputw(TSO_EXPORTED(tso));
1166 grputw(TSO_BASICBLOCKS(tso));
1167 grputw(TSO_ALLOCS(tso));
1168 grputw(TSO_EXECTIME(tso));
1169 grputw(TSO_BLOCKTIME(tso));
1170 grputw(TSO_BLOCKCOUNT(tso));
1171 grputw(TSO_FETCHTIME(tso));
1172 grputw(TSO_FETCHCOUNT(tso));
1173 grputw(TSO_LOCALSPARKS(tso));
1174 grputw(TSO_GLOBALSPARKS(tso));
1176 grputw(mandatory_thread);
1180 * NB: DumpGranEvent cannot be used because PE may be wrong
1181 * (as well as the extra info)
1183 fprintf(gr_file, "PE %2u [%s]: END %lx, SN %lu, ST %lu, EXP %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu), LS %lu, GS %lu, MY %c\n"
1189 ,TSO_EXPORTED(tso) ? 'T' : 'F'
1190 ,TSO_BASICBLOCKS(tso)
1194 ,TSO_BLOCKCOUNT(tso)
1196 ,TSO_FETCHCOUNT(tso)
1197 ,TSO_LOCALSPARKS(tso)
1198 ,TSO_GLOBALSPARKS(tso)
1199 ,mandatory_thread ? 'T' : 'F'
1208 fprintf(stderr,"TSO 0x%lx, NAME 0x%lx, ID %lu, LINK 0x%lx, TYPE %s\n"
1213 ,TSO_TYPE(tso)==T_MAIN?"MAIN":
1214 TSO_TYPE(tso)==T_FAIL?"FAIL":
1215 TSO_TYPE(tso)==T_REQUIRED?"REQUIRED":
1216 TSO_TYPE(tso)==T_ADVISORY?"ADVISORY":
1220 fprintf(stderr,"PC (0x%lx,0x%lx), ARG (0x%lx), SWITCH %lx0x\n"
1224 /* ,TSO_ARG2(tso) */
1228 fprintf(gr_file,"TSO %lx: SN %lu, ST %lu, GBL %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu) LS %lu, GS %lu\n"
1232 ,TSO_EXPORTED(tso)?'T':'F'
1233 ,TSO_BASICBLOCKS(tso)
1237 ,TSO_BLOCKCOUNT(tso)
1239 ,TSO_FETCHCOUNT(tso)
1240 ,TSO_LOCALSPARKS(tso)
1241 ,TSO_GLOBALSPARKS(tso)
1246 Output a terminate event and an 8-byte time.
1254 if (RTSflags.GranFlags.granSimStats_suppressed)
1258 DumpGranEvent(GR_TERMINATE, PrelBase_Z91Z93_closure);
1260 if (sizeof(TIME) == 4) {
1261 putc('\0', gr_file);
1262 putc('\0', gr_file);
1263 putc('\0', gr_file);
1264 putc('\0', gr_file);
1266 putc(v >> 56l, gr_file);
1267 putc((v >> 48l) & 0xffl, gr_file);
1268 putc((v >> 40l) & 0xffl, gr_file);
1269 putc((v >> 32l) & 0xffl, gr_file);
1271 putc((v >> 24l) & 0xffl, gr_file);
1272 putc((v >> 16l) & 0xffl, gr_file);
1273 putc((v >> 8l) & 0xffl, gr_file);
1274 putc(v & 0xffl, gr_file);
1278 Length-coded output: first 3 bits contain length coding
1292 if (RTSflags.GranFlags.granSimStats_suppressed)
1296 if (v <= 0x3fl) { /* length v = 1 byte */
1297 fputc(v & 0x3f, gr_file);
1298 } else if (v <= 0x3fffl) { /* length v = 2 byte */
1299 fputc((v >> 8l) | 0x40l, gr_file);
1300 fputc(v & 0xffl, gr_file);
1301 } else if (v <= 0x3fffffffl) { /* length v = 4 byte */
1302 fputc((v >> 24l) | 0x80l, gr_file);
1303 fputc((v >> 16l) & 0xffl, gr_file);
1304 fputc((v >> 8l) & 0xffl, gr_file);
1305 fputc(v & 0xffl, gr_file);
1306 } else if (sizeof(TIME) == 4) {
1307 fputc(0x70, gr_file);
1308 fputc((v >> 24l) & 0xffl, gr_file);
1309 fputc((v >> 16l) & 0xffl, gr_file);
1310 fputc((v >> 8l) & 0xffl, gr_file);
1311 fputc(v & 0xffl, gr_file);
1313 if (v <= 0x3fffffffffffffl)
1314 putc((v >> 56l) | 0x60l, gr_file);
1316 putc(0x70, gr_file);
1317 putc((v >> 56l) & 0xffl, gr_file);
1320 putc((v >> 48l) & 0xffl, gr_file);
1321 putc((v >> 40l) & 0xffl, gr_file);
1322 putc((v >> 32l) & 0xffl, gr_file);
1323 putc((v >> 24l) & 0xffl, gr_file);
1324 putc((v >> 16l) & 0xffl, gr_file);
1325 putc((v >> 8l) & 0xffl, gr_file);
1326 putc(v & 0xffl, gr_file);
1330 #endif /* GRAN || PAR */
1333 %****************************************************************************
1335 \subsection[gr-simulation]{Granularity Simulation}
1337 %****************************************************************************
1339 General routines for GranSim. Mainly, startup and shutdown routines, called
1344 FILE *gr_file = NULL;
1345 char gr_filename[STATS_FILENAME_MAXLEN];
1346 /* I_ do_gr_sim = 0; */ /* In GrAnSim setup always do simulation */
1349 init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv)
1350 char *prog_argv[], *rts_argv[];
1351 int prog_argc, rts_argc;
1355 char *extension = RTSflags.GranFlags.granSimStats_Binary ? "gb" : "gr";
1357 if (RTSflags.GranFlags.granSimStats_suppressed)
1360 sprintf(gr_filename, GR_FILENAME_FMT, prog_argv[0], extension);
1362 if ((gr_file = fopen(gr_filename, "w")) == NULL) {
1363 fprintf(stderr, "Can't open granularity simulation report file %s\n", gr_filename);
1366 # if 0 /* that's obsolete now, I think -- HWL */
1367 if (RTSflags.GranFlags.DoReScheduleOnFetch)
1368 setbuf(gr_file, NULL);
1371 fputs("Granularity Simulation for ", gr_file);
1372 for (i = 0; i < prog_argc; ++i) {
1373 fputs(prog_argv[i], gr_file);
1374 fputc(' ', gr_file);
1378 fputs("+RTS ", gr_file);
1380 for (i = 0; i < rts_argc; ++i) {
1381 fputs(rts_argv[i], gr_file);
1382 fputc(' ', gr_file);
1386 fputs("\nStart time: ", gr_file);
1387 fputs(time_str(), gr_file); /* defined in main.lc */
1388 fputc('\n', gr_file);
1390 fputs("\n\n--------------------\n\n", gr_file);
1392 fputs("General Parameters:\n\n", gr_file);
1394 if (RTSflags.GranFlags.Light)
1395 fprintf(gr_file, "GrAnSim-Light\nPEs infinite, %s Scheduler, %sMigrate Threads %s, %s\n",
1396 RTSflags.GranFlags.DoFairSchedule?"Fair":"Unfair",
1397 RTSflags.GranFlags.DoThreadMigration?"":"Don't ",
1398 RTSflags.GranFlags.DoThreadMigration && RTSflags.GranFlags.DoStealThreadsFirst?" Before Sparks":"",
1399 RTSflags.GranFlags.SimplifiedFetch ? "Simplified Fetch" :
1400 RTSflags.GranFlags.DoReScheduleOnFetch ? "Reschedule on Fetch" :
1403 fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads %s, %s\n",
1404 RTSflags.GranFlags.proc,RTSflags.GranFlags.DoFairSchedule?"Fair":"Unfair",
1405 RTSflags.GranFlags.DoThreadMigration?"":"Don't ",
1406 RTSflags.GranFlags.DoThreadMigration && RTSflags.GranFlags.DoStealThreadsFirst?" Before Sparks":"",
1407 RTSflags.GranFlags.SimplifiedFetch ? "Simplified Fetch" :
1408 RTSflags.GranFlags.DoReScheduleOnFetch ? "Reschedule on Fetch" :
1411 if (RTSflags.GranFlags.DoGUMMFetching)
1412 if (RTSflags.GranFlags.ThunksToPack)
1413 fprintf(gr_file, "Bulk Fetching: Fetch %d Thunks in Each Packet (Packet Size = %d closures)\n",
1414 RTSflags.GranFlags.ThunksToPack,
1415 RTSflags.GranFlags.packBufferSize);
1417 fprintf(gr_file, "Bulk Fetching: Fetch as many closures as possible (Packet Size = %d closures)\n",
1418 RTSflags.GranFlags.packBufferSize);
1420 fprintf(gr_file, "Incremental Fetching: Fetch Exactly One Closure in Each Packet\n");
1422 fprintf(gr_file, "Fetch Strategy(%u):If outstanding fetches %s\n",
1423 RTSflags.GranFlags.FetchStrategy,
1424 RTSflags.GranFlags.FetchStrategy==0 ?
1425 " block (block-on-fetch)":
1426 RTSflags.GranFlags.FetchStrategy==1 ?
1427 "only run runnable threads":
1428 RTSflags.GranFlags.FetchStrategy==2 ?
1429 "create threads only from local sparks":
1430 RTSflags.GranFlags.FetchStrategy==3 ?
1431 "create threads from local or global sparks":
1432 RTSflags.GranFlags.FetchStrategy==4 ?
1433 "create sparks and steal threads if necessary":
1436 if (RTSflags.GranFlags.DoPrioritySparking)
1437 fprintf(gr_file, "Priority Sparking (i.e. keep sparks ordered by priority)\n");
1439 if (RTSflags.GranFlags.DoPriorityScheduling)
1440 fprintf(gr_file, "Priority Scheduling (i.e. keep threads ordered by priority)\n");
1442 fprintf(gr_file, "Thread Creation Time %lu, Thread Queue Time %lu\n",
1443 RTSflags.GranFlags.gran_threadcreatetime,
1444 RTSflags.GranFlags.gran_threadqueuetime);
1445 fprintf(gr_file, "Thread DeSchedule Time %lu, Thread Schedule Time %lu\n",
1446 RTSflags.GranFlags.gran_threaddescheduletime,
1447 RTSflags.GranFlags.gran_threadscheduletime);
1448 fprintf(gr_file, "Thread Context-Switch Time %lu\n",
1449 RTSflags.GranFlags.gran_threadcontextswitchtime);
1450 fputs("\n\n--------------------\n\n", gr_file);
1452 fputs("Communication Metrics:\n\n", gr_file);
1454 "Latency %lu (1st) %lu (rest), Fetch %lu, Notify %lu (Global) %lu (Local)\n",
1455 RTSflags.GranFlags.gran_latency,
1456 RTSflags.GranFlags.gran_additional_latency,
1457 RTSflags.GranFlags.gran_fetchtime,
1458 RTSflags.GranFlags.gran_gunblocktime,
1459 RTSflags.GranFlags.gran_lunblocktime);
1461 "Message Creation %lu (+ %lu after send), Message Read %lu\n",
1462 RTSflags.GranFlags.gran_mpacktime,
1463 RTSflags.GranFlags.gran_mtidytime,
1464 RTSflags.GranFlags.gran_munpacktime);
1465 fputs("\n\n--------------------\n\n", gr_file);
1467 fputs("Instruction Metrics:\n\n", gr_file);
1468 fprintf(gr_file, "Arith %lu, Branch %lu, Load %lu, Store %lu, Float %lu, Alloc %lu\n",
1469 RTSflags.GranFlags.gran_arith_cost,
1470 RTSflags.GranFlags.gran_branch_cost,
1471 RTSflags.GranFlags.gran_load_cost,
1472 RTSflags.GranFlags.gran_store_cost,
1473 RTSflags.GranFlags.gran_float_cost,
1474 RTSflags.GranFlags.gran_heapalloc_cost);
1475 fputs("\n\n++++++++++++++++++++\n\n", gr_file);
1477 if (RTSflags.GranFlags.granSimStats_Binary)
1478 grputw(sizeof(TIME));
1484 end_gr_simulation(STG_NO_ARGS)
1486 char time_string[500]; /* ToDo: kill magic constant */
1487 ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
1489 if (RTSflags.GranFlags.granSimStats_suppressed)
1492 #if defined(GRAN_CHECK) && defined(GRAN)
1493 /* Print event stats */
1494 if (RTSflags.GranFlags.debug & 0x20) {
1497 fprintf(stderr,"Event statistics (number of events: %d):\n",
1499 for (i=0; i<=MAX_EVENT; i++) {
1500 fprintf(stderr," %s (%d): \t%ld \t%f%%\t%f%%\n",
1501 event_names[i],i,event_counts[i],
1502 (float)(100*event_counts[i])/(float)(noOfEvents),
1503 (i==CONTINUETHREAD ? 0.0 :
1504 (float)(100*(event_counts[i])/(float)(noOfEvents-event_counts[CONTINUETHREAD])) ));
1506 fprintf(stderr,"Randomized steals: %u sparks, %u threads \n \t(Sparks: #%u (avg ntimes=%f; avg fl=%f) \n",
1507 rs_sp_count, rs_t_count, no_of_steals,
1508 (float)ntimes_total/(float)STG_MAX(no_of_steals,1),
1509 (float)fl_total/(float)STG_MAX(no_of_steals,1));
1510 fprintf(stderr,"Moved sparks: %d Withered sparks: %d (%.2f %%)\n",
1511 tot_sparks,withered_sparks,
1512 ( tot_sparks == 0 ? 0 :
1513 (float)(100*withered_sparks)/(float)(tot_sparks)) );
1514 /* Print statistics about priority sparking */
1515 if (RTSflags.GranFlags.DoPrioritySparking) {
1516 fprintf(stderr,"About Priority Sparking:\n");
1517 fprintf(stderr," Total no. NewThreads: %d Avg. spark queue len: %.2f \n", tot_sq_probes, (float)tot_sq_len/(float)tot_sq_probes);
1519 /* Print statistics about priority sparking */
1520 if (RTSflags.GranFlags.DoPriorityScheduling) {
1521 fprintf(stderr,"About Priority Scheduling:\n");
1522 fprintf(stderr," Total no. of StartThreads: %d (non-end: %d) Avg. thread queue len: %.2f\n",
1523 tot_add_threads, non_end_add_threads,
1524 (float)tot_tq_len/(float)tot_add_threads);
1526 /* Print packet statistics if GUMM fetching is turned on */
1527 if (RTSflags.GranFlags.DoGUMMFetching) {
1528 fprintf(stderr,"Packet statistcs:\n");
1529 fprintf(stderr," Total no. of packets: %d Avg. packet size: %.2f \n", tot_packets, (float)tot_packet_size/(float)tot_packets);
1530 fprintf(stderr," Total no. of thunks: %d Avg. thunks/packet: %.2f \n", tot_thunks, (float)tot_thunks/(float)tot_packets);
1531 fprintf(stderr," Total no. of cuts: %d Avg. cuts/packet: %.2f\n", tot_cuts, (float)tot_cuts/(float)tot_packets);
1533 if (closure_queue_overflows>0)
1534 fprintf(stderr," Number of closure queue overflows: %u\n",
1535 closure_queue_overflows);
1540 if (RTSflags.GranFlags.PrintFetchMisses)
1541 fprintf(stderr,"Number of fetch misses: %d\n",fetch_misses);
1543 # if defined(GRAN_COUNT)
1544 fprintf(stderr,"Update count statistics:\n");
1545 fprintf(stderr," Total number of updates: %u\n",nUPDs);
1546 fprintf(stderr," Needed to awaken BQ: %u with avg BQ len of: %f\n",
1547 nUPDs_BQ,(float)BQ_lens/(float)nUPDs_BQ);
1548 fprintf(stderr," Number of PAPs: %u\n",nPAPs);
1551 #endif /* GRAN_CHECK */
1553 fprintf(stderr, "Simulation finished after @ %s @ cycles. Look at %s for details.\n",
1554 time_string,gr_filename);
1555 if (RTSflags.GranFlags.granSimStats)
1559 FILE *gr_file = NULL;
1560 char gr_filename[STATS_FILENAME_MAXLEN];
1562 /* I_ do_sp_profile = 0; */
1565 init_gr_profiling(rts_argc, rts_argv, prog_argc, prog_argv)
1566 char *prog_argv[], *rts_argv[];
1567 int prog_argc, rts_argc;
1571 char *extension = RTSflags.ParFlags.granSimStats_Binary ? "gb" : "gr";
1573 sprintf(gr_filename, GR_FILENAME_FMT_GUM, prog_argv[0], thisPE, extension);
1575 if ((gr_file = fopen(gr_filename, "w")) == NULL) {
1576 fprintf(stderr, "Can't open activity report file %s\n", gr_filename);
1580 for (i = 0; i < prog_argc; ++i) {
1581 fputs(prog_argv[i], gr_file);
1582 fputc(' ', gr_file);
1586 fputs("+RTS ", gr_file);
1588 for (i = 0; i < rts_argc; ++i) {
1589 fputs(rts_argv[i], gr_file);
1590 fputc(' ', gr_file);
1593 fputc('\n', gr_file);
1595 fputs("Start-Time: ", gr_file);
1596 fputs(time_str(), gr_file); /* defined in main.lc */
1597 fputc('\n', gr_file);
1599 startTime = CURRENT_TIME;
1601 if (startTime > LL(1000000000)) {
1602 /* This shouldn't overflow twice */
1603 fprintf(gr_file, "PE %2u [%lu%lu]: TIME\n", thisPE,
1604 (TIME) (startTime / LL(1000000000)),
1605 (TIME) (startTime % LL(1000000000)));
1607 fprintf(gr_file, "PE %2u [%lu]: TIME\n", thisPE, (TIME) startTime);
1610 if (RTSflags.ParFlags.granSimStats_Binary)
1611 grputw(sizeof(TIME));
1615 #endif /* GRAN || PAR */