2 % (c) The GRASP/AQUA Project, Glasgow University, 1995 - 1996
5 % Time-stamp: <Thu Jul 25 1996 04:01:57 Stardate: [-31]7860.63 hwloidl>
7 %************************************************************************
9 \section[GranSim.lc]{Granularity Simulator Routines}
11 %************************************************************************
13 Macros for dealing with the new and improved GA field for simulating
14 parallel execution. Based on @CONCURRENT@ package. The GA field now
15 contains a mask, where the n-th bit stands for the n-th processor,
16 where this data can be found. In case of multiple copies, several bits
17 are set. The total number of processors is bounded by @MAX_PROC@,
18 which should be <= the length of a word in bits. -- HWL
21 #if defined(GRAN) || defined(PAR)
23 #define NON_POSIX_SOURCE /* gettimeofday */
27 /* qaStaH nuq Sovpu' ngoqvam ghItlhpu'bogh nuv 'e' vItul */
28 # if defined(HAVE_GETCLOCK)
29 # if defined(HAVE_SYS_TIMERS_H)
31 # include <sys/timers.h>
34 # if defined(HAVE_GETTIMEOFDAY)
35 # if defined(HAVE_SYS_TIME_H)
36 # include <sys/time.h>
47 %****************************************************************
49 \subsection[GranSim-data-types]{Basic data types and set-up variables for GranSim}
51 %****************************************************************
55 /* See GranSim.lh for the definition of the enum gran_event_types */
56 char *gran_event_names[] = {
58 "STEALING", "STOLEN", "STOLEN(Q)",
59 "FETCH", "REPLY", "BLOCK", "RESUME", "RESUME(Q)",
60 "SCHEDULE", "DESCHEDULE",
62 "SPARK", "SPARKAT", "USED", "PRUNED", "EXPORTED", "ACQUIRED",
65 "SYSTEM_START", "SYSTEM_END", /* only for debugging */
70 char *proc_status_names[] = {
71 "Idle", "Sparking", "Starting", "Fetching", "Fishing", "Busy",
75 #define RAND_MAX 0x7fffffff /* 2^31-1 = 0x80000000 - 1 (see lrand48(3)); NOTE(review): redefines the stdlib RAND_MAX without #undef -- confirm no clash with <stdlib.h> */
77 unsigned CurrentProc = 0;
78 rtsBool IgnoreEvents = rtsFalse; /* HACK only for testing */
80 #if 0 && (defined(GCap) || defined(GCgn))
81 closq ex_RBH_q = NULL;
86 The following variables control the behaviour of GrAnSim. In general, there
87 is one RTS option for enabling each of these features. In getting the
88 desired setup of GranSim the following questions have to be answered:
90 \item {\em Which scheduling algorithm} to use (@RTSflags.GranFlags.DoFairSchedule@)?
91 Currently only unfair scheduling is supported.
92 \item What to do when remote data is fetched (@RTSflags.GranFlags.DoReScheduleOnFetch@)?
93 Either block and wait for the
94 data or reschedule and do some other work.
95 Thus, if this variable is true, asynchronous communication is
96 modelled. Block on fetch mainly makes sense for incremental fetching.
98 There is also a simplified fetch variant available
99 (@RTSflags.GranFlags.SimplifiedFetch@). This variant does not use events to model
100 communication. It is faster but the results will be less accurate.
101 \item How aggressive to be in getting work after a reschedule on fetch
102 (@RTSflags.GranFlags.FetchStrategy@)?
103 This is determined by the so-called {\em fetching
104 strategy\/}. Currently, there are four possibilities:
106 \item Only run a runnable thread.
107 \item Turn a spark into a thread, if necessary.
108 \item Steal a remote spark, if necessary.
109 \item Steal a runnable thread from another processor, if necessary.
111 The variable @RTSflags.GranFlags.FetchStrategy@ determines how far to go in this list
112 when rescheduling on a fetch.
113 \item Should sparks or threads be stolen first when looking for work
114 (@RTSflags.GranFlags.DoStealThreadsFirst@)?
115 The default is to steal sparks first (much cheaper).
116 \item Should the RTS use a lazy thread creation scheme
117 (@RTSflags.GranFlags.DoAlwaysCreateThreads@)? By default yes i.e.\ sparks are only
118 turned into threads when work is needed. Also note, that sparks
119 can be discarded by the RTS (this is done in the case of an overflow
120 of the spark pool). Setting @RTSflags.GranFlags.DoAlwaysCreateThreads@ to @True@ forces
121 the creation of threads at the next possibility (i.e.\ when new work
122 is demanded the next time).
123 \item Should data be fetched closure-by-closure or in packets
124 (@RTSflags.GranFlags.DoGUMMFetching@)? The default strategy is a GRIP-like incremental
125 (i.e.\ closure-by-closure) strategy. This makes sense in a
126 low-latency setting but is bad in a high-latency system. Setting
127 @RTSflags.GranFlags.DoGUMMFetching@ to @True@ enables bulk (packet) fetching. Other
128 parameters determine the size of the packets (@pack_buffer_size@) and the number of
129 thunks that should be put into one packet (@RTSflags.GranFlags.ThunksToPack@).
130 \item If there is no other possibility to find work, should runnable threads
131 be moved to an idle processor (@RTSflags.GranFlags.DoThreadMigration@)? In any case, the
132 RTS tried to get sparks (either local or remote ones) first. Thread
133 migration is very expensive, since a whole TSO has to be transferred
134 and probably data locality becomes worse in the process. Note, that
135 the closure, which will be evaluated next by that TSO is not
136 transferred together with the TSO (that might block another thread).
137 \item Should the RTS distinguish between sparks created by local nodes and
138 stolen sparks (@RTSflags.GranFlags.PreferSparksOfLocalNodes@)? The idea is to improve
139 data locality by preferring sparks of local nodes (it is more likely
140 that the data for those sparks is already on the local processor).
141 However, such a distinction also imposes an overhead on the spark
142 queue management, and typically a large number of sparks are
143 generated during execution. By default this variable is set to @False@.
144 \item Should the RTS use granularity control mechanisms? The idea of a
145 granularity control mechanism is to make use of granularity
146 information provided via annotation of the @par@ construct in order
147 to prefer bigger threads when either turning a spark into a thread or
148 when choosing the next thread to schedule. Currently, three such
149 mechanisms are implemented:
151 \item Cut-off: The granularity information is interpreted as a
152 priority. If a threshold priority is given to the RTS, then
153 only those sparks with a higher priority than the threshold
154 are actually created. Other sparks are immediately discarded.
155 This is similar to a usual cut-off mechanism often used in
156 parallel programs, where parallelism is only created if the
157 input data is large enough. With this option, the choice is
158 hidden in the RTS and only the threshold value has to be
159 provided as a parameter to the runtime system.
160 \item Priority Sparking: This mechanism keeps priorities for sparks
161 and chooses the spark with the highest priority when turning
162 a spark into a thread. After that the priority information is
163 discarded. The overhead of this mechanism comes from
164 maintaining a sorted spark queue.
165 \item Priority Scheduling: This mechanism keeps the granularity
166 information for threads, too. Thus, on each reschedule the
167 largest thread is chosen. This mechanism has a higher
168 overhead, as the thread queue is sorted, too.
175 /* Do we need to reschedule following a fetch? */
176 rtsBool NeedToReSchedule = rtsFalse;
177 TIME TimeOfNextEvent, EndOfTimeSlice; /* checked from the threaded world! */
178 /* I_ avoidedCS=0; */ /* Unused!! ToDo: Remake libraries and nuke this var */
180 /* For internal use (event statistics) only */
181 char *event_names[] =
182 { "STARTTHREAD", "CONTINUETHREAD", "RESUMETHREAD",
183 "MOVESPARK", "MOVETHREAD", "FINDWORK",
184 "FETCHNODE", "FETCHREPLY",
185 "GLOBALBLOCK", "UNBLOCKTHREAD"
188 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
190 I_ event_counts[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; /* one counter per internal event type in event_names[]; bumped via EVENT_TYPE when event counting (debug flag 0x20) is on */
193 I_ tot_low_pri_sparks = 0;
195 I_ rs_sp_count=0, rs_t_count=0, ntimes_total=0, fl_total=0, no_of_steals=0;
197 /* Variables for gathering packet and queue statistics */
198 I_ tot_packets = 0, tot_packet_size = 0, tot_cuts = 0, tot_thunks = 0;
199 I_ tot_sq_len = 0, tot_sq_probes = 0, tot_sparks = 0, withered_sparks = 0;
200 I_ tot_add_threads = 0, tot_tq_len = 0, non_end_add_threads = 0;
203 # if defined(GRAN_COUNT)
204 /* Count the number of updates that are done. Mainly for testing, but
205 could be useful for other purposes, too. */
206 I_ nUPDs = 0, nUPDs_old = 0, nUPDs_new = 0, nUPDs_BQ = 0, nPAPs = 0,
211 I_ HandleFetchRequest(P_, PROC, P_);
212 /* void HandleFetchRequest(P_, PROC, P_); changed for GUMMFeching */
213 static I_ blockFetch(P_ tso, PROC proc, P_ bh);
218 %****************************************************************
220 \subsection[global-address-op]{Global Address Operations}
222 %****************************************************************
224 These functions perform operations on the global-address (ga) part
225 of a closure. The ga is the only new field (1 word) in a closure introduced
226 by GrAnSim. It serves as a bitmask, indicating on which processor
227 the closure is residing. Since threads are described by Thread State
228 Object (TSO), which is nothing but another kind of closure, this
229 scheme also gives placement information about threads.
231 A ga is just a bitmask, so the operations on them are mainly bitmask
232 manipulating functions. Note, that there are important macros like PROCS,
233 IS_LOCAL_TO etc. They are defined in @GrAnSim.lh@.
235 NOTE: In GrAnSim-light we don't maintain placement information. This
236 allows us to simulate an arbitrary number of processors. The price we have
237 to pay is the lack of costing any communication properly. In short,
238 GrAnSim-light is meant to reveal the maximal parallelism in a program.
239 From an implementation point of view the important thing is:
240 {\em GrAnSim-light does not maintain global-addresses}.
245 /* ga_to_proc returns the first processor marked in the bitmask ga.
246 Normally only one bit in ga should be set. But for PLCs all bits
247 are set. That shouldn't hurt since we only need IS_LOCAL_TO for PLCs */
253 for (i = 0; i < MAX_PROC && !IS_LOCAL_TO(ga, i); i++);
257 /* NB: This takes a *node* rather than just a ga as input */
260 { return (ga_to_proc(PROCS(node))); } /* Access the GA field of the node */
266 for(i=0, any_idle=rtsFalse;
267 !any_idle && i<RTSflags.GranFlags.proc;
268 any_idle = any_idle || IS_IDLE(i), i++)
276 i<RTSflags.GranFlags.proc;
277 j += IS_IDLE(i)?1:0, i++)
284 %****************************************************************
286 \subsection[event-queue]{The Global Event Queue}
288 %****************************************************************
290 The following routines implement an ADT of an event-queue (FIFO).
291 ToDo: Put this in its own file(?)
296 /* Pointer to the global event queue; events are currently malloc'ed */
297 eventq EventHd = NULL;
302 static eventq entry = NULL;
306 fprintf(stderr,"No next event\n");
313 # if defined(GRAN_CHECK) && defined(GRAN)
314 if (RTSflags.GranFlags.debug & 0x20) { /* count events */
316 event_counts[EVENT_TYPE(EventHd)]++;
321 EventHd = EVENT_NEXT(EventHd);
325 /* When getting the time of the next event we ignore CONTINUETHREAD events:
326 we don't want to be interrupted before the end of the current time slice
327 unless there is something important to handle.
330 get_time_of_next_event()
332 eventq event = EventHd;
334 while (event != NULL && EVENT_TYPE(event)==CONTINUETHREAD) {
335 event = EVENT_NEXT(event);
340 return (EVENT_TIME(event));
343 /* ToDo: replace malloc/free with a free list */
346 insert_event(newentry)
349 EVTTYPE evttype = EVENT_TYPE(newentry);
352 /* if(evttype >= CONTINUETHREAD1) evttype = CONTINUETHREAD; */
354 /* Search the queue and insert at the right point:
355 FINDWORK before everything, CONTINUETHREAD after everything.
357 This ensures that we find any available work after all threads have
358 executed the current cycle. This level of detail would normally be
359 irrelevant, but matters for ridiculously low latencies...
362 /* Changed the ordering: Now FINDWORK comes after everything but
363 CONTINUETHREAD. This makes sure that a MOVESPARK comes before a
364 FINDWORK. This is important when a GranSimSparkAt happens and
365 DoAlwaysCreateThreads is turned on. Also important if a GC occurs
366 when trying to build a new thread (see much_spark) -- HWL 02/96 */
371 for (event = EventHd, prev=&EventHd;
373 prev = &(EVENT_NEXT(event)), event = EVENT_NEXT(event)) {
375 case FINDWORK: if ( EVENT_TIME(event) < EVENT_TIME(newentry) ||
376 ( (EVENT_TIME(event) == EVENT_TIME(newentry)) &&
377 (EVENT_TYPE(event) != CONTINUETHREAD) ) )
381 case CONTINUETHREAD: if ( EVENT_TIME(event) <= EVENT_TIME(newentry) )
385 default: if ( EVENT_TIME(event) < EVENT_TIME(newentry) ||
386 ((EVENT_TIME(event) == EVENT_TIME(newentry)) &&
387 (EVENT_TYPE(event) == EVENT_TYPE(newentry))) )
392 /* Insert newentry here (i.e. before event) */
394 EVENT_NEXT(newentry) = event;
403 new_event(proc,creator,time,evttype,tso,node,spark)
410 eventq newentry = (eventq) stgMallocBytes(sizeof(struct event), "new_event");
412 EVENT_PROC(newentry) = proc;
413 EVENT_CREATOR(newentry) = creator;
414 EVENT_TIME(newentry) = time;
415 EVENT_TYPE(newentry) = evttype;
416 EVENT_TSO(newentry) = tso;
417 EVENT_NODE(newentry) = node;
418 EVENT_SPARK(newentry) = spark;
419 EVENT_GC_INFO(newentry) = 0;
420 EVENT_NEXT(newentry) = NULL;
422 insert_event(newentry);
426 prepend_event(eventq event) /* put event at beginning of EventQueue */
427 { /* only used for GC! */
428 EVENT_NEXT(event) = EventHd;
433 grab_event() /* undo prepend_event i.e. get the event */
434 { /* at the head of EventQ but don't free anything */
435 eventq event = EventHd;
437 if(EventHd == NULL) {
438 fprintf(stderr,"No next event (in grab_event)\n");
442 EventHd = EVENT_NEXT(EventHd);
447 traverse_eventq_for_gc()
449 eventq event = EventHd;
451 P_ closure, tso, buffer, bufptr;
454 /* Traverse eventq and replace every FETCHREPLY by a FETCHNODE for the
455 orig closure (root of packed graph). This means that a graph, which is
456 between processors at the time of GC is fetched again at the time when
457 it would have arrived, had there been no GC. Slightly inaccurate but
459 This is only needed for GUM style fetching. */
460 if (!RTSflags.GranFlags.DoGUMMFetching)
463 for(event = EventHd; event!=NULL; event=EVENT_NEXT(event)) {
464 if (EVENT_TYPE(event)==FETCHREPLY) {
465 buffer = EVENT_NODE(event);
466 ASSERT(buffer[PACK_FLAG_LOCN]==MAGIC_PACK_FLAG); /* It's a pack buffer */
467 bufsize = buffer[PACK_SIZE_LOCN];
468 closure= (P_)buffer[PACK_HDR_SIZE];
469 tso = (P_)buffer[PACK_TSO_LOCN];
470 proc = EVENT_PROC(event);
471 creator = EVENT_CREATOR(event); /* similar to unpacking */
472 for (bufptr=buffer+PACK_HDR_SIZE; bufptr<(buffer+bufsize);
474 if ( (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_SPEC_RBH_TYPE) ||
475 (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_GEN_RBH_TYPE) ) {
476 convertFromRBH((P_)*bufptr);
480 EVENT_TYPE(event) = FETCHNODE;
481 EVENT_PROC(event) = creator;
482 EVENT_CREATOR(event) = proc;
483 EVENT_NODE(event) = closure;
484 EVENT_TSO(event) = tso;
485 EVENT_GC_INFO(event) = 0;
495 char str_tso[16], str_node[16];
497 sprintf(str_tso,((EVENT_TSO(event)==Prelude_Z91Z93_closure) ? "______" : "%#6lx"),
499 sprintf(str_node,((EVENT_NODE(event)==Prelude_Z91Z93_closure) ? "______" : "%#6lx"),
503 fprintf(stderr,"Evt: NIL\n");
505 fprintf(stderr,"Evt: %s (%u), PE %u [%u], Time %lu, TSO %s (%x), node %s\n",
506 event_names[EVENT_TYPE(event)],EVENT_TYPE(event),
507 EVENT_PROC(event), EVENT_CREATOR(event), EVENT_TIME(event),
508 str_tso, TSO_ID(EVENT_TSO(event)), str_node
509 /*, EVENT_SPARK(event), EVENT_NEXT(event)*/ );
519 fprintf(stderr,"Event Queue with root at %x:\n",hd);
520 for (x=hd; x!=NULL; x=EVENT_NEXT(x)) {
531 sprintf(str,((SPARK_NODE(spark)==Prelude_Z91Z93_closure) ? "______" : "%#6lx"),
532 (W_) SPARK_NODE(spark));
535 fprintf(stderr,"Spark: NIL\n");
537 fprintf(stderr,"Spark: Node %8s, Name %#6lx, Exported %5s, Prev %#6x, Next %#6x\n",
538 str, SPARK_NAME(spark),
539 ((SPARK_EXPORTED(spark))?"True":"False"),
540 SPARK_PREV(spark), SPARK_NEXT(spark) );
549 fprintf(stderr,"Spark Queue with root at %x:\n",hd);
550 for (x=hd; x!=NULL; x=SPARK_NEXT(x)) {
559 %****************************************************************************
561 \subsection[entry-points]{Routines directly called from Haskell world}
563 %****************************************************************************
565 The @GranSim...@ routines in here are directly called via macros from the
568 First some auxiliary routines.
572 /* Take the current thread off the thread queue and thereby activate the */
573 /* next thread. It's assumed that the next ReSchedule after this uses */
574 /* NEW_THREAD as param. */
575 /* This fct is called from GranSimBlock and GranSimFetch */
578 ActivateNextThread (PROC proc)
580 ASSERT(RunnableThreadsHd[proc]!=Prelude_Z91Z93_closure);
582 RunnableThreadsHd[proc] = TSO_LINK(RunnableThreadsHd[proc]);
583 if(RunnableThreadsHd[proc]==Prelude_Z91Z93_closure) {
585 RunnableThreadsTl[proc] = Prelude_Z91Z93_closure;
587 CurrentTime[proc] += RTSflags.GranFlags.gran_threadcontextswitchtime;
588 if (RTSflags.GranFlags.granSimStats &&
589 (!RTSflags.GranFlags.Light || (RTSflags.GranFlags.debug & 0x20000)))
590 DumpRawGranEvent(proc,0,GR_SCHEDULE,RunnableThreadsHd[proc],
591 Prelude_Z91Z93_closure,0);
596 Now the main stg-called routines:
599 /* ------------------------------------------------------------------------ */
600 /* The following GranSim... fcts are stg-called from the threaded world. */
601 /* ------------------------------------------------------------------------ */
603 /* Called from HEAP_CHK -- NB: node and liveness are junk here now.
604 They are left temporarily to avoid complete recompilation.
608 GranSimAllocate(n,node,liveness)
613 TSO_ALLOCS(CurrentTSO) += n;
614 ++TSO_BASICBLOCKS(CurrentTSO);
616 if (RTSflags.GranFlags.granSimStats_Heap) {
617 DumpRawGranEvent(CurrentProc,0,GR_ALLOC,CurrentTSO,
618 Prelude_Z91Z93_closure,n);
621 TSO_EXECTIME(CurrentTSO) += RTSflags.GranFlags.gran_heapalloc_cost;
622 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_heapalloc_cost;
626 Subtract the values added above, if a heap check fails and
630 GranSimUnallocate(n,node,liveness)
635 TSO_ALLOCS(CurrentTSO) -= n;
636 --TSO_BASICBLOCKS(CurrentTSO);
638 TSO_EXECTIME(CurrentTSO) -= RTSflags.GranFlags.gran_heapalloc_cost;
639 CurrentTime[CurrentProc] -= RTSflags.GranFlags.gran_heapalloc_cost;
642 /* NB: We now inline this code via GRAN_EXEC rather than calling this fct */
644 GranSimExec(ariths,branches,loads,stores,floats)
645 W_ ariths,branches,loads,stores,floats;
647 W_ cost = RTSflags.GranFlags.gran_arith_cost*ariths +
648 RTSflags.GranFlags.gran_branch_cost*branches +
649 RTSflags.GranFlags.gran_load_cost * loads +
650 RTSflags.GranFlags.gran_store_cost*stores +
651 RTSflags.GranFlags.gran_float_cost*floats;
653 TSO_EXECTIME(CurrentTSO) += cost;
654 CurrentTime[CurrentProc] += cost;
659 Fetch the node if it isn't local
660 -- result indicates whether fetch has been done.
662 This is GRIP-style single item fetching.
665 /* This function in Threads.lc is only needed for SimplifiedFetch */
666 FetchNode PROTO((P_ node,PROC CurrentProc));
669 GranSimFetch(node /* , liveness_mask */ )
671 /* I_ liveness_mask; */
673 if (RTSflags.GranFlags.Light) {
674 /* Always reschedule in GrAnSim-Light to prevent one TSO from
676 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
677 CONTINUETHREAD,CurrentTSO,node,NULL);
679 NeedToReSchedule = rtsFalse;
683 /* Note: once a node has been fetched, this test will be passed */
684 if(!IS_LOCAL_TO(PROCS(node),CurrentProc))
686 /* Add mpacktime to the remote PE for the reply */
688 PROC p = where_is(node);
692 if ( ( RTSflags.GranFlags.debug & 0x40 ) &&
694 fprintf(stderr,"GranSimFetch: Trying to fetch from own processor%u\n", p);
695 # endif /* GRAN_CHECK */
697 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
698 /* NB: Fetch is counted on arrival (FETCHREPLY) */
700 if (RTSflags.GranFlags.SimplifiedFetch)
702 FetchNode(node,CurrentProc);
703 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime+
704 RTSflags.GranFlags.gran_fetchtime+
705 RTSflags.GranFlags.gran_munpacktime;
709 fetchtime = STG_MAX(CurrentTime[CurrentProc],CurrentTime[p]) +
710 RTSflags.GranFlags.gran_latency;
712 new_event(p,CurrentProc,fetchtime,FETCHNODE,CurrentTSO,node,NULL);
713 if (!RTSflags.GranFlags.DoReScheduleOnFetch)
714 MAKE_FETCHING(CurrentProc);
715 ++OutstandingFetches[CurrentProc];
717 if (fetchtime<TimeOfNextEvent)
718 TimeOfNextEvent = fetchtime;
721 TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[CurrentProc];
723 if (RTSflags.GranFlags.DoReScheduleOnFetch)
725 /* Remove CurrentTSO from the queue
726 -- assumes head of queue == CurrentTSO */
727 if(!RTSflags.GranFlags.DoFairSchedule)
729 if(RTSflags.GranFlags.granSimStats)
730 DumpRawGranEvent(CurrentProc,p,GR_FETCH,CurrentTSO,
733 ActivateNextThread(CurrentProc);
735 # if defined(GRAN_CHECK)
736 if (RTSflags.GranFlags.debug & 0x10) {
737 if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
738 fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
739 CurrentTSO,CurrentTime[CurrentProc]);
742 TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
746 TSO_LINK(CurrentTSO) = Prelude_Z91Z93_closure;
747 /* CurrentTSO = Prelude_Z91Z93_closure; */
749 /* ThreadQueueHd is now the next TSO to schedule or NULL */
750 /* CurrentTSO is pointed to by the FETCHNODE event */
752 else /* fair scheduling currently not supported -- HWL */
754 fprintf(stderr,"Reschedule-on-fetch is not yet compatible with fair scheduling\n");
758 else /* !RTSflags.GranFlags.DoReScheduleOnFetch */
760 /* Note: CurrentProc is still busy as it's blocked on fetch */
761 if(RTSflags.GranFlags.granSimStats)
762 DumpRawGranEvent(CurrentProc,p,GR_FETCH,CurrentTSO,node,0);
764 # if defined(GRAN_CHECK)
765 if (RTSflags.GranFlags.debug & 0x04)
766 BlockedOnFetch[CurrentProc] = CurrentTSO; /*- rtsTrue; -*/
767 if (RTSflags.GranFlags.debug & 0x10) {
768 if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
769 fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
770 CurrentTSO,CurrentTime[CurrentProc]);
773 TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
775 CurrentTSO = Prelude_Z91Z93_closure;
779 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
781 /* Rescheduling is necessary */
782 NeedToReSchedule = rtsTrue;
791 GranSimSpark(local,node)
795 /* ++SparksAvail; Nope; do that in add_to_spark_queue */
796 if(RTSflags.GranFlags.granSimStats_Sparks)
797 DumpRawGranEvent(CurrentProc,(PROC)0,SP_SPARK,Prelude_Z91Z93_closure,node,
798 spark_queue_len(CurrentProc,ADVISORY_POOL)-1);
800 /* Force the PE to take notice of the spark */
801 if(RTSflags.GranFlags.DoAlwaysCreateThreads) {
802 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
803 FINDWORK,Prelude_Z91Z93_closure,Prelude_Z91Z93_closure,NULL);
804 if (CurrentTime[CurrentProc]<TimeOfNextEvent)
805 TimeOfNextEvent = CurrentTime[CurrentProc];
809 ++TSO_LOCALSPARKS(CurrentTSO);
811 ++TSO_GLOBALSPARKS(CurrentTSO);
815 GranSimSparkAt(spark,where,identifier)
817 P_ where; /* This should be a node; alternatively could be a GA */
820 PROC p = where_is(where);
821 GranSimSparkAtAbs(spark,p,identifier);
825 GranSimSparkAtAbs(spark,proc,identifier)
832 if ( spark == (sparkq)NULL) /* Note: Granularity control might have */
833 return; /* turned a spark into a NULL. */
835 /* ++SparksAvail; Nope; do that in add_to_spark_queue */
836 if(RTSflags.GranFlags.granSimStats_Sparks)
837 DumpRawGranEvent(proc,0,SP_SPARKAT,Prelude_Z91Z93_closure,SPARK_NODE(spark),
838 spark_queue_len(proc,ADVISORY_POOL));
840 if (proc!=CurrentProc) {
841 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
842 exporttime = (CurrentTime[proc] > CurrentTime[CurrentProc]?
843 CurrentTime[proc]: CurrentTime[CurrentProc])
844 + RTSflags.GranFlags.gran_latency;
846 exporttime = CurrentTime[CurrentProc];
849 if ( RTSflags.GranFlags.Light )
850 /* Need CurrentTSO in event field to associate costs with creating
851 spark even in a GrAnSim Light setup */
852 new_event(proc,CurrentProc,exporttime,
853 MOVESPARK,CurrentTSO,Prelude_Z91Z93_closure,spark);
855 new_event(proc,CurrentProc,exporttime,
856 MOVESPARK,Prelude_Z91Z93_closure,Prelude_Z91Z93_closure,spark);
857 /* Bit of a hack to treat placed sparks the same as stolen sparks */
858 ++OutstandingFishes[proc];
860 /* Force the PE to take notice of the spark (FINDWORK is put after a
861 MOVESPARK into the sparkq!) */
862 if(RTSflags.GranFlags.DoAlwaysCreateThreads) {
863 new_event(CurrentProc,CurrentProc,exporttime+1,
864 FINDWORK,Prelude_Z91Z93_closure,Prelude_Z91Z93_closure,NULL);
867 if (exporttime<TimeOfNextEvent)
868 TimeOfNextEvent = exporttime;
870 if (proc!=CurrentProc) {
871 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
872 ++TSO_GLOBALSPARKS(CurrentTSO);
874 ++TSO_LOCALSPARKS(CurrentTSO);
878 /* This function handles local and global blocking */
879 /* It's called either from threaded code (RBH_entry, BH_entry etc) or */
880 /* from blockFetch when trying to fetch an BH or RBH */
883 GranSimBlock(P_ tso, PROC proc, P_ node)
885 PROC node_proc = where_is(node);
887 ASSERT(tso==RunnableThreadsHd[proc]);
889 if(RTSflags.GranFlags.granSimStats)
890 DumpRawGranEvent(proc,node_proc,GR_BLOCK,tso,node,0);
892 ++TSO_BLOCKCOUNT(tso);
893 /* Distinction between local and global block is made in blockFetch */
894 TSO_BLOCKEDAT(tso) = CurrentTime[proc];
896 CurrentTime[proc] += RTSflags.GranFlags.gran_threadqueuetime;
897 ActivateNextThread(proc);
898 TSO_LINK(tso) = Prelude_Z91Z93_closure; /* not really necessary; only for testing */
905 %****************************************************************************
907 \subsection[GrAnSim-profile]{Writing profiling info for GrAnSim}
909 %****************************************************************************
911 Event dumping routines.
916 * If you're not using GNUC and you're on a 32-bit machine, you're
917 * probably out of luck here. However, since CONCURRENT currently
918 * requires GNUC, I'm not too worried about it. --JSM
923 static ullong startTime = 0;
928 # ifdef HAVE_GETCLOCK
931 if (getclock(TIMEOFDAY, &tv) != 0) {
933 fprintf(stderr, "Clock failed\n");
936 return tv.tv_sec * LL(1000) + tv.tv_nsec / LL(1000000) - startTime;
938 # ifdef HAVE_GETTIMEOFDAY
941 if (gettimeofday(&tv, NULL) != 0) {
943 fprintf(stderr, "Clock failed\n");
946 return tv.tv_sec * LL(1000) + tv.tv_usec / LL(1000) - startTime;
949 if ((t = time(NULL)) == (time_t) -1) {
951 fprintf(stderr, "Clock failed\n");
961 #if defined(GRAN) || defined(PAR)
964 DumpGranEvent(name, tso)
965 enum gran_event_types name;
968 DumpRawGranEvent(CURRENT_PROC, (PROC)0, name, tso, Prelude_Z91Z93_closure, 0);
972 DumpRawGranEvent(proc, p, name, tso, node, len)
973 PROC proc, p; /* proc ... where it happens; p ... where node lives */
974 enum gran_event_types name;
979 char time_string[500], node_str[16]; /*ToDo: kill magic constants */
980 ullong_format_string(TIME_ON_PROC(proc), time_string, rtsFalse/*no commas!*/);
982 if (RTSflags.GranFlags.granSimStats_suppressed)
986 id = tso == NULL ? -1 : TSO_ID(tso);
987 if (node==Prelude_Z91Z93_closure)
988 strcpy(node_str,"________"); /* "Prelude_Z91Z93_closure"); */
990 sprintf(node_str,"0x%-6lx",node);
992 if (name > GR_EVENT_MAX)
995 if(GRANSIMSTATS_BINARY)
996 /* ToDo: fix code for writing binary GrAnSim statistics */
1002 abort(); /* die please: a single word */
1003 /* doesn't represent long long times */
1004 grputw(TIME_ON_PROC(proc));
1012 abort(); /* die please: a single word */
1013 /* doesn't represent long long times */
1014 grputw(TIME_ON_PROC(proc)); /* this line is bound to */
1015 grputw(id); /* do the wrong thing */
1020 abort(); /* die please: a single word */
1021 /* doesn't represent long long times */
1022 grputw(TIME_ON_PROC(proc));
1029 /* fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n", */
1030 /* using spark queue length as optional argument ^^^^^^^^^ */
1031 fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[SN %u]\n",
1032 /* using spark name as optional argument ^^^^^^ */
1033 proc,time_string,gran_event_names[name],
1041 fprintf(gr_file, "PE %2u [%s]: %-9s\t%lx \t%s\t(from %2u)\n",
1042 proc, time_string, gran_event_names[name],
1049 fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx \n",
1050 proc,time_string,gran_event_names[name],id);
1053 fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t \t(by %2u)\n",
1054 proc,time_string,gran_event_names[name],id,p);
1057 fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t \tallocating %u words\n",
1058 proc,time_string,gran_event_names[name],id,len);
1061 fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n",
1062 proc,time_string,gran_event_names[name],id,node_str,len);
1068 /* Only needed for special dynamic spark labelling support */
1070 DumpStartEventAt(time, proc, p, name, tso, node, len)
1072 PROC proc, p; /* proc ... where it happens; p ... where node lives */
1073 enum gran_event_types name;
1078 char time_string[500], node_str[16]; /*ToDo: kill magic constants */
1079 ullong_format_string(time, time_string, rtsFalse/*no commas!*/);
1080 /* ^^^^ only important change to DumpRawGranEvent */
1081 if (RTSflags.GranFlags.granSimStats_suppressed)
1084 id = tso == NULL ? -1 : TSO_ID(tso);
1085 if (node==Prelude_Z91Z93_closure)
1086 strcpy(node_str,"________"); /* "Z91Z93_closure"); */
1088 sprintf(node_str,"0x%-6lx",node);
1090 if (name > GR_EVENT_MAX)
1091 name = GR_EVENT_MAX;
1093 if(GRANSIMSTATS_BINARY)
1094 /* ToDo: fix code for writing binary GrAnSim statistics */
1100 abort(); /* die please: a single word */
1101 /* doesn't represent long long times */
1102 grputw(TIME_ON_PROC(proc));
1106 fprintf(stderr,"Error in DumpStartEventAt: event %s is not a START event\n",
1107 gran_event_names[name]);
1113 /* fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n", */
1114 /* using spark queue length as optional argument ^^^^^^^^^ */
1115 fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[SN %u]\n",
1116 /* using spark name as optional argument ^^^^^^ */
1117 proc,time_string,gran_event_names[name],
1121 fprintf(stderr,"Error in DumpStartEventAt: event %s is not a START event\n",
1122 gran_event_names[name]);
1128 DumpGranInfo(proc, tso, mandatory_thread)
1131 rtsBool mandatory_thread;
1133 char time_string[500]; /* ToDo: kill magic constant */
1134 ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
1137 if (RTSflags.GranFlags.granSimStats_suppressed)
1141 if (GRANSIMSTATS_BINARY) {
1144 abort(); /* die please: a single word doesn't represent long long times */
1145 grputw(CURRENT_TIME); /* this line is bound to fail */
1146 grputw(TSO_ID(tso));
1161 grputw(TSO_SPARKNAME(tso));
1162 grputw(TSO_STARTEDAT(tso));
1163 grputw(TSO_EXPORTED(tso));
1164 grputw(TSO_BASICBLOCKS(tso));
1165 grputw(TSO_ALLOCS(tso));
1166 grputw(TSO_EXECTIME(tso));
1167 grputw(TSO_BLOCKTIME(tso));
1168 grputw(TSO_BLOCKCOUNT(tso));
1169 grputw(TSO_FETCHTIME(tso));
1170 grputw(TSO_FETCHCOUNT(tso));
1171 grputw(TSO_LOCALSPARKS(tso));
1172 grputw(TSO_GLOBALSPARKS(tso));
1174 grputw(mandatory_thread);
1178 * NB: DumpGranEvent cannot be used because PE may be wrong
1179 * (as well as the extra info)
1181 fprintf(gr_file, "PE %2u [%s]: END %lx, SN %lu, ST %lu, EXP %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu), LS %lu, GS %lu, MY %c\n"
1187 ,TSO_EXPORTED(tso) ? 'T' : 'F'
1188 ,TSO_BASICBLOCKS(tso)
1192 ,TSO_BLOCKCOUNT(tso)
1194 ,TSO_FETCHCOUNT(tso)
1195 ,TSO_LOCALSPARKS(tso)
1196 ,TSO_GLOBALSPARKS(tso)
1197 ,mandatory_thread ? 'T' : 'F'
/*
 * (fragment -- enclosing function header is elided from this view;
 * presumably a TSO debug-dump routine).  Prints the TSO's identity and
 * type to stderr, then its accumulated statistics to gr_file.
 */
1206 fprintf(stderr,"TSO 0x%lx, NAME 0x%lx, ID %lu, LINK 0x%lx, TYPE %s\n"
1211 ,TSO_TYPE(tso)==T_MAIN?"MAIN":
1212 TSO_TYPE(tso)==T_FAIL?"FAIL":
1213 TSO_TYPE(tso)==T_REQUIRED?"REQUIRED":
1214 TSO_TYPE(tso)==T_ADVISORY?"ADVISORY":
/* NOTE(review): "SWITCH %lx0x" below looks like a typo for "SWITCH 0x%lx"
   -- verify before changing, the .gr post-processors may parse this text */
1218 fprintf(stderr,"PC (0x%lx,0x%lx), ARG (0x%lx), SWITCH %lx0x\n"
1222 /* ,TSO_ARG2(tso) */
1226 fprintf(gr_file,"TSO %lx: SN %lu, ST %lu, GBL %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu) LS %lu, GS %lu\n"
1230 ,TSO_EXPORTED(tso)?'T':'F'
1231 ,TSO_BASICBLOCKS(tso)
1235 ,TSO_BLOCKCOUNT(tso)
1237 ,TSO_FETCHCOUNT(tso)
1238 ,TSO_LOCALSPARKS(tso)
1239 ,TSO_GLOBALSPARKS(tso)
1244 Output a terminate event and an 8-byte time.
1252 if (RTSflags.GranFlags.granSimStats_suppressed)
1256 DumpGranEvent(GR_TERMINATE, Prelude_Z91Z93_closure);
/* Always write exactly 8 bytes of time, big-endian.  When TIME is only
   4 bytes wide, the high 4 bytes are zero padding. */
1258 if (sizeof(TIME) == 4) {
1259 putc('\0', gr_file);
1260 putc('\0', gr_file);
1261 putc('\0', gr_file);
1262 putc('\0', gr_file);
/* (elided `else' branch) 64-bit TIME: top four bytes, high to low */
1264 putc(v >> 56l, gr_file);
1265 putc((v >> 48l) & 0xffl, gr_file);
1266 putc((v >> 40l) & 0xffl, gr_file);
1267 putc((v >> 32l) & 0xffl, gr_file);
/* low four bytes, written in both cases */
1269 putc((v >> 24l) & 0xffl, gr_file);
1270 putc((v >> 16l) & 0xffl, gr_file);
1271 putc((v >> 8l) & 0xffl, gr_file);
1272 putc(v & 0xffl, gr_file);
1276 Length-coded output: first 3 bits contain length coding
1290 if (RTSflags.GranFlags.granSimStats_suppressed)
/* Variable-length big-endian encoding of word v; the top bits of the
   first byte carry a length tag:
     00xxxxxx            -> 1 byte  (6-bit value)
     01 + 1 byte         -> 2 bytes (14-bit value)
     10 + 3 bytes        -> 4 bytes (30-bit value)
     0x70 escape         -> full-width word follows (4 or 8 bytes)
     011 + 7 bytes       -> 8 bytes (54-bit value), 64-bit TIME only */
1294 if (v <= 0x3fl) { /* length v = 1 byte */
1295 fputc(v & 0x3f, gr_file);
1296 } else if (v <= 0x3fffl) { /* length v = 2 byte */
1297 fputc((v >> 8l) | 0x40l, gr_file);
1298 fputc(v & 0xffl, gr_file);
1299 } else if (v <= 0x3fffffffl) { /* length v = 4 byte */
1300 fputc((v >> 24l) | 0x80l, gr_file);
1301 fputc((v >> 16l) & 0xffl, gr_file);
1302 fputc((v >> 8l) & 0xffl, gr_file);
1303 fputc(v & 0xffl, gr_file);
1304 } else if (sizeof(TIME) == 4) {
1305 fputc(0x70, gr_file); /* escape: full 32-bit word follows */
1306 fputc((v >> 24l) & 0xffl, gr_file);
1307 fputc((v >> 16l) & 0xffl, gr_file);
1308 fputc((v >> 8l) & 0xffl, gr_file);
1309 fputc(v & 0xffl, gr_file);
/* (elided) 64-bit TIME case */
1311 if (v <= 0x3fffffffffffffl)
1312 putc((v >> 56l) | 0x60l, gr_file); /* tag 011 + top 6 bits */
1314 putc(0x70, gr_file); /* escape: full 64-bit word follows */
1315 putc((v >> 56l) & 0xffl, gr_file);
1318 putc((v >> 48l) & 0xffl, gr_file);
1319 putc((v >> 40l) & 0xffl, gr_file);
1320 putc((v >> 32l) & 0xffl, gr_file);
1321 putc((v >> 24l) & 0xffl, gr_file);
1322 putc((v >> 16l) & 0xffl, gr_file);
1323 putc((v >> 8l) & 0xffl, gr_file);
1324 putc(v & 0xffl, gr_file);
1328 #endif /* GRAN || PAR */
1331 %****************************************************************************
1333 \subsection[gr-simulation]{Granularity Simulation}
1335 %****************************************************************************
1337 General routines for GranSim. Mainly, startup and shutdown routines, called
/* Granularity-profile output stream and its file name (GRAN build).
   Opened by init_gr_simulation, written by the Dump* routines above. */
1342 FILE *gr_file = NULL;
1343 char gr_filename[STATS_FILENAME_MAXLEN];
1344 /* I_ do_gr_sim = 0; */ /* In GrAnSim setup always do simulation */
/*
 * init_gr_simulation: open the granularity profile (<prog>.gr, or
 * <prog>.gb in binary mode) and write its header: the command line,
 * the RTS options, the start time, and the GranSim parameter settings
 * (scheduling, fetching, thread-cost and communication-cost metrics).
 * Does nothing when profiling output is suppressed.
 * NOTE(review): elided view -- some braces/else-arms are not visible.
 */
1347 init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv)
1348 char *prog_argv[], *rts_argv[];
1349 int prog_argc, rts_argc;
1353 char *extension = RTSflags.GranFlags.granSimStats_Binary ? "gb" : "gr";
1355 if (RTSflags.GranFlags.granSimStats_suppressed)
1358 sprintf(gr_filename, GR_FILENAME_FMT, prog_argv[0], extension);
1360 if ((gr_file = fopen(gr_filename, "w")) == NULL) {
1361 fprintf(stderr, "Can't open granularity simulation report file %s\n", gr_filename);
1364 # if 0 /* that's obsolete now, I think -- HWL */
1365 if (RTSflags.GranFlags.DoReScheduleOnFetch)
1366 setbuf(gr_file, NULL);
/* header: echo the program's command line ... */
1369 fputs("Granularity Simulation for ", gr_file);
1370 for (i = 0; i < prog_argc; ++i) {
1371 fputs(prog_argv[i], gr_file);
1372 fputc(' ', gr_file);
/* ... and the RTS options it was run with */
1376 fputs("+RTS ", gr_file);
1378 for (i = 0; i < rts_argc; ++i) {
1379 fputs(rts_argv[i], gr_file);
1380 fputc(' ', gr_file);
1384 fputs("\nStart time: ", gr_file);
1385 fputs(time_str(), gr_file); /* defined in main.lc */
1386 fputc('\n', gr_file);
1388 fputs("\n\n--------------------\n\n", gr_file);
/* scheduler/migration/fetch configuration, one variant for
   GrAnSim-Light (infinite PEs), one for the normal setup */
1390 fputs("General Parameters:\n\n", gr_file);
1392 if (RTSflags.GranFlags.Light)
1393 fprintf(gr_file, "GrAnSim-Light\nPEs infinite, %s Scheduler, %sMigrate Threads %s, %s\n",
1394 RTSflags.GranFlags.DoFairSchedule?"Fair":"Unfair",
1395 RTSflags.GranFlags.DoThreadMigration?"":"Don't ",
1396 RTSflags.GranFlags.DoThreadMigration && RTSflags.GranFlags.DoStealThreadsFirst?" Before Sparks":"",
1397 RTSflags.GranFlags.SimplifiedFetch ? "Simplified Fetch" :
1398 RTSflags.GranFlags.DoReScheduleOnFetch ? "Reschedule on Fetch" :
1401 fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads %s, %s\n",
1402 RTSflags.GranFlags.proc,RTSflags.GranFlags.DoFairSchedule?"Fair":"Unfair",
1403 RTSflags.GranFlags.DoThreadMigration?"":"Don't ",
1404 RTSflags.GranFlags.DoThreadMigration && RTSflags.GranFlags.DoStealThreadsFirst?" Before Sparks":"",
1405 RTSflags.GranFlags.SimplifiedFetch ? "Simplified Fetch" :
1406 RTSflags.GranFlags.DoReScheduleOnFetch ? "Reschedule on Fetch" :
/* packet-fetching mode: bulk (GUMM) fetching vs one closure per packet */
1409 if (RTSflags.GranFlags.DoGUMMFetching)
1410 if (RTSflags.GranFlags.ThunksToPack)
1411 fprintf(gr_file, "Bulk Fetching: Fetch %d Thunks in Each Packet (Packet Size = %d closures)\n",
1412 RTSflags.GranFlags.ThunksToPack,
1413 RTSflags.GranFlags.packBufferSize);
1415 fprintf(gr_file, "Bulk Fetching: Fetch as many closures as possible (Packet Size = %d closures)\n",
1416 RTSflags.GranFlags.packBufferSize);
1418 fprintf(gr_file, "Incremental Fetching: Fetch Exactly One Closure in Each Packet\n");
1420 fprintf(gr_file, "Fetch Strategy(%u):If outstanding fetches %s\n",
1421 RTSflags.GranFlags.FetchStrategy,
1422 RTSflags.GranFlags.FetchStrategy==0 ?
1423 " block (block-on-fetch)":
1424 RTSflags.GranFlags.FetchStrategy==1 ?
1425 "only run runnable threads":
1426 RTSflags.GranFlags.FetchStrategy==2 ?
1427 "create threads only from local sparks":
1428 RTSflags.GranFlags.FetchStrategy==3 ?
1429 "create threads from local or global sparks":
1430 RTSflags.GranFlags.FetchStrategy==4 ?
1431 "create sparks and steal threads if necessary":
1434 if (RTSflags.GranFlags.DoPrioritySparking)
1435 fprintf(gr_file, "Priority Sparking (i.e. keep sparks ordered by priority)\n");
1437 if (RTSflags.GranFlags.DoPriorityScheduling)
1438 fprintf(gr_file, "Priority Scheduling (i.e. keep threads ordered by priority)\n");
/* simulated thread-management costs (in cycles) */
1440 fprintf(gr_file, "Thread Creation Time %lu, Thread Queue Time %lu\n",
1441 RTSflags.GranFlags.gran_threadcreatetime,
1442 RTSflags.GranFlags.gran_threadqueuetime);
1443 fprintf(gr_file, "Thread DeSchedule Time %lu, Thread Schedule Time %lu\n",
1444 RTSflags.GranFlags.gran_threaddescheduletime,
1445 RTSflags.GranFlags.gran_threadscheduletime);
1446 fprintf(gr_file, "Thread Context-Switch Time %lu\n",
1447 RTSflags.GranFlags.gran_threadcontextswitchtime);
1448 fputs("\n\n--------------------\n\n", gr_file);
/* simulated communication costs */
1450 fputs("Communication Metrics:\n\n", gr_file);
1452 "Latency %lu (1st) %lu (rest), Fetch %lu, Notify %lu (Global) %lu (Local)\n",
1453 RTSflags.GranFlags.gran_latency,
1454 RTSflags.GranFlags.gran_additional_latency,
1455 RTSflags.GranFlags.gran_fetchtime,
1456 RTSflags.GranFlags.gran_gunblocktime,
1457 RTSflags.GranFlags.gran_lunblocktime);
1459 "Message Creation %lu (+ %lu after send), Message Read %lu\n",
1460 RTSflags.GranFlags.gran_mpacktime,
1461 RTSflags.GranFlags.gran_mtidytime,
1462 RTSflags.GranFlags.gran_munpacktime);
1463 fputs("\n\n--------------------\n\n", gr_file);
/* simulated per-instruction costs */
1465 fputs("Instruction Metrics:\n\n", gr_file);
1466 fprintf(gr_file, "Arith %lu, Branch %lu, Load %lu, Store %lu, Float %lu, Alloc %lu\n",
1467 RTSflags.GranFlags.gran_arith_cost,
1468 RTSflags.GranFlags.gran_branch_cost,
1469 RTSflags.GranFlags.gran_load_cost,
1470 RTSflags.GranFlags.gran_store_cost,
1471 RTSflags.GranFlags.gran_float_cost,
1472 RTSflags.GranFlags.gran_heapalloc_cost);
1473 fputs("\n\n++++++++++++++++++++\n\n", gr_file);
/* binary profiles start with the width of TIME so readers can decode */
1475 if (RTSflags.GranFlags.granSimStats_Binary)
1476 grputw(sizeof(TIME));
/*
 * end_gr_simulation: shut down granularity profiling.  In GRAN_CHECK
 * debug builds (debug flag 0x20) it first prints event/steal/spark/
 * packet statistics to stderr; it always reports the final simulated
 * time and the profile file name.  Does nothing when suppressed.
 * NOTE(review): elided view -- enclosing braces are not all visible.
 */
1482 end_gr_simulation(STG_NO_ARGS)
1484 char time_string[500]; /* ToDo: kill magic constant */
1485 ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
1487 if (RTSflags.GranFlags.granSimStats_suppressed)
1490 #if defined(GRAN_CHECK) && defined(GRAN)
1491 /* Print event stats */
1492 if (RTSflags.GranFlags.debug & 0x20) {
1495 fprintf(stderr,"Event statistics (number of events: %d):\n",
/* per-event-type counts, as %% of all events and of non-CONTINUETHREAD events */
1497 for (i=0; i<=MAX_EVENT; i++) {
1498 fprintf(stderr," %s (%d): \t%ld \t%f%%\t%f%%\n",
1499 event_names[i],i,event_counts[i],
1500 (float)(100*event_counts[i])/(float)(noOfEvents),
1501 (i==CONTINUETHREAD ? 0.0 :
1502 (float)(100*(event_counts[i])/(float)(noOfEvents-event_counts[CONTINUETHREAD])) ));
1504 fprintf(stderr,"Randomized steals: %u sparks, %u threads \n \t(Sparks: #%u (avg ntimes=%f; avg fl=%f) \n",
1505 rs_sp_count, rs_t_count, no_of_steals,
1506 (float)ntimes_total/(float)STG_MAX(no_of_steals,1),
1507 (float)fl_total/(float)STG_MAX(no_of_steals,1));
1508 fprintf(stderr,"Moved sparks: %d Withered sparks: %d (%.2f %%)\n",
1509 tot_sparks,withered_sparks,
1510 ( tot_sparks == 0 ? 0 :
1511 (float)(100*withered_sparks)/(float)(tot_sparks)) );
1512 /* Print statistics about priority sparking */
1513 if (RTSflags.GranFlags.DoPrioritySparking) {
/* NOTE(review): divides by tot_sq_probes without a zero guard (unlike
   the tot_sparks case above) -- verify it cannot be 0 here */
1514 fprintf(stderr,"About Priority Sparking:\n");
1515 fprintf(stderr," Total no. NewThreads: %d Avg. spark queue len: %.2f \n", tot_sq_probes, (float)tot_sq_len/(float)tot_sq_probes);
1517 /* Print statistics about priority sparking */
1518 if (RTSflags.GranFlags.DoPriorityScheduling) {
1519 fprintf(stderr,"About Priority Scheduling:\n");
1520 fprintf(stderr," Total no. of StartThreads: %d (non-end: %d) Avg. thread queue len: %.2f\n",
1521 tot_add_threads, non_end_add_threads,
1522 (float)tot_tq_len/(float)tot_add_threads);
1524 /* Print packet statistics if GUMM fetching is turned on */
1525 if (RTSflags.GranFlags.DoGUMMFetching) {
1526 fprintf(stderr,"Packet statistcs:\n");
1527 fprintf(stderr," Total no. of packets: %d Avg. packet size: %.2f \n", tot_packets, (float)tot_packet_size/(float)tot_packets);
1528 fprintf(stderr," Total no. of thunks: %d Avg. thunks/packet: %.2f \n", tot_thunks, (float)tot_thunks/(float)tot_packets);
1529 fprintf(stderr," Total no. of cuts: %d Avg. cuts/packet: %.2f\n", tot_cuts, (float)tot_cuts/(float)tot_packets);
1531 if (closure_queue_overflows>0)
1532 fprintf(stderr," Number of closure queue overflows: %u\n",
1533 closure_queue_overflows);
1538 if (RTSflags.GranFlags.PrintFetchMisses)
1539 fprintf(stderr,"Number of fetch misses: %d\n",fetch_misses);
1541 # if defined(GRAN_COUNT)
1542 fprintf(stderr,"Update count statistics:\n");
1543 fprintf(stderr," Total number of updates: %u\n",nUPDs);
1544 fprintf(stderr," Needed to awaken BQ: %u with avg BQ len of: %f\n",
1545 nUPDs_BQ,(float)BQ_lens/(float)nUPDs_BQ);
1546 fprintf(stderr," Number of PAPs: %u\n",nPAPs);
1549 #endif /* GRAN_CHECK */
/* final summary, always printed (unless suppressed above) */
1551 fprintf(stderr, "Simulation finished after @ %s @ cycles. Look at %s for details.\n",
1552 time_string,gr_filename);
1553 if (RTSflags.GranFlags.granSimStats)
/* PAR (GUM) variant of the activity-profile stream and file name;
   opened per-PE by init_gr_profiling below. */
1557 FILE *gr_file = NULL;
1558 char gr_filename[STATS_FILENAME_MAXLEN];
1560 /* I_ do_sp_profile = 0; */
/*
 * init_gr_profiling (PAR/GUM build): open the per-PE activity profile
 * (<prog>.<thisPE>.gr or .gb) and write its header: command line, RTS
 * options, wall-clock start time, and the initial simulated TIME stamp.
 * NOTE(review): elided view -- some lines are missing between statements.
 */
1563 init_gr_profiling(rts_argc, rts_argv, prog_argc, prog_argv)
1564 char *prog_argv[], *rts_argv[];
1565 int prog_argc, rts_argc;
1569 char *extension = RTSflags.ParFlags.granSimStats_Binary ? "gb" : "gr";
1571 sprintf(gr_filename, GR_FILENAME_FMT_GUM, prog_argv[0], thisPE, extension);
1573 if ((gr_file = fopen(gr_filename, "w")) == NULL) {
1574 fprintf(stderr, "Can't open activity report file %s\n", gr_filename);
/* header: program command line, then RTS options */
1578 for (i = 0; i < prog_argc; ++i) {
1579 fputs(prog_argv[i], gr_file);
1580 fputc(' ', gr_file);
1584 fputs("+RTS ", gr_file);
1586 for (i = 0; i < rts_argc; ++i) {
1587 fputs(rts_argv[i], gr_file);
1588 fputc(' ', gr_file);
1591 fputc('\n', gr_file);
1593 fputs("Start-Time: ", gr_file);
1594 fputs(time_str(), gr_file); /* defined in main.lc */
1595 fputc('\n', gr_file);
1597 startTime = CURRENT_TIME;
/* split large times into two %lu halves to dodge printf overflow */
1599 if (startTime > LL(1000000000)) {
1600 /* This shouldn't overflow twice */
/* NOTE(review): "%lu%lu" does not zero-pad the remainder, so e.g.
   1000000001 prints as "11" -- the remainder field likely needs %09lu */
1601 fprintf(gr_file, "PE %2u [%lu%lu]: TIME\n", thisPE,
1602 (TIME) (startTime / LL(1000000000)),
1603 (TIME) (startTime % LL(1000000000)));
1605 fprintf(gr_file, "PE %2u [%lu]: TIME\n", thisPE, (TIME) startTime);
/* binary profiles start with the width of TIME so readers can decode */
1608 if (RTSflags.ParFlags.granSimStats_Binary)
1609 grputw(sizeof(TIME));
1613 #endif /* GRAN || PAR */