2 % (c) The GRASP/AQUA Project, Glasgow University, 1995 - 1996
5 % Time-stamp: <Wed Jun 19 1996 16:38:25 Stardate: [-31]7683.25 hwloidl>
7 %************************************************************************
9 \section[GranSim.lc]{Granularity Simulator Routines}
11 %************************************************************************
13 Macros for dealing with the new and improved GA field for simulating
14 parallel execution. Based on @CONCURRENT@ package. The GA field now
15 contains a mask, where the n-th bit stands for the n-th processor,
16 where this data can be found. In case of multiple copies, several bits
17 are set. The total number of processors is bounded by @MAX_PROC@,
18 which should be <= the length of a word in bits. -- HWL
21 #if defined(GRAN) || defined(PAR)
23 #define NON_POSIX_SOURCE /* gettimeofday */
27 /* qaStaH nuq Sovpu' ngoqvam ghItlhpu'bogh nuv 'e' vItul */
28 # if defined(HAVE_GETCLOCK)
29 # if defined(HAVE_SYS_TIMERS_H)
31 # include <sys/timers.h>
34 # if defined(HAVE_GETTIMEOFDAY)
35 # if defined(HAVE_SYS_TIME_H)
36 # include <sys/time.h>
47 %****************************************************************
49 \subsection[GranSim-data-types]{Basic data types and set-up variables for GranSim}
51 %****************************************************************
55 /* See GranSim.lh for the definition of the enum gran_event_types */
56 char *gran_event_names[] = {
58 "STEALING", "STOLEN", "STOLEN(Q)",
59 "FETCH", "REPLY", "BLOCK", "RESUME", "RESUME(Q)",
60 "SCHEDULE", "DESCHEDULE",
62 "SPARK", "SPARKAT", "USED", "PRUNED", "EXPORTED", "ACQUIRED",
65 "SYSTEM_START", "SYSTEM_END", /* only for debugging */
70 char *proc_status_names[] = {
71 "Idle", "Sparking", "Starting", "Fetching", "Fishing", "Busy",
75 #define RAND_MAX 0x7fffffff /* 2^31-1 = 0x80000000 - 1 (see lrand48(3) */
77 unsigned CurrentProc = 0;
78 rtsBool IgnoreEvents = rtsFalse; /* HACK only for testing */
83 The following variables control the behaviour of GrAnSim. In general, there
84 is one RTS option for enabling each of these features. In getting the
85 desired setup of GranSim the following questions have to be answered:
87 \item {\em Which scheduling algorithm} to use (@RTSflags.GranFlags.DoFairSchedule@)?
88 Currently only unfair scheduling is supported.
89 \item What to do when remote data is fetched (@RTSflags.GranFlags.DoReScheduleOnFetch@)?
90 Either block and wait for the
91 data or reschedule and do some other work.
92 Thus, if this variable is true, asynchronous communication is
93 modelled. Block on fetch mainly makes sense for incremental fetching.
95 There is also a simplified fetch variant available
96 (@RTSflags.GranFlags.SimplifiedFetch@). This variant does not use events to model
97 communication. It is faster but the results will be less accurate.
98 \item How aggressive to be in getting work after a reschedule on fetch
99 (@RTSflags.GranFlags.FetchStrategy@)?
100 This is determined by the so-called {\em fetching
101 strategy\/}. Currently, there are four possibilities:
103 \item Only run a runnable thread.
104 \item Turn a spark into a thread, if necessary.
105 \item Steal a remote spark, if necessary.
106 \item Steal a runnable thread from another processor, if necessary.
108 The variable @RTSflags.GranFlags.FetchStrategy@ determines how far to go in this list
109 when rescheduling on a fetch.
110 \item Should sparks or threads be stolen first when looking for work
111 (@RTSflags.GranFlags.DoStealThreadsFirst@)?
112 The default is to steal sparks first (much cheaper).
113 \item Should the RTS use a lazy thread creation scheme
114 (@RTSflags.GranFlags.DoAlwaysCreateThreads@)? By default yes i.e.\ sparks are only
115 turned into threads when work is needed. Also note, that sparks
116 can be discarded by the RTS (this is done in the case of an overflow
117 of the spark pool). Setting @RTSflags.GranFlags.DoAlwaysCreateThreads@ to @True@ forces
118 the creation of threads at the next possibility (i.e.\ when new work
119 is demanded the next time).
120 \item Should data be fetched closure-by-closure or in packets
121 (@RTSflags.GranFlags.DoGUMMFetching@)? The default strategy is a GRIP-like incremental
122 (i.e.\ closure-by-closure) strategy. This makes sense in a
123 low-latency setting but is bad in a high-latency system. Setting
124 @RTSflags.GranFlags.DoGUMMFetching@ to @True@ enables bulk (packet) fetching. Other
125 parameters determine the size of the packets (@pack_buffer_size@) and the number of
126 thunks that should be put into one packet (@RTSflags.GranFlags.ThunksToPack@).
127 \item If there is no other possibility to find work, should runnable threads
128 be moved to an idle processor (@RTSflags.GranFlags.DoThreadMigration@)? In any case, the
129 RTS tries to get sparks (either local or remote ones) first. Thread
130 migration is very expensive, since a whole TSO has to be transferred
131 and probably data locality becomes worse in the process. Note, that
132 the closure, which will be evaluated next by that TSO is not
133 transferred together with the TSO (that might block another thread).
134 \item Should the RTS distinguish between sparks created by local nodes and
135 stolen sparks (@RTSflags.GranFlags.PreferSparksOfLocalNodes@)? The idea is to improve
136 data locality by preferring sparks of local nodes (it is more likely
137 that the data for those sparks is already on the local processor).
138 However, such a distinction also imposes an overhead on the spark
139 queue management, and typically a large number of sparks are
140 generated during execution. By default this variable is set to @False@.
141 \item Should the RTS use granularity control mechanisms? The idea of a
142 granularity control mechanism is to make use of granularity
143 information provided via annotation of the @par@ construct in order
144 to prefer bigger threads when either turning a spark into a thread or
145 when choosing the next thread to schedule. Currently, three such
146 mechanisms are implemented:
148 \item Cut-off: The granularity information is interpreted as a
149 priority. If a threshold priority is given to the RTS, then
150 only those sparks with a higher priority than the threshold
151 are actually created. Other sparks are immediately discarded.
152 This is similar to a usual cut-off mechanism often used in
153 parallel programs, where parallelism is only created if the
154 input data is large enough. With this option, the choice is
155 hidden in the RTS and only the threshold value has to be
156 provided as a parameter to the runtime system.
157 \item Priority Sparking: This mechanism keeps priorities for sparks
158 and chooses the spark with the highest priority when turning
159 a spark into a thread. After that the priority information is
160 discarded. The overhead of this mechanism comes from
161 maintaining a sorted spark queue.
162 \item Priority Scheduling: This mechanism keeps the granularity
163 information for threads, too. Thus, on each reschedule the
164 largest thread is chosen. This mechanism has a higher
165 overhead, as the thread queue is sorted, too.
172 /* Do we need to reschedule following a fetch? */
173 rtsBool NeedToReSchedule = rtsFalse;
174 TIME TimeOfNextEvent, EndOfTimeSlice; /* checked from the threaded world! */
175 /* I_ avoidedCS=0; */ /* Unused!! ToDo: Remake libraries and nuke this var */
177 /* For internal use (event statistics) only */
178 char *event_names[] =
179 { "STARTTHREAD", "CONTINUETHREAD", "RESUMETHREAD",
180 "MOVESPARK", "MOVETHREAD", "FINDWORK",
181 "FETCHNODE", "FETCHREPLY",
182 "GLOBALBLOCK", "UNBLOCKTHREAD"
185 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
187 I_ event_counts[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
190 I_ tot_low_pri_sparks = 0;
192 I_ rs_sp_count=0, rs_t_count=0, ntimes_total=0, fl_total=0, no_of_steals=0;
194 /* Variables for gathering packet and queue statistics */
195 I_ tot_packets = 0, tot_packet_size = 0, tot_cuts = 0, tot_thunks = 0;
196 I_ tot_sq_len = 0, tot_sq_probes = 0, tot_sparks = 0, withered_sparks = 0;
197 I_ tot_add_threads = 0, tot_tq_len = 0, non_end_add_threads = 0;
200 # if defined(GRAN_COUNT)
201 /* Count the number of updates that are done. Mainly for testing, but
202 could be useful for other purposes, too. */
203 I_ nUPDs = 0, nUPDs_old = 0, nUPDs_new = 0, nUPDs_BQ = 0, nPAPs = 0,
208 I_ HandleFetchRequest(P_, PROC, P_);
209 /* void HandleFetchRequest(P_, PROC, P_); changed for GUMMFetching */
210 static I_ blockFetch(P_ tso, PROC proc, P_ bh);
215 %****************************************************************
217 \subsection[global-address-op]{Global Address Operations}
219 %****************************************************************
221 These functions perform operations on the global-address (ga) part
222 of a closure. The ga is the only new field (1 word) in a closure introduced
223 by GrAnSim. It serves as a bitmask, indicating on which processor
224 the closure is residing. Since threads are described by a Thread State
225 Object (TSO), which is nothing but another kind of closure, this
226 scheme also gives placement information about threads.
228 A ga is just a bitmask, so the operations on them are mainly bitmask
229 manipulating functions. Note, that there are important macros like PROCS,
230 IS_LOCAL_TO etc. They are defined in @GrAnSim.lh@.
232 NOTE: In GrAnSim-light we don't maintain placement information. This
233 allows us to simulate an arbitrary number of processors. The price we have
234 to pay is the lack of costing any communication properly. In short,
235 GrAnSim-light is meant to reveal the maximal parallelism in a program.
236 From an implementation point of view the important thing is:
237 {\em GrAnSim-light does not maintain global-addresses}.
242 /* ga_to_proc returns the first processor marked in the bitmask ga.
243 Normally only one bit in ga should be set. But for PLCs all bits
244 are set. That shouldn't hurt since we only need IS_LOCAL_TO for PLCs */
250 for (i = 0; i < MAX_PROC && !IS_LOCAL_TO(ga, i); i++);
254 /* NB: This takes a *node* rather than just a ga as input */
257 { return (ga_to_proc(PROCS(node))); } /* Access the GA field of the node */
263 for(i=0, any_idle=rtsFalse;
264 !any_idle && i<RTSflags.GranFlags.proc;
265 any_idle = any_idle || IS_IDLE(i), i++)
273 i<RTSflags.GranFlags.proc;
274 j += IS_IDLE(i)?1:0, i++)
281 %****************************************************************
283 \subsection[event-queue]{The Global Event Queue}
285 %****************************************************************
287 The following routines implement an ADT of an event-queue (FIFO).
288 ToDo: Put that in its own file(?)
293 /* Pointer to the global event queue; events are currently malloc'ed */
294 eventq EventHd = NULL;
299 static eventq entry = NULL;
303 fprintf(stderr,"No next event\n");
310 # if defined(GRAN_CHECK) && defined(GRAN)
311 if (RTSflags.GranFlags.debug & 0x20) { /* count events */
313 event_counts[EVENT_TYPE(EventHd)]++;
318 EventHd = EVENT_NEXT(EventHd);
322 /* When getting the time of the next event we ignore CONTINUETHREAD events:
323 we don't want to be interrupted before the end of the current time slice
324 unless there is something important to handle.
327 get_time_of_next_event()
329 eventq event = EventHd;
331 while (event != NULL && EVENT_TYPE(event)==CONTINUETHREAD) {
332 event = EVENT_NEXT(event);
337 return (EVENT_TIME(event));
340 /* ToDo: replace malloc/free with a free list */
343 insert_event(newentry)
346 EVTTYPE evttype = EVENT_TYPE(newentry);
349 /* if(evttype >= CONTINUETHREAD1) evttype = CONTINUETHREAD; */
351 /* Search the queue and insert at the right point:
352 FINDWORK before everything, CONTINUETHREAD after everything.
354 This ensures that we find any available work after all threads have
355 executed the current cycle. This level of detail would normally be
356 irrelevant, but matters for ridiculously low latencies...
359 /* Changed the ordering: Now FINDWORK comes after everything but
360 CONTINUETHREAD. This makes sure that a MOVESPARK comes before a
361 FINDWORK. This is important when a GranSimSparkAt happens and
362 DoAlwaysCreateThreads is turned on. Also important if a GC occurs
363 when trying to build a new thread (see much_spark) -- HWL 02/96 */
368 for (event = EventHd, prev=&EventHd;
370 prev = &(EVENT_NEXT(event)), event = EVENT_NEXT(event)) {
372 case FINDWORK: if ( EVENT_TIME(event) < EVENT_TIME(newentry) ||
373 ( (EVENT_TIME(event) == EVENT_TIME(newentry)) &&
374 (EVENT_TYPE(event) != CONTINUETHREAD) ) )
378 case CONTINUETHREAD: if ( EVENT_TIME(event) <= EVENT_TIME(newentry) )
382 default: if ( EVENT_TIME(event) < EVENT_TIME(newentry) ||
383 ((EVENT_TIME(event) == EVENT_TIME(newentry)) &&
384 (EVENT_TYPE(event) == EVENT_TYPE(newentry))) )
389 /* Insert newentry here (i.e. before event) */
391 EVENT_NEXT(newentry) = event;
400 new_event(proc,creator,time,evttype,tso,node,spark)
407 eventq newentry = (eventq) stgMallocBytes(sizeof(struct event), "new_event");
409 EVENT_PROC(newentry) = proc;
410 EVENT_CREATOR(newentry) = creator;
411 EVENT_TIME(newentry) = time;
412 EVENT_TYPE(newentry) = evttype;
413 EVENT_TSO(newentry) = tso;
414 EVENT_NODE(newentry) = node;
415 EVENT_SPARK(newentry) = spark;
416 EVENT_GC_INFO(newentry) = 0;
417 EVENT_NEXT(newentry) = NULL;
419 insert_event(newentry);
423 prepend_event(eventq event) /* put event at beginning of EventQueue */
424 { /* only used for GC! */
425 EVENT_NEXT(event) = EventHd;
430 grab_event() /* undo prepend_event i.e. get the event */
431 { /* at the head of EventQ but don't free anything */
432 eventq event = EventHd;
434 if(EventHd == NULL) {
435 fprintf(stderr,"No next event (in grab_event)\n");
439 EventHd = EVENT_NEXT(EventHd);
448 char str_tso[16], str_node[16];
450 sprintf(str_tso,((EVENT_TSO(event)==Prelude_Z91Z93_closure) ? "______" : "%#6lx"),
452 sprintf(str_node,((EVENT_NODE(event)==Prelude_Z91Z93_closure) ? "______" : "%#6lx"),
456 fprintf(stderr,"Evt: NIL\n");
458 fprintf(stderr,"Evt: %s (%u), PE %u [%u], Time %lu, TSO %s (%x), node %s\n",
459 event_names[EVENT_TYPE(event)],EVENT_TYPE(event),
460 EVENT_PROC(event), EVENT_CREATOR(event), EVENT_TIME(event),
461 str_tso, TSO_ID(EVENT_TSO(event)), str_node
462 /*, EVENT_SPARK(event), EVENT_NEXT(event)*/ );
472 fprintf(stderr,"Event Queue with root at %x:\n",hd);
473 for (x=hd; x!=NULL; x=EVENT_NEXT(x)) {
484 sprintf(str,((SPARK_NODE(spark)==Prelude_Z91Z93_closure) ? "______" : "%#6lx"),
485 (W_) SPARK_NODE(spark));
488 fprintf(stderr,"Spark: NIL\n");
490 fprintf(stderr,"Spark: Node %8s, Name %#6lx, Exported %5s, Prev %#6x, Next %#6x\n",
491 str, SPARK_NAME(spark),
492 ((SPARK_EXPORTED(spark))?"True":"False"),
493 SPARK_PREV(spark), SPARK_NEXT(spark) );
502 fprintf(stderr,"Spark Queue with root at %x:\n",hd);
503 for (x=hd; x!=NULL; x=SPARK_NEXT(x)) {
512 %****************************************************************************
514 \subsection[entry-points]{Routines directly called from Haskell world}
516 %****************************************************************************
518 The @GranSim...@ routines in here are directly called via macros from the
521 First some auxiliary routines.
525 /* Take the current thread off the thread queue and thereby activate the */
526 /* next thread. It's assumed that the next ReSchedule after this uses */
527 /* NEW_THREAD as param. */
528 /* This fct is called from GranSimBlock and GranSimFetch */
531 ActivateNextThread (PROC proc)
533 ASSERT(RunnableThreadsHd[proc]!=Prelude_Z91Z93_closure);
535 RunnableThreadsHd[proc] = TSO_LINK(RunnableThreadsHd[proc]);
536 if(RunnableThreadsHd[proc]==Prelude_Z91Z93_closure) {
538 RunnableThreadsTl[proc] = Prelude_Z91Z93_closure;
540 CurrentTime[proc] += RTSflags.GranFlags.gran_threadcontextswitchtime;
541 if (RTSflags.GranFlags.granSimStats &&
542 (!RTSflags.GranFlags.Light || (RTSflags.GranFlags.debug & 0x20000)))
543 DumpRawGranEvent(proc,0,GR_SCHEDULE,RunnableThreadsHd[proc],
544 Prelude_Z91Z93_closure,0);
549 Now the main stg-called routines:
552 /* ------------------------------------------------------------------------ */
553 /* The following GranSim... fcts are stg-called from the threaded world. */
554 /* ------------------------------------------------------------------------ */
556 /* Called from HEAP_CHK -- NB: node and liveness are junk here now.
557 They are left temporarily to avoid complete recompilation.
561 GranSimAllocate(n,node,liveness)
566 TSO_ALLOCS(CurrentTSO) += n;
567 ++TSO_BASICBLOCKS(CurrentTSO);
569 if (RTSflags.GranFlags.granSimStats_Heap) {
570 DumpRawGranEvent(CurrentProc,0,GR_ALLOC,CurrentTSO,
571 Prelude_Z91Z93_closure,n);
574 TSO_EXECTIME(CurrentTSO) += RTSflags.GranFlags.gran_heapalloc_cost;
575 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_heapalloc_cost;
579 Subtract the values added above, if a heap check fails and
583 GranSimUnallocate(n,node,liveness)
588 TSO_ALLOCS(CurrentTSO) -= n;
589 --TSO_BASICBLOCKS(CurrentTSO);
591 TSO_EXECTIME(CurrentTSO) -= RTSflags.GranFlags.gran_heapalloc_cost;
592 CurrentTime[CurrentProc] -= RTSflags.GranFlags.gran_heapalloc_cost;
595 /* NB: We now inline this code via GRAN_EXEC rather than calling this fct */
597 GranSimExec(ariths,branches,loads,stores,floats)
598 W_ ariths,branches,loads,stores,floats;
600 W_ cost = RTSflags.GranFlags.gran_arith_cost*ariths +
601 RTSflags.GranFlags.gran_branch_cost*branches +
602 RTSflags.GranFlags.gran_load_cost * loads +
603 RTSflags.GranFlags.gran_store_cost*stores +
604 RTSflags.GranFlags.gran_float_cost*floats;
606 TSO_EXECTIME(CurrentTSO) += cost;
607 CurrentTime[CurrentProc] += cost;
612 Fetch the node if it isn't local
613 -- result indicates whether fetch has been done.
615 This is GRIP-style single item fetching.
618 /* This function in Threads.lc is only needed for SimplifiedFetch */
619 extern FetchNode PROTO((P_ node,PROC CurrentProc));
622 GranSimFetch(node /* , liveness_mask */ )
624 /* I_ liveness_mask; */
626 if (RTSflags.GranFlags.Light) {
627 /* Always reschedule in GrAnSim-Light to prevent one TSO from
629 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
630 CONTINUETHREAD,CurrentTSO,node,NULL);
632 NeedToReSchedule = rtsFalse;
636 /* Note: once a node has been fetched, this test will be passed */
637 if(!IS_LOCAL_TO(PROCS(node),CurrentProc))
639 /* Add mpacktime to the remote PE for the reply */
641 PROC p = where_is(node);
645 if ( ( RTSflags.GranFlags.debug & 0x40 ) &&
647 fprintf(stderr,"GranSimFetch: Trying to fetch from own processor%u\n", p);
648 # endif /* GRAN_CHECK */
650 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
651 /* NB: Fetch is counted on arrival (FETCHREPLY) */
653 if (RTSflags.GranFlags.SimplifiedFetch)
655 FetchNode(node,CurrentProc);
656 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime+
657 RTSflags.GranFlags.gran_fetchtime+
658 RTSflags.GranFlags.gran_munpacktime;
662 fetchtime = STG_MAX(CurrentTime[CurrentProc],CurrentTime[p]) +
663 RTSflags.GranFlags.gran_latency;
665 new_event(p,CurrentProc,fetchtime,FETCHNODE,CurrentTSO,node,NULL);
666 if (!RTSflags.GranFlags.DoReScheduleOnFetch)
667 MAKE_FETCHING(CurrentProc);
668 ++OutstandingFetches[CurrentProc];
670 if (fetchtime<TimeOfNextEvent)
671 TimeOfNextEvent = fetchtime;
674 TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[CurrentProc];
676 if (RTSflags.GranFlags.DoReScheduleOnFetch)
678 /* Remove CurrentTSO from the queue
679 -- assumes head of queue == CurrentTSO */
680 if(!RTSflags.GranFlags.DoFairSchedule)
682 if(RTSflags.GranFlags.granSimStats)
683 DumpRawGranEvent(CurrentProc,p,GR_FETCH,CurrentTSO,
686 ActivateNextThread(CurrentProc);
688 # if defined(GRAN_CHECK)
689 if (RTSflags.GranFlags.debug & 0x10) {
690 if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
691 fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
692 CurrentTSO,CurrentTime[CurrentProc]);
695 TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
699 TSO_LINK(CurrentTSO) = Prelude_Z91Z93_closure;
700 /* CurrentTSO = Prelude_Z91Z93_closure; */
702 /* ThreadQueueHd is now the next TSO to schedule or NULL */
703 /* CurrentTSO is pointed to by the FETCHNODE event */
705 else /* fair scheduling currently not supported -- HWL */
707 fprintf(stderr,"Reschedule-on-fetch is not yet compatible with fair scheduling\n");
711 else /* !RTSflags.GranFlags.DoReScheduleOnFetch */
713 /* Note: CurrentProc is still busy as it's blocked on fetch */
714 if(RTSflags.GranFlags.granSimStats)
715 DumpRawGranEvent(CurrentProc,p,GR_FETCH,CurrentTSO,node,0);
717 # if defined(GRAN_CHECK)
718 if (RTSflags.GranFlags.debug & 0x04)
719 BlockedOnFetch[CurrentProc] = CurrentTSO; /*- rtsTrue; -*/
720 if (RTSflags.GranFlags.debug & 0x10) {
721 if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
722 fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
723 CurrentTSO,CurrentTime[CurrentProc]);
726 TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
728 CurrentTSO = Prelude_Z91Z93_closure;
732 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
734 /* Rescheduling is necessary */
735 NeedToReSchedule = rtsTrue;
744 GranSimSpark(local,node)
748 /* ++SparksAvail; Nope; do that in add_to_spark_queue */
749 if(RTSflags.GranFlags.granSimStats_Sparks)
750 DumpRawGranEvent(CurrentProc,(PROC)0,SP_SPARK,Prelude_Z91Z93_closure,node,
751 spark_queue_len(CurrentProc,ADVISORY_POOL)-1);
753 /* Force the PE to take notice of the spark */
754 if(RTSflags.GranFlags.DoAlwaysCreateThreads) {
755 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
756 FINDWORK,Prelude_Z91Z93_closure,Prelude_Z91Z93_closure,NULL);
757 if (CurrentTime[CurrentProc]<TimeOfNextEvent)
758 TimeOfNextEvent = CurrentTime[CurrentProc];
762 ++TSO_LOCALSPARKS(CurrentTSO);
764 ++TSO_GLOBALSPARKS(CurrentTSO);
768 GranSimSparkAt(spark,where,identifier)
770 P_ where; /* This should be a node; alternatively could be a GA */
773 PROC p = where_is(where);
774 GranSimSparkAtAbs(spark,p,identifier);
778 GranSimSparkAtAbs(spark,proc,identifier)
785 if ( spark == (sparkq)NULL) /* Note: Granularity control might have */
786 return; /* turned a spark into a NULL. */
788 /* ++SparksAvail; Nope; do that in add_to_spark_queue */
789 if(RTSflags.GranFlags.granSimStats_Sparks)
790 DumpRawGranEvent(proc,0,SP_SPARKAT,Prelude_Z91Z93_closure,SPARK_NODE(spark),
791 spark_queue_len(proc,ADVISORY_POOL));
793 if (proc!=CurrentProc) {
794 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
795 exporttime = (CurrentTime[proc] > CurrentTime[CurrentProc]?
796 CurrentTime[proc]: CurrentTime[CurrentProc])
797 + RTSflags.GranFlags.gran_latency;
799 exporttime = CurrentTime[CurrentProc];
802 if ( RTSflags.GranFlags.Light )
803 /* Need CurrentTSO in event field to associate costs with creating
804 spark even in a GrAnSim Light setup */
805 new_event(proc,CurrentProc,exporttime,
806 MOVESPARK,CurrentTSO,Prelude_Z91Z93_closure,spark);
808 new_event(proc,CurrentProc,exporttime,
809 MOVESPARK,Prelude_Z91Z93_closure,Prelude_Z91Z93_closure,spark);
810 /* Bit of a hack to treat placed sparks the same as stolen sparks */
811 ++OutstandingFishes[proc];
813 /* Force the PE to take notice of the spark (FINDWORK is put after a
814 MOVESPARK into the sparkq!) */
815 if(RTSflags.GranFlags.DoAlwaysCreateThreads) {
816 new_event(CurrentProc,CurrentProc,exporttime+1,
817 FINDWORK,Prelude_Z91Z93_closure,Prelude_Z91Z93_closure,NULL);
820 if (exporttime<TimeOfNextEvent)
821 TimeOfNextEvent = exporttime;
823 if (proc!=CurrentProc) {
824 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
825 ++TSO_GLOBALSPARKS(CurrentTSO);
827 ++TSO_LOCALSPARKS(CurrentTSO);
831 /* This function handles local and global blocking */
832 /* It's called either from threaded code (RBH_entry, BH_entry etc) or */
833 /* from blockFetch when trying to fetch a BH or RBH */
836 GranSimBlock(P_ tso, PROC proc, P_ node)
838 PROC node_proc = where_is(node);
840 ASSERT(tso==RunnableThreadsHd[proc]);
842 if(RTSflags.GranFlags.granSimStats)
843 DumpRawGranEvent(proc,node_proc,GR_BLOCK,tso,node,0);
845 ++TSO_BLOCKCOUNT(tso);
846 /* Distinction between local and global block is made in blockFetch */
847 TSO_BLOCKEDAT(tso) = CurrentTime[proc];
849 CurrentTime[proc] += RTSflags.GranFlags.gran_threadqueuetime;
850 ActivateNextThread(proc);
851 TSO_LINK(tso) = Prelude_Z91Z93_closure; /* not really necessary; only for testing */
858 %****************************************************************************
860 \subsection[GrAnSim-profile]{Writing profiling info for GrAnSim}
862 %****************************************************************************
864 Event dumping routines.
869 * If you're not using GNUC and you're on a 32-bit machine, you're
870 * probably out of luck here. However, since CONCURRENT currently
871 * requires GNUC, I'm not too worried about it. --JSM
876 static ullong startTime = 0;
881 # ifdef HAVE_GETCLOCK
884 if (getclock(TIMEOFDAY, &tv) != 0) {
886 fprintf(stderr, "Clock failed\n");
889 return tv.tv_sec * LL(1000) + tv.tv_nsec / LL(1000000) - startTime;
891 # ifdef HAVE_GETTIMEOFDAY
894 if (gettimeofday(&tv, NULL) != 0) {
896 fprintf(stderr, "Clock failed\n");
899 return tv.tv_sec * LL(1000) + tv.tv_usec / LL(1000) - startTime;
902 if ((t = time(NULL)) == (time_t) -1) {
904 fprintf(stderr, "Clock failed\n");
914 #if defined(GRAN) || defined(PAR)
917 DumpGranEvent(name, tso)
918 enum gran_event_types name;
921 DumpRawGranEvent(CURRENT_PROC, (PROC)0, name, tso, Prelude_Z91Z93_closure, 0);
925 DumpRawGranEvent(proc, p, name, tso, node, len)
926 PROC proc, p; /* proc ... where it happens; p ... where node lives */
927 enum gran_event_types name;
932 char time_string[500], node_str[16]; /*ToDo: kill magic constants */
933 ullong_format_string(TIME_ON_PROC(proc), time_string, rtsFalse/*no commas!*/);
935 if (RTSflags.GranFlags.granSimStats_suppressed)
939 id = tso == NULL ? -1 : TSO_ID(tso);
940 if (node==Prelude_Z91Z93_closure)
941 strcpy(node_str,"________"); /* "Prelude_Z91Z93_closure"); */
943 sprintf(node_str,"0x%-6lx",node);
945 if (name > GR_EVENT_MAX)
948 if(GRANSIMSTATS_BINARY)
949 /* ToDo: fix code for writing binary GrAnSim statistics */
955 abort(); /* die please: a single word */
956 /* doesn't represent long long times */
957 grputw(TIME_ON_PROC(proc));
965 abort(); /* die please: a single word */
966 /* doesn't represent long long times */
967 grputw(TIME_ON_PROC(proc)); /* this line is bound to */
968 grputw(id); /* do the wrong thing */
973 abort(); /* die please: a single word */
974 /* doesn't represent long long times */
975 grputw(TIME_ON_PROC(proc));
982 /* fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n", */
983 /* using spark queue length as optional argument ^^^^^^^^^ */
984 fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[SN %u]\n",
985 /* using spark name as optional argument ^^^^^^ */
986 proc,time_string,gran_event_names[name],
987 id,node_str,(len & NEW_SPARKNAME_MASK));
994 fprintf(gr_file, "PE %2u [%s]: %-9s\t%lx \t%s\t(from %2u)\n",
995 proc, time_string, gran_event_names[name],
1002 fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx \n",
1003 proc,time_string,gran_event_names[name],id);
1006 fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t \t(by %2u)\n",
1007 proc,time_string,gran_event_names[name],id,p);
1010 fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t \tallocating %u words\n",
1011 proc,time_string,gran_event_names[name],id,len);
1014 fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n",
1015 proc,time_string,gran_event_names[name],id,node_str,len);
1021 /* Only needed for special dynamic spark labelling support */
1023 DumpStartEventAt(time, proc, p, name, tso, node, len)
1025 PROC proc, p; /* proc ... where it happens; p ... where node lives */
1026 enum gran_event_types name;
1031 char time_string[500], node_str[16]; /*ToDo: kill magic constants */
1032 ullong_format_string(time, time_string, rtsFalse/*no commas!*/);
1033 /* ^^^^ only important change to DumpRawGranEvent */
1034 if (RTSflags.GranFlags.granSimStats_suppressed)
1037 id = tso == NULL ? -1 : TSO_ID(tso);
1038 if (node==Nil_closure)
1039 strcpy(node_str,"________"); /* "Nil_closure"); */
1041 sprintf(node_str,"0x%-6lx",node);
1043 if (name > GR_EVENT_MAX)
1044 name = GR_EVENT_MAX;
1046 if(GRANSIMSTATS_BINARY)
1047 /* ToDo: fix code for writing binary GrAnSim statistics */
1053 abort(); /* die please: a single word */
1054 /* doesn't represent long long times */
1055 grputw(TIME_ON_PROC(proc));
1059 fprintf(stderr,"Error in DumpStartEventAt: event %s is not a START event\n",
1060 gran_event_names[name]);
1066 /* fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n", */
1067 /* using spark queue length as optional argument ^^^^^^^^^ */
1068 fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[SN %u]\n",
1069 /* using spark name as optional argument ^^^^^^ */
1070 proc,time_string,gran_event_names[name],
1071 id,node_str,(len & NEW_SPARKNAME_MASK));
1074 fprintf(stderr,"Error in DumpStartEventAt: event %s is not a START event\n",
1075 gran_event_names[name]);
1081 DumpGranInfo(proc, tso, mandatory_thread)
1084 rtsBool mandatory_thread;
1086 char time_string[500]; /* ToDo: kill magic constant */
1087 ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
1090 if (RTSflags.GranFlags.granSimStats_suppressed)
1094 if (GRANSIMSTATS_BINARY) {
1097 abort(); /* die please: a single word doesn't represent long long times */
1098 grputw(CURRENT_TIME); /* this line is bound to fail */
1099 grputw(TSO_ID(tso));
1114 grputw(TSO_SPARKNAME(tso));
1115 grputw(TSO_STARTEDAT(tso));
1116 grputw(TSO_EXPORTED(tso));
1117 grputw(TSO_BASICBLOCKS(tso));
1118 grputw(TSO_ALLOCS(tso));
1119 grputw(TSO_EXECTIME(tso));
1120 grputw(TSO_BLOCKTIME(tso));
1121 grputw(TSO_BLOCKCOUNT(tso));
1122 grputw(TSO_FETCHTIME(tso));
1123 grputw(TSO_FETCHCOUNT(tso));
1124 grputw(TSO_LOCALSPARKS(tso));
1125 grputw(TSO_GLOBALSPARKS(tso));
1127 grputw(mandatory_thread);
1131 * NB: DumpGranEvent cannot be used because PE may be wrong
1132 * (as well as the extra info)
1134 fprintf(gr_file, "PE %2u [%s]: END %lx, SN %lu, ST %lu, EXP %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu), LS %lu, GS %lu, MY %c\n"
1140 ,TSO_EXPORTED(tso) ? 'T' : 'F'
1141 ,TSO_BASICBLOCKS(tso)
1145 ,TSO_BLOCKCOUNT(tso)
1147 ,TSO_FETCHCOUNT(tso)
1148 ,TSO_LOCALSPARKS(tso)
1149 ,TSO_GLOBALSPARKS(tso)
1150 ,mandatory_thread ? 'T' : 'F'
1159 fprintf(stderr,"TSO 0x%lx, NAME 0x%lx, ID %lu, LINK 0x%lx, TYPE %s\n"
1164 ,TSO_TYPE(tso)==T_MAIN?"MAIN":
1165 TSO_TYPE(tso)==T_FAIL?"FAIL":
1166 TSO_TYPE(tso)==T_REQUIRED?"REQUIRED":
1167 TSO_TYPE(tso)==T_ADVISORY?"ADVISORY":
1171 fprintf(stderr,"PC (0x%lx,0x%lx), ARG (0x%lx), SWITCH %lx0x\n"
1175 /* ,TSO_ARG2(tso) */
1179 fprintf(gr_file,"TSO %lx: SN %lu, ST %lu, GBL %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu) LS %lu, GS %lu\n"
1183 ,TSO_EXPORTED(tso)?'T':'F'
1184 ,TSO_BASICBLOCKS(tso)
1188 ,TSO_BLOCKCOUNT(tso)
1190 ,TSO_FETCHCOUNT(tso)
1191 ,TSO_LOCALSPARKS(tso)
1192 ,TSO_GLOBALSPARKS(tso)
1197 Output a terminate event and an 8-byte time.
1205 if (RTSflags.GranFlags.granSimStats_suppressed)
1209 DumpGranEvent(GR_TERMINATE, Prelude_Z91Z93_closure);
1211 if (sizeof(TIME) == 4) {
1212 putc('\0', gr_file);
1213 putc('\0', gr_file);
1214 putc('\0', gr_file);
1215 putc('\0', gr_file);
1217 putc(v >> 56l, gr_file);
1218 putc((v >> 48l) & 0xffl, gr_file);
1219 putc((v >> 40l) & 0xffl, gr_file);
1220 putc((v >> 32l) & 0xffl, gr_file);
1222 putc((v >> 24l) & 0xffl, gr_file);
1223 putc((v >> 16l) & 0xffl, gr_file);
1224 putc((v >> 8l) & 0xffl, gr_file);
1225 putc(v & 0xffl, gr_file);
1229 Length-coded output: first 3 bits contain length coding
/*
 * Write v to gr_file in a variable-length, big-endian encoding; the high
 * bits of the first byte written tag the length of the encoding.
 * NOTE(review): the function header, the statement guarded by the first
 * test and some else/brace lines are not visible in this excerpt.
 */
1243 if (RTSflags.GranFlags.granSimStats_suppressed)
/* v fits in 6 bits: a single byte with no tag bits set. */
1247 if (v <= 0x3fl) { /* length v = 1 byte */
1248 fputc(v & 0x3f, gr_file);
/* v fits in 14 bits: two bytes, first byte tagged with 0x40. */
1249 } else if (v <= 0x3fffl) { /* length v = 2 byte */
1250 fputc((v >> 8l) | 0x40l, gr_file);
1251 fputc(v & 0xffl, gr_file);
/* v fits in 30 bits: four bytes, first byte tagged with 0x80. */
1252 } else if (v <= 0x3fffffffl) { /* length v = 4 byte */
1253 fputc((v >> 24l) | 0x80l, gr_file);
1254 fputc((v >> 16l) & 0xffl, gr_file);
1255 fputc((v >> 8l) & 0xffl, gr_file);
1256 fputc(v & 0xffl, gr_file);
/* Larger value with a 4-byte TIME: marker byte 0x70, then all four bytes of v. */
1257 } else if (sizeof(TIME) == 4) {
1258 fputc(0x70, gr_file);
1259 fputc((v >> 24l) & 0xffl, gr_file);
1260 fputc((v >> 16l) & 0xffl, gr_file);
1261 fputc((v >> 8l) & 0xffl, gr_file);
1262 fputc(v & 0xffl, gr_file);
/*
 * Remaining case (presumably a wider TIME -- confirm): if v is at most
 * 0x3fffffffffffff the top byte is written tagged with 0x60; the lines
 * after that write a 0x70 marker and the untagged top byte (presumably
 * the else branch -- the keyword is not visible here).
 */
1264 if (v <= 0x3fffffffffffffl)
1265 putc((v >> 56l) | 0x60l, gr_file);
1267 putc(0x70, gr_file);
1268 putc((v >> 56l) & 0xffl, gr_file);
/* The remaining seven bytes of v, most significant first. */
1271 putc((v >> 48l) & 0xffl, gr_file);
1272 putc((v >> 40l) & 0xffl, gr_file);
1273 putc((v >> 32l) & 0xffl, gr_file);
1274 putc((v >> 24l) & 0xffl, gr_file);
1275 putc((v >> 16l) & 0xffl, gr_file);
1276 putc((v >> 8l) & 0xffl, gr_file);
1277 putc(v & 0xffl, gr_file);
1281 #endif /* GRAN || PAR */
1284 %****************************************************************************
1286 \subsection[gr-simulation]{Granularity Simulation}
1288 %****************************************************************************
1290 General routines for GranSim. Mainly, startup and shutdown routines, called
/* Stream the report is written to; assigned by fopen() in init_gr_simulation. */
1295 FILE *gr_file = NULL;
/* Name of the report file; filled in by sprintf() in init_gr_simulation. */
1296 char gr_filename[STATS_FILENAME_MAXLEN];
1297 /* I_ do_gr_sim = 0; */ /* In GrAnSim setup always do simulation */
/*
 * Open the GranSim report file and write its textual header: the command
 * line, the start time and the simulation parameters read from
 * RTSflags.GranFlags.
 *
 * Parameters (K&R-style definition):
 *   rts_argc, rts_argv   - RTS arguments, echoed after "+RTS ".
 *   prog_argc, prog_argv - program arguments, echoed first; prog_argv[0]
 *                          is also used to build the report file name.
 *
 * NOTE(review): several lines (braces, else keywords, final ternary
 * alternatives, some fprintf openers) are not visible in this excerpt.
 */
1300 init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv)
1301 char *prog_argv[], *rts_argv[];
1302 int prog_argc, rts_argc;
/* File extension: "gb" when binary statistics are selected, "gr" otherwise. */
1306 char *extension = RTSflags.GranFlags.granSimStats_Binary ? "gb" : "gr";
/* The statement guarded by this test is not visible in this excerpt. */
1308 if (RTSflags.GranFlags.granSimStats_suppressed)
/*
 * NOTE(review): unbounded sprintf into gr_filename[STATS_FILENAME_MAXLEN];
 * a long prog_argv[0] could overflow the buffer -- confirm against
 * GR_FILENAME_FMT and consider a bounded formatter.
 */
1311 sprintf(gr_filename, GR_FILENAME_FMT, prog_argv[0], extension);
1313 if ((gr_file = fopen(gr_filename, "w")) == NULL) {
1314 fprintf(stderr, "Can't open granularity simulation report file %s\n", gr_filename);
1317 # if 0 /* that's obsolete now, I think -- HWL */
1318 if (RTSflags.GranFlags.DoReScheduleOnFetch)
1319 setbuf(gr_file, NULL);
/* Echo the program's command line ... */
1322 fputs("Granularity Simulation for ", gr_file);
1323 for (i = 0; i < prog_argc; ++i) {
1324 fputs(prog_argv[i], gr_file);
1325 fputc(' ', gr_file);
/* ... followed by the RTS arguments. */
1329 fputs("+RTS ", gr_file);
1331 for (i = 0; i < rts_argc; ++i) {
1332 fputs(rts_argv[i], gr_file);
1333 fputc(' ', gr_file);
1337 fputs("\nStart time: ", gr_file);
1338 fputs(time_str(), gr_file); /* defined in main.lc */
1339 fputc('\n', gr_file);
1341 fputs("\n\n--------------------\n\n", gr_file);
/* Section: scheduler, migration and fetch settings. */
1343 fputs("General Parameters:\n\n", gr_file);
/* GrAnSim-Light reports "PEs infinite"; the other fprintf prints GranFlags.proc. */
1345 if (RTSflags.GranFlags.Light)
1346 fprintf(gr_file, "GrAnSim-Light\nPEs infinite, %s Scheduler, %sMigrate Threads %s, %s\n",
1347 RTSflags.GranFlags.DoFairSchedule?"Fair":"Unfair",
1348 RTSflags.GranFlags.DoThreadMigration?"":"Don't ",
1349 RTSflags.GranFlags.DoThreadMigration && RTSflags.GranFlags.DoStealThreadsFirst?" Before Sparks":"",
1350 RTSflags.GranFlags.SimplifiedFetch ? "Simplified Fetch" :
1351 RTSflags.GranFlags.DoReScheduleOnFetch ? "Reschedule on Fetch" :
1354 fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads %s, %s\n",
1355 RTSflags.GranFlags.proc,RTSflags.GranFlags.DoFairSchedule?"Fair":"Unfair",
1356 RTSflags.GranFlags.DoThreadMigration?"":"Don't ",
1357 RTSflags.GranFlags.DoThreadMigration && RTSflags.GranFlags.DoStealThreadsFirst?" Before Sparks":"",
1358 RTSflags.GranFlags.SimplifiedFetch ? "Simplified Fetch" :
1359 RTSflags.GranFlags.DoReScheduleOnFetch ? "Reschedule on Fetch" :
/* Fetching mode: bulk (fixed thunk count or as many as fit) or incremental. */
1362 if (RTSflags.GranFlags.DoGUMMFetching)
1363 if (RTSflags.GranFlags.ThunksToPack)
1364 fprintf(gr_file, "Bulk Fetching: Fetch %d Thunks in Each Packet (Packet Size = %d closures)\n",
1365 RTSflags.GranFlags.ThunksToPack,
1366 RTSflags.GranFlags.packBufferSize);
1368 fprintf(gr_file, "Bulk Fetching: Fetch as many closures as possible (Packet Size = %d closures)\n",
1369 RTSflags.GranFlags.packBufferSize);
1371 fprintf(gr_file, "Incremental Fetching: Fetch Exactly One Closure in Each Packet\n");
/* Print the numeric fetch strategy with a description for values 0..4. */
1373 fprintf(gr_file, "Fetch Strategy(%u):If outstanding fetches %s\n",
1374 RTSflags.GranFlags.FetchStrategy,
1375 RTSflags.GranFlags.FetchStrategy==0 ?
1376 " block (block-on-fetch)":
1377 RTSflags.GranFlags.FetchStrategy==1 ?
1378 "only run runnable threads":
1379 RTSflags.GranFlags.FetchStrategy==2 ?
1380 "create threads only from local sparks":
1381 RTSflags.GranFlags.FetchStrategy==3 ?
1382 "create threads from local or global sparks":
1383 RTSflags.GranFlags.FetchStrategy==4 ?
1384 "create sparks and steal threads if necessary":
1387 if (RTSflags.GranFlags.DoPrioritySparking)
1388 fprintf(gr_file, "Priority Sparking (i.e. keep sparks ordered by priority)\n");
1390 if (RTSflags.GranFlags.DoPriorityScheduling)
1391 fprintf(gr_file, "Priority Scheduling (i.e. keep threads ordered by priority)\n");
/* Thread overhead settings. */
1393 fprintf(gr_file, "Thread Creation Time %lu, Thread Queue Time %lu\n",
1394 RTSflags.GranFlags.gran_threadcreatetime,
1395 RTSflags.GranFlags.gran_threadqueuetime);
1396 fprintf(gr_file, "Thread DeSchedule Time %lu, Thread Schedule Time %lu\n",
1397 RTSflags.GranFlags.gran_threaddescheduletime,
1398 RTSflags.GranFlags.gran_threadscheduletime);
1399 fprintf(gr_file, "Thread Context-Switch Time %lu\n",
1400 RTSflags.GranFlags.gran_threadcontextswitchtime);
1401 fputs("\n\n--------------------\n\n", gr_file);
/* Section: communication settings (latencies and message pack/unpack times). */
1403 fputs("Communication Metrics:\n\n", gr_file);
1405 "Latency %lu (1st) %lu (rest), Fetch %lu, Notify %lu (Global) %lu (Local)\n",
1406 RTSflags.GranFlags.gran_latency,
1407 RTSflags.GranFlags.gran_additional_latency,
1408 RTSflags.GranFlags.gran_fetchtime,
1409 RTSflags.GranFlags.gran_gunblocktime,
1410 RTSflags.GranFlags.gran_lunblocktime);
1412 "Message Creation %lu (+ %lu after send), Message Read %lu\n",
1413 RTSflags.GranFlags.gran_mpacktime,
1414 RTSflags.GranFlags.gran_mtidytime,
1415 RTSflags.GranFlags.gran_munpacktime);
1416 fputs("\n\n--------------------\n\n", gr_file);
/* Section: per-instruction-class settings. */
1418 fputs("Instruction Metrics:\n\n", gr_file);
1419 fprintf(gr_file, "Arith %lu, Branch %lu, Load %lu, Store %lu, Float %lu, Alloc %lu\n",
1420 RTSflags.GranFlags.gran_arith_cost,
1421 RTSflags.GranFlags.gran_branch_cost,
1422 RTSflags.GranFlags.gran_load_cost,
1423 RTSflags.GranFlags.gran_store_cost,
1424 RTSflags.GranFlags.gran_float_cost,
1425 RTSflags.GranFlags.gran_heapalloc_cost);
1426 fputs("\n\n++++++++++++++++++++\n\n", gr_file);
/* In binary mode, record sizeof(TIME) after the header. */
1428 if (RTSflags.GranFlags.granSimStats_Binary)
1429 grputw(sizeof(TIME));
/*
 * Shutdown routine of the simulation: formats CURRENT_TIME, optionally
 * prints debugging statistics to stderr, and reports where the detailed
 * results were written.
 * NOTE(review): braces, some arguments and the statements guarded by the
 * first and last tests are not visible in this excerpt.
 */
1435 end_gr_simulation(STG_NO_ARGS)
1437 char time_string[500]; /* ToDo: kill magic constant */
1438 ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
1440 if (RTSflags.GranFlags.granSimStats_suppressed)
1443 #if defined(GRAN_CHECK) && defined(GRAN)
1444 /* Print event stats */
/* Only when bit 0x20 of the debug flags is set. */
1445 if (RTSflags.GranFlags.debug & 0x20) {
1448 fprintf(stderr,"Event statistics (number of events: %d):\n",
/*
 * Per event type: count, share of all events, and share of all events
 * excluding CONTINUETHREAD (reported as 0 for CONTINUETHREAD itself).
 */
1450 for (i=0; i<=MAX_EVENT; i++) {
1451 fprintf(stderr," %s (%d): \t%ld \t%f%%\t%f%%\n",
1452 event_names[i],i,event_counts[i],
1453 (float)(100*event_counts[i])/(float)(noOfEvents),
1454 (i==CONTINUETHREAD ? 0.0 :
1455 (float)(100*(event_counts[i])/(float)(noOfEvents-event_counts[CONTINUETHREAD])) ));
/* STG_MAX(no_of_steals,1) keeps the averages from dividing by zero. */
1457 fprintf(stderr,"Randomized steals: %u sparks, %u threads \n \t(Sparks: #%u (avg ntimes=%f; avg fl=%f) \n",
1458 rs_sp_count, rs_t_count, no_of_steals,
1459 (float)ntimes_total/(float)STG_MAX(no_of_steals,1),
1460 (float)fl_total/(float)STG_MAX(no_of_steals,1));
1461 fprintf(stderr,"Moved sparks: %d Withered sparks: %d (%.2f %%)\n",
1462 tot_sparks,withered_sparks,
1463 ( tot_sparks == 0 ? 0 :
1464 (float)(100*withered_sparks)/(float)(tot_sparks)) );
1465 /* Print statistics about priority sparking */
/* NOTE(review): the average divides by tot_sq_probes without a zero guard. */
1466 if (RTSflags.GranFlags.DoPrioritySparking) {
1467 fprintf(stderr,"About Priority Sparking:\n");
1468 fprintf(stderr," Total no. NewThreads: %d Avg. spark queue len: %.2f \n", tot_sq_probes, (float)tot_sq_len/(float)tot_sq_probes);
1470 /* Print statistics about priority scheduling */
/* NOTE(review): the average divides by tot_add_threads without a zero guard. */
1471 if (RTSflags.GranFlags.DoPriorityScheduling) {
1472 fprintf(stderr,"About Priority Scheduling:\n");
1473 fprintf(stderr," Total no. of StartThreads: %d (non-end: %d) Avg. thread queue len: %.2f\n",
1474 tot_add_threads, non_end_add_threads,
1475 (float)tot_tq_len/(float)tot_add_threads);
1477 /* Print packet statistics if GUMM fetching is turned on */
/*
 * NOTE(review): the message below spells "statistcs"; it is runtime output,
 * so it is left untouched here.  The averages divide by tot_packets
 * without a zero guard.
 */
1478 if (RTSflags.GranFlags.DoGUMMFetching) {
1479 fprintf(stderr,"Packet statistcs:\n");
1480 fprintf(stderr," Total no. of packets: %d Avg. packet size: %.2f \n", tot_packets, (float)tot_packet_size/(float)tot_packets);
1481 fprintf(stderr," Total no. of thunks: %d Avg. thunks/packet: %.2f \n", tot_thunks, (float)tot_thunks/(float)tot_packets);
1482 fprintf(stderr," Total no. of cuts: %d Avg. cuts/packet: %.2f\n", tot_cuts, (float)tot_cuts/(float)tot_packets);
1484 if (closure_queue_overflows>0)
1485 fprintf(stderr," Number of closure queue overflows: %u\n",
1486 closure_queue_overflows);
1491 if (RTSflags.GranFlags.PrintFetchMisses)
1492 fprintf(stderr,"Number of fetch misses: %d\n",fetch_misses);
/* Update counters, compiled in only with GRAN_COUNT. */
1494 # if defined(GRAN_COUNT)
1495 fprintf(stderr,"Update count statistics:\n");
1496 fprintf(stderr," Total number of updates: %u\n",nUPDs);
1497 fprintf(stderr," Needed to awaken BQ: %u with avg BQ len of: %f\n",
1498 nUPDs_BQ,(float)BQ_lens/(float)nUPDs_BQ);
1499 fprintf(stderr," Number of PAPs: %u\n",nPAPs);
1502 #endif /* GRAN_CHECK */
/* Final summary: elapsed simulated time and the name of the report file. */
1504 fprintf(stderr, "Simulation finished after @ %s @ cycles. Look at %s for details.\n",
1505 time_string,gr_filename);
1506 if (RTSflags.GranFlags.granSimStats)
/*
 * Report stream and file name used by init_gr_profiling.
 * NOTE(review): these repeat definitions that appear earlier in the file;
 * presumably the two sets sit in different conditional-compilation
 * branches -- confirm, the directives are not visible in this excerpt.
 */
1510 FILE *gr_file = NULL;
1511 char gr_filename[STATS_FILENAME_MAXLEN];
1513 /* I_ do_sp_profile = 0; */
/*
 * Open the per-PE activity report file and write its header: the command
 * line (program and RTS arguments), the start time string and a
 * "PE n [time]: TIME" line with the numeric start time.
 *
 * Parameters (K&R-style definition):
 *   rts_argc, rts_argv   - RTS arguments, echoed after "+RTS ".
 *   prog_argc, prog_argv - program arguments, echoed first; prog_argv[0]
 *                          is also used to build the report file name.
 *
 * NOTE(review): braces and a few other lines are not visible in this
 * excerpt; every visible line is kept as is except for the fix below.
 *
 * Fix: a start time above 10^9 is printed as quotient and remainder of a
 * division by 10^9, back to back.  The remainder was printed with plain
 * %lu, dropping its leading zeros (1000000005 came out as "15").  It is
 * now printed with %09lu so the two parts concatenate to the real value.
 */
1516 init_gr_profiling(rts_argc, rts_argv, prog_argc, prog_argv)
1517 char *prog_argv[], *rts_argv[];
1518 int prog_argc, rts_argc;
/* File extension: "gb" when binary statistics are selected, "gr" otherwise. */
1522 char *extension = RTSflags.ParFlags.granSimStats_Binary ? "gb" : "gr";
/*
 * NOTE(review): unbounded sprintf into gr_filename[STATS_FILENAME_MAXLEN];
 * confirm that GR_FILENAME_FMT_GUM cannot overflow it for long program names.
 */
1524 sprintf(gr_filename, GR_FILENAME_FMT_GUM, prog_argv[0], thisPE, extension);
1526 if ((gr_file = fopen(gr_filename, "w")) == NULL) {
1527 fprintf(stderr, "Can't open activity report file %s\n", gr_filename);
/* Echo the program's command line ... */
1531 for (i = 0; i < prog_argc; ++i) {
1532 fputs(prog_argv[i], gr_file);
1533 fputc(' ', gr_file);
/* ... followed by the RTS arguments. */
1537 fputs("+RTS ", gr_file);
1539 for (i = 0; i < rts_argc; ++i) {
1540 fputs(rts_argv[i], gr_file);
1541 fputc(' ', gr_file);
1544 fputc('\n', gr_file);
1546 fputs("Start-Time: ", gr_file);
1547 fputs(time_str(), gr_file); /* defined in main.lc */
1548 fputc('\n', gr_file);
1550 startTime = CURRENT_TIME;
/* Large values are split at 10^9 so each part fits a %lu conversion. */
1552 if (startTime > LL(1000000000)) {
1553 /* This shouldn't overflow twice */
1554 fprintf(gr_file, "PE %2u [%lu%09lu]: TIME\n", thisPE,
1555 (TIME) (startTime / LL(1000000000)),
1556 (TIME) (startTime % LL(1000000000)));
1558 fprintf(gr_file, "PE %2u [%lu]: TIME\n", thisPE, (TIME) startTime);
/* In binary mode, record sizeof(TIME) after the header. */
1561 if (RTSflags.ParFlags.granSimStats_Binary)
1562 grputw(sizeof(TIME));
1566 #endif /* GRAN || PAR */