%
-% (c) The GRASP/AQUA Project, Glasgow University, 1995
+% (c) The GRASP/AQUA Project, Glasgow University, 1995 - 1996
+% Hans Wolfgang Loidl
+%
+% Time-stamp: <Sun Oct 19 1997 23:39:59 Stardate: [-30]0119.72 hwloidl>
%
%************************************************************************
%* *
\begin{code}
#if defined(GRAN) || defined(PAR)
+#ifndef _AIX
#define NON_POSIX_SOURCE /* gettimeofday */
+#endif
-#include "rtsdefs.h"
+#include "rtsdefs.h"
+
+/* qaStaH nuq Sovpu' ngoqvam ghItlhpu'bogh nuv 'e' vItul */
+# if defined(HAVE_GETCLOCK)
+# if defined(HAVE_SYS_TIMERS_H)
+# define POSIX_4D9 1
+# include <sys/timers.h>
+# endif
+# else
+# if defined(HAVE_GETTIMEOFDAY)
+# if defined(HAVE_SYS_TIME_H)
+# include <sys/time.h>
+# endif
+# else
+# ifdef HAVE_TIME_H
+# include <time.h>
+# endif
+# endif
+# endif
+\end{code}
-#ifdef HAVE_GETCLOCK
+%****************************************************************
+%* *
+\subsection[GranSim-data-types]{Basic data types and set-up variables for GranSim}
+%* *
+%****************************************************************
-#ifdef HAVE_SYS_TIMERS_H
-#define POSIX_4D9 1
-#include <sys/timers.h>
-#endif
+\begin{code}
-#else
-#ifdef HAVE_GETTIMEOFDAY
+/* See GranSim.lh for the definition of the enum gran_event_types */
+char *gran_event_names[] = {
+ "START", "START(Q)",
+ "STEALING", "STOLEN", "STOLEN(Q)",
+ "FETCH", "REPLY", "BLOCK", "RESUME", "RESUME(Q)",
+ "SCHEDULE", "DESCHEDULE",
+ "END",
+ "SPARK", "SPARKAT", "USED", "PRUNED", "EXPORTED", "ACQUIRED",
+ "ALLOC",
+ "TERMINATE",
+ "SYSTEM_START", "SYSTEM_END", /* only for debugging */
+ "??"
+};
-#ifdef HAVE_SYS_TIME_H
-#include <sys/time.h>
-#endif
+#if defined(GRAN)
+char *proc_status_names[] = {
+ "Idle", "Sparking", "Starting", "Fetching", "Fishing", "Busy",
+ "UnknownProcStatus"
+};
-#else
+#define RAND_MAX 0x7fffffff /* 2^31-1 = 0x80000000 - 1 (see lrand48(3) */
-#ifdef HAVE_TIME_H
-#include <time.h>
-#endif
+unsigned CurrentProc = 0;
+rtsBool IgnoreEvents = rtsFalse; /* HACK only for testing */
+#if 0 && (defined(GCap) || defined(GCgn))
+closq ex_RBH_q = NULL;
#endif
-#endif
+#endif /* GRAN */
+\end{code}
+
+The following variables control the behaviour of GrAnSim. In general, there
+is one RTS option for enabling each of these features. In getting the
+desired setup of GranSim the following questions have to be answered:
+\begin{itemize}
+\item {\em Which scheduling algorithm} to use (@RTSflags.GranFlags.DoFairSchedule@)?
+ Currently only unfair scheduling is supported.
+\item What to do when remote data is fetched (@RTSflags.GranFlags.DoReScheduleOnFetch@)?
+ Either block and wait for the
+ data or reschedule and do some other work.
+ Thus, if this variable is true, asynchronous communication is
+ modelled. Block on fetch mainly makes sense for incremental fetching.
+
+ There is also a simplified fetch variant available
+ (@RTSflags.GranFlags.SimplifiedFetch@). This variant does not use events to model
+ communication. It is faster but the results will be less accurate.
+\item How aggressive to be in getting work after a reschedule on fetch
+ (@RTSflags.GranFlags.FetchStrategy@)?
+ This is determined by the so-called {\em fetching
+ strategy\/}. Currently, there are four possibilities:
+ \begin{enumerate}
+ \item Only run a runnable thread.
+ \item Turn a spark into a thread, if necessary.
+ \item Steal a remote spark, if necessary.
+ \item Steal a runnable thread from another processor, if necessary.
+ \end{itemize}
+ The variable @RTSflags.GranFlags.FetchStrategy@ determines how far to go in this list
+ when rescheduling on a fetch.
+\item Should sparks or threads be stolen first when looking for work
+ (@RTSflags.GranFlags.DoStealThreadsFirst@)?
+ The default is to steal sparks first (much cheaper).
+\item Should the RTS use a lazy thread creation scheme
+ (@RTSflags.GranFlags.DoAlwaysCreateThreads@)? By default yes i.e.\ sparks are only
+ turned into threads when work is needed. Also note, that sparks
+ can be discarded by the RTS (this is done in the case of an overflow
+ of the spark pool). Setting @RTSflags.GranFlags.DoAlwaysCreateThreads@ to @True@ forces
+ the creation of threads at the next possibility (i.e.\ when new work
+ is demanded the next time).
+\item Should data be fetched closure-by-closure or in packets
+ (@RTSflags.GranFlags.DoGUMMFetching@)? The default strategy is a GRIP-like incremental
+ (i.e.\ closure-by-closure) strategy. This makes sense in a
+ low-latency setting but is bad in a high-latency system. Setting
+ @RTSflags.GranFlags.DoGUMMFetching@ to @True@ enables bulk (packet) fetching. Other
+ parameters determine the size of the packets (@pack_buffer_size@) and the number of
+ thunks that should be put into one packet (@RTSflags.GranFlags.ThunksToPack@).
+\item If there is no other possibility to find work, should runnable threads
+ be moved to an idle processor (@RTSflags.GranFlags.DoThreadMigration@)? In any case, the
+ RTS tried to get sparks (either local or remote ones) first. Thread
+ migration is very expensive, since a whole TSO has to be transferred
+ and probably data locality becomes worse in the process. Note, that
+ the closure, which will be evaluated next by that TSO is not
+ transferred together with the TSO (that might block another thread).
+\item Should the RTS distinguish between sparks created by local nodes and
+ stolen sparks (@RTSflags.GranFlags.PreferSparksOfLocalNodes@)? The idea is to improve
+ data locality by preferring sparks of local nodes (it is more likely
+ that the data for those sparks is already on the local processor).
+ However, such a distinction also imposes an overhead on the spark
+ queue management, and typically a large number of sparks are
+ generated during execution. By default this variable is set to @False@.
+\item Should the RTS use granularity control mechanisms? The idea of a
+ granularity control mechanism is to make use of granularity
+ information provided via annotation of the @par@ construct in order
+ to prefer bigger threads when either turning a spark into a thread or
+ when choosing the next thread to schedule. Currently, three such
+ mechanisms are implemented:
+ \begin{itemize}
+ \item Cut-off: The granularity information is interpreted as a
+ priority. If a threshold priority is given to the RTS, then
+ only those sparks with a higher priority than the threshold
+ are actually created. Other sparks are immediately discarded.
+ This is similar to a usual cut-off mechanism often used in
+ parallel programs, where parallelism is only created if the
+ input data is lage enough. With this option, the choice is
+ hidden in the RTS and only the threshold value has to be
+ provided as a parameter to the runtime system.
+ \item Priority Sparking: This mechanism keeps priorities for sparks
+ and chooses the spark with the highest priority when turning
+ a spark into a thread. After that the priority information is
+ discarded. The overhead of this mechanism comes from
+ maintaining a sorted spark queue.
+ \item Priority Scheduling: This mechanism keeps the granularity
+ information for threads, to. Thus, on each reschedule the
+ largest thread is chosen. This mechanism has a higher
+ overhead, as the thread queue is sorted, too.
+ \end{itemize}
+\end{itemize}
-void grputw PROTO((TIME v));
+\begin{code}
+#if defined(GRAN)
+
+/* Do we need to reschedule following a fetch? */
+rtsBool NeedToReSchedule = rtsFalse;
+TIME TimeOfNextEvent, EndOfTimeSlice; /* checked from the threaded world! */
+/* I_ avoidedCS=0; */ /* Unused!! ToDo: Remake libraries and nuke this var */
+
+/* For internal use (event statistics) only */
+char *event_names[] =
+ { "STARTTHREAD", "CONTINUETHREAD", "RESUMETHREAD",
+ "MOVESPARK", "MOVETHREAD", "FINDWORK",
+ "FETCHNODE", "FETCHREPLY",
+ "GLOBALBLOCK", "UNBLOCKTHREAD"
+ };
+
+# if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+I_ noOfEvents = 0;
+I_ event_counts[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+I_ fetch_misses = 0;
+I_ tot_low_pri_sparks = 0;
+
+I_ rs_sp_count=0, rs_t_count=0, ntimes_total=0, fl_total=0, no_of_steals=0;
+
+/* Variables for gathering packet and queue statistics */
+I_ tot_packets = 0, tot_packet_size = 0, tot_cuts = 0, tot_thunks = 0;
+I_ tot_sq_len = 0, tot_sq_probes = 0, tot_sparks = 0, withered_sparks = 0;
+I_ tot_add_threads = 0, tot_tq_len = 0, non_end_add_threads = 0;
+# endif
+
+# if defined(GRAN_COUNT)
+/* Count the number of updates that are done. Mainly for testing, but
+ could be useful for other purposes, too. */
+I_ nUPDs = 0, nUPDs_old = 0, nUPDs_new = 0, nUPDs_BQ = 0, nPAPs = 0,
+ BQ_lens = 0;
+# endif
+
+/* Prototypes */
+I_ HandleFetchRequest(P_, PROC, P_);
+/* void HandleFetchRequest(P_, PROC, P_); changed for GUMMFeching */
+static I_ blockFetch(P_ tso, PROC proc, P_ bh);
+
+#endif /* GRAN */
+\end{code}
+%****************************************************************
+%* *
+\subsection[global-address-op]{Global Address Operations}
+%* *
+%****************************************************************
+
+These functions perform operations on the global-address (ga) part
+of a closure. The ga is the only new field (1 word) in a closure introduced
+by GrAnSim. It serves as a bitmask, indicating on which processor
+the closure is residing. Since threads are described by Thread State
+Object (TSO), which is nothing but another kind of closure, this
+scheme allows gives placement information about threads.
+
+A ga is just a bitmask, so the operations on them are mainly bitmask
+manipulating functions. Note, that there are important macros like PROCS,
+IS_LOCAL_TO etc. They are defined in @GrAnSim.lh@.
+
+NOTE: In GrAnSim-light we don't maintain placement information. This
+allows to simulate an arbitrary number of processors. The price we have
+to be is the lack of costing any communication properly. In short,
+GrAnSim-light is meant to reveal the maximal parallelism in a program.
+From an implementation point of view the important thing is:
+{\em GrAnSim-light does not maintain global-addresses}.
+
+\begin{code}
#if defined(GRAN)
-/* Pointer to the event queue; events are currently malloc'ed */
-static eventq EventHd = NULL;
+/* ga_to_proc returns the first processor marked in the bitmask ga.
+ Normally only one bit in ga should be set. But for PLCs all bits
+ are set. That shouldn't hurt since we only need IS_LOCAL_TO for PLCs */
+
PROC
ga_to_proc(W_ ga)
{
PROC i;
-
for (i = 0; i < MAX_PROC && !IS_LOCAL_TO(ga, i); i++);
-
return (i);
}
where_is(P_ node)
{ return (ga_to_proc(PROCS(node))); } /* Access the GA field of the node */
-#if 0
-PROC
-no_of_copies(W_ ga) /* DaH lo'lu'Qo'; currently unused */
-{
- PROC i, n;
-
- for (i = 0, n = 0; i < MAX_PROC; i++)
- if (IS_LOCAL_TO(ga, i))
- n++;;
+rtsBool
+any_idle() {
+ I_ i;
+ rtsBool any_idle;
+ for(i=0, any_idle=rtsFalse;
+ !any_idle && i<RTSflags.GranFlags.proc;
+ any_idle = any_idle || IS_IDLE(i), i++)
+ {} ;
+}
- return (n);
+int
+idlers() {
+ I_ i, j;
+ for(i=0, j=0;
+ i<RTSflags.GranFlags.proc;
+ j += IS_IDLE(i)?1:0, i++)
+ {} ;
+ return j;
}
-#endif
+#endif /* GRAN */
+\end{code}
+
+%****************************************************************
+%* *
+\subsection[event-queue]{The Global Event Queue}
+%* *
+%****************************************************************
+
+The following routines implement an ADT of an event-queue (FIFO).
+ToDo: Put that in an own file(?)
+
+\begin{code}
+#if defined(GRAN)
+
+/* Pointer to the global event queue; events are currently malloc'ed */
+eventq EventHd = NULL;
eventq
-getnextevent()
+get_next_event()
{
static eventq entry = NULL;
if(EventHd == NULL)
{
- fprintf(stderr,"No next event\n");
- exit(EXIT_FAILURE); /* why not EXIT??? WDP 95/07 */
+ fprintf(stderr,"No next event. This may be caused by a circular data dependency in the program.\n");
+ EXIT(EXIT_FAILURE);
}
if(entry != NULL)
free((char *)entry);
-#if defined(GRAN_CHECK) && defined(GRAN)
- if (debug & 0x20) { /* count events */
+# if defined(GRAN_CHECK) && defined(GRAN)
+ if (RTSflags.GranFlags.debug & 0x20) { /* count events */
noOfEvents++;
- event_counts[(EVENT_TYPE(EventHd)>=CONTINUETHREAD1) ?
- CONTINUETHREAD :
- EVENT_TYPE(EventHd)]++;
+ event_counts[EVENT_TYPE(EventHd)]++;
}
-#endif
+# endif
entry = EventHd;
EventHd = EVENT_NEXT(EventHd);
return(entry);
}
-/* ToDo: replace malloc/free with a free list */
+/* When getting the time of the next event we ignore CONTINUETHREAD events:
+ we don't want to be interrupted before the end of the current time slice
+ unless there is something important to handle.
+*/
+TIME
+get_time_of_next_event()
+{
+ eventq event = EventHd;
+
+ while (event != NULL && EVENT_TYPE(event)==CONTINUETHREAD) {
+ event = EVENT_NEXT(event);
+ }
+ if(event == NULL)
+ return ((TIME) 0);
+ else
+ return (EVENT_TIME(event));
+}
-/* NB: newevent unused (WDP 95/07) */
+/* ToDo: replace malloc/free with a free list */
static
-newevent(proc,creator,time,evttype,tso,node,spark)
- PROC proc, creator;
- TIME time;
- EVTTYPE evttype;
- P_ tso, node;
- sparkq spark;
+insert_event(newentry)
+eventq newentry;
+{
+ EVTTYPE evttype = EVENT_TYPE(newentry);
+ eventq event, *prev;
+
+ /* if(evttype >= CONTINUETHREAD1) evttype = CONTINUETHREAD; */
+
+ /* Search the queue and insert at the right point:
+ FINDWORK before everything, CONTINUETHREAD after everything.
+
+ This ensures that we find any available work after all threads have
+ executed the current cycle. This level of detail would normally be
+ irrelevant, but matters for ridiculously low latencies...
+ */
+
+ /* Changed the ordering: Now FINDWORK comes after everything but
+ CONTINUETHREAD. This makes sure that a MOVESPARK comes before a
+ FINDWORK. This is important when a GranSimSparkAt happens and
+ DoAlwaysCreateThreads is turned on. Also important if a GC occurs
+ when trying to build a new thread (see much_spark) -- HWL 02/96 */
+
+ if(EventHd == NULL)
+ EventHd = newentry;
+ else {
+ for (event = EventHd, prev=&EventHd;
+ event != NULL;
+ prev = &(EVENT_NEXT(event)), event = EVENT_NEXT(event)) {
+ switch (evttype) {
+ case FINDWORK: if ( EVENT_TIME(event) < EVENT_TIME(newentry) ||
+ ( (EVENT_TIME(event) == EVENT_TIME(newentry)) &&
+ (EVENT_TYPE(event) != CONTINUETHREAD) ) )
+ continue;
+ else
+ break;
+ case CONTINUETHREAD: if ( EVENT_TIME(event) <= EVENT_TIME(newentry) )
+ continue;
+ else
+ break;
+ default: if ( EVENT_TIME(event) < EVENT_TIME(newentry) ||
+ ((EVENT_TIME(event) == EVENT_TIME(newentry)) &&
+ (EVENT_TYPE(event) == EVENT_TYPE(newentry))) )
+ continue;
+ else
+ break;
+ }
+ /* Insert newentry here (i.e. before event) */
+ *prev = newentry;
+ EVENT_NEXT(newentry) = event;
+ break;
+ }
+ if (event == NULL)
+ *prev = newentry;
+ }
+}
+
+void
+new_event(proc,creator,time,evttype,tso,node,spark)
+PROC proc, creator;
+TIME time;
+EVTTYPE evttype;
+P_ tso, node;
+sparkq spark;
{
- eventq newentry = (eventq) xmalloc(sizeof(struct event));
+ eventq newentry = (eventq) stgMallocBytes(sizeof(struct event), "new_event");
EVENT_PROC(newentry) = proc;
EVENT_CREATOR(newentry) = creator;
EVENT_TSO(newentry) = tso;
EVENT_NODE(newentry) = node;
EVENT_SPARK(newentry) = spark;
+ EVENT_GC_INFO(newentry) = 0;
EVENT_NEXT(newentry) = NULL;
insert_event(newentry);
}
-#endif /* GRAN ; HWL */
+void
+prepend_event(eventq event) /* put event at beginning of EventQueue */
+{ /* only used for GC! */
+ EVENT_NEXT(event) = EventHd;
+ EventHd = event;
+}
+
+eventq
+grab_event() /* undo prepend_event i.e. get the event */
+{ /* at the head of EventQ but don't free anything */
+ eventq event = EventHd;
+
+ if(EventHd == NULL) {
+ fprintf(stderr,"No next event (in grab_event). This may be caused by a circular data dependency in the program.\n");
+ EXIT(EXIT_FAILURE);
+ }
+
+ EventHd = EVENT_NEXT(EventHd);
+ return (event);
+}
+
+void
+traverse_eventq_for_gc()
+{
+ eventq event = EventHd;
+ W_ bufsize;
+ P_ closure, tso, buffer, bufptr;
+ PROC proc, creator;
+
+ /* Traverse eventq and replace every FETCHREPLY by a FETCHNODE for the
+ orig closure (root of packed graph). This means that a graph, which is
+ between processors at the time of GC is fetched again at the time when
+ it would have arrived, had there been no GC. Slightly inaccurate but
+ safe for GC.
+ This is only needed for GUM style fetchng. */
+ if (!RTSflags.GranFlags.DoGUMMFetching)
+ return;
+
+ for(event = EventHd; event!=NULL; event=EVENT_NEXT(event)) {
+ if (EVENT_TYPE(event)==FETCHREPLY) {
+ buffer = EVENT_NODE(event);
+ ASSERT(buffer[PACK_FLAG_LOCN]==MAGIC_PACK_FLAG); /* It's a pack buffer */
+ bufsize = buffer[PACK_SIZE_LOCN];
+ closure= (P_)buffer[PACK_HDR_SIZE];
+ tso = (P_)buffer[PACK_TSO_LOCN];
+ proc = EVENT_PROC(event);
+ creator = EVENT_CREATOR(event); /* similar to unpacking */
+ for (bufptr=buffer+PACK_HDR_SIZE; bufptr<(buffer+bufsize);
+ bufptr++) {
+ if ( (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_SPEC_RBH_TYPE) ||
+ (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_GEN_RBH_TYPE) ) {
+ convertFromRBH((P_)*bufptr);
+ }
+ }
+ free(buffer);
+ EVENT_TYPE(event) = FETCHNODE;
+ EVENT_PROC(event) = creator;
+ EVENT_CREATOR(event) = proc;
+ EVENT_NODE(event) = closure;
+ EVENT_TSO(event) = tso;
+ EVENT_GC_INFO(event) = 0;
+ }
+ }
+}
+
+void
+print_event(event)
+eventq event;
+{
+
+ char str_tso[16], str_node[16];
+
+ sprintf(str_tso,((EVENT_TSO(event)==PrelBase_Z91Z93_closure) ? "______" : "%#6lx"),
+ EVENT_TSO(event));
+ sprintf(str_node,((EVENT_NODE(event)==PrelBase_Z91Z93_closure) ? "______" : "%#6lx"),
+ EVENT_NODE(event));
+
+ if (event==NULL)
+ fprintf(stderr,"Evt: NIL\n");
+ else
+ fprintf(stderr,"Evt: %s (%u), PE %u [%u], Time %lu, TSO %s (%x), node %s\n",
+ event_names[EVENT_TYPE(event)],EVENT_TYPE(event),
+ EVENT_PROC(event), EVENT_CREATOR(event), EVENT_TIME(event),
+ str_tso, TSO_ID(EVENT_TSO(event)), str_node
+ /*, EVENT_SPARK(event), EVENT_NEXT(event)*/ );
+
+}
+
+void
+print_eventq(hd)
+eventq hd;
+{
+ eventq x;
+
+ fprintf(stderr,"Event Queue with root at %x:\n",hd);
+ for (x=hd; x!=NULL; x=EVENT_NEXT(x)) {
+ print_event(x);
+ }
+}
+
+void
+print_spark(spark)
+ sparkq spark;
+{
+ char str[16];
+
+ sprintf(str,((SPARK_NODE(spark)==PrelBase_Z91Z93_closure) ? "______" : "%#6lx"),
+ (W_) SPARK_NODE(spark));
+
+ if (spark==NULL)
+ fprintf(stderr,"Spark: NIL\n");
+ else
+ fprintf(stderr,"Spark: Node %8s, Name %#6lx, Exported %5s, Prev %#6x, Next %#6x\n",
+ str, SPARK_NAME(spark),
+ ((SPARK_EXPORTED(spark))?"True":"False"),
+ SPARK_PREV(spark), SPARK_NEXT(spark) );
+}
+
+void
+print_sparkq(hd)
+sparkq hd;
+{
+ sparkq x;
+
+ fprintf(stderr,"Spark Queue with root at %x:\n",hd);
+ for (x=hd; x!=NULL; x=SPARK_NEXT(x)) {
+ print_spark(x);
+ }
+}
+
+
+#endif /* GRAN */
\end{code}
%****************************************************************************
%
-\subsection[GrAnSim-profile]{Writing profiling info for GrAnSim}
+\subsection[entry-points]{Routines directly called from Haskell world}
%
%****************************************************************************
-Event dumping routines.
+The @GranSim...@ routines in here are directly called via macros from the
+threaded world.
+
+First some auxiliary routines.
\begin{code}
+#if defined(GRAN)
+/* Take the current thread off the thread queue and thereby activate the */
+/* next thread. It's assumed that the next ReSchedule after this uses */
+/* NEW_THREAD as param. */
+/* This fct is called from GranSimBlock and GranSimFetch */
-FILE *gr_file = NULL;
+void
+ActivateNextThread (PROC proc)
+{
+ ASSERT(RunnableThreadsHd[proc]!=PrelBase_Z91Z93_closure);
+
+ RunnableThreadsHd[proc] = TSO_LINK(RunnableThreadsHd[proc]);
+ if(RunnableThreadsHd[proc]==PrelBase_Z91Z93_closure) {
+ MAKE_IDLE(proc);
+ RunnableThreadsTl[proc] = PrelBase_Z91Z93_closure;
+ } else {
+ CurrentTime[proc] += RTSflags.GranFlags.gran_threadcontextswitchtime;
+ if (RTSflags.GranFlags.granSimStats &&
+ (!RTSflags.GranFlags.Light || (RTSflags.GranFlags.debug & 0x20000)))
+ DumpRawGranEvent(proc,0,GR_SCHEDULE,RunnableThreadsHd[proc],
+ PrelBase_Z91Z93_closure,0);
+ }
+}
+\end{code}
-char *gran_event_names[] = {
- "START", "START(Q)",
- "STEALING", "STOLEN", "STOLEN(Q)",
- "FETCH", "REPLY", "BLOCK", "RESUME", "RESUME(Q)",
- "SCHEDULE", "DESCHEDULE",
- "END",
- "SPARK", "SPARKAT", "USED", "PRUNED", "EXPORTED", "ACQUIRED",
- "TERMINATE",
- "??"
-};
+Now the main stg-called routines:
+
+\begin{code}
+/* ------------------------------------------------------------------------ */
+/* The following GranSim... fcts are stg-called from the threaded world. */
+/* ------------------------------------------------------------------------ */
+
+/* Called from HEAP_CHK -- NB: node and liveness are junk here now.
+ They are left temporarily to avoid complete recompilation.
+ KH
+*/
+void
+GranSimAllocate(n,node,liveness)
+I_ n;
+P_ node;
+W_ liveness;
+{
+ TSO_ALLOCS(CurrentTSO) += n;
+ ++TSO_BASICBLOCKS(CurrentTSO);
+
+ if (RTSflags.GranFlags.granSimStats_Heap) {
+ DumpRawGranEvent(CurrentProc,0,GR_ALLOC,CurrentTSO,
+ PrelBase_Z91Z93_closure,n);
+ }
+
+ TSO_EXECTIME(CurrentTSO) += RTSflags.GranFlags.gran_heapalloc_cost;
+ CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_heapalloc_cost;
+}
+
+/*
+ Subtract the values added above, if a heap check fails and
+ so has to be redone.
+*/
+void
+GranSimUnallocate(n,node,liveness)
+W_ n;
+P_ node;
+W_ liveness;
+{
+ TSO_ALLOCS(CurrentTSO) -= n;
+ --TSO_BASICBLOCKS(CurrentTSO);
+
+ TSO_EXECTIME(CurrentTSO) -= RTSflags.GranFlags.gran_heapalloc_cost;
+ CurrentTime[CurrentProc] -= RTSflags.GranFlags.gran_heapalloc_cost;
+}
+
+/* NB: We now inline this code via GRAN_EXEC rather than calling this fct */
+void
+GranSimExec(ariths,branches,loads,stores,floats)
+W_ ariths,branches,loads,stores,floats;
+{
+ W_ cost = RTSflags.GranFlags.gran_arith_cost*ariths +
+ RTSflags.GranFlags.gran_branch_cost*branches +
+ RTSflags.GranFlags.gran_load_cost * loads +
+ RTSflags.GranFlags.gran_store_cost*stores +
+ RTSflags.GranFlags.gran_float_cost*floats;
+
+ TSO_EXECTIME(CurrentTSO) += cost;
+ CurrentTime[CurrentProc] += cost;
+}
+
+
+/*
+ Fetch the node if it isn't local
+ -- result indicates whether fetch has been done.
+
+ This is GRIP-style single item fetching.
+*/
+
+/* This function in Threads.lc is only needed for SimplifiedFetch */
+FetchNode PROTO((P_ node,PROC CurrentProc));
+
+I_
+GranSimFetch(node /* , liveness_mask */ )
+P_ node;
+/* I_ liveness_mask; */
+{
+ if (RTSflags.GranFlags.Light) {
+ /* Always reschedule in GrAnSim-Light to prevent one TSO from
+ running off too far
+ new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ CONTINUETHREAD,CurrentTSO,node,NULL);
+ */
+ NeedToReSchedule = rtsFalse;
+ return(0);
+ }
+
+ /* Note: once a node has been fetched, this test will be passed */
+ if(!IS_LOCAL_TO(PROCS(node),CurrentProc))
+ {
+ /* Add mpacktime to the remote PE for the reply */
+ {
+ PROC p = where_is(node);
+ TIME fetchtime;
+
+# ifdef GRAN_CHECK
+ if ( ( RTSflags.GranFlags.debug & 0x40 ) &&
+ p == CurrentProc )
+ fprintf(stderr,"GranSimFetch: Trying to fetch from own processor%u\n", p);
+# endif /* GRAN_CHECK */
+
+ CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
+ /* NB: Fetch is counted on arrival (FETCHREPLY) */
+
+ if (RTSflags.GranFlags.SimplifiedFetch)
+ {
+ FetchNode(node,CurrentProc);
+ CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime+
+ RTSflags.GranFlags.gran_fetchtime+
+ RTSflags.GranFlags.gran_munpacktime;
+ return(1);
+ }
+
+ fetchtime = STG_MAX(CurrentTime[CurrentProc],CurrentTime[p]) +
+ RTSflags.GranFlags.gran_latency;
+
+ new_event(p,CurrentProc,fetchtime,FETCHNODE,CurrentTSO,node,NULL);
+ if (!RTSflags.GranFlags.DoReScheduleOnFetch)
+ MAKE_FETCHING(CurrentProc);
+ ++OutstandingFetches[CurrentProc];
+
+ if (fetchtime<TimeOfNextEvent)
+ TimeOfNextEvent = fetchtime;
+
+ /* About to block */
+ TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[CurrentProc];
+
+ if (RTSflags.GranFlags.DoReScheduleOnFetch)
+ {
+ /* Remove CurrentTSO from the queue
+ -- assumes head of queue == CurrentTSO */
+ if(!RTSflags.GranFlags.DoFairSchedule)
+ {
+ if(RTSflags.GranFlags.granSimStats)
+ DumpRawGranEvent(CurrentProc,p,GR_FETCH,CurrentTSO,
+ node,0);
+
+ ActivateNextThread(CurrentProc);
+
+# if defined(GRAN_CHECK)
+ if (RTSflags.GranFlags.debug & 0x10) {
+ if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
+ fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
+ CurrentTSO,CurrentTime[CurrentProc]);
+ EXIT(EXIT_FAILURE);
+ } else {
+ TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
+ }
+ }
+# endif
+ TSO_LINK(CurrentTSO) = PrelBase_Z91Z93_closure;
+ /* CurrentTSO = PrelBase_Z91Z93_closure; */
+
+ /* ThreadQueueHd is now the next TSO to schedule or NULL */
+ /* CurrentTSO is pointed to by the FETCHNODE event */
+ }
+ else /* fair scheduling currently not supported -- HWL */
+ {
+ fprintf(stderr,"Reschedule-on-fetch is not yet compatible with fair scheduling\n");
+ EXIT(EXIT_FAILURE);
+ }
+ }
+ else /* !RTSflags.GranFlags.DoReScheduleOnFetch */
+ {
+ /* Note: CurrentProc is still busy as it's blocked on fetch */
+ if(RTSflags.GranFlags.granSimStats)
+ DumpRawGranEvent(CurrentProc,p,GR_FETCH,CurrentTSO,node,0);
+
+# if defined(GRAN_CHECK)
+ if (RTSflags.GranFlags.debug & 0x04)
+ BlockedOnFetch[CurrentProc] = CurrentTSO; /*- rtsTrue; -*/
+ if (RTSflags.GranFlags.debug & 0x10) {
+ if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
+ fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
+ CurrentTSO,CurrentTime[CurrentProc]);
+ EXIT(EXIT_FAILURE);
+ } else {
+ TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
+ }
+ CurrentTSO = PrelBase_Z91Z93_closure;
+ }
+# endif
+ }
+ CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
+
+ /* Rescheduling is necessary */
+ NeedToReSchedule = rtsTrue;
+
+ return(1);
+ }
+ }
+ return(0);
+}
+
+void
+GranSimSpark(local,node)
+W_ local;
+P_ node;
+{
+ /* ++SparksAvail; Nope; do that in add_to_spark_queue */
+ if(RTSflags.GranFlags.granSimStats_Sparks)
+ DumpRawGranEvent(CurrentProc,(PROC)0,SP_SPARK,PrelBase_Z91Z93_closure,node,
+ spark_queue_len(CurrentProc,ADVISORY_POOL)-1);
+
+ /* Force the PE to take notice of the spark */
+ if(RTSflags.GranFlags.DoAlwaysCreateThreads) {
+ new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
+ if (CurrentTime[CurrentProc]<TimeOfNextEvent)
+ TimeOfNextEvent = CurrentTime[CurrentProc];
+ }
+
+ if(local)
+ ++TSO_LOCALSPARKS(CurrentTSO);
+ else
+ ++TSO_GLOBALSPARKS(CurrentTSO);
+}
+
+void
+GranSimSparkAt(spark,where,identifier)
+sparkq spark;
+P_ where; /* This should be a node; alternatively could be a GA */
+I_ identifier;
+{
+ PROC p = where_is(where);
+ GranSimSparkAtAbs(spark,p,identifier);
+}
+
+void
+GranSimSparkAtAbs(spark,proc,identifier)
+sparkq spark;
+PROC proc;
+I_ identifier;
+{
+ TIME exporttime;
+
+ if ( spark == (sparkq)NULL) /* Note: Granularity control might have */
+ return; /* turned a spark into a NULL. */
+
+ /* ++SparksAvail; Nope; do that in add_to_spark_queue */
+ if(RTSflags.GranFlags.granSimStats_Sparks)
+ DumpRawGranEvent(proc,0,SP_SPARKAT,PrelBase_Z91Z93_closure,SPARK_NODE(spark),
+ spark_queue_len(proc,ADVISORY_POOL));
+
+ if (proc!=CurrentProc) {
+ CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
+ exporttime = (CurrentTime[proc] > CurrentTime[CurrentProc]?
+ CurrentTime[proc]: CurrentTime[CurrentProc])
+ + RTSflags.GranFlags.gran_latency;
+ } else {
+ exporttime = CurrentTime[CurrentProc];
+ }
+
+ if ( RTSflags.GranFlags.Light )
+ /* Need CurrentTSO in event field to associate costs with creating
+ spark even in a GrAnSim Light setup */
+ new_event(proc,CurrentProc,exporttime,
+ MOVESPARK,CurrentTSO,PrelBase_Z91Z93_closure,spark);
+ else
+ new_event(proc,CurrentProc,exporttime,
+ MOVESPARK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,spark);
+ /* Bit of a hack to treat placed sparks the same as stolen sparks */
+ ++OutstandingFishes[proc];
+
+ /* Force the PE to take notice of the spark (FINDWORK is put after a
+ MOVESPARK into the sparkq!) */
+ if(RTSflags.GranFlags.DoAlwaysCreateThreads) {
+ new_event(CurrentProc,CurrentProc,exporttime+1,
+ FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
+ }
+
+ if (exporttime<TimeOfNextEvent)
+ TimeOfNextEvent = exporttime;
+
+ if (proc!=CurrentProc) {
+ CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
+ ++TSO_GLOBALSPARKS(CurrentTSO);
+ } else {
+ ++TSO_LOCALSPARKS(CurrentTSO);
+ }
+}
+
+/* This function handles local and global blocking */
+/* It's called either from threaded code (RBH_entry, BH_entry etc) or */
+/* from blockFetch when trying to fetch an BH or RBH */
+
+void
+GranSimBlock(P_ tso, PROC proc, P_ node)
+{
+ PROC node_proc = where_is(node);
+
+ ASSERT(tso==RunnableThreadsHd[proc]);
+
+ if(RTSflags.GranFlags.granSimStats)
+ DumpRawGranEvent(proc,node_proc,GR_BLOCK,tso,node,0);
+
+ ++TSO_BLOCKCOUNT(tso);
+ /* Distinction between local and global block is made in blockFetch */
+ TSO_BLOCKEDAT(tso) = CurrentTime[proc];
+
+ CurrentTime[proc] += RTSflags.GranFlags.gran_threadqueuetime;
+ ActivateNextThread(proc);
+ TSO_LINK(tso) = PrelBase_Z91Z93_closure; /* not really necessary; only for testing */
+}
+
+#endif /* GRAN */
+
+\end{code}
+
+%****************************************************************************
+%
+\subsection[GrAnSim-profile]{Writing profiling info for GrAnSim}
+%
+%****************************************************************************
+
+Event dumping routines.
+
+\begin{code}
/*
* If you're not using GNUC and you're on a 32-bit machine, you're
#endif /* !GRAN */
+#if defined(GRAN) || defined(PAR)
void
DumpGranEvent(name, tso)
enum gran_event_types name;
P_ tso;
{
- DumpRawGranEvent(CURRENT_PROC, name, TSO_ID(tso));
-}
-
-void
-DumpSparkGranEvent(name, id)
-enum gran_event_types name;
-W_ id;
-{
- DumpRawGranEvent(CURRENT_PROC, name, id);
+ DumpRawGranEvent(CURRENT_PROC, (PROC)0, name, tso, PrelBase_Z91Z93_closure, 0);
}
void
-DumpGranEventAndNode(name, tso, node, proc)
+DumpRawGranEvent(proc, p, name, tso, node, len)
+PROC proc, p; /* proc ... where it happens; p ... where node lives */
enum gran_event_types name;
P_ tso, node;
-PROC proc;
+I_ len;
{
- PROC pe = CURRENT_PROC;
- W_ id;
+ W_ id;
+ char time_string[500], node_str[16]; /*ToDo: kill magic constants */
+ ullong_format_string(TIME_ON_PROC(proc), time_string, rtsFalse/*no commas!*/);
+#if defined(GRAN)
+ if (RTSflags.GranFlags.granSimStats_suppressed)
+ return;
+#endif
- char time_string[500]; /*ToDo: kill magic constant */
- ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
+ id = tso == NULL ? -1 : TSO_ID(tso);
+ if (node==PrelBase_Z91Z93_closure)
+ strcpy(node_str,"________"); /* "PrelBase_Z91Z93_closure"); */
+ else
+ sprintf(node_str,"0x%-6lx",node);
-#ifdef PAR
- id = tso == NULL ? -1 : TSO_ID(tso);
-#else
- id = TSO_ID(tso);
-#endif
- if (name > GR_EVENT_MAX)
+ if (name > GR_EVENT_MAX)
name = GR_EVENT_MAX;
- if (do_gr_binary) {
- grputw(name);
- grputw(pe);
- abort(); /* die please: a single word doesn't represent long long times */
- grputw(CURRENT_TIME); /* this line is bound to do the wrong thing */
- grputw(id);
- } else
- fprintf(gr_file, "PE %2u [%s]: %s %lx \t0x%lx\t(from %2u)\n",
- pe, time_string, gran_event_names[name], id, (W_) node, proc);
+ if(GRANSIMSTATS_BINARY)
+ /* ToDo: fix code for writing binary GrAnSim statistics */
+ switch (name) {
+ case GR_START:
+ case GR_STARTQ:
+ grputw(name);
+ grputw(proc);
+ abort(); /* die please: a single word */
+ /* doesn't represent long long times */
+ grputw(TIME_ON_PROC(proc));
+ grputw((W_)node);
+ break;
+ case GR_FETCH:
+ case GR_REPLY:
+ case GR_BLOCK:
+ grputw(name);
+ grputw(proc);
+ abort(); /* die please: a single word */
+ /* doesn't represent long long times */
+ grputw(TIME_ON_PROC(proc)); /* this line is bound to */
+ grputw(id); /* do the wrong thing */
+ break;
+ default:
+ grputw(name);
+ grputw(proc);
+ abort(); /* die please: a single word */
+ /* doesn't represent long long times */
+ grputw(TIME_ON_PROC(proc));
+ grputw((W_)node);
+ }
+ else
+ switch (name) {
+ case GR_START:
+ case GR_STARTQ:
+ /* fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n", */
+ /* using spark queue length as optional argument ^^^^^^^^^ */
+ fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[SN %u]\n",
+ /* using spark name as optional argument ^^^^^^ */
+ proc,time_string,gran_event_names[name],
+ id,node_str,len);
+ break;
+ case GR_FETCH:
+ case GR_REPLY:
+ case GR_BLOCK:
+ case GR_STOLEN:
+ case GR_STOLENQ:
+ fprintf(gr_file, "PE %2u [%s]: %-9s\t%lx \t%s\t(from %2u)\n",
+ proc, time_string, gran_event_names[name],
+ id,node_str,p);
+ break;
+ case GR_RESUME:
+ case GR_RESUMEQ:
+ case GR_SCHEDULE:
+ case GR_DESCHEDULE:
+ fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx \n",
+ proc,time_string,gran_event_names[name],id);
+ break;
+ case GR_STEALING:
+ fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t \t(by %2u)\n",
+ proc,time_string,gran_event_names[name],id,p);
+ break;
+ case GR_ALLOC:
+ fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t \tallocating %u words\n",
+ proc,time_string,gran_event_names[name],id,len);
+ break;
+ default:
+ fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n",
+ proc,time_string,gran_event_names[name],id,node_str,len);
+ }
}
+
+#if defined(GRAN)
+/* Only needed for special dynamic spark labelling support */
void
-DumpRawGranEvent(pe, name, id)
-PROC pe;
+DumpStartEventAt(time, proc, p, name, tso, node, len)
+TIME time;
+PROC proc, p; /* proc ... where it happens; p ... where node lives */
enum gran_event_types name;
-W_ id;
+P_ tso, node;
+I_ len;
{
- char time_string[500]; /* ToDo: kill magic constant */
-
- if (name > GR_EVENT_MAX)
+ W_ id;
+ char time_string[500], node_str[16]; /*ToDo: kill magic constants */
+ ullong_format_string(time, time_string, rtsFalse/*no commas!*/);
+ /* ^^^^ only important change to DumpRawGranEvent */
+ if (RTSflags.GranFlags.granSimStats_suppressed)
+ return;
+
+ id = tso == NULL ? -1 : TSO_ID(tso);
+ if (node==PrelBase_Z91Z93_closure)
+ strcpy(node_str,"________"); /* "Z91Z93_closure"); */
+ else
+ sprintf(node_str,"0x%-6lx",node);
+
+ if (name > GR_EVENT_MAX)
name = GR_EVENT_MAX;
- ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
-
- if (do_gr_binary) {
- grputw(name);
- grputw(pe);
- abort(); /* die please: a single word doesn't represent long long times */
- grputw(CURRENT_TIME); /* this line is bound to fail */
- grputw(id);
- } else
- fprintf(gr_file, "PE %2u [%s]: %s %lx\n",
- pe, time_string, gran_event_names[name], id);
+ if(GRANSIMSTATS_BINARY)
+ /* ToDo: fix code for writing binary GrAnSim statistics */
+ switch (name) {
+ case GR_START:
+ case GR_STARTQ:
+ grputw(name);
+ grputw(proc);
+ abort(); /* die please: a single word */
+ /* doesn't represent long long times */
+ grputw(TIME_ON_PROC(proc));
+ grputw((W_)node);
+ break;
+ default:
+ fprintf(stderr,"Error in DumpStartEventAt: event %s is not a START event\n",
+ gran_event_names[name]);
+ }
+ else
+ switch (name) {
+ case GR_START:
+ case GR_STARTQ:
+ /* fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n", */
+ /* using spark queue length as optional argument ^^^^^^^^^ */
+ fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[SN %u]\n",
+ /* using spark name as optional argument ^^^^^^ */
+ proc,time_string,gran_event_names[name],
+ id,node_str,len);
+ break;
+ default:
+ fprintf(stderr,"Error in DumpStartEventAt: event %s is not a START event\n",
+ gran_event_names[name]);
+ }
}
+#endif /* GRAN */
void
-DumpGranInfo(pe, tso, mandatory_thread)
-PROC pe;
+DumpGranInfo(proc, tso, mandatory_thread)
+PROC proc;
P_ tso;
rtsBool mandatory_thread;
{
char time_string[500]; /* ToDo: kill magic constant */
ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
- if (do_gr_binary) {
+#if defined(GRAN)
+ if (RTSflags.GranFlags.granSimStats_suppressed)
+ return;
+#endif
+
+ if (GRANSIMSTATS_BINARY) {
grputw(GR_END);
- grputw(pe);
+ grputw(proc);
abort(); /* die please: a single word doesn't represent long long times */
grputw(CURRENT_TIME); /* this line is bound to fail */
grputw(TSO_ID(tso));
} else {
/*
- * NB: DumpGranEvent cannot be used because PE may be wrong (as well as the
- * extra info)
+ * NB: DumpGranEvent cannot be used because PE may be wrong
+ * (as well as the extra info)
*/
fprintf(gr_file, "PE %2u [%s]: END %lx, SN %lu, ST %lu, EXP %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu), LS %lu, GS %lu, MY %c\n"
- ,pe
+ ,proc
,time_string
,TSO_ID(tso)
,TSO_SPARKNAME(tso)
}
}
+void
+DumpTSO(tso)
+P_ tso;
+{
+ fprintf(stderr,"TSO 0x%lx, NAME 0x%lx, ID %lu, LINK 0x%lx, TYPE %s\n"
+ ,tso
+ ,TSO_NAME(tso)
+ ,TSO_ID(tso)
+ ,TSO_LINK(tso)
+ ,TSO_TYPE(tso)==T_MAIN?"MAIN":
+ TSO_TYPE(tso)==T_FAIL?"FAIL":
+ TSO_TYPE(tso)==T_REQUIRED?"REQUIRED":
+ TSO_TYPE(tso)==T_ADVISORY?"ADVISORY":
+ "???"
+ );
+
+ fprintf(stderr,"PC (0x%lx,0x%lx), ARG (0x%lx), SWITCH %lx0x\n"
+ ,TSO_PC1(tso)
+ ,TSO_PC2(tso)
+ ,TSO_ARG1(tso)
+ /* ,TSO_ARG2(tso) */
+ ,TSO_SWITCH(tso)
+ );
+
+ fprintf(gr_file,"TSO %lx: SN %lu, ST %lu, GBL %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu) LS %lu, GS %lu\n"
+ ,TSO_ID(tso)
+ ,TSO_SPARKNAME(tso)
+ ,TSO_STARTEDAT(tso)
+ ,TSO_EXPORTED(tso)?'T':'F'
+ ,TSO_BASICBLOCKS(tso)
+ ,TSO_ALLOCS(tso)
+ ,TSO_EXECTIME(tso)
+ ,TSO_BLOCKTIME(tso)
+ ,TSO_BLOCKCOUNT(tso)
+ ,TSO_FETCHTIME(tso)
+ ,TSO_FETCHCOUNT(tso)
+ ,TSO_LOCALSPARKS(tso)
+ ,TSO_GLOBALSPARKS(tso)
+ );
+}
+
/*
Output a terminate event and an 8-byte time.
*/
grterminate(v)
TIME v;
{
- DumpGranEvent(GR_TERMINATE, 0);
+#if defined(GRAN)
+ if (RTSflags.GranFlags.granSimStats_suppressed)
+ return;
+#endif
+
+ DumpGranEvent(GR_TERMINATE, PrelBase_Z91Z93_closure);
if (sizeof(TIME) == 4) {
putc('\0', gr_file);
grputw(v)
TIME v;
{
- if (v <= 0x3fl) {
+#if defined(GRAN)
+ if (RTSflags.GranFlags.granSimStats_suppressed)
+ return;
+#endif
+
+ if (v <= 0x3fl) { /* length v = 1 byte */
fputc(v & 0x3f, gr_file);
- } else if (v <= 0x3fffl) {
+ } else if (v <= 0x3fffl) { /* length v = 2 byte */
fputc((v >> 8l) | 0x40l, gr_file);
fputc(v & 0xffl, gr_file);
- } else if (v <= 0x3fffffffl) {
+ } else if (v <= 0x3fffffffl) { /* length v = 4 byte */
fputc((v >> 24l) | 0x80l, gr_file);
fputc((v >> 16l) & 0xffl, gr_file);
fputc((v >> 8l) & 0xffl, gr_file);
}
}
+#endif /* GRAN || PAR */
\end{code}
%****************************************************************************
%
%****************************************************************************
+General routines for GranSim. Mainly, startup and shutdown routines, called
+from @main.lc@.
+
\begin{code}
-#ifdef GRAN
-char gr_filename[32]; /*ToDo: magic short filename constant????? WDP 95/07 */
-I_ do_gr_sim = 0;
+#if defined(GRAN)
+FILE *gr_file = NULL;
+char gr_filename[STATS_FILENAME_MAXLEN];
+/* I_ do_gr_sim = 0; */ /* In GrAnSim setup always do simulation */
int
init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv)
{
I_ i;
- if (do_gr_sim) {
- char *extension = do_gr_binary ? "gb" : "gr";
+ char *extension = RTSflags.GranFlags.granSimStats_Binary ? "gb" : "gr";
- sprintf(gr_filename, "%0.28s.%0.2s", prog_argv[0], extension);
+ if (RTSflags.GranFlags.granSimStats_suppressed)
+ return;
- if ((gr_file = fopen(gr_filename, "w")) == NULL) {
- fprintf(stderr, "Can't open granularity simulation report file %s\n", gr_filename);
- exit(EXIT_FAILURE); /* why not EXIT??? WDP 95/07 */
- }
-#if defined(GRAN_CHECK) && defined(GRAN)
- if (DoReScheduleOnFetch)
+ sprintf(gr_filename, GR_FILENAME_FMT, prog_argv[0], extension);
+
+ if ((gr_file = fopen(gr_filename, "w")) == NULL) {
+ fprintf(stderr, "Can't open granularity simulation report file %s\n", gr_filename);
+ EXIT(EXIT_FAILURE);
+ }
+# if 0 /* that's obsolete now, I think -- HWL */
+ if (RTSflags.GranFlags.DoReScheduleOnFetch)
setbuf(gr_file, NULL);
-#endif
+# endif
fputs("Granularity Simulation for ", gr_file);
for (i = 0; i < prog_argc; ++i) {
fputc(' ', gr_file);
}
}
+
+ fputs("\nStart time: ", gr_file);
+ fputs(time_str(), gr_file); /* defined in main.lc */
+ fputc('\n', gr_file);
+
fputs("\n\n--------------------\n\n", gr_file);
fputs("General Parameters:\n\n", gr_file);
- fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads%s ????? %s\n",
- max_proc, DoFairSchedule ? "Fair" : "Unfair",
- DoThreadMigration ? "" : "Don't ",
- DoThreadMigration && DoStealThreadsFirst ? " Before Sparks" : "",
- DoReScheduleOnFetch ? "" : "Don't ");
-
- fprintf(gr_file, "%s, Fetch %s in Each Packet\n",
- SimplifiedFetch ? "Simplified Fetch" : (DoReScheduleOnFetch ? "Reschedule on Fetch" : "Block on Fetch"),
- DoGUMMFetching ? "Many Closures" : "Exactly One Closure");
- fprintf(gr_file, "Fetch Strategy(%lu): If outstanding fetches %s\n",
- FetchStrategy,
- FetchStrategy == 1 ? "only run runnable threads (don't create new ones" :
- FetchStrategy == 2 ? "create threads only from local sparks" :
- FetchStrategy == 3 ? "create threads from local or global sparks" :
- FetchStrategy == 4 ? "create sparks and steal threads if necessary" :
- "unknown");
+ if (RTSflags.GranFlags.Light)
+ fprintf(gr_file, "GrAnSim-Light\nPEs infinite, %s Scheduler, %sMigrate Threads %s, %s\n",
+ RTSflags.GranFlags.DoFairSchedule?"Fair":"Unfair",
+ RTSflags.GranFlags.DoThreadMigration?"":"Don't ",
+ RTSflags.GranFlags.DoThreadMigration && RTSflags.GranFlags.DoStealThreadsFirst?" Before Sparks":"",
+ RTSflags.GranFlags.SimplifiedFetch ? "Simplified Fetch" :
+ RTSflags.GranFlags.DoReScheduleOnFetch ? "Reschedule on Fetch" :
+ "Block on Fetch");
+ else
+ fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads %s, %s\n",
+ RTSflags.GranFlags.proc,RTSflags.GranFlags.DoFairSchedule?"Fair":"Unfair",
+ RTSflags.GranFlags.DoThreadMigration?"":"Don't ",
+ RTSflags.GranFlags.DoThreadMigration && RTSflags.GranFlags.DoStealThreadsFirst?" Before Sparks":"",
+ RTSflags.GranFlags.SimplifiedFetch ? "Simplified Fetch" :
+ RTSflags.GranFlags.DoReScheduleOnFetch ? "Reschedule on Fetch" :
+ "Block on Fetch");
+
+ if (RTSflags.GranFlags.DoGUMMFetching)
+ if (RTSflags.GranFlags.ThunksToPack)
+ fprintf(gr_file, "Bulk Fetching: Fetch %d Thunks in Each Packet (Packet Size = %d closures)\n",
+ RTSflags.GranFlags.ThunksToPack,
+ RTSflags.GranFlags.packBufferSize);
+ else
+ fprintf(gr_file, "Bulk Fetching: Fetch as many closures as possible (Packet Size = %d closures)\n",
+ RTSflags.GranFlags.packBufferSize);
+ else
+ fprintf(gr_file, "Incremental Fetching: Fetch Exactly One Closure in Each Packet\n");
+
+ fprintf(gr_file, "Fetch Strategy(%u):If outstanding fetches %s\n",
+ RTSflags.GranFlags.FetchStrategy,
+ RTSflags.GranFlags.FetchStrategy==0 ?
+ " block (block-on-fetch)":
+ RTSflags.GranFlags.FetchStrategy==1 ?
+ "only run runnable threads":
+ RTSflags.GranFlags.FetchStrategy==2 ?
+ "create threads only from local sparks":
+ RTSflags.GranFlags.FetchStrategy==3 ?
+ "create threads from local or global sparks":
+ RTSflags.GranFlags.FetchStrategy==4 ?
+ "create sparks and steal threads if necessary":
+ "unknown");
+
+ if (RTSflags.GranFlags.DoPrioritySparking)
+ fprintf(gr_file, "Priority Sparking (i.e. keep sparks ordered by priority)\n");
+
+ if (RTSflags.GranFlags.DoPriorityScheduling)
+ fprintf(gr_file, "Priority Scheduling (i.e. keep threads ordered by priority)\n");
fprintf(gr_file, "Thread Creation Time %lu, Thread Queue Time %lu\n",
- gran_threadcreatetime, gran_threadqueuetime);
+ RTSflags.GranFlags.gran_threadcreatetime,
+ RTSflags.GranFlags.gran_threadqueuetime);
fprintf(gr_file, "Thread DeSchedule Time %lu, Thread Schedule Time %lu\n",
- gran_threaddescheduletime, gran_threadscheduletime);
+ RTSflags.GranFlags.gran_threaddescheduletime,
+ RTSflags.GranFlags.gran_threadscheduletime);
fprintf(gr_file, "Thread Context-Switch Time %lu\n",
- gran_threadcontextswitchtime);
+ RTSflags.GranFlags.gran_threadcontextswitchtime);
fputs("\n\n--------------------\n\n", gr_file);
fputs("Communication Metrics:\n\n", gr_file);
fprintf(gr_file,
"Latency %lu (1st) %lu (rest), Fetch %lu, Notify %lu (Global) %lu (Local)\n",
- gran_latency, gran_additional_latency, gran_fetchtime,
- gran_gunblocktime, gran_lunblocktime);
+ RTSflags.GranFlags.gran_latency,
+ RTSflags.GranFlags.gran_additional_latency,
+ RTSflags.GranFlags.gran_fetchtime,
+ RTSflags.GranFlags.gran_gunblocktime,
+ RTSflags.GranFlags.gran_lunblocktime);
fprintf(gr_file,
"Message Creation %lu (+ %lu after send), Message Read %lu\n",
- gran_mpacktime, gran_mtidytime, gran_munpacktime);
+ RTSflags.GranFlags.gran_mpacktime,
+ RTSflags.GranFlags.gran_mtidytime,
+ RTSflags.GranFlags.gran_munpacktime);
fputs("\n\n--------------------\n\n", gr_file);
fputs("Instruction Metrics:\n\n", gr_file);
fprintf(gr_file, "Arith %lu, Branch %lu, Load %lu, Store %lu, Float %lu, Alloc %lu\n",
- gran_arith_cost, gran_branch_cost,
- gran_load_cost, gran_store_cost, gran_float_cost, gran_heapalloc_cost);
+ RTSflags.GranFlags.gran_arith_cost,
+ RTSflags.GranFlags.gran_branch_cost,
+ RTSflags.GranFlags.gran_load_cost,
+ RTSflags.GranFlags.gran_store_cost,
+ RTSflags.GranFlags.gran_float_cost,
+ RTSflags.GranFlags.gran_heapalloc_cost);
fputs("\n\n++++++++++++++++++++\n\n", gr_file);
- }
- if (do_gr_binary)
+
+ if (RTSflags.GranFlags.granSimStats_Binary)
grputw(sizeof(TIME));
- Idlers = max_proc;
return (0);
}
void
end_gr_simulation(STG_NO_ARGS)
{
- if (do_gr_sim) {
- fprintf(stderr, "The simulation is finished. Look at %s for details.\n",
- gr_filename);
- fclose(gr_file);
- }
-}
+ char time_string[500]; /* ToDo: kill magic constant */
+ ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
-#endif /* GRAN */
+ if (RTSflags.GranFlags.granSimStats_suppressed)
+ return;
-#ifdef PAR
-char gr_filename[50]; /*ToDo: (small) magic constant alert!!!! WDP 95/07 */
+#if defined(GRAN_CHECK) && defined(GRAN)
+ /* Print event stats */
+ if (RTSflags.GranFlags.debug & 0x20) {
+ int i;
+
+ fprintf(stderr,"Event statistics (number of events: %d):\n",
+ noOfEvents);
+ for (i=0; i<=MAX_EVENT; i++) {
+ fprintf(stderr," %s (%d): \t%ld \t%f%%\t%f%%\n",
+ event_names[i],i,event_counts[i],
+ (float)(100*event_counts[i])/(float)(noOfEvents),
+ (i==CONTINUETHREAD ? 0.0 :
+ (float)(100*(event_counts[i])/(float)(noOfEvents-event_counts[CONTINUETHREAD])) ));
+ }
+ fprintf(stderr,"Randomized steals: %u sparks, %u threads \n \t(Sparks: #%u (avg ntimes=%f; avg fl=%f) \n",
+ rs_sp_count, rs_t_count, no_of_steals,
+ (float)ntimes_total/(float)STG_MAX(no_of_steals,1),
+ (float)fl_total/(float)STG_MAX(no_of_steals,1));
+ fprintf(stderr,"Moved sparks: %d Withered sparks: %d (%.2f %%)\n",
+ tot_sparks,withered_sparks,
+ ( tot_sparks == 0 ? 0 :
+ (float)(100*withered_sparks)/(float)(tot_sparks)) );
+ /* Print statistics about priority sparking */
+ if (RTSflags.GranFlags.DoPrioritySparking) {
+ fprintf(stderr,"About Priority Sparking:\n");
+ fprintf(stderr," Total no. NewThreads: %d Avg. spark queue len: %.2f \n", tot_sq_probes, (float)tot_sq_len/(float)tot_sq_probes);
+ }
+ /* Print statistics about priority sparking */
+ if (RTSflags.GranFlags.DoPriorityScheduling) {
+ fprintf(stderr,"About Priority Scheduling:\n");
+ fprintf(stderr," Total no. of StartThreads: %d (non-end: %d) Avg. thread queue len: %.2f\n",
+ tot_add_threads, non_end_add_threads,
+ (float)tot_tq_len/(float)tot_add_threads);
+ }
+ /* Print packet statistics if GUMM fetching is turned on */
+ if (RTSflags.GranFlags.DoGUMMFetching) {
+ fprintf(stderr,"Packet statistcs:\n");
+ fprintf(stderr," Total no. of packets: %d Avg. packet size: %.2f \n", tot_packets, (float)tot_packet_size/(float)tot_packets);
+ fprintf(stderr," Total no. of thunks: %d Avg. thunks/packet: %.2f \n", tot_thunks, (float)tot_thunks/(float)tot_packets);
+ fprintf(stderr," Total no. of cuts: %d Avg. cuts/packet: %.2f\n", tot_cuts, (float)tot_cuts/(float)tot_packets);
+ /*
+ if (closure_queue_overflows>0)
+ fprintf(stderr," Number of closure queue overflows: %u\n",
+ closure_queue_overflows);
+ */
+ }
+ }
+
+ if (RTSflags.GranFlags.PrintFetchMisses)
+ fprintf(stderr,"Number of fetch misses: %d\n",fetch_misses);
+
+# if defined(GRAN_COUNT)
+ fprintf(stderr,"Update count statistics:\n");
+ fprintf(stderr," Total number of updates: %u\n",nUPDs);
+ fprintf(stderr," Needed to awaken BQ: %u with avg BQ len of: %f\n",
+ nUPDs_BQ,(float)BQ_lens/(float)nUPDs_BQ);
+ fprintf(stderr," Number of PAPs: %u\n",nPAPs);
+# endif
-I_ do_gr_profile = 0;
-I_ do_sp_profile = 0;
-I_ do_gr_binary = 0;
+#endif /* GRAN_CHECK */
+
+ fprintf(stderr, "Simulation finished after @ %s @ cycles. Look at %s for details.\n",
+ time_string,gr_filename);
+ if (RTSflags.GranFlags.granSimStats)
+ fclose(gr_file);
+}
+#elif defined(PAR)
+FILE *gr_file = NULL;
+char gr_filename[STATS_FILENAME_MAXLEN];
+
+/* I_ do_sp_profile = 0; */
void
init_gr_profiling(rts_argc, rts_argv, prog_argc, prog_argv)
-char *prog_argv[], *rts_argv[];
-int prog_argc, rts_argc;
+ char *prog_argv[], *rts_argv[];
+ int prog_argc, rts_argc;
{
int i;
- char *extension = do_gr_binary ? "gb" : "gr";
+ char *extension = RTSflags.ParFlags.granSimStats_Binary ? "gb" : "gr";
- sprintf(gr_filename, "%0.28s.%03d.%0.2s", prog_argv[0], thisPE, extension);
+ sprintf(gr_filename, GR_FILENAME_FMT_GUM, prog_argv[0], thisPE, extension);
if ((gr_file = fopen(gr_filename, "w")) == NULL) {
fprintf(stderr, "Can't open activity report file %s\n", gr_filename);
}
fputc('\n', gr_file);
+ fputs("Start-Time: ", gr_file);
+ fputs(time_str(), gr_file); /* defined in main.lc */
+ fputc('\n', gr_file);
+
startTime = CURRENT_TIME;
if (startTime > LL(1000000000)) {
fprintf(gr_file, "PE %2u [%lu]: TIME\n", thisPE, (TIME) startTime);
}
- if (do_gr_binary)
+ if (RTSflags.ParFlags.granSimStats_Binary)
grputw(sizeof(TIME));
}
#endif /* PAR */
-#endif /* GRAN || PAR */
+#endif /* GRAN || PAR */
\end{code}