--- /dev/null
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1995
+%
+%************************************************************************
+%* *
+\section[Threads.lc]{Thread Control Routines}
+%* *
+%************************************************************************
+
+%************************************************************************
+%
+\subsection[thread-overview]{Overview of the Thread Management System}
+%
+%************************************************************************
+
+%************************************************************************
+%
+\subsection[thread-decls]{Thread Declarations}
+%
+%************************************************************************
+
+% I haven't checked if GRAN can work with QP profiling. But as we use our
+% own profiling (GR profiling) that should be irrelevant. -- HWL
+
+\begin{code}
+
+#if defined(CONCURRENT)
+
+# define NON_POSIX_SOURCE /* so says Solaris */
+
+# include "rtsdefs.h"
+# include <setjmp.h>
+
+#include "LLC.h"
+#include "HLC.h"
+
+static void init_qp_profiling(STG_NO_ARGS); /* forward decl */
+\end{code}
+
+@AvailableStack@ is used to determine whether an existing stack can be
+reused without fresh allocation, thereby reducing both garbage
+collection and stack setup time.  At present it is only used for the
+first stack chunk of a thread, the one of @StkOChunkSize@ words.
+
+\begin{code}
+P_ AvailableStack = Nil_closure;
+P_ AvailableTSO = Nil_closure;
+\end{code}
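+
+The reuse test itself lives in @NewThread@ below; schematically it
+looks like this (a sketch only, guarded so that it is not compiled):
+
+\begin{code}
+#if 0	/* sketch of the reuse pattern in NewThread (see below) */
+    if (AvailableStack != Nil_closure) {
+	stko = AvailableStack;		/* recycle a retired stack chunk */
+	AvailableStack = STKO_LINK(AvailableStack);
+    } else {
+	/* ... otherwise allocate a fresh StkOChunkSize-word chunk ... */
+    }
+#endif
+\end{code}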
+
+Macros for dealing with the new and improved GA field used when
+simulating parallel execution; based on the @CONCURRENT@ package.  The
+GA field now contains a bit mask in which the n-th bit is set iff a
+copy of the data lives on the n-th processor.  If multiple copies
+exist, several bits are set.  The total number of processors is
+bounded by @MAX_PROC@, which should be at most the length of a word in
+bits. -- HWL
+
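+As a reading aid, the mask convention can be rendered as below (an
+illustrative sketch only; the real @PE_NUMBER@ and @IS_LOCAL_TO@
+definitions live in the RTS headers):
+
+\begin{code}
+#if 0	/* sketch only -- not compiled */
+# define PE_NUMBER(n)	   (1L << (n))		 /* mask bit for PE n  */
+# define IS_LOCAL_TO(ga,n) ((ga) & PE_NUMBER(n)) /* is a copy on PE n? */
+#endif
+\end{code}
+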
+\begin{code}
+/* mattson thinks this is obsolete */
+
+# if 0 && defined(GRAN)
+extern FILE *main_statsfile; /* Might be of general interest HWL */
+
+typedef unsigned long TIME;
+typedef unsigned char PROC;
+typedef unsigned char EVTTYPE;
+
+
+# undef max
+# define max(a,b) ((a)>(b)?(a):(b))
+
+static PROC
+ga_to_proc(W_ ga)
+{ PROC i;
+
+ for (i=0; i<MAX_PROC && !IS_LOCAL_TO(ga,i); i++) ;
+
+ return (i);
+}
+
+/* NB: This takes a *node* rather than just a ga as input */
+static PROC
+where_is(P_ node)
+{ return (ga_to_proc(PROCS(node))); } /* Access the GA field of the node */
+
+static PROC
+no_of_copies(P_ node) /* DaH lo'lu'Qo'; currently unused */
+{ PROC i, n;
+
+ for (i=0, n=0; i<MAX_PROC; i++)
+ if (IS_LOCAL_TO(PROCS(node),i))
+      n++;
+
+ return (n);
+}
+
+# endif /* GRAN ; HWL */
+\end{code}
+
+%****************************************************************
+%* *
+\subsection[thread-getthread]{The Thread Scheduler}
+%* *
+%****************************************************************
+
+This is the heart of the thread scheduling code.
+
+\begin{code}
+# if defined(GRAN_CHECK) && defined(GRAN)
+W_ debug = 0;
+# endif
+
+W_ event_trace = 0;
+W_ event_trace_all = 0;
+
+STGRegisterTable *CurrentRegTable = NULL;
+P_ CurrentTSO = NULL;
+
+# if defined(GRAN) /* HWL */
+
+unsigned CurrentProc = 0;
+W_ IdleProcs = ~0L, Idlers = MAX_PROC;
+
+# if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+# define FETCH_MASK_TSO 0x08000000 /* only bits 0, 1, 2 should be used */
+ /* normally */
+# endif
+
+I_ DoFairSchedule = 0;
+I_ DoReScheduleOnFetch = 0;
+I_ DoStealThreadsFirst = 0;
+I_ SimplifiedFetch = 0;
+I_ DoAlwaysCreateThreads = 0;
+I_ DoGUMMFetching = 0;
+I_ DoThreadMigration = 0;
+I_ FetchStrategy = 4;
+I_ PreferSparksOfLocalNodes = 0;
+
+# if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+I_ NoForward = 0;
+I_ PrintFetchMisses = 0, fetch_misses = 0;
+# endif
+
+# if defined(COUNT)
+I_ nUPDs = 0, nUPDs_old = 0, nUPDs_new = 0, nUPDs_BQ = 0, nPAPs = 0,
+ BQ_lens = 0;
+# endif
+
+I_ do_gr_binary = 0;
+I_ do_gr_profile = 0; /* Full .gr profile or only END events? */
+I_ no_gr_profile = 0; /* Don't create any .gr file at all? */
+I_ do_sp_profile = 0;
+I_ do_gr_migration = 0;
+
+P_ RunnableThreadsHd[MAX_PROC];
+P_ RunnableThreadsTl[MAX_PROC];
+
+P_ WaitThreadsHd[MAX_PROC];
+P_ WaitThreadsTl[MAX_PROC];
+
+sparkq PendingSparksHd[MAX_PROC][SPARK_POOLS];
+sparkq PendingSparksTl[MAX_PROC][SPARK_POOLS];
+
+W_ CurrentTime[MAX_PROC]; /* Per PE clock */
+
+# if defined(GRAN_CHECK) && defined(GRAN)
+P_ BlockedOnFetch[MAX_PROC]; /* HWL-CHECK */
+# endif
+
+I_ OutstandingFetches[MAX_PROC];
+
+W_ SparksAvail = 0; /* How many sparks are available */
+W_ SurplusThreads = 0; /* How many excess threads are there */
+
+StgBool NeedToReSchedule = StgFalse; /* Do we need to reschedule following a fetch? */
+
+/* Communication Cost Variables -- set in main program */
+
+W_ gran_latency = LATENCY, gran_additional_latency = ADDITIONAL_LATENCY,
+ gran_fetchtime = FETCHTIME,
+ gran_lunblocktime = LOCALUNBLOCKTIME, gran_gunblocktime = GLOBALUNBLOCKTIME,
+ gran_mpacktime = MSGPACKTIME, gran_munpacktime = MSGUNPACKTIME,
+ gran_mtidytime = 0;
+
+W_ gran_threadcreatetime = THREADCREATETIME,
+ gran_threadqueuetime = THREADQUEUETIME,
+ gran_threaddescheduletime = THREADDESCHEDULETIME,
+ gran_threadscheduletime = THREADSCHEDULETIME,
+ gran_threadcontextswitchtime = THREADCONTEXTSWITCHTIME;
+
+/* Instruction Cost Variables -- set in main program */
+
+W_ gran_arith_cost = ARITH_COST, gran_branch_cost = BRANCH_COST,
+ gran_load_cost = LOAD_COST, gran_store_cost = STORE_COST,
+ gran_float_cost = FLOAT_COST, gran_heapalloc_cost = 0;
+
+W_ max_proc = MAX_PROC;
+
+/* Granularity event types' names for output */
+
+char *event_names[] =
+ { "STARTTHREAD", "CONTINUETHREAD", "RESUMETHREAD",
+ "MOVESPARK", "MOVETHREAD", "FINDWORK",
+ "FETCHNODE", "FETCHREPLY"
+ };
+
+# if defined(GRAN)
+/* Prototypes of GrAnSim debugging functions */
+void DEBUG_PRINT_NODE PROTO((P_));
+void DEBUG_TREE PROTO((P_));
+void DEBUG_INFO_TABLE PROTO((P_));
+void DEBUG_CURR_THREADQ PROTO((I_));
+void DEBUG_THREADQ PROTO((P_, I_));
+void DEBUG_TSO PROTO((P_, I_));
+void DEBUG_EVENT PROTO((eventq, I_));
+void DEBUG_SPARK PROTO((sparkq, I_));
+void DEBUG_SPARKQ PROTO((sparkq, I_));
+void DEBUG_CURR_SPARKQ PROTO((I_));
+void DEBUG_PROC PROTO((I_, I_));
+void DCT(STG_NO_ARGS);
+void DCP(STG_NO_ARGS);
+void DEQ(STG_NO_ARGS);
+void DSQ(STG_NO_ARGS);
+
+void HandleFetchRequest PROTO((P_, PROC, P_));
+# endif /* GRAN ; HWL */
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+static eventq DelayedEventHd = NULL, DelayedEventTl = NULL;
+
+static I_ noOfEvents = 0;
+static I_ event_counts[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+#endif
+
+TIME SparkStealTime();
+
+/* Functions for manipulating event queues have been deleted -- HWL */
+/* ---------------------------------- */
+
+static void
+print_spark(spark)
+ sparkq spark;
+{
+
+ if (spark==NULL)
+ fprintf(stderr,"Spark: NIL\n");
+ else
+ fprintf(stderr,"Spark: Node 0x%lx, Name 0x%lx, Exported %s, Prev 0x%x, Next 0x%x\n",
+ (W_) SPARK_NODE(spark), SPARK_NAME(spark),
+ ((SPARK_EXPORTED(spark))?"True":"False"),
+ SPARK_PREV(spark), SPARK_NEXT(spark) );
+}
+
+static void
+print_sparkq(hd)
+sparkq hd;
+{
+ sparkq x;
+
+ fprintf(stderr,"Spark Queue with root at %x:\n",hd);
+ for (x=hd; x!=NULL; x=SPARK_NEXT(x)) {
+ print_spark(x);
+ }
+}
+
+static void
+print_event(event)
+eventq event;
+{
+
+ if (event==NULL)
+ fprintf(stderr,"Evt: NIL\n");
+ else
+ fprintf(stderr,"Evt: %s (%u), PE %u [%u], Time %lu, TSO 0x%lx, node 0x%lx\n",
+ event_names[EVENT_TYPE(event)],EVENT_TYPE(event),
+ EVENT_PROC(event), EVENT_CREATOR(event),
+ EVENT_TIME(event), EVENT_TSO(event), EVENT_NODE(event) /*,
+ EVENT_SPARK(event), EVENT_NEXT(event)*/ );
+
+}
+
+static void
+print_eventq(hd)
+eventq hd;
+{
+ eventq x;
+
+ fprintf(stderr,"Event Queue with root at %x:\n",hd);
+ for (x=hd; x!=NULL; x=EVENT_NEXT(x)) {
+ print_event(x);
+ }
+}
+
+/* ---------------------------------- */
+
+#if 0 /* moved */
+static eventq getnextevent()
+{
+ static eventq entry = NULL;
+
+ if(EventHd == NULL)
+ {
+ fprintf(stderr,"No next event\n");
+ exit(EXIT_FAILURE); /* ToDo: abort()? EXIT??? */
+ }
+
+ if(entry != NULL)
+ free((char *)entry);
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if (debug & 0x20) { /* count events */
+ noOfEvents++;
+ event_counts[EVENT_TYPE(EventHd)]++;
+ }
+#endif
+
+ entry = EventHd;
+ EventHd = EVENT_NEXT(EventHd);
+ return(entry);
+}
+
+/* ToDo: replace malloc/free with a free list */
+
+static void
+insert_event(newentry)
+eventq newentry;
+{
+ EVTTYPE evttype = EVENT_TYPE(newentry);
+ eventq event, *prev;
+
+ /* Search the queue and insert at the right point:
+ FINDWORK before everything, CONTINUETHREAD after everything.
+
+ This ensures that we find any available work after all threads have
+ executed the current cycle. This level of detail would normally be
+ irrelevant, but matters for ridiculously low latencies...
+ */
+
+ if(EventHd == NULL)
+ EventHd = newentry;
+ else
+ {
+ for (event = EventHd, prev=&EventHd; event != NULL;
+ prev = &(EVENT_NEXT(event)), event = EVENT_NEXT(event))
+ {
+ if(evttype == FINDWORK ? (EVENT_TIME(event) >= EVENT_TIME(newentry)) :
+ evttype == CONTINUETHREAD ? (EVENT_TIME(event) > EVENT_TIME(newentry)) :
+ (EVENT_TIME(event) > EVENT_TIME(newentry) ||
+ (EVENT_TIME(event) == EVENT_TIME(newentry) &&
+ EVENT_TYPE(event) != FINDWORK )))
+ {
+ *prev = newentry;
+ EVENT_NEXT(newentry) = event;
+ break;
+ }
+ }
+ if (event == NULL)
+ *prev = newentry;
+ }
+}
+
+static void
+newevent(proc,creator,time,evttype,tso,node,spark)
+PROC proc, creator;
+TIME time;
+EVTTYPE evttype;
+P_ tso, node;
+sparkq spark;
+{
+ extern P_ xmalloc();
+ eventq newentry = (eventq) xmalloc(sizeof(struct event));
+
+ EVENT_PROC(newentry) = proc;
+ EVENT_CREATOR(newentry) = creator;
+ EVENT_TIME(newentry) = time;
+ EVENT_TYPE(newentry) = evttype;
+ EVENT_TSO(newentry) = tso;
+ EVENT_NODE(newentry) = node;
+ EVENT_SPARK(newentry) = spark;
+ EVENT_NEXT(newentry) = NULL;
+
+ insert_event(newentry);
+}
+#endif /* 0 moved */
+
+# else /* !GRAN */
+
+P_ RunnableThreadsHd = Nil_closure;
+P_ RunnableThreadsTl = Nil_closure;
+
+P_ WaitingThreadsHd = Nil_closure;
+P_ WaitingThreadsTl = Nil_closure;
+
+PP_ PendingSparksBase[SPARK_POOLS];
+PP_ PendingSparksLim[SPARK_POOLS];
+
+PP_ PendingSparksHd[SPARK_POOLS];
+PP_ PendingSparksTl[SPARK_POOLS];
+
+# endif /* GRAN ; HWL */
+
+static jmp_buf scheduler_loop;
+
+I_ MaxThreads = DEFAULT_MAX_THREADS;
+I_ required_thread_count = 0;
+I_ advisory_thread_count = 0;
+
+EXTFUN(resumeThread);
+
+P_ NewThread PROTO((P_, W_));
+
+I_ context_switch = 0;
+
+I_ contextSwitchTime = CS_MIN_MILLISECS; /* In milliseconds */
+
+#if !defined(GRAN)
+
+I_ threadId = 0;
+
+I_ MaxLocalSparks = DEFAULT_MAX_LOCAL_SPARKS;
+I_ SparkLimit[SPARK_POOLS];
+
+extern I_ doSanityChks;
+extern void checkAStack(STG_NO_ARGS);
+
+rtsBool
+initThreadPools(size)
+I_ size;
+{
+ SparkLimit[ADVISORY_POOL] = SparkLimit[REQUIRED_POOL] = size;
+ if ((PendingSparksBase[ADVISORY_POOL] = (PP_) malloc(size * sizeof(P_))) == NULL)
+ return rtsFalse;
+ if ((PendingSparksBase[REQUIRED_POOL] = (PP_) malloc(size * sizeof(P_))) == NULL)
+ return rtsFalse;
+ PendingSparksLim[ADVISORY_POOL] = PendingSparksBase[ADVISORY_POOL] + size;
+ PendingSparksLim[REQUIRED_POOL] = PendingSparksBase[REQUIRED_POOL] + size;
+ return rtsTrue;
+}
+#endif
+
+#ifdef PAR
+rtsBool sameThread;
+#endif
+
+void
+ScheduleThreads(topClosure)
+P_ topClosure;
+{
+ I_ i;
+ P_ tso;
+
+#if defined(USE_COST_CENTRES) || defined(GUM)
+ if (time_profiling || contextSwitchTime > 0) {
+ if (initialize_virtual_timer(tick_millisecs)) {
+#else
+ if (contextSwitchTime > 0) {
+ if (initialize_virtual_timer(contextSwitchTime)) {
+#endif
+ fflush(stdout);
+ fprintf(stderr, "Can't initialize virtual timer.\n");
+ EXIT(EXIT_FAILURE);
+ }
+ } else
+ context_switch = 0 /* 1 HWL */;
+
+#if defined(GRAN_CHECK) && defined(GRAN) /* HWL */
+ if ( debug & 0x40 ) {
+ fprintf(stderr,"D> Doing init in ScheduleThreads now ...\n");
+ }
+#endif
+
+#if defined(GRAN) /* KH */
+ for (i=0; i<max_proc; i++)
+ {
+ RunnableThreadsHd[i] = RunnableThreadsTl[i] = Nil_closure;
+ WaitThreadsHd[i] = WaitThreadsTl[i] = Nil_closure;
+ PendingSparksHd[i][REQUIRED_POOL] = PendingSparksHd[i][ADVISORY_POOL] =
+ PendingSparksTl[i][REQUIRED_POOL] = PendingSparksTl[i][ADVISORY_POOL] =
+ NULL;
+
+# if defined(GRAN_CHECK)
+ if (debug & 0x04)
+ BlockedOnFetch[i] = 0; /*- StgFalse; -*/ /* HWL-CHECK */
+# endif
+ OutstandingFetches[i] = 0;
+ }
+
+ CurrentProc = MainProc;
+#endif /* GRAN */
+
+ if (DO_QP_PROF)
+ init_qp_profiling();
+
+ /*
+ * We perform GC so that a signal handler can install a new TopClosure and start
+ * a new main thread.
+ */
+#ifdef PAR
+ if (IAmMainThread) {
+#endif
+ if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
+ /* kludge to save the top closure as a root */
+ CurrentTSO = topClosure;
+ ReallyPerformThreadGC(0, rtsTrue);
+ topClosure = CurrentTSO;
+ if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
+ fflush(stdout);
+ fprintf(stderr, "Not enough heap for main thread\n");
+ EXIT(EXIT_FAILURE);
+ }
+ }
+#ifndef GRAN
+ RunnableThreadsHd = RunnableThreadsTl = tso;
+#else
+ /* NB: CurrentProc must have been set to MainProc before that! -- HWL */
+ ThreadQueueHd = ThreadQueueTl = tso;
+
+# if defined(GRAN_CHECK)
+ if ( debug & 0x40 ) {
+ fprintf(stderr,"D> MainTSO has been initialized (0x%x)\n", tso);
+ }
+# endif
+#endif
+
+#ifdef PAR
+ if (do_gr_profile) {
+ DumpGranEvent(GR_START, tso);
+ sameThread = rtsTrue;
+ }
+#endif
+
+#if defined(GRAN)
+ MAKE_BUSY(MainProc); /* Everything except the main PE is idle */
+#endif
+
+ required_thread_count = 1;
+ advisory_thread_count = 0;
+#ifdef PAR
+ } /*if IAmMainThread ...*/
+#endif
+
+ /* ----------------------------------------------------------------- */
+    /* This part is the MAIN SCHEDULER LOOP; jumped to from ReSchedule  */
+ /* ----------------------------------------------------------------- */
+
+ if(setjmp(scheduler_loop) < 0)
+ return;
+
+#if defined(GRAN) && defined(GRAN_CHECK)
+ if ( debug & 0x80 ) {
+ fprintf(stderr,"D> MAIN Schedule Loop; ThreadQueueHd is ");
+ DEBUG_TSO(ThreadQueueHd,1);
+ /* if (ThreadQueueHd == MainTSO) {
+ fprintf(stderr,"D> Event Queue is now:\n");
+ DEQ();
+ } */
+ }
+#endif
+
+#ifdef PAR
+ if (PendingFetches != Nil_closure) {
+ processFetches();
+ }
+
+#elif defined(GRAN)
+ if (ThreadQueueHd == Nil_closure) {
+ fprintf(stderr, "No runnable threads!\n");
+ EXIT(EXIT_FAILURE);
+ }
+ if (DO_QP_PROF > 1 && CurrentTSO != ThreadQueueHd) {
+ QP_Event1("AG", ThreadQueueHd);
+ }
+#endif
+
+#ifndef PAR
+ while (RunnableThreadsHd == Nil_closure) {
+ /* If we've no work */
+ if (WaitingThreadsHd == Nil_closure) {
+ fflush(stdout);
+ fprintf(stderr, "No runnable threads!\n");
+ EXIT(EXIT_FAILURE);
+ }
+ AwaitEvent(0);
+ }
+#else
+ if (RunnableThreadsHd == Nil_closure) {
+ if (advisory_thread_count < MaxThreads &&
+ (PendingSparksHd[REQUIRED_POOL] < PendingSparksTl[REQUIRED_POOL] ||
+ PendingSparksHd[ADVISORY_POOL] < PendingSparksTl[ADVISORY_POOL])) {
+ /*
+ * If we're here (no runnable threads) and we have pending sparks,
+ * we must have a space problem. Get enough space to turn one of
+	     * those pending sparks into a thread...ReallyPerformThreadGC
+	     * doesn't return until the space is available, so it may force
+	     * global GC.
+ * ToDo: Is this unnecessary here? Duplicated in ReSchedule()? --JSM
+ */
+ ReallyPerformThreadGC(THREAD_SPACE_REQUIRED, rtsTrue);
+ SAVE_Hp -= THREAD_SPACE_REQUIRED;
+ } else {
+ /*
+ * We really have absolutely no work. Send out a fish (there may be
+ * some out there already), and wait for something to arrive. We
+ * clearly can't run any threads until a SCHEDULE or RESUME arrives,
+ * and so that's what we're hoping to see. (Of course, we still have
+ * to respond to other types of messages.)
+ */
+ if (!fishing)
+ sendFish(choosePE(), mytid, NEW_FISH_AGE, NEW_FISH_HISTORY,
+ NEW_FISH_HUNGER);
+ processMessages();
+ }
+ ReSchedule(0);
+ } else if (PacketsWaiting()) { /* Look for incoming messages */
+ processMessages();
+ }
+#endif /* PAR */
+
+ if (DO_QP_PROF > 1 && CurrentTSO != RunnableThreadsHd) {
+ QP_Event1("AG", RunnableThreadsHd);
+ }
+
+#ifdef PAR
+ if (do_gr_profile && !sameThread)
+ DumpGranEvent(GR_SCHEDULE, RunnableThreadsHd);
+#endif
+
+#if !defined(GRAN) /* ROUND_ROBIN */
+ CurrentTSO = RunnableThreadsHd;
+ RunnableThreadsHd = TSO_LINK(RunnableThreadsHd);
+ TSO_LINK(CurrentTSO) = Nil_closure;
+
+ if (RunnableThreadsHd == Nil_closure)
+ RunnableThreadsTl = Nil_closure;
+
+#else /* GRAN */
+ /* This used to be Round Robin. KH.
+ I think we can ignore that, and move it down to ReSchedule instead.
+ */
+ CurrentTSO = ThreadQueueHd;
+ /* TSO_LINK(CurrentTSO) = Nil_closure; humbug */
+#endif
+
+ /* If we're not running a timer, just leave the flag on */
+ if (contextSwitchTime > 0)
+ context_switch = 0;
+
+#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+ if (CurrentTSO == Nil_closure) {
+ fprintf(stderr,"Error: Trying to execute Nil_closure on proc %d (@ %d)\n",
+ CurrentProc,CurrentTime[CurrentProc]);
+ exit(99);
+ }
+
+ if (debug & 0x04) {
+ if (BlockedOnFetch[CurrentProc]) {
+ fprintf(stderr,"Error: Trying to execute TSO 0x%x on proc %d (@ %d) which is blocked-on-fetch by TSO 0x%x\n",
+ CurrentTSO,CurrentProc,CurrentTime[CurrentProc],BlockedOnFetch[CurrentProc]);
+ exit(99);
+ }
+ }
+
+ if ( (debug & 0x10) &&
+ (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) ) {
+ fprintf(stderr,"Error: Trying to execute TSO 0x%x on proc %d (@ %d) which should be asleep!\n",
+ CurrentTSO,CurrentProc,CurrentTime[CurrentProc]);
+ exit(99);
+ }
+#endif
+
+# if defined(__STG_TAILJUMPS__)
+ miniInterpret((StgFunPtr)resumeThread);
+# else
+ if (doSanityChks)
+ miniInterpret_debug((StgFunPtr)resumeThread, checkAStack);
+ else
+ miniInterpret((StgFunPtr)resumeThread);
+# endif /* __STG_TAILJUMPS__ */
+}
+\end{code}
+
+% Some remarks on GrAnSim -- HWL
+
+The @ReSchedule@ function is the heart of GrAnSim.  Depending on its
+parameter it either issues a CONTINUETHREAD event, so that the current
+thread carries on executing in due course, or it looks for new work
+(e.g. when called from @EndThread@).
+
+It then picks the next event (@getnextevent@) and handles it
+appropriately (see the switch construct).  Note that a @continue@ in
+the switch causes the next event to be handled, whereas a @break@
+causes a jump to @scheduler_loop@, where the TSO at the head of the
+current processor's runnable queue is executed.
+
+ReSchedule is mostly entered from @HpOverflow.lc:PerformReSchedule@,
+which is itself called via the @GRAN_RESCHEDULE@ macro in the
+compiler-generated code.
+
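+In skeleton form, the event handling loop below follows this pattern
+(a sketch only, not compiled):
+
+\begin{code}
+#if 0	/* skeleton of the event handling loop in ReSchedule */
+    do {
+	event = getnextevent();		/* next event in simulated time    */
+	switch (EVENT_TYPE(event)) {
+	  case FETCHNODE:
+	    /* ... handle it ... */
+	    continue;			/* go on to the next event	   */
+	  case CONTINUETHREAD:
+	    /* ... */
+	    break;			/* leave the switch ...		   */
+	}
+	_longjmp(scheduler_loop, 1);	/* ... and run ThreadQueueHd	   */
+    } while(1);
+#endif
+\end{code}
+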
+\begin{code}
+#if defined(GRAN)
+
+void
+ReSchedule(what_next)
+int what_next; /* Run the current thread again? */
+{
+ sparkq spark, nextspark;
+ P_ tso;
+ P_ node;
+ eventq event;
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if ( debug & 0x80 ) {
+ fprintf(stderr,"D> Entering ReSchedule with mode %u; tso is\n",what_next);
+ DEBUG_TSO(ThreadQueueHd,1);
+ }
+#endif
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if ( (debug & 0x80) || (debug & 0x40 ) )
+ if (what_next<FIND_THREAD || what_next>CHANGE_THREAD)
+ fprintf(stderr,"ReSchedule: illegal parameter %u for what_next\n",
+ what_next);
+#endif
+
+ /* Run the current thread again (if there is one) */
+ if(what_next==SAME_THREAD && ThreadQueueHd != Nil_closure)
+ {
+ /* A bit of a hassle if the event queue is empty, but ... */
+ CurrentTSO = ThreadQueueHd;
+
+ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ CONTINUETHREAD,CurrentTSO,Nil_closure,NULL);
+
+      /* This code does round-robin scheduling, if preferred. */
+ if(DoFairSchedule && TSO_LINK(CurrentTSO) != Nil_closure)
+ {
+ if(do_gr_profile)
+ DumpGranEvent(GR_DESCHEDULE,ThreadQueueHd);
+ ThreadQueueHd = TSO_LINK(CurrentTSO);
+ TSO_LINK(ThreadQueueTl) = CurrentTSO;
+ ThreadQueueTl = CurrentTSO;
+ TSO_LINK(CurrentTSO) = Nil_closure;
+ if (do_gr_profile)
+ DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
+ CurrentTime[CurrentProc] += gran_threadcontextswitchtime;
+ }
+ }
+  /* Schedule the `next thread', which is at ThreadQueueHd now; i.e. the */
+  /* thread queue has already been updated before this point.            */
+ else if(what_next==NEW_THREAD && ThreadQueueHd != Nil_closure)
+ {
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if(DoReScheduleOnFetch)
+ {
+ fprintf(stderr,"ReSchedule(NEW_THREAD) shouldn't be used!!\n");
+ exit(99);
+ }
+#endif
+
+ if(do_gr_profile)
+ DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
+
+ CurrentTSO = ThreadQueueHd;
+ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
+
+ CurrentTime[CurrentProc] += gran_threadcontextswitchtime;
+ }
+
+  /* We go in here if the current thread is blocked on fetch => don't CONTINUE */
+ else if(what_next==CHANGE_THREAD)
+ {
+ /* just fall into event handling loop for next event */
+ }
+
+ /* We go in here if we have no runnable threads or what_next==0 */
+ else
+ {
+ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ FINDWORK,Nil_closure,Nil_closure,NULL);
+ CurrentTSO = Nil_closure;
+ }
+
+ /* ----------------------------------------------------------------- */
+ /* This part is the EVENT HANDLING LOOP */
+ /* ----------------------------------------------------------------- */
+
+ do {
+ /* Choose the processor with the next event */
+ event = getnextevent();
+ CurrentProc = EVENT_PROC(event);
+ if(EVENT_TIME(event) > CurrentTime[CurrentProc])
+ CurrentTime[CurrentProc] = EVENT_TIME(event);
+
+ MAKE_BUSY(CurrentProc);
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if (debug & 0x80)
+ fprintf(stderr,"D> After getnextevent, before HandleIdlePEs\n");
+#endif
+
+ /* Deal with the idlers */
+ HandleIdlePEs();
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if (event_trace &&
+ (event_trace_all || EVENT_TYPE(event) != CONTINUETHREAD ||
+ (debug & 0x80) ))
+ print_event(event);
+#endif
+
+ switch (EVENT_TYPE(event))
+ {
+ /* Should just be continuing execution */
+ case CONTINUETHREAD:
+#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+ if ( (debug & 0x04) && BlockedOnFetch[CurrentProc]) {
+ fprintf(stderr,"Warning: Discarding CONTINUETHREAD on blocked proc %u @ %u\n",
+ CurrentProc,CurrentTime[CurrentProc]);
+ print_event(event);
+ continue;
+ }
+#endif
+ if(ThreadQueueHd==Nil_closure)
+ {
+ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ FINDWORK,Nil_closure,Nil_closure,NULL);
+ continue; /* Catches superfluous CONTINUEs -- should be unnecessary */
+ }
+ else
+ break; /* fall into scheduler loop */
+
+ case FETCHNODE:
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if (SimplifiedFetch) {
+ fprintf(stderr,"Error: FETCHNODE events not valid with simplified fetch\n");
+ exit (99);
+ }
+#endif
+
+ CurrentTime[CurrentProc] += gran_munpacktime;
+ HandleFetchRequest(EVENT_NODE(event),
+ EVENT_CREATOR(event),
+ EVENT_TSO(event));
+ continue;
+
+ case FETCHREPLY:
+#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+ if (SimplifiedFetch) {
+ fprintf(stderr,"Error: FETCHREPLY events not valid with simplified fetch\n");
+ exit (99);
+ }
+
+ if (debug & 0x10) {
+ if (TSO_TYPE(EVENT_TSO(event)) & FETCH_MASK_TSO) {
+ TSO_TYPE(EVENT_TSO(event)) &= ~FETCH_MASK_TSO;
+ } else {
+ fprintf(stderr,"Error: FETCHREPLY: TSO 0x%x has fetch mask not set @ %d\n",
+ CurrentTSO,CurrentTime[CurrentProc]);
+ exit(99);
+ }
+ }
+
+ if (debug & 0x04) {
+ if (BlockedOnFetch[CurrentProc]!=ThreadQueueHd) {
+ fprintf(stderr,"Error: FETCHREPLY: Proc %d (with TSO 0x%x) not blocked-on-fetch by TSO 0x%x\n",
+ CurrentProc,CurrentTSO,BlockedOnFetch[CurrentProc]);
+ exit(99);
+ } else {
+ BlockedOnFetch[CurrentProc] = 0; /*- StgFalse; -*/
+ }
+ }
+#endif
+
+ /* Copy or move node to CurrentProc */
+ if (FetchNode(EVENT_NODE(event),
+ EVENT_CREATOR(event),
+ EVENT_PROC(event)) ) {
+ /* Fetch has failed i.e. node has been grabbed by another PE */
+ P_ node = EVENT_NODE(event), tso = EVENT_TSO(event);
+ PROC p = where_is(node);
+ TIME fetchtime;
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if (PrintFetchMisses) {
+ fprintf(stderr,"Fetch miss @ %lu: node 0x%x is at proc %u (rather than proc %u)\n",
+ CurrentTime[CurrentProc],node,p,EVENT_CREATOR(event));
+ fetch_misses++;
+ }
+#endif /* GRAN_CHECK */
+
+ CurrentTime[CurrentProc] += gran_mpacktime;
+
+ /* Count fetch again !? */
+ ++TSO_FETCHCOUNT(tso);
+ TSO_FETCHTIME(tso) += gran_fetchtime;
+
+ fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p]) +
+ gran_latency;
+
+ /* Chase the grabbed node */
+ newevent(p,CurrentProc,fetchtime,FETCHNODE,tso,node,NULL);
+
+#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+ if (debug & 0x04)
+ BlockedOnFetch[CurrentProc] = tso; /*-StgTrue;-*/
+
+ if (debug & 0x10)
+ TSO_TYPE(tso) |= FETCH_MASK_TSO;
+#endif
+
+ CurrentTime[CurrentProc] += gran_mtidytime;
+
+	    continue; /* NB: no REPLY has been processed; tso still sleeping */
+ }
+
+ /* -- Qapla'! Fetch has been successful; node is here, now */
+ ++TSO_FETCHCOUNT(EVENT_TSO(event));
+ TSO_FETCHTIME(EVENT_TSO(event)) += gran_fetchtime;
+
+ if (do_gr_profile)
+ DumpGranEventAndNode(GR_REPLY,EVENT_TSO(event),
+ EVENT_NODE(event),EVENT_CREATOR(event));
+
+ --OutstandingFetches[CurrentProc];
+#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+ if (OutstandingFetches[CurrentProc] < 0) {
+ fprintf(stderr,"OutstandingFetches of proc %u has become negative\n",CurrentProc);
+ exit (99);
+ }
+#endif
+
+ if (!DoReScheduleOnFetch) {
+ CurrentTSO = EVENT_TSO(event); /* awaken blocked thread */
+ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
+ TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] -
+ TSO_BLOCKEDAT(EVENT_TSO(event));
+ if(do_gr_profile)
+ DumpGranEvent(GR_RESUME,EVENT_TSO(event));
+ continue;
+ } else {
+ /* fall through to RESUMETHREAD */
+ }
+
+ case RESUMETHREAD: /* Move from the blocked queue to the tail of */
+ /* the runnable queue ( i.e. Qu' SImqa'lu') */
+ TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] -
+ TSO_BLOCKEDAT(EVENT_TSO(event));
+ StartThread(event,GR_RESUME);
+ continue;
+
+ case STARTTHREAD:
+ StartThread(event,GR_START);
+ continue;
+
+ case MOVETHREAD:
+#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+ if (!DoThreadMigration) {
+ fprintf(stderr,"MOVETHREAD events should never occur without -bM\n");
+ exit (99);
+ }
+#endif
+ CurrentTime[CurrentProc] += gran_munpacktime;
+ StartThread(event,GR_STOLEN);
+ continue; /* to the next event */
+
+ case MOVESPARK:
+ CurrentTime[CurrentProc] += gran_munpacktime;
+ spark = EVENT_SPARK(event);
+
+	    ADD_TO_SPARK_QUEUE(spark);     /* NB: this macro side-effects its
+                                             argument, so the assignment above
+                                             is needed. */
+
+ if(do_sp_profile)
+ DumpSparkGranEvent(SP_ACQUIRED,spark);
+
+ ++SparksAvail; /* Probably Temporarily */
+ /* Drop into FINDWORK */
+
+ if (!DoReScheduleOnFetch &&
+ (ThreadQueueHd != Nil_closure) ) { /* If we block on fetch then go */
+ continue; /* to next event (i.e. leave */
+ } /* spark in sparkq for now) */
+
+ case FINDWORK:
+ if((ThreadQueueHd == Nil_closure || DoAlwaysCreateThreads)
+ && (FetchStrategy >= 2 || OutstandingFetches[CurrentProc] == 0))
+ {
+ W_ found = 0;
+ sparkq spark_of_non_local_node = NULL;
+
+ /* Choose a spark from the local spark queue */
+ spark = SparkQueueHd;
+
+ while (spark != NULL && !found)
+ {
+ node = SPARK_NODE(spark);
+ if (!SHOULD_SPARK(node))
+ {
+ if(do_sp_profile)
+ DumpSparkGranEvent(SP_PRUNED,spark);
+
+ assert(spark != NULL);
+
+ SparkQueueHd = SPARK_NEXT(spark);
+ if(SparkQueueHd == NULL)
+ SparkQueueTl = NULL;
+
+ DisposeSpark(spark);
+
+ spark = SparkQueueHd;
+ }
+ /* -- node should eventually be sparked */
+ else if (PreferSparksOfLocalNodes &&
+ !IS_LOCAL_TO(PROCS(node),CurrentProc))
+ {
+ /* We have seen this spark before => no local sparks */
+ if (spark==spark_of_non_local_node) {
+ found = 1;
+ break;
+ }
+
+ /* Remember first non-local node */
+ if (spark_of_non_local_node==NULL)
+ spark_of_non_local_node = spark;
+
+ /* Special case: 1 elem sparkq with non-local spark */
+ if (spark==SparkQueueTl) {
+ found = 1;
+ break;
+ }
+
+ /* Put spark (non-local!) at the end of the sparkq */
+ SPARK_NEXT(SparkQueueTl) = spark;
+ SparkQueueHd = SPARK_NEXT(spark);
+ SPARK_NEXT(spark) = NULL;
+ SparkQueueTl = spark;
+
+ spark = SparkQueueHd;
+ }
+ else
+ {
+ found = 1;
+ }
+ }
+
+ /* We've found a node; now, create thread (DaH Qu' yIchen) */
+ if (found)
+ {
+ CurrentTime[CurrentProc] += gran_threadcreatetime;
+
+ node = SPARK_NODE(spark);
+ if((tso = NewThread(node, T_REQUIRED))==NULL)
+ {
+ /* Some kind of backoff needed here in case there's too little heap */
+ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc]+1,
+ FINDWORK,Nil_closure,Nil_closure,NULL);
+ ReallyPerformThreadGC(TSO_HS+TSO_CTS_SIZE,StgTrue);
+ spark = NULL;
+ continue; /* to the next event, eventually */
+ }
+
+ TSO_EXPORTED(tso) = SPARK_EXPORTED(spark);
+ TSO_LOCKED(tso) = !SPARK_GLOBAL(spark);
+ TSO_SPARKNAME(tso) = SPARK_NAME(spark);
+
+ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ STARTTHREAD,tso,Nil_closure,NULL);
+
+ assert(spark != NULL);
+
+ SparkQueueHd = SPARK_NEXT(spark);
+ if(SparkQueueHd == NULL)
+ SparkQueueTl = NULL;
+
+ DisposeSpark(spark);
+ }
+ else
+ /* Make the PE idle if nothing sparked and we have no threads. */
+ {
+ if(ThreadQueueHd == Nil_closure)
+#if defined(GRAN_CHECK) && defined(GRAN)
+ {
+ MAKE_IDLE(CurrentProc);
+ if ( (debug & 0x40) || (debug & 0x80) ) {
+ fprintf(stderr,"Warning in FINDWORK handling: No work found for PROC %u\n",CurrentProc);
+ }
+ }
+#else
+ MAKE_IDLE(CurrentProc);
+#endif /* GRAN_CHECK */
+ else
+ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
+ }
+
+ continue; /* to the next event */
+ }
+ else
+ {
+#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+ if ( (debug & 0x04) &&
+ (!DoReScheduleOnFetch && ThreadQueueHd != Nil_closure)
+ ) {
+	    fprintf(stderr,"Warning in FINDWORK handling:\n");
+	    fprintf(stderr,"ThreadQueueHd!=Nil_closure should never happen with !DoReScheduleOnFetch\n");
+ }
+#endif
+ if (FetchStrategy < 2 && OutstandingFetches[CurrentProc] != 0)
+ continue; /* to next event */
+ else
+ break; /* run ThreadQueueHd */
+ }
+ /* never reached */
+
+ default:
+ fprintf(stderr,"Illegal event type %u\n",EVENT_TYPE(event));
+ continue;
+ }
+ _longjmp(scheduler_loop, 1);
+ } while(1);
+ }
+\end{code}
+
+Here follows the non-GRAN @ReSchedule@.
+\begin{code}
+#else /* !GRAN */
+
+void
+ReSchedule(again)
+int again; /* Run the current thread again? */
+{
+ P_ spark;
+ PP_ sparkp;
+ P_ tso;
+
+#ifdef PAR
+ /*
+ * In the parallel world, we do unfair scheduling for the moment.
+     * Ultimately, this should all be merged with the more sophisticated
+ * GrAnSim scheduling options. (Of course, some provision should be
+ * made for *required* threads to make sure that they don't starve,
+ * but for now we assume that no one is running concurrent Haskell on
+ * a multi-processor platform.)
+ */
+
+ sameThread = again;
+
+ if (again) {
+ if (RunnableThreadsHd == Nil_closure)
+ RunnableThreadsTl = CurrentTSO;
+ TSO_LINK(CurrentTSO) = RunnableThreadsHd;
+ RunnableThreadsHd = CurrentTSO;
+ }
+
+#else
+
+ /*
+ * In the sequential world, we assume that the whole point of running
+ * the threaded build is for concurrent Haskell, so we provide round-robin
+ * scheduling.
+ */
+
+ if (again) {
+ if(RunnableThreadsHd == Nil_closure) {
+ RunnableThreadsHd = CurrentTSO;
+ } else {
+ TSO_LINK(RunnableThreadsTl) = CurrentTSO;
+ if (DO_QP_PROF > 1) {
+ QP_Event1("GA", CurrentTSO);
+ }
+ }
+ RunnableThreadsTl = CurrentTSO;
+ }
+#endif
+
+#if 1
+ /*
+ * Debugging code, which is useful enough (and cheap enough) to compile
+ * in all the time. This makes sure that we don't access saved registers,
+ * etc. in threads which are supposed to be sleeping.
+ */
+ CurrentTSO = Nil_closure;
+ CurrentRegTable = NULL;
+#endif
+
+ /* First the required sparks */
+
+ for (sparkp = PendingSparksHd[REQUIRED_POOL];
+ sparkp < PendingSparksTl[REQUIRED_POOL]; sparkp++) {
+ spark = *sparkp;
+ if (SHOULD_SPARK(spark)) {
+ if ((tso = NewThread(spark, T_REQUIRED)) == NULL)
+ break;
+ if (RunnableThreadsHd == Nil_closure) {
+ RunnableThreadsHd = tso;
+#ifdef PAR
+ if (do_gr_profile) {
+ DumpGranEvent(GR_START, tso);
+ sameThread = rtsTrue;
+ }
+#endif
+ } else {
+ TSO_LINK(RunnableThreadsTl) = tso;
+#ifdef PAR
+ if (do_gr_profile)
+ DumpGranEvent(GR_STARTQ, tso);
+#endif
+ }
+ RunnableThreadsTl = tso;
+ } else {
+ if (DO_QP_PROF)
+ QP_Event0(threadId++, spark);
+#ifdef PAR
+ if(do_sp_profile)
+ DumpSparkGranEvent(SP_PRUNED, threadId++);
+#endif
+ }
+ }
+ PendingSparksHd[REQUIRED_POOL] = sparkp;
+
+ /* Now, almost the same thing for advisory sparks */
+
+ for (sparkp = PendingSparksHd[ADVISORY_POOL];
+ sparkp < PendingSparksTl[ADVISORY_POOL]; sparkp++) {
+ spark = *sparkp;
+ if (SHOULD_SPARK(spark)) {
+ if (
+#ifdef PAR
+ /* In the parallel world, don't create advisory threads if we are
+ * about to rerun the same thread, or already have runnable threads,
+ * or the main thread has terminated */
+ (RunnableThreadsHd != Nil_closure ||
+ (required_thread_count == 0 && IAmMainThread)) ||
+#endif
+ advisory_thread_count == MaxThreads ||
+ (tso = NewThread(spark, T_ADVISORY)) == NULL)
+ break;
+ advisory_thread_count++;
+ if (RunnableThreadsHd == Nil_closure) {
+ RunnableThreadsHd = tso;
+#ifdef PAR
+ if (do_gr_profile) {
+ DumpGranEvent(GR_START, tso);
+ sameThread = rtsTrue;
+ }
+#endif
+ } else {
+ TSO_LINK(RunnableThreadsTl) = tso;
+#ifdef PAR
+ if (do_gr_profile)
+ DumpGranEvent(GR_STARTQ, tso);
+#endif
+ }
+ RunnableThreadsTl = tso;
+ } else {
+ if (DO_QP_PROF)
+ QP_Event0(threadId++, spark);
+#ifdef PAR
+ if(do_sp_profile)
+ DumpSparkGranEvent(SP_PRUNED, threadId++);
+#endif
+ }
+ }
+ PendingSparksHd[ADVISORY_POOL] = sparkp;
+
+#ifndef PAR
+ longjmp(scheduler_loop, required_thread_count == 0 ? -1 : 1);
+#else
+ longjmp(scheduler_loop, required_thread_count == 0 && IAmMainThread ? -1 : 1);
+#endif
+}
+
+#endif /* !GRAN */
+
+\end{code}
+
+%****************************************************************************
+%
+\subsection[thread-gransim-execution]{Starting, Idling and Migrating
+ Threads (GrAnSim only)}
+%
+%****************************************************************************
+
+Thread start, idle and migration code for GrAnSim (i.e. simulating multiple
+processors).
+
+\begin{code}
+#if defined(GRAN)
+
+StartThread(event,event_type)
+eventq event;
+enum gran_event_types event_type;
+{
+ if(ThreadQueueHd==Nil_closure)
+ {
+ CurrentTSO = ThreadQueueHd = ThreadQueueTl = EVENT_TSO(event);
+ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc]+gran_threadqueuetime,
+ CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
+ if(do_gr_profile)
+ DumpGranEvent(event_type,EVENT_TSO(event));
+ }
+ else
+ {
+ TSO_LINK(ThreadQueueTl) = EVENT_TSO(event);
+ ThreadQueueTl = EVENT_TSO(event);
+
+ if(DoThreadMigration)
+ ++SurplusThreads;
+
+ if(do_gr_profile)
+ DumpGranEvent(event_type+1,EVENT_TSO(event));
+
+ }
+ CurrentTime[CurrentProc] += gran_threadqueuetime;
+}
+\end{code}
+
+Export work to idle PEs.
+
+\begin{code}
+HandleIdlePEs()
+{
+ PROC proc;
+
+ if(ANY_IDLE && (SparksAvail > 0l || SurplusThreads > 0l))
+ for(proc = 0; proc < max_proc; proc++)
+ if(IS_IDLE(proc))
+ {
+ if(DoStealThreadsFirst &&
+ (FetchStrategy >= 4 || OutstandingFetches[proc] == 0))
+ {
+ if (SurplusThreads > 0l) /* Steal a thread */
+ StealThread(proc);
+
+ if(!IS_IDLE(proc))
+ break;
+ }
+
+ if(SparksAvail > 0l &&
+ (FetchStrategy >= 3 || OutstandingFetches[proc] == 0)) /* Steal a spark */
+ StealSpark(proc);
+
+ if (IS_IDLE(proc) && SurplusThreads > 0l &&
+ (FetchStrategy >= 4 || OutstandingFetches[proc] == 0)) /* Steal a thread */
+ StealThread(proc);
+ }
+}
+\end{code}
+
+Steal a spark and schedule moving it to @proc@.  We want to look at PEs
+in clock order -- the most lagging clock first.  Currently sparks are
+only stolen from the @ADVISORY_POOL@, never from the @REQUIRED_POOL@.
+Eventually this should be changed to steal first from the former, then
+from the latter.
+
+\begin{code}
+StealSpark(proc)
+PROC proc;
+{
+ PROC p;
+ sparkq spark, prev, next;
+ int stolen = 0;
+ TIME times[MAX_PROC], stealtime;
+ unsigned ntimes=0, i, j;
+
+ /* times shall contain processors from which we may steal sparks */
+ for(p=0; p < max_proc; ++p)
+ if(proc != p &&
+ PendingSparksHd[p][ADVISORY_POOL] != NULL &&
+ CurrentTime[p] <= CurrentTime[CurrentProc])
+ times[ntimes++] = p;
+
+ /* sort times */
+ for(i=0; i < ntimes; ++i)
+ for(j=i+1; j < ntimes; ++j)
+ if(CurrentTime[times[i]] > CurrentTime[times[j]])
+ {
+ unsigned temp = times[i];
+ times[i] = times[j];
+ times[j] = temp;
+ }
+
+ for(i=0; i < ntimes && !stolen; ++i)
+ {
+ p = times[i];
+
+ for(prev=NULL, spark = PendingSparksHd[p][ADVISORY_POOL];
+ spark != NULL && !stolen;
+ spark=next)
+ {
+ next = SPARK_NEXT(spark);
+
+ if(SHOULD_SPARK(SPARK_NODE(spark)))
+ {
+ /* Don't Steal local sparks */
+ if(!SPARK_GLOBAL(spark))
+ {
+ prev=spark;
+ continue;
+ }
+
+ SPARK_NEXT(spark) = NULL;
+ CurrentTime[p] += gran_mpacktime;
+
+ stealtime = (CurrentTime[p] > CurrentTime[proc]? CurrentTime[p]: CurrentTime[proc])
+ + SparkStealTime();
+
+ newevent(proc,p /* CurrentProc */,stealtime,
+ MOVESPARK,Nil_closure,Nil_closure,spark);
+
+ MAKE_BUSY(proc);
+ stolen = 1;
+ ++SPARK_GLOBAL(spark);
+
+ if(do_sp_profile)
+ DumpSparkGranEvent(SP_EXPORTED,spark);
+
+ CurrentTime[p] += gran_mtidytime;
+
+ --SparksAvail;
+ }
+ else
+ {
+ if(do_sp_profile)
+ DumpSparkGranEvent(SP_PRUNED,spark);
+ DisposeSpark(spark);
+ }
+
+ if(spark == PendingSparksHd[p][ADVISORY_POOL])
+ PendingSparksHd[p][ADVISORY_POOL] = next;
+
+ if(prev!=NULL)
+ SPARK_NEXT(prev) = next;
+ }
+
+ if(PendingSparksHd[p][ADVISORY_POOL] == NULL)
+ PendingSparksTl[p][ADVISORY_POOL] = NULL;
+ }
+}
+\end{code}
+
+Steal a thread and schedule moving it to @proc@.
+
+\begin{code}
+StealThread(proc)
+PROC proc;
+{
+ PROC p;
+ P_ thread, prev;
+ TIME times[MAX_PROC], stealtime;
+ unsigned ntimes=0, i, j;
+
+ /* Hunt for a thread */
+
+ /* times shall contain processors from which we may steal threads */
+ for(p=0; p < max_proc; ++p)
+ if(proc != p && RunnableThreadsHd[p] != Nil_closure &&
+ CurrentTime[p] <= CurrentTime[CurrentProc])
+ times[ntimes++] = p;
+
+ /* sort times */
+ for(i=0; i < ntimes; ++i)
+ for(j=i+1; j < ntimes; ++j)
+ if(CurrentTime[times[i]] > CurrentTime[times[j]])
+ {
+ unsigned temp = times[i];
+ times[i] = times[j];
+ times[j] = temp;
+ }
+
+ for(i=0; i < ntimes; ++i)
+ {
+ p = times[i];
+
+      /* Steal the first exportable (unlocked) thread in the runnable */
+      /* queue, skipping the one at its head                          */
+
+ if(RunnableThreadsHd[p] != Nil_closure)
+ {
+ for(prev = RunnableThreadsHd[p], thread = TSO_LINK(RunnableThreadsHd[p]);
+ thread != Nil_closure && TSO_LOCKED(thread);
+ prev = thread, thread = TSO_LINK(thread))
+ /* SKIP */;
+
+ if(thread != Nil_closure) /* Take thread out of runnable queue */
+ {
+ TSO_LINK(prev) = TSO_LINK(thread);
+
+ TSO_LINK(thread) = Nil_closure;
+
+ if(RunnableThreadsTl[p] == thread)
+ RunnableThreadsTl[p] = prev;
+
+ /* Turn magic constants into params !? -- HWL */
+
+ CurrentTime[p] += 5l * gran_mpacktime;
+
+ stealtime = (CurrentTime[p] > CurrentTime[proc]? CurrentTime[p]: CurrentTime[proc])
+ + SparkStealTime() + 4l * gran_additional_latency
+ + 5l * gran_munpacktime;
+
+ /* Move the thread */
+ SET_PROCS(thread,PE_NUMBER(proc));
+
+ /* Move from one queue to another */
+ newevent(proc,p,stealtime,MOVETHREAD,thread,Nil_closure,NULL);
+ MAKE_BUSY(proc);
+ --SurplusThreads;
+
+ if(do_gr_profile)
+ DumpRawGranEvent(p,GR_STEALING,TSO_ID(thread));
+
+ CurrentTime[p] += 5l * gran_mtidytime;
+
+ /* Found one */
+ break;
+ }
+ }
+ }
+}
+
+TIME SparkStealTime()
+{
+ double fishdelay, sparkdelay, latencydelay;
+ fishdelay = (double)max_proc/2;
+ sparkdelay = fishdelay - ((fishdelay-1)/(double)(max_proc-1))*(double)Idlers;
+ latencydelay = sparkdelay*((double)gran_latency);
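+
+  /* Illustration (made-up numbers): with max_proc = 16, fishdelay = 8;
+     if 15 PEs are idle, sparkdelay = 8 - (7/15)*15 = 1; with no idlers
+     sparkdelay = 8.  More idlers thus make stealing cheaper. */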
+
+/*
+ fprintf(stderr,"fish delay = %g, spark delay = %g, latency delay = %g, Idlers = %u\n",
+ fishdelay,sparkdelay,latencydelay,Idlers);
+*/
+ return((TIME)latencydelay);
+}
+#endif /* GRAN ; HWL */
+
+\end{code}
+
+%****************************************************************************
+%
+\subsection[thread-execution]{Executing Threads}
+%
+%****************************************************************************
+
+\begin{code}
+EXTDATA_RO(StkO_info);
+EXTDATA_RO(TSO_info);
+EXTDATA_RO(WorldStateToken_closure);
+
+EXTFUN(EnterNodeCode);
+UNVEC(EXTFUN(stopThreadDirectReturn);,EXTDATA(vtbl_stopStgWorld);)
+
+#if defined(GRAN)
+
+/* Slow but relatively reliable method uses xmalloc */
+/* Eventually change that to heap allocated sparks. */
+
+sparkq
+NewSpark(node,name,local)
+P_ node;
+I_ name, local;
+{
+ extern P_ xmalloc();
+ sparkq newspark = (sparkq) xmalloc(sizeof(struct spark));
+ SPARK_PREV(newspark) = SPARK_NEXT(newspark) = NULL;
+ SPARK_NODE(newspark) = node;
+ SPARK_NAME(newspark) = name;
+ SPARK_GLOBAL(newspark) = !local;
+ return(newspark);
+}
+
+void
+DisposeSpark(spark)
+sparkq spark;
+{
+ if(spark!=NULL)
+ free(spark);
+
+ --SparksAvail;
+
+/* Heap-allocated disposal.
+
+ FREEZE_MUT_HDR(spark, ImMutArrayOfPtrs);
+ SPARK_PREV(spark) = SPARK_NEXT(spark) = SPARK_NODE(spark) = Nil_closure;
+*/
+}
+
+DisposeSparkQ(spark)
+sparkq spark;
+{
+ if (spark==NULL)
+ return;
+
+ DisposeSparkQ(SPARK_NEXT(spark));
+
+#ifdef GRAN_CHECK
+ if (SparksAvail < 0)
+ fprintf(stderr,"DisposeSparkQ: SparksAvail<0 after disposing sparkq @ 0x%lx\n", spark);
+#endif
+
+ free(spark);
+}
+
+#endif
+
+I_ StkOChunkSize = DEFAULT_STKO_CHUNK_SIZE;
+
+/* Create a new TSO, with the specified closure to enter and thread type */
+
+P_
+NewThread(topClosure, type)
+P_ topClosure;
+W_ type;
+{
+ P_ stko, tso;
+
+ if (AvailableTSO != Nil_closure) {
+ tso = AvailableTSO;
+#if defined(GRAN)
+ SET_PROCS(tso,ThisPE); /* Allocate it locally! */
+#endif
+ AvailableTSO = TSO_LINK(tso);
+ } else if (SAVE_Hp + TSO_HS + TSO_CTS_SIZE > SAVE_HpLim) {
+ return(NULL);
+ } else {
+ ALLOC_TSO(TSO_HS,BYTES_TO_STGWORDS(sizeof(STGRegisterTable)),
+ BYTES_TO_STGWORDS(sizeof(StgDouble)));
+ tso = SAVE_Hp + 1;
+ SAVE_Hp += TSO_HS + TSO_CTS_SIZE;
+ SET_TSO_HDR(tso, TSO_info, CCC);
+ }
+
+ TSO_LINK(tso) = Nil_closure;
+ TSO_CCC(tso) = (CostCentre)STATIC_CC_REF(CC_MAIN);
+ TSO_NAME(tso) = (P_) INFO_PTR(topClosure); /* A string would be nicer -- JSM */
+ TSO_ID(tso) = threadId++;
+ TSO_TYPE(tso) = type;
+ TSO_PC1(tso) = TSO_PC2(tso) = EnterNodeCode;
+ TSO_ARG1(tso) = TSO_EVENT(tso) = 0;
+ TSO_SWITCH(tso) = NULL;
+
+#ifdef DO_REDN_COUNTING
+ TSO_AHWM(tso) = 0;
+ TSO_BHWM(tso) = 0;
+#endif
+
+#if defined(GRAN) || defined(PAR)
+ TSO_SPARKNAME(tso) = 0;
+#if defined(GRAN)
+ TSO_STARTEDAT(tso) = CurrentTime[CurrentProc];
+#else
+ TSO_STARTEDAT(tso) = CURRENT_TIME;
+#endif
+ TSO_EXPORTED(tso) = 0;
+ TSO_BASICBLOCKS(tso) = 0;
+ TSO_ALLOCS(tso) = 0;
+ TSO_EXECTIME(tso) = 0;
+ TSO_FETCHTIME(tso) = 0;
+ TSO_FETCHCOUNT(tso) = 0;
+ TSO_BLOCKTIME(tso) = 0;
+ TSO_BLOCKCOUNT(tso) = 0;
+ TSO_BLOCKEDAT(tso) = 0;
+ TSO_GLOBALSPARKS(tso) = 0;
+ TSO_LOCALSPARKS(tso) = 0;
+#endif
+ /*
+ * set pc, Node (R1), liveness
+ */
+ CurrentRegTable = TSO_INTERNAL_PTR(tso);
+ SAVE_Liveness = LIVENESS_R1;
+ SAVE_R1.p = topClosure;
+
+# ifndef PAR
+ if (type == T_MAIN) {
+ stko = MainStkO;
+ } else {
+# endif
+ if (AvailableStack != Nil_closure) {
+ stko = AvailableStack;
+#if defined(GRAN)
+ SET_PROCS(stko,ThisPE);
+#endif
+ AvailableStack = STKO_LINK(AvailableStack);
+ } else if (SAVE_Hp + STKO_HS + StkOChunkSize > SAVE_HpLim) {
+ return(NULL);
+ } else {
+ ALLOC_STK(STKO_HS,StkOChunkSize,0);
+ stko = SAVE_Hp + 1;
+ SAVE_Hp += STKO_HS + StkOChunkSize;
+ SET_STKO_HDR(stko, StkO_info, CCC);
+ }
+ STKO_SIZE(stko) = StkOChunkSize + STKO_VHS;
+ STKO_SpB(stko) = STKO_SuB(stko) = STKO_BSTK_BOT(stko) + BREL(1);
+ STKO_SpA(stko) = STKO_SuA(stko) = STKO_ASTK_BOT(stko) + AREL(1);
+ STKO_LINK(stko) = Nil_closure;
+ STKO_RETURN(stko) = NULL;
+# ifndef PAR
+ }
+# endif
+
+#ifdef DO_REDN_COUNTING
+ STKO_ADEP(stko) = STKO_BDEP(stko) = 0;
+#endif
+
+ if (type == T_MAIN) {
+ STKO_SpA(stko) -= AREL(1);
+ *STKO_SpA(stko) = (P_) WorldStateToken_closure;
+ }
+
+ SAVE_Ret = (StgRetAddr) UNVEC(stopThreadDirectReturn,vtbl_stopStgWorld);
+ SAVE_StkO = stko;
+
+ if (DO_QP_PROF) {
+ QP_Event1(do_qp_prof > 1 ? "*A" : "*G", tso);
+ }
+ return tso;
+}
+\end{code}
+
+\begin{code}
+
+void
+EndThread(STG_NO_ARGS)
+{
+#ifdef PAR
+ TIME now = CURRENT_TIME;
+#endif
+#ifdef DO_REDN_COUNTING
+ extern FILE *tickyfile;
+
+ if (tickyfile != NULL) {
+ fprintf(tickyfile, "Thread %d (%lx)\n\tA stack max. depth: %ld words\n",
+ TSO_ID(CurrentTSO), TSO_NAME(CurrentTSO), TSO_AHWM(CurrentTSO));
+ fprintf(tickyfile, "\tB stack max. depth: %ld words\n",
+ TSO_BHWM(CurrentTSO));
+ }
+#endif
+
+ if (DO_QP_PROF) {
+ QP_Event1("G*", CurrentTSO);
+ }
+
+#if defined(GRAN)
+ assert(CurrentTSO == ThreadQueueHd);
+ ThreadQueueHd = TSO_LINK(CurrentTSO);
+
+ if(ThreadQueueHd == Nil_closure)
+ ThreadQueueTl = Nil_closure;
+
+ else if (DoThreadMigration)
+ --SurplusThreads;
+
+ if (do_gr_sim)
+ {
+ if(TSO_TYPE(CurrentTSO)==T_MAIN)
+ {
+ int i;
+ for(i=0; i < max_proc; ++i) {
+ StgBool is_first = StgTrue;
+ while(RunnableThreadsHd[i] != Nil_closure)
+ {
+ /* We schedule runnable threads before killing them to */
+ /* make the job of bookkeeping the running, runnable, */
+ /* blocked threads easier for scripts like gr2ps -- HWL */
+
+ if (do_gr_profile && !is_first)
+ DumpRawGranEvent(i,GR_SCHEDULE,
+ TSO_ID(RunnableThreadsHd[i]));
+ if (!no_gr_profile)
+ DumpGranInfo(i,RunnableThreadsHd[i],StgTrue);
+ RunnableThreadsHd[i] = TSO_LINK(RunnableThreadsHd[i]);
+ is_first = StgFalse;
+ }
+ }
+
+ ThreadQueueHd = Nil_closure;
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+ /* Print event stats */
+ if (debug & 0x20) {
+ int i;
+
+ fprintf(stderr,"Statistics of events (total=%d):\n",
+ noOfEvents);
+ for (i=0; i<=7; i++) {
+ fprintf(stderr,"> %s (%d): \t%ld \t%f%%\n",
+ event_names[i],i,event_counts[i],
+ (float)(100*event_counts[i])/(float)(noOfEvents) );
+ }
+ }
+#endif
+
+ }
+
+ if (!no_gr_profile)
+ DumpGranInfo(CurrentProc,CurrentTSO,
+ TSO_TYPE(CurrentTSO) != T_ADVISORY);
+
+ /* Note ThreadQueueHd is Nil when the main thread terminates */
+ if(ThreadQueueHd != Nil_closure)
+ {
+ if (do_gr_profile && !no_gr_profile)
+ DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
+ CurrentTime[CurrentProc] += gran_threadscheduletime;
+ }
+
+ else if (do_gr_binary && TSO_TYPE(CurrentTSO)==T_MAIN &&
+ !no_gr_profile)
+ grterminate(CurrentTime[CurrentProc]);
+ }
+#endif /* GRAN */
+
+#ifdef PAR
+ if (do_gr_profile) {
+ TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO);
+ DumpGranInfo(thisPE, CurrentTSO, TSO_TYPE(CurrentTSO) != T_ADVISORY);
+ }
+#endif
+
+ switch (TSO_TYPE(CurrentTSO)) {
+ case T_MAIN:
+ required_thread_count--;
+#ifdef PAR
+ if (do_gr_binary)
+ grterminate(now);
+#endif
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if ( (debug & 0x80) || (debug & 0x40) )
+ fprintf(stderr,"\nGRAN: I hereby terminate the main thread!\n");
+
+ /* I've stolen that from the end of ReSchedule (!GRAN). HWL */
+ longjmp(scheduler_loop, required_thread_count > 0 ? 1 : -1);
+#else
+ ReSchedule(0);
+#endif /* GRAN */
+
+ case T_REQUIRED:
+ required_thread_count--;
+ break;
+
+ case T_ADVISORY:
+ advisory_thread_count--;
+ break;
+
+ case T_FAIL:
+ EXIT(EXIT_FAILURE);
+
+ default:
+ fflush(stdout);
+ fprintf(stderr, "EndThread: %lx unknown\n", (W_) TSO_TYPE(CurrentTSO));
+ EXIT(EXIT_FAILURE);
+ }
+
+ /* Reuse stack object space */
+ ASSERT(STKO_LINK(SAVE_StkO) == Nil_closure);
+ STKO_LINK(SAVE_StkO) = AvailableStack;
+ AvailableStack = SAVE_StkO;
+ /* Reuse TSO */
+ TSO_LINK(CurrentTSO) = AvailableTSO;
+ AvailableTSO = CurrentTSO;
+ CurrentTSO = Nil_closure;
+ CurrentRegTable = NULL;
+
+#if defined(GRAN)
+ /* NB: Now ThreadQueueHd is either the next runnable thread on this */
+ /* proc or it's Nil_closure. In the latter case, a FINDWORK will be */
+ /* issued by ReSchedule. */
+ ReSchedule(SAME_THREAD); /* back for more! */
+#else
+ ReSchedule(0); /* back for more! */
+#endif
+}
+\end{code}
+
+%****************************************************************************
+%
+\subsection[thread-blocking]{Local Blocking}
+%
+%****************************************************************************
+
+\begin{code}
+
+#if defined(COUNT)
+void CountnUPDs() { ++nUPDs; }
+void CountnUPDs_old() { ++nUPDs_old; }
+void CountnUPDs_new() { ++nUPDs_new; }
+
+void CountnPAPs() { ++nPAPs; }
+#endif
+
+EXTDATA_RO(BQ_info);
+
+#ifndef GRAN
+/* NB: non-GRAN version ToDo
+ *
+ * AwakenBlockingQueue awakens a list of TSOs and FBQs.
+ */
+
+P_ PendingFetches = Nil_closure;
+
+void
+AwakenBlockingQueue(bqe)
+ P_ bqe;
+{
+ P_ last_tso = NULL;
+
+# ifdef PAR
+ P_ next;
+ TIME now = CURRENT_TIME;
+
+# endif
+
+# ifndef PAR
+ while (bqe != Nil_closure) {
+# else
+ while (IS_MUTABLE(INFO_PTR(bqe))) {
+ switch (INFO_TYPE(INFO_PTR(bqe))) {
+ case INFO_TSO_TYPE:
+# endif
+ if (DO_QP_PROF) {
+ QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
+ }
+# ifdef PAR
+ if (do_gr_profile) {
+ DumpGranEvent(GR_RESUMEQ, bqe);
+ switch (TSO_QUEUE(bqe)) {
+ case Q_BLOCKED:
+ TSO_BLOCKTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
+ break;
+ case Q_FETCHING:
+ TSO_FETCHTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
+ break;
+ default:
+ fflush(stdout);
+ fprintf(stderr, "ABQ: TSO_QUEUE invalid.\n");
+ EXIT(EXIT_FAILURE);
+ }
+ }
+# endif
+ if (last_tso == NULL) {
+ if (RunnableThreadsHd == Nil_closure) {
+ RunnableThreadsHd = bqe;
+ } else {
+ TSO_LINK(RunnableThreadsTl) = bqe;
+ }
+ }
+ last_tso = bqe;
+ bqe = TSO_LINK(bqe);
+# ifdef PAR
+ break;
+ case INFO_BF_TYPE:
+ next = BF_LINK(bqe);
+ BF_LINK(bqe) = PendingFetches;
+ PendingFetches = bqe;
+ bqe = next;
+ if (last_tso != NULL)
+ TSO_LINK(last_tso) = next;
+ break;
+ default:
+ fprintf(stderr, "Unexpected IP (%#lx) in blocking queue at %#lx\n",
+ INFO_PTR(bqe), (W_) bqe);
+ EXIT(EXIT_FAILURE);
+ }
+ }
+#else
+ }
+# endif
+ if (last_tso != NULL) {
+ RunnableThreadsTl = last_tso;
+# ifdef PAR
+ TSO_LINK(last_tso) = Nil_closure;
+# endif
+ }
+}
+#endif /* !GRAN */
+
+#ifdef GRAN
+
+/* NB: GRAN version only ToDo
+ *
+ * AwakenBlockingQueue returns True if we are on the oldmutables list,
+ * so that the update code knows what to do next.
+ */
+
+I_
+AwakenBlockingQueue(node)
+ P_ node;
+{
+ P_ tso = (P_) BQ_ENTRIES(node);
+ P_ prev;
+
+ if(do_gr_sim)
+ {
+ W_ notifytime;
+
+# if defined(COUNT)
+ ++nUPDs;
+ if (tso != Nil_closure)
+ ++nUPDs_BQ;
+# endif
+
+ while(tso != Nil_closure) {
+ W_ proc;
+ assert(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
+
+# if defined(COUNT)
+ ++BQ_lens;
+# endif
+
+ /* Find where the tso lives */
+ proc = where_is(tso);
+
+ if(proc == CurrentProc)
+ notifytime = CurrentTime[CurrentProc] + gran_lunblocktime;
+ else
+ {
+ CurrentTime[CurrentProc] += gran_mpacktime;
+ notifytime = CurrentTime[CurrentProc] + gran_gunblocktime;
+ CurrentTime[CurrentProc] += gran_mtidytime;
+ }
+
+ /* and create a resume message */
+ newevent(proc, CurrentProc, notifytime,
+ RESUMETHREAD,tso,Nil_closure,NULL);
+
+ prev = tso;
+ tso = TSO_LINK(tso);
+ TSO_LINK(prev) = Nil_closure;
+ }
+ }
+ else
+ {
+ if (ThreadQueueHd == Nil_closure)
+ ThreadQueueHd = tso;
+ else
+ TSO_LINK(ThreadQueueTl) = tso;
+
+ while(TSO_LINK(tso) != Nil_closure) {
+ assert(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
+ if (DO_QP_PROF) {
+ QP_Event2(do_qp_prof > 1 ? "RA" : "RG", tso, CurrentTSO);
+ }
+ tso = TSO_LINK(tso);
+ }
+
+ assert(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
+ if (DO_QP_PROF) {
+ QP_Event2(do_qp_prof > 1 ? "RA" : "RG", tso, CurrentTSO);
+ }
+
+ ThreadQueueTl = tso;
+ }
+
+ return MUT_LINK(node) != MUT_NOT_LINKED;
+}
+
+#endif /* GRAN only */
+
+EXTFUN(Continue);
+
+void
+Yield(args)
+W_ args;
+{
+ SAVE_Liveness = args >> 1;
+ TSO_PC1(CurrentTSO) = Continue;
+ if (DO_QP_PROF) {
+ QP_Event1("GR", CurrentTSO);
+ }
+#ifdef PAR
+ if (do_gr_profile) {
+ /* Note that CURRENT_TIME may perform an unsafe call */
+ TSO_EXECTIME(CurrentTSO) += CURRENT_TIME - TSO_BLOCKEDAT(CurrentTSO);
+ }
+#endif
+ ReSchedule(args & 1);
+}
+
+\end{code}
+
+%****************************************************************************
+%
+\subsection[gr-fetch]{Fetching Nodes (GrAnSim only)}
+%
+%****************************************************************************
+
+The following GrAnSim routines simulate the fetching of nodes from a remote
+processor.  We use a one-word bitmask to indicate on which processors a node
+resides.  Thus, moving or copying a node from one processor to another just
+requires an appropriate change in this bitmask (using @SET_GA@).
+Additionally, the clocks have to be updated.
+
+A special case arises when the node that is needed by processor A has been
+moved from a processor B to a processor C between sending out a @FETCH@
+(from A) and its arrival at B. In that case the @FETCH@ has to be forwarded
+to C.
+
+Currently, we only support GRIP-like single closure fetching. We plan to
+incorporate GUM-like packet fetching in the near future.
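+
+The two bitmask updates involved are worth spelling out; this is a
+sketch of what @FetchNode@ below actually does:
+
+\begin{code}
+#if 0	/* sketch only -- see FetchNode below */
+    PROCS(node) |= PE_NUMBER(to);	/* copy: node now on both PEs	*/
+    PROCS(node)  = PE_NUMBER(to);	/* move: node only on PE `to'	*/
+#endif
+\end{code}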
+
+\begin{code}
+#if defined(GRAN)
+
+/* Fetch node "node" from processor "from" to processor "to" */
+
+int
+FetchNode(node,from,to)
+P_ node;
+PROC from, to;
+{
+ assert(to==CurrentProc);
+ if (!IS_LOCAL_TO(PROCS(node),from) &&
+ !IS_LOCAL_TO(PROCS(node),to) )
+ return 1;
+
+ if(IS_NF(INFO_PTR(node))) /* Old: || IS_BQ(node) */
+ PROCS(node) |= PE_NUMBER(to); /* Copy node */
+ else
+ PROCS(node) = PE_NUMBER(to); /* Move node */
+
+ /* Now fetch the children */
+ if(DoGUMMFetching)
+ {
+ fprintf(stderr,"Sorry, GUMM fetching not yet implemented.\n");
+ }
+
+ return 0;
+}
+
+/* --------------------------------------------------
+ Cost of sending a packet of size n = C + P*n
+ where C = packet construction constant,
+ P = cost of packing one word into a packet
+ [Should also account for multiple packets].
+ -------------------------------------------------- */
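+
+/* For example (illustrative numbers only): with C = 1000 cycles and
+   P = 3 cycles per word, a 100-word packet costs 1000 + 3*100 = 1300
+   cycles. */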
+
+void
+HandleFetchRequest(node,p,tso)
+P_ node, tso;
+PROC p;
+{
+ if (IS_LOCAL_TO(PROCS(node),p) ) /* Somebody else moved node already => */
+ { /* start tso */
+ newevent(p,CurrentProc,
+ CurrentTime[CurrentProc] /* +gran_latency */,
+ FETCHREPLY,tso,node,NULL); /* node needed ?? */
+ CurrentTime[CurrentProc] += gran_mtidytime;
+ }
+ else if (IS_LOCAL_TO(PROCS(node),CurrentProc) ) /* Is node still here? */
+ {
+ /* Actual moving/copying of node is done on arrival; see FETCHREPLY */
+ /* Send a reply to the originator */
+ CurrentTime[CurrentProc] += gran_mpacktime;
+
+ newevent(p,CurrentProc,
+ CurrentTime[CurrentProc]+gran_latency,
+ FETCHREPLY,tso,node,NULL); /* node needed ?? */
+
+ CurrentTime[CurrentProc] += gran_mtidytime;
+ }
+ else
+ { /* Qu'vatlh! node has been grabbed by another proc => forward */
+ PROC p_new = where_is(node);
+ TIME fetchtime;
+
+#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+ if (NoForward) {
+ newevent(p,p_new,
+ max(CurrentTime[p_new],CurrentTime[CurrentProc])+gran_latency,
+ FETCHREPLY,tso,node,NULL); /* node needed ?? */
+ CurrentTime[CurrentProc] += gran_mtidytime;
+ return;
+ }
+#endif
+
+#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+	if (debug & 0x2) /* 0x2 should be something like DBG_PRINT_FWD */
+ fprintf(stderr,"Qu'vatlh! node 0x%x has been grabbed by %d (current=%d; demander=%d) @ %d\n",
+ node,p_new,CurrentProc,p,CurrentTime[CurrentProc]);
+#endif
+ /* Prepare FORWARD message to proc p_new */
+ CurrentTime[CurrentProc] += gran_mpacktime;
+
+ fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p_new]) +
+ gran_latency;
+
+ newevent(p_new,p,fetchtime,FETCHNODE,tso,node,NULL);
+
+ CurrentTime[CurrentProc] += gran_mtidytime;
+ }
+}
+#endif
+\end{code}
+
+%****************************************************************************
+%
+\subsection[gr-simulation]{Granularity Simulation}
+%
+%****************************************************************************
+
+\begin{code}
+#if 0 /* moved to GranSim.lc */
+#if defined(GRAN)
+I_ do_gr_sim = 0;
+FILE *gr_file = NULL;
+char gr_filename[32];
+
+init_gr_simulation(rts_argc,rts_argv,prog_argc,prog_argv)
+char *prog_argv[], *rts_argv[];
+int prog_argc, rts_argc;
+{
+ I_ i;
+
+ if(do_gr_sim)
+ {
+ char *extension = do_gr_binary? "gb": "gr";
+ sprintf(gr_filename, "%0.28s.%0.2s", prog_argv[0],extension);
+
+ if ((gr_file = fopen(gr_filename,"w")) == NULL )
+ {
+ fprintf(stderr, "Can't open granularity simulation report file %s\n", gr_filename);
+ exit(EXIT_FAILURE);
+ }
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if(DoReScheduleOnFetch)
+ setbuf(gr_file,NULL);
+#endif
+
+ fputs("Granularity Simulation for ",gr_file);
+ for(i=0; i < prog_argc; ++i)
+ {
+ fputs(prog_argv[i],gr_file);
+ fputc(' ',gr_file);
+ }
+
+ if(rts_argc > 0)
+ {
+ fputs("+RTS ",gr_file);
+
+ for(i=0; i < rts_argc; ++i)
+ {
+ fputs(rts_argv[i],gr_file);
+ fputc(' ',gr_file);
+ }
+ }
+
+ fputs("\n\n--------------------\n\n",gr_file);
+
+ fputs("General Parameters:\n\n",gr_file);
+
+ fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads%s\n",
+ max_proc,DoFairSchedule?"Fair":"Unfair",
+ DoThreadMigration?"":"Don't ",
+ DoThreadMigration && DoStealThreadsFirst?" Before Sparks":"",
+ DoReScheduleOnFetch?"":"Don't ");
+
+ fprintf(gr_file, "%s, Fetch %s in Each Packet\n",
+ SimplifiedFetch?"Simplified Fetch":(DoReScheduleOnFetch?"Reschedule on Fetch":"Block on Fetch"),
+ DoGUMMFetching?"Many Closures":"Exactly One Closure");
+ fprintf(gr_file, "Fetch Strategy(%u): If outstanding fetches %s\n",
+ FetchStrategy,
+ FetchStrategy==1?"only run runnable threads (don't create new ones)":
+ FetchStrategy==2?"create threads only from local sparks":
+ FetchStrategy==3?"create threads from local or global sparks":
+ FetchStrategy==4?"create sparks and steal threads if necessary":
+ "unknown");
+
+ fprintf(gr_file, "Thread Creation Time %lu, Thread Queue Time %lu\n",
+ gran_threadcreatetime,gran_threadqueuetime);
+ fprintf(gr_file, "Thread DeSchedule Time %lu, Thread Schedule Time %lu\n",
+ gran_threaddescheduletime,gran_threadscheduletime);
+ fprintf(gr_file, "Thread Context-Switch Time %lu\n",
+ gran_threadcontextswitchtime);
+ fputs("\n\n--------------------\n\n",gr_file);
+
+ fputs("Communication Metrics:\n\n",gr_file);
+ fprintf(gr_file,
+ "Latency %lu (1st) %lu (rest), Fetch %lu, Notify %lu (Global) %lu (Local)\n",
+ gran_latency, gran_additional_latency, gran_fetchtime,
+ gran_gunblocktime, gran_lunblocktime);
+ fprintf(gr_file,
+ "Message Creation %lu (+ %lu after send), Message Read %lu\n",
+ gran_mpacktime, gran_mtidytime, gran_munpacktime);
+ fputs("\n\n--------------------\n\n",gr_file);
+
+ fputs("Instruction Metrics:\n\n",gr_file);
+ fprintf(gr_file,"Arith %lu, Branch %lu, Load %lu, Store %lu, Float %lu, Alloc %lu\n",
+ gran_arith_cost, gran_branch_cost,
+ gran_load_cost, gran_store_cost,gran_float_cost,gran_heapalloc_cost);
+ fputs("\n\n++++++++++++++++++++\n\n",gr_file);
+ }
+
+ if(do_gr_binary)
+ grputw(sizeof(TIME));
+
+ Idlers = max_proc;
+ return(0);
+}
+
+void end_gr_simulation() {
+ if(do_gr_sim)
+ {
+ fprintf(stderr,"The simulation is finished. Look at %s for details.\n",
+ gr_filename);
+ fclose(gr_file);
+ }
+}
+# endif /* GRAN */
+#endif /* 0 */
+\end{code}
+
+%****************************************************************************
+%
+\subsection[qp-profile]{Quasi-Parallel Profiling}
+%
+%****************************************************************************
+
+\begin{code}
+#ifndef PAR
+
+I_ do_qp_prof;
+FILE *qp_file;
+
+/* *Virtual* Time in milliseconds */
+long
+qp_elapsed_time(STG_NO_ARGS)
+{
+ extern StgDouble usertime();
+
+ return ((long) (usertime() * 1e3));
+}
+
+static void
+init_qp_profiling(STG_NO_ARGS)
+{
+ I_ i;
+ char qp_filename[32];
+
+ sprintf(qp_filename, "%0.24s.qp", prog_argv[0]);
+ if ((qp_file = fopen(qp_filename,"w")) == NULL ) {
+ fprintf(stderr, "Can't open quasi-parallel profile report file %s\n",
+ qp_filename);
+ do_qp_prof = 0;
+ } else {
+ fputs(prog_argv[0], qp_file);
+ for(i = 1; prog_argv[i]; i++) {
+ fputc(' ', qp_file);
+ fputs(prog_argv[i], qp_file);
+ }
+ fprintf(qp_file, " +RTS -C%d -t%d\n", contextSwitchTime, MaxThreads);
+ fputs(time_str(), qp_file);
+ fputc('\n', qp_file);
+ }
+}
+
+void
+QP_Event0(tid, node)
+I_ tid;
+P_ node;
+{
+ fprintf(qp_file, "%lu ** %lu 0x%lx\n", qp_elapsed_time(), tid, INFO_PTR(node));
+}
+
+void
+QP_Event1(event, tso)
+char *event;
+P_ tso;
+{
+ fprintf(qp_file, "%lu %s %lu 0x%lx\n", qp_elapsed_time(), event,
+ TSO_ID(tso), TSO_NAME(tso));
+}
+
+void
+QP_Event2(event, tso1, tso2)
+char *event;
+P_ tso1, tso2;
+{
+ fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n", qp_elapsed_time(), event,
+ TSO_ID(tso1), TSO_NAME(tso1), TSO_ID(tso2), TSO_NAME(tso2));
+}
+
+#endif /* !PAR */
+\end{code}
+
+%****************************************************************************
+%
+\subsection[entry-points]{Routines directly called from Haskell world}
+%
+%****************************************************************************
+
+The @GranSim...@ routines in here are called directly, via macros, from
+the threaded world.
+
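+Purely as an illustration (the real macro names live in the threaded-world
+headers, so the ones used here are made up), an instrumented basic block
+might expand to something like:
+
+\begin{verbatim}
+GRAN_ALLOCATE(n, node, liveness);  /* heap check: calls GranSimAllocate  */
+GRAN_EXEC(ar, br, ld, st, fl);     /* block costs: calls GranSimExec     */
+GRAN_FETCH(node);                  /* possibly-remote node: GranSimFetch */
+\end{verbatim}
+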
+First some auxiliary routines.
+
+\begin{code}
+#ifdef GRAN
+/* Take the current thread off the thread queue and thereby activate the */
+/* next thread. It's assumed that the next ReSchedule after this uses */
+/* NEW_THREAD as param. */
+/* This fct is called from GranSimBlock and GranSimFetch */
+
+void
+ActivateNextThread ()
+{
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if(ThreadQueueHd != CurrentTSO) {
+ fprintf(stderr,"Error: ThreadQueueHd != CurrentTSO in ActivateNextThread\n");
+ exit(99);
+ }
+#endif
+
+ ThreadQueueHd = TSO_LINK(ThreadQueueHd);
+ if(ThreadQueueHd==Nil_closure) {
+ MAKE_IDLE(CurrentProc);
+ ThreadQueueTl = Nil_closure;
+ } else if (do_gr_profile) {
+ CurrentTime[CurrentProc] += gran_threadcontextswitchtime;
+ DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
+ }
+}
+\end{code}
+
+Now the main stg-called routines:
+
+\begin{code}
+/* ------------------------------------------------------------------------ */
+/* The following GranSim... fcts are stg-called from the threaded world. */
+/* ------------------------------------------------------------------------ */
+
+/* Called from HEAP_CHK -- NB: node and liveness are junk here now.
+ They are left temporarily to avoid complete recompilation.
+ KH
+*/
+void
+GranSimAllocate(n,node,liveness)
+I_ n;
+P_ node;
+W_ liveness;
+{
+ TSO_ALLOCS(CurrentTSO) += n;
+ ++TSO_BASICBLOCKS(CurrentTSO);
+
+ TSO_EXECTIME(CurrentTSO) += gran_heapalloc_cost;
+ CurrentTime[CurrentProc] += gran_heapalloc_cost;
+}
+
+/*
+ Subtract the values added above, if a heap check fails and
+ so has to be redone.
+*/
+void
+GranSimUnallocate(n,node,liveness)
+W_ n;
+P_ node;
+W_ liveness;
+{
+ TSO_ALLOCS(CurrentTSO) -= n;
+ --TSO_BASICBLOCKS(CurrentTSO);
+
+ TSO_EXECTIME(CurrentTSO) -= gran_heapalloc_cost;
+ CurrentTime[CurrentProc] -= gran_heapalloc_cost;
+}
+
+void
+GranSimExec(ariths,branches,loads,stores,floats)
+W_ ariths,branches,loads,stores,floats;
+{
+ W_ cost = gran_arith_cost*ariths + gran_branch_cost*branches + gran_load_cost * loads +
+ gran_store_cost*stores + gran_float_cost*floats;
+
+ TSO_EXECTIME(CurrentTSO) += cost;
+ CurrentTime[CurrentProc] += cost;
+}
+
+
+/*
+ Fetch the node if it isn't local
+ -- result indicates whether fetch has been done.
+
+ This is GRIP-style single item fetching.
+*/
+
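+/* A rough picture of the message traffic being simulated (cf.
+ HandleFetchRequest above):
+
+ demander PE --FETCHNODE--> supposed owner
+ owner --FETCHREPLY--> demander (node was still local)
+ owner --FETCHNODE--> new owner (node has moved on: forward)
+*/
+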
+I_
+GranSimFetch(node /* , liveness_mask */ )
+P_ node;
+/* I_ liveness_mask; */
+{
+ /* Note: once a node has been fetched, this test will be passed */
+ if(!IS_LOCAL_TO(PROCS(node),CurrentProc) )
+ {
+ /* I suppose we shouldn't do this for CAFs? -- KH */
+ /* Should reschedule if the latency is high */
+ /* We should add mpacktime to the remote PE for the reply,
+ but we don't know who owns the node
+ */
+ /* if(DYNAMIC_POINTER(node)) */ /* For 0.22; gone in 0.23 !!! */
+ {
+ PROC p = where_is(node);
+ TIME fetchtime;
+
+#ifdef GRAN_CHECK
+ if ( ( debug & 0x40 ) &&
+ p == CurrentProc )
+ fprintf(stderr,"GranSimFetch: Trying to fetch from own processor%u\n", p);
+#endif /* GRAN_CHECK */
+
+ CurrentTime[CurrentProc] += gran_mpacktime;
+
+ ++TSO_FETCHCOUNT(CurrentTSO);
+ TSO_FETCHTIME(CurrentTSO) += gran_fetchtime;
+
+ if (SimplifiedFetch)
+ {
+ FetchNode(node,CurrentProc);
+ CurrentTime[CurrentProc] += gran_mtidytime+gran_fetchtime+
+ gran_munpacktime;
+ return(1);
+ }
+
+ fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p]) +
+ gran_latency;
+
+ newevent(p,CurrentProc,fetchtime,FETCHNODE,CurrentTSO,node,NULL);
+ ++OutstandingFetches[CurrentProc];
+
+ /* About to block */
+ TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[p];
+
+ if (DoReScheduleOnFetch)
+ {
+
+ /* Remove CurrentTSO from the queue
+ -- assumes head of queue == CurrentTSO */
+ if(!DoFairSchedule)
+ {
+ if(do_gr_profile)
+ DumpGranEventAndNode(GR_FETCH,CurrentTSO,node,p);
+
+ ActivateNextThread();
+
+#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+ if (debug & 0x10) {
+ if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
+ fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
+ CurrentTSO,CurrentTime[CurrentProc]);
+ exit (99);
+ } else {
+ TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
+ }
+
+ }
+#endif
+
+ TSO_LINK(CurrentTSO) = Nil_closure;
+ /* CurrentTSO = Nil_closure; */
+
+ /* ThreadQueueHd is now the next TSO to schedule or NULL */
+ /* CurrentTSO is pointed to by the FETCHNODE event */
+ }
+ else /* DoFairSchedule */
+ {
+ /* Remove from the tail of the thread queue */
+ fprintf(stderr,"Reschedule-on-fetch is not yet compatible with fair scheduling\n");
+ exit(99);
+ }
+ }
+ else /* !DoReScheduleOnFetch */
+ {
+ /* Note: CurrentProc is still busy as it's blocked on fetch */
+ if(do_gr_profile)
+ DumpGranEventAndNode(GR_FETCH,CurrentTSO,node,p);
+
+#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+ if (debug & 0x04)
+ BlockedOnFetch[CurrentProc] = CurrentTSO; /*- StgTrue; -*/
+
+ if (debug & 0x10) {
+ if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
+ fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
+ CurrentTSO,CurrentTime[CurrentProc]);
+ exit (99);
+ } else {
+ TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
+ }
+
+ CurrentTSO = Nil_closure;
+ }
+#endif
+ }
+
+ CurrentTime[CurrentProc] += gran_mtidytime;
+
+ /* Rescheduling is necessary */
+ NeedToReSchedule = StgTrue;
+
+ return(1);
+ }
+ }
+ return(0);
+}
+
+void
+GranSimSpark(local,node)
+W_ local;
+P_ node;
+{
+ ++SparksAvail;
+ if(do_sp_profile)
+ DumpSparkGranEvent(SP_SPARK,node);
+
+ /* Force the PE to take notice of the spark */
+ if(DoAlwaysCreateThreads)
+ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ FINDWORK,Nil_closure,Nil_closure,NULL);
+
+ if(local)
+ ++TSO_LOCALSPARKS(CurrentTSO);
+ else
+ ++TSO_GLOBALSPARKS(CurrentTSO);
+}
+
+void
+GranSimSparkAt(spark,where,identifier)
+sparkq spark;
+P_ where; /* This should be a node; alternatively could be a GA */
+I_ identifier;
+{
+ PROC p = where_is(where);
+ TIME exporttime;
+
+ if(do_sp_profile)
+ DumpSparkGranEvent(SP_SPARKAT,SPARK_NODE(spark));
+
+ CurrentTime[CurrentProc] += gran_mpacktime;
+
+ exporttime = (CurrentTime[p] > CurrentTime[CurrentProc]?
+ CurrentTime[p]: CurrentTime[CurrentProc])
+ + gran_latency;
+
+ newevent(p,CurrentProc,exporttime,MOVESPARK,Nil_closure,Nil_closure,spark);
+
+ CurrentTime[CurrentProc] += gran_mtidytime;
+
+ ++TSO_GLOBALSPARKS(CurrentTSO);
+}
+
+void
+GranSimBlock()
+{
+ if(do_gr_profile)
+ DumpGranEvent(GR_BLOCK,CurrentTSO);
+
+ ++TSO_BLOCKCOUNT(CurrentTSO);
+ TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[CurrentProc];
+ ActivateNextThread();
+}
+
+#endif /* GRAN */
+
+\end{code}
+
+%****************************************************************************
+%
+\subsection[gc-GrAnSim]{Garbage collection routines for GrAnSim objects}
+%
+%****************************************************************************
+
+Garbage collection code for the event queue. We walk the event queue
+so that if the only reference to a TSO is in some event (e.g. RESUME),
+the TSO is still preserved.
+
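+For instance, if the collector saved event roots first and spark roots
+second, the restores would have to run in the opposite order (a sketch
+only -- the actual call sequence lives in the storage manager):
+
+\begin{verbatim}
+i = SaveEventRoots(i);      /* push event roots onto roots[] */
+i = SaveSparkRoots(i);      /* then spark roots              */
+   /* ... garbage collection updates roots[0..i-1] ... */
+i = RestoreSparkRoots(i);   /* pop spark roots first         */
+i = RestoreEventRoots(i);   /* then event roots              */
+\end{verbatim}
+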
+\begin{code}
+#ifdef GRAN
+
+extern smInfo StorageMgrInfo;
+
+I_
+SaveEventRoots(num_ptr_roots)
+I_ num_ptr_roots;
+{
+ eventq event = EventHd;
+ while(event != NULL)
+ {
+ if(EVENT_TYPE(event) == RESUMETHREAD ||
+ EVENT_TYPE(event) == MOVETHREAD ||
+ EVENT_TYPE(event) == STARTTHREAD )
+ StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
+
+ else if(EVENT_TYPE(event) == MOVESPARK)
+ StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(EVENT_SPARK(event));
+
+ else if (EVENT_TYPE(event) == FETCHNODE ||
+ EVENT_TYPE(event) == FETCHREPLY )
+ {
+ StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
+ StorageMgrInfo.roots[num_ptr_roots++] = EVENT_NODE(event);
+ }
+
+ event = EVENT_NEXT(event);
+ }
+ return(num_ptr_roots);
+}
+
+I_
+SaveSparkRoots(num_ptr_roots)
+I_ num_ptr_roots;
+{
+ sparkq spark, /* prev, */ disposeQ=NULL;
+ PROC proc;
+ I_ i, sparkroots=0, prunedSparks=0;
+
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if ( debug & 0x40 )
+ fprintf(stderr,"D> Saving spark roots for GC ...\n");
+#endif
+
+ for(proc = 0; proc < max_proc; ++proc) {
+ for(i = 0; i < SPARK_POOLS; ++i) {
+ for(/* prev = &PendingSparksHd[proc][i],*/ spark = PendingSparksHd[proc][i];
+ spark != NULL;
+ /* prev = &SPARK_NEXT(spark), */ spark = SPARK_NEXT(spark))
+ {
+ if(++sparkroots <= MAX_SPARKS)
+ {
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if ( debug & 0x40 )
+ fprintf(main_statsfile,"Saving Spark Root %d(proc: %d; pool: %d) -- 0x%lx\n",
+ num_ptr_roots,proc,i,SPARK_NODE(spark));
+#endif
+ StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(spark);
+ }
+ else
+ {
+ SPARK_NODE(spark) = Nil_closure;
+ if (prunedSparks==0) {
+ disposeQ = spark;
+ /*
+ *prev = NULL;
+ */
+ }
+ prunedSparks++;
+ }
+ } /* forall spark ... */
+ if (prunedSparks>0) {
+ fprintf(main_statsfile,"Pruning and disposing %lu excess sparks (> %lu) on proc %d for GC purposes\n",
+ prunedSparks,MAX_SPARKS,proc);
+ if (disposeQ == PendingSparksHd[proc][i])
+ PendingSparksHd[proc][i] = NULL;
+ else
+ SPARK_NEXT(SPARK_PREV(disposeQ)) = NULL;
+ DisposeSparkQ(disposeQ);
+ prunedSparks = 0;
+ disposeQ = NULL;
+ }
+ } /* forall i ... */
+ } /*forall proc .. */
+
+ return(num_ptr_roots);
+}
+
+/*
+ GC roots must be restored in *reverse order*.
+ The recursion is a little ugly, but is better than
+ in-place pointer reversal.
+*/
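+
+/* Illustration: if SaveEventRoots pushed roots for events e1, e2, e3 (in
+ queue order), RestoreEvtRoots recurses to the end of the queue first and
+ assigns on the way back out, popping e3, e2, e1 -- stack discipline. */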
+
+static I_
+RestoreEvtRoots(event,num_ptr_roots)
+eventq event;
+I_ num_ptr_roots;
+{
+ if(event != NULL)
+ {
+ num_ptr_roots = RestoreEvtRoots(EVENT_NEXT(event),num_ptr_roots);
+
+ if(EVENT_TYPE(event) == RESUMETHREAD ||
+ EVENT_TYPE(event) == MOVETHREAD ||
+ EVENT_TYPE(event) == STARTTHREAD )
+ EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
+
+ else if(EVENT_TYPE(event) == MOVESPARK )
+ SPARK_NODE(EVENT_SPARK(event)) = StorageMgrInfo.roots[--num_ptr_roots];
+
+ else if (EVENT_TYPE(event) == FETCHNODE ||
+ EVENT_TYPE(event) == FETCHREPLY )
+ {
+ EVENT_NODE(event) = StorageMgrInfo.roots[--num_ptr_roots];
+ EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
+ }
+ }
+
+ return(num_ptr_roots);
+}
+
+I_
+RestoreEventRoots(num_ptr_roots)
+I_ num_ptr_roots;
+{
+ return(RestoreEvtRoots(EventHd,num_ptr_roots));
+}
+
+static I_
+RestoreSpkRoots(spark,num_ptr_roots,sparkroots)
+sparkq spark;
+I_ num_ptr_roots, sparkroots;
+{
+ if(spark != NULL)
+ {
+ num_ptr_roots = RestoreSpkRoots(SPARK_NEXT(spark),num_ptr_roots,++sparkroots);
+ if(sparkroots <= MAX_SPARKS)
+ {
+ P_ n = SPARK_NODE(spark);
+ SPARK_NODE(spark) = StorageMgrInfo.roots[--num_ptr_roots];
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if ( debug & 0x40 )
+ fprintf(main_statsfile,"Restoring Spark Root %d -- new: 0x%lx \n",
+ num_ptr_roots,SPARK_NODE(spark));
+#endif
+ }
+ else
+ { /* NB: braces needed -- without GRAN_CHECK the else would
+ otherwise have no statement at all */
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if ( debug & 0x40 )
+ fprintf(main_statsfile,"Error in RestoreSpkRoots (%d; @ spark 0x%x): More than MAX_SPARKS (%d) sparks\n",
+ num_ptr_roots,SPARK_NODE(spark),MAX_SPARKS);
+#endif
+ }
+
+ }
+
+ return(num_ptr_roots);
+}
+
+I_
+RestoreSparkRoots(num_ptr_roots)
+I_ num_ptr_roots;
+{
+ PROC proc;
+ I_ i;
+
+ /* NB: PROC is currently an unsigned datatype i.e. proc>=0 is always */
+ /* true ((PROC)-1 == (PROC)255). So we need a second clause in the head */
+ /* of the for loop. For i that is currently not necessary. C is really */
+ /* impressive in datatype abstraction! -- HWL */
+
+ for(proc = max_proc - 1; (proc >= 0) && (proc < max_proc); --proc) {
+ for(i = SPARK_POOLS - 1; (i >= 0) && (i < SPARK_POOLS) ; --i) {
+ num_ptr_roots = RestoreSpkRoots(PendingSparksHd[proc][i],num_ptr_roots,0);
+ }
+ }
+ return(num_ptr_roots);
+}
+
+#endif /* GRAN */
+
+\end{code}
+
+%****************************************************************************
+%
+\subsection[GrAnSim-profile]{Writing profiling info for GrAnSim}
+%
+%****************************************************************************
+
+Event dumping routines.
+
+\begin{code}
+#ifdef GRAN
+
+DumpGranEvent(name,tso)
+enum gran_event_types name;
+P_ tso;
+{
+ DumpRawGranEvent(CurrentProc,name,TSO_ID(tso));
+}
+
+DumpSparkGranEvent(name,id)
+enum gran_event_types name;
+W_ id;
+{
+ DumpRawGranEvent(CurrentProc,name,id);
+}
+
+DumpGranEventAndNode(name,tso,node,proc)
+enum gran_event_types name;
+P_ tso, node;
+PROC proc;
+{
+ PROC pe = CurrentProc;
+ W_ id = TSO_ID(tso);
+
+ if(name > GR_EVENT_MAX)
+ name = GR_EVENT_MAX;
+
+ if(do_gr_binary)
+ {
+ grputw(name);
+ grputw(pe);
+ grputw(CurrentTime[CurrentProc]);
+ grputw(id);
+ }
+ else
+ fprintf(gr_file,"PE %2u [%lu]: %s %lx \t0x%lx\t(from %2u)\n",
+ pe,CurrentTime[CurrentProc],gran_event_names[name],id,node,proc);
+}
+
+DumpRawGranEvent(pe,name,id)
+PROC pe;
+enum gran_event_types name;
+W_ id;
+{
+ if(name > GR_EVENT_MAX)
+ name = GR_EVENT_MAX;
+
+ if(do_gr_binary)
+ {
+ grputw(name);
+ grputw(pe);
+ grputw(CurrentTime[CurrentProc]);
+ grputw(id);
+ }
+ else
+ fprintf(gr_file,"PE %2u [%lu]: %s %lx\n",
+ pe,CurrentTime[CurrentProc],gran_event_names[name],id);
+}
+
+DumpGranInfo(pe,tso,mandatory_thread)
+PROC pe;
+P_ tso;
+I_ mandatory_thread;
+{
+ if(do_gr_binary)
+ {
+ grputw(GR_END);
+ grputw(pe);
+ grputw(CurrentTime[CurrentProc]);
+ grputw(TSO_ID(tso));
+ grputw(TSO_SPARKNAME(tso));
+ grputw(TSO_STARTEDAT(tso));
+ grputw(TSO_EXPORTED(tso));
+ grputw(TSO_BASICBLOCKS(tso));
+ grputw(TSO_ALLOCS(tso));
+ grputw(TSO_EXECTIME(tso));
+ grputw(TSO_BLOCKTIME(tso));
+ grputw(TSO_BLOCKCOUNT(tso));
+ grputw(TSO_FETCHTIME(tso));
+ grputw(TSO_FETCHCOUNT(tso));
+ grputw(TSO_LOCALSPARKS(tso));
+ grputw(TSO_GLOBALSPARKS(tso));
+ grputw(mandatory_thread);
+ }
+ else
+ {
+ /* NB: DumpGranEvent cannot be used because PE may be wrong (as well as the extra info) */
+ fprintf(gr_file,"PE %2u [%lu]: END %lx, SN %lu, ST %lu, EXP %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu), LS %lu, GS %lu, MY %c\n"
+ ,pe
+ ,CurrentTime[CurrentProc]
+ ,TSO_ID(tso)
+ ,TSO_SPARKNAME(tso)
+ ,TSO_STARTEDAT(tso)
+ ,TSO_EXPORTED(tso)?'T':'F'
+ ,TSO_BASICBLOCKS(tso)
+ ,TSO_ALLOCS(tso)
+ ,TSO_EXECTIME(tso)
+ ,TSO_BLOCKTIME(tso)
+ ,TSO_BLOCKCOUNT(tso)
+ ,TSO_FETCHTIME(tso)
+ ,TSO_FETCHCOUNT(tso)
+ ,TSO_LOCALSPARKS(tso)
+ ,TSO_GLOBALSPARKS(tso)
+ ,mandatory_thread?'T':'F'
+ );
+ }
+}
+
+DumpTSO(tso)
+P_ tso;
+{
+ fprintf(stderr,"TSO 0x%lx, NAME 0x%lx, ID %lu, LINK 0x%lx, TYPE %s\n"
+ ,tso
+ ,TSO_NAME(tso)
+ ,TSO_ID(tso)
+ ,TSO_LINK(tso)
+ ,TSO_TYPE(tso)==T_MAIN?"MAIN":
+ TSO_TYPE(tso)==T_FAIL?"FAIL":
+ TSO_TYPE(tso)==T_REQUIRED?"REQUIRED":
+ TSO_TYPE(tso)==T_ADVISORY?"ADVISORY":
+ "???"
+ );
+
+ fprintf(stderr,"PC (0x%lx,0x%lx), ARG (0x%lx,0x%lx), SWITCH %lx0x\n"
+ ,TSO_PC1(tso)
+ ,TSO_PC2(tso)
+ ,TSO_ARG1(tso)
+ ,TSO_ARG2(tso)
+ ,TSO_SWITCH(tso)
+ );
+
+ fprintf(gr_file,"SN %lu, ST %lu, GBL %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu) LS %lu, GS %lu\n"
+ ,TSO_SPARKNAME(tso)
+ ,TSO_STARTEDAT(tso)
+ ,TSO_EXPORTED(tso)?'T':'F'
+ ,TSO_BASICBLOCKS(tso)
+ ,TSO_ALLOCS(tso)
+ ,TSO_EXECTIME(tso)
+ ,TSO_BLOCKTIME(tso)
+ ,TSO_BLOCKCOUNT(tso)
+ ,TSO_FETCHTIME(tso)
+ ,TSO_FETCHCOUNT(tso)
+ ,TSO_LOCALSPARKS(tso)
+ ,TSO_GLOBALSPARKS(tso)
+ );
+}
+
+/*
+ Output a terminate event and an 8-byte time.
+*/
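+/* (The time is always written as exactly 8 bytes: with a 4-byte TIME the
+ high word goes out as four zero bytes first, so the trailer has a fixed
+ width.) */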
+
+grterminate(v)
+TIME v;
+{
+ DumpGranEvent(GR_TERMINATE,0);
+
+ if(sizeof(TIME)==4)
+ {
+ putc('\0',gr_file);
+ putc('\0',gr_file);
+ putc('\0',gr_file);
+ putc('\0',gr_file);
+ }
+ else
+ {
+ putc(v >> 56l,gr_file);
+ putc((v >> 48l)&0xffl,gr_file);
+ putc((v >> 40l)&0xffl,gr_file);
+ putc((v >> 32l)&0xffl,gr_file);
+ }
+ putc((v >> 24l)&0xffl,gr_file);
+ putc((v >> 16l)&0xffl,gr_file);
+ putc((v >> 8l)&0xffl,gr_file);
+ putc(v&0xffl,gr_file);
+}
+
+/*
+ Length-coded output: first 3 bits contain length coding
+
+ 00x 1 byte
+ 01x 2 bytes
+ 10x 4 bytes
+ 110 8 bytes
+ 111 5 or 9 bytes
+*/
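+
+/* Worked example: v = 0x1234 fits in 14 bits, so it goes out in the
+ 2-byte form: (0x1234 >> 8) | 0x40 = 0x52, then 0x1234 & 0xff = 0x34.
+ A reader sees top bits 01, i.e. 2 bytes, and reconstructs
+ ((0x52 & 0x3f) << 8) | 0x34 = 0x1234. */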
+
+grputw(v)
+TIME v;
+{
+ if(v <= 0x3fl)
+ {
+ fputc(v & 0x3f,gr_file);
+ }
+
+ else if (v <= 0x3fffl)
+ {
+ fputc((v >> 8l)|0x40l,gr_file);
+ fputc(v&0xffl,gr_file);
+ }
+
+ else if (v <= 0x3fffffffl)
+ {
+ fputc((v >> 24l)|0x80l,gr_file);
+ fputc((v >> 16l)&0xffl,gr_file);
+ fputc((v >> 8l)&0xffl,gr_file);
+ fputc(v&0xffl,gr_file);
+ }
+
+ else if (sizeof(TIME) == 4)
+ {
+ fputc(0x70,gr_file);
+ fputc((v >> 24l)&0xffl,gr_file);
+ fputc((v >> 16l)&0xffl,gr_file);
+ fputc((v >> 8l)&0xffl,gr_file);
+ fputc(v&0xffl,gr_file);
+ }
+
+ else
+ {
+ if (v <= 0x3fffffffffffffl)
+ putc((v >> 56l)|0x60l,gr_file);
+ else
+ {
+ putc(0x70,gr_file);
+ putc((v >> 56l)&0xffl,gr_file);
+ }
+
+ putc((v >> 48l)&0xffl,gr_file);
+ putc((v >> 40l)&0xffl,gr_file);
+ putc((v >> 32l)&0xffl,gr_file);
+ putc((v >> 24l)&0xffl,gr_file);
+ putc((v >> 16l)&0xffl,gr_file);
+ putc((v >> 8l)&0xffl,gr_file);
+ putc(v&0xffl,gr_file);
+ }
+}
+#endif /* GRAN */
+
+\end{code}
+
+%****************************************************************************
+%
+\subsection[GrAnSim-debug]{Debugging routines for GrAnSim}
+%
+%****************************************************************************
+
+Debugging routines, mainly for GrAnSim. They should really be in a separate file.
+
+The first couple of routines are general ones (look also into
+c-as-asm/StgDebug.lc).
+
+\begin{code}
+
+#define NULL_REG_MAP /* Not threaded */
+#include "stgdefs.h"
+
+char *
+info_hdr_type(info_ptr)
+W_ info_ptr;
+{
+#if ! defined(PAR) && !defined(GRAN)
+ switch (INFO_TAG(info_ptr))
+ {
+ case INFO_OTHER_TAG:
+ return("OTHER_TAG");
+/* case INFO_IND_TAG:
+ return("IND_TAG");
+*/ default:
+ return("TAG<n>");
+ }
+#else /* PAR || GRAN */
+ switch(INFO_TYPE(info_ptr))
+ {
+ case INFO_SPEC_U_TYPE:
+ return("SPECU");
+
+ case INFO_SPEC_N_TYPE:
+ return("SPECN");
+
+ case INFO_GEN_U_TYPE:
+ return("GENU");
+
+ case INFO_GEN_N_TYPE:
+ return("GENN");
+
+ case INFO_DYN_TYPE:
+ return("DYN");
+
+ /*
+ case INFO_DYN_TYPE_N:
+ return("DYNN");
+
+ case INFO_DYN_TYPE_U:
+ return("DYNU");
+ */
+
+ case INFO_TUPLE_TYPE:
+ return("TUPLE");
+
+ case INFO_DATA_TYPE:
+ return("DATA");
+
+ case INFO_MUTUPLE_TYPE:
+ return("MUTUPLE");
+
+ case INFO_IMMUTUPLE_TYPE:
+ return("IMMUTUPLE");
+
+ case INFO_STATIC_TYPE:
+ return("STATIC");
+
+ case INFO_CONST_TYPE:
+ return("CONST");
+
+ case INFO_CHARLIKE_TYPE:
+ return("CHAR");
+
+ case INFO_INTLIKE_TYPE:
+ return("INT");
+
+ case INFO_BH_TYPE:
+ return("BHOLE");
+
+ case INFO_IND_TYPE:
+ return("IND");
+
+ case INFO_CAF_TYPE:
+ return("CAF");
+
+ case INFO_FETCHME_TYPE:
+ return("FETCHME");
+
+ case INFO_BQ_TYPE:
+ return("BQ");
+
+ /*
+ case INFO_BQENT_TYPE:
+ return("BQENT");
+ */
+
+ case INFO_TSO_TYPE:
+ return("TSO");
+
+ case INFO_STKO_TYPE:
+ return("STKO");
+
+ default:
+ fprintf(stderr,"Unknown header type %lu\n",INFO_TYPE(info_ptr));
+ return("??");
+ }
+#endif /* PAR || GRAN */
+}
+
+/*
+@var_hdr_size@ computes the size of the variable header for a closure.
+*/
+
+I_
+var_hdr_size(node)
+P_ node;
+{
+ switch(INFO_TYPE(INFO_PTR(node)))
+ {
+ case INFO_SPEC_U_TYPE: return(0); /* by decree */
+ case INFO_SPEC_N_TYPE: return(0);
+ case INFO_GEN_U_TYPE: return(GEN_VHS);
+ case INFO_GEN_N_TYPE: return(GEN_VHS);
+ case INFO_DYN_TYPE: return(DYN_VHS);
+ /*
+ case INFO_DYN_TYPE_N: return(DYN_VHS);
+ case INFO_DYN_TYPE_U: return(DYN_VHS);
+ */
+ case INFO_TUPLE_TYPE: return(TUPLE_VHS);
+ case INFO_DATA_TYPE: return(DATA_VHS);
+ case INFO_MUTUPLE_TYPE: return(MUTUPLE_VHS);
+ case INFO_IMMUTUPLE_TYPE: return(MUTUPLE_VHS); /* same layout */
+ case INFO_STATIC_TYPE: return(STATIC_VHS);
+ case INFO_CONST_TYPE: return(0);
+ case INFO_CHARLIKE_TYPE: return(0);
+ case INFO_INTLIKE_TYPE: return(0);
+ case INFO_BH_TYPE: return(0);
+ case INFO_IND_TYPE: return(0);
+ case INFO_CAF_TYPE: return(0);
+ case INFO_FETCHME_TYPE: return(0);
+ case INFO_BQ_TYPE: return(0);
+ /*
+ case INFO_BQENT_TYPE: return(0);
+ */
+ case INFO_TSO_TYPE: return(TSO_VHS);
+ case INFO_STKO_TYPE: return(STKO_VHS);
+ default:
+ fprintf(stderr,"Unknown info type 0x%lx (%lu)\n", INFO_PTR(node),
+ INFO_TYPE(INFO_PTR(node)));
+ return(0);
+ }
+}
+
+
+/* Determine the size and number of pointers for this kind of closure */
+void
+size_and_ptrs(node,size,ptrs)
+P_ node;
+W_ *size, *ptrs;
+{
+ switch(INFO_TYPE(INFO_PTR(node)))
+ {
+ case INFO_SPEC_U_TYPE:
+ case INFO_SPEC_N_TYPE:
+ *size = INFO_SIZE(INFO_PTR(node)); /* New for 0.24; check */
+ *ptrs = INFO_NoPTRS(INFO_PTR(node)); /* that! -- HWL */
+ /*
+ *size = SPEC_CLOSURE_SIZE(node);
+ *ptrs = SPEC_CLOSURE_NoPTRS(node);
+ */
+ break;
+
+ case INFO_GEN_U_TYPE:
+ case INFO_GEN_N_TYPE:
+ *size = GEN_CLOSURE_SIZE(node);
+ *ptrs = GEN_CLOSURE_NoPTRS(node);
+ break;
+
+ /*
+ case INFO_DYN_TYPE_U:
+ case INFO_DYN_TYPE_N:
+ */
+ case INFO_DYN_TYPE:
+ *size = DYN_CLOSURE_SIZE(node);
+ *ptrs = DYN_CLOSURE_NoPTRS(node);
+ break;
+
+ case INFO_TUPLE_TYPE:
+ *size = TUPLE_CLOSURE_SIZE(node);
+ *ptrs = TUPLE_CLOSURE_NoPTRS(node);
+ break;
+
+ case INFO_DATA_TYPE:
+ *size = DATA_CLOSURE_SIZE(node);
+ *ptrs = DATA_CLOSURE_NoPTRS(node);
+ break;
+
+ case INFO_IND_TYPE:
+ *size = IND_CLOSURE_SIZE(node);
+ *ptrs = IND_CLOSURE_NoPTRS(node);
+ break;
+
+/* ToDo: more (WDP) */
+
+ /* Don't know about the others */
+ default:
+ *size = *ptrs = 0;
+ break;
+ }
+}
+
+void
+DEBUG_PRINT_NODE(node)
+P_ node;
+{
+ W_ info_ptr = INFO_PTR(node);
+ I_ size = 0, ptrs = 0, i, vhs = 0;
+ char *info_type = info_hdr_type(info_ptr);
+
+ size_and_ptrs(node,&size,&ptrs);
+ vhs = var_hdr_size(node);
+
+ fprintf(stderr,"Node: 0x%lx", (W_) node);
+
+#if defined(PAR)
+ fprintf(stderr," [GA: 0x%lx]",GA(node));
+#endif
+
+#if defined(USE_COST_CENTRES)
+ fprintf(stderr," [CC: 0x%lx]",CC_HDR(node));
+#endif
+
+#if defined(GRAN)
+ fprintf(stderr," [Bitmask: 0%lo]",PROCS(node));
+#endif
+
+ fprintf(stderr," IP: 0x%lx (%s), size %ld, %ld ptrs\n",
+ info_ptr,info_type,size,ptrs);
+
+ /* For now, we ignore the variable header */
+
+ for(i=0; i < size; ++i)
+ {
+ if(i == 0)
+ fprintf(stderr,"Data: ");
+
+ else if(i % 6 == 0)
+ fprintf(stderr,"\n ");
+
+ if(i < ptrs)
+ fprintf(stderr," 0x%lx[P]",*(node+_FHS+vhs+i));
+ else
+ fprintf(stderr," %lu[D]",*(node+_FHS+vhs+i));
+ }
+ fprintf(stderr, "\n");
+}
+
+
+#define INFO_MASK 0x80000000
+
+void
+DEBUG_TREE(node)
+P_ node;
+{
+ W_ size = 0, ptrs = 0, i, vhs = 0;
+
+ /* Don't print cycles */
+ if((INFO_PTR(node) & INFO_MASK) != 0)
+ return;
+
+ size_and_ptrs(node,&size,&ptrs);
+ vhs = var_hdr_size(node);
+
+ DEBUG_PRINT_NODE(node);
+ fprintf(stderr, "\n");
+
+ /* Mark the node -- may be dangerous */
+ INFO_PTR(node) |= INFO_MASK;
+
+ for(i = 0; i < ptrs; ++i)
+ DEBUG_TREE((P_)node[i+vhs+_FHS]);
+
+ /* Unmark the node */
+ INFO_PTR(node) &= ~INFO_MASK;
+}
+
+
+void
+DEBUG_INFO_TABLE(node)
+P_ node;
+{
+ W_ info_ptr = INFO_PTR(node);
+ char *ip_type = info_hdr_type(info_ptr);
+
+ fprintf(stderr,"%s Info Ptr @0x%lx; Entry: 0x%lx; Size: %lu; Ptrs: %lu\n\n",
+ ip_type,info_ptr,(W_) ENTRY_CODE(info_ptr),INFO_SIZE(info_ptr),INFO_NoPTRS(info_ptr));
+#if defined(PAR)
+ fprintf(stderr,"Enter Flush Entry: 0x%lx;\tExit Flush Entry: 0x%lx\n",INFO_FLUSHENT(info_ptr),INFO_FLUSH(info_ptr));
+#endif
+
+#if defined(USE_COST_CENTRES)
+ fprintf(stderr,"Cost Centre (???): 0x%lx\n",INFO_CAT(info_ptr));
+#endif
+
+#if defined(_INFO_COPYING)
+ fprintf(stderr,"Evacuate Entry: 0x%lx;\tScavenge Entry: 0x%lx\n",
+ INFO_EVAC_2S(info_ptr),INFO_SCAV_2S(info_ptr));
+#endif
+
+#if defined(_INFO_COMPACTING)
+ fprintf(stderr,"Scan Link: 0x%lx;\tScan Move: 0x%lx\n",
+ (W_) INFO_SCAN_LINK_1S(info_ptr), (W_) INFO_SCAN_MOVE_1S(info_ptr));
+ fprintf(stderr,"Mark: 0x%lx;\tMarked: 0x%lx;\t",
+ (W_) INFO_MARK_1S(info_ptr), (W_) INFO_MARKED_1S(info_ptr));
+#if 0 /* avoid INFO_TYPE */
+ if(BASE_INFO_TYPE(info_ptr)==INFO_SPEC_TYPE)
+ fprintf(stderr,"plus specialised code\n");
+ else
+ fprintf(stderr,"Marking: 0x%lx\n",(W_) INFO_MARKING_1S(info_ptr));
+#endif /* 0 */
+#endif
+}
+#endif /* GRAN */
+
+\end{code}
+
+The remaining debugging routines are more or less specific to GrAnSim.
+
+\begin{code}
+#if defined(GRAN) && defined(GRAN_CHECK)
+void
+DEBUG_CURR_THREADQ(verbose)
+I_ verbose;
+{
+ fprintf(stderr,"Thread Queue on proc %d: ", CurrentProc);
+ DEBUG_THREADQ(ThreadQueueHd, verbose);
+}
+
+void
+DEBUG_THREADQ(closure, verbose)
+P_ closure;
+I_ verbose;
+{
+ P_ x;
+
+ fprintf(stderr,"Thread Queue: ");
+ for (x=closure; x!=Nil_closure; x=TSO_LINK(x))
+ if (verbose)
+ DEBUG_TSO(x,0);
+ else
+ fprintf(stderr," 0x%x",x);
+
+ if (closure==Nil_closure)
+ fprintf(stderr,"NIL\n");
+ else
+ fprintf(stderr,"\n");
+}
+
+/* Check with Threads.lh */
+static char *type_name[] = { "T_MAIN", "T_REQUIRED", "T_ADVISORY", "T_FAIL"};
+
+void
+DEBUG_TSO(closure,verbose)
+P_ closure;
+I_ verbose;
+{
+
+ if (closure==Nil_closure) {
+ fprintf(stderr,"TSO at 0x%x is Nil_closure!\n");
+ return;
+ }
+
+ fprintf(stderr,"TSO at 0x%x has the following contents:\n",closure);
+
+ fprintf(stderr,"> Name: 0x%x",TSO_NAME(closure));
+ fprintf(stderr,"\tLink: 0x%x\n",TSO_LINK(closure));
+ fprintf(stderr,"> Id: 0x%x",TSO_ID(closure));
+#if defined(GRAN_CHECK) && defined(GRAN)
+ if (debug & 0x10)
+ fprintf(stderr,"\tType: %s %s\n",
+ type_name[TSO_TYPE(closure) & ~FETCH_MASK_TSO],
+ (TSO_TYPE(closure) & FETCH_MASK_TSO) ? "SLEEPING" : "");
+ else
+ fprintf(stderr,"\tType: %s\n",type_name[TSO_TYPE(closure)]);
+#else
+ fprintf(stderr,"\tType: %s\n",type_name[TSO_TYPE(closure)]);
+#endif
+ fprintf(stderr,"> PC1: 0x%x",TSO_PC1(closure));
+ fprintf(stderr,"\tPC2: 0x%x\n",TSO_PC2(closure));
+ fprintf(stderr,"> ARG1: 0x%x",TSO_ARG1(closure));
+ fprintf(stderr,"\tARG2: 0x%x\n",TSO_ARG2(closure));
+ fprintf(stderr,"> SWITCH: 0x%x\n", TSO_SWITCH(closure));
+
+ if (verbose) {
+ fprintf(stderr,"} LOCKED: 0x%x",TSO_LOCKED(closure));
+ fprintf(stderr,"\tSPARKNAME: 0x%x\n", TSO_SPARKNAME(closure));
+ fprintf(stderr,"} STARTEDAT: 0x%x", TSO_STARTEDAT(closure));
+ fprintf(stderr,"\tEXPORTED: 0x%x\n", TSO_EXPORTED(closure));
+ fprintf(stderr,"} BASICBLOCKS: 0x%x", TSO_BASICBLOCKS(closure));
+ fprintf(stderr,"\tALLOCS: 0x%x\n", TSO_ALLOCS(closure));
+ fprintf(stderr,"} EXECTIME: 0x%x", TSO_EXECTIME(closure));
+ fprintf(stderr,"\tFETCHTIME: 0x%x\n", TSO_FETCHTIME(closure));
+ fprintf(stderr,"} FETCHCOUNT: 0x%x", TSO_FETCHCOUNT(closure));
+ fprintf(stderr,"\tBLOCKTIME: 0x%x\n", TSO_BLOCKTIME(closure));
+ fprintf(stderr,"} BLOCKCOUNT: 0x%x", TSO_BLOCKCOUNT(closure));
+ fprintf(stderr,"\tBLOCKEDAT: 0x%x\n", TSO_BLOCKEDAT(closure));
+ fprintf(stderr,"} GLOBALSPARKS: 0x%x", TSO_GLOBALSPARKS(closure));
+ fprintf(stderr,"\tLOCALSPARKS: 0x%x\n", TSO_LOCALSPARKS(closure));
+ }
+}
+
+void
+DEBUG_EVENT(event, verbose)
+eventq event;
+I_ verbose;
+{
+ if (verbose) {
+ print_event(event);
+ }else{
+ fprintf(stderr," 0x%x",event);
+ }
+}
+
+void
+DEBUG_EVENTQ(verbose)
+I_ verbose;
+{
+ eventq x;
+
+ fprintf(stderr,"Eventq (hd @0x%x):\n",EventHd);
+ for (x=EventHd; x!=NULL; x=EVENT_NEXT(x)) {
+ DEBUG_EVENT(x,verbose);
+ }
+ if (EventHd==NULL)
+ fprintf(stderr,"NIL\n");
+ else
+ fprintf(stderr,"\n");
+}
+
+void
+DEBUG_SPARK(spark, verbose)
+sparkq spark;
+I_ verbose;
+{
+ if (verbose)
+ print_spark(spark);
+ else
+ fprintf(stderr," 0x%x",spark);
+}
+
+void
+DEBUG_SPARKQ(spark,verbose)
+sparkq spark;
+I_ verbose;
+{
+ sparkq x;
+
+ fprintf(stderr,"Sparkq (hd @0x%x):\n",spark);
+ for (x=spark; x!=NULL; x=SPARK_NEXT(x)) {
+ DEBUG_SPARK(x,verbose);
+ }
+ if (spark==NULL)
+ fprintf(stderr,"NIL\n");
+ else
+ fprintf(stderr,"\n");
+}
+
+void
+DEBUG_CURR_SPARKQ(verbose)
+I_ verbose;
+{
+ DEBUG_SPARKQ(SparkQueueHd,verbose);
+}
+
+void
+DEBUG_PROC(proc,verbose)
+I_ proc;
+I_ verbose;
+{
+ fprintf(stderr,"Status of proc %d at time %d (0x%x): %s\n",
+ proc,CurrentTime[proc],CurrentTime[proc],
+ (CurrentProc==proc)?"ACTIVE":"INACTIVE");
+ DEBUG_THREADQ(RunnableThreadsHd[proc],verbose & 0x2);
+ if ( (CurrentProc==proc) )
+ DEBUG_TSO(CurrentTSO,1);
+
+ if (EventHd!=NULL)
+ fprintf(stderr,"Next event (%s) is on proc %d\n",
+ event_names[EVENT_TYPE(EventHd)],EVENT_PROC(EventHd));
+
+ if (verbose & 0x1) {
+ fprintf(stderr,"\nREQUIRED sparks: ");
+ DEBUG_SPARKQ(PendingSparksHd[proc][REQUIRED_POOL],1);
+ fprintf(stderr,"\nADVISORY_sparks: ");
+ DEBUG_SPARKQ(PendingSparksHd[proc][ADVISORY_POOL],1);
+ }
+}
+
+/* Debug CurrentTSO */
+void
+DCT(){
+ fprintf(stderr,"Current Proc: %d\n",CurrentProc);
+ DEBUG_TSO(CurrentTSO,1);
+}
+
+/* Debug Current Processor */
+void
+DCP(){ DEBUG_PROC(CurrentProc,2); }
+
+/* Shorthand for debugging event queue */
+void
+DEQ() { DEBUG_EVENTQ(1); }
+
+/* Shorthand for debugging spark queue */
+void
+DSQ() { DEBUG_CURR_SPARKQ(1); }
+
+/* Shorthand for printing a node */
+void
+DN(P_ node) { DEBUG_PRINT_NODE(node); }
+
+#endif /* GRAN && GRAN_CHECK */
+\end{code}
+
+
+%****************************************************************************
+%
+\subsection[qp-profile-nongran]{Quasi-Parallel Profiling (non-GrAnSim)}
+%
+%****************************************************************************
+
+\begin{code}
+#ifndef GRAN
+I_ do_qp_prof;
+FILE *qp_file;
+
+/* *Virtual* Time in milliseconds */
+long
+qp_elapsed_time(STG_NO_ARGS)
+{
+ extern StgDouble usertime();
+
+ return ((long) (usertime() * 1e3));
+}
+
+static void
+init_qp_profiling(STG_NO_ARGS)
+{
+ I_ i;
+ char qp_filename[32];
+
+ sprintf(qp_filename, "%0.24s.qp", prog_argv[0]);
+ if ((qp_file = fopen(qp_filename,"w")) == NULL ) {
+ fprintf(stderr, "Can't open quasi-parallel profile report file %s\n",
+ qp_filename);
+ do_qp_prof = 0;
+ } else {
+ fputs(prog_argv[0], qp_file);
+ for(i = 1; prog_argv[i]; i++) {
+ fputc(' ', qp_file);
+ fputs(prog_argv[i], qp_file);
+ }
+ fprintf(qp_file, "+RTS -C%ld -t%ld\n", contextSwitchTime, MaxThreads);
+ fputs(time_str(), qp_file);
+ fputc('\n', qp_file);
+ }
+}
+
+void
+QP_Event0(tid, node)
+I_ tid;
+P_ node;
+{
+ fprintf(qp_file, "%lu ** %lu 0x%lx\n", qp_elapsed_time(), tid, INFO_PTR(node));
+}
+
+void
+QP_Event1(event, tso)
+char *event;
+P_ tso;
+{
+ fprintf(qp_file, "%lu %s %lu 0x%lx\n", qp_elapsed_time(), event,
+ TSO_ID(tso), (W_) TSO_NAME(tso));
+}
+
+void
+QP_Event2(event, tso1, tso2)
+char *event;
+P_ tso1, tso2;
+{
+ fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n", qp_elapsed_time(), event,
+ TSO_ID(tso1), (W_) TSO_NAME(tso1), TSO_ID(tso2), (W_) TSO_NAME(tso2));
+}
+#endif /* !GRAN */
+
+#if defined(CONCURRENT) && !defined(GRAN)
+/* romoluSnganpu' SamuS! */
+
+unsigned CurrentProc = 0;
+W_ IdleProcs = ~0l, Idlers = 32;
+
+void
+GranSimAllocate(n,node,liveness)
+I_ n;
+P_ node;
+W_ liveness;
+{ }
+
+void
+GranSimUnallocate(n,node,liveness)
+W_ n;
+P_ node;
+W_ liveness;
+{ }
+
+
+void
+GranSimExec(ariths,branches,loads,stores,floats)
+W_ ariths,branches,loads,stores,floats;
+{ }
+
+I_
+GranSimFetch(node /* , liveness_mask */ )
+P_ node;
+/* I_ liveness_mask; */
+{ return(0); /* no fetch is ever simulated here */ }
+
+void
+GranSimSpark(local,node)
+W_ local;
+P_ node;
+{ }
+
+#if 0
+void
+GranSimSparkAt(spark,where,identifier)
+sparkq spark;
+P_ where; /* This should be a node; alternatively could be a GA */
+I_ identifier;
+{ }
+#endif
+
+void
+GranSimBlock()
+{ }
+#endif
+
+\end{code}
+