% % (c) The GRASP/AQUA Project, Glasgow University, 1992-1995 % %************************************************************************ %* * \section[Threads.lc]{Thread Control Routines} %* * %************************************************************************ %************************************************************************ % \subsection[thread-overview]{Overview of the Thread Management System} % %************************************************************************ %************************************************************************ % \subsection[thread-decls]{Thread Declarations} % %************************************************************************ % I haven't checked if GRAN can work with QP profiling. But as we use our % own profiling (GR profiling) that should be irrelevant. -- HWL \begin{code} #if defined(CONCURRENT) # define NON_POSIX_SOURCE /* so says Solaris */ # include "rtsdefs.h" # include #include "LLC.h" #include "HLC.h" static void init_qp_profiling(STG_NO_ARGS); /* forward decl */ \end{code} @AvailableStack@ is used to determine whether an existing stack can be reused without new allocation, so reducing garbage collection, and stack setup time. At present, it is only used for the first stack chunk of a thread, the one that's got @StkOChunkSize@ words. \begin{code} P_ AvailableStack = Nil_closure; P_ AvailableTSO = Nil_closure; \end{code} Macros for dealing with the new and improved GA field for simulating parallel execution. Based on @CONCURRENT@ package. The GA field now contains a mask, where the n-th bit stands for the n-th processor, where this data can be found. In case of multiple copies, several bits are set. The total number of processors is bounded by @MAX_PROC@, which should be <= the length of a word in bits. 
-- HWL \begin{code} /* mattson thinks this is obsolete */ # if 0 && defined(GRAN) extern FILE *main_statsfile; /* Might be of general interest HWL */ typedef unsigned long TIME; typedef unsigned char PROC; typedef unsigned char EVTTYPE; # undef max # define max(a,b) (a>b?a:b) static PROC ga_to_proc(W_ ga) { PROC i; for (i=0; i= EVENT_TIME(newentry)) : evttype == CONTINUETHREAD ? (EVENT_TIME(event) > EVENT_TIME(newentry)) : (EVENT_TIME(event) > EVENT_TIME(newentry) || (EVENT_TIME(event) == EVENT_TIME(newentry) && EVENT_TYPE(event) != FINDWORK ))) { *prev = newentry; EVENT_NEXT(newentry) = event; break; } } if (event == NULL) *prev = newentry; } } static newevent(proc,creator,time,evttype,tso,node,spark) PROC proc, creator; TIME time; EVTTYPE evttype; P_ tso, node; sparkq spark; { extern P_ xmalloc(); eventq newentry = (eventq) xmalloc(sizeof(struct event)); EVENT_PROC(newentry) = proc; EVENT_CREATOR(newentry) = creator; EVENT_TIME(newentry) = time; EVENT_TYPE(newentry) = evttype; EVENT_TSO(newentry) = tso; EVENT_NODE(newentry) = node; EVENT_SPARK(newentry) = spark; EVENT_NEXT(newentry) = NULL; insert_event(newentry); } #endif /* 0 moved */ # else /* !GRAN */ P_ RunnableThreadsHd = Nil_closure; P_ RunnableThreadsTl = Nil_closure; P_ WaitingThreadsHd = Nil_closure; P_ WaitingThreadsTl = Nil_closure; PP_ PendingSparksBase[SPARK_POOLS]; PP_ PendingSparksLim[SPARK_POOLS]; PP_ PendingSparksHd[SPARK_POOLS]; PP_ PendingSparksTl[SPARK_POOLS]; # endif /* GRAN ; HWL */ static jmp_buf scheduler_loop; I_ MaxThreads = DEFAULT_MAX_THREADS; I_ required_thread_count = 0; I_ advisory_thread_count = 0; EXTFUN(resumeThread); P_ NewThread PROTO((P_, W_)); I_ context_switch = 0; I_ contextSwitchTime = CS_MIN_MILLISECS; /* In milliseconds */ #if !defined(GRAN) I_ threadId = 0; I_ MaxLocalSparks = DEFAULT_MAX_LOCAL_SPARKS; I_ SparkLimit[SPARK_POOLS]; extern I_ doSanityChks; extern void checkAStack(STG_NO_ARGS); rtsBool initThreadPools(size) I_ size; { SparkLimit[ADVISORY_POOL] = 
SparkLimit[REQUIRED_POOL] = size; if ((PendingSparksBase[ADVISORY_POOL] = (PP_) malloc(size * sizeof(P_))) == NULL) return rtsFalse; if ((PendingSparksBase[REQUIRED_POOL] = (PP_) malloc(size * sizeof(P_))) == NULL) return rtsFalse; PendingSparksLim[ADVISORY_POOL] = PendingSparksBase[ADVISORY_POOL] + size; PendingSparksLim[REQUIRED_POOL] = PendingSparksBase[REQUIRED_POOL] + size; return rtsTrue; } #endif #ifdef PAR rtsBool sameThread; #endif void ScheduleThreads(topClosure) P_ topClosure; { I_ i; P_ tso; #if defined(USE_COST_CENTRES) || defined(GUM) if (time_profiling || contextSwitchTime > 0) { if (initialize_virtual_timer(tick_millisecs)) { #else if (contextSwitchTime > 0) { if (initialize_virtual_timer(contextSwitchTime)) { #endif fflush(stdout); fprintf(stderr, "Can't initialize virtual timer.\n"); EXIT(EXIT_FAILURE); } } else context_switch = 0 /* 1 HWL */; #if defined(GRAN_CHECK) && defined(GRAN) /* HWL */ if ( debug & 0x40 ) { fprintf(stderr,"D> Doing init in ScheduleThreads now ...\n"); } #endif #if defined(GRAN) /* KH */ for (i=0; i MainTSO has been initialized (0x%x)\n", tso); } # endif #endif #ifdef PAR if (do_gr_profile) { DumpGranEvent(GR_START, tso); sameThread = rtsTrue; } #endif #if defined(GRAN) MAKE_BUSY(MainProc); /* Everything except the main PE is idle */ #endif required_thread_count = 1; advisory_thread_count = 0; #ifdef PAR } /*if IAmMainThread ...*/ #endif /* ----------------------------------------------------------------- */ /* This part is the MAIN SCHEDULER LOOP; jumped at from ReSchedule */ /* ----------------------------------------------------------------- */ if(setjmp(scheduler_loop) < 0) return; #if defined(GRAN) && defined(GRAN_CHECK) if ( debug & 0x80 ) { fprintf(stderr,"D> MAIN Schedule Loop; ThreadQueueHd is "); DEBUG_TSO(ThreadQueueHd,1); /* if (ThreadQueueHd == MainTSO) { fprintf(stderr,"D> Event Queue is now:\n"); DEQ(); } */ } #endif #ifdef PAR if (PendingFetches != Nil_closure) { processFetches(); } #elif defined(GRAN) if 
(ThreadQueueHd == Nil_closure) { fprintf(stderr, "No runnable threads!\n"); EXIT(EXIT_FAILURE); } if (DO_QP_PROF > 1 && CurrentTSO != ThreadQueueHd) { QP_Event1("AG", ThreadQueueHd); } #endif #ifndef PAR while (RunnableThreadsHd == Nil_closure) { /* If we've no work */ if (WaitingThreadsHd == Nil_closure) { fflush(stdout); fprintf(stderr, "No runnable threads!\n"); EXIT(EXIT_FAILURE); } AwaitEvent(0); } #else if (RunnableThreadsHd == Nil_closure) { if (advisory_thread_count < MaxThreads && (PendingSparksHd[REQUIRED_POOL] < PendingSparksTl[REQUIRED_POOL] || PendingSparksHd[ADVISORY_POOL] < PendingSparksTl[ADVISORY_POOL])) { /* * If we're here (no runnable threads) and we have pending sparks, * we must have a space problem. Get enough space to turn one of * those pending sparks into a thread...ReallyPerformGC doesn't * return until the space is available, so it may force global GC. * ToDo: Is this unnecessary here? Duplicated in ReSchedule()? --JSM */ ReallyPerformThreadGC(THREAD_SPACE_REQUIRED, rtsTrue); SAVE_Hp -= THREAD_SPACE_REQUIRED; } else { /* * We really have absolutely no work. Send out a fish (there may be * some out there already), and wait for something to arrive. We * clearly can't run any threads until a SCHEDULE or RESUME arrives, * and so that's what we're hoping to see. (Of course, we still have * to respond to other types of messages.) 
*/ if (!fishing) sendFish(choosePE(), mytid, NEW_FISH_AGE, NEW_FISH_HISTORY, NEW_FISH_HUNGER); processMessages(); } ReSchedule(0); } else if (PacketsWaiting()) { /* Look for incoming messages */ processMessages(); } #endif /* PAR */ if (DO_QP_PROF > 1 && CurrentTSO != RunnableThreadsHd) { QP_Event1("AG", RunnableThreadsHd); } #ifdef PAR if (do_gr_profile && !sameThread) DumpGranEvent(GR_SCHEDULE, RunnableThreadsHd); #endif #if !GRAN /* ROUND_ROBIN */ CurrentTSO = RunnableThreadsHd; RunnableThreadsHd = TSO_LINK(RunnableThreadsHd); TSO_LINK(CurrentTSO) = Nil_closure; if (RunnableThreadsHd == Nil_closure) RunnableThreadsTl = Nil_closure; #else /* GRAN */ /* This used to be Round Robin. KH. I think we can ignore that, and move it down to ReSchedule instead. */ CurrentTSO = ThreadQueueHd; /* TSO_LINK(CurrentTSO) = Nil_closure; humbug */ #endif /* If we're not running a timer, just leave the flag on */ if (contextSwitchTime > 0) context_switch = 0; #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ if (CurrentTSO == Nil_closure) { fprintf(stderr,"Error: Trying to execute Nil_closure on proc %d (@ %d)\n", CurrentProc,CurrentTime[CurrentProc]); exit(99); } if (debug & 0x04) { if (BlockedOnFetch[CurrentProc]) { fprintf(stderr,"Error: Trying to execute TSO 0x%x on proc %d (@ %d) which is blocked-on-fetch by TSO 0x%x\n", CurrentTSO,CurrentProc,CurrentTime[CurrentProc],BlockedOnFetch[CurrentProc]); exit(99); } } if ( (debug & 0x10) && (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) ) { fprintf(stderr,"Error: Trying to execute TSO 0x%x on proc %d (@ %d) which should be asleep!\n", CurrentTSO,CurrentProc,CurrentTime[CurrentProc]); exit(99); } #endif # if defined(__STG_TAILJUMPS__) miniInterpret((StgFunPtr)resumeThread); # else if (doSanityChks) miniInterpret_debug((StgFunPtr)resumeThread, checkAStack); else miniInterpret((StgFunPtr)resumeThread); # endif /* __STG_TAILJUMPS__ */ } \end{code} % Some remarks on GrAnSim -- HWL The ReSchedule fct is the heart of GrAnSim. 
Based on its par it issues a CONTINUETRHEAD to carry on executing the current thread in due course or it watches out for new work (e.g. called from EndThread). Then it picks the next event (getnextevent) and handles it appropriately (see switch construct). Note that a continue in the switch causes the next event to be handled and a break causes a jmp to the scheduler_loop where the TSO at the head of the current processor's runnable queue is executed. ReSchedule is mostly entered from HpOverflow.lc:PerformReSchedule which is itself called via the GRAN_RESCHEDULE macro in the compiler generated code. \begin{code} #if defined(GRAN) void ReSchedule(what_next) int what_next; /* Run the current thread again? */ { sparkq spark, nextspark; P_ tso; P_ node; eventq event; #if defined(GRAN_CHECK) && defined(GRAN) if ( debug & 0x80 ) { fprintf(stderr,"D> Entering ReSchedule with mode %u; tso is\n",what_next); DEBUG_TSO(ThreadQueueHd,1); } #endif #if defined(GRAN_CHECK) && defined(GRAN) if ( (debug & 0x80) || (debug & 0x40 ) ) if (what_nextCHANGE_THREAD) fprintf(stderr,"ReSchedule: illegal parameter %u for what_next\n", what_next); #endif /* Run the current thread again (if there is one) */ if(what_next==SAME_THREAD && ThreadQueueHd != Nil_closure) { /* A bit of a hassle if the event queue is empty, but ... */ CurrentTSO = ThreadQueueHd; newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc], CONTINUETHREAD,CurrentTSO,Nil_closure,NULL); /* This code does round-Robin, if preferred. */ if(DoFairSchedule && TSO_LINK(CurrentTSO) != Nil_closure) { if(do_gr_profile) DumpGranEvent(GR_DESCHEDULE,ThreadQueueHd); ThreadQueueHd = TSO_LINK(CurrentTSO); TSO_LINK(ThreadQueueTl) = CurrentTSO; ThreadQueueTl = CurrentTSO; TSO_LINK(CurrentTSO) = Nil_closure; if (do_gr_profile) DumpGranEvent(GR_SCHEDULE,ThreadQueueHd); CurrentTime[CurrentProc] += gran_threadcontextswitchtime; } } /* Schedule `next thread' which is at ThreadQueueHd now i.e. 
thread queue */ /* has been updated before that already. */ else if(what_next==NEW_THREAD && ThreadQueueHd != Nil_closure) { #if defined(GRAN_CHECK) && defined(GRAN) if(DoReScheduleOnFetch) { fprintf(stderr,"ReSchedule(NEW_THREAD) shouldn't be used!!\n"); exit(99); } #endif if(do_gr_profile) DumpGranEvent(GR_SCHEDULE,ThreadQueueHd); CurrentTSO = ThreadQueueHd; newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc], CONTINUETHREAD,Nil_closure,Nil_closure,NULL); CurrentTime[CurrentProc] += gran_threadcontextswitchtime; } /* We go in here if the current thread is blocked on fetch => don'd CONT */ else if(what_next==CHANGE_THREAD) { /* just fall into event handling loop for next event */ } /* We go in here if we have no runnable threads or what_next==0 */ else { newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc], FINDWORK,Nil_closure,Nil_closure,NULL); CurrentTSO = Nil_closure; } /* ----------------------------------------------------------------- */ /* This part is the EVENT HANDLING LOOP */ /* ----------------------------------------------------------------- */ do { /* Choose the processor with the next event */ event = getnextevent(); CurrentProc = EVENT_PROC(event); if(EVENT_TIME(event) > CurrentTime[CurrentProc]) CurrentTime[CurrentProc] = EVENT_TIME(event); MAKE_BUSY(CurrentProc); #if defined(GRAN_CHECK) && defined(GRAN) if (debug & 0x80) fprintf(stderr,"D> After getnextevent, before HandleIdlePEs\n"); #endif /* Deal with the idlers */ HandleIdlePEs(); #if defined(GRAN_CHECK) && defined(GRAN) if (event_trace && (event_trace_all || EVENT_TYPE(event) != CONTINUETHREAD || (debug & 0x80) )) print_event(event); #endif switch (EVENT_TYPE(event)) { /* Should just be continuing execution */ case CONTINUETHREAD: #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ if ( (debug & 0x04) && BlockedOnFetch[CurrentProc]) { fprintf(stderr,"Warning: Discarding CONTINUETHREAD on blocked proc %u @ %u\n", CurrentProc,CurrentTime[CurrentProc]); 
print_event(event); continue; } #endif if(ThreadQueueHd==Nil_closure) { newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc], FINDWORK,Nil_closure,Nil_closure,NULL); continue; /* Catches superfluous CONTINUEs -- should be unnecessary */ } else break; /* fall into scheduler loop */ case FETCHNODE: #if defined(GRAN_CHECK) && defined(GRAN) if (SimplifiedFetch) { fprintf(stderr,"Error: FETCHNODE events not valid with simplified fetch\n"); exit (99); } #endif CurrentTime[CurrentProc] += gran_munpacktime; HandleFetchRequest(EVENT_NODE(event), EVENT_CREATOR(event), EVENT_TSO(event)); continue; case FETCHREPLY: #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ if (SimplifiedFetch) { fprintf(stderr,"Error: FETCHREPLY events not valid with simplified fetch\n"); exit (99); } if (debug & 0x10) { if (TSO_TYPE(EVENT_TSO(event)) & FETCH_MASK_TSO) { TSO_TYPE(EVENT_TSO(event)) &= ~FETCH_MASK_TSO; } else { fprintf(stderr,"Error: FETCHREPLY: TSO 0x%x has fetch mask not set @ %d\n", CurrentTSO,CurrentTime[CurrentProc]); exit(99); } } if (debug & 0x04) { if (BlockedOnFetch[CurrentProc]!=ThreadQueueHd) { fprintf(stderr,"Error: FETCHREPLY: Proc %d (with TSO 0x%x) not blocked-on-fetch by TSO 0x%x\n", CurrentProc,CurrentTSO,BlockedOnFetch[CurrentProc]); exit(99); } else { BlockedOnFetch[CurrentProc] = 0; /*- StgFalse; -*/ } } #endif /* Copy or move node to CurrentProc */ if (FetchNode(EVENT_NODE(event), EVENT_CREATOR(event), EVENT_PROC(event)) ) { /* Fetch has failed i.e. node has been grabbed by another PE */ P_ node = EVENT_NODE(event), tso = EVENT_TSO(event); PROC p = where_is(node); TIME fetchtime; #if defined(GRAN_CHECK) && defined(GRAN) if (PrintFetchMisses) { fprintf(stderr,"Fetch miss @ %lu: node 0x%x is at proc %u (rather than proc %u)\n", CurrentTime[CurrentProc],node,p,EVENT_CREATOR(event)); fetch_misses++; } #endif /* GRAN_CHECK */ CurrentTime[CurrentProc] += gran_mpacktime; /* Count fetch again !? 
*/ ++TSO_FETCHCOUNT(tso); TSO_FETCHTIME(tso) += gran_fetchtime; fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p]) + gran_latency; /* Chase the grabbed node */ newevent(p,CurrentProc,fetchtime,FETCHNODE,tso,node,NULL); #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ if (debug & 0x04) BlockedOnFetch[CurrentProc] = tso; /*-StgTrue;-*/ if (debug & 0x10) TSO_TYPE(tso) |= FETCH_MASK_TSO; #endif CurrentTime[CurrentProc] += gran_mtidytime; continue; /* NB: no REPLy has been processed; tso still sleeping */ } /* -- Qapla'! Fetch has been successful; node is here, now */ ++TSO_FETCHCOUNT(EVENT_TSO(event)); TSO_FETCHTIME(EVENT_TSO(event)) += gran_fetchtime; if (do_gr_profile) DumpGranEventAndNode(GR_REPLY,EVENT_TSO(event), EVENT_NODE(event),EVENT_CREATOR(event)); --OutstandingFetches[CurrentProc]; #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ if (OutstandingFetches[CurrentProc] < 0) { fprintf(stderr,"OutstandingFetches of proc %u has become negative\n",CurrentProc); exit (99); } #endif if (!DoReScheduleOnFetch) { CurrentTSO = EVENT_TSO(event); /* awaken blocked thread */ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc], CONTINUETHREAD,Nil_closure,Nil_closure,NULL); TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] - TSO_BLOCKEDAT(EVENT_TSO(event)); if(do_gr_profile) DumpGranEvent(GR_RESUME,EVENT_TSO(event)); continue; } else { /* fall through to RESUMETHREAD */ } case RESUMETHREAD: /* Move from the blocked queue to the tail of */ /* the runnable queue ( i.e. 
Qu' SImqa'lu') */ TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] - TSO_BLOCKEDAT(EVENT_TSO(event)); StartThread(event,GR_RESUME); continue; case STARTTHREAD: StartThread(event,GR_START); continue; case MOVETHREAD: #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ if (!DoThreadMigration) { fprintf(stderr,"MOVETHREAD events should never occur without -bM\n"); exit (99); } #endif CurrentTime[CurrentProc] += gran_munpacktime; StartThread(event,GR_STOLEN); continue; /* to the next event */ case MOVESPARK: CurrentTime[CurrentProc] += gran_munpacktime; spark = EVENT_SPARK(event); ADD_TO_SPARK_QUEUE(spark); /* NB: this macro side-effects its arg. so the assignment above is needed. */ if(do_sp_profile) DumpSparkGranEvent(SP_ACQUIRED,spark); ++SparksAvail; /* Probably Temporarily */ /* Drop into FINDWORK */ if (!DoReScheduleOnFetch && (ThreadQueueHd != Nil_closure) ) { /* If we block on fetch then go */ continue; /* to next event (i.e. leave */ } /* spark in sparkq for now) */ case FINDWORK: if((ThreadQueueHd == Nil_closure || DoAlwaysCreateThreads) && (FetchStrategy >= 2 || OutstandingFetches[CurrentProc] == 0)) { W_ found = 0; sparkq spark_of_non_local_node = NULL; /* Choose a spark from the local spark queue */ spark = SparkQueueHd; while (spark != NULL && !found) { node = SPARK_NODE(spark); if (!SHOULD_SPARK(node)) { if(do_sp_profile) DumpSparkGranEvent(SP_PRUNED,spark); assert(spark != NULL); SparkQueueHd = SPARK_NEXT(spark); if(SparkQueueHd == NULL) SparkQueueTl = NULL; DisposeSpark(spark); spark = SparkQueueHd; } /* -- node should eventually be sparked */ else if (PreferSparksOfLocalNodes && !IS_LOCAL_TO(PROCS(node),CurrentProc)) { /* We have seen this spark before => no local sparks */ if (spark==spark_of_non_local_node) { found = 1; break; } /* Remember first non-local node */ if (spark_of_non_local_node==NULL) spark_of_non_local_node = spark; /* Special case: 1 elem sparkq with non-local spark */ if (spark==SparkQueueTl) { found = 1; 
break; } /* Put spark (non-local!) at the end of the sparkq */ SPARK_NEXT(SparkQueueTl) = spark; SparkQueueHd = SPARK_NEXT(spark); SPARK_NEXT(spark) = NULL; SparkQueueTl = spark; spark = SparkQueueHd; } else { found = 1; } } /* We've found a node; now, create thread (DaH Qu' yIchen) */ if (found) { CurrentTime[CurrentProc] += gran_threadcreatetime; node = SPARK_NODE(spark); if((tso = NewThread(node, T_REQUIRED))==NULL) { /* Some kind of backoff needed here in case there's too little heap */ newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc]+1, FINDWORK,Nil_closure,Nil_closure,NULL); ReallyPerformThreadGC(TSO_HS+TSO_CTS_SIZE,StgTrue); spark = NULL; continue; /* to the next event, eventually */ } TSO_EXPORTED(tso) = SPARK_EXPORTED(spark); TSO_LOCKED(tso) = !SPARK_GLOBAL(spark); TSO_SPARKNAME(tso) = SPARK_NAME(spark); newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc], STARTTHREAD,tso,Nil_closure,NULL); assert(spark != NULL); SparkQueueHd = SPARK_NEXT(spark); if(SparkQueueHd == NULL) SparkQueueTl = NULL; DisposeSpark(spark); } else /* Make the PE idle if nothing sparked and we have no threads. 
*/ { if(ThreadQueueHd == Nil_closure) #if defined(GRAN_CHECK) && defined(GRAN) { MAKE_IDLE(CurrentProc); if ( (debug & 0x40) || (debug & 0x80) ) { fprintf(stderr,"Warning in FINDWORK handling: No work found for PROC %u\n",CurrentProc); } } #else MAKE_IDLE(CurrentProc); #endif /* GRAN_CHECK */ else newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc], CONTINUETHREAD,Nil_closure,Nil_closure,NULL); } continue; /* to the next event */ } else { #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ if ( (debug & 0x04) && (!DoReScheduleOnFetch && ThreadQueueHd != Nil_closure) ) { fprintf(stderr,"Waning in FINDWORK handling:\n"); fprintf(stderr,"ThreadQueueHd!=Nil_closure should never happen with !DoReScheduleOnFetch"); } #endif if (FetchStrategy < 2 && OutstandingFetches[CurrentProc] != 0) continue; /* to next event */ else break; /* run ThreadQueueHd */ } /* never reached */ default: fprintf(stderr,"Illegal event type %u\n",EVENT_TYPE(event)); continue; } _longjmp(scheduler_loop, 1); } while(1); } \end{code} Here follows the non-GRAN @ReSchedule@. \begin{code} #else /* !GRAN */ void ReSchedule(again) int again; /* Run the current thread again? */ { P_ spark; PP_ sparkp; P_ tso; #ifdef PAR /* * In the parallel world, we do unfair scheduling for the moment. * Ultimately, this should all be merged with the more sophicticated * GrAnSim scheduling options. (Of course, some provision should be * made for *required* threads to make sure that they don't starve, * but for now we assume that no one is running concurrent Haskell on * a multi-processor platform.) */ sameThread = again; if (again) { if (RunnableThreadsHd == Nil_closure) RunnableThreadsTl = CurrentTSO; TSO_LINK(CurrentTSO) = RunnableThreadsHd; RunnableThreadsHd = CurrentTSO; } #else /* * In the sequential world, we assume that the whole point of running * the threaded build is for concurrent Haskell, so we provide round-robin * scheduling. 
*/ if (again) { if(RunnableThreadsHd == Nil_closure) { RunnableThreadsHd = CurrentTSO; } else { TSO_LINK(RunnableThreadsTl) = CurrentTSO; if (DO_QP_PROF > 1) { QP_Event1("GA", CurrentTSO); } } RunnableThreadsTl = CurrentTSO; } #endif #if 1 /* * Debugging code, which is useful enough (and cheap enough) to compile * in all the time. This makes sure that we don't access saved registers, * etc. in threads which are supposed to be sleeping. */ CurrentTSO = Nil_closure; CurrentRegTable = NULL; #endif /* First the required sparks */ for (sparkp = PendingSparksHd[REQUIRED_POOL]; sparkp < PendingSparksTl[REQUIRED_POOL]; sparkp++) { spark = *sparkp; if (SHOULD_SPARK(spark)) { if ((tso = NewThread(spark, T_REQUIRED)) == NULL) break; if (RunnableThreadsHd == Nil_closure) { RunnableThreadsHd = tso; #ifdef PAR if (do_gr_profile) { DumpGranEvent(GR_START, tso); sameThread = rtsTrue; } #endif } else { TSO_LINK(RunnableThreadsTl) = tso; #ifdef PAR if (do_gr_profile) DumpGranEvent(GR_STARTQ, tso); #endif } RunnableThreadsTl = tso; } else { if (DO_QP_PROF) QP_Event0(threadId++, spark); #ifdef PAR if(do_sp_profile) DumpSparkGranEvent(SP_PRUNED, threadId++); #endif } } PendingSparksHd[REQUIRED_POOL] = sparkp; /* Now, almost the same thing for advisory sparks */ for (sparkp = PendingSparksHd[ADVISORY_POOL]; sparkp < PendingSparksTl[ADVISORY_POOL]; sparkp++) { spark = *sparkp; if (SHOULD_SPARK(spark)) { if ( #ifdef PAR /* In the parallel world, don't create advisory threads if we are * about to rerun the same thread, or already have runnable threads, * or the main thread has terminated */ (RunnableThreadsHd != Nil_closure || (required_thread_count == 0 && IAmMainThread)) || #endif advisory_thread_count == MaxThreads || (tso = NewThread(spark, T_ADVISORY)) == NULL) break; advisory_thread_count++; if (RunnableThreadsHd == Nil_closure) { RunnableThreadsHd = tso; #ifdef PAR if (do_gr_profile) { DumpGranEvent(GR_START, tso); sameThread = rtsTrue; } #endif } else { TSO_LINK(RunnableThreadsTl) 
= tso; #ifdef PAR if (do_gr_profile) DumpGranEvent(GR_STARTQ, tso); #endif } RunnableThreadsTl = tso; } else { if (DO_QP_PROF) QP_Event0(threadId++, spark); #ifdef PAR if(do_sp_profile) DumpSparkGranEvent(SP_PRUNED, threadId++); #endif } } PendingSparksHd[ADVISORY_POOL] = sparkp; #ifndef PAR longjmp(scheduler_loop, required_thread_count == 0 ? -1 : 1); #else longjmp(scheduler_loop, required_thread_count == 0 && IAmMainThread ? -1 : 1); #endif } #endif /* !GRAN */ \end{code} %**************************************************************************** % \subsection[thread-gransim-execution]{Starting, Idling and Migrating Threads (GrAnSim only)} % %**************************************************************************** Thread start, idle and migration code for GrAnSim (i.e. simulating multiple processors). \begin{code} #if defined(GRAN) StartThread(event,event_type) eventq event; enum gran_event_types event_type; { if(ThreadQueueHd==Nil_closure) { CurrentTSO = ThreadQueueHd = ThreadQueueTl = EVENT_TSO(event); newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc]+gran_threadqueuetime, CONTINUETHREAD,Nil_closure,Nil_closure,NULL); if(do_gr_profile) DumpGranEvent(event_type,EVENT_TSO(event)); } else { TSO_LINK(ThreadQueueTl) = EVENT_TSO(event); ThreadQueueTl = EVENT_TSO(event); if(DoThreadMigration) ++SurplusThreads; if(do_gr_profile) DumpGranEvent(event_type+1,EVENT_TSO(event)); } CurrentTime[CurrentProc] += gran_threadqueuetime; } \end{code} Export work to idle PEs. 
\begin{code} HandleIdlePEs() { PROC proc; if(ANY_IDLE && (SparksAvail > 0l || SurplusThreads > 0l)) for(proc = 0; proc < max_proc; proc++) if(IS_IDLE(proc)) { if(DoStealThreadsFirst && (FetchStrategy >= 4 || OutstandingFetches[proc] == 0)) { if (SurplusThreads > 0l) /* Steal a thread */ StealThread(proc); if(!IS_IDLE(proc)) break; } if(SparksAvail > 0l && (FetchStrategy >= 3 || OutstandingFetches[proc] == 0)) /* Steal a spark */ StealSpark(proc); if (IS_IDLE(proc) && SurplusThreads > 0l && (FetchStrategy >= 4 || OutstandingFetches[proc] == 0)) /* Steal a thread */ StealThread(proc); } } \end{code} Steal a spark and schedule moving it to proc. We want to look at PEs in clock order -- most retarded first. Currently sparks are only stolen from the @ADVISORY_POOL@ never from the @REQUIRED_POOL@. Eventually, this should be changed to first steal from the former then from the latter. \begin{code} StealSpark(proc) PROC proc; { PROC p; sparkq spark, prev, next; int stolen = 0; TIME times[MAX_PROC], stealtime; unsigned ntimes=0, i, j; /* times shall contain processors from which we may steal sparks */ for(p=0; p < max_proc; ++p) if(proc != p && PendingSparksHd[p][ADVISORY_POOL] != NULL && CurrentTime[p] <= CurrentTime[CurrentProc]) times[ntimes++] = p; /* sort times */ for(i=0; i < ntimes; ++i) for(j=i+1; j < ntimes; ++j) if(CurrentTime[times[i]] > CurrentTime[times[j]]) { unsigned temp = times[i]; times[i] = times[j]; times[j] = temp; } for(i=0; i < ntimes && !stolen; ++i) { p = times[i]; for(prev=NULL, spark = PendingSparksHd[p][ADVISORY_POOL]; spark != NULL && !stolen; spark=next) { next = SPARK_NEXT(spark); if(SHOULD_SPARK(SPARK_NODE(spark))) { /* Don't Steal local sparks */ if(!SPARK_GLOBAL(spark)) { prev=spark; continue; } SPARK_NEXT(spark) = NULL; CurrentTime[p] += gran_mpacktime; stealtime = (CurrentTime[p] > CurrentTime[proc]? 
CurrentTime[p]: CurrentTime[proc]) + SparkStealTime(); newevent(proc,p /* CurrentProc */,stealtime, MOVESPARK,Nil_closure,Nil_closure,spark); MAKE_BUSY(proc); stolen = 1; ++SPARK_GLOBAL(spark); if(do_sp_profile) DumpSparkGranEvent(SP_EXPORTED,spark); CurrentTime[p] += gran_mtidytime; --SparksAvail; } else { if(do_sp_profile) DumpSparkGranEvent(SP_PRUNED,spark); DisposeSpark(spark); } if(spark == PendingSparksHd[p][ADVISORY_POOL]) PendingSparksHd[p][ADVISORY_POOL] = next; if(prev!=NULL) SPARK_NEXT(prev) = next; } if(PendingSparksHd[p][ADVISORY_POOL] == NULL) PendingSparksTl[p][ADVISORY_POOL] = NULL; } } \end{code} Steal a spark and schedule moving it to proc. \begin{code} StealThread(proc) PROC proc; { PROC p; P_ thread, prev; TIME times[MAX_PROC], stealtime; unsigned ntimes=0, i, j; /* Hunt for a thread */ /* times shall contain processors from which we may steal threads */ for(p=0; p < max_proc; ++p) if(proc != p && RunnableThreadsHd[p] != Nil_closure && CurrentTime[p] <= CurrentTime[CurrentProc]) times[ntimes++] = p; /* sort times */ for(i=0; i < ntimes; ++i) for(j=i+1; j < ntimes; ++j) if(CurrentTime[times[i]] > CurrentTime[times[j]]) { unsigned temp = times[i]; times[i] = times[j]; times[j] = temp; } for(i=0; i < ntimes; ++i) { p = times[i]; /* Steal the first exportable thread in the runnable queue after the */ /* first one */ if(RunnableThreadsHd[p] != Nil_closure) { for(prev = RunnableThreadsHd[p], thread = TSO_LINK(RunnableThreadsHd[p]); thread != Nil_closure && TSO_LOCKED(thread); prev = thread, thread = TSO_LINK(thread)) /* SKIP */; if(thread != Nil_closure) /* Take thread out of runnable queue */ { TSO_LINK(prev) = TSO_LINK(thread); TSO_LINK(thread) = Nil_closure; if(RunnableThreadsTl[p] == thread) RunnableThreadsTl[p] = prev; /* Turn magic constants into params !? -- HWL */ CurrentTime[p] += 5l * gran_mpacktime; stealtime = (CurrentTime[p] > CurrentTime[proc]? 
CurrentTime[p]: CurrentTime[proc]) + SparkStealTime() + 4l * gran_additional_latency + 5l * gran_munpacktime; /* Move the thread */ SET_PROCS(thread,PE_NUMBER(proc)); /* Move from one queue to another */ newevent(proc,p,stealtime,MOVETHREAD,thread,Nil_closure,NULL); MAKE_BUSY(proc); --SurplusThreads; if(do_gr_profile) DumpRawGranEvent(p,GR_STEALING,TSO_ID(thread)); CurrentTime[p] += 5l * gran_mtidytime; /* Found one */ break; } } } } TIME SparkStealTime() { double fishdelay, sparkdelay, latencydelay; fishdelay = (double)max_proc/2; sparkdelay = fishdelay - ((fishdelay-1)/(double)(max_proc-1))*(double)Idlers; latencydelay = sparkdelay*((double)gran_latency); /* fprintf(stderr,"fish delay = %g, spark delay = %g, latency delay = %g, Idlers = %u\n", fishdelay,sparkdelay,latencydelay,Idlers); */ return((TIME)latencydelay); } #endif /* GRAN ; HWL */ \end{code} %**************************************************************************** % \subsection[thread-execution]{Executing Threads} % %**************************************************************************** \begin{code} EXTDATA_RO(StkO_info); EXTDATA_RO(TSO_info); EXTDATA_RO(WorldStateToken_closure); EXTFUN(EnterNodeCode); UNVEC(EXTFUN(stopThreadDirectReturn);,EXTDATA(vtbl_stopStgWorld);) #if defined(GRAN) /* Slow but relatively reliable method uses xmalloc */ /* Eventually change that to heap allocated sparks. */ sparkq NewSpark(node,name,local) P_ node; I_ name, local; { extern P_ xmalloc(); sparkq newspark = (sparkq) xmalloc(sizeof(struct spark)); SPARK_PREV(newspark) = SPARK_NEXT(newspark) = NULL; SPARK_NODE(newspark) = node; SPARK_NAME(newspark) = name; SPARK_GLOBAL(newspark) = !local; return(newspark); } void DisposeSpark(spark) sparkq spark; { if(spark!=NULL) free(spark); --SparksAvail; /* Heap-allocated disposal. 
FREEZE_MUT_HDR(spark, ImMutArrayOfPtrs); SPARK_PREV(spark) = SPARK_NEXT(spark) = SPARK_NODE(spark) = Nil_closure; */ } DisposeSparkQ(spark) sparkq spark; { if (spark==NULL) return; DisposeSparkQ(SPARK_NEXT(spark)); #ifdef GRAN_CHECK if (SparksAvail < 0) fprintf(stderr,"DisposeSparkQ: SparksAvail<0 after disposing sparkq @ 0x%lx\n", spark); #endif free(spark); } #endif I_ StkOChunkSize = DEFAULT_STKO_CHUNK_SIZE; /* Create a new TSO, with the specified closure to enter and thread type */ P_ NewThread(topClosure, type) P_ topClosure; W_ type; { P_ stko, tso; if (AvailableTSO != Nil_closure) { tso = AvailableTSO; #if defined(GRAN) SET_PROCS(tso,ThisPE); /* Allocate it locally! */ #endif AvailableTSO = TSO_LINK(tso); } else if (SAVE_Hp + TSO_HS + TSO_CTS_SIZE > SAVE_HpLim) { return(NULL); } else { ALLOC_TSO(TSO_HS,BYTES_TO_STGWORDS(sizeof(STGRegisterTable)), BYTES_TO_STGWORDS(sizeof(StgDouble))); tso = SAVE_Hp + 1; SAVE_Hp += TSO_HS + TSO_CTS_SIZE; SET_TSO_HDR(tso, TSO_info, CCC); } TSO_LINK(tso) = Nil_closure; TSO_CCC(tso) = (CostCentre)STATIC_CC_REF(CC_MAIN); TSO_NAME(tso) = (P_) INFO_PTR(topClosure); /* A string would be nicer -- JSM */ TSO_ID(tso) = threadId++; TSO_TYPE(tso) = type; TSO_PC1(tso) = TSO_PC2(tso) = EnterNodeCode; TSO_ARG1(tso) = TSO_EVENT(tso) = 0; TSO_SWITCH(tso) = NULL; #ifdef DO_REDN_COUNTING TSO_AHWM(tso) = 0; TSO_BHWM(tso) = 0; #endif #if defined(GRAN) || defined(PAR) TSO_SPARKNAME(tso) = 0; #if defined(GRAN) TSO_STARTEDAT(tso) = CurrentTime[CurrentProc]; #else TSO_STARTEDAT(tso) = CURRENT_TIME; #endif TSO_EXPORTED(tso) = 0; TSO_BASICBLOCKS(tso) = 0; TSO_ALLOCS(tso) = 0; TSO_EXECTIME(tso) = 0; TSO_FETCHTIME(tso) = 0; TSO_FETCHCOUNT(tso) = 0; TSO_BLOCKTIME(tso) = 0; TSO_BLOCKCOUNT(tso) = 0; TSO_BLOCKEDAT(tso) = 0; TSO_GLOBALSPARKS(tso) = 0; TSO_LOCALSPARKS(tso) = 0; #endif /* * set pc, Node (R1), liveness */ CurrentRegTable = TSO_INTERNAL_PTR(tso); SAVE_Liveness = LIVENESS_R1; SAVE_R1.p = topClosure; # ifndef PAR if (type == T_MAIN) { stko = 
MainStkO; } else { # endif if (AvailableStack != Nil_closure) { stko = AvailableStack; #if defined(GRAN) SET_PROCS(stko,ThisPE); #endif AvailableStack = STKO_LINK(AvailableStack); } else if (SAVE_Hp + STKO_HS + StkOChunkSize > SAVE_HpLim) { return(NULL); } else { ALLOC_STK(STKO_HS,StkOChunkSize,0); stko = SAVE_Hp + 1; SAVE_Hp += STKO_HS + StkOChunkSize; SET_STKO_HDR(stko, StkO_info, CCC); } STKO_SIZE(stko) = StkOChunkSize + STKO_VHS; STKO_SpB(stko) = STKO_SuB(stko) = STKO_BSTK_BOT(stko) + BREL(1); STKO_SpA(stko) = STKO_SuA(stko) = STKO_ASTK_BOT(stko) + AREL(1); STKO_LINK(stko) = Nil_closure; STKO_RETURN(stko) = NULL; # ifndef PAR } # endif #ifdef DO_REDN_COUNTING STKO_ADEP(stko) = STKO_BDEP(stko) = 0; #endif if (type == T_MAIN) { STKO_SpA(stko) -= AREL(1); *STKO_SpA(stko) = (P_) WorldStateToken_closure; } SAVE_Ret = (StgRetAddr) UNVEC(stopThreadDirectReturn,vtbl_stopStgWorld); SAVE_StkO = stko; if (DO_QP_PROF) { QP_Event1(do_qp_prof > 1 ? "*A" : "*G", tso); } return tso; } \end{code} \begin{code} void EndThread(STG_NO_ARGS) { #ifdef PAR TIME now = CURRENT_TIME; #endif #ifdef DO_REDN_COUNTING extern FILE *tickyfile; if (tickyfile != NULL) { fprintf(tickyfile, "Thread %d (%lx)\n\tA stack max. depth: %ld words\n", TSO_ID(CurrentTSO), TSO_NAME(CurrentTSO), TSO_AHWM(CurrentTSO)); fprintf(tickyfile, "\tB stack max. 
depth: %ld words\n", TSO_BHWM(CurrentTSO)); } #endif if (DO_QP_PROF) { QP_Event1("G*", CurrentTSO); } #if defined(GRAN) assert(CurrentTSO == ThreadQueueHd); ThreadQueueHd = TSO_LINK(CurrentTSO); if(ThreadQueueHd == Nil_closure) ThreadQueueTl = Nil_closure; else if (DoThreadMigration) --SurplusThreads; if (do_gr_sim) { if(TSO_TYPE(CurrentTSO)==T_MAIN) { int i; for(i=0; i < max_proc; ++i) { StgBool is_first = StgTrue; while(RunnableThreadsHd[i] != Nil_closure) { /* We schedule runnable threads before killing them to */ /* make the job of bookkeeping the running, runnable, */ /* blocked threads easier for scripts like gr2ps -- HWL */ if (do_gr_profile && !is_first) DumpRawGranEvent(i,GR_SCHEDULE, TSO_ID(RunnableThreadsHd[i])); if (!no_gr_profile) DumpGranInfo(i,RunnableThreadsHd[i],StgTrue); RunnableThreadsHd[i] = TSO_LINK(RunnableThreadsHd[i]); is_first = StgFalse; } } ThreadQueueHd = Nil_closure; #if defined(GRAN_CHECK) && defined(GRAN) /* Print event stats */ if (debug & 0x20) { int i; fprintf(stderr,"Statistics of events (total=%d):\n", noOfEvents); for (i=0; i<=7; i++) { fprintf(stderr,"> %s (%d): \t%ld \t%f%%\n", event_names[i],i,event_counts[i], (float)(100*event_counts[i])/(float)(noOfEvents) ); } } #endif } if (!no_gr_profile) DumpGranInfo(CurrentProc,CurrentTSO, TSO_TYPE(CurrentTSO) != T_ADVISORY); /* Note ThreadQueueHd is Nil when the main thread terminates */ if(ThreadQueueHd != Nil_closure) { if (do_gr_profile && !no_gr_profile) DumpGranEvent(GR_SCHEDULE,ThreadQueueHd); CurrentTime[CurrentProc] += gran_threadscheduletime; } else if (do_gr_binary && TSO_TYPE(CurrentTSO)==T_MAIN && !no_gr_profile) grterminate(CurrentTime[CurrentProc]); } #endif /* GRAN */ #ifdef PAR if (do_gr_profile) { TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO); DumpGranInfo(thisPE, CurrentTSO, TSO_TYPE(CurrentTSO) != T_ADVISORY); } #endif switch (TSO_TYPE(CurrentTSO)) { case T_MAIN: required_thread_count--; #ifdef PAR if (do_gr_binary) grterminate(now); #endif #if 
defined(GRAN_CHECK) && defined(GRAN) if ( (debug & 0x80) || (debug & 0x40) ) fprintf(stderr,"\nGRAN: I hereby terminate the main thread!\n"); /* I've stolen that from the end of ReSchedule (!GRAN). HWL */ longjmp(scheduler_loop, required_thread_count > 0 ? 1 : -1); #else ReSchedule(0); #endif /* GRAN */ case T_REQUIRED: required_thread_count--; break; case T_ADVISORY: advisory_thread_count--; break; case T_FAIL: EXIT(EXIT_FAILURE); default: fflush(stdout); fprintf(stderr, "EndThread: %lx unknown\n", (W_) TSO_TYPE(CurrentTSO)); EXIT(EXIT_FAILURE); } /* Reuse stack object space */ ASSERT(STKO_LINK(SAVE_StkO) == Nil_closure); STKO_LINK(SAVE_StkO) = AvailableStack; AvailableStack = SAVE_StkO; /* Reuse TSO */ TSO_LINK(CurrentTSO) = AvailableTSO; AvailableTSO = CurrentTSO; CurrentTSO = Nil_closure; CurrentRegTable = NULL; #if defined(GRAN) /* NB: Now ThreadQueueHd is either the next runnable thread on this */ /* proc or it's Nil_closure. In the latter case, a FINDWORK will be */ /* issued by ReSchedule. */ ReSchedule(SAME_THREAD); /* back for more! */ #else ReSchedule(0); /* back for more! */ #endif } \end{code} %**************************************************************************** % \subsection[thread-blocking]{Local Blocking} % %**************************************************************************** \begin{code} #if defined(COUNT) void CountnUPDs() { ++nUPDs; } void CountnUPDs_old() { ++nUPDs_old; } void CountnUPDs_new() { ++nUPDs_new; } void CountnPAPs() { ++nPAPs; } #endif EXTDATA_RO(BQ_info); #ifndef GRAN /* NB: non-GRAN version ToDo * * AwakenBlockingQueue awakens a list of TSOs and FBQs. */ P_ PendingFetches = Nil_closure; void AwakenBlockingQueue(bqe) P_ bqe; { P_ last_tso = NULL; # ifdef PAR P_ next; TIME now = CURRENT_TIME; # endif # ifndef PAR while (bqe != Nil_closure) { # else while (IS_MUTABLE(INFO_PTR(bqe))) { switch (INFO_TYPE(INFO_PTR(bqe))) { case INFO_TSO_TYPE: # endif if (DO_QP_PROF) { QP_Event2(do_qp_prof > 1 ? 
"RA" : "RG", bqe, CurrentTSO); } # ifdef PAR if (do_gr_profile) { DumpGranEvent(GR_RESUMEQ, bqe); switch (TSO_QUEUE(bqe)) { case Q_BLOCKED: TSO_BLOCKTIME(bqe) += now - TSO_BLOCKEDAT(bqe); break; case Q_FETCHING: TSO_FETCHTIME(bqe) += now - TSO_BLOCKEDAT(bqe); break; default: fflush(stdout); fprintf(stderr, "ABQ: TSO_QUEUE invalid.\n"); EXIT(EXIT_FAILURE); } } # endif if (last_tso == NULL) { if (RunnableThreadsHd == Nil_closure) { RunnableThreadsHd = bqe; } else { TSO_LINK(RunnableThreadsTl) = bqe; } } last_tso = bqe; bqe = TSO_LINK(bqe); # ifdef PAR break; case INFO_BF_TYPE: next = BF_LINK(bqe); BF_LINK(bqe) = PendingFetches; PendingFetches = bqe; bqe = next; if (last_tso != NULL) TSO_LINK(last_tso) = next; break; default: fprintf(stderr, "Unexpected IP (%#lx) in blocking queue at %#lx\n", INFO_PTR(bqe), (W_) bqe); EXIT(EXIT_FAILURE); } } #else } # endif if (last_tso != NULL) { RunnableThreadsTl = last_tso; # ifdef PAR TSO_LINK(last_tso) = Nil_closure; # endif } } #endif /* !GRAN */ #ifdef GRAN /* NB: GRAN version only ToDo * * AwakenBlockingQueue returns True if we are on the oldmutables list, * so that the update code knows what to do next. 
*/
I_ AwakenBlockingQueue(node)
P_ node;
{
    P_ tso = (P_) BQ_ENTRIES(node);
    P_ prev;

    if(do_gr_sim)
    {
        W_ notifytime;

# if defined(COUNT)
        ++nUPDs;
        if (tso != Nil_closure)
            ++nUPDs_BQ;
# endif

        /* Walk the blocking queue, sending each TSO a RESUMETHREAD event
           to the processor it lives on, charging the appropriate local or
           global unblock (plus pack/tidy) costs to the current PE's clock. */
        while(tso != Nil_closure)
        {
            W_ proc;

            /* Every queued TSO must actually be blocked on this node */
            assert(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
# if defined(COUNT)
            ++BQ_lens;
# endif

            /* Find where the tso lives */
            proc = where_is(tso);

            if(proc == CurrentProc)
                notifytime = CurrentTime[CurrentProc] + gran_lunblocktime;
            else
            {
                /* Remote wake-up: charge message pack/tidy time locally */
                CurrentTime[CurrentProc] += gran_mpacktime;
                notifytime = CurrentTime[CurrentProc] + gran_gunblocktime;
                CurrentTime[CurrentProc] += gran_mtidytime;
            }

            /* and create a resume message */
            newevent(proc, CurrentProc, notifytime,
                     RESUMETHREAD,tso,Nil_closure,NULL);

            /* Unlink the TSO from the blocking queue as we go */
            prev = tso;
            tso = TSO_LINK(tso);
            TSO_LINK(prev) = Nil_closure;
        }
    }
    else
    {
        /* Not simulating: splice the whole blocking queue onto the end of
           the runnable thread queue in one go. */
        if (ThreadQueueHd == Nil_closure)
            ThreadQueueHd = tso;
        else
            TSO_LINK(ThreadQueueTl) = tso;

        /* Walk to the last TSO, emitting a QP resume event for each */
        while(TSO_LINK(tso) != Nil_closure)
        {
            assert(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
            if (DO_QP_PROF)
            {
                QP_Event2(do_qp_prof > 1 ? "RA" : "RG", tso, CurrentTSO);
            }
            tso = TSO_LINK(tso);
        }

        assert(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
        if (DO_QP_PROF)
        {
            QP_Event2(do_qp_prof > 1 ? "RA" : "RG", tso, CurrentTSO);
        }

        ThreadQueueTl = tso;
    }

    /* True iff node is still on the oldmutables list; the update code
       uses this to decide what to do next. */
    return MUT_LINK(node) != MUT_NOT_LINKED;
}
#endif /* GRAN only */

EXTFUN(Continue);

/* Yield: voluntarily give up the processor and re-enter the scheduler.
   The low bit of "args" is passed through to ReSchedule; the remaining
   bits are the liveness mask to save for resumption at Continue. */
void
Yield(args)
W_ args;
{
    SAVE_Liveness = args >> 1;
    TSO_PC1(CurrentTSO) = Continue;
    if (DO_QP_PROF)
    {
        QP_Event1("GR", CurrentTSO);
    }
#ifdef PAR
    if (do_gr_profile)
    {
        /* Note that CURRENT_TIME may perform an unsafe call */
        TSO_EXECTIME(CurrentTSO) += CURRENT_TIME - TSO_BLOCKEDAT(CurrentTSO);
    }
#endif
    ReSchedule(args & 1);
}
\end{code}

%****************************************************************************
%
\subsection[gr-fetch]{Fetching Nodes (GrAnSim only)}
%
%****************************************************************************

The following GrAnSim routines simulate the fetching of nodes from a remote
processor.
We use a 1 word bitmask to indicate on which processor a node is lying. Thus, moving or copying a node from one processor to another just requires an appropriate change in this bitmask (using @SET_GA@). Additionally, the clocks have to be updated. A special case arises when the node that is needed by processor A has been moved from a processor B to a processor C between sending out a @FETCH@ (from A) and its arrival at B. In that case the @FETCH@ has to be forwarded to C. Currently, we only support GRIP-like single closure fetching. We plan to incorporate GUM-like packet fetching in the near future. \begin{code} #if defined(GRAN) /* Fetch node "node" to processor "p" */ int FetchNode(node,from,to) P_ node; PROC from, to; { assert(to==CurrentProc); if (!IS_LOCAL_TO(PROCS(node),from) && !IS_LOCAL_TO(PROCS(node),to) ) return 1; if(IS_NF(INFO_PTR(node))) /* Old: || IS_BQ(node) */ PROCS(node) |= PE_NUMBER(to); /* Copy node */ else PROCS(node) = PE_NUMBER(to); /* Move node */ /* Now fetch the children */ if(DoGUMMFetching) { fprintf(stderr,"Sorry, GUMM fetching not yet implemented.\n"); } return 0; } /* -------------------------------------------------- Cost of sending a packet of size n = C + P*n where C = packet construction constant, P = cost of packing one word into a packet [Should also account for multiple packets]. -------------------------------------------------- */ void HandleFetchRequest(node,p,tso) P_ node, tso; PROC p; { if (IS_LOCAL_TO(PROCS(node),p) ) /* Somebody else moved node already => */ { /* start tso */ newevent(p,CurrentProc, CurrentTime[CurrentProc] /* +gran_latency */, FETCHREPLY,tso,node,NULL); /* node needed ?? */ CurrentTime[CurrentProc] += gran_mtidytime; } else if (IS_LOCAL_TO(PROCS(node),CurrentProc) ) /* Is node still here? 
*/ { /* Actual moving/copying of node is done on arrival; see FETCHREPLY */ /* Send a reply to the originator */ CurrentTime[CurrentProc] += gran_mpacktime; newevent(p,CurrentProc, CurrentTime[CurrentProc]+gran_latency, FETCHREPLY,tso,node,NULL); /* node needed ?? */ CurrentTime[CurrentProc] += gran_mtidytime; } else { /* Qu'vatlh! node has been grabbed by another proc => forward */ PROC p_new = where_is(node); TIME fetchtime; #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ if (NoForward) { newevent(p,p_new, max(CurrentTime[p_new],CurrentTime[CurrentProc])+gran_latency, FETCHREPLY,tso,node,NULL); /* node needed ?? */ CurrentTime[CurrentProc] += gran_mtidytime; return; } #endif #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ if (debug & 0x2) /* 0x2 should be somehting like DBG_PRINT_FWD */ fprintf(stderr,"Qu'vatlh! node 0x%x has been grabbed by %d (current=%d; demander=%d) @ %d\n", node,p_new,CurrentProc,p,CurrentTime[CurrentProc]); #endif /* Prepare FORWARD message to proc p_new */ CurrentTime[CurrentProc] += gran_mpacktime; fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p_new]) + gran_latency; newevent(p_new,p,fetchtime,FETCHNODE,tso,node,NULL); CurrentTime[CurrentProc] += gran_mtidytime; } } #endif \end{code} %**************************************************************************** % \subsection[gr-simulation]{Granularity Simulation} % %**************************************************************************** \begin{code} #if 0 /* moved to GranSim.lc */ #if defined(GRAN) I_ do_gr_sim = 0; FILE *gr_file = NULL; char gr_filename[32]; init_gr_simulation(rts_argc,rts_argv,prog_argc,prog_argv) char *prog_argv[], *rts_argv[]; int prog_argc, rts_argc; { I_ i; if(do_gr_sim) { char *extension = do_gr_binary? 
"gb": "gr"; sprintf(gr_filename, "%0.28s.%0.2s", prog_argv[0],extension); if ((gr_file = fopen(gr_filename,"w")) == NULL ) { fprintf(stderr, "Can't open granularity simulation report file %s\n", gr_filename); exit(EXIT_FAILURE); } #if defined(GRAN_CHECK) && defined(GRAN) if(DoReScheduleOnFetch) setbuf(gr_file,NULL); #endif fputs("Granularity Simulation for ",gr_file); for(i=0; i < prog_argc; ++i) { fputs(prog_argv[i],gr_file); fputc(' ',gr_file); } if(rts_argc > 0) { fputs("+RTS ",gr_file); for(i=0; i < rts_argc; ++i) { fputs(rts_argv[i],gr_file); fputc(' ',gr_file); } } fputs("\n\n--------------------\n\n",gr_file); fputs("General Parameters:\n\n",gr_file); fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads%s\n", max_proc,DoFairSchedule?"Fair":"Unfair", DoThreadMigration?"":"Don't ", DoThreadMigration && DoStealThreadsFirst?" Before Sparks":"", DoReScheduleOnFetch?"":"Don't "); fprintf(gr_file, "%s, Fetch %s in Each Packet\n", SimplifiedFetch?"Simplified Fetch":(DoReScheduleOnFetch?"Reschedule on Fetch":"Block on Fetch"), DoGUMMFetching?"Many Closures":"Exactly One Closure"); fprintf(gr_file, "Fetch Strategy(%u): If outstanding fetches %s\n", FetchStrategy, FetchStrategy==1?"only run runnable threads (don't create new ones": FetchStrategy==2?"create threads only from local sparks": FetchStrategy==3?"create threads from local or global sparks": FetchStrategy==4?"create sparks and steal threads if necessary": "unknown"); fprintf(gr_file, "Thread Creation Time %lu, Thread Queue Time %lu\n", gran_threadcreatetime,gran_threadqueuetime); fprintf(gr_file, "Thread DeSchedule Time %lu, Thread Schedule Time %lu\n", gran_threaddescheduletime,gran_threadscheduletime); fprintf(gr_file, "Thread Context-Switch Time %lu\n", gran_threadcontextswitchtime); fputs("\n\n--------------------\n\n",gr_file); fputs("Communication Metrics:\n\n",gr_file); fprintf(gr_file, "Latency %lu (1st) %lu (rest), Fetch %lu, Notify %lu (Global) %lu (Local)\n", gran_latency, 
gran_additional_latency, gran_fetchtime, gran_gunblocktime, gran_lunblocktime); fprintf(gr_file, "Message Creation %lu (+ %lu after send), Message Read %lu\n", gran_mpacktime, gran_mtidytime, gran_munpacktime); fputs("\n\n--------------------\n\n",gr_file); fputs("Instruction Metrics:\n\n",gr_file); fprintf(gr_file,"Arith %lu, Branch %lu, Load %lu, Store %lu, Float %lu, Alloc %lu\n", gran_arith_cost, gran_branch_cost, gran_load_cost, gran_store_cost,gran_float_cost,gran_heapalloc_cost); fputs("\n\n++++++++++++++++++++\n\n",gr_file); } if(do_gr_binary) grputw(sizeof(TIME)); Idlers = max_proc; return(0); } void end_gr_simulation() { if(do_gr_sim) { fprintf(stderr,"The simulation is finished. Look at %s for details.\n", gr_filename); fclose(gr_file); } } #endif /*0*/ \end{code} %**************************************************************************** % \subsection[qp-profile]{Quasi-Parallel Profiling} % %**************************************************************************** \begin{code} #ifndef PAR I_ do_qp_prof; FILE *qp_file; /* *Virtual* Time in milliseconds */ long qp_elapsed_time(STG_NO_ARGS) { extern StgDouble usertime(); return ((long) (usertime() * 1e3)); } static void init_qp_profiling(STG_NO_ARGS) { I_ i; char qp_filename[32]; sprintf(qp_filename, "%0.24s.qp", prog_argv[0]); if ((qp_file = fopen(qp_filename,"w")) == NULL ) { fprintf(stderr, "Can't open quasi-parallel profile report file %s\n", qp_filename); do_qp_prof = 0; } else { fputs(prog_argv[0], qp_file); for(i = 1; prog_argv[i]; i++) { fputc(' ', qp_file); fputs(prog_argv[i], qp_file); } fprintf(qp_file, " +RTS -C%d -t%d\n", contextSwitchTime, MaxThreads); fputs(time_str(), qp_file); fputc('\n', qp_file); } } void QP_Event0(tid, node) I_ tid; P_ node; { fprintf(qp_file, "%lu ** %lu 0x%lx\n", qp_elapsed_time(), tid, INFO_PTR(node)); } void QP_Event1(event, tso) char *event; P_ tso; { fprintf(qp_file, "%lu %s %lu 0x%lx\n", qp_elapsed_time(), event, TSO_ID(tso), TSO_NAME(tso)); } void 
QP_Event2(event, tso1, tso2)
char *event;
P_ tso1, tso2;
{
    /* Log a two-thread quasi-parallel event: virtual time, event tag,
       then (id, name) for each of the two TSOs involved. */
    fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n",
            qp_elapsed_time(), event,
            TSO_ID(tso1), TSO_NAME(tso1),
            TSO_ID(tso2), TSO_NAME(tso2));
}
#endif /* !PAR */
\end{code}

%****************************************************************************
%
\subsection[entry-points]{Routines directly called from Haskell world}
%
%****************************************************************************

The @GranSim...@ routines in here are directly called via macros from the
threaded world.

First some auxiliary routines.

\begin{code}
#ifdef GRAN

/* Take the current thread off the thread queue and thereby activate the */
/* next thread. It's assumed that the next ReSchedule after this uses    */
/* NEW_THREAD as param.                                                  */
/* This fct is called from GranSimBlock and GranSimFetch                 */

void
ActivateNextThread ()
{
#if defined(GRAN_CHECK) && defined(GRAN)
    /* Sanity check: only the thread at the head of the queue may be
       dequeued here. */
    if(ThreadQueueHd != CurrentTSO)
    {
        fprintf(stderr,"Error: ThreadQueueHd != CurrentTSO in ActivateNextThread\n");
        exit(99);
    }
#endif

    ThreadQueueHd = TSO_LINK(ThreadQueueHd);
    if(ThreadQueueHd==Nil_closure)
    {
        /* Queue ran dry: mark this processor idle */
        MAKE_IDLE(CurrentProc);
        ThreadQueueTl = Nil_closure;
    }
    else if (do_gr_profile)
    {
        /* Charge a context switch and record the schedule event for the
           newly activated thread. */
        CurrentTime[CurrentProc] += gran_threadcontextswitchtime;
        DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
    }
}
\end{code}

Now the main stg-called routines:

\begin{code}
/* ------------------------------------------------------------------------ */
/* The following GranSim... fcts are stg-called from the threaded world.    */
/* ------------------------------------------------------------------------ */

/* Called from HEAP_CHK -- NB: node and liveness are junk here now.
   They are left temporarily to avoid complete recompilation.
KH */ void GranSimAllocate(n,node,liveness) I_ n; P_ node; W_ liveness; { TSO_ALLOCS(CurrentTSO) += n; ++TSO_BASICBLOCKS(CurrentTSO); TSO_EXECTIME(CurrentTSO) += gran_heapalloc_cost; CurrentTime[CurrentProc] += gran_heapalloc_cost; } /* Subtract the values added above, if a heap check fails and so has to be redone. */ void GranSimUnallocate(n,node,liveness) W_ n; P_ node; W_ liveness; { TSO_ALLOCS(CurrentTSO) -= n; --TSO_BASICBLOCKS(CurrentTSO); TSO_EXECTIME(CurrentTSO) -= gran_heapalloc_cost; CurrentTime[CurrentProc] -= gran_heapalloc_cost; } void GranSimExec(ariths,branches,loads,stores,floats) W_ ariths,branches,loads,stores,floats; { W_ cost = gran_arith_cost*ariths + gran_branch_cost*branches + gran_load_cost * loads + gran_store_cost*stores + gran_float_cost*floats; TSO_EXECTIME(CurrentTSO) += cost; CurrentTime[CurrentProc] += cost; } /* Fetch the node if it isn't local -- result indicates whether fetch has been done. This is GRIP-style single item fetching. */ I_ GranSimFetch(node /* , liveness_mask */ ) P_ node; /* I_ liveness_mask; */ { /* Note: once a node has been fetched, this test will be passed */ if(!IS_LOCAL_TO(PROCS(node),CurrentProc) ) { /* I suppose we shouldn't do this for CAFs? -- KH */ /* Should reschedule if the latency is high */ /* We should add mpacktime to the remote PE for the reply, but we don't know who owns the node */ /* if(DYNAMIC_POINTER(node)) */ /* For 0.22; gone in 0.23 !!! 
*/ { PROC p = where_is(node); TIME fetchtime; #ifdef GRAN_CHECK if ( ( debug & 0x40 ) && p == CurrentProc ) fprintf(stderr,"GranSimFetch: Trying to fetch from own processor%u\n", p); #endif /* GRAN_CHECK */ CurrentTime[CurrentProc] += gran_mpacktime; ++TSO_FETCHCOUNT(CurrentTSO); TSO_FETCHTIME(CurrentTSO) += gran_fetchtime; if (SimplifiedFetch) { FetchNode(node,CurrentProc); CurrentTime[CurrentProc] += gran_mtidytime+gran_fetchtime+ gran_munpacktime; return(1); } fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p]) + gran_latency; newevent(p,CurrentProc,fetchtime,FETCHNODE,CurrentTSO,node,NULL); ++OutstandingFetches[CurrentProc]; /* About to block */ TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[p]; if (DoReScheduleOnFetch) { /* Remove CurrentTSO from the queue -- assumes head of queue == CurrentTSO */ if(!DoFairSchedule) { if(do_gr_profile) DumpGranEventAndNode(GR_FETCH,CurrentTSO,node,p); ActivateNextThread(); #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ if (debug & 0x10) { if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) { fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n", CurrentTSO,CurrentTime[CurrentProc]); exit (99); } else { TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO; } } #endif TSO_LINK(CurrentTSO) = Nil_closure; /* CurrentTSO = Nil_closure; */ /* ThreadQueueHd is now the next TSO to schedule or NULL */ /* CurrentTSO is pointed to by the FETCHNODE event */ } else /* DoFairSchedule */ { /* Remove from the tail of the thread queue */ fprintf(stderr,"Reschedule-on-fetch is not yet compatible with fair scheduling\n"); exit(99); } } else /* !DoReScheduleOnFetch */ { /* Note: CurrentProc is still busy as it's blocked on fetch */ if(do_gr_profile) DumpGranEventAndNode(GR_FETCH,CurrentTSO,node,p); #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ if (debug & 0x04) BlockedOnFetch[CurrentProc] = CurrentTSO; /*- StgTrue; -*/ if (debug & 0x10) { if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) { fprintf(stderr,"FETCHNODE: TSO 0x%x has 
fetch-mask set @ %d\n", CurrentTSO,CurrentTime[CurrentProc]); exit (99); } else { TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO; } CurrentTSO = Nil_closure; } #endif } CurrentTime[CurrentProc] += gran_mtidytime; /* Rescheduling is necessary */ NeedToReSchedule = StgTrue; return(1); } } return(0); } void GranSimSpark(local,node) W_ local; P_ node; { ++SparksAvail; if(do_sp_profile) DumpSparkGranEvent(SP_SPARK,node); /* Force the PE to take notice of the spark */ if(DoAlwaysCreateThreads) newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc], FINDWORK,Nil_closure,Nil_closure,NULL); if(local) ++TSO_LOCALSPARKS(CurrentTSO); else ++TSO_GLOBALSPARKS(CurrentTSO); } void GranSimSparkAt(spark,where,identifier) sparkq spark; P_ where; /* This should be a node; alternatively could be a GA */ I_ identifier; { PROC p = where_is(where); TIME exporttime; if(do_sp_profile) DumpSparkGranEvent(SP_SPARKAT,SPARK_NODE(spark)); CurrentTime[CurrentProc] += gran_mpacktime; exporttime = (CurrentTime[p] > CurrentTime[CurrentProc]? CurrentTime[p]: CurrentTime[CurrentProc]) + gran_latency; newevent(p,CurrentProc,exporttime,MOVESPARK,Nil_closure,Nil_closure,spark); CurrentTime[CurrentProc] += gran_mtidytime; ++TSO_GLOBALSPARKS(CurrentTSO); } void GranSimBlock() { if(do_gr_profile) DumpGranEvent(GR_BLOCK,CurrentTSO); ++TSO_BLOCKCOUNT(CurrentTSO); TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[CurrentProc]; ActivateNextThread(); } #endif /* GRAN */ \end{code} %**************************************************************************** % \subsection[gc-GrAnSim]{Garbage collection routines for GrAnSim objects} % %**************************************************************************** Garbage collection code for the event queue. We walk the event queue so that if the only reference to a TSO is in some event (e.g. RESUME), the TSO is still preserved. 
\begin{code} #ifdef GRAN extern smInfo StorageMgrInfo; I_ SaveEventRoots(num_ptr_roots) I_ num_ptr_roots; { eventq event = EventHd; while(event != NULL) { if(EVENT_TYPE(event) == RESUMETHREAD || EVENT_TYPE(event) == MOVETHREAD || EVENT_TYPE(event) == STARTTHREAD ) StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event); else if(EVENT_TYPE(event) == MOVESPARK) StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(EVENT_SPARK(event)); else if (EVENT_TYPE(event) == FETCHNODE || EVENT_TYPE(event) == FETCHREPLY ) { StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event); StorageMgrInfo.roots[num_ptr_roots++] = EVENT_NODE(event); } event = EVENT_NEXT(event); } return(num_ptr_roots); } I_ SaveSparkRoots(num_ptr_roots) I_ num_ptr_roots; { sparkq spark, /* prev, */ disposeQ=NULL; PROC proc; I_ i, sparkroots=0, prunedSparks=0; #if defined(GRAN_CHECK) && defined(GRAN) if ( debug & 0x40 ) fprintf(stderr,"D> Saving spark roots for GC ...\n"); #endif for(proc = 0; proc < max_proc; ++proc) { for(i = 0; i < SPARK_POOLS; ++i) { for(/* prev = &PendingSparksHd[proc][i],*/ spark = PendingSparksHd[proc][i]; spark != NULL; /* prev = &SPARK_NEXT(spark), */ spark = SPARK_NEXT(spark)) { if(++sparkroots <= MAX_SPARKS) { #if defined(GRAN_CHECK) && defined(GRAN) if ( debug & 0x40 ) fprintf(main_statsfile,"Saving Spark Root %d(proc: %d; pool: %d) -- 0x%lx\n", num_ptr_roots,proc,i,SPARK_NODE(spark)); #endif StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(spark); } else { SPARK_NODE(spark) = Nil_closure; if (prunedSparks==0) { disposeQ = spark; /* *prev = NULL; */ } prunedSparks++; } } /* forall spark ... */ if (prunedSparks>0) { fprintf(main_statsfile,"Pruning and disposing %lu excess sparks (> %lu) on proc %d for GC purposes\n", prunedSparks,MAX_SPARKS,proc); if (disposeQ == PendingSparksHd[proc][i]) PendingSparksHd[proc][i] = NULL; else SPARK_NEXT(SPARK_PREV(disposeQ)) = NULL; DisposeSparkQ(disposeQ); prunedSparks = 0; disposeQ = NULL; } } /* forall i ... */ } /*forall proc .. 
*/ return(num_ptr_roots); } /* GC roots must be restored in *reverse order*. The recursion is a little ugly, but is better than in-place pointer reversal. */ static I_ RestoreEvtRoots(event,num_ptr_roots) eventq event; I_ num_ptr_roots; { if(event != NULL) { num_ptr_roots = RestoreEvtRoots(EVENT_NEXT(event),num_ptr_roots); if(EVENT_TYPE(event) == RESUMETHREAD || EVENT_TYPE(event) == MOVETHREAD || EVENT_TYPE(event) == STARTTHREAD ) EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots]; else if(EVENT_TYPE(event) == MOVESPARK ) SPARK_NODE(EVENT_SPARK(event)) = StorageMgrInfo.roots[--num_ptr_roots]; else if (EVENT_TYPE(event) == FETCHNODE || EVENT_TYPE(event) == FETCHREPLY ) { EVENT_NODE(event) = StorageMgrInfo.roots[--num_ptr_roots]; EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots]; } } return(num_ptr_roots); } I_ RestoreEventRoots(num_ptr_roots) I_ num_ptr_roots; { return(RestoreEvtRoots(EventHd,num_ptr_roots)); } static I_ RestoreSpkRoots(spark,num_ptr_roots,sparkroots) sparkq spark; I_ num_ptr_roots, sparkroots; { if(spark != NULL) { num_ptr_roots = RestoreSpkRoots(SPARK_NEXT(spark),num_ptr_roots,++sparkroots); if(sparkroots <= MAX_SPARKS) { P_ n = SPARK_NODE(spark); SPARK_NODE(spark) = StorageMgrInfo.roots[--num_ptr_roots]; #if defined(GRAN_CHECK) && defined(GRAN) if ( debug & 0x40 ) fprintf(main_statsfile,"Restoring Spark Root %d -- new: 0x%lx \n", num_ptr_roots,SPARK_NODE(spark)); #endif } else #if defined(GRAN_CHECK) && defined(GRAN) if ( debug & 0x40 ) fprintf(main_statsfile,"Error in RestoreSpkRoots (%d; @ spark 0x%x): More than MAX_SPARKS (%d) sparks\n", num_ptr_roots,SPARK_NODE(spark),MAX_SPARKS); #endif } return(num_ptr_roots); } I_ RestoreSparkRoots(num_ptr_roots) I_ num_ptr_roots; { PROC proc; I_ i; /* NB: PROC is currently an unsigned datatype i.e. proc>=0 is always */ /* true ((PROC)-1 == (PROC)255). So we need a second clause in the head */ /* of the for loop. For i that is currently not necessary. 
C is really */ /* impressive in datatype abstraction! -- HWL */ for(proc = max_proc - 1; (proc >= 0) && (proc < max_proc); --proc) { for(i = SPARK_POOLS - 1; (i >= 0) && (i < SPARK_POOLS) ; --i) { num_ptr_roots = RestoreSpkRoots(PendingSparksHd[proc][i],num_ptr_roots,0); } } return(num_ptr_roots); } #endif /* GRAN */ \end{code} %**************************************************************************** % \subsection[GrAnSim-profile]{Writing profiling info for GrAnSim} % %**************************************************************************** Event dumping routines. \begin{code} #ifdef GRAN DumpGranEvent(name,tso) enum gran_event_types name; P_ tso; { DumpRawGranEvent(CurrentProc,name,TSO_ID(tso)); } DumpSparkGranEvent(name,id) enum gran_event_types name; W_ id; { DumpRawGranEvent(CurrentProc,name,id); } DumpGranEventAndNode(name,tso,node,proc) enum gran_event_types name; P_ tso, node; PROC proc; { PROC pe = CurrentProc; W_ id = TSO_ID(tso); if(name > GR_EVENT_MAX) name = GR_EVENT_MAX; if(do_gr_binary) { grputw(name); grputw(pe); grputw(CurrentTime[CurrentProc]); grputw(id); } else fprintf(gr_file,"PE %2u [%lu]: %s %lx \t0x%lx\t(from %2u)\n", pe,CurrentTime[CurrentProc],gran_event_names[name],id,node,proc); } DumpRawGranEvent(pe,name,id) PROC pe; enum gran_event_types name; W_ id; { if(name > GR_EVENT_MAX) name = GR_EVENT_MAX; if(do_gr_binary) { grputw(name); grputw(pe); grputw(CurrentTime[CurrentProc]); grputw(id); } else fprintf(gr_file,"PE %2u [%lu]: %s %lx\n", pe,CurrentTime[CurrentProc],gran_event_names[name],id); } DumpGranInfo(pe,tso,mandatory_thread) PROC pe; P_ tso; I_ mandatory_thread; { if(do_gr_binary) { grputw(GR_END); grputw(pe); grputw(CurrentTime[CurrentProc]); grputw(TSO_ID(tso)); grputw(TSO_SPARKNAME(tso)); grputw(TSO_STARTEDAT(tso)); grputw(TSO_EXPORTED(tso)); grputw(TSO_BASICBLOCKS(tso)); grputw(TSO_ALLOCS(tso)); grputw(TSO_EXECTIME(tso)); grputw(TSO_BLOCKTIME(tso)); grputw(TSO_BLOCKCOUNT(tso)); grputw(TSO_FETCHTIME(tso)); 
grputw(TSO_FETCHCOUNT(tso)); grputw(TSO_LOCALSPARKS(tso)); grputw(TSO_GLOBALSPARKS(tso)); grputw(mandatory_thread); } else { /* NB: DumpGranEvent cannot be used because PE may be wrong (as well as the extra info) */ fprintf(gr_file,"PE %2u [%lu]: END %lx, SN %lu, ST %lu, EXP %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu), LS %lu, GS %lu, MY %c\n" ,pe ,CurrentTime[CurrentProc] ,TSO_ID(tso) ,TSO_SPARKNAME(tso) ,TSO_STARTEDAT(tso) ,TSO_EXPORTED(tso)?'T':'F' ,TSO_BASICBLOCKS(tso) ,TSO_ALLOCS(tso) ,TSO_EXECTIME(tso) ,TSO_BLOCKTIME(tso) ,TSO_BLOCKCOUNT(tso) ,TSO_FETCHTIME(tso) ,TSO_FETCHCOUNT(tso) ,TSO_LOCALSPARKS(tso) ,TSO_GLOBALSPARKS(tso) ,mandatory_thread?'T':'F' ); } } DumpTSO(tso) P_ tso; { fprintf(stderr,"TSO 0x%lx, NAME 0x%lx, ID %lu, LINK 0x%lx, TYPE %s\n" ,tso ,TSO_NAME(tso) ,TSO_ID(tso) ,TSO_LINK(tso) ,TSO_TYPE(tso)==T_MAIN?"MAIN": TSO_TYPE(tso)==T_FAIL?"FAIL": TSO_TYPE(tso)==T_REQUIRED?"REQUIRED": TSO_TYPE(tso)==T_ADVISORY?"ADVISORY": "???" ); fprintf(stderr,"PC (0x%lx,0x%lx), ARG (0x%lx,0x%lx), SWITCH %lx0x\n" ,TSO_PC1(tso) ,TSO_PC2(tso) ,TSO_ARG1(tso) ,TSO_ARG2(tso) ,TSO_SWITCH(tso) ); fprintf(gr_file,"SN %lu, ST %lu, GBL %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu) LS %lu, GS %lu\n" ,TSO_SPARKNAME(tso) ,TSO_STARTEDAT(tso) ,TSO_EXPORTED(tso)?'T':'F' ,TSO_BASICBLOCKS(tso) ,TSO_ALLOCS(tso) ,TSO_EXECTIME(tso) ,TSO_BLOCKTIME(tso) ,TSO_BLOCKCOUNT(tso) ,TSO_FETCHTIME(tso) ,TSO_FETCHCOUNT(tso) ,TSO_LOCALSPARKS(tso) ,TSO_GLOBALSPARKS(tso) ); } /* Output a terminate event and an 8-byte time. 
*/ grterminate(v) TIME v; { DumpGranEvent(GR_TERMINATE,0); if(sizeof(TIME)==4) { putc('\0',gr_file); putc('\0',gr_file); putc('\0',gr_file); putc('\0',gr_file); } else { putc(v >> 56l,gr_file); putc((v >> 48l)&0xffl,gr_file); putc((v >> 40l)&0xffl,gr_file); putc((v >> 32l)&0xffl,gr_file); } putc((v >> 24l)&0xffl,gr_file); putc((v >> 16l)&0xffl,gr_file); putc((v >> 8l)&0xffl,gr_file); putc(v&0xffl,gr_file); } /* Length-coded output: first 3 bits contain length coding 00x 1 byte 01x 2 bytes 10x 4 bytes 110 8 bytes 111 5 or 9 bytes */ grputw(v) TIME v; { if(v <= 0x3fl) { fputc(v & 0x3f,gr_file); } else if (v <= 0x3fffl) { fputc((v >> 8l)|0x40l,gr_file); fputc(v&0xffl,gr_file); } else if (v <= 0x3fffffffl) { fputc((v >> 24l)|0x80l,gr_file); fputc((v >> 16l)&0xffl,gr_file); fputc((v >> 8l)&0xffl,gr_file); fputc(v&0xffl,gr_file); } else if (sizeof(TIME) == 4) { fputc(0x70,gr_file); fputc((v >> 24l)&0xffl,gr_file); fputc((v >> 16l)&0xffl,gr_file); fputc((v >> 8l)&0xffl,gr_file); fputc(v&0xffl,gr_file); } else { if (v <= 0x3fffffffffffffl) putc((v >> 56l)|0x60l,gr_file); else { putc(0x70,gr_file); putc((v >> 56l)&0xffl,gr_file); } putc((v >> 48l)&0xffl,gr_file); putc((v >> 40l)&0xffl,gr_file); putc((v >> 32l)&0xffl,gr_file); putc((v >> 24l)&0xffl,gr_file); putc((v >> 16l)&0xffl,gr_file); putc((v >> 8l)&0xffl,gr_file); putc(v&0xffl,gr_file); } } #endif /* GRAN */ \end{code} %**************************************************************************** % \subsection[GrAnSim-debug]{Debugging routines for GrAnSim} % %**************************************************************************** Debugging routines, mainly for GrAnSim. They should really be in a separate file. The first couple of routines are general ones (look also into c-as-asm/StgDebug.lc). \begin{code} #define NULL_REG_MAP /* Not threaded */ #include "stgdefs.h" char * info_hdr_type(info_ptr) W_ info_ptr; { #if ! 
defined(PAR) && !defined(GRAN)
    /* Sequential RTS: only a small tag is available in the info table. */
    switch (INFO_TAG(info_ptr)) {
      case INFO_OTHER_TAG:
        return("OTHER_TAG");
      /* case INFO_IND_TAG: return("IND_TAG"); */
      default:
        return("TAG");
    }
#else /* PAR || GRAN */
    /* Parallel/GranSim RTS: a full closure-type field is available. */
    switch(INFO_TYPE(info_ptr)) {
      case INFO_SPEC_U_TYPE: return("SPECU");
      case INFO_SPEC_N_TYPE: return("SPECN");
      case INFO_GEN_U_TYPE: return("GENU");
      case INFO_GEN_N_TYPE: return("GENN");
      case INFO_DYN_TYPE: return("DYN");
      /* case INFO_DYN_TYPE_N: return("DYNN");
         case INFO_DYN_TYPE_U: return("DYNU"); */
      case INFO_TUPLE_TYPE: return("TUPLE");
      case INFO_DATA_TYPE: return("DATA");
      case INFO_MUTUPLE_TYPE: return("MUTUPLE");
      case INFO_IMMUTUPLE_TYPE: return("IMMUTUPLE");
      case INFO_STATIC_TYPE: return("STATIC");
      case INFO_CONST_TYPE: return("CONST");
      case INFO_CHARLIKE_TYPE: return("CHAR");
      case INFO_INTLIKE_TYPE: return("INT");
      case INFO_BH_TYPE: return("BHOLE");
      case INFO_IND_TYPE: return("IND");
      case INFO_CAF_TYPE: return("CAF");
      case INFO_FETCHME_TYPE: return("FETCHME");
      case INFO_BQ_TYPE: return("BQ");
      /* case INFO_BQENT_TYPE: return("BQENT"); */
      case INFO_TSO_TYPE: return("TSO");
      case INFO_STKO_TYPE: return("STKO");
      default:
        fprintf(stderr,"Unknown header type %lu\n",INFO_TYPE(info_ptr));
        return("??");
    }
#endif /* PAR */
}

/* @var_hdr_size@ computes the size of the variable header for a closure. 
*/
/* var_hdr_size: number of variable-header words for the closure at
   node; 0 for fixed-layout closures. */
I_
var_hdr_size(node)
P_ node;
{
    switch(INFO_TYPE(INFO_PTR(node))) {
      case INFO_SPEC_U_TYPE: return(0); /* by decree */
      case INFO_SPEC_N_TYPE: return(0);
      case INFO_GEN_U_TYPE: return(GEN_VHS);
      case INFO_GEN_N_TYPE: return(GEN_VHS);
      case INFO_DYN_TYPE: return(DYN_VHS);
      /* case INFO_DYN_TYPE_N: return(DYN_VHS);
         case INFO_DYN_TYPE_U: return(DYN_VHS); */
      case INFO_TUPLE_TYPE: return(TUPLE_VHS);
      case INFO_DATA_TYPE: return(DATA_VHS);
      case INFO_MUTUPLE_TYPE: return(MUTUPLE_VHS);
      case INFO_IMMUTUPLE_TYPE: return(MUTUPLE_VHS); /* same layout */
      case INFO_STATIC_TYPE: return(STATIC_VHS);
      case INFO_CONST_TYPE: return(0);
      case INFO_CHARLIKE_TYPE: return(0);
      case INFO_INTLIKE_TYPE: return(0);
      case INFO_BH_TYPE: return(0);
      case INFO_IND_TYPE: return(0);
      case INFO_CAF_TYPE: return(0);
      case INFO_FETCHME_TYPE: return(0);
      case INFO_BQ_TYPE: return(0);
      /* case INFO_BQENT_TYPE: return(0); */
      case INFO_TSO_TYPE: return(TSO_VHS);
      case INFO_STKO_TYPE: return(STKO_VHS);
      default:
        fprintf(stderr,"Unknown info type 0x%lx (%lu)\n", INFO_PTR(node),
          INFO_TYPE(INFO_PTR(node)));
        return(0);
    }
}

/* Determine the size and number of pointers for this kind of closure */
void
size_and_ptrs(node,size,ptrs)
P_ node;
W_ *size, *ptrs;
{
    switch(INFO_TYPE(INFO_PTR(node))) {
      case INFO_SPEC_U_TYPE:
      case INFO_SPEC_N_TYPE:
        *size = INFO_SIZE(INFO_PTR(node)); /* New for 0.24; check */
        *ptrs = INFO_NoPTRS(INFO_PTR(node)); /* that! 
-- HWL */
        /* *size = SPEC_CLOSURE_SIZE(node);
           *ptrs = SPEC_CLOSURE_NoPTRS(node); */
        break;
      case INFO_GEN_U_TYPE:
      case INFO_GEN_N_TYPE:
        *size = GEN_CLOSURE_SIZE(node);
        *ptrs = GEN_CLOSURE_NoPTRS(node);
        break;
      /* case INFO_DYN_TYPE_U:
         case INFO_DYN_TYPE_N: */
      case INFO_DYN_TYPE:
        *size = DYN_CLOSURE_SIZE(node);
        *ptrs = DYN_CLOSURE_NoPTRS(node);
        break;
      case INFO_TUPLE_TYPE:
        *size = TUPLE_CLOSURE_SIZE(node);
        *ptrs = TUPLE_CLOSURE_NoPTRS(node);
        break;
      case INFO_DATA_TYPE:
        *size = DATA_CLOSURE_SIZE(node);
        *ptrs = DATA_CLOSURE_NoPTRS(node);
        break;
      case INFO_IND_TYPE:
        *size = IND_CLOSURE_SIZE(node);
        *ptrs = IND_CLOSURE_NoPTRS(node);
        break;
      /* ToDo: more (WDP) */
      /* Don't know about the others */
      default:
        *size = *ptrs = 0;
        break;
    }
}

/* DEBUG_PRINT_NODE: dump one closure -- header words first, then the
   body with pointer words tagged [P] and data words tagged [D]. */
void
DEBUG_PRINT_NODE(node)
P_ node;
{
    W_ info_ptr = INFO_PTR(node);
    I_ size = 0, ptrs = 0, i, vhs = 0;
    char *info_type = info_hdr_type(info_ptr);

    size_and_ptrs(node,&size,&ptrs);
    vhs = var_hdr_size(node);

    fprintf(stderr,"Node: 0x%lx", (W_) node);

#if defined(PAR)
    fprintf(stderr," [GA: 0x%lx]",GA(node));
#endif

#if defined(USE_COST_CENTRES)
    fprintf(stderr," [CC: 0x%lx]",CC_HDR(node));
#endif

#if defined(GRAN)
    fprintf(stderr," [Bitmask: 0%lo]",PROCS(node));
#endif

    fprintf(stderr," IP: 0x%lx (%s), size %ld, %ld ptrs\n",
            info_ptr,info_type,size,ptrs);

    /* For now, we ignore the variable header */
    for(i=0; i < size; ++i) {
        if(i == 0)
          fprintf(stderr,"Data: ");
        else if(i % 6 == 0)
          fprintf(stderr,"\n ");

        if(i < ptrs)
          fprintf(stderr," 0x%lx[P]",*(node+_FHS+vhs+i));
        else
          fprintf(stderr," %lu[D]",*(node+_FHS+vhs+i));
    }
    fprintf(stderr, "\n");
}

#define INFO_MASK 0x80000000

/* DEBUG_TREE: recursively dump the closure graph reachable from node.
   Cycles are cut by temporarily setting INFO_MASK in the info pointer
   of each node on the current path. */
void
DEBUG_TREE(node)
P_ node;
{
    W_ size = 0, ptrs = 0, i, vhs = 0;

    /* Don't print cycles */
    if((INFO_PTR(node) & INFO_MASK) != 0)
      return;

    size_and_ptrs(node,&size,&ptrs);
    vhs = var_hdr_size(node);

    DEBUG_PRINT_NODE(node);
    fprintf(stderr, "\n");

    /* Mark the node -- may be dangerous */
    INFO_PTR(node) |= INFO_MASK;

    for(i = 0; i < ptrs; ++i)
      DEBUG_TREE((P_)node[i+vhs+_FHS]);

    /* Unmark the 
node */
    INFO_PTR(node) &= ~INFO_MASK;
}

/* DEBUG_INFO_TABLE: print the fields of the info table attached to
   node.  Which fields exist depends on the RTS configuration (PAR,
   cost centres, copying vs compacting GC). */
void
DEBUG_INFO_TABLE(node)
P_ node;
{
    W_ info_ptr = INFO_PTR(node);
    char *ip_type = info_hdr_type(info_ptr);

    fprintf(stderr,"%s Info Ptr @0x%lx; Entry: 0x%lx; Size: %lu; Ptrs: %lu\n\n",
            ip_type,info_ptr,(W_) ENTRY_CODE(info_ptr),INFO_SIZE(info_ptr),INFO_NoPTRS(info_ptr));
#if defined(PAR)
    fprintf(stderr,"Enter Flush Entry: 0x%lx;\tExit Flush Entry: 0x%lx\n",INFO_FLUSHENT(info_ptr),INFO_FLUSH(info_ptr));
#endif

#if defined(USE_COST_CENTRES)
    fprintf(stderr,"Cost Centre (???): 0x%lx\n",INFO_CAT(info_ptr));
#endif

#if defined(_INFO_COPYING)
    fprintf(stderr,"Evacuate Entry: 0x%lx;\tScavenge Entry: 0x%lx\n",
            INFO_EVAC_2S(info_ptr),INFO_SCAV_2S(info_ptr));
#endif

#if defined(_INFO_COMPACTING)
    fprintf(stderr,"Scan Link: 0x%lx;\tScan Move: 0x%lx\n",
            (W_) INFO_SCAN_LINK_1S(info_ptr), (W_) INFO_SCAN_MOVE_1S(info_ptr));
    fprintf(stderr,"Mark: 0x%lx;\tMarked: 0x%lx;\t",
            (W_) INFO_MARK_1S(info_ptr), (W_) INFO_MARKED_1S(info_ptr));
#if 0 /* avoid INFO_TYPE */
    if(BASE_INFO_TYPE(info_ptr)==INFO_SPEC_TYPE)
      fprintf(stderr,"plus specialised code\n");
    else
      fprintf(stderr,"Marking: 0x%lx\n",(W_) INFO_MARKING_1S(info_ptr));
#endif /* 0 */
#endif
}
#endif /* GRAN */
\end{code}

The remaining debugging routines are more or less specific for GrAnSim.
\begin{code} #if defined(GRAN) && defined(GRAN_CHECK) void DEBUG_CURR_THREADQ(verbose) I_ verbose; { fprintf(stderr,"Thread Queue on proc %d: ", CurrentProc); DEBUG_THREADQ(ThreadQueueHd, verbose); } void DEBUG_THREADQ(closure, verbose) P_ closure; I_ verbose; { P_ x; fprintf(stderr,"Thread Queue: "); for (x=closure; x!=Nil_closure; x=TSO_LINK(x)) if (verbose) DEBUG_TSO(x,0); else fprintf(stderr," 0x%x",x); if (closure==Nil_closure) fprintf(stderr,"NIL\n"); else fprintf(stderr,"\n"); } /* Check with Threads.lh */ static char *type_name[] = { "T_MAIN", "T_REQUIRED", "T_ADVISORY", "T_FAIL"}; void DEBUG_TSO(closure,verbose) P_ closure; I_ verbose; { if (closure==Nil_closure) { fprintf(stderr,"TSO at 0x%x is Nil_closure!\n"); return; } fprintf(stderr,"TSO at 0x%x has the following contents:\n",closure); fprintf(stderr,"> Name: 0x%x",TSO_NAME(closure)); fprintf(stderr,"\tLink: 0x%x\n",TSO_LINK(closure)); fprintf(stderr,"> Id: 0x%x",TSO_ID(closure)); #if defined(GRAN_CHECK) && defined(GRAN) if (debug & 0x10) fprintf(stderr,"\tType: %s %s\n", type_name[TSO_TYPE(closure) & ~FETCH_MASK_TSO], (TSO_TYPE(closure) & FETCH_MASK_TSO) ? 
"SLEEPING" : ""); else fprintf(stderr,"\tType: %s\n",type_name[TSO_TYPE(closure)]); #else fprintf(stderr,"\tType: %s\n",type_name[TSO_TYPE(closure)]); #endif fprintf(stderr,"> PC1: 0x%x",TSO_PC1(closure)); fprintf(stderr,"\tPC2: 0x%x\n",TSO_PC2(closure)); fprintf(stderr,"> ARG1: 0x%x",TSO_ARG1(closure)); fprintf(stderr,"\tARG2: 0x%x\n",TSO_ARG2(closure)); fprintf(stderr,"> SWITCH: 0x%x\n", TSO_SWITCH(closure)); if (verbose) { fprintf(stderr,"} LOCKED: 0x%x",TSO_LOCKED(closure)); fprintf(stderr,"\tSPARKNAME: 0x%x\n", TSO_SPARKNAME(closure)); fprintf(stderr,"} STARTEDAT: 0x%x", TSO_STARTEDAT(closure)); fprintf(stderr,"\tEXPORTED: 0x%x\n", TSO_EXPORTED(closure)); fprintf(stderr,"} BASICBLOCKS: 0x%x", TSO_BASICBLOCKS(closure)); fprintf(stderr,"\tALLOCS: 0x%x\n", TSO_ALLOCS(closure)); fprintf(stderr,"} EXECTIME: 0x%x", TSO_EXECTIME(closure)); fprintf(stderr,"\tFETCHTIME: 0x%x\n", TSO_FETCHTIME(closure)); fprintf(stderr,"} FETCHCOUNT: 0x%x", TSO_FETCHCOUNT(closure)); fprintf(stderr,"\tBLOCKTIME: 0x%x\n", TSO_BLOCKTIME(closure)); fprintf(stderr,"} BLOCKCOUNT: 0x%x", TSO_BLOCKCOUNT(closure)); fprintf(stderr,"\tBLOCKEDAT: 0x%x\n", TSO_BLOCKEDAT(closure)); fprintf(stderr,"} GLOBALSPARKS: 0x%x", TSO_GLOBALSPARKS(closure)); fprintf(stderr,"\tLOCALSPARKS: 0x%x\n", TSO_LOCALSPARKS(closure)); } } void DEBUG_EVENT(event, verbose) eventq event; I_ verbose; { if (verbose) { print_event(event); }else{ fprintf(stderr," 0x%x",event); } } void DEBUG_EVENTQ(verbose) I_ verbose; { eventq x; fprintf(stderr,"Eventq (hd @0x%x):\n",EventHd); for (x=EventHd; x!=NULL; x=EVENT_NEXT(x)) { DEBUG_EVENT(x,verbose); } if (EventHd==NULL) fprintf(stderr,"NIL\n"); else fprintf(stderr,"\n"); } void DEBUG_SPARK(spark, verbose) sparkq spark; I_ verbose; { if (verbose) print_spark(spark); else fprintf(stderr," 0x%x",spark); } void DEBUG_SPARKQ(spark,verbose) sparkq spark; I_ verbose; { sparkq x; fprintf(stderr,"Sparkq (hd @0x%x):\n",spark); for (x=spark; x!=NULL; x=SPARK_NEXT(x)) { DEBUG_SPARK(x,verbose); } 
 if (spark==NULL)
   fprintf(stderr,"NIL\n");
 else
   fprintf(stderr,"\n");
}

/* DEBUG_CURR_SPARKQ: print the spark queue of the current processor. */
void
DEBUG_CURR_SPARKQ(verbose)
I_ verbose;
{
  DEBUG_SPARKQ(SparkQueueHd,verbose);
}

/* DEBUG_PROC: summarise one simulated processor: clock, runnable
   threads, current TSO, next event, and (if verbose & 0x1) its spark
   pools. */
void
DEBUG_PROC(proc,verbose)
I_ proc;
I_ verbose;
{
  fprintf(stderr,"Status of proc %d at time %d (0x%x): %s\n",
          proc,CurrentTime[proc],CurrentTime[proc],
          (CurrentProc==proc)?"ACTIVE":"INACTIVE");
  DEBUG_THREADQ(RunnableThreadsHd[proc],verbose & 0x2);
  if ( (CurrentProc==proc) )
    DEBUG_TSO(CurrentTSO,1);

  if (EventHd!=NULL)
    fprintf(stderr,"Next event (%s) is on proc %d\n",
            event_names[EVENT_TYPE(EventHd)],EVENT_PROC(EventHd));

  if (verbose & 0x1) {
    fprintf(stderr,"\nREQUIRED sparks: ");
    DEBUG_SPARKQ(PendingSparksHd[proc][REQUIRED_POOL],1);
    fprintf(stderr,"\nADVISORY_sparks: ");
    DEBUG_SPARKQ(PendingSparksHd[proc][ADVISORY_POOL],1);
  }
}

/* Debug CurrentTSO */
void
DCT(){
  fprintf(stderr,"Current Proc: %d\n",CurrentProc);
  DEBUG_TSO(CurrentTSO,1);
}

/* Debug Current Processor */
void
DCP(){ DEBUG_PROC(CurrentProc,2); }

/* Shorthand for debugging event queue */
void
DEQ() { DEBUG_EVENTQ(1); }

/* Shorthand for debugging spark queue */
void
DSQ() { DEBUG_CURR_SPARKQ(1); }

/* Shorthand for printing a node */
void
DN(P_ node) { DEBUG_PRINT_NODE(node); }

#endif /* GRAN */
\end{code}

%****************************************************************************
%
\subsection[qp-profile]{Quasi-Parallel Profiling}
%
%****************************************************************************

\begin{code}
#ifndef GRAN
I_ do_qp_prof;
FILE *qp_file;

/* *Virtual* Time in milliseconds */
long
qp_elapsed_time()
{
    return ((long) (usertime() * 1e3));
}

/* init_qp_profiling: open "<prog>.qp" (program name truncated to 24
   characters) and write the header line: the command line plus the
   relevant RTS options.  On failure, QP profiling is switched off
   (continued on the following line). */
static void
init_qp_profiling(STG_NO_ARGS)
{
    I_ i;
    char qp_filename[32];

    sprintf(qp_filename, "%0.24s.qp", prog_argv[0]);
    if ((qp_file = fopen(qp_filename,"w")) == NULL ) {
        fprintf(stderr, "Can't open quasi-parallel profile report file %s\n",
            qp_filename);
        do_qp_prof = 0;
    } else {
        fputs(prog_argv[0], qp_file);
        for(i = 1; prog_argv[i]; i++) {
            fputc(' ', qp_file);
            fputs(prog_argv[i], qp_file);
} fprintf(qp_file, "+RTS -C%ld -t%ld\n", contextSwitchTime, MaxThreads); fputs(time_str(), qp_file); fputc('\n', qp_file); } } void QP_Event0(tid, node) I_ tid; P_ node; { fprintf(qp_file, "%lu ** %lu 0x%lx\n", qp_elapsed_time(), tid, INFO_PTR(node)); } void QP_Event1(event, tso) char *event; P_ tso; { fprintf(qp_file, "%lu %s %lu 0x%lx\n", qp_elapsed_time(), event, TSO_ID(tso), (W_) TSO_NAME(tso)); } void QP_Event2(event, tso1, tso2) char *event; P_ tso1, tso2; { fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n", qp_elapsed_time(), event, TSO_ID(tso1), (W_) TSO_NAME(tso1), TSO_ID(tso2), (W_) TSO_NAME(tso2)); } #endif /* 0 */ #endif /* GRAN */ #if defined(CONCURRENT) && !defined(GRAN) /* romoluSnganpu' SamuS! */ unsigned CurrentProc = 0; W_ IdleProcs = ~0l, Idlers = 32; void GranSimAllocate(n,node,liveness) I_ n; P_ node; W_ liveness; { } void GranSimUnallocate(n,node,liveness) W_ n; P_ node; W_ liveness; { } void GranSimExec(ariths,branches,loads,stores,floats) W_ ariths,branches,loads,stores,floats; { } I_ GranSimFetch(node /* , liveness_mask */ ) P_ node; /* I_ liveness_mask; */ { } void GranSimSpark(local,node) W_ local; P_ node; { } #if 0 void GranSimSparkAt(spark,where,identifier) sparkq spark; P_ where; /* This should be a node; alternatively could be a GA */ I_ identifier; { } #endif void GranSimBlock() { } #endif \end{code}