1 %
2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1995
3 %
4 %************************************************************************
5 %*                                                                      *
6 \section[Threads.lc]{Thread Control Routines}
7 %*                                                                      *
8 %************************************************************************
9
10 %************************************************************************
11 %
12 \subsection[thread-overview]{Overview of the Thread Management System}
13 %
14 %************************************************************************
15
16 %************************************************************************
17 %
18 \subsection[thread-decls]{Thread Declarations}
19 %
20 %************************************************************************
21
22 % I haven't checked if GRAN can work with QP profiling.  But since we use our
23 % own profiling (GR profiling), that should be irrelevant. -- HWL
24
25 \begin{code}
26
27 #if defined(CONCURRENT)
28
29 # define NON_POSIX_SOURCE /* so says Solaris */
30
31 # include "rtsdefs.h"
32 # include <setjmp.h>
33
34 #include "LLC.h"
35 #include "HLC.h"
36
37 static void init_qp_profiling(STG_NO_ARGS); /* forward decl */
38 \end{code}
39
40 @AvailableStack@ is used to determine whether an existing stack can be
41 reused without fresh allocation, thereby reducing both garbage collection
42 and stack setup time.  At present it is only used for the first stack
43 chunk of a thread, which has
44 @RTSflags.ConcFlags.stkChunkSize@ words.
45
46 \begin{code}
47 P_ AvailableStack = Nil_closure;
48 P_ AvailableTSO = Nil_closure;
49 \end{code}
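
The reuse pattern intended here is the one @NewThread@ implements further
below: look on the free list first and allocate only if it is empty.  The
following stand-alone sketch illustrates the idea with hypothetical types and
names; it is guarded by @#if 0@ so it is not compiled into the RTS.

\begin{code}
#if 0   /* illustrative sketch only; not part of the RTS */
#include <stdlib.h>

typedef struct StackChunk_ {
    struct StackChunk_ *link;           /* free-list link, like STKO_LINK */
    size_t              size_in_words;
} StackChunk;

static StackChunk *available_stack = NULL;  /* plays the role of AvailableStack */

static StackChunk *
get_stack_chunk(size_t words)
{
    StackChunk *s;

    if (available_stack != NULL) {      /* reuse an old chunk: no new allocation */
        s = available_stack;
        available_stack = s->link;      /* unlink it from the free list */
        return s;
    }
    /* otherwise allocate a fresh first chunk of the standard size */
    s = (StackChunk *) malloc(sizeof(StackChunk) + words * sizeof(long));
    if (s != NULL) {
        s->link = NULL;
        s->size_in_words = words;
    }
    return s;
}
#endif  /* illustrative sketch */
\end{code}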
50
51 Macros for dealing with the new and improved GA field for simulating
52 parallel execution, based on the @CONCURRENT@ package.  The GA field now
53 contains a mask in which the n-th bit is set if the data can be found on
54 the n-th processor; in the case of multiple copies, several bits are set.
55 The total number of processors is bounded by @MAX_PROC@, which should be
56 <= the length of a word in bits.  -- HWL
57
58 \begin{code}
59 /* mattson thinks this is obsolete */
60
61 # if 0 && defined(GRAN)
62
63 typedef unsigned long TIME;
64 typedef unsigned char PROC;
65 typedef unsigned char EVTTYPE;
66
67
68 #  undef max
69 #  define max(a,b) ((a)>(b)?(a):(b))
70
71 static PROC
72 ga_to_proc(W_ ga)
73 { PROC i;
74                                 
75   for (i=0; i<MAX_PROC && !IS_LOCAL_TO(ga,i); i++) ; 
76
77   return (i);
78 }
79
80 /* NB: This takes a *node* rather than just a ga as input */
81 static PROC
82 where_is(P_ node)
83 { return (ga_to_proc(PROCS(node))); }   /* Access the GA field of the node */
84
85 static PROC
86 no_of_copies(P_ node)       /* DaH lo'lu'Qo'; currently unused */
87 { PROC i, n;
88                                 
89   for (i=0, n=0; i<MAX_PROC; i++) 
90     if (IS_LOCAL_TO(PROCS(node),i))
91       n++;
92
93   return (n);
94 }
95
96 # endif /* GRAN ; HWL */ 
97 \end{code}
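
For illustration only, here is a small stand-alone model of that
processor-mask representation; the type and macro names are made up and
merely stand in for @PROCS@, @IS_LOCAL_TO@ and friends.  The block is
guarded by @#if 0@ and is not compiled.

\begin{code}
#if 0   /* illustrative sketch only; not part of the RTS */
#include <stdio.h>

#define MAX_PROC_SKETCH 32                       /* must be <= bits in a word */
typedef unsigned long ProcSet;                   /* one bit per processor */

#define PE_BIT(p)           (1UL << (p))
#define IS_ON_PROC(set,p)   (((set) & PE_BIT(p)) != 0)

static unsigned
copies(ProcSet set)                              /* cf. no_of_copies() above */
{
    unsigned n = 0, p;

    for (p = 0; p < MAX_PROC_SKETCH; p++)
        if (IS_ON_PROC(set, p))
            n++;
    return n;
}

int
main(void)
{
    ProcSet ga = PE_BIT(0) | PE_BIT(3);          /* copies live on PEs 0 and 3 */

    printf("on PE 3? %d, copies = %u\n", IS_ON_PROC(ga, 3), copies(ga));
    return 0;
}
#endif  /* illustrative sketch */
\end{code}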
98
99 %****************************************************************
100 %*                                                              *
101 \subsection[thread-getthread]{The Thread Scheduler}
102 %*                                                              *
103 %****************************************************************
104
105 This is the heart of the thread scheduling code.
106
107 \begin{code}
108 # if defined(GRAN_CHECK) && defined(GRAN)
109 W_ debug = 0;
110 # endif       
111
112 W_ event_trace = 0;
113 W_ event_trace_all = 0;
114
115 STGRegisterTable *CurrentRegTable = NULL;
116 P_ CurrentTSO = NULL;
117
118 # if defined(GRAN)                                                  /* HWL */
119
120 unsigned CurrentProc = 0;
121 W_ IdleProcs = ~0L, Idlers = MAX_PROC; 
122
123 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
124 # define FETCH_MASK_TSO  0x08000000  /* flag bit 27; TSO_TYPE normally */
125                                      /* uses only bits 0, 1 and 2      */
126 # endif
127
128 I_ DoFairSchedule = 0;
129 I_ DoReScheduleOnFetch = 0;
130 I_ DoStealThreadsFirst = 0;
131 I_ SimplifiedFetch = 0;
132 I_ DoAlwaysCreateThreads = 0;
133 I_ DoGUMMFetching = 0;
134 I_ DoThreadMigration = 0;
135 I_ FetchStrategy = 4;
136 I_ PreferSparksOfLocalNodes = 0;
137
138 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
139 I_ NoForward = 0;
140 I_ PrintFetchMisses = 0, fetch_misses = 0;
141 # endif
142
143 # if defined(COUNT)
144 I_ nUPDs = 0, nUPDs_old = 0, nUPDs_new = 0, nUPDs_BQ = 0, nPAPs = 0,
145    BQ_lens = 0;
146 # endif
147
148 I_ no_gr_profile = 0;        /* Don't create any .gr file at all? */
149 I_ do_sp_profile = 0;
150 I_ do_gr_migration = 0;
151
152 P_ RunnableThreadsHd[MAX_PROC];
153 P_ RunnableThreadsTl[MAX_PROC];
154
155 P_ WaitThreadsHd[MAX_PROC];
156 P_ WaitThreadsTl[MAX_PROC];
157
158 sparkq PendingSparksHd[MAX_PROC][SPARK_POOLS];
159 sparkq PendingSparksTl[MAX_PROC][SPARK_POOLS];
160
161 W_ CurrentTime[MAX_PROC];       /* Per PE clock */
162
163 # if defined(GRAN_CHECK) && defined(GRAN)
164 P_ BlockedOnFetch[MAX_PROC];    /* HWL-CHECK */
165 # endif
166
167 I_ OutstandingFetches[MAX_PROC];
168
169 W_ SparksAvail = 0;     /* How many sparks are available */
170 W_ SurplusThreads = 0;  /* How many excess threads are there */
171
172 StgBool NeedToReSchedule = StgFalse; /* Do we need to reschedule following a fetch? */
173
174 /* Communication Cost Variables -- set in main program */
175
176 W_ gran_latency =      LATENCY,          gran_additional_latency = ADDITIONAL_LATENCY, 
177    gran_fetchtime =    FETCHTIME, 
178    gran_lunblocktime = LOCALUNBLOCKTIME, gran_gunblocktime =       GLOBALUNBLOCKTIME,
179    gran_mpacktime =    MSGPACKTIME,      gran_munpacktime =        MSGUNPACKTIME,
180    gran_mtidytime =    0;
181
182 W_ gran_threadcreatetime =         THREADCREATETIME,
183    gran_threadqueuetime =          THREADQUEUETIME,
184    gran_threaddescheduletime =     THREADDESCHEDULETIME,
185    gran_threadscheduletime =       THREADSCHEDULETIME,
186    gran_threadcontextswitchtime =  THREADCONTEXTSWITCHTIME;
187
188 /* Instruction Cost Variables -- set in main program */
189
190 W_ gran_arith_cost =   ARITH_COST,       gran_branch_cost =        BRANCH_COST, 
191    gran_load_cost =    LOAD_COST,        gran_store_cost =         STORE_COST, 
192    gran_float_cost =   FLOAT_COST,       gran_heapalloc_cost =     0;
193
194 W_ max_proc = MAX_PROC;
195
196 /* Granularity event types' names for output */
197
198 char *event_names[] =
199     { "STARTTHREAD", "CONTINUETHREAD", "RESUMETHREAD", 
200       "MOVESPARK", "MOVETHREAD", "FINDWORK",
201       "FETCHNODE", "FETCHREPLY"
202     };
203
204 # if defined(GRAN)
205 /* Prototypes of GrAnSim debugging functions */
206 void DEBUG_PRINT_NODE   PROTO((P_));
207 void DEBUG_TREE         PROTO((P_));
208 void DEBUG_INFO_TABLE   PROTO((P_));
209 void DEBUG_CURR_THREADQ PROTO((I_));
210 void DEBUG_THREADQ      PROTO((P_, I_));
211 void DEBUG_TSO          PROTO((P_, I_));
212 void DEBUG_EVENT        PROTO((eventq, I_));
213 void DEBUG_SPARK        PROTO((sparkq, I_));
214 void DEBUG_SPARKQ       PROTO((sparkq, I_));
215 void DEBUG_CURR_SPARKQ  PROTO((I_));
216 void DEBUG_PROC         PROTO((I_, I_));
217 void DCT(STG_NO_ARGS);
218 void DCP(STG_NO_ARGS);
219 void DEQ(STG_NO_ARGS);
220 void DSQ(STG_NO_ARGS);
221
222 void HandleFetchRequest PROTO((P_, PROC, P_));
223 # endif /* GRAN ; HWL */ 
224
225 #if defined(GRAN_CHECK) && defined(GRAN)
226 static eventq DelayedEventHd = NULL, DelayedEventTl = NULL;
227
228 static I_ noOfEvents = 0;
229 static I_ event_counts[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
230 #endif
231
232 TIME SparkStealTime();
233
234 /* Fcts for manipulating event queues have been deleted  -- HWL */
235 /* ---------------------------------- */
236
237 static void
238 print_spark(spark)
239   sparkq spark;
240 {
241
242   if (spark==NULL)
243     fprintf(stderr,"Spark: NIL\n");
244   else
245     fprintf(stderr,"Spark: Node 0x%lx, Name 0x%lx, Exported %s, Prev 0x%lx, Next 0x%lx\n",
246             (W_) SPARK_NODE(spark), SPARK_NAME(spark), 
247             ((SPARK_EXPORTED(spark))?"True":"False"), 
248             (W_) SPARK_PREV(spark), (W_) SPARK_NEXT(spark) );
249 }
250
251 static void print_sparkq(hd)
252 sparkq hd;
253 {
254   sparkq x;
255
256   fprintf(stderr,"Spark Queue with root at 0x%lx:\n", (W_) hd);
257   for (x=hd; x!=NULL; x=SPARK_NEXT(x)) {
258     print_spark(x);
259   }
260 }
261
262 static void print_event(event)
263 eventq event;
264 {
265
266   if (event==NULL)
267     fprintf(stderr,"Evt: NIL\n");
268   else
269     fprintf(stderr,"Evt: %s (%u), PE %u [%u], Time %lu, TSO 0x%lx, node 0x%lx\n",
270               event_names[EVENT_TYPE(event)],EVENT_TYPE(event),
271               EVENT_PROC(event), EVENT_CREATOR(event), 
272               EVENT_TIME(event), EVENT_TSO(event), EVENT_NODE(event) /*,
273               EVENT_SPARK(event), EVENT_NEXT(event)*/ );
274
275 }
276
277 static void print_eventq(hd)
278 eventq hd;
279 {
280   eventq x;
281
282   fprintf(stderr,"Event Queue with root at 0x%lx:\n", (W_) hd);
283   for (x=hd; x!=NULL; x=EVENT_NEXT(x)) {
284     print_event(x);
285   }
286 }
287
288 /* ---------------------------------- */
289
290 #if 0 /* moved */
291 static eventq getnextevent()
292 {
293   static eventq entry = NULL;
294
295   if(EventHd == NULL)
296     {
297       fprintf(stderr,"No next event\n");
298       exit(EXIT_FAILURE); /* ToDo: abort()? EXIT? */
299     }
300
301   if(entry != NULL)
302     free((char *)entry);
303
304 #if defined(GRAN_CHECK) && defined(GRAN)
305   if (debug & 0x20) {     /* count events */
306     noOfEvents++;
307     event_counts[EVENT_TYPE(EventHd)]++;
308   }
309 #endif       
310
311   entry = EventHd;
312   EventHd = EVENT_NEXT(EventHd);
313   return(entry);
314 }
315
316 /* ToDo: replace malloc/free with a free list */
317
318 static insert_event(newentry)
319 eventq newentry;
320 {
321   EVTTYPE evttype = EVENT_TYPE(newentry);
322   eventq event, *prev;
323
324   /* Search the queue and insert at the right point:
325      FINDWORK before everything, CONTINUETHREAD after everything.
326
327      This ensures that we find any available work after all threads have
328      executed the current cycle.  This level of detail would normally be
329      irrelevant, but matters for ridiculously low latencies...
330   */
331
332   if(EventHd == NULL)
333     EventHd = newentry;
334   else 
335     {
336       for (event = EventHd, prev=&EventHd; event != NULL; 
337            prev = &(EVENT_NEXT(event)), event = EVENT_NEXT(event))
338         {
339           if(evttype == FINDWORK ?       (EVENT_TIME(event) >=  EVENT_TIME(newentry)) :
340              evttype == CONTINUETHREAD ? (EVENT_TIME(event) > EVENT_TIME(newentry)) : 
341                                          (EVENT_TIME(event) >  EVENT_TIME(newentry) ||
342                                          (EVENT_TIME(event) == EVENT_TIME(newentry) &&
343                                           EVENT_TYPE(event) != FINDWORK )))
344             {
345               *prev = newentry;
346               EVENT_NEXT(newentry) = event;
347               break;
348             }
349         }
350       if (event == NULL)
351         *prev = newentry;
352     }
353 }
354
355 static newevent(proc,creator,time,evttype,tso,node,spark)
356 PROC proc, creator;
357 TIME time;
358 EVTTYPE evttype;
359 P_ tso, node;
360 sparkq spark;
361 {
362   eventq newentry = (eventq) stgMallocBytes(sizeof(struct event), "newevent");
363
364   EVENT_PROC(newentry) = proc;
365   EVENT_CREATOR(newentry) = creator;
366   EVENT_TIME(newentry) = time;
367   EVENT_TYPE(newentry) = evttype;
368   EVENT_TSO(newentry) =  tso;
369   EVENT_NODE(newentry) =  node;
370   EVENT_SPARK(newentry) =  spark;
371   EVENT_NEXT(newentry) = NULL;
372
373   insert_event(newentry);
374 }
375 #endif /* 0 moved */
376
377 # else                                                            /* !GRAN */
378
379 P_ RunnableThreadsHd = Nil_closure;
380 P_ RunnableThreadsTl = Nil_closure;
381
382 P_ WaitingThreadsHd = Nil_closure;
383 P_ WaitingThreadsTl = Nil_closure;
384
385 PP_ PendingSparksBase[SPARK_POOLS];
386 PP_ PendingSparksLim[SPARK_POOLS];
387
388 PP_ PendingSparksHd[SPARK_POOLS];
389 PP_ PendingSparksTl[SPARK_POOLS];
390
391 # endif                                                      /* GRAN ; HWL */
392
393 static jmp_buf scheduler_loop;
394
395 I_ required_thread_count = 0;
396 I_ advisory_thread_count = 0;
397
398 EXTFUN(resumeThread);
399
400 P_ NewThread PROTO((P_, W_));
401
402 I_ context_switch = 0;
403
404 #if !defined(GRAN)
405
406 I_ threadId = 0;
407 I_ sparksIgnored = 0;
408
409 I_ SparkLimit[SPARK_POOLS];
410
411 rtsBool
412 initThreadPools(STG_NO_ARGS)
413 {
414     I_ size = RTSflags.ConcFlags.maxLocalSparks;
415
416     SparkLimit[ADVISORY_POOL] = SparkLimit[REQUIRED_POOL] = size;
417
418     if ((PendingSparksBase[ADVISORY_POOL] = (PP_) malloc(size * sizeof(P_))) == NULL)
419         return rtsFalse;
420
421     if ((PendingSparksBase[REQUIRED_POOL] = (PP_) malloc(size * sizeof(P_))) == NULL)
422         return rtsFalse;
423
424     PendingSparksLim[ADVISORY_POOL] = PendingSparksBase[ADVISORY_POOL] + size;
425     PendingSparksLim[REQUIRED_POOL] = PendingSparksBase[REQUIRED_POOL] + size;
426     return rtsTrue;
427 }
428 #endif
429
430 #ifdef PAR
431 rtsBool sameThread;
432 #endif
433
434 void
435 ScheduleThreads(topClosure)
436 P_ topClosure;
437 {
438 #ifdef GRAN
439     I_ i;
440 #endif
441     P_ tso;
442
443 #if defined(PROFILING) || defined(PAR)
444     if (time_profiling || RTSflags.ConcFlags.ctxtSwitchTime > 0) {
445         if (initialize_virtual_timer(RTSflags.CcFlags.msecsPerTick)) {
446 #else
447     if (RTSflags.ConcFlags.ctxtSwitchTime > 0) {
448         if (initialize_virtual_timer(RTSflags.ConcFlags.ctxtSwitchTime)) {
449 #endif
450             fflush(stdout);
451             fprintf(stderr, "Can't initialize virtual timer.\n");
452             EXIT(EXIT_FAILURE);
453         }
454     } else
455         context_switch = 0 /* 1 HWL */;
456
457 #if defined(GRAN_CHECK) && defined(GRAN)                                           /* HWL */
458     if ( debug & 0x40 ) {
459       fprintf(stderr,"D> Doing init in ScheduleThreads now ...\n");
460     }
461 #endif
462
463 #if defined(GRAN)                                                  /* KH */
464     for (i=0; i<max_proc; i++) 
465       {
466         RunnableThreadsHd[i] = RunnableThreadsTl[i] = Nil_closure;
467         WaitThreadsHd[i] = WaitThreadsTl[i] = Nil_closure;
468         PendingSparksHd[i][REQUIRED_POOL] = PendingSparksHd[i][ADVISORY_POOL] = 
469         PendingSparksTl[i][REQUIRED_POOL] = PendingSparksTl[i][ADVISORY_POOL] = 
470             NULL; 
471
472 # if defined(GRAN_CHECK)
473         if (debug & 0x04) 
474           BlockedOnFetch[i] = 0; /*- StgFalse; -*/              /* HWL-CHECK */
475 # endif
476         OutstandingFetches[i] = 0;
477       }
478
479     CurrentProc = MainProc;
480 #endif /* GRAN */
481
482     if (DO_QP_PROF)
483         init_qp_profiling();
484
485     /*
486      * We perform GC so that a signal handler can install a new
487      * TopClosure and start a new main thread.
488      */
489 #ifdef PAR
490     if (IAmMainThread) {
491 #endif
492     if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
493         /* kludge to save the top closure as a root */
494         CurrentTSO = topClosure;
495         ReallyPerformThreadGC(0, rtsTrue);
496         topClosure = CurrentTSO;
497         if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
498             fflush(stdout);
499             fprintf(stderr, "Not enough heap for main thread\n");
500             EXIT(EXIT_FAILURE);             
501         }
502     }           
503 #ifndef GRAN
504     RunnableThreadsHd = RunnableThreadsTl = tso;
505 #else
506     /* NB: CurrentProc must have been set to MainProc before that! -- HWL */
507     ThreadQueueHd = ThreadQueueTl = tso;
508
509 # if defined(GRAN_CHECK)
510     if ( debug & 0x40 ) {
511       fprintf(stderr,"D> MainTSO has been initialized (0x%x)\n", tso);
512     }
513 # endif      
514 #endif
515
516 #ifdef PAR
517     if (RTSflags.ParFlags.granSimStats) {
518         DumpGranEvent(GR_START, tso);
519         sameThread = rtsTrue;
520     }
521 #endif
522
523 #if defined(GRAN)
524     MAKE_BUSY(MainProc);  /* Everything except the main PE is idle */
525 #endif      
526
527     required_thread_count = 1;
528     advisory_thread_count = 0;
529 #ifdef PAR
530     }   /*if IAmMainThread ...*/
531 #endif
532
533     /* ----------------------------------------------------------------- */
534     /* This part is the MAIN SCHEDULER LOOP; jumped to from ReSchedule   */
535     /* ----------------------------------------------------------------- */
536
537     if(setjmp(scheduler_loop) < 0)
538         return;
539
540 #if defined(GRAN) && defined(GRAN_CHECK)
541     if ( debug & 0x80 ) {
542       fprintf(stderr,"D> MAIN Schedule Loop; ThreadQueueHd is ");
543       DEBUG_TSO(ThreadQueueHd,1);
544       /* if (ThreadQueueHd == MainTSO) {
545         fprintf(stderr,"D> Event Queue is now:\n");
546         DEQ();
547       } */
548     }
549 #endif
550
551 #ifdef PAR
552     if (PendingFetches != Nil_closure) {
553         processFetches();
554     }
555
556 #elif defined(GRAN)
557     if (ThreadQueueHd == Nil_closure) {
558         fprintf(stderr, "No runnable threads!\n");
559         EXIT(EXIT_FAILURE);
560     }
561     if (DO_QP_PROF > 1 && CurrentTSO != ThreadQueueHd) {
562         QP_Event1("AG", ThreadQueueHd);
563     }
564 #endif
565
566 #ifndef PAR
567     while (RunnableThreadsHd == Nil_closure) {
568         /* If we've no work */
569         if (WaitingThreadsHd == Nil_closure) {
570             fflush(stdout);
571             fprintf(stderr, "No runnable threads!\n");
572             EXIT(EXIT_FAILURE);
573         }
574         AwaitEvent(RTSflags.ConcFlags.ctxtSwitchTime);
575     }
576 #else
577     if (RunnableThreadsHd == Nil_closure) {
578         if (advisory_thread_count < RTSflags.ConcFlags.maxThreads &&
579           (PendingSparksHd[REQUIRED_POOL] < PendingSparksTl[REQUIRED_POOL] ||
580           PendingSparksHd[ADVISORY_POOL] < PendingSparksTl[ADVISORY_POOL])) {
581             /* 
582              * If we're here (no runnable threads) and we have pending
583              * sparks, we must have a space problem.  Get enough space
584              * to turn one of those pending sparks into a
585              * thread... ReallyPerformGC doesn't return until the
586              * space is available, so it may force global GC.  ToDo:
587              * Is this unnecessary here?  Duplicated in ReSchedule()?
588              * --JSM
589              */
590             ReallyPerformThreadGC(THREAD_SPACE_REQUIRED, rtsTrue);
591             SAVE_Hp -= THREAD_SPACE_REQUIRED;
592         } else {
593             /*
594              * We really have absolutely no work.  Send out a fish
595              * (there may be some out there already), and wait for
596              * something to arrive.  We clearly can't run any threads
597              * until a SCHEDULE or RESUME arrives, and so that's what
598              * we're hoping to see.  (Of course, we still have to
599              * respond to other types of messages.)
600              */
601             if (!fishing)
602                 sendFish(choosePE(), mytid, NEW_FISH_AGE, NEW_FISH_HISTORY, 
603                   NEW_FISH_HUNGER);
604
605             processMessages();
606         }
607         ReSchedule(0);
608     } else if (PacketsWaiting()) {  /* Look for incoming messages */
609         processMessages();
610     }
611 #endif /* PAR */
612
613     if (DO_QP_PROF > 1 && CurrentTSO != RunnableThreadsHd) {
614         QP_Event1("AG", RunnableThreadsHd);
615     }
616
617 #ifdef PAR
618     if (RTSflags.ParFlags.granSimStats && !sameThread)
619         DumpGranEvent(GR_SCHEDULE, RunnableThreadsHd);
620 #endif
621
622 #if !defined(GRAN) /* ROUND_ROBIN */
623     CurrentTSO = RunnableThreadsHd;
624     RunnableThreadsHd = TSO_LINK(RunnableThreadsHd);
625     TSO_LINK(CurrentTSO) = Nil_closure;
626     
627     if (RunnableThreadsHd == Nil_closure)
628         RunnableThreadsTl = Nil_closure;
629
630 #else /* GRAN */
631     /* This used to be Round Robin. KH.  
632        I think we can ignore that, and move it down to ReSchedule instead.
633     */
634     CurrentTSO = ThreadQueueHd;
635     /* TSO_LINK(CurrentTSO) = Nil_closure;  humbug */
636 #endif
637
638     /* If we're not running a timer, just leave the flag on */
639     if (RTSflags.ConcFlags.ctxtSwitchTime > 0)
640         context_switch = 0;
641
642 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
643     if (CurrentTSO == Nil_closure) {
644         fprintf(stderr,"Error: Trying to execute Nil_closure on proc %d (@ %d)\n",
645                 CurrentProc,CurrentTime[CurrentProc]);
646         exit(99);
647       }
648
649     if (debug & 0x04) {
650       if (BlockedOnFetch[CurrentProc]) {
651         fprintf(stderr,"Error: Trying to execute TSO 0x%x on proc %d (@ %d) which is blocked-on-fetch by TSO 0x%x\n",
652               CurrentTSO,CurrentProc,CurrentTime[CurrentProc],BlockedOnFetch[CurrentProc]);
653         exit(99);
654       }
655     }
656
657     if ( (debug & 0x10) &&
658          (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) ) {
659       fprintf(stderr,"Error: Trying to execute TSO 0x%x on proc %d (@ %d) which should be asleep!\n",
660               CurrentTSO,CurrentProc,CurrentTime[CurrentProc]);
661         exit(99);
662     }
663 #endif
664
665     miniInterpret((StgFunPtr)resumeThread);
666 }
667 \end{code}
668
669 % Some remarks on GrAnSim -- HWL
670
671 The @ReSchedule@ function is the heart of GrAnSim.  Depending on its parameter
672 it either issues a @CONTINUETHREAD@ event, to carry on executing the current
673 thread in due course, or it looks for new work (e.g. when called from @EndThread@).
674
675 It then picks the next event (@getnextevent@) and handles it appropriately (see
676 the switch construct).  Note that a @continue@ in the switch causes the next
677 event to be handled, while a @break@ causes a jump to @scheduler_loop@, where
678 the TSO at the head of the current processor's runnable queue is executed.
679
680 @ReSchedule@ is mostly entered from @HpOverflow.lc:PerformReSchedule@, which is
681 itself called via the @GRAN_RESCHEDULE@ macro in the compiler-generated code.
682
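The control discipline described above (@continue@ moves on to the next
event, while @break@ followed by the trailing @_longjmp@ returns to
@scheduler_loop@) is modelled by this stand-alone sketch with made-up event
types; it is guarded by @#if 0@ and is not the RTS code.

\begin{code}
#if 0   /* illustrative sketch only; not part of the RTS */
#include <setjmp.h>
#include <stdio.h>

enum { SK_FINDWORK, SK_CONTINUETHREAD, SK_DONE };     /* made-up event types */

static int     script[] = { SK_FINDWORK, SK_CONTINUETHREAD, SK_DONE };
static jmp_buf sketch_scheduler_loop;

int
main(void)
{
    int i = 0;

    if (setjmp(sketch_scheduler_loop) != 0) {
        printf("back in the scheduler: run the TSO at the queue head\n");
        return 0;
    }
    for (;;) {
        switch (script[i++]) {               /* stands in for getnextevent() */
          case SK_FINDWORK:
              printf("FINDWORK: nothing runnable yet\n");
              continue;                      /* continue => handle next event */
          case SK_CONTINUETHREAD:
              printf("CONTINUETHREAD: a thread is ready to run\n");
              break;                         /* break => leave the switch ... */
          default:
              return 0;
        }
        longjmp(sketch_scheduler_loop, 1);   /* ... and jump to the scheduler */
    }
}
#endif  /* illustrative sketch */
\end{code}
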
683 \begin{code}
684 #if defined(GRAN)
685
686 void
687 ReSchedule(what_next)
688 int what_next;           /* Run the current thread again? */
689 {
690   sparkq spark, nextspark;
691   P_ tso;
692   P_ node;
693   eventq event;
694
695 #if defined(GRAN_CHECK) && defined(GRAN)
696   if ( debug & 0x80 ) {
697     fprintf(stderr,"D> Entering ReSchedule with mode %u; tso is\n",what_next);
698     DEBUG_TSO(ThreadQueueHd,1);
699   }
700 #endif
701
702 #if defined(GRAN_CHECK) && defined(GRAN)
703   if ( (debug & 0x80) || (debug & 0x40 ) )
704       if (what_next<FIND_THREAD || what_next>CHANGE_THREAD)
705         fprintf(stderr,"ReSchedule: illegal parameter %u for what_next\n",
706                 what_next);
707 #endif
708     
709   /* Run the current thread again (if there is one) */
710   if(what_next==SAME_THREAD && ThreadQueueHd != Nil_closure)
711     {
712       /* A bit of a hassle if the event queue is empty, but ... */
713       CurrentTSO = ThreadQueueHd;
714
715       newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
716                CONTINUETHREAD,CurrentTSO,Nil_closure,NULL);
717
718       /* This code does round-robin scheduling, if preferred. */
719       if(DoFairSchedule && TSO_LINK(CurrentTSO) != Nil_closure)
720         {
721           if(RTSflags.ParFlags.granSimStats)
722             DumpGranEvent(GR_DESCHEDULE,ThreadQueueHd);
723           ThreadQueueHd =           TSO_LINK(CurrentTSO);
724           TSO_LINK(ThreadQueueTl) = CurrentTSO;
725           ThreadQueueTl =           CurrentTSO;
726           TSO_LINK(CurrentTSO) =    Nil_closure;
727           if (RTSflags.ParFlags.granSimStats)
728             DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
729           CurrentTime[CurrentProc] += gran_threadcontextswitchtime;
730         }
731     }
732   /* Schedule the `next thread', which is at ThreadQueueHd now, i.e. the */
733   /* thread queue has already been updated before this point. */ 
734   else if(what_next==NEW_THREAD && ThreadQueueHd != Nil_closure)
735     {
736 #if defined(GRAN_CHECK) && defined(GRAN)
737       if(DoReScheduleOnFetch)
738         {
739           fprintf(stderr,"ReSchedule(NEW_THREAD) shouldn't be used!!\n");
740           exit(99);
741         }
742 #endif
743
744       if(RTSflags.ParFlags.granSimStats)
745         DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
746
747       CurrentTSO = ThreadQueueHd;
748       newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
749                CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
750       
751       CurrentTime[CurrentProc] += gran_threadcontextswitchtime;
752     }
753
754   /* We go in here if the current thread is blocked on fetch => don't CONT */
755   else if(what_next==CHANGE_THREAD)
756     {
757       /* just fall into event handling loop for next event */
758     }
759
760   /* We go in here if we have no runnable threads or what_next==0 */
761   else
762     {
763       newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
764                FINDWORK,Nil_closure,Nil_closure,NULL);
765       CurrentTSO = Nil_closure;
766     }
767
768   /* ----------------------------------------------------------------- */
769   /* This part is the EVENT HANDLING LOOP                              */
770   /* ----------------------------------------------------------------- */
771
772   do {
773     /* Choose the processor with the next event */
774     event = getnextevent();
775     CurrentProc = EVENT_PROC(event);
776     if(EVENT_TIME(event) > CurrentTime[CurrentProc])
777       CurrentTime[CurrentProc] = EVENT_TIME(event);
778
779     MAKE_BUSY(CurrentProc);
780
781 #if defined(GRAN_CHECK) && defined(GRAN)
782     if (debug & 0x80)
783       fprintf(stderr,"D> After getnextevent, before HandleIdlePEs\n");
784 #endif
785
786     /* Deal with the idlers */
787     HandleIdlePEs();
788
789 #if defined(GRAN_CHECK) && defined(GRAN)
790     if (event_trace && 
791         (event_trace_all || EVENT_TYPE(event) != CONTINUETHREAD ||
792          (debug & 0x80) ))
793       print_event(event);
794 #endif
795
796     switch (EVENT_TYPE(event))
797       {
798         /* Should just be continuing execution */
799         case CONTINUETHREAD:
800 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
801               if ( (debug & 0x04) && BlockedOnFetch[CurrentProc]) {
802                 fprintf(stderr,"Warning: Discarding CONTINUETHREAD on blocked proc %u  @ %u\n",
803                         CurrentProc,CurrentTime[CurrentProc]);
804                 print_event(event);
805                 continue;
806               }
807 #endif
808           if(ThreadQueueHd==Nil_closure) 
809             {
810               newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
811                        FINDWORK,Nil_closure,Nil_closure,NULL);
812               continue; /* Catches superfluous CONTINUEs -- should be unnecessary */
813             }
814           else 
815             break;   /* fall into scheduler loop */
816
817         case FETCHNODE:
818 #if defined(GRAN_CHECK) && defined(GRAN)
819           if (SimplifiedFetch) {
820             fprintf(stderr,"Error: FETCHNODE events not valid with simplified fetch\n");
821             exit (99);
822           }
823 #endif       
824
825           CurrentTime[CurrentProc] += gran_munpacktime;
826           HandleFetchRequest(EVENT_NODE(event),
827                              EVENT_CREATOR(event),
828                              EVENT_TSO(event));
829           continue;
830
831         case FETCHREPLY:
832 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
833           if (SimplifiedFetch) {
834             fprintf(stderr,"Error: FETCHREPLY events not valid with simplified fetch\n");
835             exit (99);
836           }
837
838           if (debug & 0x10) {
839             if (TSO_TYPE(EVENT_TSO(event)) & FETCH_MASK_TSO) {
840               TSO_TYPE(EVENT_TSO(event)) &= ~FETCH_MASK_TSO;
841             } else {
842               fprintf(stderr,"Error: FETCHREPLY: TSO 0x%x has fetch mask not set @ %d\n",
843                       CurrentTSO,CurrentTime[CurrentProc]);
844               exit(99);
845             }
846           }
847
848           if (debug & 0x04) {
849             if (BlockedOnFetch[CurrentProc]!=ThreadQueueHd) {
850               fprintf(stderr,"Error: FETCHREPLY: Proc %d (with TSO 0x%x) not blocked-on-fetch by TSO 0x%x\n",
851                       CurrentProc,CurrentTSO,BlockedOnFetch[CurrentProc]);
852               exit(99);
853             } else {
854               BlockedOnFetch[CurrentProc] = 0; /*- StgFalse; -*/
855             }
856           }
857 #endif
858
859           /* Copy or  move node to CurrentProc */
860           if (FetchNode(EVENT_NODE(event),
861                         EVENT_CREATOR(event),
862                         EVENT_PROC(event)) ) {
863             /* Fetch has failed i.e. node has been grabbed by another PE */
864             P_ node = EVENT_NODE(event), tso = EVENT_TSO(event);
865             PROC p = where_is(node);
866             TIME fetchtime;
867
868 #if defined(GRAN_CHECK) && defined(GRAN)
869             if (PrintFetchMisses) {
870               fprintf(stderr,"Fetch miss @ %lu: node 0x%x is at proc %u (rather than proc %u)\n",
871                       CurrentTime[CurrentProc],node,p,EVENT_CREATOR(event));
872               fetch_misses++;
873             }
874 #endif  /* GRAN_CHECK */
875
876             CurrentTime[CurrentProc] += gran_mpacktime;
877
878             /* Count fetch again !? */
879             ++TSO_FETCHCOUNT(tso);
880             TSO_FETCHTIME(tso) += gran_fetchtime;
881               
882             fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p]) +
883                         gran_latency;
884
885             /* Chase the grabbed node */
886             newevent(p,CurrentProc,fetchtime,FETCHNODE,tso,node,NULL);
887
888 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
889               if (debug & 0x04)
890                 BlockedOnFetch[CurrentProc] = tso; /*-StgTrue;-*/
891
892               if (debug & 0x10) 
893                 TSO_TYPE(tso) |= FETCH_MASK_TSO;
894 #endif
895
896             CurrentTime[CurrentProc] += gran_mtidytime;
897
898             continue; /* NB: no REPLy has been processed; tso still sleeping */
899           }
900           
901           /* -- Qapla'! Fetch has been successful; node is here, now  */
902           ++TSO_FETCHCOUNT(EVENT_TSO(event));
903           TSO_FETCHTIME(EVENT_TSO(event)) += gran_fetchtime;
904               
905           if (RTSflags.ParFlags.granSimStats)
906             DumpGranEventAndNode(GR_REPLY,EVENT_TSO(event),
907                                  EVENT_NODE(event),EVENT_CREATOR(event));
908
909           --OutstandingFetches[CurrentProc];
910 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
911           if (OutstandingFetches[CurrentProc] < 0) {
912             fprintf(stderr,"OutstandingFetches of proc %u has become negative\n",CurrentProc);
913             exit (99);
914           }
915 #endif
916
917           if (!DoReScheduleOnFetch) {
918             CurrentTSO = EVENT_TSO(event);          /* awaken blocked thread */
919             newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
920                      CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
921             TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] - 
922                                                TSO_BLOCKEDAT(EVENT_TSO(event));
923             if(RTSflags.ParFlags.granSimStats)
924               DumpGranEvent(GR_RESUME,EVENT_TSO(event));
925             continue;
926           } else {
927             /* fall through to RESUMETHREAD */
928           }
929
930         case RESUMETHREAD:  /* Move from the blocked queue to the tail of */
931                             /* the runnable queue ( i.e. Qu' SImqa'lu') */ 
932           TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] - 
933                                              TSO_BLOCKEDAT(EVENT_TSO(event));
934           StartThread(event,GR_RESUME);
935           continue;
936
937         case STARTTHREAD:
938           StartThread(event,GR_START);
939           continue;
940
941         case MOVETHREAD:
942 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
943           if (!DoThreadMigration) {
944             fprintf(stderr,"MOVETHREAD events should never occur without -bM\n");
945             exit (99);
946           }
947 #endif
948           CurrentTime[CurrentProc] += gran_munpacktime;
949           StartThread(event,GR_STOLEN);
950           continue; /* to the next event */
951
952         case MOVESPARK:
953           CurrentTime[CurrentProc] += gran_munpacktime;
954           spark = EVENT_SPARK(event);
955
956           ADD_TO_SPARK_QUEUE(spark); /* NB: this macro side-effects its arg.
957                                         so the assignment above is needed.  */
958
959           if(do_sp_profile)
960             DumpSparkGranEvent(SP_ACQUIRED,spark);
961
962           ++SparksAvail;                  /* Probably Temporarily */
963           /* Drop into FINDWORK */
964
965           if (!DoReScheduleOnFetch &&
966                (ThreadQueueHd != Nil_closure) ) { /* If we block on fetch then go */
967                 continue;                      /* to next event (i.e. leave */
968           }                                    /* spark in sparkq for now) */
969
970         case FINDWORK:
971           if((ThreadQueueHd == Nil_closure || DoAlwaysCreateThreads)
972              && (FetchStrategy >= 2 || OutstandingFetches[CurrentProc] == 0))
973             {
974               W_ found = 0;
975               sparkq spark_of_non_local_node = NULL;
976
977               /* Choose a spark from the local spark queue */
978               spark = SparkQueueHd;
979
980               while (spark != NULL && !found)
981                 {
982                   node = SPARK_NODE(spark);
983                   if (!SHOULD_SPARK(node)) 
984                     {
985                       if(do_sp_profile)
986                         DumpSparkGranEvent(SP_PRUNED,spark);
987
988                       ASSERT(spark != NULL);
989
990                       SparkQueueHd = SPARK_NEXT(spark);
991                       if(SparkQueueHd == NULL)
992                         SparkQueueTl = NULL;
993
994                       DisposeSpark(spark);
995                   
996                       spark = SparkQueueHd;
997                     }
998                   /* -- node should eventually be sparked */
999                   else if (PreferSparksOfLocalNodes && 
1000                           !IS_LOCAL_TO(PROCS(node),CurrentProc)) 
1001                     {
1002                       /* We have seen this spark before => no local sparks */
1003                       if (spark==spark_of_non_local_node) {
1004                         found = 1;
1005                         break;
1006                       }
1007
1008                       /* Remember first non-local node */
1009                       if (spark_of_non_local_node==NULL)
1010                         spark_of_non_local_node = spark;
1011
1012                       /* Special case: 1 elem sparkq with non-local spark */
1013                       if (spark==SparkQueueTl) {
1014                         found = 1;
1015                         break;
1016                       }                 
1017
1018                       /* Put spark (non-local!) at the end of the sparkq */
1019                       SPARK_NEXT(SparkQueueTl) = spark;
1020                       SparkQueueHd = SPARK_NEXT(spark);
1021                       SPARK_NEXT(spark) = NULL;
1022                       SparkQueueTl = spark;
1023  
1024                       spark = SparkQueueHd;
1025                     }
1026                   else
1027                     {
1028                       found = 1;
1029                     }
1030                 }
1031
1032               /* We've found a node; now, create thread (DaH Qu' yIchen) */
1033               if (found) 
1034                 {
1035                   CurrentTime[CurrentProc] += gran_threadcreatetime;
1036
1037                   node = SPARK_NODE(spark);
1038                   if((tso = NewThread(node, T_REQUIRED))==NULL)
1039                     {
1040                       /* Some kind of backoff needed here in case there's too little heap */
1041                       newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc]+1,
1042                                FINDWORK,Nil_closure,Nil_closure,NULL);
1043                       ReallyPerformThreadGC(TSO_HS+TSO_CTS_SIZE,StgTrue);
1044                       spark = NULL;
1045                       continue; /* to the next event, eventually */
1046                     }
1047                       
1048                   TSO_EXPORTED(tso) =  SPARK_EXPORTED(spark);
1049                   TSO_LOCKED(tso) =    !SPARK_GLOBAL(spark);
1050                   TSO_SPARKNAME(tso) = SPARK_NAME(spark);
1051
1052                   newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1053                            STARTTHREAD,tso,Nil_closure,NULL);
1054
1055                   ASSERT(spark != NULL);
1056
1057                   SparkQueueHd = SPARK_NEXT(spark);
1058                   if(SparkQueueHd == NULL)
1059                     SparkQueueTl = NULL;
1060                   
1061                   DisposeSpark(spark);
1062                 }
1063               else
1064               /* Make the PE idle if nothing sparked and we have no threads. */
1065                 {
1066                   if(ThreadQueueHd == Nil_closure)
1067 #if defined(GRAN_CHECK) && defined(GRAN)
1068                     {
1069                       MAKE_IDLE(CurrentProc);
1070                       if ( (debug & 0x40) || (debug & 0x80) ) {
1071                         fprintf(stderr,"Warning in FINDWORK handling: No work found for PROC %u\n",CurrentProc);
1072                       }
1073                     }
1074 #else 
1075                     MAKE_IDLE(CurrentProc);
1076 #endif  /* GRAN_CHECK */
1077                   else
1078                     newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1079                              CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
1080                 }
1081
1082               continue; /* to the next event */
1083             }
1084           else
1085             {
1086 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
1087               if ( (debug & 0x04) &&
1088                    (!DoReScheduleOnFetch &&  ThreadQueueHd != Nil_closure)
1089                   ) {
1090                 fprintf(stderr,"Warning in FINDWORK handling:\n");
1091                 fprintf(stderr,"ThreadQueueHd!=Nil_closure should never happen with !DoReScheduleOnFetch\n");
1092               }
1093 #endif
1094               if (FetchStrategy < 2 && OutstandingFetches[CurrentProc] != 0)
1095                 continue;  /* to next event */
1096               else
1097                 break;     /* run ThreadQueueHd */
1098             }
1099             /* never reached */
1100
1101         default:
1102           fprintf(stderr,"Illegal event type %u\n",EVENT_TYPE(event));
1103           continue;
1104         }
1105     _longjmp(scheduler_loop, 1);
1106   } while(1);
1107   }
1108 \end{code}
1109
1110 Here follows the non-GRAN @ReSchedule@.
1111 \begin{code}
1112 #else      /* !GRAN */
1113
1114 void
1115 ReSchedule(again)
1116 int again;                              /* Run the current thread again? */
1117 {
1118     P_ spark;
1119     PP_ sparkp;
1120     P_ tso;
1121
1122 #ifdef PAR
1123     /* 
1124      * In the parallel world, we do unfair scheduling for the moment.
1125      * Ultimately, this should all be merged with the more
1126      * sophisticated GrAnSim scheduling options.  (Of course, some
1127      * provision should be made for *required* threads to make sure
1128      * that they don't starve, but for now we assume that no one is
1129      * running concurrent Haskell on a multi-processor platform.)
1130      */
1131
1132     sameThread = again;
1133
1134     if (again) {
1135         if (RunnableThreadsHd == Nil_closure)
1136             RunnableThreadsTl = CurrentTSO;
1137         TSO_LINK(CurrentTSO) = RunnableThreadsHd;
1138         RunnableThreadsHd = CurrentTSO;
1139     }
1140
1141 #else
1142
1143     /* 
1144      * In the sequential world, we assume that the whole point of running
1145      * the threaded build is for concurrent Haskell, so we provide round-robin
1146      * scheduling.
1147      */
1148     
1149     if (again) {
1150         if(RunnableThreadsHd == Nil_closure) {
1151             RunnableThreadsHd = CurrentTSO;
1152         } else {
1153             TSO_LINK(RunnableThreadsTl) = CurrentTSO;
1154             if (DO_QP_PROF > 1) {
1155                 QP_Event1("GA", CurrentTSO);
1156             }
1157         }
1158         RunnableThreadsTl = CurrentTSO;
1159     }
1160 #endif
1161
1162 #if 1
1163     /* 
1164      * Debugging code, which is useful enough (and cheap enough) to compile
1165      * in all the time.  This makes sure that we don't access saved registers,
1166      * etc. in threads which are supposed to be sleeping.
1167      */
1168     CurrentTSO = Nil_closure;
1169     CurrentRegTable = NULL;
1170 #endif
1171
1172     /* First the required sparks */
1173
1174     for (sparkp = PendingSparksHd[REQUIRED_POOL]; 
1175       sparkp < PendingSparksTl[REQUIRED_POOL]; sparkp++) {
1176         spark = *sparkp;
1177         if (SHOULD_SPARK(spark)) {      
1178             if ((tso = NewThread(spark, T_REQUIRED)) == NULL)
1179                 break;
1180             if (RunnableThreadsHd == Nil_closure) {
1181                 RunnableThreadsHd = tso;
1182 #ifdef PAR
1183                 if (RTSflags.ParFlags.granSimStats) {
1184                     DumpGranEvent(GR_START, tso);
1185                     sameThread = rtsTrue;
1186                 }
1187 #endif
1188             } else {
1189                 TSO_LINK(RunnableThreadsTl) = tso;
1190 #ifdef PAR
1191                 if (RTSflags.ParFlags.granSimStats)
1192                     DumpGranEvent(GR_STARTQ, tso);
1193 #endif
1194             }
1195             RunnableThreadsTl = tso;
1196         } else {
1197             if (DO_QP_PROF)
1198                 QP_Event0(threadId++, spark);
1199 #ifdef PAR
1200             if(do_sp_profile)
1201                 DumpSparkGranEvent(SP_PRUNED, threadId++);
1202 #endif
1203         }
1204     }
1205     PendingSparksHd[REQUIRED_POOL] = sparkp;
1206
1207     /* Now, almost the same thing for advisory sparks */
1208
1209     for (sparkp = PendingSparksHd[ADVISORY_POOL]; 
1210       sparkp < PendingSparksTl[ADVISORY_POOL]; sparkp++) {
1211         spark = *sparkp;
1212         if (SHOULD_SPARK(spark)) {      
1213             if (
1214 #ifdef PAR
1215     /* In the parallel world, don't create advisory threads if we are 
1216      * about to rerun the same thread, or already have runnable threads,
1217      *  or the main thread has terminated */
1218               (RunnableThreadsHd != Nil_closure ||
1219                (required_thread_count == 0 && IAmMainThread)) || 
1220 #endif
1221               advisory_thread_count == RTSflags.ConcFlags.maxThreads ||
1222               (tso = NewThread(spark, T_ADVISORY)) == NULL)
1223                 break;
1224             advisory_thread_count++;
1225             if (RunnableThreadsHd == Nil_closure) {
1226                 RunnableThreadsHd = tso;
1227 #ifdef PAR
1228                 if (RTSflags.ParFlags.granSimStats) {
1229                     DumpGranEvent(GR_START, tso);
1230                     sameThread = rtsTrue;
1231                 }
1232 #endif
1233             } else {
1234                 TSO_LINK(RunnableThreadsTl) = tso;
1235 #ifdef PAR
1236                 if (RTSflags.ParFlags.granSimStats)
1237                     DumpGranEvent(GR_STARTQ, tso);
1238 #endif
1239             }
1240             RunnableThreadsTl = tso;
1241         } else {
1242             if (DO_QP_PROF)
1243                 QP_Event0(threadId++, spark);
1244 #ifdef PAR
1245             if(do_sp_profile)
1246                 DumpSparkGranEvent(SP_PRUNED, threadId++);
1247 #endif
1248         }
1249     }
1250     PendingSparksHd[ADVISORY_POOL] = sparkp;
1251
1252 #ifndef PAR
1253     longjmp(scheduler_loop, required_thread_count == 0 ? -1 : 1);
1254 #else
1255     longjmp(scheduler_loop, required_thread_count == 0 && IAmMainThread ? -1 : 1);
1256 #endif
1257 }
1258
1259 #endif  /* !GRAN */
1260
1261 \end{code}
1262
1263 %****************************************************************************
1264 %
1265 \subsection[thread-gransim-execution]{Starting, Idling and Migrating
1266                                         Threads (GrAnSim only)}
1267 %
1268 %****************************************************************************
1269
1270 Thread start, idle and migration code for GrAnSim (i.e. simulating multiple
1271 processors). 
1272
1273 \begin{code}
1274 #if defined(GRAN)
1275
1276 StartThread(event,event_type)
1277 eventq event;
1278 enum gran_event_types event_type;
1279 {
1280   if(ThreadQueueHd==Nil_closure)
1281     {
1282       CurrentTSO = ThreadQueueHd = ThreadQueueTl = EVENT_TSO(event);
1283       newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc]+gran_threadqueuetime,
1284                CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
1285       if(RTSflags.ParFlags.granSimStats)
1286         DumpGranEvent(event_type,EVENT_TSO(event));
1287     }
1288   else
1289     {
1290       TSO_LINK(ThreadQueueTl) = EVENT_TSO(event);
1291       ThreadQueueTl = EVENT_TSO(event);
1292
1293       if(DoThreadMigration)
1294         ++SurplusThreads;
1295
1296       if(RTSflags.ParFlags.granSimStats)
1297         DumpGranEvent(event_type+1,EVENT_TSO(event));
1298
1299     }
1300   CurrentTime[CurrentProc] += gran_threadqueuetime;
1301 }
1302 \end{code}
1303
1304 Export work to idle PEs.
1305
1306 \begin{code}
1307 HandleIdlePEs()
1308 {
1309   PROC proc;
1310
1311   if(ANY_IDLE && (SparksAvail > 0l || SurplusThreads > 0l))
1312     for(proc = 0; proc < max_proc; proc++)
1313       if(IS_IDLE(proc))
1314         {
1315           if(DoStealThreadsFirst && 
1316              (FetchStrategy >= 4 || OutstandingFetches[proc] == 0))
1317             {
1318               if (SurplusThreads > 0l)                    /* Steal a thread */
1319                 StealThread(proc);
1320           
1321               if(!IS_IDLE(proc))
1322                 break;
1323             }
1324
1325           if(SparksAvail > 0l && 
1326              (FetchStrategy >= 3 || OutstandingFetches[proc] == 0)) /* Steal a spark */
1327             StealSpark(proc);
1328
1329           if (IS_IDLE(proc) && SurplusThreads > 0l && 
1330               (FetchStrategy >= 4 || OutstandingFetches[proc] == 0)) /* Steal a thread */
1331             StealThread(proc);
1332         }
1333 }
1334 \end{code}
1335
1336 Steal a spark and schedule moving it to @proc@.  We want to look at PEs in
1337 clock order, most retarded (i.e. furthest behind) first.  Currently sparks
1338 are only stolen from the @ADVISORY_POOL@, never from the @REQUIRED_POOL@;
1339 eventually this should be changed to try the former first, then the latter.
1340
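The victim-selection policy sketched in the paragraph above (candidate PEs
are those whose clocks are not ahead of the stealing PE, tried
most-retarded-first) can be modelled in isolation as follows; the arrays are
hypothetical stand-ins for @CurrentTime@ and the per-PE spark pools, and the
block is guarded by @#if 0@ so it is not compiled.

\begin{code}
#if 0   /* illustrative sketch only; not part of the RTS */
#include <stdio.h>

#define N_PE_SKETCH 4

/* Pick the candidate PE with the smallest (most retarded) clock that has
   sparks available and is not ahead of the stealing PE; -1 if none. */
static int
pick_victim(unsigned long clocks[], int sparks[], int thief)
{
    int p, best = -1;

    for (p = 0; p < N_PE_SKETCH; p++) {
        if (p == thief || sparks[p] == 0 || clocks[p] > clocks[thief])
            continue;
        if (best == -1 || clocks[p] < clocks[best])
            best = p;
    }
    return best;
}

int
main(void)
{
    unsigned long clocks[N_PE_SKETCH] = { 900, 300, 700, 500 };
    int           sparks[N_PE_SKETCH] = {   0,   2,   0,   1 };

    printf("PE 0 steals from PE %d\n", pick_victim(clocks, sparks, 0)); /* 1 */
    return 0;
}
#endif  /* illustrative sketch */
\end{code}
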
1341 \begin{code}
1342 StealSpark(proc)
1343 PROC proc;
1344 {
1345   PROC p;
1346   sparkq spark, prev, next;
1347   int stolen = 0;
1348   TIME times[MAX_PROC], stealtime;
1349   unsigned ntimes=0, i, j;
1350
1351   /* times shall contain processors from which we may steal sparks */ 
1352   for(p=0; p < max_proc; ++p)
1353     if(proc != p && 
1354        PendingSparksHd[p][ADVISORY_POOL] != NULL && 
1355        CurrentTime[p] <= CurrentTime[CurrentProc])
1356       times[ntimes++] = p;
1357
1358   /* sort times */
1359   for(i=0; i < ntimes; ++i)
1360     for(j=i+1; j < ntimes; ++j)
1361       if(CurrentTime[times[i]] > CurrentTime[times[j]])
1362         {
1363           unsigned temp = times[i];
1364           times[i] = times[j];
1365           times[j] = temp;
1366         }
1367
1368   for(i=0; i < ntimes && !stolen; ++i) 
1369     {
1370       p = times[i];
1371       
1372       for(prev=NULL, spark = PendingSparksHd[p][ADVISORY_POOL]; 
1373           spark != NULL && !stolen; 
1374           spark=next)
1375         {
1376           next = SPARK_NEXT(spark);
1377           
1378           if(SHOULD_SPARK(SPARK_NODE(spark)))
1379             {
1380               /* Don't Steal local sparks */
1381               if(!SPARK_GLOBAL(spark))
1382                 {
1383                   prev=spark;
1384                   continue;
1385                 }
1386               
1387               SPARK_NEXT(spark) = NULL;
1388               CurrentTime[p] += gran_mpacktime;
1389
1390               stealtime = (CurrentTime[p] > CurrentTime[proc]? CurrentTime[p]: CurrentTime[proc])
1391                 + SparkStealTime();
1392               
1393               newevent(proc,p /* CurrentProc */,stealtime,
1394                        MOVESPARK,Nil_closure,Nil_closure,spark);
1395
1396               MAKE_BUSY(proc);
1397               stolen = 1;
1398               ++SPARK_GLOBAL(spark);
1399
1400               if(do_sp_profile)
1401                 DumpSparkGranEvent(SP_EXPORTED,spark);
1402
1403               CurrentTime[p] += gran_mtidytime;
1404
1405               --SparksAvail;
1406             }
1407           else
1408             {
1409               if(do_sp_profile)
1410                 DumpSparkGranEvent(SP_PRUNED,spark);
1411               DisposeSpark(spark);
1412             }
1413           
1414           if(spark == PendingSparksHd[p][ADVISORY_POOL])
1415             PendingSparksHd[p][ADVISORY_POOL] = next;
1416           
1417           if(prev!=NULL)
1418             SPARK_NEXT(prev) = next;
1419         }
1420                       
1421       if(PendingSparksHd[p][ADVISORY_POOL] == NULL)
1422         PendingSparksTl[p][ADVISORY_POOL] = NULL;
1423     }
1424 }
1425 \end{code}
1426
1427 Steal a thread and schedule moving it to @proc@.
1428
1429 \begin{code}
1430 StealThread(proc)
1431 PROC proc;
1432 {
1433   PROC p;
1434   P_ thread, prev;
1435   TIME times[MAX_PROC], stealtime;
1436   unsigned ntimes=0, i, j;
1437
1438   /* Hunt for a thread */
1439
1440   /* times shall contain processors from which we may steal threads */ 
1441   for(p=0; p < max_proc; ++p)
1442     if(proc != p && RunnableThreadsHd[p] != Nil_closure && 
1443        CurrentTime[p] <= CurrentTime[CurrentProc])
1444       times[ntimes++] = p;
1445
1446   /* sort times */
1447   for(i=0; i < ntimes; ++i)
1448     for(j=i+1; j < ntimes; ++j)
1449       if(CurrentTime[times[i]] > CurrentTime[times[j]])
1450         {
1451           unsigned temp = times[i];
1452           times[i] = times[j];
1453           times[j] = temp;
1454         }
1455
1456   for(i=0; i < ntimes; ++i) 
1457     {
1458       p = times[i];
1459       
1460       /* Steal the first exportable (i.e. not locked) thread in the */
1461       /* runnable queue after the one at its head */ 
1462       
1463       if(RunnableThreadsHd[p] != Nil_closure)
1464         {
1465           for(prev = RunnableThreadsHd[p], thread = TSO_LINK(RunnableThreadsHd[p]); 
1466               thread != Nil_closure && TSO_LOCKED(thread); 
1467               prev = thread, thread = TSO_LINK(thread))
1468             /* SKIP */;
1469
1470           if(thread != Nil_closure)   /* Take thread out of runnable queue */
1471             {
1472               TSO_LINK(prev) = TSO_LINK(thread);
1473
1474               TSO_LINK(thread) = Nil_closure;
1475
1476               if(RunnableThreadsTl[p] == thread)
1477                 RunnableThreadsTl[p] = prev;
1478
1479               /* Turn magic constants into params !? -- HWL */
1480
1481               CurrentTime[p] += 5l * gran_mpacktime;
1482
1483               stealtime = (CurrentTime[p] > CurrentTime[proc]? CurrentTime[p]: CurrentTime[proc])
1484                            + SparkStealTime() + 4l * gran_additional_latency
1485                              + 5l * gran_munpacktime;
1486
1487               /* Move the thread */
1488               SET_PROCS(thread,PE_NUMBER(proc)); 
1489
1490               /* Move from one queue to another */
1491               newevent(proc,p,stealtime,MOVETHREAD,thread,Nil_closure,NULL);
1492               MAKE_BUSY(proc);
1493               --SurplusThreads;
1494
1495               if(RTSflags.ParFlags.granSimStats)
1496                 DumpRawGranEvent(p,GR_STEALING,TSO_ID(thread));
1497           
1498               CurrentTime[p] += 5l * gran_mtidytime;
1499
1500               /* Found one */
1501               break;
1502             }
1503         }
1504     }
1505 }
1506
1507 TIME SparkStealTime()
1508 {
1509   double fishdelay, sparkdelay, latencydelay;
1510   fishdelay =  (double)max_proc/2;
1511   sparkdelay = fishdelay - ((fishdelay-1)/(double)(max_proc-1))*(double)Idlers;
1512   latencydelay = sparkdelay*((double)gran_latency);
1513
1514 /*
1515   fprintf(stderr,"fish delay = %g, spark delay = %g, latency delay = %g, Idlers = %u\n",
1516           fishdelay,sparkdelay,latencydelay,Idlers);
1517 */
1518   return((TIME)latencydelay);
1519 }
1520 #endif                                                       /* GRAN ; HWL */
1521
1522 \end{code}
1523
1524 %****************************************************************************
1525 %
1526 \subsection[thread-execution]{Executing Threads}
1527 %
1528 %****************************************************************************
1529
1530 \begin{code}
1531 EXTDATA_RO(StkO_info);
1532 EXTDATA_RO(TSO_info);
1533 EXTDATA_RO(WorldStateToken_closure);
1534
1535 EXTFUN(EnterNodeCode);
1536 UNVEC(EXTFUN(stopThreadDirectReturn);,EXTDATA(vtbl_stopStgWorld);)
1537
1538 #if defined(GRAN)
1539
1540 /* Slow but relatively reliable method uses stgMallocBytes */
1541 /* Eventually change that to heap allocated sparks. */
1542
1543 sparkq 
1544 NewSpark(node,name,local)
1545 P_ node;
1546 I_ name, local;
1547 {
1548   sparkq newspark = (sparkq) stgMallocBytes(sizeof(struct spark), "NewSpark");
1549
1550   SPARK_PREV(newspark) = SPARK_NEXT(newspark) = NULL;
1551   SPARK_NODE(newspark) = node;
1552   SPARK_NAME(newspark) = name;
1553   SPARK_GLOBAL(newspark) = !local;
1554   return(newspark);
1555 }
1556
1557 void
1558 DisposeSpark(spark)
1559 sparkq spark;
1560 {
1561   if(spark!=NULL)
1562     free(spark);
1563
1564   --SparksAvail;
1565
1566 /* Heap-allocated disposal.
1567
1568   FREEZE_MUT_HDR(spark, ImMutArrayOfPtrs);
1569   SPARK_PREV(spark) = SPARK_NEXT(spark) = SPARK_NODE(spark) = Nil_closure;
1570 */
1571 }
1572
1573 DisposeSparkQ(spark)
1574 sparkq spark;
1575 {
1576   if (spark==NULL) 
1577     return;
1578
1579   DisposeSparkQ(SPARK_NEXT(spark));
1580
1581 #ifdef GRAN_CHECK
1582   if (SparksAvail < 0)
1583     fprintf(stderr,"DisposeSparkQ: SparksAvail<0 after disposing sparkq @ 0x%lx\n", spark);
1584 #endif
1585
1586   free(spark);
1587 }
1588
1589 #endif
1590
1591 /* Create a new TSO, with the specified closure to enter and thread type */
1592
1593 P_
1594 NewThread(topClosure, type)
1595 P_ topClosure;
1596 W_ type;
1597 {
1598     P_ stko, tso;
1599
1600     if (AvailableTSO != Nil_closure) {
1601         tso = AvailableTSO;
1602 #if defined(GRAN)
1603         SET_PROCS(tso,ThisPE);  /* Allocate it locally! */
1604 #endif
1605         AvailableTSO = TSO_LINK(tso);
1606     } else if (SAVE_Hp + TSO_HS + TSO_CTS_SIZE > SAVE_HpLim) {
1607         return(NULL);
1608     } else {
1609         ALLOC_TSO(TSO_HS,BYTES_TO_STGWORDS(sizeof(STGRegisterTable)),
1610                   BYTES_TO_STGWORDS(sizeof(StgDouble)));
1611         tso = SAVE_Hp + 1;
1612         SAVE_Hp += TSO_HS + TSO_CTS_SIZE;
1613         SET_TSO_HDR(tso, TSO_info, CCC);
1614     }
1615
1616     TSO_LINK(tso) = Nil_closure;
1617 #ifdef PAR
1618     TSO_CCC(tso) = (CostCentre)STATIC_CC_REF(CC_MAIN);
1619 #endif
1620     TSO_NAME(tso) = (P_) INFO_PTR(topClosure);  /* A string would be nicer -- JSM */
1621     TSO_ID(tso) = threadId++;
1622     TSO_TYPE(tso) = type;
1623     TSO_PC1(tso) = TSO_PC2(tso) = EnterNodeCode;
1624     TSO_ARG1(tso) = TSO_EVENT(tso) = 0;
1625     TSO_SWITCH(tso) = NULL;
1626
1627 #ifdef TICKY_TICKY
1628     TSO_AHWM(tso) = 0;
1629     TSO_BHWM(tso) = 0;
1630 #endif
1631
1632 #if defined(GRAN) || defined(PAR)
1633     TSO_SPARKNAME(tso)    = 0;
1634 #if defined(GRAN)
1635     TSO_STARTEDAT(tso)    = CurrentTime[CurrentProc];
1636 #else
1637     TSO_STARTEDAT(tso)    = CURRENT_TIME;
1638 #endif
1639     TSO_EXPORTED(tso)     = 0;
1640     TSO_BASICBLOCKS(tso)  = 0;
1641     TSO_ALLOCS(tso)       = 0;
1642     TSO_EXECTIME(tso)     = 0;
1643     TSO_FETCHTIME(tso)    = 0;
1644     TSO_FETCHCOUNT(tso)   = 0;
1645     TSO_BLOCKTIME(tso)    = 0;
1646     TSO_BLOCKCOUNT(tso)   = 0;
1647     TSO_BLOCKEDAT(tso)    = 0;
1648     TSO_GLOBALSPARKS(tso) = 0;
1649     TSO_LOCALSPARKS(tso)  = 0;
1650 #endif    
1651     /*
1652      * set pc, Node (R1), liveness
1653      */
1654     CurrentRegTable = TSO_INTERNAL_PTR(tso);
1655     SAVE_Liveness = LIVENESS_R1;
1656     SAVE_R1.p = topClosure;
1657
1658 # ifndef PAR
1659     if (type == T_MAIN) {
1660         stko = MainStkO;
1661     } else {
1662 # endif
1663         if (AvailableStack != Nil_closure) {
1664             stko = AvailableStack;
1665 #if defined(GRAN)
1666             SET_PROCS(stko,ThisPE);
1667 #endif
1668             AvailableStack = STKO_LINK(AvailableStack);
1669         } else if (SAVE_Hp + STKO_HS + RTSflags.ConcFlags.stkChunkSize > SAVE_HpLim) {
1670             return(NULL);
1671         } else {
1672             ALLOC_STK(STKO_HS,RTSflags.ConcFlags.stkChunkSize,0);
1673             stko = SAVE_Hp + 1;
1674             SAVE_Hp += STKO_HS + RTSflags.ConcFlags.stkChunkSize;
1675             SET_STKO_HDR(stko, StkO_info, CCC);
1676         }
1677         STKO_SIZE(stko) = RTSflags.ConcFlags.stkChunkSize + STKO_VHS;
1678         STKO_SpB(stko) = STKO_SuB(stko) = STKO_BSTK_BOT(stko) + BREL(1);
1679         STKO_SpA(stko) = STKO_SuA(stko) = STKO_ASTK_BOT(stko) + AREL(1);
1680         STKO_LINK(stko) = Nil_closure;
1681         STKO_RETURN(stko) = NULL;
1682 # ifndef PAR
1683     }
1684 # endif
1685     
1686 #ifdef TICKY_TICKY
1687     STKO_ADEP(stko) = STKO_BDEP(stko) = 0;
1688 #endif
1689
1690     if (type == T_MAIN) {
1691         STKO_SpA(stko) -= AREL(1);
1692         *STKO_SpA(stko) = (P_) WorldStateToken_closure;
1693     }
1694
1695     SAVE_Ret = (StgRetAddr) UNVEC(stopThreadDirectReturn,vtbl_stopStgWorld);
1696     SAVE_StkO = stko;
1697
1698     ASSERT(sanityChk_StkO(stko));
1699
1700     if (DO_QP_PROF) {
1701         QP_Event1(do_qp_prof > 1 ? "*A" : "*G", tso);
1702     }
1703     return tso;
1704 }
1705 \end{code}
1706
1707 \begin{code}
1708
1709 void
1710 EndThread(STG_NO_ARGS)
1711 {
1712 #ifdef PAR
1713     TIME now = CURRENT_TIME;
1714 #endif
1715 #ifdef TICKY_TICKY
1716     if (RTSflags.TickyFlags.showTickyStats) {
1717         fprintf(RTSflags.TickyFlags.tickyFile,
1718                 "Thread %d (%lx)\n\tA stack max. depth: %ld words\n",
1719                 TSO_ID(CurrentTSO), TSO_NAME(CurrentTSO), TSO_AHWM(CurrentTSO));
1720         fprintf(RTSflags.TickyFlags.tickyFile,
1721                 "\tB stack max. depth: %ld words\n",
1722                 TSO_BHWM(CurrentTSO));
1723     }
1724 #endif
1725
1726     if (DO_QP_PROF) {
1727         QP_Event1("G*", CurrentTSO);
1728     }
1729
1730 #if defined(GRAN)
1731     ASSERT(CurrentTSO == ThreadQueueHd);
1732     ThreadQueueHd = TSO_LINK(CurrentTSO);
1733
1734     if(ThreadQueueHd == Nil_closure)
1735       ThreadQueueTl = Nil_closure;
1736
1737     else if (DoThreadMigration)
1738       --SurplusThreads;
1739
1740     if (do_gr_sim)
1741       {
1742         if(TSO_TYPE(CurrentTSO)==T_MAIN)
1743           {
1744             int i;
1745             for(i=0; i < max_proc; ++i) {
1746               StgBool is_first = StgTrue;
1747               while(RunnableThreadsHd[i] != Nil_closure)
1748                 {
1749                   /* We schedule runnable threads before killing them to */
1750                   /* make the job of bookkeeping the running, runnable, */
1751                   /* blocked threads easier for scripts like gr2ps  -- HWL */ 
1752
1753                   if (RTSflags.ParFlags.granSimStats && !is_first)
1754                     DumpRawGranEvent(i,GR_SCHEDULE,
1755                                      TSO_ID(RunnableThreadsHd[i]));
1756                   if (!no_gr_profile)
1757                     DumpGranInfo(i,RunnableThreadsHd[i],StgTrue);
1758                   RunnableThreadsHd[i] = TSO_LINK(RunnableThreadsHd[i]);
1759                   is_first = StgFalse;
1760                 }
1761             }
1762
1763             ThreadQueueHd = Nil_closure;
1764
1765 #if defined(GRAN_CHECK) && defined(GRAN)
1766             /* Print event stats */
1767             if (debug & 0x20) {
1768               int i;
1769
1770               fprintf(stderr,"Statistics of events (total=%d):\n",
1771                       noOfEvents);
1772               for (i=0; i<=7; i++) {
1773                 fprintf(stderr,"> %s (%d): \t%ld \t%f%%\n",
1774                         event_names[i],i,event_counts[i],
1775                         (float)(100*event_counts[i])/(float)(noOfEvents) );
1776               }
1777             }
1778 #endif       
1779
1780           }
1781
1782         if (!no_gr_profile)
1783           DumpGranInfo(CurrentProc,CurrentTSO,
1784                        TSO_TYPE(CurrentTSO) != T_ADVISORY);
1785
1786         /* Note ThreadQueueHd is Nil when the main thread terminates */
1787         if(ThreadQueueHd != Nil_closure)
1788           {
1789             if (RTSflags.ParFlags.granSimStats && !no_gr_profile)
1790               DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
1791             CurrentTime[CurrentProc] += gran_threadscheduletime;
1792           }
1793
1794         else if (RTSflags.ParFlags.granSimStats_Binary && TSO_TYPE(CurrentTSO)==T_MAIN &&
1795                  !no_gr_profile)
1796           grterminate(CurrentTime[CurrentProc]);
1797       }
1798 #endif  /* GRAN */
1799
1800 #ifdef PAR
1801     if (RTSflags.ParFlags.granSimStats) {
1802         TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO);
1803         DumpGranInfo(thisPE, CurrentTSO, TSO_TYPE(CurrentTSO) != T_ADVISORY);
1804     }
1805 #endif
1806
1807     switch (TSO_TYPE(CurrentTSO)) {
1808     case T_MAIN:
1809         required_thread_count--;
1810 #ifdef PAR
1811         if (RTSflags.ParFlags.granSimStats_Binary)
1812             grterminate(now);
1813 #endif
1814
1815 #if defined(GRAN_CHECK) && defined(GRAN)
1816         if ( (debug & 0x80) || (debug & 0x40) )
1817           fprintf(stderr,"\nGRAN: I hereby terminate the main thread!\n");
1818
1819         /* I've stolen that from the end of ReSchedule (!GRAN).  HWL */
1820         longjmp(scheduler_loop, required_thread_count > 0 ? 1 : -1);
1821 #else
1822         ReSchedule(0);
1823 #endif  /* GRAN */
1824
1825     case T_REQUIRED:
1826         required_thread_count--;
1827         break;
1828
1829     case T_ADVISORY:
1830         advisory_thread_count--;
1831         break;
1832
1833     case T_FAIL:
1834         EXIT(EXIT_FAILURE);
1835
1836     default:
1837         fflush(stdout);
1838         fprintf(stderr, "EndThread: %lx unknown\n", (W_) TSO_TYPE(CurrentTSO));
1839         EXIT(EXIT_FAILURE);
1840     }
1841
1842     /* Reuse stack object space */
1843     ASSERT(STKO_LINK(SAVE_StkO) == Nil_closure);
1844     STKO_LINK(SAVE_StkO) = AvailableStack;
1845     AvailableStack = SAVE_StkO;
1846     /* Reuse TSO */
1847     TSO_LINK(CurrentTSO) = AvailableTSO;
1848     AvailableTSO = CurrentTSO;
1849     CurrentTSO = Nil_closure;
1850     CurrentRegTable = NULL;
1851
1852 #if defined(GRAN)
1853         /* NB: Now ThreadQueueHd is either the next runnable thread on this */
1854         /* proc or it's Nil_closure. In the latter case, a FINDWORK will be */
1855         /* issued by ReSchedule. */
1856         ReSchedule(SAME_THREAD);                /* back for more! */
1857 #else
1858         ReSchedule(0);                          /* back for more! */
1859 #endif
1860 }
1861 \end{code}
1862
1863 %****************************************************************************
1864 %
1865 \subsection[thread-blocking]{Local Blocking}
1866 %
1867 %****************************************************************************
1868
1869 \begin{code}
1870
1871 #if defined(COUNT)
1872 void CountnUPDs() { ++nUPDs; }
1873 void CountnUPDs_old() { ++nUPDs_old; }
1874 void CountnUPDs_new() { ++nUPDs_new; }
1875
1876 void CountnPAPs() { ++nPAPs; }
1877 #endif
1878
1879 EXTDATA_RO(BQ_info);
1880
1881 #ifndef GRAN
1882 /* NB: non-GRAN version ToDo
1883  *
1884  * AwakenBlockingQueue awakens a list of TSOs and FBQs.
1885  */
1886
1887 P_ PendingFetches = Nil_closure;
1888
1889 void
1890 AwakenBlockingQueue(bqe)
1891   P_ bqe;
1892 {
1893     P_ last_tso = NULL;
1894
1895 # ifdef PAR
1896     P_ next;
1897     TIME now = CURRENT_TIME;
1898
1899 # endif
1900
1901 # ifndef PAR
1902     while (bqe != Nil_closure) {
1903 # else
1904     while (IS_MUTABLE(INFO_PTR(bqe))) {
1905         switch (INFO_TYPE(INFO_PTR(bqe))) {
1906         case INFO_TSO_TYPE:
1907 # endif
1908             if (DO_QP_PROF) {
1909                 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
1910             }
1911 # ifdef PAR
1912             if (RTSflags.ParFlags.granSimStats) {
1913                 DumpGranEvent(GR_RESUMEQ, bqe);
1914                 switch (TSO_QUEUE(bqe)) {
1915                 case Q_BLOCKED:
1916                     TSO_BLOCKTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
1917                     break;
1918                 case Q_FETCHING:
1919                     TSO_FETCHTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
1920                     break;
1921                 default:
1922                     fflush(stdout);
1923                     fprintf(stderr, "ABQ: TSO_QUEUE invalid.\n");
1924                     EXIT(EXIT_FAILURE);
1925                 }
1926             }
1927 # endif
1928             if (last_tso == NULL) {
1929                 if (RunnableThreadsHd == Nil_closure) {
1930                     RunnableThreadsHd = bqe;
1931                 } else {
1932                     TSO_LINK(RunnableThreadsTl) = bqe;
1933                 }
1934             }
1935             last_tso = bqe;
1936             bqe = TSO_LINK(bqe);
1937 # ifdef PAR
1938             break;
1939         case INFO_BF_TYPE:
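            /* A blocked-fetch entry rather than a TSO: splice it out of
               the blocking queue and put it on PendingFetches instead. */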
1940             next = BF_LINK(bqe);
1941             BF_LINK(bqe) = PendingFetches;
1942             PendingFetches = bqe;
1943             bqe = next;
1944             if (last_tso != NULL)
1945                 TSO_LINK(last_tso) = next;
1946             break;
1947         default:
1948             fprintf(stderr, "Unexpected IP (%#lx) in blocking queue at %#lx\n",
1949               INFO_PTR(bqe), (W_) bqe);
1950             EXIT(EXIT_FAILURE);
1951         }
1952     }
1953 #else
1954     }
1955 # endif
1956     if (last_tso != NULL) {
1957         RunnableThreadsTl = last_tso;
1958 # ifdef PAR
1959         TSO_LINK(last_tso) = Nil_closure;
1960 # endif
1961     }
1962 }
1963 #endif /* !GRAN */
1964
1965 #ifdef GRAN
1966
1967 /* NB: GRAN version only ToDo
1968  *
1969  * AwakenBlockingQueue returns True if we are on the oldmutables list,
1970  * so that the update code knows what to do next.
1971  */
1972
1973 I_
1974 AwakenBlockingQueue(node)
1975   P_ node;
1976 {
1977     P_ tso = (P_) BQ_ENTRIES(node);
1978     P_ prev;
1979
1980     if(do_gr_sim)
1981       {
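        /* Simulation mode: rather than queueing the TSOs here, schedule a
           RESUMETHREAD event for each of them on the PE where it lives;
           for remote TSOs the current PE is charged packing/tidying time
           and the global (rather than local) unblock time is used. */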
1982         W_ notifytime;
1983
1984 # if defined(COUNT)
1985         ++nUPDs;
1986         if (tso != Nil_closure) 
1987           ++nUPDs_BQ;
1988 # endif
1989
1990         while(tso != Nil_closure) {
1991           W_ proc;
1992           ASSERT(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
1993
1994 # if defined(COUNT)
1995           ++BQ_lens;
1996 # endif
1997
1998           /* Find where the tso lives */
1999           proc = where_is(tso);
2000  
2001           if(proc == CurrentProc)
2002             notifytime = CurrentTime[CurrentProc] + gran_lunblocktime;
2003           else
2004             {
2005               CurrentTime[CurrentProc] += gran_mpacktime;
2006               notifytime = CurrentTime[CurrentProc] + gran_gunblocktime;
2007               CurrentTime[CurrentProc] += gran_mtidytime;
2008             }
2009
2010           /* and create a resume message */
2011           newevent(proc, CurrentProc, notifytime, 
2012                    RESUMETHREAD,tso,Nil_closure,NULL);
2013
2014           prev = tso;
2015           tso = TSO_LINK(tso);
2016           TSO_LINK(prev) = Nil_closure;
2017         }
2018       }
2019     else
2020       {
2021         if (ThreadQueueHd == Nil_closure)
2022           ThreadQueueHd = tso;
2023         else
2024           TSO_LINK(ThreadQueueTl) = tso;
2025
2026         while(TSO_LINK(tso) != Nil_closure) {
2027           ASSERT(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
2028           if (DO_QP_PROF) {
2029             QP_Event2(do_qp_prof > 1 ? "RA" : "RG", tso, CurrentTSO);
2030           }
2031           tso = TSO_LINK(tso);
2032         }
2033         
2034         ASSERT(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
2035         if (DO_QP_PROF) {
2036           QP_Event2(do_qp_prof > 1 ? "RA" : "RG", tso, CurrentTSO);
2037         }
2038         
2039         ThreadQueueTl = tso;
2040       }
2041
2042     return MUT_LINK(node) != MUT_NOT_LINKED;
2043 }
2044
2045 #endif /* GRAN only */
2046
2047 EXTFUN(Continue);
2048
2049 void
2050 Yield(args)
2051 W_ args;
2052 {
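    /* The argument packs two things: bit 0 is handed on to ReSchedule
       below, and the remaining bits form the caller's liveness mask. */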
2053     SAVE_Liveness = args >> 1;
2054     TSO_PC1(CurrentTSO) = Continue;
2055     if (DO_QP_PROF) {
2056         QP_Event1("GR", CurrentTSO);
2057     }
2058 #ifdef PAR
2059     if (RTSflags.ParFlags.granSimStats) {
2060         /* Note that CURRENT_TIME may perform an unsafe call */
2061         TSO_EXECTIME(CurrentTSO) += CURRENT_TIME - TSO_BLOCKEDAT(CurrentTSO);
2062     }
2063 #endif
2064     ReSchedule(args & 1);
2065 }
2066
2067 \end{code}
2068
2069 %****************************************************************************
2070 %
2071 \subsection[gr-fetch]{Fetching Nodes (GrAnSim only)}
2072 %
2073 %****************************************************************************
2074
2075 The following GrAnSim routines simulate the fetching of nodes from a remote
2076 processor. We use a 1 word bitmask to indicate on which processor a node is
2077 lying. Thus,  moving or copying a  node from one  processor to another just
2078 requires  an     appropriate  change in this     bitmask  (using @SET_GA@).
2079 Additionally, the clocks have to be updated.
2080
2081 A special case arises when the node that is  needed by processor A has been
2082 moved from a  processor B to a processor   C between sending  out a @FETCH@
2083 (from A) and its arrival at B. In that case the @FETCH@ has to be forwarded
2084 to C.
2085  
2086 Currently, we  only support GRIP-like  single closure fetching.  We plan to
2087 incorporate GUM-like packet fetching in the near future.
2088  
2089 \begin{code}
2090 #if defined(GRAN)
2091
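/* The one-word bitmask PROCS(node) records which PEs hold the node: a
   copy adds the destination PE's bit (PROCS(node) |= PE_NUMBER(to)), a
   move replaces the mask (PROCS(node) = PE_NUMBER(to)), and
   IS_LOCAL_TO(PROCS(node),p) tests whether PE p holds a copy -- see
   FetchNode and HandleFetchRequest below. */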
2092 /* Fetch node "node" to processor "p" */
2093
2094 int
2095 FetchNode(node,from,to)
2096 P_ node;
2097 PROC from, to;
2098 {
2099   ASSERT(to==CurrentProc);
2100
2101   if (!IS_LOCAL_TO(PROCS(node),from) &&
2102       !IS_LOCAL_TO(PROCS(node),to) ) 
2103     return 1;
2104
2105   if(IS_NF(INFO_PTR(node)))                 /* Old: || IS_BQ(node) */
2106     PROCS(node) |= PE_NUMBER(to);           /* Copy node */
2107   else
2108     PROCS(node) = PE_NUMBER(to);            /* Move node */
2109
2110   /* Now fetch the children */
2111   if(DoGUMMFetching)
2112     {
2113       fprintf(stderr,"Sorry, GUMM fetching not yet implemented.\n");
2114     }
2115
2116   return 0;
2117 }
2118
2119 /* --------------------------------------------------
2120    Cost of sending a packet of size n = C + P*n
2121    where C = packet construction constant, 
2122          P = cost of packing one word into a packet
2123    [Should also account for multiple packets].
2124    -------------------------------------------------- */
2125
2126 void 
2127 HandleFetchRequest(node,p,tso)
2128 P_ node, tso;
2129 PROC p;
2130 {
2131   if (IS_LOCAL_TO(PROCS(node),p) )  /* Somebody else moved node already => */
2132     {                               /* start tso                           */ 
2133       newevent(p,CurrentProc,
2134                CurrentTime[CurrentProc] /* +gran_latency */,
2135                FETCHREPLY,tso,node,NULL);            /* node needed ? */
2136       CurrentTime[CurrentProc] += gran_mtidytime;
2137     }
2138   else if (IS_LOCAL_TO(PROCS(node),CurrentProc) )   /* Is node still here? */
2139     {
2140       /* Actual moving/copying of node is done on arrival; see FETCHREPLY */
2141       /* Send a reply to the originator */
2142       CurrentTime[CurrentProc] += gran_mpacktime;
2143
2144       newevent(p,CurrentProc,
2145                CurrentTime[CurrentProc]+gran_latency,
2146                FETCHREPLY,tso,node,NULL);            /* node needed ? */
2147       
2148       CurrentTime[CurrentProc] += gran_mtidytime;
2149     }
2150   else
2151     {    /* Qu'vatlh! node has been grabbed by another proc => forward */
2152       PROC p_new = where_is(node);
2153       TIME fetchtime;
2154
2155 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
2156       if (NoForward) {
2157         newevent(p,p_new,
2158                  max(CurrentTime[p_new],CurrentTime[CurrentProc])+gran_latency,
2159                  FETCHREPLY,tso,node,NULL);            /* node needed ? */
2160         CurrentTime[CurrentProc] += gran_mtidytime;
2161         return;
2162       }
2163 #endif
2164
2165 #if defined(GRAN_CHECK) && defined(GRAN)         /* Just for testing */
2166       if (debug & 0x2)    /* 0x2 should be something like DBG_PRINT_FWD */
2167         fprintf(stderr,"Qu'vatlh! node 0x%x has been grabbed by %d (current=%d; demander=%d) @ %d\n",
2168                 node,p_new,CurrentProc,p,CurrentTime[CurrentProc]);
2169 #endif
2170       /* Prepare FORWARD message to proc p_new */
2171       CurrentTime[CurrentProc] += gran_mpacktime;
2172       
2173       fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p_new]) +
2174                       gran_latency;
2175           
2176       newevent(p_new,p,fetchtime,FETCHNODE,tso,node,NULL);
2177
2178       CurrentTime[CurrentProc] += gran_mtidytime;
2179     }
2180 }
2181 #endif
2182 \end{code}
2183
2184 %****************************************************************************
2185 %
2186 \subsection[gr-simulation]{Granularity Simulation}
2187 %
2188 %****************************************************************************
2189
2190 \begin{code}
2191 #if 0 /* moved to GranSim.lc */
2192 #if defined(GRAN)
2193 I_ do_gr_sim = 0;
2194 FILE *gr_file = NULL;
2195 char gr_filename[STATS_FILENAME_MAXLEN];
2196
2197 init_gr_simulation(rts_argc,rts_argv,prog_argc,prog_argv)
2198 char *prog_argv[], *rts_argv[];
2199 int prog_argc, rts_argc;
2200 {
2201     I_ i;
2202
2203     if(do_gr_sim)
2204       { 
2205         char *extension = RTSflags.ParFlags.granSimStats_Binary? "gb": "gr";
2206         sprintf(gr_filename, GR_FILENAME_FMT, prog_argv[0],extension);
2207
2208         if ((gr_file = fopen(gr_filename,"w")) == NULL ) 
2209           {
2210             fprintf(stderr, "Can't open granularity simulation report file %s\n", gr_filename);
2211             exit(EXIT_FAILURE);             
2212           }
2213
2214 #if defined(GRAN_CHECK) && defined(GRAN)
2215         if(DoReScheduleOnFetch)
2216           setbuf(gr_file,NULL);
2217 #endif
2218
2219         fputs("Granularity Simulation for ",gr_file);
2220         for(i=0; i < prog_argc; ++i)
2221           {
2222             fputs(prog_argv[i],gr_file);
2223             fputc(' ',gr_file);
2224           }
2225
2226         if(rts_argc > 0)
2227           {
2228             fputs("+RTS ",gr_file);
2229
2230             for(i=0; i < rts_argc; ++i)
2231               {
2232                 fputs(rts_argv[i],gr_file);
2233                 fputc(' ',gr_file);
2234               }
2235           }
2236
2237         fputs("\n\n--------------------\n\n",gr_file);
2238
2239         fputs("General Parameters:\n\n",gr_file);
2240
2241         fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads%s\n",
2242                 max_proc,DoFairSchedule?"Fair":"Unfair",
2243                 DoThreadMigration?"":"Don't ",
2244                 DoThreadMigration &&
2245                   DoStealThreadsFirst?" Before Sparks":"");
2246
2247         fprintf(gr_file, "%s, Fetch %s in Each Packet\n",
2248                 SimplifiedFetch?"Simplified Fetch":(DoReScheduleOnFetch?"Reschedule on Fetch":"Block on Fetch"),
2249                 DoGUMMFetching?"Many Closures":"Exactly One Closure");
2250         fprintf(gr_file, "Fetch Strategy(%u): If outstanding fetches %s\n",
2251                 FetchStrategy,
2252                 FetchStrategy==1?"only run runnable threads (don't create new ones)":
2253                 FetchStrategy==2?"create threads only from local sparks":
2254                 FetchStrategy==3?"create threads from local or global sparks":
2255                 FetchStrategy==4?"create sparks and steal threads if necessary":
2256                                  "unknown");
2257
2258         fprintf(gr_file, "Thread Creation Time %lu, Thread Queue Time %lu\n",
2259                 gran_threadcreatetime,gran_threadqueuetime);
2260         fprintf(gr_file, "Thread DeSchedule Time %lu, Thread Schedule Time %lu\n",
2261                 gran_threaddescheduletime,gran_threadscheduletime);
2262         fprintf(gr_file, "Thread Context-Switch Time %lu\n",
2263                 gran_threadcontextswitchtime);
2264         fputs("\n\n--------------------\n\n",gr_file);
2265
2266         fputs("Communication Metrics:\n\n",gr_file);
2267         fprintf(gr_file,
2268                 "Latency %lu (1st) %lu (rest), Fetch %lu, Notify %lu (Global) %lu (Local)\n",
2269                 gran_latency, gran_additional_latency, gran_fetchtime,
2270                 gran_gunblocktime, gran_lunblocktime);
2271         fprintf(gr_file,
2272                 "Message Creation %lu (+ %lu after send), Message Read %lu\n",
2273                 gran_mpacktime, gran_mtidytime, gran_munpacktime);
2274         fputs("\n\n--------------------\n\n",gr_file);
2275
2276         fputs("Instruction Metrics:\n\n",gr_file);
2277         fprintf(gr_file,"Arith %lu, Branch %lu, Load %lu, Store %lu, Float %lu, Alloc %lu\n",
2278                 gran_arith_cost, gran_branch_cost, 
2279                 gran_load_cost, gran_store_cost,gran_float_cost,gran_heapalloc_cost);
2280         fputs("\n\n++++++++++++++++++++\n\n",gr_file);
2281       }
2282
2283     if(RTSflags.ParFlags.granSimStats_Binary)
2284       grputw(sizeof(TIME));
2285
2286     Idlers = max_proc;
2287     return(0);
2288 }
2289
2290 void end_gr_simulation() {
2291   if(do_gr_sim)
2292     {
2293       fprintf(stderr,"The simulation is finished. Look at %s for details.\n",
2294               gr_filename);
2295       fclose(gr_file);
2296     }
2297 }
2298 #endif /*0*/
2299 \end{code}
2300
2301 %****************************************************************************
2302 %
2303 \subsection[qp-profile]{Quasi-Parallel Profiling}
2304 %
2305 %****************************************************************************
2306
2307 \begin{code}
2308 #ifndef PAR
2309
2310 I_ do_qp_prof;
2311 FILE *qp_file;
2312
2313 /* *Virtual* Time in milliseconds */
2314 long 
2315 qp_elapsed_time(STG_NO_ARGS)
2316 {
2317     extern StgDouble usertime();
2318
2319     return ((long) (usertime() * 1e3));
2320 }
2321
2322 static void 
2323 init_qp_profiling(STG_NO_ARGS)
2324 {
2325     I_ i;
2326     char qp_filename[STATS_FILENAME_MAXLEN];
2327
2328     sprintf(qp_filename, QP_FILENAME_FMT, prog_argv[0]);
2329     if ((qp_file = fopen(qp_filename,"w")) == NULL ) {
2330         fprintf(stderr, "Can't open quasi-parallel profile report file %s\n", 
2331             qp_filename);
2332         do_qp_prof = 0;
2333     } else {
2334         fputs(prog_argv[0], qp_file);
2335         for(i = 1; prog_argv[i]; i++) {
2336             fputc(' ', qp_file);
2337             fputs(prog_argv[i], qp_file);
2338         }
2339         fprintf(qp_file, " +RTS -C%d -t%d\n"
2340                 , RTSflags.ConcFlags.ctxtSwitchTime
2341                 , RTSflags.ConcFlags.maxThreads);
2342
2343         fputs(time_str(), qp_file);
2344         fputc('\n', qp_file);
2345     }
2346 }
2347
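/* The ASCII trace format written by the QP_Event routines below is
   <virtual time> <event tag> <id> <name/info ptr>, with a second
   id/name pair for the two-thread events of QP_Event2. */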
2348 void
2349 QP_Event0(tid, node)
2350 I_ tid;
2351 P_ node;
2352 {
2353     fprintf(qp_file, "%lu ** %lu 0x%lx\n", qp_elapsed_time(), tid, INFO_PTR(node));
2354 }
2355
2356 void
2357 QP_Event1(event, tso)
2358 char *event;
2359 P_ tso;
2360 {
2361     fprintf(qp_file, "%lu %s %lu 0x%lx\n", qp_elapsed_time(), event,
2362             TSO_ID(tso), TSO_NAME(tso));
2363 }
2364
2365 void
2366 QP_Event2(event, tso1, tso2)
2367 char *event;
2368 P_ tso1, tso2;
2369 {
2370     fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n", qp_elapsed_time(), event,
2371             TSO_ID(tso1), TSO_NAME(tso1), TSO_ID(tso2), TSO_NAME(tso2));
2372 }
2373
2374 #endif /* !PAR */
2375 \end{code}
2376
2377 %****************************************************************************
2378 %
2379 \subsection[entry-points]{Routines directly called from Haskell world}
2380 %
2381 %****************************************************************************
2382
2383 The @GranSim...@ routines in here are directly called via macros from the
2384 threaded world. 
2385
2386 First some auxiliary routines.
2387
2388 \begin{code}
2389 #ifdef GRAN
2390 /* Take the current thread off the thread queue and thereby activate the */
2391 /* next thread. It's assumed that the next ReSchedule after this uses */
2392 /* NEW_THREAD as param. */
2393 /* This fct is called from GranSimBlock and GranSimFetch */
2394
2395 void 
2396 ActivateNextThread ()
2397 {
2398 #if defined(GRAN_CHECK) && defined(GRAN)
2399   if(ThreadQueueHd != CurrentTSO) {
2400     fprintf(stderr,"Error: ThreadQueueHd != CurrentTSO in ActivateNextThread\n");
2401     exit(99);
2402   }
2403 #endif
2404  
2405   ThreadQueueHd = TSO_LINK(ThreadQueueHd);
2406   if(ThreadQueueHd==Nil_closure) {
2407     MAKE_IDLE(CurrentProc);
2408     ThreadQueueTl = Nil_closure;
2409   } else if (RTSflags.ParFlags.granSimStats) {
2410     CurrentTime[CurrentProc] += gran_threadcontextswitchtime;
2411     DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
2412   }
2413 }
2414 \end{code}
2415
2416 Now the main stg-called routines:
2417
2418 \begin{code}
2419 /* ------------------------------------------------------------------------ */
2420 /* The following GranSim... fcts are stg-called from the threaded world.    */
2421 /* ------------------------------------------------------------------------ */
2422
2423 /* Called from HEAP_CHK  -- NB: node and liveness are junk here now. 
2424    They are left temporarily to avoid complete recompilation.
2425    KH 
2426 */
2427 void 
2428 GranSimAllocate(n,node,liveness)
2429 I_ n;
2430 P_ node;
2431 W_ liveness;
2432 {
2433   TSO_ALLOCS(CurrentTSO) += n;
2434   ++TSO_BASICBLOCKS(CurrentTSO);
2435   
2436   TSO_EXECTIME(CurrentTSO) += gran_heapalloc_cost;
2437   CurrentTime[CurrentProc] += gran_heapalloc_cost;
2438 }
2439
2440 /*
2441   Subtract the values added above, if a heap check fails and
2442   so has to be redone.
2443 */
2444 void 
2445 GranSimUnallocate(n,node,liveness)
2446 W_ n;
2447 P_ node;
2448 W_ liveness;
2449 {
2450   TSO_ALLOCS(CurrentTSO) -= n;
2451   --TSO_BASICBLOCKS(CurrentTSO);
2452   
2453   TSO_EXECTIME(CurrentTSO) -= gran_heapalloc_cost;
2454   CurrentTime[CurrentProc] -= gran_heapalloc_cost;
2455 }
2456
2457 void 
2458 GranSimExec(ariths,branches,loads,stores,floats)
2459 W_ ariths,branches,loads,stores,floats;
2460 {
2461   W_ cost = gran_arith_cost*ariths + gran_branch_cost*branches + gran_load_cost * loads +
2462             gran_store_cost*stores + gran_float_cost*floats;
2463
2464   TSO_EXECTIME(CurrentTSO) += cost;
2465   CurrentTime[CurrentProc] += cost;
2466 }
2467
2468
2469 /* 
2470    Fetch the node if it isn't local
2471    -- result indicates whether fetch has been done.
2472
2473    This is GRIP-style single item fetching.
2474 */
2475
2476 I_ 
2477 GranSimFetch(node /* , liveness_mask */ )
2478 P_ node;
2479 /* I_ liveness_mask; */
2480 {
2481   /* Note: once a node has been fetched, this test will be passed */
2482   if(!IS_LOCAL_TO(PROCS(node),CurrentProc) )
2483     {
2484       /* I suppose we shouldn't do this for CAFs? -- KH */
2485       /* Should reschedule if the latency is high */
2486       /* We should add mpacktime to the remote PE for the reply,
2487          but we don't know who owns the node
2488       */
2489       /* if(DYNAMIC_POINTER(node)) */        /* For 0.22; gone in 0.23 !!! */
2490         {
2491           PROC p = where_is(node);
2492           TIME fetchtime;
2493
2494 #ifdef GRAN_CHECK
2495           if ( ( debug & 0x40 ) &&
2496                p == CurrentProc )
2497             fprintf(stderr,"GranSimFetch: Trying to fetch from own processor%u\n", p);
2498 #endif  /* GRAN_CHECK */
2499
2500           CurrentTime[CurrentProc] += gran_mpacktime;
2501
2502           ++TSO_FETCHCOUNT(CurrentTSO);
2503           TSO_FETCHTIME(CurrentTSO) += gran_fetchtime;
2504               
2505           if (SimplifiedFetch)
2506             {
2507               FetchNode(node,p,CurrentProc);
2508               CurrentTime[CurrentProc] += gran_mtidytime+gran_fetchtime+
2509                                           gran_munpacktime;
2510               return(1);
2511             }
2512
2513           fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p]) +
2514                       gran_latency;
2515
2516           newevent(p,CurrentProc,fetchtime,FETCHNODE,CurrentTSO,node,NULL);
2517           ++OutstandingFetches[CurrentProc];
2518
2519           /* About to block */
2520           TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[p];
2521
2522           if (DoReScheduleOnFetch) 
2523             {
2524
2525               /* Remove CurrentTSO from the queue 
2526                  -- assumes head of queue == CurrentTSO */
2527               if(!DoFairSchedule)
2528                 {
2529                   if(RTSflags.ParFlags.granSimStats)
2530                     DumpGranEventAndNode(GR_FETCH,CurrentTSO,node,p);
2531
2532                   ActivateNextThread();
2533               
2534 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
2535                   if (debug & 0x10) {
2536                     if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
2537                       fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
2538                               CurrentTSO,CurrentTime[CurrentProc]);
2539                       exit (99);
2540                     } else {
2541                       TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
2542                     }
2543
2544                   }
2545 #endif
2546
2547                   TSO_LINK(CurrentTSO) = Nil_closure;
2548                   /* CurrentTSO = Nil_closure; */
2549
2550                   /* ThreadQueueHd is now the next TSO to schedule or NULL */
2551                   /* CurrentTSO is pointed to by the FETCHNODE event */
2552                 }
2553               else                            /* DoFairSchedule */
2554                 {
2555                   /* Remove from the tail of the thread queue */
2556                   fprintf(stderr,"Reschedule-on-fetch is not yet compatible with fair scheduling\n");
2557                   exit(99);
2558                 }
2559             }
2560           else                                /* !DoReScheduleOnFetch */
2561             {
2562               /* Note: CurrentProc is still busy as it's blocked on fetch */
2563               if(RTSflags.ParFlags.granSimStats)
2564                 DumpGranEventAndNode(GR_FETCH,CurrentTSO,node,p);
2565
2566 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
2567               if (debug & 0x04)
2568                   BlockedOnFetch[CurrentProc] = CurrentTSO; /*- StgTrue; -*/
2569
2570               if (debug & 0x10) {
2571                 if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
2572                   fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
2573                           CurrentTSO,CurrentTime[CurrentProc]);
2574                   exit (99);
2575                 } else {
2576                   TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
2577                 }
2578
2579                 CurrentTSO = Nil_closure;
2580               }
2581 #endif
2582             }
2583
2584           CurrentTime[CurrentProc] += gran_mtidytime;
2585
2586           /* Rescheduling is necessary */
2587           NeedToReSchedule = StgTrue;
2588
2589           return(1); 
2590         }
2591     }
2592   return(0);
2593 }
2594
2595 void 
2596 GranSimSpark(local,node)
2597 W_ local;
2598 P_ node;
2599 {
2600   ++SparksAvail;
2601   if(do_sp_profile)
2602     DumpSparkGranEvent(SP_SPARK,node);
2603
2604   /* Force the PE to take notice of the spark */
2605   if(DoAlwaysCreateThreads)
2606     newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
2607              FINDWORK,Nil_closure,Nil_closure,NULL);
2608
2609   if(local)
2610     ++TSO_LOCALSPARKS(CurrentTSO);
2611   else
2612     ++TSO_GLOBALSPARKS(CurrentTSO);
2613 }
2614
2615 void 
2616 GranSimSparkAt(spark,where,identifier)
2617 sparkq spark;
2618 P_  where;        /* This should be a node; alternatively could be a GA */
2619 I_ identifier;
2620 {
2621   PROC p = where_is(where);
2622   TIME exporttime;
2623
2624   if(do_sp_profile)
2625     DumpSparkGranEvent(SP_SPARKAT,SPARK_NODE(spark));
2626
2627   CurrentTime[CurrentProc] += gran_mpacktime;
2628
2629   exporttime = (CurrentTime[p] > CurrentTime[CurrentProc]? 
2630                 CurrentTime[p]: CurrentTime[CurrentProc])
2631                + gran_latency;
2632   
2633   newevent(p,CurrentProc,exporttime,MOVESPARK,Nil_closure,Nil_closure,spark);
2634
2635   CurrentTime[CurrentProc] += gran_mtidytime;
2636
2637   ++TSO_GLOBALSPARKS(CurrentTSO);
2638 }
2639
2640 void 
2641 GranSimBlock()
2642 {
2643   if(RTSflags.ParFlags.granSimStats)
2644     DumpGranEvent(GR_BLOCK,CurrentTSO);
2645
2646   ++TSO_BLOCKCOUNT(CurrentTSO);
2647   TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[CurrentProc];
2648   ActivateNextThread();
2649 }
2650
2651 #endif  /* GRAN */
2652
2653 \end{code}
2654
2655 %****************************************************************************
2656 %
2657 \subsection[gc-GrAnSim]{Garbage collection routines for GrAnSim objects}
2658 %
2659 %****************************************************************************
2660
2661 Garbage collection code for the event queue.  We walk the event queue
2662 so that if the only reference to a TSO is in some event (e.g. RESUME),
2663 the TSO is still preserved.
2664
2665 \begin{code}
2666 #ifdef GRAN
2667
2668 extern smInfo StorageMgrInfo;
2669
2670 I_
2671 SaveEventRoots(num_ptr_roots)
2672 I_ num_ptr_roots;
2673 {
2674   eventq event = EventHd;
2675   while(event != NULL)
2676     {
2677       if(EVENT_TYPE(event) == RESUMETHREAD || 
2678          EVENT_TYPE(event) == MOVETHREAD || 
2679          EVENT_TYPE(event) == STARTTHREAD )
2680         StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
2681
2682       else if(EVENT_TYPE(event) == MOVESPARK)
2683         StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(EVENT_SPARK(event));
2684
2685       else if (EVENT_TYPE(event) == FETCHNODE ||
2686                EVENT_TYPE(event) == FETCHREPLY )
2687         {
2688           StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
2689           StorageMgrInfo.roots[num_ptr_roots++] = EVENT_NODE(event);
2690         }
2691
2692       event = EVENT_NEXT(event);
2693     }
2694   return(num_ptr_roots);
2695 }
2696
2697 I_
2698 SaveSparkRoots(num_ptr_roots)
2699 I_ num_ptr_roots;
2700 {
2701   sparkq spark, /* prev, */ disposeQ=NULL;
2702   PROC proc;
2703   I_ i, sparkroots=0, prunedSparks=0;
2704
2705 #if defined(GRAN_CHECK) && defined(GRAN)
2706   if ( debug & 0x40 ) 
2707     fprintf(stderr,"D> Saving spark roots for GC ...\n");
2708 #endif       
2709
2710   for(proc = 0; proc < max_proc; ++proc) {
2711     for(i = 0; i < SPARK_POOLS; ++i) {
2712       for(/* prev = &PendingSparksHd[proc][i],*/ spark = PendingSparksHd[proc][i]; 
2713           spark != NULL; 
2714           /* prev = &SPARK_NEXT(spark), */ spark = SPARK_NEXT(spark))
2715         {
2716           if(++sparkroots <= MAX_SPARKS)
2717             {
2718 #if defined(GRAN_CHECK) && defined(GRAN)
2719               if ( debug & 0x40 ) 
2720                 fprintf(RTSflags.GcFlags.statsFile,"Saving Spark Root %d(proc: %d; pool: %d) -- 0x%lx\n",
2721                         num_ptr_roots,proc,i,SPARK_NODE(spark));
2722 #endif       
2723               StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(spark);
2724             }
2725           else
2726             {
2727               SPARK_NODE(spark) = Nil_closure;
2728               if (prunedSparks==0) {
2729                 disposeQ = spark;
2730                 /*
2731                    *prev = NULL;
2732                 */
2733               }
2734               prunedSparks++;
2735             }
2736         }  /* forall spark ... */
2737         if (prunedSparks>0) {
2738           fprintf(RTSflags.GcFlags.statsFile,"Pruning and disposing %lu excess sparks (> %lu) on proc %d for GC purposes\n",
2739                   prunedSparks,MAX_SPARKS,proc);
2740           if (disposeQ == PendingSparksHd[proc][i])
2741             PendingSparksHd[proc][i] = NULL;
2742           else
2743             SPARK_NEXT(SPARK_PREV(disposeQ)) = NULL;
2744           DisposeSparkQ(disposeQ);
2745           prunedSparks = 0;
2746           disposeQ = NULL;
2747         }  
2748     }  /* forall i ... */
2749   }      /*forall proc .. */
2750
2751   return(num_ptr_roots);
2752 }
2753
2754 /*
2755    GC roots must be restored in *reverse order*.
2756    The recursion is a little ugly, but is better than
2757    in-place pointer reversal.
2758 */
2759
2760 static I_
2761 RestoreEvtRoots(event,num_ptr_roots)
2762 eventq event;
2763 I_ num_ptr_roots;
2764 {
2765   if(event != NULL)
2766     {
2767       num_ptr_roots = RestoreEvtRoots(EVENT_NEXT(event),num_ptr_roots);
2768
2769       if(EVENT_TYPE(event) == RESUMETHREAD || 
2770          EVENT_TYPE(event) == MOVETHREAD || 
2771          EVENT_TYPE(event) == STARTTHREAD )
2772         EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
2773
2774       else if(EVENT_TYPE(event) == MOVESPARK )
2775         SPARK_NODE(EVENT_SPARK(event)) = StorageMgrInfo.roots[--num_ptr_roots];
2776
2777       else if (EVENT_TYPE(event) == FETCHNODE ||
2778                EVENT_TYPE(event) == FETCHREPLY )
2779         {
2780           EVENT_NODE(event) = StorageMgrInfo.roots[--num_ptr_roots];
2781           EVENT_TSO(event) =  StorageMgrInfo.roots[--num_ptr_roots];
2782         }
2783     }
2784
2785   return(num_ptr_roots);
2786 }
2787
2788 I_ 
2789 RestoreEventRoots(num_ptr_roots)
2790 I_ num_ptr_roots;
2791 {
2792   return(RestoreEvtRoots(EventHd,num_ptr_roots));
2793 }
2794
2795 static I_
2796 RestoreSpkRoots(spark,num_ptr_roots,sparkroots)
2797 sparkq spark;
2798 I_ num_ptr_roots, sparkroots;
2799 {
2800   if(spark != NULL)
2801     {
2802       num_ptr_roots = RestoreSpkRoots(SPARK_NEXT(spark),num_ptr_roots,++sparkroots);
2803       if(sparkroots <= MAX_SPARKS)
2804         {
2805           P_ n = SPARK_NODE(spark);
2806           SPARK_NODE(spark) = StorageMgrInfo.roots[--num_ptr_roots];
2807 #if defined(GRAN_CHECK) && defined(GRAN)
2808           if ( debug & 0x40 ) 
2809             fprintf(RTSflags.GcFlags.statsFile,"Restoring Spark Root %d -- new: 0x%lx \n",
2810                     num_ptr_roots,SPARK_NODE(spark));
2811 #endif
2812         }
2813 #if defined(GRAN_CHECK) && defined(GRAN)
2814       else
2815           if ( debug & 0x40 ) 
2816             fprintf(RTSflags.GcFlags.statsFile,"Error in RestoreSpkRoots (%d; @ spark 0x%x): More than MAX_SPARKS (%d) sparks\n",
2817                     num_ptr_roots,SPARK_NODE(spark),MAX_SPARKS);
2818 #endif
2819
2820     }
2821
2822   return(num_ptr_roots);
2823 }
2824
2825 I_ 
2826 RestoreSparkRoots(num_ptr_roots)
2827 I_ num_ptr_roots;
2828 {
2829   PROC proc;
2830   I_   i;
2831
2832   /* NB: PROC is currently an unsigned datatype i.e. proc>=0 is always */
2833   /* true ((PROC)-1 == (PROC)255). So we need a second clause in the head */
2834   /* of the for loop. For i that is currently not necessary. C is really */
2835   /* impressive in datatype abstraction!   -- HWL */
2836
2837   for(proc = max_proc - 1; (proc >= 0) && (proc < max_proc); --proc) {
2838     for(i = SPARK_POOLS - 1; (i >= 0) && (i < SPARK_POOLS) ; --i) {
2839       num_ptr_roots = RestoreSpkRoots(PendingSparksHd[proc][i],num_ptr_roots,0);
2840     }
2841   }
2842   return(num_ptr_roots);
2843 }
2844
2845 #endif  /* GRAN */
2846
2847 \end{code}
2848
2849 %****************************************************************************
2850 %
2851 \subsection[GrAnSim-profile]{Writing profiling info for GrAnSim}
2852 %
2853 %****************************************************************************
2854
2855 Event dumping routines.
2856
2857 \begin{code}
2858 #ifdef GRAN 
2859
2860 DumpGranEvent(name,tso)
2861 enum gran_event_types name;
2862 P_ tso;
2863 {
2864   DumpRawGranEvent(CurrentProc,name,TSO_ID(tso));
2865 }
2866
2867 DumpSparkGranEvent(name,id)
2868 enum gran_event_types name;
2869 W_ id;
2870 {
2871   DumpRawGranEvent(CurrentProc,name,id);
2872 }
2873
2874 DumpGranEventAndNode(name,tso,node,proc)
2875 enum gran_event_types name;
2876 P_ tso, node;
2877 PROC proc;
2878 {
2879   PROC pe = CurrentProc;
2880   W_ id = TSO_ID(tso);
2881
2882   if(name > GR_EVENT_MAX)
2883     name = GR_EVENT_MAX;
2884
2885   if(RTSflags.ParFlags.granSimStats_Binary)
2886     {
2887       grputw(name);
2888       grputw(pe);
2889       grputw(CurrentTime[CurrentProc]);
2890       grputw(id);
2891     }
2892   else
2893     fprintf(gr_file,"PE %2u [%lu]: %s %lx \t0x%lx\t(from %2u)\n",
2894             pe,CurrentTime[CurrentProc],gran_event_names[name],id,node,proc);
2895 }
2896
2897 DumpRawGranEvent(pe,name,id)
2898 PROC pe;
2899 enum gran_event_types name;
2900 W_ id;
2901 {
2902   if(name > GR_EVENT_MAX)
2903     name = GR_EVENT_MAX;
2904
2905   if(RTSflags.ParFlags.granSimStats_Binary)
2906     {
2907       grputw(name);
2908       grputw(pe);
2909       grputw(CurrentTime[CurrentProc]);
2910       grputw(id);
2911     }
2912   else
2913     fprintf(gr_file,"PE %2u [%lu]: %s %lx\n",
2914             pe,CurrentTime[CurrentProc],gran_event_names[name],id);
2915 }
2916
2917 DumpGranInfo(pe,tso,mandatory_thread)
2918 PROC pe;
2919 P_ tso;
2920 I_ mandatory_thread;
2921 {
2922   if(RTSflags.ParFlags.granSimStats_Binary)
2923     {
2924       grputw(GR_END);
2925       grputw(pe);
2926       grputw(CurrentTime[CurrentProc]);
2927       grputw(TSO_ID(tso));
2928       grputw(TSO_SPARKNAME(tso));
2929       grputw(TSO_STARTEDAT(tso));
2930       grputw(TSO_EXPORTED(tso));
2931       grputw(TSO_BASICBLOCKS(tso));
2932       grputw(TSO_ALLOCS(tso));
2933       grputw(TSO_EXECTIME(tso));
2934       grputw(TSO_BLOCKTIME(tso));
2935       grputw(TSO_BLOCKCOUNT(tso));
2936       grputw(TSO_FETCHTIME(tso));
2937       grputw(TSO_FETCHCOUNT(tso));
2938       grputw(TSO_LOCALSPARKS(tso));
2939       grputw(TSO_GLOBALSPARKS(tso));
2940       grputw(mandatory_thread);
2941     }
2942   else
2943     {
2944       /* NB: DumpGranEvent cannot be used because PE may be wrong (as well as the extra info) */
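      /* The abbreviations below map directly onto the fields printed:
         SN sparkname, ST startedat, EXP exported, BB basicblocks,
         HA allocs, RT exectime, BT blocktime (blockcount),
         FT fetchtime (fetchcount), LS localsparks, GS globalsparks,
         MY mandatory thread. */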
2945       fprintf(gr_file,"PE %2u [%lu]: END %lx, SN %lu, ST %lu, EXP %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu), LS %lu, GS %lu, MY %c\n"
2946               ,pe
2947               ,CurrentTime[CurrentProc]
2948               ,TSO_ID(tso)
2949               ,TSO_SPARKNAME(tso)
2950               ,TSO_STARTEDAT(tso)
2951               ,TSO_EXPORTED(tso)?'T':'F'
2952               ,TSO_BASICBLOCKS(tso)
2953               ,TSO_ALLOCS(tso)
2954               ,TSO_EXECTIME(tso)
2955               ,TSO_BLOCKTIME(tso)
2956               ,TSO_BLOCKCOUNT(tso)
2957               ,TSO_FETCHTIME(tso)
2958               ,TSO_FETCHCOUNT(tso)
2959               ,TSO_LOCALSPARKS(tso)
2960               ,TSO_GLOBALSPARKS(tso)
2961               ,mandatory_thread?'T':'F'
2962               );
2963     }
2964 }
2965
2966 DumpTSO(tso)
2967 P_ tso;
2968 {
2969   fprintf(stderr,"TSO 0x%lx, NAME 0x%lx, ID %lu, LINK 0x%lx, TYPE %s\n"
2970           ,tso
2971           ,TSO_NAME(tso)
2972           ,TSO_ID(tso)
2973           ,TSO_LINK(tso)
2974           ,TSO_TYPE(tso)==T_MAIN?"MAIN":
2975            TSO_TYPE(tso)==T_FAIL?"FAIL":
2976            TSO_TYPE(tso)==T_REQUIRED?"REQUIRED":
2977            TSO_TYPE(tso)==T_ADVISORY?"ADVISORY":
2978            "???"
2979           );
2980           
2981   fprintf(stderr,"PC (0x%lx,0x%lx), ARG (0x%lx,0x%lx), SWITCH %lx0x\n"
2982           ,TSO_PC1(tso)
2983           ,TSO_PC2(tso)
2984           ,TSO_ARG1(tso)
2985           ,TSO_ARG2(tso)
2986           ,TSO_SWITCH(tso)
2987           );
2988
2989   fprintf(gr_file,"SN %lu, ST %lu, GBL %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu) LS %lu, GS %lu\n"
2990           ,TSO_SPARKNAME(tso)
2991           ,TSO_STARTEDAT(tso)
2992           ,TSO_EXPORTED(tso)?'T':'F'
2993           ,TSO_BASICBLOCKS(tso)
2994           ,TSO_ALLOCS(tso)
2995           ,TSO_EXECTIME(tso)
2996           ,TSO_BLOCKTIME(tso)
2997           ,TSO_BLOCKCOUNT(tso)
2998           ,TSO_FETCHTIME(tso)
2999           ,TSO_FETCHCOUNT(tso)
3000           ,TSO_LOCALSPARKS(tso)
3001           ,TSO_GLOBALSPARKS(tso)
3002           );
3003 }
3004
3005 /*
3006    Output a terminate event and an 8-byte time.
3007 */
3008
3009 grterminate(v)
3010 TIME v;
3011 {
3012   DumpGranEvent(GR_TERMINATE,0);
3013
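  /* The time itself is always written as 8 bytes, most significant byte
     first; a 4-byte TIME is padded with four leading zero bytes. */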
3014   if(sizeof(TIME)==4)
3015     {
3016       putc('\0',gr_file);
3017       putc('\0',gr_file);
3018       putc('\0',gr_file);
3019       putc('\0',gr_file);
3020     }
3021   else
3022     {
3023       putc(v >> 56l,gr_file);
3024       putc((v >> 48l)&0xffl,gr_file);
3025       putc((v >> 40l)&0xffl,gr_file);
3026       putc((v >> 32l)&0xffl,gr_file);
3027     }
3028   putc((v >> 24l)&0xffl,gr_file);
3029   putc((v >> 16l)&0xffl,gr_file);
3030   putc((v >> 8l)&0xffl,gr_file);
3031   putc(v&0xffl,gr_file);
3032 }
3033
3034 /*
3035    Length-coded output: first 3 bits contain length coding
3036
3037      00x        1 byte
3038      01x        2 bytes
3039      10x        4 bytes
3040      110        8 bytes
3041      111        5 or 9 bytes
3042 */
3043
3044 grputw(v)
3045 TIME v;
3046 {
3047   if(v <= 0x3fl)
3048     {
3049       fputc(v & 0x3f,gr_file);
3050     }
3051
3052   else if (v <= 0x3fffl)
3053     {
3054       fputc((v >> 8l)|0x40l,gr_file);
3055       fputc(v&0xffl,gr_file);
3056     }
3057   
3058   else if (v <= 0x3fffffffl)
3059     {
3060       fputc((v >> 24l)|0x80l,gr_file);
3061       fputc((v >> 16l)&0xffl,gr_file);
3062       fputc((v >> 8l)&0xffl,gr_file);
3063       fputc(v&0xffl,gr_file);
3064     }
3065
3066   else if (sizeof(TIME) == 4)
3067     {
3068       fputc(0x70,gr_file);
3069       fputc((v >> 24l)&0xffl,gr_file);
3070       fputc((v >> 16l)&0xffl,gr_file);
3071       fputc((v >> 8l)&0xffl,gr_file);
3072       fputc(v&0xffl,gr_file);
3073     }
3074
3075   else 
3076     {
3077       if (v <= 0x3fffffffffffffl)
3078         putc((v >> 56l)|0x60l,gr_file);
3079       else
3080         {
3081           putc(0x70,gr_file);
3082           putc((v >> 56l)&0xffl,gr_file);
3083         }
3084
3085       putc((v >> 48l)&0xffl,gr_file);
3086       putc((v >> 40l)&0xffl,gr_file);
3087       putc((v >> 32l)&0xffl,gr_file);
3088       putc((v >> 24l)&0xffl,gr_file);
3089       putc((v >> 16l)&0xffl,gr_file);
3090       putc((v >> 8l)&0xffl,gr_file);
3091       putc(v&0xffl,gr_file);
3092     }
3093 }
3094 #endif  /* GRAN */
3095
3096 \end{code}
3097
3098 %****************************************************************************
3099 %
3100 \subsection[GrAnSim-debug]{Debugging routines  for GrAnSim}
3101 %
3102 %****************************************************************************
3103
3104 Debugging routines, mainly for GrAnSim. They should really be in a separate file.
3105
3106 The    first couple  of routines     are   general ones   (look also   into
3107 c-as-asm/StgDebug.lc).
3108
3109 \begin{code}
3110
3111 #define NULL_REG_MAP        /* Not threaded */
3112 #include "stgdefs.h"
3113
3114 char *
3115 info_hdr_type(info_ptr)
3116 W_ info_ptr;
3117 {
3118 #if ! defined(PAR) && !defined(GRAN)
3119   switch (INFO_TAG(info_ptr))
3120     {
3121       case INFO_OTHER_TAG:
3122         return("OTHER_TAG");
3123 /*    case INFO_IND_TAG:
3124         return("IND_TAG");
3125 */    default:
3126         return("TAG<n>");
3127     }
3128 #else /* PAR || GRAN */
3129   switch(INFO_TYPE(info_ptr))
3130     {
3131       case INFO_SPEC_U_TYPE:
3132         return("SPECU");
3133
3134       case INFO_SPEC_N_TYPE:
3135         return("SPECN");
3136
3137       case INFO_GEN_U_TYPE:
3138         return("GENU");
3139
3140       case INFO_GEN_N_TYPE:
3141         return("GENN");
3142
3143       case INFO_DYN_TYPE:
3144         return("DYN");
3145
3146       /* 
3147       case INFO_DYN_TYPE_N:
3148         return("DYNN");
3149
3150       case INFO_DYN_TYPE_U:
3151         return("DYNU");
3152       */
3153
3154       case INFO_TUPLE_TYPE:
3155         return("TUPLE");
3156
3157       case INFO_DATA_TYPE:
3158         return("DATA");
3159
3160       case INFO_MUTUPLE_TYPE:
3161         return("MUTUPLE");
3162
3163       case INFO_IMMUTUPLE_TYPE:
3164         return("IMMUTUPLE");
3165
3166       case INFO_STATIC_TYPE:
3167         return("STATIC");
3168
3169       case INFO_CONST_TYPE:
3170         return("CONST");
3171
3172       case INFO_CHARLIKE_TYPE:
3173         return("CHAR");
3174
3175       case INFO_INTLIKE_TYPE:
3176         return("INT");
3177
3178       case INFO_BH_TYPE:
3179         return("BHOLE");
3180
3181       case INFO_IND_TYPE:
3182         return("IND");
3183
3184       case INFO_CAF_TYPE:
3185         return("CAF");
3186
3187       case INFO_FETCHME_TYPE:
3188         return("FETCHME");
3189
3190       case INFO_BQ_TYPE:
3191         return("BQ");
3192
3193       /*
3194       case INFO_BQENT_TYPE:
3195         return("BQENT");
3196       */
3197
3198       case INFO_TSO_TYPE:
3199         return("TSO");
3200
3201       case INFO_STKO_TYPE:
3202         return("STKO");
3203
3204       default:
3205         fprintf(stderr,"Unknown header type %lu\n",INFO_TYPE(info_ptr));
3206         return("??");
3207       }
3208 #endif /* PAR */
3209 }
3210         
3211 /*
3212 @var_hdr_size@ computes the size of the variable header for a closure.
3213 */
3214
3215 I_
3216 var_hdr_size(node)
3217 P_ node;
3218 {
3219   switch(INFO_TYPE(INFO_PTR(node)))
3220     {
3221       case INFO_SPEC_U_TYPE:    return(0);      /* by decree */
3222       case INFO_SPEC_N_TYPE:    return(0);
3223       case INFO_GEN_U_TYPE:     return(GEN_VHS);
3224       case INFO_GEN_N_TYPE:     return(GEN_VHS);
3225       case INFO_DYN_TYPE:       return(DYN_VHS);
3226       /*
3227       case INFO_DYN_TYPE_N:     return(DYN_VHS);
3228       case INFO_DYN_TYPE_U:     return(DYN_VHS);
3229       */
3230       case INFO_TUPLE_TYPE:     return(TUPLE_VHS);
3231       case INFO_DATA_TYPE:      return(DATA_VHS);
3232       case INFO_MUTUPLE_TYPE:   return(MUTUPLE_VHS);
3233       case INFO_IMMUTUPLE_TYPE: return(MUTUPLE_VHS); /* same layout */
3234       case INFO_STATIC_TYPE:    return(STATIC_VHS);
3235       case INFO_CONST_TYPE:     return(0);
3236       case INFO_CHARLIKE_TYPE:  return(0);
3237       case INFO_INTLIKE_TYPE:   return(0);
3238       case INFO_BH_TYPE:        return(0);
3239       case INFO_IND_TYPE:       return(0);
3240       case INFO_CAF_TYPE:       return(0);
3241       case INFO_FETCHME_TYPE:   return(0);
3242       case INFO_BQ_TYPE:        return(0);
3243       /*
3244       case INFO_BQENT_TYPE:     return(0);
3245       */
3246       case INFO_TSO_TYPE:       return(TSO_VHS);
3247       case INFO_STKO_TYPE:      return(STKO_VHS);
3248       default:
3249         fprintf(stderr,"Unknown info type 0x%lx (%lu)\n", INFO_PTR(node),
3250           INFO_TYPE(INFO_PTR(node)));
3251         return(0);
3252     }
3253 }
3254
3255
3256 /* Determine the size and number of pointers for this kind of closure */
3257 void
3258 size_and_ptrs(node,size,ptrs)
3259 P_ node;
3260 W_ *size, *ptrs;
3261 {
3262   switch(INFO_TYPE(INFO_PTR(node)))
3263     {
3264       case INFO_SPEC_U_TYPE:
3265       case INFO_SPEC_N_TYPE:
3266         *size = INFO_SIZE(INFO_PTR(node));          /* New for 0.24; check */
3267         *ptrs = INFO_NoPTRS(INFO_PTR(node));        /* that! -- HWL */
3268         /* 
3269         *size = SPEC_CLOSURE_SIZE(node);
3270         *ptrs = SPEC_CLOSURE_NoPTRS(node);
3271         */
3272         break;
3273
3274       case INFO_GEN_U_TYPE:
3275       case INFO_GEN_N_TYPE:
3276         *size = GEN_CLOSURE_SIZE(node);
3277         *ptrs = GEN_CLOSURE_NoPTRS(node);
3278         break;
3279
3280       /* 
3281       case INFO_DYN_TYPE_U:
3282       case INFO_DYN_TYPE_N:
3283       */
3284       case INFO_DYN_TYPE:
3285         *size = DYN_CLOSURE_SIZE(node);
3286         *ptrs = DYN_CLOSURE_NoPTRS(node);
3287         break;
3288
3289       case INFO_TUPLE_TYPE:
3290         *size = TUPLE_CLOSURE_SIZE(node);
3291         *ptrs = TUPLE_CLOSURE_NoPTRS(node);
3292         break;
3293
3294       case INFO_DATA_TYPE:
3295         *size = DATA_CLOSURE_SIZE(node);
3296         *ptrs = DATA_CLOSURE_NoPTRS(node);
3297         break;
3298
3299       case INFO_IND_TYPE:
3300         *size = IND_CLOSURE_SIZE(node);
3301         *ptrs = IND_CLOSURE_NoPTRS(node);
3302         break;
3303
3304 /* ToDo: more (WDP) */
3305
3306       /* Don't know about the others */
3307       default:
3308         *size = *ptrs = 0;
3309         break;
3310     }
3311 }
3312
3313 void
3314 DEBUG_PRINT_NODE(node)
3315 P_ node;
3316 {
3317    W_ info_ptr = INFO_PTR(node);
3318    I_ size = 0, ptrs = 0, i, vhs = 0;
3319    char *info_type = info_hdr_type(info_ptr);
3320
3321    size_and_ptrs(node,&size,&ptrs);
3322    vhs = var_hdr_size(node);
3323
3324    fprintf(stderr,"Node: 0x%lx", (W_) node);
3325
3326 #if defined(PAR)
3327    fprintf(stderr," [GA: 0x%lx]",GA(node));
3328 #endif
3329
3330 #if defined(PROFILING)
3331    fprintf(stderr," [CC: 0x%lx]",CC_HDR(node));
3332 #endif
3333
3334 #if defined(GRAN)
3335    fprintf(stderr," [Bitmask: 0%lo]",PROCS(node));
3336 #endif
3337
3338    fprintf(stderr," IP: 0x%lx (%s), size %ld, %ld ptrs\n",
3339                   info_ptr,info_type,size,ptrs);
3340
3341    /* For now, we ignore the variable header */
3342
3343    for(i=0; i < size; ++i)
3344      {
3345        if(i == 0)
3346          fprintf(stderr,"Data: ");
3347
3348        else if(i % 6 == 0)
3349          fprintf(stderr,"\n      ");
3350
3351        if(i < ptrs)
3352          fprintf(stderr," 0x%lx[P]",*(node+_FHS+vhs+i));
3353        else
3354          fprintf(stderr," %lu[D]",*(node+_FHS+vhs+i));
3355      }
3356    fprintf(stderr, "\n");
3357 }
3358
3359
3360 #define INFO_MASK       0x80000000
3361
3362 void
3363 DEBUG_TREE(node)
3364 P_ node;
3365 {
3366   W_ size = 0, ptrs = 0, i, vhs = 0;
3367
3368   /* Don't print cycles */
3369   if((INFO_PTR(node) & INFO_MASK) != 0)
3370     return;
3371
3372   size_and_ptrs(node,&size,&ptrs);
3373   vhs = var_hdr_size(node);
3374
3375   DEBUG_PRINT_NODE(node);
3376   fprintf(stderr, "\n");
3377
3378   /* Mark the node -- may be dangerous */
3379   INFO_PTR(node) |= INFO_MASK;
3380
3381   for(i = 0; i < ptrs; ++i)
3382     DEBUG_TREE((P_)node[i+vhs+_FHS]);
3383
3384   /* Unmark the node */
3385   INFO_PTR(node) &= ~INFO_MASK;
3386 }
3387
3388
3389 void
3390 DEBUG_INFO_TABLE(node)
3391 P_ node;
3392 {
3393   W_ info_ptr = INFO_PTR(node);
3394   char *ip_type = info_hdr_type(info_ptr);
3395
3396   fprintf(stderr,"%s Info Ptr @0x%lx; Entry: 0x%lx; Size: %lu; Ptrs: %lu\n\n",
3397                  ip_type,info_ptr,(W_) ENTRY_CODE(info_ptr),INFO_SIZE(info_ptr),INFO_NoPTRS(info_ptr));
3398 #if defined(PAR)
3399   fprintf(stderr,"Enter Flush Entry: 0x%lx;\tExit Flush Entry: 0x%lx\n",INFO_FLUSHENT(info_ptr),INFO_FLUSH(info_ptr));
3400 #endif
3401
3402 #if defined(PROFILING)
3403   fprintf(stderr,"Cost Centre (???):       0x%lx\n",INFO_CAT(info_ptr));
3404 #endif
3405
3406 #if defined(_INFO_COPYING)
3407   fprintf(stderr,"Evacuate Entry:    0x%lx;\tScavenge Entry: 0x%lx\n",
3408           INFO_EVAC_2S(info_ptr),INFO_SCAV_2S(info_ptr));
3409 #endif
3410
3411 #if defined(_INFO_COMPACTING)
3412   fprintf(stderr,"Scan Link:         0x%lx;\tScan Move:      0x%lx\n",
3413           (W_) INFO_SCAN_LINK_1S(info_ptr), (W_) INFO_SCAN_MOVE_1S(info_ptr));
3414   fprintf(stderr,"Mark:              0x%lx;\tMarked:         0x%lx;\t",
3415           (W_) INFO_MARK_1S(info_ptr), (W_) INFO_MARKED_1S(info_ptr));
3416 #if 0 /* avoid INFO_TYPE */
3417   if(BASE_INFO_TYPE(info_ptr)==INFO_SPEC_TYPE)
3418     fprintf(stderr,"plus specialised code\n");
3419   else
3420     fprintf(stderr,"Marking:           0x%lx\n",(W_) INFO_MARKING_1S(info_ptr));
3421 #endif /* 0 */
3422 #endif
3423 }
3424 #endif /* GRAN */
3425
3426 \end{code}
3427
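To show how these generic printers are typically combined, here is a minimal
sketch (wrapped in @#if 0@, so it is never compiled) of a hypothetical helper
that dumps a suspect closure; the name @dump_suspect_closure@ is illustrative
only and not part of the RTS.

\begin{code}
#if 0 /* illustrative only; never compiled */
static void
dump_suspect_closure(node)
P_ node;
{
    DEBUG_PRINT_NODE(node);  /* header and data words of the closure itself */
    DEBUG_INFO_TABLE(node);  /* static facts recorded in its info table     */
    DEBUG_TREE(node);        /* everything reachable from it; the top bit of
                                the info pointer marks visited nodes and is
                                restored afterwards                          */
}
#endif /* 0 */
\end{code}
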
3428 The remaining debugging routines are more or less specific to GrAnSim.
3429
3430 \begin{code}
3431 #if defined(GRAN) && defined(GRAN_CHECK)
3432 void
3433 DEBUG_CURR_THREADQ(verbose) 
3434 I_ verbose;
3435 {
3436   fprintf(stderr,"Thread Queue on proc %d: ", CurrentProc);
3437   DEBUG_THREADQ(ThreadQueueHd, verbose);
3438 }
3439
3440 void 
3441 DEBUG_THREADQ(closure, verbose) 
3442 P_ closure;
3443 I_ verbose;
3444 {
3445  P_ x;
3446
3447  fprintf(stderr,"Thread Queue: ");
3448  for (x=closure; x!=Nil_closure; x=TSO_LINK(x))
3449    if (verbose) 
3450      DEBUG_TSO(x,0);
3451    else
3452      fprintf(stderr," 0x%x",x);
3453
3454  if (closure==Nil_closure)
3455    fprintf(stderr,"NIL\n");
3456  else
3457    fprintf(stderr,"\n");
3458 }
3459
3460 /* Check with Threads.lh */
3461 /* NB: keep in sync with the thread-type constants in Threads.lh */
3462
3463 void 
3464 DEBUG_TSO(closure,verbose) 
3465 P_ closure;
3466 I_ verbose;
3467 {
3468  
3469  if (closure==Nil_closure) {
3470    fprintf(stderr,"TSO at 0x%x is Nil_closure!\n",closure);
3471    return;
3472  }
3473
3474  fprintf(stderr,"TSO at 0x%x has the following contents:\n",closure);
3475
3476  fprintf(stderr,"> Name: 0x%x",TSO_NAME(closure));
3477  fprintf(stderr,"\tLink: 0x%x\n",TSO_LINK(closure));
3478  fprintf(stderr,"> Id: 0x%x",TSO_ID(closure));
3479 #if defined(GRAN_CHECK) && defined(GRAN)
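 /* With bit 0x10 of the debug flag set, mask FETCH_MASK_TSO out of the
    type before indexing type_name[] and report the TSO as SLEEPING if
    that bit was set. */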
3480  if (debug & 0x10)
3481    fprintf(stderr,"\tType: %s  %s\n",
3482            type_name[TSO_TYPE(closure) & ~FETCH_MASK_TSO],
3483            (TSO_TYPE(closure) & FETCH_MASK_TSO) ? "SLEEPING" : "");
3484  else
3485    fprintf(stderr,"\tType: %s\n",type_name[TSO_TYPE(closure)]);
3486 #else
3487  fprintf(stderr,"\tType: %s\n",type_name[TSO_TYPE(closure)]);
3488 #endif
3489  fprintf(stderr,"> PC1:  0x%x",TSO_PC1(closure));
3490  fprintf(stderr,"\tPC2:  0x%x\n",TSO_PC2(closure));
3491  fprintf(stderr,"> ARG1: 0x%x",TSO_ARG1(closure));
3492  fprintf(stderr,"\tARG2: 0x%x\n",TSO_ARG2(closure));
3493  fprintf(stderr,"> SWITCH: 0x%x\n", TSO_SWITCH(closure));
3494
3495  if (verbose) {
3496    fprintf(stderr,"} LOCKED: 0x%x",TSO_LOCKED(closure));
3497    fprintf(stderr,"\tSPARKNAME: 0x%x\n", TSO_SPARKNAME(closure));
3498    fprintf(stderr,"} STARTEDAT: 0x%x", TSO_STARTEDAT(closure));
3499    fprintf(stderr,"\tEXPORTED: 0x%x\n", TSO_EXPORTED(closure));
3500    fprintf(stderr,"} BASICBLOCKS: 0x%x", TSO_BASICBLOCKS(closure));
3501    fprintf(stderr,"\tALLOCS: 0x%x\n", TSO_ALLOCS(closure));
3502    fprintf(stderr,"} EXECTIME: 0x%x", TSO_EXECTIME(closure));
3503    fprintf(stderr,"\tFETCHTIME: 0x%x\n", TSO_FETCHTIME(closure));
3504    fprintf(stderr,"} FETCHCOUNT: 0x%x", TSO_FETCHCOUNT(closure));
3505    fprintf(stderr,"\tBLOCKTIME: 0x%x\n", TSO_BLOCKTIME(closure));
3506    fprintf(stderr,"} BLOCKCOUNT: 0x%x", TSO_BLOCKCOUNT(closure));
3507    fprintf(stderr,"\tBLOCKEDAT: 0x%x\n", TSO_BLOCKEDAT(closure));
3508    fprintf(stderr,"} GLOBALSPARKS: 0x%x", TSO_GLOBALSPARKS(closure));
3509    fprintf(stderr,"\tLOCALSPARKS: 0x%x\n", TSO_LOCALSPARKS(closure));
3510  }
3511 }
3512
3513 void 
3514 DEBUG_EVENT(event, verbose) 
3515 eventq event;
3516 I_ verbose;
3517 {
3518   if (verbose) {
3519     print_event(event);
3520   } else {
3521     fprintf(stderr," 0x%x",event);
3522   }
3523 }
3524
3525 void
3526 DEBUG_EVENTQ(verbose)
3527 I_ verbose;
3528 {
3529  eventq x;
3530
3531  fprintf(stderr,"Eventq (hd @0x%x):\n",EventHd);
3532  for (x=EventHd; x!=NULL; x=EVENT_NEXT(x)) {
3533    DEBUG_EVENT(x,verbose);
3534  }
3535  if (EventHd==NULL) 
3536    fprintf(stderr,"NIL\n");
3537  else
3538    fprintf(stderr,"\n");
3539 }
3540
3541 void 
3542 DEBUG_SPARK(spark, verbose) 
3543 sparkq spark;
3544 I_ verbose;
3545 {
3546   if (verbose)
3547     print_spark(spark);
3548   else
3549     fprintf(stderr," 0x%x",spark);
3550 }
3551
3552 void 
3553 DEBUG_SPARKQ(spark,verbose) 
3554 sparkq spark;
3555 I_ verbose;
3556 {
3557  sparkq x;
3558
3559  fprintf(stderr,"Sparkq (hd @0x%x):\n",spark);
3560  for (x=spark; x!=NULL; x=SPARK_NEXT(x)) {
3561    DEBUG_SPARK(x,verbose);
3562  }
3563  if (spark==NULL) 
3564    fprintf(stderr,"NIL\n");
3565  else
3566    fprintf(stderr,"\n");
3567 }
3568
3569 void 
3570 DEBUG_CURR_SPARKQ(verbose) 
3571 I_ verbose;
3572 {
3573   DEBUG_SPARKQ(SparkQueueHd,verbose);
3574 }
3575
3576 void 
3577 DEBUG_PROC(proc,verbose)
3578 I_ proc;
3579 I_ verbose;
3580 {
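  /* verbose is a bit mask: 0x2 makes the runnable-thread dump verbose,
     0x1 additionally dumps the REQUIRED and ADVISORY spark pools. */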
3581   fprintf(stderr,"Status of proc %d at time %d (0x%x): %s\n",
3582           proc,CurrentTime[proc],CurrentTime[proc],
3583           (CurrentProc==proc)?"ACTIVE":"INACTIVE");
3584   DEBUG_THREADQ(RunnableThreadsHd[proc],verbose & 0x2);
3585   if ( (CurrentProc==proc) )
3586     DEBUG_TSO(CurrentTSO,1);
3587
3588   if (EventHd!=NULL)
3589     fprintf(stderr,"Next event (%s) is on proc %d\n",
3590             event_names[EVENT_TYPE(EventHd)],EVENT_PROC(EventHd));
3591
3592   if (verbose & 0x1) {
3593     fprintf(stderr,"\nREQUIRED sparks: ");
3594     DEBUG_SPARKQ(PendingSparksHd[proc][REQUIRED_POOL],1);
3595     fprintf(stderr,"\nADVISORY_sparks: ");
3596     DEBUG_SPARKQ(PendingSparksHd[proc][ADVISORY_POOL],1);
3597   }
3598 }
3599
3600 /* Debug CurrentTSO */
3601 void
3602 DCT(){ 
3603   fprintf(stderr,"Current Proc: %d\n",CurrentProc);
3604   DEBUG_TSO(CurrentTSO,1);
3605 }
3606
3607 /* Debug Current Processor */
3608 void
3609 DCP(){ DEBUG_PROC(CurrentProc,2); }
3610
3611 /* Shorthand for debugging event queue */
3612 void
3613 DEQ() { DEBUG_EVENTQ(1); }
3614
3615 /* Shorthand for debugging spark queue */
3616 void
3617 DSQ() { DEBUG_CURR_SPARKQ(1); }
3618
3619 /* Shorthand for printing a node */
3620 void
3621 DN(P_ node) { DEBUG_PRINT_NODE(node); }
3622
3623 #endif /* GRAN */
3624 \end{code}
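
The one-letter entry points above (@DCT@, @DCP@, @DEQ@, @DSQ@ and @DN@) are
presumably meant to be invoked by hand from a debugger, e.g.\ @call DCP()@
under gdb.  The sketch below (again wrapped in @#if 0@ and never compiled)
shows a hypothetical helper that calls them from RTS code instead.

\begin{code}
#if 0 /* illustrative only; never compiled */
/* Hypothetical helper (the name is ours): dump the interesting bits of
   the simulator state in one go. */
static void
dump_gransim_state(STG_NO_ARGS)
{
    DCP();   /* the current processor: status, runnable threads, current TSO */
    DEQ();   /* the global event queue, verbosely                            */
    DSQ();   /* the current spark queue, verbosely                           */
}
#endif /* 0 */
\end{code}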
3625
3626
3627 %****************************************************************************
3628 %
3629 \subsection[qp-profile]{Quasi-Parallel Profiling}
3630 %
3631 %****************************************************************************
3632
3633 \begin{code}
3634 #ifndef GRAN
3635 I_ do_qp_prof;
3636 FILE *qp_file;
3637
3638 /* *Virtual* Time in milliseconds */
3639 long 
3640 qp_elapsed_time()
3641 {
3642     return ((long) (usertime() * 1e3));
3643 }
3644
3645 static void
3646 init_qp_profiling(STG_NO_ARGS)
3647 {
3648     I_ i;
3649     char qp_filename[STATS_FILENAME_MAXLEN];
3650
3651     sprintf(qp_filename, QP_FILENAME_FMT, prog_argv[0]);
3652     if ((qp_file = fopen(qp_filename,"w")) == NULL ) {
3653         fprintf(stderr, "Can't open quasi-parallel profile report file %s\n", 
3654             qp_filename);
3655         do_qp_prof = 0;
3656     } else {
3657         fputs(prog_argv[0], qp_file);
3658         for(i = 1; prog_argv[i]; i++) {
3659             fputc(' ', qp_file);
3660             fputs(prog_argv[i], qp_file);
3661         }
3662         fprintf(qp_file, "+RTS -C%ld -t%ld\n"
3663                 , RTSflags.ConcFlags.ctxtSwitchTime
3664                 , RTSflags.ConcFlags.maxThreads);
3665
3666         fputs(time_str(), qp_file);
3667         fputc('\n', qp_file);
3668     }
3669 }
3670
3671 void 
3672 QP_Event0(tid, node)
3673 I_ tid;
3674 P_ node;
3675 {
3676     fprintf(qp_file, "%lu ** %lu 0x%lx\n", qp_elapsed_time(), tid, INFO_PTR(node));
3677 }
3678
3679 void 
3680 QP_Event1(event, tso)
3681 char *event;
3682 P_ tso;
3683 {
3684     fprintf(qp_file, "%lu %s %lu 0x%lx\n", qp_elapsed_time(), event,
3685             TSO_ID(tso), (W_) TSO_NAME(tso));
3686 }
3687
3688 void 
3689 QP_Event2(event, tso1, tso2)
3690 char *event;
3691 P_ tso1, tso2;
3692 {
3693     fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n", qp_elapsed_time(), event,
3694             TSO_ID(tso1), (W_) TSO_NAME(tso1), TSO_ID(tso2), (W_) TSO_NAME(tso2));
3695 }
3696 #endif /* 0 */
3697 #endif /* GRAN */
3698
3699 #if defined(CONCURRENT) && !defined(GRAN)
3700 /* romoluSnganpu' SamuS! */ 
3701
3702 unsigned CurrentProc = 0;
3703 W_ IdleProcs = ~0l, Idlers = 32; 
3704
3705 void 
3706 GranSimAllocate(I_ n, P_ node, W_ liveness)
3707 { }
3708
3709 void 
3710 GranSimUnallocate(W_ n, P_ node, W_ liveness)
3711 { }
3712
3713 void 
3714 GranSimExec(W_ ariths, W_ branches, W_ loads, W_ stores, W_ floats)
3715 { }
3716
3717 int
3718 GranSimFetch(P_ node /* , liveness_mask */ )
3719 /* I_ liveness_mask; */
3720 { return(9999999); }
3721
3722 void 
3723 GranSimSpark(W_ local, P_ node)
3724 { }
3725
3726 #if 0
3727 void 
3728 GranSimSparkAt(spark,where,identifier)
3729 sparkq spark;
3730 P_  where;        /* This should be a node; alternatively could be a GA */
3731 I_ identifier;
3732 { }
3733 #endif
3734
3735 void 
3736 GranSimBlock(STG_NO_ARGS)
3737 { }
3738 #endif /* CONCURRENT && !GRAN */
3739
3740 \end{code}
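
Finally, a sketch of what a call site for the QP event routines above might
look like.  It is wrapped in @#if 0@ and never compiled; the helper name and
the event tag @"G"@ are purely illustrative, as the real tags are chosen by
the scheduler elsewhere in the RTS.  Callers presumably test @do_qp_prof@
first, since @qp_file@ is only opened once @init_qp_profiling@ has run
successfully.

\begin{code}
#if 0 /* illustrative only; never compiled */
static void
example_qp_call_site(tso)
P_ tso;
{
    if (do_qp_prof)             /* only trace when QP profiling is on */
        QP_Event1("G", tso);
}
#endif /* 0 */
\end{code}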
3741