[project @ 1996-01-08 20:28:12 by partain]
[ghc-hetmet.git] / ghc / runtime / main / Threads.lc
1 %
2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1995
3 %
4 %************************************************************************
5 %*                                                                      *
6 \section[Threads.lc]{Thread Control Routines}
7 %*                                                                      *
8 %************************************************************************
9
10 %************************************************************************
11 %
12 \subsection[thread-overview]{Overview of the Thread Management System}
13 %
14 %************************************************************************
15
16 %************************************************************************
17 %
18 \subsection[thread-decls]{Thread Declarations}
19 %
20 %************************************************************************
21
22 % I haven't checked if GRAN can work with QP profiling. But as we use our
23 % own profiling (GR profiling) that should be irrelevant. -- HWL
24
25 \begin{code}
26
27 #if defined(CONCURRENT)
28
29 # define NON_POSIX_SOURCE /* so says Solaris */
30
31 # include "rtsdefs.h"
32 # include <setjmp.h>
33
34 #include "LLC.h"
35 #include "HLC.h"
36
37 static void init_qp_profiling(STG_NO_ARGS); /* forward decl */
38 \end{code}
39
40 @AvailableStack@ is used to determine whether an existing stack can be
41 reused without new allocation, so reducing garbage collection, and
42 stack setup time.  At present, it is only used for the first stack
43 chunk of a thread, the one that's got @StkOChunkSize@ words.
44
45 \begin{code}
46 P_ AvailableStack = Nil_closure;
47 P_ AvailableTSO = Nil_closure;
48 \end{code}
49
50 Macros for dealing with the new and improved GA field for simulating
51 parallel execution. Based on @CONCURRENT@ package. The GA field now
52 contains a mask, where the n-th bit stands for the n-th processor,
53 where this data can be found. In case of multiple copies, several bits
54 are set.  The total number of processors is bounded by @MAX_PROC@,
55 which should be <= the length of a word in bits.  -- HWL
56
57 \begin{code}
58 /* mattson thinks this is obsolete */
59
60 # if 0 && defined(GRAN)
61 extern FILE *main_statsfile;         /* Might be of general interest  HWL */
62
63 typedef unsigned long TIME;
64 typedef unsigned char PROC;
65 typedef unsigned char EVTTYPE;
66
67
68 #  undef max
69 #  define max(a,b) (a>b?a:b)
70
/* Return the index of the first processor whose bit is set in the GA
   bit-mask `ga' (one bit per PE, see the section comment above).
   Returns MAX_PROC if no bit is set -- callers must check for that.
   NB: this whole section is compiled out by the `#if 0' above. */
static PROC
ga_to_proc(W_ ga)
{ PROC i;

  /* scan bits 0 .. MAX_PROC-1 until IS_LOCAL_TO reports a hit */
  for (i=0; i<MAX_PROC && !IS_LOCAL_TO(ga,i); i++) ; 

  return (i);
}
79
/* NB: This takes a *node* rather than just a ga as input */
/* Map a heap node to a PE holding a copy of it: the first set bit in
   the node's GA/PROCS mask wins.  Dead code (`#if 0' section). */
static PROC
where_is(P_ node)
{ return (ga_to_proc(PROCS(node))); }   /* Access the GA field of the node */
84
85 static PROC
86 no_of_copies(P_ node)       /* DaH lo'lu'Qo'; currently unused */
87 { PROC i, n;
88                                 
89   for (i=0, n=0; i<MAX_PROC; i++) 
90     if (IS_LOCAL_TO(PROCS(node),i))
91       n++;; 
92
93   return (n);
94 }
95
96 # endif /* GRAN ; HWL */ 
97 \end{code}
98
99 %****************************************************************
100 %*                                                              *
101 \subsection[thread-getthread]{The Thread Scheduler}
102 %*                                                              *
103 %****************************************************************
104
105 This is the heart of the thread scheduling code.
106
107 \begin{code}
108 # if defined(GRAN_CHECK) && defined(GRAN)
109 W_ debug = 0;
110 # endif       
111
112 W_ event_trace = 0;
113 W_ event_trace_all = 0;
114
115 STGRegisterTable *CurrentRegTable = NULL;
116 P_ CurrentTSO = NULL;
117
118 # if defined(GRAN)                                                  /* HWL */
119
120 unsigned CurrentProc = 0;
121 W_ IdleProcs = ~0L, Idlers = MAX_PROC; 
122
123 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
124 # define FETCH_MASK_TSO  0x08000000  /* only bits 0, 1, 2 should be used */
125                                      /* normally */
126 # endif
127
128 I_ DoFairSchedule = 0;
129 I_ DoReScheduleOnFetch = 0;
130 I_ DoStealThreadsFirst = 0;
131 I_ SimplifiedFetch = 0;
132 I_ DoAlwaysCreateThreads = 0;
133 I_ DoGUMMFetching = 0;
134 I_ DoThreadMigration = 0;
135 I_ FetchStrategy = 4;
136 I_ PreferSparksOfLocalNodes = 0;
137
138 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
139 I_ NoForward = 0;
140 I_ PrintFetchMisses = 0, fetch_misses = 0;
141 # endif
142
143 # if defined(COUNT)
144 I_ nUPDs = 0, nUPDs_old = 0, nUPDs_new = 0, nUPDs_BQ = 0, nPAPs = 0,
145    BQ_lens = 0;
146 # endif
147
148 I_ do_gr_binary = 0;
149 I_ do_gr_profile = 0;        /* Full .gr profile or only END events? */
150 I_ no_gr_profile = 0;        /* Don't create any .gr file at all? */
151 I_ do_sp_profile = 0;
152 I_ do_gr_migration = 0;
153
154 P_ RunnableThreadsHd[MAX_PROC];
155 P_ RunnableThreadsTl[MAX_PROC];
156
157 P_ WaitThreadsHd[MAX_PROC];
158 P_ WaitThreadsTl[MAX_PROC];
159
160 sparkq PendingSparksHd[MAX_PROC][SPARK_POOLS];
161 sparkq PendingSparksTl[MAX_PROC][SPARK_POOLS];
162
163 W_ CurrentTime[MAX_PROC];       /* Per PE clock */
164
165 # if defined(GRAN_CHECK) && defined(GRAN)
166 P_ BlockedOnFetch[MAX_PROC];    /* HWL-CHECK */
167 # endif
168
169 I_ OutstandingFetches[MAX_PROC];
170
171 W_ SparksAvail = 0;     /* How many sparks are available */
172 W_ SurplusThreads = 0;  /* How many excess threads are there */
173
174 StgBool NeedToReSchedule = StgFalse; /* Do we need to reschedule following a fetch? */
175
176 /* Communication Cost Variables -- set in main program */
177
178 W_ gran_latency =      LATENCY,          gran_additional_latency = ADDITIONAL_LATENCY, 
179    gran_fetchtime =    FETCHTIME, 
180    gran_lunblocktime = LOCALUNBLOCKTIME, gran_gunblocktime =       GLOBALUNBLOCKTIME,
181    gran_mpacktime =    MSGPACKTIME,      gran_munpacktime =        MSGUNPACKTIME,
182    gran_mtidytime =    0;
183
184 W_ gran_threadcreatetime =         THREADCREATETIME,
185    gran_threadqueuetime =          THREADQUEUETIME,
186    gran_threaddescheduletime =     THREADDESCHEDULETIME,
187    gran_threadscheduletime =       THREADSCHEDULETIME,
188    gran_threadcontextswitchtime =  THREADCONTEXTSWITCHTIME;
189
190 /* Instruction Cost Variables -- set in main program */
191
192 W_ gran_arith_cost =   ARITH_COST,       gran_branch_cost =        BRANCH_COST, 
193    gran_load_cost =    LOAD_COST,        gran_store_cost =         STORE_COST, 
194    gran_float_cost =   FLOAT_COST,       gran_heapalloc_cost =     0;
195
196 W_ max_proc = MAX_PROC;
197
198 /* Granularity event types' names for output */
199
200 char *event_names[] =
201     { "STARTTHREAD", "CONTINUETHREAD", "RESUMETHREAD", 
202       "MOVESPARK", "MOVETHREAD", "FINDWORK",
203       "FETCHNODE", "FETCHREPLY"
204     };
205
206 # if defined(GRAN)
207 /* Prototypes of GrAnSim debugging functions */
208 void DEBUG_PRINT_NODE   PROTO((P_));
209 void DEBUG_TREE         PROTO((P_));
210 void DEBUG_INFO_TABLE   PROTO((P_));
211 void DEBUG_CURR_THREADQ PROTO((I_));
212 void DEBUG_THREADQ      PROTO((P_, I_));
213 void DEBUG_TSO          PROTO((P_, I_));
214 void DEBUG_EVENT        PROTO((eventq, I_));
215 void DEBUG_SPARK        PROTO((sparkq, I_));
216 void DEBUG_SPARKQ       PROTO((sparkq, I_));
217 void DEBUG_CURR_SPARKQ  PROTO((I_));
218 void DEBUG_PROC         PROTO((I_, I_));
219 void DCT(STG_NO_ARGS);
220 void DCP(STG_NO_ARGS);
221 void DEQ(STG_NO_ARGS);
222 void DSQ(STG_NO_ARGS);
223
224 void HandleFetchRequest PROTO((P_, PROC, P_));
225 # endif /* GRAN ; HWL */ 
226
227 #if defined(GRAN_CHECK) && defined(GRAN)
228 static eventq DelayedEventHd = NULL, DelayedEventTl = NULL;
229
230 static I_ noOfEvents = 0;
231 static I_ event_counts[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
232 #endif
233
234 TIME SparkStealTime();
235
236 /* Fcts for manipulating event queues have been deleted  -- HWL */
237 /* ---------------------------------- */
238
239 static void
240 print_spark(spark)
241   sparkq spark;
242 {
243
244   if (spark==NULL)
245     fprintf(stderr,"Spark: NIL\n");
246   else
247     fprintf(stderr,"Spark: Node 0x%lx, Name 0x%lx, Exported %s, Prev 0x%x, Next 0x%x\n",
248             (W_) SPARK_NODE(spark), SPARK_NAME(spark), 
249             ((SPARK_EXPORTED(spark))?"True":"False"), 
250             SPARK_PREV(spark), SPARK_NEXT(spark) );
251 }
252
253 static print_sparkq(hd)
254 sparkq hd;
255 {
256   sparkq x;
257
258   fprintf(stderr,"Spark Queue with root at %x:\n",hd);
259   for (x=hd; x!=NULL; x=SPARK_NEXT(x)) {
260     print_spark(x);
261   }
262 }
263
/* Debugging aid: dump one event record to stderr ("Evt: NIL" for a
   null pointer).
   NOTE(review): event_names[] above has only 8 entries; this trusts
   EVENT_TYPE(event) to be a valid index -- confirm against the event
   type enumeration.  The commented-out trailing arguments correspond
   to specifiers that were removed from the format string. */
static print_event(event)
eventq event;
{

  if (event==NULL)
    fprintf(stderr,"Evt: NIL\n");
  else
    fprintf(stderr,"Evt: %s (%u), PE %u [%u], Time %lu, TSO 0x%lx, node 0x%lx\n",
              event_names[EVENT_TYPE(event)],EVENT_TYPE(event),
              EVENT_PROC(event), EVENT_CREATOR(event), 
              EVENT_TIME(event), EVENT_TSO(event), EVENT_NODE(event) /*,
              EVENT_SPARK(event), EVENT_NEXT(event)*/ );

}
278
279 static print_eventq(hd)
280 eventq hd;
281 {
282   eventq x;
283
284   fprintf(stderr,"Event Queue with root at %x:\n",hd);
285   for (x=hd; x!=NULL; x=EVENT_NEXT(x)) {
286     print_event(x);
287   }
288 }
289
290 /* ---------------------------------- */
291
292 #if 0 /* moved */
/* Pop the head of the global event queue (EventHd) and return it.
   The event returned by the PREVIOUS call is freed here (kept in the
   static `entry'), so a caller may use the result only until the next
   call.  Exits the program if the queue is empty.
   Dead code: this `#if 0' section was moved elsewhere. */
static eventq getnextevent()
{
  static eventq entry = NULL;   /* event handed out by the previous call */

  if(EventHd == NULL)
    {
      fprintf(stderr,"No next event\n");
      exit(EXIT_FAILURE); /* ToDo: abort()? EXIT??? */
    }

  /* reclaim the event returned last time, now that the caller is done */
  if(entry != NULL)
    free((char *)entry);

#if defined(GRAN_CHECK) && defined(GRAN)
  if (debug & 0x20) {     /* count events */
    noOfEvents++;
    event_counts[EVENT_TYPE(EventHd)]++;
  }
#endif       

  /* unlink the head and hand it out */
  entry = EventHd;
  EventHd = EVENT_NEXT(EventHd);
  return(entry);
}
317
/* ToDo: replace malloc/free with a free list */

/* Insert `newentry' into the time-ordered global event queue.
   Tie-breaking at equal times depends on the event type: FINDWORK goes
   before other events of the same time, CONTINUETHREAD after them.
   `prev' is a pointer to the link field to rewrite (starting with
   &EventHd itself), so head insertion needs no special case.
   Precondition: EVENT_NEXT(newentry) is NULL (set by newevent), which
   keeps the empty-queue and append-at-tail paths correct. */
static insert_event(newentry)
eventq newentry;
{
  EVTTYPE evttype = EVENT_TYPE(newentry);
  eventq event, *prev;

  /* Search the queue and insert at the right point:
     FINDWORK before everything, CONTINUETHREAD after everything.

     This ensures that we find any available work after all threads have
     executed the current cycle.  This level of detail would normally be
     irrelevant, but matters for ridiculously low latencies...
  */

  if(EventHd == NULL)
    EventHd = newentry;
  else 
    {
      for (event = EventHd, prev=&EventHd; event != NULL; 
           prev = &(EVENT_NEXT(event)), event = EVENT_NEXT(event))
        {
          /* per-type ordering predicate: >= for FINDWORK (insert before
             equal-time events), > for CONTINUETHREAD (insert after), and
             for everything else, after equal-time FINDWORKs only */
          if(evttype == FINDWORK ?       (EVENT_TIME(event) >=  EVENT_TIME(newentry)) :
             evttype == CONTINUETHREAD ? (EVENT_TIME(event) > EVENT_TIME(newentry)) : 
                                         (EVENT_TIME(event) >  EVENT_TIME(newentry) ||
                                         (EVENT_TIME(event) == EVENT_TIME(newentry) &&
                                          EVENT_TYPE(event) != FINDWORK )))
            {
              *prev = newentry;
              EVENT_NEXT(newentry) = event;
              break;
            }
        }
      /* ran off the end: append at the tail */
      if (event == NULL)
        *prev = newentry;
    }
}
356
/* Allocate a fresh event record, fill in all of its fields from the
   arguments, and insert it into the global event queue via
   insert_event.  xmalloc is assumed to abort rather than return NULL
   -- TODO confirm; the result is not checked here.
   Dead code: this `#if 0' section was moved elsewhere. */
static newevent(proc,creator,time,evttype,tso,node,spark)
PROC proc, creator;
TIME time;
EVTTYPE evttype;
P_ tso, node;
sparkq spark;
{
  extern P_ xmalloc();
  eventq newentry = (eventq) xmalloc(sizeof(struct event));

  EVENT_PROC(newentry) = proc;
  EVENT_CREATOR(newentry) = creator;
  EVENT_TIME(newentry) = time;
  EVENT_TYPE(newentry) = evttype;
  EVENT_TSO(newentry) =  tso;
  EVENT_NODE(newentry) =  node;
  EVENT_SPARK(newentry) =  spark;
  EVENT_NEXT(newentry) = NULL;   /* required by insert_event */

  insert_event(newentry);
}
378 #endif /* 0 moved */
379
380 # else                                                            /* !GRAN */
381
382 P_ RunnableThreadsHd = Nil_closure;
383 P_ RunnableThreadsTl = Nil_closure;
384
385 P_ WaitingThreadsHd = Nil_closure;
386 P_ WaitingThreadsTl = Nil_closure;
387
388 PP_ PendingSparksBase[SPARK_POOLS];
389 PP_ PendingSparksLim[SPARK_POOLS];
390
391 PP_ PendingSparksHd[SPARK_POOLS];
392 PP_ PendingSparksTl[SPARK_POOLS];
393
394 # endif                                                      /* GRAN ; HWL */
395
396 static jmp_buf scheduler_loop;
397
398 I_ MaxThreads = DEFAULT_MAX_THREADS;
399 I_ required_thread_count = 0;
400 I_ advisory_thread_count = 0;
401
402 EXTFUN(resumeThread);
403
404 P_ NewThread PROTO((P_, W_));
405
406 I_ context_switch = 0;
407
408 I_ contextSwitchTime = CS_MIN_MILLISECS;  /* In milliseconds */
409
410 #if !defined(GRAN)
411
412 I_ threadId = 0;
413
414 I_ MaxLocalSparks = DEFAULT_MAX_LOCAL_SPARKS;
415 I_ SparkLimit[SPARK_POOLS];
416
417 extern I_ doSanityChks;
418 extern void checkAStack(STG_NO_ARGS);
419
420 rtsBool
421 initThreadPools(size)
422 I_ size;
423 {
424     SparkLimit[ADVISORY_POOL] = SparkLimit[REQUIRED_POOL] = size;
425     if ((PendingSparksBase[ADVISORY_POOL] = (PP_) malloc(size * sizeof(P_))) == NULL)
426         return rtsFalse;
427     if ((PendingSparksBase[REQUIRED_POOL] = (PP_) malloc(size * sizeof(P_))) == NULL)
428         return rtsFalse;
429     PendingSparksLim[ADVISORY_POOL] = PendingSparksBase[ADVISORY_POOL] + size;
430     PendingSparksLim[REQUIRED_POOL] = PendingSparksBase[REQUIRED_POOL] + size;
431     return rtsTrue;
432 }
433 #endif
434
435 #ifdef PAR
436 rtsBool sameThread;
437 #endif
438
/*
 * ScheduleThreads: top-level entry of the thread scheduler.
 *
 * Sets up the virtual timer used to drive context switching, performs
 * per-PE queue initialization (GRAN build), creates the main thread
 * from `topClosure' -- GCing first if the heap is too full -- and then
 * enters the main scheduler loop.  The setjmp below is the target that
 * ReSchedule longjmps back to; each pass through the loop picks the
 * TSO at the head of the runnable queue and runs it via miniInterpret.
 * Returns only when the longjmp delivers a negative value.
 *
 * NOTE(review): the PAR / GRAN / sequential variants are selected by
 * the preprocessor and share brace structure across #if branches; keep
 * the conditional layout intact when editing.
 */
void
ScheduleThreads(topClosure)
P_ topClosure;
{
    I_ i;
    P_ tso;

    /* Start the virtual timer that forces periodic context switches
       (and drives time profiling in cost-centre/GUM builds); with no
       timer, context_switch is cleared and threads run to blocking. */
#if defined(USE_COST_CENTRES) || defined(GUM)
    if (time_profiling || contextSwitchTime > 0) {
        if (initialize_virtual_timer(tick_millisecs)) {
#else
    if (contextSwitchTime > 0) {
        if (initialize_virtual_timer(contextSwitchTime)) {
#endif
            fflush(stdout);
            fprintf(stderr, "Can't initialize virtual timer.\n");
            EXIT(EXIT_FAILURE);
        }
    } else
        context_switch = 0 /* 1 HWL */;

#if defined(GRAN_CHECK) && defined(GRAN)                                           /* HWL */
    if ( debug & 0x40 ) {
      fprintf(stderr,"D> Doing init in ScheduleThreads now ...\n");
    }
#endif

#if defined(GRAN)                                                  /* KH */
    /* Reset every simulated PE: empty run/wait queues, empty spark
       pools, no outstanding fetches. */
    for (i=0; i<max_proc; i++) 
      {
        RunnableThreadsHd[i] = RunnableThreadsTl[i] = Nil_closure;
        WaitThreadsHd[i] = WaitThreadsTl[i] = Nil_closure;
        PendingSparksHd[i][REQUIRED_POOL] = PendingSparksHd[i][ADVISORY_POOL] = 
        PendingSparksTl[i][REQUIRED_POOL] = PendingSparksTl[i][ADVISORY_POOL] = 
            NULL; 

# if defined(GRAN_CHECK)
        if (debug & 0x04) 
          BlockedOnFetch[i] = 0; /*- StgFalse; -*/              /* HWL-CHECK */
# endif
        OutstandingFetches[i] = 0;
      }

    CurrentProc = MainProc;
#endif /* GRAN */

    if (DO_QP_PROF)
        init_qp_profiling();

    /*
     * We perform GC so that a signal handler can install a new TopClosure and start
     * a new main thread.
     */
#ifdef PAR
    if (IAmMainThread) {
#endif
    /* Create the main thread; if the heap is exhausted, stash the top
       closure in CurrentTSO so the GC treats it as a root, collect,
       and retry once before giving up. */
    if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
        /* kludge to save the top closure as a root */
        CurrentTSO = topClosure;
        ReallyPerformThreadGC(0, rtsTrue);
        topClosure = CurrentTSO;
        if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
            fflush(stdout);
            fprintf(stderr, "Not enough heap for main thread\n");
            EXIT(EXIT_FAILURE);             
        }
    }           
#ifndef GRAN
    RunnableThreadsHd = RunnableThreadsTl = tso;
#else
    /* NB: CurrentProc must have been set to MainProc before that! -- HWL */
    ThreadQueueHd = ThreadQueueTl = tso;

# if defined(GRAN_CHECK)
    if ( debug & 0x40 ) {
      fprintf(stderr,"D> MainTSO has been initialized (0x%x)\n", tso);
    }
# endif      
#endif

#ifdef PAR
    if (do_gr_profile) {
        DumpGranEvent(GR_START, tso);
        sameThread = rtsTrue;
    }
#endif

#if defined(GRAN)
    MAKE_BUSY(MainProc);  /* Everything except the main PE is idle */
#endif      

    required_thread_count = 1;
    advisory_thread_count = 0;
#ifdef PAR
    }   /*if IAmMainThread ...*/
#endif

    /* ----------------------------------------------------------------- */
    /* This part is the MAIN SCHEDULER LOOP; jumped at from ReSchedule   */
    /* ----------------------------------------------------------------- */

    /* ReSchedule longjmps here; a negative value means "shut down". */
    if(setjmp(scheduler_loop) < 0)
        return;

#if defined(GRAN) && defined(GRAN_CHECK)
    if ( debug & 0x80 ) {
      fprintf(stderr,"D> MAIN Schedule Loop; ThreadQueueHd is ");
      DEBUG_TSO(ThreadQueueHd,1);
      /* if (ThreadQueueHd == MainTSO) {
        fprintf(stderr,"D> Event Queue is now:\n");
        DEQ();
      } */
    }
#endif

#ifdef PAR
    if (PendingFetches != Nil_closure) {
        processFetches();
    }

#elif defined(GRAN)
    if (ThreadQueueHd == Nil_closure) {
        fprintf(stderr, "No runnable threads!\n");
        EXIT(EXIT_FAILURE);
    }
    if (DO_QP_PROF > 1 && CurrentTSO != ThreadQueueHd) {
        QP_Event1("AG", ThreadQueueHd);
    }
#endif

#ifndef PAR
    /* Sequential: if nothing is runnable, block for external events
       (I/O etc.); with no waiting threads either we are deadlocked. */
    while (RunnableThreadsHd == Nil_closure) {
        /* If we've no work */
        if (WaitingThreadsHd == Nil_closure) {
            fflush(stdout);
            fprintf(stderr, "No runnable threads!\n");
            EXIT(EXIT_FAILURE);
        }
        AwaitEvent(0);
    }
#else
    if (RunnableThreadsHd == Nil_closure) {
        if (advisory_thread_count < MaxThreads &&
          (PendingSparksHd[REQUIRED_POOL] < PendingSparksTl[REQUIRED_POOL] ||
          PendingSparksHd[ADVISORY_POOL] < PendingSparksTl[ADVISORY_POOL])) {
            /* 
             * If we're here (no runnable threads) and we have pending sparks,
             * we must have a space problem.  Get enough space to turn one of
             * those pending sparks into a thread...ReallyPerformGC doesn't 
             * return until the space is available, so it may force global GC.
             * ToDo: Is this unnecessary here?  Duplicated in ReSchedule()? --JSM
             */
            ReallyPerformThreadGC(THREAD_SPACE_REQUIRED, rtsTrue);
            SAVE_Hp -= THREAD_SPACE_REQUIRED;
        } else {
            /*
             * We really have absolutely no work.  Send out a fish (there may be
             * some out there already), and wait for something to arrive.  We 
             * clearly can't run any threads until a SCHEDULE or RESUME arrives, 
             * and so that's what we're hoping to see.  (Of course, we still have 
             * to respond to other types of messages.)
             */
            if (!fishing)
                sendFish(choosePE(), mytid, NEW_FISH_AGE, NEW_FISH_HISTORY, 
                  NEW_FISH_HUNGER);
            processMessages();
        }
        ReSchedule(0);
    } else if (PacketsWaiting()) {  /* Look for incoming messages */
        processMessages();
    }
#endif /* PAR */

    if (DO_QP_PROF > 1 && CurrentTSO != RunnableThreadsHd) {
        QP_Event1("AG", RunnableThreadsHd);
    }

#ifdef PAR
    if (do_gr_profile && !sameThread)
        DumpGranEvent(GR_SCHEDULE, RunnableThreadsHd);
#endif

#if !GRAN /* ROUND_ROBIN */
    /* Dequeue the head TSO and make it current (round-robin). */
    CurrentTSO = RunnableThreadsHd;
    RunnableThreadsHd = TSO_LINK(RunnableThreadsHd);
    TSO_LINK(CurrentTSO) = Nil_closure;
    
    if (RunnableThreadsHd == Nil_closure)
        RunnableThreadsTl = Nil_closure;

#else /* GRAN */
    /* This used to be Round Robin. KH.  
       I think we can ignore that, and move it down to ReSchedule instead.
    */
    CurrentTSO = ThreadQueueHd;
    /* TSO_LINK(CurrentTSO) = Nil_closure;  humbug */
#endif

    /* If we're not running a timer, just leave the flag on */
    if (contextSwitchTime > 0)
        context_switch = 0;

#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
    /* Sanity checks before running: never execute a nil TSO, a TSO on
       a PE that is blocked on a fetch, or a TSO marked as asleep. */
    if (CurrentTSO == Nil_closure) {
        fprintf(stderr,"Error: Trying to execute Nil_closure on proc %d (@ %d)\n",
                CurrentProc,CurrentTime[CurrentProc]);
        exit(99);
      }

    if (debug & 0x04) {
      if (BlockedOnFetch[CurrentProc]) {
        fprintf(stderr,"Error: Trying to execute TSO 0x%x on proc %d (@ %d) which is blocked-on-fetch by TSO 0x%x\n",
              CurrentTSO,CurrentProc,CurrentTime[CurrentProc],BlockedOnFetch[CurrentProc]);
        exit(99);
      }
    }

    if ( (debug & 0x10) &&
         (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) ) {
      fprintf(stderr,"Error: Trying to execute TSO 0x%x on proc %d (@ %d) which should be asleep!\n",
              CurrentTSO,CurrentProc,CurrentTime[CurrentProc]);
        exit(99);
    }
#endif

    /* Run the chosen thread; control returns via longjmp(scheduler_loop). */
# if defined(__STG_TAILJUMPS__)
    miniInterpret((StgFunPtr)resumeThread);
# else
    if (doSanityChks)
        miniInterpret_debug((StgFunPtr)resumeThread, checkAStack);
    else
        miniInterpret((StgFunPtr)resumeThread);
# endif /* __STG_TAILJUMPS__ */
}
673 \end{code}
674
675 % Some remarks on GrAnSim -- HWL
676
The ReSchedule function is the heart of GrAnSim.  Depending on its
parameter it issues a CONTINUETHREAD event to carry on executing the
current thread in due course, or it watches out for new work (e.g. when
called from EndThread).
680
681 Then it picks the next   event (getnextevent) and handles it  appropriately
682 (see switch construct). Note that a continue  in the switch causes the next
683 event to be handled  and a break  causes a jmp  to the scheduler_loop where
684 the TSO at the head of the current processor's runnable queue is executed.
685
686 ReSchedule is mostly  entered from HpOverflow.lc:PerformReSchedule which is
687 itself called via the GRAN_RESCHEDULE macro in the compiler generated code.
688
689 \begin{code}
690 #if defined(GRAN)
691
692 void
693 ReSchedule(what_next)
694 int what_next;           /* Run the current thread again? */
695 {
696   sparkq spark, nextspark;
697   P_ tso;
698   P_ node;
699   eventq event;
700
701 #if defined(GRAN_CHECK) && defined(GRAN)
702   if ( debug & 0x80 ) {
703     fprintf(stderr,"D> Entering ReSchedule with mode %u; tso is\n",what_next);
704     DEBUG_TSO(ThreadQueueHd,1);
705   }
706 #endif
707
708 #if defined(GRAN_CHECK) && defined(GRAN)
709   if ( (debug & 0x80) || (debug & 0x40 ) )
710       if (what_next<FIND_THREAD || what_next>CHANGE_THREAD)
711         fprintf(stderr,"ReSchedule: illegal parameter %u for what_next\n",
712                 what_next);
713 #endif
714     
715   /* Run the current thread again (if there is one) */
716   if(what_next==SAME_THREAD && ThreadQueueHd != Nil_closure)
717     {
718       /* A bit of a hassle if the event queue is empty, but ... */
719       CurrentTSO = ThreadQueueHd;
720
721       newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
722                CONTINUETHREAD,CurrentTSO,Nil_closure,NULL);
723
724       /* This code does round-Robin, if preferred. */
725       if(DoFairSchedule && TSO_LINK(CurrentTSO) != Nil_closure)
726         {
727           if(do_gr_profile)
728             DumpGranEvent(GR_DESCHEDULE,ThreadQueueHd);
729           ThreadQueueHd =           TSO_LINK(CurrentTSO);
730           TSO_LINK(ThreadQueueTl) = CurrentTSO;
731           ThreadQueueTl =           CurrentTSO;
732           TSO_LINK(CurrentTSO) =    Nil_closure;
733           if (do_gr_profile)
734             DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
735           CurrentTime[CurrentProc] += gran_threadcontextswitchtime;
736         }
737     }
738   /* Schedule `next thread' which is at ThreadQueueHd now i.e. thread queue */
739   /* has been updated before that already. */ 
740   else if(what_next==NEW_THREAD && ThreadQueueHd != Nil_closure)
741     {
742 #if defined(GRAN_CHECK) && defined(GRAN)
743       if(DoReScheduleOnFetch)
744         {
745           fprintf(stderr,"ReSchedule(NEW_THREAD) shouldn't be used!!\n");
746           exit(99);
747         }
748 #endif
749
750       if(do_gr_profile)
751         DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
752
753       CurrentTSO = ThreadQueueHd;
754       newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
755                CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
756       
757       CurrentTime[CurrentProc] += gran_threadcontextswitchtime;
758     }
759
760   /* We go in here if the current thread is blocked on fetch => don'd CONT */
761   else if(what_next==CHANGE_THREAD)
762     {
763       /* just fall into event handling loop for next event */
764     }
765
766   /* We go in here if we have no runnable threads or what_next==0 */
767   else
768     {
769       newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
770                FINDWORK,Nil_closure,Nil_closure,NULL);
771       CurrentTSO = Nil_closure;
772     }
773
774   /* ----------------------------------------------------------------- */
775   /* This part is the EVENT HANDLING LOOP                              */
776   /* ----------------------------------------------------------------- */
777
778   do {
779     /* Choose the processor with the next event */
780     event = getnextevent();
781     CurrentProc = EVENT_PROC(event);
782     if(EVENT_TIME(event) > CurrentTime[CurrentProc])
783       CurrentTime[CurrentProc] = EVENT_TIME(event);
784
785     MAKE_BUSY(CurrentProc);
786
787 #if defined(GRAN_CHECK) && defined(GRAN)
788     if (debug & 0x80)
789       fprintf(stderr,"D> After getnextevent, before HandleIdlePEs\n");
790 #endif
791
792     /* Deal with the idlers */
793     HandleIdlePEs();
794
795 #if defined(GRAN_CHECK) && defined(GRAN)
796     if (event_trace && 
797         (event_trace_all || EVENT_TYPE(event) != CONTINUETHREAD ||
798          (debug & 0x80) ))
799       print_event(event);
800 #endif
801
802     switch (EVENT_TYPE(event))
803       {
804         /* Should just be continuing execution */
805         case CONTINUETHREAD:
806 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
807               if ( (debug & 0x04) && BlockedOnFetch[CurrentProc]) {
808                 fprintf(stderr,"Warning: Discarding CONTINUETHREAD on blocked proc %u  @ %u\n",
809                         CurrentProc,CurrentTime[CurrentProc]);
810                 print_event(event);
811                 continue;
812               }
813 #endif
814           if(ThreadQueueHd==Nil_closure) 
815             {
816               newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
817                        FINDWORK,Nil_closure,Nil_closure,NULL);
818               continue; /* Catches superfluous CONTINUEs -- should be unnecessary */
819             }
820           else 
821             break;   /* fall into scheduler loop */
822
823         case FETCHNODE:
824 #if defined(GRAN_CHECK) && defined(GRAN)
825           if (SimplifiedFetch) {
826             fprintf(stderr,"Error: FETCHNODE events not valid with simplified fetch\n");
827             exit (99);
828           }
829 #endif       
830
831           CurrentTime[CurrentProc] += gran_munpacktime;
832           HandleFetchRequest(EVENT_NODE(event),
833                              EVENT_CREATOR(event),
834                              EVENT_TSO(event));
835           continue;
836
837         case FETCHREPLY:
838 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
839           if (SimplifiedFetch) {
840             fprintf(stderr,"Error: FETCHREPLY events not valid with simplified fetch\n");
841             exit (99);
842           }
843
844           if (debug & 0x10) {
845             if (TSO_TYPE(EVENT_TSO(event)) & FETCH_MASK_TSO) {
846               TSO_TYPE(EVENT_TSO(event)) &= ~FETCH_MASK_TSO;
847             } else {
848               fprintf(stderr,"Error: FETCHREPLY: TSO 0x%x has fetch mask not set @ %d\n",
849                       CurrentTSO,CurrentTime[CurrentProc]);
850               exit(99);
851             }
852           }
853
854           if (debug & 0x04) {
855             if (BlockedOnFetch[CurrentProc]!=ThreadQueueHd) {
856               fprintf(stderr,"Error: FETCHREPLY: Proc %d (with TSO 0x%x) not blocked-on-fetch by TSO 0x%x\n",
857                       CurrentProc,CurrentTSO,BlockedOnFetch[CurrentProc]);
858               exit(99);
859             } else {
860               BlockedOnFetch[CurrentProc] = 0; /*- StgFalse; -*/
861             }
862           }
863 #endif
864
865           /* Copy or  move node to CurrentProc */
866           if (FetchNode(EVENT_NODE(event),
867                         EVENT_CREATOR(event),
868                         EVENT_PROC(event)) ) {
869             /* Fetch has failed i.e. node has been grabbed by another PE */
870             P_ node = EVENT_NODE(event), tso = EVENT_TSO(event);
871             PROC p = where_is(node);
872             TIME fetchtime;
873
874 #if defined(GRAN_CHECK) && defined(GRAN)
875             if (PrintFetchMisses) {
876               fprintf(stderr,"Fetch miss @ %lu: node 0x%x is at proc %u (rather than proc %u)\n",
877                       CurrentTime[CurrentProc],node,p,EVENT_CREATOR(event));
878               fetch_misses++;
879             }
880 #endif  /* GRAN_CHECK */
881
882             CurrentTime[CurrentProc] += gran_mpacktime;
883
884             /* Count fetch again !? */
885             ++TSO_FETCHCOUNT(tso);
886             TSO_FETCHTIME(tso) += gran_fetchtime;
887               
888             fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p]) +
889                         gran_latency;
890
891             /* Chase the grabbed node */
892             newevent(p,CurrentProc,fetchtime,FETCHNODE,tso,node,NULL);
893
894 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
895               if (debug & 0x04)
896                 BlockedOnFetch[CurrentProc] = tso; /*-StgTrue;-*/
897
898               if (debug & 0x10) 
899                 TSO_TYPE(tso) |= FETCH_MASK_TSO;
900 #endif
901
902             CurrentTime[CurrentProc] += gran_mtidytime;
903
904             continue; /* NB: no REPLy has been processed; tso still sleeping */
905           }
906           
907           /* -- Qapla'! Fetch has been successful; node is here, now  */
908           ++TSO_FETCHCOUNT(EVENT_TSO(event));
909           TSO_FETCHTIME(EVENT_TSO(event)) += gran_fetchtime;
910               
911           if (do_gr_profile)
912             DumpGranEventAndNode(GR_REPLY,EVENT_TSO(event),
913                                  EVENT_NODE(event),EVENT_CREATOR(event));
914
915           --OutstandingFetches[CurrentProc];
916 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
917           if (OutstandingFetches[CurrentProc] < 0) {
918             fprintf(stderr,"OutstandingFetches of proc %u has become negative\n",CurrentProc);
919             exit (99);
920           }
921 #endif
922
923           if (!DoReScheduleOnFetch) {
924             CurrentTSO = EVENT_TSO(event);          /* awaken blocked thread */
925             newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
926                      CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
927             TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] - 
928                                                TSO_BLOCKEDAT(EVENT_TSO(event));
929             if(do_gr_profile)
930               DumpGranEvent(GR_RESUME,EVENT_TSO(event));
931             continue;
932           } else {
933             /* fall through to RESUMETHREAD */
934           }
935
936         case RESUMETHREAD:  /* Move from the blocked queue to the tail of */
937                             /* the runnable queue ( i.e. Qu' SImqa'lu') */ 
938           TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] - 
939                                              TSO_BLOCKEDAT(EVENT_TSO(event));
940           StartThread(event,GR_RESUME);
941           continue;
942
943         case STARTTHREAD:
944           StartThread(event,GR_START);
945           continue;
946
947         case MOVETHREAD:
948 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
949           if (!DoThreadMigration) {
950             fprintf(stderr,"MOVETHREAD events should never occur without -bM\n");
951             exit (99);
952           }
953 #endif
954           CurrentTime[CurrentProc] += gran_munpacktime;
955           StartThread(event,GR_STOLEN);
956           continue; /* to the next event */
957
958         case MOVESPARK:
959           CurrentTime[CurrentProc] += gran_munpacktime;
960           spark = EVENT_SPARK(event);
961
962           ADD_TO_SPARK_QUEUE(spark); /* NB: this macro side-effects its arg.
963                                         so the assignment above is needed.  */
964
965           if(do_sp_profile)
966             DumpSparkGranEvent(SP_ACQUIRED,spark);
967
968           ++SparksAvail;                  /* Probably Temporarily */
969           /* Drop into FINDWORK */
970
971           if (!DoReScheduleOnFetch &&
972                (ThreadQueueHd != Nil_closure) ) { /* If we block on fetch then go */
973                 continue;                      /* to next event (i.e. leave */
974           }                                    /* spark in sparkq for now) */
975
976         case FINDWORK:
977           if((ThreadQueueHd == Nil_closure || DoAlwaysCreateThreads)
978              && (FetchStrategy >= 2 || OutstandingFetches[CurrentProc] == 0))
979             {
980               W_ found = 0;
981               sparkq spark_of_non_local_node = NULL;
982
983               /* Choose a spark from the local spark queue */
984               spark = SparkQueueHd;
985
986               while (spark != NULL && !found)
987                 {
988                   node = SPARK_NODE(spark);
989                   if (!SHOULD_SPARK(node)) 
990                     {
991                       if(do_sp_profile)
992                         DumpSparkGranEvent(SP_PRUNED,spark);
993
994                       assert(spark != NULL);
995
996                       SparkQueueHd = SPARK_NEXT(spark);
997                       if(SparkQueueHd == NULL)
998                         SparkQueueTl = NULL;
999
1000                       DisposeSpark(spark);
1001                   
1002                       spark = SparkQueueHd;
1003                     }
1004                   /* -- node should eventually be sparked */
1005                   else if (PreferSparksOfLocalNodes && 
1006                           !IS_LOCAL_TO(PROCS(node),CurrentProc)) 
1007                     {
1008                       /* We have seen this spark before => no local sparks */
1009                       if (spark==spark_of_non_local_node) {
1010                         found = 1;
1011                         break;
1012                       }
1013
1014                       /* Remember first non-local node */
1015                       if (spark_of_non_local_node==NULL)
1016                         spark_of_non_local_node = spark;
1017
1018                       /* Special case: 1 elem sparkq with non-local spark */
1019                       if (spark==SparkQueueTl) {
1020                         found = 1;
1021                         break;
1022                       }                 
1023
1024                       /* Put spark (non-local!) at the end of the sparkq */
1025                       SPARK_NEXT(SparkQueueTl) = spark;
1026                       SparkQueueHd = SPARK_NEXT(spark);
1027                       SPARK_NEXT(spark) = NULL;
1028                       SparkQueueTl = spark;
1029  
1030                       spark = SparkQueueHd;
1031                     }
1032                   else
1033                     {
1034                       found = 1;
1035                     }
1036                 }
1037
1038               /* We've found a node; now, create thread (DaH Qu' yIchen) */
1039               if (found) 
1040                 {
1041                   CurrentTime[CurrentProc] += gran_threadcreatetime;
1042
1043                   node = SPARK_NODE(spark);
1044                   if((tso = NewThread(node, T_REQUIRED))==NULL)
1045                     {
1046                       /* Some kind of backoff needed here in case there's too little heap */
1047                       newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc]+1,
1048                                FINDWORK,Nil_closure,Nil_closure,NULL);
1049                       ReallyPerformThreadGC(TSO_HS+TSO_CTS_SIZE,StgTrue);
1050                       spark = NULL;
1051                       continue; /* to the next event, eventually */
1052                     }
1053                       
1054                   TSO_EXPORTED(tso) =  SPARK_EXPORTED(spark);
1055                   TSO_LOCKED(tso) =    !SPARK_GLOBAL(spark);
1056                   TSO_SPARKNAME(tso) = SPARK_NAME(spark);
1057
1058                   newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1059                            STARTTHREAD,tso,Nil_closure,NULL);
1060
1061                   assert(spark != NULL);
1062
1063                   SparkQueueHd = SPARK_NEXT(spark);
1064                   if(SparkQueueHd == NULL)
1065                     SparkQueueTl = NULL;
1066                   
1067                   DisposeSpark(spark);
1068                 }
1069               else
1070               /* Make the PE idle if nothing sparked and we have no threads. */
1071                 {
1072                   if(ThreadQueueHd == Nil_closure)
1073 #if defined(GRAN_CHECK) && defined(GRAN)
1074                     {
1075                     MAKE_IDLE(CurrentProc);
1076                     if ( (debug & 0x40) || (debug & 0x80) ) {
1077                         fprintf(stderr,"Warning in FINDWORK handling: No work found for PROC %u\n",CurrentProc);
1078                       }
1079                   }
1080 #else 
1081                     MAKE_IDLE(CurrentProc);
1082 #endif  /* GRAN_CHECK */
1083                   else
1084                     newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1085                              CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
1086                 }
1087
1088               continue; /* to the next event */
1089             }
1090           else
1091             {
1092 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
1093               if ( (debug & 0x04) &&
1094                    (!DoReScheduleOnFetch &&  ThreadQueueHd != Nil_closure)
1095                   ) {
1096                 fprintf(stderr,"Waning in FINDWORK handling:\n");
1097                 fprintf(stderr,"ThreadQueueHd!=Nil_closure should never happen with !DoReScheduleOnFetch");
1098               }
1099 #endif
1100               if (FetchStrategy < 2 && OutstandingFetches[CurrentProc] != 0)
1101                 continue;  /* to next event */
1102               else
1103                 break;     /* run ThreadQueueHd */
1104             }
1105             /* never reached */
1106
1107         default:
1108           fprintf(stderr,"Illegal event type %u\n",EVENT_TYPE(event));
1109           continue;
1110         }
1111     _longjmp(scheduler_loop, 1);
1112   } while(1);
1113   }
1114 \end{code}
1115
1116 Here follows the non-GRAN @ReSchedule@.
1117 \begin{code}
1118 #else      /* !GRAN */
1119
/* Non-GrAnSim scheduler entry point: turn pending sparks into threads,
 * requeue the current thread if requested, and longjmp back into the
 * scheduler loop.  Does not return normally.
 *
 * again != 0 means the thread that just yielded should run again
 * (it is pushed at the head in the PAR world, appended round-robin
 * in the sequential-concurrent world).
 */
void
ReSchedule(again)
int again;                              /* Run the current thread again? */
{
    P_ spark;
    PP_ sparkp;
    P_ tso;

#ifdef PAR
    /* 
     * In the parallel world, we do unfair scheduling for the moment.
     * Ultimately, this should all be merged with the more sophicticated
     * GrAnSim scheduling options.  (Of course, some provision should be
     * made for *required* threads to make sure that they don't starve,
     * but for now we assume that no one is running concurrent Haskell on
     * a multi-processor platform.)
     */

    sameThread = again;

    if (again) {
        if (RunnableThreadsHd == Nil_closure)
            RunnableThreadsTl = CurrentTSO;
        TSO_LINK(CurrentTSO) = RunnableThreadsHd;
        RunnableThreadsHd = CurrentTSO;
    }

#else

    /* 
     * In the sequential world, we assume that the whole point of running
     * the threaded build is for concurrent Haskell, so we provide round-robin
     * scheduling.
     */
    
    if (again) {
        if(RunnableThreadsHd == Nil_closure) {
            RunnableThreadsHd = CurrentTSO;
        } else {
            TSO_LINK(RunnableThreadsTl) = CurrentTSO;
            if (DO_QP_PROF > 1) {
                QP_Event1("GA", CurrentTSO);
            }
        }
        RunnableThreadsTl = CurrentTSO;
    }
#endif

#if 1
    /* 
     * Debugging code, which is useful enough (and cheap enough) to compile
     * in all the time.  This makes sure that we don't access saved registers,
     * etc. in threads which are supposed to be sleeping.
     */
    CurrentTSO = Nil_closure;
    CurrentRegTable = NULL;
#endif

    /* First the required sparks */

    for (sparkp = PendingSparksHd[REQUIRED_POOL]; 
      sparkp < PendingSparksTl[REQUIRED_POOL]; sparkp++) {
        spark = *sparkp;
        if (SHOULD_SPARK(spark)) {      
            /* NewThread returns NULL when the heap is too full; stop
               turning sparks into threads and leave the rest pending. */
            if ((tso = NewThread(spark, T_REQUIRED)) == NULL)
                break;
            if (RunnableThreadsHd == Nil_closure) {
                RunnableThreadsHd = tso;
#ifdef PAR
                if (do_gr_profile) {
                    DumpGranEvent(GR_START, tso);
                    sameThread = rtsTrue;
                }
#endif
            } else {
                TSO_LINK(RunnableThreadsTl) = tso;
#ifdef PAR
                if (do_gr_profile)
                    DumpGranEvent(GR_STARTQ, tso);
#endif
            }
            RunnableThreadsTl = tso;
        } else {
            /* Spark's node is no longer worth evaluating: discard it
               (profiling events note the pruning). */
            if (DO_QP_PROF)
                QP_Event0(threadId++, spark);
#ifdef PAR
            if(do_sp_profile)
                DumpSparkGranEvent(SP_PRUNED, threadId++);
#endif
        }
    }
    /* Record how far we got; any sparks before sparkp are consumed. */
    PendingSparksHd[REQUIRED_POOL] = sparkp;

    /* Now, almost the same thing for advisory sparks */

    for (sparkp = PendingSparksHd[ADVISORY_POOL]; 
      sparkp < PendingSparksTl[ADVISORY_POOL]; sparkp++) {
        spark = *sparkp;
        if (SHOULD_SPARK(spark)) {      
            if (
#ifdef PAR
    /* In the parallel world, don't create advisory threads if we are 
     * about to rerun the same thread, or already have runnable threads,
     *  or the main thread has terminated */
              (RunnableThreadsHd != Nil_closure ||
               (required_thread_count == 0 && IAmMainThread)) || 
#endif
              advisory_thread_count == MaxThreads ||
              (tso = NewThread(spark, T_ADVISORY)) == NULL)
                break;
            advisory_thread_count++;
            if (RunnableThreadsHd == Nil_closure) {
                RunnableThreadsHd = tso;
#ifdef PAR
                if (do_gr_profile) {
                    DumpGranEvent(GR_START, tso);
                    sameThread = rtsTrue;
                }
#endif
            } else {
                TSO_LINK(RunnableThreadsTl) = tso;
#ifdef PAR
                if (do_gr_profile)
                    DumpGranEvent(GR_STARTQ, tso);
#endif
            }
            RunnableThreadsTl = tso;
        } else {
            if (DO_QP_PROF)
                QP_Event0(threadId++, spark);
#ifdef PAR
            if(do_sp_profile)
                DumpSparkGranEvent(SP_PRUNED, threadId++);
#endif
        }
    }
    PendingSparksHd[ADVISORY_POOL] = sparkp;

    /* Jump back into the scheduler loop: -1 signals that no required
       threads remain (i.e. time to shut down), 1 means keep going. */
#ifndef PAR
    longjmp(scheduler_loop, required_thread_count == 0 ? -1 : 1);
#else
    longjmp(scheduler_loop, required_thread_count == 0 && IAmMainThread ? -1 : 1);
#endif
}
1264
1265 #endif  /* !GRAN */
1266
1267 \end{code}
1268
1269 %****************************************************************************
1270 %
1271 \subsection[thread-gransim-execution]{Starting, Idling and Migrating
1272                                         Threads (GrAnSim only)}
1273 %
1274 %****************************************************************************
1275
1276 Thread start, idle and migration code for GrAnSim (i.e. simulating multiple
1277 processors). 
1278
1279 \begin{code}
1280 #if defined(GRAN)
1281
1282 StartThread(event,event_type)
1283 eventq event;
1284 enum gran_event_types event_type;
1285 {
1286   if(ThreadQueueHd==Nil_closure)
1287     {
1288       CurrentTSO = ThreadQueueHd = ThreadQueueTl = EVENT_TSO(event);
1289       newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc]+gran_threadqueuetime,
1290                CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
1291       if(do_gr_profile)
1292         DumpGranEvent(event_type,EVENT_TSO(event));
1293     }
1294   else
1295     {
1296       TSO_LINK(ThreadQueueTl) = EVENT_TSO(event);
1297       ThreadQueueTl = EVENT_TSO(event);
1298
1299       if(DoThreadMigration)
1300         ++SurplusThreads;
1301
1302       if(do_gr_profile)
1303         DumpGranEvent(event_type+1,EVENT_TSO(event));
1304
1305     }
1306   CurrentTime[CurrentProc] += gran_threadqueuetime;
1307 }
1308 \end{code}
1309
1310 Export work to idle PEs.
1311
1312 \begin{code}
/* Export work (sparks or whole threads) to idle processors.  Only does
   anything when some PE is idle and there is surplus work; stealing is
   gated by FetchStrategy and the target's outstanding fetches. */
HandleIdlePEs()
{
  PROC proc;

  if(ANY_IDLE && (SparksAvail > 0l || SurplusThreads > 0l))
    for(proc = 0; proc < max_proc; proc++)
      if(IS_IDLE(proc))
        {
          /* Optionally try to migrate a whole thread before sparks. */
          if(DoStealThreadsFirst && 
             (FetchStrategy >= 4 || OutstandingFetches[proc] == 0))
            {
              if (SurplusThreads > 0l)                    /* Steal a thread */
                StealThread(proc);
          
              /* NOTE(review): this `break' leaves the whole PE loop as
                 soon as one PE has been made busy; a `continue' (move
                 on to the next idle PE) may have been intended --
                 confirm before changing. */
              if(!IS_IDLE(proc))
                break;
            }

          if(SparksAvail > 0l && 
             (FetchStrategy >= 3 || OutstandingFetches[proc] == 0)) /* Steal a spark */
            StealSpark(proc);

          /* Fall back to thread stealing if the PE is still idle. */
          if (IS_IDLE(proc) && SurplusThreads > 0l && 
              (FetchStrategy >= 4 || OutstandingFetches[proc] == 0)) /* Steal a thread */
            StealThread(proc);
        }
}
1340 \end{code}
1341
Steal a spark and schedule moving it to proc. We want to look at PEs in
clock order -- the most lagging (earliest local clock) first.  Currently sparks are only stolen from
1344 the @ADVISORY_POOL@ never from the @REQUIRED_POOL@. Eventually, this should
1345 be changed to first steal from the former then from the latter.
1346
1347 \begin{code}
/* Steal one advisory spark for processor `proc' from some other PE.
   Candidate victims are PEs whose local clock is not ahead of the
   current one; they are tried in ascending clock order.  On success a
   MOVESPARK event is scheduled and the spark is unlinked from the
   victim's queue.  Unsparkable entries found along the way are pruned. */
StealSpark(proc)
PROC proc;
{
  PROC p;
  sparkq spark, prev, next;
  int stolen = 0;
  TIME times[MAX_PROC], stealtime;
  unsigned ntimes=0, i, j;

  /* times shall contain processors from which we may steal sparks */ 
  for(p=0; p < max_proc; ++p)
    if(proc != p && 
       PendingSparksHd[p][ADVISORY_POOL] != NULL && 
       CurrentTime[p] <= CurrentTime[CurrentProc])
      times[ntimes++] = p;

  /* sort times */
  /* (simple O(n^2) exchange sort; ntimes is bounded by MAX_PROC) */
  for(i=0; i < ntimes; ++i)
    for(j=i+1; j < ntimes; ++j)
      if(CurrentTime[times[i]] > CurrentTime[times[j]])
        {
          unsigned temp = times[i];
          times[i] = times[j];
          times[j] = temp;
        }

  for(i=0; i < ntimes && !stolen; ++i) 
    {
      p = times[i];
      
      for(prev=NULL, spark = PendingSparksHd[p][ADVISORY_POOL]; 
          spark != NULL && !stolen; 
          spark=next)
        {
          next = SPARK_NEXT(spark);
          
          if(SHOULD_SPARK(SPARK_NODE(spark)))
            {
              /* Don't Steal local sparks */
              if(!SPARK_GLOBAL(spark))
                {
                  prev=spark;
                  continue;
                }
              
              /* Detach the spark; charge the victim for packing it. */
              SPARK_NEXT(spark) = NULL;
              CurrentTime[p] += gran_mpacktime;

              /* Spark arrives once both PEs have reached the later of
                 their clocks, plus the modelled steal delay. */
              stealtime = (CurrentTime[p] > CurrentTime[proc]? CurrentTime[p]: CurrentTime[proc])
                + SparkStealTime();
              
              newevent(proc,p /* CurrentProc */,stealtime,
                       MOVESPARK,Nil_closure,Nil_closure,spark);

              /* Mark the destination busy so we don't feed it twice. */
              MAKE_BUSY(proc);
              stolen = 1;
              ++SPARK_GLOBAL(spark);

              if(do_sp_profile)
                DumpSparkGranEvent(SP_EXPORTED,spark);

              CurrentTime[p] += gran_mtidytime;

              --SparksAvail;
            }
          else
            {
              /* Node no longer worth evaluating: prune the spark. */
              if(do_sp_profile)
                DumpSparkGranEvent(SP_PRUNED,spark);
              DisposeSpark(spark);
            }
          
          /* Unlink the stolen/pruned spark from the victim's queue.
             (Local sparks were skipped above via `continue', so this
             point is reached only for removed entries.) */
          if(spark == PendingSparksHd[p][ADVISORY_POOL])
            PendingSparksHd[p][ADVISORY_POOL] = next;
          
          if(prev!=NULL)
            SPARK_NEXT(prev) = next;
        }
                      
      /* Keep the tail pointer consistent if the queue emptied. */
      if(PendingSparksHd[p][ADVISORY_POOL] == NULL)
        PendingSparksTl[p][ADVISORY_POOL] = NULL;
    }
}
1431 \end{code}
1432
Steal a thread and schedule moving it to proc.
1434
1435 \begin{code}
/* Steal a runnable, exportable (not TSO_LOCKED) thread for processor
   `proc' from another PE and schedule a MOVETHREAD event to migrate
   it.  Victims are PEs whose clock is not ahead of ours, tried in
   ascending clock order.  The first thread of a victim's runnable
   queue (the one that would run next there) is never taken. */
StealThread(proc)
PROC proc;
{
  PROC p;
  P_ thread, prev;
  TIME times[MAX_PROC], stealtime;
  unsigned ntimes=0, i, j;

  /* Hunt for a thread */

  /* times shall contain processors from which we may steal threads */ 
  for(p=0; p < max_proc; ++p)
    if(proc != p && RunnableThreadsHd[p] != Nil_closure && 
       CurrentTime[p] <= CurrentTime[CurrentProc])
      times[ntimes++] = p;

  /* sort times */
  /* (simple O(n^2) exchange sort; ntimes is bounded by MAX_PROC) */
  for(i=0; i < ntimes; ++i)
    for(j=i+1; j < ntimes; ++j)
      if(CurrentTime[times[i]] > CurrentTime[times[j]])
        {
          unsigned temp = times[i];
          times[i] = times[j];
          times[j] = temp;
        }

  for(i=0; i < ntimes; ++i) 
    {
      p = times[i];
      
      /* Steal the first exportable thread in the runnable queue after the */
      /* first one */ 
      
      if(RunnableThreadsHd[p] != Nil_closure)
        {
          /* Skip the head and any locked (non-migratable) threads. */
          for(prev = RunnableThreadsHd[p], thread = TSO_LINK(RunnableThreadsHd[p]); 
              thread != Nil_closure && TSO_LOCKED(thread); 
              prev = thread, thread = TSO_LINK(thread))
            /* SKIP */;

          if(thread != Nil_closure)   /* Take thread out of runnable queue */
            {
              TSO_LINK(prev) = TSO_LINK(thread);

              TSO_LINK(thread) = Nil_closure;

              if(RunnableThreadsTl[p] == thread)
                RunnableThreadsTl[p] = prev;

              /* Turn magic constants into params !? -- HWL */

              CurrentTime[p] += 5l * gran_mpacktime;

              /* Thread arrives when both PEs have reached the later of
                 their clocks plus the modelled migration cost. */
              stealtime = (CurrentTime[p] > CurrentTime[proc]? CurrentTime[p]: CurrentTime[proc])
                           + SparkStealTime() + 4l * gran_additional_latency
                             + 5l * gran_munpacktime;

              /* Move the thread */
              SET_PROCS(thread,PE_NUMBER(proc)); 

              /* Move from one queue to another */
              newevent(proc,p,stealtime,MOVETHREAD,thread,Nil_closure,NULL);
              MAKE_BUSY(proc);
              --SurplusThreads;

              if(do_gr_profile)
                DumpRawGranEvent(p,GR_STEALING,TSO_ID(thread));
          
              CurrentTime[p] += 5l * gran_mtidytime;

              /* Found one */
              break;
            }
        }
    }
}
1512
1513 TIME SparkStealTime()
1514 {
1515   double fishdelay, sparkdelay, latencydelay;
1516   fishdelay =  (double)max_proc/2;
1517   sparkdelay = fishdelay - ((fishdelay-1)/(double)(max_proc-1))*(double)Idlers;
1518   latencydelay = sparkdelay*((double)gran_latency);
1519
1520 /*
1521   fprintf(stderr,"fish delay = %g, spark delay = %g, latency delay = %g, Idlers = %u\n",
1522           fishdelay,sparkdelay,latencydelay,Idlers);
1523 */
1524   return((TIME)latencydelay);
1525 }
1526 #endif                                                       /* GRAN ; HWL */
1527
1528 \end{code}
1529
1530 %****************************************************************************
1531 %
1532 \subsection[thread-execution]{Executing Threads}
1533 %
1534 %****************************************************************************
1535
1536 \begin{code}
1537 EXTDATA_RO(StkO_info);
1538 EXTDATA_RO(TSO_info);
1539 EXTDATA_RO(WorldStateToken_closure);
1540
1541 EXTFUN(EnterNodeCode);
1542 UNVEC(EXTFUN(stopThreadDirectReturn);,EXTDATA(vtbl_stopStgWorld);)
1543
1544 #if defined(GRAN)
1545
1546 /* Slow but relatively reliable method uses xmalloc */
1547 /* Eventually change that to heap allocated sparks. */
1548
1549 sparkq 
1550 NewSpark(node,name,local)
1551 P_ node;
1552 I_ name, local;
1553 {
1554   extern P_ xmalloc();
1555   sparkq newspark = (sparkq) xmalloc(sizeof(struct spark));
1556   SPARK_PREV(newspark) = SPARK_NEXT(newspark) = NULL;
1557   SPARK_NODE(newspark) = node;
1558   SPARK_NAME(newspark) = name;
1559   SPARK_GLOBAL(newspark) = !local;
1560   return(newspark);
1561 }
1562
1563 void
1564 DisposeSpark(spark)
1565 sparkq spark;
1566 {
1567   if(spark!=NULL)
1568     free(spark);
1569
1570   --SparksAvail;
1571
1572 /* Heap-allocated disposal.
1573
1574   FREEZE_MUT_HDR(spark, ImMutArrayOfPtrs);
1575   SPARK_PREV(spark) = SPARK_NEXT(spark) = SPARK_NODE(spark) = Nil_closure;
1576 */
1577 }
1578
1579 DisposeSparkQ(spark)
1580 sparkq spark;
1581 {
1582   if (spark==NULL) 
1583     return;
1584
1585   DisposeSparkQ(SPARK_NEXT(spark));
1586
1587 #ifdef GRAN_CHECK
1588   if (SparksAvail < 0)
1589     fprintf(stderr,"DisposeSparkQ: SparksAvail<0 after disposing sparkq @ 0x%lx\n", spark);
1590 #endif
1591
1592   free(spark);
1593 }
1594
1595 #endif
1596
1597 I_ StkOChunkSize = DEFAULT_STKO_CHUNK_SIZE;
1598
1599 /* Create a new TSO, with the specified closure to enter and thread type */
1600
/* Create a new TSO that will enter `topClosure', with thread type
 * `type' (T_MAIN / T_REQUIRED / T_ADVISORY ...).  Recycles a TSO and
 * stack object from the free lists when possible; returns NULL if the
 * heap has no room for a fresh allocation (caller must GC and retry).
 * On success the saved register table, stack object and return address
 * are set up so the scheduler can resume straight into EnterNodeCode. */
P_
NewThread(topClosure, type)
P_ topClosure;
W_ type;
{
    P_ stko, tso;

    /* Reuse a TSO off the free list if there is one... */
    if (AvailableTSO != Nil_closure) {
        tso = AvailableTSO;
#if defined(GRAN)
        SET_PROCS(tso,ThisPE);  /* Allocate it locally! */
#endif
        AvailableTSO = TSO_LINK(tso);
    } else if (SAVE_Hp + TSO_HS + TSO_CTS_SIZE > SAVE_HpLim) {
        /* ...otherwise allocate from the heap; NULL signals "heap full". */
        return(NULL);
    } else {
        ALLOC_TSO(TSO_HS,BYTES_TO_STGWORDS(sizeof(STGRegisterTable)),
                  BYTES_TO_STGWORDS(sizeof(StgDouble)));
        tso = SAVE_Hp + 1;
        SAVE_Hp += TSO_HS + TSO_CTS_SIZE;
        SET_TSO_HDR(tso, TSO_info, CCC);
    }

    /* Initialise the TSO's bookkeeping fields. */
    TSO_LINK(tso) = Nil_closure;
    TSO_CCC(tso) = (CostCentre)STATIC_CC_REF(CC_MAIN);
    TSO_NAME(tso) = (P_) INFO_PTR(topClosure);  /* A string would be nicer -- JSM */
    TSO_ID(tso) = threadId++;
    TSO_TYPE(tso) = type;
    TSO_PC1(tso) = TSO_PC2(tso) = EnterNodeCode;
    TSO_ARG1(tso) = TSO_EVENT(tso) = 0;
    TSO_SWITCH(tso) = NULL;

#ifdef DO_REDN_COUNTING
    TSO_AHWM(tso) = 0;
    TSO_BHWM(tso) = 0;
#endif

    /* Zero the per-thread profiling/statistics fields. */
#if defined(GRAN) || defined(PAR)
    TSO_SPARKNAME(tso)    = 0;
#if defined(GRAN)
    TSO_STARTEDAT(tso)    = CurrentTime[CurrentProc];
#else
    TSO_STARTEDAT(tso)    = CURRENT_TIME;
#endif
    TSO_EXPORTED(tso)     = 0;
    TSO_BASICBLOCKS(tso)  = 0;
    TSO_ALLOCS(tso)       = 0;
    TSO_EXECTIME(tso)     = 0;
    TSO_FETCHTIME(tso)    = 0;
    TSO_FETCHCOUNT(tso)   = 0;
    TSO_BLOCKTIME(tso)    = 0;
    TSO_BLOCKCOUNT(tso)   = 0;
    TSO_BLOCKEDAT(tso)    = 0;
    TSO_GLOBALSPARKS(tso) = 0;
    TSO_LOCALSPARKS(tso)  = 0;
#endif    
    /*
     * set pc, Node (R1), liveness
     */
    CurrentRegTable = TSO_INTERNAL_PTR(tso);
    SAVE_Liveness = LIVENESS_R1;
    SAVE_R1.p = topClosure;

    /* Now find a stack object: the main thread gets the pre-built
       MainStkO (non-PAR only); others reuse or allocate one. */
# ifndef PAR
    if (type == T_MAIN) {
        stko = MainStkO;
    } else {
# endif
        if (AvailableStack != Nil_closure) {
            stko = AvailableStack;
#if defined(GRAN)
            SET_PROCS(stko,ThisPE);
#endif
            AvailableStack = STKO_LINK(AvailableStack);
        } else if (SAVE_Hp + STKO_HS + StkOChunkSize > SAVE_HpLim) {
            /* NOTE(review): if the TSO above was freshly heap-allocated
               it is abandoned here (heap pointer already bumped);
               presumably reclaimed by the next GC -- confirm. */
            return(NULL);
        } else {
            ALLOC_STK(STKO_HS,StkOChunkSize,0);
            stko = SAVE_Hp + 1;
            SAVE_Hp += STKO_HS + StkOChunkSize;
            SET_STKO_HDR(stko, StkO_info, CCC);
        }
        /* Empty A and B stacks: both pointers sit at their bottoms. */
        STKO_SIZE(stko) = StkOChunkSize + STKO_VHS;
        STKO_SpB(stko) = STKO_SuB(stko) = STKO_BSTK_BOT(stko) + BREL(1);
        STKO_SpA(stko) = STKO_SuA(stko) = STKO_ASTK_BOT(stko) + AREL(1);
        STKO_LINK(stko) = Nil_closure;
        STKO_RETURN(stko) = NULL;
# ifndef PAR
    }
# endif
    
#ifdef DO_REDN_COUNTING
    STKO_ADEP(stko) = STKO_BDEP(stko) = 0;
#endif

    /* The main thread starts with the world-state token on its A stack. */
    if (type == T_MAIN) {
        STKO_SpA(stko) -= AREL(1);
        *STKO_SpA(stko) = (P_) WorldStateToken_closure;
    }

    SAVE_Ret = (StgRetAddr) UNVEC(stopThreadDirectReturn,vtbl_stopStgWorld);
    SAVE_StkO = stko;

    if (DO_QP_PROF) {
        QP_Event1(do_qp_prof > 1 ? "*A" : "*G", tso);
    }
    return tso;
}
1709 \end{code}
1710
1711 \begin{code}
1712
/* Terminate the current thread: dump final profiling information,
 * adjust the thread counts, recycle the TSO and stack object onto the
 * free lists, and re-enter the scheduler.  Never returns normally
 * (ends in ReSchedule or longjmp). */
void
EndThread(STG_NO_ARGS)
{
#ifdef PAR
    TIME now = CURRENT_TIME;
#endif
#ifdef DO_REDN_COUNTING
    extern FILE *tickyfile;

    /* Report the thread's stack high-water marks for ticky profiling. */
    if (tickyfile != NULL) {
        fprintf(tickyfile, "Thread %d (%lx)\n\tA stack max. depth: %ld words\n",
          TSO_ID(CurrentTSO), TSO_NAME(CurrentTSO), TSO_AHWM(CurrentTSO));
        fprintf(tickyfile, "\tB stack max. depth: %ld words\n",
          TSO_BHWM(CurrentTSO));
    }
#endif

    if (DO_QP_PROF) {
        QP_Event1("G*", CurrentTSO);
    }

#if defined(GRAN)
    /* The dying thread must be the one at the head of this PE's queue. */
    assert(CurrentTSO == ThreadQueueHd);
    ThreadQueueHd = TSO_LINK(CurrentTSO);

    if(ThreadQueueHd == Nil_closure)
      ThreadQueueTl = Nil_closure;

    else if (DoThreadMigration)
      --SurplusThreads;

    if (do_gr_sim)
      {
        /* When the main thread dies, drain every PE's runnable queue so
           the profile shows all remaining threads as finished. */
        if(TSO_TYPE(CurrentTSO)==T_MAIN)
          {
            int i;
            for(i=0; i < max_proc; ++i) {
              StgBool is_first = StgTrue;
              while(RunnableThreadsHd[i] != Nil_closure)
                {
                  /* We schedule runnable threads before killing them to */
                  /* make the job of bookkeeping the running, runnable, */
                  /* blocked threads easier for scripts like gr2ps  -- HWL */ 

                  if (do_gr_profile && !is_first)
                    DumpRawGranEvent(i,GR_SCHEDULE,
                                     TSO_ID(RunnableThreadsHd[i]));
                  if (!no_gr_profile)
                    DumpGranInfo(i,RunnableThreadsHd[i],StgTrue);
                  RunnableThreadsHd[i] = TSO_LINK(RunnableThreadsHd[i]);
                  is_first = StgFalse;
                }
            }

            ThreadQueueHd = Nil_closure;

#if defined(GRAN_CHECK) && defined(GRAN)
            /* Print event stats */
            if (debug & 0x20) {
              int i;

              fprintf(stderr,"Statistics of events (total=%d):\n",
                      noOfEvents);
              for (i=0; i<=7; i++) {
                fprintf(stderr,"> %s (%d): \t%ld \t%f%%\n",
                        event_names[i],i,event_counts[i],
                        (float)(100*event_counts[i])/(float)(noOfEvents) );
              }
            }
#endif       

          }

        if (!no_gr_profile)
          DumpGranInfo(CurrentProc,CurrentTSO,
                       TSO_TYPE(CurrentTSO) != T_ADVISORY);

        /* Note ThreadQueueHd is Nil when the main thread terminates */
        if(ThreadQueueHd != Nil_closure)
          {
            if (do_gr_profile && !no_gr_profile)
              DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
            CurrentTime[CurrentProc] += gran_threadscheduletime;
          }

        else if (do_gr_binary && TSO_TYPE(CurrentTSO)==T_MAIN &&
                 !no_gr_profile)
          grterminate(CurrentTime[CurrentProc]);
      }
#endif  /* GRAN */

#ifdef PAR
    if (do_gr_profile) {
        TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO);
        DumpGranInfo(thisPE, CurrentTSO, TSO_TYPE(CurrentTSO) != T_ADVISORY);
    }
#endif

    /* Adjust thread counts; T_MAIN re-enters the scheduler here and
       does not reach the recycling code below (ReSchedule/longjmp are
       presumed not to return -- hence no break after them). */
    switch (TSO_TYPE(CurrentTSO)) {
    case T_MAIN:
        required_thread_count--;
#ifdef PAR
        if (do_gr_binary)
            grterminate(now);
#endif

#if defined(GRAN_CHECK) && defined(GRAN)
        if ( (debug & 0x80) || (debug & 0x40) )
          fprintf(stderr,"\nGRAN: I hereby terminate the main thread!\n");

        /* I've stolen that from the end of ReSchedule (!GRAN).  HWL */
        longjmp(scheduler_loop, required_thread_count > 0 ? 1 : -1);
#else
        ReSchedule(0);
#endif  /* GRAN */

    case T_REQUIRED:
        required_thread_count--;
        break;

    case T_ADVISORY:
        advisory_thread_count--;
        break;

    case T_FAIL:
        EXIT(EXIT_FAILURE);

    default:
        fflush(stdout);
        fprintf(stderr, "EndThread: %lx unknown\n", (W_) TSO_TYPE(CurrentTSO));
        EXIT(EXIT_FAILURE);
    }

    /* Reuse stack object space */
    ASSERT(STKO_LINK(SAVE_StkO) == Nil_closure);
    STKO_LINK(SAVE_StkO) = AvailableStack;
    AvailableStack = SAVE_StkO;
    /* Reuse TSO */
    TSO_LINK(CurrentTSO) = AvailableTSO;
    AvailableTSO = CurrentTSO;
    CurrentTSO = Nil_closure;
    CurrentRegTable = NULL;

#if defined(GRAN)
        /* NB: Now ThreadQueueHd is either the next runnable thread on this */
        /* proc or it's Nil_closure. In the latter case, a FINDWORK will be */
        /* issued by ReSchedule. */
        ReSchedule(SAME_THREAD);                /* back for more! */
#else
        ReSchedule(0);                          /* back for more! */
#endif
}
1865 \end{code}
1866
1867 %****************************************************************************
1868 %
1869 \subsection[thread-blocking]{Local Blocking}
1870 %
1871 %****************************************************************************
1872
1873 \begin{code}
1874
#if defined(COUNT)
/* Statistics hooks: each call bumps one global update/PAP counter. */
void CountnUPDs()     { nUPDs     += 1; }
void CountnUPDs_old() { nUPDs_old += 1; }
void CountnUPDs_new() { nUPDs_new += 1; }

void CountnPAPs()     { nPAPs     += 1; }
#endif
1882
1883 EXTDATA_RO(BQ_info);
1884
#ifndef GRAN
/* NB: non-GRAN version ToDo
 *
 * AwakenBlockingQueue awakens a list of TSOs and FBQs.
 *
 * Walks the blocking queue hanging off a closure and appends every TSO
 * found there to the runnable-threads queue.  In the PAR build the queue
 * may also contain blocked fetches (FetchBuffers); those are moved onto
 * the global PendingFetches list instead of the run queue.
 */

/* PAR only: blocked fetches unlinked from blocking queues, still to be
   serviced; chained through BF_LINK. */
P_ PendingFetches = Nil_closure;

void
AwakenBlockingQueue(bqe)
  P_ bqe;       /* head of the blocking queue to wake */
{
    P_ last_tso = NULL;         /* last TSO appended to the run queue */

# ifdef PAR
    P_ next;
    TIME now = CURRENT_TIME;

# endif

    /* Non-PAR: the queue is a Nil-terminated list of TSOs.
       PAR: the queue ends at the first non-mutable info pointer and can
       mix TSOs with FetchBuffers, hence the switch on the info type. */
# ifndef PAR
    while (bqe != Nil_closure) {
# else
    while (IS_MUTABLE(INFO_PTR(bqe))) {
        switch (INFO_TYPE(INFO_PTR(bqe))) {
        case INFO_TSO_TYPE:
# endif
            if (DO_QP_PROF) {
                QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
            }
# ifdef PAR
            if (do_gr_profile) {
                DumpGranEvent(GR_RESUMEQ, bqe);
                /* charge the time spent blocked/fetching to this TSO */
                switch (TSO_QUEUE(bqe)) {
                case Q_BLOCKED:
                    TSO_BLOCKTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
                    break;
                case Q_FETCHING:
                    TSO_FETCHTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
                    break;
                default:
                    fflush(stdout);
                    fprintf(stderr, "ABQ: TSO_QUEUE invalid.\n");
                    EXIT(EXIT_FAILURE);
                }
            }
# endif
            /* First TSO seen: splice the chain onto the run queue. */
            if (last_tso == NULL) {
                if (RunnableThreadsHd == Nil_closure) {
                    RunnableThreadsHd = bqe;
                } else {
                    TSO_LINK(RunnableThreadsTl) = bqe;
                }
            }
            last_tso = bqe;
            bqe = TSO_LINK(bqe);
# ifdef PAR
            break;
        case INFO_BF_TYPE:
            /* Blocked fetch: move onto PendingFetches and unlink it
               from the TSO chain being built for the run queue. */
            next = BF_LINK(bqe);
            BF_LINK(bqe) = PendingFetches;
            PendingFetches = bqe;
            bqe = next;
            if (last_tso != NULL)
                TSO_LINK(last_tso) = next;
            break;
        default:
            fprintf(stderr, "Unexpected IP (%#lx) in blocking queue at %#lx\n",
              INFO_PTR(bqe), (W_) bqe);
            EXIT(EXIT_FAILURE);
        }
    }
#else
    }
# endif
    /* If anything was moved, fix up the run-queue tail (and terminate
       the chain in the PAR build). */
    if (last_tso != NULL) {
        RunnableThreadsTl = last_tso;
# ifdef PAR
        TSO_LINK(last_tso) = Nil_closure;
# endif
    }
}
#endif /* !GRAN */
1968
1969 #ifdef GRAN
1970
1971 /* NB: GRAN version only ToDo
1972  *
1973  * AwakenBlockingQueue returns True if we are on the oldmutables list,
1974  * so that the update code knows what to do next.
1975  */
1976
/* Wake the blocking queue hanging off "node".
 *
 * In simulation mode (do_gr_sim) each blocked TSO gets a RESUMETHREAD
 * event posted to the processor it lives on; otherwise the whole chain
 * is spliced onto the local thread queue.  Returns non-zero when node
 * is on the old-mutables list, so the update code knows what to do next.
 */
I_
AwakenBlockingQueue(node)
  P_ node;
{
    P_ tso = (P_) BQ_ENTRIES(node);
    P_ prev;

    if(do_gr_sim)
      {
        W_ notifytime;

# if defined(COUNT)
        ++nUPDs;
        if (tso != Nil_closure) 
          ++nUPDs_BQ;
# endif

        /* Post a RESUMETHREAD event per blocked TSO, delivered on the
           processor where that TSO lives, and sever the chain links. */
        while(tso != Nil_closure) {
          W_ proc;
          assert(TSO_INTERNAL_PTR(tso)->rR[0].p == node);

# if defined(COUNT)
          ++BQ_lens;
# endif

          /* Find where the tso lives */
          proc = where_is(tso);
 
          if(proc == CurrentProc)
            notifytime = CurrentTime[CurrentProc] + gran_lunblocktime;
          else
            {
              /* remote wake-up: pay packing, global unblock, tidy */
              CurrentTime[CurrentProc] += gran_mpacktime;
              notifytime = CurrentTime[CurrentProc] + gran_gunblocktime;
              CurrentTime[CurrentProc] += gran_mtidytime;
            }

          /* and create a resume message */
          newevent(proc, CurrentProc, notifytime, 
                   RESUMETHREAD,tso,Nil_closure,NULL);

          prev = tso;
          tso = TSO_LINK(tso);
          TSO_LINK(prev) = Nil_closure;
        }
      }
    else
      {
        /* Non-simulation: splice the entire chain onto the thread queue.
           NOTE(review): this path applies TSO_LINK to tso unconditionally,
           so it assumes the blocking queue is non-empty -- confirm that
           callers guarantee this. */
        if (ThreadQueueHd == Nil_closure)
          ThreadQueueHd = tso;
        else
          TSO_LINK(ThreadQueueTl) = tso;

        /* Walk to the last TSO, profiling each resumed thread. */
        while(TSO_LINK(tso) != Nil_closure) {
          assert(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
          if (DO_QP_PROF) {
            QP_Event2(do_qp_prof > 1 ? "RA" : "RG", tso, CurrentTSO);
          }
          tso = TSO_LINK(tso);
        }
        
        assert(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
        if (DO_QP_PROF) {
          QP_Event2(do_qp_prof > 1 ? "RA" : "RG", tso, CurrentTSO);
        }
        
        ThreadQueueTl = tso;
      }

    return MUT_LINK(node) != MUT_NOT_LINKED;
}
2048
2049 #endif /* GRAN only */
2050
2051 EXTFUN(Continue);
2052
2053 void
2054 Yield(args)
2055 W_ args;
2056 {
2057     SAVE_Liveness = args >> 1;
2058     TSO_PC1(CurrentTSO) = Continue;
2059     if (DO_QP_PROF) {
2060         QP_Event1("GR", CurrentTSO);
2061     }
2062 #ifdef PAR
2063     if (do_gr_profile) {
2064         /* Note that CURRENT_TIME may perform an unsafe call */
2065         TSO_EXECTIME(CurrentTSO) += CURRENT_TIME - TSO_BLOCKEDAT(CurrentTSO);
2066     }
2067 #endif
2068     ReSchedule(args & 1);
2069 }
2070
2071 \end{code}
2072
2073 %****************************************************************************
2074 %
2075 \subsection[gr-fetch]{Fetching Nodes (GrAnSim only)}
2076 %
2077 %****************************************************************************
2078
2079 The following GrAnSim routines simulate the fetching of nodes from a remote
2080 processor. We use a 1 word bitmask to indicate on which processor a node is
2081 lying. Thus,  moving or copying a  node from one  processor to another just
2082 requires  an     appropriate  change in this     bitmask  (using @SET_GA@).
2083 Additionally, the clocks have to be updated.
2084
2085 A special case arises when the node that is  needed by processor A has been
2086 moved from a  processor B to a processor   C between sending  out a @FETCH@
2087 (from A) and its arrival at B. In that case the @FETCH@ has to be forwarded
2088 to C.
2089  
2090 Currently, we  only support GRIP-like  single closure fetching.  We plan to
2091 incorporate GUM-like packet fetching in the near future.
2092  
2093 \begin{code}
2094 #if defined(GRAN)
2095
2096 /* Fetch node "node" to processor "p" */
2097
/* Fetch "node" to processor "to" (which must be CurrentProc).
 *
 * Returns 1 when the node is local to neither "from" nor "to" -- it has
 * moved on and the fetch must be forwarded -- and 0 on success.  A node
 * in normal form is copied (to's bit is OR-ed into its PROCS mask);
 * anything else is moved (the mask is replaced by to's bit alone).
 *
 * NOTE(review): K&R-style declarations do not check call arity -- verify
 * that every call site really supplies all three arguments.
 */
int
FetchNode(node,from,to)
P_ node;
PROC from, to;
{
  assert(to==CurrentProc);
  if (!IS_LOCAL_TO(PROCS(node),from) &&
      !IS_LOCAL_TO(PROCS(node),to) ) 
    return 1;

  if(IS_NF(INFO_PTR(node)))                 /* Old: || IS_BQ(node) */
    PROCS(node) |= PE_NUMBER(to);           /* Copy node */
  else
    PROCS(node) = PE_NUMBER(to);            /* Move node */

  /* Now fetch the children */
  if(DoGUMMFetching)
    {
      fprintf(stderr,"Sorry, GUMM fetching not yet implemented.\n");
    }

  return 0;
}
2121
2122 /* --------------------------------------------------
2123    Cost of sending a packet of size n = C + P*n
2124    where C = packet construction constant, 
2125          P = cost of packing one word into a packet
2126    [Should also account for multiple packets].
2127    -------------------------------------------------- */
2128
/* Service a FETCHNODE request: processor p wants "node" on behalf of
 * thread tso.  Three cases:
 *   1. node already moved to p      -> just restart tso there;
 *   2. node still on this processor -> send a FETCHREPLY back to p;
 *   3. node grabbed by a third PE   -> forward the FETCHNODE there.
 */
void 
HandleFetchRequest(node,p,tso)
P_ node, tso;
PROC p;
{
  if (IS_LOCAL_TO(PROCS(node),p) )  /* Somebody else moved node already => */
    {                               /* start tso                           */ 
      newevent(p,CurrentProc,
               CurrentTime[CurrentProc] /* +gran_latency */,
               FETCHREPLY,tso,node,NULL);            /* node needed ?? */
      CurrentTime[CurrentProc] += gran_mtidytime;
    }
  else if (IS_LOCAL_TO(PROCS(node),CurrentProc) )   /* Is node still here? */
    {
      /* Actual moving/copying of node is done on arrival; see FETCHREPLY */
      /* Send a reply to the originator */
      CurrentTime[CurrentProc] += gran_mpacktime;

      newevent(p,CurrentProc,
               CurrentTime[CurrentProc]+gran_latency,
               FETCHREPLY,tso,node,NULL);            /* node needed ?? */
      
      CurrentTime[CurrentProc] += gran_mtidytime;
    }
  else
    {    /* Qu'vatlh! node has been grabbed by another proc => forward */
      PROC p_new = where_is(node);
      TIME fetchtime;

#if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
      if (NoForward) {
        newevent(p,p_new,
                 max(CurrentTime[p_new],CurrentTime[CurrentProc])+gran_latency,
                 FETCHREPLY,tso,node,NULL);            /* node needed ?? */
        CurrentTime[CurrentProc] += gran_mtidytime;
        return;
      }
#endif

#if defined(GRAN_CHECK) && defined(GRAN)         /* Just for testing */
      if (debug & 0x2)    /* 0x2 should be something like DBG_PRINT_FWD */
        fprintf(stderr,"Qu'vatlh! node 0x%x has been grabbed by %d (current=%d; demander=%d) @ %d\n",
                node,p_new,CurrentProc,p,CurrentTime[CurrentProc]);
#endif
      /* Prepare FORWARD message to proc p_new */
      CurrentTime[CurrentProc] += gran_mpacktime;
      
      fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p_new]) +
                      gran_latency;
          
      newevent(p_new,p,fetchtime,FETCHNODE,tso,node,NULL);

      CurrentTime[CurrentProc] += gran_mtidytime;
    }
}
2184 #endif
2185 \end{code}
2186
2187 %****************************************************************************
2188 %
2189 \subsection[gr-simulation]{Granularity Simulation}
2190 %
2191 %****************************************************************************
2192
2193 \begin{code}
2194 #if 0 /* moved to GranSim.lc */
2195 #if defined(GRAN)
2196 I_ do_gr_sim = 0;
2197 FILE *gr_file = NULL;
2198 char gr_filename[32];
2199
/* DEAD CODE (inside #if 0 above; superseded by GranSim.lc).
 * Opens the <prog>.gr (or .gb for binary) report file and writes a
 * header describing the command line and all simulation parameters.
 * Returns 0.
 */
init_gr_simulation(rts_argc,rts_argv,prog_argc,prog_argv)
char *prog_argv[], *rts_argv[];
int prog_argc, rts_argc;
{
    I_ i;

    if(do_gr_sim)
      { 
        char *extension = do_gr_binary? "gb": "gr";
        sprintf(gr_filename, "%0.28s.%0.2s", prog_argv[0],extension);

        if ((gr_file = fopen(gr_filename,"w")) == NULL ) 
          {
            fprintf(stderr, "Can't open granularity simulation report file %s\n", gr_filename);
            exit(EXIT_FAILURE);             
          }

#if defined(GRAN_CHECK) && defined(GRAN)
        if(DoReScheduleOnFetch)
          setbuf(gr_file,NULL);
#endif

        fputs("Granularity Simulation for ",gr_file);
        for(i=0; i < prog_argc; ++i)
          {
            fputs(prog_argv[i],gr_file);
            fputc(' ',gr_file);
          }

        if(rts_argc > 0)
          {
            fputs("+RTS ",gr_file);

            for(i=0; i < rts_argc; ++i)
              {
                fputs(rts_argv[i],gr_file);
                fputc(' ',gr_file);
              }
          }

        fputs("\n\n--------------------\n\n",gr_file);

        fputs("General Parameters:\n\n",gr_file);

        /* NOTE(review): the format below has four conversions but five
           arguments; the final DoReScheduleOnFetch string is evaluated
           and ignored -- a "%sReSchedule on Fetch" clause appears to
           have been dropped from the format string. */
        fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads%s\n",
                max_proc,DoFairSchedule?"Fair":"Unfair",
                DoThreadMigration?"":"Don't ",
                DoThreadMigration && DoStealThreadsFirst?" Before Sparks":"",
                DoReScheduleOnFetch?"":"Don't ");

        fprintf(gr_file, "%s, Fetch %s in Each Packet\n",
                SimplifiedFetch?"Simplified Fetch":(DoReScheduleOnFetch?"Reschedule on Fetch":"Block on Fetch"),
                DoGUMMFetching?"Many Closures":"Exactly One Closure");
        fprintf(gr_file, "Fetch Strategy(%u): If outstanding fetches %s\n",
                FetchStrategy,
                FetchStrategy==1?"only run runnable threads (don't create new ones":
                FetchStrategy==2?"create threads only from local sparks":
                FetchStrategy==3?"create threads from local or global sparks":
                FetchStrategy==4?"create sparks and steal threads if necessary":
                                 "unknown");

        fprintf(gr_file, "Thread Creation Time %lu, Thread Queue Time %lu\n",
                gran_threadcreatetime,gran_threadqueuetime);
        fprintf(gr_file, "Thread DeSchedule Time %lu, Thread Schedule Time %lu\n",
                gran_threaddescheduletime,gran_threadscheduletime);
        fprintf(gr_file, "Thread Context-Switch Time %lu\n",
                gran_threadcontextswitchtime);
        fputs("\n\n--------------------\n\n",gr_file);

        fputs("Communication Metrics:\n\n",gr_file);
        fprintf(gr_file,
                "Latency %lu (1st) %lu (rest), Fetch %lu, Notify %lu (Global) %lu (Local)\n",
                gran_latency, gran_additional_latency, gran_fetchtime,
                gran_gunblocktime, gran_lunblocktime);
        fprintf(gr_file,
                "Message Creation %lu (+ %lu after send), Message Read %lu\n",
                gran_mpacktime, gran_mtidytime, gran_munpacktime);
        fputs("\n\n--------------------\n\n",gr_file);

        fputs("Instruction Metrics:\n\n",gr_file);
        fprintf(gr_file,"Arith %lu, Branch %lu, Load %lu, Store %lu, Float %lu, Alloc %lu\n",
                gran_arith_cost, gran_branch_cost, 
                gran_load_cost, gran_store_cost,gran_float_cost,gran_heapalloc_cost);
        fputs("\n\n++++++++++++++++++++\n\n",gr_file);
      }

    if(do_gr_binary)
      grputw(sizeof(TIME));

    Idlers = max_proc;
    return(0);
}
2292
/* DEAD CODE (inside #if 0 above; superseded by GranSim.lc).
 * Announce completion on stderr and close the report file. */
void end_gr_simulation() {
  if(do_gr_sim)
    {
      fprintf(stderr,"The simulation is finished. Look at %s for details.\n",
              gr_filename);
      fclose(gr_file);
    }
}
2301 #endif /*0*/
2302 \end{code}
2303
2304 %****************************************************************************
2305 %
2306 \subsection[qp-profile]{Quasi-Parallel Profiling}
2307 %
2308 %****************************************************************************
2309
2310 \begin{code}
2311 #ifndef PAR
2312
I_ do_qp_prof;   /* quasi-parallel profiling flag; >1 selects "RA" records (see QP_Event2) */
FILE *qp_file;   /* output stream for the <prog>.qp profile */
2315
2316 /* *Virtual* Time in milliseconds */
2317 long 
2318 qp_elapsed_time(STG_NO_ARGS)
2319 {
2320     extern StgDouble usertime();
2321
2322     return ((long) (usertime() * 1e3));
2323 }
2324
2325 static void 
2326 init_qp_profiling(STG_NO_ARGS)
2327 {
2328     I_ i;
2329     char qp_filename[32];
2330
2331     sprintf(qp_filename, "%0.24s.qp", prog_argv[0]);
2332     if ((qp_file = fopen(qp_filename,"w")) == NULL ) {
2333         fprintf(stderr, "Can't open quasi-parallel profile report file %s\n", 
2334             qp_filename);
2335         do_qp_prof = 0;
2336     } else {
2337         fputs(prog_argv[0], qp_file);
2338         for(i = 1; prog_argv[i]; i++) {
2339             fputc(' ', qp_file);
2340             fputs(prog_argv[i], qp_file);
2341         }
2342         fprintf(qp_file, " +RTS -C%d -t%d\n", contextSwitchTime, MaxThreads);
2343         fputs(time_str(), qp_file);
2344         fputc('\n', qp_file);
2345     }
2346 }
2347
/* Emit a "thread tid references node" record to the QP profile:
 * virtual time, the "**" tag, the thread id, and node's info pointer. */
void
QP_Event0(tid, node)
I_ tid;
P_ node;
{
    fprintf(qp_file, "%lu ** %lu 0x%lx\n", qp_elapsed_time(), tid, INFO_PTR(node));
}
2355
/* Emit a single-thread event record (e.g. "GR") to the QP profile:
 * virtual time, event tag, and the thread's id and name. */
void
QP_Event1(event, tso)
char *event;
P_ tso;
{
    fprintf(qp_file, "%lu %s %lu 0x%lx\n", qp_elapsed_time(), event,
            TSO_ID(tso), TSO_NAME(tso));
}
2364
/* Emit a two-thread event record (e.g. "RA"/"RG") to the QP profile:
 * virtual time, event tag, then id and name of both threads. */
void
QP_Event2(event, tso1, tso2)
char *event;
P_ tso1, tso2;
{
    fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n", qp_elapsed_time(), event,
            TSO_ID(tso1), TSO_NAME(tso1), TSO_ID(tso2), TSO_NAME(tso2));
}
2373
2374 #endif /* !PAR */
2375 \end{code}
2376
2377 %****************************************************************************
2378 %
2379 \subsection[entry-points]{Routines directly called from Haskell world}
2380 %
2381 %****************************************************************************
2382
The @GranSim...@ routines in here are directly called via macros from the
threaded world. 
2385
2386 First some auxiliary routines.
2387
2388 \begin{code}
2389 #ifdef GRAN
2390 /* Take the current thread off the thread queue and thereby activate the */
2391 /* next thread. It's assumed that the next ReSchedule after this uses */
2392 /* NEW_THREAD as param. */
2393 /* This fct is called from GranSimBlock and GranSimFetch */
2394
/* Pop the current thread off the head of the thread queue, making its
 * successor (if any) the new head.  If the queue becomes empty the
 * processor is marked idle; otherwise, when GR profiling is on, a
 * context-switch cost is charged and the schedule event recorded.
 */
void 
ActivateNextThread ()
{
#if defined(GRAN_CHECK) && defined(GRAN)
  /* sanity: caller must have CurrentTSO at the head of the queue */
  if(ThreadQueueHd != CurrentTSO) {
    fprintf(stderr,"Error: ThreadQueueHd != CurrentTSO in ActivateNextThread\n");
    exit(99);
  }
#endif
 
  ThreadQueueHd = TSO_LINK(ThreadQueueHd);
  if(ThreadQueueHd==Nil_closure) {
    MAKE_IDLE(CurrentProc);
    ThreadQueueTl = Nil_closure;
  } else if (do_gr_profile) {
    /* NOTE(review): the context-switch time is only charged when
       do_gr_profile is set -- confirm that is intentional. */
    CurrentTime[CurrentProc] += gran_threadcontextswitchtime;
    DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
  }
}
2414 \end{code}
2415
2416 Now the main stg-called routines:
2417
2418 \begin{code}
2419 /* ------------------------------------------------------------------------ */
2420 /* The following GranSim... fcts are stg-called from the threaded world.    */
2421 /* ------------------------------------------------------------------------ */
2422
2423 /* Called from HEAP_CHK  -- NB: node and liveness are junk here now. 
2424    They are left temporarily to avoid complete recompilation.
2425    KH 
2426 */
2427 void 
2428 GranSimAllocate(n,node,liveness)
2429 I_ n;
2430 P_ node;
2431 W_ liveness;
2432 {
2433   TSO_ALLOCS(CurrentTSO) += n;
2434   ++TSO_BASICBLOCKS(CurrentTSO);
2435   
2436   TSO_EXECTIME(CurrentTSO) += gran_heapalloc_cost;
2437   CurrentTime[CurrentProc] += gran_heapalloc_cost;
2438 }
2439
2440 /*
2441   Subtract the values added above, if a heap check fails and
2442   so has to be redone.
2443 */
2444 void 
2445 GranSimUnallocate(n,node,liveness)
2446 W_ n;
2447 P_ node;
2448 W_ liveness;
2449 {
2450   TSO_ALLOCS(CurrentTSO) -= n;
2451   --TSO_BASICBLOCKS(CurrentTSO);
2452   
2453   TSO_EXECTIME(CurrentTSO) -= gran_heapalloc_cost;
2454   CurrentTime[CurrentProc] -= gran_heapalloc_cost;
2455 }
2456
2457 void 
2458 GranSimExec(ariths,branches,loads,stores,floats)
2459 W_ ariths,branches,loads,stores,floats;
2460 {
2461   W_ cost = gran_arith_cost*ariths + gran_branch_cost*branches + gran_load_cost * loads +
2462             gran_store_cost*stores + gran_float_cost*floats;
2463
2464   TSO_EXECTIME(CurrentTSO) += cost;
2465   CurrentTime[CurrentProc] += cost;
2466 }
2467
2468
2469 /* 
2470    Fetch the node if it isn't local
2471    -- result indicates whether fetch has been done.
2472
2473    This is GRIP-style single item fetching.
2474 */
2475
2476 I_ 
2477 GranSimFetch(node /* , liveness_mask */ )
2478 P_ node;
2479 /* I_ liveness_mask; */
2480 {
2481   /* Note: once a node has been fetched, this test will be passed */
2482   if(!IS_LOCAL_TO(PROCS(node),CurrentProc) )
2483     {
2484       /* I suppose we shouldn't do this for CAFs? -- KH */
2485       /* Should reschedule if the latency is high */
2486       /* We should add mpacktime to the remote PE for the reply,
2487          but we don't know who owns the node
2488       */
2489       /* if(DYNAMIC_POINTER(node)) */        /* For 0.22; gone in 0.23 !!! */
2490         {
2491           PROC p = where_is(node);
2492           TIME fetchtime;
2493
2494 #ifdef GRAN_CHECK
2495           if ( ( debug & 0x40 ) &&
2496                p == CurrentProc )
2497             fprintf(stderr,"GranSimFetch: Trying to fetch from own processor%u\n", p);
2498 #endif  /* GRAN_CHECK */
2499
2500           CurrentTime[CurrentProc] += gran_mpacktime;
2501
2502           ++TSO_FETCHCOUNT(CurrentTSO);
2503           TSO_FETCHTIME(CurrentTSO) += gran_fetchtime;
2504               
2505           if (SimplifiedFetch)
2506             {
2507               FetchNode(node,CurrentProc);
2508               CurrentTime[CurrentProc] += gran_mtidytime+gran_fetchtime+
2509                                           gran_munpacktime;
2510               return(1);
2511             }
2512
2513           fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p]) +
2514                       gran_latency;
2515
2516           newevent(p,CurrentProc,fetchtime,FETCHNODE,CurrentTSO,node,NULL);
2517           ++OutstandingFetches[CurrentProc];
2518
2519           /* About to block */
2520           TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[p];
2521
2522           if (DoReScheduleOnFetch) 
2523             {
2524
2525               /* Remove CurrentTSO from the queue 
2526                  -- assumes head of queue == CurrentTSO */
2527               if(!DoFairSchedule)
2528                 {
2529                   if(do_gr_profile)
2530                     DumpGranEventAndNode(GR_FETCH,CurrentTSO,node,p);
2531
2532                   ActivateNextThread();
2533               
2534 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
2535                   if (debug & 0x10) {
2536                     if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
2537                       fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
2538                               CurrentTSO,CurrentTime[CurrentProc]);
2539                       exit (99);
2540                     } else {
2541                       TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
2542                     }
2543
2544                   }
2545 #endif
2546
2547                   TSO_LINK(CurrentTSO) = Nil_closure;
2548                   /* CurrentTSO = Nil_closure; */
2549
2550                   /* ThreadQueueHd is now the next TSO to schedule or NULL */
2551                   /* CurrentTSO is pointed to by the FETCHNODE event */
2552                 }
2553               else                            /* DoFairSchedule */
2554                 {
2555                   /* Remove from the tail of the thread queue */
2556                   fprintf(stderr,"Reschedule-on-fetch is not yet compatible with fair scheduling\n");
2557                   exit(99);
2558                 }
2559             }
2560           else                                /* !DoReScheduleOnFetch */
2561             {
2562               /* Note: CurrentProc is still busy as it's blocked on fetch */
2563               if(do_gr_profile)
2564                 DumpGranEventAndNode(GR_FETCH,CurrentTSO,node,p);
2565
2566 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
2567               if (debug & 0x04)
2568                   BlockedOnFetch[CurrentProc] = CurrentTSO; /*- StgTrue; -*/
2569
2570               if (debug & 0x10) {
2571                 if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
2572                   fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
2573                           CurrentTSO,CurrentTime[CurrentProc]);
2574                   exit (99);
2575                 } else {
2576                   TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
2577                 }
2578
2579                 CurrentTSO = Nil_closure;
2580               }
2581 #endif
2582             }
2583
2584           CurrentTime[CurrentProc] += gran_mtidytime;
2585
2586           /* Rescheduling is necessary */
2587           NeedToReSchedule = StgTrue;
2588
2589           return(1); 
2590         }
2591     }
2592   return(0);
2593 }
2594
2595 void 
2596 GranSimSpark(local,node)
2597 W_ local;
2598 P_ node;
2599 {
2600   ++SparksAvail;
2601   if(do_sp_profile)
2602     DumpSparkGranEvent(SP_SPARK,node);
2603
2604   /* Force the PE to take notice of the spark */
2605   if(DoAlwaysCreateThreads)
2606     newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
2607              FINDWORK,Nil_closure,Nil_closure,NULL);
2608
2609   if(local)
2610     ++TSO_LOCALSPARKS(CurrentTSO);
2611   else
2612     ++TSO_GLOBALSPARKS(CurrentTSO);
2613 }
2614
2615 void 
2616 GranSimSparkAt(spark,where,identifier)
2617 sparkq spark;
2618 P_  where;        /* This should be a node; alternatively could be a GA */
2619 I_ identifier;
2620 {
2621   PROC p = where_is(where);
2622   TIME exporttime;
2623
2624   if(do_sp_profile)
2625     DumpSparkGranEvent(SP_SPARKAT,SPARK_NODE(spark));
2626
2627   CurrentTime[CurrentProc] += gran_mpacktime;
2628
2629   exporttime = (CurrentTime[p] > CurrentTime[CurrentProc]? 
2630                 CurrentTime[p]: CurrentTime[CurrentProc])
2631                + gran_latency;
2632   
2633   newevent(p,CurrentProc,exporttime,MOVESPARK,Nil_closure,Nil_closure,spark);
2634
2635   CurrentTime[CurrentProc] += gran_mtidytime;
2636
2637   ++TSO_GLOBALSPARKS(CurrentTSO);
2638 }
2639
/* The current thread blocks: record the event (under GR profiling),
 * bump its block count, remember when it blocked (for later
 * block-time accounting), and hand the processor to the next thread. */
void 
GranSimBlock()
{
  if(do_gr_profile)
    DumpGranEvent(GR_BLOCK,CurrentTSO);

  ++TSO_BLOCKCOUNT(CurrentTSO);
  TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[CurrentProc];
  ActivateNextThread();
}
2650
2651 #endif  /* GRAN */
2652
2653 \end{code}
2654
2655 %****************************************************************************
2656 %
2657 \subsection[gc-GrAnSim]{Garbage collection routines for GrAnSim objects}
2658 %
2659 %****************************************************************************
2660
2661 Garbage collection code for the event queue.  We walk the event queue
2662 so that if the only reference to a TSO is in some event (e.g. RESUME),
2663 the TSO is still preserved.
2664
2665 \begin{code}
2666 #ifdef GRAN
2667
2668 extern smInfo StorageMgrInfo;
2669
2670 I_
2671 SaveEventRoots(num_ptr_roots)
2672 I_ num_ptr_roots;
2673 {
2674   eventq event = EventHd;
2675   while(event != NULL)
2676     {
2677       if(EVENT_TYPE(event) == RESUMETHREAD || 
2678          EVENT_TYPE(event) == MOVETHREAD || 
2679          EVENT_TYPE(event) == STARTTHREAD )
2680         StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
2681
2682       else if(EVENT_TYPE(event) == MOVESPARK)
2683         StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(EVENT_SPARK(event));
2684
2685       else if (EVENT_TYPE(event) == FETCHNODE ||
2686                EVENT_TYPE(event) == FETCHREPLY )
2687         {
2688           StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
2689           StorageMgrInfo.roots[num_ptr_roots++] = EVENT_NODE(event);
2690         }
2691
2692       event = EVENT_NEXT(event);
2693     }
2694   return(num_ptr_roots);
2695 }
2696
/* Record GC roots held by the pending-spark pools of every processor.
 *
 * At most MAX_SPARKS roots are saved; any sparks beyond that are pruned:
 * their nodes are dropped (set to Nil) and the excess tail of the pool
 * is disposed of.  Returns the next free index in the root array.
 */
I_
SaveSparkRoots(num_ptr_roots)
I_ num_ptr_roots;
{
  sparkq spark, /* prev, */ disposeQ=NULL;
  PROC proc;
  I_ i, sparkroots=0, prunedSparks=0;

#if defined(GRAN_CHECK) && defined(GRAN)
  if ( debug & 0x40 ) 
    fprintf(stderr,"D> Saving spark roots for GC ...\n");
#endif       

  for(proc = 0; proc < max_proc; ++proc) {
    for(i = 0; i < SPARK_POOLS; ++i) {
      for(/* prev = &PendingSparksHd[proc][i],*/ spark = PendingSparksHd[proc][i]; 
          spark != NULL; 
          /* prev = &SPARK_NEXT(spark), */ spark = SPARK_NEXT(spark))
        {
          if(++sparkroots <= MAX_SPARKS)
            {
#if defined(GRAN_CHECK) && defined(GRAN)
              if ( debug & 0x40 ) 
                fprintf(main_statsfile,"Saving Spark Root %d(proc: %d; pool: %d) -- 0x%lx\n",
                        num_ptr_roots,proc,i,SPARK_NODE(spark));
#endif       
              StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(spark);
            }
          else
            {
              /* over budget: drop the node and remember where the
                 to-be-disposed tail of this pool starts */
              SPARK_NODE(spark) = Nil_closure;
              if (prunedSparks==0) {
                disposeQ = spark;
                /*
                   *prev = NULL;
                */
              }
              prunedSparks++;
            }
        }  /* forall spark ... */
        /* Unlink and free the pruned tail of this pool, if any. */
        if (prunedSparks>0) {
          fprintf(main_statsfile,"Pruning and disposing %lu excess sparks (> %lu) on proc %d for GC purposes\n",
                  prunedSparks,MAX_SPARKS,proc);
          if (disposeQ == PendingSparksHd[proc][i])
            PendingSparksHd[proc][i] = NULL;
          else
            SPARK_NEXT(SPARK_PREV(disposeQ)) = NULL;
          DisposeSparkQ(disposeQ);
          prunedSparks = 0;
          disposeQ = NULL;
        }  
        }  /* forall i ... */
    }      /*forall proc .. */

  return(num_ptr_roots);
}
2753
2754 /*
2755    GC roots must be restored in *reverse order*.
2756    The recursion is a little ugly, but is better than
2757    in-place pointer reversal.
2758 */
2759
2760 static I_
2761 RestoreEvtRoots(event,num_ptr_roots)
2762 eventq event;
2763 I_ num_ptr_roots;
2764 {
2765   if(event != NULL)
2766     {
2767       num_ptr_roots = RestoreEvtRoots(EVENT_NEXT(event),num_ptr_roots);
2768
2769       if(EVENT_TYPE(event) == RESUMETHREAD || 
2770          EVENT_TYPE(event) == MOVETHREAD || 
2771          EVENT_TYPE(event) == STARTTHREAD )
2772         EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
2773
2774       else if(EVENT_TYPE(event) == MOVESPARK )
2775         SPARK_NODE(EVENT_SPARK(event)) = StorageMgrInfo.roots[--num_ptr_roots];
2776
2777       else if (EVENT_TYPE(event) == FETCHNODE ||
2778                EVENT_TYPE(event) == FETCHREPLY )
2779         {
2780           EVENT_NODE(event) = StorageMgrInfo.roots[--num_ptr_roots];
2781           EVENT_TSO(event) =  StorageMgrInfo.roots[--num_ptr_roots];
2782         }
2783     }
2784
2785   return(num_ptr_roots);
2786 }
2787
/* Public entry: restore the event-queue GC roots saved by
 * SaveEventRoots; must be passed the index that save returned (the
 * recursion in RestoreEvtRoots pops the roots in reverse order). */
I_ 
RestoreEventRoots(num_ptr_roots)
I_ num_ptr_roots;
{
  return(RestoreEvtRoots(EventHd,num_ptr_roots));
}
2794
2795 static I_
2796 RestoreSpkRoots(spark,num_ptr_roots,sparkroots)
2797 sparkq spark;
2798 I_ num_ptr_roots, sparkroots;
2799 {
2800   if(spark != NULL)
2801     {
2802       num_ptr_roots = RestoreSpkRoots(SPARK_NEXT(spark),num_ptr_roots,++sparkroots);
2803       if(sparkroots <= MAX_SPARKS)
2804         {
2805           P_ n = SPARK_NODE(spark);
2806           SPARK_NODE(spark) = StorageMgrInfo.roots[--num_ptr_roots];
2807 #if defined(GRAN_CHECK) && defined(GRAN)
2808           if ( debug & 0x40 ) 
2809             fprintf(main_statsfile,"Restoring Spark Root %d -- new: 0x%lx \n",
2810                     num_ptr_roots,SPARK_NODE(spark));
2811 #endif
2812         }
2813       else
2814 #if defined(GRAN_CHECK) && defined(GRAN)
2815           if ( debug & 0x40 ) 
2816             fprintf(main_statsfile,"Error in RestoreSpkRoots (%d; @ spark 0x%x): More than MAX_SPARKS (%d) sparks\n",
2817                     num_ptr_roots,SPARK_NODE(spark),MAX_SPARKS);
2818 #endif
2819
2820     }
2821
2822   return(num_ptr_roots);
2823 }
2824
2825 I_ 
2826 RestoreSparkRoots(num_ptr_roots)
2827 I_ num_ptr_roots;
2828 {
2829   PROC proc;
2830   I_   i;
2831
2832   /* NB: PROC is currently an unsigned datatype i.e. proc>=0 is always */
2833   /* true ((PROC)-1 == (PROC)255). So we need a second clause in the head */
2834   /* of the for loop. For i that is currently not necessary. C is really */
2835   /* impressive in datatype abstraction!   -- HWL */
2836
2837   for(proc = max_proc - 1; (proc >= 0) && (proc < max_proc); --proc) {
2838     for(i = SPARK_POOLS - 1; (i >= 0) && (i < SPARK_POOLS) ; --i) {
2839       num_ptr_roots = RestoreSpkRoots(PendingSparksHd[proc][i],num_ptr_roots,0);
2840     }
2841   }
2842   return(num_ptr_roots);
2843 }
2844
2845 #endif  /* GRAN */
2846
2847 \end{code}
2848
2849 %****************************************************************************
2850 %
2851 \subsection[GrAnSim-profile]{Writing profiling info for GrAnSim}
2852 %
2853 %****************************************************************************
2854
2855 Event dumping routines.
2856
2857 \begin{code}
2858 #ifdef GRAN 
2859
2860 DumpGranEvent(name,tso)
2861 enum gran_event_types name;
2862 P_ tso;
2863 {
2864   DumpRawGranEvent(CurrentProc,name,TSO_ID(tso));
2865 }
2866
2867 DumpSparkGranEvent(name,id)
2868 enum gran_event_types name;
2869 W_ id;
2870 {
2871   DumpRawGranEvent(CurrentProc,name,id);
2872 }
2873
2874 DumpGranEventAndNode(name,tso,node,proc)
2875 enum gran_event_types name;
2876 P_ tso, node;
2877 PROC proc;
2878 {
2879   PROC pe = CurrentProc;
2880   W_ id = TSO_ID(tso);
2881
2882   if(name > GR_EVENT_MAX)
2883     name = GR_EVENT_MAX;
2884
2885   if(do_gr_binary)
2886     {
2887       grputw(name);
2888       grputw(pe);
2889       grputw(CurrentTime[CurrentProc]);
2890       grputw(id);
2891     }
2892   else
2893     fprintf(gr_file,"PE %2u [%lu]: %s %lx \t0x%lx\t(from %2u)\n",
2894             pe,CurrentTime[CurrentProc],gran_event_names[name],id,node,proc);
2895 }
2896
2897 DumpRawGranEvent(pe,name,id)
2898 PROC pe;
2899 enum gran_event_types name;
2900 W_ id;
2901 {
2902   if(name > GR_EVENT_MAX)
2903     name = GR_EVENT_MAX;
2904
2905   if(do_gr_binary)
2906     {
2907       grputw(name);
2908       grputw(pe);
2909       grputw(CurrentTime[CurrentProc]);
2910       grputw(id);
2911     }
2912   else
2913     fprintf(gr_file,"PE %2u [%lu]: %s %lx\n",
2914             pe,CurrentTime[CurrentProc],gran_event_names[name],id);
2915 }
2916
/*
   Dump a GR_END record for a finished thread, either in binary form
   (a sequence of length-coded words) or as a single formatted line.
   The two branches must emit the same fields in the same order.
   Field key for the text format: SN spark name, ST started-at time,
   EXP exported flag, BB basic blocks, HA heap allocations, RT run
   time, BT block time (and count), FT fetch time (and count),
   LS/GS local/global sparks, MY mandatory-thread flag.
*/
DumpGranInfo(pe,tso,mandatory_thread)
PROC pe;
P_ tso;
I_ mandatory_thread;
{
  if(do_gr_binary)
    {
      grputw(GR_END);
      grputw(pe);
      grputw(CurrentTime[CurrentProc]);
      grputw(TSO_ID(tso));
      grputw(TSO_SPARKNAME(tso));
      grputw(TSO_STARTEDAT(tso));
      grputw(TSO_EXPORTED(tso));
      grputw(TSO_BASICBLOCKS(tso));
      grputw(TSO_ALLOCS(tso));
      grputw(TSO_EXECTIME(tso));
      grputw(TSO_BLOCKTIME(tso));
      grputw(TSO_BLOCKCOUNT(tso));
      grputw(TSO_FETCHTIME(tso));
      grputw(TSO_FETCHCOUNT(tso));
      grputw(TSO_LOCALSPARKS(tso));
      grputw(TSO_GLOBALSPARKS(tso));
      grputw(mandatory_thread);
    }
  else
    {
      /* NB: DumpGranEvent cannot be used because PE may be wrong (as well as the extra info) */
      fprintf(gr_file,"PE %2u [%lu]: END %lx, SN %lu, ST %lu, EXP %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu), LS %lu, GS %lu, MY %c\n"
              ,pe
              ,CurrentTime[CurrentProc]
              ,TSO_ID(tso)
              ,TSO_SPARKNAME(tso)
              ,TSO_STARTEDAT(tso)
              ,TSO_EXPORTED(tso)?'T':'F'
              ,TSO_BASICBLOCKS(tso)
              ,TSO_ALLOCS(tso)
              ,TSO_EXECTIME(tso)
              ,TSO_BLOCKTIME(tso)
              ,TSO_BLOCKCOUNT(tso)
              ,TSO_FETCHTIME(tso)
              ,TSO_FETCHCOUNT(tso)
              ,TSO_LOCALSPARKS(tso)
              ,TSO_GLOBALSPARKS(tso)
              ,mandatory_thread?'T':'F'
              );
    }
}
2965
2966 DumpTSO(tso)
2967 P_ tso;
2968 {
2969   fprintf(stderr,"TSO 0x%lx, NAME 0x%lx, ID %lu, LINK 0x%lx, TYPE %s\n"
2970           ,tso
2971           ,TSO_NAME(tso)
2972           ,TSO_ID(tso)
2973           ,TSO_LINK(tso)
2974           ,TSO_TYPE(tso)==T_MAIN?"MAIN":
2975            TSO_TYPE(tso)==T_FAIL?"FAIL":
2976            TSO_TYPE(tso)==T_REQUIRED?"REQUIRED":
2977            TSO_TYPE(tso)==T_ADVISORY?"ADVISORY":
2978            "???"
2979           );
2980           
2981   fprintf(stderr,"PC (0x%lx,0x%lx), ARG (0x%lx,0x%lx), SWITCH %lx0x\n"
2982           ,TSO_PC1(tso)
2983           ,TSO_PC2(tso)
2984           ,TSO_ARG1(tso)
2985           ,TSO_ARG2(tso)
2986           ,TSO_SWITCH(tso)
2987           );
2988
2989   fprintf(gr_file,"SN %lu, ST %lu, GBL %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu) LS %lu, GS %lu\n"
2990           ,TSO_SPARKNAME(tso)
2991           ,TSO_STARTEDAT(tso)
2992           ,TSO_EXPORTED(tso)?'T':'F'
2993           ,TSO_BASICBLOCKS(tso)
2994           ,TSO_ALLOCS(tso)
2995           ,TSO_EXECTIME(tso)
2996           ,TSO_BLOCKTIME(tso)
2997           ,TSO_BLOCKCOUNT(tso)
2998           ,TSO_FETCHTIME(tso)
2999           ,TSO_FETCHCOUNT(tso)
3000           ,TSO_LOCALSPARKS(tso)
3001           ,TSO_GLOBALSPARKS(tso)
3002           );
3003 }
3004
3005 /*
3006    Output a terminate event and an 8-byte time.
3007 */
3008
/* Emit a GR_TERMINATE event and then the final virtual time `v' as
   exactly 8 bytes, most significant first.  On a 4-byte TIME the
   high word is written as four zero bytes so the record length is
   fixed regardless of word size.
   NOTE(review): passes 0 as the TSO to DumpGranEvent, which applies
   TSO_ID() to it -- relies on the macro tolerating a null pointer;
   confirm. */
grterminate(v)
TIME v;
{
  DumpGranEvent(GR_TERMINATE,0);

  if(sizeof(TIME)==4)
    {
      /* 32-bit TIME: pad the high 4 bytes with zeros */
      putc('\0',gr_file);
      putc('\0',gr_file);
      putc('\0',gr_file);
      putc('\0',gr_file);
    }
  else
    {
      /* 64-bit TIME: high 4 bytes of the value */
      putc(v >> 56l,gr_file);
      putc((v >> 48l)&0xffl,gr_file);
      putc((v >> 40l)&0xffl,gr_file);
      putc((v >> 32l)&0xffl,gr_file);
    }
  /* low 4 bytes, common to both cases */
  putc((v >> 24l)&0xffl,gr_file);
  putc((v >> 16l)&0xffl,gr_file);
  putc((v >> 8l)&0xffl,gr_file);
  putc(v&0xffl,gr_file);
}
3033
3034 /*
3035    Length-coded output: first 3 bits contain length coding
3036
3037      00x        1 byte
3038      01x        2 bytes
3039      10x        4 bytes
3040      110        8 bytes
3041      111        5 or 9 bytes
3042 */
3043
/* Write `v' to gr_file in the length-coded format described above:
   the top bits of the first byte select the length class, the rest
   carry the value, most significant byte first. */
grputw(v)
TIME v;
{
  if(v <= 0x3fl)                /* fits 6 bits: 1 byte, tag 00 */
    {
      fputc(v & 0x3f,gr_file);
    }

  else if (v <= 0x3fffl)        /* fits 14 bits: 2 bytes, tag 01 */
    {
      fputc((v >> 8l)|0x40l,gr_file);
      fputc(v&0xffl,gr_file);
    }
  
  else if (v <= 0x3fffffffl)    /* fits 30 bits: 4 bytes, tag 10 */
    {
      fputc((v >> 24l)|0x80l,gr_file);
      fputc((v >> 16l)&0xffl,gr_file);
      fputc((v >> 8l)&0xffl,gr_file);
      fputc(v&0xffl,gr_file);
    }

  else if (sizeof(TIME) == 4)   /* 32-bit TIME overflowing 30 bits:
                                   tag byte 0x70 then 4 value bytes */
    {
      fputc(0x70,gr_file);
      fputc((v >> 24l)&0xffl,gr_file);
      fputc((v >> 16l)&0xffl,gr_file);
      fputc((v >> 8l)&0xffl,gr_file);
      fputc(v&0xffl,gr_file);
    }

  else                          /* 64-bit TIME: 8 bytes (tag 110) or
                                   9 bytes (tag byte 0x70 + 8) */
    {
      if (v <= 0x3fffffffffffffl)
        putc((v >> 56l)|0x60l,gr_file);
      else
        {
          putc(0x70,gr_file);
          putc((v >> 56l)&0xffl,gr_file);
        }

      putc((v >> 48l)&0xffl,gr_file);
      putc((v >> 40l)&0xffl,gr_file);
      putc((v >> 32l)&0xffl,gr_file);
      putc((v >> 24l)&0xffl,gr_file);
      putc((v >> 16l)&0xffl,gr_file);
      putc((v >> 8l)&0xffl,gr_file);
      putc(v&0xffl,gr_file);
    }
}
3094 #endif  /* GRAN */
3095
3096 \end{code}
3097
3098 %****************************************************************************
3099 %
3100 \subsection[GrAnSim-debug]{Debugging routines  for GrAnSim}
3101 %
3102 %****************************************************************************
3103
3104 Debugging routines, mainly for GrAnSim. They should really be in a separate file.
3105
The first couple of routines are general ones (look also into
c-as-asm/StgDebug.lc).
3108
3109 \begin{code}
3110
3111 #define NULL_REG_MAP        /* Not threaded */
3112 #include "stgdefs.h"
3113
/*
   Map an info pointer to a printable name for its closure type.
   In the sequential RTS (neither PAR nor GRAN) only the info tag is
   available, so the answer is coarse; in the PAR/GRAN builds the
   full INFO_TYPE is decoded.  Always returns a pointer to a static
   string, never NULL.
*/
char *
info_hdr_type(info_ptr)
W_ info_ptr;
{
#if ! defined(PAR) && !defined(GRAN)
  switch (INFO_TAG(info_ptr))
    {
      case INFO_OTHER_TAG:
        return("OTHER_TAG");
/*    case INFO_IND_TAG:
        return("IND_TAG");
*/    default:
        return("TAG<n>");
    }
#else /* PAR */
  /* NOTE(review): the comment on the #else says "PAR" but this
     branch is also compiled for GRAN. */
  switch(INFO_TYPE(info_ptr))
    {
      case INFO_SPEC_U_TYPE:
        return("SPECU");

      case INFO_SPEC_N_TYPE:
        return("SPECN");

      case INFO_GEN_U_TYPE:
        return("GENU");

      case INFO_GEN_N_TYPE:
        return("GENN");

      case INFO_DYN_TYPE:
        return("DYN");

      /* 
      case INFO_DYN_TYPE_N:
        return("DYNN");

      case INFO_DYN_TYPE_U:
        return("DYNU");
      */

      case INFO_TUPLE_TYPE:
        return("TUPLE");

      case INFO_DATA_TYPE:
        return("DATA");

      case INFO_MUTUPLE_TYPE:
        return("MUTUPLE");

      case INFO_IMMUTUPLE_TYPE:
        return("IMMUTUPLE");

      case INFO_STATIC_TYPE:
        return("STATIC");

      case INFO_CONST_TYPE:
        return("CONST");

      case INFO_CHARLIKE_TYPE:
        return("CHAR");

      case INFO_INTLIKE_TYPE:
        return("INT");

      case INFO_BH_TYPE:
        return("BHOLE");

      case INFO_IND_TYPE:
        return("IND");

      case INFO_CAF_TYPE:
        return("CAF");

      case INFO_FETCHME_TYPE:
        return("FETCHME");

      case INFO_BQ_TYPE:
        return("BQ");

      /*
      case INFO_BQENT_TYPE:
        return("BQENT");
      */

      case INFO_TSO_TYPE:
        return("TSO");

      case INFO_STKO_TYPE:
        return("STKO");

      default:
        fprintf(stderr,"Unknown header type %lu\n",INFO_TYPE(info_ptr));
        return("??");
      }
#endif /* PAR */
}
3210         
3211 /*
3212 @var_hdr_size@ computes the size of the variable header for a closure.
3213 */
3214
3215 I_
3216 var_hdr_size(node)
3217 P_ node;
3218 {
3219   switch(INFO_TYPE(INFO_PTR(node)))
3220     {
3221       case INFO_SPEC_U_TYPE:    return(0);      /* by decree */
3222       case INFO_SPEC_N_TYPE:    return(0);
3223       case INFO_GEN_U_TYPE:     return(GEN_VHS);
3224       case INFO_GEN_N_TYPE:     return(GEN_VHS);
3225       case INFO_DYN_TYPE:       return(DYN_VHS);
3226       /*
3227       case INFO_DYN_TYPE_N:     return(DYN_VHS);
3228       case INFO_DYN_TYPE_U:     return(DYN_VHS);
3229       */
3230       case INFO_TUPLE_TYPE:     return(TUPLE_VHS);
3231       case INFO_DATA_TYPE:      return(DATA_VHS);
3232       case INFO_MUTUPLE_TYPE:   return(MUTUPLE_VHS);
3233       case INFO_IMMUTUPLE_TYPE: return(MUTUPLE_VHS); /* same layout */
3234       case INFO_STATIC_TYPE:    return(STATIC_VHS);
3235       case INFO_CONST_TYPE:     return(0);
3236       case INFO_CHARLIKE_TYPE:  return(0);
3237       case INFO_INTLIKE_TYPE:   return(0);
3238       case INFO_BH_TYPE:        return(0);
3239       case INFO_IND_TYPE:       return(0);
3240       case INFO_CAF_TYPE:       return(0);
3241       case INFO_FETCHME_TYPE:   return(0);
3242       case INFO_BQ_TYPE:        return(0);
3243       /*
3244       case INFO_BQENT_TYPE:     return(0);
3245       */
3246       case INFO_TSO_TYPE:       return(TSO_VHS);
3247       case INFO_STKO_TYPE:      return(STKO_VHS);
3248       default:
3249         fprintf(stderr,"Unknown info type 0x%lx (%lu)\n", INFO_PTR(node),
3250           INFO_TYPE(INFO_PTR(node)));
3251         return(0);
3252     }
3253 }
3254
3255
3256 /* Determine the size and number of pointers for this kind of closure */
3257 void
3258 size_and_ptrs(node,size,ptrs)
3259 P_ node;
3260 W_ *size, *ptrs;
3261 {
3262   switch(INFO_TYPE(INFO_PTR(node)))
3263     {
3264       case INFO_SPEC_U_TYPE:
3265       case INFO_SPEC_N_TYPE:
3266         *size = INFO_SIZE(INFO_PTR(node));          /* New for 0.24; check */
3267         *ptrs = INFO_NoPTRS(INFO_PTR(node));        /* that! -- HWL */
3268         /* 
3269         *size = SPEC_CLOSURE_SIZE(node);
3270         *ptrs = SPEC_CLOSURE_NoPTRS(node);
3271         */
3272         break;
3273
3274       case INFO_GEN_U_TYPE:
3275       case INFO_GEN_N_TYPE:
3276         *size = GEN_CLOSURE_SIZE(node);
3277         *ptrs = GEN_CLOSURE_NoPTRS(node);
3278         break;
3279
3280       /* 
3281       case INFO_DYN_TYPE_U:
3282       case INFO_DYN_TYPE_N:
3283       */
3284       case INFO_DYN_TYPE:
3285         *size = DYN_CLOSURE_SIZE(node);
3286         *ptrs = DYN_CLOSURE_NoPTRS(node);
3287         break;
3288
3289       case INFO_TUPLE_TYPE:
3290         *size = TUPLE_CLOSURE_SIZE(node);
3291         *ptrs = TUPLE_CLOSURE_NoPTRS(node);
3292         break;
3293
3294       case INFO_DATA_TYPE:
3295         *size = DATA_CLOSURE_SIZE(node);
3296         *ptrs = DATA_CLOSURE_NoPTRS(node);
3297         break;
3298
3299       case INFO_IND_TYPE:
3300         *size = IND_CLOSURE_SIZE(node);
3301         *ptrs = IND_CLOSURE_NoPTRS(node);
3302         break;
3303
3304 /* ToDo: more (WDP) */
3305
3306       /* Don't know about the others */
3307       default:
3308         *size = *ptrs = 0;
3309         break;
3310     }
3311 }
3312
/*
   Print a one-closure dump to stderr: the node's address, optional
   GA / cost-centre / GrAnSim-bitmask headers (per build flags), its
   info pointer and decoded type, then the body words -- pointers
   first, tagged [P], and non-pointers after, tagged [D], six per
   line.  The variable header words themselves are skipped.
   NOTE(review): size/ptrs are declared I_ but size_and_ptrs takes
   W_* -- a signed/unsigned pointer mismatch tolerated by old
   compilers; confirm against the prototype.
*/
void
DEBUG_PRINT_NODE(node)
P_ node;
{
   W_ info_ptr = INFO_PTR(node);
   I_ size = 0, ptrs = 0, i, vhs = 0;
   char *info_type = info_hdr_type(info_ptr);

   size_and_ptrs(node,&size,&ptrs);
   vhs = var_hdr_size(node);

   fprintf(stderr,"Node: 0x%lx", (W_) node);

#if defined(PAR)
   fprintf(stderr," [GA: 0x%lx]",GA(node));
#endif

#if defined(USE_COST_CENTRES)
   fprintf(stderr," [CC: 0x%lx]",CC_HDR(node));
#endif

#if defined(GRAN)
   fprintf(stderr," [Bitmask: 0%lo]",PROCS(node));
#endif

   fprintf(stderr," IP: 0x%lx (%s), size %ld, %ld ptrs\n",
                  info_ptr,info_type,size,ptrs);

   /* For now, we ignore the variable header */

   for(i=0; i < size; ++i)
     {
       if(i == 0)
         fprintf(stderr,"Data: ");

       else if(i % 6 == 0)
         fprintf(stderr,"\n      ");

       if(i < ptrs)
         fprintf(stderr," 0x%lx[P]",*(node+_FHS+vhs+i));
       else
         fprintf(stderr," %lu[D]",*(node+_FHS+vhs+i));
     }
   fprintf(stderr, "\n");
}
3358
3359
3360 #define INFO_MASK       0x80000000
3361
/*
   Recursively print the closure graph rooted at `node'.  Cycles are
   broken by temporarily setting the INFO_MASK bit in the node's
   info pointer while its children are visited ("may be dangerous":
   it mutates the live heap during printing and is not safe if a GC
   could run).  Only the first `ptrs' body words are followed;
   `size' is computed but not otherwise used here.
*/
void
DEBUG_TREE(node)
P_ node;
{
  W_ size = 0, ptrs = 0, i, vhs = 0;

  /* Don't print cycles */
  if((INFO_PTR(node) & INFO_MASK) != 0)
    return;

  size_and_ptrs(node,&size,&ptrs);
  vhs = var_hdr_size(node);

  DEBUG_PRINT_NODE(node);
  fprintf(stderr, "\n");

  /* Mark the node -- may be dangerous */
  INFO_PTR(node) |= INFO_MASK;

  /* recurse into each pointer field (after fixed + variable header) */
  for(i = 0; i < ptrs; ++i)
    DEBUG_TREE((P_)node[i+vhs+_FHS]);

  /* Unmark the node */
  INFO_PTR(node) &= ~INFO_MASK;
}
3387
3388
/*
   Print the contents of the info table of closure `node' to stderr:
   type name, entry code, size and pointer count, plus whatever
   parallel / profiling / GC fields the build configuration
   compiles in.
*/
void
DEBUG_INFO_TABLE(node)
P_ node;
{
  W_ info_ptr = INFO_PTR(node);
  char *ip_type = info_hdr_type(info_ptr);

  fprintf(stderr,"%s Info Ptr @0x%lx; Entry: 0x%lx; Size: %lu; Ptrs: %lu\n\n",
                 ip_type,info_ptr,(W_) ENTRY_CODE(info_ptr),INFO_SIZE(info_ptr),INFO_NoPTRS(info_ptr));
#if defined(PAR)
  fprintf(stderr,"Enter Flush Entry: 0x%lx;\tExit Flush Entry: 0x%lx\n",INFO_FLUSHENT(info_ptr),INFO_FLUSH(info_ptr));
#endif

#if defined(USE_COST_CENTRES)
  fprintf(stderr,"Cost Centre (???):       0x%lx\n",INFO_CAT(info_ptr));
#endif

#if defined(_INFO_COPYING)
  /* two-space (copying) collector entry points */
  fprintf(stderr,"Evacuate Entry:    0x%lx;\tScavenge Entry: 0x%lx\n",
          INFO_EVAC_2S(info_ptr),INFO_SCAV_2S(info_ptr));
#endif

#if defined(_INFO_COMPACTING)
  /* one-space (compacting) collector entry points */
  fprintf(stderr,"Scan Link:         0x%lx;\tScan Move:      0x%lx\n",
          (W_) INFO_SCAN_LINK_1S(info_ptr), (W_) INFO_SCAN_MOVE_1S(info_ptr));
  fprintf(stderr,"Mark:              0x%lx;\tMarked:         0x%lx;\t",
          (W_) INFO_MARK_1S(info_ptr), (W_) INFO_MARKED_1S(info_ptr));
#if 0 /* avoid INFO_TYPE */
  if(BASE_INFO_TYPE(info_ptr)==INFO_SPEC_TYPE)
    fprintf(stderr,"plus specialised code\n");
  else
    fprintf(stderr,"Marking:           0x%lx\n",(W_) INFO_MARKING_1S(info_ptr));
#endif /* 0 */
#endif
}
3424 #endif /* GRAN */
3425
3426 \end{code}
3427
The remaining debugging routines are more or less specific to GrAnSim.
3429
3430 \begin{code}
3431 #if defined(GRAN) && defined(GRAN_CHECK)
3432 void
3433 DEBUG_CURR_THREADQ(verbose) 
3434 I_ verbose;
3435
3436   fprintf(stderr,"Thread Queue on proc %d: ", CurrentProc);
3437   DEBUG_THREADQ(ThreadQueueHd, verbose);
3438 }
3439
3440 void 
3441 DEBUG_THREADQ(closure, verbose) 
3442 P_ closure;
3443 I_ verbose;
3444 {
3445  P_ x;
3446
3447  fprintf(stderr,"Thread Queue: ");
3448  for (x=closure; x!=Nil_closure; x=TSO_LINK(x))
3449    if (verbose) 
3450      DEBUG_TSO(x,0);
3451    else
3452      fprintf(stderr," 0x%x",x);
3453
3454  if (closure==Nil_closure)
3455    fprintf(stderr,"NIL\n");
3456  else
3457    fprintf(stderr,"\n");
3458 }
3459
/* Printable names for the TSO type codes, indexed by TSO_TYPE.
   NOTE(review): the order (T_MAIN, T_REQUIRED, T_ADVISORY, T_FAIL)
   must match the numeric values declared in Threads.lh -- confirm
   there before trusting this table. */
static char *type_name[] = { "T_MAIN", "T_REQUIRED", "T_ADVISORY", "T_FAIL"};
3462
3463 void 
3464 DEBUG_TSO(closure,verbose) 
3465 P_ closure;
3466 I_ verbose;
3467 {
3468  
3469  if (closure==Nil_closure) {
3470    fprintf(stderr,"TSO at 0x%x is Nil_closure!\n");
3471    return;
3472  }
3473
3474  fprintf(stderr,"TSO at 0x%x has the following contents:\n",closure);
3475
3476  fprintf(stderr,"> Name: 0x%x",TSO_NAME(closure));
3477  fprintf(stderr,"\tLink: 0x%x\n",TSO_LINK(closure));
3478  fprintf(stderr,"> Id: 0x%x",TSO_ID(closure));
3479 #if defined(GRAN_CHECK) && defined(GRAN)
3480  if (debug & 0x10)
3481    fprintf(stderr,"\tType: %s  %s\n",
3482            type_name[TSO_TYPE(closure) & ~FETCH_MASK_TSO],
3483            (TSO_TYPE(closure) & FETCH_MASK_TSO) ? "SLEEPING" : "");
3484  else
3485    fprintf(stderr,"\tType: %s\n",type_name[TSO_TYPE(closure)]);
3486 #else
3487  fprintf(stderr,"\tType: %s\n",type_name[TSO_TYPE(closure)]);
3488 #endif
3489  fprintf(stderr,"> PC1:  0x%x",TSO_PC1(closure));
3490  fprintf(stderr,"\tPC2:  0x%x\n",TSO_PC2(closure));
3491  fprintf(stderr,"> ARG1: 0x%x",TSO_ARG1(closure));
3492  fprintf(stderr,"\tARG2: 0x%x\n",TSO_ARG2(closure));
3493  fprintf(stderr,"> SWITCH: 0x%x\n", TSO_SWITCH(closure));
3494
3495  if (verbose) {
3496    fprintf(stderr,"} LOCKED: 0x%x",TSO_LOCKED(closure));
3497    fprintf(stderr,"\tSPARKNAME: 0x%x\n", TSO_SPARKNAME(closure));
3498    fprintf(stderr,"} STARTEDAT: 0x%x", TSO_STARTEDAT(closure));
3499    fprintf(stderr,"\tEXPORTED: 0x%x\n", TSO_EXPORTED(closure));
3500    fprintf(stderr,"} BASICBLOCKS: 0x%x", TSO_BASICBLOCKS(closure));
3501    fprintf(stderr,"\tALLOCS: 0x%x\n", TSO_ALLOCS(closure));
3502    fprintf(stderr,"} EXECTIME: 0x%x", TSO_EXECTIME(closure));
3503    fprintf(stderr,"\tFETCHTIME: 0x%x\n", TSO_FETCHTIME(closure));
3504    fprintf(stderr,"} FETCHCOUNT: 0x%x", TSO_FETCHCOUNT(closure));
3505    fprintf(stderr,"\tBLOCKTIME: 0x%x\n", TSO_BLOCKTIME(closure));
3506    fprintf(stderr,"} BLOCKCOUNT: 0x%x", TSO_BLOCKCOUNT(closure));
3507    fprintf(stderr,"\tBLOCKEDAT: 0x%x\n", TSO_BLOCKEDAT(closure));
3508    fprintf(stderr,"} GLOBALSPARKS: 0x%x", TSO_GLOBALSPARKS(closure));
3509    fprintf(stderr,"\tLOCALSPARKS: 0x%x\n", TSO_LOCALSPARKS(closure));
3510  }
3511 }
3512
3513 void 
3514 DEBUG_EVENT(event, verbose) 
3515 eventq event;
3516 I_ verbose;
3517 {
3518   if (verbose) {
3519     print_event(event);
3520   }else{
3521     fprintf(stderr," 0x%x",event);
3522   }
3523 }
3524
3525 void
3526 DEBUG_EVENTQ(verbose)
3527 I_ verbose;
3528 {
3529  eventq x;
3530
3531  fprintf(stderr,"Eventq (hd @0x%x):\n",EventHd);
3532  for (x=EventHd; x!=NULL; x=EVENT_NEXT(x)) {
3533    DEBUG_EVENT(x,verbose);
3534  }
3535  if (EventHd==NULL) 
3536    fprintf(stderr,"NIL\n");
3537  else
3538    fprintf(stderr,"\n");
3539 }
3540
3541 void 
3542 DEBUG_SPARK(spark, verbose) 
3543 sparkq spark;
3544 I_ verbose;
3545 {
3546   if (verbose)
3547     print_spark(spark);
3548   else
3549     fprintf(stderr," 0x%x",spark);
3550 }
3551
3552 void 
3553 DEBUG_SPARKQ(spark,verbose) 
3554 sparkq spark;
3555 I_ verbose;
3556 {
3557  sparkq x;
3558
3559  fprintf(stderr,"Sparkq (hd @0x%x):\n",spark);
3560  for (x=spark; x!=NULL; x=SPARK_NEXT(x)) {
3561    DEBUG_SPARK(x,verbose);
3562  }
3563  if (spark==NULL) 
3564    fprintf(stderr,"NIL\n");
3565  else
3566    fprintf(stderr,"\n");
3567 }
3568
/* Print the spark queue of the current processor. */
void 
DEBUG_CURR_SPARKQ(verbose) 
I_ verbose;
{
  DEBUG_SPARKQ(SparkQueueHd,verbose);
}
3575
3576 void 
3577 DEBUG_PROC(proc,verbose)
3578 I_ proc;
3579 I_ verbose;
3580 {
3581   fprintf(stderr,"Status of proc %d at time %d (0x%x): %s\n",
3582           proc,CurrentTime[proc],CurrentTime[proc],
3583           (CurrentProc==proc)?"ACTIVE":"INACTIVE");
3584   DEBUG_THREADQ(RunnableThreadsHd[proc],verbose & 0x2);
3585   if ( (CurrentProc==proc) )
3586     DEBUG_TSO(CurrentTSO,1);
3587
3588   if (EventHd!=NULL)
3589     fprintf(stderr,"Next event (%s) is on proc %d\n",
3590             event_names[EVENT_TYPE(EventHd)],EVENT_PROC(EventHd));
3591
3592   if (verbose & 0x1) {
3593     fprintf(stderr,"\nREQUIRED sparks: ");
3594     DEBUG_SPARKQ(PendingSparksHd[proc][REQUIRED_POOL],1);
3595     fprintf(stderr,"\nADVISORY_sparks: ");
3596     DEBUG_SPARKQ(PendingSparksHd[proc][ADVISORY_POOL],1);
3597   }
3598 }
3599
/* Shorthands intended to be called by hand from a debugger. */

/* Debug CurrentTSO (prints the active processor, then the TSO) */
void
DCT(){ 
  fprintf(stderr,"Current Proc: %d\n",CurrentProc);
  DEBUG_TSO(CurrentTSO,1);
}

/* Debug Current Processor (verbose thread queue, no spark pools) */
void
DCP(){ DEBUG_PROC(CurrentProc,2); }

/* Shorthand for debugging event queue */
void
DEQ() { DEBUG_EVENTQ(1); }

/* Shorthand for debugging spark queue */
void
DSQ() { DEBUG_CURR_SPARKQ(1); }

/* Shorthand for printing a node */
void
DN(P_ node) { DEBUG_PRINT_NODE(node); }
3622
3623 #endif /* GRAN */
3624 \end{code}
3625
3626
3627 %****************************************************************************
3628 %
3629 \subsection[qp-profile]{Quasi-Parallel Profiling}
3630 %
3631 %****************************************************************************
3632
3633 \begin{code}
3634 #ifndef GRAN
3635 I_ do_qp_prof;
3636 FILE *qp_file;
3637
/* *Virtual* time -- accumulated user CPU time -- in milliseconds. */
long 
qp_elapsed_time()
{
    double secs = usertime();

    return ((long) (secs * 1e3));
}
3644
3645 static void
3646 init_qp_profiling(STG_NO_ARGS)
3647 {
3648     I_ i;
3649     char qp_filename[32];
3650
3651     sprintf(qp_filename, "%0.24s.qp", prog_argv[0]);
3652     if ((qp_file = fopen(qp_filename,"w")) == NULL ) {
3653         fprintf(stderr, "Can't open quasi-parallel profile report file %s\n", 
3654             qp_filename);
3655         do_qp_prof = 0;
3656     } else {
3657         fputs(prog_argv[0], qp_file);
3658         for(i = 1; prog_argv[i]; i++) {
3659             fputc(' ', qp_file);
3660             fputs(prog_argv[i], qp_file);
3661         }
3662         fprintf(qp_file, "+RTS -C%ld -t%ld\n", contextSwitchTime, MaxThreads);
3663         fputs(time_str(), qp_file);
3664         fputc('\n', qp_file);
3665     }
3666 }
3667
/* Log a zero-argument QP event: virtual time, thread id, and the
   info pointer of the closure the thread is at.
   NOTE(review): tid is I_ but printed with %lu -- same width on
   this RTS, but strictly a signedness mismatch; confirm. */
void 
QP_Event0(tid, node)
I_ tid;
P_ node;
{
    fprintf(qp_file, "%lu ** %lu 0x%lx\n", qp_elapsed_time(), tid, INFO_PTR(node));
}
3675
/* Log a one-thread QP event `event' (e.g. a state transition):
   virtual time, event string, then the TSO's id and name. */
void 
QP_Event1(event, tso)
char *event;
P_ tso;
{
    fprintf(qp_file, "%lu %s %lu 0x%lx\n", qp_elapsed_time(), event,
            TSO_ID(tso), (W_) TSO_NAME(tso));
}
3684
/* Log a two-thread QP event `event' (e.g. one thread acting on
   another): virtual time, event string, then id and name of both
   TSOs. */
void 
QP_Event2(event, tso1, tso2)
char *event;
P_ tso1, tso2;
{
    fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n", qp_elapsed_time(), event,
            TSO_ID(tso1), (W_) TSO_NAME(tso1), TSO_ID(tso2), (W_) TSO_NAME(tso2));
}
3693 #endif /* 0 */
3694 #endif /* GRAN */
3695
3696 #if defined(CONCURRENT) && !defined(GRAN)
3697 /* romoluSnganpu' SamuS! */ 
3698
/* Dummy processor state for the real-concurrency (non-GrAnSim) RTS.
   NOTE(review): Idlers = 32 presumably reflects the width of the
   IdleProcs bitmask (~0 == all idle) -- confirm against the
   scheduler code that reads these. */
unsigned CurrentProc = 0;
W_ IdleProcs = ~0l, Idlers = 32; 
3701
/* No-op instrumentation stubs: when the RTS is built for real
   concurrency without the GrAnSim simulator, the GranSim hooks
   compiled into the generated code must still link, but do
   nothing. */

void 
GranSimAllocate(n,node,liveness)
I_ n;
P_ node;
W_ liveness;
{ }

void 
GranSimUnallocate(n,node,liveness)
W_ n;
P_ node;
W_ liveness;
{ }


void 
GranSimExec(ariths,branches,loads,stores,floats)
W_ ariths,branches,loads,stores,floats;
{ }
3721
3722 I_ 
3723 GranSimFetch(node /* , liveness_mask */ )
3724 P_ node;
3725 /* I_ liveness_mask; */
3726 { }
3727
/* No-op stub: spark events are not traced outside GrAnSim. */
void 
GranSimSpark(local,node)
W_ local;
P_ node;
{ }

#if 0
void 
GranSimSparkAt(spark,where,identifier)
sparkq spark;
P_  where;        /* This should be a node; alternatively could be a GA */
I_ identifier;
{ }
#endif

/* No-op stub: blocking is not simulated outside GrAnSim. */
void 
GranSimBlock()
{ }
3746 #endif 
3747
3748 \end{code}
3749