2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1995
4 %************************************************************************
6 \section[Threads.lc]{Thread Control Routines}
8 %************************************************************************
10 %************************************************************************
12 \subsection[thread-overview]{Overview of the Thread Management System}
14 %************************************************************************
16 %************************************************************************
18 \subsection[thread-decls]{Thread Declarations}
20 %************************************************************************
22 % I haven't checked if GRAN can work with QP profiling. But as we use our
23 % own profiling (GR profiling) that should be irrelevant. -- HWL
27 #if defined(CONCURRENT) /* the whole module! */
29 # define NON_POSIX_SOURCE /* so says Solaris */
37 static void init_qp_profiling(STG_NO_ARGS); /* forward decl */
40 @AvailableStack@ is used to determine whether an existing stack can be
41 reused without new allocation, so reducing garbage collection, and
42 stack setup time. At present, it is only used for the first stack
43 chunk of a thread, the one that's got
44 @RTSflags.ConcFlags.stkChunkSize@ words.
/* Cache of a reusable first stack chunk (and, presumably analogously, a
   reusable TSO -- see the prose above): lets thread creation skip fresh
   allocation and reduce GC pressure.  Prelude_Z91Z93_closure (the []
   closure) serves as the "nothing cached" sentinel throughout this file. */
47 P_ AvailableStack = Prelude_Z91Z93_closure;
48 P_ AvailableTSO = Prelude_Z91Z93_closure;
51 Macros for dealing with the new and improved GA field for simulating
52 parallel execution. Based on @CONCURRENT@ package. The GA field now
53 contains a mask, where the n-th bit stands for the n-th processor,
54 on which this data can be found. In case of multiple copies, several bits
55 are set. The total number of processors is bounded by @MAX_PROC@,
56 which should be <= the length of a word in bits. -- HWL
58 {{GranSim.lc}Daq ngoq' roQlu'ta'}
59 (Code has been moved to GranSim.lc).
61 %****************************************************************
63 \subsection[thread-getthread]{The Thread Scheduler}
65 %****************************************************************
67 This is the heart of the thread scheduling code.
69 Most of the changes for GranSim are in this part of the RTS.
70 Especially the @ReSchedule@ routine has been blown up quite a lot.
71 It now contains the top-level event-handling loop.
73 Parts of the code that are not necessary for GranSim, but convenient to
74 have when developing it are marked with a @GRAN_CHECK@ variable.
/* Register table of the thread currently being executed. */
77 STGRegisterTable *CurrentRegTable = NULL;
82 /* Only needed for GranSim Light; costs of operations during rescheduling
83 are associated to the virtual processor on which ActiveTSO is living */
85 rtsBool __resched = rtsFalse; /* debugging only !!*/
87 /* Pointers to the head and tail of the runnable queues for each PE */
88 /* In GranSim Light only the thread/spark-queues of proc 0 are used */
89 P_ RunnableThreadsHd[MAX_PROC];
90 P_ RunnableThreadsTl[MAX_PROC];
92 P_ WaitThreadsHd[MAX_PROC];
93 P_ WaitThreadsTl[MAX_PROC];
/* Per-PE spark pools; indexed by PE and by pool (REQUIRED/ADVISORY). */
95 sparkq PendingSparksHd[MAX_PROC][SPARK_POOLS];
96 sparkq PendingSparksTl[MAX_PROC][SPARK_POOLS];
98 /* One clock for each PE */
99 W_ CurrentTime[MAX_PROC];
101 /* Useful to restrict communication; cf fishing model in GUM */
102 I_ OutstandingFetches[MAX_PROC], OutstandingFishes[MAX_PROC];
104 /* Status of each PE (new since but independent of GranSim Light) */
105 enum proc_status procStatus[MAX_PROC];
107 #if defined(GRAN) && defined(GRAN_CHECK)
108 /* To check if the RTS ever tries to run a thread that should be blocked
109 because of fetching remote data */
110 P_ BlockedOnFetch[MAX_PROC];
113 W_ SparksAvail = 0; /* How many sparks are available */
114 W_ SurplusThreads = 0; /* How many excess threads are there */
116 TIME SparkStealTime();
/* NOTE(review): the declarations below shadow the per-PE arrays above with
   scalar/1-D variants; presumably they sit in an #else (non-GRAN, plain
   CONCURRENT) branch whose directive is elided in this excerpt -- verify
   against the full file. */
120 P_ RunnableThreadsHd = Prelude_Z91Z93_closure;
121 P_ RunnableThreadsTl = Prelude_Z91Z93_closure;
123 P_ WaitingThreadsHd = Prelude_Z91Z93_closure;
124 P_ WaitingThreadsTl = Prelude_Z91Z93_closure;
126 TYPE_OF_SPARK PendingSparksBase[SPARK_POOLS];
127 TYPE_OF_SPARK PendingSparksLim[SPARK_POOLS];
129 TYPE_OF_SPARK PendingSparksHd[SPARK_POOLS];
130 TYPE_OF_SPARK PendingSparksTl[SPARK_POOLS];
132 #endif /* GRAN ; HWL */
/* Non-local jump target for the main scheduler loop in ScheduleThreads;
   ReSchedule longjmp()s here to run the thread at the queue head. */
134 static jmp_buf scheduler_loop;
/* Thread accounting: required threads must run; advisory ones are
   bounded by RTSflags.ConcFlags.maxThreads (see ScheduleThreads). */
136 I_ required_thread_count = 0;
137 I_ advisory_thread_count = 0;
139 EXTFUN(resumeThread);
141 /* Misc prototypes */
143 P_ NewThread PROTO((P_, W_, I_));
144 I_ blockFetch PROTO((P_, PROC, P_));
145 I_ HandleFetchRequest PROTO((P_, PROC, P_));
146 rtsBool InsertThread PROTO((P_ tso));
147 sparkq delete_from_spark_queue PROTO((sparkq, sparkq));
/* NOTE(review): two-argument NewThread prototype -- presumably the
   non-GRAN variant guarded by elided preprocessor lines. */
150 P_ NewThread PROTO((P_, W_));
/* Flag polled by running threads to force a context switch; the timer
   interval is in milliseconds (see virtual timer setup below). */
153 I_ context_switch = 0;
154 I_ contextSwitchTime = 10000;
158 /* NB: GRAN and GUM use different representations of spark pools.
159 GRAN sparks are more flexible (containing e.g. granularity info)
160 but slower than GUM sparks. There is no fixed upper bound on the
161 number of GRAN sparks either. -- HWL
167 I_ SparkLimit[SPARK_POOLS];
/* Allocate and initialise the two (REQUIRED/ADVISORY) spark pools,
   each sized by RTSflags.ConcFlags.maxLocalSparks.  NOTE(review): the
   malloc-failure handlers (original lines 177-178, 180) are elided in
   this excerpt; each failed allocation is expected to abort the RTS. */
170 initThreadPools(STG_NO_ARGS)
172 I_ i, size = RTSflags.ConcFlags.maxLocalSparks;
174 SparkLimit[ADVISORY_POOL] = SparkLimit[REQUIRED_POOL] = size;
176 if ((PendingSparksBase[ADVISORY_POOL] = (TYPE_OF_SPARK) malloc(size * SIZE_OF_SPARK)) == NULL)
179 if ((PendingSparksBase[REQUIRED_POOL] = (TYPE_OF_SPARK) malloc(size * SIZE_OF_SPARK)) == NULL)
/* Lim marks one-past-the-end of each pool's backing array. */
181 PendingSparksLim[ADVISORY_POOL] = PendingSparksBase[ADVISORY_POOL] + size;
182 PendingSparksLim[REQUIRED_POOL] = PendingSparksBase[REQUIRED_POOL] + size;
/* Top-level scheduler entry point.  Sets up the virtual timer, creates
   the main thread from topClosure (GCing once for space if the first
   NewThread fails), then enters the main scheduler loop: the setjmp()
   below is the target that ReSchedule longjmp()s back to, after which
   the thread at the head of the runnable queue is resumed.
   NOTE(review): many lines (variable declarations, braces, #else/#endif
   directives) are elided in this excerpt; control structure must be
   checked against the full file. */
193 ScheduleThreads(topClosure)
201 #if defined(PROFILING) || defined(PAR)
202 if (time_profiling || RTSflags.ConcFlags.ctxtSwitchTime > 0) {
203 if (initialize_virtual_timer(RTSflags.CcFlags.msecsPerTick)) {
205 if (RTSflags.ConcFlags.ctxtSwitchTime > 0) {
206 if (initialize_virtual_timer(RTSflags.ConcFlags.ctxtSwitchTime)) {
209 fprintf(stderr, "Can't initialize virtual timer.\n");
213 context_switch = 0 /* 1 HWL */;
215 # if defined(GRAN_CHECK) && defined(GRAN) /* HWL */
216 if ( RTSflags.GranFlags.Light && RTSflags.GranFlags.proc!=1 ) {
217 fprintf(stderr,"Qagh: In GrAnSim Light setup .proc must be 1\n");
221 if ( RTSflags.GranFlags.debug & 0x40 ) {
222 fprintf(stderr,"Doing init in ScheduleThreads now ...\n");
226 #if defined(GRAN) /* KH */
227 /* Init thread and spark queues on all processors */
228 for (i=0; i<RTSflags.GranFlags.proc; i++)
230 /* Init of RunnableThreads{Hd,Tl} etc now in main */
231 OutstandingFetches[i] = OutstandingFishes[i] = 0;
232 procStatus[i] = Idle;
233 # if defined(GRAN_CHECK) && defined(GRAN) /* HWL */
234 BlockedOnFetch[i] = NULL;
238 CurrentProc = MainProc;
240 Idlers = RTSflags.GranFlags.proc;
248 * We perform GC so that a signal handler can install a new
249 * TopClosure and start a new main thread.
255 if ((tso = NewThread(topClosure, T_MAIN, MAIN_PRI)) == NULL) {
257 if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
259 /* kludge to save the top closure as a root */
260 CurrentTSO = topClosure;
261 ReallyPerformThreadGC(0, rtsTrue);
262 topClosure = CurrentTSO;
/* Retry thread creation after the GC above freed space. */
264 if ((tso = NewThread(topClosure, T_MAIN, MAIN_PRI)) == NULL) {
266 if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
269 fprintf(stderr, "Not enough heap for main thread\n");
274 RunnableThreadsHd = RunnableThreadsTl = tso;
276 /* NB: CurrentProc must have been set to MainProc before that! -- HWL */
277 ThreadQueueHd = ThreadQueueTl = tso;
279 # if defined(GRAN_CHECK)
280 if ( RTSflags.GranFlags.debug & 0x40 ) {
281 fprintf(stderr,"MainTSO has been initialized (0x%x)\n", tso);
287 if (RTSflags.ParFlags.granSimStats) {
288 DumpGranEvent(GR_START, tso);
289 sameThread = rtsTrue;
292 if (RTSflags.GranFlags.granSimStats && !RTSflags.GranFlags.labelling)
293 DumpRawGranEvent(CurrentProc,(PROC)0,GR_START,
298 MAKE_BUSY(MainProc); /* Everything except the main PE is idle */
299 if (RTSflags.GranFlags.Light)
303 required_thread_count = 1;
304 advisory_thread_count = 0;
306 } /*if IAmMainThread ...*/
309 /* ----------------------------------------------------------------- */
310 /* This part is the MAIN SCHEDULER LOOP; jumped at from ReSchedule */
311 /* ----------------------------------------------------------------- */
313 if(setjmp(scheduler_loop) < 0)
316 #if defined(GRAN) && defined(GRAN_CHECK)
317 if ( RTSflags.GranFlags.debug & 0x80 ) {
318 fprintf(stderr,"MAIN Schedule Loop; ThreadQueueHd is ");
319 G_TSO(ThreadQueueHd,1);
320 /* if (ThreadQueueHd == MainTSO) {
321 fprintf(stderr,"D> Event Queue is now:\n");
328 if (PendingFetches != Prelude_Z91Z93_closure) {
333 if (ThreadQueueHd == Prelude_Z91Z93_closure) {
334 fprintf(stderr, "Qu'vatlh! No runnable threads!\n");
337 if (DO_QP_PROF > 1 && CurrentTSO != ThreadQueueHd) {
338 QP_Event1("AG", ThreadQueueHd);
/* Idle loop: no runnable thread -- wait on the waiting (I/O) queue,
   or GC/fish for work depending on configuration. */
341 while (RunnableThreadsHd == Prelude_Z91Z93_closure) {
342 /* If we've no work */
343 if (WaitingThreadsHd == Prelude_Z91Z93_closure) {
345 fprintf(stderr, "No runnable threads!\n");
348 /* Block indef. waiting for I/O and timer expire */
354 if (RunnableThreadsHd == Prelude_Z91Z93_closure) {
355 if (advisory_thread_count < RTSflags.ConcFlags.maxThreads &&
356 (PendingSparksHd[REQUIRED_POOL] < PendingSparksTl[REQUIRED_POOL] ||
357 PendingSparksHd[ADVISORY_POOL] < PendingSparksTl[ADVISORY_POOL])) {
359 * If we're here (no runnable threads) and we have pending
360 * sparks, we must have a space problem. Get enough space
361 * to turn one of those pending sparks into a
362 * thread... ReallyPerformGC doesn't return until the
363 * space is available, so it may force global GC. ToDo:
364 * Is this unnecessary here? Duplicated in ReSchedule()?
367 ReallyPerformThreadGC(THREAD_SPACE_REQUIRED, rtsTrue);
368 SAVE_Hp -= THREAD_SPACE_REQUIRED;
371 * We really have absolutely no work. Send out a fish
372 * (there may be some out there already), and wait for
373 * something to arrive. We clearly can't run any threads
374 * until a SCHEDULE or RESUME arrives, and so that's what
375 * we're hoping to see. (Of course, we still have to
376 * respond to other types of messages.)
379 sendFish(choosePE(), mytid, NEW_FISH_AGE, NEW_FISH_HISTORY,
385 } else if (PacketsWaiting()) { /* Look for incoming messages */
391 if (DO_QP_PROF > 1 && CurrentTSO != RunnableThreadsHd) {
392 QP_Event1("AG", RunnableThreadsHd);
397 if (RTSflags.ParFlags.granSimStats && !sameThread)
398 DumpGranEvent(GR_SCHEDULE, RunnableThreadsHd);
402 TimeOfNextEvent = get_time_of_next_event();
403 CurrentTSO = ThreadQueueHd;
404 if (RTSflags.GranFlags.Light) {
405 /* Save time of `virt. proc' which was active since last getevent and
406 restore time of `virt. proc' where CurrentTSO is living on. */
407 if(RTSflags.GranFlags.DoFairSchedule)
409 if (RTSflags.GranFlags.granSimStats &&
410 RTSflags.GranFlags.debug & 0x20000)
411 DumpGranEvent(GR_SYSTEM_END,ActiveTSO);
413 TSO_CLOCK(ActiveTSO) = CurrentTime[CurrentProc];
415 CurrentTime[CurrentProc] = TSO_CLOCK(CurrentTSO);
416 if(RTSflags.GranFlags.DoFairSchedule && __resched )
418 __resched = rtsFalse;
419 if (RTSflags.GranFlags.granSimStats &&
420 RTSflags.GranFlags.debug & 0x20000)
421 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
/* GranSim Light fairness hack: schedule a CONTINUETHREAD for the next
   queued TSO 1000 ticks ahead so a lone event can't starve the queue. */
424 if (TSO_LINK(ThreadQueueHd)!=Prelude_Z91Z93_closure &&
425 (TimeOfNextEvent == 0 ||
426 TSO_CLOCK(TSO_LINK(ThreadQueueHd))+1000<TimeOfNextEvent)) {
427 new_event(CurrentProc,CurrentProc,TSO_CLOCK(TSO_LINK(ThreadQueueHd))+1000,
428 CONTINUETHREAD,TSO_LINK(ThreadQueueHd),Prelude_Z91Z93_closure,NULL);
429 TimeOfNextEvent = get_time_of_next_event();
433 EndOfTimeSlice = CurrentTime[CurrentProc]+RTSflags.GranFlags.time_slice;
/* Dequeue the thread to run: unlink it from the runnable queue. */
435 CurrentTSO = RunnableThreadsHd;
436 RunnableThreadsHd = TSO_LINK(RunnableThreadsHd);
437 TSO_LINK(CurrentTSO) = Prelude_Z91Z93_closure;
439 if (RunnableThreadsHd == Prelude_Z91Z93_closure)
440 RunnableThreadsTl = Prelude_Z91Z93_closure;
443 /* If we're not running a timer, just leave the flag on */
444 if (RTSflags.ConcFlags.ctxtSwitchTime > 0)
447 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
448 if (CurrentTSO == Prelude_Z91Z93_closure) {
449 fprintf(stderr,"Qagh: Trying to execute Prelude_Z91Z93_closure on proc %d (@ %d)\n",
450 CurrentProc,CurrentTime[CurrentProc]);
454 if (RTSflags.GranFlags.debug & 0x04) {
455 if (BlockedOnFetch[CurrentProc]) {
456 fprintf(stderr,"Qagh: Trying to execute TSO 0x%x on proc %d (@ %d) which is blocked-on-fetch by TSO 0x%x\n",
457 CurrentTSO,CurrentProc,CurrentTime[CurrentProc],BlockedOnFetch[CurrentProc]);
462 if ( (RTSflags.GranFlags.debug & 0x10) &&
463 (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) ) {
464 fprintf(stderr,"Qagh: Trying to execute TSO 0x%x on proc %d (@ %d) which should be asleep!\n",
465 CurrentTSO,CurrentProc,CurrentTime[CurrentProc]);
470 #if 0 && defined(CONCURRENT)
471 fprintf(stderr, "ScheduleThreads: About to resume thread:%#x\n",
/* Enter the STG mini-interpreter to actually run CurrentTSO. */
474 miniInterpret((StgFunPtr)resumeThread);
478 % Some remarks on GrAnSim -- HWL
480 The ReSchedule fct is the heart of GrAnSim. Based on its parameter it issues
481 a CONTINUETHREAD to carry on executing the current thread in due course, or it watches out for new work (e.g. called from EndThread).
483 Then it picks the next event (get_next_event) and handles it appropriately
484 (see switch construct). Note that a continue in the switch causes the next
485 event to be handled and a break causes a jmp to the scheduler_loop where
486 the TSO at the head of the current processor's runnable queue is executed.
488 ReSchedule is mostly entered from HpOverflow.lc:PerformReSchedule which is
489 itself called via the GRAN_RESCHEDULE macro in the compiler generated code.
493 GrAnSim rules here! Others stay out or you will be crashed.
494 Concurrent and parallel guys: please use the next door (a few pages down;
495 turn left at the !GRAN sign).
500 /* Prototypes of event handling functions. Only needed in ReSchedule */
/* do_the_* handle one event type each; gimme_spark/munch_spark together
   implement FINDWORK (select a spark, then turn it into a thread). */
501 void do_the_globalblock PROTO((eventq event));
502 void do_the_unblock PROTO((eventq event));
503 void do_the_fetchnode PROTO((eventq event));
504 void do_the_fetchreply PROTO((eventq event));
505 void do_the_movethread PROTO((eventq event));
506 void do_the_movespark PROTO((eventq event));
507 void gimme_spark PROTO((rtsBool *found_res, sparkq *prev_res, sparkq *spark_res));
508 void munch_spark PROTO((rtsBool found, sparkq prev, sparkq spark));
/* Heart of the GranSim scheduler (see prose above).  Depending on
   what_next it either re-queues/continues the current thread or looks
   for new work, then runs the event-handling loop: `continue` processes
   the next event; `break`/fall-through longjmp()s back to the
   scheduler_loop in ScheduleThreads to run the queue-head TSO.
   NOTE(review): braces, some declarations and #if/#else/#endif lines
   are elided in this excerpt. */
511 ReSchedule(what_next)
512 int what_next; /* Run the current thread again? */
514 sparkq spark, nextspark;
520 # if defined(GRAN_CHECK) && defined(GRAN)
521 if ( RTSflags.GranFlags.debug & 0x80 ) {
522 fprintf(stderr,"Entering ReSchedule with mode %u; tso is\n",what_next);
523 G_TSO(ThreadQueueHd,1);
527 # if defined(GRAN_CHECK) && defined(GRAN)
528 if ( (RTSflags.GranFlags.debug & 0x80) || (RTSflags.GranFlags.debug & 0x40 ) )
529 if (what_next<FIND_THREAD || what_next>END_OF_WORLD)
530 fprintf(stderr,"Qagh {ReSchedule}Daq: illegal parameter %u for what_next\n",
534 if (RTSflags.GranFlags.Light) {
535 /* Save current time; GranSim Light only */
536 TSO_CLOCK(CurrentTSO) = CurrentTime[CurrentProc];
539 /* Run the current thread again (if there is one) */
540 if(what_next==SAME_THREAD && ThreadQueueHd != Prelude_Z91Z93_closure)
542 /* A bit of a hassle if the event queue is empty, but ... */
543 CurrentTSO = ThreadQueueHd;
545 __resched = rtsFalse;
/* GranSim Light: if the current TSO has run ahead of the next queued
   TSO's clock, re-insert it at its clock-ordered queue position. */
546 if (RTSflags.GranFlags.Light &&
547 TSO_LINK(ThreadQueueHd)!=Prelude_Z91Z93_closure &&
548 TSO_CLOCK(ThreadQueueHd)>TSO_CLOCK(TSO_LINK(ThreadQueueHd))) {
549 if(RTSflags.GranFlags.granSimStats &&
550 RTSflags.GranFlags.debug & 0x20000 )
551 DumpGranEvent(GR_DESCHEDULE,ThreadQueueHd);
553 ThreadQueueHd = TSO_LINK(CurrentTSO);
554 if (ThreadQueueHd==Prelude_Z91Z93_closure)
555 ThreadQueueTl=Prelude_Z91Z93_closure;
556 TSO_LINK(CurrentTSO) = Prelude_Z91Z93_closure;
557 InsertThread(CurrentTSO);
560 /* This code does round-Robin, if preferred. */
561 if(!RTSflags.GranFlags.Light &&
562 RTSflags.GranFlags.DoFairSchedule &&
563 TSO_LINK(CurrentTSO) != Prelude_Z91Z93_closure &&
564 CurrentTime[CurrentProc]>=EndOfTimeSlice)
566 ThreadQueueHd = TSO_LINK(CurrentTSO);
567 TSO_LINK(ThreadQueueTl) = CurrentTSO;
568 ThreadQueueTl = CurrentTSO;
569 TSO_LINK(CurrentTSO) = Prelude_Z91Z93_closure;
570 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadcontextswitchtime;
571 if ( RTSflags.GranFlags.granSimStats )
572 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
573 CurrentTSO = ThreadQueueHd;
576 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
577 CONTINUETHREAD,CurrentTSO,Prelude_Z91Z93_closure,NULL);
579 /* Schedule `next thread' which is at ThreadQueueHd now i.e. thread queue */
580 /* has been updated before that already. */
581 else if(what_next==NEW_THREAD && ThreadQueueHd != Prelude_Z91Z93_closure)
583 # if defined(GRAN_CHECK) && defined(GRAN)
584 fprintf(stderr,"Qagh: ReSchedule(NEW_THREAD) shouldn't be used with DoReScheduleOnFetch!!\n");
589 if(RTSflags.GranFlags.granSimStats &&
590 (!RTSflags.GranFlags.Light || RTSflags.GranFlags.debug & 0x20000) )
591 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
593 CurrentTSO = ThreadQueueHd;
594 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
595 CONTINUETHREAD,CurrentTSO,Prelude_Z91Z93_closure,NULL);
597 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadcontextswitchtime;
600 /* We go in here if the current thread is blocked on fetch => don't CONT */
601 else if(what_next==CHANGE_THREAD)
603 /* just fall into event handling loop for next event */
606 /* We go in here if we have no runnable threads or what_next==0 */
609 procStatus[CurrentProc] = Idle;
610 /* That's now done in HandleIdlePEs!
611 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
612 FINDWORK,Prelude_Z91Z93_closure,Prelude_Z91Z93_closure,NULL);
614 CurrentTSO = Prelude_Z91Z93_closure;
617 /* ----------------------------------------------------------------- */
618 /* This part is the EVENT HANDLING LOOP */
619 /* ----------------------------------------------------------------- */
622 /* Choose the processor with the next event */
623 event = get_next_event();
624 CurrentProc = EVENT_PROC(event);
625 CurrentTSO = EVENT_TSO(event);
626 if (RTSflags.GranFlags.Light) {
629 /* Restore local clock of the virtual processor attached to CurrentTSO.
630 All costs will be associated to the `virt. proc' on which the tso
632 if (ActiveTSO != NULL) { /* already in system area */
633 TSO_CLOCK(ActiveTSO) = CurrentTime[CurrentProc];
634 if (RTSflags.GranFlags.DoFairSchedule)
636 if (RTSflags.GranFlags.granSimStats &&
637 RTSflags.GranFlags.debug & 0x20000)
638 DumpGranEvent(GR_SYSTEM_END,ActiveTSO);
/* Pick which TSO plays the role of the active virtual processor for
   clock-accounting purposes, based on the event type. */
641 switch (EVENT_TYPE(event))
644 case FINDWORK: /* inaccurate this way */
645 ActiveTSO = ThreadQueueHd;
649 case MOVESPARK: /* has tso of virt proc in tso field of event */
650 ActiveTSO = EVENT_TSO(event);
652 default: fprintf(stderr,"Illegal event type %s (%d) in GrAnSim Light setup\n",
653 event_names[EVENT_TYPE(event)],EVENT_TYPE(event));
656 CurrentTime[CurrentProc] = TSO_CLOCK(ActiveTSO);
657 if(RTSflags.GranFlags.DoFairSchedule)
659 if (RTSflags.GranFlags.granSimStats &&
660 RTSflags.GranFlags.debug & 0x20000)
661 DumpGranEvent(GR_SYSTEM_START,ActiveTSO);
/* Advance the local clock to the event time (CONTINUETHREAD events
   keep the current clock). */
665 if(EVENT_TIME(event) > CurrentTime[CurrentProc] &&
666 EVENT_TYPE(event)!=CONTINUETHREAD)
667 CurrentTime[CurrentProc] = EVENT_TIME(event);
669 # if defined(GRAN_CHECK) && defined(GRAN) /* HWL */
670 if ( RTSflags.GranFlags.Light && CurrentProc!=0 ) {
671 fprintf(stderr,"Qagh {ReSchedule}Daq: CurrentProc must be 0 in GrAnSim Light setup\n");
675 /* MAKE_BUSY(CurrentProc); don't think that's right in all cases now */
678 # if defined(GRAN_CHECK) && defined(GRAN)
679 if (RTSflags.GranFlags.debug & 0x80)
680 fprintf(stderr,"After get_next_event, before HandleIdlePEs\n");
683 /* Deal with the idlers */
684 if ( !RTSflags.GranFlags.Light )
687 # if defined(GRAN_CHECK) && defined(GRAN)
688 if ( RTSflags.GranFlags.event_trace_all ||
689 ( RTSflags.GranFlags.event_trace && EVENT_TYPE(event) != CONTINUETHREAD) ||
690 (RTSflags.GranFlags.debug & 0x80) )
/* Dispatch on event type; `continue` -> next event, `break` -> run the
   thread at the head of the runnable queue (scheduler loop). */
694 switch (EVENT_TYPE(event))
696 /* Should just be continuing execution */
698 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
699 if ( (RTSflags.GranFlags.debug & 0x100) &&
700 (EVENT_TSO(event)!=RunnableThreadsHd[EVENT_PROC(event)]) ) {
701 fprintf(stderr,"Warning: Wrong TSO in CONTINUETHREAD: %#lx (%x) (PE: %d Hd: 0x%lx)\n",
702 EVENT_TSO(event), TSO_ID(EVENT_TSO(event)),
704 RunnableThreadsHd[EVENT_PROC(event)]);
706 if ( (RTSflags.GranFlags.debug & 0x04) &&
707 BlockedOnFetch[CurrentProc]) {
708 fprintf(stderr,"Warning: Discarding CONTINUETHREAD on blocked proc %u @ %u\n",
709 CurrentProc,CurrentTime[CurrentProc]);
714 if(ThreadQueueHd==Prelude_Z91Z93_closure)
716 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
717 FINDWORK,Prelude_Z91Z93_closure,Prelude_Z91Z93_closure,NULL);
718 continue; /* Catches superfluous CONTINUEs -- should be unnecessary */
721 break; /* fall into scheduler loop */
724 do_the_fetchnode(event);
725 continue; /* handle next event in event queue */
728 do_the_globalblock(event);
729 continue; /* handle next event in event queue */
732 do_the_fetchreply(event);
733 continue; /* handle next event in event queue */
735 case UNBLOCKTHREAD: /* Move from the blocked queue to the tail of */
736 do_the_unblock(event);
737 continue; /* handle next event in event queue */
739 case RESUMETHREAD: /* Move from the blocked queue to the tail of */
740 /* the runnable queue ( i.e. Qu' SImqa'lu') */
741 TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] -
742 TSO_BLOCKEDAT(EVENT_TSO(event));
743 StartThread(event,GR_RESUME);
747 StartThread(event,GR_START);
751 do_the_movethread(event);
752 continue; /* handle next event in event queue */
755 do_the_movespark(event);
756 continue; /* handle next event in event queue */
/* FINDWORK: only create a thread from a spark if allowed by the fetch
   strategy (or if thread creation is forced unconditionally). */
759 if( RTSflags.GranFlags.DoAlwaysCreateThreads ||
760 (ThreadQueueHd == Prelude_Z91Z93_closure &&
761 (RTSflags.GranFlags.FetchStrategy >= 2 ||
762 OutstandingFetches[CurrentProc] == 0)) )
768 ASSERT(procStatus[CurrentProc]==Sparking ||
769 RTSflags.GranFlags.DoAlwaysCreateThreads);
771 /* SImmoHwI' yInej! Search spark queue! */
772 gimme_spark (&found, &prev, &spark);
774 /* DaH chu' Qu' yIchen! Now create new work! */
775 munch_spark (found, prev, spark);
778 ASSERT(procStatus[CurrentProc]==Starting ||
779 procStatus[CurrentProc]==Idle ||
780 RTSflags.GranFlags.DoAlwaysCreateThreads);
782 continue; /* to the next event */
785 fprintf(stderr,"Illegal event type %u\n",EVENT_TYPE(event));
/* Jump back into ScheduleThreads' main loop to run the queue head. */
788 longjmp(scheduler_loop, 1);
792 /* ----------------------------------------------------------------- */
793 /* The main event handling functions; called from ReSchedule (switch) */
794 /* ----------------------------------------------------------------- */
/* Handle a GLOBALBLOCK event: `tso` on PE `proc` blocked on the remote
   closure `node`.  Block the TSO via blockFetch(), then (unless
   rescheduling-on-fetch) wake the next runnable thread on that PE.
   NOTE(review): several lines (error-exit statements, braces, #endif)
   are elided in this excerpt. */
797 do_the_globalblock(eventq event)
799 PROC proc = EVENT_PROC(event); /* proc that requested node */
800 P_ tso = EVENT_TSO(event), /* tso that requested node */
801 node = EVENT_NODE(event); /* requested, remote node */
803 # if defined(GRAN_CHECK) && defined(GRAN)
804 if ( RTSflags.GranFlags.Light ) {
805 fprintf(stderr,"Qagh: There should be no GLOBALBLOCKs in GrAnSim Light setup\n");
809 if (!RTSflags.GranFlags.DoGUMMFetching) {
810 fprintf(stderr,"Qagh: GLOBALBLOCK events only valid with GUMM fetching\n");
814 if ( (RTSflags.GranFlags.debug & 0x100) &&
815 IS_LOCAL_TO(PROCS(node),proc) ) {
816 fprintf(stderr,"Qagh: GLOBALBLOCK: Blocking on LOCAL node 0x %x (PE %d).\n",
820 /* CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime; */
821 if ( blockFetch(tso,proc,node) != 0 )
822 return; /* node has become local by now */
824 if (!RTSflags.GranFlags.DoReScheduleOnFetch) { /* head of queue is next thread */
825 P_ tso = RunnableThreadsHd[proc]; /* awaken next thread */
826 if(tso != Prelude_Z91Z93_closure) {
827 new_event(proc,proc,CurrentTime[proc],
828 CONTINUETHREAD,tso,Prelude_Z91Z93_closure,NULL);
829 CurrentTime[proc] += RTSflags.GranFlags.gran_threadcontextswitchtime;
830 if(RTSflags.GranFlags.granSimStats)
831 DumpRawGranEvent(proc,CurrentProc,GR_SCHEDULE,tso,
832 Prelude_Z91Z93_closure,0);
833 MAKE_BUSY(proc); /* might have been fetching */
835 MAKE_IDLE(proc); /* no work on proc now */
837 } else { /* RTSflags.GranFlags.DoReScheduleOnFetch i.e. block-on-fetch */
838 /* other thread is already running */
839 /* 'oH 'utbe' 'e' vIHar ; I think that's not needed -- HWL
840 new_event(proc,proc,CurrentTime[proc],
841 CONTINUETHREAD,EVENT_TSO(event),
842 (RTSflags.GranFlags.DoGUMMFetching ? closure :
843 EVENT_NODE(event)),NULL);
/* Handle an UNBLOCKTHREAD event: the fetched data for `tso` has
   arrived.  Without reschedule-on-fetch the blocked thread continues
   immediately (its wait counts as plain block time); with it, the TSO
   is re-queued via a delayed RESUMETHREAD, paying the queueing cost. */
849 do_the_unblock(eventq event)
851 PROC proc = EVENT_PROC(event), /* proc that requested node */
852 creator = EVENT_CREATOR(event); /* proc that created this event; NOTE(review): original comment duplicated the line above */
853 P_ tso = EVENT_TSO(event), /* tso that requested node */
854 node = EVENT_NODE(event); /* requested, remote node */
856 # if defined(GRAN) && defined(GRAN_CHECK)
857 if ( RTSflags.GranFlags.Light ) {
858 fprintf(stderr,"Qagh: There should be no UNBLOCKs in GrAnSim Light setup\n");
863 if (!RTSflags.GranFlags.DoReScheduleOnFetch) { /* block-on-fetch */
864 /* We count block-on-fetch as normal block time */
865 TSO_BLOCKTIME(tso) += CurrentTime[proc] - TSO_BLOCKEDAT(tso);
866 /* No costs for contextswitch or thread queueing in this case */
867 if(RTSflags.GranFlags.granSimStats)
868 DumpRawGranEvent(proc,CurrentProc,GR_RESUME,tso, Prelude_Z91Z93_closure,0);
869 new_event(proc,proc,CurrentTime[proc],CONTINUETHREAD,tso,node,NULL);
871 /* Reschedule on fetch causes additional costs here: */
872 /* Bring the TSO from the blocked queue into the threadq */
873 new_event(proc,proc,CurrentTime[proc]+RTSflags.GranFlags.gran_threadqueuetime,
874 RESUMETHREAD,tso,node,NULL);
/* Handle a FETCHNODE event: a remote PE asked for a node that lives
   here.  Delegates to HandleFetchRequest; return code 4 means packing
   ran out of heap, so the event is re-queued, a GC is forced, and the
   request is retried with the (possibly moved) event. */
879 do_the_fetchnode(eventq event)
883 # if defined(GRAN_CHECK) && defined(GRAN)
884 if ( RTSflags.GranFlags.Light ) {
885 fprintf(stderr,"Qagh: There should be no FETCHNODEs in GrAnSim Light setup\n");
889 if (RTSflags.GranFlags.SimplifiedFetch) {
890 fprintf(stderr,"Qagh: FETCHNODE events not valid with simplified fetch\n");
894 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime;
896 rc = HandleFetchRequest(EVENT_NODE(event),
897 EVENT_CREATOR(event),
899 if (rc == 4) { /* trigger GC */
900 # if defined(GRAN_CHECK) && defined(GRAN)
901 if (RTSflags.GcFlags.giveStats)
902 fprintf(RTSflags.GcFlags.statsFile,"***** veQ boSwI' PackNearbyGraph(node %#lx, tso %#lx (%x))\n",
903 EVENT_NODE(event), EVENT_TSO(event), TSO_ID(EVENT_TSO(event)));
905 prepend_event(event);
906 ReallyPerformThreadGC(PACK_HEAP_REQUIRED, rtsFalse);
907 # if defined(GRAN_CHECK) && defined(GRAN)
908 if (RTSflags.GcFlags.giveStats) {
909 fprintf(RTSflags.GcFlags.statsFile,"***** SAVE_Hp=%#lx, SAVE_HpLim=%#lx, PACK_HEAP_REQUIRED=%#lx\n",
910 SAVE_Hp, SAVE_HpLim, PACK_HEAP_REQUIRED);
911 fprintf(stderr,"***** No. of packets so far: %d (total size: %d)\n",
912 tot_packets,tot_packet_size);
915 event = grab_event();
916 SAVE_Hp -= PACK_HEAP_REQUIRED-1;
918 /* GC knows that events are special beasts and follows the pointer i.e. */
919 /* events are valid even if they moved. Hopefully, an EXIT is triggered */
920 /* if there is not enough heap after GC. */
/* Handle a FETCHREPLY event: the data requested by a local TSO has
   come back.  With GUMM (packet) fetching the graph is unpacked here;
   with single-closure fetching a "fetch miss" (node moved meanwhile)
   re-issues a FETCHNODE chasing the node.  On success the fetch count
   is decremented and the TSO is unblocked via an UNBLOCKTHREAD event.
   NOTE(review): closing braces/#endifs and some statements are elided
   in this excerpt. */
926 do_the_fetchreply(eventq event)
930 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
931 if ( RTSflags.GranFlags.Light ) {
932 fprintf(stderr,"Qagh: There should be no FETCHREPLYs in GrAnSim Light setup\n");
936 if (RTSflags.GranFlags.SimplifiedFetch) {
937 fprintf(stderr,"Qagh: FETCHREPLY events not valid with simplified fetch\n");
941 if (RTSflags.GranFlags.debug & 0x10) {
942 if (TSO_TYPE(EVENT_TSO(event)) & FETCH_MASK_TSO) {
943 TSO_TYPE(EVENT_TSO(event)) &= ~FETCH_MASK_TSO;
945 fprintf(stderr,"Qagh: FETCHREPLY: TSO %#x (%x) has fetch mask not set @ %d\n",
946 CurrentTSO,TSO_ID(CurrentTSO),CurrentTime[CurrentProc]);
951 if (RTSflags.GranFlags.debug & 0x04) {
952 if (BlockedOnFetch[CurrentProc]!=ThreadQueueHd) {
953 fprintf(stderr,"Qagh: FETCHREPLY: Proc %d (with TSO %#x (%x)) not blocked-on-fetch by TSO %#lx (%x)\n",
954 CurrentProc,CurrentTSO,TSO_ID(CurrentTSO),
955 BlockedOnFetch[CurrentProc], TSO_ID(BlockedOnFetch[CurrentProc]));
958 BlockedOnFetch[CurrentProc] = 0; /*- rtsFalse; -*/
963 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime;
965 if (RTSflags.GranFlags.DoGUMMFetching) { /* bulk (packet) fetching */
966 P_ buffer = EVENT_NODE(event);
967 PROC p = EVENT_PROC(event);
968 I_ size = buffer[PACK_SIZE_LOCN];
970 tso = EVENT_TSO(event);
972 /* NB: Fetch misses can't occur with GUMM fetching, as */
973 /* updatable closure are turned into RBHs and therefore locked */
974 /* for other processors that try to grab them. */
976 closure = UnpackGraph(buffer);
977 CurrentTime[CurrentProc] += size * RTSflags.GranFlags.gran_munpacktime;
979 /* Copy or move node to CurrentProc */
980 if (FetchNode(EVENT_NODE(event),
981 EVENT_CREATOR(event),
982 EVENT_PROC(event)) ) {
983 /* Fetch has failed i.e. node has been grabbed by another PE */
984 P_ node = EVENT_NODE(event), tso = EVENT_TSO(event);
985 PROC p = where_is(node);
988 # if defined(GRAN_CHECK) && defined(GRAN)
989 if (RTSflags.GranFlags.PrintFetchMisses) {
990 fprintf(stderr,"Fetch miss @ %lu: node %#lx is at proc %u (rather than proc %u)\n",
991 CurrentTime[CurrentProc],node,p,EVENT_CREATOR(event));
994 # endif /* GRAN_CHECK */
996 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
998 /* Count fetch again !? */
999 ++TSO_FETCHCOUNT(tso);
1000 TSO_FETCHTIME(tso) += RTSflags.GranFlags.gran_fetchtime;
1002 fetchtime = STG_MAX(CurrentTime[CurrentProc],CurrentTime[p]) +
1003 RTSflags.GranFlags.gran_latency;
1005 /* Chase the grabbed node */
1006 new_event(p,CurrentProc,fetchtime,FETCHNODE,tso,node,NULL);
1008 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
1009 if (RTSflags.GranFlags.debug & 0x04)
1010 BlockedOnFetch[CurrentProc] = tso; /*-rtsTrue;-*/
1012 if (RTSflags.GranFlags.debug & 0x10)
1013 TSO_TYPE(tso) |= FETCH_MASK_TSO;
1016 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
1018 return; /* NB: no REPLy has been processed; tso still sleeping */
1021 /* -- Qapla'! Fetch has been successful; node is here, now */
1022 ++TSO_FETCHCOUNT(EVENT_TSO(event));
1023 TSO_FETCHTIME(EVENT_TSO(event)) += RTSflags.GranFlags.gran_fetchtime;
1025 if (RTSflags.GranFlags.granSimStats)
1026 DumpRawGranEvent(CurrentProc,EVENT_CREATOR(event),GR_REPLY,
1028 (RTSflags.GranFlags.DoGUMMFetching ?
1033 --OutstandingFetches[CurrentProc];
1034 ASSERT(OutstandingFetches[CurrentProc] >= 0);
1035 # if 0 && defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
1036 if (OutstandingFetches[CurrentProc] < 0) {
1037 fprintf(stderr,"Qagh: OutstandingFetches of proc %u has become negative\n",CurrentProc);
1041 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1042 UNBLOCKTHREAD,EVENT_TSO(event),
1043 (RTSflags.GranFlags.DoGUMMFetching ?
/* Handle a MOVETHREAD event: a migrated thread arrives on CurrentProc.
   One outstanding fish is paid back, the TSO's location bitmask is set
   to this PE, the unpack cost is charged, and the thread is started
   with a GR_STOLEN stats record.  Requires thread migration (-bM). */
1050 do_the_movethread(eventq event) {
1051 P_ tso = EVENT_TSO(event);
1052 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
1053 if ( RTSflags.GranFlags.Light && CurrentProc!=1 ) {
1054 fprintf(stderr,"Qagh: There should be no MOVETHREADs in GrAnSim Light setup\n");
1057 if (!RTSflags.GranFlags.DoThreadMigration) {
1058 fprintf(stderr,"Qagh: MOVETHREAD events should never occur without -bM\n");
1061 if (PROCS(tso)!=0) {
1062 fprintf(stderr,"Qagh: Moved thread has a bitmask of 0%o (proc %d); should be 0\n",
1063 PROCS(tso), where_is(tso));
1067 --OutstandingFishes[CurrentProc];
1068 ASSERT(OutstandingFishes[CurrentProc]>=0);
1069 SET_PROCS(tso,ThisPE);
1070 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime;
1071 StartThread(event,GR_STOLEN);
/* Handle a MOVESPARK event: a stolen spark arrives on CurrentProc.
   Charges the unpack cost, pays back one outstanding fish, adds the
   spark to the local queue, and clears a Fishing status.  Work is then
   picked up via a later FINDWORK event (created in HandleIdlePEs), not
   by falling through here. */
1075 do_the_movespark(eventq event){
1076 sparkq spark = EVENT_SPARK(event);
1078 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime;
1080 if (RTSflags.GranFlags.granSimStats_Sparks)
1081 DumpRawGranEvent(CurrentProc,(PROC)0,SP_ACQUIRED,Prelude_Z91Z93_closure,
1083 spark_queue_len(CurrentProc,ADVISORY_POOL));
1085 #if defined(GRAN) && defined(GRAN_CHECK)
1086 if (!SHOULD_SPARK(SPARK_NODE(spark)))
1088 /* Not adding the spark to the spark queue would be the right */
1089 /* thing here, but it also would be cheating, as this info can't be */
1090 /* available in a real system. -- HWL */
1092 --OutstandingFishes[CurrentProc];
1093 ASSERT(OutstandingFishes[CurrentProc]>=0);
1095 add_to_spark_queue(spark);
1097 if (procStatus[CurrentProc]==Fishing)
1098 procStatus[CurrentProc] = Idle;
1100 /* add_to_spark_queue will increase the time of the current proc. */
1101 /* Just falling into FINDWORK is wrong as we might have other */
1102 /* events that are happening before that. Therefore, just create */
1103 /* a FINDWORK event and go back to main event handling loop. */
1105 /* Should we treat stolen sparks specially? Currently, we don't. */
1107 /* Now FINDWORK is created in HandleIdlePEs */
1108 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1109 FINDWORK,Prelude_Z91Z93_closure,Prelude_Z91Z93_closure,NULL);
1110 sparking[CurrentProc]=rtsTrue;
1114 /* Search the spark queue of the CurrentProc for a spark that's worth
1115 turning into a thread */
1117 gimme_spark (rtsBool *found_res, sparkq *prev_res, sparkq *spark_res)
1121 sparkq spark_of_non_local_node = NULL, spark_of_non_local_node_prev = NULL,
1122 low_priority_spark = NULL, low_priority_spark_prev = NULL,
1123 spark = NULL, prev = NULL, tmp = NULL;
1125 /* Choose a spark from the local spark queue */
1126 spark = SparkQueueHd;
1129 while (spark != NULL && !found)
1131 node = SPARK_NODE(spark);
1132 if (!SHOULD_SPARK(node))
1134 if(RTSflags.GranFlags.granSimStats_Sparks)
1135 DumpRawGranEvent(CurrentProc,(PROC)0,SP_PRUNED,Prelude_Z91Z93_closure,
1137 spark_queue_len(CurrentProc,ADVISORY_POOL));
1139 ASSERT(spark != NULL);
1142 spark = delete_from_spark_queue (prev,spark);
1144 /* -- node should eventually be sparked */
1145 else if (RTSflags.GranFlags.PreferSparksOfLocalNodes &&
1146 !IS_LOCAL_TO(PROCS(node),CurrentProc))
1148 /* Remember first low priority spark */
1149 if (spark_of_non_local_node==NULL) {
1150 spark_of_non_local_node_prev = prev;
1151 spark_of_non_local_node = spark;
1154 if (SPARK_NEXT(spark)==NULL) {
1155 ASSERT(spark==SparkQueueTl); /* just for testing */
1156 prev = spark_of_non_local_node_prev;
1157 spark = spark_of_non_local_node;
1162 # if defined(GRAN) && defined(GRAN_CHECK)
1163 /* Should never happen; just for testing */
1164 if (spark==SparkQueueTl) {
1165 fprintf(stderr,"ReSchedule: Last spark != SparkQueueTl\n");
1170 spark = SPARK_NEXT(spark);
1173 else if ( RTSflags.GranFlags.DoPrioritySparking ||
1174 (SPARK_GRAN_INFO(spark)>=RTSflags.GranFlags.SparkPriority2) )
1178 else /* only used if SparkPriority2 is defined */
1180 /* Remember first low priority spark */
1181 if (low_priority_spark==NULL) {
1182 low_priority_spark_prev = prev;
1183 low_priority_spark = spark;
1186 if (SPARK_NEXT(spark)==NULL) {
1187 ASSERT(spark==SparkQueueTl); /* just for testing */
1188 prev = low_priority_spark_prev;
1189 spark = low_priority_spark;
1190 found = rtsTrue; /* take low pri spark => rc is 2 */
1194 /* Should never happen; just for testing */
1195 if (spark==SparkQueueTl) {
1196 fprintf(stderr,"ReSchedule: Last spark != SparkQueueTl\n");
1201 spark = SPARK_NEXT(spark);
1202 # if defined(GRAN_CHECK) && defined(GRAN)
1203 if ( RTSflags.GranFlags.debug & 0x40 ) {
1204 fprintf(stderr,"Ignoring spark of priority %u (SparkPriority=%u); node=0x%lx; name=%u\n",
1205 SPARK_GRAN_INFO(spark), RTSflags.GranFlags.SparkPriority,
1206 SPARK_NODE(spark), SPARK_NAME(spark));
1208 # endif /* GRAN_CHECK */
1210 } /* while (spark!=NULL && !found) */
/* Turn the spark selected by gimme_spark into a thread on CurrentProc.
   On heap exhaustion (NewThread returns NULL) it schedules a retry FINDWORK
   event and triggers a GC instead.  On success it schedules a STARTTHREAD
   event, removes the spark from the queue, and idles the PE if there is
   neither a new thread nor queued work.
   NOTE(review): interior lines are missing from this excerpt (original line
   numbers jump); comments cover only the visible code. */
1218 munch_spark (rtsBool found, sparkq prev, sparkq spark)
1222   /* We've found a node; now, create thread (DaH Qu' yIchen) */
1225 # if defined(GRAN_CHECK) && defined(GRAN)
1226     if ( SPARK_GRAN_INFO(spark) < RTSflags.GranFlags.SparkPriority2 ) {
1227       tot_low_pri_sparks++;
1228       if ( RTSflags.GranFlags.debug & 0x40 ) {
1229         fprintf(stderr,"GRAN_TNG: No high priority spark available; low priority (%u) spark chosen: node=0x%lx; name=%u\n",
1230 	      SPARK_GRAN_INFO(spark),
1231 	      SPARK_NODE(spark), SPARK_NAME(spark));
/* Thread creation is not free in the simulation: charge its cost. */
1235   CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadcreatetime;
1237   node = SPARK_NODE(spark);
1238   if((tso = NewThread(node, T_REQUIRED, SPARK_GRAN_INFO(spark)))==NULL)
1240       /* Some kind of backoff needed here in case there's too little heap */
1241 #  if defined(GRAN_CHECK) && defined(GRAN)
1242       if (RTSflags.GcFlags.giveStats)
/* NOTE(review): spark and node are pointers printed with %#x -- on an LP64
   platform this truncates; %p (or a cast to unsigned long with %#lx) would
   be the matching conversion.  Confirm the intended target word size. */
1243 	fprintf(RTSflags.GcFlags.statsFile,"***** vIS Qu' chen veQ boSwI'; spark=%#x, node=%#x;  name=%u\n",
1244 	        /* (found==2 ? "no hi pri spark" : "hi pri spark"), */
1245 	        spark, node,SPARK_NAME(spark));
/* Retry FINDWORK one tick later, after the GC below has freed heap. */
1247       new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc]+1,
1248 	        FINDWORK,Prelude_Z91Z93_closure,Prelude_Z91Z93_closure,NULL);
1249       ReallyPerformThreadGC(TSO_HS+TSO_CTS_SIZE,rtsTrue);
1251       return; /* was: continue; */ /* to the next event, eventually */
1254   if(RTSflags.GranFlags.granSimStats_Sparks)
1255     DumpRawGranEvent(CurrentProc,(PROC)0,SP_USED,Prelude_Z91Z93_closure,
1257 		     spark_queue_len(CurrentProc,ADVISORY_POOL));
/* Copy export/lock status and spark name from the spark into the new TSO. */
1259   TSO_EXPORTED(tso) = SPARK_EXPORTED(spark);
1260   TSO_LOCKED(tso) = !SPARK_GLOBAL(spark);
/* NOTE(review): (0x1 >> 16) is always 0, so the first operand of this OR
   contributes nothing; (0x1 << 16) -- a flag bit in the upper half, cf. the
   16-bit shifts in set_sparkname/reset_sparkname -- looks intended.
   Confirm against the OLD_/NEW_SPARKNAME_MASK definitions. */
1261   TSO_SPARKNAME(tso) = (0x1 >> 16) | (NEW_SPARKNAME_MASK & SPARK_NAME(spark)) ;
1263   new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1264 	    STARTTHREAD,tso,node,NULL);
1266   procStatus[CurrentProc] = Starting;
1268   ASSERT(spark != NULL);
1269   /* ASSERT(SPARK_PREV(spark)==prev); */
1271   spark = delete_from_spark_queue (prev, spark);
1274   /* Make the PE idle if nothing sparked and we have no threads. */
1276   if(ThreadQueueHd == Prelude_Z91Z93_closure)
1278      MAKE_IDLE(CurrentProc);
1279 # if defined(GRAN_CHECK) && defined(GRAN)
1280      if ( (RTSflags.GranFlags.debug & 0x80) )
1281        fprintf(stderr,"Warning in FINDWORK handling: No work found for PROC %u\n",CurrentProc);
1282 # endif  /* GRAN_CHECK */
1286     /* ut'lu'Qo' ; Don't think that's necessary any more -- HWL
1287      new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1288 	       CONTINUETHREAD,ThreadQueueHd,Prelude_Z91Z93_closure,NULL);
1296 Here follows the non-GRAN @ReSchedule@.
1301 /* If you are concurrent and maybe even parallel please use this door. */
1305 int again; /* Run the current thread again? */
1313 * In the parallel world, we do unfair scheduling for the moment.
1314 * Ultimately, this should all be merged with the more
1315 * sophisticated GrAnSim scheduling options. (Of course, some
1316 * provision should be made for *required* threads to make sure
1317 * that they don't starve, but for now we assume that no one is
1318 * running concurrent Haskell on a multi-processor platform.)
1324 if (RunnableThreadsHd == Prelude_Z91Z93_closure)
1325 RunnableThreadsTl = CurrentTSO;
1326 TSO_LINK(CurrentTSO) = RunnableThreadsHd;
1327 RunnableThreadsHd = CurrentTSO;
1333 * In the sequential world, we assume that the whole point of running
1334 * the threaded build is for concurrent Haskell, so we provide round-robin
1339 if(RunnableThreadsHd == Prelude_Z91Z93_closure) {
1340 RunnableThreadsHd = CurrentTSO;
1342 TSO_LINK(RunnableThreadsTl) = CurrentTSO;
1343 if (DO_QP_PROF > 1) {
1344 QP_Event1("GA", CurrentTSO);
1347 RunnableThreadsTl = CurrentTSO;
1353 * Debugging code, which is useful enough (and cheap enough) to compile
1354 * in all the time. This makes sure that we don't access saved registers,
1355 * etc. in threads which are supposed to be sleeping.
1357 CurrentTSO = Prelude_Z91Z93_closure;
1358 CurrentRegTable = NULL;
1361 /* First the required sparks */
1363 for (sparkp = PendingSparksHd[REQUIRED_POOL];
1364 sparkp < PendingSparksTl[REQUIRED_POOL]; sparkp++) {
1366 if (SHOULD_SPARK(spark)) {
1367 if ((tso = NewThread(spark, T_REQUIRED)) == NULL)
1369 if (RunnableThreadsHd == Prelude_Z91Z93_closure) {
1370 RunnableThreadsHd = tso;
1372 if (RTSflags.ParFlags.granSimStats) {
1373 DumpGranEvent(GR_START, tso);
1374 sameThread = rtsTrue;
1378 TSO_LINK(RunnableThreadsTl) = tso;
1380 if (RTSflags.ParFlags.granSimStats)
1381 DumpGranEvent(GR_STARTQ, tso);
1384 RunnableThreadsTl = tso;
1387 QP_Event0(threadId++, spark);
1389 /* ToDo: Fix log entries for pruned sparks in GUM -- HWL */
1390 if(RTSflags.GranFlags.granSimStats_Sparks)
1391 DumpGranEvent(SP_PRUNED,threadId++);
1392 ^^^^^^^^ should be a TSO
1396 PendingSparksHd[REQUIRED_POOL] = sparkp;
1398 /* Now, almost the same thing for advisory sparks */
1400 for (sparkp = PendingSparksHd[ADVISORY_POOL];
1401 sparkp < PendingSparksTl[ADVISORY_POOL]; sparkp++) {
1403 if (SHOULD_SPARK(spark)) {
1406 /* In the parallel world, don't create advisory threads if we are
1407 * about to rerun the same thread, or already have runnable threads,
1408 * or the main thread has terminated */
1409 (RunnableThreadsHd != Prelude_Z91Z93_closure ||
1410 (required_thread_count == 0 && IAmMainThread)) ||
1412 advisory_thread_count == RTSflags.ConcFlags.maxThreads ||
1413 (tso = NewThread(spark, T_ADVISORY)) == NULL)
1415 advisory_thread_count++;
1416 if (RunnableThreadsHd == Prelude_Z91Z93_closure) {
1417 RunnableThreadsHd = tso;
1419 if (RTSflags.ParFlags.granSimStats) {
1420 DumpGranEvent(GR_START, tso);
1421 sameThread = rtsTrue;
1425 TSO_LINK(RunnableThreadsTl) = tso;
1427 if (RTSflags.ParFlags.granSimStats)
1428 DumpGranEvent(GR_STARTQ, tso);
1431 RunnableThreadsTl = tso;
1434 QP_Event0(threadId++, spark);
1436 /* ToDo: Fix log entries for pruned sparks in GUM -- HWL */
1437 if(RTSflags.GranFlags.granSimStats_Sparks)
1438 DumpGranEvent(SP_PRUNED,threadId++);
1439 ^^^^^^^^ should be a TSO
1443 PendingSparksHd[ADVISORY_POOL] = sparkp;
1446 longjmp(scheduler_loop, required_thread_count == 0 ? -1 : 1);
1448 longjmp(scheduler_loop, required_thread_count == 0 && IAmMainThread ? -1 : 1);
1456 %****************************************************************************
1458 \subsection[thread-gransim-execution]{Starting, Idling and Migrating
1459 Threads (GrAnSim only)}
1461 %****************************************************************************
1463 Thread start, idle and migration code for GrAnSim (i.e. simulating multiple
1469 /* ngoqvam che' {GrAnSim}! */
1471 # if defined(GRAN_CHECK)
1472 /* This routine is only used for keeping a statistics of thread queue
1473 lengths to evaluate the impact of priority scheduling. -- HWL
1474 {spark_queue_len}vo' jInIHta'
1477 thread_queue_len(PROC proc)
1482 for (len = 0, prev = Prelude_Z91Z93_closure, next = RunnableThreadsHd[proc];
1483 next != Prelude_Z91Z93_closure;
1484 len++, prev = next, next = TSO_LINK(prev))
1489 # endif /* GRAN_CHECK */
1492 A large portion of @StartThread@ deals with maintaining a sorted thread
1493 queue, which is needed for the Priority Sparking option. Without that
1494 complication the code boils down to FIFO handling.
1497 StartThread(event,event_type)
1499 enum gran_event_types event_type;
1501 P_ tso = EVENT_TSO(event),
1502 node = EVENT_NODE(event);
1503 PROC proc = EVENT_PROC(event),
1504 creator = EVENT_CREATOR(event);
1507 rtsBool found = rtsFalse;
1509 ASSERT(CurrentProc==proc);
1511 # if defined(GRAN_CHECK)
1512 if ( RTSflags.GranFlags.Light && CurrentProc!=0 ) {
1513 fprintf(stderr,"Qagh {StartThread}Daq: CurrentProc must be 0 in GrAnSim Light setup\n");
1517 /* A wee bit of statistics gathering */
1519 tot_tq_len += thread_queue_len(CurrentProc);
1522 ASSERT(TSO_LINK(CurrentTSO)==Prelude_Z91Z93_closure); /* TMP-CHG HWL */
1524 /* Idle proc; same for pri spark and basic version */
1525 if(ThreadQueueHd==Prelude_Z91Z93_closure)
1527 CurrentTSO = ThreadQueueHd = ThreadQueueTl = tso;
1529 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadqueuetime;
1530 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1531 CONTINUETHREAD,tso,Prelude_Z91Z93_closure,NULL);
1533 if(RTSflags.GranFlags.granSimStats &&
1534 !( (event_type == GR_START || event_type == GR_STARTQ) && RTSflags.GranFlags.labelling) )
1535 DumpRawGranEvent(CurrentProc,creator,event_type,
1537 TSO_SPARKNAME(tso));
1538 /* ^^^ SN (spark name) as optional info */
1539 /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
1540 /* ^^^ spark length as optional info */
1542 ASSERT(IS_IDLE(CurrentProc) || event_type==GR_RESUME ||
1543 (procStatus[CurrentProc]==Fishing && event_type==GR_STOLEN) ||
1544 procStatus[CurrentProc]==Starting);
1545 MAKE_BUSY(CurrentProc);
1549 /* In GrAnSim-Light we always have an idle `virtual' proc.
1550 The semantics of the one-and-only thread queue is different here:
1551 all threads in the queue are running (each on its own virtual processor);
1552 the queue is only needed internally in the simulator to interleave the
1553 reductions of the different processors.
1554 The one-and-only thread queue is sorted by the local clocks of the TSOs.
1556 if(RTSflags.GranFlags.Light)
1558 ASSERT(ThreadQueueHd!=Prelude_Z91Z93_closure);
1559 ASSERT(TSO_LINK(tso)==Prelude_Z91Z93_closure); /* TMP-CHG HWL */
1561 /* If only one thread in queue so far we emit DESCHEDULE in debug mode */
1562 if(RTSflags.GranFlags.granSimStats &&
1563 (RTSflags.GranFlags.debug & 0x20000) &&
1564 TSO_LINK(ThreadQueueHd)==Prelude_Z91Z93_closure) {
1565 DumpRawGranEvent(CurrentProc,CurrentProc,GR_DESCHEDULE,
1566 ThreadQueueHd,Prelude_Z91Z93_closure,0);
1567 __resched = rtsTrue;
1570 if ( InsertThread(tso) ) { /* new head of queue */
1571 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1572 CONTINUETHREAD,tso,Prelude_Z91Z93_closure,NULL);
1575 if(RTSflags.GranFlags.granSimStats &&
1576 !(( event_type == GR_START || event_type == GR_STARTQ) && RTSflags.GranFlags.labelling) )
1577 DumpRawGranEvent(CurrentProc,creator,event_type,
1579 TSO_SPARKNAME(tso));
1580 /* ^^^ SN (spark name) as optional info */
1581 /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
1582 /* ^^^ spark length as optional info */
1584 /* MAKE_BUSY(CurrentProc); */
1588 /* Only for Pri Sparking */
1589 if (RTSflags.GranFlags.DoPriorityScheduling && TSO_PRI(tso)!=0)
1590 /* {add_to_spark_queue}vo' jInIHta'; Qu' wa'DIch yIleghQo' */
1591 for (prev = ThreadQueueHd, next = TSO_LINK(ThreadQueueHd), count=0;
1592 (next != Prelude_Z91Z93_closure) &&
1593 !(found = (TSO_PRI(tso) >= TSO_PRI(next)));
1594 prev = next, next = TSO_LINK(next), count++)
1598 ASSERT(!IS_IDLE(CurrentProc));
1600 /* found can only be rtsTrue if pri sparking enabled */
1602 # if defined(GRAN_CHECK)
1603 ++non_end_add_threads;
1605 /* Add tso to ThreadQueue between prev and next */
1606 TSO_LINK(tso) = next;
1607 if ( next == Prelude_Z91Z93_closure ) {
1608 ThreadQueueTl = tso;
1610 /* no back link for TSO chain */
1613 if ( prev == Prelude_Z91Z93_closure ) {
1614 /* Never add TSO as first elem of thread queue; the first */
1615 /* element should be the one that is currently running -- HWL */
1616 # if defined(GRAN_CHECK)
1617 fprintf(stderr,"Qagh: NewThread (w/ PriorityScheduling): Trying to add TSO %#lx (PRI=%d) as first elem of threadQ (%#lx) on proc %u (@ %u)\n",
1618 tso, TSO_PRI(tso), ThreadQueueHd, CurrentProc,
1619 CurrentTime[CurrentProc]);
1622 TSO_LINK(prev) = tso;
1624 } else { /* !found */ /* or not pri sparking! */
1625 /* Add TSO to the end of the thread queue on that processor */
1626 TSO_LINK(ThreadQueueTl) = EVENT_TSO(event);
1627 ThreadQueueTl = EVENT_TSO(event);
1629 CurrentTime[CurrentProc] += count *
1630 RTSflags.GranFlags.gran_pri_sched_overhead +
1631 RTSflags.GranFlags.gran_threadqueuetime;
1633 if(RTSflags.GranFlags.DoThreadMigration)
1636 if(RTSflags.GranFlags.granSimStats &&
1637 !(( event_type == GR_START || event_type == GR_STARTQ) && RTSflags.GranFlags.labelling) )
1638 DumpRawGranEvent(CurrentProc,creator,event_type+1,
1640 TSO_SPARKNAME(tso));
1641 /* ^^^ SN (spark name) as optional info */
1642 /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
1643 /* ^^^ spark length as optional info */
1645 # if defined(GRAN_CHECK)
1646 /* Check if thread queue is sorted. Only for testing, really! HWL */
1647 if ( RTSflags.GranFlags.DoPriorityScheduling && (RTSflags.GranFlags.debug & 0x400) ) {
1648 rtsBool sorted = rtsTrue;
1651 if (ThreadQueueHd==Prelude_Z91Z93_closure || TSO_LINK(ThreadQueueHd)==Prelude_Z91Z93_closure) {
1652 /* just 1 elem => ok */
1654 /* Qu' wa'DIch yIleghQo' (ignore first elem)! */
1655 for (prev = TSO_LINK(ThreadQueueHd), next = TSO_LINK(prev);
1656 (next != Prelude_Z91Z93_closure) ;
1657 prev = next, next = TSO_LINK(prev)) {
1659 (TSO_PRI(prev) >= TSO_PRI(next));
1663 fprintf(stderr,"Qagh: THREADQ on PE %d is not sorted:\n",
1665 G_THREADQ(ThreadQueueHd,0x1);
1670 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadqueuetime;
1674 @InsertThread@, which is only used for GranSim Light, is similar to
1675 @StartThread@ in that it adds a TSO to a thread queue. However, it assumes
1676 that the thread queue is sorted by local clocks and it inserts the TSO at the
1677 right place in the queue. Don't create any event, just insert.
1686 rtsBool found = rtsFalse;
1688 # if defined(GRAN_CHECK)
1689 if ( !RTSflags.GranFlags.Light ) {
1690 fprintf(stderr,"Qagh {InsertThread}Daq: InsertThread should only be used in a GrAnSim Light setup\n");
1694 if ( RTSflags.GranFlags.Light && CurrentProc!=0 ) {
1695 fprintf(stderr,"Qagh {StartThread}Daq: CurrentProc must be 0 in GrAnSim Light setup\n");
1700 /* Idle proc; same for pri spark and basic version */
1701 if(ThreadQueueHd==Prelude_Z91Z93_closure)
1703 ThreadQueueHd = ThreadQueueTl = tso;
1704 /* MAKE_BUSY(CurrentProc); */
1708 for (prev = ThreadQueueHd, next = TSO_LINK(ThreadQueueHd), count=0;
1709 (next != Prelude_Z91Z93_closure) &&
1710 !(found = (TSO_CLOCK(tso) < TSO_CLOCK(next)));
1711 prev = next, next = TSO_LINK(next), count++)
1714 /* found can only be rtsTrue if pri sparking enabled */
1716 /* Add tso to ThreadQueue between prev and next */
1717 TSO_LINK(tso) = next;
1718 if ( next == Prelude_Z91Z93_closure ) {
1719 ThreadQueueTl = tso;
1721 /* no back link for TSO chain */
1724 if ( prev == Prelude_Z91Z93_closure ) {
1725 ThreadQueueHd = tso;
1727 TSO_LINK(prev) = tso;
1729 } else { /* !found */ /* or not pri sparking! */
1730 /* Add TSO to the end of the thread queue on that processor */
1731 TSO_LINK(ThreadQueueTl) = tso;
1732 ThreadQueueTl = tso;
1734 return (prev == Prelude_Z91Z93_closure);
1739 Export work to idle PEs. This function is called from @ReSchedule@ before
1740 dispatching on the current event. @HandleIdlePEs@ iterates over all PEs,
1741 trying to get work for idle PEs. Note, that this is a simplification
1742 compared to GUM's fishing model. We try to compensate for that by making
1743 the cost for stealing work dependent on the number of idle processors and
1744 thereby on the probability with which a randomly sent fish would find work.
1751 # if defined(GRAN) && defined(GRAN_CHECK)
1752 if ( RTSflags.GranFlags.Light ) {
1753 fprintf(stderr,"Qagh {HandleIdlePEs}Daq: Should never be entered in GrAnSim Light setup\n");
1759 for(proc = 0; proc < RTSflags.GranFlags.proc; proc++)
1760 if(IS_IDLE(proc)) /* && IS_SPARKING(proc) && IS_STARTING(proc) */
1761 /* First look for local work! */
1762 if (PendingSparksHd[proc][ADVISORY_POOL]!=NULL)
1764 new_event(proc,proc,CurrentTime[proc],
1765 FINDWORK,Prelude_Z91Z93_closure,Prelude_Z91Z93_closure,NULL);
1766 MAKE_SPARKING(proc);
1768 /* Then try to get remote work! */
1769 else if ((RTSflags.GranFlags.max_fishes==0 ||
1770 OutstandingFishes[proc]<RTSflags.GranFlags.max_fishes) )
1773 if(RTSflags.GranFlags.DoStealThreadsFirst &&
1774 (RTSflags.GranFlags.FetchStrategy >= 4 || OutstandingFetches[proc] == 0))
1776 if (SurplusThreads > 0l) /* Steal a thread */
1783 if(SparksAvail > 0l &&
1784 (RTSflags.GranFlags.FetchStrategy >= 3 || OutstandingFetches[proc] == 0)) /* Steal a spark */
1787 if (SurplusThreads > 0l &&
1788 (RTSflags.GranFlags.FetchStrategy >= 4 || OutstandingFetches[proc] == 0)) /* Steal a thread */
1794 Steal a spark and schedule moving it to proc. We want to look at PEs in
1795 clock order -- the furthest-behind PE (earliest local clock) first. Currently sparks are only stolen from
1796 the @ADVISORY_POOL@ never from the @REQUIRED_POOL@. Eventually, this should
1797 be changed to first steal from the former then from the latter.
1799 We model a sort of fishing mechanism by counting the number of sparks and
1800 threads we are currently stealing.
1807 sparkq spark, prev, next;
1808 rtsBool stolen = rtsFalse;
1809 TIME times[MAX_PROC], stealtime;
1810 unsigned ntimes=0, i, j;
1811 int first_later, upb, r;
1813 # if defined(GRAN) && defined(GRAN_CHECK)
1814 if ( RTSflags.GranFlags.Light ) {
1815 fprintf(stderr,"Qagh {StealSpark}Daq: Should never be entered in GrAnSim Light setup\n");
1820 /* times shall contain processors from which we may steal sparks */
1821 for(p=0; p < RTSflags.GranFlags.proc; ++p)
1823 PendingSparksHd[p][ADVISORY_POOL] != NULL &&
1824 CurrentTime[p] <= CurrentTime[CurrentProc])
1825 times[ntimes++] = p;
1828 for(i=0; i < ntimes; ++i)
1829 for(j=i+1; j < ntimes; ++j)
1830 if(CurrentTime[times[i]] > CurrentTime[times[j]])
1832 unsigned temp = times[i];
1833 times[i] = times[j];
1837 /* Choose random processor to steal spark from; first look at processors */
1838 /* that are earlier than the current one (i.e. proc) */
1841 (first_later < ntimes) && (CurrentTime[times[first_later]] < CurrentTime[proc]);
1845 while (!stolen && (ntimes>0)) {
1846 long unsigned int r, q=0;
1848 upb = (first_later==0) ? ntimes : first_later;
1850 if (RTSflags.GranFlags.RandomSteal) {
1851 r = lrand48(); /* [0, RAND_MAX] */
1855 /* -- ASSERT(r<=RAND_MAX); */
1856 i = (unsigned int) (r % upb); /* [0, upb) */
1857 /* -- ASSERT((i>=0) && (i<=upb)); */
1859 /* -- ASSERT((p>=0) && (p<MAX_PROC)); */
1861 # if defined(GRAN_CHECK)
1862 if ( RTSflags.GranFlags.debug & 0x2000 )
1863 fprintf(stderr,"RANDOM_STEAL: [index %u of %u] from %u (@ %lu) to %u (@ %lu) (q=%d) [rand value: %lu]\n",
1864 i, ntimes, p, CurrentTime[p], proc, CurrentTime[proc], q, r);
1867 /* Now go through sparkq and steal the first one that should be sparked*/
1868 for(prev=NULL, spark = PendingSparksHd[p][ADVISORY_POOL];
1869 spark != NULL && !stolen;
1872 next = SPARK_NEXT(spark);
1874 if ((IS_IDLE(p) || procStatus[p]==Sparking || procStatus[p]==Fishing) &&
1875 SPARK_NEXT(spark)==NULL)
1877 /* Be social! Don't steal the only spark of an idle processor */
1880 else if(SHOULD_SPARK(SPARK_NODE(spark)))
1882 /* Don't Steal local sparks */
1883 if(!SPARK_GLOBAL(spark))
1889 /* Prepare message for sending spark */
1890 CurrentTime[p] += RTSflags.GranFlags.gran_mpacktime;
1892 if(RTSflags.GranFlags.granSimStats_Sparks)
1893 DumpRawGranEvent(p,(PROC)0,SP_EXPORTED,Prelude_Z91Z93_closure,
1895 spark_queue_len(p,ADVISORY_POOL));
1897 SPARK_NEXT(spark) = NULL;
1899 stealtime = (CurrentTime[p] > CurrentTime[proc] ?
1905 new_event(proc,p /* CurrentProc */,stealtime,
1906 MOVESPARK,Prelude_Z91Z93_closure,Prelude_Z91Z93_closure,spark);
1908 /* MAKE_BUSY(proc); not yet; busy when TSO in threadq */
1910 ++OutstandingFishes[proc];
1913 ++SPARK_GLOBAL(spark);
1916 CurrentTime[p] += RTSflags.GranFlags.gran_mtidytime;
1918 else /* !(SHOULD_SPARK(SPARK_NODE(spark))) */
1920 if(RTSflags.GranFlags.granSimStats_Sparks)
1921 DumpRawGranEvent(p,(PROC)0,SP_PRUNED,Prelude_Z91Z93_closure,
1923 spark_queue_len(p,ADVISORY_POOL));
1925 DisposeSpark(spark);
1928 if(spark == PendingSparksHd[p][ADVISORY_POOL])
1929 PendingSparksHd[p][ADVISORY_POOL] = next;
1932 SPARK_NEXT(prev) = next;
1933 } /* for (spark=... iterating over sparkq */
1935 if(PendingSparksHd[p][ADVISORY_POOL] == NULL)
1936 PendingSparksTl[p][ADVISORY_POOL] = NULL;
1938 if (!stolen && (ntimes>0)) { /* nothing stealable from proc p :( */
1939 ASSERT(times[i]==p);
1941 /* remove p from the list (at pos i) */
1942 for (j=i; j+1<ntimes; j++)
1943 times[j] = times[j+1];
1946 /* update index to first proc which is later (or equal) than proc */
1949 (CurrentTime[times[first_later-1]]>CurrentTime[proc]);
1954 # if defined(GRAN_CHECK)
1955 if (stolen && (i!=0)) { /* only for statistics */
1957 ntimes_total += ntimes;
1958 fl_total += first_later;
1965 Steal a thread and schedule moving it to proc.
1974 TIME times[MAX_PROC], stealtime;
1975 unsigned ntimes=0, i, j;
1976 int first_later, upb, r;
1978 /* Hunt for a thread */
1980 # if defined(GRAN) && defined(GRAN_CHECK)
1981 if ( RTSflags.GranFlags.Light ) {
1982 fprintf(stderr,"Qagh {StealThread}: Should never be entered in GrAnSim Light setup\n");
1987 /* times shall contain processors from which we may steal threads */
1988 for(p=0; p < RTSflags.GranFlags.proc; ++p)
1989 if(proc != p && RunnableThreadsHd[p] != Prelude_Z91Z93_closure &&
1990 CurrentTime[p] <= CurrentTime[CurrentProc])
1991 times[ntimes++] = p;
1994 for(i=0; i < ntimes; ++i)
1995 for(j=i+1; j < ntimes; ++j)
1996 if(CurrentTime[times[i]] > CurrentTime[times[j]])
1998 unsigned temp = times[i];
1999 times[i] = times[j];
2003 /* Choose random processor to steal spark from; first look at processors */
2004 /* that are earlier than the current one (i.e. proc) */
2007 (first_later < ntimes) && (CurrentTime[times[first_later]] < CurrentTime[proc]);
/* Victim-selection loop (StealThread): pick a processor out of times[] --
   randomly among the ones whose clock is behind ours when RandomSteal is on
   -- until a thread is stolen or no candidates remain.
   NOTE(review): interior lines are missing from this excerpt. */
2011   while (!found && (ntimes>0)) {
2012     long unsigned int r, q=0;
2014     upb = (first_later==0) ? ntimes : first_later;
2016     if (RTSflags.GranFlags.RandomSteal) {
2017       r = lrand48();                                /* [0, RAND_MAX] */
2021       /* -- ASSERT(r<=RAND_MAX); */
2022       if ( RTSflags.GranFlags.debug & 0x2000 )
/* NOTE(review): r is long unsigned int but is printed with %d; "%lu" would
   be the matching conversion (cf. the %lu usages a few lines below). */
2023 	fprintf(stderr,"rand value: %d  " , r);
2024       i = (unsigned int) (r % upb);                 /* [0, upb) */
2025       /* -- ASSERT((i>=0) && (i<=upb)); */
2027     /* -- ASSERT((p>=0) && (p<MAX_PROC)); */
2029 #  if defined(GRAN_CHECK)
2030     if ( RTSflags.GranFlags.debug & 0x2000 )
2031       fprintf(stderr,"RANDOM_STEAL; [index %u] from %u (@ %lu) to %u (@ %lu) (q=%d)\n",
2032 	      i, p, CurrentTime[p], proc, CurrentTime[proc], q);
2035 /* Steal the first exportable thread in the runnable queue after the */
2038 if(RunnableThreadsHd[p] != Prelude_Z91Z93_closure)
2040 for(prev = RunnableThreadsHd[p], thread = TSO_LINK(RunnableThreadsHd[p]);
2041 thread != Prelude_Z91Z93_closure && TSO_LOCKED(thread);
2042 prev = thread, thread = TSO_LINK(thread))
2045 if(thread != Prelude_Z91Z93_closure) /* Take thread out of runnable queue */
2047 TSO_LINK(prev) = TSO_LINK(thread);
2049 TSO_LINK(thread) = Prelude_Z91Z93_closure;
2051 if(RunnableThreadsTl[p] == thread)
2052 RunnableThreadsTl[p] = prev;
2054 /* Turn magic constants into params !? -- HWL */
2056 CurrentTime[p] += 5l * RTSflags.GranFlags.gran_mpacktime;
2058 stealtime = (CurrentTime[p] > CurrentTime[proc] ?
2062 + 4l * RTSflags.GranFlags.gran_additional_latency
2063 + 5l * RTSflags.GranFlags.gran_munpacktime;
2065 /* Move the thread; set bitmask to 0 while TSO is `on-the-fly' */
2066 SET_PROCS(thread,Nowhere /* PE_NUMBER(proc) */);
2068 /* Move from one queue to another */
2069 new_event(proc,p,stealtime,MOVETHREAD,thread,Prelude_Z91Z93_closure,NULL);
2070 /* MAKE_BUSY(proc); not yet; only when thread is in threadq */
2071 ++OutstandingFishes[proc];
2076 if(RTSflags.GranFlags.granSimStats)
2077 DumpRawGranEvent(p,proc,GR_STEALING,thread,
2078 Prelude_Z91Z93_closure,0);
2080 CurrentTime[p] += 5l * RTSflags.GranFlags.gran_mtidytime;
2088 if (!found && (ntimes>0)) { /* nothing stealable from proc p */
2089 ASSERT(times[i]==p);
2091 /* remove p from the list (at pos i) */
2092 for (j=i; j+1<ntimes; j++)
2093 times[j] = times[j+1];
2097 # if defined(GRAN_CHECK) && defined(GRAN)
2098 if (found && (i!=0)) { /* only for statistics */
/* Model the latency of stealing a spark: a fish-and-reply delay scaled by
   how many PEs are idle (the more idlers, the less likely a random fish
   finds work quickly), multiplied by the communication latency.
   NOTE(review): interior lines are missing from this excerpt. */
2104 TIME SparkStealTime()
2106   double fishdelay, sparkdelay, latencydelay;
2107   fishdelay =  (double)RTSflags.GranFlags.proc/2;
2108   sparkdelay = fishdelay -
/* NOTE(review): with RTSflags.GranFlags.proc == 1 this divides by zero
   (a double, so it yields inf/NaN rather than trapping) -- presumably this
   routine is never reached in a 1-PE setup; verify against callers. */
2109           ((fishdelay-1)/(double)(RTSflags.GranFlags.proc-1))*(double)idlers();
2110   latencydelay = sparkdelay*((double)RTSflags.GranFlags.gran_latency);
/* NOTE(review): the computation above calls idlers() but this debug print
   references a capitalised `Idlers' -- looks like a stale identifier; the
   declaration is not visible in this excerpt, so confirm it exists. */
2113     fprintf(stderr,"fish delay = %g, spark delay = %g, latency delay = %g, Idlers = %u\n",
2114             fishdelay,sparkdelay,latencydelay,Idlers);
2116   return((TIME)latencydelay);
2118 #endif /* GRAN ; HWL */
2123 %****************************************************************************
2125 \subsection[thread-execution]{Executing Threads}
2127 %****************************************************************************
2129 First a set of functions for handling sparks and spark-queues that are
2130 attached to the processors. Currently, there are two spark-queues per
2134 \item A queue of @REQUIRED@ sparks i.e. these sparks will be definitely
2135 turned into threads ({\em non-discardable\/}). They are mainly used in concurrent
2136 Haskell. We don't use them in GrAnSim.
2137 \item A queue of @ADVISORY@ sparks i.e. they may be turned into threads if
2138 the RTS thinks that it is a good idea. However, these sparks are {\em
2139 discardable}. They will be discarded if the associated closure is
2140 generally not worth creating a new thread (indicated by a tag in the
2141 closure) or they may be pruned during GC if there are too many sparks
2146 EXTDATA_RO(StkO_info);
2147 EXTDATA_RO(TSO_info);
2148 EXTDATA_RO(WorldStateToken_closure);
2150 EXTFUN(EnterNodeCode);
2151 UNVEC(EXTFUN(stopThreadDirectReturn);,EXTDATA(vtbl_stopStgWorld);)
2154 /* ngoqvam che' {GrAnSim} */
2156 /* Slow but relatively reliable method uses stgMallocBytes */
2157 /* Eventually change that to heap allocated sparks. */
2159 /* -------------------------------------------------------------------------
2160 This is the main point where handling granularity information comes into
2162 ------------------------------------------------------------------------- */
2164 #define MAX_RAND_PRI 100
2167 Granularity info transformers.
2168 Applied to the GRAN_INFO field of a spark.
2170 static I_ ID(I_ x) { return(x); };
2171 static I_ INV(I_ x) { return(-x); };
2172 static I_ IGNORE(I_ x) { return (0); };
2173 static I_ RAND(I_ x) { return ((lrand48() % MAX_RAND_PRI) + 1); }
2175 /* NB: size_info and par_info are currently unused (what a shame!) -- HWL */
2178 NewSpark(node,name,gran_info,size_info,par_info,local)
2180 I_ name, gran_info, size_info, par_info, local;
2185 pri = RTSflags.GranFlags.RandomPriorities ? RAND(gran_info) :
2186 RTSflags.GranFlags.InversePriorities ? INV(gran_info) :
2187 RTSflags.GranFlags.IgnorePriorities ? IGNORE(gran_info) :
2190 if ( RTSflags.GranFlags.SparkPriority!=0 && pri<RTSflags.GranFlags.SparkPriority ) {
2191 if ( RTSflags.GranFlags.debug & 0x40 ) {
2192 fprintf(stderr,"NewSpark: Ignoring spark of priority %u (SparkPriority=%u); node=0x%lx; name=%u\n",
2193 pri, RTSflags.GranFlags.SparkPriority, node, name);
2195 return ((sparkq)NULL);
2198 newspark = (sparkq) stgMallocBytes(sizeof(struct spark), "NewSpark");
2199 SPARK_PREV(newspark) = SPARK_NEXT(newspark) = NULL;
2200 SPARK_NODE(newspark) = node;
2201 SPARK_NAME(newspark) = (name==1) ? TSO_SPARKNAME(CurrentTSO) : name;
2202 SPARK_GRAN_INFO(newspark) = pri;
2203 SPARK_GLOBAL(newspark) = !local; /* Check that with parAt, parAtAbs !!*/
2207 /* To make casm more convenient use this function to label strategies */
/* Label a TSO with a spark name (used from casm to tag strategies): if the
   old-name field is the sentinel 1, shift the current name into the old-name
   half and install `name' in the low half; otherwise just overwrite the low
   (new-name) half, keeping the old-name field.
   NOTE(review): interior lines are missing from this excerpt. */
2209 set_sparkname(P_ tso, int name) {
/* NOTE(review): `==' binds tighter than `&', so this condition parses as
   TSO_SPARKNAME(tso) & (OLD_SPARKNAME_MASK == 1), i.e. it tests bit 0 of the
   spark name unless OLD_SPARKNAME_MASK happens to be 1.  The intended test
   is almost certainly (TSO_SPARKNAME(tso) & OLD_SPARKNAME_MASK) == 1. */
2210   if (TSO_SPARKNAME(tso) & OLD_SPARKNAME_MASK == 1) {
2211     TSO_SPARKNAME(tso) &= NEW_SPARKNAME_MASK;
/* NOTE(review): this shifts right by 16 while reset_sparkname shifts left
   by 16; one direction moves the new-name half into the old-name half only
   if the masks are laid out accordingly -- confirm against the mask
   definitions, the pair looks inconsistent. */
2212     TSO_SPARKNAME(tso) = TSO_SPARKNAME(tso) >> 16;
2213     TSO_SPARKNAME(tso) |= name;
2215     TSO_SPARKNAME(tso) = (TSO_SPARKNAME(tso) & OLD_SPARKNAME_MASK) | name ;
/* The `0 &&' disables this debug dump; kept for easy re-enabling. */
2217   if(0 && RTSflags.GranFlags.granSimStats)
2218     DumpRawGranEvent(CurrentProc,99,GR_START,
2220 		     TSO_SPARKNAME(tso));
2221                      /* ^^^  SN (spark name) as optional info */
2222 		     /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
2223                      /* ^^^  spark length as optional info */
/* Undo set_sparkname: keep only the old-name field and shift it back by 16
   bits.  NOTE(review): shift direction is opposite to set_sparkname's
   `>> 16'; confirm which orientation matches the mask layout. */
2228 reset_sparkname(P_ tso) {
2229   TSO_SPARKNAME(tso) = (TSO_SPARKNAME(tso) & OLD_SPARKNAME_MASK) << 16;
2234 With PrioritySparking add_to_spark_queue performs an insert sort to keep
2235 the spark queue sorted. Otherwise the spark is just added to the end of
/* add_to_spark_queue: append (or, with PrioritySparking, insertion-sort) a
   spark into the ADVISORY_POOL of CurrentProc, maintaining both the Hd/Tl
   pointers and the doubly-linked SPARK_PREV/SPARK_NEXT chain.
   A NULL spark is rejected up front. */
2240 add_to_spark_queue(spark)
2245 rtsBool found = rtsFalse;
2247 if ( spark == (sparkq)NULL ) {
2251 if (RTSflags.GranFlags.DoPrioritySparking && (SPARK_GRAN_INFO(spark)!=0) ) {
/* Walk the queue until the first spark with priority <= the new one.
   NOTE(review): the loop's NULL-termination test is on a line not shown
   here -- presumably `next != NULL &&` guards the `found` test; verify,
   since SPARK_GRAN_INFO(next) on a NULL next would crash. */
2253 for (prev = NULL, next = PendingSparksHd[CurrentProc][ADVISORY_POOL], count=0;
2255 !(found = (SPARK_GRAN_INFO(spark) >= SPARK_GRAN_INFO(next)));
2256 prev = next, next = SPARK_NEXT(next), count++)
2259 } else { /* 'utQo' */
2261 found = rtsFalse; /* to add it at the end */
/* Insert in the middle/front of the queue (priority case, found). */
2266 SPARK_NEXT(spark) = next;
2267 if ( next == NULL ) {
2268 PendingSparksTl[CurrentProc][ADVISORY_POOL] = spark;
2270 SPARK_PREV(next) = spark;
2272 SPARK_PREV(spark) = prev;
2273 if ( prev == NULL ) {
2274 PendingSparksHd[CurrentProc][ADVISORY_POOL] = spark;
2276 SPARK_NEXT(prev) = spark;
2278 } else { /* (RTSflags.GranFlags.DoPrioritySparking && !found) || !DoPrioritySparking */
/* Plain append at the tail. */
2279 SPARK_NEXT(spark) = NULL;
2280 SPARK_PREV(spark) = PendingSparksTl[CurrentProc][ADVISORY_POOL];
2281 if (PendingSparksHd[CurrentProc][ADVISORY_POOL] == NULL)
2282 PendingSparksHd[CurrentProc][ADVISORY_POOL] = spark;
2284 SPARK_NEXT(PendingSparksTl[CurrentProc][ADVISORY_POOL]) = spark;
2285 PendingSparksTl[CurrentProc][ADVISORY_POOL] = spark;
/* Charge simulated time proportional to the insertion-walk length. */
2289 if (RTSflags.GranFlags.DoPrioritySparking) {
2290 CurrentTime[CurrentProc] += count * RTSflags.GranFlags.gran_pri_spark_overhead;
2293 # if defined(GRAN_CHECK)
/* Debug 0x1000: verify that PendingSparksTl really is the last element. */
2294 if ( RTSflags.GranFlags.debug & 0x1000 ) {
2295 for (prev = NULL, next = PendingSparksHd[CurrentProc][ADVISORY_POOL];
2297 prev = next, next = SPARK_NEXT(next))
2299 if ( (prev!=NULL) && (prev!=PendingSparksTl[CurrentProc][ADVISORY_POOL]) )
2300 fprintf(stderr,"SparkQ inconsistency after adding spark %#lx: (PE %u, pool %u) PendingSparksTl (%#lx) not end of queue (%#lx)\n",
2301 spark,CurrentProc,ADVISORY_POOL,
2302 PendingSparksTl[CurrentProc][ADVISORY_POOL], prev);
2306 # if defined(GRAN_CHECK)
2307 /* Check if the sparkq is still sorted. Just for testing, really! */
2308 if ( RTSflags.GranFlags.debug & 0x400 ) {
2309 rtsBool sorted = rtsTrue;
2312 if (PendingSparksHd[CurrentProc][ADVISORY_POOL] == NULL ||
2313 SPARK_NEXT(PendingSparksHd[CurrentProc][ADVISORY_POOL]) == NULL ) {
2314 /* just 1 elem => ok */
2316 for (prev = PendingSparksHd[CurrentProc][ADVISORY_POOL],
2317 next = SPARK_NEXT(PendingSparksHd[CurrentProc][ADVISORY_POOL]);
2319 prev = next, next = SPARK_NEXT(next)) {
2321 (SPARK_GRAN_INFO(prev) >= SPARK_GRAN_INFO(next));
2325 fprintf(stderr,"Warning: SPARKQ on PE %d is not sorted:\n",
2327 G_SPARKQ(PendingSparksHd[CurrentProc][ADVISORY_POOL],1);
2337 /* A SP_PRUNED line should be dumped when this is called from pruning or */
2338 /* discarding a spark! */
/* DisposeSparkQ: recursively free an entire spark queue (tail first),
   then sanity-check the global SparksAvail counter. */
2347 DisposeSparkQ(spark)
2353 DisposeSparkQ(SPARK_NEXT(spark));
/* SparksAvail going negative means sparks were freed that were never
   counted as available -- report but do not abort. */
2356 if (SparksAvail < 0)
2357 fprintf(stderr,"DisposeSparkQ: SparksAvail<0 after disposing sparkq @ 0x%lx\n", spark);
2366 % {GrAnSim}vaD (Notes on GrAnSim) -- HWL:
2370 \paragraph{Notes on GrAnSim:}
2371 The following routines are for handling threads. Currently, we use an
2372 unfair scheduling policy in GrAnSim. Thus there are no explicit functions for
2373 scheduling here. If other scheduling policies are added to the system that
2374 code should go in here.
2377 /* Create a new TSO, with the specified closure to enter and thread type */
/* NewThread: allocate and initialise a TSO (and, further down, its first
   stack object) for `topClosure`. Two signatures exist: the GranSim
   priority-sparking build takes an extra `pri` argument. TSOs and stack
   objects are recycled from the AvailableTSO / AvailableStack free lists
   when possible; otherwise they are heap-allocated (with a heap-limit
   check that triggers ALLOC_TSO / garbage collection paths). */
2381 NewThread(topClosure, type, pri)
2387 NewThread(topClosure, type)
2394 # if defined(GRAN) && defined(GRAN_CHECK)
/* GrAnSim Light simulates everything on virtual PE 0. */
2395 if ( RTSflags.GranFlags.Light && CurrentProc!=0) {
2396 fprintf(stderr,"Qagh {NewThread}Daq: CurrentProc must be 0 in GrAnSim Light setup\n");
/* Reuse a TSO from the free list if one is available... */
2400 if (AvailableTSO != Prelude_Z91Z93_closure) {
2403 SET_PROCS(tso,ThisPE); /* Allocate it locally! */
2405 AvailableTSO = TSO_LINK(tso);
/* ...otherwise allocate fresh heap (GC check against SAVE_HpLim). */
2406 } else if (SAVE_Hp + TSO_HS + TSO_CTS_SIZE > SAVE_HpLim) {
2409 ALLOC_TSO(TSO_HS,BYTES_TO_STGWORDS(sizeof(STGRegisterTable)),
2410 BYTES_TO_STGWORDS(sizeof(StgDouble)));
2412 SAVE_Hp += TSO_HS + TSO_CTS_SIZE;
2413 SET_TSO_HDR(tso, TSO_info, CCC);
/* Initialise the TSO fields. */
2416 TSO_LINK(tso) = Prelude_Z91Z93_closure;
2418 TSO_PRI(tso) = pri; /* Priority of that TSO -- HWL */
2421 TSO_CCC(tso) = (CostCentre)STATIC_CC_REF(CC_MAIN);
2423 TSO_NAME(tso) = (P_) INFO_PTR(topClosure); /* A string would be nicer -- JSM */
2424 TSO_ID(tso) = threadId++;
2425 TSO_TYPE(tso) = type;
2426 TSO_PC1(tso) = TSO_PC2(tso) = EnterNodeCode;
2427 TSO_ARG1(tso) = /* TSO_ARG2(tso) = */ 0; /* FIX THIS -- HWL */
2428 TSO_SWITCH(tso) = NULL;
2435 #if defined(GRAN) || defined(PAR)
/* Zero all per-thread profiling counters. */
2436 TSO_SPARKNAME(tso) = 0;
2438 TSO_STARTEDAT(tso) = CurrentTime[CurrentProc];
2440 TSO_STARTEDAT(tso) = CURRENT_TIME;
2442 TSO_EXPORTED(tso) = 0;
2443 TSO_BASICBLOCKS(tso) = 0;
2444 TSO_ALLOCS(tso) = 0;
2445 TSO_EXECTIME(tso) = 0;
2446 TSO_FETCHTIME(tso) = 0;
2447 TSO_FETCHCOUNT(tso) = 0;
2448 TSO_BLOCKTIME(tso) = 0;
2449 TSO_BLOCKCOUNT(tso) = 0;
2450 TSO_BLOCKEDAT(tso) = 0;
2451 TSO_GLOBALSPARKS(tso) = 0;
2452 TSO_LOCALSPARKS(tso) = 0;
2454 if (RTSflags.GranFlags.Light)
2455 TSO_CLOCK(tso) = TSO_STARTEDAT(tso); /* local clock */
2461 * set pc, Node (R1), liveness
2463 CurrentRegTable = TSO_INTERNAL_PTR(tso);
2464 SAVE_Liveness = LIVENESS_R1;
2465 SAVE_R1.p = topClosure;
2468 if (type == T_MAIN) {
/* First stack chunk: recycle from AvailableStack or allocate
   RTSflags.ConcFlags.stkChunkSize words. */
2472 if (AvailableStack != Prelude_Z91Z93_closure) {
2473 stko = AvailableStack;
2475 SET_PROCS(stko,ThisPE);
2477 AvailableStack = STKO_LINK(AvailableStack);
2478 } else if (SAVE_Hp + STKO_HS + RTSflags.ConcFlags.stkChunkSize > SAVE_HpLim) {
2481 /* ALLOC_STK(STKO_HS,STKO_CHUNK_SIZE,0); use RTSflag now*/
2482 ALLOC_STK(STKO_HS,RTSflags.ConcFlags.stkChunkSize,0);
2484 SAVE_Hp += STKO_HS + RTSflags.ConcFlags.stkChunkSize;
2485 SET_STKO_HDR(stko, StkO_info, CCC);
2487 STKO_SIZE(stko) = RTSflags.ConcFlags.stkChunkSize + STKO_VHS;
/* Empty A and B stacks: pointers one beyond their bottoms. */
2488 STKO_SpB(stko) = STKO_SuB(stko) = STKO_BSTK_BOT(stko) + BREL(1);
2489 STKO_SpA(stko) = STKO_SuA(stko) = STKO_ASTK_BOT(stko) + AREL(1);
2490 STKO_LINK(stko) = Prelude_Z91Z93_closure;
2491 STKO_RETURN(stko) = NULL;
2497 STKO_ADEP(stko) = STKO_BDEP(stko) = 0;
/* The main thread starts with the WorldStateToken on its A stack. */
2500 if (type == T_MAIN) {
2501 STKO_SpA(stko) -= AREL(1);
2502 *STKO_SpA(stko) = (P_) WorldStateToken_closure;
2505 SAVE_Ret = (StgRetAddr) UNVEC(stopThreadDirectReturn,vtbl_stopStgWorld);
2509 QP_Event1(do_qp_prof > 1 ? "*A" : "*G", tso);
2511 #if defined(GRAN_CHECK)
2512 tot_sq_len += spark_queue_len(CurrentProc,ADVISORY_POOL);
2520 In GrAnSim the @EndThread@ function is the place where statistics about the
2521 simulation are printed. I guess that could be moved into @main.lc@.
/* EndThread: called when the current thread finishes. Dumps per-thread
   ticky/GranSim statistics, unwinds the run queues when the *main* thread
   ends (GranSim: schedule-then-kill every remaining runnable thread so
   that post-processing scripts see consistent events), recycles the TSO
   and stack object onto the free lists, and re-enters the scheduler. */
2526 EndThread(STG_NO_ARGS)
2530 TIME now = CURRENT_TIME;
2534 if (RTSflags.TickyFlags.showTickyStats) {
2535 fprintf(RTSflags.TickyFlags.tickyFile,
2536 "Thread %d (%lx)\n\tA stack max. depth: %ld words\n",
2537 TSO_ID(CurrentTSO), TSO_NAME(CurrentTSO), TSO_AHWM(CurrentTSO));
2538 fprintf(RTSflags.TickyFlags.tickyFile,
2539 "\tB stack max. depth: %ld words\n",
2540 TSO_BHWM(CurrentTSO));
2545 QP_Event1("G*", CurrentTSO);
2549 ASSERT(CurrentTSO == ThreadQueueHd);
2551 if (RTSflags.GranFlags.DoThreadMigration)
2554 if(TSO_TYPE(CurrentTSO)==T_MAIN)
/* Main thread ends => the whole simulation ends: drain every PE's
   runnable queue, emitting SCHEDULE/end events for each thread. */
2558 for(i=0; i < RTSflags.GranFlags.proc; ++i) {
2560 while(RunnableThreadsHd[i] != Prelude_Z91Z93_closure)
2562 /* We schedule runnable threads before killing them to */
2563 /* make the job of bookkeeping the running, runnable, */
2564 /* blocked threads easier for scripts like gr2ps -- HWL */
2566 if (RTSflags.GranFlags.granSimStats && !is_first &&
2567 (!RTSflags.GranFlags.Light || RTSflags.GranFlags.debug & 0x20000) )
2568 DumpRawGranEvent(i,(PROC)0,GR_SCHEDULE,
2569 RunnableThreadsHd[i],
2570 Prelude_Z91Z93_closure,0);
2571 if (!RTSflags.GranFlags.granSimStats_suppressed &&
2572 TSO_TYPE(RunnableThreadsHd[i])!=T_MAIN)
2573 DumpGranInfo(i,RunnableThreadsHd[i],rtsTrue);
2574 RunnableThreadsHd[i] = TSO_LINK(RunnableThreadsHd[i]);
2575 is_first = rtsFalse;
2579 ThreadQueueHd = Prelude_Z91Z93_closure;
2580 /* Printing of statistics has been moved into end_gr_simulation */
/* Retroactively emit the START event if labelling was on (the spark
   name is only known now). */
2583 if (RTSflags.GranFlags.labelling && RTSflags.GranFlags.granSimStats &&
2584 !RTSflags.GranFlags.granSimStats_suppressed)
2585 DumpStartEventAt(TSO_STARTEDAT(CurrentTSO),where_is(CurrentTSO),0,GR_START,
2586 CurrentTSO,Nil_closure,
2587 TSO_SPARKNAME(CurrentTSO));
2588 /* ^^^ SN (spark name) as optional info */
2589 /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
2590 /* ^^^ spark length as optional info */
2592 if (RTSflags.GranFlags.granSimStats &&
2593 !RTSflags.GranFlags.granSimStats_suppressed)
2594 DumpGranInfo(CurrentProc,CurrentTSO,
2595 TSO_TYPE(CurrentTSO) != T_ADVISORY);
2597 if (RTSflags.GranFlags.granSimStats_Binary &&
2598 TSO_TYPE(CurrentTSO)==T_MAIN &&
2599 !RTSflags.GranFlags.granSimStats_suppressed)
2600 grterminate(CurrentTime[CurrentProc]);
2602 if (TSO_TYPE(CurrentTSO)!=T_MAIN)
2603 ActivateNextThread(CurrentProc);
2605 /* Note ThreadQueueHd is Nil when the main thread terminates
2606 if(ThreadQueueHd != Prelude_Z91Z93_closure)
2608 if (RTSflags.GranFlags.granSimStats && !RTSflags.GranFlags.granSimStats_suppressed &&
2609 (!RTSflags.GranFlags.Light || RTSflags.GranFlags.debug & 0x20000) )
2610 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
2611 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadscheduletime;
2618 if (RTSflags.ParFlags.granSimStats) {
2619 TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO);
2620 DumpGranInfo(thisPE, CurrentTSO, TSO_TYPE(CurrentTSO) != T_ADVISORY);
/* Bookkeeping by thread type; main-thread termination ends the world
   via longjmp back into the scheduler loop. */
2624 switch (TSO_TYPE(CurrentTSO)) {
2626 required_thread_count--;
2629 if (GRANSIMSTATS_BINARY)
2633 longjmp(scheduler_loop, -1); /* i.e. the world comes to an end NOW */
2635 ReSchedule(0); /* i.e. the world will eventually come to an end */
2639 required_thread_count--;
2643 advisory_thread_count--;
2651 fprintf(stderr, "EndThread: %x unknown\n", TSO_TYPE(CurrentTSO));
2655 /* Reuse stack object space */
2656 ASSERT(STKO_LINK(SAVE_StkO) == Prelude_Z91Z93_closure);
2657 STKO_LINK(SAVE_StkO) = AvailableStack;
2658 AvailableStack = SAVE_StkO;
/* Recycle the TSO itself onto the free list. */
2660 TSO_LINK(CurrentTSO) = AvailableTSO;
2661 AvailableTSO = CurrentTSO;
2662 CurrentTSO = Prelude_Z91Z93_closure;
2663 CurrentRegTable = NULL;
2666 /* NB: Now ThreadQueueHd is either the next runnable thread on this */
2667 /* proc or it's Prelude_Z91Z93_closure. In the latter case, a FINDWORK will be */
2668 /* issued by ReSchedule. */
2669 ReSchedule(SAME_THREAD); /* back for more! */
2671 ReSchedule(0); /* back for more! */
2677 %****************************************************************************
2679 \subsection[thread-blocking]{Local Blocking}
2681 %****************************************************************************
2685 #if defined(GRAN_COUNT)
2686 /* Some non-essential maybe-useful statistics-gathering */
/* Trivial bump-counters for update/PAP statistics; the counted globals
   (nUPDs etc.) are declared elsewhere in this file. */
2687 void CountnUPDs() { ++nUPDs; }
2688 void CountnUPDs_old() { ++nUPDs_old; }
2689 void CountnUPDs_new() { ++nUPDs_new; }
2691 void CountnPAPs() { ++nPAPs; }
2694 EXTDATA_RO(BQ_info);
2697 /* NB: non-GRAN version ToDo
2699 * AwakenBlockingQueue awakens a list of TSOs and FBQs.
/* Non-GranSim AwakenBlockingQueue: walks a blocking queue containing a
   mix of TSOs and BlockedFetch (FBQ) nodes. TSOs are appended to the
   runnable queue; BlockedFetches are unlinked onto PendingFetches. */
2702 P_ PendingFetches = Prelude_Z91Z93_closure;
2705 AwakenBlockingQueue(bqe)
2712 TIME now = CURRENT_TIME;
2717 while (bqe != Prelude_Z91Z93_closure) {
2719 while (IS_MUTABLE(INFO_PTR(bqe))) {
2720 switch (INFO_TYPE(INFO_PTR(bqe))) {
2724 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
/* Account block/fetch time for the awoken TSO. */
2727 if (RTSflags.ParFlags.granSimStats) {
2728 DumpGranEvent(GR_RESUMEQ, bqe);
2729 switch (TSO_QUEUE(bqe)) {
2731 TSO_BLOCKTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
2734 TSO_FETCHTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
2738 fprintf(stderr, "ABQ: TSO_QUEUE invalid.\n");
/* First TSO in this run: splice the chain onto the runnable queue. */
2743 if (last_tso == NULL) {
2744 if (RunnableThreadsHd == Prelude_Z91Z93_closure) {
2745 RunnableThreadsHd = bqe;
2747 TSO_LINK(RunnableThreadsTl) = bqe;
2751 bqe = TSO_LINK(bqe);
/* BlockedFetch case: move the node onto the PendingFetches list. */
2755 next = BF_LINK(bqe);
2756 BF_LINK(bqe) = PendingFetches;
2757 PendingFetches = bqe;
2759 if (last_tso != NULL)
2760 TSO_LINK(last_tso) = next;
2763 fprintf(stderr, "Unexpected IP (%#lx) in blocking queue at %#lx\n",
2764 INFO_PTR(bqe), (W_) bqe);
/* Terminate the runnable queue at the last awoken TSO. */
2771 if (last_tso != NULL) {
2772 RunnableThreadsTl = last_tso;
2774 TSO_LINK(last_tso) = Prelude_Z91Z93_closure;
2782 # if defined(GRAN_CHECK)
2784 /* First some useful test functions */
2786 EXTFUN(RBH_Save_0_info);
2787 EXTFUN(RBH_Save_1_info);
2788 EXTFUN(RBH_Save_2_info);
/* Debug helper (GRAN_CHECK only): pretty-print a blocking queue to
   stderr, marking each TSO as Local or Global (by where_is) and showing
   how the queue ends (NIL / RBH_Save_? closure). Purely diagnostic; has
   no effect on scheduler state. */
2796 char str[80], str0[80];
2798 fprintf(stderr,"\n[PE %d] @ %lu BQ: ",
2799 CurrentProc,CurrentTime[CurrentProc]);
2800 if ( bqe == Prelude_Z91Z93_closure ) {
2801 fprintf(stderr," NIL.\n");
2804 if ( bqe == NULL ) {
2805 fprintf(stderr," NULL\n");
2808 while (IS_MUTABLE(INFO_PTR(bqe))) { /* This distinguishes TSOs from */
2809 W_ proc; /* RBH_Save_? closures! */
2811 /* Find where the tso lives */
2812 proc = where_is(bqe);
2813 it = INFO_TYPE(INFO_PTR(bqe));
2827 if(proc == CurrentProc)
2828 fprintf(stderr," %#lx (%x) L %s,", bqe, TSO_ID(bqe), str0);
2830 fprintf(stderr," %#lx (%x) G (PE %d) %s,", bqe, TSO_ID(bqe), proc, str0);
2835 bqe = TSO_LINK(bqe);
2838 bqe = TSO_LINK(bqe);
2841 bqe = Prelude_Z91Z93_closure;
2844 /* TSO_LINK(last_tso) = Prelude_Z91Z93_closure; */
2846 if ( bqe == Prelude_Z91Z93_closure )
2847 fprintf(stderr," NIL.\n");
2849 (INFO_PTR(bqe) == (P_) RBH_Save_0_info) ||
2850 (INFO_PTR(bqe) == (P_) RBH_Save_1_info) ||
2851 (INFO_PTR(bqe) == (P_) RBH_Save_2_info) )
2852 fprintf(stderr," RBH.\n");
2853 /* fprintf(stderr,"\n%s\n",str); */
/* CHECK_BQ (GRAN_CHECK only): consistency check that `tso`, running on
   `proc`, is not already a member of the blocking queue attached to
   `node`. Extracts the queue according to the closure's info type and
   walks it. Returns rtsTrue when consistent. */
2857 CHECK_BQ(node, tso, proc)
2864 PROC p = where_is(tso);
2865 rtsBool ok = rtsTrue;
/* NOTE(review): this format string has four conversions (%#lx %x %d %d)
   but only three arguments are visible; the fourth (presumably
   CurrentProc) may be on a line not shown here -- verify. */
2868 fprintf(stderr,"ERROR in CHECK_BQ: CurrentTSO %#lx (%x) on proc %d but CurrentProc = %d\n",
2869 tso, TSO_ID(tso), proc);
2873 switch (INFO_TYPE(INFO_PTR(node))) {
2875 case INFO_BH_U_TYPE:
2876 bqe = (P_) BQ_ENTRIES(node);
2877 return (rtsTrue); /* BHs don't have BQs */
2880 bqe = (P_) BQ_ENTRIES(node);
2882 case INFO_FMBQ_TYPE:
/* FMBQ closures belong to GUM, never to GrAnSim. */
2883 fprintf(stderr,"CHECK_BQ: ERROR: FMBQ closure (%#lx) found in GrAnSim (TSO=%#lx (%x))\n",
2884 node, tso, TSO_ID(tso));
2887 case INFO_SPEC_RBH_TYPE:
2888 bqe = (P_) SPEC_RBH_BQ(node);
2890 case INFO_GEN_RBH_TYPE:
2891 bqe = (P_) GEN_RBH_BQ(node);
2896 I_ size, ptrs, nonptrs, vhs;
2897 char info_hdr_ty[80];
2899 fprintf(stderr, "CHECK_BQ: thought %#lx was a black hole (IP %#lx)",
2900 node, INFO_PTR(node));
2901 info_ptr = get_closure_info(node,
2902 &size, &ptrs, &nonptrs, &vhs,
2904 fprintf(stderr, " %s\n",info_hdr_ty);
2905 /* G_PRINT_NODE(node); */
2907 /* EXIT(EXIT_FAILURE); */
/* Walk the queue; IS_MUTABLE separates TSOs from RBH_Save_? closures. */
2911 while (IS_MUTABLE(INFO_PTR(bqe))) { /* This distinguishes TSOs from */
2912 W_ proc; /* RBH_Save_? closures! */
2914 /* Find where the tso lives */
2915 proc = where_is(bqe);
2916 it = INFO_TYPE(INFO_PTR(bqe));
2919 fprintf(stderr,"ERROR in CHECK_BQ [Node = 0x%lx, PE %d]: TSO %#lx (%x) already in BQ: ",
2920 node, proc, tso, TSO_ID(tso));
2921 PRINT_BQ(BQ_ENTRIES(node));
2925 bqe = TSO_LINK(bqe);
2929 /* End of test functions */
2930 # endif /* GRAN_CHECK */
2932 /* This version of AwakenBlockingQueue has been originally taken from the
2933 GUM code. It is now assimilated into GrAnSim */
2935 /* Note: This version assumes a pointer to a blocking queue rather than a
2936 node with an attached blocking queue as input */
/* GrAnSim AwakenBlockingQueue: walk a blocking queue of TSOs and, for
   each, compute a simulated unblock time (local vs. global cost model)
   and schedule a RESUMETHREAD event. Does not move TSOs to the runnable
   queue directly -- the event handler does that. */
2939 AwakenBlockingQueue(bqe)
2942 /* P_ tso = (P_) BQ_ENTRIES(node); */
2951 /* Compatibility mode with old libraries! 'oH jIvoQmoH */
2952 if (IS_BQ_CLOSURE(bqe))
2953 bqe = (P_)BQ_ENTRIES(bqe);
2954 else if ( INFO_TYPE(INFO_PTR(bqe)) == INFO_SPEC_RBH_TYPE )
2955 bqe = (P_)SPEC_RBH_BQ(bqe);
2956 else if ( INFO_TYPE(INFO_PTR(bqe)) == INFO_GEN_RBH_TYPE )
2957 bqe = (P_)GEN_RBH_BQ(bqe);
2959 # if defined(GRAN_CHECK)
2960 if ( RTSflags.GranFlags.debug & 0x100 ) {
2965 # if defined(GRAN_COUNT)
2967 if (tso != Prelude_Z91Z93_closure)
2971 # if defined(GRAN_CHECK)
2972 if (RTSflags.GranFlags.debug & 0x100)
2973 fprintf(stderr,"----- AwBQ: ");
2976 while (IS_MUTABLE(INFO_PTR(bqe))) { /* This distinguishes TSOs from */
2977 W_ proc; /* RBH_Save_? closures! */
2978 ASSERT(INFO_TYPE(INFO_PTR(bqe)) == INFO_TSO_TYPE);
2981 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
2983 # if defined(GRAN_COUNT)
2987 /* Find where the tso lives */
2988 proc = where_is(bqe);
/* Local unblock: cheap. Global unblock: pack + latency + tidy costs. */
2990 if(proc == CurrentProc) {
2991 notifytime = CurrentTime[CurrentProc] + RTSflags.GranFlags.gran_lunblocktime;
2993 /* A better way of handling this would be to introduce a
2994 GLOBALUNBLOCK event which is created here. -- HWL */
2995 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
2996 notifytime = STG_MAX(CurrentTime[CurrentProc],CurrentTime[proc]) +
2997 RTSflags.GranFlags.gran_gunblocktime;
2998 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
2999 /* new_event(proc, CurrentProc, notifytime,
3000 GLOBALUNBLOCK,bqe,Prelude_Z91Z93_closure,NULL); */
3002 /* cost the walk over the queue */
3003 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_lunblocktime;
3004 /* GrAnSim Light: make blocked TSO aware of the time that passed */
3005 if (RTSflags.GranFlags.Light)
3006 TSO_CLOCK(bqe) = notifytime;
3007 /* and create a resume message */
3008 new_event(proc, CurrentProc, notifytime,
3009 RESUMETHREAD,bqe,Prelude_Z91Z93_closure,NULL);
3011 if (notifytime<TimeOfNextEvent)
3012 TimeOfNextEvent = notifytime;
3014 # if defined(GRAN_CHECK)
3015 if (RTSflags.GranFlags.debug & 0x100) {
3016 fprintf(stderr," TSO %x (PE %d) %s,",
3017 TSO_ID(bqe), proc, ( (proc==CurrentProc) ? "L" : "G") );
3022 bqe = TSO_LINK(bqe);
3023 TSO_LINK(last) = Prelude_Z91Z93_closure;
3027 /* This was once used in a !do_gr_sim setup. Now only GrAnSim setup is */
/* Legacy non-GrAnSim branch below; flagged by the original author as
   possibly stale -- do not extend it. */
3029 else /* Check if this is still valid for non-GrAnSim code -- HWL */
3031 if (ThreadQueueHd == Prelude_Z91Z93_closure)
3032 ThreadQueueHd = bqe;
3034 TSO_LINK(ThreadQueueTl) = bqe;
3036 if (RunnableThreadsHd == Prelude_Z91Z93_closure)
3037 RunnableThreadsHd = tso;
3039 TSO_LINK(RunnableThreadsTl) = tso;
3042 while(TSO_LINK(bqe) != Prelude_Z91Z93_closure) {
3043 assert(TSO_INTERNAL_PTR(bqe)->rR[0].p == node);
3046 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
3049 bqe = TSO_LINK(bqe);
3052 assert(TSO_INTERNAL_PTR(bqe)->rR[0].p == node);
3055 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
3061 if (RTSflags.GranFlags.debug & 0x100)
3062 fprintf(stderr,".\n");
3065 /* ngo' {GrAnSim}Qo' ngoq: RunnableThreadsTl = tso; */
3074 /* Different interface for GRAN */
/* Two continuation-setup fragments (entry names are on lines not shown
   here): both record the register liveness and make the TSO resume at
   `Continue`, emit a QP "GR" event, then re-enter the scheduler. The
   second variant decodes liveness and the reschedule flag from a packed
   `args` word (liveness in the upper bits, flag in bit 0). */
3079 SAVE_Liveness = liveness;
3080 TSO_PC1(CurrentTSO) = Continue;
3082 QP_Event1("GR", CurrentTSO);
3084 ReSchedule(SAME_THREAD);
3093 SAVE_Liveness = args >> 1;
3094 TSO_PC1(CurrentTSO) = Continue;
3096 QP_Event1("GR", CurrentTSO);
3099 if (RTSflags.ParFlags.granSimStats) {
3100 /* Note that CURRENT_TIME may perform an unsafe call */
3101 TSO_EXECTIME(CurrentTSO) += CURRENT_TIME - TSO_BLOCKEDAT(CurrentTSO);
3104 ReSchedule(args & 1);
3111 %****************************************************************************
3113 \subsection[gr-fetch]{Fetching Nodes (GrAnSim only)}
3115 %****************************************************************************
3117 The following GrAnSim routines simulate the fetching of nodes from a remote
3118 processor. We use a 1 word bitmask to indicate on which processor a node is
3119 lying. Thus, moving or copying a node from one processor to another just
3120 requires an appropriate change in this bitmask (using @SET_GA@).
3121 Additionally, the clocks have to be updated.
3123 A special case arises when the node that is needed by processor A has been
3124 moved from a processor B to a processor C between sending out a @FETCH@
3125 (from A) and its arrival at B. In that case the @FETCH@ has to be forwarded
3131 /* ngoqvam che' {GrAnSim}! */
3133 /* Fetch node "node" to processor "p" */
/* FetchNode: simulate moving/copying `node` from PE `from` to PE `to`
   by updating the PROCS bitmask (one bit per PE). Normal forms are
   copied (bit OR-ed in); unevaluated nodes are moved (mask replaced).
   Must not be entered in GrAnSim Light or with bulk (GUMM) fetching,
   where UnpackGraph handles arrival instead. */
3136 FetchNode(node,from,to)
3140 /* In case of RTSflags.GranFlags.DoGUMMFetching this fct should never be
3141 entered! Instead, UnpackGraph is used in ReSchedule */
3144 ASSERT(to==CurrentProc);
3146 # if defined(GRAN) && defined(GRAN_CHECK)
3147 if ( RTSflags.GranFlags.Light ) {
3148 fprintf(stderr,"Qagh {FetchNode}Daq: Should never be entered in GrAnSim Light setup\n");
3153 if ( RTSflags.GranFlags.DoGUMMFetching ) {
3154 fprintf(stderr,"Qagh: FetchNode should never be entered with DoGUMMFetching\n");
3158 /* Now fetch the children */
3159 if (!IS_LOCAL_TO(PROCS(node),from) &&
3160 !IS_LOCAL_TO(PROCS(node),to) )
3163 if(IS_NF(INFO_PTR(node))) /* Old: || IS_BQ(node) */
3164 PROCS(node) |= PE_NUMBER(to); /* Copy node */
3166 PROCS(node) = PE_NUMBER(to); /* Move node */
3171 /* --------------------------------------------------
3172 Cost of sending a packet of size n = C + P*n
3173 where C = packet construction constant,
3174 P = cost of packing one word into a packet
3175 [Should also account for multiple packets].
3176 -------------------------------------------------- */
3179 0 ... ok (FETCHREPLY event with a buffer containing addresses of the
3180 nearby graph has been scheduled)
3181 1 ... node is already local (fetched by somebody else; no event is
3183 2 ... fetch request has been forwarded to the PE that now contains the
3185 3 ... node is a black hole (BH, BQ or RBH); no event is scheduled, and
3186 the current TSO is put into the blocking queue of that node
3187 4 ... out of heap in PackNearbyGraph; GC should be triggered in calling
3188 function to guarantee that the tso and node inputs are valid
3189 (they may be moved during GC).
3191 ToDo: Symbolic return codes; clean up code (separate GUMMFetching from
3192 single node fetching).
/* HandleFetchRequest: process a FETCH for `node` issued by thread `tso`
   on PE `p`. Return codes are documented in the comment block above this
   function (0=reply scheduled, 1=already local, 2=forwarded, 3=black
   hole => block, 4=out of heap during packing). */
3196 HandleFetchRequest(node,p,tso)
3200 ASSERT(!RTSflags.GranFlags.Light);
/* Case 1: node already lives on the demanding PE. */
3202 if (IS_LOCAL_TO(PROCS(node),p) ) /* Somebody else moved node already => */
3204 # if defined(GRAN_CHECK)
3205 if (RTSflags.GranFlags.debug & 0x100 ) {
3207 I_ size, ptrs, nonptrs, vhs;
3208 char info_hdr_ty[80];
3210 info_ptr = get_closure_info(node,
3211 &size, &ptrs, &nonptrs, &vhs,
3213 fprintf(stderr,"Warning: HandleFetchRequest entered with local node %#lx (%s) (PE %d)\n",
3214 node,info_hdr_ty,p);
3217 if (RTSflags.GranFlags.DoGUMMFetching) {
3221 /* Create a 1-node-buffer and schedule a FETCHREPLY now */
3222 graph = PackOneNode(node, tso, &size);
3223 new_event(p,CurrentProc,CurrentTime[CurrentProc],
3224 FETCHREPLY,tso,graph,NULL);
3226 new_event(p,CurrentProc,CurrentTime[CurrentProc],
3227 FETCHREPLY,tso,node,NULL);
/* Case 2: node is still on this PE => pack it up and reply. */
3231 else if (IS_LOCAL_TO(PROCS(node),CurrentProc) ) /* Is node still here? */
3233 if(RTSflags.GranFlags.DoGUMMFetching) { /* {GUM}vo' ngoqvam vInIHta' (code from GUM) */
3237 if (IS_BLACK_HOLE(INFO_PTR(node))) { /* block on BH or RBH */
3238 new_event(p,CurrentProc,CurrentTime[p],
3239 GLOBALBLOCK,tso,node,NULL);
3240 /* Note: blockFetch is done when handling GLOBALBLOCK event */
3241 /* When this thread is reawoken it does the usual: it tries to
3242 enter the updated node and issues a fetch if it's remote.
3243 It has forgotten that it has sent a fetch already (i.e. a
3244 FETCHNODE is swallowed by a BH, leaving the thread in a BQ */
3245 --OutstandingFetches[p];
3249 # if defined(GRAN_CHECK)
3250 if (!RTSflags.GranFlags.DoReScheduleOnFetch && (tso != RunnableThreadsHd[p])) {
3251 fprintf(stderr,"Qagh {HandleFetchRequest}Daq: tso 0x%lx (%x) not at head of proc %d (0x%lx)\n",
3252 tso, TSO_ID(tso), p, RunnableThreadsHd[p]);
3257 if ((graph = PackNearbyGraph(node, tso, &size)) == NULL)
3258 return (4); /* out of heap */
3260 /* Actual moving/copying of node is done on arrival; see FETCHREPLY */
3261 /* Send a reply to the originator */
3262 /* ToDo: Replace that by software costs for doing graph packing! */
3263 CurrentTime[CurrentProc] += size * RTSflags.GranFlags.gran_mpacktime;
3265 new_event(p,CurrentProc,CurrentTime[CurrentProc]+RTSflags.GranFlags.gran_latency,
3266 FETCHREPLY,tso,graph,NULL);
3268 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
3270 } else { /* incremental (single closure) fetching */
3271 /* Actual moving/copying of node is done on arrival; see FETCHREPLY */
3272 /* Send a reply to the originator */
3273 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
3275 new_event(p,CurrentProc,CurrentTime[CurrentProc]+RTSflags.GranFlags.gran_latency,
3276 FETCHREPLY,tso,node,NULL);
3278 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
/* Case 3: node moved to a third PE => forward the FETCH there. */
3282 else /* Qu'vatlh! node has been grabbed by another proc => forward */
3284 PROC p_new = where_is(node);
3287 # if defined(GRAN_CHECK)
3288 if (RTSflags.GranFlags.debug & 0x2)
3289 fprintf(stderr,"Qu'vatlh! node %#lx has been grabbed by PE %d (current=%d; demander=%d) @ %d\n",
3290 node,p_new,CurrentProc,p,CurrentTime[CurrentProc]);
3292 /* Prepare FORWARD message to proc p_new */
3293 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
3295 fetchtime = STG_MAX(CurrentTime[CurrentProc],CurrentTime[p_new]) +
3296 RTSflags.GranFlags.gran_latency;
3298 new_event(p_new,p,fetchtime,FETCHNODE,tso,node,NULL);
3300 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
3308 @blockFetch@ blocks a @BlockedFetch@ node on some kind of black hole.
3310 Taken from gum/HLComms.lc. [find a better place for that ?] -- HWL
3312 {\bf Note:} In GranSim we don't have @FETCHME@ nodes and therefore don't
3313 create @FMBQ@'s (FetchMe blocking queues) to cope with global
3314 blocking. Instead, non-local TSO are put into the BQ in the same way as
3315 local TSOs. However, we have to check if a TSO is local or global in order
3316 to account for the latencies involved and for keeping track of the number
3317 of fetches that are really going on.
3323 0 ... ok; tso is now at beginning of BQ attached to the bh closure
3324 1 ... the bh closure is no BH any more; tso is immediately unblocked
/* blockFetch: block `tso` (running on PE `proc`) on the black-hole-like
   closure `bh` (BH, RBH or BQ). If `bh` has meanwhile been updated and is
   no longer a black hole, the tso is immediately unblocked via an
   UNBLOCKTHREAD event instead. See the comment block above for return
   codes (0 = queued, 1 = unblocked). */
3328 blockFetch(tso, proc, bh)
3329 P_ tso; /* TSO which gets blocked */
3330 PROC proc; /* PE where that tso was running */
3331 P_ bh; /* closure to block on (BH, RBH, BQ) */
3333 # if defined(GRAN_CHECK)
3334 if ( RTSflags.GranFlags.debug & 0x100 ) {
3336 I_ size, ptrs, nonptrs, vhs;
3337 char info_hdr_ty[80];
3339 info_ptr = get_closure_info(bh,
3340 &size, &ptrs, &nonptrs, &vhs,
3342 fprintf(stderr,"Blocking TSO %#lx (%x)(PE %d) on node %#lx (%s) (PE %d). No graph is packed!\n",
3343 tso, TSO_ID(tso), proc, bh, info_hdr_ty, where_is(bh));
3346 if ( !RTSflags.GranFlags.DoReScheduleOnFetch && (tso != RunnableThreadsHd[proc]) ) {
3347 fprintf(stderr,"Qagh {blockFetch}Daq: TSO 0x%lx (%x) is not first on runnable list of proc %d (first is 0x%lx)\n",
3348 tso,TSO_ID(tso),proc,RunnableThreadsHd[proc]);
/* bh already updated => no blocking needed, wake the tso right away. */
3353 if (!IS_BLACK_HOLE(INFO_PTR(bh))) { /* catches BHs and RBHs */
3354 # if defined(GRAN_CHECK)
3355 if ( RTSflags.GranFlags.debug & 0x100 ) {
3357 W_ size, ptrs, nonptrs, vhs;
3358 char str[80], junk_str[80];
3360 info = get_closure_info(bh, &size, &ptrs, &nonptrs, &vhs, str);
3361 fprintf(stderr,"blockFetch: node %#lx (%s) is not a BH => awakening TSO %#lx (%x) (PE %u)\n",
3362 bh, str, tso, TSO_ID(tso), proc);
3366 /* No BH anymore => immediately unblock tso */
3367 new_event(proc,proc,CurrentTime[proc],
3368 UNBLOCKTHREAD,tso,bh,NULL);
3370 /* Is this always a REPLY to a FETCH in the profile ? */
3371 if (RTSflags.GranFlags.granSimStats)
3372 DumpRawGranEvent(proc,proc,GR_REPLY,tso,bh,0);
3376 /* DaH {BQ}Daq Qu' Suq 'e' wISov!
3377 Now we know that we have to put the tso into the BQ.
3378 2 case: If block-on-fetch, tso is at head of threadq =>
3379 => take it out of threadq and into BQ
3380 If reschedule-on-fetch, tso is only pointed to be event
3381 => just put it into BQ
3383 if (!RTSflags.GranFlags.DoReScheduleOnFetch) { /* block-on-fetch */
3384 GranSimBlock(tso, proc, bh); /* get tso out of threadq & activate next
3385 thread (same as in BQ_entry) */
3386 } else { /* reschedule-on-fetch */
3387 if(RTSflags.GranFlags.granSimStats)
3388 DumpRawGranEvent(proc,where_is(bh),GR_BLOCK,tso,bh,0);
3390 ++TSO_BLOCKCOUNT(tso);
3391 TSO_BLOCKEDAT(tso) = CurrentTime[proc];
3394 ASSERT(TSO_LINK(tso)==Prelude_Z91Z93_closure);
3396 /* Put tso into BQ */
/* Dispatch on the closure kind: a plain BH is first converted into a BQ
   closure; RBHs keep their own queue field. */
3397 switch (INFO_TYPE(INFO_PTR(bh))) {
3399 case INFO_BH_U_TYPE:
3400 TSO_LINK(tso) = Prelude_Z91Z93_closure;
3401 SET_INFO_PTR(bh, BQ_info);
3402 BQ_ENTRIES(bh) = (W_) tso;
3404 #ifdef GC_MUT_REQUIRED
3406 * If we modify a black hole in the old generation, we have to make
3407 * sure it goes on the mutables list
3410 if (bh <= StorageMgrInfo.OldLim) {
3411 MUT_LINK(bh) = (W_) StorageMgrInfo.OldMutables;
3412 StorageMgrInfo.OldMutables = bh;
3414 MUT_LINK(bh) = MUT_NOT_LINKED;
3418 /* BF_LINK(bf) = (P_) BQ_ENTRIES(bh); */
/* Already a BQ closure: push tso on the front of the queue. */
3419 TSO_LINK(tso) = (P_) BQ_ENTRIES(bh);
3420 BQ_ENTRIES(bh) = (W_) tso;
3422 case INFO_FMBQ_TYPE:
/* FMBQ closures exist only in GUM; finding one here is a hard error. */
3423 fprintf(stderr,"ERROR: FMBQ closure (%#lx) found in GrAnSim (TSO=%#lx (%x))\n",
3424 bh, tso, TSO_ID(tso));
3426 case INFO_SPEC_RBH_TYPE:
3427 /* BF_LINK(bf) = (P_) BQ_ENTRIES(bh); */
3428 TSO_LINK(tso) = (P_) SPEC_RBH_BQ(bh);
3429 SPEC_RBH_BQ(bh) = (W_) tso;
3431 case INFO_GEN_RBH_TYPE:
3432 /* BF_LINK(bf) = (P_) BQ_ENTRIES(bh); */
3433 TSO_LINK(tso) = (P_) GEN_RBH_BQ(bh);
3434 GEN_RBH_BQ(bh) = (W_) tso;
3439 I_ size, ptrs, nonptrs, vhs;
3440 char info_hdr_ty[80];
3442 fprintf(stderr, "Panic: thought %#lx was a black hole (IP %#lx)",
3444 # if defined(GRAN_CHECK)
3445 info_ptr = get_closure_info(bh,
3446 &size, &ptrs, &nonptrs, &vhs,
3448 fprintf(stderr, " %s\n",info_hdr_ty);
3460 %****************************************************************************
3462 \subsection[qp-profile]{Quasi-Parallel Profiling}
3464 %****************************************************************************
3467 /* ToDo: Check if this is really still used anywhere!? */
3472 /* *Virtual* Time in milliseconds */
/* qp_elapsed_time: timestamp source for QP profile lines. Two build
   variants: real user time in ms (non-GRAN), or the simulated clock of
   the current PE (GRAN). */
3475 qp_elapsed_time(STG_NO_ARGS)
3477 extern StgDouble usertime();
3479 return ((long) (usertime() * 1e3));
3483 qp_elapsed_time(STG_NO_ARGS)
3485 return ((long) CurrentTime[CurrentProc] );
/* init_qp_profiling: open the quasi-parallel profile file and write its
   header line (command line + RTS flags + start time).
   NOTE(review): sprintf into the fixed-size qp_filename buffer is
   unbounded; if prog_argv[0] can exceed STATS_FILENAME_MAXLEN this
   overflows -- snprintf would be safer (cf. CERT STR31-C). */
3490 init_qp_profiling(STG_NO_ARGS)
3493 char qp_filename[STATS_FILENAME_MAXLEN];
3495 sprintf(qp_filename, QP_FILENAME_FMT, prog_argv[0]);
3496 if ((qp_file = fopen(qp_filename,"w")) == NULL ) {
3497 fprintf(stderr, "Can't open quasi-parallel profile report file %s\n",
/* Echo the full command line into the profile header. */
3501 fputs(prog_argv[0], qp_file);
3502 for(i = 1; prog_argv[i]; i++) {
3503 fputc(' ', qp_file);
3504 fputs(prog_argv[i], qp_file);
3506 fprintf(qp_file, " +RTS -C%d -t%d\n"
3507 , RTSflags.ConcFlags.ctxtSwitchTime
3508 , RTSflags.ConcFlags.maxThreads);
3510 fputs(time_str(), qp_file);
3511 fputc('\n', qp_file);
/* QP_Event0/1/2: emit one line per profiling event to qp_file, with
   0, 1 or 2 participating TSOs; each line starts with the virtual
   timestamp from qp_elapsed_time(). */
3516 QP_Event0(tid, node)
3520 fprintf(qp_file, "%lu ** %lu 0x%lx\n", qp_elapsed_time(), tid, INFO_PTR(node));
3524 QP_Event1(event, tso)
3528 fprintf(qp_file, "%lu %s %lu 0x%lx\n", qp_elapsed_time(), event,
3529 TSO_ID(tso), TSO_NAME(tso));
3533 QP_Event2(event, tso1, tso2)
3537 fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n", qp_elapsed_time(), event,
3538 TSO_ID(tso1), TSO_NAME(tso1), TSO_ID(tso2), TSO_NAME(tso2));
3543 %****************************************************************************
3545 \subsection[gc-GrAnSim]{Garbage collection routines for GrAnSim objects}
3547 %****************************************************************************
3549 Garbage collection code for the event queue. We walk the event queue
3550 so that if the only reference to a TSO is in some event (e.g. RESUME),
3551 the TSO is still preserved.
3553 The GC code now uses a breadth-first pruning strategy. This prevents
3554 the GC from keeping all sparks of the low-numbered PEs while discarding all
3555 sparks from high-numbered PEs. Such a depth-first pruning may have
3556 disastrous effects for programs that generate a huge number of sparks!
3561 extern smInfo StorageMgrInfo;
3563 /* Auxiliary functions needed in Save/RestoreSparkRoots if breadth-first */
3564 /* pruning is done. */
/* arr_and: fold bitwise AND over arr[0..max-1]; arr_max: maximum element
   of arr[0..max-1]. Both are undefined for max==0 (arr[0] is read
   unconditionally), as the original comments note. */
3567 arr_and(W_ arr[], I_ max)
3572 /* Doesn't work with max==0; but then, many things don't work in this */
3574 for (i=1, res = arr[0]; i<max; i++)
3581 arr_max(W_ arr[], I_ max)
3586 /* Doesn't work with max==0; but then, many things don't work in this */
3588 for (i=1, res = arr[0]; i<max; i++)
3589 res = (arr[i]>res) ? arr[i] : res;
3595 Routines working on spark queues.
3596 It would be a good idea to make that an ADT!
/* spark_queue_len: count the sparks in pool `pool` of PE `proc`; under
   GRAN_CHECK (debug 0x1000) also verify that PendingSparksTl points at
   the last element of the queue. */
3600 spark_queue_len(PROC proc, I_ pool) 
3602 sparkq prev, spark; /* prev only for testing !! */
3605 for (len = 0, prev = NULL, spark = PendingSparksHd[proc][pool]; 
3607 len++, prev = spark, spark = SPARK_NEXT(spark))
3610 # if defined(GRAN_CHECK)
3611 if ( RTSflags.GranFlags.debug & 0x1000 ) 
3612 if ( (prev!=NULL) && (prev!=PendingSparksTl[proc][pool]) )
3613 fprintf(stderr,"ERROR in spark_queue_len: (PE %u, pool %u) PendingSparksTl (%#lx) not end of queue (%#lx)\n",
3614 proc, pool, PendingSparksTl[proc][pool], prev);
/* delete_from_spark_queue: unlink `spark` (whose predecessor is `prev`,
   NULL when spark is the head) from the doubly-linked queue named by the
   globals SparkQueueHd/SparkQueueTl, then dispose of it. */
3621 delete_from_spark_queue (prev,spark) /* unlink and dispose spark */
3623 { /* Global Vars: CurrentProc, SparkQueueHd, SparkQueueTl */
3626 # if defined(GRAN_CHECK)
3627 if ( RTSflags.GranFlags.debug & 0x10000 ) {
3628 fprintf(stderr,"** |%#x:%#x| prev=%#x->(%#x), (%#x)<-spark=%#x->(%#x) <-(%#x)\n",
3629 SparkQueueHd, SparkQueueTl,
3630 prev, (prev==NULL ? 0 : SPARK_NEXT(prev)),
3631 SPARK_PREV(spark), spark, SPARK_NEXT(spark),
3632 (SPARK_NEXT(spark)==NULL ? 0 : SPARK_PREV(SPARK_NEXT(spark))));
/* Unlink: fix forward link (head or prev), backward link (tail or
   successor), then clear the spark's own links before disposal. */
3636 tmp = SPARK_NEXT(spark);
3638 SparkQueueHd = SPARK_NEXT(spark);
3640 SPARK_NEXT(prev) = SPARK_NEXT(spark);
3642 if (SPARK_NEXT(spark)==NULL) {
3643 SparkQueueTl = prev;
3645 SPARK_PREV(SPARK_NEXT(spark)) = prev;
3647 if(SparkQueueHd == NULL)
3648 SparkQueueTl = NULL;
3649 SPARK_NEXT(spark) = NULL;
3651 DisposeSpark(spark);
3654 # if defined(GRAN_CHECK)
3655 if ( RTSflags.GranFlags.debug & 0x10000 ) {
3656 fprintf(stderr,"## prev=%#x->(%#x)\n",
3657 prev, (prev==NULL ? 0 : SPARK_NEXT(prev)));
3664 /* NB: These functions have been replaced by functions:
3665 EvacuateEvents, EvacuateSparks, (in ../storage/SMcopying.lc)
3666 LinkEvents, LinkSparks (in ../storage/SMcompacting.lc)
3667 Thus, GrAnSim does not need additional entries in the list of roots
/* Walk the global event queue (EventHd) and push every closure reachable
   from a pending event — TSOs, spark nodes, and (for packet fetching)
   the individual closures inside a FETCHREPLY packet buffer — onto
   StorageMgrInfo.roots, starting at index `num_ptr_roots`.  This keeps a
   TSO alive even when its only reference is an event such as RESUME.
   Returns the new number of roots.  The order of pushes here must mirror
   the reverse-order pops in RestoreEvtRoots below. */
3672 SaveEventRoots(num_ptr_roots)
3675 eventq event = EventHd;
3676 while(event != NULL)
3678 if(EVENT_TYPE(event) == RESUMETHREAD ||
3679 EVENT_TYPE(event) == MOVETHREAD ||
3680 EVENT_TYPE(event) == CONTINUETHREAD ||
3681 /* EVENT_TYPE(event) >= CONTINUETHREAD1 || */
3682 EVENT_TYPE(event) == STARTTHREAD )
3683 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
3685 else if(EVENT_TYPE(event) == MOVESPARK)
3686 StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(EVENT_SPARK(event));
3688 else if (EVENT_TYPE(event) == FETCHNODE ||
3689 EVENT_TYPE(event) == FETCHREPLY )
3691 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
3692 /* In the case of packet fetching, EVENT_NODE(event) points to */
3693 /* the packet (currently, malloced). The packet is just a list of */
3694 /* closure addresses, with the length of the list at index 1 (the */
3695 /* structure of the packet is defined in Pack.lc). */
3696 if ( RTSflags.GranFlags.DoGUMMFetching && (EVENT_TYPE(event)==FETCHREPLY)) {
3697 P_ buffer = (P_) EVENT_NODE(event);
3698 int size = (int) buffer[PACK_SIZE_LOCN], i;
/* Save each closure address carried in the packet body. */
3700 for (i = PACK_HDR_SIZE; i <= size-1; i++) {
3701 StorageMgrInfo.roots[num_ptr_roots++] = (P_) buffer[i];
/* Plain FETCHNODE (or non-packet fetching): the node itself is a root. */
3704 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_NODE(event);
3706 else if (EVENT_TYPE(event) == GLOBALBLOCK)
3708 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
3709 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_NODE(event);
3711 else if (EVENT_TYPE(event) == UNBLOCKTHREAD)
3713 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
3715 event = EVENT_NEXT(event);
3717 return(num_ptr_roots);
3720 #if defined(DEPTH_FIRST_PRUNING)
3721 /* Is it worthwhile keeping the depth-first pruning code !? -- HWL */
/* Depth-first variant of SaveSparkRoots (enabled by DEPTH_FIRST_PRUNING):
   walk each PE's spark pools in order and save at most MAX_SPARKS spark
   nodes as GC roots.  Sparks beyond MAX_SPARKS are pruned: their node is
   overwritten with the empty-list closure and the remainder of the queue
   (from `disposeQ`) is cut off and disposed.  The prose above warns this
   can keep all sparks of low-numbered PEs while discarding everything on
   high-numbered ones.  Returns the new number of roots. */
3724 SaveSparkRoots(num_ptr_roots)
3727 sparkq spark, /* prev, */ disposeQ=NULL;
3729 I_ i, sparkroots=0, prunedSparks=0;
3730 I_ tot_sparks[MAX_PROC], tot = 0;;
3732 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3733 tot_sparks[proc] = 0;
3734 for(i = 0; i < SPARK_POOLS; ++i) {
3735 for(/* prev = &PendingSparksHd[proc][i],*/ spark = PendingSparksHd[proc][i];
3737 /* prev = &SPARK_NEXT(spark), */ spark = SPARK_NEXT(spark))
/* Still under the global spark budget: record this node as a root. */
3739 if(++sparkroots <= MAX_SPARKS)
3741 if ( RTSflags.GcFlags.giveStats )
3742 if (i==ADVISORY_POOL) {
3746 StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(spark);
/* Over budget: neutralise the spark and remember where pruning began
   (`disposeQ` marks the first pruned node — assignment elided here). */
3750 SPARK_NODE(spark) = Prelude_Z91Z93_closure;
3751 if (prunedSparks==0) {
3759 } /* forall spark ... */
3760 if ( (RTSflags.GcFlags.giveStats) && (prunedSparks>0) ) {
3761 fprintf(RTSflags.GcFlags.statsFile,"Pruning and disposing %lu excess sparks (> %lu) on proc %d for GC purposes\n",
3762 prunedSparks,MAX_SPARKS,proc);
/* Detach the pruned tail: either the whole queue or everything after
   disposeQ's predecessor, then free it wholesale. */
3763 if (disposeQ == PendingSparksHd[proc][i])
3764 PendingSparksHd[proc][i] = NULL;
3766 SPARK_NEXT(SPARK_PREV(disposeQ)) = NULL;
3767 DisposeSparkQ(disposeQ);
3771 } /* forall i ... */
3772 } /*forall proc .. */
3774 if ( RTSflags.GcFlags.giveStats ) {
3775 fprintf(RTSflags.GcFlags.statsFile,
3776 "Spark statistics (after pruning) (total sparks = %d):",tot);
3777 for (proc=0; proc<RTSflags.GranFlags.proc; proc++) {
3779 fprintf(RTSflags.GcFlags.statsFile,"\n> ");
3780 fprintf(RTSflags.GcFlags.statsFile,"\tPE %d: %d ",proc,tot_sparks[proc]);
3782 fprintf(RTSflags.GcFlags.statsFile,".\n");
3785 return(num_ptr_roots);
3788 #else /* !DEPTH_FIRST_PRUNING */
3790 /* In case of an excessive number of sparks, depth first pruning is a Bad */
3791 /* Idea as we might end up with all remaining sparks on processor 0 and */
3792 /* none on the other processors. So, this version uses breadth first */
3793 /* pruning. -- HWL */
/* Breadth-first variant of SaveSparkRoots (the default, i.e. when
   DEPTH_FIRST_PRUNING is off): save spark nodes as GC roots by taking one
   spark per (PE, pool) per round, so the MAX_SPARKS budget is spread
   fairly across processors instead of exhausted by low-numbered PEs.
   `curr_spark[proc][i]` is each queue's cursor; `endQueues[i]` collects a
   per-pool bitmask (one bit per PE, via PE_NUMBER) of queues that are
   finished or truncated, and the round-robin loop stops once every PE's
   bit is set for every pool (arr_and(...) == allProcs).  A second
   round-robin pass then prunes and disposes all remaining sparks,
   tracked analogously via `finishedQueues`.  Returns the new number of
   roots. */
3796 SaveSparkRoots(num_ptr_roots)
3800 curr_spark[MAX_PROC][SPARK_POOLS];
3803 endQueues[SPARK_POOLS], finishedQueues[SPARK_POOLS];
3805 prunedSparks[MAX_PROC][SPARK_POOLS];
3806 I_ tot_sparks[MAX_PROC], tot = 0;;
3809 # if defined(GRAN_CHECK) && defined(GRAN)
3810 if ( RTSflags.GranFlags.debug & 0x40 )
3811 fprintf(stderr,"D> Saving spark roots for GC ...\n");
/* Initialise cursors and counters for every (PE, pool) pair; allProcs
   accumulates the full bitmask of live PEs used as the loop sentinel. */
3815 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3816 allProcs |= PE_NUMBER(proc);
3817 tot_sparks[proc] = 0;
3818 for(i = 0; i < SPARK_POOLS; ++i) {
3819 curr_spark[proc][i] = PendingSparksHd[proc][i];
3820 prunedSparks[proc][i] = 0;
3822 finishedQueues[i] = 0;
3826 /* Breadth first pruning */
3828 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3829 for(i = 0; i < SPARK_POOLS; ++i) {
3830 spark = curr_spark[proc][i];
3831 if ( spark != NULL ) {
3833 if(++sparkroots <= MAX_SPARKS)
3835 # if defined(GRAN_CHECK) && defined(GRAN)
3836 if ( (RTSflags.GranFlags.debug & 0x1000) &&
3837 (RTSflags.GcFlags.giveStats) )
3838 fprintf(RTSflags.GcFlags.statsFile,"Saving Spark Root %d(proc: %d; pool: %d): 0x%lx \t(info ptr=%#lx)\n",
3839 num_ptr_roots,proc,i,SPARK_NODE(spark),
3840 INFO_PTR(SPARK_NODE(spark)));
3842 if ( RTSflags.GcFlags.giveStats )
3843 if (i==ADVISORY_POOL) {
/* Within budget: record the node and advance this queue's cursor. */
3847 StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(spark);
3848 curr_spark[proc][i] = spark = SPARK_NEXT(spark);
3850 else /* sparkroots > MAX_SPARKS */
/* Budget exhausted: truncate this queue just before the cursor and
   mark it (and, below, every other queue) as ended. */
3852 if (curr_spark[proc][i] == PendingSparksHd[proc][i])
3853 PendingSparksHd[proc][i] = NULL;
3855 SPARK_NEXT(SPARK_PREV(curr_spark[proc][i])) = NULL;
3856 PendingSparksTl[proc][i] = SPARK_PREV(curr_spark[proc][i]);
3857 endQueues[i] |= PE_NUMBER(proc);
3859 } else { /* spark == NULL ; actually, this only has to be done once */
3860 endQueues[i] |= PE_NUMBER(proc);
3864 } while (arr_and(endQueues,SPARK_POOLS) != allProcs);
3866 /* The buffer for spark roots in StorageMgrInfo.roots is full */
3867 /* now. Prune all sparks on all processor starting with */
3868 /* curr_spark[proc][i]. */
3871 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3872 for(i = 0; i < SPARK_POOLS; ++i) {
3873 spark = curr_spark[proc][i];
3875 if ( spark != NULL ) {
/* Neutralise the pruned spark's node and free the spark itself. */
3876 SPARK_NODE(spark) = Prelude_Z91Z93_closure;
3877 curr_spark[proc][i] = SPARK_NEXT(spark);
3879 prunedSparks[proc][i]++;
3880 DisposeSpark(spark);
3882 finishedQueues[i] |= PE_NUMBER(proc);
3886 } while (arr_and(finishedQueues,SPARK_POOLS) != allProcs);
3889 # if defined(GRAN_CHECK) && defined(GRAN)
3890 if ( RTSflags.GranFlags.debug & 0x1000) {
3891 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3892 for(i = 0; i < SPARK_POOLS; ++i) {
3893 if ( (RTSflags.GcFlags.giveStats) && (prunedSparks[proc][i]>0)) {
3894 fprintf(RTSflags.GcFlags.statsFile,
3895 "Discarding %lu sparks on proc %d (pool %d) for GC purposes\n",
3896 prunedSparks[proc][i],proc,i);
3901 if ( RTSflags.GcFlags.giveStats ) {
3902 fprintf(RTSflags.GcFlags.statsFile,
3903 "Spark statistics (after discarding) (total sparks = %d):",tot);
3904 for (proc=0; proc<RTSflags.GranFlags.proc; proc++) {
3906 fprintf(RTSflags.GcFlags.statsFile,"\n> ");
3907 fprintf(RTSflags.GcFlags.statsFile,
3908 "\tPE %d: %d ",proc,tot_sparks[proc]);
3910 fprintf(RTSflags.GcFlags.statsFile,".\n");
3915 return(num_ptr_roots);
3918 #endif /* DEPTH_FIRST_PRUNING */
3921 GC roots must be restored in *reverse order*.
3922 The recursion is a little ugly, but is better than
3923 in-place pointer reversal.
/* Pop the roots saved by SaveEventRoots back into the event queue.
   Roots must come off StorageMgrInfo.roots in *reverse* order of saving,
   so we recurse to the end of the queue first and restore on the way
   back; within each FETCHREPLY packet the buffer slots are likewise
   refilled back-to-front.  The per-event-type cases mirror
   SaveEventRoots exactly.  Returns the remaining root count. */
3927 RestoreEvtRoots(event,num_ptr_roots)
3933 num_ptr_roots = RestoreEvtRoots(EVENT_NEXT(event),num_ptr_roots);
3935 if(EVENT_TYPE(event) == RESUMETHREAD ||
3936 EVENT_TYPE(event) == MOVETHREAD ||
3937 EVENT_TYPE(event) == CONTINUETHREAD ||
3938 /* EVENT_TYPE(event) >= CONTINUETHREAD1 || */
3939 EVENT_TYPE(event) == STARTTHREAD )
3940 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
3942 else if(EVENT_TYPE(event) == MOVESPARK )
3943 SPARK_NODE(EVENT_SPARK(event)) = StorageMgrInfo.roots[--num_ptr_roots];
3945 else if (EVENT_TYPE(event) == FETCHNODE ||
3946 EVENT_TYPE(event) == FETCHREPLY )
3948 if ( RTSflags.GranFlags.DoGUMMFetching && (EVENT_TYPE(event)==FETCHREPLY)) {
3949 P_ buffer = (P_) EVENT_NODE(event);
3950 int size = (int) buffer[PACK_SIZE_LOCN], i;
3952 for (i = size-1; i >= PACK_HDR_SIZE; i--) {
3953 buffer[i] = StorageMgrInfo.roots[--num_ptr_roots];
3956 EVENT_NODE(event) = StorageMgrInfo.roots[--num_ptr_roots];
3958 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
3960 else if (EVENT_TYPE(event) == GLOBALBLOCK)
3962 EVENT_NODE(event) = StorageMgrInfo.roots[--num_ptr_roots];
3963 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
3965 else if (EVENT_TYPE(event) == UNBLOCKTHREAD)
3967 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
3970 return(num_ptr_roots);
/* Public entry point: restore all event roots starting from the head of
   the global event queue.  Returns the remaining root count. */
3974 RestoreEventRoots(num_ptr_roots)
3977 return(RestoreEvtRoots(EventHd,num_ptr_roots));
3980 #if defined(DEPTH_FIRST_PRUNING)
/* Restore the roots of one spark queue (depth-first counterpart of
   SaveSparkRoots): recurse to the queue's end, then on the way back pop
   each saved node off StorageMgrInfo.roots — again giving the required
   reverse order.  `sparkroots` counts this queue's position against
   MAX_SPARKS; exceeding it here is an error (the excess should already
   have been pruned at save time) and is reported under GRAN_CHECK.
   Returns the remaining root count. */
3983 RestoreSpkRoots(spark,num_ptr_roots,sparkroots)
3985 I_ num_ptr_roots, sparkroots;
3989 num_ptr_roots = RestoreSpkRoots(SPARK_NEXT(spark),num_ptr_roots,++sparkroots);
3990 if(sparkroots <= MAX_SPARKS)
3992 P_ n = SPARK_NODE(spark);
3993 SPARK_NODE(spark) = StorageMgrInfo.roots[--num_ptr_roots];
3994 # if defined(GRAN_CHECK) && defined(GRAN)
3995 if ( RTSflags.GranFlags.debug & 0x40 )
3996 fprintf(RTSflags.GcFlags.statsFile,
3997 "Restoring Spark Root %d: 0x%lx \t(info ptr=%#lx\n",
3998 num_ptr_roots,SPARK_NODE(spark),
3999 INFO_PTR(SPARK_NODE(spark)));
4002 # if defined(GRAN_CHECK) && defined(GRAN)
4004 if ( RTSflags.GranFlags.debug & 0x40 )
4005 fprintf(RTSflags.GcFlags.statsFile,
4006 "Error in RestoreSpkRoots (%d; @ spark %#lx): More than MAX_SPARKS (%d) sparks\n",
4007 num_ptr_roots,SPARK_NODE(spark),MAX_SPARKS);
4011 return(num_ptr_roots);
/* Depth-first variant of RestoreSparkRoots: visit PEs and pools in
   *descending* order (the reverse of SaveSparkRoots' save order) and let
   RestoreSpkRoots pop each queue's roots.  Returns the remaining root
   count.  Not valid in a JSM-style sparks set-up. */
4015 RestoreSparkRoots(num_ptr_roots)
4021 #if defined(GRAN_JSM_SPARKS)
4022 fprintf(stderr,"Error: RestoreSparkRoots should be never be entered in a JSM style sparks set-up\n");
4026 /* NB: PROC is currently an unsigned datatype i.e. proc>=0 is always */
4027 /* true ((PROC)-1 == (PROC)255). So we need a second clause in the head */
4028 /* of the for loop. For i that is currently not necessary. C is really */
4029 /* impressive in datatype abstraction! -- HWL */
4031 for(proc = RTSflags.GranFlags.proc - 1; (proc >= 0) && (proc < RTSflags.GranFlags.proc); --proc) {
4032 for(i = SPARK_POOLS - 1; (i >= 0) && (i < SPARK_POOLS) ; --i) {
4033 num_ptr_roots = RestoreSpkRoots(PendingSparksHd[proc][i],num_ptr_roots,0);
4036 return(num_ptr_roots);
4039 #else /* !DEPTH_FIRST_PRUNING */
/* Breadth-first variant of RestoreSparkRoots: exactly invert the
   round-robin save order.  Each queue's cursor starts at its *tail*
   (PendingSparksTl) and walks backwards via SPARK_PREV; queue lengths
   are measured up front so that, for len = max_len down to 1, only
   queues at least `len` long take part in a round — mirroring which
   queues still contributed a spark in the corresponding save round.
   PEs and pools are traversed in descending order within each round,
   again reversing the save traversal.  NOTE(review): `max_len` is read
   before any visible initialisation — presumably an elided `max_len = 0`
   exists in the full source; confirm.  Returns the remaining root
   count. */
4042 RestoreSparkRoots(num_ptr_roots)
4046 curr_spark[MAX_PROC][SPARK_POOLS];
4048 I_ i, max_len, len, pool, count,
4049 queue_len[MAX_PROC][SPARK_POOLS];
4051 /* NB: PROC is currently an unsigned datatype i.e. proc>=0 is always */
4052 /* true ((PROC)-1 == (PROC)255). So we need a second clause in the head */
4053 /* of the for loop. For i that is currently not necessary. C is really */
4054 /* impressive in datatype abstraction! -- HWL */
4057 for (proc=0; proc < RTSflags.GranFlags.proc; proc++) {
4058 for (i=0; i<SPARK_POOLS; i++) {
4059 curr_spark[proc][i] = PendingSparksTl[proc][i];
4060 queue_len[proc][i] = spark_queue_len(proc,i);
4061 max_len = (queue_len[proc][i]>max_len) ? queue_len[proc][i] : max_len;
4065 for (len=max_len; len > 0; len--){
4066 for(proc = RTSflags.GranFlags.proc - 1; (proc >= 0) && (proc < RTSflags.GranFlags.proc); --proc) {
4067 for(i = SPARK_POOLS - 1; (i >= 0) && (i < SPARK_POOLS) ; --i) {
4068 if (queue_len[proc][i]>=len) {
4069 spark = curr_spark[proc][i];
4070 SPARK_NODE(spark) = StorageMgrInfo.roots[--num_ptr_roots];
4071 # if defined(GRAN_CHECK) && defined(GRAN)
4073 if ( (RTSflags.GranFlags.debug & 0x1000) &&
4074 (RTSflags.GcFlags.giveStats) )
4075 fprintf(RTSflags.GcFlags.statsFile,
4076 "Restoring Spark Root %d (PE %u, pool %u): 0x%lx \t(info ptr=%#lx)\n",
4077 num_ptr_roots,proc,i,SPARK_NODE(spark),
4078 INFO_PTR(SPARK_NODE(spark)));
/* Step this queue's cursor one node towards the head. */
4080 curr_spark[proc][i] = SPARK_PREV(spark);
4082 num_ptr_roots = RestoreSpkRoots(PendingSparksHd[proc][i],
4089 # if defined(GRAN_CHECK) && defined(GRAN)
4090 if ( (RTSflags.GranFlags.debug & 0x1000) && (RTSflags.GcFlags.giveStats) )
4091 fprintf(RTSflags.GcFlags.statsFile,"Number of restored spark roots: %d\n",
4094 return(num_ptr_roots);
4097 #endif /* DEPTH_FIRST_PRUNING */
4103 #endif /* CONCURRENT */ /* the whole module! */