2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1995
4 %************************************************************************
6 \section[Threads.lc]{Thread Control Routines}
8 %************************************************************************
10 %************************************************************************
12 \subsection[thread-overview]{Overview of the Thread Management System}
14 %************************************************************************
16 %************************************************************************
18 \subsection[thread-decls]{Thread Declarations}
20 %************************************************************************
22 % I haven't checked if GRAN can work with QP profiling. But as we use our
23 % own profiling (GR profiling) that should be irrelevant. -- HWL
27 #if defined(CONCURRENT) /* the whole module! */
29 # define NON_POSIX_SOURCE /* so says Solaris */
37 static void init_qp_profiling(STG_NO_ARGS); /* forward decl */
40 @AvailableStack@ is used to determine whether an existing stack can be
41 reused without new allocation, so reducing garbage collection, and
42 stack setup time. At present, it is only used for the first stack
43 chunk of a thread, the one that's got
44 @RTSflags.ConcFlags.stkChunkSize@ words.
/* Cache of a reusable stack chunk / TSO; PrelBase_Z91Z93_closure ([]) acts
   as the "empty" sentinel value throughout this file. */
47 P_ AvailableStack = PrelBase_Z91Z93_closure;
48 P_ AvailableTSO = PrelBase_Z91Z93_closure;
51 Macros for dealing with the new and improved GA field for simulating
52 parallel execution. Based on @CONCURRENT@ package. The GA field now
53 contains a mask, where the n-th bit stands for the n-th processor,
54 on which this data can be found. In case of multiple copies, several bits
55 are set. The total number of processors is bounded by @MAX_PROC@,
56 which should be <= the length of a word in bits. -- HWL
58 {{GranSim.lc}Daq ngoq' roQlu'ta'}
59 (Code has been moved to GranSim.lc).
61 %****************************************************************
63 \subsection[thread-getthread]{The Thread Scheduler}
65 %****************************************************************
67 This is the heart of the thread scheduling code.
69 Most of the changes for GranSim are in this part of the RTS.
70 Especially the @ReSchedule@ routine has been blown up quite a lot.
71 It now contains the top-level event-handling loop.
73 Parts of the code that are not necessary for GranSim, but convenient to
74 have when developing it are marked with a @GRAN_CHECK@ variable.
/* Global scheduler state. Two variants are visible here: per-PE arrays
   (indexed by processor, GRAN build) and scalar queues (non-GRAN build);
   the surrounding #if structure is partially lost in this extract. */
77 STGRegisterTable *CurrentRegTable = NULL;
82 /* Only needed for GranSim Light; costs of operations during rescheduling
83 are associated to the virtual processor on which ActiveTSO is living */
85 rtsBool resched = rtsFalse; /* debugging only !!*/
87 /* Pointers to the head and tail of the runnable queues for each PE */
88 /* In GranSim Light only the thread/spark-queues of proc 0 are used */
89 P_ RunnableThreadsHd[MAX_PROC];
90 P_ RunnableThreadsTl[MAX_PROC];
92 P_ WaitThreadsHd[MAX_PROC];
93 P_ WaitThreadsTl[MAX_PROC];
95 sparkq PendingSparksHd[MAX_PROC][SPARK_POOLS];
96 sparkq PendingSparksTl[MAX_PROC][SPARK_POOLS];
98 /* One clock for each PE */
99 W_ CurrentTime[MAX_PROC];
101 /* Useful to restrict communication; cf fishing model in GUM */
102 I_ OutstandingFetches[MAX_PROC], OutstandingFishes[MAX_PROC];
104 /* Status of each PE (new since but independent of GranSim Light) */
105 enum proc_status procStatus[MAX_PROC];
107 #if defined(GRAN) && defined(GRAN_CHECK)
108 /* To check if the RTS ever tries to run a thread that should be blocked
109 because of fetching remote data */
110 P_ BlockedOnFetch[MAX_PROC];
113 W_ SparksAvail = 0; /* How many sparks are available */
114 W_ SurplusThreads = 0; /* How many excess threads are there */
116 TIME SparkStealTime();
/* Non-GRAN (plain CONCURRENT) scalar queue heads/tails follow. */
120 P_ RunnableThreadsHd = PrelBase_Z91Z93_closure;
121 P_ RunnableThreadsTl = PrelBase_Z91Z93_closure;
123 P_ WaitingThreadsHd = PrelBase_Z91Z93_closure;
124 P_ WaitingThreadsTl = PrelBase_Z91Z93_closure;
126 TYPE_OF_SPARK PendingSparksBase[SPARK_POOLS];
127 TYPE_OF_SPARK PendingSparksLim[SPARK_POOLS];
129 TYPE_OF_SPARK PendingSparksHd[SPARK_POOLS];
130 TYPE_OF_SPARK PendingSparksTl[SPARK_POOLS];
132 #endif /* GRAN ; HWL */
/* setjmp target of the main scheduler loop; ReSchedule longjmps here. */
134 static jmp_buf scheduler_loop;
136 I_ required_thread_count = 0;
137 I_ advisory_thread_count = 0;
139 EXTFUN(resumeThread);
141 /* Misc prototypes */
143 P_ NewThread PROTO((P_, W_, I_));
144 I_ blockFetch PROTO((P_, PROC, P_));
145 I_ HandleFetchRequest PROTO((P_, PROC, P_));
146 rtsBool InsertThread PROTO((P_ tso));
147 sparkq delete_from_spark_queue PROTO((sparkq, sparkq));
/* NOTE(review): two NewThread prototypes (with/without priority arg) are
   presumably selected by #if GRAN, lost in this extract -- verify. */
150 P_ NewThread PROTO((P_, W_));
153 I_ context_switch = 0;
154 I_ contextSwitchTime = 10000;
158 /* NB: GRAN and GUM use different representations of spark pools.
159 GRAN sparks are more flexible (containing e.g. granularity info)
160 but slower than GUM sparks. There is no fixed upper bound on the
161 number of GRAN sparks either. -- HWL
167 I_ SparkLimit[SPARK_POOLS];
/* Allocate the two fixed-size spark pools (REQUIRED and ADVISORY), each
   holding RTSflags.ConcFlags.maxLocalSparks entries. The malloc-failure
   branches are missing from this extract; Lim pointers mark one-past-end. */
170 initThreadPools(STG_NO_ARGS)
172 I_ i, size = RTSflags.ConcFlags.maxLocalSparks;
174 SparkLimit[ADVISORY_POOL] = SparkLimit[REQUIRED_POOL] = size;
176 if ((PendingSparksBase[ADVISORY_POOL] = (TYPE_OF_SPARK) malloc(size * SIZE_OF_SPARK)) == NULL)
179 if ((PendingSparksBase[REQUIRED_POOL] = (TYPE_OF_SPARK) malloc(size * SIZE_OF_SPARK)) == NULL)
181 PendingSparksLim[ADVISORY_POOL] = PendingSparksBase[ADVISORY_POOL] + size;
182 PendingSparksLim[REQUIRED_POOL] = PendingSparksBase[REQUIRED_POOL] + size;
/* Top-level scheduler entry: sets up timers and per-PE state, creates the
   main thread from topClosure (GCing once for a retry if heap is short),
   then enters the main scheduler loop (the setjmp target that ReSchedule
   longjmps back to) which picks a runnable TSO and resumes it. */
193 ScheduleThreads(topClosure)
201 #if defined(PROFILING) || defined(PAR)
202 if (time_profiling || RTSflags.ConcFlags.ctxtSwitchTime > 0) {
203 if (initialize_virtual_timer(RTSflags.CcFlags.msecsPerTick)) {
205 if (RTSflags.ConcFlags.ctxtSwitchTime > 0) {
206 if (initialize_virtual_timer(RTSflags.ConcFlags.ctxtSwitchTime)) {
209 fprintf(stderr, "Can't initialize virtual timer.\n");
213 context_switch = 0 /* 1 HWL */;
215 # if defined(GRAN_CHECK) && defined(GRAN) /* HWL */
216 if ( RTSflags.GranFlags.Light && RTSflags.GranFlags.proc!=1 ) {
217 fprintf(stderr,"Qagh: In GrAnSim Light setup .proc must be 1\n");
221 if ( RTSflags.GranFlags.debug & 0x40 ) {
222 fprintf(stderr,"Doing init in ScheduleThreads now ...\n");
226 #if defined(GRAN) /* KH */
227 /* Init thread and spark queues on all processors */
228 for (i=0; i<RTSflags.GranFlags.proc; i++)
230 /* Init of RunnableThreads{Hd,Tl} etc now in main */
231 OutstandingFetches[i] = OutstandingFishes[i] = 0;
232 procStatus[i] = Idle;
233 # if defined(GRAN_CHECK) && defined(GRAN) /* HWL */
234 BlockedOnFetch[i] = NULL;
238 CurrentProc = MainProc;
244 * We perform GC so that a signal handler can install a new
245 * TopClosure and start a new main thread.
/* First attempt to make the main thread; on NULL (no heap), GC with
   topClosure saved as a root via CurrentTSO, then retry once. */
251 if ((tso = NewThread(topClosure, T_MAIN, MAIN_PRI)) == NULL) {
253 if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
255 /* kludge to save the top closure as a root */
256 CurrentTSO = topClosure;
257 ReallyPerformThreadGC(0, rtsTrue);
258 topClosure = CurrentTSO;
260 if ((tso = NewThread(topClosure, T_MAIN, MAIN_PRI)) == NULL) {
262 if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
265 fprintf(stderr, "Not enough heap for main thread\n");
270 RunnableThreadsHd = RunnableThreadsTl = tso;
272 /* NB: CurrentProc must have been set to MainProc before that! -- HWL */
273 ThreadQueueHd = ThreadQueueTl = tso;
275 # if defined(GRAN_CHECK)
276 if ( RTSflags.GranFlags.debug & 0x40 ) {
277 fprintf(stderr,"MainTSO has been initialized (0x%x)\n", tso);
283 if (RTSflags.ParFlags.granSimStats) {
284 DumpGranEvent(GR_START, tso);
285 sameThread = rtsTrue;
288 if (RTSflags.GranFlags.granSimStats && !RTSflags.GranFlags.labelling)
289 DumpRawGranEvent(CurrentProc,(PROC)0,GR_START,
294 MAKE_BUSY(MainProc); /* Everything except the main PE is idle */
295 if (RTSflags.GranFlags.Light)
299 required_thread_count = 1;
300 advisory_thread_count = 0;
302 } /*if IAmMainThread ...*/
305 /* ----------------------------------------------------------------- */
306 /* This part is the MAIN SCHEDULER LOOP; jumped at from ReSchedule */
307 /* ----------------------------------------------------------------- */
309 if(setjmp(scheduler_loop) < 0)
312 #if defined(GRAN) && defined(GRAN_CHECK)
313 if ( RTSflags.GranFlags.debug & 0x80 ) {
314 fprintf(stderr,"MAIN Schedule Loop; ThreadQueueHd is ");
315 G_TSO(ThreadQueueHd,1);
316 /* if (ThreadQueueHd == MainTSO) {
317 fprintf(stderr,"D> Event Queue is now:\n");
324 if (PendingFetches != PrelBase_Z91Z93_closure) {
329 if (ThreadQueueHd == PrelBase_Z91Z93_closure) {
330 fprintf(stderr, "Qu'vatlh! No runnable threads!\n");
333 if (DO_QP_PROF > 1 && CurrentTSO != ThreadQueueHd) {
334 QP_Event1("AG", ThreadQueueHd);
/* Non-GRAN path: spin on empty runnable queue, waiting on I/O threads,
   GCing to turn pending sparks into threads, or (PAR) sending a fish. */
337 while (RunnableThreadsHd == PrelBase_Z91Z93_closure) {
338 /* If we've no work */
339 if (WaitingThreadsHd == PrelBase_Z91Z93_closure) {
341 fprintf(stderr, "No runnable threads!\n");
344 /* Block indef. waiting for I/O and timer expire */
350 if (RunnableThreadsHd == PrelBase_Z91Z93_closure) {
351 if (advisory_thread_count < RTSflags.ConcFlags.maxThreads &&
352 (PendingSparksHd[REQUIRED_POOL] < PendingSparksTl[REQUIRED_POOL] ||
353 PendingSparksHd[ADVISORY_POOL] < PendingSparksTl[ADVISORY_POOL])) {
355 * If we're here (no runnable threads) and we have pending
356 * sparks, we must have a space problem. Get enough space
357 * to turn one of those pending sparks into a
358 * thread... ReallyPerformGC doesn't return until the
359 * space is available, so it may force global GC. ToDo:
360 * Is this unnecessary here? Duplicated in ReSchedule()?
363 ReallyPerformThreadGC(THREAD_SPACE_REQUIRED, rtsTrue);
364 SAVE_Hp -= THREAD_SPACE_REQUIRED;
367 * We really have absolutely no work. Send out a fish
368 * (there may be some out there already), and wait for
369 * something to arrive. We clearly can't run any threads
370 * until a SCHEDULE or RESUME arrives, and so that's what
371 * we're hoping to see. (Of course, we still have to
372 * respond to other types of messages.)
375 sendFish(choosePE(), mytid, NEW_FISH_AGE, NEW_FISH_HISTORY,
381 } else if (PacketsWaiting()) { /* Look for incoming messages */
387 if (DO_QP_PROF > 1 && CurrentTSO != RunnableThreadsHd) {
388 QP_Event1("AG", RunnableThreadsHd);
393 if (RTSflags.ParFlags.granSimStats && !sameThread)
394 DumpGranEvent(GR_SCHEDULE, RunnableThreadsHd);
/* GRAN path: sync the virtual clocks (GranSim Light keeps one clock per
   TSO) before running the thread at the head of the queue. */
398 TimeOfNextEvent = get_time_of_next_event();
399 CurrentTSO = ThreadQueueHd;
400 if (RTSflags.GranFlags.Light) {
401 /* Save time of `virt. proc' which was active since last getevent and
402 restore time of `virt. proc' where CurrentTSO is living on. */
403 if(RTSflags.GranFlags.DoFairSchedule)
405 if (RTSflags.GranFlags.granSimStats &&
406 RTSflags.GranFlags.debug & 0x20000)
407 DumpGranEvent(GR_SYSTEM_END,ActiveTSO);
409 TSO_CLOCK(ActiveTSO) = CurrentTime[CurrentProc];
411 CurrentTime[CurrentProc] = TSO_CLOCK(CurrentTSO);
412 if(RTSflags.GranFlags.DoFairSchedule && resched )
415 if (RTSflags.GranFlags.granSimStats &&
416 RTSflags.GranFlags.debug & 0x20000)
417 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
/* Pre-schedule a CONTINUETHREAD for the next TSO if it would otherwise
   be starved (its clock + 1000 is still before the next event). */
420 if (TSO_LINK(ThreadQueueHd)!=PrelBase_Z91Z93_closure &&
421 (TimeOfNextEvent == 0 ||
422 TSO_CLOCK(TSO_LINK(ThreadQueueHd))+1000<TimeOfNextEvent)) {
423 new_event(CurrentProc,CurrentProc,TSO_CLOCK(TSO_LINK(ThreadQueueHd))+1000,
424 CONTINUETHREAD,TSO_LINK(ThreadQueueHd),PrelBase_Z91Z93_closure,NULL);
425 TimeOfNextEvent = get_time_of_next_event();
429 EndOfTimeSlice = CurrentTime[CurrentProc]+RTSflags.GranFlags.time_slice;
/* Dequeue the head TSO and run it. */
431 CurrentTSO = RunnableThreadsHd;
432 RunnableThreadsHd = TSO_LINK(RunnableThreadsHd);
433 TSO_LINK(CurrentTSO) = PrelBase_Z91Z93_closure;
435 if (RunnableThreadsHd == PrelBase_Z91Z93_closure)
436 RunnableThreadsTl = PrelBase_Z91Z93_closure;
439 /* If we're not running a timer, just leave the flag on */
440 if (RTSflags.ConcFlags.ctxtSwitchTime > 0)
443 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
444 if (CurrentTSO == PrelBase_Z91Z93_closure) {
445 fprintf(stderr,"Qagh: Trying to execute PrelBase_Z91Z93_closure on proc %d (@ %d)\n",
446 CurrentProc,CurrentTime[CurrentProc]);
450 if (RTSflags.GranFlags.debug & 0x04) {
451 if (BlockedOnFetch[CurrentProc]) {
452 fprintf(stderr,"Qagh: Trying to execute TSO 0x%x on proc %d (@ %d) which is blocked-on-fetch by TSO 0x%x\n",
453 CurrentTSO,CurrentProc,CurrentTime[CurrentProc],BlockedOnFetch[CurrentProc]);
458 if ( (RTSflags.GranFlags.debug & 0x10) &&
459 (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) ) {
460 fprintf(stderr,"Qagh: Trying to execute TSO 0x%x on proc %d (@ %d) which should be asleep!\n",
461 CurrentTSO,CurrentProc,CurrentTime[CurrentProc]);
466 #if 0 && defined(CONCURRENT)
467 fprintf(stderr, "ScheduleThreads: About to resume thread:%#x\n",
470 miniInterpret((StgFunPtr)resumeThread);
474 % Some remarks on GrAnSim -- HWL
476 The ReSchedule function is the heart of GrAnSim. Based on its parameter it issues
477 a CONTINUETHREAD to carry on executing the current thread in due course, or it watches out for new work (e.g. when called from EndThread).
479 Then it picks the next event (get_next_event) and handles it appropriately
480 (see switch construct). Note that a continue in the switch causes the next
481 event to be handled and a break causes a jmp to the scheduler_loop where
482 the TSO at the head of the current processor's runnable queue is executed.
484 ReSchedule is mostly entered from HpOverflow.lc:PerformReSchedule which is
485 itself called via the GRAN_RESCHEDULE macro in the compiler generated code.
489 GrAnSim rules here! Others stay out or you will be crashed.
490 Concurrent and parallel guys: please use the next door (a few pages down;
491 turn left at the !GRAN sign).
496 /* Prototypes of event handling functions. Only needed in ReSchedule */
497 void do_the_globalblock PROTO((eventq event));
498 void do_the_unblock PROTO((eventq event));
499 void do_the_fetchnode PROTO((eventq event));
500 void do_the_fetchreply PROTO((eventq event));
501 void do_the_movethread PROTO((eventq event));
502 void do_the_movespark PROTO((eventq event));
503 void gimme_spark PROTO((rtsBool *found_res, sparkq *prev_res, sparkq *spark_res));
504 void munch_spark PROTO((rtsBool found, sparkq prev, sparkq spark));
/* GrAnSim's central rescheduler. Depending on what_next it either
   re-queues/continues the current thread (SAME_THREAD / NEW_THREAD),
   leaves a blocked-on-fetch thread alone (CHANGE_THREAD), or idles the
   PE. It then runs the event-handling loop: `continue' handles the next
   event, `break' longjmps to scheduler_loop to run the head TSO. */
507 ReSchedule(what_next)
508 int what_next; /* Run the current thread again? */
510 sparkq spark, nextspark;
516 # if defined(GRAN_CHECK) && defined(GRAN)
517 if ( RTSflags.GranFlags.debug & 0x80 ) {
518 fprintf(stderr,"Entering ReSchedule with mode %u; tso is\n",what_next);
519 G_TSO(ThreadQueueHd,1);
523 # if defined(GRAN_CHECK) && defined(GRAN)
524 if ( (RTSflags.GranFlags.debug & 0x80) || (RTSflags.GranFlags.debug & 0x40 ) )
525 if (what_next<FIND_THREAD || what_next>END_OF_WORLD)
526 fprintf(stderr,"Qagh {ReSchedule}Daq: illegal parameter %u for what_next\n",
530 if (RTSflags.GranFlags.Light) {
531 /* Save current time; GranSim Light only */
532 TSO_CLOCK(CurrentTSO) = CurrentTime[CurrentProc];
535 /* Run the current thread again (if there is one) */
536 if(what_next==SAME_THREAD && ThreadQueueHd != PrelBase_Z91Z93_closure)
538 /* A bit of a hassle if the event queue is empty, but ... */
539 CurrentTSO = ThreadQueueHd;
/* GranSim Light: keep the queue sorted by TSO clock; re-insert the
   current TSO if its clock has overtaken its successor's. */
542 if (RTSflags.GranFlags.Light &&
543 TSO_LINK(ThreadQueueHd)!=PrelBase_Z91Z93_closure &&
544 TSO_CLOCK(ThreadQueueHd)>TSO_CLOCK(TSO_LINK(ThreadQueueHd))) {
545 if(RTSflags.GranFlags.granSimStats &&
546 RTSflags.GranFlags.debug & 0x20000 )
547 DumpGranEvent(GR_DESCHEDULE,ThreadQueueHd);
549 ThreadQueueHd = TSO_LINK(CurrentTSO);
550 if (ThreadQueueHd==PrelBase_Z91Z93_closure)
551 ThreadQueueTl=PrelBase_Z91Z93_closure;
552 TSO_LINK(CurrentTSO) = PrelBase_Z91Z93_closure;
553 InsertThread(CurrentTSO);
556 /* This code does round-robin, if preferred. */
557 if(!RTSflags.GranFlags.Light &&
558 RTSflags.GranFlags.DoFairSchedule &&
559 TSO_LINK(CurrentTSO) != PrelBase_Z91Z93_closure &&
560 CurrentTime[CurrentProc]>=EndOfTimeSlice)
562 ThreadQueueHd = TSO_LINK(CurrentTSO);
563 TSO_LINK(ThreadQueueTl) = CurrentTSO;
564 ThreadQueueTl = CurrentTSO;
565 TSO_LINK(CurrentTSO) = PrelBase_Z91Z93_closure;
566 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadcontextswitchtime;
567 if ( RTSflags.GranFlags.granSimStats )
568 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
569 CurrentTSO = ThreadQueueHd;
572 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
573 CONTINUETHREAD,CurrentTSO,PrelBase_Z91Z93_closure,NULL);
575 /* Schedule `next thread' which is at ThreadQueueHd now i.e. thread queue */
576 /* has been updated before that already. */
577 else if(what_next==NEW_THREAD && ThreadQueueHd != PrelBase_Z91Z93_closure)
579 # if defined(GRAN_CHECK) && defined(GRAN)
580 fprintf(stderr,"Qagh: ReSchedule(NEW_THREAD) shouldn't be used with DoReScheduleOnFetch!!\n");
585 if(RTSflags.GranFlags.granSimStats &&
586 (!RTSflags.GranFlags.Light || RTSflags.GranFlags.debug & 0x20000) )
587 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
589 CurrentTSO = ThreadQueueHd;
590 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
591 CONTINUETHREAD,CurrentTSO,PrelBase_Z91Z93_closure,NULL);
593 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadcontextswitchtime;
596 /* We go in here if the current thread is blocked on fetch => don't CONT */
597 else if(what_next==CHANGE_THREAD)
599 /* just fall into event handling loop for next event */
602 /* We go in here if we have no runnable threads or what_next==0 */
605 procStatus[CurrentProc] = Idle;
606 /* That's now done in HandleIdlePEs!
607 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
608 FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
610 CurrentTSO = PrelBase_Z91Z93_closure;
613 /* ----------------------------------------------------------------- */
614 /* This part is the EVENT HANDLING LOOP */
615 /* ----------------------------------------------------------------- */
618 /* Choose the processor with the next event */
619 event = get_next_event();
620 CurrentProc = EVENT_PROC(event);
621 CurrentTSO = EVENT_TSO(event);
622 if (RTSflags.GranFlags.Light) {
625 /* Restore local clock of the virtual processor attached to CurrentTSO.
626 All costs will be associated to the `virt. proc' on which the tso
628 if (ActiveTSO != NULL) { /* already in system area */
629 TSO_CLOCK(ActiveTSO) = CurrentTime[CurrentProc];
630 if (RTSflags.GranFlags.DoFairSchedule)
632 if (RTSflags.GranFlags.granSimStats &&
633 RTSflags.GranFlags.debug & 0x20000)
634 DumpGranEvent(GR_SYSTEM_END,ActiveTSO);
/* Pick the TSO whose virtual clock should drive this event. */
637 switch (EVENT_TYPE(event))
640 case FINDWORK: /* inaccurate this way */
641 ActiveTSO = ThreadQueueHd;
645 case MOVESPARK: /* has tso of virt proc in tso field of event */
646 ActiveTSO = EVENT_TSO(event);
648 default: fprintf(stderr,"Illegal event type %s (%d) in GrAnSim Light setup\n",
649 event_names[EVENT_TYPE(event)],EVENT_TYPE(event));
652 CurrentTime[CurrentProc] = TSO_CLOCK(ActiveTSO);
653 if(RTSflags.GranFlags.DoFairSchedule)
655 if (RTSflags.GranFlags.granSimStats &&
656 RTSflags.GranFlags.debug & 0x20000)
657 DumpGranEvent(GR_SYSTEM_START,ActiveTSO);
/* Advance the PE clock to the event time (CONTINUETHREADs excepted). */
661 if(EVENT_TIME(event) > CurrentTime[CurrentProc] &&
662 EVENT_TYPE(event)!=CONTINUETHREAD)
663 CurrentTime[CurrentProc] = EVENT_TIME(event);
665 # if defined(GRAN_CHECK) && defined(GRAN) /* HWL */
666 if ( RTSflags.GranFlags.Light && CurrentProc!=0 ) {
667 fprintf(stderr,"Qagh {ReSchedule}Daq: CurrentProc must be 0 in GrAnSim Light setup\n");
671 /* MAKE_BUSY(CurrentProc); don't think that's right in all cases now */
674 # if defined(GRAN_CHECK) && defined(GRAN)
675 if (RTSflags.GranFlags.debug & 0x80)
676 fprintf(stderr,"After get_next_event, before HandleIdlePEs\n");
679 /* Deal with the idlers */
680 if ( !RTSflags.GranFlags.Light )
683 # if defined(GRAN_CHECK) && defined(GRAN)
684 if ( RTSflags.GranFlags.event_trace_all ||
685 ( RTSflags.GranFlags.event_trace && EVENT_TYPE(event) != CONTINUETHREAD) ||
686 (RTSflags.GranFlags.debug & 0x80) )
/* Dispatch on event type; most cases delegate to do_the_* handlers. */
690 switch (EVENT_TYPE(event))
692 /* Should just be continuing execution */
694 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
695 if ( (RTSflags.GranFlags.debug & 0x100) &&
696 (EVENT_TSO(event)!=RunnableThreadsHd[EVENT_PROC(event)]) ) {
697 fprintf(stderr,"Warning: Wrong TSO in CONTINUETHREAD: %#lx (%x) (PE: %d Hd: 0x%lx)\n",
698 EVENT_TSO(event), TSO_ID(EVENT_TSO(event)),
700 RunnableThreadsHd[EVENT_PROC(event)]);
702 if ( (RTSflags.GranFlags.debug & 0x04) &&
703 BlockedOnFetch[CurrentProc]) {
704 fprintf(stderr,"Warning: Discarding CONTINUETHREAD on blocked proc %u @ %u\n",
705 CurrentProc,CurrentTime[CurrentProc]);
710 if(ThreadQueueHd==PrelBase_Z91Z93_closure)
712 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
713 FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
714 continue; /* Catches superfluous CONTINUEs -- should be unnecessary */
717 break; /* fall into scheduler loop */
720 do_the_fetchnode(event);
721 continue; /* handle next event in event queue */
724 do_the_globalblock(event);
725 continue; /* handle next event in event queue */
728 do_the_fetchreply(event);
729 continue; /* handle next event in event queue */
731 case UNBLOCKTHREAD: /* Move from the blocked queue to the tail of */
732 do_the_unblock(event);
733 continue; /* handle next event in event queue */
735 case RESUMETHREAD: /* Move from the blocked queue to the tail of */
736 /* the runnable queue ( i.e. Qu' SImqa'lu') */
737 TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] -
738 TSO_BLOCKEDAT(EVENT_TSO(event));
739 StartThread(event,GR_RESUME);
743 StartThread(event,GR_START);
747 do_the_movethread(event);
748 continue; /* handle next event in event queue */
751 do_the_movespark(event);
752 continue; /* handle next event in event queue */
755 { /* Make sure that we have enough heap for creating a new
756 thread. This is a conservative estimate of the required heap.
757 This eliminates special checks for GC around NewThread within
760 I_ req_heap = TSO_HS + TSO_CTS_SIZE + STKO_HS +
761 RTSflags.ConcFlags.stkChunkSize;
763 if (SAVE_Hp + req_heap >= SAVE_HpLim ) {
764 ReallyPerformThreadGC(req_heap, rtsFalse);
766 if (IS_SPARKING(CurrentProc))
767 MAKE_IDLE(CurrentProc);
772 if( RTSflags.GranFlags.DoAlwaysCreateThreads ||
773 (ThreadQueueHd == PrelBase_Z91Z93_closure &&
774 (RTSflags.GranFlags.FetchStrategy >= 2 ||
775 OutstandingFetches[CurrentProc] == 0)) )
781 ASSERT(procStatus[CurrentProc]==Sparking ||
782 RTSflags.GranFlags.DoAlwaysCreateThreads);
784 /* SImmoHwI' yInej! Search spark queue! */
785 gimme_spark (&found, &prev, &spark);
787 /* DaH chu' Qu' yIchen! Now create new work! */
788 munch_spark (found, prev, spark);
790 /* ToDo: check ; not valid if GC occurs in munch_spark
791 ASSERT(procStatus[CurrentProc]==Starting ||
792 procStatus[CurrentProc]==Idle ||
793 RTSflags.GranFlags.DoAlwaysCreateThreads); */
795 continue; /* to the next event */
798 fprintf(stderr,"Illegal event type %u\n",EVENT_TYPE(event));
/* Exit the event loop: run the TSO at the head of the runnable queue. */
801 longjmp(scheduler_loop, 1);
805 /* ----------------------------------------------------------------- */
806 /* The main event handling functions; called from ReSchedule (switch) */
807 /* ----------------------------------------------------------------- */
/* Handle a GLOBALBLOCK event: a TSO on `proc' blocked on a remote node.
   blockFetch queues the TSO on the node; if the node turned local in the
   meantime we just return. Without rescheduling-on-fetch, wake the next
   runnable thread on that PE (or idle it). Only valid with GUMM fetching
   and never in GranSim Light. */
810 do_the_globalblock(eventq event)
812 PROC proc = EVENT_PROC(event); /* proc that requested node */
813 P_ tso = EVENT_TSO(event), /* tso that requested node */
814 node = EVENT_NODE(event); /* requested, remote node */
816 # if defined(GRAN_CHECK) && defined(GRAN)
817 if ( RTSflags.GranFlags.Light ) {
818 fprintf(stderr,"Qagh: There should be no GLOBALBLOCKs in GrAnSim Light setup\n");
822 if (!RTSflags.GranFlags.DoGUMMFetching) {
823 fprintf(stderr,"Qagh: GLOBALBLOCK events only valid with GUMM fetching\n");
827 if ( (RTSflags.GranFlags.debug & 0x100) &&
828 IS_LOCAL_TO(PROCS(node),proc) ) {
829 fprintf(stderr,"Qagh: GLOBALBLOCK: Blocking on LOCAL node 0x %x (PE %d).\n",
833 /* CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime; */
834 if ( blockFetch(tso,proc,node) != 0 )
835 return; /* node has become local by now */
837 if (!RTSflags.GranFlags.DoReScheduleOnFetch) { /* head of queue is next thread */
838 P_ tso = RunnableThreadsHd[proc]; /* awaken next thread */
839 if(tso != PrelBase_Z91Z93_closure) {
840 new_event(proc,proc,CurrentTime[proc],
841 CONTINUETHREAD,tso,PrelBase_Z91Z93_closure,NULL);
842 CurrentTime[proc] += RTSflags.GranFlags.gran_threadcontextswitchtime;
843 if(RTSflags.GranFlags.granSimStats)
844 DumpRawGranEvent(proc,CurrentProc,GR_SCHEDULE,tso,
845 PrelBase_Z91Z93_closure,0);
846 MAKE_BUSY(proc); /* might have been fetching */
848 MAKE_IDLE(proc); /* no work on proc now */
850 } else { /* RTSflags.GranFlags.DoReScheduleOnFetch i.e. block-on-fetch */
851 /* other thread is already running */
852 /* 'oH 'utbe' 'e' vIHar ; I think that's not needed -- HWL
853 new_event(proc,proc,CurrentTime[proc],
854 CONTINUETHREAD,EVENT_TSO(event),
855 (RTSflags.GranFlags.DoGUMMFetching ? closure :
856 EVENT_NODE(event)),NULL);
/* Handle an UNBLOCKTHREAD event: the fetched node has arrived, so let the
   blocked TSO continue. In block-on-fetch mode the blocked time is simply
   accounted and the TSO continues at once; with rescheduling-on-fetch the
   TSO is re-queued via a RESUMETHREAD event and pays thread-queueing time. */
862 do_the_unblock(eventq event)
864 PROC proc = EVENT_PROC(event), /* proc that requested node */
865 creator = EVENT_CREATOR(event); /* NOTE(review): original comment was a duplicate; presumably the PE holding the node -- verify */
866 P_ tso = EVENT_TSO(event), /* tso that requested node */
867 node = EVENT_NODE(event); /* requested, remote node */
869 # if defined(GRAN) && defined(GRAN_CHECK)
870 if ( RTSflags.GranFlags.Light ) {
871 fprintf(stderr,"Qagh: There should be no UNBLOCKs in GrAnSim Light setup\n");
876 if (!RTSflags.GranFlags.DoReScheduleOnFetch) { /* block-on-fetch */
877 /* We count block-on-fetch as normal block time */
878 TSO_BLOCKTIME(tso) += CurrentTime[proc] - TSO_BLOCKEDAT(tso);
879 /* No costs for contextswitch or thread queueing in this case */
880 if(RTSflags.GranFlags.granSimStats)
881 DumpRawGranEvent(proc,CurrentProc,GR_RESUME,tso, PrelBase_Z91Z93_closure,0);
882 new_event(proc,proc,CurrentTime[proc],CONTINUETHREAD,tso,node,NULL);
884 /* Reschedule on fetch causes additional costs here: */
885 /* Bring the TSO from the blocked queue into the threadq */
886 new_event(proc,proc,CurrentTime[proc]+RTSflags.GranFlags.gran_threadqueuetime,
887 RESUMETHREAD,tso,node,NULL);
/* Handle a FETCHNODE event: a remote PE asked for a node we own. Delegates
   to HandleFetchRequest; a return code of 4 means packing needs heap, so
   the event is pushed back, a GC is forced, and the event is re-grabbed
   afterwards (events survive GC; see comment at the end). */
892 do_the_fetchnode(eventq event)
896 # if defined(GRAN_CHECK) && defined(GRAN)
897 if ( RTSflags.GranFlags.Light ) {
898 fprintf(stderr,"Qagh: There should be no FETCHNODEs in GrAnSim Light setup\n");
902 if (RTSflags.GranFlags.SimplifiedFetch) {
903 fprintf(stderr,"Qagh: FETCHNODE events not valid with simplified fetch\n");
907 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime;
909 rc = HandleFetchRequest(EVENT_NODE(event),
910 EVENT_CREATOR(event),
912 if (rc == 4) { /* trigger GC */
913 # if defined(GRAN_CHECK) && defined(GRAN)
914 if (RTSflags.GcFlags.giveStats)
915 fprintf(RTSflags.GcFlags.statsFile,"***** veQ boSwI' PackNearbyGraph(node %#lx, tso %#lx (%x))\n",
916 EVENT_NODE(event), EVENT_TSO(event), TSO_ID(EVENT_TSO(event)));
918 prepend_event(event);
919 ReallyPerformThreadGC(PACK_HEAP_REQUIRED, rtsFalse);
920 # if defined(GRAN_CHECK) && defined(GRAN)
921 if (RTSflags.GcFlags.giveStats) {
922 fprintf(RTSflags.GcFlags.statsFile,"***** SAVE_Hp=%#lx, SAVE_HpLim=%#lx, PACK_HEAP_REQUIRED=%#lx\n",
923 SAVE_Hp, SAVE_HpLim, PACK_HEAP_REQUIRED);
924 fprintf(stderr,"***** No. of packets so far: %d (total size: %d)\n",
925 tot_packets,tot_packet_size);
928 event = grab_event();
929 SAVE_Hp -= PACK_HEAP_REQUIRED;
931 /* GC knows that events are special and follows the pointer i.e. */
932 /* events are valid even if they moved. An EXIT is triggered */
933 /* if there is not enough heap after GC. */
/* Handle a FETCHREPLY event: the data we asked for has arrived. With GUMM
   (packet) fetching the graph is unpacked from the buffer; otherwise a
   single node is moved/copied, and a "fetch miss" (node grabbed by another
   PE first) chases the node with a fresh FETCHNODE event. On success the
   blocked TSO is released via an UNBLOCKTHREAD event. */
939 do_the_fetchreply(eventq event)
943 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
944 if ( RTSflags.GranFlags.Light ) {
945 fprintf(stderr,"Qagh: There should be no FETCHREPLYs in GrAnSim Light setup\n");
949 if (RTSflags.GranFlags.SimplifiedFetch) {
950 fprintf(stderr,"Qagh: FETCHREPLY events not valid with simplified fetch\n");
954 if (RTSflags.GranFlags.debug & 0x10) {
955 if (TSO_TYPE(EVENT_TSO(event)) & FETCH_MASK_TSO) {
956 TSO_TYPE(EVENT_TSO(event)) &= ~FETCH_MASK_TSO;
958 fprintf(stderr,"Qagh: FETCHREPLY: TSO %#x (%x) has fetch mask not set @ %d\n",
959 CurrentTSO,TSO_ID(CurrentTSO),CurrentTime[CurrentProc]);
964 if (RTSflags.GranFlags.debug & 0x04) {
965 if (BlockedOnFetch[CurrentProc]!=ThreadQueueHd) {
966 fprintf(stderr,"Qagh: FETCHREPLY: Proc %d (with TSO %#x (%x)) not blocked-on-fetch by TSO %#lx (%x)\n",
967 CurrentProc,CurrentTSO,TSO_ID(CurrentTSO),
968 BlockedOnFetch[CurrentProc], TSO_ID(BlockedOnFetch[CurrentProc]));
971 BlockedOnFetch[CurrentProc] = 0; /*- rtsFalse; -*/
976 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime;
978 if (RTSflags.GranFlags.DoGUMMFetching) { /* bulk (packet) fetching */
979 P_ buffer = EVENT_NODE(event);
980 PROC p = EVENT_PROC(event);
981 I_ size = buffer[PACK_SIZE_LOCN];
983 tso = EVENT_TSO(event);
985 /* NB: Fetch misses can't occur with GUMM fetching, as */
986 /* updatable closure are turned into RBHs and therefore locked */
987 /* for other processors that try to grab them. */
989 closure = UnpackGraph(buffer);
990 CurrentTime[CurrentProc] += size * RTSflags.GranFlags.gran_munpacktime;
992 /* Copy or move node to CurrentProc */
993 if (FetchNode(EVENT_NODE(event),
994 EVENT_CREATOR(event),
995 EVENT_PROC(event)) ) {
996 /* Fetch has failed i.e. node has been grabbed by another PE */
997 P_ node = EVENT_NODE(event), tso = EVENT_TSO(event);
998 PROC p = where_is(node);
1001 # if defined(GRAN_CHECK) && defined(GRAN)
1002 if (RTSflags.GranFlags.PrintFetchMisses) {
1003 fprintf(stderr,"Fetch miss @ %lu: node %#lx is at proc %u (rather than proc %u)\n",
1004 CurrentTime[CurrentProc],node,p,EVENT_CREATOR(event));
1007 # endif /* GRAN_CHECK */
1009 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
1011 /* Count fetch again !? */
1012 ++TSO_FETCHCOUNT(tso);
1013 TSO_FETCHTIME(tso) += RTSflags.GranFlags.gran_fetchtime;
1015 fetchtime = STG_MAX(CurrentTime[CurrentProc],CurrentTime[p]) +
1016 RTSflags.GranFlags.gran_latency;
1018 /* Chase the grabbed node */
1019 new_event(p,CurrentProc,fetchtime,FETCHNODE,tso,node,NULL);
1021 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
1022 if (RTSflags.GranFlags.debug & 0x04)
1023 BlockedOnFetch[CurrentProc] = tso; /*-rtsTrue;-*/
1025 if (RTSflags.GranFlags.debug & 0x10)
1026 TSO_TYPE(tso) |= FETCH_MASK_TSO;
1029 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
1031 return; /* NB: no REPLy has been processed; tso still sleeping */
1034 /* -- Qapla'! Fetch has been successful; node is here, now */
1035 ++TSO_FETCHCOUNT(EVENT_TSO(event));
1036 TSO_FETCHTIME(EVENT_TSO(event)) += RTSflags.GranFlags.gran_fetchtime;
1038 if (RTSflags.GranFlags.granSimStats)
1039 DumpRawGranEvent(CurrentProc,EVENT_CREATOR(event),GR_REPLY,
1041 (RTSflags.GranFlags.DoGUMMFetching ?
1046 --OutstandingFetches[CurrentProc];
1047 ASSERT(OutstandingFetches[CurrentProc] >= 0);
1048 # if 0 && defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
1049 if (OutstandingFetches[CurrentProc] < 0) {
1050 fprintf(stderr,"Qagh: OutstandingFetches of proc %u has become negative\n",CurrentProc);
1054 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1055 UNBLOCKTHREAD,EVENT_TSO(event),
1056 (RTSflags.GranFlags.DoGUMMFetching ?
/* Handle a MOVETHREAD event: a stolen TSO arrives at this PE. Only valid
   with thread migration (-bM); the arriving TSO must carry an empty PE
   bitmask. Accounts unpack time, claims the TSO for ThisPE, and starts it. */
1063 do_the_movethread(eventq event) {
1064 P_ tso = EVENT_TSO(event);
1065 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
1066 if ( RTSflags.GranFlags.Light && CurrentProc!=1 ) {
1067 fprintf(stderr,"Qagh: There should be no MOVETHREADs in GrAnSim Light setup\n");
1070 if (!RTSflags.GranFlags.DoThreadMigration) {
1071 fprintf(stderr,"Qagh: MOVETHREAD events should never occur without -bM\n");
1074 if (PROCS(tso)!=0) {
1075 fprintf(stderr,"Qagh: Moved thread has a bitmask of 0%o (proc %d); should be 0\n",
1076 PROCS(tso), where_is(tso));
/* The move completes an outstanding fish sent by this PE. */
1080 --OutstandingFishes[CurrentProc];
1081 ASSERT(OutstandingFishes[CurrentProc]>=0);
1082 SET_PROCS(tso,ThisPE);
1083 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime;
1084 StartThread(event,GR_STOLEN);
/* Handle a MOVESPARK event: a stolen spark arrives. Accounts unpack time,
   adds the spark to the local (advisory) pool, completes the outstanding
   fish, and flips a Fishing PE back to Idle. A FINDWORK event is created
   rather than falling through, so earlier events are handled first. */
1088 do_the_movespark(eventq event){
1089 sparkq spark = EVENT_SPARK(event);
1091 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime;
1093 if (RTSflags.GranFlags.granSimStats_Sparks)
1094 DumpRawGranEvent(CurrentProc,(PROC)0,SP_ACQUIRED,PrelBase_Z91Z93_closure,
1096 spark_queue_len(CurrentProc,ADVISORY_POOL));
1098 #if defined(GRAN) && defined(GRAN_CHECK)
1099 if (!SHOULD_SPARK(SPARK_NODE(spark)))
1101 /* Not adding the spark to the spark queue would be the right */
1102 /* thing here, but it also would be cheating, as this info can't be */
1103 /* available in a real system. -- HWL */
1105 --OutstandingFishes[CurrentProc];
1106 ASSERT(OutstandingFishes[CurrentProc]>=0);
1108 add_to_spark_queue(spark);
1110 if (procStatus[CurrentProc]==Fishing)
1111 procStatus[CurrentProc] = Idle;
1113 /* add_to_spark_queue will increase the time of the current proc. */
1114 /* Just falling into FINDWORK is wrong as we might have other */
1115 /* events that are happening before that. Therefore, just create */
1116 /* a FINDWORK event and go back to main event handling loop. */
1118 /* Should we treat stolen sparks specially? Currently, we don't. */
1120 /* Now FINDWORK is created in HandleIdlePEs */
1121 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1122 FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
1123 sparking[CurrentProc]=rtsTrue;
1127 /* Search the spark queue of the CurrentProc for a spark that's worth
1128 turning into a thread */
/* Scan the CurrentProc's spark queue for a spark worth turning into a
   thread.  Prunes un-sparkable entries as it goes; with
   PreferSparksOfLocalNodes set, sparks of non-local nodes are skipped but
   remembered as a fallback; with priority sparking, low-priority sparks
   are likewise remembered and only taken if nothing better exists.
   Results are returned through the three out-parameters.
   (Listing elides some original lines — gaps in the leading numbers.) */
1130 gimme_spark (rtsBool *found_res, sparkq *prev_res, sparkq *spark_res)
1134 sparkq spark_of_non_local_node = NULL, spark_of_non_local_node_prev = NULL,
1135 low_priority_spark = NULL, low_priority_spark_prev = NULL,
1136 spark = NULL, prev = NULL, tmp = NULL;
1138 /* Choose a spark from the local spark queue */
1139 spark = SparkQueueHd;
1142 while (spark != NULL && !found)
1144 node = SPARK_NODE(spark);
/* Case 1: spark no longer worth creating a thread for => prune it. */
1145 if (!SHOULD_SPARK(node))
1147 if(RTSflags.GranFlags.granSimStats_Sparks)
1148 DumpRawGranEvent(CurrentProc,(PROC)0,SP_PRUNED,PrelBase_Z91Z93_closure,
1150 spark_queue_len(CurrentProc,ADVISORY_POOL));
1152 ASSERT(spark != NULL);
1155 spark = delete_from_spark_queue (prev,spark);
1157 /* -- node should eventually be sparked */
/* Case 2: spark's node is not local and we prefer local nodes => skip,
   but remember the first such spark as a fallback. */
1158 else if (RTSflags.GranFlags.PreferSparksOfLocalNodes &&
1159 !IS_LOCAL_TO(PROCS(node),CurrentProc))
1161 /* Remember first spark of a non-local node (fallback) */
1162 if (spark_of_non_local_node==NULL) {
1163 spark_of_non_local_node_prev = prev;
1164 spark_of_non_local_node = spark;
/* End of queue reached => fall back to the remembered non-local spark. */
1167 if (SPARK_NEXT(spark)==NULL) {
1168 ASSERT(spark==SparkQueueTl); /* just for testing */
1169 prev = spark_of_non_local_node_prev;
1170 spark = spark_of_non_local_node;
1175 # if defined(GRAN) && defined(GRAN_CHECK)
1176 /* Should never happen; just for testing */
1177 if (spark==SparkQueueTl) {
1178 fprintf(stderr,"ReSchedule: Last spark != SparkQueueTl\n");
1183 spark = SPARK_NEXT(spark);
/* Case 3: spark is acceptable (priority filter).
   NOTE(review): with the condition as written, DoPrioritySparking==true
   accepts EVERY spark, making the SparkPriority2 test dead; one would
   expect `!RTSflags.GranFlags.DoPrioritySparking || ...` here — confirm
   against the original source before relying on this path. */
1186 else if ( RTSflags.GranFlags.DoPrioritySparking ||
1187 (SPARK_GRAN_INFO(spark)>=RTSflags.GranFlags.SparkPriority2) )
/* Case 4: low-priority spark => skip, but remember the first one. */
1191 else /* only used if SparkPriority2 is defined */
1193 /* Remember first low priority spark */
1194 if (low_priority_spark==NULL) {
1195 low_priority_spark_prev = prev;
1196 low_priority_spark = spark;
1199 if (SPARK_NEXT(spark)==NULL) {
1200 ASSERT(spark==SparkQueueTl); /* just for testing */
1201 prev = low_priority_spark_prev;
1202 spark = low_priority_spark;
1203 found = rtsTrue; /* take low pri spark => rc is 2 */
1207 /* Should never happen; just for testing */
1208 if (spark==SparkQueueTl) {
1209 fprintf(stderr,"ReSchedule: Last spark != SparkQueueTl\n");
1214 spark = SPARK_NEXT(spark);
1215 # if defined(GRAN_CHECK) && defined(GRAN)
1216 if ( RTSflags.GranFlags.debug & 0x40 ) {
1217 fprintf(stderr,"Ignoring spark of priority %u (SparkPriority=%u); node=0x%lx; name=%u\n",
1218 SPARK_GRAN_INFO(spark), RTSflags.GranFlags.SparkPriority,
1219 SPARK_NODE(spark), SPARK_NAME(spark));
1221 # endif /* GRAN_CHECK */
1223 } /* while (spark!=NULL && !found) */
/* Turn the spark chosen by gimme_spark into a thread.  Charges thread
   creation time, calls NewThread (retrying via a delayed FINDWORK event
   if the heap is too full), copies spark attributes into the new TSO,
   schedules a STARTTHREAD event, and removes the spark from the queue.
   If nothing was found and no threads are runnable, the PE is made idle.
   (Listing elides some original lines — gaps in the leading numbers.) */
1231 munch_spark (rtsBool found, sparkq prev, sparkq spark)
1235 /* We've found a node; now, create thread (DaH Qu' yIchen) */
1238 # if defined(GRAN_CHECK) && defined(GRAN)
1239 if ( SPARK_GRAN_INFO(spark) < RTSflags.GranFlags.SparkPriority2 ) {
1240 tot_low_pri_sparks++;
1241 if ( RTSflags.GranFlags.debug & 0x40 ) {
1242 fprintf(stderr,"GRAN_TNG: No high priority spark available; low priority (%u) spark chosen: node=0x%lx; name=%u\n",
1243 SPARK_GRAN_INFO(spark),
1244 SPARK_NODE(spark), SPARK_NAME(spark));
1248 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadcreatetime;
1250 node = SPARK_NODE(spark);
/* NewThread may fail when heap is exhausted: then GC and retry later. */
1251 if((tso = NewThread(node, T_REQUIRED, SPARK_GRAN_INFO(spark)))==NULL)
1253 /* Some kind of backoff needed here in case there's too little heap */
1254 # if defined(GRAN_CHECK) && defined(GRAN)
1255 if (RTSflags.GcFlags.giveStats)
1256 fprintf(RTSflags.GcFlags.statsFile,"***** vIS Qu' chen veQ boSwI'; spark=%#x, node=%#x; name=%u\n",
1257 /* (found==2 ? "no hi pri spark" : "hi pri spark"), */
1258 spark, node,SPARK_NAME(spark));
/* Re-issue FINDWORK one tick later, GC, and undo the failed TSO alloc. */
1260 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc]+1,
1261 FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
1262 ReallyPerformThreadGC(TSO_HS+TSO_CTS_SIZE,rtsFalse);
1263 SAVE_Hp -= TSO_HS+TSO_CTS_SIZE;
1265 return; /* was: continue; */ /* to the next event, eventually */
1268 if(RTSflags.GranFlags.granSimStats_Sparks)
1269 DumpRawGranEvent(CurrentProc,(PROC)0,SP_USED,PrelBase_Z91Z93_closure,
1271 spark_queue_len(CurrentProc,ADVISORY_POOL));
/* Inherit spark attributes into the freshly created TSO. */
1273 TSO_EXPORTED(tso) = SPARK_EXPORTED(spark);
1274 TSO_LOCKED(tso) = !SPARK_GLOBAL(spark);
1275 TSO_SPARKNAME(tso) = SPARK_NAME(spark);
1277 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1278 STARTTHREAD,tso,node,NULL);
1280 procStatus[CurrentProc] = Starting;
1282 ASSERT(spark != NULL);
1284 spark = delete_from_spark_queue (prev, spark);
1287 /* Make the PE idle if nothing sparked and we have no threads. */
1289 if(ThreadQueueHd == PrelBase_Z91Z93_closure)
1291 MAKE_IDLE(CurrentProc);
1292 # if defined(GRAN_CHECK) && defined(GRAN)
1293 if ( (RTSflags.GranFlags.debug & 0x80) )
1294 fprintf(stderr,"Warning in FINDWORK handling: No work found for PROC %u\n",CurrentProc);
1295 # endif /* GRAN_CHECK */
1299 /* ut'lu'Qo' ; Don't think that's necessary any more -- HWL
1300 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1301 CONTINUETHREAD,ThreadQueueHd,PrelBase_Z91Z93_closure,NULL);
1309 Here follows the non-GRAN @ReSchedule@.
/* Non-GRAN ReSchedule (concurrent / GUM-parallel build).
   Requeues the current thread (unfairly at the head in the parallel
   world, round-robin at the tail in the sequential-concurrent world),
   clears the "running" state, turns pending required and advisory sparks
   into threads, and finally longjmps back into the scheduler loop.
   NOTE(review): the function header and several interior lines (braces,
   else-branches) are elided in this listing — see the gaps in the leading
   line numbers. */
1314 /* If you are concurrent and maybe even parallel please use this door. */
1318 int again; /* Run the current thread again? */
1326 * In the parallel world, we do unfair scheduling for the moment.
1327 * Ultimately, this should all be merged with the more
1328 * sophisticated GrAnSim scheduling options. (Of course, some
1329 * provision should be made for *required* threads to make sure
1330 * that they don't starve, but for now we assume that no one is
1331 * running concurrent Haskell on a multi-processor platform.)
/* Parallel case: push CurrentTSO on the FRONT of the runnable queue. */
1337 if (RunnableThreadsHd == PrelBase_Z91Z93_closure)
1338 RunnableThreadsTl = CurrentTSO;
1339 TSO_LINK(CurrentTSO) = RunnableThreadsHd;
1340 RunnableThreadsHd = CurrentTSO;
1346 * In the sequential world, we assume that the whole point of running
1347 * the threaded build is for concurrent Haskell, so we provide round-robin
/* Sequential-concurrent case: append CurrentTSO at the TAIL (round-robin). */
1352 if(RunnableThreadsHd == PrelBase_Z91Z93_closure) {
1353 RunnableThreadsHd = CurrentTSO;
1355 TSO_LINK(RunnableThreadsTl) = CurrentTSO;
1356 if (DO_QP_PROF > 1) {
1357 QP_Event1("GA", CurrentTSO);
1360 RunnableThreadsTl = CurrentTSO;
1366 * Debugging code, which is useful enough (and cheap enough) to compile
1367 * in all the time. This makes sure that we don't access saved registers,
1368 * etc. in threads which are supposed to be sleeping.
1370 CurrentTSO = PrelBase_Z91Z93_closure;
1371 CurrentRegTable = NULL;
1374 /* First the required sparks */
1376 for (sparkp = PendingSparksHd[REQUIRED_POOL];
1377 sparkp < PendingSparksTl[REQUIRED_POOL]; sparkp++) {
1379 if (SHOULD_SPARK(spark)) {
1380 if ((tso = NewThread(spark, T_REQUIRED)) == NULL)
1382 if (RunnableThreadsHd == PrelBase_Z91Z93_closure) {
1383 RunnableThreadsHd = tso;
1385 if (RTSflags.ParFlags.granSimStats) {
1386 DumpGranEvent(GR_START, tso);
1387 sameThread = rtsTrue;
1391 TSO_LINK(RunnableThreadsTl) = tso;
1393 if (RTSflags.ParFlags.granSimStats)
1394 DumpGranEvent(GR_STARTQ, tso);
1397 RunnableThreadsTl = tso;
1400 QP_Event0(threadId++, spark);
1402 /* ToDo: Fix log entries for pruned sparks in GUM -- HWL */
1403 if(RTSflags.GranFlags.granSimStats_Sparks)
1404 DumpGranEvent(SP_PRUNED,threadId++);
1405 ^^^^^^^^ should be a TSO
1409 PendingSparksHd[REQUIRED_POOL] = sparkp;
1411 /* Now, almost the same thing for advisory sparks */
1413 for (sparkp = PendingSparksHd[ADVISORY_POOL];
1414 sparkp < PendingSparksTl[ADVISORY_POOL]; sparkp++) {
1416 if (SHOULD_SPARK(spark)) {
1419 /* In the parallel world, don't create advisory threads if we are
1420 * about to rerun the same thread, or already have runnable threads,
1421 * or the main thread has terminated */
1422 (RunnableThreadsHd != PrelBase_Z91Z93_closure ||
1423 (required_thread_count == 0 && IAmMainThread)) ||
/* Also respect the maxThreads cap before creating an advisory thread. */
1425 advisory_thread_count == RTSflags.ConcFlags.maxThreads ||
1426 (tso = NewThread(spark, T_ADVISORY)) == NULL)
1428 advisory_thread_count++;
1429 if (RunnableThreadsHd == PrelBase_Z91Z93_closure) {
1430 RunnableThreadsHd = tso;
1432 if (RTSflags.ParFlags.granSimStats) {
1433 DumpGranEvent(GR_START, tso);
1434 sameThread = rtsTrue;
1438 TSO_LINK(RunnableThreadsTl) = tso;
1440 if (RTSflags.ParFlags.granSimStats)
1441 DumpGranEvent(GR_STARTQ, tso);
1444 RunnableThreadsTl = tso;
1447 QP_Event0(threadId++, spark);
1449 /* ToDo: Fix log entries for pruned sparks in GUM -- HWL */
1450 if(RTSflags.GranFlags.granSimStats_Sparks)
1451 DumpGranEvent(SP_PRUNED,threadId++);
1452 ^^^^^^^^ should be a TSO
1456 PendingSparksHd[ADVISORY_POOL] = sparkp;
/* Re-enter the scheduler: -1 signals "no required threads left". */
1459 longjmp(scheduler_loop, required_thread_count == 0 ? -1 : 1);
1461 longjmp(scheduler_loop, required_thread_count == 0 && IAmMainThread ? -1 : 1);
1469 %****************************************************************************
1471 \subsection[thread-gransim-execution]{Starting, Idling and Migrating
1472 Threads (GrAnSim only)}
1474 %****************************************************************************
1476 Thread start, idle and migration code for GrAnSim (i.e. simulating multiple
1482 /* ngoqvam che' {GrAnSim}! */
1484 # if defined(GRAN_CHECK)
1485 /* This routine is only used for keeping a statistics of thread queue
1486 lengths to evaluate the impact of priority scheduling. -- HWL
1487 {spark_queue_len}vo' jInIHta'
/* Count the TSOs in proc's runnable queue by walking the TSO_LINK chain.
   Debug/statistics only (GRAN_CHECK); used to evaluate priority
   scheduling.  (Loop body and return are elided in this listing.) */
1490 thread_queue_len(PROC proc)
1495 for (len = 0, prev = PrelBase_Z91Z93_closure, next = RunnableThreadsHd[proc];
1496 next != PrelBase_Z91Z93_closure;
1497 len++, prev = next, next = TSO_LINK(prev))
1502 # endif /* GRAN_CHECK */
1505 A large portion of @StartThread@ deals with maintaining a sorted thread
1506 queue, which is needed for the Priority Sparking option. Without that
1507 complication the code boils down to FIFO handling.
/* Add the event's TSO to CurrentProc's thread queue and emit the
   appropriate GranSim statistics event.  Three regimes:
     - idle PE: the TSO becomes the running thread (CONTINUETHREAD issued);
     - GrAnSim Light: insert into the single clock-sorted queue;
     - busy PE: append (FIFO) or insert by priority (DoPriorityScheduling).
   Charges thread-queueing (and priority-search) time to CurrentProc.
   (Listing elides some original lines — gaps in the leading numbers.) */
1510 StartThread(event,event_type)
1512 enum gran_event_types event_type;
1514 P_ tso = EVENT_TSO(event),
1515 node = EVENT_NODE(event);
1516 PROC proc = EVENT_PROC(event),
1517 creator = EVENT_CREATOR(event);
1520 rtsBool found = rtsFalse;
1522 ASSERT(CurrentProc==proc);
1524 # if defined(GRAN_CHECK)
1525 if ( RTSflags.GranFlags.Light && CurrentProc!=0 ) {
1526 fprintf(stderr,"Qagh {StartThread}Daq: CurrentProc must be 0 in GrAnSim Light setup\n");
1530 /* A wee bit of statistics gathering */
1532 tot_tq_len += thread_queue_len(CurrentProc);
1535 ASSERT(TSO_LINK(CurrentTSO)==PrelBase_Z91Z93_closure);
1537 /* Idle proc; same for pri spark and basic version */
1538 if(ThreadQueueHd==PrelBase_Z91Z93_closure)
1540 CurrentTSO = ThreadQueueHd = ThreadQueueTl = tso;
1542 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadqueuetime;
1543 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1544 CONTINUETHREAD,tso,PrelBase_Z91Z93_closure,NULL);
/* Suppress the raw event when labelling rewrites GR_START/GR_STARTQ. */
1546 if(RTSflags.GranFlags.granSimStats &&
1547 !( (event_type == GR_START || event_type == GR_STARTQ) && RTSflags.GranFlags.labelling) )
1548 DumpRawGranEvent(CurrentProc,creator,event_type,
1550 TSO_SPARKNAME(tso));
1551 /* ^^^ SN (spark name) as optional info */
1552 /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
1553 /* ^^^ spark length as optional info */
1555 ASSERT(IS_IDLE(CurrentProc) || event_type==GR_RESUME ||
1556 (procStatus[CurrentProc]==Fishing && event_type==GR_STOLEN) ||
1557 procStatus[CurrentProc]==Starting);
1558 MAKE_BUSY(CurrentProc);
1562 /* In GrAnSim-Light we always have an idle `virtual' proc.
1563 The semantics of the one-and-only thread queue is different here:
1564 all threads in the queue are running (each on its own virtual processor);
1565 the queue is only needed internally in the simulator to interleave the
1566 reductions of the different processors.
1567 The one-and-only thread queue is sorted by the local clocks of the TSOs.
1569 if(RTSflags.GranFlags.Light)
1571 ASSERT(ThreadQueueHd!=PrelBase_Z91Z93_closure);
1572 ASSERT(TSO_LINK(tso)==PrelBase_Z91Z93_closure);
1574 /* If only one thread in queue so far we emit DESCHEDULE in debug mode */
1575 if(RTSflags.GranFlags.granSimStats &&
1576 (RTSflags.GranFlags.debug & 0x20000) &&
1577 TSO_LINK(ThreadQueueHd)==PrelBase_Z91Z93_closure) {
1578 DumpRawGranEvent(CurrentProc,CurrentProc,GR_DESCHEDULE,
1579 ThreadQueueHd,PrelBase_Z91Z93_closure,0);
/* InsertThread returns true when the TSO became the new queue head. */
1583 if ( InsertThread(tso) ) { /* new head of queue */
1584 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1585 CONTINUETHREAD,tso,PrelBase_Z91Z93_closure,NULL);
1588 if(RTSflags.GranFlags.granSimStats &&
1589 !(( event_type == GR_START || event_type == GR_STARTQ) && RTSflags.GranFlags.labelling) )
1590 DumpRawGranEvent(CurrentProc,creator,event_type,
1592 TSO_SPARKNAME(tso));
1593 /* ^^^ SN (spark name) as optional info */
1594 /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
1595 /* ^^^ spark length as optional info */
1597 /* MAKE_BUSY(CurrentProc); */
/* Busy PE: priority insert searches for the first lower-priority TSO
   (skipping the running head); `count` prices the search below. */
1601 /* Only for Pri Sparking */
1602 if (RTSflags.GranFlags.DoPriorityScheduling && TSO_PRI(tso)!=0)
1603 /* {add_to_spark_queue}vo' jInIHta'; Qu' wa'DIch yIleghQo' */
1604 for (prev = ThreadQueueHd, next = TSO_LINK(ThreadQueueHd), count=0;
1605 (next != PrelBase_Z91Z93_closure) &&
1606 !(found = (TSO_PRI(tso) >= TSO_PRI(next)));
1607 prev = next, next = TSO_LINK(next), count++)
1611 ASSERT(!IS_IDLE(CurrentProc));
1613 /* found can only be rtsTrue if pri sparking enabled */
1615 # if defined(GRAN_CHECK)
1616 ++non_end_add_threads;
1618 /* Add tso to ThreadQueue between prev and next */
1619 TSO_LINK(tso) = next;
1620 if ( next == PrelBase_Z91Z93_closure ) {
1621 ThreadQueueTl = tso;
1623 /* no back link for TSO chain */
1626 if ( prev == PrelBase_Z91Z93_closure ) {
1627 /* Never add TSO as first elem of thread queue; the first */
1628 /* element should be the one that is currently running -- HWL */
1629 # if defined(GRAN_CHECK)
1630 fprintf(stderr,"Qagh: NewThread (w/ PriorityScheduling): Trying to add TSO %#lx (PRI=%d) as first elem of threadQ (%#lx) on proc %u (@ %u)\n",
1631 tso, TSO_PRI(tso), ThreadQueueHd, CurrentProc,
1632 CurrentTime[CurrentProc]);
1635 TSO_LINK(prev) = tso;
1637 } else { /* !found */ /* or not pri sparking! */
1638 /* Add TSO to the end of the thread queue on that processor */
1639 TSO_LINK(ThreadQueueTl) = EVENT_TSO(event);
1640 ThreadQueueTl = EVENT_TSO(event);
/* Charge the priority-search plus basic queueing overhead. */
1642 CurrentTime[CurrentProc] += count *
1643 RTSflags.GranFlags.gran_pri_sched_overhead +
1644 RTSflags.GranFlags.gran_threadqueuetime;
1646 if(RTSflags.GranFlags.DoThreadMigration)
/* event_type+1 maps GR_START->GR_STARTQ etc. (queued variant). */
1649 if(RTSflags.GranFlags.granSimStats &&
1650 !(( event_type == GR_START || event_type == GR_STARTQ) && RTSflags.GranFlags.labelling) )
1651 DumpRawGranEvent(CurrentProc,creator,event_type+1,
1653 TSO_SPARKNAME(tso));
1654 /* ^^^ SN (spark name) as optional info */
1655 /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
1656 /* ^^^ spark length as optional info */
1658 # if defined(GRAN_CHECK)
1659 /* Check if thread queue is sorted. Only for testing, really! HWL */
1660 if ( RTSflags.GranFlags.DoPriorityScheduling && (RTSflags.GranFlags.debug & 0x400) ) {
1661 rtsBool sorted = rtsTrue;
1664 if (ThreadQueueHd==PrelBase_Z91Z93_closure || TSO_LINK(ThreadQueueHd)==PrelBase_Z91Z93_closure) {
1665 /* just 1 elem => ok */
1667 /* Qu' wa'DIch yIleghQo' (ignore first elem)! */
1668 for (prev = TSO_LINK(ThreadQueueHd), next = TSO_LINK(prev);
1669 (next != PrelBase_Z91Z93_closure) ;
1670 prev = next, next = TSO_LINK(prev)) {
1672 (TSO_PRI(prev) >= TSO_PRI(next));
1676 fprintf(stderr,"Qagh: THREADQ on PE %d is not sorted:\n",
1678 G_THREADQ(ThreadQueueHd,0x1);
1683 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadqueuetime;
1687 @InsertThread@, which is only used for GranSim Light, is similar to
1688 @StartThread@ in that it adds a TSO to a thread queue. However, it assumes
1689 that the thread queue is sorted by local clocks and it inserts the TSO at the
1690 right place in the queue. Don't create any event, just insert.
/* Body of InsertThread (GrAnSim Light only; header elided in this
   listing).  Inserts tso into the single thread queue sorted by TSO
   local clocks; creates no events.  Returns true iff tso became the new
   queue head. */
1699 rtsBool found = rtsFalse;
1701 # if defined(GRAN_CHECK)
1702 if ( !RTSflags.GranFlags.Light ) {
1703 fprintf(stderr,"Qagh {InsertThread}Daq: InsertThread should only be used in a GrAnSim Light setup\n");
/* NOTE(review): the message below says {StartThread} but this is
   InsertThread — looks like a copy-paste slip in the error text. */
1707 if ( RTSflags.GranFlags.Light && CurrentProc!=0 ) {
1708 fprintf(stderr,"Qagh {StartThread}Daq: CurrentProc must be 0 in GrAnSim Light setup\n");
1713 /* Idle proc; same for pri spark and basic version */
1714 if(ThreadQueueHd==PrelBase_Z91Z93_closure)
1716 ThreadQueueHd = ThreadQueueTl = tso;
1717 /* MAKE_BUSY(CurrentProc); */
/* Find insertion point: first TSO with a later local clock than tso. */
1721 for (prev = ThreadQueueHd, next = TSO_LINK(ThreadQueueHd), count=0;
1722 (next != PrelBase_Z91Z93_closure) &&
1723 !(found = (TSO_CLOCK(tso) < TSO_CLOCK(next)));
1724 prev = next, next = TSO_LINK(next), count++)
1727 /* found can only be rtsTrue if pri sparking enabled */
1729 /* Add tso to ThreadQueue between prev and next */
1730 TSO_LINK(tso) = next;
1731 if ( next == PrelBase_Z91Z93_closure ) {
1732 ThreadQueueTl = tso;
1734 /* no back link for TSO chain */
1737 if ( prev == PrelBase_Z91Z93_closure ) {
1738 ThreadQueueHd = tso;
1740 TSO_LINK(prev) = tso;
1742 } else { /* !found */ /* or not pri sparking! */
1743 /* Add TSO to the end of the thread queue on that processor */
1744 TSO_LINK(ThreadQueueTl) = tso;
1745 ThreadQueueTl = tso;
1747 return (prev == PrelBase_Z91Z93_closure);
1752 Export work to idle PEs. This function is called from @ReSchedule@ before
1753 dispatching on the current event. @HandleIdlePEs@ iterates over all PEs,
1754 trying to get work for idle PEs. Note, that this is a simplification
1755 compared to GUM's fishing model. We try to compensate for that by making
1756 the cost for stealing work dependent on the number of idle processors and
1757 thereby on the probability with which a randomly sent fish would find work.
/* Body of HandleIdlePEs (header elided in this listing).  For every idle
   PE: first issue a FINDWORK event if it has local advisory sparks;
   otherwise, within the max_fishes limit, try to steal remote work —
   threads first if DoStealThreadsFirst, else sparks, falling back to
   threads.  FetchStrategy gates stealing while fetches are outstanding. */
1764 # if defined(GRAN) && defined(GRAN_CHECK)
1765 if ( RTSflags.GranFlags.Light ) {
1766 fprintf(stderr,"Qagh {HandleIdlePEs}Daq: Should never be entered in GrAnSim Light setup\n");
1772 for(proc = 0; proc < RTSflags.GranFlags.proc; proc++)
1773 if(IS_IDLE(proc)) /* && IS_SPARKING(proc) && IS_STARTING(proc) */
1774 /* First look for local work! */
1775 if (PendingSparksHd[proc][ADVISORY_POOL]!=NULL)
1777 new_event(proc,proc,CurrentTime[proc],
1778 FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
1779 MAKE_SPARKING(proc);
1781 /* Then try to get remote work! */
/* max_fishes==0 means "no limit on outstanding fishes". */
1782 else if ((RTSflags.GranFlags.max_fishes==0 ||
1783 OutstandingFishes[proc]<RTSflags.GranFlags.max_fishes) )
1786 if(RTSflags.GranFlags.DoStealThreadsFirst &&
1787 (RTSflags.GranFlags.FetchStrategy >= 4 || OutstandingFetches[proc] == 0))
1789 if (SurplusThreads > 0l) /* Steal a thread */
1796 if(SparksAvail > 0l &&
1797 (RTSflags.GranFlags.FetchStrategy >= 3 || OutstandingFetches[proc] == 0)) /* Steal a spark */
1800 if (SurplusThreads > 0l &&
1801 (RTSflags.GranFlags.FetchStrategy >= 4 || OutstandingFetches[proc] == 0)) /* Steal a thread */
1807 Steal a spark and schedule moving it to proc. We want to look at PEs in
1808 clock order -- most retarded first. Currently sparks are only stolen from
1809 the @ADVISORY_POOL@ never from the @REQUIRED_POOL@. Eventually, this should
1810 be changed to first steal from the former then from the latter.
1812 We model a sort of fishing mechanism by counting the number of sparks and
1813 threads we are currently stealing.
/* Body of StealSpark (header elided in this listing).  Picks a victim PE
   whose clock is not ahead of the thief's — victims are collected in
   `times`, sorted most-retarded-first, and chosen (randomly if
   RandomSteal) with a bias toward earlier clocks — then steals the first
   exportable spark from its ADVISORY pool, scheduling a MOVESPARK event.
   Un-sparkable entries encountered on the way are pruned.  Victims with
   nothing stealable are removed from `times` and the search repeats. */
1820 sparkq spark, prev, next;
1821 rtsBool stolen = rtsFalse;
1822 TIME times[MAX_PROC], stealtime;
1823 unsigned ntimes=0, i, j;
1824 int first_later, upb, r;
1826 # if defined(GRAN) && defined(GRAN_CHECK)
1827 if ( RTSflags.GranFlags.Light ) {
1828 fprintf(stderr,"Qagh {StealSpark}Daq: Should never be entered in GrAnSim Light setup\n");
1833 /* times shall contain processors from which we may steal sparks */
1834 for(p=0; p < RTSflags.GranFlags.proc; ++p)
1836 PendingSparksHd[p][ADVISORY_POOL] != NULL &&
1837 CurrentTime[p] <= CurrentTime[CurrentProc])
1838 times[ntimes++] = p;
/* Simple selection sort: order candidate PEs by ascending clock. */
1841 for(i=0; i < ntimes; ++i)
1842 for(j=i+1; j < ntimes; ++j)
1843 if(CurrentTime[times[i]] > CurrentTime[times[j]])
1845 unsigned temp = times[i];
1846 times[i] = times[j];
1850 /* Choose random processor to steal spark from; first look at processors */
1851 /* that are earlier than the current one (i.e. proc) */
1854 (first_later < ntimes) && (CurrentTime[times[first_later]] < CurrentTime[proc]);
1858 while (!stolen && (ntimes>0)) {
1859 long unsigned int r, q=0;
/* Restrict the random pick to strictly-earlier PEs when any exist. */
1861 upb = (first_later==0) ? ntimes : first_later;
1863 if (RTSflags.GranFlags.RandomSteal) {
1864 r = lrand48(); /* [0, RAND_MAX] */
1868 /* -- ASSERT(r<=RAND_MAX); */
1869 i = (unsigned int) (r % upb); /* [0, upb) */
1870 /* -- ASSERT((i>=0) && (i<=upb)); */
1872 /* -- ASSERT((p>=0) && (p<MAX_PROC)); */
1874 # if defined(GRAN_CHECK)
1875 if ( RTSflags.GranFlags.debug & 0x2000 )
1876 fprintf(stderr,"RANDOM_STEAL: [index %u of %u] from %u (@ %lu) to %u (@ %lu) (q=%d) [rand value: %lu]\n",
1877 i, ntimes, p, CurrentTime[p], proc, CurrentTime[proc], q, r);
1880 /* Now go through sparkq and steal the first one that should be sparked*/
1881 for(prev=NULL, spark = PendingSparksHd[p][ADVISORY_POOL];
1882 spark != NULL && !stolen;
1885 next = SPARK_NEXT(spark);
1887 if ((IS_IDLE(p) || procStatus[p]==Sparking || procStatus[p]==Fishing) &&
1888 SPARK_NEXT(spark)==NULL)
1890 /* Be social! Don't steal the only spark of an idle processor */
1893 else if(SHOULD_SPARK(SPARK_NODE(spark)))
1895 /* Don't Steal local sparks */
1896 if(!SPARK_GLOBAL(spark))
1902 /* Prepare message for sending spark */
1903 CurrentTime[p] += RTSflags.GranFlags.gran_mpacktime;
1905 if(RTSflags.GranFlags.granSimStats_Sparks)
1906 DumpRawGranEvent(p,(PROC)0,SP_EXPORTED,PrelBase_Z91Z93_closure,
1908 spark_queue_len(p,ADVISORY_POOL));
1910 SPARK_NEXT(spark) = NULL;
/* Delivery time starts at the later of the two PE clocks. */
1912 stealtime = (CurrentTime[p] > CurrentTime[proc] ?
1918 new_event(proc,p /* CurrentProc */,stealtime,
1919 MOVESPARK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,spark);
1921 /* MAKE_BUSY(proc); not yet; busy when TSO in threadq */
1923 ++OutstandingFishes[proc];
1926 ++SPARK_GLOBAL(spark);
1929 CurrentTime[p] += RTSflags.GranFlags.gran_mtidytime;
1931 else /* !(SHOULD_SPARK(SPARK_NODE(spark))) */
1933 if(RTSflags.GranFlags.granSimStats_Sparks)
1934 DumpRawGranEvent(p,(PROC)0,SP_PRUNED,PrelBase_Z91Z93_closure,
1936 spark_queue_len(p,ADVISORY_POOL));
1938 DisposeSpark(spark);
/* Unlink the removed (stolen or pruned) spark from the victim's pool. */
1941 if(spark == PendingSparksHd[p][ADVISORY_POOL])
1942 PendingSparksHd[p][ADVISORY_POOL] = next;
1945 SPARK_NEXT(prev) = next;
1946 } /* for (spark=... iterating over sparkq */
1948 if(PendingSparksHd[p][ADVISORY_POOL] == NULL)
1949 PendingSparksTl[p][ADVISORY_POOL] = NULL;
1951 if (!stolen && (ntimes>0)) { /* nothing stealable from proc p :( */
1952 ASSERT(times[i]==p);
1954 /* remove p from the list (at pos i) */
1955 for (j=i; j+1<ntimes; j++)
1956 times[j] = times[j+1];
1959 /* update index to first proc which is later (or equal) than proc */
1962 (CurrentTime[times[first_later-1]]>CurrentTime[proc]);
1967 # if defined(GRAN_CHECK)
1968 if (stolen && (i!=0)) { /* only for statistics */
1970 ntimes_total += ntimes;
1971 fl_total += first_later;
1978 Steal a thread and schedule moving it to proc.
/* Body of StealThread (header elided in this listing).  Mirrors
   StealSpark but steals a runnable thread: collects candidate victim PEs
   whose clock is not ahead, picks one (randomly if RandomSteal), takes
   the first exportable (unlocked) thread AFTER the running head of the
   victim's runnable queue, and schedules a MOVETHREAD event.  Packing /
   latency / unpacking costs use the 5x / 4x magic factors noted below. */
1987 TIME times[MAX_PROC], stealtime;
1988 unsigned ntimes=0, i, j;
1989 int first_later, upb, r;
1991 /* Hunt for a thread */
1993 # if defined(GRAN) && defined(GRAN_CHECK)
1994 if ( RTSflags.GranFlags.Light ) {
1995 fprintf(stderr,"Qagh {StealThread}: Should never be entered in GrAnSim Light setup\n");
2000 /* times shall contain processors from which we may steal threads */
2001 for(p=0; p < RTSflags.GranFlags.proc; ++p)
2002 if(proc != p && RunnableThreadsHd[p] != PrelBase_Z91Z93_closure &&
2003 CurrentTime[p] <= CurrentTime[CurrentProc])
2004 times[ntimes++] = p;
/* Selection sort: order candidate PEs by ascending clock. */
2007 for(i=0; i < ntimes; ++i)
2008 for(j=i+1; j < ntimes; ++j)
2009 if(CurrentTime[times[i]] > CurrentTime[times[j]])
2011 unsigned temp = times[i];
2012 times[i] = times[j];
2016 /* Choose random processor to steal thread from; first look at processors */
2017 /* that are earlier than the current one (i.e. proc) */
2020 (first_later < ntimes) && (CurrentTime[times[first_later]] < CurrentTime[proc]);
2024 while (!found && (ntimes>0)) {
2025 long unsigned int r, q=0;
2027 upb = (first_later==0) ? ntimes : first_later;
2029 if (RTSflags.GranFlags.RandomSteal) {
2030 r = lrand48(); /* [0, RAND_MAX] */
2034 /* -- ASSERT(r<=RAND_MAX); */
2035 if ( RTSflags.GranFlags.debug & 0x2000 )
2036 fprintf(stderr,"rand value: %d " , r);
2037 i = (unsigned int) (r % upb); /* [0, upb) */
2038 /* -- ASSERT((i>=0) && (i<=upb)); */
2040 /* -- ASSERT((p>=0) && (p<MAX_PROC)); */
2042 # if defined(GRAN_CHECK)
2043 if ( RTSflags.GranFlags.debug & 0x2000 )
2044 fprintf(stderr,"RANDOM_STEAL; [index %u] from %u (@ %lu) to %u (@ %lu) (q=%d)\n",
2045 i, p, CurrentTime[p], proc, CurrentTime[proc], q);
2048 /* Steal the first exportable thread in the runnable queue after the */
2051 if(RunnableThreadsHd[p] != PrelBase_Z91Z93_closure)
/* Skip the running head; also skip locked (non-exportable) threads. */
2053 for(prev = RunnableThreadsHd[p], thread = TSO_LINK(RunnableThreadsHd[p]);
2054 thread != PrelBase_Z91Z93_closure && TSO_LOCKED(thread);
2055 prev = thread, thread = TSO_LINK(thread))
2058 if(thread != PrelBase_Z91Z93_closure) /* Take thread out of runnable queue */
2060 TSO_LINK(prev) = TSO_LINK(thread);
2062 TSO_LINK(thread) = PrelBase_Z91Z93_closure;
2064 if(RunnableThreadsTl[p] == thread)
2065 RunnableThreadsTl[p] = prev;
2067 /* Turn magic constants into params !? -- HWL */
2069 CurrentTime[p] += 5l * RTSflags.GranFlags.gran_mpacktime;
2071 stealtime = (CurrentTime[p] > CurrentTime[proc] ?
2075 + 4l * RTSflags.GranFlags.gran_additional_latency
2076 + 5l * RTSflags.GranFlags.gran_munpacktime;
2078 /* Move the thread; set bitmask to 0 while TSO is `on-the-fly' */
2079 SET_PROCS(thread,Nowhere /* PE_NUMBER(proc) */);
2081 /* Move from one queue to another */
2082 new_event(proc,p,stealtime,MOVETHREAD,thread,PrelBase_Z91Z93_closure,NULL);
2083 /* MAKE_BUSY(proc); not yet; only when thread is in threadq */
2084 ++OutstandingFishes[proc];
2089 if(RTSflags.GranFlags.granSimStats)
2090 DumpRawGranEvent(p,proc,GR_STEALING,thread,
2091 PrelBase_Z91Z93_closure,0);
2093 CurrentTime[p] += 5l * RTSflags.GranFlags.gran_mtidytime;
2101 if (!found && (ntimes>0)) { /* nothing stealable from proc p */
2102 ASSERT(times[i]==p);
2104 /* remove p from the list (at pos i) */
2105 for (j=i; j+1<ntimes; j++)
2106 times[j] = times[j+1];
2110 # if defined(GRAN_CHECK) && defined(GRAN)
2111 if (found && (i!=0)) { /* only for statistics */
/* Simulated cost of a spark steal, approximating GUM's fishing model:
   the expected fish-hop count (proc/2) is scaled down linearly by the
   current number of idle PEs (more idlers => a random fish finds work
   sooner), then multiplied by the per-hop latency.  Result truncated to
   TIME.  NOTE(review): divides by (proc-1) — presumably never called
   with a single-PE configuration; confirm at call sites. */
2118 SparkStealTime(void)
2120 double fishdelay, sparkdelay, latencydelay;
2121 fishdelay = (double)RTSflags.GranFlags.proc/2;
2122 sparkdelay = fishdelay -
2123 ((fishdelay-1)/(double)(RTSflags.GranFlags.proc-1))*(double)idlers();
2124 latencydelay = sparkdelay*((double)RTSflags.GranFlags.gran_latency);
2126 return((TIME)latencydelay);
2128 #endif /* GRAN ; HWL */
2133 %****************************************************************************
2135 \subsection[thread-execution]{Executing Threads}
2137 %****************************************************************************
2139 First a set of functions for handling sparks and spark-queues that are
2140 attached to the processors. Currently, there are two spark-queues per
2144 \item A queue of @REQUIRED@ sparks i.e. these sparks will be definitely
2145 turned into threads ({\em non-discardable\/}). They are mainly used in concurrent
2146 Haskell. We don't use them in GrAnSim.
2147 \item A queue of @ADVISORY@ sparks i.e. they may be turned into threads if
2148 the RTS thinks that it is a good idea. However, these sparks are {\em
2149 discardable}. They will be discarded if the associated closure is
2150 generally not worth creating a new thread (indicated by a tag in the
2151 closure) or they may be pruned during GC if there are too many sparks
2156 EXTDATA_RO(StkO_info);
2157 EXTDATA_RO(TSO_info);
2158 EXTDATA_RO(WorldStateToken_closure);
2160 EXTFUN(EnterNodeCode);
2161 UNVEC(EXTFUN(stopThreadDirectReturn);,EXTDATA(vtbl_stopStgWorld);)
2164 /* ngoqvam che' {GrAnSim} */
2166 /* Slow but relatively reliable method uses stgMallocBytes */
2167 /* Eventually change that to heap allocated sparks. */
2169 /* -------------------------------------------------------------------------
2170 This is the main point where handling granularity information comes into
2172 ------------------------------------------------------------------------- */
2174 #define MAX_RAND_PRI 100
2177 Granularity info transformers.
2178 Applied to the GRAN_INFO field of a spark.
/* Granularity-info transformers applied to a spark's GRAN_INFO field
   when computing its priority in NewSpark:
     ID      -- use the value unchanged;
     INV     -- negate it (inverse priorities);
     IGNORE  -- discard it (always 0);
     RAND    -- replace it with a uniform value in [1, MAX_RAND_PRI]
                (argument unused; relies on lrand48). */
2180 static I_ ID(I_ x) { return(x); };
2181 static I_ INV(I_ x) { return(-x); };
2182 static I_ IGNORE(I_ x) { return (0); };
2183 static I_ RAND(I_ x) { return ((lrand48() % MAX_RAND_PRI) + 1); }
2185 /* NB: size_info and par_info are currently unused (what a shame!) -- HWL */
/* Allocate (via stgMallocBytes) and initialise a spark for `node`.
   Priority is derived from gran_info through exactly one transformer
   chosen by the RTS flags (Random / Inverse / Ignore, else identity).
   Returns NULL without allocating when the priority falls below the
   SparkPriority threshold.  size_info and par_info are currently unused.
   A name of 1 means "inherit the current TSO's spark name".
   (Final return of `newspark` is elided in this listing.) */
2188 NewSpark(node,name,gran_info,size_info,par_info,local)
2190 I_ name, gran_info, size_info, par_info, local;
2195 pri = RTSflags.GranFlags.RandomPriorities ? RAND(gran_info) :
2196 RTSflags.GranFlags.InversePriorities ? INV(gran_info) :
2197 RTSflags.GranFlags.IgnorePriorities ? IGNORE(gran_info) :
/* Below-threshold sparks are discarded up front (SparkPriority!=0). */
2200 if ( RTSflags.GranFlags.SparkPriority!=0 && pri<RTSflags.GranFlags.SparkPriority ) {
2201 if ( RTSflags.GranFlags.debug & 0x40 ) {
2202 fprintf(stderr,"NewSpark: Ignoring spark of priority %u (SparkPriority=%u); node=0x%lx; name=%u\n",
2203 pri, RTSflags.GranFlags.SparkPriority, node, name);
2205 return ((sparkq)NULL);
2208 newspark = (sparkq) stgMallocBytes(sizeof(struct spark), "NewSpark");
2209 SPARK_PREV(newspark) = SPARK_NEXT(newspark) = NULL;
2210 SPARK_NODE(newspark) = node;
2211 SPARK_NAME(newspark) = (name==1) ? TSO_SPARKNAME(CurrentTSO) : name;
2212 SPARK_GRAN_INFO(newspark) = pri;
2213 SPARK_GLOBAL(newspark) = !local; /* Check that with parAt, parAtAbs !!*/
2217 /* To make casm more convenient use this function to label strategies */
/* Label a TSO with a spark name; used from casm to tag strategies.
   The DumpRawGranEvent below is deliberately disabled (`0 && ...`). */
2219 set_sparkname(P_ tso, int name) {
2220 TSO_SPARKNAME(tso) = name ;
2222 if(0 && RTSflags.GranFlags.granSimStats)
2223 DumpRawGranEvent(CurrentProc,99,GR_START,
2224 tso,PrelBase_Z91Z93_closure,
2225 TSO_SPARKNAME(tso));
2226 /* ^^^ SN (spark name) as optional info */
2227 /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
2228 /* ^^^ spark length as optional info */
/* Clear a TSO's spark-name label (counterpart of set_sparkname). */
2233 reset_sparkname(P_ tso) {
2234 TSO_SPARKNAME(tso) = 0;
2239 With PrioritySparking add_to_spark_queue performs an insert sort to keep
2240 the spark queue sorted. Otherwise the spark is just added to the end of
/* add_to_spark_queue: add `spark' to this PE's ADVISORY_POOL spark queue.
   With DoPrioritySparking the spark is insert-sorted by SPARK_GRAN_INFO
   priority (descending); otherwise it is appended at the tail.
   NOTE(review): several lines of the original body are elided in this view. */
2245 add_to_spark_queue(spark)
2250 rtsBool found = rtsFalse;
/* A NULL spark is ignored (early return; elided). */
2252 if ( spark == (sparkq)NULL ) {
2256 if (RTSflags.GranFlags.DoPrioritySparking && (SPARK_GRAN_INFO(spark)!=0) ) {
/* Insert sort: walk until the first element with priority <= ours;
   `count' records the number of steps for the overhead charge below. */
2258 for (prev = NULL, next = PendingSparksHd[CurrentProc][ADVISORY_POOL], count=0;
2260 !(found = (SPARK_GRAN_INFO(spark) >= SPARK_GRAN_INFO(next)));
2261 prev = next, next = SPARK_NEXT(next), count++)
2264 } else { /* 'utQo' */
2266 found = rtsFalse; /* to add it at the end */
/* Splice `spark' into the doubly linked queue between `prev' and `next'. */
2271 SPARK_NEXT(spark) = next;
2272 if ( next == NULL ) {
2273 PendingSparksTl[CurrentProc][ADVISORY_POOL] = spark;
2275 SPARK_PREV(next) = spark;
2277 SPARK_PREV(spark) = prev;
2278 if ( prev == NULL ) {
2279 PendingSparksHd[CurrentProc][ADVISORY_POOL] = spark;
2281 SPARK_NEXT(prev) = spark;
/* Otherwise: append at the tail of the queue. */
2283 } else { /* (RTSflags.GranFlags.DoPrioritySparking && !found) || !DoPrioritySparking */
2284 SPARK_NEXT(spark) = NULL;
2285 SPARK_PREV(spark) = PendingSparksTl[CurrentProc][ADVISORY_POOL];
2286 if (PendingSparksHd[CurrentProc][ADVISORY_POOL] == NULL)
2287 PendingSparksHd[CurrentProc][ADVISORY_POOL] = spark;
2289 SPARK_NEXT(PendingSparksTl[CurrentProc][ADVISORY_POOL]) = spark;
2290 PendingSparksTl[CurrentProc][ADVISORY_POOL] = spark;
/* Charge simulated time for the insert-sort walk (`count' steps). */
2294 if (RTSflags.GranFlags.DoPrioritySparking) {
2295 CurrentTime[CurrentProc] += count * RTSflags.GranFlags.gran_pri_spark_overhead;
/* Debug 0x1000: check that PendingSparksTl really is the last element. */
2298 # if defined(GRAN_CHECK)
2299 if ( RTSflags.GranFlags.debug & 0x1000 ) {
2300 for (prev = NULL, next = PendingSparksHd[CurrentProc][ADVISORY_POOL];
2302 prev = next, next = SPARK_NEXT(next))
2304 if ( (prev!=NULL) && (prev!=PendingSparksTl[CurrentProc][ADVISORY_POOL]) )
2305 fprintf(stderr,"SparkQ inconsistency after adding spark %#lx: (PE %u, pool %u) PendingSparksTl (%#lx) not end of queue (%#lx)\n",
2306 spark,CurrentProc,ADVISORY_POOL,
2307 PendingSparksTl[CurrentProc][ADVISORY_POOL], prev);
/* Debug 0x400: verify the queue is still sorted by priority. */
2311 # if defined(GRAN_CHECK)
2312 /* Check if the sparkq is still sorted. Just for testing, really! */
2313 if ( RTSflags.GranFlags.debug & 0x400 ) {
2314 rtsBool sorted = rtsTrue;
2317 if (PendingSparksHd[CurrentProc][ADVISORY_POOL] == NULL ||
2318 SPARK_NEXT(PendingSparksHd[CurrentProc][ADVISORY_POOL]) == NULL ) {
2319 /* just 1 elem => ok */
2321 for (prev = PendingSparksHd[CurrentProc][ADVISORY_POOL],
2322 next = SPARK_NEXT(PendingSparksHd[CurrentProc][ADVISORY_POOL]);
2324 prev = next, next = SPARK_NEXT(next)) {
2326 (SPARK_GRAN_INFO(prev) >= SPARK_GRAN_INFO(next));
2330 fprintf(stderr,"Warning: SPARKQ on PE %d is not sorted:\n",
2332 G_SPARKQ(PendingSparksHd[CurrentProc][ADVISORY_POOL],1);
2342 /* A SP_PRUNED line should be dumped when this is called from pruning or */
2343 /* discarding a spark! */
/* DisposeSparkQ: dispose of an entire spark queue, recursing down the
   SPARK_NEXT chain before freeing each element (freeing lines elided). */
2352 DisposeSparkQ(spark)
2358 DisposeSparkQ(SPARK_NEXT(spark));
/* Sanity check: the global SparksAvail counter must never go negative. */
2361 if (SparksAvail < 0)
2362 fprintf(stderr,"DisposeSparkQ: SparksAvail<0 after disposing sparkq @ 0x%lx\n", spark);
2371 % {GrAnSim}vaD (Notes on GrAnSim) -- HWL:
2375 \paragraph{Notes on GrAnSim:}
2376 The following routines are for handling threads. Currently, we use an
2377 unfair scheduling policy in GrAnSim. Thus there are no explicit functions for
2378 scheduling here. If other scheduling policies are added to the system that
2379 code should go in here.
2382 /* Create a new TSO, with the specified closure to enter and thread type */
/* NewThread: allocate and initialise a TSO (and, further below, its first
   stack object) for entering `topClosure'.  Two signatures exist under
   different #if branches (elided): with and without a priority argument. */
2386 NewThread(topClosure, type, pri)
2392 NewThread(topClosure, type)
2399 # if defined(GRAN) && defined(GRAN_CHECK)
2400 if ( RTSflags.GranFlags.Light && CurrentProc!=0) {
2401 fprintf(stderr,"Qagh {NewThread}Daq: CurrentProc must be 0 in GrAnSim Light setup\n");
/* Reuse a TSO from the free list if one is available ... */
2405 if (AvailableTSO != PrelBase_Z91Z93_closure) {
2408 SET_PROCS(tso,ThisPE); /* Allocate it locally! */
2410 AvailableTSO = TSO_LINK(tso);
/* ... otherwise allocate from the heap (GC trigger branch elided). */
2411 } else if (SAVE_Hp + TSO_HS + TSO_CTS_SIZE > SAVE_HpLim) {
2414 ALLOC_TSO(TSO_HS,BYTES_TO_STGWORDS(sizeof(STGRegisterTable)),
2415 BYTES_TO_STGWORDS(sizeof(StgDouble)));
2417 SAVE_Hp += TSO_HS + TSO_CTS_SIZE;
2418 SET_TSO_HDR(tso, TSO_info, CCC);
/* Initialise the TSO fields. */
2421 TSO_LINK(tso) = PrelBase_Z91Z93_closure;
2423 TSO_PRI(tso) = pri; /* Priority of that TSO -- HWL */
2425 #if defined(PROFILING) || defined(PAR)
2426 TSO_CCC(tso) = (CostCentre)STATIC_CC_REF(CC_MAIN);
2428 TSO_NAME(tso) = (P_) INFO_PTR(topClosure); /* A string would be nicer -- JSM */
2429 TSO_ID(tso) = threadId++;
2430 TSO_TYPE(tso) = type;
2431 TSO_PC1(tso) = TSO_PC2(tso) = EnterNodeCode;
2432 TSO_ARG1(tso) = /* TSO_ARG2(tso) = */ 0;
2433 TSO_SWITCH(tso) = NULL;
/* Zero the per-thread simulation statistics. */
2440 #if defined(GRAN) || defined(PAR)
2441 TSO_SPARKNAME(tso) = 0;
2443 TSO_STARTEDAT(tso) = CurrentTime[CurrentProc];
2445 TSO_STARTEDAT(tso) = CURRENT_TIME;
2447 TSO_EXPORTED(tso) = 0;
2448 TSO_BASICBLOCKS(tso) = 0;
2449 TSO_ALLOCS(tso) = 0;
2450 TSO_EXECTIME(tso) = 0;
2451 TSO_FETCHTIME(tso) = 0;
2452 TSO_FETCHCOUNT(tso) = 0;
2453 TSO_BLOCKTIME(tso) = 0;
2454 TSO_BLOCKCOUNT(tso) = 0;
2455 TSO_BLOCKEDAT(tso) = 0;
2456 TSO_GLOBALSPARKS(tso) = 0;
2457 TSO_LOCALSPARKS(tso) = 0;
2459 if (RTSflags.GranFlags.Light)
2460 TSO_CLOCK(tso) = TSO_STARTEDAT(tso); /* local clock */
2466 * set pc, Node (R1), liveness
2468 CurrentRegTable = TSO_INTERNAL_PTR(tso);
2469 SAVE_Liveness = LIVENESS_R1;
2470 SAVE_R1.p = topClosure;
2473 if (type == T_MAIN) {
/* Stack object: reuse from AvailableStack free list or heap-allocate. */
2477 if (AvailableStack != PrelBase_Z91Z93_closure) {
2478 stko = AvailableStack;
2480 SET_PROCS(stko,ThisPE);
2482 AvailableStack = STKO_LINK(AvailableStack);
2483 } else if (SAVE_Hp + STKO_HS + RTSflags.ConcFlags.stkChunkSize > SAVE_HpLim) {
2486 /* ALLOC_STK(STKO_HS,STKO_CHUNK_SIZE,0); use RTSflag now*/
2487 ALLOC_STK(STKO_HS,RTSflags.ConcFlags.stkChunkSize,0);
2489 SAVE_Hp += STKO_HS + RTSflags.ConcFlags.stkChunkSize;
2490 SET_STKO_HDR(stko, StkO_info, CCC);
/* Initialise the A and B stack pointers to the chunk's bottom. */
2492 STKO_SIZE(stko) = RTSflags.ConcFlags.stkChunkSize + STKO_VHS;
2493 STKO_SpB(stko) = STKO_SuB(stko) = STKO_BSTK_BOT(stko) + BREL(1);
2494 STKO_SpA(stko) = STKO_SuA(stko) = STKO_ASTK_BOT(stko) + AREL(1);
2495 STKO_LINK(stko) = PrelBase_Z91Z93_closure;
2496 STKO_RETURN(stko) = NULL;
2502 STKO_ADEP(stko) = STKO_BDEP(stko) = 0;
/* The main thread gets the WorldStateToken pushed on its A stack. */
2505 if (type == T_MAIN) {
2506 STKO_SpA(stko) -= AREL(1);
2507 *STKO_SpA(stko) = (P_) WorldStateToken_closure;
2510 SAVE_Ret = (StgRetAddr) UNVEC(stopThreadDirectReturn,vtbl_stopStgWorld);
2514 QP_Event1(do_qp_prof > 1 ? "*A" : "*G", tso);
2516 #if defined(GRAN_CHECK)
2517 tot_sq_len += spark_queue_len(CurrentProc,ADVISORY_POOL);
2525 In GrAnSim the @EndThread@ function is the place where statistics about the
2526 simulation are printed. I guess that could be moved into @main.lc@.
/* EndThread: terminate the current thread; dump simulation statistics,
   bookkeep thread counts, recycle the TSO and stack object onto the
   free lists, and re-enter the scheduler.  Several #if branches for
   GRAN vs PAR vs plain CONCURRENT are elided in this view. */
2531 EndThread(STG_NO_ARGS)
2535 TIME now = CURRENT_TIME;
2539 if (RTSflags.TickyFlags.showTickyStats) {
2540 fprintf(RTSflags.TickyFlags.tickyFile,
2541 "Thread %d (%lx)\n\tA stack max. depth: %ld words\n",
2542 TSO_ID(CurrentTSO), TSO_NAME(CurrentTSO), TSO_AHWM(CurrentTSO));
2543 fprintf(RTSflags.TickyFlags.tickyFile,
2544 "\tB stack max. depth: %ld words\n",
2545 TSO_BHWM(CurrentTSO));
2550 QP_Event1("G*", CurrentTSO);
2554 ASSERT(CurrentTSO == ThreadQueueHd);
2556 if (RTSflags.GranFlags.DoThreadMigration)
/* When the main thread dies, drain every PE's runnable queue so the
   profile ends with consistent running/runnable/blocked bookkeeping. */
2559 if(TSO_TYPE(CurrentTSO)==T_MAIN)
2563 for(i=0; i < RTSflags.GranFlags.proc; ++i) {
2565 while(RunnableThreadsHd[i] != PrelBase_Z91Z93_closure)
2567 /* We schedule runnable threads before killing them to */
2568 /* make the job of bookkeeping the running, runnable, */
2569 /* blocked threads easier for scripts like gr2ps -- HWL */
2571 if (RTSflags.GranFlags.granSimStats && !is_first &&
2572 (!RTSflags.GranFlags.Light || RTSflags.GranFlags.debug & 0x20000) )
2573 DumpRawGranEvent(i,(PROC)0,GR_SCHEDULE,
2574 RunnableThreadsHd[i],
2575 PrelBase_Z91Z93_closure,0);
2576 if (!RTSflags.GranFlags.granSimStats_suppressed &&
2577 TSO_TYPE(RunnableThreadsHd[i])!=T_MAIN)
2578 DumpGranInfo(i,RunnableThreadsHd[i],rtsTrue);
2579 RunnableThreadsHd[i] = TSO_LINK(RunnableThreadsHd[i]);
2580 is_first = rtsFalse;
2584 ThreadQueueHd = PrelBase_Z91Z93_closure;
2585 /* Printing of statistics has been moved into end_gr_simulation */
/* With labelling on, retroactively dump a GR_START carrying the spark name. */
2588 if (RTSflags.GranFlags.labelling && RTSflags.GranFlags.granSimStats &&
2589 !RTSflags.GranFlags.granSimStats_suppressed)
2590 DumpStartEventAt(TSO_STARTEDAT(CurrentTSO),where_is(CurrentTSO),0,GR_START,
2591 CurrentTSO,PrelBase_Z91Z93_closure,
2592 TSO_SPARKNAME(CurrentTSO));
2593 /* ^^^ SN (spark name) as optional info */
2594 /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
2595 /* ^^^ spark length as optional info */
2597 if (RTSflags.GranFlags.granSimStats &&
2598 !RTSflags.GranFlags.granSimStats_suppressed)
2599 DumpGranInfo(CurrentProc,CurrentTSO,
2600 TSO_TYPE(CurrentTSO) != T_ADVISORY);
2602 if (RTSflags.GranFlags.granSimStats_Binary &&
2603 TSO_TYPE(CurrentTSO)==T_MAIN &&
2604 !RTSflags.GranFlags.granSimStats_suppressed)
2605 grterminate(CurrentTime[CurrentProc]);
2607 if (TSO_TYPE(CurrentTSO)!=T_MAIN)
2608 ActivateNextThread(CurrentProc);
2610 /* Note ThreadQueueHd is Nil when the main thread terminates
2611 if(ThreadQueueHd != PrelBase_Z91Z93_closure)
2613 if (RTSflags.GranFlags.granSimStats && !RTSflags.GranFlags.granSimStats_suppressed &&
2614 (!RTSflags.GranFlags.Light || RTSflags.GranFlags.debug & 0x20000) )
2615 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
2616 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadscheduletime;
/* PAR branch (note: uses `thisPE' rather than CurrentProc here). */
2623 if (RTSflags.ParFlags.granSimStats) {
2624 TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO);
2625 DumpGranInfo(thisPE, CurrentTSO, TSO_TYPE(CurrentTSO) != T_ADVISORY);
/* Thread-count bookkeeping; T_MAIN case terminates the whole run. */
2629 switch (TSO_TYPE(CurrentTSO)) {
2631 required_thread_count--;
2634 if (GRANSIMSTATS_BINARY)
2638 longjmp(scheduler_loop, -1); /* i.e. the world comes to an end NOW */
2640 ReSchedule(0); /* i.e. the world will eventually come to an end */
2644 required_thread_count--;
2648 advisory_thread_count--;
2656 fprintf(stderr, "EndThread: %x unknown\n", TSO_TYPE(CurrentTSO));
2660 /* Reuse stack object space */
2661 ASSERT(STKO_LINK(SAVE_StkO) == PrelBase_Z91Z93_closure);
2662 STKO_LINK(SAVE_StkO) = AvailableStack;
2663 AvailableStack = SAVE_StkO;
/* Reuse the TSO itself via the AvailableTSO free list. */
2665 TSO_LINK(CurrentTSO) = AvailableTSO;
2666 AvailableTSO = CurrentTSO;
2667 CurrentTSO = PrelBase_Z91Z93_closure;
2668 CurrentRegTable = NULL;
2671 /* NB: Now ThreadQueueHd is either the next runnable thread on this */
2672 /* proc or it's PrelBase_Z91Z93_closure. In the latter case, a FINDWORK will be */
2673 /* issued by ReSchedule. */
2674 ReSchedule(SAME_THREAD); /* back for more! */
2676 ReSchedule(0); /* back for more! */
2682 %****************************************************************************
2684 \subsection[thread-blocking]{Local Blocking}
2686 %****************************************************************************
2690 #if defined(GRAN_COUNT)
2691 /* Some non-essential maybe-useful statistics-gathering */
/* Trivial bump functions for update/PAP counters; the counters
   (nUPDs etc.) are globals declared elsewhere in this file. */
2692 void CountnUPDs() { ++nUPDs; }
2693 void CountnUPDs_old() { ++nUPDs_old; }
2694 void CountnUPDs_new() { ++nUPDs_new; }
2696 void CountnPAPs() { ++nPAPs; }
2699 EXTDATA_RO(BQ_info);
2702 /* NB: non-GRAN version ToDo
2704 * AwakenBlockingQueue awakens a list of TSOs and FBQs.
/* Head of the list of BlockedFetch nodes collected while waking a BQ. */
2707 P_ PendingFetches = PrelBase_Z91Z93_closure;
/* Walk the blocking queue: TSOs are moved onto the runnable queue,
   BlockedFetch nodes onto PendingFetches.  (Non-GrAnSim version.) */
2710 AwakenBlockingQueue(bqe)
2717 TIME now = CURRENT_TIME;
2722 while (bqe != PrelBase_Z91Z93_closure) {
/* IS_MUTABLE distinguishes queue elements by closure type. */
2724 while (IS_MUTABLE(INFO_PTR(bqe))) {
2725 switch (INFO_TYPE(INFO_PTR(bqe))) {
2729 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
/* Account blocked/fetch time for the TSO being resumed. */
2732 if (RTSflags.ParFlags.granSimStats) {
2733 DumpGranEvent(GR_RESUMEQ, bqe);
2734 switch (TSO_QUEUE(bqe)) {
2736 TSO_BLOCKTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
2739 TSO_FETCHTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
2743 fprintf(stderr, "ABQ: TSO_QUEUE invalid.\n");
/* Thread the TSO onto the runnable queue. */
2748 if (last_tso == NULL) {
2749 if (RunnableThreadsHd == PrelBase_Z91Z93_closure) {
2750 RunnableThreadsHd = bqe;
2752 TSO_LINK(RunnableThreadsTl) = bqe;
2756 bqe = TSO_LINK(bqe);
/* BlockedFetch case: move the node onto the PendingFetches list. */
2760 next = BF_LINK(bqe);
2761 BF_LINK(bqe) = PendingFetches;
2762 PendingFetches = bqe;
2764 if (last_tso != NULL)
2765 TSO_LINK(last_tso) = next;
2768 fprintf(stderr, "Unexpected IP (%#lx) in blocking queue at %#lx\n",
2769 INFO_PTR(bqe), (W_) bqe);
/* Terminate the runnable queue at the last woken TSO. */
2776 if (last_tso != NULL) {
2777 RunnableThreadsTl = last_tso;
2779 TSO_LINK(last_tso) = PrelBase_Z91Z93_closure;
2787 # if defined(GRAN_CHECK)
2789 /* First some useful test functions */
2791 EXTFUN(RBH_Save_0_info);
2792 EXTFUN(RBH_Save_1_info);
2793 EXTFUN(RBH_Save_2_info);
/* NOTE(review): function header elided in this view — presumably the body
   of a blocking-queue printing routine (cf. PRINT_BQ used in CHECK_BQ);
   it walks a BQ and prints each TSO as local (L) or global (G). */
2801 char str[80], str0[80];
2803 fprintf(stderr,"\n[PE %d] @ %lu BQ: ",
2804 CurrentProc,CurrentTime[CurrentProc]);
2805 if ( bqe == PrelBase_Z91Z93_closure ) {
2806 fprintf(stderr," NIL.\n");
2809 if ( bqe == NULL ) {
2810 fprintf(stderr," NULL\n");
2813 while (IS_MUTABLE(INFO_PTR(bqe))) { /* This distinguishes TSOs from */
2814 W_ proc; /* RBH_Save_? closures! */
2816 /* Find where the tso lives */
2817 proc = where_is(bqe);
2818 it = INFO_TYPE(INFO_PTR(bqe));
2832 if(proc == CurrentProc)
2833 fprintf(stderr," %#lx (%x) L %s,", bqe, TSO_ID(bqe), str0);
2835 fprintf(stderr," %#lx (%x) G (PE %d) %s,", bqe, TSO_ID(bqe), proc, str0);
2840 bqe = TSO_LINK(bqe);
2843 bqe = TSO_LINK(bqe);
2846 bqe = PrelBase_Z91Z93_closure;
2849 /* TSO_LINK(last_tso) = PrelBase_Z91Z93_closure; */
/* Queue terminator: NIL or one of the RBH_Save_? closures. */
2851 if ( bqe == PrelBase_Z91Z93_closure )
2852 fprintf(stderr," NIL.\n");
2854 (INFO_PTR(bqe) == (P_) RBH_Save_0_info) ||
2855 (INFO_PTR(bqe) == (P_) RBH_Save_1_info) ||
2856 (INFO_PTR(bqe) == (P_) RBH_Save_2_info) )
2857 fprintf(stderr," RBH.\n");
2858 /* fprintf(stderr,"\n%s\n",str); */
/* CHECK_BQ: GRAN_CHECK consistency test — verifies that `tso' lives on
   `proc' and is not already a member of the blocking queue of `node'. */
2862 CHECK_BQ(node, tso, proc)
2869 PROC p = where_is(tso);
2870 rtsBool ok = rtsTrue;
/* NOTE(review): this format string has 4 conversions (%#lx %x %d %d)
   but only 3 arguments follow — the trailing CurrentProc appears to be
   missing; verify against the full source. */
2873 fprintf(stderr,"ERROR in CHECK_BQ: CurrentTSO %#lx (%x) on proc %d but CurrentProc = %d\n",
2874 tso, TSO_ID(tso), proc);
/* Locate the blocking queue for each kind of black-hole-ish closure. */
2878 switch (INFO_TYPE(INFO_PTR(node))) {
2880 case INFO_BH_U_TYPE:
2881 bqe = (P_) BQ_ENTRIES(node);
2882 return (rtsTrue); /* BHs don't have BQs */
2885 bqe = (P_) BQ_ENTRIES(node);
2887 case INFO_FMBQ_TYPE:
2888 fprintf(stderr,"CHECK_BQ: ERROR: FMBQ closure (%#lx) found in GrAnSim (TSO=%#lx (%x))\n",
2889 node, tso, TSO_ID(tso));
2892 case INFO_SPEC_RBH_TYPE:
2893 bqe = (P_) SPEC_RBH_BQ(node);
2895 case INFO_GEN_RBH_TYPE:
2896 bqe = (P_) GEN_RBH_BQ(node);
/* Default case: `node' was not a black hole at all — report and go on. */
2901 I_ size, ptrs, nonptrs, vhs;
2902 char info_hdr_ty[80];
2904 fprintf(stderr, "CHECK_BQ: thought %#lx was a black hole (IP %#lx)",
2905 node, INFO_PTR(node));
2906 info_ptr = get_closure_info(node,
2907 &size, &ptrs, &nonptrs, &vhs,
2909 fprintf(stderr, " %s\n",info_hdr_ty);
2910 /* G_PRINT_NODE(node); */
2912 /* EXIT(EXIT_FAILURE); */
/* Walk the BQ: complain if `tso' is found in it already. */
2916 while (IS_MUTABLE(INFO_PTR(bqe))) { /* This distinguishes TSOs from */
2917 W_ proc; /* RBH_Save_? closures! */
2919 /* Find where the tso lives */
2920 proc = where_is(bqe);
2921 it = INFO_TYPE(INFO_PTR(bqe));
2924 fprintf(stderr,"ERROR in CHECK_BQ [Node = 0x%lx, PE %d]: TSO %#lx (%x) already in BQ: ",
2925 node, proc, tso, TSO_ID(tso));
2926 PRINT_BQ(BQ_ENTRIES(node));
2930 bqe = TSO_LINK(bqe);
2934 /* End of test functions */
2935 # endif /* GRAN_CHECK */
2937 /* This version of AwakenBlockingQueue has been originally taken from the
2938 GUM code. It is now assimilated into GrAnSim */
2940 /* Note: This version assumes a pointer to a blocking queue rather than a
2941 node with an attached blocking queue as input */
/* GrAnSim version: walks the BQ and schedules a RESUMETHREAD event for
   every TSO, charging local or global unblock costs as appropriate. */
2944 AwakenBlockingQueue(bqe)
2947 /* P_ tso = (P_) BQ_ENTRIES(node); */
/* Accept either a BQ closure, an RBH, or a raw TSO chain as input. */
2956 /* Compatibility mode with old libraries! 'oH jIvoQmoH */
2957 if (IS_BQ_CLOSURE(bqe))
2958 bqe = (P_)BQ_ENTRIES(bqe);
2959 else if ( INFO_TYPE(INFO_PTR(bqe)) == INFO_SPEC_RBH_TYPE )
2960 bqe = (P_)SPEC_RBH_BQ(bqe);
2961 else if ( INFO_TYPE(INFO_PTR(bqe)) == INFO_GEN_RBH_TYPE )
2962 bqe = (P_)GEN_RBH_BQ(bqe);
2964 # if defined(GRAN_CHECK)
2965 if ( RTSflags.GranFlags.debug & 0x100 ) {
2970 # if defined(GRAN_COUNT)
2972 if (tso != PrelBase_Z91Z93_closure)
2976 # if defined(GRAN_CHECK)
2977 if (RTSflags.GranFlags.debug & 0x100)
2978 fprintf(stderr,"----- AwBQ: ");
2981 while (IS_MUTABLE(INFO_PTR(bqe))) { /* This distinguishes TSOs from */
2982 W_ proc; /* RBH_Save_? closures! */
2983 ASSERT(INFO_TYPE(INFO_PTR(bqe)) == INFO_TSO_TYPE);
2986 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
2988 # if defined(GRAN_COUNT)
2992 /* Find where the tso lives */
2993 proc = where_is(bqe);
/* Local unblock is cheap; a remote one adds pack/latency/tidy costs. */
2995 if(proc == CurrentProc) {
2996 notifytime = CurrentTime[CurrentProc] + RTSflags.GranFlags.gran_lunblocktime;
2998 /* A better way of handling this would be to introduce a
2999 GLOBALUNBLOCK event which is created here. -- HWL */
3000 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
3001 notifytime = STG_MAX(CurrentTime[CurrentProc],CurrentTime[proc]) +
3002 RTSflags.GranFlags.gran_gunblocktime;
3003 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
3004 /* new_event(proc, CurrentProc, notifytime,
3005 GLOBALUNBLOCK,bqe,PrelBase_Z91Z93_closure,NULL); */
3007 /* cost the walk over the queue */
3008 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_lunblocktime;
3009 /* GrAnSim Light: make blocked TSO aware of the time that passed */
3010 if (RTSflags.GranFlags.Light)
3011 TSO_CLOCK(bqe) = notifytime;
3012 /* and create a resume message */
3013 new_event(proc, CurrentProc, notifytime,
3014 RESUMETHREAD,bqe,PrelBase_Z91Z93_closure,NULL);
3016 if (notifytime<TimeOfNextEvent)
3017 TimeOfNextEvent = notifytime;
3019 # if defined(GRAN_CHECK)
3020 if (RTSflags.GranFlags.debug & 0x100) {
3021 fprintf(stderr," TSO %x (PE %d) %s,",
3022 TSO_ID(bqe), proc, ( (proc==CurrentProc) ? "L" : "G") );
/* Advance to the next TSO; the previous one is unlinked from the BQ. */
3027 bqe = TSO_LINK(bqe);
3028 TSO_LINK(last) = PrelBase_Z91Z93_closure;
3032 /* This was once used in a !do_gr_sim setup. Now only GrAnSim setup is */
3034 else /* Check if this is still valid for non-GrAnSim code -- HWL */
3036 if (ThreadQueueHd == PrelBase_Z91Z93_closure)
3037 ThreadQueueHd = bqe;
3039 TSO_LINK(ThreadQueueTl) = bqe;
3041 if (RunnableThreadsHd == PrelBase_Z91Z93_closure)
3042 RunnableThreadsHd = tso;
3044 TSO_LINK(RunnableThreadsTl) = tso;
3047 while(TSO_LINK(bqe) != PrelBase_Z91Z93_closure) {
3048 assert(TSO_INTERNAL_PTR(bqe)->rR[0].p == node);
3051 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
3054 bqe = TSO_LINK(bqe);
3057 assert(TSO_INTERNAL_PTR(bqe)->rR[0].p == node);
3060 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
3066 if (RTSflags.GranFlags.debug & 0x100)
3067 fprintf(stderr,".\n");
3070 /* ngo' {GrAnSim}Qo' ngoq: RunnableThreadsTl = tso; */
3079 /* Different interface for GRAN */
/* NOTE(review): function headers elided — these look like the bodies of
   two yield/suspend entry points: each saves the liveness mask and the
   continuation PC, emits a "GR" QP event, then re-enters the scheduler. */
3084 SAVE_Liveness = liveness;
3085 TSO_PC1(CurrentTSO) = Continue;
3087 QP_Event1("GR", CurrentTSO);
3089 ReSchedule(SAME_THREAD);
/* Second variant: liveness and the same-thread bit are packed in `args'. */
3098 SAVE_Liveness = args >> 1;
3099 TSO_PC1(CurrentTSO) = Continue;
3101 QP_Event1("GR", CurrentTSO);
3104 if (RTSflags.ParFlags.granSimStats) {
3105 /* Note that CURRENT_TIME may perform an unsafe call */
3106 TSO_EXECTIME(CurrentTSO) += CURRENT_TIME - TSO_BLOCKEDAT(CurrentTSO);
3109 ReSchedule(args & 1);
3116 %****************************************************************************
3118 \subsection[gr-fetch]{Fetching Nodes (GrAnSim only)}
3120 %****************************************************************************
3122 The following GrAnSim routines simulate the fetching of nodes from a remote
3123 processor. We use a 1 word bitmask to indicate on which processor a node is
3124 lying. Thus, moving or copying a node from one processor to another just
3125 requires an appropriate change in this bitmask (using @SET_GA@).
3126 Additionally, the clocks have to be updated.
3128 A special case arises when the node that is needed by processor A has been
3129 moved from a processor B to a processor C between sending out a @FETCH@
3130 (from A) and its arrival at B. In that case the @FETCH@ has to be forwarded
3136 /* ngoqvam che' {GrAnSim}! */
3138 /* Fetch node "node" to processor "p" */
/* FetchNode: simulate moving/copying `node' from PE `from' to PE `to'
   by adjusting the PROCS bitmask; normal forms are copied (bit OR-ed),
   other closures are moved (mask replaced).  Never used when
   DoGUMMFetching (packet fetching) is enabled. */
3141 FetchNode(node,from,to)
3145 /* In case of RTSflags.GranFlags.DoGUMMFetching this fct should never be
3146 entered! Instead, UnpackGraph is used in ReSchedule */
3149 ASSERT(to==CurrentProc);
3151 # if defined(GRAN) && defined(GRAN_CHECK)
3152 if ( RTSflags.GranFlags.Light ) {
3153 fprintf(stderr,"Qagh {FetchNode}Daq: Should never be entered in GrAnSim Light setup\n");
3158 if ( RTSflags.GranFlags.DoGUMMFetching ) {
3159 fprintf(stderr,"Qagh: FetchNode should never be entered with DoGUMMFetching\n");
3163 /* Now fetch the children */
3164 if (!IS_LOCAL_TO(PROCS(node),from) &&
3165 !IS_LOCAL_TO(PROCS(node),to) )
3168 if(IS_NF(INFO_PTR(node))) /* Old: || IS_BQ(node) */
3169 PROCS(node) |= PE_NUMBER(to); /* Copy node */
3171 PROCS(node) = PE_NUMBER(to); /* Move node */
3176 /* --------------------------------------------------
3177 Cost of sending a packet of size n = C + P*n
3178 where C = packet construction constant,
3179 P = cost of packing one word into a packet
3180 [Should also account for multiple packets].
3181 -------------------------------------------------- */
3184 0 ... ok (FETCHREPLY event with a buffer containing addresses of the
3185 nearby graph has been scheduled)
3186 1 ... node is already local (fetched by somebody else; no event is
3188     2 ... fetch request has been forwarded to the PE that now contains the
3190 3 ... node is a black hole (BH, BQ or RBH); no event is scheduled, and
3191 the current TSO is put into the blocking queue of that node
3192 4 ... out of heap in PackNearbyGraph; GC should be triggered in calling
3193 function to guarantee that the tso and node inputs are valid
3194 (they may be moved during GC).
3196 ToDo: Symbolic return codes; clean up code (separate GUMMFetching from
3197       single node fetching).
/* HandleFetchRequest: service a FETCH for `node' issued by `tso' on PE `p'.
   Return codes are documented in the comment block above this function.
   Three cases: node already local to p; node still here (reply or block
   on a black hole); node moved on (forward the FETCH). */
3201 HandleFetchRequest(node,p,tso)
3205 ASSERT(!RTSflags.GranFlags.Light);
/* Case 1: somebody already moved the node to the demanding PE. */
3207 if (IS_LOCAL_TO(PROCS(node),p) ) /* Somebody else moved node already => */
3209 # if defined(GRAN_CHECK)
3210 if (RTSflags.GranFlags.debug & 0x100 ) {
3212 I_ size, ptrs, nonptrs, vhs;
3213 char info_hdr_ty[80];
3215 info_ptr = get_closure_info(node,
3216 &size, &ptrs, &nonptrs, &vhs,
3218 fprintf(stderr,"Warning: HandleFetchRequest entered with local node %#lx (%s) (PE %d)\n",
3219 node,info_hdr_ty,p);
3222 if (RTSflags.GranFlags.DoGUMMFetching) {
3226 /* Create a 1-node-buffer and schedule a FETCHREPLY now */
3227 graph = PackOneNode(node, tso, &size);
3228 new_event(p,CurrentProc,CurrentTime[CurrentProc],
3229 FETCHREPLY,tso,graph,NULL);
3231 new_event(p,CurrentProc,CurrentTime[CurrentProc],
3232 FETCHREPLY,tso,node,NULL);
/* Case 2: node is still on this PE. */
3236 else if (IS_LOCAL_TO(PROCS(node),CurrentProc) ) /* Is node still here? */
3238 if(RTSflags.GranFlags.DoGUMMFetching) { /* {GUM}vo' ngoqvam vInIHta' (code from GUM) */
/* Black hole: the requester blocks instead of getting a reply. */
3242 if (IS_BLACK_HOLE(INFO_PTR(node))) { /* block on BH or RBH */
3243 new_event(p,CurrentProc,CurrentTime[p],
3244 GLOBALBLOCK,tso,node,NULL);
3245 /* Note: blockFetch is done when handling GLOBALBLOCK event */
3246 /* When this thread is reawoken it does the usual: it tries to
3247 enter the updated node and issues a fetch if it's remote.
3248 It has forgotten that it has sent a fetch already (i.e. a
3249 FETCHNODE is swallowed by a BH, leaving the thread in a BQ */
3250 --OutstandingFetches[p];
3254 # if defined(GRAN_CHECK)
3255 if (!RTSflags.GranFlags.DoReScheduleOnFetch && (tso != RunnableThreadsHd[p])) {
3256 fprintf(stderr,"Qagh {HandleFetchRequest}Daq: tso 0x%lx (%x) not at head of proc %d (0x%lx)\n",
3257 tso, TSO_ID(tso), p, RunnableThreadsHd[p]);
/* Pack the nearby graph; a NULL result means the heap is exhausted. */
3262 if ((graph = PackNearbyGraph(node, tso, &size)) == NULL)
3263 return (4); /* out of heap */
3265 /* Actual moving/copying of node is done on arrival; see FETCHREPLY */
3266 /* Send a reply to the originator */
3267 /* ToDo: Replace that by software costs for doing graph packing! */
3268 CurrentTime[CurrentProc] += size * RTSflags.GranFlags.gran_mpacktime;
3270 new_event(p,CurrentProc,CurrentTime[CurrentProc]+RTSflags.GranFlags.gran_latency,
3271 FETCHREPLY,tso,graph,NULL);
3273 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
3275 } else { /* incremental (single closure) fetching */
3276 /* Actual moving/copying of node is done on arrival; see FETCHREPLY */
3277 /* Send a reply to the originator */
3278 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
3280 new_event(p,CurrentProc,CurrentTime[CurrentProc]+RTSflags.GranFlags.gran_latency,
3281 FETCHREPLY,tso,node,NULL);
3283 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
/* Case 3: the node migrated — forward the FETCH to its new home. */
3287 else /* Qu'vatlh! node has been grabbed by another proc => forward */
3289 PROC p_new = where_is(node);
3292 # if defined(GRAN_CHECK)
3293 if (RTSflags.GranFlags.debug & 0x2)
3294 fprintf(stderr,"Qu'vatlh! node %#lx has been grabbed by PE %d (current=%d; demander=%d) @ %d\n",
3295 node,p_new,CurrentProc,p,CurrentTime[CurrentProc]);
3297 /* Prepare FORWARD message to proc p_new */
3298 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
3300 fetchtime = STG_MAX(CurrentTime[CurrentProc],CurrentTime[p_new]) +
3301 RTSflags.GranFlags.gran_latency;
3303 new_event(p_new,p,fetchtime,FETCHNODE,tso,node,NULL);
3305 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
3313 @blockFetch@ blocks a @BlockedFetch@ node on some kind of black hole.
3315 Taken from gum/HLComms.lc. [find a better place for that ?] -- HWL
3317 {\bf Note:} In GranSim we don't have @FETCHME@ nodes and therefore don't
3318 create @FMBQ@'s (FetchMe blocking queues) to cope with global
3319 blocking. Instead, non-local TSO are put into the BQ in the same way as
3320 local TSOs. However, we have to check if a TSO is local or global in order
3321 to account for the latencies involved and for keeping track of the number
3322 of fetches that are really going on.
3328 0 ... ok; tso is now at beginning of BQ attached to the bh closure
3329 1 ... the bh closure is no BH any more; tso is immediately unblocked
/* blockFetch: block `tso' (running on `proc') on black-hole-ish closure
   `bh'.  If bh is no longer a BH the tso is immediately unblocked via an
   UNBLOCKTHREAD event; otherwise it is linked into bh's blocking queue.
   (Return-code semantics are documented in the comment block above.) */
3333 blockFetch(tso, proc, bh)
3334 P_ tso; /* TSO which gets blocked */
3335 PROC proc; /* PE where that tso was running */
3336 P_ bh; /* closure to block on (BH, RBH, BQ) */
3338 # if defined(GRAN_CHECK)
3339 if ( RTSflags.GranFlags.debug & 0x100 ) {
3341 I_ size, ptrs, nonptrs, vhs;
3342 char info_hdr_ty[80];
3344 info_ptr = get_closure_info(bh,
3345 &size, &ptrs, &nonptrs, &vhs,
3347 fprintf(stderr,"Blocking TSO %#lx (%x)(PE %d) on node %#lx (%s) (PE %d). No graph is packed!\n",
3348 tso, TSO_ID(tso), proc, bh, info_hdr_ty, where_is(bh));
3351 if ( !RTSflags.GranFlags.DoReScheduleOnFetch && (tso != RunnableThreadsHd[proc]) ) {
3352 fprintf(stderr,"Qagh {blockFetch}Daq: TSO 0x%lx (%x) is not first on runnable list of proc %d (first is 0x%lx)\n",
3353 tso,TSO_ID(tso),proc,RunnableThreadsHd[proc]);
/* The closure has been updated meanwhile: unblock immediately. */
3358 if (!IS_BLACK_HOLE(INFO_PTR(bh))) { /* catches BHs and RBHs */
3359 # if defined(GRAN_CHECK)
3360 if ( RTSflags.GranFlags.debug & 0x100 ) {
3362 W_ size, ptrs, nonptrs, vhs;
3363 char str[80], junk_str[80];
3365 info = get_closure_info(bh, &size, &ptrs, &nonptrs, &vhs, str);
3366 fprintf(stderr,"blockFetch: node %#lx (%s) is not a BH => awakening TSO %#lx (%x) (PE %u)\n",
3367 bh, str, tso, TSO_ID(tso), proc);
3371 /* No BH anymore => immediately unblock tso */
3372 new_event(proc,proc,CurrentTime[proc],
3373 UNBLOCKTHREAD,tso,bh,NULL);
3375 /* Is this always a REPLY to a FETCH in the profile ? */
3376 if (RTSflags.GranFlags.granSimStats)
3377 DumpRawGranEvent(proc,proc,GR_REPLY,tso,bh,0);
3381 /* DaH {BQ}Daq Qu' Suq 'e' wISov!
3382 Now we know that we have to put the tso into the BQ.
3383 2 case: If block-on-fetch, tso is at head of threadq =>
3384 => take it out of threadq and into BQ
3385 If reschedule-on-fetch, tso is only pointed to be event
3386 => just put it into BQ
3388 if (!RTSflags.GranFlags.DoReScheduleOnFetch) { /* block-on-fetch */
3389 GranSimBlock(tso, proc, bh); /* get tso out of threadq & activate next
3390 thread (same as in BQ_entry) */
3391 } else { /* reschedule-on-fetch */
3392 if(RTSflags.GranFlags.granSimStats)
3393 DumpRawGranEvent(proc,where_is(bh),GR_BLOCK,tso,bh,0);
3395 ++TSO_BLOCKCOUNT(tso);
3396 TSO_BLOCKEDAT(tso) = CurrentTime[proc];
3399 ASSERT(TSO_LINK(tso)==PrelBase_Z91Z93_closure);
3401 /* Put tso into BQ */
/* A plain BH becomes a BQ closure with tso as its only entry. */
3402 switch (INFO_TYPE(INFO_PTR(bh))) {
3404 case INFO_BH_U_TYPE:
3405 TSO_LINK(tso) = PrelBase_Z91Z93_closure;
3406 SET_INFO_PTR(bh, BQ_info);
3407 BQ_ENTRIES(bh) = (W_) tso;
3409 #ifdef GC_MUT_REQUIRED
3411 * If we modify a black hole in the old generation, we have to make
3412 * sure it goes on the mutables list
3415 if (bh <= StorageMgrInfo.OldLim) {
3416 MUT_LINK(bh) = (W_) StorageMgrInfo.OldMutables;
3417 StorageMgrInfo.OldMutables = bh;
3419 MUT_LINK(bh) = MUT_NOT_LINKED;
/* Existing BQ: push tso onto the front of the queue. */
3423 /* BF_LINK(bf) = (P_) BQ_ENTRIES(bh); */
3424 TSO_LINK(tso) = (P_) BQ_ENTRIES(bh);
3425 BQ_ENTRIES(bh) = (W_) tso;
3427 case INFO_FMBQ_TYPE:
3428 fprintf(stderr,"ERROR: FMBQ closure (%#lx) found in GrAnSim (TSO=%#lx (%x))\n",
3429 bh, tso, TSO_ID(tso));
/* Revertible black holes keep their BQ in a separate field. */
3431 case INFO_SPEC_RBH_TYPE:
3432 /* BF_LINK(bf) = (P_) BQ_ENTRIES(bh); */
3433 TSO_LINK(tso) = (P_) SPEC_RBH_BQ(bh);
3434 SPEC_RBH_BQ(bh) = (W_) tso;
3436 case INFO_GEN_RBH_TYPE:
3437 /* BF_LINK(bf) = (P_) BQ_ENTRIES(bh); */
3438 TSO_LINK(tso) = (P_) GEN_RBH_BQ(bh);
3439 GEN_RBH_BQ(bh) = (W_) tso;
/* Default: bh was not a black hole after all — report and bail out. */
3444 I_ size, ptrs, nonptrs, vhs;
3445 char info_hdr_ty[80];
3447 fprintf(stderr, "Panic: thought %#lx was a black hole (IP %#lx)",
3449 # if defined(GRAN_CHECK)
3450 info_ptr = get_closure_info(bh,
3451 &size, &ptrs, &nonptrs, &vhs,
3453 fprintf(stderr, " %s\n",info_hdr_ty);
3465 %****************************************************************************
3467 \subsection[qp-profile]{Quasi-Parallel Profiling}
3469 %****************************************************************************
3472 /* ToDo: Check if this is really still used anywhere!? */
3477 /* *Virtual* Time in milliseconds */
/* Two alternative definitions under elided #if branches: the first uses
   real user time, the second (GrAnSim) uses the simulated clock. */
3480 qp_elapsed_time(STG_NO_ARGS)
3482 extern StgDouble usertime();
3484 return ((long) (usertime() * 1e3));
3488 qp_elapsed_time(STG_NO_ARGS)
3490 return ((long) CurrentTime[CurrentProc] );
/* init_qp_profiling: open the quasi-parallel profile file and write its
   header: the command line, the relevant RTS flags, and a timestamp. */
3495 init_qp_profiling(STG_NO_ARGS)
3498 char qp_filename[STATS_FILENAME_MAXLEN];
3500 sprintf(qp_filename, QP_FILENAME_FMT, prog_argv[0]);
3501 if ((qp_file = fopen(qp_filename,"w")) == NULL ) {
3502 fprintf(stderr, "Can't open quasi-parallel profile report file %s\n",
/* Echo the full command line into the profile header. */
3506 fputs(prog_argv[0], qp_file);
3507 for(i = 1; prog_argv[i]; i++) {
3508 fputc(' ', qp_file);
3509 fputs(prog_argv[i], qp_file);
3511 fprintf(qp_file, " +RTS -C%d -t%d\n"
3512 , RTSflags.ConcFlags.ctxtSwitchTime
3513 , RTSflags.ConcFlags.maxThreads);
3515 fputs(time_str(), qp_file);
3516 fputc('\n', qp_file);
/* QP_Event0: log a thread-creation style event (`**') for thread `tid'
   entering closure `node' to the QP profile file. */
3521 QP_Event0(tid, node)
3525 fprintf(qp_file, "%lu ** %lu 0x%lx\n", qp_elapsed_time(), tid, INFO_PTR(node));
/* QP_Event1: log a one-thread QP event (`event' tag plus tso id/name). */
3529 QP_Event1(event, tso)
3533 fprintf(qp_file, "%lu %s %lu 0x%lx\n", qp_elapsed_time(), event,
3534 TSO_ID(tso), TSO_NAME(tso));
/* QP_Event2: log a two-thread QP event (e.g. one thread resuming another). */
3538 QP_Event2(event, tso1, tso2)
3542 fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n", qp_elapsed_time(), event,
3543 TSO_ID(tso1), TSO_NAME(tso1), TSO_ID(tso2), TSO_NAME(tso2));
3548 %****************************************************************************
3550 \subsection[gc-GrAnSim]{Garbage collection routines for GrAnSim objects}
3552 %****************************************************************************
3554 Garbage collection code for the event queue. We walk the event queue
3555 so that if the only reference to a TSO is in some event (e.g. RESUME),
3556 the TSO is still preserved.
3558 The GC code now uses a breadth-first pruning strategy. This prevents
3559 the GC from keeping all sparks of the low-numbered PEs while discarding all
3560 sparks from high-numbered PEs. Such a depth-first pruning may have
3561 disastrous effects for programs that generate a huge number of sparks!
3566 extern smInfo StorageMgrInfo;
3568 /* Auxiliary functions needed in Save/RestoreSparkRoots if breadth-first */
3569 /* pruning is done. */
/* arr_and: fold over arr[0..max-1]; the combining line is elided here —
   by the name, presumably a bitwise AND of all elements. TODO confirm. */
3572 arr_and(W_ arr[], I_ max)
3577 /* Doesn't work with max==0; but then, many things don't work in this */
3579 for (i=1, res = arr[0]; i<max; i++)
/* arr_max: return the maximum of arr[0..max-1] (same max==0 caveat). */
3586 arr_max(W_ arr[], I_ max)
3591 /* Doesn't work with max==0; but then, many things don't work in this */
3593 for (i=1, res = arr[0]; i<max; i++)
3594 res = (arr[i]>res) ? arr[i] : res;
3600 Routines working on spark queues.
3601 It would be a good idea to make that an ADT!
/* spark_queue_len: count the sparks in pool `pool' on PE `proc'; with
   debug flag 0x1000 also checks that PendingSparksTl is the last node. */
3605 spark_queue_len(PROC proc, I_ pool)
3607 sparkq prev, spark; /* prev only for testing !! */
3610 for (len = 0, prev = NULL, spark = PendingSparksHd[proc][pool];
3612 len++, prev = spark, spark = SPARK_NEXT(spark))
3615 # if defined(GRAN_CHECK)
3616 if ( RTSflags.GranFlags.debug & 0x1000 )
3617 if ( (prev!=NULL) && (prev!=PendingSparksTl[proc][pool]) )
3618 fprintf(stderr,"ERROR in spark_queue_len: (PE %u, pool %u) PendingSparksTl (%#lx) not end of queue (%#lx)\n",
3619 proc, pool, PendingSparksTl[proc][pool], prev);
/* delete_from_spark_queue: unlink `spark' (whose predecessor is `prev')
   from the doubly linked SparkQueueHd/SparkQueueTl queue and dispose it. */
3626 delete_from_spark_queue (prev,spark) /* unlink and dispose spark */
3628 { /* Global Vars: CurrentProc, SparkQueueHd, SparkQueueTl */
3631 # if defined(GRAN_CHECK)
3632 if ( RTSflags.GranFlags.debug & 0x10000 ) {
3633 fprintf(stderr,"** |%#x:%#x| prev=%#x->(%#x), (%#x)<-spark=%#x->(%#x) <-(%#x)\n",
3634 SparkQueueHd, SparkQueueTl,
3635 prev, (prev==NULL ? 0 : SPARK_NEXT(prev)),
3636 SPARK_PREV(spark), spark, SPARK_NEXT(spark),
3637 (SPARK_NEXT(spark)==NULL ? 0 : SPARK_PREV(SPARK_NEXT(spark))));
/* Unlink: fix the forward pointer of the predecessor (or the head)... */
3641 tmp = SPARK_NEXT(spark);
3643 SparkQueueHd = SPARK_NEXT(spark);
3645 SPARK_NEXT(prev) = SPARK_NEXT(spark);
/* ...and the backward pointer of the successor (or the tail). */
3647 if (SPARK_NEXT(spark)==NULL) {
3648 SparkQueueTl = prev;
3650 SPARK_PREV(SPARK_NEXT(spark)) = prev;
3652 if(SparkQueueHd == NULL)
3653 SparkQueueTl = NULL;
3654 SPARK_NEXT(spark) = NULL;
3656 DisposeSpark(spark);
3659 # if defined(GRAN_CHECK)
3660 if ( RTSflags.GranFlags.debug & 0x10000 ) {
3661 fprintf(stderr,"## prev=%#x->(%#x)\n",
3662 prev, (prev==NULL ? 0 : SPARK_NEXT(prev)));
3669 /* NB: These functions have been replaced by functions:
3670 EvacuateEvents, EvacuateSparks, (in ../storage/SMcopying.lc)
3671 LinkEvents, LinkSparks (in ../storage/SMcompacting.lc)
3672 Thus, GrAnSim does not need additional entries in the list of roots
/* SaveEventRoots: walk the global event queue (EventHd) and push every
   heap pointer reachable only from an event -- TSOs of thread events,
   spark nodes of MOVESPARK, packet closures and nodes of FETCHREPLY --
   into StorageMgrInfo.roots, so the GC preserves them.
   Returns the updated root count; RestoreEvtRoots pops these back in
   *reverse* order, so push order here is the contract. */
3677 SaveEventRoots(num_ptr_roots)
3680 eventq event = EventHd;
3681 while(event != NULL)
     /* Thread-carrying events: root the TSO. */
3683 if(EVENT_TYPE(event) == RESUMETHREAD ||
3684 EVENT_TYPE(event) == MOVETHREAD ||
3685 EVENT_TYPE(event) == CONTINUETHREAD ||
3686 /* EVENT_TYPE(event) >= CONTINUETHREAD1 || */
3687 EVENT_TYPE(event) == STARTTHREAD )
3688 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
     /* MOVESPARK carries a spark; root the spark's node closure. */
3690 else if(EVENT_TYPE(event) == MOVESPARK)
3691 StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(EVENT_SPARK(event));
3693 else if (EVENT_TYPE(event) == FETCHNODE ||
3694 EVENT_TYPE(event) == FETCHREPLY )
3696 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
3697 /* In the case of packet fetching, EVENT_NODE(event) points to */
3698 /* the packet (currently, malloced). The packet is just a list of */
3699 /* closure addresses, with the length of the list at index 1 (the */
3700 /* structure of the packet is defined in Pack.lc). */
3701 if ( RTSflags.GranFlags.DoGUMMFetching && (EVENT_TYPE(event)==FETCHREPLY)) {
3702 P_ buffer = (P_) EVENT_NODE(event);
3703 int size = (int) buffer[PACK_SIZE_LOCN], i;
     /* Root every closure address in the packet body (header skipped);
        the malloced packet itself is not a heap object. */
3705 for (i = PACK_HDR_SIZE; i <= size-1; i++) {
3706 StorageMgrInfo.roots[num_ptr_roots++] = (P_) buffer[i];
     /* Plain FETCHNODE (or non-GUMM fetching): root the node itself.
        NOTE(review): the else framing between 3706 and 3709 is missing
        from this extract -- confirm control flow in the full source. */
3709 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_NODE(event);
3711 else if (EVENT_TYPE(event) == GLOBALBLOCK)
3713 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
3714 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_NODE(event);
3716 else if (EVENT_TYPE(event) == UNBLOCKTHREAD)
3718 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
3720 event = EVENT_NEXT(event);
3722 return(num_ptr_roots);
3725 #if defined(DEPTH_FIRST_PRUNING)
3726 /* Is it worthwhile keeping the depth-first pruning code !? -- HWL */
/* SaveSparkRoots (DEPTH_FIRST_PRUNING variant): for each processor and
   each spark pool, root the first MAX_SPARKS spark nodes; any excess
   sparks are pruned (node overwritten with the [] closure) and the tail
   of the queue is disposed. Depth-first means low-numbered PEs keep all
   their sparks while high-numbered PEs may lose everything -- see the
   breadth-first variant below for the fairer strategy.
   Returns the updated root count. */
3729 SaveSparkRoots(num_ptr_roots)
3732 sparkq spark, /* prev, */ disposeQ=NULL;
3734 I_ i, sparkroots=0, prunedSparks=0;
     /* NOTE(review): stray double semicolon below (harmless empty
        declaration, but worth cleaning up). */
3735 I_ tot_sparks[MAX_PROC], tot = 0;;
3737 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3738 tot_sparks[proc] = 0;
3739 for(i = 0; i < SPARK_POOLS; ++i) {
3740 for(/* prev = &PendingSparksHd[proc][i],*/ spark = PendingSparksHd[proc][i];
3742 /* prev = &SPARK_NEXT(spark), */ spark = SPARK_NEXT(spark))
     /* Within budget: root this spark's node. */
3744 if(++sparkroots <= MAX_SPARKS)
3746 if ( RTSflags.GcFlags.giveStats )
3747 if (i==ADVISORY_POOL) {
3751 StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(spark);
     /* Over budget: neutralise the node and remember where the
        to-be-disposed suffix of the queue starts (disposeQ).
        NOTE(review): the branch framing and disposeQ assignment are
        missing from this extract. */
3755 SPARK_NODE(spark) = PrelBase_Z91Z93_closure;
3756 if (prunedSparks==0) {
3764 } /* forall spark ... */
     /* Report pruning, then detach and dispose the pruned suffix:
        either the whole queue (head case) or from disposeQ onward. */
3765 if ( (RTSflags.GcFlags.giveStats) && (prunedSparks>0) ) {
3766 fprintf(RTSflags.GcFlags.statsFile,"Pruning and disposing %lu excess sparks (> %lu) on proc %d for GC purposes\n",
3767 prunedSparks,MAX_SPARKS,proc);
3768 if (disposeQ == PendingSparksHd[proc][i])
3769 PendingSparksHd[proc][i] = NULL;
3771 SPARK_NEXT(SPARK_PREV(disposeQ)) = NULL;
3772 DisposeSparkQ(disposeQ);
3776 } /* forall i ... */
3777 } /*forall proc .. */
     /* Optional per-PE spark statistics. */
3779 if ( RTSflags.GcFlags.giveStats ) {
3780 fprintf(RTSflags.GcFlags.statsFile,
3781 "Spark statistics (after pruning) (total sparks = %d):",tot);
3782 for (proc=0; proc<RTSflags.GranFlags.proc; proc++) {
3784 fprintf(RTSflags.GcFlags.statsFile,"\n> ");
3785 fprintf(RTSflags.GcFlags.statsFile,"\tPE %d: %d ",proc,tot_sparks[proc]);
3787 fprintf(RTSflags.GcFlags.statsFile,".\n");
3790 return(num_ptr_roots);
3793 #else /* !DEPTH_FIRST_PRUNING */
3795 /* In case of an excessive number of sparks, depth-first pruning is a Bad */
3796 /* Idea as we might end up with all remaining sparks on processor 0 and */
3797 /* none on the other processors. So, this version uses breadth first */
3798 /* pruning. -- HWL */
/* SaveSparkRoots (breadth-first variant): root sparks round-robin
   across all processors and pools -- one spark per (proc,pool) per
   sweep -- until MAX_SPARKS roots are saved or every queue is
   exhausted. This distributes the MAX_SPARKS budget fairly instead of
   letting PE 0 monopolise it (the depth-first failure mode described
   in the comments above). Queues are tracked with per-pool bit-masks:
   endQueues marks (pool,proc) pairs whose walk has ended, allProcs is
   the mask of all live PEs. After the budget is spent, a second
   round-robin pass prunes and disposes every remaining spark.
   Returns the updated root count. */
3801 SaveSparkRoots(num_ptr_roots)
3805 curr_spark[MAX_PROC][SPARK_POOLS];
3808 endQueues[SPARK_POOLS], finishedQueues[SPARK_POOLS];
3810 prunedSparks[MAX_PROC][SPARK_POOLS];
     /* NOTE(review): stray double semicolon below, as in the
        depth-first variant. */
3811 I_ tot_sparks[MAX_PROC], tot = 0;;
3814 # if defined(GRAN_CHECK) && defined(GRAN)
3815 if ( RTSflags.GranFlags.debug & 0x40 )
3816 fprintf(stderr,"D> Saving spark roots for GC ...\n");
     /* Initialise cursors and masks. NOTE(review): endQueues[i] is not
        visibly initialised in this extract (only finishedQueues[i]=0 at
        3827) -- confirm in the full source, it must start at 0. */
3820 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3821 allProcs |= PE_NUMBER(proc);
3822 tot_sparks[proc] = 0;
3823 for(i = 0; i < SPARK_POOLS; ++i) {
3824 curr_spark[proc][i] = PendingSparksHd[proc][i];
3825 prunedSparks[proc][i] = 0;
3827 finishedQueues[i] = 0;
3831 /* Breadth first pruning */
3833 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3834 for(i = 0; i < SPARK_POOLS; ++i) {
3835 spark = curr_spark[proc][i];
3836 if ( spark != NULL ) {
3838 if(++sparkroots <= MAX_SPARKS)
3840 # if defined(GRAN_CHECK) && defined(GRAN)
3841 if ( (RTSflags.GranFlags.debug & 0x1000) &&
3842 (RTSflags.GcFlags.giveStats) )
3843 fprintf(RTSflags.GcFlags.statsFile,"Saving Spark Root %d(proc: %d; pool: %d): 0x%lx \t(info ptr=%#lx)\n",
3844 num_ptr_roots,proc,i,SPARK_NODE(spark),
3845 INFO_PTR(SPARK_NODE(spark)));
3847 if ( RTSflags.GcFlags.giveStats )
3848 if (i==ADVISORY_POOL) {
     /* Root this spark's node and advance the cursor for this queue. */
3852 StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(spark);
3853 curr_spark[proc][i] = spark = SPARK_NEXT(spark);
     /* Budget exhausted: truncate this queue at the cursor (head case
        empties it, otherwise cut after the predecessor) and mark the
        walk as ended for this (pool,proc). */
3855 else /* sparkroots > MAX_SPARKS */
3857 if (curr_spark[proc][i] == PendingSparksHd[proc][i])
3858 PendingSparksHd[proc][i] = NULL;
3860 SPARK_NEXT(SPARK_PREV(curr_spark[proc][i])) = NULL;
3861 PendingSparksTl[proc][i] = SPARK_PREV(curr_spark[proc][i]);
3862 endQueues[i] |= PE_NUMBER(proc);
3864 } else { /* spark == NULL ; actually, this only has to be done once */
3865 endQueues[i] |= PE_NUMBER(proc);
     /* Repeat the sweep until every queue has ended. */
3869 } while (arr_and(endQueues,SPARK_POOLS) != allProcs);
3871 /* The buffer for spark roots in StorageMgrInfo.roots is full */
3872 /* now. Prune all sparks on all processors starting with */
3873 /* curr_spark[proc][i]. */
3876 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3877 for(i = 0; i < SPARK_POOLS; ++i) {
3878 spark = curr_spark[proc][i];
     /* Neutralise the node, advance, count, dispose -- round-robin so
        pruning is also fair across PEs. */
3880 if ( spark != NULL ) {
3881 SPARK_NODE(spark) = PrelBase_Z91Z93_closure;
3882 curr_spark[proc][i] = SPARK_NEXT(spark);
3884 prunedSparks[proc][i]++;
3885 DisposeSpark(spark);
3887 finishedQueues[i] |= PE_NUMBER(proc);
3891 } while (arr_and(finishedQueues,SPARK_POOLS) != allProcs);
     /* Debug (0x1000): report pruning per (proc,pool). */
3894 # if defined(GRAN_CHECK) && defined(GRAN)
3895 if ( RTSflags.GranFlags.debug & 0x1000) {
3896 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3897 for(i = 0; i < SPARK_POOLS; ++i) {
3898 if ( (RTSflags.GcFlags.giveStats) && (prunedSparks[proc][i]>0)) {
3899 fprintf(RTSflags.GcFlags.statsFile,
3900 "Discarding %lu sparks on proc %d (pool %d) for GC purposes\n",
3901 prunedSparks[proc][i],proc,i);
3906 if ( RTSflags.GcFlags.giveStats ) {
3907 fprintf(RTSflags.GcFlags.statsFile,
3908 "Spark statistics (after discarding) (total sparks = %d):",tot);
3909 for (proc=0; proc<RTSflags.GranFlags.proc; proc++) {
3911 fprintf(RTSflags.GcFlags.statsFile,"\n> ");
3912 fprintf(RTSflags.GcFlags.statsFile,
3913 "\tPE %d: %d ",proc,tot_sparks[proc]);
3915 fprintf(RTSflags.GcFlags.statsFile,".\n");
3920 return(num_ptr_roots);
3923 #endif /* DEPTH_FIRST_PRUNING */
3926 GC roots must be restored in *reverse order*.
3927 The recursion is a little ugly, but is better than
3928 in-place pointer reversal.
/* RestoreEvtRoots: pop the GC-updated root pointers back into the event
   queue, mirroring SaveEventRoots exactly. Roots must come off the
   stack in *reverse* save order, hence the recursion: recurse to the
   end of the queue first, then restore this event's pointers with
   pre-decremented indices (reverse of the push order within each case).
   Returns the remaining root count. */
3932 RestoreEvtRoots(event,num_ptr_roots)
     /* Recurse first: the tail's roots were pushed after ours.
        NOTE(review): the NULL base case is missing from this extract. */
3938 num_ptr_roots = RestoreEvtRoots(EVENT_NEXT(event),num_ptr_roots);
3940 if(EVENT_TYPE(event) == RESUMETHREAD ||
3941 EVENT_TYPE(event) == MOVETHREAD ||
3942 EVENT_TYPE(event) == CONTINUETHREAD ||
3943 /* EVENT_TYPE(event) >= CONTINUETHREAD1 || */
3944 EVENT_TYPE(event) == STARTTHREAD )
3945 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
3947 else if(EVENT_TYPE(event) == MOVESPARK )
3948 SPARK_NODE(EVENT_SPARK(event)) = StorageMgrInfo.roots[--num_ptr_roots];
3950 else if (EVENT_TYPE(event) == FETCHNODE ||
3951 EVENT_TYPE(event) == FETCHREPLY )
     /* FETCHREPLY with GUMM fetching: restore the packet's closure
        addresses in reverse index order (they were pushed ascending). */
3953 if ( RTSflags.GranFlags.DoGUMMFetching && (EVENT_TYPE(event)==FETCHREPLY)) {
3954 P_ buffer = (P_) EVENT_NODE(event);
3955 int size = (int) buffer[PACK_SIZE_LOCN], i;
3957 for (i = size-1; i >= PACK_HDR_SIZE; i--) {
3958 buffer[i] = StorageMgrInfo.roots[--num_ptr_roots];
3961 EVENT_NODE(event) = StorageMgrInfo.roots[--num_ptr_roots];
3963 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
     /* GLOBALBLOCK pushed TSO then NODE, so pop NODE then TSO. */
3965 else if (EVENT_TYPE(event) == GLOBALBLOCK)
3967 EVENT_NODE(event) = StorageMgrInfo.roots[--num_ptr_roots];
3968 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
3970 else if (EVENT_TYPE(event) == UNBLOCKTHREAD)
3972 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
3975 return(num_ptr_roots);
/* RestoreEventRoots: public entry point -- restore all event-queue GC
   roots starting from the global EventHd. Inverse of SaveEventRoots. */
3979 RestoreEventRoots(num_ptr_roots)
3982 return(RestoreEvtRoots(EventHd,num_ptr_roots));
3985 #if defined(DEPTH_FIRST_PRUNING)
/* RestoreSpkRoots: recursively restore saved spark-node roots along one
   spark queue, in reverse save order (recurse to the tail first, then
   restore this node). Only the first MAX_SPARKS sparks had roots saved,
   so deeper sparks are skipped; hitting one with the debug flag set is
   reported as an error since SaveSparkRoots should have pruned them.
   Returns the remaining root count.
   NOTE(review): the NULL base case and branch framing are missing from
   this extract. */
3988 RestoreSpkRoots(spark,num_ptr_roots,sparkroots)
3990 I_ num_ptr_roots, sparkroots;
3994 num_ptr_roots = RestoreSpkRoots(SPARK_NEXT(spark),num_ptr_roots,++sparkroots);
3995 if(sparkroots <= MAX_SPARKS)
3997 P_ n = SPARK_NODE(spark);
3998 SPARK_NODE(spark) = StorageMgrInfo.roots[--num_ptr_roots];
3999 # if defined(GRAN_CHECK) && defined(GRAN)
4000 if ( RTSflags.GranFlags.debug & 0x40 )
4001 fprintf(RTSflags.GcFlags.statsFile,
4002 "Restoring Spark Root %d: 0x%lx \t(info ptr=%#lx\n",
4003 num_ptr_roots,SPARK_NODE(spark),
4004 INFO_PTR(SPARK_NODE(spark)));
     /* Beyond MAX_SPARKS: nothing was saved for this spark; complain
        under the debug flag. */
4007 # if defined(GRAN_CHECK) && defined(GRAN)
4009 if ( RTSflags.GranFlags.debug & 0x40 )
4010 fprintf(RTSflags.GcFlags.statsFile,
4011 "Error in RestoreSpkRoots (%d; @ spark %#lx): More than MAX_SPARKS (%d) sparks\n",
4012 num_ptr_roots,SPARK_NODE(spark),MAX_SPARKS);
4016 return(num_ptr_roots);
/* RestoreSparkRoots (DEPTH_FIRST_PRUNING variant): restore spark roots
   for every (proc,pool) queue, iterating in *reverse* of the save order
   so the stack-discipline of StorageMgrInfo.roots is respected.
   Returns the remaining root count. */
4020 RestoreSparkRoots(num_ptr_roots)
     /* NOTE(review): typo in the error string below ("should be never
        be") -- fix to "should never be" on the next code change. */
4026 #if defined(GRAN_JSM_SPARKS)
4027 fprintf(stderr,"Error: RestoreSparkRoots should be never be entered in a JSM style sparks set-up\n");
4031 /* NB: PROC is currently an unsigned datatype i.e. proc>=0 is always */
4032 /* true ((PROC)-1 == (PROC)255). So we need a second clause in the head */
4033 /* of the for loop. For i that is currently not necessary. C is really */
4034 /* impressive in datatype abstraction! -- HWL */
     /* The `proc < RTSflags.GranFlags.proc` clause catches the unsigned
        wrap-around when proc is decremented past 0. */
4036 for(proc = RTSflags.GranFlags.proc - 1; (proc >= 0) && (proc < RTSflags.GranFlags.proc); --proc) {
4037 for(i = SPARK_POOLS - 1; (i >= 0) && (i < SPARK_POOLS) ; --i) {
4038 num_ptr_roots = RestoreSpkRoots(PendingSparksHd[proc][i],num_ptr_roots,0);
4041 return(num_ptr_roots);
4044 #else /* !DEPTH_FIRST_PRUNING */
/* RestoreSparkRoots (breadth-first variant): inverse of the
   breadth-first SaveSparkRoots. Sparks were rooted round-robin (one per
   (proc,pool) per sweep, ascending proc), so they are restored by
   walking each queue *backwards* from its tail, level by level
   (len = max_len .. 1), with proc and pool descending -- the exact
   reverse of the save order. queue_len[][] records each queue's length
   so a queue only participates in levels it actually reaches.
   Returns the remaining root count. */
4047 RestoreSparkRoots(num_ptr_roots)
4051 curr_spark[MAX_PROC][SPARK_POOLS];
4053 I_ i, max_len, len, pool, count,
4054 queue_len[MAX_PROC][SPARK_POOLS];
4056 /* NB: PROC is currently an unsigned datatype i.e. proc>=0 is always */
4057 /* true ((PROC)-1 == (PROC)255). So we need a second clause in the head */
4058 /* of the for loop. For i that is currently not necessary. C is really */
4059 /* impressive in datatype abstraction! -- HWL */
     /* Measure every queue; start each cursor at the TAIL, since
        restoration walks backwards via SPARK_PREV.
        NOTE(review): max_len is read in the ternary below but no
        initialisation is visible in this extract -- confirm the full
        source sets max_len = 0 before this loop. */
4062 for (proc=0; proc < RTSflags.GranFlags.proc; proc++) {
4063 for (i=0; i<SPARK_POOLS; i++) {
4064 curr_spark[proc][i] = PendingSparksTl[proc][i];
4065 queue_len[proc][i] = spark_queue_len(proc,i);
4066 max_len = (queue_len[proc][i]>max_len) ? queue_len[proc][i] : max_len;
     /* Level-by-level reverse sweep: deepest saved spark first. */
4070 for (len=max_len; len > 0; len--){
4071 for(proc = RTSflags.GranFlags.proc - 1; (proc >= 0) && (proc < RTSflags.GranFlags.proc); --proc) {
4072 for(i = SPARK_POOLS - 1; (i >= 0) && (i < SPARK_POOLS) ; --i) {
4073 if (queue_len[proc][i]>=len) {
4074 spark = curr_spark[proc][i];
4075 SPARK_NODE(spark) = StorageMgrInfo.roots[--num_ptr_roots];
4076 # if defined(GRAN_CHECK) && defined(GRAN)
4078 if ( (RTSflags.GranFlags.debug & 0x1000) &&
4079 (RTSflags.GcFlags.giveStats) )
4080 fprintf(RTSflags.GcFlags.statsFile,
4081 "Restoring Spark Root %d (PE %u, pool %u): 0x%lx \t(info ptr=%#lx)\n",
4082 num_ptr_roots,proc,i,SPARK_NODE(spark),
4083 INFO_PTR(SPARK_NODE(spark)));
     /* Step this queue's cursor one node towards the head. */
4085 curr_spark[proc][i] = SPARK_PREV(spark);
     /* NOTE(review): this RestoreSpkRoots call and its surrounding
        context (lines 4088-4093) are partly missing from this extract;
        its role here is unclear -- confirm against the full source. */
4087 num_ptr_roots = RestoreSpkRoots(PendingSparksHd[proc][i],
4094 # if defined(GRAN_CHECK) && defined(GRAN)
4095 if ( (RTSflags.GranFlags.debug & 0x1000) && (RTSflags.GcFlags.giveStats) )
4096 fprintf(RTSflags.GcFlags.statsFile,"Number of restored spark roots: %d\n",
4099 return(num_ptr_roots);
4102 #endif /* DEPTH_FIRST_PRUNING */
4108 #endif /* CONCURRENT */ /* the whole module! */