2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1995
4 %************************************************************************
6 \section[Threads.lc]{Thread Control Routines}
8 %************************************************************************
10 %************************************************************************
12 \subsection[thread-overview]{Overview of the Thread Management System}
14 %************************************************************************
16 %************************************************************************
18 \subsection[thread-decls]{Thread Declarations}
20 %************************************************************************
22 % I haven't checked if GRAN can work with QP profiling. But as we use our
23 % own profiling (GR profiling) that should be irrelevant. -- HWL
25 NOTE: There are currently a couple of x86-only pieces in here. The reason
26 for this is the need for an expedient hack to make Concurrent Haskell
27 and stable pointers work sufficiently for Win32 applications.
28 (the changes in here are not x86 specific, but other parts of this patch are
31 ToDo: generalise to all platforms
35 #if defined(CONCURRENT) /* the whole module! */
38 # define NON_POSIX_SOURCE /* so says Solaris */
47 static void init_qp_profiling(STG_NO_ARGS); /* forward decl */
50 @AvailableStack@ is used to determine whether an existing stack can be
51 reused without new allocation, so reducing garbage collection, and
52 stack setup time. At present, it is only used for the first stack
53 chunk of a thread, the one that's got
54 @RTSflags.ConcFlags.stkChunkSize@ words.
57 P_ AvailableStack = PrelBase_Z91Z93_closure;
58 P_ AvailableTSO = PrelBase_Z91Z93_closure;
61 Macros for dealing with the new and improved GA field for simulating
62 parallel execution. Based on @CONCURRENT@ package. The GA field now
63 contains a mask, where the n-th bit stands for the n-th processor,
64 on which this data can be found. In case of multiple copies, several bits
65 are set. The total number of processors is bounded by @MAX_PROC@,
66 which should be <= the length of a word in bits. -- HWL
68 {{GranSim.lc}Daq ngoq' roQlu'ta'}
69 (Code has been moved to GranSim.lc).
71 %****************************************************************
73 \subsection[thread-getthread]{The Thread Scheduler}
75 %****************************************************************
77 This is the heart of the thread scheduling code.
79 Most of the changes for GranSim are in this part of the RTS.
80 Especially the @ReSchedule@ routine has been blown up quite a lot.
81 It now contains the top-level event-handling loop.
83 Parts of the code that are not necessary for GranSim, but convenient to
84 have when developing it are marked with a @GRAN_CHECK@ variable.
87 STGRegisterTable *CurrentRegTable = NULL;
92 /* Only needed for GranSim Light; costs of operations during rescheduling
93 are associated to the virtual processor on which ActiveTSO is living */
95 rtsBool resched = rtsFalse; /* debugging only !!*/
97 /* Pointers to the head and tail of the runnable queues for each PE */
98 /* In GranSim Light only the thread/spark-queues of proc 0 are used */
99 P_ RunnableThreadsHd[MAX_PROC];
100 P_ RunnableThreadsTl[MAX_PROC];
102 P_ WaitThreadsHd[MAX_PROC];
103 P_ WaitThreadsTl[MAX_PROC];
105 sparkq PendingSparksHd[MAX_PROC][SPARK_POOLS];
106 sparkq PendingSparksTl[MAX_PROC][SPARK_POOLS];
108 /* One clock for each PE */
109 W_ CurrentTime[MAX_PROC];
111 /* Useful to restrict communication; cf fishing model in GUM */
112 I_ OutstandingFetches[MAX_PROC], OutstandingFishes[MAX_PROC];
114 /* Status of each PE (new since but independent of GranSim Light) */
115 enum proc_status procStatus[MAX_PROC];
117 #if defined(GRAN) && defined(GRAN_CHECK)
118 /* To check if the RTS ever tries to run a thread that should be blocked
119 because of fetching remote data */
120 P_ BlockedOnFetch[MAX_PROC];
123 W_ SparksAvail = 0; /* How many sparks are available */
124 W_ SurplusThreads = 0; /* How many excess threads are there */
126 TIME SparkStealTime();
130 P_ RunnableThreadsHd = PrelBase_Z91Z93_closure;
131 P_ RunnableThreadsTl = PrelBase_Z91Z93_closure;
133 P_ WaitingThreadsHd = PrelBase_Z91Z93_closure;
134 P_ WaitingThreadsTl = PrelBase_Z91Z93_closure;
136 TYPE_OF_SPARK PendingSparksBase[SPARK_POOLS];
137 TYPE_OF_SPARK PendingSparksLim[SPARK_POOLS];
139 TYPE_OF_SPARK PendingSparksHd[SPARK_POOLS];
140 TYPE_OF_SPARK PendingSparksTl[SPARK_POOLS];
142 #endif /* GRAN ; HWL */
/* Non-local exits for the scheduler:
   - scheduler_loop is the setjmp target of the MAIN SCHEDULER LOOP in
     ScheduleThreads; the event-handling code longjmps back to it to run
     the TSO at the head of the runnable queue.
   - finish_sched (x86 only) is set up on entry to ScheduleThreads so the
     whole scheduler can be unwound; part of the Win32/stable-pointer
     hack noted at the top of this file. */
144 static jmp_buf scheduler_loop;
145 #if defined(i386_TARGET_ARCH)
146 void SchedLoop(int ret);
/* entersFromC: number of threads that have entered the Haskell world via
   C (stable pointers); when more than one has, the C stack must not be
   squeezed -- see the longjmp(scheduler_loop, ...) sites below. */
147 extern StgInt entersFromC;
148 static jmp_buf finish_sched;
151 I_ required_thread_count = 0;
152 I_ advisory_thread_count = 0;
154 EXTFUN(resumeThread);
156 /* Misc prototypes */
158 P_ NewThread PROTO((P_, W_, I_));
159 I_ blockFetch PROTO((P_, PROC, P_));
160 I_ HandleFetchRequest PROTO((P_, PROC, P_));
161 rtsBool InsertThread PROTO((P_ tso));
162 sparkq delete_from_spark_queue PROTO((sparkq, sparkq));
165 P_ NewThread PROTO((P_, W_));
168 I_ context_switch = 0;
169 I_ contextSwitchTime = 10000;
173 /* NB: GRAN and GUM use different representations of spark pools.
174 GRAN sparks are more flexible (containing e.g. granularity info)
175 but slower than GUM sparks. There is no fixed upper bound on the
176 number of GRAN sparks either. -- HWL
182 I_ SparkLimit[SPARK_POOLS];
185 initThreadPools(STG_NO_ARGS)
187 I_ i, size = RTSflags.ConcFlags.maxLocalSparks;
189 SparkLimit[ADVISORY_POOL] = SparkLimit[REQUIRED_POOL] = size;
191 if ((PendingSparksBase[ADVISORY_POOL] = (TYPE_OF_SPARK) malloc(size * SIZE_OF_SPARK)) == NULL)
194 if ((PendingSparksBase[REQUIRED_POOL] = (TYPE_OF_SPARK) malloc(size * SIZE_OF_SPARK)) == NULL)
196 PendingSparksLim[ADVISORY_POOL] = PendingSparksBase[ADVISORY_POOL] + size;
197 PendingSparksLim[REQUIRED_POOL] = PendingSparksBase[REQUIRED_POOL] + size;
208 ScheduleThreads(topClosure)
216 #if defined(PROFILING) || defined(PAR)
217 if (time_profiling || RTSflags.ConcFlags.ctxtSwitchTime > 0) {
218 if (initialize_virtual_timer(RTSflags.CcFlags.msecsPerTick)) {
220 if (RTSflags.ConcFlags.ctxtSwitchTime > 0) {
221 if (initialize_virtual_timer(RTSflags.ConcFlags.ctxtSwitchTime)) {
224 fprintf(stderr, "Can't initialize virtual timer.\n");
228 context_switch = 0 /* 1 HWL */;
230 # if defined(GRAN_CHECK) && defined(GRAN) /* HWL */
231 if ( RTSflags.GranFlags.Light && RTSflags.GranFlags.proc!=1 ) {
232 fprintf(stderr,"Qagh: In GrAnSim Light setup .proc must be 1\n");
236 if ( RTSflags.GranFlags.debug & 0x40 ) {
237 fprintf(stderr,"Doing init in ScheduleThreads now ...\n");
241 #if defined(GRAN) /* KH */
242 /* Init thread and spark queues on all processors */
243 for (i=0; i<RTSflags.GranFlags.proc; i++)
245 /* Init of RunnableThreads{Hd,Tl} etc now in main */
246 OutstandingFetches[i] = OutstandingFishes[i] = 0;
247 procStatus[i] = Idle;
248 # if defined(GRAN_CHECK) && defined(GRAN) /* HWL */
249 BlockedOnFetch[i] = NULL;
253 CurrentProc = MainProc;
259 * We perform GC so that a signal handler can install a new
260 * TopClosure and start a new main thread.
266 if ((tso = NewThread(topClosure, T_MAIN, MAIN_PRI)) == NULL) {
268 if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
270 /* kludge to save the top closure as a root */
271 CurrentTSO = topClosure;
272 ReallyPerformThreadGC(0, rtsTrue);
273 topClosure = CurrentTSO;
275 if ((tso = NewThread(topClosure, T_MAIN, MAIN_PRI)) == NULL) {
277 if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
280 fprintf(stderr, "Not enough heap for main thread\n");
285 RunnableThreadsHd = RunnableThreadsTl = tso;
287 /* NB: CurrentProc must have been set to MainProc before that! -- HWL */
288 ThreadQueueHd = ThreadQueueTl = tso;
290 # if defined(GRAN_CHECK)
291 if ( RTSflags.GranFlags.debug & 0x40 ) {
292 fprintf(stderr,"MainTSO has been initialized (0x%x)\n", tso);
298 if (RTSflags.ParFlags.granSimStats) {
299 DumpGranEvent(GR_START, tso);
300 sameThread = rtsTrue;
303 if (RTSflags.GranFlags.granSimStats && !RTSflags.GranFlags.labelling)
304 DumpRawGranEvent(CurrentProc,(PROC)0,GR_START,
309 MAKE_BUSY(MainProc); /* Everything except the main PE is idle */
310 if (RTSflags.GranFlags.Light)
314 required_thread_count = 1;
315 advisory_thread_count = 0;
317 } /*if IAmMainThread ...*/
319 #if defined(i386_TARGET_ARCH)
320 if (setjmp(finish_sched) < 0) {
325 /* ----------------------------------------------------------------- */
326 /* This part is the MAIN SCHEDULER LOOP; jumped at from ReSchedule */
327 /* ----------------------------------------------------------------- */
335 if ( (ret <0) || ( (setjmp(scheduler_loop) < 0) )) {
336 longjmp(finish_sched,-1);
339 if( (setjmp(scheduler_loop) < 0) ) {
344 #if defined(GRAN) && defined(GRAN_CHECK)
345 if ( RTSflags.GranFlags.debug & 0x80 ) {
346 fprintf(stderr,"MAIN Schedule Loop; ThreadQueueHd is ");
347 G_TSO(ThreadQueueHd,1);
348 /* if (ThreadQueueHd == MainTSO) {
349 fprintf(stderr,"D> Event Queue is now:\n");
356 if (PendingFetches != PrelBase_Z91Z93_closure) {
361 if (ThreadQueueHd == PrelBase_Z91Z93_closure) {
362 fprintf(stderr, "Qu'vatlh! No runnable threads!\n");
365 if (DO_QP_PROF > 1 && CurrentTSO != ThreadQueueHd) {
366 QP_Event1("AG", ThreadQueueHd);
369 while (RunnableThreadsHd == PrelBase_Z91Z93_closure) {
370 /* If we've no work */
371 if (WaitingThreadsHd == PrelBase_Z91Z93_closure) {
374 exitc = NoRunnableThreadsHook();
378 /* Block indef. waiting for I/O and timer expire */
384 if (RunnableThreadsHd == PrelBase_Z91Z93_closure) {
385 if (advisory_thread_count < RTSflags.ConcFlags.maxThreads &&
386 (PendingSparksHd[REQUIRED_POOL] < PendingSparksTl[REQUIRED_POOL] ||
387 PendingSparksHd[ADVISORY_POOL] < PendingSparksTl[ADVISORY_POOL])) {
389 * If we're here (no runnable threads) and we have pending
390 * sparks, we must have a space problem. Get enough space
391 * to turn one of those pending sparks into a
392 * thread... ReallyPerformGC doesn't return until the
393 * space is available, so it may force global GC. ToDo:
394 * Is this unnecessary here? Duplicated in ReSchedule()?
397 ReallyPerformThreadGC(THREAD_SPACE_REQUIRED, rtsTrue);
398 SAVE_Hp -= THREAD_SPACE_REQUIRED;
401 * We really have absolutely no work. Send out a fish
402 * (there may be some out there already), and wait for
403 * something to arrive. We clearly can't run any threads
404 * until a SCHEDULE or RESUME arrives, and so that's what
405 * we're hoping to see. (Of course, we still have to
406 * respond to other types of messages.)
409 sendFish(choosePE(), mytid, NEW_FISH_AGE, NEW_FISH_HISTORY,
415 } else if (PacketsWaiting()) { /* Look for incoming messages */
421 if (DO_QP_PROF > 1 && CurrentTSO != RunnableThreadsHd) {
422 QP_Event1("AG", RunnableThreadsHd);
427 if (RTSflags.ParFlags.granSimStats && !sameThread)
428 DumpGranEvent(GR_SCHEDULE, RunnableThreadsHd);
432 TimeOfNextEvent = get_time_of_next_event();
433 CurrentTSO = ThreadQueueHd;
434 if (RTSflags.GranFlags.Light) {
435 /* Save time of `virt. proc' which was active since last getevent and
436 restore time of `virt. proc' where CurrentTSO is living on. */
437 if(RTSflags.GranFlags.DoFairSchedule)
439 if (RTSflags.GranFlags.granSimStats &&
440 RTSflags.GranFlags.debug & 0x20000)
441 DumpGranEvent(GR_SYSTEM_END,ActiveTSO);
443 TSO_CLOCK(ActiveTSO) = CurrentTime[CurrentProc];
445 CurrentTime[CurrentProc] = TSO_CLOCK(CurrentTSO);
446 if(RTSflags.GranFlags.DoFairSchedule && resched )
449 if (RTSflags.GranFlags.granSimStats &&
450 RTSflags.GranFlags.debug & 0x20000)
451 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
454 if (TSO_LINK(ThreadQueueHd)!=PrelBase_Z91Z93_closure &&
455 (TimeOfNextEvent == 0 ||
456 TSO_CLOCK(TSO_LINK(ThreadQueueHd))+1000<TimeOfNextEvent)) {
457 new_event(CurrentProc,CurrentProc,TSO_CLOCK(TSO_LINK(ThreadQueueHd))+1000,
458 CONTINUETHREAD,TSO_LINK(ThreadQueueHd),PrelBase_Z91Z93_closure,NULL);
459 TimeOfNextEvent = get_time_of_next_event();
463 EndOfTimeSlice = CurrentTime[CurrentProc]+RTSflags.GranFlags.time_slice;
465 CurrentTSO = RunnableThreadsHd;
466 RunnableThreadsHd = TSO_LINK(RunnableThreadsHd);
467 TSO_LINK(CurrentTSO) = PrelBase_Z91Z93_closure;
469 if (RunnableThreadsHd == PrelBase_Z91Z93_closure)
470 RunnableThreadsTl = PrelBase_Z91Z93_closure;
473 /* If we're not running a timer, just leave the flag on */
474 if (RTSflags.ConcFlags.ctxtSwitchTime > 0)
477 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
478 if (CurrentTSO == PrelBase_Z91Z93_closure) {
479 fprintf(stderr,"Qagh: Trying to execute PrelBase_Z91Z93_closure on proc %d (@ %d)\n",
480 CurrentProc,CurrentTime[CurrentProc]);
484 if (RTSflags.GranFlags.debug & 0x04) {
485 if (BlockedOnFetch[CurrentProc]) {
486 fprintf(stderr,"Qagh: Trying to execute TSO 0x%x on proc %d (@ %d) which is blocked-on-fetch by TSO 0x%x\n",
487 CurrentTSO,CurrentProc,CurrentTime[CurrentProc],BlockedOnFetch[CurrentProc]);
492 if ( (RTSflags.GranFlags.debug & 0x10) &&
493 (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) ) {
494 fprintf(stderr,"Qagh: Trying to execute TSO 0x%x on proc %d (@ %d) which should be asleep!\n",
495 CurrentTSO,CurrentProc,CurrentTime[CurrentProc]);
500 #if 0 && defined(i386_TARGET_ARCH)
501 fprintf(stderr, "ScheduleThreads: About to resume thread:%#x %d\n",
502 CurrentTSO, entersFromC);
504 miniInterpret((StgFunPtr)resumeThread);
508 % Some remarks on GrAnSim -- HWL
510 The ReSchedule function is the heart of GrAnSim. Based on its parameter it issues
511 a CONTINUETHREAD to carry on executing the current thread in due course, or it watches out for new work (e.g. called from EndThread).
513 Then it picks the next event (get_next_event) and handles it appropriately
514 (see switch construct). Note that a continue in the switch causes the next
515 event to be handled and a break causes a jmp to the scheduler_loop where
516 the TSO at the head of the current processor's runnable queue is executed.
518 ReSchedule is mostly entered from HpOverflow.lc:PerformReSchedule which is
519 itself called via the GRAN_RESCHEDULE macro in the compiler generated code.
523 GrAnSim rules here! Others stay out or you will be crashed.
524 Concurrent and parallel guys: please use the next door (a few pages down;
525 turn left at the !GRAN sign).
530 /* Prototypes of event handling functions. Only needed in ReSchedule */
/* Each do_the_* handler consumes one scheduler event of the matching
   type; they are dispatched from the event switch in ReSchedule and
   defined further down in this file. */
531 void do_the_globalblock PROTO((eventq event));
532 void do_the_unblock PROTO((eventq event));
533 void do_the_fetchnode PROTO((eventq event));
534 void do_the_fetchreply PROTO((eventq event));
535 void do_the_movethread PROTO((eventq event));
536 void do_the_movespark PROTO((eventq event));
/* gimme_spark searches the spark queue (results via the out-parameters);
   munch_spark then turns a found spark into new work. Both are used by
   the FINDWORK case of the event loop. */
537 void gimme_spark PROTO((rtsBool *found_res, sparkq *prev_res, sparkq *spark_res));
538 void munch_spark PROTO((rtsBool found, sparkq prev, sparkq spark));
541 ReSchedule(what_next)
542 int what_next; /* Run the current thread again? */
544 sparkq spark, nextspark;
550 # if defined(GRAN_CHECK) && defined(GRAN)
551 if ( RTSflags.GranFlags.debug & 0x80 ) {
552 fprintf(stderr,"Entering ReSchedule with mode %u; tso is\n",what_next);
553 G_TSO(ThreadQueueHd,1);
557 # if defined(GRAN_CHECK) && defined(GRAN)
558 if ( (RTSflags.GranFlags.debug & 0x80) || (RTSflags.GranFlags.debug & 0x40 ) )
559 if (what_next<FIND_THREAD || what_next>END_OF_WORLD)
560 fprintf(stderr,"Qagh {ReSchedule}Daq: illegal parameter %u for what_next\n",
564 if (RTSflags.GranFlags.Light) {
565 /* Save current time; GranSim Light only */
566 TSO_CLOCK(CurrentTSO) = CurrentTime[CurrentProc];
569 /* Run the current thread again (if there is one) */
570 if(what_next==SAME_THREAD && ThreadQueueHd != PrelBase_Z91Z93_closure)
572 /* A bit of a hassle if the event queue is empty, but ... */
573 CurrentTSO = ThreadQueueHd;
576 if (RTSflags.GranFlags.Light &&
577 TSO_LINK(ThreadQueueHd)!=PrelBase_Z91Z93_closure &&
578 TSO_CLOCK(ThreadQueueHd)>TSO_CLOCK(TSO_LINK(ThreadQueueHd))) {
579 if(RTSflags.GranFlags.granSimStats &&
580 RTSflags.GranFlags.debug & 0x20000 )
581 DumpGranEvent(GR_DESCHEDULE,ThreadQueueHd);
583 ThreadQueueHd = TSO_LINK(CurrentTSO);
584 if (ThreadQueueHd==PrelBase_Z91Z93_closure)
585 ThreadQueueTl=PrelBase_Z91Z93_closure;
586 TSO_LINK(CurrentTSO) = PrelBase_Z91Z93_closure;
587 InsertThread(CurrentTSO);
590 /* This code does round-Robin, if preferred. */
591 if(!RTSflags.GranFlags.Light &&
592 RTSflags.GranFlags.DoFairSchedule &&
593 TSO_LINK(CurrentTSO) != PrelBase_Z91Z93_closure &&
594 CurrentTime[CurrentProc]>=EndOfTimeSlice)
596 ThreadQueueHd = TSO_LINK(CurrentTSO);
597 TSO_LINK(ThreadQueueTl) = CurrentTSO;
598 ThreadQueueTl = CurrentTSO;
599 TSO_LINK(CurrentTSO) = PrelBase_Z91Z93_closure;
600 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadcontextswitchtime;
601 if ( RTSflags.GranFlags.granSimStats )
602 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
603 CurrentTSO = ThreadQueueHd;
606 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
607 CONTINUETHREAD,CurrentTSO,PrelBase_Z91Z93_closure,NULL);
609 /* Schedule `next thread' which is at ThreadQueueHd now i.e. thread queue */
610 /* has been updated before that already. */
611 else if(what_next==NEW_THREAD && ThreadQueueHd != PrelBase_Z91Z93_closure)
613 # if defined(GRAN_CHECK) && defined(GRAN)
614 fprintf(stderr,"Qagh: ReSchedule(NEW_THREAD) shouldn't be used with DoReScheduleOnFetch!!\n");
619 if(RTSflags.GranFlags.granSimStats &&
620 (!RTSflags.GranFlags.Light || RTSflags.GranFlags.debug & 0x20000) )
621 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
623 CurrentTSO = ThreadQueueHd;
624 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
625 CONTINUETHREAD,CurrentTSO,PrelBase_Z91Z93_closure,NULL);
627 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadcontextswitchtime;
630 /* We go in here if the current thread is blocked on fetch => don't CONT */
631 else if(what_next==CHANGE_THREAD)
633 /* just fall into event handling loop for next event */
636 /* We go in here if we have no runnable threads or what_next==0 */
639 procStatus[CurrentProc] = Idle;
640 /* That's now done in HandleIdlePEs!
641 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
642 FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
644 CurrentTSO = PrelBase_Z91Z93_closure;
647 /* ----------------------------------------------------------------- */
648 /* This part is the EVENT HANDLING LOOP */
649 /* ----------------------------------------------------------------- */
652 /* Choose the processor with the next event */
653 event = get_next_event();
654 CurrentProc = EVENT_PROC(event);
655 CurrentTSO = EVENT_TSO(event);
656 if (RTSflags.GranFlags.Light) {
659 /* Restore local clock of the virtual processor attached to CurrentTSO.
660 All costs will be associated to the `virt. proc' on which the tso
662 if (ActiveTSO != NULL) { /* already in system area */
663 TSO_CLOCK(ActiveTSO) = CurrentTime[CurrentProc];
664 if (RTSflags.GranFlags.DoFairSchedule)
666 if (RTSflags.GranFlags.granSimStats &&
667 RTSflags.GranFlags.debug & 0x20000)
668 DumpGranEvent(GR_SYSTEM_END,ActiveTSO);
671 switch (EVENT_TYPE(event))
674 case FINDWORK: /* inaccurate this way */
675 ActiveTSO = ThreadQueueHd;
679 case MOVESPARK: /* has tso of virt proc in tso field of event */
680 ActiveTSO = EVENT_TSO(event);
682 default: fprintf(stderr,"Illegal event type %s (%d) in GrAnSim Light setup\n",
683 event_names[EVENT_TYPE(event)],EVENT_TYPE(event));
686 CurrentTime[CurrentProc] = TSO_CLOCK(ActiveTSO);
687 if(RTSflags.GranFlags.DoFairSchedule)
689 if (RTSflags.GranFlags.granSimStats &&
690 RTSflags.GranFlags.debug & 0x20000)
691 DumpGranEvent(GR_SYSTEM_START,ActiveTSO);
695 if(EVENT_TIME(event) > CurrentTime[CurrentProc] &&
696 EVENT_TYPE(event)!=CONTINUETHREAD)
697 CurrentTime[CurrentProc] = EVENT_TIME(event);
699 # if defined(GRAN_CHECK) && defined(GRAN) /* HWL */
700 if ( RTSflags.GranFlags.Light && CurrentProc!=0 ) {
701 fprintf(stderr,"Qagh {ReSchedule}Daq: CurrentProc must be 0 in GrAnSim Light setup\n");
705 /* MAKE_BUSY(CurrentProc); don't think that's right in all cases now */
708 # if defined(GRAN_CHECK) && defined(GRAN)
709 if (RTSflags.GranFlags.debug & 0x80)
710 fprintf(stderr,"After get_next_event, before HandleIdlePEs\n");
713 /* Deal with the idlers */
714 if ( !RTSflags.GranFlags.Light )
717 # if defined(GRAN_CHECK) && defined(GRAN)
718 if ( RTSflags.GranFlags.event_trace_all ||
719 ( RTSflags.GranFlags.event_trace && EVENT_TYPE(event) != CONTINUETHREAD) ||
720 (RTSflags.GranFlags.debug & 0x80) )
724 switch (EVENT_TYPE(event))
726 /* Should just be continuing execution */
728 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
729 if ( (RTSflags.GranFlags.debug & 0x100) &&
730 (EVENT_TSO(event)!=RunnableThreadsHd[EVENT_PROC(event)]) ) {
731 fprintf(stderr,"Warning: Wrong TSO in CONTINUETHREAD: %#lx (%x) (PE: %d Hd: 0x%lx)\n",
732 EVENT_TSO(event), TSO_ID(EVENT_TSO(event)),
734 RunnableThreadsHd[EVENT_PROC(event)]);
736 if ( (RTSflags.GranFlags.debug & 0x04) &&
737 BlockedOnFetch[CurrentProc]) {
738 fprintf(stderr,"Warning: Discarding CONTINUETHREAD on blocked proc %u @ %u\n",
739 CurrentProc,CurrentTime[CurrentProc]);
744 if(ThreadQueueHd==PrelBase_Z91Z93_closure)
746 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
747 FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
748 continue; /* Catches superfluous CONTINUEs -- should be unnecessary */
751 break; /* fall into scheduler loop */
754 do_the_fetchnode(event);
755 continue; /* handle next event in event queue */
758 do_the_globalblock(event);
759 continue; /* handle next event in event queue */
762 do_the_fetchreply(event);
763 continue; /* handle next event in event queue */
765 case UNBLOCKTHREAD: /* Move from the blocked queue to the tail of */
766 do_the_unblock(event);
767 continue; /* handle next event in event queue */
769 case RESUMETHREAD: /* Move from the blocked queue to the tail of */
770 /* the runnable queue ( i.e. Qu' SImqa'lu') */
771 TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] -
772 TSO_BLOCKEDAT(EVENT_TSO(event));
773 StartThread(event,GR_RESUME);
777 StartThread(event,GR_START);
781 do_the_movethread(event);
782 continue; /* handle next event in event queue */
785 do_the_movespark(event);
786 continue; /* handle next event in event queue */
789 { /* Make sure that we have enough heap for creating a new
790 thread. This is a conservative estimate of the required heap.
791 This eliminates special checks for GC around NewThread within
794 I_ req_heap = TSO_HS + TSO_CTS_SIZE + STKO_HS +
795 RTSflags.ConcFlags.stkChunkSize;
797 if (SAVE_Hp + req_heap >= SAVE_HpLim ) {
798 ReallyPerformThreadGC(req_heap, rtsFalse);
800 if (IS_SPARKING(CurrentProc))
801 MAKE_IDLE(CurrentProc);
806 if( RTSflags.GranFlags.DoAlwaysCreateThreads ||
807 (ThreadQueueHd == PrelBase_Z91Z93_closure &&
808 (RTSflags.GranFlags.FetchStrategy >= 2 ||
809 OutstandingFetches[CurrentProc] == 0)) )
815 ASSERT(procStatus[CurrentProc]==Sparking ||
816 RTSflags.GranFlags.DoAlwaysCreateThreads);
818 /* SImmoHwI' yInej! Search spark queue! */
819 gimme_spark (&found, &prev, &spark);
821 /* DaH chu' Qu' yIchen! Now create new work! */
822 munch_spark (found, prev, spark);
824 /* ToDo: check ; not valid if GC occurs in munch_spark
825 ASSERT(procStatus[CurrentProc]==Starting ||
826 procStatus[CurrentProc]==Idle ||
827 RTSflags.GranFlags.DoAlwaysCreateThreads); */
829 continue; /* to the next event */
832 fprintf(stderr,"Illegal event type %u\n",EVENT_TYPE(event));
835 #if defined(i386_TARGET_ARCH)
838 /* more than one thread has entered the Haskell world
839 via C (and stable pointers) - don't squeeze the C stack. */
842 /* Squeeze C stack */
843 longjmp(scheduler_loop, 1);
846 longjmp(scheduler_loop, 1);
851 /* ----------------------------------------------------------------- */
852 /* The main event handling functions; called from ReSchedule (switch) */
853 /* ----------------------------------------------------------------- */
856 do_the_globalblock(eventq event)
858 PROC proc = EVENT_PROC(event); /* proc that requested node */
859 P_ tso = EVENT_TSO(event), /* tso that requested node */
860 node = EVENT_NODE(event); /* requested, remote node */
862 # if defined(GRAN_CHECK) && defined(GRAN)
863 if ( RTSflags.GranFlags.Light ) {
864 fprintf(stderr,"Qagh: There should be no GLOBALBLOCKs in GrAnSim Light setup\n");
868 if (!RTSflags.GranFlags.DoGUMMFetching) {
869 fprintf(stderr,"Qagh: GLOBALBLOCK events only valid with GUMM fetching\n");
873 if ( (RTSflags.GranFlags.debug & 0x100) &&
874 IS_LOCAL_TO(PROCS(node),proc) ) {
875 fprintf(stderr,"Qagh: GLOBALBLOCK: Blocking on LOCAL node 0x %x (PE %d).\n",
879 /* CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime; */
880 if ( blockFetch(tso,proc,node) != 0 )
881 return; /* node has become local by now */
883 if (!RTSflags.GranFlags.DoReScheduleOnFetch) { /* head of queue is next thread */
884 P_ tso = RunnableThreadsHd[proc]; /* awaken next thread */
885 if(tso != PrelBase_Z91Z93_closure) {
886 new_event(proc,proc,CurrentTime[proc],
887 CONTINUETHREAD,tso,PrelBase_Z91Z93_closure,NULL);
888 CurrentTime[proc] += RTSflags.GranFlags.gran_threadcontextswitchtime;
889 if(RTSflags.GranFlags.granSimStats)
890 DumpRawGranEvent(proc,CurrentProc,GR_SCHEDULE,tso,
891 PrelBase_Z91Z93_closure,0);
892 MAKE_BUSY(proc); /* might have been fetching */
894 MAKE_IDLE(proc); /* no work on proc now */
896 } else { /* RTSflags.GranFlags.DoReScheduleOnFetch i.e. block-on-fetch */
897 /* other thread is already running */
898 /* 'oH 'utbe' 'e' vIHar ; I think that's not needed -- HWL
899 new_event(proc,proc,CurrentTime[proc],
900 CONTINUETHREAD,EVENT_TSO(event),
901 (RTSflags.GranFlags.DoGUMMFetching ? closure :
902 EVENT_NODE(event)),NULL);
908 do_the_unblock(eventq event)
910 PROC proc = EVENT_PROC(event), /* proc that requested node */
911 creator = EVENT_CREATOR(event); /* proc that requested node */
912 P_ tso = EVENT_TSO(event), /* tso that requested node */
913 node = EVENT_NODE(event); /* requested, remote node */
915 # if defined(GRAN) && defined(GRAN_CHECK)
916 if ( RTSflags.GranFlags.Light ) {
917 fprintf(stderr,"Qagh: There should be no UNBLOCKs in GrAnSim Light setup\n");
922 if (!RTSflags.GranFlags.DoReScheduleOnFetch) { /* block-on-fetch */
923 /* We count block-on-fetch as normal block time */
924 TSO_BLOCKTIME(tso) += CurrentTime[proc] - TSO_BLOCKEDAT(tso);
925 /* No costs for contextswitch or thread queueing in this case */
926 if(RTSflags.GranFlags.granSimStats)
927 DumpRawGranEvent(proc,CurrentProc,GR_RESUME,tso, PrelBase_Z91Z93_closure,0);
928 new_event(proc,proc,CurrentTime[proc],CONTINUETHREAD,tso,node,NULL);
930 /* Reschedule on fetch causes additional costs here: */
931 /* Bring the TSO from the blocked queue into the threadq */
932 new_event(proc,proc,CurrentTime[proc]+RTSflags.GranFlags.gran_threadqueuetime,
933 RESUMETHREAD,tso,node,NULL);
938 do_the_fetchnode(eventq event)
942 # if defined(GRAN_CHECK) && defined(GRAN)
943 if ( RTSflags.GranFlags.Light ) {
944 fprintf(stderr,"Qagh: There should be no FETCHNODEs in GrAnSim Light setup\n");
948 if (RTSflags.GranFlags.SimplifiedFetch) {
949 fprintf(stderr,"Qagh: FETCHNODE events not valid with simplified fetch\n");
953 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime;
955 rc = HandleFetchRequest(EVENT_NODE(event),
956 EVENT_CREATOR(event),
958 if (rc == 4) { /* trigger GC */
959 # if defined(GRAN_CHECK) && defined(GRAN)
960 if (RTSflags.GcFlags.giveStats)
961 fprintf(RTSflags.GcFlags.statsFile,"***** veQ boSwI' PackNearbyGraph(node %#lx, tso %#lx (%x))\n",
962 EVENT_NODE(event), EVENT_TSO(event), TSO_ID(EVENT_TSO(event)));
964 prepend_event(event);
965 ReallyPerformThreadGC(PACK_HEAP_REQUIRED, rtsFalse);
966 # if defined(GRAN_CHECK) && defined(GRAN)
967 if (RTSflags.GcFlags.giveStats) {
968 fprintf(RTSflags.GcFlags.statsFile,"***** SAVE_Hp=%#lx, SAVE_HpLim=%#lx, PACK_HEAP_REQUIRED=%#lx\n",
969 SAVE_Hp, SAVE_HpLim, PACK_HEAP_REQUIRED);
970 fprintf(stderr,"***** No. of packets so far: %d (total size: %d)\n",
971 tot_packets,tot_packet_size);
974 event = grab_event();
975 SAVE_Hp -= PACK_HEAP_REQUIRED;
977 /* GC knows that events are special and follows the pointer i.e. */
978 /* events are valid even if they moved. An EXIT is triggered */
979 /* if there is not enough heap after GC. */
985 do_the_fetchreply(eventq event)
989 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
990 if ( RTSflags.GranFlags.Light ) {
991 fprintf(stderr,"Qagh: There should be no FETCHREPLYs in GrAnSim Light setup\n");
995 if (RTSflags.GranFlags.SimplifiedFetch) {
996 fprintf(stderr,"Qagh: FETCHREPLY events not valid with simplified fetch\n");
1000 if (RTSflags.GranFlags.debug & 0x10) {
1001 if (TSO_TYPE(EVENT_TSO(event)) & FETCH_MASK_TSO) {
1002 TSO_TYPE(EVENT_TSO(event)) &= ~FETCH_MASK_TSO;
1004 fprintf(stderr,"Qagh: FETCHREPLY: TSO %#x (%x) has fetch mask not set @ %d\n",
1005 CurrentTSO,TSO_ID(CurrentTSO),CurrentTime[CurrentProc]);
1010 if (RTSflags.GranFlags.debug & 0x04) {
1011 if (BlockedOnFetch[CurrentProc]!=ThreadQueueHd) {
1012 fprintf(stderr,"Qagh: FETCHREPLY: Proc %d (with TSO %#x (%x)) not blocked-on-fetch by TSO %#lx (%x)\n",
1013 CurrentProc,CurrentTSO,TSO_ID(CurrentTSO),
1014 BlockedOnFetch[CurrentProc], TSO_ID(BlockedOnFetch[CurrentProc]));
1017 BlockedOnFetch[CurrentProc] = 0; /*- rtsFalse; -*/
1022 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime;
1024 if (RTSflags.GranFlags.DoGUMMFetching) { /* bulk (packet) fetching */
1025 P_ buffer = EVENT_NODE(event);
1026 PROC p = EVENT_PROC(event);
1027 I_ size = buffer[PACK_SIZE_LOCN];
1029 tso = EVENT_TSO(event);
1031 /* NB: Fetch misses can't occur with GUMM fetching, as */
1032 /* updatable closures are turned into RBHs and therefore locked */
1033 /* for other processors that try to grab them. */
1035 closure = UnpackGraph(buffer);
1036 CurrentTime[CurrentProc] += size * RTSflags.GranFlags.gran_munpacktime;
1038 /* Copy or move node to CurrentProc */
1039 if (FetchNode(EVENT_NODE(event),
1040 EVENT_CREATOR(event),
1041 EVENT_PROC(event)) ) {
1042 /* Fetch has failed i.e. node has been grabbed by another PE */
1043 P_ node = EVENT_NODE(event), tso = EVENT_TSO(event);
1044 PROC p = where_is(node);
1047 # if defined(GRAN_CHECK) && defined(GRAN)
1048 if (RTSflags.GranFlags.PrintFetchMisses) {
1049 fprintf(stderr,"Fetch miss @ %lu: node %#lx is at proc %u (rather than proc %u)\n",
1050 CurrentTime[CurrentProc],node,p,EVENT_CREATOR(event));
1053 # endif /* GRAN_CHECK */
1055 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
1057 /* Count fetch again !? */
1058 ++TSO_FETCHCOUNT(tso);
1059 TSO_FETCHTIME(tso) += RTSflags.GranFlags.gran_fetchtime;
1061 fetchtime = STG_MAX(CurrentTime[CurrentProc],CurrentTime[p]) +
1062 RTSflags.GranFlags.gran_latency;
1064 /* Chase the grabbed node */
1065 new_event(p,CurrentProc,fetchtime,FETCHNODE,tso,node,NULL);
1067 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
1068 if (RTSflags.GranFlags.debug & 0x04)
1069 BlockedOnFetch[CurrentProc] = tso; /*-rtsTrue;-*/
1071 if (RTSflags.GranFlags.debug & 0x10)
1072 TSO_TYPE(tso) |= FETCH_MASK_TSO;
1075 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
1077 return; /* NB: no REPLy has been processed; tso still sleeping */
1080 /* -- Qapla'! Fetch has been successful; node is here, now */
1081 ++TSO_FETCHCOUNT(EVENT_TSO(event));
1082 TSO_FETCHTIME(EVENT_TSO(event)) += RTSflags.GranFlags.gran_fetchtime;
1084 if (RTSflags.GranFlags.granSimStats)
1085 DumpRawGranEvent(CurrentProc,EVENT_CREATOR(event),GR_REPLY,
1087 (RTSflags.GranFlags.DoGUMMFetching ?
1092 --OutstandingFetches[CurrentProc];
1093 ASSERT(OutstandingFetches[CurrentProc] >= 0);
1094 # if 0 && defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
1095 if (OutstandingFetches[CurrentProc] < 0) {
1096 fprintf(stderr,"Qagh: OutstandingFetches of proc %u has become negative\n",CurrentProc);
1100 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1101 UNBLOCKTHREAD,EVENT_TSO(event),
1102 (RTSflags.GranFlags.DoGUMMFetching ?
/* Handle a MOVETHREAD event: a thread stolen from another PE (see StealThread)
   arrives at this processor and is started here as a stolen thread.
   NOTE(review): this is a fragmentary extract -- the bodies of the sanity
   checks below appear truncated in this view. */
do_the_movethread(eventq event) {
  P_ tso = EVENT_TSO(event);
# if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
  /* GrAnSim Light has only one virtual PE; thread migration can't happen. */
  if ( RTSflags.GranFlags.Light && CurrentProc!=1 ) {
    fprintf(stderr,"Qagh: There should be no MOVETHREADs in GrAnSim Light setup\n");
  /* MOVETHREAD events require thread migration to be enabled (-bM flag). */
  if (!RTSflags.GranFlags.DoThreadMigration) {
    fprintf(stderr,"Qagh: MOVETHREAD events should never occur without -bM\n");
  /* While `on-the-fly' the TSO's location bitmask must be empty
     (StealThread sets it to Nowhere before sending). */
  if (PROCS(tso)!=0) {
    fprintf(stderr,"Qagh: Moved thread has a bitmask of 0%o (proc %d); should be 0\n",
           PROCS(tso), where_is(tso));
  /* The fish that went out to fetch this thread has come home. */
  --OutstandingFishes[CurrentProc];
  ASSERT(OutstandingFishes[CurrentProc]>=0);
  SET_PROCS(tso,ThisPE);   /* the thread now lives on this PE */
  CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime;
  StartThread(event,GR_STOLEN);   /* enqueue it, logged as a stolen thread */
/* Handle a MOVESPARK event: a spark stolen from another PE (see StealSpark)
   arrives here and is added to the local advisory spark pool.  A FINDWORK
   event is then scheduled so the main loop may turn it into a thread.
   NOTE(review): fragmentary extract -- some lines are missing in this view. */
do_the_movespark(eventq event){
 sparkq spark = EVENT_SPARK(event);
 /* Charge the cost of unpacking the incoming spark message. */
 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime;
 if (RTSflags.GranFlags.granSimStats_Sparks)
   DumpRawGranEvent(CurrentProc,(PROC)0,SP_ACQUIRED,PrelBase_Z91Z93_closure,
                   spark_queue_len(CurrentProc,ADVISORY_POOL));
#if defined(GRAN) && defined(GRAN_CHECK)
 if (!SHOULD_SPARK(SPARK_NODE(spark)))
 /* Not adding the spark to the spark queue would be the right */
 /* thing here, but it also would be cheating, as this info can't be */
 /* available in a real system. -- HWL */
 /* The fish that fetched this spark has returned. */
 --OutstandingFishes[CurrentProc];
 ASSERT(OutstandingFishes[CurrentProc]>=0);
 add_to_spark_queue(spark);
 /* Fishing has succeeded; the PE is now merely idle, awaiting FINDWORK. */
 if (procStatus[CurrentProc]==Fishing)
   procStatus[CurrentProc] = Idle;
 /* add_to_spark_queue will increase the time of the current proc. */
 /* Just falling into FINDWORK is wrong as we might have other */
 /* events that are happening before that. Therefore, just create */
 /* a FINDWORK event and go back to main event handling loop. */
 /* Should we treat stolen sparks specially? Currently, we don't. */
 /* Now FINDWORK is created in HandleIdlePEs */
 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
           FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
 sparking[CurrentProc]=rtsTrue;
1173 /* Search the spark queue of the CurrentProc for a spark that's worth
1174 turning into a thread */
/* Scan the spark queue of CurrentProc for a spark worth turning into a
   thread.  Results are returned via the out-parameters: *found_res says
   whether a usable spark was found, *spark_res is that spark and *prev_res
   its predecessor in the queue (needed for unlinking in munch_spark).
   Along the way, sparks whose node no longer SHOULD_SPARK are pruned.
   With PreferSparksOfLocalNodes / PrioritySparking enabled, sparks of
   non-local nodes resp. low-priority sparks are remembered and only taken
   as a last resort when the end of the queue is reached.
   NOTE(review): fragmentary extract -- declarations and several closing
   braces are missing in this view. */
gimme_spark (rtsBool *found_res, sparkq *prev_res, sparkq *spark_res)
  /* Fallback candidates, remembered while scanning for a better spark. */
  sparkq spark_of_non_local_node = NULL, spark_of_non_local_node_prev = NULL,
         low_priority_spark = NULL, low_priority_spark_prev = NULL,
         spark = NULL, prev = NULL, tmp = NULL;
  /* Choose a spark from the local spark queue */
  spark = SparkQueueHd;
  while (spark != NULL && !found)
      node = SPARK_NODE(spark);
      if (!SHOULD_SPARK(node))
          /* Node already evaluated (or otherwise not worth sparking):
             log the pruning and unlink the spark from the queue. */
          if(RTSflags.GranFlags.granSimStats_Sparks)
            DumpRawGranEvent(CurrentProc,(PROC)0,SP_PRUNED,PrelBase_Z91Z93_closure,
                             spark_queue_len(CurrentProc,ADVISORY_POOL));
          ASSERT(spark != NULL);
          spark = delete_from_spark_queue (prev,spark);
      /* -- node should eventually be sparked */
      else if (RTSflags.GranFlags.PreferSparksOfLocalNodes &&
              !IS_LOCAL_TO(PROCS(node),CurrentProc))
          /* Remember first low priority spark */
          if (spark_of_non_local_node==NULL) {
            spark_of_non_local_node_prev = prev;
            spark_of_non_local_node = spark;
          /* End of queue: fall back on the remembered non-local spark. */
          if (SPARK_NEXT(spark)==NULL) {
            ASSERT(spark==SparkQueueTl);  /* just for testing */
            prev = spark_of_non_local_node_prev;
            spark = spark_of_non_local_node;
# if defined(GRAN) && defined(GRAN_CHECK)
          /* Should never happen; just for testing */
          if (spark==SparkQueueTl) {
            fprintf(stderr,"ReSchedule: Last spark != SparkQueueTl\n");
          spark = SPARK_NEXT(spark);
      else if ( RTSflags.GranFlags.DoPrioritySparking ||
                (SPARK_GRAN_INFO(spark)>=RTSflags.GranFlags.SparkPriority2) )
      else /* only used if SparkPriority2 is defined */
          /* Remember first low priority spark */
          if (low_priority_spark==NULL) {
            low_priority_spark_prev = prev;
            low_priority_spark = spark;
          /* End of queue: take the remembered low-priority spark. */
          if (SPARK_NEXT(spark)==NULL) {
            ASSERT(spark==SparkQueueTl);  /* just for testing */
            prev = low_priority_spark_prev;
            spark = low_priority_spark;
            found = rtsTrue;    /* take low pri spark => rc is 2  */
          /* Should never happen; just for testing */
          if (spark==SparkQueueTl) {
            fprintf(stderr,"ReSchedule: Last spark != SparkQueueTl\n");
          spark = SPARK_NEXT(spark);
# if defined(GRAN_CHECK) && defined(GRAN)
          if ( RTSflags.GranFlags.debug & 0x40 ) {
            fprintf(stderr,"Ignoring spark of priority %u (SparkPriority=%u); node=0x%lx; name=%u\n",
                    SPARK_GRAN_INFO(spark), RTSflags.GranFlags.SparkPriority,
                    SPARK_NODE(spark), SPARK_NAME(spark));
# endif  /* GRAN_CHECK */
    }  /* while (spark!=NULL && !found) */
/* Turn the spark chosen by gimme_spark into a thread: charge thread-creation
   time, allocate a TSO via NewThread, schedule a STARTTHREAD event for it
   and unlink the consumed spark from the queue.  If thread creation fails
   for lack of heap, a GC is forced and a FINDWORK event is retried later.
   If nothing was sparked and no threads remain, the PE is made idle.
   NOTE(review): fragmentary extract -- declarations and several closing
   braces are missing in this view. */
munch_spark (rtsBool found, sparkq prev, sparkq spark)
    /* We've found a node; now, create thread (DaH Qu' yIchen) */
# if defined(GRAN_CHECK) && defined(GRAN)
    /* Statistics: count the times we had to settle for a low-pri spark. */
    if ( SPARK_GRAN_INFO(spark) < RTSflags.GranFlags.SparkPriority2 ) {
      tot_low_pri_sparks++;
      if ( RTSflags.GranFlags.debug & 0x40 ) {
        fprintf(stderr,"GRAN_TNG: No high priority spark available; low priority (%u) spark chosen: node=0x%lx; name=%u\n",
               SPARK_GRAN_INFO(spark),
               SPARK_NODE(spark), SPARK_NAME(spark));
    CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadcreatetime;
    node = SPARK_NODE(spark);
    if((tso = NewThread(node, T_REQUIRED, SPARK_GRAN_INFO(spark)))==NULL)
        /* Some kind of backoff needed here in case there's too little heap */
# if defined(GRAN_CHECK) && defined(GRAN)
        if (RTSflags.GcFlags.giveStats)
          fprintf(RTSflags.GcFlags.statsFile,"***** vIS Qu' chen veQ boSwI'; spark=%#x, node=%#x;  name=%u\n",
                /* (found==2 ? "no hi pri spark" : "hi pri spark"), */
                spark, node,SPARK_NAME(spark));
        /* Retry via FINDWORK one tick later, after a forced thread GC. */
        new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc]+1,
                  FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
        ReallyPerformThreadGC(TSO_HS+TSO_CTS_SIZE,rtsFalse);
        SAVE_Hp -= TSO_HS+TSO_CTS_SIZE;
        return; /* was: continue; */ /* to the next event, eventually */
    if(RTSflags.GranFlags.granSimStats_Sparks)
      DumpRawGranEvent(CurrentProc,(PROC)0,SP_USED,PrelBase_Z91Z93_closure,
                       spark_queue_len(CurrentProc,ADVISORY_POOL));
    /* Carry the spark's attributes over to the newborn thread. */
    TSO_EXPORTED(tso) = SPARK_EXPORTED(spark);
    TSO_LOCKED(tso) = !SPARK_GLOBAL(spark);
    TSO_SPARKNAME(tso) = SPARK_NAME(spark);
    new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
              STARTTHREAD,tso,node,NULL);
    procStatus[CurrentProc] = Starting;
    /* The spark has been consumed; remove it from the queue. */
    ASSERT(spark != NULL);
    spark = delete_from_spark_queue (prev, spark);
    /* Make the PE idle if nothing sparked and we have no threads. */
    if(ThreadQueueHd == PrelBase_Z91Z93_closure)
      MAKE_IDLE(CurrentProc);
# if defined(GRAN_CHECK) && defined(GRAN)
      if ( (RTSflags.GranFlags.debug & 0x80) )
        fprintf(stderr,"Warning in FINDWORK handling: No work found for PROC %u\n",CurrentProc);
# endif  /* GRAN_CHECK */
    /* ut'lu'Qo' ; Don't think that's necessary any more -- HWL
    new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
             CONTINUETHREAD,ThreadQueueHd,PrelBase_Z91Z93_closure,NULL);
1355 Here follows the non-GRAN @ReSchedule@.
1360 /* If you are concurrent and maybe even parallel please use this door. */
1364 int again; /* Run the current thread again? */
1372 * In the parallel world, we do unfair scheduling for the moment.
1373 * Ultimately, this should all be merged with the more
1374 * sophisticated GrAnSim scheduling options. (Of course, some
1375 * provision should be made for *required* threads to make sure
1376 * that they don't starve, but for now we assume that no one is
1377 * running concurrent Haskell on a multi-processor platform.)
1383 if (RunnableThreadsHd == PrelBase_Z91Z93_closure)
1384 RunnableThreadsTl = CurrentTSO;
1385 TSO_LINK(CurrentTSO) = RunnableThreadsHd;
1386 RunnableThreadsHd = CurrentTSO;
1392 * In the sequential world, we assume that the whole point of running
1393 * the threaded build is for concurrent Haskell, so we provide round-robin
1398 if(RunnableThreadsHd == PrelBase_Z91Z93_closure) {
1399 RunnableThreadsHd = CurrentTSO;
1401 TSO_LINK(RunnableThreadsTl) = CurrentTSO;
1402 if (DO_QP_PROF > 1) {
1403 QP_Event1("GA", CurrentTSO);
1406 RunnableThreadsTl = CurrentTSO;
1412 * Debugging code, which is useful enough (and cheap enough) to compile
1413 * in all the time. This makes sure that we don't access saved registers,
1414 * etc. in threads which are supposed to be sleeping.
1416 CurrentTSO = PrelBase_Z91Z93_closure;
1417 CurrentRegTable = NULL;
1420 /* First the required sparks */
1422 for (sparkp = PendingSparksHd[REQUIRED_POOL];
1423 sparkp < PendingSparksTl[REQUIRED_POOL]; sparkp++) {
1425 if (SHOULD_SPARK(spark)) {
1426 if ((tso = NewThread(spark, T_REQUIRED)) == NULL)
1428 if (RunnableThreadsHd == PrelBase_Z91Z93_closure) {
1429 RunnableThreadsHd = tso;
1431 if (RTSflags.ParFlags.granSimStats) {
1432 DumpGranEvent(GR_START, tso);
1433 sameThread = rtsTrue;
1437 TSO_LINK(RunnableThreadsTl) = tso;
1439 if (RTSflags.ParFlags.granSimStats)
1440 DumpGranEvent(GR_STARTQ, tso);
1443 RunnableThreadsTl = tso;
1446 QP_Event0(threadId++, spark);
1448 /* ToDo: Fix log entries for pruned sparks in GUM -- HWL */
1449 if(RTSflags.GranFlags.granSimStats_Sparks)
1450 DumpGranEvent(SP_PRUNED,threadId++);
1451 ^^^^^^^^ should be a TSO
1455 PendingSparksHd[REQUIRED_POOL] = sparkp;
1457 /* Now, almost the same thing for advisory sparks */
1459 for (sparkp = PendingSparksHd[ADVISORY_POOL];
1460 sparkp < PendingSparksTl[ADVISORY_POOL]; sparkp++) {
1462 if (SHOULD_SPARK(spark)) {
1465 /* In the parallel world, don't create advisory threads if we are
1466 * about to rerun the same thread, or already have runnable threads,
1467 * or the main thread has terminated */
1468 (RunnableThreadsHd != PrelBase_Z91Z93_closure ||
1469 (required_thread_count == 0 && IAmMainThread)) ||
1471 advisory_thread_count == RTSflags.ConcFlags.maxThreads ||
1472 (tso = NewThread(spark, T_ADVISORY)) == NULL)
1474 advisory_thread_count++;
1475 if (RunnableThreadsHd == PrelBase_Z91Z93_closure) {
1476 RunnableThreadsHd = tso;
1478 if (RTSflags.ParFlags.granSimStats) {
1479 DumpGranEvent(GR_START, tso);
1480 sameThread = rtsTrue;
1484 TSO_LINK(RunnableThreadsTl) = tso;
1486 if (RTSflags.ParFlags.granSimStats)
1487 DumpGranEvent(GR_STARTQ, tso);
1490 RunnableThreadsTl = tso;
1493 QP_Event0(threadId++, spark);
1495 /* ToDo: Fix log entries for pruned sparks in GUM -- HWL */
1496 if(RTSflags.GranFlags.granSimStats_Sparks)
1497 DumpGranEvent(SP_PRUNED,threadId++);
1498 ^^^^^^^^ should be a TSO
1502 PendingSparksHd[ADVISORY_POOL] = sparkp;
1505 # if defined(i386_TARGET_ARCH)
1506 if (entersFromC) { /* more than one thread has entered the Haskell world
1507 via C (and stable pointers) */
1508 /* Don't squeeze C stack */
1509 if (required_thread_count <= 0) {
1510 longjmp(scheduler_loop, -1);
1512 SchedLoop(required_thread_count <= 0 ? -1 : 1);
1513 longjmp(scheduler_loop, -1);
1516 longjmp(scheduler_loop, required_thread_count == 0 ? -1 : 1);
1519 longjmp(scheduler_loop, required_thread_count == 0 ? -1 : 1);
1522 longjmp(scheduler_loop, required_thread_count == 0 && IAmMainThread ? -1 : 1);
1530 %****************************************************************************
1532 \subsection[thread-gransim-execution]{Starting, Idling and Migrating
1533 Threads (GrAnSim only)}
1535 %****************************************************************************
1537 Thread start, idle and migration code for GrAnSim (i.e. simulating multiple
1543 /* ngoqvam che' {GrAnSim}! */
1545 # if defined(GRAN_CHECK)
1546 /* This routine is only used for keeping a statistics of thread queue
1547 lengths to evaluate the impact of priority scheduling. -- HWL
1548 {spark_queue_len}vo' jInIHta'
/* Length of proc's runnable-thread queue, by walking the TSO_LINK chain.
   Statistics-gathering only (GRAN_CHECK); see the note above.
   NOTE(review): fragmentary extract -- declarations and the return are
   missing in this view. */
thread_queue_len(PROC proc)
  for (len = 0, prev = PrelBase_Z91Z93_closure, next = RunnableThreadsHd[proc];
      next != PrelBase_Z91Z93_closure;
      len++, prev = next, next = TSO_LINK(prev))
1563 # endif /* GRAN_CHECK */
1566 A large portion of @StartThread@ deals with maintaining a sorted thread
1567 queue, which is needed for the Priority Sparking option. Without that
1568 complication the code boils down to FIFO handling.
1571 StartThread(event,event_type)
1573 enum gran_event_types event_type;
1575 P_ tso = EVENT_TSO(event),
1576 node = EVENT_NODE(event);
1577 PROC proc = EVENT_PROC(event),
1578 creator = EVENT_CREATOR(event);
1581 rtsBool found = rtsFalse;
1583 ASSERT(CurrentProc==proc);
1585 # if defined(GRAN_CHECK)
1586 if ( RTSflags.GranFlags.Light && CurrentProc!=0 ) {
1587 fprintf(stderr,"Qagh {StartThread}Daq: CurrentProc must be 0 in GrAnSim Light setup\n");
1591 /* A wee bit of statistics gathering */
1593 tot_tq_len += thread_queue_len(CurrentProc);
1596 ASSERT(TSO_LINK(CurrentTSO)==PrelBase_Z91Z93_closure);
1598 /* Idle proc; same for pri spark and basic version */
1599 if(ThreadQueueHd==PrelBase_Z91Z93_closure)
1601 CurrentTSO = ThreadQueueHd = ThreadQueueTl = tso;
1603 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadqueuetime;
1604 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1605 CONTINUETHREAD,tso,PrelBase_Z91Z93_closure,NULL);
1607 if(RTSflags.GranFlags.granSimStats &&
1608 !( (event_type == GR_START || event_type == GR_STARTQ) && RTSflags.GranFlags.labelling) )
1609 DumpRawGranEvent(CurrentProc,creator,event_type,
1611 TSO_SPARKNAME(tso));
1612 /* ^^^ SN (spark name) as optional info */
1613 /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
1614 /* ^^^ spark length as optional info */
1616 ASSERT(IS_IDLE(CurrentProc) || event_type==GR_RESUME ||
1617 (procStatus[CurrentProc]==Fishing && event_type==GR_STOLEN) ||
1618 procStatus[CurrentProc]==Starting);
1619 MAKE_BUSY(CurrentProc);
1623 /* In GrAnSim-Light we always have an idle `virtual' proc.
1624 The semantics of the one-and-only thread queue is different here:
1625 all threads in the queue are running (each on its own virtual processor);
1626 the queue is only needed internally in the simulator to interleave the
1627 reductions of the different processors.
1628 The one-and-only thread queue is sorted by the local clocks of the TSOs.
1630 if(RTSflags.GranFlags.Light)
1632 ASSERT(ThreadQueueHd!=PrelBase_Z91Z93_closure);
1633 ASSERT(TSO_LINK(tso)==PrelBase_Z91Z93_closure);
1635 /* If only one thread in queue so far we emit DESCHEDULE in debug mode */
1636 if(RTSflags.GranFlags.granSimStats &&
1637 (RTSflags.GranFlags.debug & 0x20000) &&
1638 TSO_LINK(ThreadQueueHd)==PrelBase_Z91Z93_closure) {
1639 DumpRawGranEvent(CurrentProc,CurrentProc,GR_DESCHEDULE,
1640 ThreadQueueHd,PrelBase_Z91Z93_closure,0);
1644 if ( InsertThread(tso) ) { /* new head of queue */
1645 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1646 CONTINUETHREAD,tso,PrelBase_Z91Z93_closure,NULL);
1649 if(RTSflags.GranFlags.granSimStats &&
1650 !(( event_type == GR_START || event_type == GR_STARTQ) && RTSflags.GranFlags.labelling) )
1651 DumpRawGranEvent(CurrentProc,creator,event_type,
1653 TSO_SPARKNAME(tso));
1654 /* ^^^ SN (spark name) as optional info */
1655 /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
1656 /* ^^^ spark length as optional info */
1658 /* MAKE_BUSY(CurrentProc); */
1662 /* Only for Pri Sparking */
1663 if (RTSflags.GranFlags.DoPriorityScheduling && TSO_PRI(tso)!=0)
1664 /* {add_to_spark_queue}vo' jInIHta'; Qu' wa'DIch yIleghQo' */
1665 for (prev = ThreadQueueHd, next = TSO_LINK(ThreadQueueHd), count=0;
1666 (next != PrelBase_Z91Z93_closure) &&
1667 !(found = (TSO_PRI(tso) >= TSO_PRI(next)));
1668 prev = next, next = TSO_LINK(next), count++)
1672 ASSERT(!IS_IDLE(CurrentProc));
1674 /* found can only be rtsTrue if pri sparking enabled */
1676 # if defined(GRAN_CHECK)
1677 ++non_end_add_threads;
1679 /* Add tso to ThreadQueue between prev and next */
1680 TSO_LINK(tso) = next;
1681 if ( next == PrelBase_Z91Z93_closure ) {
1682 ThreadQueueTl = tso;
1684 /* no back link for TSO chain */
1687 if ( prev == PrelBase_Z91Z93_closure ) {
1688 /* Never add TSO as first elem of thread queue; the first */
1689 /* element should be the one that is currently running -- HWL */
1690 # if defined(GRAN_CHECK)
1691 fprintf(stderr,"Qagh: NewThread (w/ PriorityScheduling): Trying to add TSO %#lx (PRI=%d) as first elem of threadQ (%#lx) on proc %u (@ %u)\n",
1692 tso, TSO_PRI(tso), ThreadQueueHd, CurrentProc,
1693 CurrentTime[CurrentProc]);
1696 TSO_LINK(prev) = tso;
1698 } else { /* !found */ /* or not pri sparking! */
1699 /* Add TSO to the end of the thread queue on that processor */
1700 TSO_LINK(ThreadQueueTl) = EVENT_TSO(event);
1701 ThreadQueueTl = EVENT_TSO(event);
1703 CurrentTime[CurrentProc] += count *
1704 RTSflags.GranFlags.gran_pri_sched_overhead +
1705 RTSflags.GranFlags.gran_threadqueuetime;
1707 if(RTSflags.GranFlags.DoThreadMigration)
1710 if(RTSflags.GranFlags.granSimStats &&
1711 !(( event_type == GR_START || event_type == GR_STARTQ) && RTSflags.GranFlags.labelling) )
1712 DumpRawGranEvent(CurrentProc,creator,event_type+1,
1714 TSO_SPARKNAME(tso));
1715 /* ^^^ SN (spark name) as optional info */
1716 /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
1717 /* ^^^ spark length as optional info */
1719 # if defined(GRAN_CHECK)
1720 /* Check if thread queue is sorted. Only for testing, really! HWL */
1721 if ( RTSflags.GranFlags.DoPriorityScheduling && (RTSflags.GranFlags.debug & 0x400) ) {
1722 rtsBool sorted = rtsTrue;
1725 if (ThreadQueueHd==PrelBase_Z91Z93_closure || TSO_LINK(ThreadQueueHd)==PrelBase_Z91Z93_closure) {
1726 /* just 1 elem => ok */
1728 /* Qu' wa'DIch yIleghQo' (ignore first elem)! */
1729 for (prev = TSO_LINK(ThreadQueueHd), next = TSO_LINK(prev);
1730 (next != PrelBase_Z91Z93_closure) ;
1731 prev = next, next = TSO_LINK(prev)) {
1733 (TSO_PRI(prev) >= TSO_PRI(next));
1737 fprintf(stderr,"Qagh: THREADQ on PE %d is not sorted:\n",
1739 G_THREADQ(ThreadQueueHd,0x1);
1744 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadqueuetime;
1748 @InsertThread@, which is only used for GranSim Light, is similar to
1749 @StartThread@ in that it adds a TSO to a thread queue. However, it assumes
1750 that the thread queue is sorted by local clocks and it inserts the TSO at the
1751 right place in the queue. Don't create any event, just insert.
1760 rtsBool found = rtsFalse;
1762 # if defined(GRAN_CHECK)
1763 if ( !RTSflags.GranFlags.Light ) {
1764 fprintf(stderr,"Qagh {InsertThread}Daq: InsertThread should only be used in a GrAnSim Light setup\n");
1768 if ( RTSflags.GranFlags.Light && CurrentProc!=0 ) {
1769 fprintf(stderr,"Qagh {StartThread}Daq: CurrentProc must be 0 in GrAnSim Light setup\n");
1774 /* Idle proc; same for pri spark and basic version */
1775 if(ThreadQueueHd==PrelBase_Z91Z93_closure)
1777 ThreadQueueHd = ThreadQueueTl = tso;
1778 /* MAKE_BUSY(CurrentProc); */
1782 for (prev = ThreadQueueHd, next = TSO_LINK(ThreadQueueHd), count=0;
1783 (next != PrelBase_Z91Z93_closure) &&
1784 !(found = (TSO_CLOCK(tso) < TSO_CLOCK(next)));
1785 prev = next, next = TSO_LINK(next), count++)
1788 /* found can only be rtsTrue if pri sparking enabled */
1790 /* Add tso to ThreadQueue between prev and next */
1791 TSO_LINK(tso) = next;
1792 if ( next == PrelBase_Z91Z93_closure ) {
1793 ThreadQueueTl = tso;
1795 /* no back link for TSO chain */
1798 if ( prev == PrelBase_Z91Z93_closure ) {
1799 ThreadQueueHd = tso;
1801 TSO_LINK(prev) = tso;
1803 } else { /* !found */ /* or not pri sparking! */
1804 /* Add TSO to the end of the thread queue on that processor */
1805 TSO_LINK(ThreadQueueTl) = tso;
1806 ThreadQueueTl = tso;
1808 return (prev == PrelBase_Z91Z93_closure);
1813 Export work to idle PEs. This function is called from @ReSchedule@ before
1814 dispatching on the current event. @HandleIdlePEs@ iterates over all PEs,
1815 trying to get work for idle PEs. Note, that this is a simplification
1816 compared to GUM's fishing model. We try to compensate for that by making
1817 the cost for stealing work dependent on the number of idle processors and
1818 thereby on the probability with which a randomly sent fish would find work.
1825 # if defined(GRAN) && defined(GRAN_CHECK)
1826 if ( RTSflags.GranFlags.Light ) {
1827 fprintf(stderr,"Qagh {HandleIdlePEs}Daq: Should never be entered in GrAnSim Light setup\n");
1833 for(proc = 0; proc < RTSflags.GranFlags.proc; proc++)
1834 if(IS_IDLE(proc)) /* && IS_SPARKING(proc) && IS_STARTING(proc) */
1835 /* First look for local work! */
1836 if (PendingSparksHd[proc][ADVISORY_POOL]!=NULL)
1838 new_event(proc,proc,CurrentTime[proc],
1839 FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
1840 MAKE_SPARKING(proc);
1842 /* Then try to get remote work! */
1843 else if ((RTSflags.GranFlags.max_fishes==0 ||
1844 OutstandingFishes[proc]<RTSflags.GranFlags.max_fishes) )
1847 if(RTSflags.GranFlags.DoStealThreadsFirst &&
1848 (RTSflags.GranFlags.FetchStrategy >= 4 || OutstandingFetches[proc] == 0))
1850 if (SurplusThreads > 0l) /* Steal a thread */
1857 if(SparksAvail > 0l &&
1858 (RTSflags.GranFlags.FetchStrategy >= 3 || OutstandingFetches[proc] == 0)) /* Steal a spark */
1861 if (SurplusThreads > 0l &&
1862 (RTSflags.GranFlags.FetchStrategy >= 4 || OutstandingFetches[proc] == 0)) /* Steal a thread */
1868 Steal a spark and schedule moving it to proc. We want to look at PEs in
clock order -- the PE whose clock is furthest behind first.  Currently sparks are only stolen from
1870 the @ADVISORY_POOL@ never from the @REQUIRED_POOL@. Eventually, this should
1871 be changed to first steal from the former then from the latter.
1873 We model a sort of fishing mechanism by counting the number of sparks and
1874 threads we are currently stealing.
1881 sparkq spark, prev, next;
1882 rtsBool stolen = rtsFalse;
1883 TIME times[MAX_PROC], stealtime;
1884 unsigned ntimes=0, i, j;
1885 int first_later, upb, r;
1887 # if defined(GRAN) && defined(GRAN_CHECK)
1888 if ( RTSflags.GranFlags.Light ) {
1889 fprintf(stderr,"Qagh {StealSpark}Daq: Should never be entered in GrAnSim Light setup\n");
1894 /* times shall contain processors from which we may steal sparks */
1895 for(p=0; p < RTSflags.GranFlags.proc; ++p)
1897 PendingSparksHd[p][ADVISORY_POOL] != NULL &&
1898 CurrentTime[p] <= CurrentTime[CurrentProc])
1899 times[ntimes++] = p;
1902 for(i=0; i < ntimes; ++i)
1903 for(j=i+1; j < ntimes; ++j)
1904 if(CurrentTime[times[i]] > CurrentTime[times[j]])
1906 unsigned temp = times[i];
1907 times[i] = times[j];
1911 /* Choose random processor to steal spark from; first look at processors */
1912 /* that are earlier than the current one (i.e. proc) */
1915 (first_later < ntimes) && (CurrentTime[times[first_later]] < CurrentTime[proc]);
1919 while (!stolen && (ntimes>0)) {
1920 long unsigned int r, q=0;
1922 upb = (first_later==0) ? ntimes : first_later;
1924 if (RTSflags.GranFlags.RandomSteal) {
1925 r = lrand48(); /* [0, RAND_MAX] */
1929 /* -- ASSERT(r<=RAND_MAX); */
1930 i = (unsigned int) (r % upb); /* [0, upb) */
1931 /* -- ASSERT((i>=0) && (i<=upb)); */
1933 /* -- ASSERT((p>=0) && (p<MAX_PROC)); */
1935 # if defined(GRAN_CHECK)
1936 if ( RTSflags.GranFlags.debug & 0x2000 )
1937 fprintf(stderr,"RANDOM_STEAL: [index %u of %u] from %u (@ %lu) to %u (@ %lu) (q=%d) [rand value: %lu]\n",
1938 i, ntimes, p, CurrentTime[p], proc, CurrentTime[proc], q, r);
1941 /* Now go through sparkq and steal the first one that should be sparked*/
1942 for(prev=NULL, spark = PendingSparksHd[p][ADVISORY_POOL];
1943 spark != NULL && !stolen;
1946 next = SPARK_NEXT(spark);
1948 if ((IS_IDLE(p) || procStatus[p]==Sparking || procStatus[p]==Fishing) &&
1949 SPARK_NEXT(spark)==NULL)
1951 /* Be social! Don't steal the only spark of an idle processor */
1954 else if(SHOULD_SPARK(SPARK_NODE(spark)))
1956 /* Don't Steal local sparks */
1957 if(!SPARK_GLOBAL(spark))
1963 /* Prepare message for sending spark */
1964 CurrentTime[p] += RTSflags.GranFlags.gran_mpacktime;
1966 if(RTSflags.GranFlags.granSimStats_Sparks)
1967 DumpRawGranEvent(p,(PROC)0,SP_EXPORTED,PrelBase_Z91Z93_closure,
1969 spark_queue_len(p,ADVISORY_POOL));
1971 SPARK_NEXT(spark) = NULL;
1973 stealtime = (CurrentTime[p] > CurrentTime[proc] ?
1979 new_event(proc,p /* CurrentProc */,stealtime,
1980 MOVESPARK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,spark);
1982 /* MAKE_BUSY(proc); not yet; busy when TSO in threadq */
1984 ++OutstandingFishes[proc];
1987 ++SPARK_GLOBAL(spark);
1990 CurrentTime[p] += RTSflags.GranFlags.gran_mtidytime;
1992 else /* !(SHOULD_SPARK(SPARK_NODE(spark))) */
1994 if(RTSflags.GranFlags.granSimStats_Sparks)
1995 DumpRawGranEvent(p,(PROC)0,SP_PRUNED,PrelBase_Z91Z93_closure,
1997 spark_queue_len(p,ADVISORY_POOL));
1999 DisposeSpark(spark);
2002 if(spark == PendingSparksHd[p][ADVISORY_POOL])
2003 PendingSparksHd[p][ADVISORY_POOL] = next;
2006 SPARK_NEXT(prev) = next;
2007 } /* for (spark=... iterating over sparkq */
2009 if(PendingSparksHd[p][ADVISORY_POOL] == NULL)
2010 PendingSparksTl[p][ADVISORY_POOL] = NULL;
2012 if (!stolen && (ntimes>0)) { /* nothing stealable from proc p :( */
2013 ASSERT(times[i]==p);
2015 /* remove p from the list (at pos i) */
2016 for (j=i; j+1<ntimes; j++)
2017 times[j] = times[j+1];
2020 /* update index to first proc which is later (or equal) than proc */
2023 (CurrentTime[times[first_later-1]]>CurrentTime[proc]);
2028 # if defined(GRAN_CHECK)
2029 if (stolen && (i!=0)) { /* only for statistics */
2031 ntimes_total += ntimes;
2032 fl_total += first_later;
Steal a thread and schedule moving it to proc.
2048 TIME times[MAX_PROC], stealtime;
2049 unsigned ntimes=0, i, j;
2050 int first_later, upb, r;
2052 /* Hunt for a thread */
2054 # if defined(GRAN) && defined(GRAN_CHECK)
2055 if ( RTSflags.GranFlags.Light ) {
2056 fprintf(stderr,"Qagh {StealThread}: Should never be entered in GrAnSim Light setup\n");
2061 /* times shall contain processors from which we may steal threads */
2062 for(p=0; p < RTSflags.GranFlags.proc; ++p)
2063 if(proc != p && RunnableThreadsHd[p] != PrelBase_Z91Z93_closure &&
2064 CurrentTime[p] <= CurrentTime[CurrentProc])
2065 times[ntimes++] = p;
2068 for(i=0; i < ntimes; ++i)
2069 for(j=i+1; j < ntimes; ++j)
2070 if(CurrentTime[times[i]] > CurrentTime[times[j]])
2072 unsigned temp = times[i];
2073 times[i] = times[j];
2077 /* Choose random processor to steal spark from; first look at processors */
2078 /* that are earlier than the current one (i.e. proc) */
2081 (first_later < ntimes) && (CurrentTime[times[first_later]] < CurrentTime[proc]);
2085 while (!found && (ntimes>0)) {
2086 long unsigned int r, q=0;
2088 upb = (first_later==0) ? ntimes : first_later;
2090 if (RTSflags.GranFlags.RandomSteal) {
2091 r = lrand48(); /* [0, RAND_MAX] */
2095 /* -- ASSERT(r<=RAND_MAX); */
2096 if ( RTSflags.GranFlags.debug & 0x2000 )
2097 fprintf(stderr,"rand value: %d " , r);
2098 i = (unsigned int) (r % upb); /* [0, upb] */
2099 /* -- ASSERT((i>=0) && (i<=upb)); */
2101 /* -- ASSERT((p>=0) && (p<MAX_PROC)); */
2103 # if defined(GRAN_CHECK)
2104 if ( RTSflags.GranFlags.debug & 0x2000 )
2105 fprintf(stderr,"RANDOM_STEAL; [index %u] from %u (@ %lu) to %u (@ %lu) (q=%d)\n",
2106 i, p, CurrentTime[p], proc, CurrentTime[proc], q);
2109 /* Steal the first exportable thread in the runnable queue after the */
2112 if(RunnableThreadsHd[p] != PrelBase_Z91Z93_closure)
2114 for(prev = RunnableThreadsHd[p], thread = TSO_LINK(RunnableThreadsHd[p]);
2115 thread != PrelBase_Z91Z93_closure && TSO_LOCKED(thread);
2116 prev = thread, thread = TSO_LINK(thread))
2119 if(thread != PrelBase_Z91Z93_closure) /* Take thread out of runnable queue */
2121 TSO_LINK(prev) = TSO_LINK(thread);
2123 TSO_LINK(thread) = PrelBase_Z91Z93_closure;
2125 if(RunnableThreadsTl[p] == thread)
2126 RunnableThreadsTl[p] = prev;
2128 /* Turn magic constants into params !? -- HWL */
2130 CurrentTime[p] += 5l * RTSflags.GranFlags.gran_mpacktime;
2132 stealtime = (CurrentTime[p] > CurrentTime[proc] ?
2136 + 4l * RTSflags.GranFlags.gran_additional_latency
2137 + 5l * RTSflags.GranFlags.gran_munpacktime;
2139 /* Move the thread; set bitmask to 0 while TSO is `on-the-fly' */
2140 SET_PROCS(thread,Nowhere /* PE_NUMBER(proc) */);
2142 /* Move from one queue to another */
2143 new_event(proc,p,stealtime,MOVETHREAD,thread,PrelBase_Z91Z93_closure,NULL);
2144 /* MAKE_BUSY(proc); not yet; only when thread is in threadq */
2145 ++OutstandingFishes[proc];
2150 if(RTSflags.GranFlags.granSimStats)
2151 DumpRawGranEvent(p,proc,GR_STEALING,thread,
2152 PrelBase_Z91Z93_closure,0);
2154 CurrentTime[p] += 5l * RTSflags.GranFlags.gran_mtidytime;
2162 if (!found && (ntimes>0)) { /* nothing stealable from proc p */
2163 ASSERT(times[i]==p);
2165 /* remove p from the list (at pos i) */
2166 for (j=i; j+1<ntimes; j++)
2167 times[j] = times[j+1];
2171 # if defined(GRAN_CHECK) && defined(GRAN)
2172 if (found && (i!=0)) { /* only for statistics */
/* Simulated cost of stealing a spark.  Models GUM-style fishing: the
   expected delay before a randomly-sent fish finds work shrinks as more
   PEs are idle (see idlers()), scaled by the communication latency.
   NOTE(review): fragmentary extract -- braces are missing in this view. */
SparkStealTime(void)
  double fishdelay, sparkdelay, latencydelay;
  /* Average number of hops a fish travels: half the number of PEs. */
  fishdelay =  (double)RTSflags.GranFlags.proc/2;
  /* Interpolate linearly: more idlers => smaller expected delay. */
  sparkdelay = fishdelay -
          ((fishdelay-1)/(double)(RTSflags.GranFlags.proc-1))*(double)idlers();
  latencydelay = sparkdelay*((double)RTSflags.GranFlags.gran_latency);
  return((TIME)latencydelay);
2189 #endif /* GRAN ; HWL */
2194 %****************************************************************************
2196 \subsection[thread-execution]{Executing Threads}
2198 %****************************************************************************
2200 First a set of functions for handling sparks and spark-queues that are
2201 attached to the processors. Currently, there are two spark-queues per
2205 \item A queue of @REQUIRED@ sparks i.e. these sparks will be definitely
2206 turned into threads ({\em non-discardable\/}). They are mainly used in concurrent
2207 Haskell. We don't use them in GrAnSim.
2208 \item A queue of @ADVISORY@ sparks i.e. they may be turned into threads if
2209 the RTS thinks that it is a good idea. However, these sparks are {\em
2210 discardable}. They will be discarded if the associated closure is
2211 generally not worth creating a new thread (indicated by a tag in the
2212 closure) or they may be pruned during GC if there are too many sparks
2217 EXTDATA_RO(StkO_info);
2218 EXTDATA_RO(TSO_info);
2219 EXTDATA_RO(realWorldZh_closure);
2221 EXTFUN(EnterNodeCode);
2222 UNVEC(EXTFUN(stopThreadDirectReturn);,EXTDATA(vtbl_stopStgWorld);)
2225 /* ngoqvam che' {GrAnSim} */
2227 /* Slow but relatively reliable method uses stgMallocBytes */
2228 /* Eventually change that to heap allocated sparks. */
2230 /* -------------------------------------------------------------------------
2231 This is the main point where handling granularity information comes into
2233 ------------------------------------------------------------------------- */
2235 #define MAX_RAND_PRI 100
2238 Granularity info transformers.
2239 Applied to the GRAN_INFO field of a spark.
2241 static I_ ID(I_ x) { return(x); };
2242 static I_ INV(I_ x) { return(-x); };
2243 static I_ IGNORE(I_ x) { return (0); };
2244 static I_ RAND(I_ x) { return ((lrand48() % MAX_RAND_PRI) + 1); }
2246 /* NB: size_info and par_info are currently unused (what a shame!) -- HWL */
/* Allocate (via stgMallocBytes) and initialise a new spark for `node'.
   The spark's priority is derived from gran_info through one of the
   transformers above, selected by the RTS flags; sparks whose priority
   falls below SparkPriority are discarded (NULL is returned).
   size_info and par_info are currently unused (see note above).
   NOTE(review): fragmentary extract -- declarations, the default branch of
   the priority selection and the return are missing in this view. */
NewSpark(node,name,gran_info,size_info,par_info,local)
  I_ name, gran_info, size_info, par_info, local;
  /* Pick the priority transformer according to the RTS flags. */
  pri = RTSflags.GranFlags.RandomPriorities ? RAND(gran_info) :
        RTSflags.GranFlags.InversePriorities ? INV(gran_info) :
        RTSflags.GranFlags.IgnorePriorities ? IGNORE(gran_info) :
  /* Discard sparks below the priority threshold (0 disables the check). */
  if ( RTSflags.GranFlags.SparkPriority!=0 && pri<RTSflags.GranFlags.SparkPriority ) {
    if ( RTSflags.GranFlags.debug & 0x40 ) {
      fprintf(stderr,"NewSpark: Ignoring spark of priority %u (SparkPriority=%u); node=0x%lx; name=%u\n",
             pri, RTSflags.GranFlags.SparkPriority, node, name);
    return ((sparkq)NULL);
  newspark = (sparkq) stgMallocBytes(sizeof(struct spark), "NewSpark");
  SPARK_PREV(newspark) = SPARK_NEXT(newspark) = NULL;
  SPARK_NODE(newspark) = node;
  /* name==1 means: inherit the spark name of the current thread. */
  SPARK_NAME(newspark) = (name==1) ? TSO_SPARKNAME(CurrentTSO) : name;
  SPARK_GRAN_INFO(newspark) = pri;
  SPARK_GLOBAL(newspark) = !local;      /* Check that with parAt, parAtAbs !!*/
2278 /* To make casm more convenient use this function to label strategies */
/* set_sparkname: label the sparks subsequently created on behalf of `tso'
   with `name' (used from casm to label strategies; NewSpark copies
   TSO_SPARKNAME when invoked with name==1).  The DumpRawGranEvent call
   below is deliberately disabled by the `0 &&' guard. */
2280 set_sparkname(P_ tso, int name) {
2281 TSO_SPARKNAME(tso) = name ;
2283 if(0 && RTSflags.GranFlags.granSimStats)
2284 DumpRawGranEvent(CurrentProc,99,GR_START,
2285 tso,PrelBase_Z91Z93_closure,
2286 TSO_SPARKNAME(tso));
2287 /* ^^^ SN (spark name) as optional info */
2288 /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
2289 /* ^^^ spark length as optional info */
/* reset_sparkname: clear the spark label of `tso' (undoes set_sparkname). */
2294 reset_sparkname(P_ tso) {
2295 TSO_SPARKNAME(tso) = 0;
2300 With PrioritySparking add_to_spark_queue performs an insert sort to keep
2301 the spark queue sorted. Otherwise the spark is just added to the end of
/* add_to_spark_queue: append `spark' to the ADVISORY_POOL of CurrentProc.
   With DoPrioritySparking (and a non-zero priority) an insertion sort keeps
   the doubly-linked queue ordered by descending SPARK_GRAN_INFO; otherwise
   the spark goes to the tail.  A NULL spark is a no-op (early exit).
   The GRAN_CHECK sections at the bottom are debug-only consistency checks
   (tail pointer and sortedness of the queue).
   NOTE(review): original line numbers are non-consecutive — loop headers,
   some closing braces and the search-loop termination condition are missing
   from this listing. */
2306 add_to_spark_queue(spark)
2311 rtsBool found = rtsFalse;
2313 if ( spark == (sparkq)NULL ) {
/* Priority sparking: walk the queue to find the first entry with a
   priority not greater than the new spark's, counting the steps for the
   simulated insertion cost below. */
2317 if (RTSflags.GranFlags.DoPrioritySparking && (SPARK_GRAN_INFO(spark)!=0) ) {
2319 for (prev = NULL, next = PendingSparksHd[CurrentProc][ADVISORY_POOL], count=0;
2321 !(found = (SPARK_GRAN_INFO(spark) >= SPARK_GRAN_INFO(next)));
2322 prev = next, next = SPARK_NEXT(next), count++)
2325 } else { /* 'utQo' */
2327 found = rtsFalse; /* to add it at the end */
/* Splice the spark in between prev and next, updating head/tail as
   necessary. */
2332 SPARK_NEXT(spark) = next;
2333 if ( next == NULL ) {
2334 PendingSparksTl[CurrentProc][ADVISORY_POOL] = spark;
2336 SPARK_PREV(next) = spark;
2338 SPARK_PREV(spark) = prev;
2339 if ( prev == NULL ) {
2340 PendingSparksHd[CurrentProc][ADVISORY_POOL] = spark;
2342 SPARK_NEXT(prev) = spark;
2344 } else { /* (RTSflags.GranFlags.DoPrioritySparking && !found) || !DoPrioritySparking */
2345 SPARK_NEXT(spark) = NULL;
2346 SPARK_PREV(spark) = PendingSparksTl[CurrentProc][ADVISORY_POOL];
2347 if (PendingSparksHd[CurrentProc][ADVISORY_POOL] == NULL)
2348 PendingSparksHd[CurrentProc][ADVISORY_POOL] = spark;
2350 SPARK_NEXT(PendingSparksTl[CurrentProc][ADVISORY_POOL]) = spark;
2351 PendingSparksTl[CurrentProc][ADVISORY_POOL] = spark;
/* Charge the simulated cost of the insertion-sort walk. */
2355 if (RTSflags.GranFlags.DoPrioritySparking) {
2356 CurrentTime[CurrentProc] += count * RTSflags.GranFlags.gran_pri_spark_overhead;
2359 # if defined(GRAN_CHECK)
2360 if ( RTSflags.GranFlags.debug & 0x1000 ) {
2361 for (prev = NULL, next = PendingSparksHd[CurrentProc][ADVISORY_POOL];
2363 prev = next, next = SPARK_NEXT(next))
2365 if ( (prev!=NULL) && (prev!=PendingSparksTl[CurrentProc][ADVISORY_POOL]) )
2366 fprintf(stderr,"SparkQ inconsistency after adding spark %#lx: (PE %u, pool %u) PendingSparksTl (%#lx) not end of queue (%#lx)\n",
2367 spark,CurrentProc,ADVISORY_POOL,
2368 PendingSparksTl[CurrentProc][ADVISORY_POOL], prev);
2372 # if defined(GRAN_CHECK)
2373 /* Check if the sparkq is still sorted. Just for testing, really! */
2374 if ( RTSflags.GranFlags.debug & 0x400 ) {
2375 rtsBool sorted = rtsTrue;
2378 if (PendingSparksHd[CurrentProc][ADVISORY_POOL] == NULL ||
2379 SPARK_NEXT(PendingSparksHd[CurrentProc][ADVISORY_POOL]) == NULL ) {
2380 /* just 1 elem => ok */
2382 for (prev = PendingSparksHd[CurrentProc][ADVISORY_POOL],
2383 next = SPARK_NEXT(PendingSparksHd[CurrentProc][ADVISORY_POOL]);
2385 prev = next, next = SPARK_NEXT(next)) {
2387 (SPARK_GRAN_INFO(prev) >= SPARK_GRAN_INFO(next));
2391 fprintf(stderr,"Warning: SPARKQ on PE %d is not sorted:\n",
2393 G_SPARKQ(PendingSparksHd[CurrentProc][ADVISORY_POOL],1);
2403 /* A SP_PRUNED line should be dumped when this is called from pruning or */
2404 /* discarding a spark! */
/* DisposeSparkQ: recursively free an entire spark queue, following
   SPARK_NEXT links.  The SparksAvail check below is a sanity warning only.
   NOTE(review): intermediate lines (base case, free call, counter
   decrement) are missing from this listing. */
2413 DisposeSparkQ(spark)
2419 DisposeSparkQ(SPARK_NEXT(spark));
2422 if (SparksAvail < 0)
2423 fprintf(stderr,"DisposeSparkQ: SparksAvail<0 after disposing sparkq @ 0x%lx\n", spark);
2432 % {GrAnSim}vaD (Notes on GrAnSim) -- HWL:
2436 \paragraph{Notes on GrAnSim:}
2437 The following routines are for handling threads. Currently, we use an
2438 unfair scheduling policy in GrAnSim. Thus there are no explicit functions for
2439 scheduling here. If other scheduling policies are added to the system that
2440 code should go in here.
2443 /* Create a new TSO, with the specified closure to enter and thread type */
/* NewThread: create a new TSO that will enter `topClosure', of thread
   `type' (e.g. T_MAIN / T_ADVISORY) and, in the GRAN build, priority `pri'
   (the two headers below belong to alternative conditional compilations).
   The TSO is taken from the AvailableTSO free list when possible, otherwise
   allocated from the heap; likewise the first stack chunk comes from
   AvailableStack when available (cf. the note on @AvailableStack@ at the
   top of this file).  The register table is initialised so the scheduler
   enters the closure via EnterNodeCode with R1 = topClosure.
   NOTE(review): original line numbers are non-consecutive — declarations,
   several closing braces and #else/#endif lines are missing from this
   listing. */
2447 NewThread(topClosure, type, pri)
2453 NewThread(topClosure, type)
2460 # if defined(GRAN) && defined(GRAN_CHECK)
2461 if ( RTSflags.GranFlags.Light && CurrentProc!=0) {
2462 fprintf(stderr,"Qagh {NewThread}Daq: CurrentProc must be 0 in GrAnSim Light setup\n");
/* Reuse a TSO from the free list if one is available; otherwise allocate. */
2466 if (AvailableTSO != PrelBase_Z91Z93_closure) {
2469 SET_PROCS(tso,ThisPE); /* Allocate it locally! */
2471 AvailableTSO = TSO_LINK(tso);
2472 } else if (SAVE_Hp + TSO_HS + TSO_CTS_SIZE > SAVE_HpLim) {
2475 ALLOC_TSO(TSO_HS,BYTES_TO_STGWORDS(sizeof(STGRegisterTable)),
2476 BYTES_TO_STGWORDS(sizeof(StgDouble)));
2478 SAVE_Hp += TSO_HS + TSO_CTS_SIZE;
2479 SET_TSO_HDR(tso, TSO_info, CCC);
/* Initialise the TSO's bookkeeping fields. */
2482 TSO_LINK(tso) = PrelBase_Z91Z93_closure;
2484 TSO_PRI(tso) = pri; /* Priority of that TSO -- HWL */
2486 #if defined(PROFILING) || defined(PAR)
2487 TSO_CCC(tso) = (CostCentre)STATIC_CC_REF(CC_MAIN);
2489 TSO_NAME(tso) = (P_) INFO_PTR(topClosure); /* A string would be nicer -- JSM */
2490 TSO_ID(tso) = threadId++;
2491 TSO_TYPE(tso) = type;
2492 TSO_PC1(tso) = TSO_PC2(tso) = EnterNodeCode;
2493 TSO_ARG1(tso) = /* TSO_ARG2(tso) = */ 0;
2494 TSO_SWITCH(tso) = NULL;
/* Per-thread statistics, zeroed at birth (GRAN/PAR builds). */
2501 #if defined(GRAN) || defined(PAR)
2502 TSO_SPARKNAME(tso) = 0;
2504 TSO_STARTEDAT(tso) = CurrentTime[CurrentProc];
2506 TSO_STARTEDAT(tso) = CURRENT_TIME;
2508 TSO_EXPORTED(tso) = 0;
2509 TSO_BASICBLOCKS(tso) = 0;
2510 TSO_ALLOCS(tso) = 0;
2511 TSO_EXECTIME(tso) = 0;
2512 TSO_FETCHTIME(tso) = 0;
2513 TSO_FETCHCOUNT(tso) = 0;
2514 TSO_BLOCKTIME(tso) = 0;
2515 TSO_BLOCKCOUNT(tso) = 0;
2516 TSO_BLOCKEDAT(tso) = 0;
2517 TSO_GLOBALSPARKS(tso) = 0;
2518 TSO_LOCALSPARKS(tso) = 0;
2520 if (RTSflags.GranFlags.Light)
2521 TSO_CLOCK(tso) = TSO_STARTEDAT(tso); /* local clock */
2527 * set pc, Node (R1), liveness
2529 CurrentRegTable = TSO_INTERNAL_PTR(tso);
2530 SAVE_Liveness = LIVENESS_R1;
2531 SAVE_R1.p = topClosure;
2534 if (type == T_MAIN) {
/* First stack chunk: reuse from AvailableStack or allocate a fresh one of
   RTSflags.ConcFlags.stkChunkSize words. */
2538 if (AvailableStack != PrelBase_Z91Z93_closure) {
2539 stko = AvailableStack;
2541 SET_PROCS(stko,ThisPE);
2543 AvailableStack = STKO_LINK(AvailableStack);
2544 } else if (SAVE_Hp + STKO_HS + RTSflags.ConcFlags.stkChunkSize > SAVE_HpLim) {
2547 /* ALLOC_STK(STKO_HS,STKO_CHUNK_SIZE,0); use RTSflag now*/
2548 ALLOC_STK(STKO_HS,RTSflags.ConcFlags.stkChunkSize,0);
2550 SAVE_Hp += STKO_HS + RTSflags.ConcFlags.stkChunkSize;
2551 SET_STKO_HDR(stko, StkO_info, CCC);
2553 STKO_SIZE(stko) = RTSflags.ConcFlags.stkChunkSize + STKO_VHS;
2554 STKO_SpB(stko) = STKO_SuB(stko) = STKO_BSTK_BOT(stko) + BREL(1);
2555 STKO_SpA(stko) = STKO_SuA(stko) = STKO_ASTK_BOT(stko) + AREL(1);
2556 STKO_LINK(stko) = PrelBase_Z91Z93_closure;
2557 STKO_RETURN(stko) = NULL;
2563 STKO_ADEP(stko) = STKO_BDEP(stko) = 0;
/* Main threads get the realWorld token pushed onto the A stack. */
2566 if (type == T_MAIN) {
2567 STKO_SpA(stko) -= AREL(1);
2568 *STKO_SpA(stko) = (P_) realWorldZh_closure;
2571 SAVE_Ret = (StgRetAddr) UNVEC(stopThreadDirectReturn,vtbl_stopStgWorld);
2575 QP_Event1(do_qp_prof > 1 ? "*A" : "*G", tso);
2577 #if defined(GRAN_CHECK)
2578 tot_sq_len += spark_queue_len(CurrentProc,ADVISORY_POOL);
2586 In GrAnSim the @EndThread@ function is the place where statistics about the
2587 simulation are printed. I guess, that could be moved into @main.lc@.
/* EndThread: terminate the current thread (CurrentTSO).  Emits optional
   ticky/QP/GranSim statistics, recycles the TSO and its stack object onto
   the AvailableTSO/AvailableStack free lists, and re-enters the scheduler
   via ReSchedule (or longjmps out of the scheduler loop when the whole
   simulation ends).  In GrAnSim, termination of the main thread also walks
   all processors' runnable queues to dump final events for the remaining
   threads (for tools like gr2ps).
   NOTE(review): original line numbers are non-consecutive — declarations,
   case labels, braces and several #if/#else/#endif lines are missing from
   this listing; the control structure cannot be fully reconstructed here. */
2592 EndThread(STG_NO_ARGS)
2596 TIME now = CURRENT_TIME;
/* Optional ticky-style per-thread stack statistics. */
2600 if (RTSflags.TickyFlags.showTickyStats) {
2601 fprintf(RTSflags.TickyFlags.tickyFile,
2602 "Thread %d (%lx)\n\tA stack max. depth: %ld words\n",
2603 TSO_ID(CurrentTSO), TSO_NAME(CurrentTSO), TSO_AHWM(CurrentTSO));
2604 fprintf(RTSflags.TickyFlags.tickyFile,
2605 "\tB stack max. depth: %ld words\n",
2606 TSO_BHWM(CurrentTSO));
2611 QP_Event1("G*", CurrentTSO);
2615 ASSERT(CurrentTSO == ThreadQueueHd);
2617 if (RTSflags.GranFlags.DoThreadMigration)
/* Main-thread termination: dump events for all still-runnable threads on
   every simulated processor before the simulation is wound up. */
2620 if(TSO_TYPE(CurrentTSO)==T_MAIN)
2624 for(i=0; i < RTSflags.GranFlags.proc; ++i) {
2626 while(RunnableThreadsHd[i] != PrelBase_Z91Z93_closure)
2628 /* We schedule runnable threads before killing them to */
2629 /* make the job of bookkeeping the running, runnable, */
2630 /* blocked threads easier for scripts like gr2ps -- HWL */
2632 if (RTSflags.GranFlags.granSimStats && !is_first &&
2633 (!RTSflags.GranFlags.Light || RTSflags.GranFlags.debug & 0x20000) )
2634 DumpRawGranEvent(i,(PROC)0,GR_SCHEDULE,
2635 RunnableThreadsHd[i],
2636 PrelBase_Z91Z93_closure,0);
2637 if (!RTSflags.GranFlags.granSimStats_suppressed &&
2638 TSO_TYPE(RunnableThreadsHd[i])!=T_MAIN)
2639 DumpGranInfo(i,RunnableThreadsHd[i],rtsTrue);
2640 RunnableThreadsHd[i] = TSO_LINK(RunnableThreadsHd[i]);
2641 is_first = rtsFalse;
2645 ThreadQueueHd = PrelBase_Z91Z93_closure;
2646 /* Printing of statistics has been moved into end_gr_simulation */
/* GranSim event dumps for the dying thread itself. */
2649 if (RTSflags.GranFlags.labelling && RTSflags.GranFlags.granSimStats &&
2650 !RTSflags.GranFlags.granSimStats_suppressed)
2651 DumpStartEventAt(TSO_STARTEDAT(CurrentTSO),where_is(CurrentTSO),0,GR_START,
2652 CurrentTSO,PrelBase_Z91Z93_closure,
2653 TSO_SPARKNAME(CurrentTSO));
2654 /* ^^^ SN (spark name) as optional info */
2655 /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
2656 /* ^^^ spark length as optional info */
2658 if (RTSflags.GranFlags.granSimStats &&
2659 !RTSflags.GranFlags.granSimStats_suppressed)
2660 DumpGranInfo(CurrentProc,CurrentTSO,
2661 TSO_TYPE(CurrentTSO) != T_ADVISORY);
2663 if (RTSflags.GranFlags.granSimStats_Binary &&
2664 TSO_TYPE(CurrentTSO)==T_MAIN &&
2665 !RTSflags.GranFlags.granSimStats_suppressed)
2666 grterminate(CurrentTime[CurrentProc]);
2668 if (TSO_TYPE(CurrentTSO)!=T_MAIN)
2669 ActivateNextThread(CurrentProc);
2671 /* Note ThreadQueueHd is Nil when the main thread terminates
2672 if(ThreadQueueHd != PrelBase_Z91Z93_closure)
2674 if (RTSflags.GranFlags.granSimStats && !RTSflags.GranFlags.granSimStats_suppressed &&
2675 (!RTSflags.GranFlags.Light || RTSflags.GranFlags.debug & 0x20000) )
2676 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
2677 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadscheduletime;
2684 if (RTSflags.ParFlags.granSimStats) {
2685 TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO);
2686 DumpGranInfo(thisPE, CurrentTSO, TSO_TYPE(CurrentTSO) != T_ADVISORY);
/* Thread-count bookkeeping per thread type; the T_MAIN case ends the
   world (longjmp back into the scheduler loop). */
2690 switch (TSO_TYPE(CurrentTSO)) {
2692 required_thread_count--;
2695 if (GRANSIMSTATS_BINARY)
2699 longjmp(scheduler_loop, -1); /* i.e. the world comes to an end NOW */
2701 ReSchedule(0); /* i.e. the world will eventually come to an end */
2705 required_thread_count--;
2709 advisory_thread_count--;
2717 fprintf(stderr, "EndThread: %x unknown\n", TSO_TYPE(CurrentTSO));
/* Recycle the stack object and the TSO onto their free lists. */
2721 /* Reuse stack object space */
2722 ASSERT(STKO_LINK(SAVE_StkO) == PrelBase_Z91Z93_closure);
2723 STKO_LINK(SAVE_StkO) = AvailableStack;
2724 AvailableStack = SAVE_StkO;
2726 TSO_LINK(CurrentTSO) = AvailableTSO;
2727 AvailableTSO = CurrentTSO;
2728 CurrentTSO = PrelBase_Z91Z93_closure;
2729 CurrentRegTable = NULL;
2732 /* NB: Now ThreadQueueHd is either the next runnable thread on this */
2733 /* proc or it's PrelBase_Z91Z93_closure. In the latter case, a FINDWORK will be */
2734 /* issued by ReSchedule. */
2735 ReSchedule(SAME_THREAD); /* back for more! */
2737 ReSchedule(0); /* back for more! */
2743 %****************************************************************************
2745 \subsection[thread-blocking]{Local Blocking}
2747 %****************************************************************************
2751 #if defined(GRAN_COUNT)
2752 /* Some non-essential maybe-useful statistics-gathering */
/* Statistics-gathering helpers (GRAN_COUNT only): bump global counters of
   updates (total/old/new) and PAP creations.  The counters themselves are
   declared elsewhere. */
2753 void CountnUPDs() { ++nUPDs; }
2754 void CountnUPDs_old() { ++nUPDs_old; }
2755 void CountnUPDs_new() { ++nUPDs_new; }
2757 void CountnPAPs() { ++nPAPs; }
2760 EXTDATA_RO(BQ_info);
2763 /* NB: non-GRAN version ToDo
2765 * AwakenBlockingQueue awakens a list of TSOs and FBQs.
2768 P_ PendingFetches = PrelBase_Z91Z93_closure;
/* AwakenBlockingQueue (non-GRAN version): walk a blocking queue `bqe'
   containing TSOs and (in PAR builds) BlockedFetch nodes.  TSOs are
   appended to the runnable-threads queue (with optional GranSim/QP event
   dumps and block/fetch time accounting); BlockedFetches are moved onto
   the global PendingFetches list.  Finally the runnable queue's tail is
   fixed up and terminated.
   NOTE(review): original line numbers are non-consecutive — case labels,
   braces and the declarations of last_tso/next are missing from this
   listing. */
2771 AwakenBlockingQueue(bqe)
2778 TIME now = CURRENT_TIME;
2783 while (bqe != PrelBase_Z91Z93_closure) {
/* IS_MUTABLE distinguishes live queue entries from the closures that end
   the queue. */
2785 while (IS_MUTABLE(INFO_PTR(bqe))) {
2786 switch (INFO_TYPE(INFO_PTR(bqe))) {
2790 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
/* Account blocked/fetch time for the TSO being resumed. */
2793 if (RTSflags.ParFlags.granSimStats) {
2794 DumpGranEvent(GR_RESUMEQ, bqe);
2795 switch (TSO_QUEUE(bqe)) {
2797 TSO_BLOCKTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
2800 TSO_FETCHTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
2804 fprintf(stderr, "ABQ: TSO_QUEUE invalid.\n");
/* Link the TSO onto the runnable queue. */
2809 if (last_tso == NULL) {
2810 if (RunnableThreadsHd == PrelBase_Z91Z93_closure) {
2811 RunnableThreadsHd = bqe;
2813 TSO_LINK(RunnableThreadsTl) = bqe;
2817 bqe = TSO_LINK(bqe);
/* BlockedFetch entry: splice it onto PendingFetches instead. */
2821 next = BF_LINK(bqe);
2822 BF_LINK(bqe) = PendingFetches;
2823 PendingFetches = bqe;
2825 if (last_tso != NULL)
2826 TSO_LINK(last_tso) = next;
2829 fprintf(stderr, "Unexpected IP (%#lx) in blocking queue at %#lx\n",
2830 INFO_PTR(bqe), (W_) bqe);
/* Terminate the runnable queue after the last awakened TSO. */
2837 if (last_tso != NULL) {
2838 RunnableThreadsTl = last_tso;
2840 TSO_LINK(last_tso) = PrelBase_Z91Z93_closure;
2848 # if defined(GRAN_CHECK)
2850 /* First some useful test functions */
2852 EXTFUN(RBH_Save_0_info);
2853 EXTFUN(RBH_Save_1_info);
2854 EXTFUN(RBH_Save_2_info);
/* Debug helper (GRAN_CHECK only): pretty-print a blocking queue `bqe' to
   stderr, one entry per TSO, marking each as local (L) or global (G, with
   its PE) and finishing with NIL/RBH depending on the queue terminator.
   NOTE(review): the function header and several interior lines are missing
   from this listing (original line numbers jump); presumably this is the
   PRINT_BQ routine referenced from CHECK_BQ below — confirm against the
   full source. */
2862 char str[80], str0[80];
2864 fprintf(stderr,"\n[PE %d] @ %lu BQ: ",
2865 CurrentProc,CurrentTime[CurrentProc]);
2866 if ( bqe == PrelBase_Z91Z93_closure ) {
2867 fprintf(stderr," NIL.\n");
2870 if ( bqe == NULL ) {
2871 fprintf(stderr," NULL\n");
2874 while (IS_MUTABLE(INFO_PTR(bqe))) { /* This distinguishes TSOs from */
2875 W_ proc; /* RBH_Save_? closures! */
2877 /* Find where the tso lives */
2878 proc = where_is(bqe);
2879 it = INFO_TYPE(INFO_PTR(bqe));
2893 if(proc == CurrentProc)
2894 fprintf(stderr," %#lx (%x) L %s,", bqe, TSO_ID(bqe), str0);
2896 fprintf(stderr," %#lx (%x) G (PE %d) %s,", bqe, TSO_ID(bqe), proc, str0);
2901 bqe = TSO_LINK(bqe);
2904 bqe = TSO_LINK(bqe);
2907 bqe = PrelBase_Z91Z93_closure;
2910 /* TSO_LINK(last_tso) = PrelBase_Z91Z93_closure; */
/* Report what terminated the queue: NIL or one of the RBH_Save closures. */
2912 if ( bqe == PrelBase_Z91Z93_closure )
2913 fprintf(stderr," NIL.\n");
2915 (INFO_PTR(bqe) == (P_) RBH_Save_0_info) ||
2916 (INFO_PTR(bqe) == (P_) RBH_Save_1_info) ||
2917 (INFO_PTR(bqe) == (P_) RBH_Save_2_info) )
2918 fprintf(stderr," RBH.\n");
2919 /* fprintf(stderr,"\n%s\n",str); */
/* CHECK_BQ (GRAN_CHECK only): sanity-check the blocking queue of `node'
   before `tso' (running on `proc') is added to it: verifies the TSO is on
   the processor the caller claims, extracts the queue from whichever
   closure kind holds it (BQ, RBH variants; FMBQs must not occur in
   GrAnSim), and checks the TSO is not already queued.
   NOTE(review): original line numbers are non-consecutive — case labels,
   braces and return statements are missing from this listing. */
2923 CHECK_BQ(node, tso, proc)
2930 PROC p = where_is(tso);
2931 rtsBool ok = rtsTrue;
/* NOTE(review): the visible format string has four conversions
   (%#lx %x %d %d) but only three arguments follow — CurrentProc appears
   to be missing from the argument list; verify against the full source. */
2934 fprintf(stderr,"ERROR in CHECK_BQ: CurrentTSO %#lx (%x) on proc %d but CurrentProc = %d\n",
2935 tso, TSO_ID(tso), proc);
/* Locate the blocking queue according to the closure kind of `node'. */
2939 switch (INFO_TYPE(INFO_PTR(node))) {
2941 case INFO_BH_U_TYPE:
2942 bqe = (P_) BQ_ENTRIES(node);
2943 return (rtsTrue); /* BHs don't have BQs */
2946 bqe = (P_) BQ_ENTRIES(node);
2948 case INFO_FMBQ_TYPE:
2949 fprintf(stderr,"CHECK_BQ: ERROR: FMBQ closure (%#lx) found in GrAnSim (TSO=%#lx (%x))\n",
2950 node, tso, TSO_ID(tso));
2953 case INFO_SPEC_RBH_TYPE:
2954 bqe = (P_) SPEC_RBH_BQ(node);
2956 case INFO_GEN_RBH_TYPE:
2957 bqe = (P_) GEN_RBH_BQ(node);
2962 I_ size, ptrs, nonptrs, vhs;
2963 char info_hdr_ty[80];
2965 fprintf(stderr, "CHECK_BQ: thought %#lx was a black hole (IP %#lx)",
2966 node, INFO_PTR(node));
2967 info_ptr = get_closure_info(node,
2968 &size, &ptrs, &nonptrs, &vhs,
2970 fprintf(stderr, " %s\n",info_hdr_ty);
2971 /* G_PRINT_NODE(node); */
2973 /* EXIT(EXIT_FAILURE); */
/* Walk the queue checking that `tso' is not already a member. */
2977 while (IS_MUTABLE(INFO_PTR(bqe))) { /* This distinguishes TSOs from */
2978 W_ proc; /* RBH_Save_? closures! */
2980 /* Find where the tso lives */
2981 proc = where_is(bqe);
2982 it = INFO_TYPE(INFO_PTR(bqe));
2985 fprintf(stderr,"ERROR in CHECK_BQ [Node = 0x%lx, PE %d]: TSO %#lx (%x) already in BQ: ",
2986 node, proc, tso, TSO_ID(tso));
2987 PRINT_BQ(BQ_ENTRIES(node));
2991 bqe = TSO_LINK(bqe);
2995 /* End of test functions */
2996 # endif /* GRAN_CHECK */
2998 /* This version of AwakenBlockingQueue has been originally taken from the
2999 GUM code. It is now assimilated into GrAnSim */
3001 /* Note: This version assumes a pointer to a blocking queue rather than a
3002 node with an attached blocking queue as input */
/* AwakenBlockingQueue (GrAnSim version, originally taken from the GUM
   code): given a pointer to a blocking queue (not a node with an attached
   queue — see note above), unblock every TSO on it.  For each TSO a
   RESUMETHREAD event is scheduled at a notify time that accounts for
   local vs. global unblocking costs; TimeOfNextEvent is updated so the
   event loop notices newly scheduled resumes.
   NOTE(review): original line numbers are non-consecutive — declarations
   (tso, last, notifytime), several braces and #endif lines are missing
   from this listing. */
3005 AwakenBlockingQueue(bqe)
3008 /* P_ tso = (P_) BQ_ENTRIES(node); */
/* Accept a BQ closure or an RBH variant and extract the actual queue. */
3017 /* Compatibility mode with old libraries! 'oH jIvoQmoH */
3018 if (IS_BQ_CLOSURE(bqe))
3019 bqe = (P_)BQ_ENTRIES(bqe);
3020 else if ( INFO_TYPE(INFO_PTR(bqe)) == INFO_SPEC_RBH_TYPE )
3021 bqe = (P_)SPEC_RBH_BQ(bqe);
3022 else if ( INFO_TYPE(INFO_PTR(bqe)) == INFO_GEN_RBH_TYPE )
3023 bqe = (P_)GEN_RBH_BQ(bqe);
3025 # if defined(GRAN_CHECK)
3026 if ( RTSflags.GranFlags.debug & 0x100 ) {
3031 # if defined(GRAN_COUNT)
3033 if (tso != PrelBase_Z91Z93_closure)
3037 # if defined(GRAN_CHECK)
3038 if (RTSflags.GranFlags.debug & 0x100)
3039 fprintf(stderr,"----- AwBQ: ");
/* Main loop: one RESUMETHREAD event per TSO on the queue. */
3042 while (IS_MUTABLE(INFO_PTR(bqe))) { /* This distinguishes TSOs from */
3043 W_ proc; /* RBH_Save_? closures! */
3044 ASSERT(INFO_TYPE(INFO_PTR(bqe)) == INFO_TSO_TYPE);
3047 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
3049 # if defined(GRAN_COUNT)
3053 /* Find where the tso lives */
3054 proc = where_is(bqe);
/* Local unblock is cheap; a global unblock pays pack/unpack/tidy costs
   and synchronises with the remote processor's clock. */
3056 if(proc == CurrentProc) {
3057 notifytime = CurrentTime[CurrentProc] + RTSflags.GranFlags.gran_lunblocktime;
3059 /* A better way of handling this would be to introduce a
3060 GLOBALUNBLOCK event which is created here. -- HWL */
3061 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
3062 notifytime = STG_MAX(CurrentTime[CurrentProc],CurrentTime[proc]) +
3063 RTSflags.GranFlags.gran_gunblocktime;
3064 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
3065 /* new_event(proc, CurrentProc, notifytime,
3066 GLOBALUNBLOCK,bqe,PrelBase_Z91Z93_closure,NULL); */
3068 /* cost the walk over the queue */
3069 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_lunblocktime;
3070 /* GrAnSim Light: make blocked TSO aware of the time that passed */
3071 if (RTSflags.GranFlags.Light)
3072 TSO_CLOCK(bqe) = notifytime;
3073 /* and create a resume message */
3074 new_event(proc, CurrentProc, notifytime,
3075 RESUMETHREAD,bqe,PrelBase_Z91Z93_closure,NULL);
3077 if (notifytime<TimeOfNextEvent)
3078 TimeOfNextEvent = notifytime;
3080 # if defined(GRAN_CHECK)
3081 if (RTSflags.GranFlags.debug & 0x100) {
3082 fprintf(stderr," TSO %x (PE %d) %s,",
3083 TSO_ID(bqe), proc, ( (proc==CurrentProc) ? "L" : "G") );
3088 bqe = TSO_LINK(bqe);
3089 TSO_LINK(last) = PrelBase_Z91Z93_closure;
/* Legacy non-GrAnSim path below; see the comment on its validity. */
3093 /* This was once used in a !do_gr_sim setup. Now only GrAnSim setup is */
3095 else /* Check if this is still valid for non-GrAnSim code -- HWL */
3097 if (ThreadQueueHd == PrelBase_Z91Z93_closure)
3098 ThreadQueueHd = bqe;
3100 TSO_LINK(ThreadQueueTl) = bqe;
3102 if (RunnableThreadsHd == PrelBase_Z91Z93_closure)
3103 RunnableThreadsHd = tso;
3105 TSO_LINK(RunnableThreadsTl) = tso;
3108 while(TSO_LINK(bqe) != PrelBase_Z91Z93_closure) {
3109 assert(TSO_INTERNAL_PTR(bqe)->rR[0].p == node);
3112 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
3115 bqe = TSO_LINK(bqe);
3118 assert(TSO_INTERNAL_PTR(bqe)->rR[0].p == node);
3121 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
3127 if (RTSflags.GranFlags.debug & 0x100)
3128 fprintf(stderr,".\n");
3131 /* ngo' {GrAnSim}Qo' ngoq: RunnableThreadsTl = tso; */
3140 /* Different interface for GRAN */
/* Two thread-yield entry points (function headers are missing from this
   listing — original line numbers jump; presumably the GRAN interface and
   the standard interface mentioned in the comment above — confirm against
   the full source).  Both save the register liveness mask, arrange for the
   thread to resume at `Continue', log a QP "GR" event, and re-enter the
   scheduler via ReSchedule. */
3145 SAVE_Liveness = liveness;
3146 TSO_PC1(CurrentTSO) = Continue;
3148 QP_Event1("GR", CurrentTSO);
3150 ReSchedule(SAME_THREAD);
/* Second variant: `args' packs the liveness mask (args >> 1) and the
   ReSchedule argument (args & 1) into one word. */
3159 SAVE_Liveness = args >> 1;
3160 TSO_PC1(CurrentTSO) = Continue;
3162 QP_Event1("GR", CurrentTSO);
3165 if (RTSflags.ParFlags.granSimStats) {
3166 /* Note that CURRENT_TIME may perform an unsafe call */
3167 TSO_EXECTIME(CurrentTSO) += CURRENT_TIME - TSO_BLOCKEDAT(CurrentTSO);
3170 ReSchedule(args & 1);
3177 %****************************************************************************
3179 \subsection[gr-fetch]{Fetching Nodes (GrAnSim only)}
3181 %****************************************************************************
3183 The following GrAnSim routines simulate the fetching of nodes from a remote
3184 processor. We use a 1 word bitmask to indicate on which processor a node is
3185 lying. Thus, moving or copying a node from one processor to another just
3186 requires an appropriate change in this bitmask (using @SET_GA@).
3187 Additionally, the clocks have to be updated.
3189 A special case arises when the node that is needed by processor A has been
3190 moved from a processor B to a processor C between sending out a @FETCH@
3191 (from A) and its arrival at B. In that case the @FETCH@ has to be forwarded
3197 /* ngoqvam che' {GrAnSim}! */
3199 /* Fetch node "node" to processor "p" */
/* FetchNode: simulate fetching `node' from processor `from' to `to'
   (which must be CurrentProc).  Must not be entered in GrAnSim Light or
   with DoGUMMFetching (bulk fetching goes through UnpackGraph instead).
   Ownership is recorded in the PROCS bitmask: normal-form closures are
   copied (bit OR-ed in), others are moved (mask replaced).
   NOTE(review): original line numbers are non-consecutive — declarations,
   braces and the recursive child-fetch call are missing from this
   listing. */
3202 FetchNode(node,from,to)
3206 /* In case of RTSflags.GranFlags.DoGUMMFetching this fct should never be
3207 entered! Instead, UnpackGraph is used in ReSchedule */
3210 ASSERT(to==CurrentProc);
3212 # if defined(GRAN) && defined(GRAN_CHECK)
3213 if ( RTSflags.GranFlags.Light ) {
3214 fprintf(stderr,"Qagh {FetchNode}Daq: Should never be entered in GrAnSim Light setup\n");
3219 if ( RTSflags.GranFlags.DoGUMMFetching ) {
3220 fprintf(stderr,"Qagh: FetchNode should never be entered with DoGUMMFetching\n");
3224 /* Now fetch the children */
3225 if (!IS_LOCAL_TO(PROCS(node),from) &&
3226 !IS_LOCAL_TO(PROCS(node),to) )
3229 if(IS_NF(INFO_PTR(node))) /* Old: || IS_BQ(node) */
3230 PROCS(node) |= PE_NUMBER(to); /* Copy node */
3232 PROCS(node) = PE_NUMBER(to); /* Move node */
3237 /* --------------------------------------------------
3238 Cost of sending a packet of size n = C + P*n
3239 where C = packet construction constant,
3240 P = cost of packing one word into a packet
3241 [Should also account for multiple packets].
3242 -------------------------------------------------- */
3245 0 ... ok (FETCHREPLY event with a buffer containing addresses of the
3246 nearby graph has been scheduled)
3247 1 ... node is already local (fetched by somebody else; no event is
3249 2 ... fetch request has been forwarded to the PE that now contains the
3251 3 ... node is a black hole (BH, BQ or RBH); no event is scheduled, and
3252 the current TSO is put into the blocking queue of that node
3253 4 ... out of heap in PackNearbyGraph; GC should be triggered in calling
3254 function to guarantee that the tso and node inputs are valid
3255 (they may be moved during GC).
3257 ToDo: Symbolic return codes; clean up code (separate GUMMFetching from
3258 single node fetching).
/* HandleFetchRequest: process a FETCH for `node' issued by thread `tso'
   on processor `p'.  Return codes are documented in the comment block
   above (0 ok / 1 already local / 2 forwarded / 3 black hole / 4 out of
   heap in PackNearbyGraph).  Three cases:
   - node already local to p: reply immediately (packing a 1-node buffer
     when DoGUMMFetching);
   - node local to CurrentProc: either block on a black hole (GLOBALBLOCK
     event) or pack & send a FETCHREPLY, charging pack/latency/tidy costs;
   - node moved elsewhere: forward the FETCHNODE to its new home.
   NOTE(review): original line numbers are non-consecutive — declarations,
   return statements and several braces are missing from this listing. */
3262 HandleFetchRequest(node,p,tso)
3266 ASSERT(!RTSflags.GranFlags.Light);
3268 if (IS_LOCAL_TO(PROCS(node),p) ) /* Somebody else moved node already => */
3270 # if defined(GRAN_CHECK)
3271 if (RTSflags.GranFlags.debug & 0x100 ) {
3273 I_ size, ptrs, nonptrs, vhs;
3274 char info_hdr_ty[80];
3276 info_ptr = get_closure_info(node,
3277 &size, &ptrs, &nonptrs, &vhs,
3279 fprintf(stderr,"Warning: HandleFetchRequest entered with local node %#lx (%s) (PE %d)\n",
3280 node,info_hdr_ty,p);
3283 if (RTSflags.GranFlags.DoGUMMFetching) {
3287 /* Create a 1-node-buffer and schedule a FETCHREPLY now */
3288 graph = PackOneNode(node, tso, &size);
3289 new_event(p,CurrentProc,CurrentTime[CurrentProc],
3290 FETCHREPLY,tso,graph,NULL);
3292 new_event(p,CurrentProc,CurrentTime[CurrentProc],
3293 FETCHREPLY,tso,node,NULL);
3297 else if (IS_LOCAL_TO(PROCS(node),CurrentProc) ) /* Is node still here? */
3299 if(RTSflags.GranFlags.DoGUMMFetching) { /* {GUM}vo' ngoqvam vInIHta' (code from GUM) */
/* Black hole: the requesting thread must block rather than fetch. */
3303 if (IS_BLACK_HOLE(INFO_PTR(node))) { /* block on BH or RBH */
3304 new_event(p,CurrentProc,CurrentTime[p],
3305 GLOBALBLOCK,tso,node,NULL);
3306 /* Note: blockFetch is done when handling GLOBALBLOCK event */
3307 /* When this thread is reawoken it does the usual: it tries to
3308 enter the updated node and issues a fetch if it's remote.
3309 It has forgotten that it has sent a fetch already (i.e. a
3310 FETCHNODE is swallowed by a BH, leaving the thread in a BQ */
3311 --OutstandingFetches[p];
3315 # if defined(GRAN_CHECK)
3316 if (!RTSflags.GranFlags.DoReScheduleOnFetch && (tso != RunnableThreadsHd[p])) {
3317 fprintf(stderr,"Qagh {HandleFetchRequest}Daq: tso 0x%lx (%x) not at head of proc %d (0x%lx)\n",
3318 tso, TSO_ID(tso), p, RunnableThreadsHd[p]);
/* Pack the nearby graph; NULL means heap exhaustion — caller must GC
   (return code 4, see contract above). */
3323 if ((graph = PackNearbyGraph(node, tso, &size)) == NULL)
3324 return (4); /* out of heap */
3326 /* Actual moving/copying of node is done on arrival; see FETCHREPLY */
3327 /* Send a reply to the originator */
3328 /* ToDo: Replace that by software costs for doing graph packing! */
3329 CurrentTime[CurrentProc] += size * RTSflags.GranFlags.gran_mpacktime;
3331 new_event(p,CurrentProc,CurrentTime[CurrentProc]+RTSflags.GranFlags.gran_latency,
3332 FETCHREPLY,tso,graph,NULL);
3334 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
3336 } else { /* incremental (single closure) fetching */
3337 /* Actual moving/copying of node is done on arrival; see FETCHREPLY */
3338 /* Send a reply to the originator */
3339 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
3341 new_event(p,CurrentProc,CurrentTime[CurrentProc]+RTSflags.GranFlags.gran_latency,
3342 FETCHREPLY,tso,node,NULL);
3344 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
3348 else /* Qu'vatlh! node has been grabbed by another proc => forward */
3350 PROC p_new = where_is(node);
3353 # if defined(GRAN_CHECK)
3354 if (RTSflags.GranFlags.debug & 0x2)
3355 fprintf(stderr,"Qu'vatlh! node %#lx has been grabbed by PE %d (current=%d; demander=%d) @ %d\n",
3356 node,p_new,CurrentProc,p,CurrentTime[CurrentProc]);
3358 /* Prepare FORWARD message to proc p_new */
3359 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
3361 fetchtime = STG_MAX(CurrentTime[CurrentProc],CurrentTime[p_new]) +
3362 RTSflags.GranFlags.gran_latency;
3364 new_event(p_new,p,fetchtime,FETCHNODE,tso,node,NULL);
3366 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
3374 @blockFetch@ blocks a @BlockedFetch@ node on some kind of black hole.
3376 Taken from gum/HLComms.lc. [find a better place for that ?] -- HWL
3378 {\bf Note:} In GranSim we don't have @FETCHME@ nodes and therefore don't
3379 create @FMBQ@'s (FetchMe blocking queues) to cope with global
3380 blocking. Instead, non-local TSO are put into the BQ in the same way as
3381 local TSOs. However, we have to check if a TSO is local or global in order
3382 to account for the latencies involved and for keeping track of the number
3383 of fetches that are really going on.
3389 0 ... ok; tso is now at beginning of BQ attached to the bh closure
3390 1 ... the bh closure is no BH any more; tso is immediately unblocked
/* blockFetch: block `tso' (which was running on `proc') on the black-hole
   closure `bh' (BH, RBH or BQ).  Return codes documented above: 0 means
   the tso is now at the head of bh's blocking queue, 1 means bh is no
   longer a black hole and the tso is immediately unblocked via an
   UNBLOCKTHREAD event.  In block-on-fetch mode the tso must first be taken
   off the thread queue (GranSimBlock); in reschedule-on-fetch mode it is
   only linked into the BQ, with block statistics updated here.
   NOTE(review): original line numbers are non-consecutive — declarations,
   returns, break statements and several braces are missing from this
   listing. */
3394 blockFetch(tso, proc, bh)
3395 P_ tso; /* TSO which gets blocked */
3396 PROC proc; /* PE where that tso was running */
3397 P_ bh; /* closure to block on (BH, RBH, BQ) */
3399 # if defined(GRAN_CHECK)
3400 if ( RTSflags.GranFlags.debug & 0x100 ) {
3402 I_ size, ptrs, nonptrs, vhs;
3403 char info_hdr_ty[80];
3405 info_ptr = get_closure_info(bh,
3406 &size, &ptrs, &nonptrs, &vhs,
3408 fprintf(stderr,"Blocking TSO %#lx (%x)(PE %d) on node %#lx (%s) (PE %d). No graph is packed!\n",
3409 tso, TSO_ID(tso), proc, bh, info_hdr_ty, where_is(bh));
3412 if ( !RTSflags.GranFlags.DoReScheduleOnFetch && (tso != RunnableThreadsHd[proc]) ) {
3413 fprintf(stderr,"Qagh {blockFetch}Daq: TSO 0x%lx (%x) is not first on runnable list of proc %d (first is 0x%lx)\n",
3414 tso,TSO_ID(tso),proc,RunnableThreadsHd[proc]);
/* Case 1: bh has been updated meanwhile — no blocking needed. */
3419 if (!IS_BLACK_HOLE(INFO_PTR(bh))) { /* catches BHs and RBHs */
3420 # if defined(GRAN_CHECK)
3421 if ( RTSflags.GranFlags.debug & 0x100 ) {
3423 W_ size, ptrs, nonptrs, vhs;
3424 char str[80], junk_str[80];
3426 info = get_closure_info(bh, &size, &ptrs, &nonptrs, &vhs, str);
3427 fprintf(stderr,"blockFetch: node %#lx (%s) is not a BH => awakening TSO %#lx (%x) (PE %u)\n",
3428 bh, str, tso, TSO_ID(tso), proc);
3432 /* No BH anymore => immediately unblock tso */
3433 new_event(proc,proc,CurrentTime[proc],
3434 UNBLOCKTHREAD,tso,bh,NULL);
3436 /* Is this always a REPLY to a FETCH in the profile ? */
3437 if (RTSflags.GranFlags.granSimStats)
3438 DumpRawGranEvent(proc,proc,GR_REPLY,tso,bh,0);
3442 /* DaH {BQ}Daq Qu' Suq 'e' wISov!
3443 Now we know that we have to put the tso into the BQ.
3444 2 case: If block-on-fetch, tso is at head of threadq =>
3445 => take it out of threadq and into BQ
3446 If reschedule-on-fetch, tso is only pointed to be event
3447 => just put it into BQ
3449 if (!RTSflags.GranFlags.DoReScheduleOnFetch) { /* block-on-fetch */
3450 GranSimBlock(tso, proc, bh); /* get tso out of threadq & activate next
3451 thread (same as in BQ_entry) */
3452 } else { /* reschedule-on-fetch */
3453 if(RTSflags.GranFlags.granSimStats)
3454 DumpRawGranEvent(proc,where_is(bh),GR_BLOCK,tso,bh,0);
3456 ++TSO_BLOCKCOUNT(tso);
3457 TSO_BLOCKEDAT(tso) = CurrentTime[proc];
3460 ASSERT(TSO_LINK(tso)==PrelBase_Z91Z93_closure);
/* Case 2: link tso into the closure's blocking queue, by closure kind.
   A plain BH is first converted into a BQ closure. */
3462 /* Put tso into BQ */
3463 switch (INFO_TYPE(INFO_PTR(bh))) {
3465 case INFO_BH_U_TYPE:
3466 TSO_LINK(tso) = PrelBase_Z91Z93_closure;
3467 SET_INFO_PTR(bh, BQ_info);
3468 BQ_ENTRIES(bh) = (W_) tso;
3470 #ifdef GC_MUT_REQUIRED
3472 * If we modify a black hole in the old generation, we have to make
3473 * sure it goes on the mutables list
3476 if (bh <= StorageMgrInfo.OldLim) {
3477 MUT_LINK(bh) = (W_) StorageMgrInfo.OldMutables;
3478 StorageMgrInfo.OldMutables = bh;
3480 MUT_LINK(bh) = MUT_NOT_LINKED;
3484 /* BF_LINK(bf) = (P_) BQ_ENTRIES(bh); */
3485 TSO_LINK(tso) = (P_) BQ_ENTRIES(bh);
3486 BQ_ENTRIES(bh) = (W_) tso;
3488 case INFO_FMBQ_TYPE:
3489 fprintf(stderr,"ERROR: FMBQ closure (%#lx) found in GrAnSim (TSO=%#lx (%x))\n",
3490 bh, tso, TSO_ID(tso));
3492 case INFO_SPEC_RBH_TYPE:
3493 /* BF_LINK(bf) = (P_) BQ_ENTRIES(bh); */
3494 TSO_LINK(tso) = (P_) SPEC_RBH_BQ(bh);
3495 SPEC_RBH_BQ(bh) = (W_) tso;
3497 case INFO_GEN_RBH_TYPE:
3498 /* BF_LINK(bf) = (P_) BQ_ENTRIES(bh); */
3499 TSO_LINK(tso) = (P_) GEN_RBH_BQ(bh);
3500 GEN_RBH_BQ(bh) = (W_) tso;
3505 I_ size, ptrs, nonptrs, vhs;
3506 char info_hdr_ty[80];
3508 fprintf(stderr, "Panic: thought %#lx was a black hole (IP %#lx)",
3510 # if defined(GRAN_CHECK)
3511 info_ptr = get_closure_info(bh,
3512 &size, &ptrs, &nonptrs, &vhs,
3514 fprintf(stderr, " %s\n",info_hdr_ty);
3526 %****************************************************************************
3528 \subsection[qp-profile]{Quasi-Parallel Profiling}
3530 %****************************************************************************
3533 /* ToDo: Check if this is really still used anywhere!? */
3538 /* *Virtual* Time in milliseconds */
/* qp_elapsed_time: return the *virtual* elapsed time in milliseconds for
   QP profiling.  Two conditionally compiled variants appear below: one
   based on process user time, the other on the GrAnSim simulated clock
   of the current processor (the #if/#else lines between them are missing
   from this listing). */
3541 qp_elapsed_time(STG_NO_ARGS)
3543 extern StgDouble usertime();
3545 return ((long) (usertime() * 1e3));
3549 qp_elapsed_time(STG_NO_ARGS)
3551 return ((long) CurrentTime[CurrentProc] );
/* init_qp_profiling: open the quasi-parallel profile report file (name
   built from prog_argv[0] via QP_FILENAME_FMT) and write its header: the
   full command line, the relevant +RTS options, and a timestamp.
   NOTE(review): error-path and closing lines are missing from this
   listing (original line numbers jump). */
3556 init_qp_profiling(STG_NO_ARGS)
3559 char qp_filename[STATS_FILENAME_MAXLEN];
3561 sprintf(qp_filename, QP_FILENAME_FMT, prog_argv[0]);
3562 if ((qp_file = fopen(qp_filename,"w")) == NULL ) {
3563 fprintf(stderr, "Can't open quasi-parallel profile report file %s\n",
/* Header: program name and arguments, one space-separated line. */
3567 fputs(prog_argv[0], qp_file);
3568 for(i = 1; prog_argv[i]; i++) {
3569 fputc(' ', qp_file);
3570 fputs(prog_argv[i], qp_file);
3572 fprintf(qp_file, " +RTS -C%d -t%d\n"
3573 , RTSflags.ConcFlags.ctxtSwitchTime
3574 , RTSflags.ConcFlags.maxThreads);
3576 fputs(time_str(), qp_file);
3577 fputc('\n', qp_file);
/* QP event loggers: write one line per event to qp_file, timestamped with
   qp_elapsed_time().  QP_Event0 records a thread/closure pair, QP_Event1 a
   one-thread event, QP_Event2 a two-thread event (e.g. resume by another
   thread).  Function headers' parameter declarations are missing from this
   listing (original line numbers jump). */
3582 QP_Event0(tid, node)
3586 fprintf(qp_file, "%lu ** %lu 0x%lx\n", qp_elapsed_time(), tid, INFO_PTR(node));
3590 QP_Event1(event, tso)
3594 fprintf(qp_file, "%lu %s %lu 0x%lx\n", qp_elapsed_time(), event,
3595 TSO_ID(tso), TSO_NAME(tso));
3599 QP_Event2(event, tso1, tso2)
3603 fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n", qp_elapsed_time(), event,
3604 TSO_ID(tso1), TSO_NAME(tso1), TSO_ID(tso2), TSO_NAME(tso2));
3609 %****************************************************************************
3611 \subsection[gc-GrAnSim]{Garbage collection routines for GrAnSim objects}
3613 %****************************************************************************
3615 Garbage collection code for the event queue. We walk the event queue
3616 so that if the only reference to a TSO is in some event (e.g. RESUME),
3617 the TSO is still preserved.
3619 The GC code now uses a breadth-first pruning strategy. This prevents
3620 the GC from keeping all sparks of the low-numbered PEs while discarding all
3621 sparks from high-numbered PEs. Such a depth-first pruning may have
3622 disastrous effects for programs that generate a huge number of sparks!
3627 extern smInfo StorageMgrInfo;
3629 /* Auxiliary functions needed in Save/RestoreSparkRoots if breadth-first */
3630 /* pruning is done. */
/* arr_and: fold the first `max` words of `arr` together, seeded with
 * arr[0].  The loop body is not visible in this listing -- presumably
 * `res &= arr[i];` given the name and the callers, which use the
 * result to test "bit set in every element" -- TODO confirm.
 */
3633 arr_and(W_ arr[], I_ max)
3638 /* Doesn't work with max==0; but then, many things don't work in this */
3640 for (i=1, res = arr[0]; i<max; i++)
/* arr_max: return the maximum of the first `max` words of `arr`.
 * Seeded with arr[0], so max must be >= 1 (see note below). */
3647 arr_max(W_ arr[], I_ max)
3652 /* Doesn't work with max==0; but then, many things don't work in this */
3654 for (i=1, res = arr[0]; i<max; i++)
3655 res = (arr[i]>res) ? arr[i] : res;
3661 Routines working on spark queues.
3662 It would be a good idea to turn these into an abstract data type (ADT)!
/* spark_queue_len: walk the spark queue of (proc, pool) and return its
 * length.  Under GRAN_CHECK with debug flag 0x1000 it also verifies
 * that PendingSparksTl really points at the last element of the queue.
 */
3666 spark_queue_len(PROC proc, I_ pool)
3668 sparkq prev, spark; /* prev only for testing !! */
3671 for (len = 0, prev = NULL, spark = PendingSparksHd[proc][pool];
3673 len++, prev = spark, spark = SPARK_NEXT(spark))
3676 # if defined(GRAN_CHECK)
3677 if ( RTSflags.GranFlags.debug & 0x1000 )
/* After the walk `prev` is the last spark reached; it must coincide
 * with the recorded tail pointer, else the queue is corrupt. */
3678 if ( (prev!=NULL) && (prev!=PendingSparksTl[proc][pool]) )
3679 fprintf(stderr,"ERROR in spark_queue_len: (PE %u, pool %u) PendingSparksTl (%#lx) not end of queue (%#lx)\n",
3680 proc, pool, PendingSparksTl[proc][pool], prev);
/* delete_from_spark_queue: unlink `spark` (whose predecessor is `prev`;
 * prev==NULL means `spark` is the head) from the doubly-linked spark
 * queue and dispose of it, keeping SparkQueueHd/SparkQueueTl
 * consistent.  Debug flag 0x10000 traces the links before and after.
 */
3687 delete_from_spark_queue (prev,spark) /* unlink and dispose spark */
3689 { /* Global Vars: CurrentProc, SparkQueueHd, SparkQueueTl */
3692 # if defined(GRAN_CHECK)
3693 if ( RTSflags.GranFlags.debug & 0x10000 ) {
3694 fprintf(stderr,"** |%#x:%#x| prev=%#x->(%#x), (%#x)<-spark=%#x->(%#x) <-(%#x)\n",
3695 SparkQueueHd, SparkQueueTl,
3696 prev, (prev==NULL ? 0 : SPARK_NEXT(prev)),
3697 SPARK_PREV(spark), spark, SPARK_NEXT(spark),
3698 (SPARK_NEXT(spark)==NULL ? 0 : SPARK_PREV(SPARK_NEXT(spark))));
/* Unlink: either advance the head or bridge prev -> next. */
3702 tmp = SPARK_NEXT(spark);
3704 SparkQueueHd = SPARK_NEXT(spark);
3706 SPARK_NEXT(prev) = SPARK_NEXT(spark);
/* Fix up the tail pointer, or the back pointer of the successor. */
3708 if (SPARK_NEXT(spark)==NULL) {
3709 SparkQueueTl = prev;
3711 SPARK_PREV(SPARK_NEXT(spark)) = prev;
/* An emptied queue has a NULL tail as well as a NULL head. */
3713 if(SparkQueueHd == NULL)
3714 SparkQueueTl = NULL;
/* Detach the spark completely before disposing of it. */
3715 SPARK_NEXT(spark) = NULL;
3717 DisposeSpark(spark);
3720 # if defined(GRAN_CHECK)
3721 if ( RTSflags.GranFlags.debug & 0x10000 ) {
3722 fprintf(stderr,"## prev=%#x->(%#x)\n",
3723 prev, (prev==NULL ? 0 : SPARK_NEXT(prev)));
3730 /* NB: These functions have been replaced by functions:
3731 EvacuateEvents, EvacuateSparks, (in ../storage/SMcopying.lc)
3732 LinkEvents, LinkSparks (in ../storage/SMcompacting.lc)
3733 Thus, GrAnSim does not need additional entries in the list of roots
/* SaveEventRoots: walk the global event queue and push every closure
 * pointer held in an event (TSOs, spark nodes, fetched packet
 * contents) onto StorageMgrInfo.roots, starting at index
 * num_ptr_roots.  Returns the new root count.  The pointers are later
 * restored in *reverse* order by RestoreEvtRoots, whose case analysis
 * must mirror this one exactly.
 */
3738 SaveEventRoots(num_ptr_roots)
3741 eventq event = EventHd;
3742 while(event != NULL)
/* Thread-scheduling events carry just a TSO. */
3744 if(EVENT_TYPE(event) == RESUMETHREAD ||
3745 EVENT_TYPE(event) == MOVETHREAD ||
3746 EVENT_TYPE(event) == CONTINUETHREAD ||
3747 /* EVENT_TYPE(event) >= CONTINUETHREAD1 || */
3748 EVENT_TYPE(event) == STARTTHREAD )
3749 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
/* A MOVESPARK event keeps its spark's node alive. */
3751 else if(EVENT_TYPE(event) == MOVESPARK)
3752 StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(EVENT_SPARK(event));
3754 else if (EVENT_TYPE(event) == FETCHNODE ||
3755 EVENT_TYPE(event) == FETCHREPLY )
3757 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
3758 /* In the case of packet fetching, EVENT_NODE(event) points to */
3759 /* the packet (currently, malloced). The packet is just a list of */
3760 /* closure addresses, with the length of the list at index 1 (the */
3761 /* structure of the packet is defined in Pack.lc). */
3762 if ( RTSflags.GranFlags.DoGUMMFetching && (EVENT_TYPE(event)==FETCHREPLY)) {
3763 P_ buffer = (P_) EVENT_NODE(event);
3764 int size = (int) buffer[PACK_SIZE_LOCN], i;
/* Save every closure address carried in the packet payload. */
3766 for (i = PACK_HDR_SIZE; i <= size-1; i++) {
3767 StorageMgrInfo.roots[num_ptr_roots++] = (P_) buffer[i];
/* Non-packet case: presumably the else-branch saving the node itself
 * (the branch lines are missing from this listing -- TODO confirm). */
3770 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_NODE(event);
/* GLOBALBLOCK events carry both a TSO and a node; saved TSO first. */
3772 else if (EVENT_TYPE(event) == GLOBALBLOCK)
3774 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
3775 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_NODE(event);
3777 else if (EVENT_TYPE(event) == UNBLOCKTHREAD)
3779 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
3781 event = EVENT_NEXT(event);
3783 return(num_ptr_roots);
3786 #if defined(DEPTH_FIRST_PRUNING)
3787 /* Is it worthwhile keeping the depth-first pruning code !? -- HWL */
/* SaveSparkRoots (depth-first variant): save the nodes of up to
 * MAX_SPARKS sparks as GC roots, walking each processor's pools in
 * order; any excess sparks are pruned (node overwritten with the "[]"
 * closure) and their queue tail disposed.  Because low-numbered PEs
 * are visited first, they keep their sparks at the expense of
 * high-numbered PEs -- hence the breadth-first alternative below.
 * Returns the new root count.
 */
3790 SaveSparkRoots(num_ptr_roots)
3793 sparkq spark, /* prev, */ disposeQ=NULL;
3795 I_ i, sparkroots=0, prunedSparks=0;
3796 I_ tot_sparks[MAX_PROC], tot = 0;;
/* NOTE(review): stray second ';' above -- harmless, should be removed. */
3798 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3799 tot_sparks[proc] = 0;
3800 for(i = 0; i < SPARK_POOLS; ++i) {
3801 for(/* prev = &PendingSparksHd[proc][i],*/ spark = PendingSparksHd[proc][i];
3803 /* prev = &SPARK_NEXT(spark), */ spark = SPARK_NEXT(spark))
/* Within budget: save this spark's node as a GC root. */
3805 if(++sparkroots <= MAX_SPARKS)
3807 if ( RTSflags.GcFlags.giveStats )
3808 if (i==ADVISORY_POOL) {
3812 StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(spark);
/* Over budget: prune -- overwrite the node with the empty-list
 * closure and remember where the tail-to-dispose starts. */
3816 SPARK_NODE(spark) = PrelBase_Z91Z93_closure;
3817 if (prunedSparks==0) {
3825 } /* forall spark ... */
3826 if ( (RTSflags.GcFlags.giveStats) && (prunedSparks>0) ) {
3827 fprintf(RTSflags.GcFlags.statsFile,"Pruning and disposing %lu excess sparks (> %lu) on proc %d for GC purposes\n",
3828 prunedSparks,MAX_SPARKS,proc);
/* Cut the pruned tail off the pending queue and free it. */
3829 if (disposeQ == PendingSparksHd[proc][i])
3830 PendingSparksHd[proc][i] = NULL;
3832 SPARK_NEXT(SPARK_PREV(disposeQ)) = NULL;
3833 DisposeSparkQ(disposeQ);
3837 } /* forall i ... */
3838 } /*forall proc .. */
/* Optional statistics report: surviving sparks per PE. */
3840 if ( RTSflags.GcFlags.giveStats ) {
3841 fprintf(RTSflags.GcFlags.statsFile,
3842 "Spark statistics (after pruning) (total sparks = %d):",tot);
3843 for (proc=0; proc<RTSflags.GranFlags.proc; proc++) {
3845 fprintf(RTSflags.GcFlags.statsFile,"\n> ");
3846 fprintf(RTSflags.GcFlags.statsFile,"\tPE %d: %d ",proc,tot_sparks[proc]);
3848 fprintf(RTSflags.GcFlags.statsFile,".\n");
3851 return(num_ptr_roots);
3854 #else /* !DEPTH_FIRST_PRUNING */
3856 /* In case of an excessive number of sparks, depth first pruning is a Bad */
3857 /* Idea as we might end up with all remaining sparks on processor 0 and */
3858 /* none on the other processors. So, this version uses breadth first */
3859 /* pruning. -- HWL */
/* SaveSparkRoots (breadth-first variant): save spark nodes as GC roots
 * by taking one spark from every (proc, pool) queue per round, so the
 * MAX_SPARKS budget is spread fairly across processors instead of
 * favouring low-numbered PEs.  `endQueues`/`finishedQueues` are bit
 * masks (one bit per PE, indexed per pool) recording which queues are
 * exhausted; each phase's rounds stop when arr_and() reports all PE
 * bits set in every pool's mask.  Returns the new root count.
 */
3862 SaveSparkRoots(num_ptr_roots)
3866 curr_spark[MAX_PROC][SPARK_POOLS];
3869 endQueues[SPARK_POOLS], finishedQueues[SPARK_POOLS];
3871 prunedSparks[MAX_PROC][SPARK_POOLS];
3872 I_ tot_sparks[MAX_PROC], tot = 0;;
/* NOTE(review): stray second ';' above -- harmless, should be removed. */
3875 # if defined(GRAN_CHECK) && defined(GRAN)
3876 if ( RTSflags.GranFlags.debug & 0x40 )
3877 fprintf(stderr,"D> Saving spark roots for GC ...\n");
/* Initialise per-queue cursors and the all-PEs bit mask. */
3881 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3882 allProcs |= PE_NUMBER(proc);
3883 tot_sparks[proc] = 0;
3884 for(i = 0; i < SPARK_POOLS; ++i) {
3885 curr_spark[proc][i] = PendingSparksHd[proc][i];
3886 prunedSparks[proc][i] = 0;
3888 finishedQueues[i] = 0;
3892 /* Breadth first pruning */
3894 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3895 for(i = 0; i < SPARK_POOLS; ++i) {
3896 spark = curr_spark[proc][i];
3897 if ( spark != NULL ) {
3899 if(++sparkroots <= MAX_SPARKS)
3901 # if defined(GRAN_CHECK) && defined(GRAN)
3902 if ( (RTSflags.GranFlags.debug & 0x1000) &&
3903 (RTSflags.GcFlags.giveStats) )
3904 fprintf(RTSflags.GcFlags.statsFile,"Saving Spark Root %d(proc: %d; pool: %d): 0x%lx \t(info ptr=%#lx)\n",
3905 num_ptr_roots,proc,i,SPARK_NODE(spark),
3906 INFO_PTR(SPARK_NODE(spark)));
3908 if ( RTSflags.GcFlags.giveStats )
3909 if (i==ADVISORY_POOL) {
/* Save this spark's node and advance this queue's cursor. */
3913 StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(spark);
3914 curr_spark[proc][i] = spark = SPARK_NEXT(spark);
3916 else /* sparkroots > MAX_SPARKS */
/* Budget exhausted: truncate this pending queue just before the
 * cursor and mark the queue as ended. */
3918 if (curr_spark[proc][i] == PendingSparksHd[proc][i])
3919 PendingSparksHd[proc][i] = NULL;
3921 SPARK_NEXT(SPARK_PREV(curr_spark[proc][i])) = NULL;
3922 PendingSparksTl[proc][i] = SPARK_PREV(curr_spark[proc][i]);
3923 endQueues[i] |= PE_NUMBER(proc);
3925 } else { /* spark == NULL ; actually, this only has to be done once */
3926 endQueues[i] |= PE_NUMBER(proc);
3930 } while (arr_and(endQueues,SPARK_POOLS) != allProcs);
3932 /* The buffer for spark roots in StorageMgrInfo.roots is full */
3933 /* now. Prune all sparks on all processors starting with */
3934 /* curr_spark[proc][i]. */
/* Second phase: dispose of every spark the budget did not cover. */
3937 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3938 for(i = 0; i < SPARK_POOLS; ++i) {
3939 spark = curr_spark[proc][i];
3941 if ( spark != NULL ) {
3942 SPARK_NODE(spark) = PrelBase_Z91Z93_closure;
3943 curr_spark[proc][i] = SPARK_NEXT(spark);
3945 prunedSparks[proc][i]++;
3946 DisposeSpark(spark);
3948 finishedQueues[i] |= PE_NUMBER(proc);
3952 } while (arr_and(finishedQueues,SPARK_POOLS) != allProcs);
/* Optional statistics: pruned counts and surviving sparks per PE. */
3955 # if defined(GRAN_CHECK) && defined(GRAN)
3956 if ( RTSflags.GranFlags.debug & 0x1000) {
3957 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3958 for(i = 0; i < SPARK_POOLS; ++i) {
3959 if ( (RTSflags.GcFlags.giveStats) && (prunedSparks[proc][i]>0)) {
3960 fprintf(RTSflags.GcFlags.statsFile,
3961 "Discarding %lu sparks on proc %d (pool %d) for GC purposes\n",
3962 prunedSparks[proc][i],proc,i);
3967 if ( RTSflags.GcFlags.giveStats ) {
3968 fprintf(RTSflags.GcFlags.statsFile,
3969 "Spark statistics (after discarding) (total sparks = %d):",tot);
3970 for (proc=0; proc<RTSflags.GranFlags.proc; proc++) {
3972 fprintf(RTSflags.GcFlags.statsFile,"\n> ");
3973 fprintf(RTSflags.GcFlags.statsFile,
3974 "\tPE %d: %d ",proc,tot_sparks[proc]);
3976 fprintf(RTSflags.GcFlags.statsFile,".\n");
3981 return(num_ptr_roots);
3984 #endif /* DEPTH_FIRST_PRUNING */
3987 GC roots must be restored in *reverse order*.
3988 The recursion is a little ugly, but is better than
3989 in-place pointer reversal.
/* RestoreEvtRoots: recurse to the end of the event queue first, then
 * pop roots on the way back, so pointers are restored in exactly the
 * reverse of the order SaveEventRoots pushed them (see the note above:
 * roots must be restored in *reverse order*).  The case analysis must
 * mirror SaveEventRoots exactly.  Returns the remaining root count.
 */
3993 RestoreEvtRoots(event,num_ptr_roots)
/* Handle the tail of the queue before this event. */
3999 num_ptr_roots = RestoreEvtRoots(EVENT_NEXT(event),num_ptr_roots);
4001 if(EVENT_TYPE(event) == RESUMETHREAD ||
4002 EVENT_TYPE(event) == MOVETHREAD ||
4003 EVENT_TYPE(event) == CONTINUETHREAD ||
4004 /* EVENT_TYPE(event) >= CONTINUETHREAD1 || */
4005 EVENT_TYPE(event) == STARTTHREAD )
4006 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
4008 else if(EVENT_TYPE(event) == MOVESPARK )
4009 SPARK_NODE(EVENT_SPARK(event)) = StorageMgrInfo.roots[--num_ptr_roots];
4011 else if (EVENT_TYPE(event) == FETCHNODE ||
4012 EVENT_TYPE(event) == FETCHREPLY )
/* Packet contents were saved forwards, so restore them backwards. */
4014 if ( RTSflags.GranFlags.DoGUMMFetching && (EVENT_TYPE(event)==FETCHREPLY)) {
4015 P_ buffer = (P_) EVENT_NODE(event);
4016 int size = (int) buffer[PACK_SIZE_LOCN], i;
4018 for (i = size-1; i >= PACK_HDR_SIZE; i--) {
4019 buffer[i] = StorageMgrInfo.roots[--num_ptr_roots];
4022 EVENT_NODE(event) = StorageMgrInfo.roots[--num_ptr_roots];
4024 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
4026 else if (EVENT_TYPE(event) == GLOBALBLOCK)
/* NODE before TSO: the reverse of the order they were saved in. */
4028 EVENT_NODE(event) = StorageMgrInfo.roots[--num_ptr_roots];
4029 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
4031 else if (EVENT_TYPE(event) == UNBLOCKTHREAD)
4033 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
4036 return(num_ptr_roots);
/* RestoreEventRoots: public entry point -- restore all event-held
 * pointers from the GC roots array, starting at the event queue head. */
4040 RestoreEventRoots(num_ptr_roots)
4043 return(RestoreEvtRoots(EventHd,num_ptr_roots));
4046 #if defined(DEPTH_FIRST_PRUNING)
/* RestoreSpkRoots: recursive counterpart of the depth-first
 * SaveSparkRoots -- recurse to the end of the spark queue, then pop
 * roots on the way back so nodes come off the roots array in the
 * reverse of the save order.  Only the first MAX_SPARKS sparks were
 * saved; exceeding that limit here is reported under debug flag 0x40.
 * Returns the remaining root count.
 */
4049 RestoreSpkRoots(spark,num_ptr_roots,sparkroots)
4051 I_ num_ptr_roots, sparkroots;
4055 num_ptr_roots = RestoreSpkRoots(SPARK_NEXT(spark),num_ptr_roots,++sparkroots);
4056 if(sparkroots <= MAX_SPARKS)
/* This spark was saved: restore its node from the roots array. */
4058 P_ n = SPARK_NODE(spark);
4059 SPARK_NODE(spark) = StorageMgrInfo.roots[--num_ptr_roots];
4060 # if defined(GRAN_CHECK) && defined(GRAN)
4061 if ( RTSflags.GranFlags.debug & 0x40 )
4062 fprintf(RTSflags.GcFlags.statsFile,
4063 "Restoring Spark Root %d: 0x%lx \t(info ptr=%#lx\n",
4064 num_ptr_roots,SPARK_NODE(spark),
4065 INFO_PTR(SPARK_NODE(spark)));
/* Past the save limit: nothing was saved for this spark; complain
 * under the debug flag -- the save phase should have pruned it. */
4068 # if defined(GRAN_CHECK) && defined(GRAN)
4070 if ( RTSflags.GranFlags.debug & 0x40 )
4071 fprintf(RTSflags.GcFlags.statsFile,
4072 "Error in RestoreSpkRoots (%d; @ spark %#lx): More than MAX_SPARKS (%d) sparks\n",
4073 num_ptr_roots,SPARK_NODE(spark),MAX_SPARKS);
4077 return(num_ptr_roots);
/* RestoreSparkRoots (depth-first variant): restore spark nodes by
 * iterating processors and pools in *reverse* order, mirroring the
 * forward order used by the depth-first SaveSparkRoots, and delegating
 * each queue to the recursive RestoreSpkRoots.  Not applicable under
 * GRAN_JSM_SPARKS (no such queues exist there).  Returns the
 * remaining root count.
 */
4081 RestoreSparkRoots(num_ptr_roots)
4087 #if defined(GRAN_JSM_SPARKS)
4088 fprintf(stderr,"Error: RestoreSparkRoots should never be entered in a JSM style sparks set-up\n");
4092 /* NB: PROC is currently an unsigned datatype i.e. proc>=0 is always */
4093 /* true ((PROC)-1 == (PROC)255). So we need a second clause in the head */
4094 /* of the for loop. For i that is currently not necessary. C is really */
4095 /* impressive in datatype abstraction! -- HWL */
4097 for(proc = RTSflags.GranFlags.proc - 1; (proc >= 0) && (proc < RTSflags.GranFlags.proc); --proc) {
4098 for(i = SPARK_POOLS - 1; (i >= 0) && (i < SPARK_POOLS) ; --i) {
4099 num_ptr_roots = RestoreSpkRoots(PendingSparksHd[proc][i],num_ptr_roots,0);
4102 return(num_ptr_roots);
4105 #else /* !DEPTH_FIRST_PRUNING */
/* RestoreSparkRoots (breadth-first variant): restore spark nodes in
 * the reverse of the breadth-first save order.  Round `len` restores
 * the len-th-from-last spark of every queue at least that long, in
 * reverse proc/pool order, walking each queue backwards from its tail
 * via SPARK_PREV.  Returns the remaining root count.
 */
4108 RestoreSparkRoots(num_ptr_roots)
4112 curr_spark[MAX_PROC][SPARK_POOLS];
4114 I_ i, max_len, len, pool, count,
4115 queue_len[MAX_PROC][SPARK_POOLS];
4117 /* NB: PROC is currently an unsigned datatype i.e. proc>=0 is always */
4118 /* true ((PROC)-1 == (PROC)255). So we need a second clause in the head */
4119 /* of the for loop. For i that is currently not necessary. C is really */
4120 /* impressive in datatype abstraction! -- HWL */
/* Measure every queue and start each cursor at its tail.
 * NOTE(review): max_len looks like it is read below before being set;
 * confirm an initialisation (e.g. max_len=0) exists among the lines
 * missing from this listing -- otherwise this is undefined behavior. */
4123 for (proc=0; proc < RTSflags.GranFlags.proc; proc++) {
4124 for (i=0; i<SPARK_POOLS; i++) {
4125 curr_spark[proc][i] = PendingSparksTl[proc][i];
4126 queue_len[proc][i] = spark_queue_len(proc,i);
4127 max_len = (queue_len[proc][i]>max_len) ? queue_len[proc][i] : max_len;
/* Longest queues are reached first when counting rounds down. */
4131 for (len=max_len; len > 0; len--){
4132 for(proc = RTSflags.GranFlags.proc - 1; (proc >= 0) && (proc < RTSflags.GranFlags.proc); --proc) {
4133 for(i = SPARK_POOLS - 1; (i >= 0) && (i < SPARK_POOLS) ; --i) {
4134 if (queue_len[proc][i]>=len) {
4135 spark = curr_spark[proc][i];
4136 SPARK_NODE(spark) = StorageMgrInfo.roots[--num_ptr_roots];
4137 # if defined(GRAN_CHECK) && defined(GRAN)
4139 if ( (RTSflags.GranFlags.debug & 0x1000) &&
4140 (RTSflags.GcFlags.giveStats) )
4141 fprintf(RTSflags.GcFlags.statsFile,
4142 "Restoring Spark Root %d (PE %u, pool %u): 0x%lx \t(info ptr=%#lx)\n",
4143 num_ptr_roots,proc,i,SPARK_NODE(spark),
4144 INFO_PTR(SPARK_NODE(spark)));
/* Step this queue's cursor one element towards its head. */
4146 curr_spark[proc][i] = SPARK_PREV(spark);
4148 num_ptr_roots = RestoreSpkRoots(PendingSparksHd[proc][i],
4155 # if defined(GRAN_CHECK) && defined(GRAN)
4156 if ( (RTSflags.GranFlags.debug & 0x1000) && (RTSflags.GcFlags.giveStats) )
4157 fprintf(RTSflags.GcFlags.statsFile,"Number of restored spark roots: %d\n",
4160 return(num_ptr_roots);
4163 #endif /* DEPTH_FIRST_PRUNING */
4169 #endif /* CONCURRENT */ /* the whole module! */