2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1995
4 %************************************************************************
6 \section[Threads.lc]{Thread Control Routines}
8 %************************************************************************
10 %************************************************************************
12 \subsection[thread-overview]{Overview of the Thread Management System}
14 %************************************************************************
16 %************************************************************************
18 \subsection[thread-decls]{Thread Declarations}
20 %************************************************************************
22 % I haven't checked if GRAN can work with QP profiling. But as we use our
23 % own profiling (GR profiling) that should be irrelevant. -- HWL
25 NOTE: There are currently a couple of x86-only pieces in here. The reason
26 for this is the need for an expedient hack to make Concurrent Haskell
27 and stable pointers work sufficiently for Win32 applications.
28 (the changes in here are not x86 specific, but other parts of this patch are
31 ToDo: generalise to all platforms
35 #if defined(CONCURRENT) /* the whole module! */
38 # define NON_POSIX_SOURCE /* so says Solaris */
47 static void init_qp_profiling(STG_NO_ARGS); /* forward decl */
50 @AvailableStack@ is used to determine whether an existing stack can be
51 reused without new allocation, so reducing garbage collection, and
52 stack setup time. At present, it is only used for the first stack
53 chunk of a thread, the one that's got
54 @RTSflags.ConcFlags.stkChunkSize@ words.
57 P_ AvailableStack = PrelBase_Z91Z93_closure;
58 P_ AvailableTSO = PrelBase_Z91Z93_closure;
61 Macros for dealing with the new and improved GA field for simulating
62 parallel execution. Based on @CONCURRENT@ package. The GA field now
63 contains a mask, where the n-th bit stands for the n-th processor,
64 on which this data can be found. In case of multiple copies, several bits
65 are set. The total number of processors is bounded by @MAX_PROC@,
66 which should be <= the length of a word in bits. -- HWL
68 {{GranSim.lc}Daq ngoq' roQlu'ta'}
69 (Code has been moved to GranSim.lc).
71 %****************************************************************
73 \subsection[thread-getthread]{The Thread Scheduler}
75 %****************************************************************
77 This is the heart of the thread scheduling code.
79 Most of the changes for GranSim are in this part of the RTS.
80 Especially the @ReSchedule@ routine has been blown up quite a lot.
81 It now contains the top-level event-handling loop.
83 Parts of the code that are not necessary for GranSim, but convenient to
84 have when developing it are marked with a @GRAN_CHECK@ variable.
87 STGRegisterTable *CurrentRegTable = NULL;
92 /* Only needed for GranSim Light; costs of operations during rescheduling
93 are associated to the virtual processor on which ActiveTSO is living */
95 rtsBool resched = rtsFalse; /* debugging only !!*/
97 /* Pointers to the head and tail of the runnable queues for each PE */
98 /* In GranSim Light only the thread/spark-queues of proc 0 are used */
99 P_ RunnableThreadsHd[MAX_PROC];
100 P_ RunnableThreadsTl[MAX_PROC];
102 P_ WaitThreadsHd[MAX_PROC];
103 P_ WaitThreadsTl[MAX_PROC];
105 sparkq PendingSparksHd[MAX_PROC][SPARK_POOLS];
106 sparkq PendingSparksTl[MAX_PROC][SPARK_POOLS];
108 /* One clock for each PE */
109 W_ CurrentTime[MAX_PROC];
111 /* Useful to restrict communication; cf fishing model in GUM */
112 I_ OutstandingFetches[MAX_PROC], OutstandingFishes[MAX_PROC];
114 /* Status of each PE (new since but independent of GranSim Light) */
115 enum proc_status procStatus[MAX_PROC];
117 #if defined(GRAN) && defined(GRAN_CHECK)
118 /* To check if the RTS ever tries to run a thread that should be blocked
119 because of fetching remote data */
120 P_ BlockedOnFetch[MAX_PROC];
123 W_ SparksAvail = 0; /* How many sparks are available */
124 W_ SurplusThreads = 0; /* How many excess threads are there */
126 TIME SparkStealTime();
130 P_ RunnableThreadsHd = PrelBase_Z91Z93_closure;
131 P_ RunnableThreadsTl = PrelBase_Z91Z93_closure;
133 P_ WaitingThreadsHd = PrelBase_Z91Z93_closure;
134 P_ WaitingThreadsTl = PrelBase_Z91Z93_closure;
136 TYPE_OF_SPARK PendingSparksBase[SPARK_POOLS];
137 TYPE_OF_SPARK PendingSparksLim[SPARK_POOLS];
139 TYPE_OF_SPARK PendingSparksHd[SPARK_POOLS];
140 TYPE_OF_SPARK PendingSparksTl[SPARK_POOLS];
142 #endif /* GRAN ; HWL */
144 static jmp_buf scheduler_loop;
145 #if defined(i386_TARGET_ARCH)
146 void SchedLoop(int ret);
147 extern StgInt entersFromC;
148 static jmp_buf finish_sched;
151 I_ required_thread_count = 0;
152 I_ advisory_thread_count = 0;
154 EXTFUN(resumeThread);
156 /* Misc prototypes */
158 P_ NewThread PROTO((P_, W_, I_));
159 I_ blockFetch PROTO((P_, PROC, P_));
160 I_ HandleFetchRequest PROTO((P_, PROC, P_));
161 rtsBool InsertThread PROTO((P_ tso));
162 sparkq delete_from_spark_queue PROTO((sparkq, sparkq));
165 P_ NewThread PROTO((P_, W_));
168 I_ context_switch = 0;
169 I_ contextSwitchTime = 10000;
173 /* NB: GRAN and GUM use different representations of spark pools.
174 GRAN sparks are more flexible (containing e.g. granularity info)
175 but slower than GUM sparks. There is no fixed upper bound on the
176 number of GRAN sparks either. -- HWL
180 I_ sparksIgnored =0, sparksCreated = 0;
184 #if defined(CONCURRENT) && !defined(GRAN)
185 I_ SparkLimit[SPARK_POOLS];
188 initThreadPools(STG_NO_ARGS)
190 I_ i, size = RTSflags.ConcFlags.maxLocalSparks;
192 SparkLimit[ADVISORY_POOL] = SparkLimit[REQUIRED_POOL] = size;
194 if ((PendingSparksBase[ADVISORY_POOL] = (TYPE_OF_SPARK) malloc(size * SIZE_OF_SPARK)) == NULL)
197 if ((PendingSparksBase[REQUIRED_POOL] = (TYPE_OF_SPARK) malloc(size * SIZE_OF_SPARK)) == NULL)
199 PendingSparksLim[ADVISORY_POOL] = PendingSparksBase[ADVISORY_POOL] + size;
200 PendingSparksLim[REQUIRED_POOL] = PendingSparksBase[REQUIRED_POOL] + size;
211 ScheduleThreads(topClosure)
219 #if defined(PROFILING) || defined(PAR)
220 if (time_profiling || RTSflags.ConcFlags.ctxtSwitchTime > 0) {
221 if (initialize_virtual_timer(RTSflags.CcFlags.msecsPerTick)) {
223 if (RTSflags.ConcFlags.ctxtSwitchTime > 0) {
224 if (initialize_virtual_timer(RTSflags.ConcFlags.ctxtSwitchTime)) {
227 fprintf(stderr, "Can't initialize virtual timer.\n");
231 context_switch = 0 /* 1 HWL */;
233 # if defined(GRAN_CHECK) && defined(GRAN) /* HWL */
234 if ( RTSflags.GranFlags.Light && RTSflags.GranFlags.proc!=1 ) {
235 fprintf(stderr,"Qagh: In GrAnSim Light setup .proc must be 1\n");
239 if ( RTSflags.GranFlags.debug & 0x40 ) {
240 fprintf(stderr,"Doing init in ScheduleThreads now ...\n");
244 #if defined(GRAN) /* KH */
245 /* Init thread and spark queues on all processors */
246 for (i=0; i<RTSflags.GranFlags.proc; i++)
248 /* Init of RunnableThreads{Hd,Tl} etc now in main */
249 OutstandingFetches[i] = OutstandingFishes[i] = 0;
250 procStatus[i] = Idle;
251 # if defined(GRAN_CHECK) && defined(GRAN) /* HWL */
252 BlockedOnFetch[i] = NULL;
256 CurrentProc = MainProc;
262 * We perform GC so that a signal handler can install a new
263 * TopClosure and start a new main thread.
269 if ((tso = NewThread(topClosure, T_MAIN, MAIN_PRI)) == NULL) {
271 if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
273 /* kludge to save the top closure as a root */
274 CurrentTSO = topClosure;
275 ReallyPerformThreadGC(0, rtsTrue);
276 topClosure = CurrentTSO;
278 if ((tso = NewThread(topClosure, T_MAIN, MAIN_PRI)) == NULL) {
280 if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
283 fprintf(stderr, "Not enough heap for main thread\n");
288 RunnableThreadsHd = RunnableThreadsTl = tso;
290 /* NB: CurrentProc must have been set to MainProc before that! -- HWL */
291 ThreadQueueHd = ThreadQueueTl = tso;
293 # if defined(GRAN_CHECK)
294 if ( RTSflags.GranFlags.debug & 0x40 ) {
295 fprintf(stderr,"MainTSO has been initialized (0x%x)\n", tso);
301 if (RTSflags.ParFlags.granSimStats) {
302 DumpGranEvent(GR_START, tso);
303 sameThread = rtsTrue;
306 if (RTSflags.GranFlags.granSimStats && !RTSflags.GranFlags.labelling)
307 DumpRawGranEvent(CurrentProc,(PROC)0,GR_START,
312 MAKE_BUSY(MainProc); /* Everything except the main PE is idle */
313 if (RTSflags.GranFlags.Light)
317 required_thread_count = 1;
318 advisory_thread_count = 0;
320 } /*if IAmMainThread ...*/
322 #if defined(i386_TARGET_ARCH)
323 if (setjmp(finish_sched) < 0) {
328 /* ----------------------------------------------------------------- */
329 /* This part is the MAIN SCHEDULER LOOP; jumped at from ReSchedule */
330 /* ----------------------------------------------------------------- */
338 if ( (ret <0) || ( (setjmp(scheduler_loop) < 0) )) {
339 longjmp(finish_sched,-1);
342 if( (setjmp(scheduler_loop) < 0) ) {
347 #if defined(GRAN) && defined(GRAN_CHECK)
348 if ( RTSflags.GranFlags.debug & 0x80 ) {
349 fprintf(stderr,"MAIN Schedule Loop; ThreadQueueHd is ");
350 G_TSO(ThreadQueueHd,1);
351 /* if (ThreadQueueHd == MainTSO) {
352 fprintf(stderr,"D> Event Queue is now:\n");
359 if (PendingFetches != PrelBase_Z91Z93_closure) {
364 if (ThreadQueueHd == PrelBase_Z91Z93_closure) {
365 fprintf(stderr, "Qu'vatlh! No runnable threads!\n");
368 if (DO_QP_PROF > 1 && CurrentTSO != ThreadQueueHd) {
369 QP_Event1("AG", ThreadQueueHd);
372 while (RunnableThreadsHd == PrelBase_Z91Z93_closure) {
373 /* If we've no work */
374 if (WaitingThreadsHd == PrelBase_Z91Z93_closure) {
377 exitc = NoRunnableThreadsHook();
381 /* Block indef. waiting for I/O and timer expire */
387 if (RunnableThreadsHd == PrelBase_Z91Z93_closure) {
388 if (advisory_thread_count < RTSflags.ConcFlags.maxThreads &&
389 (PendingSparksHd[REQUIRED_POOL] < PendingSparksTl[REQUIRED_POOL] ||
390 PendingSparksHd[ADVISORY_POOL] < PendingSparksTl[ADVISORY_POOL])) {
392 * If we're here (no runnable threads) and we have pending
393 * sparks, we must have a space problem. Get enough space
394 * to turn one of those pending sparks into a
395 * thread... ReallyPerformGC doesn't return until the
396 * space is available, so it may force global GC. ToDo:
397 * Is this unnecessary here? Duplicated in ReSchedule()?
400 ReallyPerformThreadGC(THREAD_SPACE_REQUIRED, rtsTrue);
401 SAVE_Hp -= THREAD_SPACE_REQUIRED;
404 * We really have absolutely no work. Send out a fish
405 * (there may be some out there already), and wait for
406 * something to arrive. We clearly can't run any threads
407 * until a SCHEDULE or RESUME arrives, and so that's what
408 * we're hoping to see. (Of course, we still have to
409 * respond to other types of messages.)
412 sendFish(choosePE(), mytid, NEW_FISH_AGE, NEW_FISH_HISTORY,
418 } else if (PacketsWaiting()) { /* Look for incoming messages */
424 if (DO_QP_PROF > 1 && CurrentTSO != RunnableThreadsHd) {
425 QP_Event1("AG", RunnableThreadsHd);
430 if (RTSflags.ParFlags.granSimStats && !sameThread)
431 DumpGranEvent(GR_SCHEDULE, RunnableThreadsHd);
435 TimeOfNextEvent = get_time_of_next_event();
436 CurrentTSO = ThreadQueueHd;
437 if (RTSflags.GranFlags.Light) {
438 /* Save time of `virt. proc' which was active since last getevent and
439 restore time of `virt. proc' where CurrentTSO is living on. */
440 if(RTSflags.GranFlags.DoFairSchedule)
442 if (RTSflags.GranFlags.granSimStats &&
443 RTSflags.GranFlags.debug & 0x20000)
444 DumpGranEvent(GR_SYSTEM_END,ActiveTSO);
446 TSO_CLOCK(ActiveTSO) = CurrentTime[CurrentProc];
448 CurrentTime[CurrentProc] = TSO_CLOCK(CurrentTSO);
449 if(RTSflags.GranFlags.DoFairSchedule && resched )
452 if (RTSflags.GranFlags.granSimStats &&
453 RTSflags.GranFlags.debug & 0x20000)
454 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
457 if (TSO_LINK(ThreadQueueHd)!=PrelBase_Z91Z93_closure &&
458 (TimeOfNextEvent == 0 ||
459 TSO_CLOCK(TSO_LINK(ThreadQueueHd))+1000<TimeOfNextEvent)) {
460 new_event(CurrentProc,CurrentProc,TSO_CLOCK(TSO_LINK(ThreadQueueHd))+1000,
461 CONTINUETHREAD,TSO_LINK(ThreadQueueHd),PrelBase_Z91Z93_closure,NULL);
462 TimeOfNextEvent = get_time_of_next_event();
466 EndOfTimeSlice = CurrentTime[CurrentProc]+RTSflags.GranFlags.time_slice;
468 CurrentTSO = RunnableThreadsHd;
469 RunnableThreadsHd = TSO_LINK(RunnableThreadsHd);
470 TSO_LINK(CurrentTSO) = PrelBase_Z91Z93_closure;
472 if (RunnableThreadsHd == PrelBase_Z91Z93_closure)
473 RunnableThreadsTl = PrelBase_Z91Z93_closure;
476 /* If we're not running a timer, just leave the flag on */
477 if (RTSflags.ConcFlags.ctxtSwitchTime > 0)
480 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
481 if (CurrentTSO == PrelBase_Z91Z93_closure) {
482 fprintf(stderr,"Qagh: Trying to execute PrelBase_Z91Z93_closure on proc %d (@ %d)\n",
483 CurrentProc,CurrentTime[CurrentProc]);
487 if (RTSflags.GranFlags.debug & 0x04) {
488 if (BlockedOnFetch[CurrentProc]) {
489 fprintf(stderr,"Qagh: Trying to execute TSO 0x%x on proc %d (@ %d) which is blocked-on-fetch by TSO 0x%x\n",
490 CurrentTSO,CurrentProc,CurrentTime[CurrentProc],BlockedOnFetch[CurrentProc]);
495 if ( (RTSflags.GranFlags.debug & 0x10) &&
496 (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) ) {
497 fprintf(stderr,"Qagh: Trying to execute TSO 0x%x on proc %d (@ %d) which should be asleep!\n",
498 CurrentTSO,CurrentProc,CurrentTime[CurrentProc]);
503 #if 0 && defined(i386_TARGET_ARCH)
504 fprintf(stderr, "ScheduleThreads: About to resume thread:%#x %d\n",
505 CurrentTSO, entersFromC);
507 miniInterpret((StgFunPtr)resumeThread);
511 % Some remarks on GrAnSim -- HWL
513 The ReSchedule fct is the heart of GrAnSim. Based on its parameter it issues
514 a CONTINUETHREAD to carry on executing the current thread in due course or it watches out for new work (e.g. called from EndThread).
516 Then it picks the next event (get_next_event) and handles it appropriately
517 (see switch construct). Note that a continue in the switch causes the next
518 event to be handled and a break causes a jmp to the scheduler_loop where
519 the TSO at the head of the current processor's runnable queue is executed.
521 ReSchedule is mostly entered from HpOverflow.lc:PerformReSchedule which is
522 itself called via the GRAN_RESCHEDULE macro in the compiler generated code.
526 GrAnSim rules here! Others stay out or you will be crashed.
527 Concurrent and parallel guys: please use the next door (a few pages down;
528 turn left at the !GRAN sign).
533 /* Prototypes of event handling functions. Only needed in ReSchedule */
534 void do_the_globalblock PROTO((eventq event));
535 void do_the_unblock PROTO((eventq event));
536 void do_the_fetchnode PROTO((eventq event));
537 void do_the_fetchreply PROTO((eventq event));
538 void do_the_movethread PROTO((eventq event));
539 void do_the_movespark PROTO((eventq event));
540 void gimme_spark PROTO((rtsBool *found_res, sparkq *prev_res, sparkq *spark_res));
541 void munch_spark PROTO((rtsBool found, sparkq prev, sparkq spark));
544 ReSchedule(what_next)
545 int what_next; /* Run the current thread again? */
547 sparkq spark, nextspark;
553 # if defined(GRAN_CHECK) && defined(GRAN)
554 if ( RTSflags.GranFlags.debug & 0x80 ) {
555 fprintf(stderr,"Entering ReSchedule with mode %u; tso is\n",what_next);
556 G_TSO(ThreadQueueHd,1);
560 # if defined(GRAN_CHECK) && defined(GRAN)
561 if ( (RTSflags.GranFlags.debug & 0x80) || (RTSflags.GranFlags.debug & 0x40 ) )
562 if (what_next<FIND_THREAD || what_next>END_OF_WORLD)
563 fprintf(stderr,"Qagh {ReSchedule}Daq: illegal parameter %u for what_next\n",
567 if (RTSflags.GranFlags.Light) {
568 /* Save current time; GranSim Light only */
569 TSO_CLOCK(CurrentTSO) = CurrentTime[CurrentProc];
572 /* Run the current thread again (if there is one) */
573 if(what_next==SAME_THREAD && ThreadQueueHd != PrelBase_Z91Z93_closure)
575 /* A bit of a hassle if the event queue is empty, but ... */
576 CurrentTSO = ThreadQueueHd;
579 if (RTSflags.GranFlags.Light &&
580 TSO_LINK(ThreadQueueHd)!=PrelBase_Z91Z93_closure &&
581 TSO_CLOCK(ThreadQueueHd)>TSO_CLOCK(TSO_LINK(ThreadQueueHd))) {
582 if(RTSflags.GranFlags.granSimStats &&
583 RTSflags.GranFlags.debug & 0x20000 )
584 DumpGranEvent(GR_DESCHEDULE,ThreadQueueHd);
586 ThreadQueueHd = TSO_LINK(CurrentTSO);
587 if (ThreadQueueHd==PrelBase_Z91Z93_closure)
588 ThreadQueueTl=PrelBase_Z91Z93_closure;
589 TSO_LINK(CurrentTSO) = PrelBase_Z91Z93_closure;
590 InsertThread(CurrentTSO);
593 /* This code does round-Robin, if preferred. */
594 if(!RTSflags.GranFlags.Light &&
595 RTSflags.GranFlags.DoFairSchedule &&
596 TSO_LINK(CurrentTSO) != PrelBase_Z91Z93_closure &&
597 CurrentTime[CurrentProc]>=EndOfTimeSlice)
599 ThreadQueueHd = TSO_LINK(CurrentTSO);
600 TSO_LINK(ThreadQueueTl) = CurrentTSO;
601 ThreadQueueTl = CurrentTSO;
602 TSO_LINK(CurrentTSO) = PrelBase_Z91Z93_closure;
603 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadcontextswitchtime;
604 if ( RTSflags.GranFlags.granSimStats )
605 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
606 CurrentTSO = ThreadQueueHd;
609 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
610 CONTINUETHREAD,CurrentTSO,PrelBase_Z91Z93_closure,NULL);
612 /* Schedule `next thread' which is at ThreadQueueHd now i.e. thread queue */
613 /* has been updated before that already. */
614 else if(what_next==NEW_THREAD && ThreadQueueHd != PrelBase_Z91Z93_closure)
616 # if defined(GRAN_CHECK) && defined(GRAN)
617 fprintf(stderr,"Qagh: ReSchedule(NEW_THREAD) shouldn't be used with DoReScheduleOnFetch!!\n");
622 if(RTSflags.GranFlags.granSimStats &&
623 (!RTSflags.GranFlags.Light || RTSflags.GranFlags.debug & 0x20000) )
624 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
626 CurrentTSO = ThreadQueueHd;
627 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
628 CONTINUETHREAD,CurrentTSO,PrelBase_Z91Z93_closure,NULL);
630 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadcontextswitchtime;
633 /* We go in here if the current thread is blocked on fetch => don't CONT */
634 else if(what_next==CHANGE_THREAD)
636 /* just fall into event handling loop for next event */
639 /* We go in here if we have no runnable threads or what_next==0 */
642 procStatus[CurrentProc] = Idle;
643 /* That's now done in HandleIdlePEs!
644 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
645 FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
647 CurrentTSO = PrelBase_Z91Z93_closure;
650 /* ----------------------------------------------------------------- */
651 /* This part is the EVENT HANDLING LOOP */
652 /* ----------------------------------------------------------------- */
655 /* Choose the processor with the next event */
656 event = get_next_event();
657 CurrentProc = EVENT_PROC(event);
658 CurrentTSO = EVENT_TSO(event);
659 if (RTSflags.GranFlags.Light) {
662 /* Restore local clock of the virtual processor attached to CurrentTSO.
663 All costs will be associated to the `virt. proc' on which the tso
665 if (ActiveTSO != NULL) { /* already in system area */
666 TSO_CLOCK(ActiveTSO) = CurrentTime[CurrentProc];
667 if (RTSflags.GranFlags.DoFairSchedule)
669 if (RTSflags.GranFlags.granSimStats &&
670 RTSflags.GranFlags.debug & 0x20000)
671 DumpGranEvent(GR_SYSTEM_END,ActiveTSO);
674 switch (EVENT_TYPE(event))
677 case FINDWORK: /* inaccurate this way */
678 ActiveTSO = ThreadQueueHd;
682 case MOVESPARK: /* has tso of virt proc in tso field of event */
683 ActiveTSO = EVENT_TSO(event);
685 default: fprintf(stderr,"Illegal event type %s (%d) in GrAnSim Light setup\n",
686 event_names[EVENT_TYPE(event)],EVENT_TYPE(event));
689 CurrentTime[CurrentProc] = TSO_CLOCK(ActiveTSO);
690 if(RTSflags.GranFlags.DoFairSchedule)
692 if (RTSflags.GranFlags.granSimStats &&
693 RTSflags.GranFlags.debug & 0x20000)
694 DumpGranEvent(GR_SYSTEM_START,ActiveTSO);
698 if(EVENT_TIME(event) > CurrentTime[CurrentProc] &&
699 EVENT_TYPE(event)!=CONTINUETHREAD)
700 CurrentTime[CurrentProc] = EVENT_TIME(event);
702 # if defined(GRAN_CHECK) && defined(GRAN) /* HWL */
703 if ( RTSflags.GranFlags.Light && CurrentProc!=0 ) {
704 fprintf(stderr,"Qagh {ReSchedule}Daq: CurrentProc must be 0 in GrAnSim Light setup\n");
708 /* MAKE_BUSY(CurrentProc); don't think that's right in all cases now */
711 # if defined(GRAN_CHECK) && defined(GRAN)
712 if (RTSflags.GranFlags.debug & 0x80)
713 fprintf(stderr,"After get_next_event, before HandleIdlePEs\n");
716 /* Deal with the idlers */
717 if ( !RTSflags.GranFlags.Light )
720 # if defined(GRAN_CHECK) && defined(GRAN)
721 if ( RTSflags.GranFlags.event_trace_all ||
722 ( RTSflags.GranFlags.event_trace && EVENT_TYPE(event) != CONTINUETHREAD) ||
723 (RTSflags.GranFlags.debug & 0x80) )
727 switch (EVENT_TYPE(event))
729 /* Should just be continuing execution */
731 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
732 if ( (RTSflags.GranFlags.debug & 0x100) &&
733 (EVENT_TSO(event)!=RunnableThreadsHd[EVENT_PROC(event)]) ) {
734 fprintf(stderr,"Warning: Wrong TSO in CONTINUETHREAD: %#lx (%x) (PE: %d Hd: 0x%lx)\n",
735 EVENT_TSO(event), TSO_ID(EVENT_TSO(event)),
737 RunnableThreadsHd[EVENT_PROC(event)]);
739 if ( (RTSflags.GranFlags.debug & 0x04) &&
740 BlockedOnFetch[CurrentProc]) {
741 fprintf(stderr,"Warning: Discarding CONTINUETHREAD on blocked proc %u @ %u\n",
742 CurrentProc,CurrentTime[CurrentProc]);
747 if(ThreadQueueHd==PrelBase_Z91Z93_closure)
749 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
750 FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
751 continue; /* Catches superfluous CONTINUEs -- should be unnecessary */
754 break; /* fall into scheduler loop */
757 do_the_fetchnode(event);
758 continue; /* handle next event in event queue */
761 do_the_globalblock(event);
762 continue; /* handle next event in event queue */
765 do_the_fetchreply(event);
766 continue; /* handle next event in event queue */
768 case UNBLOCKTHREAD: /* Move from the blocked queue to the tail of */
769 do_the_unblock(event);
770 continue; /* handle next event in event queue */
772 case RESUMETHREAD: /* Move from the blocked queue to the tail of */
773 /* the runnable queue ( i.e. Qu' SImqa'lu') */
774 TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] -
775 TSO_BLOCKEDAT(EVENT_TSO(event));
776 StartThread(event,GR_RESUME);
780 StartThread(event,GR_START);
784 do_the_movethread(event);
785 continue; /* handle next event in event queue */
788 do_the_movespark(event);
789 continue; /* handle next event in event queue */
792 { /* Make sure that we have enough heap for creating a new
793 thread. This is a conservative estimate of the required heap.
794 This eliminates special checks for GC around NewThread within
797 I_ req_heap = TSO_HS + TSO_CTS_SIZE + STKO_HS +
798 RTSflags.ConcFlags.stkChunkSize;
800 if (SAVE_Hp + req_heap >= SAVE_HpLim ) {
801 ReallyPerformThreadGC(req_heap, rtsFalse);
803 if (IS_SPARKING(CurrentProc))
804 MAKE_IDLE(CurrentProc);
809 if( RTSflags.GranFlags.DoAlwaysCreateThreads ||
810 (ThreadQueueHd == PrelBase_Z91Z93_closure &&
811 (RTSflags.GranFlags.FetchStrategy >= 2 ||
812 OutstandingFetches[CurrentProc] == 0)) )
818 ASSERT(procStatus[CurrentProc]==Sparking ||
819 RTSflags.GranFlags.DoAlwaysCreateThreads);
821 /* SImmoHwI' yInej! Search spark queue! */
822 gimme_spark (&found, &prev, &spark);
824 /* DaH chu' Qu' yIchen! Now create new work! */
825 munch_spark (found, prev, spark);
827 /* ToDo: check ; not valid if GC occurs in munch_spark
828 ASSERT(procStatus[CurrentProc]==Starting ||
829 procStatus[CurrentProc]==Idle ||
830 RTSflags.GranFlags.DoAlwaysCreateThreads); */
832 continue; /* to the next event */
835 fprintf(stderr,"Illegal event type %u\n",EVENT_TYPE(event));
838 #if defined(i386_TARGET_ARCH)
841 /* more than one thread has entered the Haskell world
842 via C (and stable pointers) - don't squeeze the C stack. */
845 /* Squeeze C stack */
846 longjmp(scheduler_loop, 1);
849 longjmp(scheduler_loop, 1);
854 /* ----------------------------------------------------------------- */
855 /* The main event handling functions; called from ReSchedule (switch) */
856 /* ----------------------------------------------------------------- */
859 do_the_globalblock(eventq event)
861 PROC proc = EVENT_PROC(event); /* proc that requested node */
862 P_ tso = EVENT_TSO(event), /* tso that requested node */
863 node = EVENT_NODE(event); /* requested, remote node */
865 # if defined(GRAN_CHECK) && defined(GRAN)
866 if ( RTSflags.GranFlags.Light ) {
867 fprintf(stderr,"Qagh: There should be no GLOBALBLOCKs in GrAnSim Light setup\n");
871 if (!RTSflags.GranFlags.DoGUMMFetching) {
872 fprintf(stderr,"Qagh: GLOBALBLOCK events only valid with GUMM fetching\n");
876 if ( (RTSflags.GranFlags.debug & 0x100) &&
877 IS_LOCAL_TO(PROCS(node),proc) ) {
878 fprintf(stderr,"Qagh: GLOBALBLOCK: Blocking on LOCAL node 0x %x (PE %d).\n",
882 /* CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime; */
883 if ( blockFetch(tso,proc,node) != 0 )
884 return; /* node has become local by now */
886 if (!RTSflags.GranFlags.DoReScheduleOnFetch) { /* head of queue is next thread */
887 P_ tso = RunnableThreadsHd[proc]; /* awaken next thread */
888 if(tso != PrelBase_Z91Z93_closure) {
889 new_event(proc,proc,CurrentTime[proc],
890 CONTINUETHREAD,tso,PrelBase_Z91Z93_closure,NULL);
891 CurrentTime[proc] += RTSflags.GranFlags.gran_threadcontextswitchtime;
892 if(RTSflags.GranFlags.granSimStats)
893 DumpRawGranEvent(proc,CurrentProc,GR_SCHEDULE,tso,
894 PrelBase_Z91Z93_closure,0);
895 MAKE_BUSY(proc); /* might have been fetching */
897 MAKE_IDLE(proc); /* no work on proc now */
899 } else { /* RTSflags.GranFlags.DoReScheduleOnFetch i.e. block-on-fetch */
900 /* other thread is already running */
901 /* 'oH 'utbe' 'e' vIHar ; I think that's not needed -- HWL
902 new_event(proc,proc,CurrentTime[proc],
903 CONTINUETHREAD,EVENT_TSO(event),
904 (RTSflags.GranFlags.DoGUMMFetching ? closure :
905 EVENT_NODE(event)),NULL);
911 do_the_unblock(eventq event)
913 PROC proc = EVENT_PROC(event), /* proc that requested node */
914 creator = EVENT_CREATOR(event); /* proc that requested node */
915 P_ tso = EVENT_TSO(event), /* tso that requested node */
916 node = EVENT_NODE(event); /* requested, remote node */
918 # if defined(GRAN) && defined(GRAN_CHECK)
919 if ( RTSflags.GranFlags.Light ) {
920 fprintf(stderr,"Qagh: There should be no UNBLOCKs in GrAnSim Light setup\n");
925 if (!RTSflags.GranFlags.DoReScheduleOnFetch) { /* block-on-fetch */
926 /* We count block-on-fetch as normal block time */
927 TSO_BLOCKTIME(tso) += CurrentTime[proc] - TSO_BLOCKEDAT(tso);
928 /* No costs for contextswitch or thread queueing in this case */
929 if(RTSflags.GranFlags.granSimStats)
930 DumpRawGranEvent(proc,CurrentProc,GR_RESUME,tso, PrelBase_Z91Z93_closure,0);
931 new_event(proc,proc,CurrentTime[proc],CONTINUETHREAD,tso,node,NULL);
933 /* Reschedule on fetch causes additional costs here: */
934 /* Bring the TSO from the blocked queue into the threadq */
935 new_event(proc,proc,CurrentTime[proc]+RTSflags.GranFlags.gran_threadqueuetime,
936 RESUMETHREAD,tso,node,NULL);
941 do_the_fetchnode(eventq event)
945 # if defined(GRAN_CHECK) && defined(GRAN)
946 if ( RTSflags.GranFlags.Light ) {
947 fprintf(stderr,"Qagh: There should be no FETCHNODEs in GrAnSim Light setup\n");
951 if (RTSflags.GranFlags.SimplifiedFetch) {
952 fprintf(stderr,"Qagh: FETCHNODE events not valid with simplified fetch\n");
956 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime;
958 rc = HandleFetchRequest(EVENT_NODE(event),
959 EVENT_CREATOR(event),
961 if (rc == 4) { /* trigger GC */
962 # if defined(GRAN_CHECK) && defined(GRAN)
963 if (RTSflags.GcFlags.giveStats)
964 fprintf(RTSflags.GcFlags.statsFile,"***** veQ boSwI' PackNearbyGraph(node %#lx, tso %#lx (%x))\n",
965 EVENT_NODE(event), EVENT_TSO(event), TSO_ID(EVENT_TSO(event)));
967 prepend_event(event);
968 ReallyPerformThreadGC(PACK_HEAP_REQUIRED, rtsFalse);
969 # if defined(GRAN_CHECK) && defined(GRAN)
970 if (RTSflags.GcFlags.giveStats) {
971 fprintf(RTSflags.GcFlags.statsFile,"***** SAVE_Hp=%#lx, SAVE_HpLim=%#lx, PACK_HEAP_REQUIRED=%#lx\n",
972 SAVE_Hp, SAVE_HpLim, PACK_HEAP_REQUIRED);
973 fprintf(stderr,"***** No. of packets so far: %d (total size: %d)\n",
974 tot_packets,tot_packet_size);
977 event = grab_event();
978 SAVE_Hp -= PACK_HEAP_REQUIRED;
980 /* GC knows that events are special and follows the pointer i.e. */
981 /* events are valid even if they moved. An EXIT is triggered */
982 /* if there is not enough heap after GC. */
988 do_the_fetchreply(eventq event)
992 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
993 if ( RTSflags.GranFlags.Light ) {
994 fprintf(stderr,"Qagh: There should be no FETCHREPLYs in GrAnSim Light setup\n");
998 if (RTSflags.GranFlags.SimplifiedFetch) {
999 fprintf(stderr,"Qagh: FETCHREPLY events not valid with simplified fetch\n");
1003 if (RTSflags.GranFlags.debug & 0x10) {
1004 if (TSO_TYPE(EVENT_TSO(event)) & FETCH_MASK_TSO) {
1005 TSO_TYPE(EVENT_TSO(event)) &= ~FETCH_MASK_TSO;
1007 fprintf(stderr,"Qagh: FETCHREPLY: TSO %#x (%x) has fetch mask not set @ %d\n",
1008 CurrentTSO,TSO_ID(CurrentTSO),CurrentTime[CurrentProc]);
1013 if (RTSflags.GranFlags.debug & 0x04) {
1014 if (BlockedOnFetch[CurrentProc]!=ThreadQueueHd) {
1015 fprintf(stderr,"Qagh: FETCHREPLY: Proc %d (with TSO %#x (%x)) not blocked-on-fetch by TSO %#lx (%x)\n",
1016 CurrentProc,CurrentTSO,TSO_ID(CurrentTSO),
1017 BlockedOnFetch[CurrentProc], TSO_ID(BlockedOnFetch[CurrentProc]));
1020 BlockedOnFetch[CurrentProc] = 0; /*- rtsFalse; -*/
1025 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime;
1027 if (RTSflags.GranFlags.DoGUMMFetching) { /* bulk (packet) fetching */
1028 P_ buffer = EVENT_NODE(event);
1029 PROC p = EVENT_PROC(event);
1030 I_ size = buffer[PACK_SIZE_LOCN];
1032 tso = EVENT_TSO(event);
1034 /* NB: Fetch misses can't occur with GUMM fetching, as */
1035 /* updatable closure are turned into RBHs and therefore locked */
1036 /* for other processors that try to grab them. */
1038 closure = UnpackGraph(buffer);
1039 CurrentTime[CurrentProc] += size * RTSflags.GranFlags.gran_munpacktime;
1041 /* Copy or move node to CurrentProc */
1042 if (FetchNode(EVENT_NODE(event),
1043 EVENT_CREATOR(event),
1044 EVENT_PROC(event)) ) {
1045 /* Fetch has failed i.e. node has been grabbed by another PE */
1046 P_ node = EVENT_NODE(event), tso = EVENT_TSO(event);
1047 PROC p = where_is(node);
1050 # if defined(GRAN_CHECK) && defined(GRAN)
1051 if (RTSflags.GranFlags.PrintFetchMisses) {
1052 fprintf(stderr,"Fetch miss @ %lu: node %#lx is at proc %u (rather than proc %u)\n",
1053 CurrentTime[CurrentProc],node,p,EVENT_CREATOR(event));
1056 # endif /* GRAN_CHECK */
1058 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
1060 /* Count fetch again !? */
1061 ++TSO_FETCHCOUNT(tso);
1062 TSO_FETCHTIME(tso) += RTSflags.GranFlags.gran_fetchtime;
1064 fetchtime = STG_MAX(CurrentTime[CurrentProc],CurrentTime[p]) +
1065 RTSflags.GranFlags.gran_latency;
1067 /* Chase the grabbed node */
1068 new_event(p,CurrentProc,fetchtime,FETCHNODE,tso,node,NULL);
1070 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
1071 if (RTSflags.GranFlags.debug & 0x04)
1072 BlockedOnFetch[CurrentProc] = tso; /*-rtsTrue;-*/
1074 if (RTSflags.GranFlags.debug & 0x10)
1075 TSO_TYPE(tso) |= FETCH_MASK_TSO;
1078 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
1080 return; /* NB: no REPLy has been processed; tso still sleeping */
1083 /* -- Qapla'! Fetch has been successful; node is here, now */
1084 ++TSO_FETCHCOUNT(EVENT_TSO(event));
1085 TSO_FETCHTIME(EVENT_TSO(event)) += RTSflags.GranFlags.gran_fetchtime;
1087 if (RTSflags.GranFlags.granSimStats)
1088 DumpRawGranEvent(CurrentProc,EVENT_CREATOR(event),GR_REPLY,
1090 (RTSflags.GranFlags.DoGUMMFetching ?
1095 --OutstandingFetches[CurrentProc];
1096 ASSERT(OutstandingFetches[CurrentProc] >= 0);
1097 # if 0 && defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
1098 if (OutstandingFetches[CurrentProc] < 0) {
1099 fprintf(stderr,"Qagh: OutstandingFetches of proc %u has become negative\n",CurrentProc);
1103 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1104 UNBLOCKTHREAD,EVENT_TSO(event),
1105 (RTSflags.GranFlags.DoGUMMFetching ?
/* do_the_movethread -- handle a MOVETHREAD event in the GrAnSim simulator:
   a thread stolen from another PE arrives at CurrentProc, is claimed for
   this PE and started here (via StartThread with GR_STOLEN).
   NOTE(review): this excerpt appears to be missing lines; the sanity-check
   branches below lack their closing statements in this view. */
1112 do_the_movethread(eventq event) {
1113 P_ tso = EVENT_TSO(event);
1114 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
     /* GrAnSim Light has a single virtual PE, so MOVETHREADs make no sense */
1115 if ( RTSflags.GranFlags.Light && CurrentProc!=1 ) {
1116 fprintf(stderr,"Qagh: There should be no MOVETHREADs in GrAnSim Light setup\n");
     /* MOVETHREAD events only exist when thread migration (-bM) is enabled */
1119 if (!RTSflags.GranFlags.DoThreadMigration) {
1120 fprintf(stderr,"Qagh: MOVETHREAD events should never occur without -bM\n");
     /* A thread in flight must carry an empty location bitmask (see StealThread,
        which sets PROCS to Nowhere before scheduling the MOVETHREAD) */
1123 if (PROCS(tso)!=0) {
1124 fprintf(stderr,"Qagh: Moved thread has a bitmask of 0%o (proc %d); should be 0\n",
1125 PROCS(tso), where_is(tso));
     /* The fish (steal request) that brought this thread has now returned */
1129 --OutstandingFishes[CurrentProc];
1130 ASSERT(OutstandingFishes[CurrentProc]>=0);
     /* Thread now lives on this PE; charge for unpacking the message */
1131 SET_PROCS(tso,ThisPE);
1132 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime;
1133 StartThread(event,GR_STOLEN);
/* do_the_movespark -- handle a MOVESPARK event: a spark stolen from another
   PE arrives here, is added to the local advisory spark pool, and a FINDWORK
   event is queued so the main loop may later turn it into a thread. */
1137 do_the_movespark(eventq event){
1138 sparkq spark = EVENT_SPARK(event);
     /* Charge for unpacking the incoming spark message */
1140 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_munpacktime;
1142 if (RTSflags.GranFlags.granSimStats_Sparks)
1143 DumpRawGranEvent(CurrentProc,(PROC)0,SP_ACQUIRED,PrelBase_Z91Z93_closure,
1145 spark_queue_len(CurrentProc,ADVISORY_POOL));
1147 #if defined(GRAN) && defined(GRAN_CHECK)
1148 if (!SHOULD_SPARK(SPARK_NODE(spark)))
1150 /* Not adding the spark to the spark queue would be the right */
1151 /* thing here, but it also would be cheating, as this info can't be */
1152 /* available in a real system. -- HWL */
     /* The fish that fetched this spark has come home */
1154 --OutstandingFishes[CurrentProc];
1155 ASSERT(OutstandingFishes[CurrentProc]>=0);
1157 add_to_spark_queue(spark);
     /* The successful steal ends this PE's fishing phase */
1159 if (procStatus[CurrentProc]==Fishing)
1160 procStatus[CurrentProc] = Idle;
1162 /* add_to_spark_queue will increase the time of the current proc. */
1163 /* Just falling into FINDWORK is wrong as we might have other */
1164 /* events that are happening before that. Therefore, just create */
1165 /* a FINDWORK event and go back to main event handling loop. */
1167 /* Should we treat stolen sparks specially? Currently, we don't. */
1169 /* Now FINDWORK is created in HandleIdlePEs */
1170 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1171 FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
1172 sparking[CurrentProc]=rtsTrue;
1176 /* Search the spark queue of the CurrentProc for a spark that's worth
1177 turning into a thread */
/* gimme_spark -- scan the local spark queue for a spark worth turning into
   a thread.  Policy visible below:
     - sparks whose node fails SHOULD_SPARK are pruned (logged as SP_PRUNED);
     - with PreferSparksOfLocalNodes, sparks of non-local nodes are skipped
       but the first such spark is remembered as a fallback;
     - otherwise a spark is taken if priority sparking is on or its
       granularity info reaches SparkPriority2; the first low-priority spark
       is remembered as a fallback taken at the end of the queue.
   Results are returned through *found_res, *prev_res and *spark_res
   (out-parameters; munch_spark consumes them).
   NOTE(review): several closing braces/else-branches are missing from this
   excerpt of the loop body. */
1179 gimme_spark (rtsBool *found_res, sparkq *prev_res, sparkq *spark_res)
1183 sparkq spark_of_non_local_node = NULL, spark_of_non_local_node_prev = NULL,
1184 low_priority_spark = NULL, low_priority_spark_prev = NULL,
1185 spark = NULL, prev = NULL, tmp = NULL;
1187 /* Choose a spark from the local spark queue */
1188 spark = SparkQueueHd;
1191 while (spark != NULL && !found)
1193 node = SPARK_NODE(spark);
     /* Case 1: node not worth sparking any more -> prune it from the queue */
1194 if (!SHOULD_SPARK(node))
1196 if(RTSflags.GranFlags.granSimStats_Sparks)
1197 DumpRawGranEvent(CurrentProc,(PROC)0,SP_PRUNED,PrelBase_Z91Z93_closure,
1199 spark_queue_len(CurrentProc,ADVISORY_POOL));
1201 ASSERT(spark != NULL);
1204 spark = delete_from_spark_queue (prev,spark);
1206 /* -- node should eventually be sparked */
     /* Case 2: prefer-local option set and node is not local -> skip,
        but remember the first such spark as a fallback */
1207 else if (RTSflags.GranFlags.PreferSparksOfLocalNodes &&
1208 !IS_LOCAL_TO(PROCS(node),CurrentProc))
1210 /* Remember first low priority spark */
1211 if (spark_of_non_local_node==NULL) {
1212 spark_of_non_local_node_prev = prev;
1213 spark_of_non_local_node = spark;
     /* End of queue reached: fall back to the remembered non-local spark */
1216 if (SPARK_NEXT(spark)==NULL) {
1217 ASSERT(spark==SparkQueueTl); /* just for testing */
1218 prev = spark_of_non_local_node_prev;
1219 spark = spark_of_non_local_node;
1224 # if defined(GRAN) && defined(GRAN_CHECK)
1225 /* Should never happen; just for testing */
1226 if (spark==SparkQueueTl) {
1227 fprintf(stderr,"ReSchedule: Last spark != SparkQueueTl\n");
1232 spark = SPARK_NEXT(spark);
     /* Case 3: take this spark -- priority sparking on, or its granularity
        info meets the SparkPriority2 threshold */
1235 else if ( RTSflags.GranFlags.DoPrioritySparking ||
1236 (SPARK_GRAN_INFO(spark)>=RTSflags.GranFlags.SparkPriority2) )
     /* Case 4: spark is below SparkPriority2 -> skip but remember the first
        low-priority spark as a fallback taken at end of queue */
1240 else /* only used if SparkPriority2 is defined */
1242 /* Remember first low priority spark */
1243 if (low_priority_spark==NULL) {
1244 low_priority_spark_prev = prev;
1245 low_priority_spark = spark;
1248 if (SPARK_NEXT(spark)==NULL) {
1249 ASSERT(spark==SparkQueueTl); /* just for testing */
1250 prev = low_priority_spark_prev;
1251 spark = low_priority_spark;
1252 found = rtsTrue; /* take low pri spark => rc is 2 */
1256 /* Should never happen; just for testing */
1257 if (spark==SparkQueueTl) {
1258 fprintf(stderr,"ReSchedule: Last spark != SparkQueueTl\n");
1263 spark = SPARK_NEXT(spark);
1264 # if defined(GRAN_CHECK) && defined(GRAN)
1265 if ( RTSflags.GranFlags.debug & 0x40 ) {
1266 fprintf(stderr,"Ignoring spark of priority %u (SparkPriority=%u); node=0x%lx; name=%u\n",
1267 SPARK_GRAN_INFO(spark), RTSflags.GranFlags.SparkPriority,
1268 SPARK_NODE(spark), SPARK_NAME(spark));
1270 # endif /* GRAN_CHECK */
1272 } /* while (spark!=NULL && !found) */
/* munch_spark -- consume the spark chosen by gimme_spark: create a thread
   for it (NewThread), schedule a STARTTHREAD event, and remove the spark
   from the queue.  If thread creation fails for lack of heap, a GC is
   triggered and a FINDWORK event is re-queued so the spark is retried.
   If nothing was found and there are no runnable threads, the PE goes idle.
   NOTE(review): this excerpt is missing lines (closing braces, the !found
   branch header), so control-flow boundaries below are approximate. */
1280 munch_spark (rtsBool found, sparkq prev, sparkq spark)
1284 /* We've found a node; now, create thread (DaH Qu' yIchen) */
1287 # if defined(GRAN_CHECK) && defined(GRAN)
     /* Statistics: count when we had to settle for a low-priority spark */
1288 if ( SPARK_GRAN_INFO(spark) < RTSflags.GranFlags.SparkPriority2 ) {
1289 tot_low_pri_sparks++;
1290 if ( RTSflags.GranFlags.debug & 0x40 ) {
1291 fprintf(stderr,"GRAN_TNG: No high priority spark available; low priority (%u) spark chosen: node=0x%lx; name=%u\n",
1292 SPARK_GRAN_INFO(spark),
1293 SPARK_NODE(spark), SPARK_NAME(spark));
     /* Charge simulated time for creating the thread */
1297 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadcreatetime;
1299 node = SPARK_NODE(spark);
1300 if((tso = NewThread(node, T_REQUIRED, SPARK_GRAN_INFO(spark)))==NULL)
1302 /* Some kind of backoff needed here in case there's too little heap */
1303 # if defined(GRAN_CHECK) && defined(GRAN)
1304 if (RTSflags.GcFlags.giveStats)
1305 fprintf(RTSflags.GcFlags.statsFile,"***** vIS Qu' chen veQ boSwI'; spark=%#x, node=%#x; name=%u\n",
1306 /* (found==2 ? "no hi pri spark" : "hi pri spark"), */
1307 spark, node,SPARK_NAME(spark));
     /* Re-queue a FINDWORK event (at time+1) and GC; the spark stays queued */
1309 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc]+1,
1310 FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
1311 ReallyPerformThreadGC(TSO_HS+TSO_CTS_SIZE,rtsFalse);
1312 SAVE_Hp -= TSO_HS+TSO_CTS_SIZE;
1314 return; /* was: continue; */ /* to the next event, eventually */
1317 if(RTSflags.GranFlags.granSimStats_Sparks)
1318 DumpRawGranEvent(CurrentProc,(PROC)0,SP_USED,PrelBase_Z91Z93_closure,
1320 spark_queue_len(CurrentProc,ADVISORY_POOL));
     /* Carry spark attributes over to the new thread */
1322 TSO_EXPORTED(tso) = SPARK_EXPORTED(spark);
1323 TSO_LOCKED(tso) = !SPARK_GLOBAL(spark);
1324 TSO_SPARKNAME(tso) = SPARK_NAME(spark);
1326 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1327 STARTTHREAD,tso,node,NULL);
1329 procStatus[CurrentProc] = Starting;
1331 ASSERT(spark != NULL);
1333 spark = delete_from_spark_queue (prev, spark);
1336 /* Make the PE idle if nothing sparked and we have no threads. */
1338 if(ThreadQueueHd == PrelBase_Z91Z93_closure)
1340 MAKE_IDLE(CurrentProc);
1341 # if defined(GRAN_CHECK) && defined(GRAN)
1342 if ( (RTSflags.GranFlags.debug & 0x80) )
1343 fprintf(stderr,"Warning in FINDWORK handling: No work found for PROC %u\n",CurrentProc);
1344 # endif /* GRAN_CHECK */
1348 /* ut'lu'Qo' ; Don't think that's necessary any more -- HWL
1349 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1350 CONTINUETHREAD,ThreadQueueHd,PrelBase_Z91Z93_closure,NULL);
1358 Here follows the non-GRAN @ReSchedule@.
1363 /* If you are concurrent and maybe even parallel please use this door. */
/* Non-GRAN ReSchedule (fragment; the function header lies outside this
   excerpt).  Re-queues the current TSO, turns pending required and advisory
   sparks into threads, then longjmps back to the scheduler loop.
   NOTE(review): many lines are missing from this excerpt; braces and some
   conditions are incomplete below. */
1367 int again; /* Run the current thread again? */
1375 * In the parallel world, we do unfair scheduling for the moment.
1376 * Ultimately, this should all be merged with the more
1377 * sophisticated GrAnSim scheduling options. (Of course, some
1378 * provision should be made for *required* threads to make sure
1379 * that they don't starve, but for now we assume that no one is
1380 * running concurrent Haskell on a multi-processor platform.)
     /* Parallel world: push CurrentTSO on the FRONT of the runnable queue
        (unfair -- the current thread runs again first) */
1386 if (RunnableThreadsHd == PrelBase_Z91Z93_closure)
1387 RunnableThreadsTl = CurrentTSO;
1388 TSO_LINK(CurrentTSO) = RunnableThreadsHd;
1389 RunnableThreadsHd = CurrentTSO;
1395 * In the sequential world, we assume that the whole point of running
1396 * the threaded build is for concurrent Haskell, so we provide round-robin
     /* Sequential world: append CurrentTSO at the TAIL (round-robin) */
1401 if(RunnableThreadsHd == PrelBase_Z91Z93_closure) {
1402 RunnableThreadsHd = CurrentTSO;
1404 TSO_LINK(RunnableThreadsTl) = CurrentTSO;
1405 if (DO_QP_PROF > 1) {
1406 QP_Event1("GA", CurrentTSO);
1409 RunnableThreadsTl = CurrentTSO;
1415 * Debugging code, which is useful enough (and cheap enough) to compile
1416 * in all the time. This makes sure that we don't access saved registers,
1417 * etc. in threads which are supposed to be sleeping.
1419 CurrentTSO = PrelBase_Z91Z93_closure;
1420 CurrentRegTable = NULL;
1423 /* First the required sparks */
     /* Required sparks are always turned into threads (non-discardable) */
1425 for (sparkp = PendingSparksHd[REQUIRED_POOL];
1426 sparkp < PendingSparksTl[REQUIRED_POOL]; sparkp++) {
1428 if (SHOULD_SPARK(spark)) {
1429 if ((tso = NewThread(spark, T_REQUIRED)) == NULL)
1431 if (RunnableThreadsHd == PrelBase_Z91Z93_closure) {
1432 RunnableThreadsHd = tso;
1434 if (RTSflags.ParFlags.granSimStats) {
1435 DumpGranEvent(GR_START, tso);
1436 sameThread = rtsTrue;
1440 TSO_LINK(RunnableThreadsTl) = tso;
1442 if (RTSflags.ParFlags.granSimStats)
1443 DumpGranEvent(GR_STARTQ, tso);
1446 RunnableThreadsTl = tso;
1449 QP_Event0(threadId++, spark);
1451 /* ToDo: Fix log entries for pruned sparks in GUM -- HWL */
1452 if(RTSflags.GranFlags.granSimStats_Sparks)
1453 DumpGranEvent(SP_PRUNED,threadId++);
1454 ^^^^^^^^ should be a TSO
1458 PendingSparksHd[REQUIRED_POOL] = sparkp;
1460 /* Now, almost the same thing for advisory sparks */
     /* Advisory sparks are discardable and subject to the limits below */
1462 for (sparkp = PendingSparksHd[ADVISORY_POOL];
1463 sparkp < PendingSparksTl[ADVISORY_POOL]; sparkp++) {
1465 if (SHOULD_SPARK(spark)) {
1468 /* In the parallel world, don't create advisory threads if we are
1469 * about to rerun the same thread, or already have runnable threads,
1470 * or the main thread has terminated */
1471 (RunnableThreadsHd != PrelBase_Z91Z93_closure ||
1472 (required_thread_count == 0 && IAmMainThread)) ||
1474 advisory_thread_count == RTSflags.ConcFlags.maxThreads ||
1475 (tso = NewThread(spark, T_ADVISORY)) == NULL)
1477 advisory_thread_count++;
1478 if (RunnableThreadsHd == PrelBase_Z91Z93_closure) {
1479 RunnableThreadsHd = tso;
1481 if (RTSflags.ParFlags.granSimStats) {
1482 DumpGranEvent(GR_START, tso);
1483 sameThread = rtsTrue;
1487 TSO_LINK(RunnableThreadsTl) = tso;
1489 if (RTSflags.ParFlags.granSimStats)
1490 DumpGranEvent(GR_STARTQ, tso);
1493 RunnableThreadsTl = tso;
1496 QP_Event0(threadId++, spark);
1498 /* ToDo: Fix log entries for pruned sparks in GUM -- HWL */
1499 if(RTSflags.GranFlags.granSimStats_Sparks)
1500 DumpGranEvent(SP_PRUNED,threadId++);
1501 ^^^^^^^^ should be a TSO
1505 PendingSparksHd[ADVISORY_POOL] = sparkp;
     /* x86-only expedient hack (see file header): when Haskell was entered
        more than once via C/stable pointers, don't squeeze the C stack;
        longjmp back into the scheduler loop instead.  -1 means "done". */
1508 # if defined(i386_TARGET_ARCH)
1509 if (entersFromC) { /* more than one thread has entered the Haskell world
1510 via C (and stable pointers) */
1511 /* Don't squeeze C stack */
1512 if (required_thread_count <= 0) {
1513 longjmp(scheduler_loop, -1);
1515 SchedLoop(required_thread_count <= 0 ? -1 : 1);
1516 longjmp(scheduler_loop, -1);
1519 longjmp(scheduler_loop, required_thread_count == 0 ? -1 : 1);
1522 longjmp(scheduler_loop, required_thread_count == 0 ? -1 : 1);
1525 longjmp(scheduler_loop, required_thread_count == 0 && IAmMainThread ? -1 : 1);
1533 %****************************************************************************
1535 \subsection[thread-gransim-execution]{Starting, Idling and Migrating
1536 Threads (GrAnSim only)}
1538 %****************************************************************************
1540 Thread start, idle and migration code for GrAnSim (i.e. simulating multiple
1546 /* ngoqvam che' {GrAnSim}! */
1548 # if defined(GRAN_CHECK)
1549 /* This routine is only used for keeping a statistics of thread queue
1550 lengths to evaluate the impact of priority scheduling. -- HWL
1551 {spark_queue_len}vo' jInIHta'
/* thread_queue_len -- walk the runnable-thread queue of `proc' counting its
   length; used only for GRAN_CHECK statistics (see comment above).
   NOTE(review): the loop body and return statement are missing from this
   excerpt; presumably `len' is returned. */
1554 thread_queue_len(PROC proc)
1559 for (len = 0, prev = PrelBase_Z91Z93_closure, next = RunnableThreadsHd[proc];
1560 next != PrelBase_Z91Z93_closure;
1561 len++, prev = next, next = TSO_LINK(prev))
1566 # endif /* GRAN_CHECK */
1569 A large portion of @StartThread@ deals with maintaining a sorted thread
1570 queue, which is needed for the Priority Sparking option. Without that
1571 complication the code boils down to FIFO handling.
/* StartThread -- put the TSO of a STARTTHREAD-style event into the thread
   queue of CurrentProc and emit the matching profiling event.  Three cases:
     1. empty queue: thread becomes the running thread (CONTINUETHREAD);
     2. GrAnSim Light: insert sorted by TSO clock via InsertThread;
     3. otherwise: append at the tail, or -- with priority scheduling --
        insert sorted by TSO priority (never before the running head).
   NOTE(review): several closing braces/else lines are missing in this
   excerpt. */
1574 StartThread(event,event_type)
1576 enum gran_event_types event_type;
1578 P_ tso = EVENT_TSO(event),
1579 node = EVENT_NODE(event);
1580 PROC proc = EVENT_PROC(event),
1581 creator = EVENT_CREATOR(event);
1584 rtsBool found = rtsFalse;
1586 ASSERT(CurrentProc==proc);
1588 # if defined(GRAN_CHECK)
1589 if ( RTSflags.GranFlags.Light && CurrentProc!=0 ) {
1590 fprintf(stderr,"Qagh {StartThread}Daq: CurrentProc must be 0 in GrAnSim Light setup\n");
1594 /* A wee bit of statistics gathering */
1596 tot_tq_len += thread_queue_len(CurrentProc);
1599 ASSERT(TSO_LINK(CurrentTSO)==PrelBase_Z91Z93_closure);
1601 /* Idle proc; same for pri spark and basic version */
     /* Case 1: queue empty -> tso runs immediately */
1602 if(ThreadQueueHd==PrelBase_Z91Z93_closure)
1604 CurrentTSO = ThreadQueueHd = ThreadQueueTl = tso;
1606 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadqueuetime;
1607 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1608 CONTINUETHREAD,tso,PrelBase_Z91Z93_closure,NULL);
     /* With -bl (labelling) the GR_START/GR_STARTQ event is emitted
        elsewhere, hence the extra guard */
1610 if(RTSflags.GranFlags.granSimStats &&
1611 !( (event_type == GR_START || event_type == GR_STARTQ) && RTSflags.GranFlags.labelling) )
1612 DumpRawGranEvent(CurrentProc,creator,event_type,
1614 TSO_SPARKNAME(tso));
1615 /* ^^^ SN (spark name) as optional info */
1616 /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
1617 /* ^^^ spark length as optional info */
1619 ASSERT(IS_IDLE(CurrentProc) || event_type==GR_RESUME ||
1620 (procStatus[CurrentProc]==Fishing && event_type==GR_STOLEN) ||
1621 procStatus[CurrentProc]==Starting);
1622 MAKE_BUSY(CurrentProc);
1626 /* In GrAnSim-Light we always have an idle `virtual' proc.
1627 The semantics of the one-and-only thread queue is different here:
1628 all threads in the queue are running (each on its own virtual processor);
1629 the queue is only needed internally in the simulator to interleave the
1630 reductions of the different processors.
1631 The one-and-only thread queue is sorted by the local clocks of the TSOs.
     /* Case 2: GrAnSim Light -> clock-sorted insert via InsertThread */
1633 if(RTSflags.GranFlags.Light)
1635 ASSERT(ThreadQueueHd!=PrelBase_Z91Z93_closure);
1636 ASSERT(TSO_LINK(tso)==PrelBase_Z91Z93_closure);
1638 /* If only one thread in queue so far we emit DESCHEDULE in debug mode */
1639 if(RTSflags.GranFlags.granSimStats &&
1640 (RTSflags.GranFlags.debug & 0x20000) &&
1641 TSO_LINK(ThreadQueueHd)==PrelBase_Z91Z93_closure) {
1642 DumpRawGranEvent(CurrentProc,CurrentProc,GR_DESCHEDULE,
1643 ThreadQueueHd,PrelBase_Z91Z93_closure,0);
1647 if ( InsertThread(tso) ) { /* new head of queue */
1648 new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1649 CONTINUETHREAD,tso,PrelBase_Z91Z93_closure,NULL);
1652 if(RTSflags.GranFlags.granSimStats &&
1653 !(( event_type == GR_START || event_type == GR_STARTQ) && RTSflags.GranFlags.labelling) )
1654 DumpRawGranEvent(CurrentProc,creator,event_type,
1656 TSO_SPARKNAME(tso));
1657 /* ^^^ SN (spark name) as optional info */
1658 /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
1659 /* ^^^ spark length as optional info */
1661 /* MAKE_BUSY(CurrentProc); */
     /* Case 3: non-empty queue on a normal PE */
1665 /* Only for Pri Sparking */
     /* Priority scheduling: find insertion point after the running head,
        first TSO with priority <= tso's loses (>= keeps FIFO among equals) */
1666 if (RTSflags.GranFlags.DoPriorityScheduling && TSO_PRI(tso)!=0)
1667 /* {add_to_spark_queue}vo' jInIHta'; Qu' wa'DIch yIleghQo' */
1668 for (prev = ThreadQueueHd, next = TSO_LINK(ThreadQueueHd), count=0;
1669 (next != PrelBase_Z91Z93_closure) &&
1670 !(found = (TSO_PRI(tso) >= TSO_PRI(next)));
1671 prev = next, next = TSO_LINK(next), count++)
1675 ASSERT(!IS_IDLE(CurrentProc));
1677 /* found can only be rtsTrue if pri sparking enabled */
1679 # if defined(GRAN_CHECK)
1680 ++non_end_add_threads;
1682 /* Add tso to ThreadQueue between prev and next */
1683 TSO_LINK(tso) = next;
1684 if ( next == PrelBase_Z91Z93_closure ) {
1685 ThreadQueueTl = tso;
1687 /* no back link for TSO chain */
1690 if ( prev == PrelBase_Z91Z93_closure ) {
1691 /* Never add TSO as first elem of thread queue; the first */
1692 /* element should be the one that is currently running -- HWL */
1693 # if defined(GRAN_CHECK)
1694 fprintf(stderr,"Qagh: NewThread (w/ PriorityScheduling): Trying to add TSO %#lx (PRI=%d) as first elem of threadQ (%#lx) on proc %u (@ %u)\n",
1695 tso, TSO_PRI(tso), ThreadQueueHd, CurrentProc,
1696 CurrentTime[CurrentProc]);
1699 TSO_LINK(prev) = tso;
1701 } else { /* !found */ /* or not pri sparking! */
1702 /* Add TSO to the end of the thread queue on that processor */
1703 TSO_LINK(ThreadQueueTl) = EVENT_TSO(event);
1704 ThreadQueueTl = EVENT_TSO(event);
     /* Charge queueing time plus per-element priority-search overhead */
1706 CurrentTime[CurrentProc] += count *
1707 RTSflags.GranFlags.gran_pri_sched_overhead +
1708 RTSflags.GranFlags.gran_threadqueuetime;
1710 if(RTSflags.GranFlags.DoThreadMigration)
     /* event_type+1 maps GR_START->GR_STARTQ etc. (the "queued" variant) */
1713 if(RTSflags.GranFlags.granSimStats &&
1714 !(( event_type == GR_START || event_type == GR_STARTQ) && RTSflags.GranFlags.labelling) )
1715 DumpRawGranEvent(CurrentProc,creator,event_type+1,
1717 TSO_SPARKNAME(tso));
1718 /* ^^^ SN (spark name) as optional info */
1719 /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
1720 /* ^^^ spark length as optional info */
1722 # if defined(GRAN_CHECK)
1723 /* Check if thread queue is sorted. Only for testing, really! HWL */
1724 if ( RTSflags.GranFlags.DoPriorityScheduling && (RTSflags.GranFlags.debug & 0x400) ) {
1725 rtsBool sorted = rtsTrue;
1728 if (ThreadQueueHd==PrelBase_Z91Z93_closure || TSO_LINK(ThreadQueueHd)==PrelBase_Z91Z93_closure) {
1729 /* just 1 elem => ok */
1731 /* Qu' wa'DIch yIleghQo' (ignore first elem)! */
1732 for (prev = TSO_LINK(ThreadQueueHd), next = TSO_LINK(prev);
1733 (next != PrelBase_Z91Z93_closure) ;
1734 prev = next, next = TSO_LINK(prev)) {
1736 (TSO_PRI(prev) >= TSO_PRI(next));
1740 fprintf(stderr,"Qagh: THREADQ on PE %d is not sorted:\n",
1742 G_THREADQ(ThreadQueueHd,0x1);
1747 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadqueuetime;
1751 @InsertThread@, which is only used for GranSim Light, is similar to
1752 @StartThread@ in that it adds a TSO to a thread queue. However, it assumes
1753 that the thread queue is sorted by local clocks and it inserts the TSO at the
1754 right place in the queue. Don't create any event, just insert.
/* InsertThread (fragment; header outside this excerpt) -- GrAnSim-Light
   only: insert `tso' into the one-and-only thread queue, kept sorted by
   TSO_CLOCK.  No event is created.  Returns rtsTrue iff tso became the new
   head of the queue (prev == nil on exit). */
1763 rtsBool found = rtsFalse;
1765 # if defined(GRAN_CHECK)
1766 if ( !RTSflags.GranFlags.Light ) {
1767 fprintf(stderr,"Qagh {InsertThread}Daq: InsertThread should only be used in a GrAnSim Light setup\n");
1771 if ( RTSflags.GranFlags.Light && CurrentProc!=0 ) {
     /* NOTE(review): message says "{StartThread}Daq" but this is
        InsertThread -- looks like a copy-paste slip in the string */
1772 fprintf(stderr,"Qagh {StartThread}Daq: CurrentProc must be 0 in GrAnSim Light setup\n");
1777 /* Idle proc; same for pri spark and basic version */
1778 if(ThreadQueueHd==PrelBase_Z91Z93_closure)
1780 ThreadQueueHd = ThreadQueueTl = tso;
1781 /* MAKE_BUSY(CurrentProc); */
     /* Find insertion point: first TSO whose clock is ahead of tso's */
1785 for (prev = ThreadQueueHd, next = TSO_LINK(ThreadQueueHd), count=0;
1786 (next != PrelBase_Z91Z93_closure) &&
1787 !(found = (TSO_CLOCK(tso) < TSO_CLOCK(next)));
1788 prev = next, next = TSO_LINK(next), count++)
1791 /* found can only be rtsTrue if pri sparking enabled */
     /* NOTE(review): the comment above is inherited from StartThread; here
        `found' is set by the clock comparison, not priority sparking */
1793 /* Add tso to ThreadQueue between prev and next */
1794 TSO_LINK(tso) = next;
1795 if ( next == PrelBase_Z91Z93_closure ) {
1796 ThreadQueueTl = tso;
1798 /* no back link for TSO chain */
1801 if ( prev == PrelBase_Z91Z93_closure ) {
1802 ThreadQueueHd = tso;
1804 TSO_LINK(prev) = tso;
1806 } else { /* !found */ /* or not pri sparking! */
1807 /* Add TSO to the end of the thread queue on that processor */
1808 TSO_LINK(ThreadQueueTl) = tso;
1809 ThreadQueueTl = tso;
     /* rtsTrue <=> tso was inserted at the head of the queue */
1811 return (prev == PrelBase_Z91Z93_closure);
1816 Export work to idle PEs. This function is called from @ReSchedule@ before
1817 dispatching on the current event. @HandleIdlePEs@ iterates over all PEs,
1818 trying to get work for idle PEs. Note, that this is a simplification
1819 compared to GUM's fishing model. We try to compensate for that by making
1820 the cost for stealing work dependent on the number of idle processors and
1821 thereby on the probability with which a randomly sent fish would find work.
/* HandleIdlePEs (fragment; header outside this excerpt) -- for every idle
   PE: first try local work (queue a FINDWORK if its advisory pool is
   non-empty), otherwise go fishing for remote work, bounded by max_fishes.
   Steal order depends on DoStealThreadsFirst; spark stealing requires
   FetchStrategy >= 3, thread stealing >= 4 (or no outstanding fetches). */
1828 # if defined(GRAN) && defined(GRAN_CHECK)
1829 if ( RTSflags.GranFlags.Light ) {
1830 fprintf(stderr,"Qagh {HandleIdlePEs}Daq: Should never be entered in GrAnSim Light setup\n");
1836 for(proc = 0; proc < RTSflags.GranFlags.proc; proc++)
1837 if(IS_IDLE(proc)) /* && IS_SPARKING(proc) && IS_STARTING(proc) */
1838 /* First look for local work! */
1839 if (PendingSparksHd[proc][ADVISORY_POOL]!=NULL)
1841 new_event(proc,proc,CurrentTime[proc],
1842 FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
1843 MAKE_SPARKING(proc);
1845 /* Then try to get remote work! */
     /* max_fishes==0 means "unlimited outstanding fish" */
1846 else if ((RTSflags.GranFlags.max_fishes==0 ||
1847 OutstandingFishes[proc]<RTSflags.GranFlags.max_fishes) )
1850 if(RTSflags.GranFlags.DoStealThreadsFirst &&
1851 (RTSflags.GranFlags.FetchStrategy >= 4 || OutstandingFetches[proc] == 0))
1853 if (SurplusThreads > 0l) /* Steal a thread */
1860 if(SparksAvail > 0l &&
1861 (RTSflags.GranFlags.FetchStrategy >= 3 || OutstandingFetches[proc] == 0)) /* Steal a spark */
1864 if (SurplusThreads > 0l &&
1865 (RTSflags.GranFlags.FetchStrategy >= 4 || OutstandingFetches[proc] == 0)) /* Steal a thread */
1871 Steal a spark and schedule moving it to proc. We want to look at PEs in
1872 clock order -- the PE whose local clock is furthest behind first. Currently sparks are only stolen from
1873 the @ADVISORY_POOL@ never from the @REQUIRED_POOL@. Eventually, this should
1874 be changed to first steal from the former then from the latter.
1876 We model a sort of fishing mechanism by counting the number of sparks and
1877 threads we are currently stealing.
/* StealSpark (fragment; header outside this excerpt) -- steal one advisory
   spark on behalf of idle PE `proc'.  Candidate victims are PEs whose clock
   is not ahead of CurrentProc and whose advisory pool is non-empty; they
   are sorted by clock (most behind first), then a victim is chosen at
   random from those earlier than proc (or all, if none is earlier).  The
   first stealable spark is shipped to proc via a MOVESPARK event; sparks
   that fail SHOULD_SPARK are pruned on the way.
   NOTE(review): lines are missing in this excerpt (swap completion, brace
   closings, parts of the stealtime expression). */
1884 sparkq spark, prev, next;
1885 rtsBool stolen = rtsFalse;
1886 TIME times[MAX_PROC], stealtime;
1887 unsigned ntimes=0, i, j;
1888 int first_later, upb, r;
1890 # if defined(GRAN) && defined(GRAN_CHECK)
1891 if ( RTSflags.GranFlags.Light ) {
1892 fprintf(stderr,"Qagh {StealSpark}Daq: Should never be entered in GrAnSim Light setup\n");
1897 /* times shall contain processors from which we may steal sparks */
1898 for(p=0; p < RTSflags.GranFlags.proc; ++p)
1900 PendingSparksHd[p][ADVISORY_POOL] != NULL &&
1901 CurrentTime[p] <= CurrentTime[CurrentProc])
1902 times[ntimes++] = p;
     /* Selection sort of `times' by ascending PE clock */
1905 for(i=0; i < ntimes; ++i)
1906 for(j=i+1; j < ntimes; ++j)
1907 if(CurrentTime[times[i]] > CurrentTime[times[j]])
1909 unsigned temp = times[i];
1910 times[i] = times[j];
1914 /* Choose random processor to steal spark from; first look at processors */
1915 /* that are earlier than the current one (i.e. proc) */
1918 (first_later < ntimes) && (CurrentTime[times[first_later]] < CurrentTime[proc]);
1922 while (!stolen && (ntimes>0)) {
     /* NOTE(review): this inner `r' shadows the outer `int r' above */
1923 long unsigned int r, q=0;
1925 upb = (first_later==0) ? ntimes : first_later;
1927 if (RTSflags.GranFlags.RandomSteal) {
1928 r = lrand48(); /* [0, RAND_MAX] */
1932 /* -- ASSERT(r<=RAND_MAX); */
1933 i = (unsigned int) (r % upb); /* [0, upb) */
1934 /* -- ASSERT((i>=0) && (i<=upb)); */
1936 /* -- ASSERT((p>=0) && (p<MAX_PROC)); */
1938 # if defined(GRAN_CHECK)
1939 if ( RTSflags.GranFlags.debug & 0x2000 )
1940 fprintf(stderr,"RANDOM_STEAL: [index %u of %u] from %u (@ %lu) to %u (@ %lu) (q=%d) [rand value: %lu]\n",
1941 i, ntimes, p, CurrentTime[p], proc, CurrentTime[proc], q, r);
1944 /* Now go through sparkq and steal the first one that should be sparked*/
1945 for(prev=NULL, spark = PendingSparksHd[p][ADVISORY_POOL];
1946 spark != NULL && !stolen;
1949 next = SPARK_NEXT(spark);
1951 if ((IS_IDLE(p) || procStatus[p]==Sparking || procStatus[p]==Fishing) &&
1952 SPARK_NEXT(spark)==NULL)
1954 /* Be social! Don't steal the only spark of an idle processor */
1957 else if(SHOULD_SPARK(SPARK_NODE(spark)))
1959 /* Don't Steal local sparks */
1960 if(!SPARK_GLOBAL(spark))
1966 /* Prepare message for sending spark */
1967 CurrentTime[p] += RTSflags.GranFlags.gran_mpacktime;
1969 if(RTSflags.GranFlags.granSimStats_Sparks)
1970 DumpRawGranEvent(p,(PROC)0,SP_EXPORTED,PrelBase_Z91Z93_closure,
1972 spark_queue_len(p,ADVISORY_POOL));
1974 SPARK_NEXT(spark) = NULL;
1976 stealtime = (CurrentTime[p] > CurrentTime[proc] ?
     /* Ship the spark: arrives at proc at stealtime as a MOVESPARK */
1982 new_event(proc,p /* CurrentProc */,stealtime,
1983 MOVESPARK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,spark);
1985 /* MAKE_BUSY(proc); not yet; busy when TSO in threadq */
1987 ++OutstandingFishes[proc];
     /* Bump the export count in the GLOBAL field (see GA-field comments) */
1990 ++SPARK_GLOBAL(spark);
1993 CurrentTime[p] += RTSflags.GranFlags.gran_mtidytime;
1995 else /* !(SHOULD_SPARK(SPARK_NODE(spark))) */
1997 if(RTSflags.GranFlags.granSimStats_Sparks)
1998 DumpRawGranEvent(p,(PROC)0,SP_PRUNED,PrelBase_Z91Z93_closure,
2000 spark_queue_len(p,ADVISORY_POOL));
2002 DisposeSpark(spark);
     /* Unlink the removed spark from victim p's queue */
2005 if(spark == PendingSparksHd[p][ADVISORY_POOL])
2006 PendingSparksHd[p][ADVISORY_POOL] = next;
2009 SPARK_NEXT(prev) = next;
2010 } /* for (spark=... iterating over sparkq */
2012 if(PendingSparksHd[p][ADVISORY_POOL] == NULL)
2013 PendingSparksTl[p][ADVISORY_POOL] = NULL;
2015 if (!stolen && (ntimes>0)) { /* nothing stealable from proc p :( */
2016 ASSERT(times[i]==p);
2018 /* remove p from the list (at pos i) */
2019 for (j=i; j+1<ntimes; j++)
2020 times[j] = times[j+1];
2023 /* update index to first proc which is later (or equal) than proc */
2026 (CurrentTime[times[first_later-1]]>CurrentTime[proc]);
2031 # if defined(GRAN_CHECK)
2032 if (stolen && (i!=0)) { /* only for statistics */
2034 ntimes_total += ntimes;
2035 fl_total += first_later;
2042 Steal a thread and schedule moving it to proc.
/* StealThread (fragment; header outside this excerpt) -- steal a runnable
   thread on behalf of idle PE `proc'.  Victim selection mirrors StealSpark:
   collect PEs with runnable threads whose clock is not ahead, sort by
   clock, pick at random among the earlier ones.  The first non-LOCKED
   thread after the (running) queue head is unlinked and shipped via a
   MOVETHREAD event; its PROCS bitmask is cleared while in flight.
   NOTE(review): lines are missing in this excerpt (swap completion, brace
   closings, part of the stealtime expression). */
2051 TIME times[MAX_PROC], stealtime;
2052 unsigned ntimes=0, i, j;
2053 int first_later, upb, r;
2055 /* Hunt for a thread */
2057 # if defined(GRAN) && defined(GRAN_CHECK)
2058 if ( RTSflags.GranFlags.Light ) {
2059 fprintf(stderr,"Qagh {StealThread}: Should never be entered in GrAnSim Light setup\n");
2064 /* times shall contain processors from which we may steal threads */
2065 for(p=0; p < RTSflags.GranFlags.proc; ++p)
2066 if(proc != p && RunnableThreadsHd[p] != PrelBase_Z91Z93_closure &&
2067 CurrentTime[p] <= CurrentTime[CurrentProc])
2068 times[ntimes++] = p;
     /* Selection sort of `times' by ascending PE clock */
2071 for(i=0; i < ntimes; ++i)
2072 for(j=i+1; j < ntimes; ++j)
2073 if(CurrentTime[times[i]] > CurrentTime[times[j]])
2075 unsigned temp = times[i];
2076 times[i] = times[j];
2080 /* Choose random processor to steal thread from; first look at processors */
2081 /* that are earlier than the current one (i.e. proc) */
2084 (first_later < ntimes) && (CurrentTime[times[first_later]] < CurrentTime[proc]);
2088 while (!found && (ntimes>0)) {
     /* NOTE(review): this inner `r' shadows the outer `int r' above */
2089 long unsigned int r, q=0;
2091 upb = (first_later==0) ? ntimes : first_later;
2093 if (RTSflags.GranFlags.RandomSteal) {
2094 r = lrand48(); /* [0, RAND_MAX] */
2098 /* -- ASSERT(r<=RAND_MAX); */
2099 if ( RTSflags.GranFlags.debug & 0x2000 )
2100 fprintf(stderr,"rand value: %d " , r);
2101 i = (unsigned int) (r % upb); /* [0, upb) */
2102 /* -- ASSERT((i>=0) && (i<=upb)); */
2104 /* -- ASSERT((p>=0) && (p<MAX_PROC)); */
2106 # if defined(GRAN_CHECK)
2107 if ( RTSflags.GranFlags.debug & 0x2000 )
2108 fprintf(stderr,"RANDOM_STEAL; [index %u] from %u (@ %lu) to %u (@ %lu) (q=%d)\n",
2109 i, p, CurrentTime[p], proc, CurrentTime[proc], q);
2112 /* Steal the first exportable thread in the runnable queue after the */
2115 if(RunnableThreadsHd[p] != PrelBase_Z91Z93_closure)
     /* Skip the head (currently running) and any LOCKED (non-exportable) TSO */
2117 for(prev = RunnableThreadsHd[p], thread = TSO_LINK(RunnableThreadsHd[p]);
2118 thread != PrelBase_Z91Z93_closure && TSO_LOCKED(thread);
2119 prev = thread, thread = TSO_LINK(thread))
2122 if(thread != PrelBase_Z91Z93_closure) /* Take thread out of runnable queue */
2124 TSO_LINK(prev) = TSO_LINK(thread);
2126 TSO_LINK(thread) = PrelBase_Z91Z93_closure;
2128 if(RunnableThreadsTl[p] == thread)
2129 RunnableThreadsTl[p] = prev;
2131 /* Turn magic constants into params !? -- HWL */
     /* Moving a whole TSO is costed at 5x a spark's pack/unpack time */
2133 CurrentTime[p] += 5l * RTSflags.GranFlags.gran_mpacktime;
2135 stealtime = (CurrentTime[p] > CurrentTime[proc] ?
2139 + 4l * RTSflags.GranFlags.gran_additional_latency
2140 + 5l * RTSflags.GranFlags.gran_munpacktime;
2142 /* Move the thread; set bitmask to 0 while TSO is `on-the-fly' */
2143 SET_PROCS(thread,Nowhere /* PE_NUMBER(proc) */);
2145 /* Move from one queue to another */
2146 new_event(proc,p,stealtime,MOVETHREAD,thread,PrelBase_Z91Z93_closure,NULL);
2147 /* MAKE_BUSY(proc); not yet; only when thread is in threadq */
2148 ++OutstandingFishes[proc];
2153 if(RTSflags.GranFlags.granSimStats)
2154 DumpRawGranEvent(p,proc,GR_STEALING,thread,
2155 PrelBase_Z91Z93_closure,0);
2157 CurrentTime[p] += 5l * RTSflags.GranFlags.gran_mtidytime;
2165 if (!found && (ntimes>0)) { /* nothing stealable from proc p */
2166 ASSERT(times[i]==p);
2168 /* remove p from the list (at pos i) */
2169 for (j=i; j+1<ntimes; j++)
2170 times[j] = times[j+1];
2174 # if defined(GRAN_CHECK) && defined(GRAN)
2175 if (found && (i!=0)) { /* only for statistics */
/* SparkStealTime -- simulated cost of stealing a spark: scales the basic
   message latency by a factor that shrinks as more PEs are idle, modelling
   the probability that a randomly sent fish finds work (cf. the fishing
   comments above HandleIdlePEs). */
2182 SparkStealTime(void)
2184 double fishdelay, sparkdelay, latencydelay;
2185 fishdelay = (double)RTSflags.GranFlags.proc/2;
2186 sparkdelay = fishdelay -
     /* NOTE(review): divides by (proc-1); with a single PE this is a
        divide-by-zero in double arithmetic -- presumably never reached
        since stealing needs >= 2 PEs, but worth confirming */
2187 ((fishdelay-1)/(double)(RTSflags.GranFlags.proc-1))*(double)idlers();
2188 latencydelay = sparkdelay*((double)RTSflags.GranFlags.gran_latency);
2190 return((TIME)latencydelay);
2192 #endif /* GRAN ; HWL */
2197 %****************************************************************************
2199 \subsection[thread-execution]{Executing Threads}
2201 %****************************************************************************
2203 First a set of functions for handling sparks and spark-queues that are
2204 attached to the processors. Currently, there are two spark-queues per
2208 \item A queue of @REQUIRED@ sparks i.e. these sparks will be definitely
2209 turned into threads ({\em non-discardable\/}). They are mainly used in concurrent
2210 Haskell. We don't use them in GrAnSim.
2211 \item A queue of @ADVISORY@ sparks i.e. they may be turned into threads if
2212 the RTS thinks that it is a good idea. However, these sparks are {\em
2213 discardable}. They will be discarded if the associated closure is
2214 generally not worth creating a new thread (indicated by a tag in the
2215 closure) or they may be pruned during GC if there are too many sparks
2220 EXTDATA_RO(StkO_info);
2221 EXTDATA_RO(TSO_info);
2222 EXTDATA_RO(realWorldZh_closure);
2224 EXTFUN(EnterNodeCode);
2225 UNVEC(EXTFUN(stopThreadDirectReturn);,EXTDATA(vtbl_stopStgWorld);)
2228 /* ngoqvam che' {GrAnSim} */
2230 /* Slow but relatively reliable method uses stgMallocBytes */
2231 /* Eventually change that to heap allocated sparks. */
2233 /* -------------------------------------------------------------------------
2234 This is the main point where handling granularity information comes into
2236 ------------------------------------------------------------------------- */
2238 #define MAX_RAND_PRI 100
2241 Granularity info transformers.
2242 Applied to the GRAN_INFO field of a spark.
/* Granularity-info transformers (see comment above): map a spark's static
   granularity estimate to the priority actually used; NewSpark selects one
   of these based on the RTS priority flags. */
2244 static I_ ID(I_ x) { return(x); }; /* use the estimate unchanged */
2245 static I_ INV(I_ x) { return(-x); }; /* invert: low estimates win */
2246 static I_ IGNORE(I_ x) { return (0); }; /* drop the estimate entirely */
2247 static I_ RAND(I_ x) { return ((lrand48() % MAX_RAND_PRI) + 1); } /* random pri in [1, MAX_RAND_PRI]; ignores x */
2249 /* NB: size_info and par_info are currently unused (what a shame!) -- HWL */
/* NewSpark -- allocate (via stgMallocBytes; see the comment above about
   eventually using heap-allocated sparks) and initialise a spark for
   `node'.  The spark's priority is derived from gran_info through one of
   the transformers above, as selected by the RTS priority flags; sparks
   below the SparkPriority threshold are discarded (returns NULL).
   size_info and par_info are currently unused (see note above). */
2252 NewSpark(node,name,gran_info,size_info,par_info,local)
2254 I_ name, gran_info, size_info, par_info, local;
     /* Priority selection: Random > Inverse > Ignore > (default, truncated
        in this excerpt -- presumably ID(gran_info)) */
2259 pri = RTSflags.GranFlags.RandomPriorities ? RAND(gran_info) :
2260 RTSflags.GranFlags.InversePriorities ? INV(gran_info) :
2261 RTSflags.GranFlags.IgnorePriorities ? IGNORE(gran_info) :
     /* Threshold filter: too-low priority sparks are never created */
2264 if ( RTSflags.GranFlags.SparkPriority!=0 && pri<RTSflags.GranFlags.SparkPriority ) {
2265 if ( RTSflags.GranFlags.debug & 0x40 ) {
2266 fprintf(stderr,"NewSpark: Ignoring spark of priority %u (SparkPriority=%u); node=0x%lx; name=%u\n",
2267 pri, RTSflags.GranFlags.SparkPriority, node, name);
2269 return ((sparkq)NULL);
2272 newspark = (sparkq) stgMallocBytes(sizeof(struct spark), "NewSpark");
2273 SPARK_PREV(newspark) = SPARK_NEXT(newspark) = NULL;
2274 SPARK_NODE(newspark) = node;
     /* name==1 means "inherit the spark name of the current thread" */
2275 SPARK_NAME(newspark) = (name==1) ? TSO_SPARKNAME(CurrentTSO) : name;
2276 SPARK_GRAN_INFO(newspark) = pri;
2277 SPARK_GLOBAL(newspark) = !local; /* Check that with parAt, parAtAbs !!*/
2281 /* To make casm more convenient use this function to label strategies */
2283 set_sparkname(P_ tso, int name) {
2284 TSO_SPARKNAME(tso) = name ;
2286 if(0 && RTSflags.GranFlags.granSimStats)
2287 DumpRawGranEvent(CurrentProc,99,GR_START,
2288 tso,PrelBase_Z91Z93_closure,
2289 TSO_SPARKNAME(tso));
2290 /* ^^^ SN (spark name) as optional info */
2291 /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
2292 /* ^^^ spark length as optional info */
2297 reset_sparkname(P_ tso) {
2298 TSO_SPARKNAME(tso) = 0;
2303 With PrioritySparking add_to_spark_queue performs an insert sort to keep
2304 the spark queue sorted. Otherwise the spark is just added to the end of
2309 add_to_spark_queue(spark)
2314 rtsBool found = rtsFalse;
2316 if ( spark == (sparkq)NULL ) {
2320 if (RTSflags.GranFlags.DoPrioritySparking && (SPARK_GRAN_INFO(spark)!=0) ) {
2322 for (prev = NULL, next = PendingSparksHd[CurrentProc][ADVISORY_POOL], count=0;
2324 !(found = (SPARK_GRAN_INFO(spark) >= SPARK_GRAN_INFO(next)));
2325 prev = next, next = SPARK_NEXT(next), count++)
2328 } else { /* 'utQo' */
2330 found = rtsFalse; /* to add it at the end */
2335 SPARK_NEXT(spark) = next;
2336 if ( next == NULL ) {
2337 PendingSparksTl[CurrentProc][ADVISORY_POOL] = spark;
2339 SPARK_PREV(next) = spark;
2341 SPARK_PREV(spark) = prev;
2342 if ( prev == NULL ) {
2343 PendingSparksHd[CurrentProc][ADVISORY_POOL] = spark;
2345 SPARK_NEXT(prev) = spark;
2347 } else { /* (RTSflags.GranFlags.DoPrioritySparking && !found) || !DoPrioritySparking */
2348 SPARK_NEXT(spark) = NULL;
2349 SPARK_PREV(spark) = PendingSparksTl[CurrentProc][ADVISORY_POOL];
2350 if (PendingSparksHd[CurrentProc][ADVISORY_POOL] == NULL)
2351 PendingSparksHd[CurrentProc][ADVISORY_POOL] = spark;
2353 SPARK_NEXT(PendingSparksTl[CurrentProc][ADVISORY_POOL]) = spark;
2354 PendingSparksTl[CurrentProc][ADVISORY_POOL] = spark;
2358 if (RTSflags.GranFlags.DoPrioritySparking) {
2359 CurrentTime[CurrentProc] += count * RTSflags.GranFlags.gran_pri_spark_overhead;
2362 # if defined(GRAN_CHECK)
2363 if ( RTSflags.GranFlags.debug & 0x1000 ) {
2364 for (prev = NULL, next = PendingSparksHd[CurrentProc][ADVISORY_POOL];
2366 prev = next, next = SPARK_NEXT(next))
2368 if ( (prev!=NULL) && (prev!=PendingSparksTl[CurrentProc][ADVISORY_POOL]) )
2369 fprintf(stderr,"SparkQ inconsistency after adding spark %#lx: (PE %u, pool %u) PendingSparksTl (%#lx) not end of queue (%#lx)\n",
2370 spark,CurrentProc,ADVISORY_POOL,
2371 PendingSparksTl[CurrentProc][ADVISORY_POOL], prev);
2375 # if defined(GRAN_CHECK)
2376 /* Check if the sparkq is still sorted. Just for testing, really! */
2377 if ( RTSflags.GranFlags.debug & 0x400 ) {
2378 rtsBool sorted = rtsTrue;
2381 if (PendingSparksHd[CurrentProc][ADVISORY_POOL] == NULL ||
2382 SPARK_NEXT(PendingSparksHd[CurrentProc][ADVISORY_POOL]) == NULL ) {
2383 /* just 1 elem => ok */
2385 for (prev = PendingSparksHd[CurrentProc][ADVISORY_POOL],
2386 next = SPARK_NEXT(PendingSparksHd[CurrentProc][ADVISORY_POOL]);
2388 prev = next, next = SPARK_NEXT(next)) {
2390 (SPARK_GRAN_INFO(prev) >= SPARK_GRAN_INFO(next));
2394 fprintf(stderr,"Warning: SPARKQ on PE %d is not sorted:\n",
2396 G_SPARKQ(PendingSparksHd[CurrentProc][ADVISORY_POOL],1);
2406 /* A SP_PRUNED line should be dumped when this is called from pruning or */
2407 /* discarding a spark! */
2416 DisposeSparkQ(spark)
2422 DisposeSparkQ(SPARK_NEXT(spark));
2425 if (SparksAvail < 0)
2426 fprintf(stderr,"DisposeSparkQ: SparksAvail<0 after disposing sparkq @ 0x%lx\n", spark);
2435 % {GrAnSim}vaD (Notes on GrAnSim) -- HWL:
2439 \paragraph{Notes on GrAnSim:}
2440 The following routines are for handling threads. Currently, we use an
2441 unfair scheduling policy in GrAnSim. Thus there are no explicit functions for
2442 scheduling here. If other scheduling policies are added to the system that
2443 code should go in here.
2446 /* Create a new TSO, with the specified closure to enter and thread type */
2450 NewThread(topClosure, type, pri)
2456 NewThread(topClosure, type)
2463 # if defined(GRAN) && defined(GRAN_CHECK)
2464 if ( RTSflags.GranFlags.Light && CurrentProc!=0) {
2465 fprintf(stderr,"Qagh {NewThread}Daq: CurrentProc must be 0 in GrAnSim Light setup\n");
2469 if (AvailableTSO != PrelBase_Z91Z93_closure) {
2472 SET_PROCS(tso,ThisPE); /* Allocate it locally! */
2474 AvailableTSO = TSO_LINK(tso);
2475 } else if (SAVE_Hp + TSO_HS + TSO_CTS_SIZE > SAVE_HpLim) {
2478 ALLOC_TSO(TSO_HS,BYTES_TO_STGWORDS(sizeof(STGRegisterTable)),
2479 BYTES_TO_STGWORDS(sizeof(StgDouble)));
2481 SAVE_Hp += TSO_HS + TSO_CTS_SIZE;
2482 SET_TSO_HDR(tso, TSO_info, CCC);
2485 TSO_LINK(tso) = PrelBase_Z91Z93_closure;
2487 TSO_PRI(tso) = pri; /* Priority of that TSO -- HWL */
2489 #if defined(PROFILING) || defined(PAR)
2490 TSO_CCC(tso) = (CostCentre)STATIC_CC_REF(CC_MAIN);
2492 TSO_NAME(tso) = (P_) INFO_PTR(topClosure); /* A string would be nicer -- JSM */
2493 TSO_ID(tso) = threadId++;
2494 TSO_TYPE(tso) = type;
2495 TSO_PC1(tso) = TSO_PC2(tso) = EnterNodeCode;
2496 TSO_ARG1(tso) = /* TSO_ARG2(tso) = */ 0;
2497 TSO_SWITCH(tso) = NULL;
2504 #if defined(GRAN) || defined(PAR)
2505 TSO_SPARKNAME(tso) = 0;
2507 TSO_STARTEDAT(tso) = CurrentTime[CurrentProc];
2509 TSO_STARTEDAT(tso) = CURRENT_TIME;
2511 TSO_EXPORTED(tso) = 0;
2512 TSO_BASICBLOCKS(tso) = 0;
2513 TSO_ALLOCS(tso) = 0;
2514 TSO_EXECTIME(tso) = 0;
2515 TSO_FETCHTIME(tso) = 0;
2516 TSO_FETCHCOUNT(tso) = 0;
2517 TSO_BLOCKTIME(tso) = 0;
2518 TSO_BLOCKCOUNT(tso) = 0;
2519 TSO_BLOCKEDAT(tso) = 0;
2520 TSO_GLOBALSPARKS(tso) = 0;
2521 TSO_LOCALSPARKS(tso) = 0;
2523 if (RTSflags.GranFlags.Light)
2524 TSO_CLOCK(tso) = TSO_STARTEDAT(tso); /* local clock */
2530 * set pc, Node (R1), liveness
2532 CurrentRegTable = TSO_INTERNAL_PTR(tso);
2533 SAVE_Liveness = LIVENESS_R1;
2534 SAVE_R1.p = topClosure;
2537 if (type == T_MAIN) {
2541 if (AvailableStack != PrelBase_Z91Z93_closure) {
2542 stko = AvailableStack;
2544 SET_PROCS(stko,ThisPE);
2546 AvailableStack = STKO_LINK(AvailableStack);
2547 } else if (SAVE_Hp + STKO_HS + RTSflags.ConcFlags.stkChunkSize > SAVE_HpLim) {
2550 /* ALLOC_STK(STKO_HS,STKO_CHUNK_SIZE,0); use RTSflag now*/
2551 ALLOC_STK(STKO_HS,RTSflags.ConcFlags.stkChunkSize,0);
2553 SAVE_Hp += STKO_HS + RTSflags.ConcFlags.stkChunkSize;
2554 SET_STKO_HDR(stko, StkO_info, CCC);
2556 STKO_SIZE(stko) = RTSflags.ConcFlags.stkChunkSize + STKO_VHS;
2557 STKO_SpB(stko) = STKO_SuB(stko) = STKO_BSTK_BOT(stko) + BREL(1);
2558 STKO_SpA(stko) = STKO_SuA(stko) = STKO_ASTK_BOT(stko) + AREL(1);
2559 STKO_LINK(stko) = PrelBase_Z91Z93_closure;
2560 STKO_RETURN(stko) = NULL;
2566 STKO_ADEP(stko) = STKO_BDEP(stko) = 0;
2569 if (type == T_MAIN) {
2570 STKO_SpB(stko) -= BREL(1);
2571 *STKO_SpB(stko) = (P_) realWorldZh_closure;
2574 SAVE_Ret = (StgRetAddr) UNVEC(stopThreadDirectReturn,vtbl_stopStgWorld);
2578 QP_Event1(do_qp_prof > 1 ? "*A" : "*G", tso);
2580 #if defined(GRAN_CHECK)
2581 tot_sq_len += spark_queue_len(CurrentProc,ADVISORY_POOL);
2589 In GrAnSim the @EndThread@ function is the place where statistics about the
2590 simulation are printed. I guess, that could be moved into @main.lc@.
2595 EndThread(STG_NO_ARGS)
2599 TIME now = CURRENT_TIME;
2603 if (RTSflags.TickyFlags.showTickyStats) {
2604 fprintf(RTSflags.TickyFlags.tickyFile,
2605 "Thread %d (%lx)\n\tA stack max. depth: %ld words\n",
2606 TSO_ID(CurrentTSO), TSO_NAME(CurrentTSO), TSO_AHWM(CurrentTSO));
2607 fprintf(RTSflags.TickyFlags.tickyFile,
2608 "\tB stack max. depth: %ld words\n",
2609 TSO_BHWM(CurrentTSO));
2614 QP_Event1("G*", CurrentTSO);
2618 ASSERT(CurrentTSO == ThreadQueueHd);
2620 if (RTSflags.GranFlags.DoThreadMigration)
2623 if(TSO_TYPE(CurrentTSO)==T_MAIN)
2627 for(i=0; i < RTSflags.GranFlags.proc; ++i) {
2629 while(RunnableThreadsHd[i] != PrelBase_Z91Z93_closure)
2631 /* We schedule runnable threads before killing them to */
2632 /* make the job of bookkeeping the running, runnable, */
2633 /* blocked threads easier for scripts like gr2ps -- HWL */
2635 if (RTSflags.GranFlags.granSimStats && !is_first &&
2636 (!RTSflags.GranFlags.Light || RTSflags.GranFlags.debug & 0x20000) )
2637 DumpRawGranEvent(i,(PROC)0,GR_SCHEDULE,
2638 RunnableThreadsHd[i],
2639 PrelBase_Z91Z93_closure,0);
2640 if (!RTSflags.GranFlags.granSimStats_suppressed &&
2641 TSO_TYPE(RunnableThreadsHd[i])!=T_MAIN)
2642 DumpGranInfo(i,RunnableThreadsHd[i],rtsTrue);
2643 RunnableThreadsHd[i] = TSO_LINK(RunnableThreadsHd[i]);
2644 is_first = rtsFalse;
2648 ThreadQueueHd = PrelBase_Z91Z93_closure;
2649 /* Printing of statistics has been moved into end_gr_simulation */
2652 if (RTSflags.GranFlags.labelling && RTSflags.GranFlags.granSimStats &&
2653 !RTSflags.GranFlags.granSimStats_suppressed)
2654 DumpStartEventAt(TSO_STARTEDAT(CurrentTSO),where_is(CurrentTSO),0,GR_START,
2655 CurrentTSO,PrelBase_Z91Z93_closure,
2656 TSO_SPARKNAME(CurrentTSO));
2657 /* ^^^ SN (spark name) as optional info */
2658 /* spark_queue_len(CurrentProc,ADVISORY_POOL)); */
2659 /* ^^^ spark length as optional info */
2661 if (RTSflags.GranFlags.granSimStats &&
2662 !RTSflags.GranFlags.granSimStats_suppressed)
2663 DumpGranInfo(CurrentProc,CurrentTSO,
2664 TSO_TYPE(CurrentTSO) != T_ADVISORY);
2666 if (RTSflags.GranFlags.granSimStats_Binary &&
2667 TSO_TYPE(CurrentTSO)==T_MAIN &&
2668 !RTSflags.GranFlags.granSimStats_suppressed)
2669 grterminate(CurrentTime[CurrentProc]);
2671 if (TSO_TYPE(CurrentTSO)!=T_MAIN)
2672 ActivateNextThread(CurrentProc);
2674 /* Note ThreadQueueHd is Nil when the main thread terminates
2675 if(ThreadQueueHd != PrelBase_Z91Z93_closure)
2677 if (RTSflags.GranFlags.granSimStats && !RTSflags.GranFlags.granSimStats_suppressed &&
2678 (!RTSflags.GranFlags.Light || RTSflags.GranFlags.debug & 0x20000) )
2679 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
2680 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_threadscheduletime;
2687 if (RTSflags.ParFlags.granSimStats) {
2688 TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO);
2689 DumpGranInfo(thisPE, CurrentTSO, TSO_TYPE(CurrentTSO) != T_ADVISORY);
2693 switch (TSO_TYPE(CurrentTSO)) {
2695 required_thread_count--;
2698 if (GRANSIMSTATS_BINARY)
2702 longjmp(scheduler_loop, -1); /* i.e. the world comes to an end NOW */
2704 ReSchedule(0); /* i.e. the world will eventually come to an end */
2708 required_thread_count--;
2712 advisory_thread_count--;
2720 fprintf(stderr, "EndThread: %x unknown\n", TSO_TYPE(CurrentTSO));
2724 /* Reuse stack object space */
2725 ASSERT(STKO_LINK(SAVE_StkO) == PrelBase_Z91Z93_closure);
2726 STKO_LINK(SAVE_StkO) = AvailableStack;
2727 AvailableStack = SAVE_StkO;
2729 TSO_LINK(CurrentTSO) = AvailableTSO;
2730 AvailableTSO = CurrentTSO;
2731 CurrentTSO = PrelBase_Z91Z93_closure;
2732 CurrentRegTable = NULL;
2735 /* NB: Now ThreadQueueHd is either the next runnable thread on this */
2736 /* proc or it's PrelBase_Z91Z93_closure. In the latter case, a FINDWORK will be */
2737 /* issued by ReSchedule. */
2738 ReSchedule(SAME_THREAD); /* back for more! */
2740 ReSchedule(0); /* back for more! */
2746 %****************************************************************************
2748 \subsection[thread-blocking]{Local Blocking}
2750 %****************************************************************************
2754 #if defined(GRAN_COUNT)
2755 /* Some non-essential maybe-useful statistics-gathering */
/* Bump the global update/PAP statistics counters (GRAN_COUNT builds only).
 * NOTE(review): nUPDs, nUPDs_old, nUPDs_new and nPAPs are not declared in
 * this chunk -- presumably defined elsewhere under GRAN_COUNT; verify. */
2756 void CountnUPDs() { ++nUPDs; }
2757 void CountnUPDs_old() { ++nUPDs_old; }
2758 void CountnUPDs_new() { ++nUPDs_new; }
2760 void CountnPAPs() { ++nPAPs; }
2763 EXTDATA_RO(BQ_info);
2766 /* NB: non-GRAN version ToDo
2768 * AwakenBlockingQueue awakens a list of TSOs and FBQs.
2771 P_ PendingFetches = PrelBase_Z91Z93_closure;
2774 AwakenBlockingQueue(bqe)
2781 TIME now = CURRENT_TIME;
2786 while (bqe != PrelBase_Z91Z93_closure) {
2788 while (IS_MUTABLE(INFO_PTR(bqe))) {
2789 switch (INFO_TYPE(INFO_PTR(bqe))) {
2793 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
2796 if (RTSflags.ParFlags.granSimStats) {
2797 DumpGranEvent(GR_RESUMEQ, bqe);
2798 switch (TSO_QUEUE(bqe)) {
2800 TSO_BLOCKTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
2803 TSO_FETCHTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
2807 fprintf(stderr, "ABQ: TSO_QUEUE invalid.\n");
2812 if (last_tso == NULL) {
2813 if (RunnableThreadsHd == PrelBase_Z91Z93_closure) {
2814 RunnableThreadsHd = bqe;
2816 TSO_LINK(RunnableThreadsTl) = bqe;
2820 bqe = TSO_LINK(bqe);
2824 next = BF_LINK(bqe);
2825 BF_LINK(bqe) = PendingFetches;
2826 PendingFetches = bqe;
2828 if (last_tso != NULL)
2829 TSO_LINK(last_tso) = next;
2832 fprintf(stderr, "Unexpected IP (%#lx) in blocking queue at %#lx\n",
2833 INFO_PTR(bqe), (W_) bqe);
2840 if (last_tso != NULL) {
2841 RunnableThreadsTl = last_tso;
2843 TSO_LINK(last_tso) = PrelBase_Z91Z93_closure;
2851 # if defined(GRAN_CHECK)
2853 /* First some useful test functions */
2855 EXTFUN(RBH_Save_0_info);
2856 EXTFUN(RBH_Save_1_info);
2857 EXTFUN(RBH_Save_2_info);
2865 char str[80], str0[80];
2867 fprintf(stderr,"\n[PE %d] @ %lu BQ: ",
2868 CurrentProc,CurrentTime[CurrentProc]);
2869 if ( bqe == PrelBase_Z91Z93_closure ) {
2870 fprintf(stderr," NIL.\n");
2873 if ( bqe == NULL ) {
2874 fprintf(stderr," NULL\n");
2877 while (IS_MUTABLE(INFO_PTR(bqe))) { /* This distinguishes TSOs from */
2878 W_ proc; /* RBH_Save_? closures! */
2880 /* Find where the tso lives */
2881 proc = where_is(bqe);
2882 it = INFO_TYPE(INFO_PTR(bqe));
2896 if(proc == CurrentProc)
2897 fprintf(stderr," %#lx (%x) L %s,", bqe, TSO_ID(bqe), str0);
2899 fprintf(stderr," %#lx (%x) G (PE %d) %s,", bqe, TSO_ID(bqe), proc, str0);
2904 bqe = TSO_LINK(bqe);
2907 bqe = TSO_LINK(bqe);
2910 bqe = PrelBase_Z91Z93_closure;
2913 /* TSO_LINK(last_tso) = PrelBase_Z91Z93_closure; */
2915 if ( bqe == PrelBase_Z91Z93_closure )
2916 fprintf(stderr," NIL.\n");
2918 (INFO_PTR(bqe) == (P_) RBH_Save_0_info) ||
2919 (INFO_PTR(bqe) == (P_) RBH_Save_1_info) ||
2920 (INFO_PTR(bqe) == (P_) RBH_Save_2_info) )
2921 fprintf(stderr," RBH.\n");
2922 /* fprintf(stderr,"\n%s\n",str); */
2926 CHECK_BQ(node, tso, proc)
2933 PROC p = where_is(tso);
2934 rtsBool ok = rtsTrue;
2937 fprintf(stderr,"ERROR in CHECK_BQ: CurrentTSO %#lx (%x) on proc %d but CurrentProc = %d\n",
2938 tso, TSO_ID(tso), proc);
2942 switch (INFO_TYPE(INFO_PTR(node))) {
2944 case INFO_BH_U_TYPE:
2945 bqe = (P_) BQ_ENTRIES(node);
2946 return (rtsTrue); /* BHs don't have BQs */
2949 bqe = (P_) BQ_ENTRIES(node);
2951 case INFO_FMBQ_TYPE:
2952 fprintf(stderr,"CHECK_BQ: ERROR: FMBQ closure (%#lx) found in GrAnSim (TSO=%#lx (%x))\n",
2953 node, tso, TSO_ID(tso));
2956 case INFO_SPEC_RBH_TYPE:
2957 bqe = (P_) SPEC_RBH_BQ(node);
2959 case INFO_GEN_RBH_TYPE:
2960 bqe = (P_) GEN_RBH_BQ(node);
2965 I_ size, ptrs, nonptrs, vhs;
2966 char info_hdr_ty[80];
2968 fprintf(stderr, "CHECK_BQ: thought %#lx was a black hole (IP %#lx)",
2969 node, INFO_PTR(node));
2970 info_ptr = get_closure_info(node,
2971 &size, &ptrs, &nonptrs, &vhs,
2973 fprintf(stderr, " %s\n",info_hdr_ty);
2974 /* G_PRINT_NODE(node); */
2976 /* EXIT(EXIT_FAILURE); */
2980 while (IS_MUTABLE(INFO_PTR(bqe))) { /* This distinguishes TSOs from */
2981 W_ proc; /* RBH_Save_? closures! */
2983 /* Find where the tso lives */
2984 proc = where_is(bqe);
2985 it = INFO_TYPE(INFO_PTR(bqe));
2988 fprintf(stderr,"ERROR in CHECK_BQ [Node = 0x%lx, PE %d]: TSO %#lx (%x) already in BQ: ",
2989 node, proc, tso, TSO_ID(tso));
2990 PRINT_BQ(BQ_ENTRIES(node));
2994 bqe = TSO_LINK(bqe);
2998 /* End of test functions */
2999 # endif /* GRAN_CHECK */
3001 /* This version of AwakenBlockingQueue has been originally taken from the
3002 GUM code. It is now assimilated into GrAnSim */
3004 /* Note: This version assumes a pointer to a blocking queue rather than a
3005 node with an attached blocking queue as input */
3008 AwakenBlockingQueue(bqe)
3011 /* P_ tso = (P_) BQ_ENTRIES(node); */
3020 /* Compatibility mode with old libraries! 'oH jIvoQmoH */
3021 if (IS_BQ_CLOSURE(bqe))
3022 bqe = (P_)BQ_ENTRIES(bqe);
3023 else if ( INFO_TYPE(INFO_PTR(bqe)) == INFO_SPEC_RBH_TYPE )
3024 bqe = (P_)SPEC_RBH_BQ(bqe);
3025 else if ( INFO_TYPE(INFO_PTR(bqe)) == INFO_GEN_RBH_TYPE )
3026 bqe = (P_)GEN_RBH_BQ(bqe);
3028 # if defined(GRAN_CHECK)
3029 if ( RTSflags.GranFlags.debug & 0x100 ) {
3034 # if defined(GRAN_COUNT)
3036 if (tso != PrelBase_Z91Z93_closure)
3040 # if defined(GRAN_CHECK)
3041 if (RTSflags.GranFlags.debug & 0x100)
3042 fprintf(stderr,"----- AwBQ: ");
3045 while (IS_MUTABLE(INFO_PTR(bqe))) { /* This distinguishes TSOs from */
3046 W_ proc; /* RBH_Save_? closures! */
3047 ASSERT(INFO_TYPE(INFO_PTR(bqe)) == INFO_TSO_TYPE);
3050 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
3052 # if defined(GRAN_COUNT)
3056 /* Find where the tso lives */
3057 proc = where_is(bqe);
3059 if(proc == CurrentProc) {
3060 notifytime = CurrentTime[CurrentProc] + RTSflags.GranFlags.gran_lunblocktime;
3062 /* A better way of handling this would be to introduce a
3063 GLOBALUNBLOCK event which is created here. -- HWL */
3064 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
3065 notifytime = STG_MAX(CurrentTime[CurrentProc],CurrentTime[proc]) +
3066 RTSflags.GranFlags.gran_gunblocktime;
3067 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
3068 /* new_event(proc, CurrentProc, notifytime,
3069 GLOBALUNBLOCK,bqe,PrelBase_Z91Z93_closure,NULL); */
3071 /* cost the walk over the queue */
3072 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_lunblocktime;
3073 /* GrAnSim Light: make blocked TSO aware of the time that passed */
3074 if (RTSflags.GranFlags.Light)
3075 TSO_CLOCK(bqe) = notifytime;
3076 /* and create a resume message */
3077 new_event(proc, CurrentProc, notifytime,
3078 RESUMETHREAD,bqe,PrelBase_Z91Z93_closure,NULL);
3080 if (notifytime<TimeOfNextEvent)
3081 TimeOfNextEvent = notifytime;
3083 # if defined(GRAN_CHECK)
3084 if (RTSflags.GranFlags.debug & 0x100) {
3085 fprintf(stderr," TSO %x (PE %d) %s,",
3086 TSO_ID(bqe), proc, ( (proc==CurrentProc) ? "L" : "G") );
3091 bqe = TSO_LINK(bqe);
3092 TSO_LINK(last) = PrelBase_Z91Z93_closure;
3096 /* This was once used in a !do_gr_sim setup. Now only GrAnSim setup is */
3098 else /* Check if this is still valid for non-GrAnSim code -- HWL */
3100 if (ThreadQueueHd == PrelBase_Z91Z93_closure)
3101 ThreadQueueHd = bqe;
3103 TSO_LINK(ThreadQueueTl) = bqe;
3105 if (RunnableThreadsHd == PrelBase_Z91Z93_closure)
3106 RunnableThreadsHd = tso;
3108 TSO_LINK(RunnableThreadsTl) = tso;
3111 while(TSO_LINK(bqe) != PrelBase_Z91Z93_closure) {
3112 assert(TSO_INTERNAL_PTR(bqe)->rR[0].p == node);
3115 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
3118 bqe = TSO_LINK(bqe);
3121 assert(TSO_INTERNAL_PTR(bqe)->rR[0].p == node);
3124 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
3130 if (RTSflags.GranFlags.debug & 0x100)
3131 fprintf(stderr,".\n");
3134 /* ngo' {GrAnSim}Qo' ngoq: RunnableThreadsTl = tso; */
3143 /* Different interface for GRAN */
3148 SAVE_Liveness = liveness;
3149 TSO_PC1(CurrentTSO) = Continue;
3151 QP_Event1("GR", CurrentTSO);
3153 ReSchedule(SAME_THREAD);
3162 SAVE_Liveness = args >> 1;
3163 TSO_PC1(CurrentTSO) = Continue;
3165 QP_Event1("GR", CurrentTSO);
3168 if (RTSflags.ParFlags.granSimStats) {
3169 /* Note that CURRENT_TIME may perform an unsafe call */
3170 TSO_EXECTIME(CurrentTSO) += CURRENT_TIME - TSO_BLOCKEDAT(CurrentTSO);
3173 ReSchedule(args & 1);
3180 %****************************************************************************
3182 \subsection[gr-fetch]{Fetching Nodes (GrAnSim only)}
3184 %****************************************************************************
3186 The following GrAnSim routines simulate the fetching of nodes from a remote
3187 processor. We use a 1 word bitmask to indicate on which processor a node is
3188 lying. Thus, moving or copying a node from one processor to another just
3189 requires an appropriate change in this bitmask (using @SET_GA@).
3190 Additionally, the clocks have to be updated.
3192 A special case arises when the node that is needed by processor A has been
3193 moved from a processor B to a processor C between sending out a @FETCH@
3194 (from A) and its arrival at B. In that case the @FETCH@ has to be forwarded
3200 /* ngoqvam che' {GrAnSim}! */
3202 /* Fetch node "node" to processor "p" */
3205 FetchNode(node,from,to)
3209 /* In case of RTSflags.GranFlags.DoGUMMFetching this fct should never be
3210 entered! Instead, UnpackGraph is used in ReSchedule */
3213 ASSERT(to==CurrentProc);
3215 # if defined(GRAN) && defined(GRAN_CHECK)
3216 if ( RTSflags.GranFlags.Light ) {
3217 fprintf(stderr,"Qagh {FetchNode}Daq: Should never be entered in GrAnSim Light setup\n");
3222 if ( RTSflags.GranFlags.DoGUMMFetching ) {
3223 fprintf(stderr,"Qagh: FetchNode should never be entered with DoGUMMFetching\n");
3227 /* Now fetch the children */
3228 if (!IS_LOCAL_TO(PROCS(node),from) &&
3229 !IS_LOCAL_TO(PROCS(node),to) )
3232 if(IS_NF(INFO_PTR(node))) /* Old: || IS_BQ(node) */
3233 PROCS(node) |= PE_NUMBER(to); /* Copy node */
3235 PROCS(node) = PE_NUMBER(to); /* Move node */
3240 /* --------------------------------------------------
3241 Cost of sending a packet of size n = C + P*n
3242 where C = packet construction constant,
3243 P = cost of packing one word into a packet
3244 [Should also account for multiple packets].
3245 -------------------------------------------------- */
3248 0 ... ok (FETCHREPLY event with a buffer containing addresses of the
3249 nearby graph has been scheduled)
3250 1 ... node is already local (fetched by somebody else; no event is
3252 2 ... fetch request has been forwarded to the PE that now contains the
3254 3 ... node is a black hole (BH, BQ or RBH); no event is scheduled, and
3255 the current TSO is put into the blocking queue of that node
3256 4 ... out of heap in PackNearbyGraph; GC should be triggered in calling
3257 function to guarantee that the tso and node inputs are valid
3258 (they may be moved during GC).
3260 ToDo: Symbolic return codes; clean up code (separate GUMMFetching from
3261 single node fetching).
3265 HandleFetchRequest(node,p,tso)
3269 ASSERT(!RTSflags.GranFlags.Light);
3271 if (IS_LOCAL_TO(PROCS(node),p) ) /* Somebody else moved node already => */
3273 # if defined(GRAN_CHECK)
3274 if (RTSflags.GranFlags.debug & 0x100 ) {
3276 I_ size, ptrs, nonptrs, vhs;
3277 char info_hdr_ty[80];
3279 info_ptr = get_closure_info(node,
3280 &size, &ptrs, &nonptrs, &vhs,
3282 fprintf(stderr,"Warning: HandleFetchRequest entered with local node %#lx (%s) (PE %d)\n",
3283 node,info_hdr_ty,p);
3286 if (RTSflags.GranFlags.DoGUMMFetching) {
3290 /* Create a 1-node-buffer and schedule a FETCHREPLY now */
3291 graph = PackOneNode(node, tso, &size);
3292 new_event(p,CurrentProc,CurrentTime[CurrentProc],
3293 FETCHREPLY,tso,graph,NULL);
3295 new_event(p,CurrentProc,CurrentTime[CurrentProc],
3296 FETCHREPLY,tso,node,NULL);
3300 else if (IS_LOCAL_TO(PROCS(node),CurrentProc) ) /* Is node still here? */
3302 if(RTSflags.GranFlags.DoGUMMFetching) { /* {GUM}vo' ngoqvam vInIHta' (code from GUM) */
3306 if (IS_BLACK_HOLE(INFO_PTR(node))) { /* block on BH or RBH */
3307 new_event(p,CurrentProc,CurrentTime[p],
3308 GLOBALBLOCK,tso,node,NULL);
3309 /* Note: blockFetch is done when handling GLOBALBLOCK event */
3310 /* When this thread is reawoken it does the usual: it tries to
3311 enter the updated node and issues a fetch if it's remote.
3312 It has forgotten that it has sent a fetch already (i.e. a
3313 FETCHNODE is swallowed by a BH, leaving the thread in a BQ */
3314 --OutstandingFetches[p];
3318 # if defined(GRAN_CHECK)
3319 if (!RTSflags.GranFlags.DoReScheduleOnFetch && (tso != RunnableThreadsHd[p])) {
3320 fprintf(stderr,"Qagh {HandleFetchRequest}Daq: tso 0x%lx (%x) not at head of proc %d (0x%lx)\n",
3321 tso, TSO_ID(tso), p, RunnableThreadsHd[p]);
3326 if ((graph = PackNearbyGraph(node, tso, &size)) == NULL)
3327 return (4); /* out of heap */
3329 /* Actual moving/copying of node is done on arrival; see FETCHREPLY */
3330 /* Send a reply to the originator */
3331 /* ToDo: Replace that by software costs for doing graph packing! */
3332 CurrentTime[CurrentProc] += size * RTSflags.GranFlags.gran_mpacktime;
3334 new_event(p,CurrentProc,CurrentTime[CurrentProc]+RTSflags.GranFlags.gran_latency,
3335 FETCHREPLY,tso,graph,NULL);
3337 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
3339 } else { /* incremental (single closure) fetching */
3340 /* Actual moving/copying of node is done on arrival; see FETCHREPLY */
3341 /* Send a reply to the originator */
3342 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
3344 new_event(p,CurrentProc,CurrentTime[CurrentProc]+RTSflags.GranFlags.gran_latency,
3345 FETCHREPLY,tso,node,NULL);
3347 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
3351 else /* Qu'vatlh! node has been grabbed by another proc => forward */
3353 PROC p_new = where_is(node);
3356 # if defined(GRAN_CHECK)
3357 if (RTSflags.GranFlags.debug & 0x2)
3358 fprintf(stderr,"Qu'vatlh! node %#lx has been grabbed by PE %d (current=%d; demander=%d) @ %d\n",
3359 node,p_new,CurrentProc,p,CurrentTime[CurrentProc]);
3361 /* Prepare FORWARD message to proc p_new */
3362 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
3364 fetchtime = STG_MAX(CurrentTime[CurrentProc],CurrentTime[p_new]) +
3365 RTSflags.GranFlags.gran_latency;
3367 new_event(p_new,p,fetchtime,FETCHNODE,tso,node,NULL);
3369 CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
3377 @blockFetch@ blocks a @BlockedFetch@ node on some kind of black hole.
3379 Taken from gum/HLComms.lc. [find a better place for that ?] -- HWL
3381 {\bf Note:} In GranSim we don't have @FETCHME@ nodes and therefore don't
3382 create @FMBQ@'s (FetchMe blocking queues) to cope with global
3383 blocking. Instead, non-local TSO are put into the BQ in the same way as
3384 local TSOs. However, we have to check if a TSO is local or global in order
3385 to account for the latencies involved and for keeping track of the number
3386 of fetches that are really going on.
3392 0 ... ok; tso is now at beginning of BQ attached to the bh closure
3393 1 ... the bh closure is no BH any more; tso is immediately unblocked
3397 blockFetch(tso, proc, bh)
3398 P_ tso; /* TSO which gets blocked */
3399 PROC proc; /* PE where that tso was running */
3400 P_ bh; /* closure to block on (BH, RBH, BQ) */
3402 # if defined(GRAN_CHECK)
3403 if ( RTSflags.GranFlags.debug & 0x100 ) {
3405 I_ size, ptrs, nonptrs, vhs;
3406 char info_hdr_ty[80];
3408 info_ptr = get_closure_info(bh,
3409 &size, &ptrs, &nonptrs, &vhs,
3411 fprintf(stderr,"Blocking TSO %#lx (%x)(PE %d) on node %#lx (%s) (PE %d). No graph is packed!\n",
3412 tso, TSO_ID(tso), proc, bh, info_hdr_ty, where_is(bh));
3415 if ( !RTSflags.GranFlags.DoReScheduleOnFetch && (tso != RunnableThreadsHd[proc]) ) {
3416 fprintf(stderr,"Qagh {blockFetch}Daq: TSO 0x%lx (%x) is not first on runnable list of proc %d (first is 0x%lx)\n",
3417 tso,TSO_ID(tso),proc,RunnableThreadsHd[proc]);
3422 if (!IS_BLACK_HOLE(INFO_PTR(bh))) { /* catches BHs and RBHs */
3423 # if defined(GRAN_CHECK)
3424 if ( RTSflags.GranFlags.debug & 0x100 ) {
3426 W_ size, ptrs, nonptrs, vhs;
3427 char str[80], junk_str[80];
3429 info = get_closure_info(bh, &size, &ptrs, &nonptrs, &vhs, str);
3430 fprintf(stderr,"blockFetch: node %#lx (%s) is not a BH => awakening TSO %#lx (%x) (PE %u)\n",
3431 bh, str, tso, TSO_ID(tso), proc);
3435 /* No BH anymore => immediately unblock tso */
3436 new_event(proc,proc,CurrentTime[proc],
3437 UNBLOCKTHREAD,tso,bh,NULL);
3439 /* Is this always a REPLY to a FETCH in the profile ? */
3440 if (RTSflags.GranFlags.granSimStats)
3441 DumpRawGranEvent(proc,proc,GR_REPLY,tso,bh,0);
3445 /* DaH {BQ}Daq Qu' Suq 'e' wISov!
3446 Now we know that we have to put the tso into the BQ.
3447 2 case: If block-on-fetch, tso is at head of threadq =>
3448 => take it out of threadq and into BQ
3449 If reschedule-on-fetch, tso is only pointed to be event
3450 => just put it into BQ
3452 if (!RTSflags.GranFlags.DoReScheduleOnFetch) { /* block-on-fetch */
3453 GranSimBlock(tso, proc, bh); /* get tso out of threadq & activate next
3454 thread (same as in BQ_entry) */
3455 } else { /* reschedule-on-fetch */
3456 if(RTSflags.GranFlags.granSimStats)
3457 DumpRawGranEvent(proc,where_is(bh),GR_BLOCK,tso,bh,0);
3459 ++TSO_BLOCKCOUNT(tso);
3460 TSO_BLOCKEDAT(tso) = CurrentTime[proc];
3463 ASSERT(TSO_LINK(tso)==PrelBase_Z91Z93_closure);
3465 /* Put tso into BQ */
3466 switch (INFO_TYPE(INFO_PTR(bh))) {
3468 case INFO_BH_U_TYPE:
3469 TSO_LINK(tso) = PrelBase_Z91Z93_closure;
3470 SET_INFO_PTR(bh, BQ_info);
3471 BQ_ENTRIES(bh) = (W_) tso;
3473 #ifdef GC_MUT_REQUIRED
3475 * If we modify a black hole in the old generation, we have to make
3476 * sure it goes on the mutables list
3479 if (bh <= StorageMgrInfo.OldLim) {
3480 MUT_LINK(bh) = (W_) StorageMgrInfo.OldMutables;
3481 StorageMgrInfo.OldMutables = bh;
3483 MUT_LINK(bh) = MUT_NOT_LINKED;
3487 /* BF_LINK(bf) = (P_) BQ_ENTRIES(bh); */
3488 TSO_LINK(tso) = (P_) BQ_ENTRIES(bh);
3489 BQ_ENTRIES(bh) = (W_) tso;
3491 case INFO_FMBQ_TYPE:
3492 fprintf(stderr,"ERROR: FMBQ closure (%#lx) found in GrAnSim (TSO=%#lx (%x))\n",
3493 bh, tso, TSO_ID(tso));
3495 case INFO_SPEC_RBH_TYPE:
3496 /* BF_LINK(bf) = (P_) BQ_ENTRIES(bh); */
3497 TSO_LINK(tso) = (P_) SPEC_RBH_BQ(bh);
3498 SPEC_RBH_BQ(bh) = (W_) tso;
3500 case INFO_GEN_RBH_TYPE:
3501 /* BF_LINK(bf) = (P_) BQ_ENTRIES(bh); */
3502 TSO_LINK(tso) = (P_) GEN_RBH_BQ(bh);
3503 GEN_RBH_BQ(bh) = (W_) tso;
3508 I_ size, ptrs, nonptrs, vhs;
3509 char info_hdr_ty[80];
3511 fprintf(stderr, "Panic: thought %#lx was a black hole (IP %#lx)",
3513 # if defined(GRAN_CHECK)
3514 info_ptr = get_closure_info(bh,
3515 &size, &ptrs, &nonptrs, &vhs,
3517 fprintf(stderr, " %s\n",info_hdr_ty);
3529 %****************************************************************************
3531 \subsection[qp-profile]{Quasi-Parallel Profiling}
3533 %****************************************************************************
3536 /* ToDo: Check if this is really still used anywhere!? */
3541 /* *Virtual* Time in milliseconds */
3544 qp_elapsed_time(STG_NO_ARGS)
3546 extern StgDouble usertime();
3548 return ((long) (usertime() * 1e3));
3552 qp_elapsed_time(STG_NO_ARGS)
3554 return ((long) CurrentTime[CurrentProc] );
3559 init_qp_profiling(STG_NO_ARGS)
3562 char qp_filename[STATS_FILENAME_MAXLEN];
3564 sprintf(qp_filename, QP_FILENAME_FMT, prog_argv[0]);
3565 if ((qp_file = fopen(qp_filename,"w")) == NULL ) {
3566 fprintf(stderr, "Can't open quasi-parallel profile report file %s\n",
3570 fputs(prog_argv[0], qp_file);
3571 for(i = 1; prog_argv[i]; i++) {
3572 fputc(' ', qp_file);
3573 fputs(prog_argv[i], qp_file);
3575 fprintf(qp_file, " +RTS -C%d -t%d\n"
3576 , RTSflags.ConcFlags.ctxtSwitchTime
3577 , RTSflags.ConcFlags.maxThreads);
3579 fputs(time_str(), qp_file);
3580 fputc('\n', qp_file);
3585 QP_Event0(tid, node)
3589 fprintf(qp_file, "%lu ** %lu 0x%lx\n", qp_elapsed_time(), tid, INFO_PTR(node));
3593 QP_Event1(event, tso)
3597 fprintf(qp_file, "%lu %s %lu 0x%lx\n", qp_elapsed_time(), event,
3598 TSO_ID(tso), TSO_NAME(tso));
3602 QP_Event2(event, tso1, tso2)
3606 fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n", qp_elapsed_time(), event,
3607 TSO_ID(tso1), TSO_NAME(tso1), TSO_ID(tso2), TSO_NAME(tso2));
3612 %****************************************************************************
3614 \subsection[gc-GrAnSim]{Garbage collection routines for GrAnSim objects}
3616 %****************************************************************************
3618 Garbage collection code for the event queue. We walk the event queue
3619 so that if the only reference to a TSO is in some event (e.g. RESUME),
3620 the TSO is still preserved.
3622 The GC code now uses a breadth-first pruning strategy. This prevents
3623 the GC from keeping all sparks of the low-numbered PEs while discarding all
3624 sparks from high-numbered PEs. Such a depth-first pruning may have
3625 disastrous effects for programs that generate a huge number of sparks!
3630 extern smInfo StorageMgrInfo;
3632 /* Auxiliary functions needed in Save/RestoreSparkRoots if breadth-first */
3633 /* pruning is done. */
/* arr_and -- fold bitwise AND over the first max elements of arr.
   Used by the breadth-first pruning code to test whether every spark
   pool's PE bitmask covers all processors.
   Undefined for max==0 (acknowledged below).
   NOTE(review): the loop body (presumably res &= arr[i]) and the return
   are missing from this fragment. */
3636 arr_and(W_ arr[], I_ max)
3641 /* Doesn't work with max==0; but then, many things don't work in this */
3643 for (i=1, res = arr[0]; i<max; i++)
/* arr_max -- maximum of the first max elements of arr.
   Undefined for max==0 (acknowledged below). */
3650 arr_max(W_ arr[], I_ max)
3655 /* Doesn't work with max==0; but then, many things don't work in this */
3657 for (i=1, res = arr[0]; i<max; i++)
3658 res = (arr[i]>res) ? arr[i] : res;
3664 Routines working on spark queues.
3665 It would be a good idea to make that an ADT!
/* spark_queue_len -- walk the pending-spark queue of (proc, pool) and
   return its length.  Under GRAN_CHECK (debug flag 0x1000) it also
   verifies that PendingSparksTl really points at the last element of
   the queue, reporting a mismatch on stderr. */
3669 spark_queue_len(PROC proc, I_ pool)
3671 sparkq prev, spark; /* prev only for testing !! */
3674 for (len = 0, prev = NULL, spark = PendingSparksHd[proc][pool];
3676 len++, prev = spark, spark = SPARK_NEXT(spark))
3679 # if defined(GRAN_CHECK)
3680 if ( RTSflags.GranFlags.debug & 0x1000 )
/* sanity check: tail pointer must coincide with the last node walked */
3681 if ( (prev!=NULL) && (prev!=PendingSparksTl[proc][pool]) )
3682 fprintf(stderr,"ERROR in spark_queue_len: (PE %u, pool %u) PendingSparksTl (%#lx) not end of queue (%#lx)\n",
3683 proc, pool, PendingSparksTl[proc][pool], prev);
/* delete_from_spark_queue -- unlink `spark` from the doubly-linked spark
   queue of the current processor (prev is its predecessor, or NULL when
   spark is at the head), fix up SparkQueueHd/SparkQueueTl, and dispose
   of the spark.  Debug flag 0x10000 traces the queue links before and
   after the unlink. */
3690 delete_from_spark_queue (prev,spark) /* unlink and dispose spark */
3692 { /* Global Vars: CurrentProc, SparkQueueHd, SparkQueueTl */
3695 # if defined(GRAN_CHECK)
3696 if ( RTSflags.GranFlags.debug & 0x10000 ) {
3697 fprintf(stderr,"** |%#x:%#x| prev=%#x->(%#x), (%#x)<-spark=%#x->(%#x) <-(%#x)\n",
3698 SparkQueueHd, SparkQueueTl,
3699 prev, (prev==NULL ? 0 : SPARK_NEXT(prev)),
3700 SPARK_PREV(spark), spark, SPARK_NEXT(spark),
3701 (SPARK_NEXT(spark)==NULL ? 0 : SPARK_PREV(SPARK_NEXT(spark))));
/* unlink: head case (prev==NULL) vs. interior case */
3705 tmp = SPARK_NEXT(spark);
3707 SparkQueueHd = SPARK_NEXT(spark);
3709 SPARK_NEXT(prev) = SPARK_NEXT(spark);
/* fix tail pointer / back pointer of the successor */
3711 if (SPARK_NEXT(spark)==NULL) {
3712 SparkQueueTl = prev;
3714 SPARK_PREV(SPARK_NEXT(spark)) = prev;
/* an emptied queue must have a NULL tail as well */
3716 if(SparkQueueHd == NULL)
3717 SparkQueueTl = NULL;
/* detach the node fully before disposal */
3718 SPARK_NEXT(spark) = NULL;
3720 DisposeSpark(spark);
3723 # if defined(GRAN_CHECK)
3724 if ( RTSflags.GranFlags.debug & 0x10000 ) {
3725 fprintf(stderr,"## prev=%#x->(%#x)\n",
3726 prev, (prev==NULL ? 0 : SPARK_NEXT(prev)));
3733 /* NB: These functions have been replaced by functions:
3734 EvacuateEvents, EvacuateSparks, (in ../storage/SMcopying.lc)
3735 LinkEvents, LinkSparks (in ../storage/SMcompacting.lc)
3736 Thus, GrAnSim does not need additional entries in the list of roots
/* SaveEventRoots -- walk the GrAnSim event queue and push every heap
   pointer reachable only from events (TSOs, spark nodes, packet
   contents) into StorageMgrInfo.roots, starting at index num_ptr_roots.
   Returns the new root count.  This keeps e.g. a TSO alive when its only
   reference is a pending RESUME event.
   Must be mirrored exactly (in reverse) by RestoreEvtRoots. */
3741 SaveEventRoots(num_ptr_roots)
3744 eventq event = EventHd;
3745 while(event != NULL)
/* thread-carrying events: the TSO is the root */
3747 if(EVENT_TYPE(event) == RESUMETHREAD ||
3748 EVENT_TYPE(event) == MOVETHREAD ||
3749 EVENT_TYPE(event) == CONTINUETHREAD ||
3750 /* EVENT_TYPE(event) >= CONTINUETHREAD1 || */
3751 EVENT_TYPE(event) == STARTTHREAD )
3752 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
/* spark-carrying event: the sparked node is the root */
3754 else if(EVENT_TYPE(event) == MOVESPARK)
3755 StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(EVENT_SPARK(event));
/* fetch events carry a TSO plus either a packet of closures or a node */
3757 else if (EVENT_TYPE(event) == FETCHNODE ||
3758 EVENT_TYPE(event) == FETCHREPLY )
3760 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
3761 /* In the case of packet fetching, EVENT_NODE(event) points to */
3762 /* the packet (currently, malloced). The packet is just a list of */
3763 /* closure addresses, with the length of the list at index 1 (the */
3764 /* structure of the packet is defined in Pack.lc). */
3765 if ( RTSflags.GranFlags.DoGUMMFetching && (EVENT_TYPE(event)==FETCHREPLY)) {
3766 P_ buffer = (P_) EVENT_NODE(event);
3767 int size = (int) buffer[PACK_SIZE_LOCN], i;
/* every closure address inside the packet is a root */
3769 for (i = PACK_HDR_SIZE; i <= size-1; i++) {
3770 StorageMgrInfo.roots[num_ptr_roots++] = (P_) buffer[i];
3773 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_NODE(event);
/* global block: both the blocked TSO and the blocking node are roots */
3775 else if (EVENT_TYPE(event) == GLOBALBLOCK)
3777 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
3778 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_NODE(event);
3780 else if (EVENT_TYPE(event) == UNBLOCKTHREAD)
3782 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
3784 event = EVENT_NEXT(event);
3786 return(num_ptr_roots);
3789 #if defined(DEPTH_FIRST_PRUNING)
3790 /* Is it worthwhile keeping the depth-first pruning code !? -- HWL */
/* SaveSparkRoots (depth-first variant) -- save the nodes of all pending
   sparks as GC roots, at most MAX_SPARKS in total, walking the pools of
   each PE in order.  Sparks beyond the limit are pruned: their node is
   overwritten with the empty-list closure and the remainder of the queue
   is disposed.  Returns the new root count.
   NOTE(review): depth-first pruning can starve high-numbered PEs of all
   their sparks -- hence the breadth-first alternative below. */
3793 SaveSparkRoots(num_ptr_roots)
3796 sparkq spark, /* prev, */ disposeQ=NULL;
3798 I_ i, sparkroots=0, prunedSparks=0;
3799 I_ tot_sparks[MAX_PROC], tot = 0;;
3801 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3802 tot_sparks[proc] = 0;
3803 for(i = 0; i < SPARK_POOLS; ++i) {
3804 for(/* prev = &PendingSparksHd[proc][i],*/ spark = PendingSparksHd[proc][i];
3806 /* prev = &SPARK_NEXT(spark), */ spark = SPARK_NEXT(spark))
/* within budget: record the spark's node as a root */
3808 if(++sparkroots <= MAX_SPARKS)
3810 if ( RTSflags.GcFlags.giveStats )
3811 if (i==ADVISORY_POOL) {
3815 StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(spark);
/* over budget: neutralise the spark; disposeQ marks the cut point */
3819 SPARK_NODE(spark) = PrelBase_Z91Z93_closure;
3820 if (prunedSparks==0) {
3828 } /* forall spark ... */
3829 if ( (RTSflags.GcFlags.giveStats) && (prunedSparks>0) ) {
3830 fprintf(RTSflags.GcFlags.statsFile,"Pruning and disposing %lu excess sparks (> %lu) on proc %d for GC purposes\n",
3831 prunedSparks,MAX_SPARKS,proc);
/* detach and free everything from the cut point onwards */
3832 if (disposeQ == PendingSparksHd[proc][i])
3833 PendingSparksHd[proc][i] = NULL;
3835 SPARK_NEXT(SPARK_PREV(disposeQ)) = NULL;
3836 DisposeSparkQ(disposeQ);
3840 } /* forall i ... */
3841 } /*forall proc .. */
3843 if ( RTSflags.GcFlags.giveStats ) {
3844 fprintf(RTSflags.GcFlags.statsFile,
3845 "Spark statistics (after pruning) (total sparks = %d):",tot);
3846 for (proc=0; proc<RTSflags.GranFlags.proc; proc++) {
3848 fprintf(RTSflags.GcFlags.statsFile,"\n> ");
3849 fprintf(RTSflags.GcFlags.statsFile,"\tPE %d: %d ",proc,tot_sparks[proc]);
3851 fprintf(RTSflags.GcFlags.statsFile,".\n");
3854 return(num_ptr_roots);
3857 #else /* !DEPTH_FIRST_PRUNING */
3859 /* In case of an excessive number of sparks, depth first pruning is a Bad */
3860 /* Idea as we might end up with all remaining sparks on processor 0 and */
3861 /* none on the other processors. So, this version uses breadth first */
3862 /* pruning. -- HWL */
/* SaveSparkRoots (breadth-first variant) -- save spark nodes as GC
   roots, at most MAX_SPARKS in total, but take sparks round-robin across
   all PEs and pools (one per queue per round) so that pruning hits all
   processors evenly instead of wiping out the high-numbered PEs.
   Phase 1 records roots until every queue is exhausted or the budget is
   hit (endQueues bitmask == allProcs); phase 2 neutralises and disposes
   every remaining spark (finishedQueues bitmask).  Returns the new root
   count.  endQueues/finishedQueues use one bit per PE, tested via
   arr_and over the pools. */
3865 SaveSparkRoots(num_ptr_roots)
3869 curr_spark[MAX_PROC][SPARK_POOLS];
3872 endQueues[SPARK_POOLS], finishedQueues[SPARK_POOLS];
3874 prunedSparks[MAX_PROC][SPARK_POOLS];
3875 I_ tot_sparks[MAX_PROC], tot = 0;;
3878 # if defined(GRAN_CHECK) && defined(GRAN)
3879 if ( RTSflags.GranFlags.debug & 0x40 )
3880 fprintf(stderr,"D> Saving spark roots for GC ...\n");
/* initialise per-queue cursors and the all-PEs bitmask */
3884 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3885 allProcs |= PE_NUMBER(proc);
3886 tot_sparks[proc] = 0;
3887 for(i = 0; i < SPARK_POOLS; ++i) {
3888 curr_spark[proc][i] = PendingSparksHd[proc][i];
3889 prunedSparks[proc][i] = 0;
3891 finishedQueues[i] = 0;
3895 /* Breadth first pruning */
3897 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3898 for(i = 0; i < SPARK_POOLS; ++i) {
3899 spark = curr_spark[proc][i];
3900 if ( spark != NULL ) {
3902 if(++sparkroots <= MAX_SPARKS)
3904 # if defined(GRAN_CHECK) && defined(GRAN)
3905 if ( (RTSflags.GranFlags.debug & 0x1000) &&
3906 (RTSflags.GcFlags.giveStats) )
3907 fprintf(RTSflags.GcFlags.statsFile,"Saving Spark Root %d(proc: %d; pool: %d): 0x%lx \t(info ptr=%#lx)\n",
3908 num_ptr_roots,proc,i,SPARK_NODE(spark),
3909 INFO_PTR(SPARK_NODE(spark)));
3911 if ( RTSflags.GcFlags.giveStats )
3912 if (i==ADVISORY_POOL) {
/* within budget: record root and advance this queue's cursor */
3916 StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(spark);
3917 curr_spark[proc][i] = spark = SPARK_NEXT(spark);
3919 else /* sparkroots > MAX_SPARKS */
/* budget exceeded: truncate this queue at the cursor */
3921 if (curr_spark[proc][i] == PendingSparksHd[proc][i])
3922 PendingSparksHd[proc][i] = NULL;
3924 SPARK_NEXT(SPARK_PREV(curr_spark[proc][i])) = NULL;
3925 PendingSparksTl[proc][i] = SPARK_PREV(curr_spark[proc][i]);
3926 endQueues[i] |= PE_NUMBER(proc);
3928 } else { /* spark == NULL ; actually, this only has to be done once */
3929 endQueues[i] |= PE_NUMBER(proc);
/* loop until every PE bit is set in every pool's endQueues mask */
3933 } while (arr_and(endQueues,SPARK_POOLS) != allProcs);
3935 /* The buffer for spark roots in StorageMgrInfo.roots is full */
3936 /* now. Prune all sparks on all processors starting with */
3937 /* curr_spark[proc][i]. */
3940 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3941 for(i = 0; i < SPARK_POOLS; ++i) {
3942 spark = curr_spark[proc][i];
3944 if ( spark != NULL ) {
/* neutralise the node so GC won't follow it, then free the spark */
3945 SPARK_NODE(spark) = PrelBase_Z91Z93_closure;
3946 curr_spark[proc][i] = SPARK_NEXT(spark);
3948 prunedSparks[proc][i]++;
3949 DisposeSpark(spark);
3951 finishedQueues[i] |= PE_NUMBER(proc);
3955 } while (arr_and(finishedQueues,SPARK_POOLS) != allProcs);
3958 # if defined(GRAN_CHECK) && defined(GRAN)
3959 if ( RTSflags.GranFlags.debug & 0x1000) {
3960 for(proc = 0; proc < RTSflags.GranFlags.proc; ++proc) {
3961 for(i = 0; i < SPARK_POOLS; ++i) {
3962 if ( (RTSflags.GcFlags.giveStats) && (prunedSparks[proc][i]>0)) {
3963 fprintf(RTSflags.GcFlags.statsFile,
3964 "Discarding %lu sparks on proc %d (pool %d) for GC purposes\n",
3965 prunedSparks[proc][i],proc,i);
3970 if ( RTSflags.GcFlags.giveStats ) {
3971 fprintf(RTSflags.GcFlags.statsFile,
3972 "Spark statistics (after discarding) (total sparks = %d):",tot);
3973 for (proc=0; proc<RTSflags.GranFlags.proc; proc++) {
3975 fprintf(RTSflags.GcFlags.statsFile,"\n> ");
3976 fprintf(RTSflags.GcFlags.statsFile,
3977 "\tPE %d: %d ",proc,tot_sparks[proc]);
3979 fprintf(RTSflags.GcFlags.statsFile,".\n");
3984 return(num_ptr_roots);
3987 #endif /* DEPTH_FIRST_PRUNING */
3990 GC roots must be restored in *reverse order*.
3991 The recursion is a little ugly, but is better than
3992 in-place pointer reversal.
/* RestoreEvtRoots -- pop GC roots back into the event queue, exactly
   mirroring SaveEventRoots but in *reverse order* (roots were pushed on
   a stack).  The reversal is achieved by recursing to the end of the
   queue first, then restoring on the way back out.  Returns the
   remaining root count.  The per-event-type cases must stay in lockstep
   with SaveEventRoots. */
3996 RestoreEvtRoots(event,num_ptr_roots)
/* recurse first: restore the tail of the queue before this event */
4002 num_ptr_roots = RestoreEvtRoots(EVENT_NEXT(event),num_ptr_roots);
4004 if(EVENT_TYPE(event) == RESUMETHREAD ||
4005 EVENT_TYPE(event) == MOVETHREAD ||
4006 EVENT_TYPE(event) == CONTINUETHREAD ||
4007 /* EVENT_TYPE(event) >= CONTINUETHREAD1 || */
4008 EVENT_TYPE(event) == STARTTHREAD )
4009 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
4011 else if(EVENT_TYPE(event) == MOVESPARK )
4012 SPARK_NODE(EVENT_SPARK(event)) = StorageMgrInfo.roots[--num_ptr_roots];
4014 else if (EVENT_TYPE(event) == FETCHNODE ||
4015 EVENT_TYPE(event) == FETCHREPLY )
/* packet contents were pushed last, so they come off first,
   walked in reverse index order */
4017 if ( RTSflags.GranFlags.DoGUMMFetching && (EVENT_TYPE(event)==FETCHREPLY)) {
4018 P_ buffer = (P_) EVENT_NODE(event);
4019 int size = (int) buffer[PACK_SIZE_LOCN], i;
4021 for (i = size-1; i >= PACK_HDR_SIZE; i--) {
4022 buffer[i] = StorageMgrInfo.roots[--num_ptr_roots];
4025 EVENT_NODE(event) = StorageMgrInfo.roots[--num_ptr_roots];
4027 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
/* GLOBALBLOCK: node was pushed after the TSO, so pop node first */
4029 else if (EVENT_TYPE(event) == GLOBALBLOCK)
4031 EVENT_NODE(event) = StorageMgrInfo.roots[--num_ptr_roots];
4032 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
4034 else if (EVENT_TYPE(event) == UNBLOCKTHREAD)
4036 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
4039 return(num_ptr_roots);
/* RestoreEventRoots -- public entry point: restore all event-queue roots
   starting from the queue head.  Returns the remaining root count. */
4043 RestoreEventRoots(num_ptr_roots)
4046 return(RestoreEvtRoots(EventHd,num_ptr_roots));
4049 #if defined(DEPTH_FIRST_PRUNING)
/* RestoreSpkRoots -- recursively restore spark-node roots for one spark
   queue, in reverse order (tail first, mirroring the save order), but
   only for the first MAX_SPARKS sparks -- the rest were pruned during
   the save and get no root popped.  Returns the remaining root count. */
4052 RestoreSpkRoots(spark,num_ptr_roots,sparkroots)
4054 I_ num_ptr_roots, sparkroots;
/* recurse to the queue's end first so pops mirror the pushes */
4058 num_ptr_roots = RestoreSpkRoots(SPARK_NEXT(spark),num_ptr_roots,++sparkroots);
4059 if(sparkroots <= MAX_SPARKS)
4061 P_ n = SPARK_NODE(spark);
4062 SPARK_NODE(spark) = StorageMgrInfo.roots[--num_ptr_roots];
4063 # if defined(GRAN_CHECK) && defined(GRAN)
4064 if ( RTSflags.GranFlags.debug & 0x40 )
4065 fprintf(RTSflags.GcFlags.statsFile,
4066 "Restoring Spark Root %d: 0x%lx \t(info ptr=%#lx\n",
4067 num_ptr_roots,SPARK_NODE(spark),
4068 INFO_PTR(SPARK_NODE(spark)));
4071 # if defined(GRAN_CHECK) && defined(GRAN)
/* beyond MAX_SPARKS should be unreachable after pruning: report it */
4073 if ( RTSflags.GranFlags.debug & 0x40 )
4074 fprintf(RTSflags.GcFlags.statsFile,
4075 "Error in RestoreSpkRoots (%d; @ spark %#lx): More than MAX_SPARKS (%d) sparks\n",
4076 num_ptr_roots,SPARK_NODE(spark),MAX_SPARKS);
4080 return(num_ptr_roots);
/* RestoreSparkRoots (depth-first variant) -- restore spark roots for all
   PEs and pools in reverse of the save order: highest PE / highest pool
   first, each queue restored tail-first by RestoreSpkRoots.
   Returns the remaining root count. */
4084 RestoreSparkRoots(num_ptr_roots)
/* JSM-style spark set-ups never save spark roots, so entering here is a bug */
4090 #if defined(GRAN_JSM_SPARKS)
4091 fprintf(stderr,"Error: RestoreSparkRoots should be never be entered in a JSM style sparks set-up\n");
4095 /* NB: PROC is currently an unsigned datatype i.e. proc>=0 is always */
4096 /* true ((PROC)-1 == (PROC)255). So we need a second clause in the head */
4097 /* of the for loop. For i that is currently not necessary. C is really */
4098 /* impressive in datatype abstraction! -- HWL */
4100 for(proc = RTSflags.GranFlags.proc - 1; (proc >= 0) && (proc < RTSflags.GranFlags.proc); --proc) {
4101 for(i = SPARK_POOLS - 1; (i >= 0) && (i < SPARK_POOLS) ; --i) {
4102 num_ptr_roots = RestoreSpkRoots(PendingSparksHd[proc][i],num_ptr_roots,0);
4105 return(num_ptr_roots);
4108 #else /* !DEPTH_FIRST_PRUNING */
/* RestoreSparkRoots (breadth-first variant) -- restore spark roots in
   the exact reverse of the breadth-first save: iterate rounds from the
   longest queue length down to 1, and within each round walk PEs and
   pools backwards, popping one root per queue whose length reaches the
   round.  Cursors start at each queue's *tail* and move via SPARK_PREV,
   mirroring the head-forward save.  Returns the remaining root count.
   NOTE(review): max_len appears to be used before any visible
   initialisation -- presumably initialised on a line missing from this
   fragment; confirm against the full source. */
4111 RestoreSparkRoots(num_ptr_roots)
4115 curr_spark[MAX_PROC][SPARK_POOLS];
4117 I_ i, max_len, len, pool, count,
4118 queue_len[MAX_PROC][SPARK_POOLS];
4120 /* NB: PROC is currently an unsigned datatype i.e. proc>=0 is always */
4121 /* true ((PROC)-1 == (PROC)255). So we need a second clause in the head */
4122 /* of the for loop. For i that is currently not necessary. C is really */
4123 /* impressive in datatype abstraction! -- HWL */
/* set cursors to queue tails and measure each queue */
4126 for (proc=0; proc < RTSflags.GranFlags.proc; proc++) {
4127 for (i=0; i<SPARK_POOLS; i++) {
4128 curr_spark[proc][i] = PendingSparksTl[proc][i];
4129 queue_len[proc][i] = spark_queue_len(proc,i);
4130 max_len = (queue_len[proc][i]>max_len) ? queue_len[proc][i] : max_len;
/* rounds in reverse: only queues long enough participate in a round */
4134 for (len=max_len; len > 0; len--){
4135 for(proc = RTSflags.GranFlags.proc - 1; (proc >= 0) && (proc < RTSflags.GranFlags.proc); --proc) {
4136 for(i = SPARK_POOLS - 1; (i >= 0) && (i < SPARK_POOLS) ; --i) {
4137 if (queue_len[proc][i]>=len) {
4138 spark = curr_spark[proc][i];
4139 SPARK_NODE(spark) = StorageMgrInfo.roots[--num_ptr_roots];
4140 # if defined(GRAN_CHECK) && defined(GRAN)
4142 if ( (RTSflags.GranFlags.debug & 0x1000) &&
4143 (RTSflags.GcFlags.giveStats) )
4144 fprintf(RTSflags.GcFlags.statsFile,
4145 "Restoring Spark Root %d (PE %u, pool %u): 0x%lx \t(info ptr=%#lx)\n",
4146 num_ptr_roots,proc,i,SPARK_NODE(spark),
4147 INFO_PTR(SPARK_NODE(spark)));
/* step the cursor backwards towards the queue head */
4149 curr_spark[proc][i] = SPARK_PREV(spark);
4151 num_ptr_roots = RestoreSpkRoots(PendingSparksHd[proc][i],
4158 # if defined(GRAN_CHECK) && defined(GRAN)
4159 if ( (RTSflags.GranFlags.debug & 0x1000) && (RTSflags.GcFlags.giveStats) )
4160 fprintf(RTSflags.GcFlags.statsFile,"Number of restored spark roots: %d\n",
4163 return(num_ptr_roots);
4166 #endif /* DEPTH_FIRST_PRUNING */
4172 #endif /* CONCURRENT */ /* the whole module! */