2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1995
4 %************************************************************************
6 \section[Threads.lc]{Thread Control Routines}
8 %************************************************************************
10 %************************************************************************
12 \subsection[thread-overview]{Overview of the Thread Management System}
14 %************************************************************************
16 %************************************************************************
18 \subsection[thread-decls]{Thread Declarations}
20 %************************************************************************
22 % I haven't checked if GRAN can work with QP profiling. But as we use our
23 % own profiling (GR profiling) that should be irrelevant. -- HWL
27 #if defined(CONCURRENT)
29 # define NON_POSIX_SOURCE /* so says Solaris */
37 static void init_qp_profiling(STG_NO_ARGS); /* forward decl */
40 @AvailableStack@ is used to determine whether an existing stack can be
41 reused without new allocation, so reducing garbage collection, and
42 stack setup time. At present, it is only used for the first stack
43 chunk of a thread, the one that's got
44 @RTSflags.ConcFlags.stkChunkSize@ words.
47 P_ AvailableStack = Nil_closure;
48 P_ AvailableTSO = Nil_closure;
51 Macros for dealing with the new and improved GA field for simulating
52 parallel execution. Based on @CONCURRENT@ package. The GA field now
53 contains a mask, where the n-th bit stands for the n-th processor,
54 where this data can be found. In case of multiple copies, several bits
55 are set. The total number of processors is bounded by @MAX_PROC@,
56 which should be <= the length of a word in bits. -- HWL
59 /* mattson thinks this is obsolete */
61 # if 0 && defined(GRAN)
63 typedef unsigned long TIME;
64 typedef unsigned char PROC;
65 typedef unsigned char EVTTYPE;
69 # define max(a,b) ((a)>(b)?(a):(b)) /* parenthesized args: avoids precedence bugs on expression operands */
75 for (i=0; i<MAX_PROC && !IS_LOCAL_TO(ga,i); i++) ;
80 /* NB: This takes a *node* rather than just a ga as input */
83 { return (ga_to_proc(PROCS(node))); } /* Access the GA field of the node */
86 no_of_copies(P_ node) /* DaH lo'lu'Qo'; currently unused */
89 for (i=0, n=0; i<MAX_PROC; i++)
90 if (IS_LOCAL_TO(PROCS(node),i))
96 # endif /* GRAN ; HWL */
99 %****************************************************************
101 \subsection[thread-getthread]{The Thread Scheduler}
103 %****************************************************************
105 This is the heart of the thread scheduling code.
108 # if defined(GRAN_CHECK) && defined(GRAN)
113 W_ event_trace_all = 0;
115 STGRegisterTable *CurrentRegTable = NULL;
116 P_ CurrentTSO = NULL;
118 # if defined(GRAN) /* HWL */
120 unsigned CurrentProc = 0;
121 W_ IdleProcs = ~0L, Idlers = MAX_PROC;
123 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
124 # define FETCH_MASK_TSO 0x08000000 /* only bits 0, 1, 2 should be used */
128 I_ DoFairSchedule = 0;
129 I_ DoReScheduleOnFetch = 0;
130 I_ DoStealThreadsFirst = 0;
131 I_ SimplifiedFetch = 0;
132 I_ DoAlwaysCreateThreads = 0;
133 I_ DoGUMMFetching = 0;
134 I_ DoThreadMigration = 0;
135 I_ FetchStrategy = 4;
136 I_ PreferSparksOfLocalNodes = 0;
138 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
140 I_ PrintFetchMisses = 0, fetch_misses = 0;
144 I_ nUPDs = 0, nUPDs_old = 0, nUPDs_new = 0, nUPDs_BQ = 0, nPAPs = 0,
148 I_ no_gr_profile = 0; /* Don't create any .gr file at all? */
149 I_ do_sp_profile = 0;
150 I_ do_gr_migration = 0;
152 P_ RunnableThreadsHd[MAX_PROC];
153 P_ RunnableThreadsTl[MAX_PROC];
155 P_ WaitThreadsHd[MAX_PROC];
156 P_ WaitThreadsTl[MAX_PROC];
158 sparkq PendingSparksHd[MAX_PROC][SPARK_POOLS];
159 sparkq PendingSparksTl[MAX_PROC][SPARK_POOLS];
161 W_ CurrentTime[MAX_PROC]; /* Per PE clock */
163 # if defined(GRAN_CHECK) && defined(GRAN)
164 P_ BlockedOnFetch[MAX_PROC]; /* HWL-CHECK */
167 I_ OutstandingFetches[MAX_PROC];
169 W_ SparksAvail = 0; /* How many sparks are available */
170 W_ SurplusThreads = 0; /* How many excess threads are there */
172 StgBool NeedToReSchedule = StgFalse; /* Do we need to reschedule following a fetch? */
174 /* Communication Cost Variables -- set in main program */
176 W_ gran_latency = LATENCY, gran_additional_latency = ADDITIONAL_LATENCY,
177 gran_fetchtime = FETCHTIME,
178 gran_lunblocktime = LOCALUNBLOCKTIME, gran_gunblocktime = GLOBALUNBLOCKTIME,
179 gran_mpacktime = MSGPACKTIME, gran_munpacktime = MSGUNPACKTIME,
182 W_ gran_threadcreatetime = THREADCREATETIME,
183 gran_threadqueuetime = THREADQUEUETIME,
184 gran_threaddescheduletime = THREADDESCHEDULETIME,
185 gran_threadscheduletime = THREADSCHEDULETIME,
186 gran_threadcontextswitchtime = THREADCONTEXTSWITCHTIME;
188 /* Instruction Cost Variables -- set in main program */
190 W_ gran_arith_cost = ARITH_COST, gran_branch_cost = BRANCH_COST,
191 gran_load_cost = LOAD_COST, gran_store_cost = STORE_COST,
192 gran_float_cost = FLOAT_COST, gran_heapalloc_cost = 0;
194 W_ max_proc = MAX_PROC;
196 /* Granularity event types' names for output */
198 char *event_names[] =
199 { "STARTTHREAD", "CONTINUETHREAD", "RESUMETHREAD",
200 "MOVESPARK", "MOVETHREAD", "FINDWORK",
201 "FETCHNODE", "FETCHREPLY"
205 /* Prototypes of GrAnSim debugging functions */
206 void DEBUG_PRINT_NODE PROTO((P_));
207 void DEBUG_TREE PROTO((P_));
208 void DEBUG_INFO_TABLE PROTO((P_));
209 void DEBUG_CURR_THREADQ PROTO((I_));
210 void DEBUG_THREADQ PROTO((P_, I_));
211 void DEBUG_TSO PROTO((P_, I_));
212 void DEBUG_EVENT PROTO((eventq, I_));
213 void DEBUG_SPARK PROTO((sparkq, I_));
214 void DEBUG_SPARKQ PROTO((sparkq, I_));
215 void DEBUG_CURR_SPARKQ PROTO((I_));
216 void DEBUG_PROC PROTO((I_, I_));
217 void DCT(STG_NO_ARGS);
218 void DCP(STG_NO_ARGS);
219 void DEQ(STG_NO_ARGS);
220 void DSQ(STG_NO_ARGS);
222 void HandleFetchRequest PROTO((P_, PROC, P_));
223 # endif /* GRAN ; HWL */
225 #if defined(GRAN_CHECK) && defined(GRAN)
226 static eventq DelayedEventHd = NULL, DelayedEventTl = NULL;
228 static I_ noOfEvents = 0;
229 static I_ event_counts[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
232 TIME SparkStealTime();
234 /* Fcts for manipulating event queues have been deleted -- HWL */
235 /* ---------------------------------- */
243 fprintf(stderr,"Spark: NIL\n");
245 fprintf(stderr,"Spark: Node 0x%lx, Name 0x%lx, Exported %s, Prev 0x%x, Next 0x%x\n",
246 (W_) SPARK_NODE(spark), SPARK_NAME(spark),
247 ((SPARK_EXPORTED(spark))?"True":"False"),
248 SPARK_PREV(spark), SPARK_NEXT(spark) );
251 static print_sparkq(hd)
256 fprintf(stderr,"Spark Queue with root at %x:\n",hd);
257 for (x=hd; x!=NULL; x=SPARK_NEXT(x)) {
262 static print_event(event)
267 fprintf(stderr,"Evt: NIL\n");
269 fprintf(stderr,"Evt: %s (%u), PE %u [%u], Time %lu, TSO 0x%lx, node 0x%lx\n",
270 event_names[EVENT_TYPE(event)],EVENT_TYPE(event),
271 EVENT_PROC(event), EVENT_CREATOR(event),
272 EVENT_TIME(event), EVENT_TSO(event), EVENT_NODE(event) /*,
273 EVENT_SPARK(event), EVENT_NEXT(event)*/ );
277 static print_eventq(hd)
282 fprintf(stderr,"Event Queue with root at %x:\n",hd);
283 for (x=hd; x!=NULL; x=EVENT_NEXT(x)) {
288 /* ---------------------------------- */
291 static eventq getnextevent()
293 static eventq entry = NULL;
297 fprintf(stderr,"No next event\n");
298 exit(EXIT_FAILURE); /* ToDo: abort()? EXIT? */
304 #if defined(GRAN_CHECK) && defined(GRAN)
305 if (debug & 0x20) { /* count events */
307 event_counts[EVENT_TYPE(EventHd)]++;
312 EventHd = EVENT_NEXT(EventHd);
316 /* ToDo: replace malloc/free with a free list */
318 static insert_event(newentry)
321 EVTTYPE evttype = EVENT_TYPE(newentry);
324 /* Search the queue and insert at the right point:
325 FINDWORK before everything, CONTINUETHREAD after everything.
327 This ensures that we find any available work after all threads have
328 executed the current cycle. This level of detail would normally be
329 irrelevant, but matters for ridiculously low latencies...
336 for (event = EventHd, prev=&EventHd; event != NULL;
337 prev = &(EVENT_NEXT(event)), event = EVENT_NEXT(event))
339 if(evttype == FINDWORK ? (EVENT_TIME(event) >= EVENT_TIME(newentry)) :
340 evttype == CONTINUETHREAD ? (EVENT_TIME(event) > EVENT_TIME(newentry)) :
341 (EVENT_TIME(event) > EVENT_TIME(newentry) ||
342 (EVENT_TIME(event) == EVENT_TIME(newentry) &&
343 EVENT_TYPE(event) != FINDWORK )))
346 EVENT_NEXT(newentry) = event;
355 static newevent(proc,creator,time,evttype,tso,node,spark)
362 eventq newentry = (eventq) stgMallocBytes(sizeof(struct event), "newevent");
364 EVENT_PROC(newentry) = proc;
365 EVENT_CREATOR(newentry) = creator;
366 EVENT_TIME(newentry) = time;
367 EVENT_TYPE(newentry) = evttype;
368 EVENT_TSO(newentry) = tso;
369 EVENT_NODE(newentry) = node;
370 EVENT_SPARK(newentry) = spark;
371 EVENT_NEXT(newentry) = NULL;
373 insert_event(newentry);
379 P_ RunnableThreadsHd = Nil_closure;
380 P_ RunnableThreadsTl = Nil_closure;
382 P_ WaitingThreadsHd = Nil_closure;
383 P_ WaitingThreadsTl = Nil_closure;
385 PP_ PendingSparksBase[SPARK_POOLS];
386 PP_ PendingSparksLim[SPARK_POOLS];
388 PP_ PendingSparksHd[SPARK_POOLS];
389 PP_ PendingSparksTl[SPARK_POOLS];
391 # endif /* GRAN ; HWL */
393 static jmp_buf scheduler_loop;
395 I_ required_thread_count = 0;
396 I_ advisory_thread_count = 0;
398 EXTFUN(resumeThread);
400 P_ NewThread PROTO((P_, W_));
402 I_ context_switch = 0;
409 I_ SparkLimit[SPARK_POOLS];
412 initThreadPools(STG_NO_ARGS)
414 I_ size = RTSflags.ConcFlags.maxLocalSparks;
416 SparkLimit[ADVISORY_POOL] = SparkLimit[REQUIRED_POOL] = size;
418 if ((PendingSparksBase[ADVISORY_POOL] = (PP_) malloc(size * sizeof(P_))) == NULL)
421 if ((PendingSparksBase[REQUIRED_POOL] = (PP_) malloc(size * sizeof(P_))) == NULL)
424 PendingSparksLim[ADVISORY_POOL] = PendingSparksBase[ADVISORY_POOL] + size;
425 PendingSparksLim[REQUIRED_POOL] = PendingSparksBase[REQUIRED_POOL] + size;
435 ScheduleThreads(topClosure)
443 #if defined(PROFILING) || defined(PAR)
444 if (time_profiling || RTSflags.ConcFlags.ctxtSwitchTime > 0) {
445 if (initialize_virtual_timer(RTSflags.CcFlags.msecsPerTick)) {
447 if (RTSflags.ConcFlags.ctxtSwitchTime > 0) {
448 if (initialize_virtual_timer(RTSflags.ConcFlags.ctxtSwitchTime)) {
451 fprintf(stderr, "Can't initialize virtual timer.\n");
455 context_switch = 0 /* 1 HWL */;
457 #if defined(GRAN_CHECK) && defined(GRAN) /* HWL */
458 if ( debug & 0x40 ) {
459 fprintf(stderr,"D> Doing init in ScheduleThreads now ...\n");
463 #if defined(GRAN) /* KH */
464 for (i=0; i<max_proc; i++)
466 RunnableThreadsHd[i] = RunnableThreadsTl[i] = Nil_closure;
467 WaitThreadsHd[i] = WaitThreadsTl[i] = Nil_closure;
468 PendingSparksHd[i][REQUIRED_POOL] = PendingSparksHd[i][ADVISORY_POOL] =
469 PendingSparksTl[i][REQUIRED_POOL] = PendingSparksTl[i][ADVISORY_POOL] =
472 # if defined(GRAN_CHECK)
474 BlockedOnFetch[i] = 0; /*- StgFalse; -*/ /* HWL-CHECK */
476 OutstandingFetches[i] = 0;
479 CurrentProc = MainProc;
486 * We perform GC so that a signal handler can install a new
487 * TopClosure and start a new main thread.
492 if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
493 /* kludge to save the top closure as a root */
494 CurrentTSO = topClosure;
495 ReallyPerformThreadGC(0, rtsTrue);
496 topClosure = CurrentTSO;
497 if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
499 fprintf(stderr, "Not enough heap for main thread\n");
504 RunnableThreadsHd = RunnableThreadsTl = tso;
506 /* NB: CurrentProc must have been set to MainProc before that! -- HWL */
507 ThreadQueueHd = ThreadQueueTl = tso;
509 # if defined(GRAN_CHECK)
510 if ( debug & 0x40 ) {
511 fprintf(stderr,"D> MainTSO has been initialized (0x%x)\n", tso);
517 if (RTSflags.ParFlags.granSimStats) {
518 DumpGranEvent(GR_START, tso);
519 sameThread = rtsTrue;
524 MAKE_BUSY(MainProc); /* Everything except the main PE is idle */
527 required_thread_count = 1;
528 advisory_thread_count = 0;
530 } /*if IAmMainThread ...*/
533 /* ----------------------------------------------------------------- */
534 /* This part is the MAIN SCHEDULER LOOP; jumped at from ReSchedule */
535 /* ----------------------------------------------------------------- */
537 if(setjmp(scheduler_loop) < 0)
540 #if defined(GRAN) && defined(GRAN_CHECK)
541 if ( debug & 0x80 ) {
542 fprintf(stderr,"D> MAIN Schedule Loop; ThreadQueueHd is ");
543 DEBUG_TSO(ThreadQueueHd,1);
544 /* if (ThreadQueueHd == MainTSO) {
545 fprintf(stderr,"D> Event Queue is now:\n");
552 if (PendingFetches != Nil_closure) {
557 if (ThreadQueueHd == Nil_closure) {
558 fprintf(stderr, "No runnable threads!\n");
561 if (DO_QP_PROF > 1 && CurrentTSO != ThreadQueueHd) {
562 QP_Event1("AG", ThreadQueueHd);
567 while (RunnableThreadsHd == Nil_closure) {
568 /* If we've no work */
569 if (WaitingThreadsHd == Nil_closure) {
571 fprintf(stderr, "No runnable threads!\n");
574 AwaitEvent(RTSflags.ConcFlags.ctxtSwitchTime);
577 if (RunnableThreadsHd == Nil_closure) {
578 if (advisory_thread_count < RTSflags.ConcFlags.maxThreads &&
579 (PendingSparksHd[REQUIRED_POOL] < PendingSparksTl[REQUIRED_POOL] ||
580 PendingSparksHd[ADVISORY_POOL] < PendingSparksTl[ADVISORY_POOL])) {
582 * If we're here (no runnable threads) and we have pending
583 * sparks, we must have a space problem. Get enough space
584 * to turn one of those pending sparks into a
585 * thread... ReallyPerformGC doesn't return until the
586 * space is available, so it may force global GC. ToDo:
587 * Is this unnecessary here? Duplicated in ReSchedule()?
590 ReallyPerformThreadGC(THREAD_SPACE_REQUIRED, rtsTrue);
591 SAVE_Hp -= THREAD_SPACE_REQUIRED;
594 * We really have absolutely no work. Send out a fish
595 * (there may be some out there already), and wait for
596 * something to arrive. We clearly can't run any threads
597 * until a SCHEDULE or RESUME arrives, and so that's what
598 * we're hoping to see. (Of course, we still have to
599 * respond to other types of messages.)
602 sendFish(choosePE(), mytid, NEW_FISH_AGE, NEW_FISH_HISTORY,
608 } else if (PacketsWaiting()) { /* Look for incoming messages */
613 if (DO_QP_PROF > 1 && CurrentTSO != RunnableThreadsHd) {
614 QP_Event1("AG", RunnableThreadsHd);
618 if (RTSflags.ParFlags.granSimStats && !sameThread)
619 DumpGranEvent(GR_SCHEDULE, RunnableThreadsHd);
622 #if !GRAN /* ROUND_ROBIN */
623 CurrentTSO = RunnableThreadsHd;
624 RunnableThreadsHd = TSO_LINK(RunnableThreadsHd);
625 TSO_LINK(CurrentTSO) = Nil_closure;
627 if (RunnableThreadsHd == Nil_closure)
628 RunnableThreadsTl = Nil_closure;
631 /* This used to be Round Robin. KH.
632 I think we can ignore that, and move it down to ReSchedule instead.
634 CurrentTSO = ThreadQueueHd;
635 /* TSO_LINK(CurrentTSO) = Nil_closure; humbug */
638 /* If we're not running a timer, just leave the flag on */
639 if (RTSflags.ConcFlags.ctxtSwitchTime > 0)
642 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
643 if (CurrentTSO == Nil_closure) {
644 fprintf(stderr,"Error: Trying to execute Nil_closure on proc %d (@ %d)\n",
645 CurrentProc,CurrentTime[CurrentProc]);
650 if (BlockedOnFetch[CurrentProc]) {
651 fprintf(stderr,"Error: Trying to execute TSO 0x%x on proc %d (@ %d) which is blocked-on-fetch by TSO 0x%x\n",
652 CurrentTSO,CurrentProc,CurrentTime[CurrentProc],BlockedOnFetch[CurrentProc]);
657 if ( (debug & 0x10) &&
658 (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) ) {
659 fprintf(stderr,"Error: Trying to execute TSO 0x%x on proc %d (@ %d) which should be asleep!\n",
660 CurrentTSO,CurrentProc,CurrentTime[CurrentProc]);
665 miniInterpret((StgFunPtr)resumeThread);
669 % Some remarks on GrAnSim -- HWL
671 The ReSchedule fct is the heart of GrAnSim. Based on its parameter it issues a
672 CONTINUETHREAD to carry on executing the current thread in due course or it
673 watches out for new work (e.g. called from EndThread).
675 Then it picks the next event (getnextevent) and handles it appropriately
676 (see switch construct). Note that a continue in the switch causes the next
677 event to be handled and a break causes a jmp to the scheduler_loop where
678 the TSO at the head of the current processor's runnable queue is executed.
680 ReSchedule is mostly entered from HpOverflow.lc:PerformReSchedule which is
681 itself called via the GRAN_RESCHEDULE macro in the compiler generated code.
687 ReSchedule(what_next)
688 int what_next; /* Run the current thread again? */
690 sparkq spark, nextspark;
695 #if defined(GRAN_CHECK) && defined(GRAN)
696 if ( debug & 0x80 ) {
697 fprintf(stderr,"D> Entering ReSchedule with mode %u; tso is\n",what_next);
698 DEBUG_TSO(ThreadQueueHd,1);
702 #if defined(GRAN_CHECK) && defined(GRAN)
703 if ( (debug & 0x80) || (debug & 0x40 ) )
704 if (what_next<FIND_THREAD || what_next>CHANGE_THREAD)
705 fprintf(stderr,"ReSchedule: illegal parameter %u for what_next\n",
709 /* Run the current thread again (if there is one) */
710 if(what_next==SAME_THREAD && ThreadQueueHd != Nil_closure)
712 /* A bit of a hassle if the event queue is empty, but ... */
713 CurrentTSO = ThreadQueueHd;
715 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
716 CONTINUETHREAD,CurrentTSO,Nil_closure,NULL);
718 /* This code does round-Robin, if preferred. */
719 if(DoFairSchedule && TSO_LINK(CurrentTSO) != Nil_closure)
721 if(RTSflags.ParFlags.granSimStats)
722 DumpGranEvent(GR_DESCHEDULE,ThreadQueueHd);
723 ThreadQueueHd = TSO_LINK(CurrentTSO);
724 TSO_LINK(ThreadQueueTl) = CurrentTSO;
725 ThreadQueueTl = CurrentTSO;
726 TSO_LINK(CurrentTSO) = Nil_closure;
727 if (RTSflags.ParFlags.granSimStats)
728 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
729 CurrentTime[CurrentProc] += gran_threadcontextswitchtime;
732 /* Schedule `next thread' which is at ThreadQueueHd now i.e. thread queue */
733 /* has been updated before that already. */
734 else if(what_next==NEW_THREAD && ThreadQueueHd != Nil_closure)
736 #if defined(GRAN_CHECK) && defined(GRAN)
737 if(DoReScheduleOnFetch)
739 fprintf(stderr,"ReSchedule(NEW_THREAD) shouldn't be used!!\n");
744 if(RTSflags.ParFlags.granSimStats)
745 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
747 CurrentTSO = ThreadQueueHd;
748 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
749 CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
751 CurrentTime[CurrentProc] += gran_threadcontextswitchtime;
754 /* We go in here if the current thread is blocked on fetch => don't CONT */
755 else if(what_next==CHANGE_THREAD)
757 /* just fall into event handling loop for next event */
760 /* We go in here if we have no runnable threads or what_next==0 */
763 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
764 FINDWORK,Nil_closure,Nil_closure,NULL);
765 CurrentTSO = Nil_closure;
768 /* ----------------------------------------------------------------- */
769 /* This part is the EVENT HANDLING LOOP */
770 /* ----------------------------------------------------------------- */
773 /* Choose the processor with the next event */
774 event = getnextevent();
775 CurrentProc = EVENT_PROC(event);
776 if(EVENT_TIME(event) > CurrentTime[CurrentProc])
777 CurrentTime[CurrentProc] = EVENT_TIME(event);
779 MAKE_BUSY(CurrentProc);
781 #if defined(GRAN_CHECK) && defined(GRAN)
783 fprintf(stderr,"D> After getnextevent, before HandleIdlePEs\n");
786 /* Deal with the idlers */
789 #if defined(GRAN_CHECK) && defined(GRAN)
791 (event_trace_all || EVENT_TYPE(event) != CONTINUETHREAD ||
796 switch (EVENT_TYPE(event))
798 /* Should just be continuing execution */
800 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
801 if ( (debug & 0x04) && BlockedOnFetch[CurrentProc]) {
802 fprintf(stderr,"Warning: Discarding CONTINUETHREAD on blocked proc %u @ %u\n",
803 CurrentProc,CurrentTime[CurrentProc]);
808 if(ThreadQueueHd==Nil_closure)
810 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
811 FINDWORK,Nil_closure,Nil_closure,NULL);
812 continue; /* Catches superfluous CONTINUEs -- should be unnecessary */
815 break; /* fall into scheduler loop */
818 #if defined(GRAN_CHECK) && defined(GRAN)
819 if (SimplifiedFetch) {
820 fprintf(stderr,"Error: FETCHNODE events not valid with simplified fetch\n");
825 CurrentTime[CurrentProc] += gran_munpacktime;
826 HandleFetchRequest(EVENT_NODE(event),
827 EVENT_CREATOR(event),
832 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
833 if (SimplifiedFetch) {
834 fprintf(stderr,"Error: FETCHREPLY events not valid with simplified fetch\n");
839 if (TSO_TYPE(EVENT_TSO(event)) & FETCH_MASK_TSO) {
840 TSO_TYPE(EVENT_TSO(event)) &= ~FETCH_MASK_TSO;
842 fprintf(stderr,"Error: FETCHREPLY: TSO 0x%x has fetch mask not set @ %d\n",
843 CurrentTSO,CurrentTime[CurrentProc]);
849 if (BlockedOnFetch[CurrentProc]!=ThreadQueueHd) {
850 fprintf(stderr,"Error: FETCHREPLY: Proc %d (with TSO 0x%x) not blocked-on-fetch by TSO 0x%x\n",
851 CurrentProc,CurrentTSO,BlockedOnFetch[CurrentProc]);
854 BlockedOnFetch[CurrentProc] = 0; /*- StgFalse; -*/
859 /* Copy or move node to CurrentProc */
860 if (FetchNode(EVENT_NODE(event),
861 EVENT_CREATOR(event),
862 EVENT_PROC(event)) ) {
863 /* Fetch has failed i.e. node has been grabbed by another PE */
864 P_ node = EVENT_NODE(event), tso = EVENT_TSO(event);
865 PROC p = where_is(node);
868 #if defined(GRAN_CHECK) && defined(GRAN)
869 if (PrintFetchMisses) {
870 fprintf(stderr,"Fetch miss @ %lu: node 0x%x is at proc %u (rather than proc %u)\n",
871 CurrentTime[CurrentProc],node,p,EVENT_CREATOR(event));
874 #endif /* GRAN_CHECK */
876 CurrentTime[CurrentProc] += gran_mpacktime;
878 /* Count fetch again !? */
879 ++TSO_FETCHCOUNT(tso);
880 TSO_FETCHTIME(tso) += gran_fetchtime;
882 fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p]) +
885 /* Chase the grabbed node */
886 newevent(p,CurrentProc,fetchtime,FETCHNODE,tso,node,NULL);
888 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
890 BlockedOnFetch[CurrentProc] = tso; /*-StgTrue;-*/
893 TSO_TYPE(tso) |= FETCH_MASK_TSO;
896 CurrentTime[CurrentProc] += gran_mtidytime;
898 continue; /* NB: no REPLy has been processed; tso still sleeping */
901 /* -- Qapla'! Fetch has been successful; node is here, now */
902 ++TSO_FETCHCOUNT(EVENT_TSO(event));
903 TSO_FETCHTIME(EVENT_TSO(event)) += gran_fetchtime;
905 if (RTSflags.ParFlags.granSimStats)
906 DumpGranEventAndNode(GR_REPLY,EVENT_TSO(event),
907 EVENT_NODE(event),EVENT_CREATOR(event));
909 --OutstandingFetches[CurrentProc];
910 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
911 if (OutstandingFetches[CurrentProc] < 0) {
912 fprintf(stderr,"OutstandingFetches of proc %u has become negative\n",CurrentProc);
917 if (!DoReScheduleOnFetch) {
918 CurrentTSO = EVENT_TSO(event); /* awaken blocked thread */
919 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
920 CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
921 TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] -
922 TSO_BLOCKEDAT(EVENT_TSO(event));
923 if(RTSflags.ParFlags.granSimStats)
924 DumpGranEvent(GR_RESUME,EVENT_TSO(event));
927 /* fall through to RESUMETHREAD */
930 case RESUMETHREAD: /* Move from the blocked queue to the tail of */
931 /* the runnable queue ( i.e. Qu' SImqa'lu') */
932 TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] -
933 TSO_BLOCKEDAT(EVENT_TSO(event));
934 StartThread(event,GR_RESUME);
938 StartThread(event,GR_START);
942 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
943 if (!DoThreadMigration) {
944 fprintf(stderr,"MOVETHREAD events should never occur without -bM\n");
948 CurrentTime[CurrentProc] += gran_munpacktime;
949 StartThread(event,GR_STOLEN);
950 continue; /* to the next event */
953 CurrentTime[CurrentProc] += gran_munpacktime;
954 spark = EVENT_SPARK(event);
956 ADD_TO_SPARK_QUEUE(spark); /* NB: this macro side-effects its arg.
957 so the assignment above is needed. */
960 DumpSparkGranEvent(SP_ACQUIRED,spark);
962 ++SparksAvail; /* Probably Temporarily */
963 /* Drop into FINDWORK */
965 if (!DoReScheduleOnFetch &&
966 (ThreadQueueHd != Nil_closure) ) { /* If we block on fetch then go */
967 continue; /* to next event (i.e. leave */
968 } /* spark in sparkq for now) */
971 if((ThreadQueueHd == Nil_closure || DoAlwaysCreateThreads)
972 && (FetchStrategy >= 2 || OutstandingFetches[CurrentProc] == 0))
975 sparkq spark_of_non_local_node = NULL;
977 /* Choose a spark from the local spark queue */
978 spark = SparkQueueHd;
980 while (spark != NULL && !found)
982 node = SPARK_NODE(spark);
983 if (!SHOULD_SPARK(node))
986 DumpSparkGranEvent(SP_PRUNED,spark);
988 ASSERT(spark != NULL);
990 SparkQueueHd = SPARK_NEXT(spark);
991 if(SparkQueueHd == NULL)
996 spark = SparkQueueHd;
998 /* -- node should eventually be sparked */
999 else if (PreferSparksOfLocalNodes &&
1000 !IS_LOCAL_TO(PROCS(node),CurrentProc))
1002 /* We have seen this spark before => no local sparks */
1003 if (spark==spark_of_non_local_node) {
1008 /* Remember first non-local node */
1009 if (spark_of_non_local_node==NULL)
1010 spark_of_non_local_node = spark;
1012 /* Special case: 1 elem sparkq with non-local spark */
1013 if (spark==SparkQueueTl) {
1018 /* Put spark (non-local!) at the end of the sparkq */
1019 SPARK_NEXT(SparkQueueTl) = spark;
1020 SparkQueueHd = SPARK_NEXT(spark);
1021 SPARK_NEXT(spark) = NULL;
1022 SparkQueueTl = spark;
1024 spark = SparkQueueHd;
1032 /* We've found a node; now, create thread (DaH Qu' yIchen) */
1035 CurrentTime[CurrentProc] += gran_threadcreatetime;
1037 node = SPARK_NODE(spark);
1038 if((tso = NewThread(node, T_REQUIRED))==NULL)
1040 /* Some kind of backoff needed here in case there's too little heap */
1041 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc]+1,
1042 FINDWORK,Nil_closure,Nil_closure,NULL);
1043 ReallyPerformThreadGC(TSO_HS+TSO_CTS_SIZE,StgTrue);
1045 continue; /* to the next event, eventually */
1048 TSO_EXPORTED(tso) = SPARK_EXPORTED(spark);
1049 TSO_LOCKED(tso) = !SPARK_GLOBAL(spark);
1050 TSO_SPARKNAME(tso) = SPARK_NAME(spark);
1052 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1053 STARTTHREAD,tso,Nil_closure,NULL);
1055 ASSERT(spark != NULL);
1057 SparkQueueHd = SPARK_NEXT(spark);
1058 if(SparkQueueHd == NULL)
1059 SparkQueueTl = NULL;
1061 DisposeSpark(spark);
1064 /* Make the PE idle if nothing sparked and we have no threads. */
1066 if(ThreadQueueHd == Nil_closure)
1067 #if defined(GRAN_CHECK) && defined(GRAN)
1069 MAKE_IDLE(CurrentProc);
1070 if ( (debug & 0x40) || (debug & 0x80) ) {
1071 fprintf(stderr,"Warning in FINDWORK handling: No work found for PROC %u\n",CurrentProc);
1075 MAKE_IDLE(CurrentProc);
1076 #endif /* GRAN_CHECK */
1078 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1079 CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
1082 continue; /* to the next event */
1086 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
1087 if ( (debug & 0x04) &&
1088 (!DoReScheduleOnFetch && ThreadQueueHd != Nil_closure)
1090 fprintf(stderr,"Waning in FINDWORK handling:\n");
1091 fprintf(stderr,"ThreadQueueHd!=Nil_closure should never happen with !DoReScheduleOnFetch");
1094 if (FetchStrategy < 2 && OutstandingFetches[CurrentProc] != 0)
1095 continue; /* to next event */
1097 break; /* run ThreadQueueHd */
1102 fprintf(stderr,"Illegal event type %u\n",EVENT_TYPE(event));
1105 _longjmp(scheduler_loop, 1);
1110 Here follows the non-GRAN @ReSchedule@.
1116 int again; /* Run the current thread again? */
1124 * In the parallel world, we do unfair scheduling for the moment.
1125 * Ultimately, this should all be merged with the more
1126 * sophisticated GrAnSim scheduling options. (Of course, some
1127 * provision should be made for *required* threads to make sure
1128 * that they don't starve, but for now we assume that no one is
1129 * running concurrent Haskell on a multi-processor platform.)
1135 if (RunnableThreadsHd == Nil_closure)
1136 RunnableThreadsTl = CurrentTSO;
1137 TSO_LINK(CurrentTSO) = RunnableThreadsHd;
1138 RunnableThreadsHd = CurrentTSO;
1144 * In the sequential world, we assume that the whole point of running
1145 * the threaded build is for concurrent Haskell, so we provide round-robin
1150 if(RunnableThreadsHd == Nil_closure) {
1151 RunnableThreadsHd = CurrentTSO;
1153 TSO_LINK(RunnableThreadsTl) = CurrentTSO;
1154 if (DO_QP_PROF > 1) {
1155 QP_Event1("GA", CurrentTSO);
1158 RunnableThreadsTl = CurrentTSO;
1164 * Debugging code, which is useful enough (and cheap enough) to compile
1165 * in all the time. This makes sure that we don't access saved registers,
1166 * etc. in threads which are supposed to be sleeping.
1168 CurrentTSO = Nil_closure;
1169 CurrentRegTable = NULL;
1172 /* First the required sparks */
1174 for (sparkp = PendingSparksHd[REQUIRED_POOL];
1175 sparkp < PendingSparksTl[REQUIRED_POOL]; sparkp++) {
1177 if (SHOULD_SPARK(spark)) {
1178 if ((tso = NewThread(spark, T_REQUIRED)) == NULL)
1180 if (RunnableThreadsHd == Nil_closure) {
1181 RunnableThreadsHd = tso;
1183 if (RTSflags.ParFlags.granSimStats) {
1184 DumpGranEvent(GR_START, tso);
1185 sameThread = rtsTrue;
1189 TSO_LINK(RunnableThreadsTl) = tso;
1191 if (RTSflags.ParFlags.granSimStats)
1192 DumpGranEvent(GR_STARTQ, tso);
1195 RunnableThreadsTl = tso;
1198 QP_Event0(threadId++, spark);
1201 DumpSparkGranEvent(SP_PRUNED, threadId++);
1205 PendingSparksHd[REQUIRED_POOL] = sparkp;
1207 /* Now, almost the same thing for advisory sparks */
1209 for (sparkp = PendingSparksHd[ADVISORY_POOL];
1210 sparkp < PendingSparksTl[ADVISORY_POOL]; sparkp++) {
1212 if (SHOULD_SPARK(spark)) {
1215 /* In the parallel world, don't create advisory threads if we are
1216 * about to rerun the same thread, or already have runnable threads,
1217 * or the main thread has terminated */
1218 (RunnableThreadsHd != Nil_closure ||
1219 (required_thread_count == 0 && IAmMainThread)) ||
1221 advisory_thread_count == RTSflags.ConcFlags.maxThreads ||
1222 (tso = NewThread(spark, T_ADVISORY)) == NULL)
1224 advisory_thread_count++;
1225 if (RunnableThreadsHd == Nil_closure) {
1226 RunnableThreadsHd = tso;
1228 if (RTSflags.ParFlags.granSimStats) {
1229 DumpGranEvent(GR_START, tso);
1230 sameThread = rtsTrue;
1234 TSO_LINK(RunnableThreadsTl) = tso;
1236 if (RTSflags.ParFlags.granSimStats)
1237 DumpGranEvent(GR_STARTQ, tso);
1240 RunnableThreadsTl = tso;
1243 QP_Event0(threadId++, spark);
1246 DumpSparkGranEvent(SP_PRUNED, threadId++);
1250 PendingSparksHd[ADVISORY_POOL] = sparkp;
1253 longjmp(scheduler_loop, required_thread_count == 0 ? -1 : 1);
1255 longjmp(scheduler_loop, required_thread_count == 0 && IAmMainThread ? -1 : 1);
1263 %****************************************************************************
1265 \subsection[thread-gransim-execution]{Starting, Idling and Migrating
1266 Threads (GrAnSim only)}
1268 %****************************************************************************
1270 Thread start, idle and migration code for GrAnSim (i.e. simulating multiple
1276 StartThread(event,event_type)
1278 enum gran_event_types event_type;
1280 if(ThreadQueueHd==Nil_closure)
1282 CurrentTSO = ThreadQueueHd = ThreadQueueTl = EVENT_TSO(event);
1283 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc]+gran_threadqueuetime,
1284 CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
1285 if(RTSflags.ParFlags.granSimStats)
1286 DumpGranEvent(event_type,EVENT_TSO(event));
1290 TSO_LINK(ThreadQueueTl) = EVENT_TSO(event);
1291 ThreadQueueTl = EVENT_TSO(event);
1293 if(DoThreadMigration)
1296 if(RTSflags.ParFlags.granSimStats)
1297 DumpGranEvent(event_type+1,EVENT_TSO(event));
1300 CurrentTime[CurrentProc] += gran_threadqueuetime;
1304 Export work to idle PEs.
1311 if(ANY_IDLE && (SparksAvail > 0l || SurplusThreads > 0l))
1312 for(proc = 0; proc < max_proc; proc++)
1315 if(DoStealThreadsFirst &&
1316 (FetchStrategy >= 4 || OutstandingFetches[proc] == 0))
1318 if (SurplusThreads > 0l) /* Steal a thread */
1325 if(SparksAvail > 0l &&
1326 (FetchStrategy >= 3 || OutstandingFetches[proc] == 0)) /* Steal a spark */
1329 if (IS_IDLE(proc) && SurplusThreads > 0l &&
1330 (FetchStrategy >= 4 || OutstandingFetches[proc] == 0)) /* Steal a thread */
1336 Steal a spark and schedule moving it to proc. We want to look at PEs in
1337 clock order -- most retarded first. Currently sparks are only stolen from
1338 the @ADVISORY_POOL@, never from the @REQUIRED_POOL@. Eventually, this should
1339 be changed to first steal from the former then from the latter.
/* Spark-stealing body (function header lies in a gap in this view).
   Collects candidate PEs whose clocks are behind CurrentProc, sorts them by
   clock (insertion into `times[]' plus a selection-style swap pass), then
   walks each candidate's ADVISORY_POOL for a worthwhile, non-local spark to
   move to `proc' via a MOVESPARK event. */
1346 sparkq spark, prev, next;
1348 TIME times[MAX_PROC], stealtime;
1349 unsigned ntimes=0, i, j;
1351 /* times shall contain processors from which we may steal sparks */
1352 for(p=0; p < max_proc; ++p)
1354 PendingSparksHd[p][ADVISORY_POOL] != NULL &&
1355 CurrentTime[p] <= CurrentTime[CurrentProc])
1356 times[ntimes++] = p;
/* Sort candidates by ascending clock ("most retarded" PE first).
   NOTE(review): the swap is incomplete in this view -- the line writing
   times[j] = temp is in a numbering gap. */
1359 for(i=0; i < ntimes; ++i)
1360 for(j=i+1; j < ntimes; ++j)
1361 if(CurrentTime[times[i]] > CurrentTime[times[j]])
1363 unsigned temp = times[i];
1364 times[i] = times[j];
1368 for(i=0; i < ntimes && !stolen; ++i)
1372 for(prev=NULL, spark = PendingSparksHd[p][ADVISORY_POOL];
1373 spark != NULL && !stolen;
1376 next = SPARK_NEXT(spark);
1378 if(SHOULD_SPARK(SPARK_NODE(spark)))
1380 /* Don't Steal local sparks */
1381 if(!SPARK_GLOBAL(spark))
1387 SPARK_NEXT(spark) = NULL;
/* Charge pack time on the victim PE, then schedule arrival at the
   thief no earlier than both PEs' clocks allow. */
1388 CurrentTime[p] += gran_mpacktime;
1390 stealtime = (CurrentTime[p] > CurrentTime[proc]? CurrentTime[p]: CurrentTime[proc])
1393 newevent(proc,p /* CurrentProc */,stealtime,
1394 MOVESPARK,Nil_closure,Nil_closure,spark);
/* Count one more export of this spark. */
1398 ++SPARK_GLOBAL(spark);
1401 DumpSparkGranEvent(SP_EXPORTED,spark);
1403 CurrentTime[p] += gran_mtidytime;
/* A spark not worth running any more is pruned and disposed. */
1410 DumpSparkGranEvent(SP_PRUNED,spark);
1411 DisposeSpark(spark);
/* Unlink the removed spark from the victim's pool. */
1414 if(spark == PendingSparksHd[p][ADVISORY_POOL])
1415 PendingSparksHd[p][ADVISORY_POOL] = next;
1418 SPARK_NEXT(prev) = next;
1421 if(PendingSparksHd[p][ADVISORY_POOL] == NULL)
1422 PendingSparksTl[p][ADVISORY_POOL] = NULL;
1427 Steal a thread and schedule moving it to proc.
/* Thread-stealing body (function header lies in a gap in this view).
   Mirrors the spark-stealing routine above: pick candidate PEs that are
   behind CurrentProc's clock, sort by clock, then take the first
   exportable (unlocked) thread from a victim's runnable queue and schedule
   a MOVETHREAD event to `proc'. */
1435 TIME times[MAX_PROC], stealtime;
1436 unsigned ntimes=0, i, j;
1438 /* Hunt for a thread */
1440 /* times shall contain processors from which we may steal threads */
1441 for(p=0; p < max_proc; ++p)
1442 if(proc != p && RunnableThreadsHd[p] != Nil_closure &&
1443 CurrentTime[p] <= CurrentTime[CurrentProc])
1444 times[ntimes++] = p;
/* Sort candidates by ascending clock; the times[j] = temp line is in a
   numbering gap in this view. */
1447 for(i=0; i < ntimes; ++i)
1448 for(j=i+1; j < ntimes; ++j)
1449 if(CurrentTime[times[i]] > CurrentTime[times[j]])
1451 unsigned temp = times[i];
1452 times[i] = times[j];
1456 for(i=0; i < ntimes; ++i)
1460 /* Steal the first exportable thread in the runnable queue after the */
1463 if(RunnableThreadsHd[p] != Nil_closure)
/* Skip over locked threads; `prev' trails `thread' for unlinking. */
1465 for(prev = RunnableThreadsHd[p], thread = TSO_LINK(RunnableThreadsHd[p]);
1466 thread != Nil_closure && TSO_LOCKED(thread);
1467 prev = thread, thread = TSO_LINK(thread))
1470 if(thread != Nil_closure) /* Take thread out of runnable queue */
1472 TSO_LINK(prev) = TSO_LINK(thread);
1474 TSO_LINK(thread) = Nil_closure;
1476 if(RunnableThreadsTl[p] == thread)
1477 RunnableThreadsTl[p] = prev;
1479 /* Turn magic constants into params !? -- HWL */
1481 CurrentTime[p] += 5l * gran_mpacktime;
1483 stealtime = (CurrentTime[p] > CurrentTime[proc]? CurrentTime[p]: CurrentTime[proc])
1484 + SparkStealTime() + 4l * gran_additional_latency
1485 + 5l * gran_munpacktime;
1487 /* Move the thread */
1488 SET_PROCS(thread,PE_NUMBER(proc));
1490 /* Move from one queue to another */
1491 newevent(proc,p,stealtime,MOVETHREAD,thread,Nil_closure,NULL);
1495 if(RTSflags.ParFlags.granSimStats)
1496 DumpRawGranEvent(p,GR_STEALING,TSO_ID(thread));
1498 CurrentTime[p] += 5l * gran_mtidytime;
/* SparkStealTime: simulated latency for stealing a spark, modelled on a
   GUM-style FISH round trip scaled by how many PEs are idle.
   NOTE(review): sparkdelay divides by (max_proc-1) -- division by zero if
   the simulation runs with max_proc == 1; confirm callers never reach this
   in the single-PE case. */
1507 TIME SparkStealTime()
1509 double fishdelay, sparkdelay, latencydelay;
1510 fishdelay = (double)max_proc/2;
/* Interpolate between fishdelay (no idlers) and 1 (all other PEs idle). */
1511 sparkdelay = fishdelay - ((fishdelay-1)/(double)(max_proc-1))*(double)Idlers;
1512 latencydelay = sparkdelay*((double)gran_latency);
/* Debug trace; presumably guarded by a flag on a line missing from
   this view. */
1515 fprintf(stderr,"fish delay = %g, spark delay = %g, latency delay = %g, Idlers = %u\n",
1516 fishdelay,sparkdelay,latencydelay,Idlers);
1518 return((TIME)latencydelay);
1520 #endif /* GRAN ; HWL */
1524 %****************************************************************************
1526 \subsection[thread-execution]{Executing Threads}
1528 %****************************************************************************
1531 EXTDATA_RO(StkO_info);
1532 EXTDATA_RO(TSO_info);
1533 EXTDATA_RO(WorldStateToken_closure);
1535 EXTFUN(EnterNodeCode);
1536 UNVEC(EXTFUN(stopThreadDirectReturn);,EXTDATA(vtbl_stopStgWorld);)
1540 /* Slow but relatively reliable method uses stgMallocBytes */
1541 /* Eventually change that to heap allocated sparks. */
/* NewSpark: heap-independent spark construction via stgMallocBytes (the
   comment above notes heap allocation is the eventual plan).  A spark made
   from a local request starts with SPARK_GLOBAL == 0. */
1544 NewSpark(node,name,local)
1548 sparkq newspark = (sparkq) stgMallocBytes(sizeof(struct spark), "NewSpark");
1550 SPARK_PREV(newspark) = SPARK_NEXT(newspark) = NULL;
1551 SPARK_NODE(newspark) = node;
1552 SPARK_NAME(newspark) = name;
1553 SPARK_GLOBAL(newspark) = !local;
1566 /* Heap-allocated disposal.
1568 FREEZE_MUT_HDR(spark, ImMutArrayOfPtrs);
1569 SPARK_PREV(spark) = SPARK_NEXT(spark) = SPARK_NODE(spark) = Nil_closure;
/* DisposeSparkQ: recursively free an entire spark queue.  Each disposal is
   expected to decrement SparksAvail (on a line missing from this view);
   the check below catches underflow. */
1573 DisposeSparkQ(spark)
1579 DisposeSparkQ(SPARK_NEXT(spark));
1582 if (SparksAvail < 0)
1583 fprintf(stderr,"DisposeSparkQ: SparksAvail<0 after disposing sparkq @ 0x%lx\n", spark);
1591 /* Create a new TSO, with the specified closure to enter and thread type */
/* NewThread: build (or recycle) a TSO and its first stack object.
   Recycling order: take from the AvailableTSO free list if possible,
   otherwise heap-allocate (with a heap check against SAVE_HpLim).  The
   same pattern is used for the stack object via AvailableStack.
   NOTE(review): many interior lines (braces, GC-retry paths, return) are
   missing from this view. */
1594 NewThread(topClosure, type)
1600 if (AvailableTSO != Nil_closure) {
1603 SET_PROCS(tso,ThisPE); /* Allocate it locally! */
1605 AvailableTSO = TSO_LINK(tso);
1606 } else if (SAVE_Hp + TSO_HS + TSO_CTS_SIZE > SAVE_HpLim) {
1609 ALLOC_TSO(TSO_HS,BYTES_TO_STGWORDS(sizeof(STGRegisterTable)),
1610 BYTES_TO_STGWORDS(sizeof(StgDouble)));
1612 SAVE_Hp += TSO_HS + TSO_CTS_SIZE;
1613 SET_TSO_HDR(tso, TSO_info, CCC);
/* Initialise the fresh/recycled TSO's bookkeeping fields. */
1616 TSO_LINK(tso) = Nil_closure;
1618 TSO_CCC(tso) = (CostCentre)STATIC_CC_REF(CC_MAIN);
1620 TSO_NAME(tso) = (P_) INFO_PTR(topClosure); /* A string would be nicer -- JSM */
1621 TSO_ID(tso) = threadId++;
1622 TSO_TYPE(tso) = type;
1623 TSO_PC1(tso) = TSO_PC2(tso) = EnterNodeCode;
1624 TSO_ARG1(tso) = TSO_EVENT(tso) = 0;
1625 TSO_SWITCH(tso) = NULL;
/* Per-thread profiling counters (GRAN/PAR builds only); the two
   TSO_STARTEDAT assignments are presumably alternative branches of an
   #if that is in a numbering gap here -- TODO confirm. */
1632 #if defined(GRAN) || defined(PAR)
1633 TSO_SPARKNAME(tso) = 0;
1635 TSO_STARTEDAT(tso) = CurrentTime[CurrentProc];
1637 TSO_STARTEDAT(tso) = CURRENT_TIME;
1639 TSO_EXPORTED(tso) = 0;
1640 TSO_BASICBLOCKS(tso) = 0;
1641 TSO_ALLOCS(tso) = 0;
1642 TSO_EXECTIME(tso) = 0;
1643 TSO_FETCHTIME(tso) = 0;
1644 TSO_FETCHCOUNT(tso) = 0;
1645 TSO_BLOCKTIME(tso) = 0;
1646 TSO_BLOCKCOUNT(tso) = 0;
1647 TSO_BLOCKEDAT(tso) = 0;
1648 TSO_GLOBALSPARKS(tso) = 0;
1649 TSO_LOCALSPARKS(tso) = 0;
1652 * set pc, Node (R1), liveness
1654 CurrentRegTable = TSO_INTERNAL_PTR(tso);
1655 SAVE_Liveness = LIVENESS_R1;
1656 SAVE_R1.p = topClosure;
1659 if (type == T_MAIN) {
/* Acquire a stack object: free list first, else heap-allocate. */
1663 if (AvailableStack != Nil_closure) {
1664 stko = AvailableStack;
1666 SET_PROCS(stko,ThisPE);
1668 AvailableStack = STKO_LINK(AvailableStack);
1669 } else if (SAVE_Hp + STKO_HS + RTSflags.ConcFlags.stkChunkSize > SAVE_HpLim) {
1672 ALLOC_STK(STKO_HS,RTSflags.ConcFlags.stkChunkSize,0);
1674 SAVE_Hp += STKO_HS + RTSflags.ConcFlags.stkChunkSize;
1675 SET_STKO_HDR(stko, StkO_info, CCC);
/* Both A and B stacks start empty, one word above their bottoms. */
1677 STKO_SIZE(stko) = RTSflags.ConcFlags.stkChunkSize + STKO_VHS;
1678 STKO_SpB(stko) = STKO_SuB(stko) = STKO_BSTK_BOT(stko) + BREL(1);
1679 STKO_SpA(stko) = STKO_SuA(stko) = STKO_ASTK_BOT(stko) + AREL(1);
1680 STKO_LINK(stko) = Nil_closure;
1681 STKO_RETURN(stko) = NULL;
1687 STKO_ADEP(stko) = STKO_BDEP(stko) = 0;
/* The main thread gets the world-state token pushed on its A stack. */
1690 if (type == T_MAIN) {
1691 STKO_SpA(stko) -= AREL(1);
1692 *STKO_SpA(stko) = (P_) WorldStateToken_closure;
1695 SAVE_Ret = (StgRetAddr) UNVEC(stopThreadDirectReturn,vtbl_stopStgWorld);
1698 ASSERT(sanityChk_StkO(stko));
1701 QP_Event1(do_qp_prof > 1 ? "*A" : "*G", tso);
/* EndThread: tear down the current thread.  Dumps per-thread statistics,
   unlinks CurrentTSO from the run queue, handles main-thread termination
   (including draining all PEs' runnable queues under GRAN), recycles the
   stack object and TSO onto the Available* free lists, and re-enters the
   scheduler.  NOTE(review): numbering gaps hide several #if/#else arms and
   braces; GRAN and non-GRAN paths are interleaved below. */
1710 EndThread(STG_NO_ARGS)
1713 TIME now = CURRENT_TIME;
1716 if (RTSflags.TickyFlags.showTickyStats) {
1717 fprintf(RTSflags.TickyFlags.tickyFile,
1718 "Thread %d (%lx)\n\tA stack max. depth: %ld words\n",
1719 TSO_ID(CurrentTSO), TSO_NAME(CurrentTSO), TSO_AHWM(CurrentTSO));
1720 fprintf(RTSflags.TickyFlags.tickyFile,
1721 "\tB stack max. depth: %ld words\n",
1722 TSO_BHWM(CurrentTSO));
1727 QP_Event1("G*", CurrentTSO);
/* Detach the dying thread from the head of the queue. */
1731 ASSERT(CurrentTSO == ThreadQueueHd);
1732 ThreadQueueHd = TSO_LINK(CurrentTSO);
1734 if(ThreadQueueHd == Nil_closure)
1735 ThreadQueueTl = Nil_closure;
1737 else if (DoThreadMigration)
/* Main-thread exit under GRAN: schedule-then-kill every remaining
   runnable thread on every PE so the profile log stays consistent. */
1742 if(TSO_TYPE(CurrentTSO)==T_MAIN)
1745 for(i=0; i < max_proc; ++i) {
1746 StgBool is_first = StgTrue;
1747 while(RunnableThreadsHd[i] != Nil_closure)
1749 /* We schedule runnable threads before killing them to */
1750 /* make the job of bookkeeping the running, runnable, */
1751 /* blocked threads easier for scripts like gr2ps -- HWL */
1753 if (RTSflags.ParFlags.granSimStats && !is_first)
1754 DumpRawGranEvent(i,GR_SCHEDULE,
1755 TSO_ID(RunnableThreadsHd[i]));
1757 DumpGranInfo(i,RunnableThreadsHd[i],StgTrue);
1758 RunnableThreadsHd[i] = TSO_LINK(RunnableThreadsHd[i]);
1759 is_first = StgFalse;
1763 ThreadQueueHd = Nil_closure;
1765 #if defined(GRAN_CHECK) && defined(GRAN)
1766 /* Print event stats */
1770 fprintf(stderr,"Statistics of events (total=%d):\n",
/* The 8 event kinds are indexed 0..7 in event_counts[]. */
1772 for (i=0; i<=7; i++) {
1773 fprintf(stderr,"> %s (%d): \t%ld \t%f%%\n",
1774 event_names[i],i,event_counts[i],
1775 (float)(100*event_counts[i])/(float)(noOfEvents) );
1783 DumpGranInfo(CurrentProc,CurrentTSO,
1784 TSO_TYPE(CurrentTSO) != T_ADVISORY);
1786 /* Note ThreadQueueHd is Nil when the main thread terminates */
1787 if(ThreadQueueHd != Nil_closure)
1789 if (RTSflags.ParFlags.granSimStats && !no_gr_profile)
1790 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
1791 CurrentTime[CurrentProc] += gran_threadscheduletime;
1794 else if (RTSflags.ParFlags.granSimStats_Binary && TSO_TYPE(CurrentTSO)==T_MAIN &&
1796 grterminate(CurrentTime[CurrentProc]);
1801 if (RTSflags.ParFlags.granSimStats) {
1802 TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO);
1803 DumpGranInfo(thisPE, CurrentTSO, TSO_TYPE(CurrentTSO) != T_ADVISORY);
/* Account thread termination per thread type. */
1807 switch (TSO_TYPE(CurrentTSO)) {
1809 required_thread_count--;
1811 if (RTSflags.ParFlags.granSimStats_Binary)
1815 #if defined(GRAN_CHECK) && defined(GRAN)
1816 if ( (debug & 0x80) || (debug & 0x40) )
1817 fprintf(stderr,"\nGRAN: I hereby terminate the main thread!\n");
1819 /* I've stolen that from the end of ReSchedule (!GRAN). HWL */
1820 longjmp(scheduler_loop, required_thread_count > 0 ? 1 : -1);
1826 required_thread_count--;
1830 advisory_thread_count--;
1838 fprintf(stderr, "EndThread: %lx unknown\n", (W_) TSO_TYPE(CurrentTSO));
1842 /* Reuse stack object space */
1843 ASSERT(STKO_LINK(SAVE_StkO) == Nil_closure);
1844 STKO_LINK(SAVE_StkO) = AvailableStack;
1845 AvailableStack = SAVE_StkO;
/* Reuse the TSO itself, then drop all references to it. */
1847 TSO_LINK(CurrentTSO) = AvailableTSO;
1848 AvailableTSO = CurrentTSO;
1849 CurrentTSO = Nil_closure;
1850 CurrentRegTable = NULL;
1853 /* NB: Now ThreadQueueHd is either the next runnable thread on this */
1854 /* proc or it's Nil_closure. In the latter case, a FINDWORK will be */
1855 /* issued by ReSchedule. */
1856 ReSchedule(SAME_THREAD); /* back for more! */
1858 ReSchedule(0); /* back for more! */
1863 %****************************************************************************
1865 \subsection[thread-blocking]{Local Blocking}
1867 %****************************************************************************
/* Profiling tick helpers: each bumps a file-scope update/PAP counter
   (counters declared on lines outside this view). */
1872 void CountnUPDs() { ++nUPDs; }
1873 void CountnUPDs_old() { ++nUPDs_old; }
1874 void CountnUPDs_new() { ++nUPDs_new; }
1876 void CountnPAPs() { ++nPAPs; }
1879 EXTDATA_RO(BQ_info);
1882 /* NB: non-GRAN version ToDo
1884 * AwakenBlockingQueue awakens a list of TSOs and FBQs.
1887 P_ PendingFetches = Nil_closure;
/* AwakenBlockingQueue (non-GRAN): walk a blocking queue containing TSOs
   and (under PAR, presumably) FetchBlocks; TSOs are appended to the
   runnable queue, fetch-block entries are moved onto PendingFetches.
   `last_tso' tracks the tail of the TSO run so links can be patched.
   NOTE(review): loop braces and some #if arms are in numbering gaps. */
1890 AwakenBlockingQueue(bqe)
1897 TIME now = CURRENT_TIME;
1902 while (bqe != Nil_closure) {
1904 while (IS_MUTABLE(INFO_PTR(bqe))) {
1905 switch (INFO_TYPE(INFO_PTR(bqe))) {
1909 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
/* Account blocked/fetch time for the woken TSO. */
1912 if (RTSflags.ParFlags.granSimStats) {
1913 DumpGranEvent(GR_RESUMEQ, bqe);
1914 switch (TSO_QUEUE(bqe)) {
1916 TSO_BLOCKTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
1919 TSO_FETCHTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
1923 fprintf(stderr, "ABQ: TSO_QUEUE invalid.\n");
/* First TSO seen: splice onto the runnable queue head or tail. */
1928 if (last_tso == NULL) {
1929 if (RunnableThreadsHd == Nil_closure) {
1930 RunnableThreadsHd = bqe;
1932 TSO_LINK(RunnableThreadsTl) = bqe;
1936 bqe = TSO_LINK(bqe);
/* Non-TSO entry: presumably a FetchBlock (PAR); queue it for later. */
1940 next = BF_LINK(bqe);
1941 BF_LINK(bqe) = PendingFetches;
1942 PendingFetches = bqe;
1944 if (last_tso != NULL)
1945 TSO_LINK(last_tso) = next;
1948 fprintf(stderr, "Unexpected IP (%#lx) in blocking queue at %#lx\n",
1949 INFO_PTR(bqe), (W_) bqe);
/* Close off the runnable queue with the last TSO awakened. */
1956 if (last_tso != NULL) {
1957 RunnableThreadsTl = last_tso;
1959 TSO_LINK(last_tso) = Nil_closure;
1967 /* NB: GRAN version only ToDo
1969 * AwakenBlockingQueue returns True if we are on the oldmutables list,
1970 * so that the update code knows what to do next.
/* GRAN variant: for each TSO blocked on `node', either append it locally
   to the thread queue or (for TSOs living on another PE) schedule a
   RESUMETHREAD event at a notify time that charges pack/tidy overheads. */
1974 AwakenBlockingQueue(node)
1977 P_ tso = (P_) BQ_ENTRIES(node);
1986 if (tso != Nil_closure)
1990 while(tso != Nil_closure) {
1992 ASSERT(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
1998 /* Find where the tso lives */
1999 proc = where_is(tso);
2001 if(proc == CurrentProc)
2002 notifytime = CurrentTime[CurrentProc] + gran_lunblocktime;
2005 CurrentTime[CurrentProc] += gran_mpacktime;
2006 notifytime = CurrentTime[CurrentProc] + gran_gunblocktime;
2007 CurrentTime[CurrentProc] += gran_mtidytime;
2010 /* and create a resume message */
2011 newevent(proc, CurrentProc, notifytime,
2012 RESUMETHREAD,tso,Nil_closure,NULL);
2015 tso = TSO_LINK(tso);
2016 TSO_LINK(prev) = Nil_closure;
/* Local fast path: splice the whole chain onto the thread queue. */
2021 if (ThreadQueueHd == Nil_closure)
2022 ThreadQueueHd = tso;
2024 TSO_LINK(ThreadQueueTl) = tso;
2026 while(TSO_LINK(tso) != Nil_closure) {
2027 ASSERT(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
2029 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", tso, CurrentTSO);
2031 tso = TSO_LINK(tso);
2034 ASSERT(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
2036 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", tso, CurrentTSO);
2039 ThreadQueueTl = tso;
/* Tell the update code whether `node' is on the oldmutables list. */
2042 return MUT_LINK(node) != MUT_NOT_LINKED;
/* Fragment of a thread-resume routine (its header lies in a numbering gap).
   Restores liveness and resume PC from `args'/`Continue', accounts
   execution time, then re-enters the scheduler.  The low bit of `args'
   presumably selects the ReSchedule mode -- TODO confirm at the caller. */
2053 SAVE_Liveness = args >> 1;
2054 TSO_PC1(CurrentTSO) = Continue;
2056 QP_Event1("GR", CurrentTSO);
2059 if (RTSflags.ParFlags.granSimStats) {
2060 /* Note that CURRENT_TIME may perform an unsafe call */
2061 TSO_EXECTIME(CurrentTSO) += CURRENT_TIME - TSO_BLOCKEDAT(CurrentTSO);
2064 ReSchedule(args & 1);
2069 %****************************************************************************
2071 \subsection[gr-fetch]{Fetching Nodes (GrAnSim only)}
2073 %****************************************************************************
2075 The following GrAnSim routines simulate the fetching of nodes from a remote
2076 processor. We use a 1 word bitmask to indicate on which processor a node is
2077 lying. Thus, moving or copying a node from one processor to another just
2078 requires an appropriate change in this bitmask (using @SET_GA@).
2079 Additionally, the clocks have to be updated.
2081 A special case arises when the node that is needed by processor A has been
2082 moved from a processor B to a processor C between sending out a @FETCH@
2083 (from A) and its arrival at B. In that case the @FETCH@ has to be forwarded
2086 Currently, we only support GRIP-like single closure fetching. We plan to
2087 incorporate GUM-like packet fetching in the near future.
2092 /* Fetch node "node" to processor "p" */
2095 FetchNode(node,from,to)
2099 ASSERT(to==CurrentProc);
2101 if (!IS_LOCAL_TO(PROCS(node),from) &&
2102 !IS_LOCAL_TO(PROCS(node),to) )
2105 if(IS_NF(INFO_PTR(node))) /* Old: || IS_BQ(node) */
2106 PROCS(node) |= PE_NUMBER(to); /* Copy node */
2108 PROCS(node) = PE_NUMBER(to); /* Move node */
2110 /* Now fetch the children */
2113 fprintf(stderr,"Sorry, GUMM fetching not yet implemented.\n");
2119 /* --------------------------------------------------
2120 Cost of sending a packet of size n = C + P*n
2121 where C = packet construction constant,
2122 P = cost of packing one word into a packet
2123 [Should also account for multiple packets].
2124 -------------------------------------------------- */
/* HandleFetchRequest (GrAnSim): service a FETCH for `node' from PE `p'.
   Three cases: (1) `p' already has the node (it was moved there meanwhile)
   -- reply immediately; (2) the node is still local -- send a FETCHREPLY
   charged with pack/latency/tidy costs; (3) the node moved to a third PE
   -- forward the FETCHNODE there. */
2127 HandleFetchRequest(node,p,tso)
2131 if (IS_LOCAL_TO(PROCS(node),p) ) /* Somebody else moved node already => */
2133 newevent(p,CurrentProc,
2134 CurrentTime[CurrentProc] /* +gran_latency */,
2135 FETCHREPLY,tso,node,NULL); /* node needed ? */
2136 CurrentTime[CurrentProc] += gran_mtidytime;
2138 else if (IS_LOCAL_TO(PROCS(node),CurrentProc) ) /* Is node still here? */
2140 /* Actual moving/copying of node is done on arrival; see FETCHREPLY */
2141 /* Send a reply to the originator */
2142 CurrentTime[CurrentProc] += gran_mpacktime;
2144 newevent(p,CurrentProc,
2145 CurrentTime[CurrentProc]+gran_latency,
2146 FETCHREPLY,tso,node,NULL); /* node needed ? */
2148 CurrentTime[CurrentProc] += gran_mtidytime;
2151 { /* Qu'vatlh! node has been grabbed by another proc => forward */
2152 PROC p_new = where_is(node);
2155 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
2158 max(CurrentTime[p_new],CurrentTime[CurrentProc])+gran_latency,
2159 FETCHREPLY,tso,node,NULL); /* node needed ? */
2160 CurrentTime[CurrentProc] += gran_mtidytime;
2165 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
2166 if (debug & 0x2) /* 0x2 should be something like DBG_PRINT_FWD */
2167 fprintf(stderr,"Qu'vatlh! node 0x%x has been grabbed by %d (current=%d; demander=%d) @ %d\n",
2168 node,p_new,CurrentProc,p,CurrentTime[CurrentProc]);
2170 /* Prepare FORWARD message to proc p_new */
2171 CurrentTime[CurrentProc] += gran_mpacktime;
2173 fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p_new]) +
2176 newevent(p_new,p,fetchtime,FETCHNODE,tso,node,NULL);
2178 CurrentTime[CurrentProc] += gran_mtidytime;
2184 %****************************************************************************
2186 \subsection[gr-simulation]{Granularity Simulation}
2188 %****************************************************************************
2191 #if 0 /* moved to GranSim.lc */
/* init_gr_simulation (DEAD CODE -- inside #if 0, superseded by GranSim.lc):
   open the .gr/.gb profile file and write a header describing the command
   line and all GranSim cost/strategy parameters.
   NOTE(review): the FetchStrategy==1 description string is missing its
   closing paren; left as-is since this is behavior-affecting text in dead
   code. */
2194 FILE *gr_file = NULL;
2195 char gr_filename[STATS_FILENAME_MAXLEN];
2197 init_gr_simulation(rts_argc,rts_argv,prog_argc,prog_argv)
2198 char *prog_argv[], *rts_argv[];
2199 int prog_argc, rts_argc;
/* Binary profiles get a .gb extension, text profiles .gr. */
2205 char *extension = RTSflags.ParFlags.granSimStats_Binary? "gb": "gr";
2206 sprintf(gr_filename, GR_FILENAME_FMT, prog_argv[0],extension);
2208 if ((gr_file = fopen(gr_filename,"w")) == NULL )
2210 fprintf(stderr, "Can't open granularity simulation report file %s\n", gr_filename);
2214 #if defined(GRAN_CHECK) && defined(GRAN)
2215 if(DoReScheduleOnFetch)
2216 setbuf(gr_file,NULL);
2219 fputs("Granularity Simulation for ",gr_file);
2220 for(i=0; i < prog_argc; ++i)
2222 fputs(prog_argv[i],gr_file);
2228 fputs("+RTS ",gr_file);
2230 for(i=0; i < rts_argc; ++i)
2232 fputs(rts_argv[i],gr_file);
2237 fputs("\n\n--------------------\n\n",gr_file);
2239 fputs("General Parameters:\n\n",gr_file);
2241 fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads%s\n",
2242 max_proc,DoFairSchedule?"Fair":"Unfair",
2243 DoThreadMigration?"":"Don't ",
2244 DoThreadMigration && DoStealThreadsFirst?" Before Sparks":"",
2245 DoReScheduleOnFetch?"":"Don't ");
2247 fprintf(gr_file, "%s, Fetch %s in Each Packet\n",
2248 SimplifiedFetch?"Simplified Fetch":(DoReScheduleOnFetch?"Reschedule on Fetch":"Block on Fetch"),
2249 DoGUMMFetching?"Many Closures":"Exactly One Closure");
2250 fprintf(gr_file, "Fetch Strategy(%u): If outstanding fetches %s\n",
2252 FetchStrategy==1?"only run runnable threads (don't create new ones":
2253 FetchStrategy==2?"create threads only from local sparks":
2254 FetchStrategy==3?"create threads from local or global sparks":
2255 FetchStrategy==4?"create sparks and steal threads if necessary":
2258 fprintf(gr_file, "Thread Creation Time %lu, Thread Queue Time %lu\n",
2259 gran_threadcreatetime,gran_threadqueuetime);
2260 fprintf(gr_file, "Thread DeSchedule Time %lu, Thread Schedule Time %lu\n",
2261 gran_threaddescheduletime,gran_threadscheduletime);
2262 fprintf(gr_file, "Thread Context-Switch Time %lu\n",
2263 gran_threadcontextswitchtime);
2264 fputs("\n\n--------------------\n\n",gr_file);
2266 fputs("Communication Metrics:\n\n",gr_file);
2268 "Latency %lu (1st) %lu (rest), Fetch %lu, Notify %lu (Global) %lu (Local)\n",
2269 gran_latency, gran_additional_latency, gran_fetchtime,
2270 gran_gunblocktime, gran_lunblocktime);
2272 "Message Creation %lu (+ %lu after send), Message Read %lu\n",
2273 gran_mpacktime, gran_mtidytime, gran_munpacktime);
2274 fputs("\n\n--------------------\n\n",gr_file);
2276 fputs("Instruction Metrics:\n\n",gr_file);
2277 fprintf(gr_file,"Arith %lu, Branch %lu, Load %lu, Store %lu, Float %lu, Alloc %lu\n",
2278 gran_arith_cost, gran_branch_cost,
2279 gran_load_cost, gran_store_cost,gran_float_cost,gran_heapalloc_cost);
2280 fputs("\n\n++++++++++++++++++++\n\n",gr_file);
/* Binary format starts with the word size so readers can decode TIMEs. */
2283 if(RTSflags.ParFlags.granSimStats_Binary)
2284 grputw(sizeof(TIME));
/* end_gr_simulation (still inside the #if 0 dead-code region above):
   announce where the profile was written.  Body is partial in this view. */
2290 void end_gr_simulation() {
2293 fprintf(stderr,"The simulation is finished. Look at %s for details.\n",
2301 %****************************************************************************
2303 \subsection[qp-profile]{Quasi-Parallel Profiling}
2305 %****************************************************************************
2313 /* *Virtual* Time in milliseconds */
/* Returns elapsed user (CPU) time in milliseconds, for QP profile stamps. */
2315 qp_elapsed_time(STG_NO_ARGS)
2317 extern StgDouble usertime();
2319 return ((long) (usertime() * 1e3));
/* init_qp_profiling: open the quasi-parallel profile file and write its
   header: the program's command line, the RTS context-switch/max-thread
   settings, and a timestamp. */
2323 init_qp_profiling(STG_NO_ARGS)
2326 char qp_filename[STATS_FILENAME_MAXLEN];
2328 sprintf(qp_filename, QP_FILENAME_FMT, prog_argv[0]);
2329 if ((qp_file = fopen(qp_filename,"w")) == NULL ) {
2330 fprintf(stderr, "Can't open quasi-parallel profile report file %s\n",
2334 fputs(prog_argv[0], qp_file);
2335 for(i = 1; prog_argv[i]; i++) {
2336 fputc(' ', qp_file);
2337 fputs(prog_argv[i], qp_file);
2339 fprintf(qp_file, " +RTS -C%d -t%d\n"
2340 , RTSflags.ConcFlags.ctxtSwitchTime
2341 , RTSflags.ConcFlags.maxThreads);
2343 fputs(time_str(), qp_file);
2344 fputc('\n', qp_file);
/* QP_Event0/1/2: emit one line of the quasi-parallel profile, stamped with
   qp_elapsed_time().  Event0 logs a thread id against a node's info ptr;
   Event1 logs one TSO; Event2 logs a pair of TSOs (e.g. waker and woken). */
2349 QP_Event0(tid, node)
2353 fprintf(qp_file, "%lu ** %lu 0x%lx\n", qp_elapsed_time(), tid, INFO_PTR(node));
2357 QP_Event1(event, tso)
2361 fprintf(qp_file, "%lu %s %lu 0x%lx\n", qp_elapsed_time(), event,
2362 TSO_ID(tso), TSO_NAME(tso));
2366 QP_Event2(event, tso1, tso2)
2370 fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n", qp_elapsed_time(), event,
2371 TSO_ID(tso1), TSO_NAME(tso1), TSO_ID(tso2), TSO_NAME(tso2));
2377 %****************************************************************************
2379 \subsection[entry-points]{Routines directly called from Haskell world}
2381 %****************************************************************************
2383 The @GranSim...@ routines in here are directly called via macros from the
2386 First some auxiliary routines.
2390 /* Take the current thread off the thread queue and thereby activate the */
2391 /* next thread. It's assumed that the next ReSchedule after this uses */
2392 /* NEW_THREAD as param. */
2393 /* This fct is called from GranSimBlock and GranSimFetch */
/* Precondition (checked under GRAN_CHECK): CurrentTSO is at the queue head.
   If the queue empties, the PE is marked idle; otherwise the context
   switch is charged and the newly scheduled thread is logged. */
2396 ActivateNextThread ()
2398 #if defined(GRAN_CHECK) && defined(GRAN)
2399 if(ThreadQueueHd != CurrentTSO) {
2400 fprintf(stderr,"Error: ThreadQueueHd != CurrentTSO in ActivateNextThread\n");
2405 ThreadQueueHd = TSO_LINK(ThreadQueueHd);
2406 if(ThreadQueueHd==Nil_closure) {
2407 MAKE_IDLE(CurrentProc);
2408 ThreadQueueTl = Nil_closure;
2409 } else if (RTSflags.ParFlags.granSimStats) {
2410 CurrentTime[CurrentProc] += gran_threadcontextswitchtime;
2411 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
2416 Now the main stg-called routines:
2419 /* ------------------------------------------------------------------------ */
2420 /* The following GranSim... fcts are stg-called from the threaded world. */
2421 /* ------------------------------------------------------------------------ */
2423 /* Called from HEAP_CHK -- NB: node and liveness are junk here now.
2424 They are left temporarily to avoid complete recompilation.
/* GranSimAllocate: account a heap allocation of `n' words to the current
   TSO and charge the allocation cost to thread and PE clocks.  `node' and
   `liveness' are junk here (see the comment above this block). */
2428 GranSimAllocate(n,node,liveness)
2433 TSO_ALLOCS(CurrentTSO) += n;
2434 ++TSO_BASICBLOCKS(CurrentTSO);
2436 TSO_EXECTIME(CurrentTSO) += gran_heapalloc_cost;
2437 CurrentTime[CurrentProc] += gran_heapalloc_cost;
2441 Subtract the values added above, if a heap check fails and
2442 so has to be redone.
/* Exact inverse of GranSimAllocate, used to back out a failed heap check. */
2445 GranSimUnallocate(n,node,liveness)
2450 TSO_ALLOCS(CurrentTSO) -= n;
2451 --TSO_BASICBLOCKS(CurrentTSO);
2453 TSO_EXECTIME(CurrentTSO) -= gran_heapalloc_cost;
2454 CurrentTime[CurrentProc] -= gran_heapalloc_cost;
/* GranSimExec: charge a weighted instruction-count cost for a basic block
   to the current TSO and the current PE's simulated clock. */
2458 GranSimExec(ariths,branches,loads,stores,floats)
2459 W_ ariths,branches,loads,stores,floats;
2461 W_ cost = gran_arith_cost*ariths + gran_branch_cost*branches + gran_load_cost * loads +
2462 gran_store_cost*stores + gran_float_cost*floats;
2464 TSO_EXECTIME(CurrentTSO) += cost;
2465 CurrentTime[CurrentProc] += cost;
2470 Fetch the node if it isn't local
2471 -- result indicates whether fetch has been done.
2473 This is GRIP-style single item fetching.
/* GranSimFetch: GRIP-style single-closure fetch.  If `node' is not local,
   charge pack costs, then either fetch synchronously (SimplifiedFetch) or
   emit a FETCHNODE event and block/deschedule the current thread depending
   on DoReScheduleOnFetch.  Sets NeedToReSchedule on the event path.
   NOTE(review): several braces/#endif lines are in numbering gaps. */
2477 GranSimFetch(node /* , liveness_mask */ )
2479 /* I_ liveness_mask; */
2481 /* Note: once a node has been fetched, this test will be passed */
2482 if(!IS_LOCAL_TO(PROCS(node),CurrentProc) )
2484 /* I suppose we shouldn't do this for CAFs? -- KH */
2485 /* Should reschedule if the latency is high */
2486 /* We should add mpacktime to the remote PE for the reply,
2487 but we don't know who owns the node
2489 /* if(DYNAMIC_POINTER(node)) */ /* For 0.22; gone in 0.23 !!! */
2491 PROC p = where_is(node);
2495 if ( ( debug & 0x40 ) &&
2497 fprintf(stderr,"GranSimFetch: Trying to fetch from own processor%u\n", p);
2498 #endif /* GRAN_CHECK */
2500 CurrentTime[CurrentProc] += gran_mpacktime;
2502 ++TSO_FETCHCOUNT(CurrentTSO);
2503 TSO_FETCHTIME(CurrentTSO) += gran_fetchtime;
/* Simplified mode: do the fetch inline and charge full round trip. */
2505 if (SimplifiedFetch)
2507 FetchNode(node,CurrentProc);
2508 CurrentTime[CurrentProc] += gran_mtidytime+gran_fetchtime+
/* Event-driven mode: schedule a FETCHNODE no earlier than both PEs'
   clocks allow, and record the outstanding fetch. */
2513 fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p]) +
2516 newevent(p,CurrentProc,fetchtime,FETCHNODE,CurrentTSO,node,NULL);
2517 ++OutstandingFetches[CurrentProc];
2519 /* About to block */
2520 TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[p];
2522 if (DoReScheduleOnFetch)
2525 /* Remove CurrentTSO from the queue
2526 -- assumes head of queue == CurrentTSO */
2529 if(RTSflags.ParFlags.granSimStats)
2530 DumpGranEventAndNode(GR_FETCH,CurrentTSO,node,p);
2532 ActivateNextThread();
2534 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
2536 if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
2537 fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
2538 CurrentTSO,CurrentTime[CurrentProc]);
/* Mark the TSO as being in flight on a fetch. */
2541 TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
2547 TSO_LINK(CurrentTSO) = Nil_closure;
2548 /* CurrentTSO = Nil_closure; */
2550 /* ThreadQueueHd is now the next TSO to schedule or NULL */
2551 /* CurrentTSO is pointed to by the FETCHNODE event */
2553 else /* DoFairSchedule */
2555 /* Remove from the tail of the thread queue */
2556 fprintf(stderr,"Reschedule-on-fetch is not yet compatible with fair scheduling\n");
2560 else /* !DoReScheduleOnFetch */
2562 /* Note: CurrentProc is still busy as it's blocked on fetch */
2563 if(RTSflags.ParFlags.granSimStats)
2564 DumpGranEventAndNode(GR_FETCH,CurrentTSO,node,p);
2566 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
2568 BlockedOnFetch[CurrentProc] = CurrentTSO; /*- StgTrue; -*/
2571 if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
2572 fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
2573 CurrentTSO,CurrentTime[CurrentProc]);
2576 TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
2579 CurrentTSO = Nil_closure;
2584 CurrentTime[CurrentProc] += gran_mtidytime;
2586 /* Rescheduling is necessary */
2587 NeedToReSchedule = StgTrue;
/* GranSimSpark: record a spark creation; optionally force the PE to notice
   it via an immediate FINDWORK event, and count it as local or global on
   the sparking TSO. */
2596 GranSimSpark(local,node)
2602 DumpSparkGranEvent(SP_SPARK,node);
2604 /* Force the PE to take notice of the spark */
2605 if(DoAlwaysCreateThreads)
2606 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
2607 FINDWORK,Nil_closure,Nil_closure,NULL);
2610 ++TSO_LOCALSPARKS(CurrentTSO);
2612 ++TSO_GLOBALSPARKS(CurrentTSO);
/* GranSimSparkAt: export `spark' to the PE owning `where', via a MOVESPARK
   event stamped no earlier than either PE's clock, charging pack/tidy
   costs locally and counting a global spark on the current TSO. */
2616 GranSimSparkAt(spark,where,identifier)
2618 P_ where; /* This should be a node; alternatively could be a GA */
2621 PROC p = where_is(where);
2625 DumpSparkGranEvent(SP_SPARKAT,SPARK_NODE(spark));
2627 CurrentTime[CurrentProc] += gran_mpacktime;
2629 exporttime = (CurrentTime[p] > CurrentTime[CurrentProc]?
2630 CurrentTime[p]: CurrentTime[CurrentProc])
2633 newevent(p,CurrentProc,exporttime,MOVESPARK,Nil_closure,Nil_closure,spark);
2635 CurrentTime[CurrentProc] += gran_mtidytime;
2637 ++TSO_GLOBALSPARKS(CurrentTSO);
/* Body of GranSimBlock (its header lies in a numbering gap): log the block,
   bump the block count, stamp the block time, and hand the PE to the next
   runnable thread. */
2643 if(RTSflags.ParFlags.granSimStats)
2644 DumpGranEvent(GR_BLOCK,CurrentTSO);
2646 ++TSO_BLOCKCOUNT(CurrentTSO);
2647 TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[CurrentProc];
2648 ActivateNextThread();
2655 %****************************************************************************
2657 \subsection[gc-GrAnSim]{Garbage collection routines for GrAnSim objects}
2659 %****************************************************************************
2661 Garbage collection code for the event queue. We walk the event queue
2662 so that if the only reference to a TSO is in some event (e.g. RESUME),
2663 the TSO is still preserved.
2668 extern smInfo StorageMgrInfo;
/* SaveEventRoots: walk the GrAnSim event queue and push every closure
   reachable only from events (TSOs, spark nodes, fetched nodes) onto the
   storage manager's root array, so GC preserves them.  Returns the updated
   root count.  Must be mirrored exactly, in reverse, by RestoreEvtRoots. */
2671 SaveEventRoots(num_ptr_roots)
2674 eventq event = EventHd;
2675 while(event != NULL)
2677 if(EVENT_TYPE(event) == RESUMETHREAD ||
2678 EVENT_TYPE(event) == MOVETHREAD ||
2679 EVENT_TYPE(event) == STARTTHREAD )
2680 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
2682 else if(EVENT_TYPE(event) == MOVESPARK)
2683 StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(EVENT_SPARK(event));
2685 else if (EVENT_TYPE(event) == FETCHNODE ||
2686 EVENT_TYPE(event) == FETCHREPLY )
/* Fetch events carry both a TSO and a node; push both (TSO first,
   so RestoreEvtRoots pops node first). */
2688 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
2689 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_NODE(event);
2692 event = EVENT_NEXT(event);
2694 return(num_ptr_roots);
/* SaveSparkRoots: push the node of every pending spark (all PEs, all
   pools) onto the GC root array, up to MAX_SPARKS.  Sparks beyond that
   limit are pruned: their nodes are nilled out and the excess tail of the
   queue (tracked via disposeQ) is unlinked and disposed. */
2698 SaveSparkRoots(num_ptr_roots)
2701 sparkq spark, /* prev, */ disposeQ=NULL;
2703 I_ i, sparkroots=0, prunedSparks=0;
2705 #if defined(GRAN_CHECK) && defined(GRAN)
2707 fprintf(stderr,"D> Saving spark roots for GC ...\n");
2710 for(proc = 0; proc < max_proc; ++proc) {
2711 for(i = 0; i < SPARK_POOLS; ++i) {
2712 for(/* prev = &PendingSparksHd[proc][i],*/ spark = PendingSparksHd[proc][i];
2714 /* prev = &SPARK_NEXT(spark), */ spark = SPARK_NEXT(spark))
2716 if(++sparkroots <= MAX_SPARKS)
2718 #if defined(GRAN_CHECK) && defined(GRAN)
2720 fprintf(RTSflags.GcFlags.statsFile,"Saving Spark Root %d(proc: %d; pool: %d) -- 0x%lx\n",
2721 num_ptr_roots,proc,i,SPARK_NODE(spark));
2723 StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(spark);
/* Over the limit: drop this spark's node and remember where the
   to-be-disposed tail starts. */
2727 SPARK_NODE(spark) = Nil_closure;
2728 if (prunedSparks==0) {
2736 } /* forall spark ... */
2737 if (prunedSparks>0) {
2738 fprintf(RTSflags.GcFlags.statsFile,"Pruning and disposing %lu excess sparks (> %lu) on proc %d for GC purposes\n",
2739 prunedSparks,MAX_SPARKS,proc);
2740 if (disposeQ == PendingSparksHd[proc][i])
2741 PendingSparksHd[proc][i] = NULL;
2743 SPARK_NEXT(SPARK_PREV(disposeQ)) = NULL;
2744 DisposeSparkQ(disposeQ);
2748 } /* forall i ... */
2749 } /*forall proc .. */
2751 return(num_ptr_roots);
2755 GC roots must be restored in *reverse order*.
2756 The recursion is a little ugly, but is better than
2757 in-place pointer reversal.
/* RestoreEvtRoots: recursive mirror of SaveEventRoots.  Recurses to the
   queue tail first so roots are popped in exact reverse order of how they
   were pushed (see the comment above about reverse-order restoration). */
2761 RestoreEvtRoots(event,num_ptr_roots)
2767 num_ptr_roots = RestoreEvtRoots(EVENT_NEXT(event),num_ptr_roots);
2769 if(EVENT_TYPE(event) == RESUMETHREAD ||
2770 EVENT_TYPE(event) == MOVETHREAD ||
2771 EVENT_TYPE(event) == STARTTHREAD )
2772 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
2774 else if(EVENT_TYPE(event) == MOVESPARK )
2775 SPARK_NODE(EVENT_SPARK(event)) = StorageMgrInfo.roots[--num_ptr_roots];
2777 else if (EVENT_TYPE(event) == FETCHNODE ||
2778 EVENT_TYPE(event) == FETCHREPLY )
/* Pop in reverse of the save order: node first, then TSO. */
2780 EVENT_NODE(event) = StorageMgrInfo.roots[--num_ptr_roots];
2781 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
2785 return(num_ptr_roots);
/* RestoreEventRoots: entry point -- restore from the head of the event
   queue via the recursive helper above. */
2789 RestoreEventRoots(num_ptr_roots)
2792 return(RestoreEvtRoots(EventHd,num_ptr_roots));
/* RestoreSpkRoots: recursive mirror of the save loop in SaveSparkRoots.
   Recurses to the end of one spark queue first, then pops node roots in
   reverse order, up to MAX_SPARKS per invocation chain. */
2796 RestoreSpkRoots(spark,num_ptr_roots,sparkroots)
2798 I_ num_ptr_roots, sparkroots;
2802 num_ptr_roots = RestoreSpkRoots(SPARK_NEXT(spark),num_ptr_roots,++sparkroots);
2803 if(sparkroots <= MAX_SPARKS)
2805 P_ n = SPARK_NODE(spark);
2806 SPARK_NODE(spark) = StorageMgrInfo.roots[--num_ptr_roots];
2807 #if defined(GRAN_CHECK) && defined(GRAN)
2809 fprintf(RTSflags.GcFlags.statsFile,"Restoring Spark Root %d -- new: 0x%lx \n",
2810 num_ptr_roots,SPARK_NODE(spark));
/* Beyond MAX_SPARKS nothing was saved, so there is nothing to pop --
   reaching here indicates a save/restore mismatch. */
2814 #if defined(GRAN_CHECK) && defined(GRAN)
2816 fprintf(RTSflags.GcFlags.statsFile,"Error in RestoreSpkRoots (%d; @ spark 0x%x): More than MAX_SPARKS (%d) sparks\n",
2817 num_ptr_roots,SPARK_NODE(spark),MAX_SPARKS);
2822 return(num_ptr_roots);
/* RestoreSparkRoots: restore spark roots for every processor and every
   spark pool.  Iterates downwards (max_proc-1 .. 0) to undo the save
   pass; the extra "< max_proc" clause guards against unsigned wrap-around
   of PROC, as the original comment below explains. */
2826 RestoreSparkRoots(num_ptr_roots)
2832 /* NB: PROC is currently an unsigned datatype i.e. proc>=0 is always */
2833 /* true ((PROC)-1 == (PROC)255). So we need a second clause in the head */
2834 /* of the for loop. For i that is currently not necessary. C is really */
2835 /* impressive in datatype abstraction! -- HWL */
2837 for(proc = max_proc - 1; (proc >= 0) && (proc < max_proc); --proc) {
2838 for(i = SPARK_POOLS - 1; (i >= 0) && (i < SPARK_POOLS) ; --i) {
2839 num_ptr_roots = RestoreSpkRoots(PendingSparksHd[proc][i],num_ptr_roots,0);
2842 return(num_ptr_roots);
2849 %****************************************************************************
2851 \subsection[GrAnSim-profile]{Writing profiling info for GrAnSim}
2853 %****************************************************************************
2855 Event dumping routines.
/* DumpGranEvent: log a GrAnSim event for a TSO on the current processor,
   delegating to DumpRawGranEvent with the thread's id. */
2860 DumpGranEvent(name,tso)
2861 enum gran_event_types name;
2864 DumpRawGranEvent(CurrentProc,name,TSO_ID(tso));
/* DumpSparkGranEvent: like DumpGranEvent but for a spark, identified by
   a raw id rather than a TSO. */
2867 DumpSparkGranEvent(name,id)
2868 enum gran_event_types name;
2871 DumpRawGranEvent(CurrentProc,name,id);
/* DumpGranEventAndNode: log an event together with the node involved and
   the originating processor.  Emits either the binary profile stream
   (granSimStats_Binary) or a textual line to gr_file.
   NOTE(review): gappy listing -- the binary branch's event/id writes and
   the #else fall on elided lines. */
2874 DumpGranEventAndNode(name,tso,node,proc)
2875 enum gran_event_types name;
2879 PROC pe = CurrentProc;
2880 W_ id = TSO_ID(tso);
/* Clamp unknown event codes so the name table lookup stays in range. */
2882 if(name > GR_EVENT_MAX)
2883 name = GR_EVENT_MAX;
2885 if(RTSflags.ParFlags.granSimStats_Binary)
2889 grputw(CurrentTime[CurrentProc]);
2893 fprintf(gr_file,"PE %2u [%lu]: %s %lx \t0x%lx\t(from %2u)\n",
2894 pe,CurrentTime[CurrentProc],gran_event_names[name],id,node,proc);
/* DumpRawGranEvent: the common event-logging primitive -- write one event
   (processor, current time, event name, id) in binary or text form. */
2897 DumpRawGranEvent(pe,name,id)
2899 enum gran_event_types name;
/* Clamp unknown event codes so the name table lookup stays in range. */
2902 if(name > GR_EVENT_MAX)
2903 name = GR_EVENT_MAX;
2905 if(RTSflags.ParFlags.granSimStats_Binary)
2909 grputw(CurrentTime[CurrentProc]);
2913 fprintf(gr_file,"PE %2u [%lu]: %s %lx\n",
2914 pe,CurrentTime[CurrentProc],gran_event_names[name],id);
/* DumpGranInfo: dump the full per-thread statistics record when a thread
   ends -- spark name, start time, export flag, basic blocks, allocations,
   exec/block/fetch times and counts, local/global sparks, and whether the
   thread was mandatory.  Binary form writes one grputw per field; the
   textual form is the "END" line below.
   NOTE(review): gappy listing -- the #else / GRAN_CHECK structure between
   the three fprintf groups is on elided lines; the stderr block at 2969ff
   looks like a separate GRAN_CHECK debug dump -- verify in the full file. */
2917 DumpGranInfo(pe,tso,mandatory_thread)
2920 I_ mandatory_thread;
2922 if(RTSflags.ParFlags.granSimStats_Binary)
2926 grputw(CurrentTime[CurrentProc]);
2927 grputw(TSO_ID(tso));
2928 grputw(TSO_SPARKNAME(tso));
2929 grputw(TSO_STARTEDAT(tso));
2930 grputw(TSO_EXPORTED(tso));
2931 grputw(TSO_BASICBLOCKS(tso));
2932 grputw(TSO_ALLOCS(tso));
2933 grputw(TSO_EXECTIME(tso));
2934 grputw(TSO_BLOCKTIME(tso));
2935 grputw(TSO_BLOCKCOUNT(tso));
2936 grputw(TSO_FETCHTIME(tso));
2937 grputw(TSO_FETCHCOUNT(tso));
2938 grputw(TSO_LOCALSPARKS(tso));
2939 grputw(TSO_GLOBALSPARKS(tso));
2940 grputw(mandatory_thread);
2944 /* NB: DumpGranEvent cannot be used because PE may be wrong (as well as the extra info) */
2945 fprintf(gr_file,"PE %2u [%lu]: END %lx, SN %lu, ST %lu, EXP %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu), LS %lu, GS %lu, MY %c\n"
2947 ,CurrentTime[CurrentProc]
2951 ,TSO_EXPORTED(tso)?'T':'F'
2952 ,TSO_BASICBLOCKS(tso)
2956 ,TSO_BLOCKCOUNT(tso)
2958 ,TSO_FETCHCOUNT(tso)
2959 ,TSO_LOCALSPARKS(tso)
2960 ,TSO_GLOBALSPARKS(tso)
2961 ,mandatory_thread?'T':'F'
/* Debug dump of the raw TSO (presumably under GRAN_CHECK -- the guard is
   on an elided line). */
2969 fprintf(stderr,"TSO 0x%lx, NAME 0x%lx, ID %lu, LINK 0x%lx, TYPE %s\n"
2974 ,TSO_TYPE(tso)==T_MAIN?"MAIN":
2975 TSO_TYPE(tso)==T_FAIL?"FAIL":
2976 TSO_TYPE(tso)==T_REQUIRED?"REQUIRED":
2977 TSO_TYPE(tso)==T_ADVISORY?"ADVISORY":
2981 fprintf(stderr,"PC (0x%lx,0x%lx), ARG (0x%lx,0x%lx), SWITCH %lx0x\n"
2989 fprintf(gr_file,"SN %lu, ST %lu, GBL %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu) LS %lu, GS %lu\n"
2992 ,TSO_EXPORTED(tso)?'T':'F'
2993 ,TSO_BASICBLOCKS(tso)
2997 ,TSO_BLOCKCOUNT(tso)
2999 ,TSO_FETCHCOUNT(tso)
3000 ,TSO_LOCALSPARKS(tso)
3001 ,TSO_GLOBALSPARKS(tso)
3006 Output a terminate event and an 8-byte time.
/* Fragment of grterminate: emit the GR_TERMINATE event... */
3012 DumpGranEvent(GR_TERMINATE,0);
/* grputw (fragment): write an 8-byte word to gr_file, big-endian
   (most significant byte first).  Only the 64-bit byte-at-a-time body is
   visible here; the function header is on elided lines. */
3023 putc(v >> 56l,gr_file);
3024 putc((v >> 48l)&0xffl,gr_file);
3025 putc((v >> 40l)&0xffl,gr_file);
3026 putc((v >> 32l)&0xffl,gr_file);
3028 putc((v >> 24l)&0xffl,gr_file);
3029 putc((v >> 16l)&0xffl,gr_file);
3030 putc((v >> 8l)&0xffl,gr_file);
3031 putc(v&0xffl,gr_file);
3035 Length-coded output: first 3 bits contain length coding
/* Length-coded word output (see comment above): the top bits of the first
   byte encode how many bytes follow -- 0x00 = 6-bit value, 0x40 = 14-bit,
   0x80 = 30-bit, 0x70 = explicit 4-byte, 0x60 = full 8-byte escape.
   NOTE(review): gappy listing -- the function header, braces, and several
   else-branches are on elided lines; byte-count semantics inferred from
   the masks, confirm against the matching reader. */
3049 fputc(v & 0x3f,gr_file);
3052 else if (v <= 0x3fffl)
3054 fputc((v >> 8l)|0x40l,gr_file);
3055 fputc(v&0xffl,gr_file);
3058 else if (v <= 0x3fffffffl)
3060 fputc((v >> 24l)|0x80l,gr_file);
3061 fputc((v >> 16l)&0xffl,gr_file);
3062 fputc((v >> 8l)&0xffl,gr_file);
3063 fputc(v&0xffl,gr_file);
3066 else if (sizeof(TIME) == 4)
3068 fputc(0x70,gr_file);
3069 fputc((v >> 24l)&0xffl,gr_file);
3070 fputc((v >> 16l)&0xffl,gr_file);
3071 fputc((v >> 8l)&0xffl,gr_file);
3072 fputc(v&0xffl,gr_file);
3077 if (v <= 0x3fffffffffffffl)
3078 putc((v >> 56l)|0x60l,gr_file);
3082 putc((v >> 56l)&0xffl,gr_file);
3085 putc((v >> 48l)&0xffl,gr_file);
3086 putc((v >> 40l)&0xffl,gr_file);
3087 putc((v >> 32l)&0xffl,gr_file);
3088 putc((v >> 24l)&0xffl,gr_file);
3089 putc((v >> 16l)&0xffl,gr_file);
3090 putc((v >> 8l)&0xffl,gr_file);
3091 putc(v&0xffl,gr_file);
3098 %****************************************************************************
3100 \subsection[GrAnSim-debug]{Debugging routines for GrAnSim}
3102 %****************************************************************************
3104 Debugging routines, mainly for GrAnSim. They should really be in a separate file.
3106 The first couple of routines are general ones (look also into
3107 c-as-asm/StgDebug.lc).
3111 #define NULL_REG_MAP /* Not threaded */
3112 #include "stgdefs.h"
/* info_hdr_type: map a closure's info pointer to a human-readable type
   name string for the debug printers below.  The sequential (non-PAR,
   non-GRAN) build dispatches on INFO_TAG, everything else on INFO_TYPE.
   NOTE(review): gappy listing -- the return statements for most cases
   (e.g. "SPECU", "GENN", ...) are on elided lines. */
3115 info_hdr_type(info_ptr)
3118 #if ! defined(PAR) && !defined(GRAN)
3119 switch (INFO_TAG(info_ptr))
3121 case INFO_OTHER_TAG:
3122 return("OTHER_TAG");
3123 /* case INFO_IND_TAG:
3129 switch(INFO_TYPE(info_ptr))
3131 case INFO_SPEC_U_TYPE:
3134 case INFO_SPEC_N_TYPE:
3137 case INFO_GEN_U_TYPE:
3140 case INFO_GEN_N_TYPE:
3147 case INFO_DYN_TYPE_N:
3150 case INFO_DYN_TYPE_U:
3154 case INFO_TUPLE_TYPE:
3157 case INFO_DATA_TYPE:
3160 case INFO_MUTUPLE_TYPE:
3163 case INFO_IMMUTUPLE_TYPE:
3164 return("IMMUTUPLE");
3166 case INFO_STATIC_TYPE:
3169 case INFO_CONST_TYPE:
3172 case INFO_CHARLIKE_TYPE:
3175 case INFO_INTLIKE_TYPE:
3187 case INFO_FETCHME_TYPE:
3194 case INFO_BQENT_TYPE:
3201 case INFO_STKO_TYPE:
/* Fallback: unknown info type -- report it rather than crash. */
3205 fprintf(stderr,"Unknown header type %lu\n",INFO_TYPE(info_ptr));
3212 @var_hdr_size@ computes the size of the variable header for a closure.
/* var_hdr_size (header on an elided line; see prose at 3212): return the
   size in words of the variable header for the closure at `node`, keyed
   on its info type.  SPEC closures have no variable header "by decree". */
3219 switch(INFO_TYPE(INFO_PTR(node)))
3221 case INFO_SPEC_U_TYPE: return(0); /* by decree */
3222 case INFO_SPEC_N_TYPE: return(0);
3223 case INFO_GEN_U_TYPE: return(GEN_VHS);
3224 case INFO_GEN_N_TYPE: return(GEN_VHS);
3225 case INFO_DYN_TYPE: return(DYN_VHS);
3227 case INFO_DYN_TYPE_N: return(DYN_VHS);
3228 case INFO_DYN_TYPE_U: return(DYN_VHS);
3230 case INFO_TUPLE_TYPE: return(TUPLE_VHS);
3231 case INFO_DATA_TYPE: return(DATA_VHS);
3232 case INFO_MUTUPLE_TYPE: return(MUTUPLE_VHS);
3233 case INFO_IMMUTUPLE_TYPE: return(MUTUPLE_VHS); /* same layout */
3234 case INFO_STATIC_TYPE: return(STATIC_VHS);
3235 case INFO_CONST_TYPE: return(0);
3236 case INFO_CHARLIKE_TYPE: return(0);
3237 case INFO_INTLIKE_TYPE: return(0);
3238 case INFO_BH_TYPE: return(0);
3239 case INFO_IND_TYPE: return(0);
3240 case INFO_CAF_TYPE: return(0);
3241 case INFO_FETCHME_TYPE: return(0);
3242 case INFO_BQ_TYPE: return(0);
3244 case INFO_BQENT_TYPE: return(0);
3246 case INFO_TSO_TYPE: return(TSO_VHS);
3247 case INFO_STKO_TYPE: return(STKO_VHS);
/* Fallback: unknown info type -- report it rather than crash. */
3249 fprintf(stderr,"Unknown info type 0x%lx (%lu)\n", INFO_PTR(node),
3250 INFO_TYPE(INFO_PTR(node)));
3256 /* Determine the size and number of pointers for this kind of closure */
3258 size_and_ptrs(node,size,ptrs)
3262 switch(INFO_TYPE(INFO_PTR(node)))
3264 case INFO_SPEC_U_TYPE:
3265 case INFO_SPEC_N_TYPE:
3266 *size = INFO_SIZE(INFO_PTR(node)); /* New for 0.24; check */
3267 *ptrs = INFO_NoPTRS(INFO_PTR(node)); /* that! -- HWL */
3269 *size = SPEC_CLOSURE_SIZE(node);
3270 *ptrs = SPEC_CLOSURE_NoPTRS(node);
3274 case INFO_GEN_U_TYPE:
3275 case INFO_GEN_N_TYPE:
3276 *size = GEN_CLOSURE_SIZE(node);
3277 *ptrs = GEN_CLOSURE_NoPTRS(node);
3281 case INFO_DYN_TYPE_U:
3282 case INFO_DYN_TYPE_N:
3285 *size = DYN_CLOSURE_SIZE(node);
3286 *ptrs = DYN_CLOSURE_NoPTRS(node);
3289 case INFO_TUPLE_TYPE:
3290 *size = TUPLE_CLOSURE_SIZE(node);
3291 *ptrs = TUPLE_CLOSURE_NoPTRS(node);
3294 case INFO_DATA_TYPE:
3295 *size = DATA_CLOSURE_SIZE(node);
3296 *ptrs = DATA_CLOSURE_NoPTRS(node);
3300 *size = IND_CLOSURE_SIZE(node);
3301 *ptrs = IND_CLOSURE_NoPTRS(node);
3304 /* ToDo: more (WDP) */
3306 /* Don't know about the others */
/* DEBUG_PRINT_NODE: dump one closure to stderr -- address, optional GA /
   cost-centre / processor-bitmask headers, info pointer and type, then
   the payload words, tagging the first `ptrs` words as pointers [P] and
   the rest as data [D].  The variable header itself is skipped. */
3314 DEBUG_PRINT_NODE(node)
3317 W_ info_ptr = INFO_PTR(node);
3318 I_ size = 0, ptrs = 0, i, vhs = 0;
3319 char *info_type = info_hdr_type(info_ptr);
3321 size_and_ptrs(node,&size,&ptrs);
3322 vhs = var_hdr_size(node);
3324 fprintf(stderr,"Node: 0x%lx", (W_) node);
3327 fprintf(stderr," [GA: 0x%lx]",GA(node));
3330 #if defined(PROFILING)
3331 fprintf(stderr," [CC: 0x%lx]",CC_HDR(node));
3335 fprintf(stderr," [Bitmask: 0%lo]",PROCS(node));
3338 fprintf(stderr," IP: 0x%lx (%s), size %ld, %ld ptrs\n",
3339 info_ptr,info_type,size,ptrs);
3341 /* For now, we ignore the variable header */
3343 for(i=0; i < size; ++i)
3346 fprintf(stderr,"Data: ");
3349 fprintf(stderr,"\n ");
3352 fprintf(stderr," 0x%lx[P]",*(node+_FHS+vhs+i));
3354 fprintf(stderr," %lu[D]",*(node+_FHS+vhs+i));
3356 fprintf(stderr, "\n");
/* INFO_MASK: marker bit OR'd into a node's info pointer while DEBUG_TREE
   is visiting it, to cut cycles.  NOTE(review): assumes the top bit of a
   32-bit info pointer is never a legitimate address bit -- confirm. */
3360 #define INFO_MASK 0x80000000
/* DEBUG_TREE: recursively print the closure graph reachable from `node`,
   using INFO_MASK as a temporary visited-mark (set before recursing into
   the pointer fields, cleared afterwards -- "may be dangerous" if the
   process dies mid-walk). */
3366 W_ size = 0, ptrs = 0, i, vhs = 0;
3368 /* Don't print cycles */
3369 if((INFO_PTR(node) & INFO_MASK) != 0)
3372 size_and_ptrs(node,&size,&ptrs);
3373 vhs = var_hdr_size(node);
3375 DEBUG_PRINT_NODE(node);
3376 fprintf(stderr, "\n");
3378 /* Mark the node -- may be dangerous */
3379 INFO_PTR(node) |= INFO_MASK;
3381 for(i = 0; i < ptrs; ++i)
3382 DEBUG_TREE((P_)node[i+vhs+_FHS]);
3384 /* Unmark the node */
3385 INFO_PTR(node) &= ~INFO_MASK;
/* DEBUG_INFO_TABLE: print the interesting fields of a closure's info
   table -- entry code, size/ptrs, flush entries, and the GC entry points
   appropriate to the configured collector (copying or compacting). */
3390 DEBUG_INFO_TABLE(node)
3393 W_ info_ptr = INFO_PTR(node);
3394 char *ip_type = info_hdr_type(info_ptr);
3396 fprintf(stderr,"%s Info Ptr @0x%lx; Entry: 0x%lx; Size: %lu; Ptrs: %lu\n\n",
3397 ip_type,info_ptr,(W_) ENTRY_CODE(info_ptr),INFO_SIZE(info_ptr),INFO_NoPTRS(info_ptr));
3399 fprintf(stderr,"Enter Flush Entry: 0x%lx;\tExit Flush Entry: 0x%lx\n",INFO_FLUSHENT(info_ptr),INFO_FLUSH(info_ptr));
3402 #if defined(PROFILING)
3403 fprintf(stderr,"Cost Centre (???): 0x%lx\n",INFO_CAT(info_ptr));
3406 #if defined(_INFO_COPYING)
3407 fprintf(stderr,"Evacuate Entry: 0x%lx;\tScavenge Entry: 0x%lx\n",
3408 INFO_EVAC_2S(info_ptr),INFO_SCAV_2S(info_ptr));
3411 #if defined(_INFO_COMPACTING)
3412 fprintf(stderr,"Scan Link: 0x%lx;\tScan Move: 0x%lx\n",
3413 (W_) INFO_SCAN_LINK_1S(info_ptr), (W_) INFO_SCAN_MOVE_1S(info_ptr));
3414 fprintf(stderr,"Mark: 0x%lx;\tMarked: 0x%lx;\t",
3415 (W_) INFO_MARK_1S(info_ptr), (W_) INFO_MARKED_1S(info_ptr));
3416 #if 0 /* avoid INFO_TYPE */
3417 if(BASE_INFO_TYPE(info_ptr)==INFO_SPEC_TYPE)
3418 fprintf(stderr,"plus specialised code\n");
3420 fprintf(stderr,"Marking: 0x%lx\n",(W_) INFO_MARKING_1S(info_ptr));
3428 The remaining debugging routines are more or less specific for GrAnSim.
3431 #if defined(GRAN) && defined(GRAN_CHECK)
/* DEBUG_CURR_THREADQ: print the runnable-thread queue of the processor
   GrAnSim currently simulates. */
3433 DEBUG_CURR_THREADQ(verbose)
3436 fprintf(stderr,"Thread Queue on proc %d: ", CurrentProc);
3437 DEBUG_THREADQ(ThreadQueueHd, verbose);
/* DEBUG_THREADQ: walk a TSO chain via TSO_LINK and print each thread's
   address; prints "NIL" for an empty queue. */
3441 DEBUG_THREADQ(closure, verbose)
3447 fprintf(stderr,"Thread Queue: ");
3448 for (x=closure; x!=Nil_closure; x=TSO_LINK(x))
3452 fprintf(stderr," 0x%x",x);
3454 if (closure==Nil_closure)
3455 fprintf(stderr,"NIL\n");
3457 fprintf(stderr,"\n");
/* Names for the TSO_TYPE values, indexed by type code; the order must
   stay in sync with the T_* constants (see note below). */
3460 /* Check with Threads.lh */
3461 static char *type_name[] = { "T_MAIN", "T_REQUIRED", "T_ADVISORY", "T_FAIL"};
/* DEBUG_TSO: pretty-print every field of a TSO to stderr -- identity,
   type (with the GRAN_CHECK FETCH_MASK_TSO "SLEEPING" bit decoded), saved
   PC/ARG/SWITCH registers, and the GrAnSim accounting fields.
   FIX(review): the Nil_closure message below had a %x conversion with no
   matching argument (undefined behavior per the C fprintf contract); it
   now passes `closure`. */
3464 DEBUG_TSO(closure,verbose)
3469 if (closure==Nil_closure) {
3470 fprintf(stderr,"TSO at 0x%x is Nil_closure!\n",closure);
3474 fprintf(stderr,"TSO at 0x%x has the following contents:\n",closure);
3476 fprintf(stderr,"> Name: 0x%x",TSO_NAME(closure));
3477 fprintf(stderr,"\tLink: 0x%x\n",TSO_LINK(closure));
3478 fprintf(stderr,"> Id: 0x%x",TSO_ID(closure));
3479 #if defined(GRAN_CHECK) && defined(GRAN)
3481 fprintf(stderr,"\tType: %s %s\n",
3482 type_name[TSO_TYPE(closure) & ~FETCH_MASK_TSO],
3483 (TSO_TYPE(closure) & FETCH_MASK_TSO) ? "SLEEPING" : "");
3485 fprintf(stderr,"\tType: %s\n",type_name[TSO_TYPE(closure)]);
3487 fprintf(stderr,"\tType: %s\n",type_name[TSO_TYPE(closure)]);
3489 fprintf(stderr,"> PC1: 0x%x",TSO_PC1(closure));
3490 fprintf(stderr,"\tPC2: 0x%x\n",TSO_PC2(closure));
3491 fprintf(stderr,"> ARG1: 0x%x",TSO_ARG1(closure));
3492 fprintf(stderr,"\tARG2: 0x%x\n",TSO_ARG2(closure));
3493 fprintf(stderr,"> SWITCH: 0x%x\n", TSO_SWITCH(closure));
3496 fprintf(stderr,"} LOCKED: 0x%x",TSO_LOCKED(closure));
3497 fprintf(stderr,"\tSPARKNAME: 0x%x\n", TSO_SPARKNAME(closure));
3498 fprintf(stderr,"} STARTEDAT: 0x%x", TSO_STARTEDAT(closure));
3499 fprintf(stderr,"\tEXPORTED: 0x%x\n", TSO_EXPORTED(closure));
3500 fprintf(stderr,"} BASICBLOCKS: 0x%x", TSO_BASICBLOCKS(closure));
3501 fprintf(stderr,"\tALLOCS: 0x%x\n", TSO_ALLOCS(closure));
3502 fprintf(stderr,"} EXECTIME: 0x%x", TSO_EXECTIME(closure));
3503 fprintf(stderr,"\tFETCHTIME: 0x%x\n", TSO_FETCHTIME(closure));
3504 fprintf(stderr,"} FETCHCOUNT: 0x%x", TSO_FETCHCOUNT(closure));
3505 fprintf(stderr,"\tBLOCKTIME: 0x%x\n", TSO_BLOCKTIME(closure));
3506 fprintf(stderr,"} BLOCKCOUNT: 0x%x", TSO_BLOCKCOUNT(closure));
3507 fprintf(stderr,"\tBLOCKEDAT: 0x%x\n", TSO_BLOCKEDAT(closure));
3508 fprintf(stderr,"} GLOBALSPARKS: 0x%x", TSO_GLOBALSPARKS(closure));
3509 fprintf(stderr,"\tLOCALSPARKS: 0x%x\n", TSO_LOCALSPARKS(closure));
/* DEBUG_EVENT: print one event queue entry (only the address line is
   visible here; the verbose branch falls on elided lines). */
3514 DEBUG_EVENT(event, verbose)
3521 fprintf(stderr," 0x%x",event);
/* DEBUG_EVENTQ: walk the global event queue from EventHd via EVENT_NEXT
   and print each event. */
3526 DEBUG_EVENTQ(verbose)
3531 fprintf(stderr,"Eventq (hd @0x%x):\n",EventHd);
3532 for (x=EventHd; x!=NULL; x=EVENT_NEXT(x)) {
3533 DEBUG_EVENT(x,verbose);
3536 fprintf(stderr,"NIL\n");
3538 fprintf(stderr,"\n");
/* DEBUG_SPARK: print one spark queue entry (only the address line is
   visible here; the verbose branch falls on elided lines). */
3542 DEBUG_SPARK(spark, verbose)
3549 fprintf(stderr," 0x%x",spark);
/* DEBUG_SPARKQ: walk a spark queue via SPARK_NEXT and print each spark. */
3553 DEBUG_SPARKQ(spark,verbose)
3559 fprintf(stderr,"Sparkq (hd @0x%x):\n",spark);
3560 for (x=spark; x!=NULL; x=SPARK_NEXT(x)) {
3561 DEBUG_SPARK(x,verbose);
3564 fprintf(stderr,"NIL\n");
3566 fprintf(stderr,"\n");
/* DEBUG_CURR_SPARKQ: print the spark queue of the current processor. */
3570 DEBUG_CURR_SPARKQ(verbose)
3573 DEBUG_SPARKQ(SparkQueueHd,verbose);
/* DEBUG_PROC: summarize one simulated processor -- active/inactive
   status, runnable-thread queue, current TSO (if this proc is active),
   the next pending event, and (verbose & 0x1) both spark pools.
   FIX(review): the advisory-pool label read "ADVISORY_sparks", out of
   step with "REQUIRED sparks" two lines above; the underscore is now a
   space for consistent debug output. */
3577 DEBUG_PROC(proc,verbose)
3581 fprintf(stderr,"Status of proc %d at time %d (0x%x): %s\n",
3582 proc,CurrentTime[proc],CurrentTime[proc],
3583 (CurrentProc==proc)?"ACTIVE":"INACTIVE");
3584 DEBUG_THREADQ(RunnableThreadsHd[proc],verbose & 0x2);
3585 if ( (CurrentProc==proc) )
3586 DEBUG_TSO(CurrentTSO,1);
3589 fprintf(stderr,"Next event (%s) is on proc %d\n",
3590 event_names[EVENT_TYPE(EventHd)],EVENT_PROC(EventHd));
3592 if (verbose & 0x1) {
3593 fprintf(stderr,"\nREQUIRED sparks: ");
3594 DEBUG_SPARKQ(PendingSparksHd[proc][REQUIRED_POOL],1);
3595 fprintf(stderr,"\nADVISORY sparks: ");
3596 DEBUG_SPARKQ(PendingSparksHd[proc][ADVISORY_POOL],1);
/* Interactive-debugger shorthands: tiny wrappers meant to be called by
   hand from gdb (DCT = dump current TSO, DCP = dump current proc,
   DEQ = dump event queue, DSQ = dump current spark queue, DN = print
   one node). */
3600 /* Debug CurrentTSO */
3603 fprintf(stderr,"Current Proc: %d\n",CurrentProc);
3604 DEBUG_TSO(CurrentTSO,1);
3607 /* Debug Current Processor */
3609 DCP(){ DEBUG_PROC(CurrentProc,2); }
3611 /* Shorthand for debugging event queue */
3613 DEQ() { DEBUG_EVENTQ(1); }
3615 /* Shorthand for debugging spark queue */
3617 DSQ() { DEBUG_CURR_SPARKQ(1); }
3619 /* Shorthand for printing a node */
3621 DN(P_ node) { DEBUG_PRINT_NODE(node); }
3627 %****************************************************************************
3629 \subsection[qp-profile]{Quasi-Parallel Profiling}
3631 %****************************************************************************
/* qp_elapsed_time (header on an elided line): elapsed *virtual* (user
   CPU) time in milliseconds, for QP profile timestamps. */
3638 /* *Virtual* Time in milliseconds */
3642 return ((long) (usertime() * 1e3));
/* init_qp_profiling: open the quasi-parallel profile file (name built
   from QP_FILENAME_FMT and the program name), then write a header line:
   the full command line, the relevant +RTS flags, and a timestamp. */
3646 init_qp_profiling(STG_NO_ARGS)
3649 char qp_filename[STATS_FILENAME_MAXLEN];
3651 sprintf(qp_filename, QP_FILENAME_FMT, prog_argv[0]);
3652 if ((qp_file = fopen(qp_filename,"w")) == NULL ) {
3653 fprintf(stderr, "Can't open quasi-parallel profile report file %s\n",
/* Header: program name and arguments, space-separated. */
3657 fputs(prog_argv[0], qp_file);
3658 for(i = 1; prog_argv[i]; i++) {
3659 fputc(' ', qp_file);
3660 fputs(prog_argv[i], qp_file);
3662 fprintf(qp_file, "+RTS -C%ld -t%ld\n"
3663 , RTSflags.ConcFlags.ctxtSwitchTime
3664 , RTSflags.ConcFlags.maxThreads);
3666 fputs(time_str(), qp_file);
3667 fputc('\n', qp_file);
/* QP_Event0: log a "**" QP event -- thread id plus the node's info
   pointer -- with a virtual-time stamp. */
3672 QP_Event0(tid, node)
3676 fprintf(qp_file, "%lu ** %lu 0x%lx\n", qp_elapsed_time(), tid, INFO_PTR(node));
/* QP_Event1: log a one-thread QP event (event string, thread id+name). */
3680 QP_Event1(event, tso)
3684 fprintf(qp_file, "%lu %s %lu 0x%lx\n", qp_elapsed_time(), event,
3685 TSO_ID(tso), (W_) TSO_NAME(tso));
/* QP_Event2: log a two-thread QP event (e.g. one thread acting on
   another): event string, then id+name for both TSOs. */
3689 QP_Event2(event, tso1, tso2)
3693 fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n", qp_elapsed_time(), event,
3694 TSO_ID(tso1), (W_) TSO_NAME(tso1), TSO_ID(tso2), (W_) TSO_NAME(tso2));
3699 #if defined(CONCURRENT) && !defined(GRAN)
3700 /* romoluSnganpu' SamuS! */
/* Dummy GranSim state for the CONCURRENT-without-GRAN build: a single
   fake processor and idle-proc bookkeeping, so shared code links. */
2702 unsigned CurrentProc = 0;
2703 W_ IdleProcs = ~0l, Idlers = 32;
/* No-op GranSim stubs for the CONCURRENT-without-GRAN build: the
   simulator hooks compile away to nothing.  GranSimFetch returns a large
   dummy cost (9999999).  NOTE(review): empty bodies fall on elided
   lines of this listing. */
2706 GranSimAllocate(I_ n, P_ node, W_ liveness)
2710 GranSimUnallocate(W_ n, P_ node, W_ liveness)
2714 GranSimExec(W_ ariths, W_ branches, W_ loads, W_ stores, W_ floats)
2718 GranSimFetch(P_ node /* , liveness_mask */ )
2719 /* I_ liveness_mask; */
2720 { return(9999999); }
2723 GranSimSpark(W_ local, P_ node)
2728 GranSimSparkAt(spark,where,identifier)
2730 P_ where; /* This should be a node; alternatively could be a GA */
3736 GranSimBlock(STG_NO_ARGS)