2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1995
4 %************************************************************************
6 \section[Threads.lc]{Thread Control Routines}
8 %************************************************************************
10 %************************************************************************
12 \subsection[thread-overview]{Overview of the Thread Management System}
14 %************************************************************************
16 %************************************************************************
18 \subsection[thread-decls]{Thread Declarations}
20 %************************************************************************
22 % I haven't checked if GRAN can work with QP profiling. But as we use our
23 % own profiling (GR profiling) that should be irrelevant. -- HWL
27 #if defined(CONCURRENT)
29 # define NON_POSIX_SOURCE /* so says Solaris */
37 static void init_qp_profiling(STG_NO_ARGS); /* forward decl */
40 @AvailableStack@ is used to determine whether an existing stack can be
41 reused without new allocation, so reducing garbage collection, and
42 stack setup time. At present, it is only used for the first stack
43 chunk of a thread, the one that's got @StkOChunkSize@ words.
46 P_ AvailableStack = Nil_closure;
47 P_ AvailableTSO = Nil_closure;
50 Macros for dealing with the new and improved GA field for simulating
51 parallel execution. Based on @CONCURRENT@ package. The GA field now
52 contains a mask, where the n-th bit stands for the n-th processor,
53 where this data can be found. In case of multiple copies, several bits
54 are set. The total number of processors is bounded by @MAX_PROC@,
55 which should be <= the length of a word in bits. -- HWL
58 /* mattson thinks this is obsolete */
60 # if 0 && defined(GRAN)
61 extern FILE *main_statsfile; /* Might be of general interest HWL */
63 typedef unsigned long TIME;
64 typedef unsigned char PROC;
65 typedef unsigned char EVTTYPE;
69 # define max(a,b) (a>b?a:b)
75 for (i=0; i<MAX_PROC && !IS_LOCAL_TO(ga,i); i++) ;
80 /* NB: This takes a *node* rather than just a ga as input */
83 { return (ga_to_proc(PROCS(node))); } /* Access the GA field of the node */
86 no_of_copies(P_ node) /* DaH lo'lu'Qo'; currently unused */
89 for (i=0, n=0; i<MAX_PROC; i++)
90 if (IS_LOCAL_TO(PROCS(node),i))
96 # endif /* GRAN ; HWL */
99 %****************************************************************
101 \subsection[thread-getthread]{The Thread Scheduler}
103 %****************************************************************
105 This is the heart of the thread scheduling code.
108 # if defined(GRAN_CHECK) && defined(GRAN)
113 W_ event_trace_all = 0;
115 STGRegisterTable *CurrentRegTable = NULL;
116 P_ CurrentTSO = NULL;
118 # if defined(GRAN) /* HWL */
120 unsigned CurrentProc = 0;
121 W_ IdleProcs = ~0L, Idlers = MAX_PROC;
123 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
124 # define FETCH_MASK_TSO 0x08000000 /* only bits 0, 1, 2 should be used */
128 I_ DoFairSchedule = 0;
129 I_ DoReScheduleOnFetch = 0;
130 I_ DoStealThreadsFirst = 0;
131 I_ SimplifiedFetch = 0;
132 I_ DoAlwaysCreateThreads = 0;
133 I_ DoGUMMFetching = 0;
134 I_ DoThreadMigration = 0;
135 I_ FetchStrategy = 4;
136 I_ PreferSparksOfLocalNodes = 0;
138 # if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
140 I_ PrintFetchMisses = 0, fetch_misses = 0;
144 I_ nUPDs = 0, nUPDs_old = 0, nUPDs_new = 0, nUPDs_BQ = 0, nPAPs = 0,
149 I_ do_gr_profile = 0; /* Full .gr profile or only END events? */
150 I_ no_gr_profile = 0; /* Don't create any .gr file at all? */
151 I_ do_sp_profile = 0;
152 I_ do_gr_migration = 0;
154 P_ RunnableThreadsHd[MAX_PROC];
155 P_ RunnableThreadsTl[MAX_PROC];
157 P_ WaitThreadsHd[MAX_PROC];
158 P_ WaitThreadsTl[MAX_PROC];
160 sparkq PendingSparksHd[MAX_PROC][SPARK_POOLS];
161 sparkq PendingSparksTl[MAX_PROC][SPARK_POOLS];
163 W_ CurrentTime[MAX_PROC]; /* Per PE clock */
165 # if defined(GRAN_CHECK) && defined(GRAN)
166 P_ BlockedOnFetch[MAX_PROC]; /* HWL-CHECK */
169 I_ OutstandingFetches[MAX_PROC];
171 W_ SparksAvail = 0; /* How many sparks are available */
172 W_ SurplusThreads = 0; /* How many excess threads are there */
174 StgBool NeedToReSchedule = StgFalse; /* Do we need to reschedule following a fetch? */
176 /* Communication Cost Variables -- set in main program */
178 W_ gran_latency = LATENCY, gran_additional_latency = ADDITIONAL_LATENCY,
179 gran_fetchtime = FETCHTIME,
180 gran_lunblocktime = LOCALUNBLOCKTIME, gran_gunblocktime = GLOBALUNBLOCKTIME,
181 gran_mpacktime = MSGPACKTIME, gran_munpacktime = MSGUNPACKTIME,
184 W_ gran_threadcreatetime = THREADCREATETIME,
185 gran_threadqueuetime = THREADQUEUETIME,
186 gran_threaddescheduletime = THREADDESCHEDULETIME,
187 gran_threadscheduletime = THREADSCHEDULETIME,
188 gran_threadcontextswitchtime = THREADCONTEXTSWITCHTIME;
190 /* Instruction Cost Variables -- set in main program */
192 W_ gran_arith_cost = ARITH_COST, gran_branch_cost = BRANCH_COST,
193 gran_load_cost = LOAD_COST, gran_store_cost = STORE_COST,
194 gran_float_cost = FLOAT_COST, gran_heapalloc_cost = 0;
196 W_ max_proc = MAX_PROC;
198 /* Granularity event types' names for output */
200 char *event_names[] =
201 { "STARTTHREAD", "CONTINUETHREAD", "RESUMETHREAD",
202 "MOVESPARK", "MOVETHREAD", "FINDWORK",
203 "FETCHNODE", "FETCHREPLY"
207 /* Prototypes of GrAnSim debugging functions */
208 void DEBUG_PRINT_NODE PROTO((P_));
209 void DEBUG_TREE PROTO((P_));
210 void DEBUG_INFO_TABLE PROTO((P_));
211 void DEBUG_CURR_THREADQ PROTO((I_));
212 void DEBUG_THREADQ PROTO((P_, I_));
213 void DEBUG_TSO PROTO((P_, I_));
214 void DEBUG_EVENT PROTO((eventq, I_));
215 void DEBUG_SPARK PROTO((sparkq, I_));
216 void DEBUG_SPARKQ PROTO((sparkq, I_));
217 void DEBUG_CURR_SPARKQ PROTO((I_));
218 void DEBUG_PROC PROTO((I_, I_));
219 void DCT(STG_NO_ARGS);
220 void DCP(STG_NO_ARGS);
221 void DEQ(STG_NO_ARGS);
222 void DSQ(STG_NO_ARGS);
224 void HandleFetchRequest PROTO((P_, PROC, P_));
225 # endif /* GRAN ; HWL */
227 #if defined(GRAN_CHECK) && defined(GRAN)
228 static eventq DelayedEventHd = NULL, DelayedEventTl = NULL;
230 static I_ noOfEvents = 0;
231 static I_ event_counts[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
234 TIME SparkStealTime();
236 /* Fcts for manipulating event queues have been deleted -- HWL */
237 /* ---------------------------------- */
245 fprintf(stderr,"Spark: NIL\n");
247 fprintf(stderr,"Spark: Node 0x%lx, Name 0x%lx, Exported %s, Prev 0x%x, Next 0x%x\n",
248 (W_) SPARK_NODE(spark), SPARK_NAME(spark),
249 ((SPARK_EXPORTED(spark))?"True":"False"),
250 SPARK_PREV(spark), SPARK_NEXT(spark) );
253 static print_sparkq(hd)
258 fprintf(stderr,"Spark Queue with root at %x:\n",hd);
259 for (x=hd; x!=NULL; x=SPARK_NEXT(x)) {
264 static print_event(event)
269 fprintf(stderr,"Evt: NIL\n");
271 fprintf(stderr,"Evt: %s (%u), PE %u [%u], Time %lu, TSO 0x%lx, node 0x%lx\n",
272 event_names[EVENT_TYPE(event)],EVENT_TYPE(event),
273 EVENT_PROC(event), EVENT_CREATOR(event),
274 EVENT_TIME(event), EVENT_TSO(event), EVENT_NODE(event) /*,
275 EVENT_SPARK(event), EVENT_NEXT(event)*/ );
279 static print_eventq(hd)
284 fprintf(stderr,"Event Queue with root at %x:\n",hd);
285 for (x=hd; x!=NULL; x=EVENT_NEXT(x)) {
290 /* ---------------------------------- */
293 static eventq getnextevent()
295 static eventq entry = NULL;
299 fprintf(stderr,"No next event\n");
300 exit(EXIT_FAILURE); /* ToDo: abort()? EXIT??? */
306 #if defined(GRAN_CHECK) && defined(GRAN)
307 if (debug & 0x20) { /* count events */
309 event_counts[EVENT_TYPE(EventHd)]++;
314 EventHd = EVENT_NEXT(EventHd);
318 /* ToDo: replace malloc/free with a free list */
320 static insert_event(newentry)
323 EVTTYPE evttype = EVENT_TYPE(newentry);
326 /* Search the queue and insert at the right point:
327 FINDWORK before everything, CONTINUETHREAD after everything.
329 This ensures that we find any available work after all threads have
330 executed the current cycle. This level of detail would normally be
331 irrelevant, but matters for ridiculously low latencies...
338 for (event = EventHd, prev=&EventHd; event != NULL;
339 prev = &(EVENT_NEXT(event)), event = EVENT_NEXT(event))
341 if(evttype == FINDWORK ? (EVENT_TIME(event) >= EVENT_TIME(newentry)) :
342 evttype == CONTINUETHREAD ? (EVENT_TIME(event) > EVENT_TIME(newentry)) :
343 (EVENT_TIME(event) > EVENT_TIME(newentry) ||
344 (EVENT_TIME(event) == EVENT_TIME(newentry) &&
345 EVENT_TYPE(event) != FINDWORK )))
348 EVENT_NEXT(newentry) = event;
357 static newevent(proc,creator,time,evttype,tso,node,spark)
365 eventq newentry = (eventq) xmalloc(sizeof(struct event));
367 EVENT_PROC(newentry) = proc;
368 EVENT_CREATOR(newentry) = creator;
369 EVENT_TIME(newentry) = time;
370 EVENT_TYPE(newentry) = evttype;
371 EVENT_TSO(newentry) = tso;
372 EVENT_NODE(newentry) = node;
373 EVENT_SPARK(newentry) = spark;
374 EVENT_NEXT(newentry) = NULL;
376 insert_event(newentry);
382 P_ RunnableThreadsHd = Nil_closure;
383 P_ RunnableThreadsTl = Nil_closure;
385 P_ WaitingThreadsHd = Nil_closure;
386 P_ WaitingThreadsTl = Nil_closure;
388 PP_ PendingSparksBase[SPARK_POOLS];
389 PP_ PendingSparksLim[SPARK_POOLS];
391 PP_ PendingSparksHd[SPARK_POOLS];
392 PP_ PendingSparksTl[SPARK_POOLS];
394 # endif /* GRAN ; HWL */
396 static jmp_buf scheduler_loop;
398 I_ MaxThreads = DEFAULT_MAX_THREADS;
399 I_ required_thread_count = 0;
400 I_ advisory_thread_count = 0;
402 EXTFUN(resumeThread);
404 P_ NewThread PROTO((P_, W_));
406 I_ context_switch = 0;
408 I_ contextSwitchTime = CS_MIN_MILLISECS; /* In milliseconds */
414 I_ MaxLocalSparks = DEFAULT_MAX_LOCAL_SPARKS;
415 I_ SparkLimit[SPARK_POOLS];
417 extern I_ doSanityChks;
418 extern void checkAStack(STG_NO_ARGS);
421 initThreadPools(size)
424 SparkLimit[ADVISORY_POOL] = SparkLimit[REQUIRED_POOL] = size;
425 if ((PendingSparksBase[ADVISORY_POOL] = (PP_) malloc(size * sizeof(P_))) == NULL)
427 if ((PendingSparksBase[REQUIRED_POOL] = (PP_) malloc(size * sizeof(P_))) == NULL)
429 PendingSparksLim[ADVISORY_POOL] = PendingSparksBase[ADVISORY_POOL] + size;
430 PendingSparksLim[REQUIRED_POOL] = PendingSparksBase[REQUIRED_POOL] + size;
440 ScheduleThreads(topClosure)
446 #if defined(USE_COST_CENTRES) || defined(GUM)
447 if (time_profiling || contextSwitchTime > 0) {
448 if (initialize_virtual_timer(tick_millisecs)) {
450 if (contextSwitchTime > 0) {
451 if (initialize_virtual_timer(contextSwitchTime)) {
454 fprintf(stderr, "Can't initialize virtual timer.\n");
458 context_switch = 0 /* 1 HWL */;
460 #if defined(GRAN_CHECK) && defined(GRAN) /* HWL */
461 if ( debug & 0x40 ) {
462 fprintf(stderr,"D> Doing init in ScheduleThreads now ...\n");
466 #if defined(GRAN) /* KH */
467 for (i=0; i<max_proc; i++)
469 RunnableThreadsHd[i] = RunnableThreadsTl[i] = Nil_closure;
470 WaitThreadsHd[i] = WaitThreadsTl[i] = Nil_closure;
471 PendingSparksHd[i][REQUIRED_POOL] = PendingSparksHd[i][ADVISORY_POOL] =
472 PendingSparksTl[i][REQUIRED_POOL] = PendingSparksTl[i][ADVISORY_POOL] =
475 # if defined(GRAN_CHECK)
477 BlockedOnFetch[i] = 0; /*- StgFalse; -*/ /* HWL-CHECK */
479 OutstandingFetches[i] = 0;
482 CurrentProc = MainProc;
489 * We perform GC so that a signal handler can install a new TopClosure and start
495 if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
496 /* kludge to save the top closure as a root */
497 CurrentTSO = topClosure;
498 ReallyPerformThreadGC(0, rtsTrue);
499 topClosure = CurrentTSO;
500 if ((tso = NewThread(topClosure, T_MAIN)) == NULL) {
502 fprintf(stderr, "Not enough heap for main thread\n");
507 RunnableThreadsHd = RunnableThreadsTl = tso;
509 /* NB: CurrentProc must have been set to MainProc before that! -- HWL */
510 ThreadQueueHd = ThreadQueueTl = tso;
512 # if defined(GRAN_CHECK)
513 if ( debug & 0x40 ) {
514 fprintf(stderr,"D> MainTSO has been initialized (0x%x)\n", tso);
521 DumpGranEvent(GR_START, tso);
522 sameThread = rtsTrue;
527 MAKE_BUSY(MainProc); /* Everything except the main PE is idle */
530 required_thread_count = 1;
531 advisory_thread_count = 0;
533 } /*if IAmMainThread ...*/
536 /* ----------------------------------------------------------------- */
537 /* This part is the MAIN SCHEDULER LOOP; jumped at from ReSchedule */
538 /* ----------------------------------------------------------------- */
540 if(setjmp(scheduler_loop) < 0)
543 #if defined(GRAN) && defined(GRAN_CHECK)
544 if ( debug & 0x80 ) {
545 fprintf(stderr,"D> MAIN Schedule Loop; ThreadQueueHd is ");
546 DEBUG_TSO(ThreadQueueHd,1);
547 /* if (ThreadQueueHd == MainTSO) {
548 fprintf(stderr,"D> Event Queue is now:\n");
555 if (PendingFetches != Nil_closure) {
560 if (ThreadQueueHd == Nil_closure) {
561 fprintf(stderr, "No runnable threads!\n");
564 if (DO_QP_PROF > 1 && CurrentTSO != ThreadQueueHd) {
565 QP_Event1("AG", ThreadQueueHd);
570 while (RunnableThreadsHd == Nil_closure) {
571 /* If we've no work */
572 if (WaitingThreadsHd == Nil_closure) {
574 fprintf(stderr, "No runnable threads!\n");
580 if (RunnableThreadsHd == Nil_closure) {
581 if (advisory_thread_count < MaxThreads &&
582 (PendingSparksHd[REQUIRED_POOL] < PendingSparksTl[REQUIRED_POOL] ||
583 PendingSparksHd[ADVISORY_POOL] < PendingSparksTl[ADVISORY_POOL])) {
585 * If we're here (no runnable threads) and we have pending sparks,
586 * we must have a space problem. Get enough space to turn one of
587 * those pending sparks into a thread...ReallyPerformGC doesn't
588 * return until the space is available, so it may force global GC.
589 * ToDo: Is this unnecessary here? Duplicated in ReSchedule()? --JSM
591 ReallyPerformThreadGC(THREAD_SPACE_REQUIRED, rtsTrue);
592 SAVE_Hp -= THREAD_SPACE_REQUIRED;
595 * We really have absolutely no work. Send out a fish (there may be
596 * some out there already), and wait for something to arrive. We
597 * clearly can't run any threads until a SCHEDULE or RESUME arrives,
598 * and so that's what we're hoping to see. (Of course, we still have
599 * to respond to other types of messages.)
602 sendFish(choosePE(), mytid, NEW_FISH_AGE, NEW_FISH_HISTORY,
607 } else if (PacketsWaiting()) { /* Look for incoming messages */
612 if (DO_QP_PROF > 1 && CurrentTSO != RunnableThreadsHd) {
613 QP_Event1("AG", RunnableThreadsHd);
617 if (do_gr_profile && !sameThread)
618 DumpGranEvent(GR_SCHEDULE, RunnableThreadsHd);
621 #if !GRAN /* ROUND_ROBIN */
622 CurrentTSO = RunnableThreadsHd;
623 RunnableThreadsHd = TSO_LINK(RunnableThreadsHd);
624 TSO_LINK(CurrentTSO) = Nil_closure;
626 if (RunnableThreadsHd == Nil_closure)
627 RunnableThreadsTl = Nil_closure;
630 /* This used to be Round Robin. KH.
631 I think we can ignore that, and move it down to ReSchedule instead.
633 CurrentTSO = ThreadQueueHd;
634 /* TSO_LINK(CurrentTSO) = Nil_closure; humbug */
637 /* If we're not running a timer, just leave the flag on */
638 if (contextSwitchTime > 0)
641 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
642 if (CurrentTSO == Nil_closure) {
643 fprintf(stderr,"Error: Trying to execute Nil_closure on proc %d (@ %d)\n",
644 CurrentProc,CurrentTime[CurrentProc]);
649 if (BlockedOnFetch[CurrentProc]) {
650 fprintf(stderr,"Error: Trying to execute TSO 0x%x on proc %d (@ %d) which is blocked-on-fetch by TSO 0x%x\n",
651 CurrentTSO,CurrentProc,CurrentTime[CurrentProc],BlockedOnFetch[CurrentProc]);
656 if ( (debug & 0x10) &&
657 (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) ) {
658 fprintf(stderr,"Error: Trying to execute TSO 0x%x on proc %d (@ %d) which should be asleep!\n",
659 CurrentTSO,CurrentProc,CurrentTime[CurrentProc]);
664 # if defined(__STG_TAILJUMPS__)
665 miniInterpret((StgFunPtr)resumeThread);
668 miniInterpret_debug((StgFunPtr)resumeThread, checkAStack);
670 miniInterpret((StgFunPtr)resumeThread);
671 # endif /* __STG_TAILJUMPS__ */
675 % Some remarks on GrAnSim -- HWL
677 The ReSchedule fct is the heart of GrAnSim. Based on its parameter it issues a
678 CONTINUETHREAD to carry on executing the current thread in due course or it
679 watches out for new work (e.g. called from EndThread).
681 Then it picks the next event (getnextevent) and handles it appropriately
682 (see switch construct). Note that a continue in the switch causes the next
683 event to be handled and a break causes a jmp to the scheduler_loop where
684 the TSO at the head of the current processor's runnable queue is executed.
686 ReSchedule is mostly entered from HpOverflow.lc:PerformReSchedule which is
687 itself called via the GRAN_RESCHEDULE macro in the compiler generated code.
693 ReSchedule(what_next)
694 int what_next; /* Run the current thread again? */
696 sparkq spark, nextspark;
701 #if defined(GRAN_CHECK) && defined(GRAN)
702 if ( debug & 0x80 ) {
703 fprintf(stderr,"D> Entering ReSchedule with mode %u; tso is\n",what_next);
704 DEBUG_TSO(ThreadQueueHd,1);
708 #if defined(GRAN_CHECK) && defined(GRAN)
709 if ( (debug & 0x80) || (debug & 0x40 ) )
710 if (what_next<FIND_THREAD || what_next>CHANGE_THREAD)
711 fprintf(stderr,"ReSchedule: illegal parameter %u for what_next\n",
715 /* Run the current thread again (if there is one) */
716 if(what_next==SAME_THREAD && ThreadQueueHd != Nil_closure)
718 /* A bit of a hassle if the event queue is empty, but ... */
719 CurrentTSO = ThreadQueueHd;
721 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
722 CONTINUETHREAD,CurrentTSO,Nil_closure,NULL);
724 /* This code does round-Robin, if preferred. */
725 if(DoFairSchedule && TSO_LINK(CurrentTSO) != Nil_closure)
728 DumpGranEvent(GR_DESCHEDULE,ThreadQueueHd);
729 ThreadQueueHd = TSO_LINK(CurrentTSO);
730 TSO_LINK(ThreadQueueTl) = CurrentTSO;
731 ThreadQueueTl = CurrentTSO;
732 TSO_LINK(CurrentTSO) = Nil_closure;
734 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
735 CurrentTime[CurrentProc] += gran_threadcontextswitchtime;
738 /* Schedule `next thread' which is at ThreadQueueHd now i.e. thread queue */
739 /* has been updated before that already. */
740 else if(what_next==NEW_THREAD && ThreadQueueHd != Nil_closure)
742 #if defined(GRAN_CHECK) && defined(GRAN)
743 if(DoReScheduleOnFetch)
745 fprintf(stderr,"ReSchedule(NEW_THREAD) shouldn't be used!!\n");
751 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
753 CurrentTSO = ThreadQueueHd;
754 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
755 CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
757 CurrentTime[CurrentProc] += gran_threadcontextswitchtime;
760 /* We go in here if the current thread is blocked on fetch => don't CONT */
761 else if(what_next==CHANGE_THREAD)
763 /* just fall into event handling loop for next event */
766 /* We go in here if we have no runnable threads or what_next==0 */
769 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
770 FINDWORK,Nil_closure,Nil_closure,NULL);
771 CurrentTSO = Nil_closure;
774 /* ----------------------------------------------------------------- */
775 /* This part is the EVENT HANDLING LOOP */
776 /* ----------------------------------------------------------------- */
779 /* Choose the processor with the next event */
780 event = getnextevent();
781 CurrentProc = EVENT_PROC(event);
782 if(EVENT_TIME(event) > CurrentTime[CurrentProc])
783 CurrentTime[CurrentProc] = EVENT_TIME(event);
785 MAKE_BUSY(CurrentProc);
787 #if defined(GRAN_CHECK) && defined(GRAN)
789 fprintf(stderr,"D> After getnextevent, before HandleIdlePEs\n");
792 /* Deal with the idlers */
795 #if defined(GRAN_CHECK) && defined(GRAN)
797 (event_trace_all || EVENT_TYPE(event) != CONTINUETHREAD ||
802 switch (EVENT_TYPE(event))
804 /* Should just be continuing execution */
806 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
807 if ( (debug & 0x04) && BlockedOnFetch[CurrentProc]) {
808 fprintf(stderr,"Warning: Discarding CONTINUETHREAD on blocked proc %u @ %u\n",
809 CurrentProc,CurrentTime[CurrentProc]);
814 if(ThreadQueueHd==Nil_closure)
816 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
817 FINDWORK,Nil_closure,Nil_closure,NULL);
818 continue; /* Catches superfluous CONTINUEs -- should be unnecessary */
821 break; /* fall into scheduler loop */
824 #if defined(GRAN_CHECK) && defined(GRAN)
825 if (SimplifiedFetch) {
826 fprintf(stderr,"Error: FETCHNODE events not valid with simplified fetch\n");
831 CurrentTime[CurrentProc] += gran_munpacktime;
832 HandleFetchRequest(EVENT_NODE(event),
833 EVENT_CREATOR(event),
838 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
839 if (SimplifiedFetch) {
840 fprintf(stderr,"Error: FETCHREPLY events not valid with simplified fetch\n");
845 if (TSO_TYPE(EVENT_TSO(event)) & FETCH_MASK_TSO) {
846 TSO_TYPE(EVENT_TSO(event)) &= ~FETCH_MASK_TSO;
848 fprintf(stderr,"Error: FETCHREPLY: TSO 0x%x has fetch mask not set @ %d\n",
849 CurrentTSO,CurrentTime[CurrentProc]);
855 if (BlockedOnFetch[CurrentProc]!=ThreadQueueHd) {
856 fprintf(stderr,"Error: FETCHREPLY: Proc %d (with TSO 0x%x) not blocked-on-fetch by TSO 0x%x\n",
857 CurrentProc,CurrentTSO,BlockedOnFetch[CurrentProc]);
860 BlockedOnFetch[CurrentProc] = 0; /*- StgFalse; -*/
865 /* Copy or move node to CurrentProc */
866 if (FetchNode(EVENT_NODE(event),
867 EVENT_CREATOR(event),
868 EVENT_PROC(event)) ) {
869 /* Fetch has failed i.e. node has been grabbed by another PE */
870 P_ node = EVENT_NODE(event), tso = EVENT_TSO(event);
871 PROC p = where_is(node);
874 #if defined(GRAN_CHECK) && defined(GRAN)
875 if (PrintFetchMisses) {
876 fprintf(stderr,"Fetch miss @ %lu: node 0x%x is at proc %u (rather than proc %u)\n",
877 CurrentTime[CurrentProc],node,p,EVENT_CREATOR(event));
880 #endif /* GRAN_CHECK */
882 CurrentTime[CurrentProc] += gran_mpacktime;
884 /* Count fetch again !? */
885 ++TSO_FETCHCOUNT(tso);
886 TSO_FETCHTIME(tso) += gran_fetchtime;
888 fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p]) +
891 /* Chase the grabbed node */
892 newevent(p,CurrentProc,fetchtime,FETCHNODE,tso,node,NULL);
894 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
896 BlockedOnFetch[CurrentProc] = tso; /*-StgTrue;-*/
899 TSO_TYPE(tso) |= FETCH_MASK_TSO;
902 CurrentTime[CurrentProc] += gran_mtidytime;
904 continue; /* NB: no REPLy has been processed; tso still sleeping */
907 /* -- Qapla'! Fetch has been successful; node is here, now */
908 ++TSO_FETCHCOUNT(EVENT_TSO(event));
909 TSO_FETCHTIME(EVENT_TSO(event)) += gran_fetchtime;
912 DumpGranEventAndNode(GR_REPLY,EVENT_TSO(event),
913 EVENT_NODE(event),EVENT_CREATOR(event));
915 --OutstandingFetches[CurrentProc];
916 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
917 if (OutstandingFetches[CurrentProc] < 0) {
918 fprintf(stderr,"OutstandingFetches of proc %u has become negative\n",CurrentProc);
923 if (!DoReScheduleOnFetch) {
924 CurrentTSO = EVENT_TSO(event); /* awaken blocked thread */
925 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
926 CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
927 TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] -
928 TSO_BLOCKEDAT(EVENT_TSO(event));
930 DumpGranEvent(GR_RESUME,EVENT_TSO(event));
933 /* fall through to RESUMETHREAD */
936 case RESUMETHREAD: /* Move from the blocked queue to the tail of */
937 /* the runnable queue ( i.e. Qu' SImqa'lu') */
938 TSO_BLOCKTIME(EVENT_TSO(event)) += CurrentTime[CurrentProc] -
939 TSO_BLOCKEDAT(EVENT_TSO(event));
940 StartThread(event,GR_RESUME);
944 StartThread(event,GR_START);
948 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
949 if (!DoThreadMigration) {
950 fprintf(stderr,"MOVETHREAD events should never occur without -bM\n");
954 CurrentTime[CurrentProc] += gran_munpacktime;
955 StartThread(event,GR_STOLEN);
956 continue; /* to the next event */
959 CurrentTime[CurrentProc] += gran_munpacktime;
960 spark = EVENT_SPARK(event);
962 ADD_TO_SPARK_QUEUE(spark); /* NB: this macro side-effects its arg.
963 so the assignment above is needed. */
966 DumpSparkGranEvent(SP_ACQUIRED,spark);
968 ++SparksAvail; /* Probably Temporarily */
969 /* Drop into FINDWORK */
971 if (!DoReScheduleOnFetch &&
972 (ThreadQueueHd != Nil_closure) ) { /* If we block on fetch then go */
973 continue; /* to next event (i.e. leave */
974 } /* spark in sparkq for now) */
977 if((ThreadQueueHd == Nil_closure || DoAlwaysCreateThreads)
978 && (FetchStrategy >= 2 || OutstandingFetches[CurrentProc] == 0))
981 sparkq spark_of_non_local_node = NULL;
983 /* Choose a spark from the local spark queue */
984 spark = SparkQueueHd;
986 while (spark != NULL && !found)
988 node = SPARK_NODE(spark);
989 if (!SHOULD_SPARK(node))
992 DumpSparkGranEvent(SP_PRUNED,spark);
994 assert(spark != NULL);
996 SparkQueueHd = SPARK_NEXT(spark);
997 if(SparkQueueHd == NULL)
1000 DisposeSpark(spark);
1002 spark = SparkQueueHd;
1004 /* -- node should eventually be sparked */
1005 else if (PreferSparksOfLocalNodes &&
1006 !IS_LOCAL_TO(PROCS(node),CurrentProc))
1008 /* We have seen this spark before => no local sparks */
1009 if (spark==spark_of_non_local_node) {
1014 /* Remember first non-local node */
1015 if (spark_of_non_local_node==NULL)
1016 spark_of_non_local_node = spark;
1018 /* Special case: 1 elem sparkq with non-local spark */
1019 if (spark==SparkQueueTl) {
1024 /* Put spark (non-local!) at the end of the sparkq */
1025 SPARK_NEXT(SparkQueueTl) = spark;
1026 SparkQueueHd = SPARK_NEXT(spark);
1027 SPARK_NEXT(spark) = NULL;
1028 SparkQueueTl = spark;
1030 spark = SparkQueueHd;
1038 /* We've found a node; now, create thread (DaH Qu' yIchen) */
1041 CurrentTime[CurrentProc] += gran_threadcreatetime;
1043 node = SPARK_NODE(spark);
1044 if((tso = NewThread(node, T_REQUIRED))==NULL)
1046 /* Some kind of backoff needed here in case there's too little heap */
1047 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc]+1,
1048 FINDWORK,Nil_closure,Nil_closure,NULL);
1049 ReallyPerformThreadGC(TSO_HS+TSO_CTS_SIZE,StgTrue);
1051 continue; /* to the next event, eventually */
1054 TSO_EXPORTED(tso) = SPARK_EXPORTED(spark);
1055 TSO_LOCKED(tso) = !SPARK_GLOBAL(spark);
1056 TSO_SPARKNAME(tso) = SPARK_NAME(spark);
1058 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1059 STARTTHREAD,tso,Nil_closure,NULL);
1061 assert(spark != NULL);
1063 SparkQueueHd = SPARK_NEXT(spark);
1064 if(SparkQueueHd == NULL)
1065 SparkQueueTl = NULL;
1067 DisposeSpark(spark);
1070 /* Make the PE idle if nothing sparked and we have no threads. */
1072 if(ThreadQueueHd == Nil_closure)
1073 #if defined(GRAN_CHECK) && defined(GRAN)
1075 MAKE_IDLE(CurrentProc);
1076 if ( (debug & 0x40) || (debug & 0x80) ) {
1077 fprintf(stderr,"Warning in FINDWORK handling: No work found for PROC %u\n",CurrentProc);
1081 MAKE_IDLE(CurrentProc);
1082 #endif /* GRAN_CHECK */
1084 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
1085 CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
1088 continue; /* to the next event */
1092 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
1093 if ( (debug & 0x04) &&
1094 (!DoReScheduleOnFetch && ThreadQueueHd != Nil_closure)
1096 fprintf(stderr,"Waning in FINDWORK handling:\n");
1097 fprintf(stderr,"ThreadQueueHd!=Nil_closure should never happen with !DoReScheduleOnFetch");
1100 if (FetchStrategy < 2 && OutstandingFetches[CurrentProc] != 0)
1101 continue; /* to next event */
1103 break; /* run ThreadQueueHd */
1108 fprintf(stderr,"Illegal event type %u\n",EVENT_TYPE(event));
1111 _longjmp(scheduler_loop, 1);
1116 Here follows the non-GRAN @ReSchedule@.
1122 int again; /* Run the current thread again? */
1130 * In the parallel world, we do unfair scheduling for the moment.
1131 * Ultimately, this should all be merged with the more sophisticated
1132 * GrAnSim scheduling options. (Of course, some provision should be
1133 * made for *required* threads to make sure that they don't starve,
1134 * but for now we assume that no one is running concurrent Haskell on
1135 * a multi-processor platform.)
1141 if (RunnableThreadsHd == Nil_closure)
1142 RunnableThreadsTl = CurrentTSO;
1143 TSO_LINK(CurrentTSO) = RunnableThreadsHd;
1144 RunnableThreadsHd = CurrentTSO;
1150 * In the sequential world, we assume that the whole point of running
1151 * the threaded build is for concurrent Haskell, so we provide round-robin
1156 if(RunnableThreadsHd == Nil_closure) {
1157 RunnableThreadsHd = CurrentTSO;
1159 TSO_LINK(RunnableThreadsTl) = CurrentTSO;
1160 if (DO_QP_PROF > 1) {
1161 QP_Event1("GA", CurrentTSO);
1164 RunnableThreadsTl = CurrentTSO;
1170 * Debugging code, which is useful enough (and cheap enough) to compile
1171 * in all the time. This makes sure that we don't access saved registers,
1172 * etc. in threads which are supposed to be sleeping.
1174 CurrentTSO = Nil_closure;
1175 CurrentRegTable = NULL;
1178 /* First the required sparks */
1180 for (sparkp = PendingSparksHd[REQUIRED_POOL];
1181 sparkp < PendingSparksTl[REQUIRED_POOL]; sparkp++) {
1183 if (SHOULD_SPARK(spark)) {
1184 if ((tso = NewThread(spark, T_REQUIRED)) == NULL)
1186 if (RunnableThreadsHd == Nil_closure) {
1187 RunnableThreadsHd = tso;
1189 if (do_gr_profile) {
1190 DumpGranEvent(GR_START, tso);
1191 sameThread = rtsTrue;
1195 TSO_LINK(RunnableThreadsTl) = tso;
1198 DumpGranEvent(GR_STARTQ, tso);
1201 RunnableThreadsTl = tso;
1204 QP_Event0(threadId++, spark);
1207 DumpSparkGranEvent(SP_PRUNED, threadId++);
1211 PendingSparksHd[REQUIRED_POOL] = sparkp;
1213 /* Now, almost the same thing for advisory sparks */
1215 for (sparkp = PendingSparksHd[ADVISORY_POOL];
1216 sparkp < PendingSparksTl[ADVISORY_POOL]; sparkp++) {
1218 if (SHOULD_SPARK(spark)) {
1221 /* In the parallel world, don't create advisory threads if we are
1222 * about to rerun the same thread, or already have runnable threads,
1223 * or the main thread has terminated */
1224 (RunnableThreadsHd != Nil_closure ||
1225 (required_thread_count == 0 && IAmMainThread)) ||
1227 advisory_thread_count == MaxThreads ||
1228 (tso = NewThread(spark, T_ADVISORY)) == NULL)
1230 advisory_thread_count++;
1231 if (RunnableThreadsHd == Nil_closure) {
1232 RunnableThreadsHd = tso;
1234 if (do_gr_profile) {
1235 DumpGranEvent(GR_START, tso);
1236 sameThread = rtsTrue;
1240 TSO_LINK(RunnableThreadsTl) = tso;
1243 DumpGranEvent(GR_STARTQ, tso);
1246 RunnableThreadsTl = tso;
1249 QP_Event0(threadId++, spark);
1252 DumpSparkGranEvent(SP_PRUNED, threadId++);
1256 PendingSparksHd[ADVISORY_POOL] = sparkp;
1259 longjmp(scheduler_loop, required_thread_count == 0 ? -1 : 1);
1261 longjmp(scheduler_loop, required_thread_count == 0 && IAmMainThread ? -1 : 1);
1269 %****************************************************************************
1271 \subsection[thread-gransim-execution]{Starting, Idling and Migrating
1272 Threads (GrAnSim only)}
1274 %****************************************************************************
1276 Thread start, idle and migration code for GrAnSim (i.e. simulating multiple
1282 StartThread(event,event_type)
1284 enum gran_event_types event_type;
1286 if(ThreadQueueHd==Nil_closure)
1288 CurrentTSO = ThreadQueueHd = ThreadQueueTl = EVENT_TSO(event);
1289 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc]+gran_threadqueuetime,
1290 CONTINUETHREAD,Nil_closure,Nil_closure,NULL);
1292 DumpGranEvent(event_type,EVENT_TSO(event));
1296 TSO_LINK(ThreadQueueTl) = EVENT_TSO(event);
1297 ThreadQueueTl = EVENT_TSO(event);
1299 if(DoThreadMigration)
1303 DumpGranEvent(event_type+1,EVENT_TSO(event));
1306 CurrentTime[CurrentProc] += gran_threadqueuetime;
1310 Export work to idle PEs.
1317 if(ANY_IDLE && (SparksAvail > 0l || SurplusThreads > 0l))
1318 for(proc = 0; proc < max_proc; proc++)
1321 if(DoStealThreadsFirst &&
1322 (FetchStrategy >= 4 || OutstandingFetches[proc] == 0))
1324 if (SurplusThreads > 0l) /* Steal a thread */
1331 if(SparksAvail > 0l &&
1332 (FetchStrategy >= 3 || OutstandingFetches[proc] == 0)) /* Steal a spark */
1335 if (IS_IDLE(proc) && SurplusThreads > 0l &&
1336 (FetchStrategy >= 4 || OutstandingFetches[proc] == 0)) /* Steal a thread */
1342 Steal a spark and schedule moving it to proc. We want to look at PEs in
1343 clock order -- most retarded first. Currently sparks are only stolen from
1344 the @ADVISORY_POOL@ never from the @REQUIRED_POOL@. Eventually, this should
1345 be changed to first steal from the former then from the latter.
1352 sparkq spark, prev, next;
1354 TIME times[MAX_PROC], stealtime;
1355 unsigned ntimes=0, i, j;
1357 /* times shall contain processors from which we may steal sparks */
1358 for(p=0; p < max_proc; ++p)
1360 PendingSparksHd[p][ADVISORY_POOL] != NULL &&
1361 CurrentTime[p] <= CurrentTime[CurrentProc])
1362 times[ntimes++] = p;
1365 for(i=0; i < ntimes; ++i)
1366 for(j=i+1; j < ntimes; ++j)
1367 if(CurrentTime[times[i]] > CurrentTime[times[j]])
1369 unsigned temp = times[i];
1370 times[i] = times[j];
1374 for(i=0; i < ntimes && !stolen; ++i)
1378 for(prev=NULL, spark = PendingSparksHd[p][ADVISORY_POOL];
1379 spark != NULL && !stolen;
1382 next = SPARK_NEXT(spark);
1384 if(SHOULD_SPARK(SPARK_NODE(spark)))
1386 /* Don't Steal local sparks */
1387 if(!SPARK_GLOBAL(spark))
1393 SPARK_NEXT(spark) = NULL;
1394 CurrentTime[p] += gran_mpacktime;
1396 stealtime = (CurrentTime[p] > CurrentTime[proc]? CurrentTime[p]: CurrentTime[proc])
1399 newevent(proc,p /* CurrentProc */,stealtime,
1400 MOVESPARK,Nil_closure,Nil_closure,spark);
1404 ++SPARK_GLOBAL(spark);
1407 DumpSparkGranEvent(SP_EXPORTED,spark);
1409 CurrentTime[p] += gran_mtidytime;
1416 DumpSparkGranEvent(SP_PRUNED,spark);
1417 DisposeSpark(spark);
1420 if(spark == PendingSparksHd[p][ADVISORY_POOL])
1421 PendingSparksHd[p][ADVISORY_POOL] = next;
1424 SPARK_NEXT(prev) = next;
1427 if(PendingSparksHd[p][ADVISORY_POOL] == NULL)
1428 PendingSparksTl[p][ADVISORY_POOL] = NULL;
1433 Steal a spark and schedule moving it to proc.
1441 TIME times[MAX_PROC], stealtime;
1442 unsigned ntimes=0, i, j;
1444 /* Hunt for a thread */
1446 /* times shall contain processors from which we may steal threads */
1447 for(p=0; p < max_proc; ++p)
1448 if(proc != p && RunnableThreadsHd[p] != Nil_closure &&
1449 CurrentTime[p] <= CurrentTime[CurrentProc])
1450 times[ntimes++] = p;
1453 for(i=0; i < ntimes; ++i)
1454 for(j=i+1; j < ntimes; ++j)
1455 if(CurrentTime[times[i]] > CurrentTime[times[j]])
1457 unsigned temp = times[i];
1458 times[i] = times[j];
1462 for(i=0; i < ntimes; ++i)
1466 /* Steal the first exportable thread in the runnable queue after the */
1469 if(RunnableThreadsHd[p] != Nil_closure)
1471 for(prev = RunnableThreadsHd[p], thread = TSO_LINK(RunnableThreadsHd[p]);
1472 thread != Nil_closure && TSO_LOCKED(thread);
1473 prev = thread, thread = TSO_LINK(thread))
1476 if(thread != Nil_closure) /* Take thread out of runnable queue */
1478 TSO_LINK(prev) = TSO_LINK(thread);
1480 TSO_LINK(thread) = Nil_closure;
1482 if(RunnableThreadsTl[p] == thread)
1483 RunnableThreadsTl[p] = prev;
1485 /* Turn magic constants into params !? -- HWL */
1487 CurrentTime[p] += 5l * gran_mpacktime;
1489 stealtime = (CurrentTime[p] > CurrentTime[proc]? CurrentTime[p]: CurrentTime[proc])
1490 + SparkStealTime() + 4l * gran_additional_latency
1491 + 5l * gran_munpacktime;
1493 /* Move the thread */
1494 SET_PROCS(thread,PE_NUMBER(proc));
1496 /* Move from one queue to another */
1497 newevent(proc,p,stealtime,MOVETHREAD,thread,Nil_closure,NULL);
1502 DumpRawGranEvent(p,GR_STEALING,TSO_ID(thread));
1504 CurrentTime[p] += 5l * gran_mtidytime;
/* SparkStealTime: estimate the simulated latency (in TIME units) of
   stealing a spark, scaled down as more PEs become idle.
   NOTE(review): fragmentary excerpt -- the enclosing braces and the
   debug guard around the fprintf are elided in this view. */
1513 TIME SparkStealTime()
1515 double fishdelay, sparkdelay, latencydelay;
/* Base delay: half the number of processors. */
1516 fishdelay = (double)max_proc/2;
/* Interpolates between fishdelay (Idlers == 0) and 1 (Idlers == max_proc-1). */
1517 sparkdelay = fishdelay - ((fishdelay-1)/(double)(max_proc-1))*(double)Idlers;
1518 latencydelay = sparkdelay*((double)gran_latency);
1521 fprintf(stderr,"fish delay = %g, spark delay = %g, latency delay = %g, Idlers = %u\n",
1522 fishdelay,sparkdelay,latencydelay,Idlers);
1524 return((TIME)latencydelay);
1526 #endif /* GRAN ; HWL */
1530 %****************************************************************************
1532 \subsection[thread-execution]{Executing Threads}
1534 %****************************************************************************
1537 EXTDATA_RO(StkO_info);
1538 EXTDATA_RO(TSO_info);
1539 EXTDATA_RO(WorldStateToken_closure);
1541 EXTFUN(EnterNodeCode);
1542 UNVEC(EXTFUN(stopThreadDirectReturn);,EXTDATA(vtbl_stopStgWorld);)
1546 /* Slow but relatively reliable method uses xmalloc */
1547 /* Eventually change that to heap allocated sparks. */
1550 NewSpark(node,name,local)
1554 extern P_ xmalloc();
1555 sparkq newspark = (sparkq) xmalloc(sizeof(struct spark));
1556 SPARK_PREV(newspark) = SPARK_NEXT(newspark) = NULL;
1557 SPARK_NODE(newspark) = node;
1558 SPARK_NAME(newspark) = name;
1559 SPARK_GLOBAL(newspark) = !local;
1572 /* Heap-allocated disposal.
1574 FREEZE_MUT_HDR(spark, ImMutArrayOfPtrs);
1575 SPARK_PREV(spark) = SPARK_NEXT(spark) = SPARK_NODE(spark) = Nil_closure;
1579 DisposeSparkQ(spark)
1585 DisposeSparkQ(SPARK_NEXT(spark));
1588 if (SparksAvail < 0)
1589 fprintf(stderr,"DisposeSparkQ: SparksAvail<0 after disposing sparkq @ 0x%lx\n", spark);
1597 I_ StkOChunkSize = DEFAULT_STKO_CHUNK_SIZE;
1599 /* Create a new TSO, with the specified closure to enter and thread type */
1602 NewThread(topClosure, type)
1608 if (AvailableTSO != Nil_closure) {
1611 SET_PROCS(tso,ThisPE); /* Allocate it locally! */
1613 AvailableTSO = TSO_LINK(tso);
1614 } else if (SAVE_Hp + TSO_HS + TSO_CTS_SIZE > SAVE_HpLim) {
1617 ALLOC_TSO(TSO_HS,BYTES_TO_STGWORDS(sizeof(STGRegisterTable)),
1618 BYTES_TO_STGWORDS(sizeof(StgDouble)));
1620 SAVE_Hp += TSO_HS + TSO_CTS_SIZE;
1621 SET_TSO_HDR(tso, TSO_info, CCC);
1624 TSO_LINK(tso) = Nil_closure;
1625 TSO_CCC(tso) = (CostCentre)STATIC_CC_REF(CC_MAIN);
1626 TSO_NAME(tso) = (P_) INFO_PTR(topClosure); /* A string would be nicer -- JSM */
1627 TSO_ID(tso) = threadId++;
1628 TSO_TYPE(tso) = type;
1629 TSO_PC1(tso) = TSO_PC2(tso) = EnterNodeCode;
1630 TSO_ARG1(tso) = TSO_EVENT(tso) = 0;
1631 TSO_SWITCH(tso) = NULL;
1633 #ifdef DO_REDN_COUNTING
1638 #if defined(GRAN) || defined(PAR)
1639 TSO_SPARKNAME(tso) = 0;
1641 TSO_STARTEDAT(tso) = CurrentTime[CurrentProc];
1643 TSO_STARTEDAT(tso) = CURRENT_TIME;
1645 TSO_EXPORTED(tso) = 0;
1646 TSO_BASICBLOCKS(tso) = 0;
1647 TSO_ALLOCS(tso) = 0;
1648 TSO_EXECTIME(tso) = 0;
1649 TSO_FETCHTIME(tso) = 0;
1650 TSO_FETCHCOUNT(tso) = 0;
1651 TSO_BLOCKTIME(tso) = 0;
1652 TSO_BLOCKCOUNT(tso) = 0;
1653 TSO_BLOCKEDAT(tso) = 0;
1654 TSO_GLOBALSPARKS(tso) = 0;
1655 TSO_LOCALSPARKS(tso) = 0;
1658 * set pc, Node (R1), liveness
1660 CurrentRegTable = TSO_INTERNAL_PTR(tso);
1661 SAVE_Liveness = LIVENESS_R1;
1662 SAVE_R1.p = topClosure;
1665 if (type == T_MAIN) {
1669 if (AvailableStack != Nil_closure) {
1670 stko = AvailableStack;
1672 SET_PROCS(stko,ThisPE);
1674 AvailableStack = STKO_LINK(AvailableStack);
1675 } else if (SAVE_Hp + STKO_HS + StkOChunkSize > SAVE_HpLim) {
1678 ALLOC_STK(STKO_HS,StkOChunkSize,0);
1680 SAVE_Hp += STKO_HS + StkOChunkSize;
1681 SET_STKO_HDR(stko, StkO_info, CCC);
1683 STKO_SIZE(stko) = StkOChunkSize + STKO_VHS;
1684 STKO_SpB(stko) = STKO_SuB(stko) = STKO_BSTK_BOT(stko) + BREL(1);
1685 STKO_SpA(stko) = STKO_SuA(stko) = STKO_ASTK_BOT(stko) + AREL(1);
1686 STKO_LINK(stko) = Nil_closure;
1687 STKO_RETURN(stko) = NULL;
1692 #ifdef DO_REDN_COUNTING
1693 STKO_ADEP(stko) = STKO_BDEP(stko) = 0;
1696 if (type == T_MAIN) {
1697 STKO_SpA(stko) -= AREL(1);
1698 *STKO_SpA(stko) = (P_) WorldStateToken_closure;
1701 SAVE_Ret = (StgRetAddr) UNVEC(stopThreadDirectReturn,vtbl_stopStgWorld);
1705 QP_Event1(do_qp_prof > 1 ? "*A" : "*G", tso);
1714 EndThread(STG_NO_ARGS)
1717 TIME now = CURRENT_TIME;
1719 #ifdef DO_REDN_COUNTING
1720 extern FILE *tickyfile;
1722 if (tickyfile != NULL) {
1723 fprintf(tickyfile, "Thread %d (%lx)\n\tA stack max. depth: %ld words\n",
1724 TSO_ID(CurrentTSO), TSO_NAME(CurrentTSO), TSO_AHWM(CurrentTSO));
1725 fprintf(tickyfile, "\tB stack max. depth: %ld words\n",
1726 TSO_BHWM(CurrentTSO));
1731 QP_Event1("G*", CurrentTSO);
1735 assert(CurrentTSO == ThreadQueueHd);
1736 ThreadQueueHd = TSO_LINK(CurrentTSO);
1738 if(ThreadQueueHd == Nil_closure)
1739 ThreadQueueTl = Nil_closure;
1741 else if (DoThreadMigration)
1746 if(TSO_TYPE(CurrentTSO)==T_MAIN)
1749 for(i=0; i < max_proc; ++i) {
1750 StgBool is_first = StgTrue;
1751 while(RunnableThreadsHd[i] != Nil_closure)
1753 /* We schedule runnable threads before killing them to */
1754 /* make the job of bookkeeping the running, runnable, */
1755 /* blocked threads easier for scripts like gr2ps -- HWL */
1757 if (do_gr_profile && !is_first)
1758 DumpRawGranEvent(i,GR_SCHEDULE,
1759 TSO_ID(RunnableThreadsHd[i]));
1761 DumpGranInfo(i,RunnableThreadsHd[i],StgTrue);
1762 RunnableThreadsHd[i] = TSO_LINK(RunnableThreadsHd[i]);
1763 is_first = StgFalse;
1767 ThreadQueueHd = Nil_closure;
1769 #if defined(GRAN_CHECK) && defined(GRAN)
1770 /* Print event stats */
1774 fprintf(stderr,"Statistics of events (total=%d):\n",
1776 for (i=0; i<=7; i++) {
1777 fprintf(stderr,"> %s (%d): \t%ld \t%f%%\n",
1778 event_names[i],i,event_counts[i],
1779 (float)(100*event_counts[i])/(float)(noOfEvents) );
1787 DumpGranInfo(CurrentProc,CurrentTSO,
1788 TSO_TYPE(CurrentTSO) != T_ADVISORY);
1790 /* Note ThreadQueueHd is Nil when the main thread terminates */
1791 if(ThreadQueueHd != Nil_closure)
1793 if (do_gr_profile && !no_gr_profile)
1794 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
1795 CurrentTime[CurrentProc] += gran_threadscheduletime;
1798 else if (do_gr_binary && TSO_TYPE(CurrentTSO)==T_MAIN &&
1800 grterminate(CurrentTime[CurrentProc]);
1805 if (do_gr_profile) {
1806 TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO);
1807 DumpGranInfo(thisPE, CurrentTSO, TSO_TYPE(CurrentTSO) != T_ADVISORY);
1811 switch (TSO_TYPE(CurrentTSO)) {
1813 required_thread_count--;
1819 #if defined(GRAN_CHECK) && defined(GRAN)
1820 if ( (debug & 0x80) || (debug & 0x40) )
1821 fprintf(stderr,"\nGRAN: I hereby terminate the main thread!\n");
1823 /* I've stolen that from the end of ReSchedule (!GRAN). HWL */
1824 longjmp(scheduler_loop, required_thread_count > 0 ? 1 : -1);
1830 required_thread_count--;
1834 advisory_thread_count--;
1842 fprintf(stderr, "EndThread: %lx unknown\n", (W_) TSO_TYPE(CurrentTSO));
1846 /* Reuse stack object space */
1847 ASSERT(STKO_LINK(SAVE_StkO) == Nil_closure);
1848 STKO_LINK(SAVE_StkO) = AvailableStack;
1849 AvailableStack = SAVE_StkO;
1851 TSO_LINK(CurrentTSO) = AvailableTSO;
1852 AvailableTSO = CurrentTSO;
1853 CurrentTSO = Nil_closure;
1854 CurrentRegTable = NULL;
1857 /* NB: Now ThreadQueueHd is either the next runnable thread on this */
1858 /* proc or it's Nil_closure. In the latter case, a FINDWORK will be */
1859 /* issued by ReSchedule. */
1860 ReSchedule(SAME_THREAD); /* back for more! */
1862 ReSchedule(0); /* back for more! */
1867 %****************************************************************************
1869 \subsection[thread-blocking]{Local Blocking}
1871 %****************************************************************************
/* Statistics hooks: each call bumps one file-global counter
   (update and PAP counts; counters are declared elsewhere in this file). */
1876 void CountnUPDs() { nUPDs += 1; }
1877 void CountnUPDs_old() { nUPDs_old += 1; }
1878 void CountnUPDs_new() { nUPDs_new += 1; }
1880 void CountnPAPs() { nPAPs += 1; }
1883 EXTDATA_RO(BQ_info);
1886 /* NB: non-GRAN version ToDo
1888 * AwakenBlockingQueue awakens a list of TSOs and FBQs.
1891 P_ PendingFetches = Nil_closure;
1894 AwakenBlockingQueue(bqe)
1901 TIME now = CURRENT_TIME;
1906 while (bqe != Nil_closure) {
1908 while (IS_MUTABLE(INFO_PTR(bqe))) {
1909 switch (INFO_TYPE(INFO_PTR(bqe))) {
1913 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", bqe, CurrentTSO);
1916 if (do_gr_profile) {
1917 DumpGranEvent(GR_RESUMEQ, bqe);
1918 switch (TSO_QUEUE(bqe)) {
1920 TSO_BLOCKTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
1923 TSO_FETCHTIME(bqe) += now - TSO_BLOCKEDAT(bqe);
1927 fprintf(stderr, "ABQ: TSO_QUEUE invalid.\n");
1932 if (last_tso == NULL) {
1933 if (RunnableThreadsHd == Nil_closure) {
1934 RunnableThreadsHd = bqe;
1936 TSO_LINK(RunnableThreadsTl) = bqe;
1940 bqe = TSO_LINK(bqe);
1944 next = BF_LINK(bqe);
1945 BF_LINK(bqe) = PendingFetches;
1946 PendingFetches = bqe;
1948 if (last_tso != NULL)
1949 TSO_LINK(last_tso) = next;
1952 fprintf(stderr, "Unexpected IP (%#lx) in blocking queue at %#lx\n",
1953 INFO_PTR(bqe), (W_) bqe);
1960 if (last_tso != NULL) {
1961 RunnableThreadsTl = last_tso;
1963 TSO_LINK(last_tso) = Nil_closure;
1971 /* NB: GRAN version only ToDo
1973 * AwakenBlockingQueue returns True if we are on the oldmutables list,
1974 * so that the update code knows what to do next.
1978 AwakenBlockingQueue(node)
1981 P_ tso = (P_) BQ_ENTRIES(node);
1990 if (tso != Nil_closure)
1994 while(tso != Nil_closure) {
1996 assert(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
2002 /* Find where the tso lives */
2003 proc = where_is(tso);
2005 if(proc == CurrentProc)
2006 notifytime = CurrentTime[CurrentProc] + gran_lunblocktime;
2009 CurrentTime[CurrentProc] += gran_mpacktime;
2010 notifytime = CurrentTime[CurrentProc] + gran_gunblocktime;
2011 CurrentTime[CurrentProc] += gran_mtidytime;
2014 /* and create a resume message */
2015 newevent(proc, CurrentProc, notifytime,
2016 RESUMETHREAD,tso,Nil_closure,NULL);
2019 tso = TSO_LINK(tso);
2020 TSO_LINK(prev) = Nil_closure;
2025 if (ThreadQueueHd == Nil_closure)
2026 ThreadQueueHd = tso;
2028 TSO_LINK(ThreadQueueTl) = tso;
2030 while(TSO_LINK(tso) != Nil_closure) {
2031 assert(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
2033 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", tso, CurrentTSO);
2035 tso = TSO_LINK(tso);
2038 assert(TSO_INTERNAL_PTR(tso)->rR[0].p == node);
2040 QP_Event2(do_qp_prof > 1 ? "RA" : "RG", tso, CurrentTSO);
2043 ThreadQueueTl = tso;
2046 return MUT_LINK(node) != MUT_NOT_LINKED;
2049 #endif /* GRAN only */
2057 SAVE_Liveness = args >> 1;
2058 TSO_PC1(CurrentTSO) = Continue;
2060 QP_Event1("GR", CurrentTSO);
2063 if (do_gr_profile) {
2064 /* Note that CURRENT_TIME may perform an unsafe call */
2065 TSO_EXECTIME(CurrentTSO) += CURRENT_TIME - TSO_BLOCKEDAT(CurrentTSO);
2068 ReSchedule(args & 1);
2073 %****************************************************************************
2075 \subsection[gr-fetch]{Fetching Nodes (GrAnSim only)}
2077 %****************************************************************************
2079 The following GrAnSim routines simulate the fetching of nodes from a remote
2080 processor. We use a 1 word bitmask to indicate on which processor a node is
2081 lying. Thus, moving or copying a node from one processor to another just
2082 requires an appropriate change in this bitmask (using @SET_GA@).
2083 Additionally, the clocks have to be updated.
2085 A special case arises when the node that is needed by processor A has been
2086 moved from a processor B to a processor C between sending out a @FETCH@
2087 (from A) and its arrival at B. In that case the @FETCH@ has to be forwarded
2090 Currently, we only support GRIP-like single closure fetching. We plan to
2091 incorporate GUM-like packet fetching in the near future.
2096 /* Fetch node "node" to processor "p" */
2099 FetchNode(node,from,to)
2103 assert(to==CurrentProc);
2104 if (!IS_LOCAL_TO(PROCS(node),from) &&
2105 !IS_LOCAL_TO(PROCS(node),to) )
2108 if(IS_NF(INFO_PTR(node))) /* Old: || IS_BQ(node) */
2109 PROCS(node) |= PE_NUMBER(to); /* Copy node */
2111 PROCS(node) = PE_NUMBER(to); /* Move node */
2113 /* Now fetch the children */
2116 fprintf(stderr,"Sorry, GUMM fetching not yet implemented.\n");
2122 /* --------------------------------------------------
2123 Cost of sending a packet of size n = C + P*n
2124 where C = packet construction constant,
2125 P = cost of packing one word into a packet
2126 [Should also account for multiple packets].
2127 -------------------------------------------------- */
2130 HandleFetchRequest(node,p,tso)
2134 if (IS_LOCAL_TO(PROCS(node),p) ) /* Somebody else moved node already => */
2136 newevent(p,CurrentProc,
2137 CurrentTime[CurrentProc] /* +gran_latency */,
2138 FETCHREPLY,tso,node,NULL); /* node needed ?? */
2139 CurrentTime[CurrentProc] += gran_mtidytime;
2141 else if (IS_LOCAL_TO(PROCS(node),CurrentProc) ) /* Is node still here? */
2143 /* Actual moving/copying of node is done on arrival; see FETCHREPLY */
2144 /* Send a reply to the originator */
2145 CurrentTime[CurrentProc] += gran_mpacktime;
2147 newevent(p,CurrentProc,
2148 CurrentTime[CurrentProc]+gran_latency,
2149 FETCHREPLY,tso,node,NULL); /* node needed ?? */
2151 CurrentTime[CurrentProc] += gran_mtidytime;
2154 { /* Qu'vatlh! node has been grabbed by another proc => forward */
2155 PROC p_new = where_is(node);
2158 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
2161 max(CurrentTime[p_new],CurrentTime[CurrentProc])+gran_latency,
2162 FETCHREPLY,tso,node,NULL); /* node needed ?? */
2163 CurrentTime[CurrentProc] += gran_mtidytime;
2168 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
2169 if (debug & 0x2) /* 0x2 should be something like DBG_PRINT_FWD */
2170 fprintf(stderr,"Qu'vatlh! node 0x%x has been grabbed by %d (current=%d; demander=%d) @ %d\n",
2171 node,p_new,CurrentProc,p,CurrentTime[CurrentProc]);
2173 /* Prepare FORWARD message to proc p_new */
2174 CurrentTime[CurrentProc] += gran_mpacktime;
2176 fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p_new]) +
2179 newevent(p_new,p,fetchtime,FETCHNODE,tso,node,NULL);
2181 CurrentTime[CurrentProc] += gran_mtidytime;
2187 %****************************************************************************
2189 \subsection[gr-simulation]{Granularity Simulation}
2191 %****************************************************************************
2194 #if 0 /* moved to GranSim.lc */
2197 FILE *gr_file = NULL;
2198 char gr_filename[STATS_FILENAME_MAXLEN];
2200 init_gr_simulation(rts_argc,rts_argv,prog_argc,prog_argv)
2201 char *prog_argv[], *rts_argv[];
2202 int prog_argc, rts_argc;
2208 char *extension = do_gr_binary? "gb": "gr";
2209 sprintf(gr_filename, GR_FILENAME_FMT, prog_argv[0],extension);
2211 if ((gr_file = fopen(gr_filename,"w")) == NULL )
2213 fprintf(stderr, "Can't open granularity simulation report file %s\n", gr_filename);
2217 #if defined(GRAN_CHECK) && defined(GRAN)
2218 if(DoReScheduleOnFetch)
2219 setbuf(gr_file,NULL);
2222 fputs("Granularity Simulation for ",gr_file);
2223 for(i=0; i < prog_argc; ++i)
2225 fputs(prog_argv[i],gr_file);
2231 fputs("+RTS ",gr_file);
2233 for(i=0; i < rts_argc; ++i)
2235 fputs(rts_argv[i],gr_file);
2240 fputs("\n\n--------------------\n\n",gr_file);
2242 fputs("General Parameters:\n\n",gr_file);
2244 fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads%s\n",
2245 max_proc,DoFairSchedule?"Fair":"Unfair",
2246 DoThreadMigration?"":"Don't ",
2247 DoThreadMigration && DoStealThreadsFirst?" Before Sparks":"",
2248 DoReScheduleOnFetch?"":"Don't ");
2250 fprintf(gr_file, "%s, Fetch %s in Each Packet\n",
2251 SimplifiedFetch?"Simplified Fetch":(DoReScheduleOnFetch?"Reschedule on Fetch":"Block on Fetch"),
2252 DoGUMMFetching?"Many Closures":"Exactly One Closure");
2253 fprintf(gr_file, "Fetch Strategy(%u): If outstanding fetches %s\n",
2255 FetchStrategy==1?"only run runnable threads (don't create new ones":
2256 FetchStrategy==2?"create threads only from local sparks":
2257 FetchStrategy==3?"create threads from local or global sparks":
2258 FetchStrategy==4?"create sparks and steal threads if necessary":
2261 fprintf(gr_file, "Thread Creation Time %lu, Thread Queue Time %lu\n",
2262 gran_threadcreatetime,gran_threadqueuetime);
2263 fprintf(gr_file, "Thread DeSchedule Time %lu, Thread Schedule Time %lu\n",
2264 gran_threaddescheduletime,gran_threadscheduletime);
2265 fprintf(gr_file, "Thread Context-Switch Time %lu\n",
2266 gran_threadcontextswitchtime);
2267 fputs("\n\n--------------------\n\n",gr_file);
2269 fputs("Communication Metrics:\n\n",gr_file);
2271 "Latency %lu (1st) %lu (rest), Fetch %lu, Notify %lu (Global) %lu (Local)\n",
2272 gran_latency, gran_additional_latency, gran_fetchtime,
2273 gran_gunblocktime, gran_lunblocktime);
2275 "Message Creation %lu (+ %lu after send), Message Read %lu\n",
2276 gran_mpacktime, gran_mtidytime, gran_munpacktime);
2277 fputs("\n\n--------------------\n\n",gr_file);
2279 fputs("Instruction Metrics:\n\n",gr_file);
2280 fprintf(gr_file,"Arith %lu, Branch %lu, Load %lu, Store %lu, Float %lu, Alloc %lu\n",
2281 gran_arith_cost, gran_branch_cost,
2282 gran_load_cost, gran_store_cost,gran_float_cost,gran_heapalloc_cost);
2283 fputs("\n\n++++++++++++++++++++\n\n",gr_file);
2287 grputw(sizeof(TIME));
2293 void end_gr_simulation() {
2296 fprintf(stderr,"The simulation is finished. Look at %s for details.\n",
2304 %****************************************************************************
2306 \subsection[qp-profile]{Quasi-Parallel Profiling}
2308 %****************************************************************************
2316 /* *Virtual* Time in milliseconds */
2318 qp_elapsed_time(STG_NO_ARGS)
2320 extern StgDouble usertime();
2322 return ((long) (usertime() * 1e3));
2326 init_qp_profiling(STG_NO_ARGS)
2329 char qp_filename[STATS_FILENAME_MAXLEN];
2331 sprintf(qp_filename, QP_FILENAME_FMT, prog_argv[0]);
2332 if ((qp_file = fopen(qp_filename,"w")) == NULL ) {
2333 fprintf(stderr, "Can't open quasi-parallel profile report file %s\n",
2337 fputs(prog_argv[0], qp_file);
2338 for(i = 1; prog_argv[i]; i++) {
2339 fputc(' ', qp_file);
2340 fputs(prog_argv[i], qp_file);
2342 fprintf(qp_file, " +RTS -C%d -t%d\n", contextSwitchTime, MaxThreads);
2343 fputs(time_str(), qp_file);
2344 fputc('\n', qp_file);
2349 QP_Event0(tid, node)
2353 fprintf(qp_file, "%lu ** %lu 0x%lx\n", qp_elapsed_time(), tid, INFO_PTR(node));
2357 QP_Event1(event, tso)
2361 fprintf(qp_file, "%lu %s %lu 0x%lx\n", qp_elapsed_time(), event,
2362 TSO_ID(tso), TSO_NAME(tso));
2366 QP_Event2(event, tso1, tso2)
2370 fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n", qp_elapsed_time(), event,
2371 TSO_ID(tso1), TSO_NAME(tso1), TSO_ID(tso2), TSO_NAME(tso2));
2377 %****************************************************************************
2379 \subsection[entry-points]{Routines directly called from Haskell world}
2381 %****************************************************************************
2383 The @GranSim...@ routines in here are directly called via macros from the
2386 First some auxiliary routines.
2390 /* Take the current thread off the thread queue and thereby activate the */
2391 /* next thread. It's assumed that the next ReSchedule after this uses */
2392 /* NEW_THREAD as param. */
2393 /* This fct is called from GranSimBlock and GranSimFetch */
2396 ActivateNextThread ()
/* Sanity check (GRAN_CHECK builds only): this must only run while the
   current TSO sits at the head of the thread queue. */
2398 #if defined(GRAN_CHECK) && defined(GRAN)
2399 if(ThreadQueueHd != CurrentTSO) {
2400 fprintf(stderr,"Error: ThreadQueueHd != CurrentTSO in ActivateNextThread\n");
/* Unlink the current thread; its successor (if any) becomes the new head. */
2405 ThreadQueueHd = TSO_LINK(ThreadQueueHd);
2406 if(ThreadQueueHd==Nil_closure) {
/* Queue drained: mark this processor idle and clear the tail pointer too. */
2407 MAKE_IDLE(CurrentProc);
2408 ThreadQueueTl = Nil_closure;
2409 } else if (do_gr_profile) {
/* Another thread takes over: charge a context switch on this PE's clock
   and log the schedule event (only when gr-profiling is on, per this code). */
2410 CurrentTime[CurrentProc] += gran_threadcontextswitchtime;
2411 DumpGranEvent(GR_SCHEDULE,ThreadQueueHd);
2416 Now the main stg-called routines:
2419 /* ------------------------------------------------------------------------ */
2420 /* The following GranSim... fcts are stg-called from the threaded world. */
2421 /* ------------------------------------------------------------------------ */
2423 /* Called from HEAP_CHK -- NB: node and liveness are junk here now.
2424 They are left temporarily to avoid complete recompilation.
/* GranSimAllocate: account for a heap allocation of n words by the current
   thread (node and liveness are junk here -- see the comment above).
   NOTE(review): parameter declarations and braces elided in this view. */
2428 GranSimAllocate(n,node,liveness)
2433 TSO_ALLOCS(CurrentTSO) += n;
2434 ++TSO_BASICBLOCKS(CurrentTSO);
/* Charge the fixed allocation cost to both the thread and the PE clock. */
2436 TSO_EXECTIME(CurrentTSO) += gran_heapalloc_cost;
2437 CurrentTime[CurrentProc] += gran_heapalloc_cost;
2441 Subtract the values added above, if a heap check fails and
2442 so has to be redone.
/* GranSimUnallocate: exact inverse of GranSimAllocate -- subtracts the
   values added there when a heap check fails and must be redone
   (see the comment above). */
2445 GranSimUnallocate(n,node,liveness)
2450 TSO_ALLOCS(CurrentTSO) -= n;
2451 --TSO_BASICBLOCKS(CurrentTSO);
/* Refund the allocation cost from both the thread and the PE clock. */
2453 TSO_EXECTIME(CurrentTSO) -= gran_heapalloc_cost;
2454 CurrentTime[CurrentProc] -= gran_heapalloc_cost;
/* GranSimExec: charge the simulated cost of executing the given counts of
   arithmetic, branch, load, store and float instructions to the current
   thread and the current PE's clock. */
2458 GranSimExec(ariths,branches,loads,stores,floats)
2459 W_ ariths,branches,loads,stores,floats;
/* Weight each instruction class by its configured unit cost. */
2461 W_ cost = gran_arith_cost*ariths + gran_branch_cost*branches + gran_load_cost * loads +
2462 gran_store_cost*stores + gran_float_cost*floats;
2464 TSO_EXECTIME(CurrentTSO) += cost;
2465 CurrentTime[CurrentProc] += cost;
2470 Fetch the node if it isn't local
2471 -- result indicates whether fetch has been done.
2473 This is GRIP-style single item fetching.
2477 GranSimFetch(node /* , liveness_mask */ )
2479 /* I_ liveness_mask; */
2481 /* Note: once a node has been fetched, this test will be passed */
2482 if(!IS_LOCAL_TO(PROCS(node),CurrentProc) )
2484 /* I suppose we shouldn't do this for CAFs? -- KH */
2485 /* Should reschedule if the latency is high */
2486 /* We should add mpacktime to the remote PE for the reply,
2487 but we don't know who owns the node
2489 /* if(DYNAMIC_POINTER(node)) */ /* For 0.22; gone in 0.23 !!! */
2491 PROC p = where_is(node);
2495 if ( ( debug & 0x40 ) &&
2497 fprintf(stderr,"GranSimFetch: Trying to fetch from own processor%u\n", p);
2498 #endif /* GRAN_CHECK */
2500 CurrentTime[CurrentProc] += gran_mpacktime;
2502 ++TSO_FETCHCOUNT(CurrentTSO);
2503 TSO_FETCHTIME(CurrentTSO) += gran_fetchtime;
2505 if (SimplifiedFetch)
2507 FetchNode(node,CurrentProc);
2508 CurrentTime[CurrentProc] += gran_mtidytime+gran_fetchtime+
2513 fetchtime = max(CurrentTime[CurrentProc],CurrentTime[p]) +
2516 newevent(p,CurrentProc,fetchtime,FETCHNODE,CurrentTSO,node,NULL);
2517 ++OutstandingFetches[CurrentProc];
2519 /* About to block */
2520 TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[p];
2522 if (DoReScheduleOnFetch)
2525 /* Remove CurrentTSO from the queue
2526 -- assumes head of queue == CurrentTSO */
2530 DumpGranEventAndNode(GR_FETCH,CurrentTSO,node,p);
2532 ActivateNextThread();
2534 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
2536 if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
2537 fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
2538 CurrentTSO,CurrentTime[CurrentProc]);
2541 TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
2547 TSO_LINK(CurrentTSO) = Nil_closure;
2548 /* CurrentTSO = Nil_closure; */
2550 /* ThreadQueueHd is now the next TSO to schedule or NULL */
2551 /* CurrentTSO is pointed to by the FETCHNODE event */
2553 else /* DoFairSchedule */
2555 /* Remove from the tail of the thread queue */
2556 fprintf(stderr,"Reschedule-on-fetch is not yet compatible with fair scheduling\n");
2560 else /* !DoReScheduleOnFetch */
2562 /* Note: CurrentProc is still busy as it's blocked on fetch */
2564 DumpGranEventAndNode(GR_FETCH,CurrentTSO,node,p);
2566 #if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
2568 BlockedOnFetch[CurrentProc] = CurrentTSO; /*- StgTrue; -*/
2571 if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
2572 fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
2573 CurrentTSO,CurrentTime[CurrentProc]);
2576 TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
2579 CurrentTSO = Nil_closure;
2584 CurrentTime[CurrentProc] += gran_mtidytime;
2586 /* Rescheduling is necessary */
2587 NeedToReSchedule = StgTrue;
2596 GranSimSpark(local,node)
2602 DumpSparkGranEvent(SP_SPARK,node);
2604 /* Force the PE to take notice of the spark */
2605 if(DoAlwaysCreateThreads)
2606 newevent(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
2607 FINDWORK,Nil_closure,Nil_closure,NULL);
2610 ++TSO_LOCALSPARKS(CurrentTSO);
2612 ++TSO_GLOBALSPARKS(CurrentTSO);
2616 GranSimSparkAt(spark,where,identifier)
2618 P_ where; /* This should be a node; alternatively could be a GA */
2621 PROC p = where_is(where);
2625 DumpSparkGranEvent(SP_SPARKAT,SPARK_NODE(spark));
2627 CurrentTime[CurrentProc] += gran_mpacktime;
2629 exporttime = (CurrentTime[p] > CurrentTime[CurrentProc]?
2630 CurrentTime[p]: CurrentTime[CurrentProc])
2633 newevent(p,CurrentProc,exporttime,MOVESPARK,Nil_closure,Nil_closure,spark);
2635 CurrentTime[CurrentProc] += gran_mtidytime;
2637 ++TSO_GLOBALSPARKS(CurrentTSO);
2644 DumpGranEvent(GR_BLOCK,CurrentTSO);
2646 ++TSO_BLOCKCOUNT(CurrentTSO);
2647 TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[CurrentProc];
2648 ActivateNextThread();
2655 %****************************************************************************
2657 \subsection[gc-GrAnSim]{Garbage collection routines for GrAnSim objects}
2659 %****************************************************************************
2661 Garbage collection code for the event queue. We walk the event queue
2662 so that if the only reference to a TSO is in some event (e.g. RESUME),
2663 the TSO is still preserved.
2668 extern smInfo StorageMgrInfo;
/* SaveEventRoots: walk the event queue and push every closure pointer held
   by pending events (TSOs, spark nodes, fetched nodes) onto the storage
   manager's root array, so GC preserves them even when the event holds the
   only reference (see section note above).  Returns the updated root count;
   undone by RestoreEventRoots. */
2671 SaveEventRoots(num_ptr_roots)
2674 eventq event = EventHd;
2675 while(event != NULL)
/* Thread-carrying events keep their TSO alive across GC. */
2677 if(EVENT_TYPE(event) == RESUMETHREAD ||
2678 EVENT_TYPE(event) == MOVETHREAD ||
2679 EVENT_TYPE(event) == STARTTHREAD )
2680 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
/* A moving spark roots the node it is going to evaluate. */
2682 else if(EVENT_TYPE(event) == MOVESPARK)
2683 StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(EVENT_SPARK(event));
/* Fetch traffic roots both the requesting TSO and the node itself;
   push order (TSO then NODE) is reversed by RestoreEvtRoots. */
2685 else if (EVENT_TYPE(event) == FETCHNODE ||
2686 EVENT_TYPE(event) == FETCHREPLY )
2688 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_TSO(event);
2689 StorageMgrInfo.roots[num_ptr_roots++] = EVENT_NODE(event);
2692 event = EVENT_NEXT(event);
2694 return(num_ptr_roots);
2698 SaveSparkRoots(num_ptr_roots)
2701 sparkq spark, /* prev, */ disposeQ=NULL;
2703 I_ i, sparkroots=0, prunedSparks=0;
2705 #if defined(GRAN_CHECK) && defined(GRAN)
2707 fprintf(stderr,"D> Saving spark roots for GC ...\n");
2710 for(proc = 0; proc < max_proc; ++proc) {
2711 for(i = 0; i < SPARK_POOLS; ++i) {
2712 for(/* prev = &PendingSparksHd[proc][i],*/ spark = PendingSparksHd[proc][i];
2714 /* prev = &SPARK_NEXT(spark), */ spark = SPARK_NEXT(spark))
2716 if(++sparkroots <= MAX_SPARKS)
2718 #if defined(GRAN_CHECK) && defined(GRAN)
2720 fprintf(main_statsfile,"Saving Spark Root %d(proc: %d; pool: %d) -- 0x%lx\n",
2721 num_ptr_roots,proc,i,SPARK_NODE(spark));
2723 StorageMgrInfo.roots[num_ptr_roots++] = SPARK_NODE(spark);
2727 SPARK_NODE(spark) = Nil_closure;
2728 if (prunedSparks==0) {
2736 } /* forall spark ... */
2737 if (prunedSparks>0) {
2738 fprintf(main_statsfile,"Pruning and disposing %lu excess sparks (> %lu) on proc %d for GC purposes\n",
2739 prunedSparks,MAX_SPARKS,proc);
2740 if (disposeQ == PendingSparksHd[proc][i])
2741 PendingSparksHd[proc][i] = NULL;
2743 SPARK_NEXT(SPARK_PREV(disposeQ)) = NULL;
2744 DisposeSparkQ(disposeQ);
2748 } /* forall i ... */
2749 } /*forall proc .. */
2751 return(num_ptr_roots);
2755 GC roots must be restored in *reverse order*.
2756 The recursion is a little ugly, but is better than
2757 in-place pointer reversal.
/* RestoreEvtRoots: undo SaveEventRoots for the event list starting at
   `event`.  Recurses to the end of the list first so roots are popped in
   exactly the reverse of the order they were pushed (see note above). */
2761 RestoreEvtRoots(event,num_ptr_roots)
/* Restore the tail before this event: stack discipline on roots[]. */
2767 num_ptr_roots = RestoreEvtRoots(EVENT_NEXT(event),num_ptr_roots);
2769 if(EVENT_TYPE(event) == RESUMETHREAD ||
2770 EVENT_TYPE(event) == MOVETHREAD ||
2771 EVENT_TYPE(event) == STARTTHREAD )
2772 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
2774 else if(EVENT_TYPE(event) == MOVESPARK )
2775 SPARK_NODE(EVENT_SPARK(event)) = StorageMgrInfo.roots[--num_ptr_roots];
2777 else if (EVENT_TYPE(event) == FETCHNODE ||
2778 EVENT_TYPE(event) == FETCHREPLY )
/* Reverse of the save order: NODE was pushed last, so it is popped first. */
2780 EVENT_NODE(event) = StorageMgrInfo.roots[--num_ptr_roots];
2781 EVENT_TSO(event) = StorageMgrInfo.roots[--num_ptr_roots];
2785 return(num_ptr_roots);
/* RestoreEventRoots: entry point -- restore GC roots for the whole event
   queue (from EventHd) and return the updated root count. */
2789 RestoreEventRoots(num_ptr_roots)
2792 return(RestoreEvtRoots(EventHd,num_ptr_roots));
2796 RestoreSpkRoots(spark,num_ptr_roots,sparkroots)
2798 I_ num_ptr_roots, sparkroots;
2802 num_ptr_roots = RestoreSpkRoots(SPARK_NEXT(spark),num_ptr_roots,++sparkroots);
2803 if(sparkroots <= MAX_SPARKS)
2805 P_ n = SPARK_NODE(spark);
2806 SPARK_NODE(spark) = StorageMgrInfo.roots[--num_ptr_roots];
2807 #if defined(GRAN_CHECK) && defined(GRAN)
2809 fprintf(main_statsfile,"Restoring Spark Root %d -- new: 0x%lx \n",
2810 num_ptr_roots,SPARK_NODE(spark));
2814 #if defined(GRAN_CHECK) && defined(GRAN)
2816 fprintf(main_statsfile,"Error in RestoreSpkRoots (%d; @ spark 0x%x): More than MAX_SPARKS (%d) sparks\n",
2817 num_ptr_roots,SPARK_NODE(spark),MAX_SPARKS);
2822 return(num_ptr_roots);
/* RestoreSparkRoots: restore the saved spark-node roots for every
   processor and every spark pool, iterating in reverse so pops mirror the
   pushes done by SaveSparkRoots.  Returns the updated root count. */
2826 RestoreSparkRoots(num_ptr_roots)
2832 /* NB: PROC is currently an unsigned datatype i.e. proc>=0 is always */
2833 /* true ((PROC)-1 == (PROC)255). So we need a second clause in the head */
2834 /* of the for loop. For i that is currently not necessary. C is really */
2835 /* impressive in datatype abstraction! -- HWL */
/* `proc < max_proc` catches the unsigned wrap-around when proc underflows. */
2837 for(proc = max_proc - 1; (proc >= 0) && (proc < max_proc); --proc) {
2838 for(i = SPARK_POOLS - 1; (i >= 0) && (i < SPARK_POOLS) ; --i) {
2839 num_ptr_roots = RestoreSpkRoots(PendingSparksHd[proc][i],num_ptr_roots,0);
2842 return(num_ptr_roots);
2849 %****************************************************************************
2851 \subsection[GrAnSim-profile]{Writing profiling info for GrAnSim}
2853 %****************************************************************************
2855 Event dumping routines.
2860 DumpGranEvent(name,tso)
/* Convenience wrapper: dump a GrAnSim event for `tso` on the current
   processor, identified by the TSO's id. */
2861 enum gran_event_types name;
2864 DumpRawGranEvent(CurrentProc,name,TSO_ID(tso));
2867 DumpSparkGranEvent(name,id)
/* Like DumpGranEvent, but the caller supplies the id directly
   (used for sparks, which have no TSO yet). */
2868 enum gran_event_types name;
2871 DumpRawGranEvent(CurrentProc,name,id);
2874 DumpGranEventAndNode(name,tso,node,proc)
/* Dump a GrAnSim event that also records the node involved and the
   processor it came from.  Emits either a binary record (grputw) or a
   text line to gr_file -- the #if selecting between them is elided in
   this excerpt. */
2875 enum gran_event_types name;
2879 PROC pe = CurrentProc;
2880 W_ id = TSO_ID(tso);
/* Clamp unknown event codes so the name-table lookup stays in range. */
2882 if(name > GR_EVENT_MAX)
2883 name = GR_EVENT_MAX;
2889 grputw(CurrentTime[CurrentProc]);
2893 fprintf(gr_file,"PE %2u [%lu]: %s %lx \t0x%lx\t(from %2u)\n",
2894 pe,CurrentTime[CurrentProc],gran_event_names[name],id,node,proc);
2897 DumpRawGranEvent(pe,name,id)
/* Core event dumper: write one event record (binary via grputw, or a
   text line -- the selecting #if is elided here) for processor `pe`. */
2899 enum gran_event_types name;
/* Clamp unknown event codes so the name-table lookup stays in range. */
2902 if(name > GR_EVENT_MAX)
2903 name = GR_EVENT_MAX;
2909 grputw(CurrentTime[CurrentProc]);
2913 fprintf(gr_file,"PE %2u [%lu]: %s %lx\n",
2914 pe,CurrentTime[CurrentProc],gran_event_names[name],id);
2917 DumpGranInfo(pe,tso,mandatory_thread)
/* Dump the end-of-thread statistics record for `tso`: spark name, start
   time, export flag, basic blocks, allocations, exec/block/fetch times
   and counts, local/global sparks, and whether the thread was mandatory.
   Two formats are produced: a raw binary record (grputw calls) and a
   text line -- the #if choosing between them is elided in this excerpt.
   The binary field order below defines the file format; do not reorder. */
2920 I_ mandatory_thread;
2926 grputw(CurrentTime[CurrentProc]);
2927 grputw(TSO_ID(tso));
2928 grputw(TSO_SPARKNAME(tso));
2929 grputw(TSO_STARTEDAT(tso));
2930 grputw(TSO_EXPORTED(tso));
2931 grputw(TSO_BASICBLOCKS(tso));
2932 grputw(TSO_ALLOCS(tso));
2933 grputw(TSO_EXECTIME(tso));
2934 grputw(TSO_BLOCKTIME(tso));
2935 grputw(TSO_BLOCKCOUNT(tso));
2936 grputw(TSO_FETCHTIME(tso));
2937 grputw(TSO_FETCHCOUNT(tso));
2938 grputw(TSO_LOCALSPARKS(tso));
2939 grputw(TSO_GLOBALSPARKS(tso));
2940 grputw(mandatory_thread);
2944 /* NB: DumpGranEvent cannot be used because PE may be wrong (as well as the extra info) */
2945 fprintf(gr_file,"PE %2u [%lu]: END %lx, SN %lu, ST %lu, EXP %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu), LS %lu, GS %lu, MY %c\n"
2947 ,CurrentTime[CurrentProc]
2951 ,TSO_EXPORTED(tso)?'T':'F'
2952 ,TSO_BASICBLOCKS(tso)
2956 ,TSO_BLOCKCOUNT(tso)
2958 ,TSO_FETCHCOUNT(tso)
2959 ,TSO_LOCALSPARKS(tso)
2960 ,TSO_GLOBALSPARKS(tso)
2961 ,mandatory_thread?'T':'F'
/* Debug dump of a TSO: identity/type to stderr, then the accounting
   fields to gr_file.  NOTE(review): the function header is elided from
   this excerpt (presumably DumpTSO) -- confirm against the full file. */
2969 fprintf(stderr,"TSO 0x%lx, NAME 0x%lx, ID %lu, LINK 0x%lx, TYPE %s\n"
2974 ,TSO_TYPE(tso)==T_MAIN?"MAIN":
2975 TSO_TYPE(tso)==T_FAIL?"FAIL":
2976 TSO_TYPE(tso)==T_REQUIRED?"REQUIRED":
2977 TSO_TYPE(tso)==T_ADVISORY?"ADVISORY":
/* NOTE(review): "SWITCH %lx0x" below looks like a garbled "0x%lx" --
   the trailing "0x" prints literally; verify against the full source. */
2981 fprintf(stderr,"PC (0x%lx,0x%lx), ARG (0x%lx,0x%lx), SWITCH %lx0x\n"
2989 fprintf(gr_file,"SN %lu, ST %lu, GBL %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu) LS %lu, GS %lu\n"
2992 ,TSO_EXPORTED(tso)?'T':'F'
2993 ,TSO_BASICBLOCKS(tso)
2997 ,TSO_BLOCKCOUNT(tso)
2999 ,TSO_FETCHCOUNT(tso)
3000 ,TSO_LOCALSPARKS(tso)
3001 ,TSO_GLOBALSPARKS(tso)
3006 Output a terminate event and an 8-byte time.
/* Emit a GR_TERMINATE event followed by the final time as 8 raw bytes,
   most significant byte first (big-endian).  NOTE(review): the function
   header (presumably grterminate, taking a TIME v) is elided here. */
3012 DumpGranEvent(GR_TERMINATE,0);
3023 putc(v >> 56l,gr_file);
3024 putc((v >> 48l)&0xffl,gr_file);
3025 putc((v >> 40l)&0xffl,gr_file);
3026 putc((v >> 32l)&0xffl,gr_file);
3028 putc((v >> 24l)&0xffl,gr_file);
3029 putc((v >> 16l)&0xffl,gr_file);
3030 putc((v >> 8l)&0xffl,gr_file);
3031 putc(v&0xffl,gr_file);
3035 Length-coded output: first 3 bits contain length coding
/* Length-coded word output (header elided; presumably grputw(TIME v)).
   The top bits of the first byte encode how many bytes follow:
     value <= 0x3f              -> 1 byte, tag 00
     value <= 0x3fff            -> 2 bytes, tag 01 (|0x40)
     value <= 0x3fffffff        -> 4 bytes, tag 10 (|0x80)
     larger, 4-byte TIME        -> escape byte 0x70 + 4 raw bytes
     larger, 8-byte TIME        -> tag 0x60 in byte 1 + 7 more bytes
   Decoders of the gr file must match this scheme exactly. */
3049 fputc(v & 0x3f,gr_file);
3052 else if (v <= 0x3fffl)
3054 fputc((v >> 8l)|0x40l,gr_file);
3055 fputc(v&0xffl,gr_file);
3058 else if (v <= 0x3fffffffl)
3060 fputc((v >> 24l)|0x80l,gr_file);
3061 fputc((v >> 16l)&0xffl,gr_file);
3062 fputc((v >> 8l)&0xffl,gr_file);
3063 fputc(v&0xffl,gr_file);
3066 else if (sizeof(TIME) == 4)
3068 fputc(0x70,gr_file);
3069 fputc((v >> 24l)&0xffl,gr_file);
3070 fputc((v >> 16l)&0xffl,gr_file);
3071 fputc((v >> 8l)&0xffl,gr_file);
3072 fputc(v&0xffl,gr_file);
3077 if (v <= 0x3fffffffffffffl)
3078 putc((v >> 56l)|0x60l,gr_file);
/* NOTE(review): the branch for 8-byte values above 0x3fffffffffffff is
   elided in this excerpt (only its raw-byte writes are visible). */
3082 putc((v >> 56l)&0xffl,gr_file);
3085 putc((v >> 48l)&0xffl,gr_file);
3086 putc((v >> 40l)&0xffl,gr_file);
3087 putc((v >> 32l)&0xffl,gr_file);
3088 putc((v >> 24l)&0xffl,gr_file);
3089 putc((v >> 16l)&0xffl,gr_file);
3090 putc((v >> 8l)&0xffl,gr_file);
3091 putc(v&0xffl,gr_file);
3098 %****************************************************************************
3100 \subsection[GrAnSim-debug]{Debugging routines for GrAnSim}
3102 %****************************************************************************
3104 Debugging routines, mainly for GrAnSim. They should really be in a separate file.
3106 The first couple of routines are general ones (look also into
3107 c-as-asm/StgDebug.lc).
3111 #define NULL_REG_MAP /* Not threaded */
3112 #include "stgdefs.h"
3115 info_hdr_type(info_ptr)
/* Map an info pointer to a static string naming its closure type, for
   debug printing.  Sequential builds dispatch on INFO_TAG; PAR/GRAN
   builds dispatch on INFO_TYPE.  Most return statements are elided in
   this line-sampled excerpt; only the case labels remain visible. */
3118 #if ! defined(PAR) && !defined(GRAN)
3119 switch (INFO_TAG(info_ptr))
3121 case INFO_OTHER_TAG:
3122 return("OTHER_TAG");
3123 /* case INFO_IND_TAG:
3129 switch(INFO_TYPE(info_ptr))
3131 case INFO_SPEC_U_TYPE:
3134 case INFO_SPEC_N_TYPE:
3137 case INFO_GEN_U_TYPE:
3140 case INFO_GEN_N_TYPE:
3147 case INFO_DYN_TYPE_N:
3150 case INFO_DYN_TYPE_U:
3154 case INFO_TUPLE_TYPE:
3157 case INFO_DATA_TYPE:
3160 case INFO_MUTUPLE_TYPE:
3163 case INFO_IMMUTUPLE_TYPE:
3164 return("IMMUTUPLE");
3166 case INFO_STATIC_TYPE:
3169 case INFO_CONST_TYPE:
3172 case INFO_CHARLIKE_TYPE:
3175 case INFO_INTLIKE_TYPE:
3187 case INFO_FETCHME_TYPE:
3194 case INFO_BQENT_TYPE:
3201 case INFO_STKO_TYPE:
/* Fallback for unrecognized info types: report rather than crash. */
3205 fprintf(stderr,"Unknown header type %lu\n",INFO_TYPE(info_ptr));
3212 @var_hdr_size@ computes the size of the variable header for a closure.
/* var_hdr_size(node): size in words of the variable header for this
   closure kind (0 for fixed-header closures).  Function header elided
   in this excerpt; the prose above names it @var_hdr_size@. */
3219 switch(INFO_TYPE(INFO_PTR(node)))
3221 case INFO_SPEC_U_TYPE: return(0); /* by decree */
3222 case INFO_SPEC_N_TYPE: return(0);
3223 case INFO_GEN_U_TYPE: return(GEN_VHS);
3224 case INFO_GEN_N_TYPE: return(GEN_VHS);
3225 case INFO_DYN_TYPE: return(DYN_VHS);
3227 case INFO_DYN_TYPE_N: return(DYN_VHS);
3228 case INFO_DYN_TYPE_U: return(DYN_VHS);
3230 case INFO_TUPLE_TYPE: return(TUPLE_VHS);
3231 case INFO_DATA_TYPE: return(DATA_VHS);
3232 case INFO_MUTUPLE_TYPE: return(MUTUPLE_VHS);
3233 case INFO_IMMUTUPLE_TYPE: return(MUTUPLE_VHS); /* same layout */
3234 case INFO_STATIC_TYPE: return(STATIC_VHS);
3235 case INFO_CONST_TYPE: return(0);
3236 case INFO_CHARLIKE_TYPE: return(0);
3237 case INFO_INTLIKE_TYPE: return(0);
3238 case INFO_BH_TYPE: return(0);
3239 case INFO_IND_TYPE: return(0);
3240 case INFO_CAF_TYPE: return(0);
3241 case INFO_FETCHME_TYPE: return(0);
3242 case INFO_BQ_TYPE: return(0);
3244 case INFO_BQENT_TYPE: return(0);
3246 case INFO_TSO_TYPE: return(TSO_VHS);
3247 case INFO_STKO_TYPE: return(STKO_VHS);
/* Default: report the unknown info type rather than guessing a size. */
3249 fprintf(stderr,"Unknown info type 0x%lx (%lu)\n", INFO_PTR(node),
3250 INFO_TYPE(INFO_PTR(node)));
3258 /* Determine the size and number of pointers for this kind of closure */
3258 size_and_ptrs(node,size,ptrs)
/* Out-parameters `size` and `ptrs` receive the closure's word count and
   pointer count, dispatching on its info type.  Only known layouts are
   handled; the default branch ("Don't know about the others") is at the
   bottom.  Several lines are elided in this sampled excerpt. */
3262 switch(INFO_TYPE(INFO_PTR(node)))
3264 case INFO_SPEC_U_TYPE:
3265 case INFO_SPEC_N_TYPE:
3266 *size = INFO_SIZE(INFO_PTR(node)); /* New for 0.24; check */
3267 *ptrs = INFO_NoPTRS(INFO_PTR(node)); /* that! -- HWL */
/* Older alternative for SPEC closures (selection #if is elided). */
3269 *size = SPEC_CLOSURE_SIZE(node);
3270 *ptrs = SPEC_CLOSURE_NoPTRS(node);
3274 case INFO_GEN_U_TYPE:
3275 case INFO_GEN_N_TYPE:
3276 *size = GEN_CLOSURE_SIZE(node);
3277 *ptrs = GEN_CLOSURE_NoPTRS(node);
3281 case INFO_DYN_TYPE_U:
3282 case INFO_DYN_TYPE_N:
3285 *size = DYN_CLOSURE_SIZE(node);
3286 *ptrs = DYN_CLOSURE_NoPTRS(node);
3289 case INFO_TUPLE_TYPE:
3290 *size = TUPLE_CLOSURE_SIZE(node);
3291 *ptrs = TUPLE_CLOSURE_NoPTRS(node);
3294 case INFO_DATA_TYPE:
3295 *size = DATA_CLOSURE_SIZE(node);
3296 *ptrs = DATA_CLOSURE_NoPTRS(node);
/* Presumably the IND (indirection) case -- its label line is elided. */
3300 *size = IND_CLOSURE_SIZE(node);
3301 *ptrs = IND_CLOSURE_NoPTRS(node);
3304 /* ToDo: more (WDP) */
3306 /* Don't know about the others */
3314 DEBUG_PRINT_NODE(node)
/* Pretty-print one closure to stderr: address, optional GA / cost
   centre / processor bitmask headers, info pointer and type name, then
   each word of the body tagged [P] (pointer) or [D] (data).  The #if
   lines and the pointer/data discrimination test are elided here. */
3317 W_ info_ptr = INFO_PTR(node);
3318 I_ size = 0, ptrs = 0, i, vhs = 0;
3319 char *info_type = info_hdr_type(info_ptr);
3321 size_and_ptrs(node,&size,&ptrs);
3322 vhs = var_hdr_size(node);
3324 fprintf(stderr,"Node: 0x%lx", (W_) node);
3327 fprintf(stderr," [GA: 0x%lx]",GA(node));
3330 #if defined(USE_COST_CENTRES)
3331 fprintf(stderr," [CC: 0x%lx]",CC_HDR(node));
3335 fprintf(stderr," [Bitmask: 0%lo]",PROCS(node));
3338 fprintf(stderr," IP: 0x%lx (%s), size %ld, %ld ptrs\n",
3339 info_ptr,info_type,size,ptrs);
3341 /* For now, we ignore the variable header */
3343 for(i=0; i < size; ++i)
3346 fprintf(stderr,"Data: ");
3349 fprintf(stderr,"\n ");
/* First `ptrs` words print as pointers, the rest as data (presumably;
   the discriminating condition is elided in this excerpt). */
3352 fprintf(stderr," 0x%lx[P]",*(node+_FHS+vhs+i));
3354 fprintf(stderr," %lu[D]",*(node+_FHS+vhs+i));
3356 fprintf(stderr, "\n");
3360 #define INFO_MASK 0x80000000
/* DEBUG_TREE(node) -- header elided in this excerpt.  Recursively print
   a closure graph, using the top bit of the info pointer (INFO_MASK) as
   a visit mark to break cycles; the mark is removed afterwards, so the
   heap is restored -- but this is unsafe if anything else inspects the
   info pointer concurrently (see "may be dangerous" below). */
3366 W_ size = 0, ptrs = 0, i, vhs = 0;
3368 /* Don't print cycles */
3369 if((INFO_PTR(node) & INFO_MASK) != 0)
3372 size_and_ptrs(node,&size,&ptrs);
3373 vhs = var_hdr_size(node);
3375 DEBUG_PRINT_NODE(node);
3376 fprintf(stderr, "\n");
3378 /* Mark the node -- may be dangerous */
3379 INFO_PTR(node) |= INFO_MASK;
3381 for(i = 0; i < ptrs; ++i)
3382 DEBUG_TREE((P_)node[i+vhs+_FHS]);
3384 /* Unmark the node */
3385 INFO_PTR(node) &= ~INFO_MASK;
3390 DEBUG_INFO_TABLE(node)
/* Print the contents of a closure's info table to stderr: type name,
   entry code, size/pointer counts, then GC-scheme-specific entries
   (copying evacuate/scavenge, compacting scan/mark) guarded by the
   respective _INFO_* macros. */
3393 W_ info_ptr = INFO_PTR(node);
3394 char *ip_type = info_hdr_type(info_ptr);
3396 fprintf(stderr,"%s Info Ptr @0x%lx; Entry: 0x%lx; Size: %lu; Ptrs: %lu\n\n",
3397 ip_type,info_ptr,(W_) ENTRY_CODE(info_ptr),INFO_SIZE(info_ptr),INFO_NoPTRS(info_ptr));
3399 fprintf(stderr,"Enter Flush Entry: 0x%lx;\tExit Flush Entry: 0x%lx\n",INFO_FLUSHENT(info_ptr),INFO_FLUSH(info_ptr));
3402 #if defined(USE_COST_CENTRES)
3403 fprintf(stderr,"Cost Centre (???): 0x%lx\n",INFO_CAT(info_ptr));
3406 #if defined(_INFO_COPYING)
3407 fprintf(stderr,"Evacuate Entry: 0x%lx;\tScavenge Entry: 0x%lx\n",
3408 INFO_EVAC_2S(info_ptr),INFO_SCAV_2S(info_ptr));
3411 #if defined(_INFO_COMPACTING)
3412 fprintf(stderr,"Scan Link: 0x%lx;\tScan Move: 0x%lx\n",
3413 (W_) INFO_SCAN_LINK_1S(info_ptr), (W_) INFO_SCAN_MOVE_1S(info_ptr));
3414 fprintf(stderr,"Mark: 0x%lx;\tMarked: 0x%lx;\t",
3415 (W_) INFO_MARK_1S(info_ptr), (W_) INFO_MARKED_1S(info_ptr));
3416 #if 0 /* avoid INFO_TYPE */
3417 if(BASE_INFO_TYPE(info_ptr)==INFO_SPEC_TYPE)
3418 fprintf(stderr,"plus specialised code\n");
3420 fprintf(stderr,"Marking: 0x%lx\n",(W_) INFO_MARKING_1S(info_ptr));
3428 The remaining debugging routines are more or less specific for GrAnSim.
3431 #if defined(GRAN) && defined(GRAN_CHECK)
3433 DEBUG_CURR_THREADQ(verbose)
/* Print the thread queue of the current processor. */
3436 fprintf(stderr,"Thread Queue on proc %d: ", CurrentProc);
3437 DEBUG_THREADQ(ThreadQueueHd, verbose);
3441 DEBUG_THREADQ(closure, verbose)
/* Print every TSO address in the queue starting at `closure`, following
   TSO_LINK until Nil_closure; an empty queue prints "NIL". */
3447 fprintf(stderr,"Thread Queue: ");
3448 for (x=closure; x!=Nil_closure; x=TSO_LINK(x))
3452 fprintf(stderr," 0x%x",x);
3454 if (closure==Nil_closure)
3455 fprintf(stderr,"NIL\n");
3457 fprintf(stderr,"\n");
3460 /* Check with Threads.lh */
/* Index order must stay in sync with the TSO type constants (T_MAIN,
   T_REQUIRED, T_ADVISORY, T_FAIL) declared in Threads.lh. */
3461 static char *type_name[] = { "T_MAIN", "T_REQUIRED", "T_ADVISORY", "T_FAIL"};
3464 DEBUG_TSO(closure,verbose)
/* Dump all fields of a TSO to stderr, one field per fprintf.  `verbose`
   is accepted but not consulted in the visible lines -- presumably used
   in elided parts; confirm against the full file. */
3469 if (closure==Nil_closure) {
/* FIX: the original format string contained %x with no matching
   argument -- undefined behavior per ISO C (fprintf conversion without
   a corresponding argument).  Pass `closure` (== Nil_closure here) so
   the printed address is well-defined. */
3470 fprintf(stderr,"TSO at 0x%x is Nil_closure!\n",closure);
3474 fprintf(stderr,"TSO at 0x%x has the following contents:\n",closure);
3476 fprintf(stderr,"> Name: 0x%x",TSO_NAME(closure));
3477 fprintf(stderr,"\tLink: 0x%x\n",TSO_LINK(closure));
3478 fprintf(stderr,"> Id: 0x%x",TSO_ID(closure));
3479 #if defined(GRAN_CHECK) && defined(GRAN)
/* GRAN_CHECK builds: the type word also carries a FETCH_MASK_TSO bit
   marking a thread asleep on a fetch; mask it off for the table lookup. */
3481 fprintf(stderr,"\tType: %s %s\n",
3482 type_name[TSO_TYPE(closure) & ~FETCH_MASK_TSO],
3483 (TSO_TYPE(closure) & FETCH_MASK_TSO) ? "SLEEPING" : "");
/* NOTE(review): the next two lines are identical -- presumably the
   #else/#endif lines separating them are elided in this excerpt. */
3485 fprintf(stderr,"\tType: %s\n",type_name[TSO_TYPE(closure)]);
3487 fprintf(stderr,"\tType: %s\n",type_name[TSO_TYPE(closure)]);
3489 fprintf(stderr,"> PC1: 0x%x",TSO_PC1(closure));
3490 fprintf(stderr,"\tPC2: 0x%x\n",TSO_PC2(closure));
3491 fprintf(stderr,"> ARG1: 0x%x",TSO_ARG1(closure));
3492 fprintf(stderr,"\tARG2: 0x%x\n",TSO_ARG2(closure));
3493 fprintf(stderr,"> SWITCH: 0x%x\n", TSO_SWITCH(closure));
3496 fprintf(stderr,"} LOCKED: 0x%x",TSO_LOCKED(closure));
3497 fprintf(stderr,"\tSPARKNAME: 0x%x\n", TSO_SPARKNAME(closure));
3498 fprintf(stderr,"} STARTEDAT: 0x%x", TSO_STARTEDAT(closure));
3499 fprintf(stderr,"\tEXPORTED: 0x%x\n", TSO_EXPORTED(closure));
3500 fprintf(stderr,"} BASICBLOCKS: 0x%x", TSO_BASICBLOCKS(closure));
3501 fprintf(stderr,"\tALLOCS: 0x%x\n", TSO_ALLOCS(closure));
3502 fprintf(stderr,"} EXECTIME: 0x%x", TSO_EXECTIME(closure));
3503 fprintf(stderr,"\tFETCHTIME: 0x%x\n", TSO_FETCHTIME(closure));
3504 fprintf(stderr,"} FETCHCOUNT: 0x%x", TSO_FETCHCOUNT(closure));
3505 fprintf(stderr,"\tBLOCKTIME: 0x%x\n", TSO_BLOCKTIME(closure));
3506 fprintf(stderr,"} BLOCKCOUNT: 0x%x", TSO_BLOCKCOUNT(closure));
3507 fprintf(stderr,"\tBLOCKEDAT: 0x%x\n", TSO_BLOCKEDAT(closure));
3508 fprintf(stderr,"} GLOBALSPARKS: 0x%x", TSO_GLOBALSPARKS(closure));
3509 fprintf(stderr,"\tLOCALSPARKS: 0x%x\n", TSO_LOCALSPARKS(closure));
3514 DEBUG_EVENT(event, verbose)
/* Print one event; the visible non-verbose path prints its address only
   (the verbose branch is elided in this excerpt). */
3521 fprintf(stderr," 0x%x",event);
3526 DEBUG_EVENTQ(verbose)
/* Walk the global event queue from EventHd and print each event via
   DEBUG_EVENT; an empty queue prints "NIL". */
3531 fprintf(stderr,"Eventq (hd @0x%x):\n",EventHd);
3532 for (x=EventHd; x!=NULL; x=EVENT_NEXT(x)) {
3533 DEBUG_EVENT(x,verbose);
3536 fprintf(stderr,"NIL\n");
3538 fprintf(stderr,"\n");
3542 DEBUG_SPARK(spark, verbose)
/* Print one spark; the visible non-verbose path prints its address only
   (the verbose branch is elided in this excerpt). */
3549 fprintf(stderr," 0x%x",spark);
3553 DEBUG_SPARKQ(spark,verbose)
/* Walk the spark queue headed at `spark` and print each entry via
   DEBUG_SPARK; an empty queue prints "NIL". */
3559 fprintf(stderr,"Sparkq (hd @0x%x):\n",spark);
3560 for (x=spark; x!=NULL; x=SPARK_NEXT(x)) {
3561 DEBUG_SPARK(x,verbose);
3564 fprintf(stderr,"NIL\n");
3566 fprintf(stderr,"\n");
3570 DEBUG_CURR_SPARKQ(verbose)
/* Print the spark queue of the current processor. */
3573 DEBUG_SPARKQ(SparkQueueHd,verbose);
3577 DEBUG_PROC(proc,verbose)
/* Status report for one simulated processor.  `verbose` is a bitmask:
   bit 0 adds the REQUIRED/ADVISORY spark pools, bit 1 is forwarded to
   the thread-queue dump. */
3581 fprintf(stderr,"Status of proc %d at time %d (0x%x): %s\n",
3582 proc,CurrentTime[proc],CurrentTime[proc],
3583 (CurrentProc==proc)?"ACTIVE":"INACTIVE");
3584 DEBUG_THREADQ(RunnableThreadsHd[proc],verbose & 0x2);
/* On the active processor, also dump the currently running TSO. */
3585 if ( (CurrentProc==proc) )
3586 DEBUG_TSO(CurrentTSO,1);
3589 fprintf(stderr,"Next event (%s) is on proc %d\n",
3590 event_names[EVENT_TYPE(EventHd)],EVENT_PROC(EventHd));
3592 if (verbose & 0x1) {
3593 fprintf(stderr,"\nREQUIRED sparks: ");
3594 DEBUG_SPARKQ(PendingSparksHd[proc][REQUIRED_POOL],1);
3595 fprintf(stderr,"\nADVISORY_sparks: ");
3596 DEBUG_SPARKQ(PendingSparksHd[proc][ADVISORY_POOL],1);
3600 /* Debug CurrentTSO */
/* Shorthand debugger entry (name line elided in this excerpt --
   presumably DCT()): dump the current processor id and the running TSO. */
3603 fprintf(stderr,"Current Proc: %d\n",CurrentProc);
3604 DEBUG_TSO(CurrentTSO,1);
3607 /* Debug Current Processor */
/* Shorthand for interactive debuggers; relies on implicit int return. */
3609 DCP(){ DEBUG_PROC(CurrentProc,2); }
3611 /* Shorthand for debugging event queue */
/* Callable from a debugger prompt; relies on implicit int return. */
3613 DEQ() { DEBUG_EVENTQ(1); }
3615 /* Shorthand for debugging spark queue */
/* Callable from a debugger prompt; relies on implicit int return. */
3617 DSQ() { DEBUG_CURR_SPARKQ(1); }
3619 /* Shorthand for printing a node */
/* Callable from a debugger prompt. */
3621 DN(P_ node) { DEBUG_PRINT_NODE(node); }
3627 %****************************************************************************
3629 \subsection[qp-profile]{Quasi-Parallel Profiling}
3631 %****************************************************************************
3638 /* *Virtual* Time in milliseconds */
/* qp_elapsed_time (header elided; name confirmed by the QP_Event*
   callers below): user CPU time scaled to milliseconds. */
3642 return ((long) (usertime() * 1e3));
3646 init_qp_profiling(STG_NO_ARGS)
/* Open the quasi-parallel profile file and write its header: the
   program's command line, the RTS context-switch/thread options, and a
   timestamp. */
3649 char qp_filename[STATS_FILENAME_MAXLEN];
/* NOTE(review): unbounded sprintf into a fixed buffer -- consider
   snprintf(qp_filename, sizeof qp_filename, ...) if the format or
   argv[0] can exceed STATS_FILENAME_MAXLEN. */
3651 sprintf(qp_filename, QP_FILENAME_FMT, prog_argv[0]);
3652 if ((qp_file = fopen(qp_filename,"w")) == NULL ) {
3653 fprintf(stderr, "Can't open quasi-parallel profile report file %s\n",
/* Header line 1: the full command line, space-separated. */
3657 fputs(prog_argv[0], qp_file);
3658 for(i = 1; prog_argv[i]; i++) {
3659 fputc(' ', qp_file);
3660 fputs(prog_argv[i], qp_file);
/* Header lines 2-3: effective RTS options and a start timestamp. */
3662 fprintf(qp_file, "+RTS -C%ld -t%ld\n", contextSwitchTime, MaxThreads);
3663 fputs(time_str(), qp_file);
3664 fputc('\n', qp_file);
3669 QP_Event0(tid, node)
/* Log a zero-thread QP event: timestamp, "**" marker, thread id, and
   the node's info pointer. */
3673 fprintf(qp_file, "%lu ** %lu 0x%lx\n", qp_elapsed_time(), tid, INFO_PTR(node));
3677 QP_Event1(event, tso)
/* Log a one-thread QP event: timestamp, event string, and the TSO's
   id and name. */
3681 fprintf(qp_file, "%lu %s %lu 0x%lx\n", qp_elapsed_time(), event,
3682 TSO_ID(tso), (W_) TSO_NAME(tso));
3686 QP_Event2(event, tso1, tso2)
/* Log a two-thread QP event (e.g. one thread acting on another):
   timestamp, event string, then id/name of both TSOs. */
3690 fprintf(qp_file, "%lu %s %lu 0x%lx %lu 0x%lx\n", qp_elapsed_time(), event,
3691 TSO_ID(tso1), (W_) TSO_NAME(tso1), TSO_ID(tso2), (W_) TSO_NAME(tso2));
3696 #if defined(CONCURRENT) && !defined(GRAN)
3697 /* romoluSnganpu' SamuS! */
/* (The line above is a Klingon-language easter egg; left intact.)
   Processor-state globals for concurrent builds without the GrAnSim
   simulator: a single (pseudo-)processor, with an idle-processor
   bitmask and idler count. */
3699 unsigned CurrentProc = 0;
3700 W_ IdleProcs = ~0l, Idlers = 32;
/* Stub versions of the GranSim instrumentation hooks for builds without
   GRAN.  NOTE(review): the bodies are elided in this line-sampled
   excerpt -- presumably no-ops; confirm against the full file. */
3703 GranSimAllocate(n,node,liveness)
3710 GranSimUnallocate(n,node,liveness)
3718 GranSimExec(ariths,branches,loads,stores,floats)
3719 W_ ariths,branches,loads,stores,floats;
3723 GranSimFetch(node /* , liveness_mask */ )
3725 /* I_ liveness_mask; */
3729 GranSimSpark(local,node)
3736 GranSimSparkAt(spark,where,identifier)
3738 P_ where; /* This should be a node; alternatively could be a GA */