X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Frts%2FSchedule.c;h=4430f5a2aa16eca5dc321a267b7161909fea3624;hb=d1447bea8845e1c77ad8e76b16f9f5180dede4d8;hp=665e60f4d4ed1c91657c79530cc7c68817724f00;hpb=e778a17045aac8aded0e0438f5e0178643ff678d;p=ghc-hetmet.git diff --git a/ghc/rts/Schedule.c b/ghc/rts/Schedule.c index 665e60f..4430f5a 100644 --- a/ghc/rts/Schedule.c +++ b/ghc/rts/Schedule.c @@ -1,20 +1,27 @@ /* --------------------------------------------------------------------------- - * $Id: Schedule.c,v 1.89 2001/02/09 13:09:16 simonmar Exp $ + * $Id: Schedule.c,v 1.121 2002/02/12 15:38:08 sof Exp $ * * (c) The GHC Team, 1998-2000 * * Scheduler * - * The main scheduling code in GranSim is quite different from that in std - * (concurrent) Haskell: while concurrent Haskell just iterates over the - * threads in the runnable queue, GranSim is event driven, i.e. it iterates - * over the events in the global event queue. -- HWL + * Different GHC ways use this scheduler quite differently (see comments below) + * Here is the global picture: + * + * WAY Name CPP flag What's it for + * -------------------------------------- + * mp GUM PAR Parallel execution on a distributed memory machine + * s SMP SMP Parallel execution on a shared memory machine + * mg GranSim GRAN Simulation of parallel execution + * md GUM/GdH DIST Distributed execution (based on GUM) + * * --------------------------------------------------------------------------*/ //@node Main scheduling code, , , //@section Main scheduling code -/* Version with scheduler monitor support for SMPs. +/* + * Version with scheduler monitor support for SMPs (WAY=s): This design provides a high-level API to create and schedule threads etc. as documented in the SMP design document. @@ -32,6 +39,24 @@ In a non-SMP build, there is one global capability, namely MainRegTable. 
SDM & KH, 10/99 + + * Version with support for distributed memory parallelism aka GUM (WAY=mp): + + The main scheduling loop in GUM iterates until a finish message is received. + In that case a global flag @receivedFinish@ is set and this instance of + the RTS shuts down. See ghc/rts/parallel/HLComms.c:processMessages() + for the handling of incoming messages, such as PP_FINISH. + Note that in the parallel case we have a system manager that coordinates + different PEs, each of which is running one instance of the RTS. + See ghc/rts/parallel/SysMan.c for the main routine of the parallel program. + From this routine processes executing ghc/rts/Main.c are spawned. -- HWL + + * Version with support for simulating parallel execution aka GranSim (WAY=mg): + + The main scheduling code in GranSim is quite different from that in std + (concurrent) Haskell: while concurrent Haskell just iterates over the + threads in the runnable queue, GranSim is event driven, i.e. it iterates + over the events in the global event queue. -- HWL */ //@menu @@ -50,6 +75,7 @@ //@node Includes, Variables and Data structures, Main scheduling code, Main scheduling code //@subsection Includes +#include "PosixSource.h" #include "Rts.h" #include "SchedAPI.h" #include "RtsUtils.h" @@ -57,7 +83,6 @@ #include "Storage.h" #include "StgRun.h" #include "StgStartup.h" -#include "GC.h" #include "Hooks.h" #include "Schedule.h" #include "StgMiscClosures.h" @@ -71,6 +96,10 @@ #include "Stats.h" #include "Itimer.h" #include "Prelude.h" +#ifdef PROFILING +#include "Proftimer.h" +#include "ProfHeap.h" +#endif #if defined(GRAN) || defined(PAR) # include "GranSimRts.h" # include "GranSim.h" @@ -81,6 +110,9 @@ # include "HLC.h" #endif #include "Sparks.h" +#include "Capability.h" +#include "OSThreads.h" +#include "Task.h" #include @@ -91,7 +123,7 @@ * * These are the threads which clients have requested that we run.
* - * In an SMP build, we might have several concurrent clients all + * In a 'threaded' build, we might have several concurrent clients all * waiting for results, and each one will wait on a condition variable * until the result is available. * @@ -106,8 +138,8 @@ typedef struct StgMainThread_ { StgTSO * tso; SchedulerStatus stat; StgClosure ** ret; -#ifdef SMP - pthread_cond_t wakeup; +#if defined(RTS_SUPPORTS_THREADS) + Condition wakeup; #endif struct StgMainThread_ *link; } StgMainThread; @@ -153,11 +185,12 @@ StgTSO *sleeping_queue; /* perhaps replace with a hash table? */ */ StgTSO *all_threads; -/* Threads suspended in _ccall_GC. +/* When a thread performs a safe C call (_ccall_GC, using old + * terminology), it gets put on the suspended_ccalling_threads + * list. Used by the garbage collector. */ static StgTSO *suspended_ccalling_threads; -static void GetRoots(void); static StgTSO *threadStackOverflow(StgTSO *tso); /* KH: The following two flags are shared memory locations. There is no need @@ -197,18 +230,6 @@ StgThreadID next_thread_id = 1; #define MIN_STACK_WORDS (RESERVED_STACK_WORDS + sizeofW(StgStopFrame) + 2) -/* Free capability list. - * Locks required: sched_mutex. - */ -#ifdef SMP -//@cindex free_capabilities -//@cindex n_free_capabilities -Capability *free_capabilities; /* Available capabilities for running threads */ -nat n_free_capabilities; /* total number of available capabilities */ -#else -//@cindex MainRegTable -Capability MainRegTable; /* for non-SMP, we have one global capability */ -#endif #if defined(GRAN) StgTSO *CurrentTSO; @@ -222,13 +243,6 @@ StgTSO dummy_tso; rtsBool ready_to_gc; -/* All our current task ids, saved in case we need to kill them later. 
- */ -#ifdef SMP -//@cindex task_ids -task_info *task_ids; -#endif - void addToBlockedQueue ( StgTSO *tso ); static void schedule ( void ); @@ -245,22 +259,71 @@ static void detectBlackHoles ( void ); static void sched_belch(char *s, ...); #endif -#ifdef SMP -//@cindex sched_mutex -//@cindex term_mutex -//@cindex thread_ready_cond -//@cindex gc_pending_cond -pthread_mutex_t sched_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_mutex_t term_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_cond_t thread_ready_cond = PTHREAD_COND_INITIALIZER; -pthread_cond_t gc_pending_cond = PTHREAD_COND_INITIALIZER; +#if defined(RTS_SUPPORTS_THREADS) +/* ToDo: carefully document the invariants that go together + * with these synchronisation objects. + */ +Mutex sched_mutex = INIT_MUTEX_VAR; +Mutex term_mutex = INIT_MUTEX_VAR; +#if defined(THREADED_RTS) +/* + * The rts_mutex is the 'big lock' that the active native + * thread within the RTS holds while executing code. + * It is given up when the thread makes a transition out of + * the RTS (e.g., to perform an external C call), hopefully + * for another thread to take over its chores and enter + * the RTS. + * + */ +Mutex rts_mutex = INIT_MUTEX_VAR; +/* + * When a native thread has completed executing an external + * call, it needs to communicate the result back to the + * (Haskell) thread that made the call. Do this as follows: + * + * - in resumeThread(), the thread increments the counter + * threads_waiting, and then blocks on the 'big' RTS lock. + * - upon entry to the scheduler, the thread that's currently + * holding the RTS lock checks threads_waiting. If there + * are native threads waiting, it gives up its RTS lock + * and tries to re-grab the RTS lock [perhaps after having + * waited for a bit..?] + * - care must be taken to deal with the case where more than + * one external thread are waiting on the lock. 
[ToDo: more] + * + */ +static nat threads_waiting = 0; +#endif + + +/* thread_ready_cond: when signalled, a thread has become runnable for a + * task to execute. + * + * In the non-SMP case, it also implies that the thread that is woken up has + * exclusive access to the RTS and all its DS (that are not under sched_mutex's + * control). + * + * thread_ready_cond is signalled whenever COND_NO_THREADS_READY doesn't hold. + * + */ +Condition thread_ready_cond = INIT_COND_VAR; +#if 0 +/* For documentation purposes only */ +#define COND_NO_THREADS_READY() (noCapabilities() || EMPTY_RUN_QUEUE()) +#endif + +#if defined(SMP) +Condition gc_pending_cond = INIT_COND_VAR; nat await_death; #endif +#endif + #if defined(PAR) StgTSO *LastTSO; rtsTime TimeOfLastYield; +rtsBool emitSchedule = rtsTrue; #endif #if DEBUG @@ -281,12 +344,35 @@ char *threadReturnCode_strs[] = { }; #endif +#if defined(PAR) +StgTSO * createSparkThread(rtsSpark spark); +StgTSO * activateSpark (rtsSpark spark); +#endif + /* * The thread state for the main thread. // ToDo: check whether not needed any more StgTSO *MainTSO; */ +#if defined(PAR) || defined(RTS_SUPPORTS_THREADS) +static void taskStart(void); +static void +taskStart(void) +{ + /* threads start up using 'taskStart', so make them + grab the RTS lock. */ +#if defined(THREADED_RTS) + ACQUIRE_LOCK(&rts_mutex); + taskNotAvailable(); +#endif + schedule(); +} +#endif + + + + //@node Main scheduling loop, Suspend and Resume, Prototypes, Main scheduling code //@subsection Main scheduling loop @@ -339,10 +425,36 @@ schedule( void ) rtsSpark spark; StgTSO *tso; GlobalTaskId pe; + rtsBool receivedFinish = rtsFalse; +# if defined(DEBUG) + nat tp_size, sp_size; // stats only +# endif #endif rtsBool was_interrupted = rtsFalse; ACQUIRE_LOCK(&sched_mutex); + +#if defined(THREADED_RTS) + /* ToDo: consider SMP support */ + if (threads_waiting > 0) { + /* (At least) one native thread is waiting to + * deposit the result of an external call.
So, + * give up our RTS executing privileges and let + * one of them continue. + * + */ + taskAvailable(); + RELEASE_LOCK(&sched_mutex); + IF_DEBUG(scheduler, sched_belch("worker thread (%d): giving up RTS token (threads_waiting=%d)\n", osThreadId(), threads_waiting)); + RELEASE_LOCK(&rts_mutex); + /* ToDo: come up with mechanism that guarantees that + * the main thread doesn't loop here. + */ + yieldThread(); + /* ToDo: longjmp() */ + taskStart(); + } +#endif #if defined(GRAN) @@ -370,8 +482,8 @@ schedule( void ) #elif defined(PAR) - while (!GlobalStopPending) { /* GlobalStopPending set in par_exit */ - + while (!receivedFinish) { /* set by processMessages */ + /* when receiving PP_FINISH message */ #else while (1) { @@ -396,7 +508,7 @@ schedule( void ) * should be done more efficiently without a linear scan * of the main threads list, somehow... */ -#ifdef SMP +#if defined(RTS_SUPPORTS_THREADS) { StgMainThread *m, **prev; prev = &main_threads; @@ -408,16 +520,17 @@ schedule( void ) } *prev = m->link; m->stat = Success; - pthread_cond_broadcast(&m->wakeup); + broadcastCondition(&m->wakeup); break; case ThreadKilled: + if (m->ret) *(m->ret) = NULL; *prev = m->link; if (was_interrupted) { m->stat = Interrupted; } else { m->stat = Killed; } - pthread_cond_broadcast(&m->wakeup); + broadcastCondition(&m->wakeup); break; default: break; @@ -425,7 +538,8 @@ schedule( void ) } } -#else +#else /* not threaded */ + # if defined(PAR) /* in GUM do this only on the Main PE */ if (IAmMainThread) @@ -443,6 +557,7 @@ schedule( void ) m->stat = Success; return; } else { + if (m->ret) { *(m->ret) = NULL; }; if (was_interrupted) { m->stat = Interrupted; } else { @@ -457,10 +572,13 @@ schedule( void ) /* Top up the run queue from our spark pool. We try to make the * number of threads in the run queue equal to the number of * free capabilities. + * + * Disable spark support in SMP for now, non-essential & requires + * a little bit of work to make it compile cleanly. -- sof 1/02. 
*/ -#if defined(SMP) +#if 0 /* defined(SMP) */ { - nat n = n_free_capabilities; + nat n = getFreeCapabilities(); StgTSO *tso = run_queue_hd; /* Count the run queue */ @@ -471,34 +589,34 @@ schedule( void ) for (; n > 0; n--) { StgClosure *spark; - spark = findSpark(); + spark = findSpark(rtsFalse); if (spark == NULL) { break; /* no more sparks in the pool */ } else { /* I'd prefer this to be done in activateSpark -- HWL */ /* tricky - it needs to hold the scheduler lock and * not try to re-acquire it -- SDM */ - StgTSO *tso; - tso = createThread_(RtsFlags.GcFlags.initialStkSize, rtsTrue); - pushClosure(tso,spark); - PUSH_ON_RUN_QUEUE(tso); -#ifdef PAR - advisory_thread_count++; -#endif - + createSparkThread(spark); IF_DEBUG(scheduler, - sched_belch("turning spark of closure %p into a thread", + sched_belch("==^^ turning spark of closure %p into a thread", (StgClosure *)spark)); } } /* We need to wake up the other tasks if we just created some * work for them. */ - if (n_free_capabilities - n > 1) { - pthread_cond_signal(&thread_ready_cond); + if (getFreeCapabilities() - n > 1) { + signalCondition( &thread_ready_cond ); } } -#endif /* SMP */ +#endif // SMP + + /* check for signals each time around the scheduler */ +#ifndef mingw32_TARGET_OS + if (signals_pending()) { + startSignalHandlers(); + } +#endif /* Check whether any waiting threads need to be woken up. If the * run queue is empty, and there are no other tasks running, we @@ -506,24 +624,16 @@ schedule( void ) * ToDo: what if another client comes along & requests another * main thread? */ - if (blocked_queue_hd != END_TSO_QUEUE || sleeping_queue != END_TSO_QUEUE) { - awaitEvent( - (run_queue_hd == END_TSO_QUEUE) -#ifdef SMP - && (n_free_capabilities == RtsFlags.ParFlags.nNodes) + if ( !EMPTY_QUEUE(blocked_queue_hd) || !EMPTY_QUEUE(sleeping_queue) ) { + awaitEvent( EMPTY_RUN_QUEUE() +#if defined(SMP) + && allFreeCapabilities() #endif ); } /* we can be interrupted while waiting for I/O... 
*/ if (interrupted) continue; - /* check for signals each time around the scheduler */ -#ifndef mingw32_TARGET_OS - if (signals_pending()) { - start_signal_handlers(); - } -#endif - /* * Detect deadlock: when we have no threads to run, there are no * threads waiting on I/O or sleeping, and all the other tasks are @@ -535,58 +645,114 @@ schedule( void ) * If no threads are black holed, we have a deadlock situation, so * inform all the main threads. */ -#ifdef SMP - if (blocked_queue_hd == END_TSO_QUEUE - && run_queue_hd == END_TSO_QUEUE - && sleeping_queue == END_TSO_QUEUE - && (n_free_capabilities == RtsFlags.ParFlags.nNodes)) +#ifndef PAR + if ( EMPTY_QUEUE(blocked_queue_hd) + && EMPTY_RUN_QUEUE() + && EMPTY_QUEUE(sleeping_queue) +#if defined(SMP) + && allFreeCapabilities() +#elif defined(THREADED_RTS) + && EMPTY_QUEUE(suspended_ccalling_threads) +#endif + ) { - IF_DEBUG(scheduler, sched_belch("deadlocked, checking for black holes...")); - detectBlackHoles(); - if (run_queue_hd == END_TSO_QUEUE) { - StgMainThread *m; - for (m = main_threads; m != NULL; m = m->link) { - m->ret = NULL; - m->stat = Deadlock; - pthread_cond_broadcast(&m->wakeup); + IF_DEBUG(scheduler, sched_belch("deadlocked, forcing major GC...")); + RELEASE_LOCK(&sched_mutex); + GarbageCollect(GetRoots,rtsTrue); + ACQUIRE_LOCK(&sched_mutex); + IF_DEBUG(scheduler, sched_belch("GC done.")); + if ( EMPTY_QUEUE(blocked_queue_hd) + && EMPTY_RUN_QUEUE() + && EMPTY_QUEUE(sleeping_queue) ) { + + IF_DEBUG(scheduler, sched_belch("still deadlocked, checking for black holes...")); + detectBlackHoles(); + + /* No black holes, so probably a real deadlock. Send the + * current main thread the Deadlock exception (or in the SMP + * build, send *all* main threads the deadlock exception, + * since none of them can make progress). 
+ */ + if ( EMPTY_RUN_QUEUE() ) { + StgMainThread *m; +#if defined(RTS_SUPPORTS_THREADS) + for (m = main_threads; m != NULL; m = m->link) { + switch (m->tso->why_blocked) { + case BlockedOnBlackHole: + raiseAsync(m->tso, (StgClosure *)NonTermination_closure); + break; + case BlockedOnException: + case BlockedOnMVar: + raiseAsync(m->tso, (StgClosure *)Deadlock_closure); + break; + default: + barf("deadlock: main thread blocked in a strange way"); + } + } +#else + m = main_threads; + switch (m->tso->why_blocked) { + case BlockedOnBlackHole: + raiseAsync(m->tso, (StgClosure *)NonTermination_closure); + break; + case BlockedOnException: + case BlockedOnMVar: + raiseAsync(m->tso, (StgClosure *)Deadlock_closure); + break; + default: + barf("deadlock: main thread blocked in a strange way"); + } +#endif } - main_threads = NULL; - } - } -#else /* ! SMP */ - if (blocked_queue_hd == END_TSO_QUEUE - && run_queue_hd == END_TSO_QUEUE - && sleeping_queue == END_TSO_QUEUE) - { - IF_DEBUG(scheduler, sched_belch("deadlocked, checking for black holes...")); - detectBlackHoles(); - if (run_queue_hd == END_TSO_QUEUE) { - StgMainThread *m = main_threads; - m->ret = NULL; - m->stat = Deadlock; - main_threads = m->link; - return; +#if defined(RTS_SUPPORTS_THREADS) + if ( EMPTY_RUN_QUEUE() ) { + IF_DEBUG(scheduler, sched_belch("all done, it seems...shut down.")); + shutdownHaskellAndExit(0); + + } +#endif + ASSERT( !EMPTY_RUN_QUEUE() ); } } +#elif defined(PAR) + /* ToDo: add deadlock detection in GUM (similar to SMP) -- HWL */ #endif -#ifdef SMP +#if defined(SMP) /* If there's a GC pending, don't do anything until it has * completed. */ if (ready_to_gc) { IF_DEBUG(scheduler,sched_belch("waiting for GC")); - pthread_cond_wait(&gc_pending_cond, &sched_mutex); + waitCondition( &gc_pending_cond, &sched_mutex ); } - +#endif + +#if defined(SMP) /* block until we've got a thread on the run queue and a free * capability. 
*/ - while (run_queue_hd == END_TSO_QUEUE || free_capabilities == NULL) { + while ( noCapabilities() || EMPTY_RUN_QUEUE() ) { IF_DEBUG(scheduler, sched_belch("waiting for work")); - pthread_cond_wait(&thread_ready_cond, &sched_mutex); + waitCondition( &thread_ready_cond, &sched_mutex ); IF_DEBUG(scheduler, sched_belch("work now available")); } +#elif defined(THREADED_RTS) + if ( EMPTY_RUN_QUEUErun_queue_hd == END_TSO_QUEUE ) { + /* no work available, wait for external calls to complete. */ + IF_DEBUG(scheduler, sched_belch("worker thread (%d): waiting for external thread to complete..", osThreadId())); + taskAvailable(); + RELEASE_LOCK(&rts_mutex); + + while ( EMPTY_RUN_QUEUE() ) { + waitCondition(&thread_ready_cond, &sched_mutex); + }; + RELEASE_LOCK(&sched_mutex); + + IF_DEBUG(scheduler, sched_belch("worker thread (%d): re-awakened from no-work slumber..\n", osThreadId())); + /* ToDo: longjmp() */ + taskStart(); + } #endif #if defined(GRAN) @@ -603,7 +769,7 @@ schedule( void ) if (!RtsFlags.GranFlags.Light) handleIdlePEs(); - IF_DEBUG(gran, fprintf(stderr, "GRAN: switch by event-type\n")) + IF_DEBUG(gran, fprintf(stderr, "GRAN: switch by event-type\n")); /* main event dispatcher in GranSim */ switch (event->evttype) { @@ -717,7 +883,7 @@ schedule( void ) IF_DEBUG(gran, fprintf(stderr, "GRAN: About to run current thread, which is\n"); - G_TSO(t,5)) + G_TSO(t,5)); context_switch = 0; // turned on via GranYield, checking events and time slice @@ -727,14 +893,13 @@ schedule( void ) procStatus[CurrentProc] = Busy; #elif defined(PAR) - if (PendingFetches != END_BF_QUEUE) { processFetches(); } /* ToDo: phps merge with spark activation above */ /* check whether we have local work and send requests if we have none */ - if (run_queue_hd == END_TSO_QUEUE) { /* no runnable threads */ + if (EMPTY_RUN_QUEUE()) { /* no runnable threads */ /* :-[ no local threads => look out for local sparks */ /* the spark pool for the current PE */ pool = &(MainRegTable.rSparks); // generalise 
to cap = &MainRegTable @@ -748,8 +913,8 @@ schedule( void ) * to turn one of those pending sparks into a * thread... */ - - spark = findSpark(); /* get a spark */ + + spark = findSpark(rtsFalse); /* get a spark */ if (spark != (rtsSpark) NULL) { tso = activateSpark(spark); /* turn the spark into a thread */ IF_PAR_DEBUG(schedule, @@ -766,9 +931,13 @@ schedule( void ) spark_queue_len(pool))); goto next_thread; } - } else + } + + /* If we still have no work we need to send a FISH to get a spark + from another PE + */ + if (EMPTY_RUN_QUEUE()) { /* =8-[ no local sparks => look for work on other PEs */ - { /* * We really have absolutely no work. Send out a fish * (there may be some out there already), and wait for @@ -777,28 +946,48 @@ schedule( void ) * we're hoping to see. (Of course, we still have to * respond to other types of messages.) */ - if (//!fishing && - outstandingFishes < RtsFlags.ParFlags.maxFishes ) { // && - // (last_fish_arrived_at+FISH_DELAY < CURRENT_TIME)) { - /* fishing set in sendFish, processFish; + TIME now = msTime() /*CURRENT_TIME*/; + IF_PAR_DEBUG(verbose, + belch("-- now=%ld", now)); + IF_PAR_DEBUG(verbose, + if (outstandingFishes < RtsFlags.ParFlags.maxFishes && + (last_fish_arrived_at!=0 && + last_fish_arrived_at+RtsFlags.ParFlags.fishDelay > now)) { + belch("--$$ delaying FISH until %ld (last fish %ld, delay %ld, now %ld)", + last_fish_arrived_at+RtsFlags.ParFlags.fishDelay, + last_fish_arrived_at, + RtsFlags.ParFlags.fishDelay, now); + }); + + if (outstandingFishes < RtsFlags.ParFlags.maxFishes && + (last_fish_arrived_at==0 || + (last_fish_arrived_at+RtsFlags.ParFlags.fishDelay <= now))) { + /* outstandingFishes is set in sendFish, processFish; avoid flooding system with fishes via delay */ pe = choosePE(); sendFish(pe, mytid, NEW_FISH_AGE, NEW_FISH_HISTORY, NEW_FISH_HUNGER); + + // Global statistics: count no. 
of fishes + if (RtsFlags.ParFlags.ParStats.Global && + RtsFlags.GcFlags.giveStats > NO_GC_STATS) { + globalParStats.tot_fish_mess++; + } } - - processMessages(); + + receivedFinish = processMessages(); goto next_thread; - // ReSchedule(0); } } else if (PacketsWaiting()) { /* Look for incoming messages */ - processMessages(); + receivedFinish = processMessages(); } /* Now we are sure that we have some work available */ ASSERT(run_queue_hd != END_TSO_QUEUE); + /* Take a thread from the run queue, if we have work */ t = POP_RUN_QUEUE(); // take_off_run_queue(END_TSO_QUEUE); + IF_DEBUG(sanity,checkTSO(t)); /* ToDo: write something to the log-file if (RTSflags.ParFlags.granSimStats && !sameThread) @@ -809,17 +998,23 @@ schedule( void ) /* the spark pool for the current PE */ pool = &(MainRegTable.rSparks); // generalise to cap = &MainRegTable - IF_DEBUG(scheduler, belch("--^^ %d sparks on [%#x] (hd=%x; tl=%x; base=%x, lim=%x)", - spark_queue_len(pool), - CURRENT_PROC, - pool->hd, pool->tl, pool->base, pool->lim)); - - IF_DEBUG(scheduler, belch("--== %d threads on [%#x] (hd=%x; tl=%x)", - run_queue_len(), CURRENT_PROC, - run_queue_hd, run_queue_tl)); + IF_DEBUG(scheduler, + belch("--=^ %d threads, %d sparks on [%#x]", + run_queue_len(), spark_queue_len(pool), CURRENT_PROC)); + +#if 1 + if (0 && RtsFlags.ParFlags.ParStats.Full && + t && LastTSO && t->id != LastTSO->id && + LastTSO->why_blocked == NotBlocked && + LastTSO->what_next != ThreadComplete) { + // if previously scheduled TSO not blocked we have to record the context switch + DumpVeryRawGranEvent(TimeOfLastYield, CURRENT_PROC, CURRENT_PROC, + GR_DESCHEDULE, LastTSO, (StgClosure *)NULL, 0, 0); + } -#if 0 - if (t != LastTSO) { + if (RtsFlags.ParFlags.ParStats.Full && + (emitSchedule /* forced emit */ || + (t && LastTSO && t->id != LastTSO->id))) { /* we are running a different TSO, so write a schedule event to log file NB: If we use fair scheduling we also have to write a deschedule @@ -829,8 +1024,9 @@ schedule( 
void ) */ DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC, GR_SCHEDULE, t, (StgClosure *)NULL, 0, 0); - + emitSchedule = rtsFalse; } + #endif #else /* !GRAN && !PAR */ @@ -838,30 +1034,26 @@ schedule( void ) */ ASSERT(run_queue_hd != END_TSO_QUEUE); t = POP_RUN_QUEUE(); + // Sanity check the thread we're about to run. This can be + // expensive if there is lots of thread switching going on... IF_DEBUG(sanity,checkTSO(t)); - -#endif - - /* grab a capability - */ -#ifdef SMP - cap = free_capabilities; - free_capabilities = cap->link; - n_free_capabilities--; -#else - cap = &MainRegTable; #endif - cap->rCurrentTSO = t; + grabCapability(&cap); + cap->r.rCurrentTSO = t; /* context switches are now initiated by the timer signal, unless * the user specified "context switch as often as possible", with * +RTS -C0 */ - if (RtsFlags.ConcFlags.ctxtSwitchTicks == 0 - && (run_queue_hd != END_TSO_QUEUE - || blocked_queue_hd != END_TSO_QUEUE - || sleeping_queue != END_TSO_QUEUE)) + if ( +#ifdef PROFILING + RtsFlags.ProfFlags.profileInterval == 0 || +#endif + (RtsFlags.ConcFlags.ctxtSwitchTicks == 0 + && (run_queue_hd != END_TSO_QUEUE + || blocked_queue_hd != END_TSO_QUEUE + || sleeping_queue != END_TSO_QUEUE))) context_switch = 1; else context_switch = 0; @@ -871,31 +1063,28 @@ schedule( void ) IF_DEBUG(scheduler, sched_belch("-->> Running TSO %ld (%p) %s ...", t->id, t, whatNext_strs[t->what_next])); +#ifdef PROFILING + startHeapProfTimer(); +#endif + /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ /* Run the current thread */ - switch (cap->rCurrentTSO->what_next) { + switch (cap->r.rCurrentTSO->what_next) { case ThreadKilled: case ThreadComplete: - /* Thread already finished, return to scheduler. */ - ret = ThreadFinished; - break; + /* Thread already finished, return to scheduler. 
*/ + ret = ThreadFinished; + break; case ThreadEnterGHC: - ret = StgRun((StgFunPtr) stg_enterStackTop, cap); - break; + ret = StgRun((StgFunPtr) stg_enterStackTop, &cap->r); + break; case ThreadRunGHC: - ret = StgRun((StgFunPtr) stg_returnToStackTop, cap); - break; + ret = StgRun((StgFunPtr) stg_returnToStackTop, &cap->r); + break; case ThreadEnterInterp: -#ifdef GHCI - { - IF_DEBUG(scheduler,sched_belch("entering interpreter")); - ret = interpretBCO(cap); - break; - } -#else - barf("Panic: entered a BCO but no bytecode interpreter in this build"); -#endif + ret = interpretBCO(cap); + break; default: barf("schedule: invalid what_next field"); } @@ -903,28 +1092,89 @@ schedule( void ) /* Costs for the scheduler are assigned to CCS_SYSTEM */ #ifdef PROFILING + stopHeapProfTimer(); CCCS = CCS_SYSTEM; #endif ACQUIRE_LOCK(&sched_mutex); #ifdef SMP - IF_DEBUG(scheduler,fprintf(stderr,"scheduler (task %ld): ", pthread_self());); + IF_DEBUG(scheduler,fprintf(stderr,"scheduler (task %ld): ", osThreadId());); #elif !defined(GRAN) && !defined(PAR) IF_DEBUG(scheduler,fprintf(stderr,"scheduler: ");); #endif - t = cap->rCurrentTSO; + t = cap->r.rCurrentTSO; #if defined(PAR) /* HACK 675: if the last thread didn't yield, make sure to print a SCHEDULE event to the log file when StgRunning the next thread, even if it is the same one as before */ - LastTSO = t; //(ret == ThreadBlocked) ? END_TSO_QUEUE : t; + LastTSO = t; TimeOfLastYield = CURRENT_TIME; #endif switch (ret) { case HeapOverflow: +#if defined(GRAN) + IF_DEBUG(gran, DumpGranEvent(GR_DESCHEDULE, t)); + globalGranStats.tot_heapover++; +#elif defined(PAR) + globalParStats.tot_heapover++; +#endif + + // did the task ask for a large block? + if (cap->r.rHpAlloc > BLOCK_SIZE_W) { + // if so, get one and push it on the front of the nursery. 
+ bdescr *bd; + nat blocks; + + blocks = (nat)BLOCK_ROUND_UP(cap->r.rHpAlloc * sizeof(W_)) / BLOCK_SIZE; + + IF_DEBUG(scheduler,belch("--<< thread %ld (%p; %s) stopped: requesting a large block (size %d)", + t->id, t, + whatNext_strs[t->what_next], blocks)); + + // don't do this if it would push us over the + // alloc_blocks_lim limit; we'll GC first. + if (alloc_blocks + blocks < alloc_blocks_lim) { + + alloc_blocks += blocks; + bd = allocGroup( blocks ); + + // link the new group into the list + bd->link = cap->r.rCurrentNursery; + bd->u.back = cap->r.rCurrentNursery->u.back; + if (cap->r.rCurrentNursery->u.back != NULL) { + cap->r.rCurrentNursery->u.back->link = bd; + } else { + ASSERT(g0s0->blocks == cap->r.rCurrentNursery && + g0s0->blocks == cap->r.rNursery); + cap->r.rNursery = g0s0->blocks = bd; + } + cap->r.rCurrentNursery->u.back = bd; + + // initialise it as a nursery block + bd->step = g0s0; + bd->gen_no = 0; + bd->flags = 0; + bd->free = bd->start; + + // don't forget to update the block count in g0s0. + g0s0->n_blocks += blocks; + ASSERT(countBlocks(g0s0->blocks) == g0s0->n_blocks); + + // now update the nursery to point to the new block + cap->r.rCurrentNursery = bd; + + // we might be unlucky and have another thread get on the + // run queue before us and steal the large block, but in that + // case the thread will just end up requesting another large + // block. + PUSH_ON_RUN_QUEUE(t); + break; + } + } + /* make all the running tasks block on a condition variable, * maybe set context_switch and wait till they all pile in, * then have them wait on a GC condition variable. @@ -934,6 +1184,15 @@ schedule( void ) threadPaused(t); #if defined(GRAN) ASSERT(!is_on_queue(t,CurrentProc)); +#elif defined(PAR) + /* Currently we emit a DESCHEDULE event before GC in GUM. 
+ ToDo: either add separate event to distinguish SYSTEM time from rest + or just nuke this DESCHEDULE (and the following SCHEDULE) */ + if (0 && RtsFlags.ParFlags.ParStats.Full) { + DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC, + GR_DESCHEDULE, t, (StgClosure *)NULL, 0, 0); + emitSchedule = rtsTrue; + } #endif ready_to_gc = rtsTrue; @@ -943,6 +1202,15 @@ schedule( void ) break; case StackOverflow: +#if defined(GRAN) + IF_DEBUG(gran, + DumpGranEvent(GR_DESCHEDULE, t)); + globalGranStats.tot_stackover++; +#elif defined(PAR) + // IF_DEBUG(par, + // DumpGranEvent(GR_DESCHEDULE, t); + globalParStats.tot_stackover++; +#endif IF_DEBUG(scheduler,belch("--<< thread %ld (%p; %s) stopped, StackOverflow", t->id, t, whatNext_strs[t->what_next])); /* just adjust the stack for this thread, then pop it back @@ -974,8 +1242,9 @@ schedule( void ) DumpGranEvent(GR_DESCHEDULE, t)); globalGranStats.tot_yields++; #elif defined(PAR) - IF_DEBUG(par, - DumpGranEvent(GR_DESCHEDULE, t)); + // IF_DEBUG(par, + // DumpGranEvent(GR_DESCHEDULE, t); + globalParStats.tot_yields++; #endif /* put the thread back on the run queue. Then, if we're ready to * GC, check whether this is the last task to stop. 
If so, wake @@ -1008,7 +1277,18 @@ schedule( void ) //belch("&& Doing sanity check on all ThreadQueues (and their TSOs)."); checkThreadQsSanity(rtsTrue)); #endif +#if defined(PAR) + if (RtsFlags.ParFlags.doFairScheduling) { + /* this does round-robin scheduling; good for concurrency */ + APPEND_TO_RUN_QUEUE(t); + } else { + /* this does unfair scheduling; good for parallelism */ + PUSH_ON_RUN_QUEUE(t); + } +#else + /* this does round-robin scheduling; good for concurrency */ APPEND_TO_RUN_QUEUE(t); +#endif #if defined(GRAN) /* add a ContinueThread event to actually process the thread */ new_event(CurrentProc, CurrentProc, CurrentTime[CurrentProc], @@ -1017,7 +1297,7 @@ schedule( void ) IF_GRAN_DEBUG(bq, belch("GRAN: eventq and runnableq after adding yielded thread to queue again:"); G_EVENTQ(0); - G_CURR_THREADQ(0)) + G_CURR_THREADQ(0)); #endif /* GRAN */ break; @@ -1043,16 +1323,19 @@ schedule( void ) procStatus[CurrentProc] = Idle; */ #elif defined(PAR) - IF_DEBUG(par, - DumpGranEvent(GR_DESCHEDULE, t)); + IF_DEBUG(scheduler, + belch("--<< thread %ld (%p; %s) stopped, blocking on node %p with BQ: ", + t->id, t, whatNext_strs[t->what_next], t->block_info.closure)); + IF_PAR_DEBUG(bq, + + if (t->block_info.closure!=(StgClosure*)NULL) + print_bq(t->block_info.closure)); /* Send a fetch (if BlockedOnGA) and dump event to log file */ blockThread(t); - IF_DEBUG(scheduler, - belch("--<< thread %ld (%p; %s) stopped, blocking on node %p with BQ: ", - t->id, t, whatNext_strs[t->what_next], t->block_info.closure); - if (t->block_info.closure!=(StgClosure*)NULL) print_bq(t->block_info.closure)); + /* whatever we schedule next, we must log that schedule */ + emitSchedule = rtsTrue; #else /* !GRAN */ /* don't need to do anything. Either the thread is blocked on @@ -1086,8 +1369,17 @@ schedule( void ) #if defined(GRAN) endThread(t, CurrentProc); // clean-up the thread #elif defined(PAR) + /* For now all are advisory -- HWL */ + //if(t->priority==AdvisoryPriority) ?? 
advisory_thread_count--; - if (RtsFlags.ParFlags.ParStats.Full) + +# ifdef DIST + if(t->dist.priority==RevalPriority) + FinishReval(t); +# endif + + if (RtsFlags.ParFlags.ParStats.Full && + !RtsFlags.ParFlags.ParStats.Suppressed) DumpEndEvent(CURRENT_PROC, t, rtsFalse /* not mandatory */); #endif break; @@ -1097,13 +1389,20 @@ schedule( void ) } #ifdef SMP - cap->link = free_capabilities; - free_capabilities = cap; - n_free_capabilities++; + grabCapability(&cap); +#endif + +#ifdef PROFILING + if (RtsFlags.ProfFlags.profileInterval==0 || performHeapProfile) { + GarbageCollect(GetRoots, rtsTrue); + heapCensus(); + performHeapProfile = rtsFalse; + ready_to_gc = rtsFalse; // we already GC'd + } #endif #ifdef SMP - if (ready_to_gc && n_free_capabilities == RtsFlags.ParFlags.nNodes) + if (ready_to_gc && allFreeCapabilities() ) #else if (ready_to_gc) #endif @@ -1113,13 +1412,13 @@ schedule( void ) * to do it in another thread. Either way, we need to * broadcast on gc_pending_cond afterward. */ -#ifdef SMP +#if defined(RTS_SUPPORTS_THREADS) IF_DEBUG(scheduler,sched_belch("doing GC")); #endif GarbageCollect(GetRoots,rtsFalse); ready_to_gc = rtsFalse; #ifdef SMP - pthread_cond_broadcast(&gc_pending_cond); + broadcastCondition(&gc_pending_cond); #endif #if defined(GRAN) /* add a ContinueThread event to continue execution of current thread */ @@ -1129,27 +1428,25 @@ schedule( void ) IF_GRAN_DEBUG(bq, fprintf(stderr, "GRAN: eventq and runnableq after Garbage collection:\n"); G_EVENTQ(0); - G_CURR_THREADQ(0)) + G_CURR_THREADQ(0)); #endif /* GRAN */ } + #if defined(GRAN) next_thread: IF_GRAN_DEBUG(unused, print_eventq(EventHd)); event = get_next_event(); - #elif defined(PAR) next_thread: /* ToDo: wait for next message to arrive rather than busy wait */ - -#else /* GRAN */ - /* not any more - next_thread: - t = take_off_run_queue(END_TSO_QUEUE); - */ #endif /* GRAN */ + } /* end of while(1) */ + + IF_PAR_DEBUG(verbose, + belch("== Leaving schedule() after having received 
Finish")); } /* --------------------------------------------------------------------------- @@ -1161,15 +1458,18 @@ schedule( void ) void deleteAllThreads ( void ) { - StgTSO* t; + StgTSO* t, *next; IF_DEBUG(scheduler,sched_belch("deleting all threads")); - for (t = run_queue_hd; t != END_TSO_QUEUE; t = t->link) { + for (t = run_queue_hd; t != END_TSO_QUEUE; t = next) { + next = t->link; deleteThread(t); } - for (t = blocked_queue_hd; t != END_TSO_QUEUE; t = t->link) { + for (t = blocked_queue_hd; t != END_TSO_QUEUE; t = next) { + next = t->link; deleteThread(t); } - for (t = sleeping_queue; t != END_TSO_QUEUE; t = t->link) { + for (t = sleeping_queue; t != END_TSO_QUEUE; t = next) { + next = t->link; deleteThread(t); } run_queue_hd = run_queue_tl = END_TSO_QUEUE; @@ -1179,6 +1479,7 @@ void deleteAllThreads ( void ) /* startThread and insertThread are now in GranSim.c -- HWL */ + //@node Suspend and Resume, Run queue code, Main scheduling loop, Main scheduling code //@subsection Suspend and Resume @@ -1198,40 +1499,76 @@ void deleteAllThreads ( void ) * ------------------------------------------------------------------------- */ StgInt -suspendThread( Capability *cap ) +suspendThread( StgRegTable *reg ) { nat tok; + Capability *cap; + + /* assume that *reg is a pointer to the StgRegTable part + * of a Capability. 
+ */ + cap = (Capability *)((void *)reg - sizeof(StgFunTable)); ACQUIRE_LOCK(&sched_mutex); IF_DEBUG(scheduler, - sched_belch("thread %d did a _ccall_gc", cap->rCurrentTSO->id)); + sched_belch("thread %d did a _ccall_gc", cap->r.rCurrentTSO->id)); - threadPaused(cap->rCurrentTSO); - cap->rCurrentTSO->link = suspended_ccalling_threads; - suspended_ccalling_threads = cap->rCurrentTSO; + threadPaused(cap->r.rCurrentTSO); + cap->r.rCurrentTSO->link = suspended_ccalling_threads; + suspended_ccalling_threads = cap->r.rCurrentTSO; /* Use the thread ID as the token; it should be unique */ - tok = cap->rCurrentTSO->id; + tok = cap->r.rCurrentTSO->id; + + /* Hand back capability */ + releaseCapability(cap); + +#if defined(RTS_SUPPORTS_THREADS) && !defined(SMP) + /* Preparing to leave the RTS, so ensure there's a native thread/task + waiting to take over. + + ToDo: optimise this and only create a new task if there's a need + for one (i.e., if there's only one Concurrent Haskell thread alive, + there's no need to create a new task). + */ + IF_DEBUG(scheduler, sched_belch("worker thread (%d): leaving RTS\n", tok)); + startTask(taskStart); -#ifdef SMP - cap->link = free_capabilities; - free_capabilities = cap; - n_free_capabilities++; #endif + THREAD_RUNNABLE(); RELEASE_LOCK(&sched_mutex); + // RELEASE_LOCK(&rts_mutex); return tok; } -Capability * +StgRegTable * resumeThread( StgInt tok ) { StgTSO *tso, **prev; Capability *cap; +#if defined(THREADED_RTS) + IF_DEBUG(scheduler, sched_belch("thread %d returning, waiting for sched. 
lock.\n", tok)); ACQUIRE_LOCK(&sched_mutex); + threads_waiting++; + IF_DEBUG(scheduler, sched_belch("thread %d returning, threads waiting: %d.\n", tok, threads_waiting)); + RELEASE_LOCK(&sched_mutex); + + IF_DEBUG(scheduler, sched_belch("thread %d waiting for RTS lock...\n", tok)); + ACQUIRE_LOCK(&rts_mutex); + threads_waiting--; + taskNotAvailable(); + IF_DEBUG(scheduler, sched_belch("thread %d acquired RTS lock...\n", tok)); +#endif +#if defined(THREADED_RTS) + /* Free up any RTS-blocked threads. */ + broadcastCondition(&thread_ready_cond); +#endif + + /* Remove the thread off of the suspended list */ prev = &suspended_ccalling_threads; for (tso = suspended_ccalling_threads; tso != END_TSO_QUEUE; @@ -1246,23 +1583,19 @@ resumeThread( StgInt tok ) } tso->link = END_TSO_QUEUE; -#ifdef SMP - while (free_capabilities == NULL) { +#if defined(RTS_SUPPORTS_THREADS) + while ( noCapabilities() ) { IF_DEBUG(scheduler, sched_belch("waiting to resume")); - pthread_cond_wait(&thread_ready_cond, &sched_mutex); + waitCondition(&thread_ready_cond, &sched_mutex); IF_DEBUG(scheduler, sched_belch("resuming thread %d", tso->id)); } - cap = free_capabilities; - free_capabilities = cap->link; - n_free_capabilities--; -#else - cap = &MainRegTable; #endif - cap->rCurrentTSO = tso; + grabCapability(&cap); - RELEASE_LOCK(&sched_mutex); - return cap; + cap->r.rCurrentTSO = tso; + + return &cap->r; } @@ -1289,6 +1622,16 @@ int cmp_thread(const StgTSO *tso1, const StgTSO *tso2) } /* --------------------------------------------------------------------------- + * Fetching the ThreadID from an StgTSO. + * + * This is used in the implementation of Show for ThreadIds. + * ------------------------------------------------------------------------ */ +int rts_getThreadId(const StgTSO *tso) +{ + return tso->id; +} + +/* --------------------------------------------------------------------------- Create a new thread. The new thread starts with the given stack size. 
Before the @@ -1354,7 +1697,7 @@ createThread_(nat size, rtsBool have_lock) stack_size = size - TSO_STRUCT_SIZEW; tso = (StgTSO *)allocate(size); - TICK_ALLOC_TSO(size-TSO_STRUCT_SIZEW, 0); + TICK_ALLOC_TSO(stack_size, 0); SET_HDR(tso, &stg_TSO_info, CCS_SYSTEM); #if defined(GRAN) @@ -1398,8 +1741,14 @@ createThread_(nat size, rtsBool have_lock) */ #endif -#if defined(GRAN) || defined(PAR) - DumpGranEvent(GR_START,tso); +#if defined(GRAN) + if (RtsFlags.GranFlags.GranSimStats.Full) + DumpGranEvent(GR_START,tso); +#elif defined(PAR) + if (RtsFlags.ParFlags.ParStats.Full) + DumpGranEvent(GR_STARTQ,tso); + /* HACk to avoid SCHEDULE + LastTSO = tso; */ #endif /* Link the new thread on the global thread list. @@ -1407,6 +1756,10 @@ createThread_(nat size, rtsBool have_lock) tso->global_link = all_threads; all_threads = tso; +#if defined(DIST) + tso->dist.priority = MandatoryPriority; //by default that is... +#endif + #if defined(GRAN) tso->gran.pri = pri; # if defined(DEBUG) @@ -1455,6 +1808,13 @@ createThread_(nat size, rtsBool have_lock) globalGranStats.threads_created_on_PE[CurrentProc]++; globalGranStats.tot_sq_len += spark_queue_len(CurrentProc); globalGranStats.tot_sq_probes++; +#elif defined(PAR) + // collect parallel global statistics (currently done together with GC stats) + if (RtsFlags.ParFlags.ParStats.Global && + RtsFlags.GcFlags.giveStats > NO_GC_STATS) { + //fprintf(stderr, "Creating thread %d @ %11.2f\n", tso->id, usertime()); + globalParStats.tot_threads_created++; + } #endif #if defined(GRAN) @@ -1472,6 +1832,36 @@ createThread_(nat size, rtsBool have_lock) return tso; } +#if defined(PAR) +/* RFP: + all parallel thread creation calls should fall through the following routine. 
+*/ +StgTSO * +createSparkThread(rtsSpark spark) +{ StgTSO *tso; + ASSERT(spark != (rtsSpark)NULL); + if (advisory_thread_count >= RtsFlags.ParFlags.maxThreads) + { threadsIgnored++; + barf("{createSparkThread}Daq ghuH: refusing to create another thread; no more than %d threads allowed (currently %d)", + RtsFlags.ParFlags.maxThreads, advisory_thread_count); + return END_TSO_QUEUE; + } + else + { threadsCreated++; + tso = createThread_(RtsFlags.GcFlags.initialStkSize, rtsTrue); + if (tso==END_TSO_QUEUE) + barf("createSparkThread: Cannot create TSO"); +#if defined(DIST) + tso->priority = AdvisoryPriority; +#endif + pushClosure(tso,spark); + PUSH_ON_RUN_QUEUE(tso); + advisory_thread_count++; + } + return tso; +} +#endif + /* Turn a spark into a thread. ToDo: fix for SMP (needs to acquire SCHED_MUTEX!) @@ -1482,22 +1872,13 @@ StgTSO * activateSpark (rtsSpark spark) { StgTSO *tso; - - ASSERT(spark != (rtsSpark)NULL); - tso = createThread_(RtsFlags.GcFlags.initialStkSize, rtsTrue); - if (tso!=END_TSO_QUEUE) { - pushClosure(tso,spark); - PUSH_ON_RUN_QUEUE(tso); - advisory_thread_count++; - if (RtsFlags.ParFlags.ParStats.Full) { - //ASSERT(run_queue_hd == END_TSO_QUEUE); // I think ... - IF_PAR_DEBUG(verbose, - belch("==^^ activateSpark: turning spark of closure %p (%s) into a thread", - (StgClosure *)spark, info_type((StgClosure *)spark))); - } - } else { - barf("activateSpark: Cannot create TSO"); + tso = createSparkThread(spark); + if (RtsFlags.ParFlags.ParStats.Full) { + //ASSERT(run_queue_hd == END_TSO_QUEUE); // I think ... 
+ IF_PAR_DEBUG(verbose, + belch("==^^ activateSpark: turning spark of closure %p (%s) into a thread", + (StgClosure *)spark, info_type((StgClosure *)spark))); } // ToDo: fwd info on local/global spark to thread -- HWL // tso->gran.exported = spark->exported; @@ -1521,11 +1902,6 @@ activateSpark (rtsSpark spark) void scheduleThread(StgTSO *tso) { - if (tso==END_TSO_QUEUE){ - schedule(); - return; - } - ACQUIRE_LOCK(&sched_mutex); /* Put the new thread on the head of the runnable queue. The caller @@ -1543,29 +1919,12 @@ scheduleThread(StgTSO *tso) } /* --------------------------------------------------------------------------- - * startTasks() - * - * Start up Posix threads to run each of the scheduler tasks. - * I believe the task ids are not needed in the system as defined. - * KH @ 25/10/99 - * ------------------------------------------------------------------------ */ - -#if defined(PAR) || defined(SMP) -void * -taskStart( void *arg STG_UNUSED ) -{ - rts_evalNothing(NULL); -} -#endif - -/* --------------------------------------------------------------------------- * initScheduler() * * Initialise the scheduler. This resets all the queues - if the * queues contained any threads, they'll be garbage collected at the * next pass. * - * This now calls startTasks(), so should only be called once! KH @ 25/10/99 * ------------------------------------------------------------------------ */ #ifdef SMP @@ -1576,11 +1935,10 @@ term_handler(int sig STG_UNUSED) ACQUIRE_LOCK(&term_mutex); await_death--; RELEASE_LOCK(&term_mutex); - pthread_exit(NULL); + shutdownThread(); } #endif -//@cindex initScheduler void initScheduler(void) { @@ -1613,6 +1971,27 @@ initScheduler(void) RtsFlags.ConcFlags.ctxtSwitchTicks = RtsFlags.ConcFlags.ctxtSwitchTime / TICK_MILLISECS; + +#if defined(RTS_SUPPORTS_THREADS) + /* Initialise the mutex and condition variables used by + * the scheduler. 
*/ + initMutex(&sched_mutex); + initMutex(&term_mutex); + + initCondition(&thread_ready_cond); +#if defined(THREADED_RTS) + initMutex(&rts_mutex); +#endif + + initCondition(&gc_pending_cond); +#endif + +#if defined(THREADED_RTS) + /* Grab big lock */ + ACQUIRE_LOCK(&rts_mutex); + IF_DEBUG(scheduler, + sched_belch("worker thread (%d): acquired RTS lock\n", osThreadId())); +#endif /* Install the SIGHUP handler */ #ifdef SMP @@ -1628,92 +2007,31 @@ initScheduler(void) } #endif -#ifdef SMP - /* Allocate N Capabilities */ - { - nat i; - Capability *cap, *prev; - cap = NULL; - prev = NULL; - for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) { - cap = stgMallocBytes(sizeof(Capability), "initScheduler:capabilities"); - cap->link = prev; - prev = cap; - } - free_capabilities = cap; - n_free_capabilities = RtsFlags.ParFlags.nNodes; - } - IF_DEBUG(scheduler,fprintf(stderr,"scheduler: Allocated %d capabilities\n", - n_free_capabilities);); + /* A capability holds the state a native thread needs in + * order to execute STG code. At least one capability is + * floating around (only SMP builds have more than one). 
+ */ + initCapabilities(); + +#if defined(RTS_SUPPORTS_THREADS) + /* start our haskell execution tasks */ +# if defined(SMP) + startTaskManager(RtsFlags.ParFlags.nNodes, taskStart); +# else + startTaskManager(0,taskStart); +# endif #endif -#if defined(SMP) || defined(PAR) +#if /* defined(SMP) ||*/ defined(PAR) initSparkPools(); #endif } -#ifdef SMP -void -startTasks( void ) -{ - nat i; - int r; - pthread_t tid; - - /* make some space for saving all the thread ids */ - task_ids = stgMallocBytes(RtsFlags.ParFlags.nNodes * sizeof(task_info), - "initScheduler:task_ids"); - - /* and create all the threads */ - for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) { - r = pthread_create(&tid,NULL,taskStart,NULL); - if (r != 0) { - barf("startTasks: Can't create new Posix thread"); - } - task_ids[i].id = tid; - task_ids[i].mut_time = 0.0; - task_ids[i].mut_etime = 0.0; - task_ids[i].gc_time = 0.0; - task_ids[i].gc_etime = 0.0; - task_ids[i].elapsedtimestart = elapsedtime(); - IF_DEBUG(scheduler,fprintf(stderr,"scheduler: Started task: %ld\n",tid);); - } -} -#endif - void exitScheduler( void ) { -#ifdef SMP - nat i; - - /* Don't want to use pthread_cancel, since we'd have to install - * these silly exception handlers (pthread_cleanup_{push,pop}) around - * all our locks. - */ -#if 0 - /* Cancel all our tasks */ - for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) { - pthread_cancel(task_ids[i].id); - } - - /* Wait for all the tasks to terminate */ - for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) { - IF_DEBUG(scheduler,fprintf(stderr,"scheduler: waiting for task %ld\n", - task_ids[i].id)); - pthread_join(task_ids[i].id, NULL); - } -#endif - - /* Send 'em all a SIGHUP. That should shut 'em up. 
- */ - await_death = RtsFlags.ParFlags.nNodes; - for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) { - pthread_kill(task_ids[i].id,SIGTERM); - } - while (await_death > 0) { - sched_yield(); - } +#if defined(RTS_SUPPORTS_THREADS) + stopTaskManager(); #endif } @@ -1788,19 +2106,19 @@ waitThread(StgTSO *tso, /*out*/StgClosure **ret) m->tso = tso; m->ret = ret; m->stat = NoStatus; -#ifdef SMP - pthread_cond_init(&m->wakeup, NULL); +#if defined(RTS_SUPPORTS_THREADS) + initCondition(&m->wakeup); #endif m->link = main_threads; main_threads = m; - IF_DEBUG(scheduler, fprintf(stderr, "scheduler: new main thread (%d)\n", + IF_DEBUG(scheduler, fprintf(stderr, "== scheduler: new main thread (%d)\n", m->tso->id)); #ifdef SMP do { - pthread_cond_wait(&m->wakeup, &sched_mutex); + waitCondition(&m->wakeup, &sched_mutex); } while (m->stat == NoStatus); #elif defined(GRAN) /* GranSim specific init */ @@ -1810,17 +2128,18 @@ waitThread(StgTSO *tso, /*out*/StgClosure **ret) schedule(); #else + RELEASE_LOCK(&sched_mutex); schedule(); ASSERT(m->stat != NoStatus); #endif stat = m->stat; -#ifdef SMP - pthread_cond_destroy(&m->wakeup); +#if defined(RTS_SUPPORTS_THREADS) + closeCondition(&m->wakeup); #endif - IF_DEBUG(scheduler, fprintf(stderr, "scheduler: main thread (%d) finished\n", + IF_DEBUG(scheduler, fprintf(stderr, "== scheduler: main thread (%d) finished\n", m->tso->id)); free(m); @@ -1949,7 +2268,8 @@ take_off_run_queue(StgTSO *tso) { KH @ 25/10/99 */ -static void GetRoots(void) +void +GetRoots(evac_fn evac) { StgMainThread *m; @@ -1958,16 +2278,16 @@ static void GetRoots(void) nat i; for (i=0; i<=RtsFlags.GranFlags.proc; i++) { if ((run_queue_hds[i] != END_TSO_QUEUE) && ((run_queue_hds[i] != NULL))) - run_queue_hds[i] = (StgTSO *)MarkRoot((StgClosure *)run_queue_hds[i]); + evac((StgClosure **)&run_queue_hds[i]); if ((run_queue_tls[i] != END_TSO_QUEUE) && ((run_queue_tls[i] != NULL))) - run_queue_tls[i] = (StgTSO *)MarkRoot((StgClosure *)run_queue_tls[i]); + evac((StgClosure 
**)&run_queue_tls[i]); if ((blocked_queue_hds[i] != END_TSO_QUEUE) && ((blocked_queue_hds[i] != NULL))) - blocked_queue_hds[i] = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_hds[i]); + evac((StgClosure **)&blocked_queue_hds[i]); if ((blocked_queue_tls[i] != END_TSO_QUEUE) && ((blocked_queue_tls[i] != NULL))) - blocked_queue_tls[i] = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_tls[i]); + evac((StgClosure **)&blocked_queue_tls[i]); if ((ccalling_threadss[i] != END_TSO_QUEUE) && ((ccalling_threadss[i] != NULL))) - ccalling_threadss[i] = (StgTSO *)MarkRoot((StgClosure *)ccalling_threadss[i]); + evac((StgClosure **)&ccalling_threads[i]); } } @@ -1975,31 +2295,31 @@ static void GetRoots(void) #else /* !GRAN */ if (run_queue_hd != END_TSO_QUEUE) { - ASSERT(run_queue_tl != END_TSO_QUEUE); - run_queue_hd = (StgTSO *)MarkRoot((StgClosure *)run_queue_hd); - run_queue_tl = (StgTSO *)MarkRoot((StgClosure *)run_queue_tl); + ASSERT(run_queue_tl != END_TSO_QUEUE); + evac((StgClosure **)&run_queue_hd); + evac((StgClosure **)&run_queue_tl); } - + if (blocked_queue_hd != END_TSO_QUEUE) { - ASSERT(blocked_queue_tl != END_TSO_QUEUE); - blocked_queue_hd = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_hd); - blocked_queue_tl = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_tl); + ASSERT(blocked_queue_tl != END_TSO_QUEUE); + evac((StgClosure **)&blocked_queue_hd); + evac((StgClosure **)&blocked_queue_tl); } - + if (sleeping_queue != END_TSO_QUEUE) { - sleeping_queue = (StgTSO *)MarkRoot((StgClosure *)sleeping_queue); + evac((StgClosure **)&sleeping_queue); } #endif for (m = main_threads; m != NULL; m = m->link) { - m->tso = (StgTSO *)MarkRoot((StgClosure *)m->tso); + evac((StgClosure **)&m->tso); + } + if (suspended_ccalling_threads != END_TSO_QUEUE) { + evac((StgClosure **)&suspended_ccalling_threads); } - if (suspended_ccalling_threads != END_TSO_QUEUE) - suspended_ccalling_threads = - (StgTSO *)MarkRoot((StgClosure *)suspended_ccalling_threads); -#if defined(SMP) || defined(PAR) 
|| defined(GRAN) - markSparkQueue(); +#if defined(PAR) || defined(GRAN) + markSparkQueue(evac); #endif } @@ -2016,7 +2336,7 @@ static void GetRoots(void) This needs to be protected by the GC condition variable above. KH. -------------------------------------------------------------------------- */ -void (*extra_roots)(void); +void (*extra_roots)(evac_fn); void performGC(void) @@ -2031,17 +2351,16 @@ performMajorGC(void) } static void -AllRoots(void) +AllRoots(evac_fn evac) { - GetRoots(); /* the scheduler's roots */ - extra_roots(); /* the user's roots */ + GetRoots(evac); // the scheduler's roots + extra_roots(evac); // the user's roots } void -performGCWithRoots(void (*get_roots)(void)) +performGCWithRoots(void (*get_roots)(evac_fn)) { extra_roots = get_roots; - GarbageCollect(AllRoots,rtsFalse); } @@ -2086,10 +2405,10 @@ threadStackOverflow(StgTSO *tso) new_tso_size = round_to_mblocks(new_tso_size); /* Be MBLOCK-friendly */ new_stack_size = new_tso_size - TSO_STRUCT_SIZEW; - IF_DEBUG(scheduler, fprintf(stderr,"scheduler: increasing stack size from %d words to %d.\n", tso->stack_size, new_stack_size)); + IF_DEBUG(scheduler, fprintf(stderr,"== scheduler: increasing stack size from %d words to %d.\n", tso->stack_size, new_stack_size)); dest = (StgTSO *)allocate(new_tso_size); - TICK_ALLOC_TSO(new_tso_size-sizeofW(StgTSO),0); + TICK_ALLOC_TSO(new_stack_size,0); /* copy the TSO block and the old stack into the new area */ memcpy(dest,tso,TSO_STRUCT_SIZE); @@ -2104,7 +2423,7 @@ threadStackOverflow(StgTSO *tso) dest->stack_size = new_stack_size; /* and relocate the update frame list */ - relocate_TSO(tso, dest); + relocate_stack(dest, diff); /* Mark the old TSO as relocated. We have to check for relocated * TSOs in the garbage collector and any primops that deal with TSOs. 
@@ -2155,8 +2474,10 @@ unblockCount ( StgBlockingQueueElement *bqe, StgClosure *node ) update blocked and fetch time (depending on type of the orig closure) */ if (RtsFlags.ParFlags.ParStats.Full) { DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC, - GR_RESUME, ((StgTSO *)bqe), ((StgTSO *)bqe)->block_info.closure, + GR_RESUMEQ, ((StgTSO *)bqe), ((StgTSO *)bqe)->block_info.closure, 0, 0 /* spark_queue_len(ADVISORY_POOL) */); + if (EMPTY_RUN_QUEUE()) + emitSchedule = rtsTrue; switch (get_itbl(node)->type) { case FETCH_ME_BQ: @@ -2167,6 +2488,10 @@ unblockCount ( StgBlockingQueueElement *bqe, StgClosure *node ) case BLACKHOLE_BQ: ((StgTSO *)bqe)->par.blocktime += CURRENT_TIME-((StgTSO *)bqe)->par.blockedat; break; +#ifdef DIST + case MVAR: + break; +#endif default: barf("{unblockOneLocked}Daq Qagh: unexpected closure in blocking queue"); } @@ -2236,8 +2561,8 @@ unblockOneLocked(StgBlockingQueueElement *bqe, StgClosure *node) case BLOCKED_FETCH: /* if it's a BLOCKED_FETCH put it on the PendingFetches list */ next = bqe->link; - bqe->link = PendingFetches; - PendingFetches = bqe; + bqe->link = (StgBlockingQueueElement *)PendingFetches; + PendingFetches = (StgBlockedFetch *)bqe; break; # if defined(DEBUG) @@ -2256,7 +2581,7 @@ unblockOneLocked(StgBlockingQueueElement *bqe, StgClosure *node) (StgClosure *)bqe); # endif } - // IF_DEBUG(scheduler,sched_belch("waking up thread %ld", tso->id)); + IF_PAR_DEBUG(bq, fprintf(stderr, ", %p (%s)", bqe, info_type((StgClosure*)bqe))); return next; } @@ -2306,13 +2631,14 @@ awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node) nat len = 0; IF_GRAN_DEBUG(bq, - belch("## AwBQ for node %p on PE %d @ %ld by TSO %d (%p): ", \ + belch("##-_ AwBQ for node %p on PE %d @ %ld by TSO %d (%p): ", \ node, CurrentProc, CurrentTime[CurrentProc], CurrentTSO->id, CurrentTSO)); node_loc = where_is(node); - ASSERT(get_itbl(q)->type == TSO || // q is either a TSO or an RBHSave + ASSERT(q == END_BQ_QUEUE || + get_itbl(q)->type == TSO || // q is 
either a TSO or an RBHSave get_itbl(q)->type == CONSTR); // closure (type constructor) ASSERT(is_unique(node)); @@ -2382,15 +2708,23 @@ awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node) void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node) { - StgBlockingQueueElement *bqe, *next; + StgBlockingQueueElement *bqe; ACQUIRE_LOCK(&sched_mutex); IF_PAR_DEBUG(verbose, - belch("## AwBQ for node %p on [%x]: ", + belch("##-_ AwBQ for node %p on [%x]: ", node, mytid)); - - ASSERT(get_itbl(q)->type == TSO || +#ifdef DIST + //RFP + if(get_itbl(q)->type == CONSTR || q==END_BQ_QUEUE) { + IF_PAR_DEBUG(verbose, belch("## ... nothing to unblock so lets just return. RFP (BUG?)")); + return; + } +#endif + + ASSERT(q == END_BQ_QUEUE || + get_itbl(q)->type == TSO || get_itbl(q)->type == BLOCKED_FETCH || get_itbl(q)->type == CONSTR); @@ -2497,6 +2831,12 @@ unblockThread(StgTSO *tso) StgTSO *target = tso->block_info.tso; ASSERT(get_itbl(target)->type == TSO); + + if (target->what_next == ThreadRelocated) { + target = target->link; + ASSERT(get_itbl(target)->type == TSO); + } + ASSERT(target->blocked_exceptions != NULL); last = (StgBlockingQueueElement **)&target->blocked_exceptions; @@ -2515,6 +2855,7 @@ unblockThread(StgTSO *tso) case BlockedOnRead: case BlockedOnWrite: { + /* take TSO off blocked_queue */ StgBlockingQueueElement *prev = NULL; for (t = (StgBlockingQueueElement *)blocked_queue_hd; t != END_BQ_QUEUE; prev = t, t = t->link) { @@ -2538,6 +2879,7 @@ unblockThread(StgTSO *tso) case BlockedOnDelay: { + /* take TSO off sleeping_queue */ StgBlockingQueueElement *prev = NULL; for (t = (StgBlockingQueueElement *)sleeping_queue; t != END_BQ_QUEUE; prev = t, t = t->link) { @@ -2617,6 +2959,12 @@ unblockThread(StgTSO *tso) StgTSO *target = tso->block_info.tso; ASSERT(get_itbl(target)->type == TSO); + + while (target->what_next == ThreadRelocated) { + target = target->link; + ASSERT(get_itbl(target)->type == TSO); + } + ASSERT(target->blocked_exceptions != 
NULL); last = &target->blocked_exceptions; @@ -2749,7 +3097,7 @@ raiseAsync(StgTSO *tso, StgClosure *exception) } while (1) { - int words = ((P_)su - (P_)sp) - 1; + nat words = ((P_)su - (P_)sp) - 1; nat i; StgAP_UPD * ap; @@ -2836,14 +3184,24 @@ raiseAsync(StgTSO *tso, StgClosure *exception) /* Replace the updatee with an indirection - happily * this will also wake up any threads currently * waiting on the result. + * + * Warning: if we're in a loop, more than one update frame on + * the stack may point to the same object. Be careful not to + * overwrite an IND_OLDGEN in this case, because we'll screw + * up the mutable lists. To be on the safe side, don't + * overwrite any kind of indirection at all. See also + * threadSqueezeStack in GC.c, where we have to make a similar + * check. */ - UPD_IND_NOLOCK(su->updatee,ap); /* revert the black hole */ + if (!closure_IND(su->updatee)) { + UPD_IND_NOLOCK(su->updatee,ap); /* revert the black hole */ + } su = su->link; sp += sizeofW(StgUpdateFrame) -1; sp[0] = (W_)ap; /* push onto stack */ break; } - + case CATCH_FRAME: { StgCatchFrame *cf = (StgCatchFrame *)su; @@ -2973,6 +3331,11 @@ detectBlackHoles( void ) for (t = all_threads; t != END_TSO_QUEUE; t = t->global_link) { + while (t->what_next == ThreadRelocated) { + t = t->link; + ASSERT(get_itbl(t)->type == TSO); + } + if (t->why_blocked != BlockedOnBlackHole) { continue; } @@ -3081,7 +3444,22 @@ printAllThreads(void) { StgTSO *t; +# if defined(GRAN) + char time_string[TIME_STR_LEN], node_str[NODE_STR_LEN]; + ullong_format_string(TIME_ON_PROC(CurrentProc), + time_string, rtsFalse/*no commas!*/); + + sched_belch("all threads at [%s]:", time_string); +# elif defined(PAR) + char time_string[TIME_STR_LEN], node_str[NODE_STR_LEN]; + ullong_format_string(CURRENT_TIME, + time_string, rtsFalse/*no commas!*/); + + sched_belch("all threads at [%s]:", time_string); +# else sched_belch("all threads:"); +# endif + for (t = all_threads; t != END_TSO_QUEUE; t = t->global_link) { 
fprintf(stderr, "\tthread %d ", t->id); printThreadStatus(t); @@ -3107,27 +3485,41 @@ print_bq (StgClosure *node) /* should cover all closures that may have a blocking queue */ ASSERT(get_itbl(node)->type == BLACKHOLE_BQ || get_itbl(node)->type == FETCH_ME_BQ || - get_itbl(node)->type == RBH); + get_itbl(node)->type == RBH || + get_itbl(node)->type == MVAR); ASSERT(node!=(StgClosure*)NULL); // sanity check + + print_bqe(((StgBlockingQueue*)node)->blocking_queue); +} + +/* + Print a whole blocking queue starting with the element bqe. +*/ +void +print_bqe (StgBlockingQueueElement *bqe) +{ + rtsBool end; + /* NB: In a parallel setup a BQ of an RBH must end with an RBH_Save closure; */ - for (bqe = ((StgBlockingQueue*)node)->blocking_queue, end = (bqe==END_BQ_QUEUE); + for (end = (bqe==END_BQ_QUEUE); !end; // iterate until bqe points to a CONSTR - end = (get_itbl(bqe)->type == CONSTR) || (bqe->link==END_BQ_QUEUE), bqe = end ? END_BQ_QUEUE : bqe->link) { - ASSERT(bqe != END_BQ_QUEUE); // sanity check - ASSERT(bqe != (StgTSO*)NULL); // sanity check + end = (get_itbl(bqe)->type == CONSTR) || (bqe->link==END_BQ_QUEUE), + bqe = end ? 
END_BQ_QUEUE : bqe->link) { + ASSERT(bqe != END_BQ_QUEUE); // sanity check + ASSERT(bqe != (StgBlockingQueueElement *)NULL); // sanity check /* types of closures that may appear in a blocking queue */ ASSERT(get_itbl(bqe)->type == TSO || get_itbl(bqe)->type == BLOCKED_FETCH || get_itbl(bqe)->type == CONSTR); /* only BQs of an RBH end with an RBH_Save closure */ - ASSERT(get_itbl(bqe)->type != CONSTR || get_itbl(node)->type == RBH); + //ASSERT(get_itbl(bqe)->type != CONSTR || get_itbl(node)->type == RBH); switch (get_itbl(bqe)->type) { case TSO: - fprintf(stderr," TSO %d (%x),", + fprintf(stderr," TSO %u (%x),", ((StgTSO *)bqe)->id, ((StgTSO *)bqe)); break; case BLOCKED_FETCH: @@ -3145,8 +3537,8 @@ print_bq (StgClosure *node) "RBH_Save_?"), get_itbl(bqe)); break; default: - barf("Unexpected closure type %s in blocking queue of %p (%s)", - info_type(bqe), node, info_type(node)); + barf("Unexpected closure type %s in blocking queue", // of %p (%s)", + info_type((StgClosure *)bqe)); // , node, info_type(node)); break; } } /* for */ @@ -3249,7 +3641,9 @@ sched_belch(char *s, ...) va_list ap; va_start(ap,s); #ifdef SMP - fprintf(stderr, "scheduler (task %ld): ", pthread_self()); + fprintf(stderr, "scheduler (task %ld): ", osThreadId()); +#elif defined(PAR) + fprintf(stderr, "== "); #else fprintf(stderr, "scheduler: "); #endif @@ -3271,11 +3665,9 @@ sched_belch(char *s, ...) //* blocked_queue_tl:: @cindex\s-+blocked_queue_tl //* context_switch:: @cindex\s-+context_switch //* createThread:: @cindex\s-+createThread -//* free_capabilities:: @cindex\s-+free_capabilities //* gc_pending_cond:: @cindex\s-+gc_pending_cond //* initScheduler:: @cindex\s-+initScheduler //* interrupted:: @cindex\s-+interrupted -//* n_free_capabilities:: @cindex\s-+n_free_capabilities //* next_thread_id:: @cindex\s-+next_thread_id //* print_bq:: @cindex\s-+print_bq //* run_queue_hd:: @cindex\s-+run_queue_hd @@ -3283,7 +3675,6 @@ sched_belch(char *s, ...) 
//* sched_mutex:: @cindex\s-+sched_mutex //* schedule:: @cindex\s-+schedule //* take_off_run_queue:: @cindex\s-+take_off_run_queue -//* task_ids:: @cindex\s-+task_ids //* term_mutex:: @cindex\s-+term_mutex //* thread_ready_cond:: @cindex\s-+thread_ready_cond //@end index