X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2FSchedule.c;h=7dd063423fe3e123770291254b2efba3ed874b55;hb=117dc73b8a8f6a4184a825a96c372480377f4680;hp=2e9d0dda773ec1c3f432815c047f9ae16d7e8497;hpb=a4326c0e87499dd633c55625f325244b1caf337a;p=ghc-hetmet.git diff --git a/rts/Schedule.c b/rts/Schedule.c index 2e9d0dd..7dd0634 100644 --- a/rts/Schedule.c +++ b/rts/Schedule.c @@ -92,13 +92,15 @@ rtsBool blackholes_need_checking = rtsFalse; /* flag that tracks whether we have done any execution in this time slice. * LOCK: currently none, perhaps we should lock (but needs to be * updated in the fast path of the scheduler). + * + * NB. must be StgWord, we do xchg() on it. */ -nat recent_activity = ACTIVITY_YES; +volatile StgWord recent_activity = ACTIVITY_YES; /* if this flag is set as well, give up execution - * LOCK: none (changes once, from false->true) + * LOCK: none (changes monotonically) */ -rtsBool sched_state = SCHED_RUNNING; +volatile StgWord sched_state = SCHED_RUNNING; /* This is used in `TSO.h' and gcc 2.96 insists that this variable actually * exists - earlier gccs apparently didn't. @@ -338,7 +340,14 @@ schedule (Capability *initialCapability, Task *task) #endif /* scheduleDoGC() deletes all the threads */ cap = scheduleDoGC(cap,task,rtsFalse); - break; + + // after scheduleDoGC(), we must be shutting down. Either some + // other Capability did the final GC, or we did it above, + // either way we can fall through to the SCHED_SHUTTING_DOWN + // case now. + ASSERT(sched_state == SCHED_SHUTTING_DOWN); + // fall through + case SCHED_SHUTTING_DOWN: debugTrace(DEBUG_sched, "SCHED_SHUTTING_DOWN"); // If we are a worker, just exit. If we're a bound thread @@ -399,6 +408,7 @@ schedule (Capability *initialCapability, Task *task) // ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task); } + yield: scheduleYield(&cap,task); if (emptyRunQueue(cap)) continue; // look for work again #endif @@ -450,6 +460,15 @@ schedule (Capability *initialCapability, Task *task) } #endif + // If we're shutting down, and this thread has not yet been + // killed, kill it now. This sometimes happens when a finalizer + // thread is created by the final GC, or a thread previously + // in a foreign call returns. + if (sched_state >= SCHED_INTERRUPTING && + !(t->what_next == ThreadComplete || t->what_next == ThreadKilled)) { + deleteThread(cap,t); + } + /* context switches are initiated by the timer signal, unless * the user specified "context switch as often as possible", with * +RTS -C0 @@ -565,7 +584,7 @@ run_thread: debugTrace(DEBUG_sched, "--<< thread %lu (%s) stopped: blocked", (unsigned long)t->id, whatNext_strs[t->what_next]); - continue; + goto yield; } #endif @@ -653,15 +672,9 @@ scheduleFindWork (Capability *cap) scheduleCheckBlockedThreads(cap); #if defined(THREADED_RTS) || defined(PARALLEL_HASKELL) - // Try to activate one of our own sparks if (emptyRunQueue(cap)) { scheduleActivateSpark(cap); } #endif -#if defined(THREADED_RTS) - // Try to steak work if we don't have any - if (emptyRunQueue(cap)) { stealWork(cap); } -#endif - #if defined(PARALLEL_HASKELL) // if messages have been buffered... scheduleSendPendingMessages(); @@ -705,7 +718,9 @@ shouldYieldCapability (Capability *cap, Task *task) // - we need to yield this Capability to someone else // (see shouldYieldCapability()) // -// The return value indicates whether +// Careful: the scheduler loop is quite delicate. Make sure you run +// the tests in testsuite/concurrent (all ways) after modifying this, +// and also check the benchmarks in nofib/parallel for regressions. static void scheduleYield (Capability **pcap, Task *task) @@ -713,7 +728,10 @@ scheduleYield (Capability **pcap, Task *task) Capability *cap = *pcap; // if we have work, and we don't need to give up the Capability, continue. - if (!emptyRunQueue(cap) && !shouldYieldCapability(cap,task)) + if (!shouldYieldCapability(cap,task) && + (!emptyRunQueue(cap) || + blackholes_need_checking || + sched_state >= SCHED_INTERRUPTING)) return; // otherwise yield (sleep), and keep yielding if necessary. @@ -933,8 +951,13 @@ scheduleCheckBlackHoles (Capability *cap) { ACQUIRE_LOCK(&sched_mutex); if ( blackholes_need_checking ) { - checkBlackHoles(cap); blackholes_need_checking = rtsFalse; + // important that we reset the flag *before* checking the + // blackhole queue, otherwise we could get deadlock. This + // happens as follows: we wake up a thread that + // immediately runs on another Capability, blocks on a + // blackhole, and then we reset the blackholes_need_checking flag. + checkBlackHoles(cap); } RELEASE_LOCK(&sched_mutex); } @@ -978,12 +1001,11 @@ scheduleDetectDeadlock (Capability *cap, Task *task) // they are unreachable and will therefore be sent an // exception. Any threads thus released will be immediately // runnable. - cap = scheduleDoGC (cap, task, rtsTrue/*force major GC*/); + cap = scheduleDoGC (cap, task, rtsTrue/*force major GC*/); + // when force_major == rtsTrue. scheduleDoGC sets + // recent_activity to ACTIVITY_DONE_GC and turns off the timer + // signal. - recent_activity = ACTIVITY_DONE_GC; - // disable timer signals (see #1623) - stopTimer(); - if ( !emptyRunQueue(cap) ) return; #if defined(RTS_USER_SIGNALS) && !defined(THREADED_RTS) @@ -1062,30 +1084,10 @@ scheduleSendPendingMessages(void) static void scheduleActivateSpark(Capability *cap) { - StgClosure *spark; - -/* We only want to stay here if the run queue is empty and we want some - work. We try to turn a spark into a thread, and add it to the run - queue, from where it will be picked up in the next iteration of the - scheduler loop. -*/ - if (!emptyRunQueue(cap)) - /* In the threaded RTS, another task might have pushed a thread - on our run queue in the meantime ? But would need a lock.. */ - return; - - - // Really we should be using reclaimSpark() here, but - // experimentally it doesn't seem to perform as well as just - // stealing from our own spark pool: - // spark = reclaimSpark(cap->sparks); - spark = tryStealSpark(cap->sparks); // defined in Sparks.c - - if (spark != NULL) { - debugTrace(DEBUG_sched, - "turning spark of closure %p into a thread", - (StgClosure *)spark); - createSparkThread(cap,spark); // defined in Sparks.c + if (anySparks()) + { + createSparkThread(cap); + debugTrace(DEBUG_sched, "creating a spark thread"); } } #endif // PARALLEL_HASKELL || THREADED_RTS @@ -1161,7 +1163,7 @@ schedulePostRunThread (Capability *cap, StgTSO *t) // ATOMICALLY_FRAME, aborting the (nested) // transaction, and saving the stack of any // partially-evaluated thunks on the heap. - throwToSingleThreaded_(cap, t, NULL, rtsTrue, NULL); + throwToSingleThreaded_(cap, t, NULL, rtsTrue); ASSERT(get_itbl((StgClosure *)t->sp)->type == ATOMICALLY_FRAME); } @@ -1480,6 +1482,13 @@ scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major) nat i; #endif + if (sched_state == SCHED_SHUTTING_DOWN) { + // The final GC has already been done, and the system is + // shutting down. We'll probably deadlock if we try to GC + // now. + return cap; + } + #ifdef THREADED_RTS // In order to GC, there must be no threads running Haskell code. // Therefore, the GC thread needs to hold *all* the capabilities, @@ -1567,6 +1576,16 @@ scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major) balanceSparkPoolsCaps(n_capabilities, capabilities); #endif + if (force_major) + { + // We've just done a major GC and we don't need the timer + // signal turned on any more (#1623). + // NB. do this *before* releasing the Capabilities, to avoid + // deadlocks! + recent_activity = ACTIVITY_DONE_GC; + stopTimer(); + } + #if defined(THREADED_RTS) // release our stash of capabilities. for (i = 0; i < n_capabilities; i++) { @@ -2006,9 +2025,22 @@ workerStart(Task *task) // schedule() runs without a lock. cap = schedule(cap,task); - // On exit from schedule(), we have a Capability. - releaseCapability(cap); + // On exit from schedule(), we have a Capability, but possibly not + // the same one we started with. + + // During shutdown, the requirement is that after all the + // Capabilities are shut down, all workers that are shutting down + // have finished workerTaskStop(). This is why we hold on to + // cap->lock until we've finished workerTaskStop() below. + // + // There may be workers still involved in foreign calls; those + // will just block in waitForReturnCapability() because the + // Capability has been shut down. + // + ACQUIRE_LOCK(&cap->lock); + releaseCapability_(cap,rtsFalse); workerTaskStop(task); + RELEASE_LOCK(&cap->lock); } #endif @@ -2111,20 +2143,30 @@ exitScheduler( shutdownCapability(&capabilities[i], task, wait_foreign); } boundTaskExiting(task); - stopTaskManager(); } -#else - freeCapability(&MainCapability); #endif } void freeScheduler( void ) { - freeTaskManager(); - if (n_capabilities != 1) { - stgFree(capabilities); + nat still_running; + + ACQUIRE_LOCK(&sched_mutex); + still_running = freeTaskManager(); + // We can only free the Capabilities if there are no Tasks still + // running. We might have a Task about to return from a foreign + // call into waitForReturnCapability(), for example (actually, + // this should be the *only* thing that a still-running Task can + // do at this point, and it will block waiting for the + // Capability). + if (still_running == 0) { + freeCapabilities(); + if (n_capabilities != 1) { + stgFree(capabilities); + } } + RELEASE_LOCK(&sched_mutex); #if defined(THREADED_RTS) closeMutex(&sched_mutex); #endif