Fix race condition in wakeupThreadOnCapability() (#2574)

[ghc-hetmet.git] / rts / Capability.c
diff --git a/rts/Capability.c b/rts/Capability.c

index fa7f630..1f1a1ae 100644 (file)
--- a/rts/Capability.c
+++ b/rts/Capability.c
@@ -40,6 +40,9 @@ Capability *capabilities = NULL;
  // locking, so we don't do that.
  Capability *last_free_capability;
  
+/* GC indicator, in scope for the scheduler, init'ed to false */
+volatile StgWord waiting_for_gc = 0;
+
  #if defined(THREADED_RTS)
  STATIC_INLINE rtsBool
  globalWorkToDo (void)
@@ -276,6 +279,21 @@ releaseCapability_ (Capability* cap)
         return;
      }
  
+    /* if waiting_for_gc was the reason to release the cap: thread
+       comes from yieldCap->releaseAndQueueWorker. Unconditionally set
+       cap. free and return (see default after the if-protected other
+       special cases). Thread will wait on cond.var and re-acquire the
+       same cap after GC (GC-triggering cap. calls releaseCap and
+       enters the spare_workers case)
+    */
+    if (waiting_for_gc) {
+      last_free_capability = cap; // needed?
+      trace(TRACE_sched | DEBUG_sched, 
+           "GC pending, set capability %d free", cap->no);
+      return;
+    } 
+
+
      // If the next thread on the run queue is a bound thread,
      // give this Capability to the appropriate Task.
      if (!emptyRunQueue(cap) && cap->run_queue_hd->bound) {
@@ -453,7 +471,14 @@ yieldCapability (Capability** pCap, Task *task)
  
      // The fast path has no locking, if we don't enter this while loop
  
-    while ( cap->returning_tasks_hd != NULL || !anyWorkForMe(cap,task) ) {
+    while ( waiting_for_gc
+           /* i.e. another capability triggered HeapOverflow, is busy
+              getting capabilities (stopping their owning tasks) */
+           || cap->returning_tasks_hd != NULL 
+               /* cap reserved for another task */
+           || !anyWorkForMe(cap,task) 
+               /* cap/task have no work */
+           ) {
         debugTrace(DEBUG_sched, "giving up capability %d", cap->no);
  
         // We must now release the capability and wait to be woken up
@@ -515,57 +540,40 @@ yieldCapability (Capability** pCap, Task *task)
   * ------------------------------------------------------------------------- */
  
  void
-wakeupThreadOnCapability (Capability *cap, StgTSO *tso)
+wakeupThreadOnCapability (Capability *my_cap, 
+                          Capability *other_cap, 
+                          StgTSO *tso)
  {
-    ASSERT(tso->cap == cap);
-    ASSERT(tso->bound ? tso->bound->cap == cap : 1);
-    ASSERT_LOCK_HELD(&cap->lock);
+    ACQUIRE_LOCK(&other_cap->lock);
  
-    tso->cap = cap;
+    // ASSUMES: cap->lock is held (asserted in wakeupThreadOnCapability)
+    if (tso->bound) {
+       ASSERT(tso->bound->cap == tso->cap);
+       tso->bound->cap = other_cap;
+    }
+    tso->cap = other_cap;
+
+    ASSERT(tso->bound ? tso->bound->cap == other_cap : 1);
  
-    if (cap->running_task == NULL) {
+    if (other_cap->running_task == NULL) {
         // nobody is running this Capability, we can add our thread
         // directly onto the run queue and start up a Task to run it.
-       appendToRunQueue(cap,tso);
  
-       // start it up
-       cap->running_task = myTask(); // precond for releaseCapability_()
-       trace(TRACE_sched, "resuming capability %d", cap->no);
-       releaseCapability_(cap);
+       other_cap->running_task = myTask(); 
+            // precond for releaseCapability_() and appendToRunQueue()
+
+       appendToRunQueue(other_cap,tso);
+
+       trace(TRACE_sched, "resuming capability %d", other_cap->no);
+       releaseCapability_(other_cap);
      } else {
-       appendToWakeupQueue(cap,tso);
+       appendToWakeupQueue(my_cap,other_cap,tso);
         // someone is running on this Capability, so it cannot be
         // freed without first checking the wakeup queue (see
         // releaseCapability_).
      }
-}
  
-void
-wakeupThreadOnCapability_lock (Capability *cap, StgTSO *tso)
-{
-    ACQUIRE_LOCK(&cap->lock);
-    migrateThreadToCapability (cap, tso);
-    RELEASE_LOCK(&cap->lock);
-}
-
-void
-migrateThreadToCapability (Capability *cap, StgTSO *tso)
-{
-    // ASSUMES: cap->lock is held (asserted in wakeupThreadOnCapability)
-    if (tso->bound) {
-       ASSERT(tso->bound->cap == tso->cap);
-       tso->bound->cap = cap;
-    }
-    tso->cap = cap;
-    wakeupThreadOnCapability(cap,tso);
-}
-
-void
-migrateThreadToCapability_lock (Capability *cap, StgTSO *tso)
-{
-    ACQUIRE_LOCK(&cap->lock);
-    migrateThreadToCapability (cap, tso);
-    RELEASE_LOCK(&cap->lock);
+    RELEASE_LOCK(&other_cap->lock);
  }
  
  /* ----------------------------------------------------------------------------
@@ -791,8 +799,12 @@ markSomeCapabilities (evac_fn evac, void *user, nat i0, nat delta)
                        "evac'ing suspended TSO %lu", (unsigned long)task->suspended_tso->id);
             evac(user, (StgClosure **)(void *)&task->suspended_tso);
         }
+
+#if defined(THREADED_RTS)
+        traverseSparkQueue (evac, user, cap);
+#endif
      }
-    
+
  #if !defined(THREADED_RTS)
      evac(user, (StgClosure **)(void *)&blocked_queue_hd);
      evac(user, (StgClosure **)(void *)&blocked_queue_tl);
@@ -800,20 +812,6 @@ markSomeCapabilities (evac_fn evac, void *user, nat i0, nat delta)
  #endif 
  }
  
-// Sparks are not roots for GC, so we don't mark them in
-// markSomeCapabilities().  Instead, we traverse the spark queues
-// after GC and throw away any that are unreachable.
-void
-updateCapabilitiesPostGC (void)
-{
-#if defined(THREADED_RTS)
-    nat i;
-    for (i = 0; i < n_capabilities; i++) {
-        updateSparkQueue (&capabilities[i]);
-    }
-#endif // THREADED_RTS
-}
-
  // This function is used by the compacting GC to thread all the
  // pointers from spark queues.
  void