do flattening before simpleOptPgm so evaluation "order" is not lost

[ghc-hetmet.git] / rts / Capability.c
diff --git a/rts/Capability.c b/rts/Capability.c

index 27a2d51..9091fdd 100644 (file)
--- a/rts/Capability.c
+++ b/rts/Capability.c
@@ -18,38 +18,52 @@
  
  #include "PosixSource.h"
  #include "Rts.h"
-#include "RtsUtils.h"
-#include "RtsFlags.h"
-#include "STM.h"
-#include "OSThreads.h"
+
  #include "Capability.h"
  #include "Schedule.h"
  #include "Sparks.h"
  #include "Trace.h"
+#include "sm/GC.h" // for gcWorkerThread()
+#include "STM.h"
+#include "RtsUtils.h"
  
  // one global capability, this is the Capability for non-threaded
  // builds, and for +RTS -N1
  Capability MainCapability;
  
-nat n_capabilities;
+nat n_capabilities = 0;
  Capability *capabilities = NULL;
  
  // Holds the Capability which last became free.  This is used so that
  // an in-call has a chance of quickly finding a free Capability.
  // Maintaining a global free list of Capabilities would require global
  // locking, so we don't do that.
-Capability *last_free_capability;
+Capability *last_free_capability = NULL;
  
  /* GC indicator, in scope for the scheduler, init'ed to false */
  volatile StgWord waiting_for_gc = 0;
  
+/* Let foreign code get the current Capability -- assuming there is one!
+ * This is useful for unsafe foreign calls because they are called with
+ * the current Capability held, but they are not passed it. For example,
+ * see the integer-gmp package which calls allocateLocal() in its
+ * stgAllocForGMP() function (which gets called by gmp functions).
+ * */
+Capability * rts_unsafeGetMyCapability (void)
+{
+#if defined(THREADED_RTS)
+  return myTask()->cap;
+#else
+  return &MainCapability;
+#endif
+}
+
  #if defined(THREADED_RTS)
  STATIC_INLINE rtsBool
  globalWorkToDo (void)
  {
-    return blackholes_need_checking
-       || sched_state >= SCHED_INTERRUPTING
-       ;
+    return sched_state >= SCHED_INTERRUPTING
+        || recent_activity == ACTIVITY_INACTIVE; // need to check for deadlock
  }
  #endif
  
@@ -57,36 +71,46 @@ globalWorkToDo (void)
  StgClosure *
  findSpark (Capability *cap)
  {
-  /* use the normal Sparks.h interface (internally modified to enable
-     concurrent stealing) 
-     and immediately turn the spark into a thread when successful
-  */
    Capability *robbed;
    StgClosurePtr spark;
    rtsBool retry;
    nat i = 0;
  
-  // first try to get a spark from our own pool.
-  // We should be using reclaimSpark(), because it works without
-  // needing any atomic instructions:
-  //   spark = reclaimSpark(cap->sparks);
-  // However, measurements show that this makes at least one benchmark
-  // slower (prsa) and doesn't affect the others.
-  spark = tryStealSpark(cap);
-  if (spark != NULL) {
-      cap->sparks_converted++;
-      return spark;
+  if (!emptyRunQueue(cap) || cap->returning_tasks_hd != NULL) {
+      // If there are other threads, don't try to run any new
+      // sparks: sparks might be speculative, we don't want to take
+      // resources away from the main computation.
+      return 0;
    }
  
-  if (n_capabilities == 1) { return NULL; } // makes no sense...
-
-  debugTrace(DEBUG_sched,
-            "cap %d: Trying to steal work from other capabilities", 
-            cap->no);
-
    do {
        retry = rtsFalse;
  
+      // first try to get a spark from our own pool.
+      // We should be using reclaimSpark(), because it works without
+      // needing any atomic instructions:
+      //   spark = reclaimSpark(cap->sparks);
+      // However, measurements show that this makes at least one benchmark
+      // slower (prsa) and doesn't affect the others.
+      spark = tryStealSpark(cap);
+      if (spark != NULL) {
+          cap->sparks_converted++;
+
+          // Post event for running a spark from capability's own pool.
+          traceEventRunSpark(cap, cap->r.rCurrentTSO);
+
+          return spark;
+      }
+      if (!emptySparkPoolCap(cap)) {
+          retry = rtsTrue;
+      }
+
+      if (n_capabilities == 1) { return NULL; } // makes no sense...
+
+      debugTrace(DEBUG_sched,
+                 "cap %d: Trying to steal work from other capabilities", 
+                 cap->no);
+
        /* visit cap.s 0..n-1 in sequence until a theft succeeds. We could
        start at a random place instead of 0 as well.  */
        for ( i=0 ; i < n_capabilities ; i++ ) {
@@ -105,10 +129,10 @@ findSpark (Capability *cap)
            }
  
            if (spark != NULL) {
-              debugTrace(DEBUG_sched,
-                "cap %d: Stole a spark from capability %d",
-                         cap->no, robbed->no);
                cap->sparks_converted++;
+
+              traceEventStealSpark(cap, cap->r.rCurrentTSO, robbed->no);
+              
                return spark;
            }
            // otherwise: no success, try next one
@@ -148,10 +172,10 @@ STATIC_INLINE void
  newReturningTask (Capability *cap, Task *task)
  {
      ASSERT_LOCK_HELD(&cap->lock);
-    ASSERT(task->return_link == NULL);
+    ASSERT(task->next == NULL);
      if (cap->returning_tasks_hd) {
-       ASSERT(cap->returning_tasks_tl->return_link == NULL);
-       cap->returning_tasks_tl->return_link = task;
+       ASSERT(cap->returning_tasks_tl->next == NULL);
+       cap->returning_tasks_tl->next = task;
      } else {
         cap->returning_tasks_hd = task;
      }
@@ -165,11 +189,11 @@ popReturningTask (Capability *cap)
      Task *task;
      task = cap->returning_tasks_hd;
      ASSERT(task);
-    cap->returning_tasks_hd = task->return_link;
+    cap->returning_tasks_hd = task->next;
      if (!cap->returning_tasks_hd) {
         cap->returning_tasks_tl = NULL;
      }
-    task->return_link = NULL;
+    task->next = NULL;
      return task;
  }
  #endif
@@ -195,23 +219,28 @@ initCapability( Capability *cap, nat i )
      initMutex(&cap->lock);
      cap->running_task      = NULL; // indicates cap is free
      cap->spare_workers     = NULL;
-    cap->suspended_ccalling_tasks = NULL;
+    cap->n_spare_workers   = 0;
+    cap->suspended_ccalls  = NULL;
      cap->returning_tasks_hd = NULL;
      cap->returning_tasks_tl = NULL;
-    cap->wakeup_queue_hd    = END_TSO_QUEUE;
-    cap->wakeup_queue_tl    = END_TSO_QUEUE;
+    cap->inbox              = (Message*)END_TSO_QUEUE;
      cap->sparks_created     = 0;
+    cap->sparks_dud         = 0;
      cap->sparks_converted   = 0;
-    cap->sparks_pruned      = 0;
+    cap->sparks_gcd         = 0;
+    cap->sparks_fizzled     = 0;
  #endif
  
      cap->f.stgEagerBlackholeInfo = (W_)&__stg_EAGER_BLACKHOLE_info;
-    cap->f.stgGCEnter1     = (F_)__stg_gc_enter_1;
-    cap->f.stgGCFun        = (F_)__stg_gc_fun;
+    cap->f.stgGCEnter1     = (StgFunPtr)__stg_gc_enter_1;
+    cap->f.stgGCFun        = (StgFunPtr)__stg_gc_fun;
  
      cap->mut_lists  = stgMallocBytes(sizeof(bdescr *) *
                                      RtsFlags.GcFlags.generations,
                                      "initCapability");
+    cap->saved_mut_lists = stgMallocBytes(sizeof(bdescr *) *
+                                          RtsFlags.GcFlags.generations,
+                                          "initCapability");
  
      for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
         cap->mut_lists[g] = NULL;
@@ -223,6 +252,7 @@ initCapability( Capability *cap, nat i )
      cap->free_trec_headers = NO_TREC;
      cap->transaction_tokens = 0;
      cap->context_switch = 0;
+    cap->pinned_object_block = NULL;
  }
  
  /* ---------------------------------------------------------------------------
@@ -286,10 +316,10 @@ initCapabilities( void )
  
  void setContextSwitches(void)
  {
-  nat i;
-  for (i=0; i < n_capabilities; i++) {
-    capabilities[i].context_switch = 1;
-  }
+    nat i;
+    for (i=0; i < n_capabilities; i++) {
+        contextSwitchCapability(&capabilities[i]);
+    }
  }
  
  /* ----------------------------------------------------------------------------
@@ -312,10 +342,9 @@ giveCapabilityToTask (Capability *cap USED_IF_DEBUG, Task *task)
  {
      ASSERT_LOCK_HELD(&cap->lock);
      ASSERT(task->cap == cap);
-    trace(TRACE_sched | DEBUG_sched,
-         "passing capability %d to %s %p",
-         cap->no, task->tso ? "bound task" : "worker",
-         (void *)task->id);
+    debugTrace(DEBUG_sched, "passing capability %d to %s %p",
+               cap->no, task->incall->tso ? "bound task" : "worker",
+               (void *)task->id);
      ACQUIRE_LOCK(&task->lock);
      task->wakeup = rtsTrue;
      // the wakeup flag is needed because signalCondition() doesn't
@@ -355,17 +384,9 @@ releaseCapability_ (Capability* cap,
         return;
      }
  
-    /* if waiting_for_gc was the reason to release the cap: thread
-       comes from yieldCap->releaseAndQueueWorker. Unconditionally set
-       cap. free and return (see default after the if-protected other
-       special cases). Thread will wait on cond.var and re-acquire the
-       same cap after GC (GC-triggering cap. calls releaseCap and
-       enters the spare_workers case)
-    */
-    if (waiting_for_gc) {
+    if (waiting_for_gc == PENDING_GC_SEQ) {
        last_free_capability = cap; // needed?
-      trace(TRACE_sched | DEBUG_sched, 
-           "GC pending, set capability %d free", cap->no);
+      debugTrace(DEBUG_sched, "GC pending, set capability %d free", cap->no);
        return;
      } 
  
@@ -374,8 +395,11 @@ releaseCapability_ (Capability* cap,
      // give this Capability to the appropriate Task.
      if (!emptyRunQueue(cap) && cap->run_queue_hd->bound) {
         // Make sure we're not about to try to wake ourselves up
-       ASSERT(task != cap->run_queue_hd->bound);
-       task = cap->run_queue_hd->bound;
+       // ASSERT(task != cap->run_queue_hd->bound);
+        // assertion is false: in schedule() we force a yield after
+       // ThreadBlocked, but the thread may be back on the run queue
+       // by now.
+       task = cap->run_queue_hd->bound->task;
         giveCapabilityToTask(cap,task);
         return;
      }
@@ -388,7 +412,7 @@ releaseCapability_ (Capability* cap,
         if (sched_state < SCHED_SHUTTING_DOWN || !emptyRunQueue(cap)) {
             debugTrace(DEBUG_sched,
                        "starting new worker on capability %d", cap->no);
-           startWorkerTask(cap, workerStart);
+           startWorkerTask(cap);
             return;
         }
      }
@@ -396,7 +420,7 @@ releaseCapability_ (Capability* cap,
      // If we have an unbound thread on the run queue, or if there's
      // anything else to do, give the Capability to a worker thread.
      if (always_wakeup || 
-        !emptyRunQueue(cap) || !emptyWakeupQueue(cap) ||
+        !emptyRunQueue(cap) || !emptyInbox(cap) ||
          !emptySparkPoolCap(cap) || globalWorkToDo()) {
         if (cap->spare_workers) {
             giveCapabilityToTask(cap,cap->spare_workers);
@@ -406,7 +430,7 @@ releaseCapability_ (Capability* cap,
      }
  
      last_free_capability = cap;
-    trace(TRACE_sched | DEBUG_sched, "freeing capability %d", cap->no);
+    debugTrace(DEBUG_sched, "freeing capability %d", cap->no);
  }
  
  void
@@ -434,16 +458,33 @@ releaseCapabilityAndQueueWorker (Capability* cap USED_IF_THREADS)
  
      task = cap->running_task;
  
+    // If the Task is stopped, we shouldn't be yielding, we should
+    // be just exiting.
+    ASSERT(!task->stopped);
+
      // If the current task is a worker, save it on the spare_workers
      // list of this Capability.  A worker can mark itself as stopped,
      // in which case it is not replaced on the spare_worker queue.
      // This happens when the system is shutting down (see
      // Schedule.c:workerStart()).
-    // Also, be careful to check that this task hasn't just exited
-    // Haskell to do a foreign call (task->suspended_tso).
-    if (!isBoundTask(task) && !task->stopped && !task->suspended_tso) {
-       task->next = cap->spare_workers;
-       cap->spare_workers = task;
+    if (!isBoundTask(task))
+    {
+        if (cap->n_spare_workers < MAX_SPARE_WORKERS)
+        {
+            task->next = cap->spare_workers;
+            cap->spare_workers = task;
+            cap->n_spare_workers++;
+        }
+        else
+        {
+            debugTrace(DEBUG_sched, "%d spare workers already, exiting",
+                       cap->n_spare_workers);
+            releaseCapability_(cap,rtsFalse);
+            // hold the lock until after workerTaskStop; cf. scheduleWorker()
+            workerTaskStop(task);
+            RELEASE_LOCK(&cap->lock);
+            shutdownThread();
+        }
      }
      // Bound tasks just float around attached to their TSOs.
  
@@ -478,17 +519,20 @@ waitForReturnCapability (Capability **pCap, Task *task)
      if (cap == NULL) {
         // Try last_free_capability first
         cap = last_free_capability;
-       if (!cap->running_task) {
+       if (cap->running_task) {
             nat i;
             // otherwise, search for a free capability
+            cap = NULL;
             for (i = 0; i < n_capabilities; i++) {
-               cap = &capabilities[i];
-               if (!cap->running_task) {
+               if (!capabilities[i].running_task) {
+                    cap = &capabilities[i];
                     break;
                 }
             }
-           // Can't find a free one, use last_free_capability.
-           cap = last_free_capability;
+            if (cap == NULL) {
+                // Can't find a free one, use last_free_capability.
+                cap = last_free_capability;
+            }
         }
  
         // record the Capability as the one this Task is now assocated with.
@@ -538,7 +582,7 @@ waitForReturnCapability (Capability **pCap, Task *task)
  
      ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
  
-    trace(TRACE_sched | DEBUG_sched, "resuming capability %d", cap->no);
+    debugTrace(DEBUG_sched, "resuming capability %d", cap->no);
  
      *pCap = cap;
  #endif
@@ -554,6 +598,13 @@ yieldCapability (Capability** pCap, Task *task)
  {
      Capability *cap = *pCap;
  
+    if (waiting_for_gc == PENDING_GC_PAR) {
+        traceEventGcStart(cap);
+        gcWorkerThread(cap);
+        traceEventGcEnd(cap);
+        return;
+    }
+
         debugTrace(DEBUG_sched, "giving up capability %d", cap->no);
  
         // We must now release the capability and wait to be woken up
@@ -579,7 +630,7 @@ yieldCapability (Capability** pCap, Task *task)
                 continue;
             }
  
-           if (task->tso == NULL) {
+           if (task->incall->tso == NULL) {
                 ASSERT(cap->spare_workers != NULL);
                 // if we're not at the front of the queue, release it
                 // again.  This is unlikely to happen.
@@ -590,13 +641,14 @@ yieldCapability (Capability** pCap, Task *task)
                 }
                 cap->spare_workers = task->next;
                 task->next = NULL;
-           }
+                cap->n_spare_workers--;
+            }
             cap->running_task = task;
             RELEASE_LOCK(&cap->lock);
             break;
         }
  
-       trace(TRACE_sched | DEBUG_sched, "resuming capability %d", cap->no);
+       debugTrace(DEBUG_sched, "resuming capability %d", cap->no);
         ASSERT(cap->running_task == task);
  
      *pCap = cap;
@@ -607,103 +659,21 @@ yieldCapability (Capability** pCap, Task *task)
  }
  
  /* ----------------------------------------------------------------------------
- * Wake up a thread on a Capability.
+ * prodCapability
   *
- * This is used when the current Task is running on a Capability and
- * wishes to wake up a thread on a different Capability.
+ * If a Capability is currently idle, wake up a Task on it.  Used to 
+ * get every Capability into the GC.
   * ------------------------------------------------------------------------- */
  
  void
-wakeupThreadOnCapability (Capability *my_cap, 
-                          Capability *other_cap, 
-                          StgTSO *tso)
-{
-    ACQUIRE_LOCK(&other_cap->lock);
-
-    // ASSUMES: cap->lock is held (asserted in wakeupThreadOnCapability)
-    if (tso->bound) {
-       ASSERT(tso->bound->cap == tso->cap);
-       tso->bound->cap = other_cap;
-    }
-    tso->cap = other_cap;
-
-    ASSERT(tso->bound ? tso->bound->cap == other_cap : 1);
-
-    if (other_cap->running_task == NULL) {
-       // nobody is running this Capability, we can add our thread
-       // directly onto the run queue and start up a Task to run it.
-
-       other_cap->running_task = myTask(); 
-            // precond for releaseCapability_() and appendToRunQueue()
-
-       appendToRunQueue(other_cap,tso);
-
-       trace(TRACE_sched, "resuming capability %d", other_cap->no);
-       releaseCapability_(other_cap,rtsFalse);
-    } else {
-       appendToWakeupQueue(my_cap,other_cap,tso);
-        other_cap->context_switch = 1;
-       // someone is running on this Capability, so it cannot be
-       // freed without first checking the wakeup queue (see
-       // releaseCapability_).
-    }
-
-    RELEASE_LOCK(&other_cap->lock);
-}
-
-/* ----------------------------------------------------------------------------
- * prodCapabilities
- *
- * Used to indicate that the interrupted flag is now set, or some
- * other global condition that might require waking up a Task on each
- * Capability.
- * ------------------------------------------------------------------------- */
-
-static void
-prodCapabilities(rtsBool all)
+prodCapability (Capability *cap, Task *task)
  {
-    nat i;
-    Capability *cap;
-    Task *task;
-
-    for (i=0; i < n_capabilities; i++) {
-       cap = &capabilities[i];
-       ACQUIRE_LOCK(&cap->lock);
-       if (!cap->running_task) {
-           if (cap->spare_workers) {
-               trace(TRACE_sched, "resuming capability %d", cap->no);
-               task = cap->spare_workers;
-               ASSERT(!task->stopped);
-               giveCapabilityToTask(cap,task);
-               if (!all) {
-                   RELEASE_LOCK(&cap->lock);
-                   return;
-               }
-           }
-       }
-       RELEASE_LOCK(&cap->lock);
+    ACQUIRE_LOCK(&cap->lock);
+    if (!cap->running_task) {
+        cap->running_task = task;
+        releaseCapability_(cap,rtsTrue);
      }
-    return;
-}
-
-void
-prodAllCapabilities (void)
-{
-    prodCapabilities(rtsTrue);
-}
-
-/* ----------------------------------------------------------------------------
- * prodOneCapability
- *
- * Like prodAllCapabilities, but we only require a single Task to wake
- * up in order to service some global event, such as checking for
- * deadlock after some idle time has passed.
- * ------------------------------------------------------------------------- */
-
-void
-prodOneCapability (void)
-{
-    prodCapabilities(rtsFalse);
+    RELEASE_LOCK(&cap->lock);
  }
  
  /* ----------------------------------------------------------------------------
@@ -761,12 +731,13 @@ shutdownCapability (Capability *cap, Task *task, rtsBool safe)
                  if (!osThreadIsAlive(t->id)) {
                      debugTrace(DEBUG_sched, 
                                 "worker thread %p has died unexpectedly", (void *)t->id);
-                        if (!prev) {
-                            cap->spare_workers = t->next;
-                        } else {
-                            prev->next = t->next;
-                        }
-                        prev = t;
+                    cap->n_spare_workers--;
+                    if (!prev) {
+                        cap->spare_workers = t->next;
+                    } else {
+                        prev->next = t->next;
+                    }
+                    prev = t;
                  }
              }
          }
@@ -786,16 +757,24 @@ shutdownCapability (Capability *cap, Task *task, rtsBool safe)
          // that will try to return to code that has been unloaded.
          // We can be a bit more relaxed when this is a standalone
          // program that is about to terminate, and let safe=false.
-        if (cap->suspended_ccalling_tasks && safe) {
+        if (cap->suspended_ccalls && safe) {
             debugTrace(DEBUG_sched, 
                        "thread(s) are involved in foreign calls, yielding");
              cap->running_task = NULL;
             RELEASE_LOCK(&cap->lock);
+            // The IO manager thread might have been slow to start up,
+            // so the first attempt to kill it might not have
+            // succeeded.  Just in case, try again - the kill message
+            // will only be sent once.
+            //
+            // To reproduce this deadlock: run ffi002(threaded1)
+            // repeatedly on a loaded machine.
+            ioManagerDie();
              yieldThread();
              continue;
          }
-            
-       debugTrace(DEBUG_sched, "capability %d is stopped.", cap->no);
+
+        traceEventShutdown(cap);
         RELEASE_LOCK(&cap->lock);
         break;
      }
@@ -837,7 +816,8 @@ static void
  freeCapability (Capability *cap)
  {
      stgFree(cap->mut_lists);
-#if defined(THREADED_RTS) || defined(PARALLEL_HASKELL)
+    stgFree(cap->saved_mut_lists);
+#if defined(THREADED_RTS)
      freeSparkPool(cap->sparks);
  #endif
  }
@@ -862,51 +842,41 @@ freeCapabilities (void)
     ------------------------------------------------------------------------ */
  
  void
-markSomeCapabilities (evac_fn evac, void *user, nat i0, nat delta, 
-                      rtsBool prune_sparks USED_IF_THREADS)
+markCapability (evac_fn evac, void *user, Capability *cap,
+                rtsBool no_mark_sparks USED_IF_THREADS)
  {
-    nat i;
-    Capability *cap;
-    Task *task;
+    InCall *incall;
  
      // Each GC thread is responsible for following roots from the
      // Capability of the same number.  There will usually be the same
      // or fewer Capabilities as GC threads, but just in case there
      // are more, we mark every Capability whose number is the GC
      // thread's index plus a multiple of the number of GC threads.
-    for (i = i0; i < n_capabilities; i += delta) {
-       cap = &capabilities[i];
-       evac(user, (StgClosure **)(void *)&cap->run_queue_hd);
-       evac(user, (StgClosure **)(void *)&cap->run_queue_tl);
+    evac(user, (StgClosure **)(void *)&cap->run_queue_hd);
+    evac(user, (StgClosure **)(void *)&cap->run_queue_tl);
  #if defined(THREADED_RTS)
-       evac(user, (StgClosure **)(void *)&cap->wakeup_queue_hd);
-       evac(user, (StgClosure **)(void *)&cap->wakeup_queue_tl);
+    evac(user, (StgClosure **)(void *)&cap->inbox);
  #endif
-       for (task = cap->suspended_ccalling_tasks; task != NULL; 
-            task=task->next) {
-           debugTrace(DEBUG_sched,
-                      "evac'ing suspended TSO %lu", (unsigned long)task->suspended_tso->id);
-           evac(user, (StgClosure **)(void *)&task->suspended_tso);
-       }
+    for (incall = cap->suspended_ccalls; incall != NULL;
+         incall=incall->next) {
+        evac(user, (StgClosure **)(void *)&incall->suspended_tso);
+    }
  
  #if defined(THREADED_RTS)
-        if (prune_sparks) {
-            pruneSparkQueue (evac, user, cap);
-        } else {
-            traverseSparkQueue (evac, user, cap);
-        }
-#endif
+    if (!no_mark_sparks) {
+        traverseSparkQueue (evac, user, cap);
      }
+#endif
  
-#if !defined(THREADED_RTS)
-    evac(user, (StgClosure **)(void *)&blocked_queue_hd);
-    evac(user, (StgClosure **)(void *)&blocked_queue_tl);
-    evac(user, (StgClosure **)(void *)&sleeping_queue);
-#endif 
+    // Free STM structures for this Capability
+    stmPreGCHook(cap);
  }
  
  void
  markCapabilities (evac_fn evac, void *user)
  {
-    markSomeCapabilities(evac, user, 0, 1, rtsFalse);
+    nat n;
+    for (n = 0; n < n_capabilities; n++) {
+        markCapability(evac, user, &capabilities[n], rtsFalse);
+    }
  }