Capability stopping when waiting for GC
[ghc-hetmet.git] / rts / Schedule.c
index a7a0f33..1b5afef 100644
@@ -7,6 +7,7 @@
  * --------------------------------------------------------------------------*/
 
 #include "PosixSource.h"
+#define KEEP_LOCKCLOSURE
 #include "Rts.h"
 #include "SchedAPI.h"
 #include "RtsUtils.h"
 #include "Rts.h"
 #include "SchedAPI.h"
 #include "RtsUtils.h"
 #include "ThreadLabels.h"
 #include "LdvProfile.h"
 #include "Updates.h"
 #include "ThreadLabels.h"
 #include "LdvProfile.h"
 #include "Updates.h"
-#ifdef PROFILING
 #include "Proftimer.h"
 #include "ProfHeap.h"
 #include "Proftimer.h"
 #include "ProfHeap.h"
-#endif
-#if defined(GRAN) || defined(PARALLEL_HASKELL)
-# include "GranSimRts.h"
-# include "GranSim.h"
-# include "ParallelRts.h"
-# include "Parallel.h"
-# include "ParallelDebug.h"
-# include "FetchMe.h"
-# include "HLC.h"
-#endif
+
+/* PARALLEL_HASKELL includes go here */
+
 #include "Sparks.h"
 #include "Capability.h"
 #include "Task.h"
 #include "Sparks.h"
 #include "Capability.h"
 #include "Task.h"
  * Global variables
  * -------------------------------------------------------------------------- */
 
-#if defined(GRAN)
-
-StgTSO* ActiveTSO = NULL; /* for assigning system costs; GranSim-Light only */
-/* rtsTime TimeOfNextEvent, EndOfTimeSlice;            now in GranSim.c */
-
-/* 
-   In GranSim we have a runnable and a blocked queue for each processor.
-   In order to minimise code changes new arrays run_queue_hds/tls
-   are created. run_queue_hd is then a short cut (macro) for
-   run_queue_hds[CurrentProc] (see GranSim.h).
-   -- HWL
-*/
-StgTSO *run_queue_hds[MAX_PROC], *run_queue_tls[MAX_PROC];
-StgTSO *blocked_queue_hds[MAX_PROC], *blocked_queue_tls[MAX_PROC];
-StgTSO *ccalling_threadss[MAX_PROC];
-/* We use the same global list of threads (all_threads) in GranSim as in
-   the std RTS (i.e. we are cheating). However, we don't use this list in
-   the GranSim specific code at the moment (so we are only potentially
-   cheating).  */
-
-#else /* !GRAN */
-
 #if !defined(THREADED_RTS)
 // Blocked/sleeping threads
 StgTSO *blocked_queue_hd = NULL;
@@ -111,7 +82,6 @@ StgTSO *sleeping_queue = NULL;    // perhaps replace with a hash table?
  * LOCK: sched_mutex+capability, or all capabilities
  */
 StgTSO *blackhole_queue = NULL;
-#endif
 
 /* The blackhole_queue should be checked for threads to wake up.  See
  * Schedule.h for more thorough comment.
@@ -119,12 +89,6 @@ StgTSO *blackhole_queue = NULL;
  */
 rtsBool blackholes_need_checking = rtsFalse;
 
-/* Linked list of all threads.
- * Used for detecting garbage collected threads.
- * LOCK: sched_mutex+capability, or all capabilities
- */
-StgTSO *all_threads = NULL;
-
 /* flag set by signal handler to precipitate a context switch
  * LOCK: none (just an advisory flag)
  */
@@ -141,10 +105,6 @@ nat recent_activity = ACTIVITY_YES;
  */
 rtsBool sched_state = SCHED_RUNNING;
 
-#if defined(GRAN)
-StgTSO *CurrentTSO;
-#endif
-
 /*  This is used in `TSO.h' and gcc 2.96 insists that this variable actually 
  *  exists - earlier gccs apparently didn't.
  *  -= chak
@@ -166,12 +126,6 @@ rtsBool shutting_down_scheduler = rtsFalse;
 Mutex sched_mutex;
 #endif
 
-#if defined(PARALLEL_HASKELL)
-StgTSO *LastTSO;
-rtsTime TimeOfLastYield;
-rtsBool emitSchedule = rtsTrue;
-#endif
-
 #if !defined(mingw32_HOST_OS)
 #define FORKPROCESS_PRIMOP_SUPPORTED
 #endif
@@ -196,18 +150,12 @@ static void scheduleCheckBlockedThreads (Capability *cap);
 static void scheduleCheckWakeupThreads(Capability *cap USED_IF_NOT_THREADS);
 static void scheduleCheckBlackHoles (Capability *cap);
 static void scheduleDetectDeadlock (Capability *cap, Task *task);
-#if defined(GRAN)
-static StgTSO *scheduleProcessEvent(rtsEvent *event);
-#endif
 #if defined(PARALLEL_HASKELL)
-static StgTSO *scheduleSendPendingMessages(void);
-static void scheduleActivateSpark(void);
-static rtsBool scheduleGetRemoteWork(rtsBool *receivedFinish);
+static rtsBool scheduleGetRemoteWork(Capability *cap);
+static void scheduleSendPendingMessages(void);
+static void scheduleActivateSpark(Capability *cap);
 #endif
-#if defined(PAR) || defined(GRAN)
-static void scheduleGranParReport(void);
-#endif
-static void schedulePostRunThread(void);
+static void schedulePostRunThread(StgTSO *t);
 static rtsBool scheduleHandleHeapOverflow( Capability *cap, StgTSO *t );
 static void scheduleHandleStackOverflow( Capability *cap, Task *task, 
                                         StgTSO *t);
@@ -216,13 +164,14 @@ static rtsBool scheduleHandleYield( Capability *cap, StgTSO *t,
 static void scheduleHandleThreadBlocked( StgTSO *t );
 static rtsBool scheduleHandleThreadFinished( Capability *cap, Task *task,
                                             StgTSO *t );
-static rtsBool scheduleDoHeapProfile(rtsBool ready_to_gc);
+static rtsBool scheduleNeedHeapProfile(rtsBool ready_to_gc);
 static Capability *scheduleDoGC(Capability *cap, Task *task,
                                rtsBool force_major);
 
 static rtsBool checkBlackHoles(Capability *cap);
 
 static StgTSO *threadStackOverflow(Capability *cap, StgTSO *tso);
+static StgTSO *threadStackUnderflow(Task *task, StgTSO *tso);
 
 static void deleteThread (Capability *cap, StgTSO *tso);
 static void deleteAllThreads (Capability *cap);
@@ -231,11 +180,6 @@ static void deleteAllThreads (Capability *cap);
 static void deleteThread_(Capability *cap, StgTSO *tso);
 #endif
 
-#if defined(PARALLEL_HASKELL)
-StgTSO * createSparkThread(rtsSpark spark);
-StgTSO * activateSpark (rtsSpark spark);  
-#endif
-
 #ifdef DEBUG
 static char *whatNext_strs[] = {
   "(unknown)",
@@ -285,6 +229,7 @@ addToRunQueue( Capability *cap, StgTSO *t )
      This revolves around the global event queue, which determines what 
      to do next. Therefore, it's more complicated than either the 
      concurrent or the parallel (GUM) setup.
+  This version has been entirely removed (JB 2008/08).
 
    GUM version:
      GUM iterates over incoming messages.
@@ -295,6 +240,12 @@ addToRunQueue( Capability *cap, StgTSO *t )
      (see PendingFetches).
      This is not the ugliest code you could imagine, but it's bloody close.
 
+  (JB 2008/08) This version was formerly guarded by the PP-flag PAR,
+  now by the PP-flag PARALLEL_HASKELL. The Eden RTS (in GHC-6.x) uses
+  it, as will future GUM versions. This file has been refurbished so
+  that it only contains valid code, which is nevertheless incomplete
+  and refers to invalid includes etc.
+
    ------------------------------------------------------------------------ */
 
 static Capability *
@@ -303,15 +254,8 @@ schedule (Capability *initialCapability, Task *task)
   StgTSO *t;
   Capability *cap;
   StgThreadReturnCode ret;
-#if defined(GRAN)
-  rtsEvent *event;
-#elif defined(PARALLEL_HASKELL)
-  StgTSO *tso;
-  GlobalTaskId pe;
+#if defined(PARALLEL_HASKELL)
   rtsBool receivedFinish = rtsFalse;
-# if defined(DEBUG)
-  nat tp_size, sp_size; // stats only
-# endif
 #endif
   nat prev_what_next;
   rtsBool ready_to_gc;
@@ -336,20 +280,12 @@ schedule (Capability *initialCapability, Task *task)
 
 #if defined(PARALLEL_HASKELL)
 #define TERMINATION_CONDITION        (!receivedFinish)
-#elif defined(GRAN)
-#define TERMINATION_CONDITION        ((event = get_next_event()) != (rtsEvent*)NULL) 
 #else
 #define TERMINATION_CONDITION        rtsTrue
 #endif
 
   while (TERMINATION_CONDITION) {
 
-#if defined(GRAN)
-      /* Choose the processor with the next event */
-      CurrentProc = event->proc;
-      CurrentTSO = event->tso;
-#endif
-
 #if defined(THREADED_RTS)
       if (first) {
          // don't yield the first time, we want a chance to run this
@@ -462,6 +398,45 @@ schedule (Capability *initialCapability, Task *task)
 
     scheduleCheckBlockedThreads(cap);
 
+#if defined(PARALLEL_HASKELL)
+    /* message processing and work distribution go here */
+
+    /* if messages have been buffered... a NOOP in THREADED_RTS */
+    scheduleSendPendingMessages();
+
+    /* If the run queue is empty,...*/
+    if (emptyRunQueue(cap)) {
+      /* ...take one of our own sparks and turn it into a thread */
+      scheduleActivateSpark(cap);
+
+       /* if this did not work, try to steal a spark from someone else */
+      if (emptyRunQueue(cap)) {
+       receivedFinish = scheduleGetRemoteWork(cap);
+       continue; //  a new round, (hopefully) with new work
+       /* 
+          in GUM, this a) sends out a FISH and returns IF no fish is
+                          out already
+                       b) (blocking) awaits and receives messages
+          
+          in Eden, this is only the blocking receive, as b) in GUM.
+       */
+      }
+    } 
+
+    /* since we perform a blocking receive and continue otherwise,
+       either we never reach here or we definitely have work! */
+    // from here: non-empty run queue
+    ASSERT(!emptyRunQueue(cap));
+
+    if (PacketsWaiting()) {  /* now process incoming messages, if any
+                               pending...  
+
+                               CAUTION: scheduleGetRemoteWork, called
+                               above, waits for messages as well! */
+      processMessages(cap, &receivedFinish);
+    }
+#endif // PARALLEL_HASKELL
+
     scheduleDetectDeadlock(cap,task);
 #if defined(THREADED_RTS)
     cap = task->cap;    // reload cap, it might have changed
@@ -482,47 +457,14 @@ schedule (Capability *initialCapability, Task *task)
        continue; // nothing to do
     }
 
-#if defined(PARALLEL_HASKELL)
-    scheduleSendPendingMessages();
-    if (emptyRunQueue(cap) && scheduleActivateSpark()) 
-       continue;
-
-#if defined(SPARKS)
-    ASSERT(next_fish_to_send_at==0);  // i.e. no delayed fishes left!
-#endif
-
-    /* If we still have no work we need to send a FISH to get a spark
-       from another PE */
-    if (emptyRunQueue(cap)) {
-       if (!scheduleGetRemoteWork(&receivedFinish)) continue;
-       ASSERT(rtsFalse); // should not happen at the moment
-    }
-    // from here: non-empty run queue.
-    //  TODO: merge above case with this, only one call processMessages() !
-    if (PacketsWaiting()) {  /* process incoming messages, if
-                               any pending...  only in else
-                               because getRemoteWork waits for
-                               messages as well */
-       receivedFinish = processMessages();
-    }
-#endif
-
-#if defined(GRAN)
-    scheduleProcessEvent(event);
-#endif
-
     // 
     // Get a thread to run
     //
     t = popRunQueue(cap);
 
-#if defined(GRAN) || defined(PAR)
-    scheduleGranParReport(); // some kind of debuging output
-#else
     // Sanity check the thread we're about to run.  This can be
     // expensive if there is lots of thread switching going on...
     IF_DEBUG(sanity,checkTSO(t));
-#endif
 
 #if defined(THREADED_RTS)
     // Check whether we can run this thread in the current task.
@@ -556,8 +498,6 @@ schedule (Capability *initialCapability, Task *task)
     }
 #endif
 
-    cap->r.rCurrentTSO = t;
-    
     /* context switches are initiated by the timer signal, unless
      * the user specified "context switch as often as possible", with
      * +RTS -C0
@@ -569,12 +509,15 @@ schedule (Capability *initialCapability, Task *task)
         
 run_thread:
 
+    // CurrentTSO is the thread to run.  t might be different if we
+    // loop back to run_thread, so make sure to set CurrentTSO after
+    // that.
+    cap->r.rCurrentTSO = t;
+
     debugTrace(DEBUG_sched, "-->> running thread %ld %s ...", 
                              (long)t->id, whatNext_strs[t->what_next]);
 
     debugTrace(DEBUG_sched, "-->> running thread %ld %s ...", 
                              (long)t->id, whatNext_strs[t->what_next]);
 
-#if defined(PROFILING)
     startHeapProfTimer();
-#endif
 
     // Check for exceptions blocked on this thread
     maybePerformBlockedException (cap, t);
@@ -594,9 +537,21 @@ run_thread:
 
     cap->in_haskell = rtsTrue;
 
-    dirtyTSO(t);
+    dirty_TSO(cap,t);
 
-    recent_activity = ACTIVITY_YES;
+#if defined(THREADED_RTS)
+    if (recent_activity == ACTIVITY_DONE_GC) {
+        // ACTIVITY_DONE_GC means we turned off the timer signal to
+        // conserve power (see #1623).  Re-enable it here.
+        nat prev;
+        prev = xchg((P_)&recent_activity, ACTIVITY_YES);
+        if (prev == ACTIVITY_DONE_GC) {
+            startTimer();
+        }
+    } else {
+        recent_activity = ACTIVITY_YES;
+    }
+#endif
 
     switch (prev_what_next) {
        
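The xchg() in the hunk above pairs with the stopTimer() call added to scheduleDetectDeadlock() further down: after an idle-time GC the timer signal is switched off to save power (#1623), and exactly one of the capabilities that next runs a thread must switch it back on. A stand-alone sketch of the idiom, using C11 atomics in place of the RTS's xchg() and a stand-in for startTimer() (all names here are illustrative, not the RTS's own):

    #include <stdatomic.h>
    #include <stdio.h>

    enum { ACTIVITY_YES, ACTIVITY_INACTIVE, ACTIVITY_DONE_GC };

    static _Atomic int recent_activity = ACTIVITY_DONE_GC;

    /* Stand-in for the RTS's startTimer(). */
    static void start_timer(void)
    {
        printf("timer re-enabled\n");
    }

    static void note_activity(void)
    {
        /* Atomically swap in ACTIVITY_YES.  Of all threads racing here,
           only the one that actually observed ACTIVITY_DONE_GC restarts
           the timer, so start_timer() runs exactly once. */
        int prev = atomic_exchange(&recent_activity, ACTIVITY_YES);
        if (prev == ACTIVITY_DONE_GC) {
            start_timer();
        }
    }

    int main(void)
    {
        note_activity();   /* re-enables the timer */
        note_activity();   /* no-op: recent_activity is already ACTIVITY_YES */
        return 0;
    }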
@@ -643,7 +598,7 @@ run_thread:
     t->saved_errno = errno;
 #if mingw32_HOST_OS
     // Similarly for Windows error code
-    SetLastError(t->saved_winerror);
+    t->saved_winerror = GetLastError();
 #endif
 
 #if defined(THREADED_RTS)
@@ -667,12 +622,14 @@ run_thread:
     // ----------------------------------------------------------------------
     
     // Costs for the scheduler are assigned to CCS_SYSTEM
-#if defined(PROFILING)
     stopHeapProfTimer();
+#if defined(PROFILING)
     CCCS = CCS_SYSTEM;
 #endif
     
-    schedulePostRunThread();
+    schedulePostRunThread(t);
+
+    t = threadStackUnderflow(task,t);
 
     ready_to_gc = rtsFalse;
 
@@ -705,14 +662,10 @@ run_thread:
       barf("schedule: invalid thread return code %d", (int)ret);
     }
 
       barf("schedule: invalid thread return code %d", (int)ret);
     }
 
-    if (scheduleDoHeapProfile(ready_to_gc)) { ready_to_gc = rtsFalse; }
-    if (ready_to_gc) {
+    if (ready_to_gc || scheduleNeedHeapProfile(ready_to_gc)) {
       cap = scheduleDoGC(cap,task,rtsFalse);
     }
   } /* end of while() */
-
-  debugTrace(PAR_DEBUG_verbose,
-            "== Leaving schedule() after having received Finish");
 }
 
 /* ----------------------------------------------------------------------------
@@ -722,23 +675,7 @@ run_thread:
 static void
 schedulePreLoop(void)
 {
-#if defined(GRAN) 
-    /* set up first event to get things going */
-    /* ToDo: assign costs for system setup and init MainTSO ! */
-    new_event(CurrentProc, CurrentProc, CurrentTime[CurrentProc],
-             ContinueThread, 
-             CurrentTSO, (StgClosure*)NULL, (rtsSpark*)NULL);
-    
-    debugTrace (DEBUG_gran,
-               "GRAN: Init CurrentTSO (in schedule) = %p", 
-               CurrentTSO);
-    IF_DEBUG(gran, G_TSO(CurrentTSO, 5));
-    
-    if (RtsFlags.GranFlags.Light) {
-       /* Save current time; GranSim Light only */
-       CurrentTSO->gran.clock = CurrentTime[CurrentProc];
-    }      
-#endif
+  // initialisation for scheduler - what cannot go into initScheduler()  
 }
 
 /* -----------------------------------------------------------------------------
@@ -760,7 +697,7 @@ schedulePushWork(Capability *cap USED_IF_THREADS,
 
     // Check whether we have more threads on our run queue, or sparks
     // in our pool, that we could hand to another Capability.
-    if ((emptyRunQueue(cap) || cap->run_queue_hd->link == END_TSO_QUEUE)
+    if ((emptyRunQueue(cap) || cap->run_queue_hd->_link == END_TSO_QUEUE)
        && sparkPoolSizeCap(cap) < 2) {
        return;
     }
        && sparkPoolSizeCap(cap) < 2) {
        return;
     }
@@ -801,21 +738,21 @@ schedulePushWork(Capability *cap USED_IF_THREADS,
 
        if (cap->run_queue_hd != END_TSO_QUEUE) {
            prev = cap->run_queue_hd;
-           t = prev->link;
-           prev->link = END_TSO_QUEUE;
+           t = prev->_link;
+           prev->_link = END_TSO_QUEUE;
            for (; t != END_TSO_QUEUE; t = next) {
-               next = t->link;
-               t->link = END_TSO_QUEUE;
+               next = t->_link;
+               t->_link = END_TSO_QUEUE;
                if (t->what_next == ThreadRelocated
                    || t->bound == task // don't move my bound thread
                    || tsoLocked(t)) {  // don't move a locked thread
-                   prev->link = t;
+                   setTSOLink(cap, prev, t);
                    prev = t;
                } else if (i == n_free_caps) {
                    pushed_to_all = rtsTrue;
                    i = 0;
                    // keep one for us
-                   prev->link = t;
+                   setTSOLink(cap, prev, t);
                    prev = t;
                } else {
                    debugTrace(DEBUG_sched, "pushing thread %lu to capability %d", (unsigned long)t->id, free_caps[i]->no);
@@ -862,7 +799,8 @@ schedulePushWork(Capability *cap USED_IF_THREADS,
 static void
 scheduleStartSignalHandlers(Capability *cap)
 {
-    if (signals_pending()) { // safe outside the lock
+    if (RtsFlags.MiscFlags.install_signal_handlers && signals_pending()) {
+        // safe outside the lock
        startSignalHandlers(cap);
     }
 }
@@ -910,7 +848,7 @@ scheduleCheckWakeupThreads(Capability *cap USED_IF_THREADS)
            cap->run_queue_hd = cap->wakeup_queue_hd;
            cap->run_queue_tl = cap->wakeup_queue_tl;
        } else {
-           cap->run_queue_tl->link = cap->wakeup_queue_hd;
+           setTSOLink(cap, cap->run_queue_tl, cap->wakeup_queue_hd);
            cap->run_queue_tl = cap->wakeup_queue_tl;
        }
        cap->wakeup_queue_hd = cap->wakeup_queue_tl = END_TSO_QUEUE;
@@ -977,6 +915,8 @@ scheduleDetectDeadlock (Capability *cap, Task *task)
        cap = scheduleDoGC (cap, task, rtsTrue/*force  major GC*/);
 
        recent_activity = ACTIVITY_DONE_GC;
+        // disable timer signals (see #1623)
+        stopTimer();
        
        if ( !emptyRunQueue(cap) ) return;
 
@@ -985,7 +925,7 @@ scheduleDetectDeadlock (Capability *cap, Task *task)
         * for signals to arrive rather than bombing out with a
         * deadlock.
         */
-       if ( anyUserHandlers() ) {
+       if ( RtsFlags.MiscFlags.install_signal_handlers && anyUserHandlers() ) {
            debugTrace(DEBUG_sched,
                       "still deadlocked, waiting for signals...");
 
@@ -997,6 +937,8 @@ scheduleDetectDeadlock (Capability *cap, Task *task)
 
            // either we have threads to run, or we were interrupted:
            ASSERT(!emptyRunQueue(cap) || sched_state >= SCHED_INTERRUPTING);
+
+            return;
        }
 #endif
 
@@ -1011,7 +953,7 @@ scheduleDetectDeadlock (Capability *cap, Task *task)
            case BlockedOnException:
            case BlockedOnMVar:
                throwToSingleThreaded(cap, task->tso, 
-                                     (StgClosure *)NonTermination_closure);
+                                     (StgClosure *)nonTermination_closure);
                return;
            default:
                barf("deadlock: main thread blocked in a strange way");
@@ -1022,164 +964,15 @@ scheduleDetectDeadlock (Capability *cap, Task *task)
     }
 }
 
-/* ----------------------------------------------------------------------------
- * Process an event (GRAN only)
- * ------------------------------------------------------------------------- */
-
-#if defined(GRAN)
-static StgTSO *
-scheduleProcessEvent(rtsEvent *event)
-{
-    StgTSO *t;
-
-    if (RtsFlags.GranFlags.Light)
-      GranSimLight_enter_system(event, &ActiveTSO); // adjust ActiveTSO etc
-
-    /* adjust time based on time-stamp */
-    if (event->time > CurrentTime[CurrentProc] &&
-        event->evttype != ContinueThread)
-      CurrentTime[CurrentProc] = event->time;
-    
-    /* Deal with the idle PEs (may issue FindWork or MoveSpark events) */
-    if (!RtsFlags.GranFlags.Light)
-      handleIdlePEs();
-
-    IF_DEBUG(gran, debugBelch("GRAN: switch by event-type\n"));
-
-    /* main event dispatcher in GranSim */
-    switch (event->evttype) {
-      /* Should just be continuing execution */
-    case ContinueThread:
-      IF_DEBUG(gran, debugBelch("GRAN: doing ContinueThread\n"));
-      /* ToDo: check assertion
-      ASSERT(run_queue_hd != (StgTSO*)NULL &&
-            run_queue_hd != END_TSO_QUEUE);
-      */
-      /* Ignore ContinueThreads for fetching threads (if synchr comm) */
-      if (!RtsFlags.GranFlags.DoAsyncFetch &&
-         procStatus[CurrentProc]==Fetching) {
-       debugBelch("ghuH: Spurious ContinueThread while Fetching ignored; TSO %d (%p) [PE %d]\n",
-             CurrentTSO->id, CurrentTSO, CurrentProc);
-       goto next_thread;
-      }        
-      /* Ignore ContinueThreads for completed threads */
-      if (CurrentTSO->what_next == ThreadComplete) {
-       debugBelch("ghuH: found a ContinueThread event for completed thread %d (%p) [PE %d] (ignoring ContinueThread)\n", 
-             CurrentTSO->id, CurrentTSO, CurrentProc);
-       goto next_thread;
-      }        
-      /* Ignore ContinueThreads for threads that are being migrated */
-      if (PROCS(CurrentTSO)==Nowhere) { 
-       debugBelch("ghuH: trying to run the migrating TSO %d (%p) [PE %d] (ignoring ContinueThread)\n",
-             CurrentTSO->id, CurrentTSO, CurrentProc);
-       goto next_thread;
-      }
-      /* The thread should be at the beginning of the run queue */
-      if (CurrentTSO!=run_queue_hds[CurrentProc]) { 
-       debugBelch("ghuH: TSO %d (%p) [PE %d] is not at the start of the run_queue when doing a ContinueThread\n",
-             CurrentTSO->id, CurrentTSO, CurrentProc);
-       break; // run the thread anyway
-      }
-      /*
-      new_event(proc, proc, CurrentTime[proc],
-               FindWork,
-               (StgTSO*)NULL, (StgClosure*)NULL, (rtsSpark*)NULL);
-      goto next_thread; 
-      */ /* Catches superfluous CONTINUEs -- should be unnecessary */
-      break; // now actually run the thread; DaH Qu'vam yImuHbej 
-
-    case FetchNode:
-      do_the_fetchnode(event);
-      goto next_thread;             /* handle next event in event queue  */
-      
-    case GlobalBlock:
-      do_the_globalblock(event);
-      goto next_thread;             /* handle next event in event queue  */
-      
-    case FetchReply:
-      do_the_fetchreply(event);
-      goto next_thread;             /* handle next event in event queue  */
-      
-    case UnblockThread:   /* Move from the blocked queue to the tail of */
-      do_the_unblock(event);
-      goto next_thread;             /* handle next event in event queue  */
-      
-    case ResumeThread:  /* Move from the blocked queue to the tail of */
-      /* the runnable queue ( i.e. Qu' SImqa'lu') */ 
-      event->tso->gran.blocktime += 
-       CurrentTime[CurrentProc] - event->tso->gran.blockedat;
-      do_the_startthread(event);
-      goto next_thread;             /* handle next event in event queue  */
-      
-    case StartThread:
-      do_the_startthread(event);
-      goto next_thread;             /* handle next event in event queue  */
-      
-    case MoveThread:
-      do_the_movethread(event);
-      goto next_thread;             /* handle next event in event queue  */
-      
-    case MoveSpark:
-      do_the_movespark(event);
-      goto next_thread;             /* handle next event in event queue  */
-      
-    case FindWork:
-      do_the_findwork(event);
-      goto next_thread;             /* handle next event in event queue  */
-      
-    default:
-      barf("Illegal event type %u\n", event->evttype);
-    }  /* switch */
-    
-    /* This point was scheduler_loop in the old RTS */
-
-    IF_DEBUG(gran, debugBelch("GRAN: after main switch\n"));
-
-    TimeOfLastEvent = CurrentTime[CurrentProc];
-    TimeOfNextEvent = get_time_of_next_event();
-    IgnoreEvents=(TimeOfNextEvent==0); // HWL HACK
-    // CurrentTSO = ThreadQueueHd;
-
-    IF_DEBUG(gran, debugBelch("GRAN: time of next event is: %ld\n", 
-                        TimeOfNextEvent));
-
-    if (RtsFlags.GranFlags.Light) 
-      GranSimLight_leave_system(event, &ActiveTSO); 
-
-    EndOfTimeSlice = CurrentTime[CurrentProc]+RtsFlags.GranFlags.time_slice;
-
-    IF_DEBUG(gran, 
-            debugBelch("GRAN: end of time-slice is %#lx\n", EndOfTimeSlice));
-
-    /* in a GranSim setup the TSO stays on the run queue */
-    t = CurrentTSO;
-    /* Take a thread from the run queue. */
-    POP_RUN_QUEUE(t); // take_off_run_queue(t);
-
-    IF_DEBUG(gran, 
-            debugBelch("GRAN: About to run current thread, which is\n");
-            G_TSO(t,5));
-
-    context_switch = 0; // turned on via GranYield, checking events and time slice
-
-    IF_DEBUG(gran, 
-            DumpGranEvent(GR_SCHEDULE, t));
-
-    procStatus[CurrentProc] = Busy;
-}
-#endif // GRAN
 
 /* ----------------------------------------------------------------------------
  * Send pending messages (PARALLEL_HASKELL only)
  * ------------------------------------------------------------------------- */
 
-#if defined(PARALLEL_HASKELL)
 static StgTSO *
 scheduleSendPendingMessages(void)
 {
-    StgSparkPool *pool;
-    rtsSpark spark;
-    StgTSO *t;
+#if defined(PARALLEL_HASKELL)
 
 # if defined(PAR) // global Mem.Mgmt., omit for now
     if (PendingFetches != END_BF_QUEUE) {
@@ -1192,8 +985,8 @@ scheduleSendPendingMessages(void)
        // packets which have become too old...
        sendOldBuffers(); 
     }
-}
 #endif
+}
 
 /* ----------------------------------------------------------------------------
  * Activate spark threads (PARALLEL_HASKELL only)
@@ -1201,57 +994,28 @@ scheduleSendPendingMessages(void)
 
 #if defined(PARALLEL_HASKELL)
 static void
-scheduleActivateSpark(void)
+scheduleActivateSpark(Capability *cap)
 {
-#if defined(SPARKS)
-  ASSERT(emptyRunQueue());
-/* We get here if the run queue is empty and want some work.
-   We try to turn a spark into a thread, and add it to the run queue,
-   from where it will be picked up in the next iteration of the scheduler
-   loop.
-*/
-
-      /* :-[  no local threads => look out for local sparks */
-      /* the spark pool for the current PE */
-      pool = &(cap.r.rSparks); // JB: cap = (old) MainCap
-      if (advisory_thread_count < RtsFlags.ParFlags.maxThreads &&
-         pool->hd < pool->tl) {
-       /* 
-        * ToDo: add GC code check that we really have enough heap afterwards!!
-        * Old comment:
-        * If we're here (no runnable threads) and we have pending
-        * sparks, we must have a space problem.  Get enough space
-        * to turn one of those pending sparks into a
-        * thread... 
-        */
+    StgClosure *spark;
 
 
-       spark = findSpark(rtsFalse);            /* get a spark */
-       if (spark != (rtsSpark) NULL) {
-         tso = createThreadFromSpark(spark);       /* turn the spark into a thread */
-         IF_PAR_DEBUG(fish, // schedule,
-                      debugBelch("==== schedule: Created TSO %d (%p); %d threads active\n",
-                            tso->id, tso, advisory_thread_count));
-
-         if (tso==END_TSO_QUEUE) { /* failed to activate spark->back to loop */
-           IF_PAR_DEBUG(fish, // schedule,
-                        debugBelch("==^^ failed to create thread from spark @ %lx\n",
-                            spark));
-           return rtsFalse; /* failed to generate a thread */
-         }                  /* otherwise fall through & pick-up new tso */
-       } else {
-         IF_PAR_DEBUG(fish, // schedule,
-                      debugBelch("==^^ no local sparks (spark pool contains only NFs: %d)\n", 
-                            spark_queue_len(pool)));
-         return rtsFalse;  /* failed to generate a thread */
-       }
-       return rtsTrue;  /* success in generating a thread */
-  } else { /* no more threads permitted or pool empty */
-    return rtsFalse;  /* failed to generateThread */
-  }
-#else
-  tso = NULL; // avoid compiler warning only
-  return rtsFalse;  /* dummy in non-PAR setup */
-#endif // SPARKS
+/* We only want to stay here if the run queue is empty and we want some
+   work. We try to turn a spark into a thread, and add it to the run
+   queue, from where it will be picked up in the next iteration of the
+   scheduler loop.  
+*/
+    if (!emptyRunQueue(cap)) 
+      /* In the threaded RTS, another task might have pushed a thread
+        on our run queue in the meantime ? But would need a lock.. */
+      return;
+
+    spark = findSpark(cap); // defined in Sparks.c
+
+    if (spark != NULL) {
+      debugTrace(DEBUG_sched,
+                "turning spark of closure %p into a thread",
+                (StgClosure *)spark);
+      createSparkThread(cap,spark); // defined in Sparks.c
+    }
 }
 #endif // PARALLEL_HASKELL
 
@@ -1261,10 +1025,12 @@ scheduleActivateSpark(void)
     
 #if defined(PARALLEL_HASKELL)
 static rtsBool
-scheduleGetRemoteWork(rtsBool *receivedFinish)
+scheduleGetRemoteWork(Capability *cap)
 {
-  ASSERT(emptyRunQueue());
+#if defined(PARALLEL_HASKELL)
+  rtsBool receivedFinish = rtsFalse;
 
+  // idle(), i.e. send all buffers, wait for work
   if (RtsFlags.ParFlags.BufferTime) {
        IF_PAR_DEBUG(verbose, 
                debugBelch("...send all pending data,"));
@@ -1274,261 +1040,64 @@ scheduleGetRemoteWork(rtsBool *receivedFinish)
            sendImmediately(i); // send all messages away immediately
        }
   }
-# ifndef SPARKS
-       //++EDEN++ idle() , i.e. send all buffers, wait for work
-       // suppress fishing in EDEN... just look for incoming messages
-       // (blocking receive)
-  IF_PAR_DEBUG(verbose, 
-              debugBelch("...wait for incoming messages...\n"));
-  *receivedFinish = processMessages(); // blocking receive...
-
-       // and reenter scheduling loop after having received something
-       // (return rtsFalse below)
-
-# else /* activate SPARKS machinery */
-/* We get here, if we have no work, tried to activate a local spark, but still
-   have no work. We try to get a remote spark, by sending a FISH message.
-   Thread migration should be added here, and triggered when a sequence of 
-   fishes returns without work. */
-       delay = (RtsFlags.ParFlags.fishDelay!=0ll ? RtsFlags.ParFlags.fishDelay : 0ll);
-
-      /* =8-[  no local sparks => look for work on other PEs */
-       /*
-        * We really have absolutely no work.  Send out a fish
-        * (there may be some out there already), and wait for
-        * something to arrive.  We clearly can't run any threads
-        * until a SCHEDULE or RESUME arrives, and so that's what
-        * we're hoping to see.  (Of course, we still have to
-        * respond to other types of messages.)
-        */
-       rtsTime now = msTime() /*CURRENT_TIME*/;
-       IF_PAR_DEBUG(verbose, 
-                    debugBelch("--  now=%ld\n", now));
-       IF_PAR_DEBUG(fish, // verbose,
-            if (outstandingFishes < RtsFlags.ParFlags.maxFishes &&
-                (last_fish_arrived_at!=0 &&
-                 last_fish_arrived_at+delay > now)) {
-              debugBelch("--$$ <%llu> delaying FISH until %llu (last fish %llu, delay %llu)\n",
-                    now, last_fish_arrived_at+delay, 
-                    last_fish_arrived_at,
-                    delay);
-            });
-  
-       if (outstandingFishes < RtsFlags.ParFlags.maxFishes &&
-           advisory_thread_count < RtsFlags.ParFlags.maxThreads) { // send a FISH, but when?
-         if (last_fish_arrived_at==0 ||
-             (last_fish_arrived_at+delay <= now)) {           // send FISH now!
-           /* outstandingFishes is set in sendFish, processFish;
-              avoid flooding system with fishes via delay */
-    next_fish_to_send_at = 0;  
-  } else {
-    /* ToDo: this should be done in the main scheduling loop to avoid the
-             busy wait here; not so bad if fish delay is very small  */
-    int iq = 0; // DEBUGGING -- HWL
-    next_fish_to_send_at = last_fish_arrived_at+delay; // remember when to send  
-    /* send a fish when ready, but process messages that arrive in the meantime */
-    do {
-      if (PacketsWaiting()) {
-        iq++; // DEBUGGING
-        *receivedFinish = processMessages();
-      }
-      now = msTime();
-    } while (!*receivedFinish || now<next_fish_to_send_at);
-    // JB: This means the fish could become obsolete, if we receive
-    // work. Better check for work again? 
-    // last line: while (!receivedFinish || !haveWork || now<...)
-    // next line: if (receivedFinish || haveWork )
-
-    if (*receivedFinish) // no need to send a FISH if we are finishing anyway
-      return rtsFalse;  // NB: this will leave scheduler loop
-                       // immediately after return!
-                         
-    IF_PAR_DEBUG(fish, // verbose,
-              debugBelch("--$$ <%llu> sent delayed fish (%d processMessages); active/total threads=%d/%d\n",now,iq,run_queue_len(),advisory_thread_count));
-
-  }
 
 
-    /* pe = choosePE(); 
-       sendFish(pe, thisPE, NEW_FISH_AGE, NEW_FISH_HISTORY, 
-                NEW_FISH_HUNGER);
+  /* this would be the place for fishing in GUM... 
 
-    // Global statistics: count no. of fishes
-    if (RtsFlags.ParFlags.ParStats.Global &&
-         RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
-          globalParStats.tot_fish_mess++;
-          }
-    */ 
-
-  /* delayed fishes must have been sent by now! */
-  next_fish_to_send_at = 0;  
-  }
-      
-  *receivedFinish = processMessages();
-# endif /* SPARKS */
-
- return rtsFalse;
- /* NB: this function always returns rtsFalse, meaning the scheduler
-    loop continues with the next iteration; 
-    rationale: 
-      return code means success in finding work; we enter this function
-      if there is no local work, thus have to send a fish which takes
-      time until it arrives with work; in the meantime we should process
-      messages in the main loop;
- */
-}
-#endif // PARALLEL_HASKELL
+     if (no-earlier-fish-around) 
+          sendFish(choosePe());
+   */
 
-/* ----------------------------------------------------------------------------
- * PAR/GRAN: Report stats & debugging info(?)
- * ------------------------------------------------------------------------- */
+  // Eden: just look for incoming messages (blocking receive)
+  IF_PAR_DEBUG(verbose, 
+              debugBelch("...wait for incoming messages...\n"));
+  processMessages(cap, &receivedFinish); // blocking receive...
 
-#if defined(PAR) || defined(GRAN)
-static void
-scheduleGranParReport(void)
-{
-  ASSERT(run_queue_hd != END_TSO_QUEUE);
 
-  /* Take a thread from the run queue, if we have work */
-  POP_RUN_QUEUE(t);  // take_off_run_queue(END_TSO_QUEUE);
+  return receivedFinish;
+  // reenter the scheduling loop after having received something
 
-    /* If this TSO has got its outport closed in the meantime, 
-     *   it mustn't be run. Instead, we have to clean it up as if it was finished.
-     * It has to be marked as TH_DEAD for this purpose.
-     * If it is TH_TERM instead, it is supposed to have finished in the normal way.
+#else /* !PARALLEL_HASKELL, i.e. THREADED_RTS */
 
-JB: TODO: investigate wether state change field could be nuked
-     entirely and replaced by the normal tso state (whatnext
-     field). All we want to do is to kill tsos from outside.
-     */
+  return rtsFalse; /* return value unused in THREADED_RTS */
 
-    /* ToDo: write something to the log-file
-    if (RTSflags.ParFlags.granSimStats && !sameThread)
-        DumpGranEvent(GR_SCHEDULE, RunnableThreadsHd);
-
-    CurrentTSO = t;
-    */
-    /* the spark pool for the current PE */
-    pool = &(cap.r.rSparks); //  cap = (old) MainCap
-
-    IF_DEBUG(scheduler, 
-            debugBelch("--=^ %d threads, %d sparks on [%#x]\n", 
-                  run_queue_len(), spark_queue_len(pool), CURRENT_PROC));
-
-    IF_PAR_DEBUG(fish,
-            debugBelch("--=^ %d threads, %d sparks on [%#x]\n", 
-                  run_queue_len(), spark_queue_len(pool), CURRENT_PROC));
-
-    if (RtsFlags.ParFlags.ParStats.Full && 
-       (t->par.sparkname != (StgInt)0) && // only log spark generated threads
-       (emitSchedule || // forced emit
-         (t && LastTSO && t->id != LastTSO->id))) {
-      /* 
-        we are running a different TSO, so write a schedule event to log file
-        NB: If we use fair scheduling we also have to write  a deschedule 
-            event for LastTSO; with unfair scheduling we know that the
-            previous tso has blocked whenever we switch to another tso, so
-            we don't need it in GUM for now
-      */
-      IF_PAR_DEBUG(fish, // schedule,
-                  debugBelch("____ scheduling spark generated thread %d (%lx) (%lx) via a forced emit\n",t->id,t,t->par.sparkname));
-
-      DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC,
-                      GR_SCHEDULE, t, (StgClosure *)NULL, 0, 0);
-      emitSchedule = rtsFalse;
-    }
-}     
-#endif
+#endif /* PARALLEL_HASKELL */
+}
+#endif // PARALLEL_HASKELL
 
 /* ----------------------------------------------------------------------------
  * After running a thread...
  * ------------------------------------------------------------------------- */
 
 static void
-schedulePostRunThread(void)
+schedulePostRunThread (StgTSO *t)
 {
-#if defined(PAR)
-    /* HACK 675: if the last thread didn't yield, make sure to print a 
-       SCHEDULE event to the log file when StgRunning the next thread, even
-       if it is the same one as before */
-    LastTSO = t; 
-    TimeOfLastYield = CURRENT_TIME;
-#endif
+    // We have to be able to catch transactions that are in an
+    // infinite loop as a result of seeing an inconsistent view of
+    // memory, e.g. 
+    //
+    //   atomically $ do
+    //       [a,b] <- mapM readTVar [ta,tb]
+    //       when (a == b) loop
+    //
+    // and a is never equal to b given a consistent view of memory.
+    //
+    if (t -> trec != NO_TREC && t -> why_blocked == NotBlocked) {
+        if (!stmValidateNestOfTransactions (t -> trec)) {
+            debugTrace(DEBUG_sched | DEBUG_stm,
+                       "trec %p found wasting its time", t);
+            
+            // strip the stack back to the
+            // ATOMICALLY_FRAME, aborting the (nested)
+            // transaction, and saving the stack of any
+            // partially-evaluated thunks on the heap.
+            throwToSingleThreaded_(&capabilities[0], t, 
+                                   NULL, rtsTrue, NULL);
+            
+            ASSERT(get_itbl((StgClosure *)t->sp)->type == ATOMICALLY_FRAME);
+        }
+    }
 
   /* some statistics gathering in the parallel case */
-
-#if defined(GRAN) || defined(PAR) || defined(EDEN)
-  switch (ret) {
-    case HeapOverflow:
-# if defined(GRAN)
-      IF_DEBUG(gran, DumpGranEvent(GR_DESCHEDULE, t));
-      globalGranStats.tot_heapover++;
-# elif defined(PAR)
-      globalParStats.tot_heapover++;
-# endif
-      break;
-
-     case StackOverflow:
-# if defined(GRAN)
-      IF_DEBUG(gran, 
-              DumpGranEvent(GR_DESCHEDULE, t));
-      globalGranStats.tot_stackover++;
-# elif defined(PAR)
-      // IF_DEBUG(par, 
-      // DumpGranEvent(GR_DESCHEDULE, t);
-      globalParStats.tot_stackover++;
-# endif
-      break;
-
-    case ThreadYielding:
-# if defined(GRAN)
-      IF_DEBUG(gran, 
-              DumpGranEvent(GR_DESCHEDULE, t));
-      globalGranStats.tot_yields++;
-# elif defined(PAR)
-      // IF_DEBUG(par, 
-      // DumpGranEvent(GR_DESCHEDULE, t);
-      globalParStats.tot_yields++;
-# endif
-      break; 
-
-    case ThreadBlocked:
-# if defined(GRAN)
-       debugTrace(DEBUG_sched, 
-                  "--<< thread %ld (%p; %s) stopped, blocking on node %p [PE %d] with BQ: ", 
-                  t->id, t, whatNext_strs[t->what_next], t->block_info.closure, 
-                  (t->block_info.closure==(StgClosure*)NULL ? 99 : where_is(t->block_info.closure)));
-              if (t->block_info.closure!=(StgClosure*)NULL)
-                print_bq(t->block_info.closure);
-              debugBelch("\n"));
-
-      // ??? needed; should emit block before
-      IF_DEBUG(gran, 
-              DumpGranEvent(GR_DESCHEDULE, t)); 
-      prune_eventq(t, (StgClosure *)NULL); // prune ContinueThreads for t
-      /*
-       ngoq Dogh!
-      ASSERT(procStatus[CurrentProc]==Busy || 
-             ((procStatus[CurrentProc]==Fetching) && 
-             (t->block_info.closure!=(StgClosure*)NULL)));
-      if (run_queue_hds[CurrentProc] == END_TSO_QUEUE &&
-         !(!RtsFlags.GranFlags.DoAsyncFetch &&
-           procStatus[CurrentProc]==Fetching)) 
-       procStatus[CurrentProc] = Idle;
-      */
-# elif defined(PAR)
-//++PAR++  blockThread() writes the event (change?)
-# endif
-    break;
-
-  case ThreadFinished:
-    break;
-
-  default:
-    barf("parGlobalStats: unknown return code");
-    break;
-    }
-#endif
 }
 
 /* -----------------------------------------------------------------------------
@@ -1607,23 +1176,19 @@ scheduleHandleHeapOverflow( Capability *cap, StgTSO *t )
     }
     
     debugTrace(DEBUG_sched,
-              "--<< thread %ld (%s) stopped: HeapOverflow\n", 
+              "--<< thread %ld (%s) stopped: HeapOverflow",
               (long)t->id, whatNext_strs[t->what_next]);
 
-#if defined(GRAN)
-    ASSERT(!is_on_queue(t,CurrentProc));
-#elif defined(PARALLEL_HASKELL)
-    /* Currently we emit a DESCHEDULE event before GC in GUM.
-       ToDo: either add separate event to distinguish SYSTEM time from rest
-       or just nuke this DESCHEDULE (and the following SCHEDULE) */
-    if (0 && RtsFlags.ParFlags.ParStats.Full) {
-       DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC,
-                        GR_DESCHEDULE, t, (StgClosure *)NULL, 0, 0);
-       emitSchedule = rtsTrue;
+    if (context_switch) {
+        // Sometimes we miss a context switch, e.g. when calling
+        // primitives in a tight loop, MAYBE_GC() doesn't check the
+        // context switch flag, and we end up waiting for a GC.
+        // See #1984, and concurrent/should_run/1984
+        context_switch = 0;
+        addToRunQueue(cap,t);
+    } else {
+        pushOnRunQueue(cap,t);
     }
     }
-#endif
-      
-    pushOnRunQueue(cap,t);
     return rtsTrue;
     /* actual GC is done at the end of the while loop in schedule() */
 }
@@ -1691,7 +1256,7 @@ scheduleHandleYield( Capability *cap, StgTSO *t, nat prev_what_next )
     IF_DEBUG(sanity,
             //debugBelch("&& Doing sanity check on yielding TSO %ld.", t->id);
             checkTSO(t));
-    ASSERT(t->link == END_TSO_QUEUE);
+    ASSERT(t->_link == END_TSO_QUEUE);
     
     // Shortcut if we're just switching evaluators: don't bother
     // doing stack squeezing (which can be expensive), just run the
@@ -1699,28 +1264,9 @@ scheduleHandleYield( Capability *cap, StgTSO *t, nat prev_what_next )
     if (t->what_next != prev_what_next) {
        return rtsTrue;
     }
-    
-#if defined(GRAN)
-    ASSERT(!is_on_queue(t,CurrentProc));
-      
-    IF_DEBUG(sanity,
-            //debugBelch("&& Doing sanity check on all ThreadQueues (and their TSOs).");
-            checkThreadQsSanity(rtsTrue));
-
-#endif
 
     addToRunQueue(cap,t);
 
-#if defined(GRAN)
-    /* add a ContinueThread event to actually process the thread */
-    new_event(CurrentProc, CurrentProc, CurrentTime[CurrentProc],
-             ContinueThread,
-             t, (StgClosure*)NULL, (rtsSpark*)NULL);
-    IF_GRAN_DEBUG(bq, 
-                 debugBelch("GRAN: eventq and runnableq after adding yielded thread to queue again:\n");
-                 G_EVENTQ(0);
-                 G_CURR_THREADQ(0));
-#endif
     return rtsFalse;
 }
 
@@ -1735,42 +1281,6 @@ scheduleHandleThreadBlocked( StgTSO *t
 #endif
     )
 {
-#if defined(GRAN)
-    IF_DEBUG(scheduler,
-            debugBelch("--<< thread %ld (%p; %s) stopped, blocking on node %p [PE %d] with BQ: \n", 
-                       t->id, t, whatNext_strs[t->what_next], t->block_info.closure, (t->block_info.closure==(StgClosure*)NULL ? 99 : where_is(t->block_info.closure)));
-            if (t->block_info.closure!=(StgClosure*)NULL) print_bq(t->block_info.closure));
-    
-    // ??? needed; should emit block before
-    IF_DEBUG(gran, 
-            DumpGranEvent(GR_DESCHEDULE, t)); 
-    prune_eventq(t, (StgClosure *)NULL); // prune ContinueThreads for t
-    /*
-      ngoq Dogh!
-      ASSERT(procStatus[CurrentProc]==Busy || 
-      ((procStatus[CurrentProc]==Fetching) && 
-      (t->block_info.closure!=(StgClosure*)NULL)));
-      if (run_queue_hds[CurrentProc] == END_TSO_QUEUE &&
-      !(!RtsFlags.GranFlags.DoAsyncFetch &&
-      procStatus[CurrentProc]==Fetching)) 
-      procStatus[CurrentProc] = Idle;
-    */
-#elif defined(PAR)
-    IF_DEBUG(scheduler,
-            debugBelch("--<< thread %ld (%p; %s) stopped, blocking on node %p with BQ: \n", 
-                       t->id, t, whatNext_strs[t->what_next], t->block_info.closure));
-    IF_PAR_DEBUG(bq,
-                
-                if (t->block_info.closure!=(StgClosure*)NULL) 
-                print_bq(t->block_info.closure));
-    
-    /* Send a fetch (if BlockedOnGA) and dump event to log file */
-    blockThread(t);
-    
-    /* whatever we schedule next, we must log that schedule */
-    emitSchedule = rtsTrue;
-    
-#else /* !GRAN */
 
       // We don't need to do anything.  The thread is blocked, and it
       // has tidied up its stack and placed itself on whatever queue
@@ -1793,12 +1303,6 @@ scheduleHandleThreadBlocked( StgTSO *t
        debugTraceEnd();
     }
 #endif
-    
-    /* Only for dumping event to log file 
-       ToDo: do I need this in GranSim, too?
-       blockThread(t);
-    */
-#endif
 }
 
 /* -----------------------------------------------------------------------------
@@ -1817,51 +1321,6 @@ scheduleHandleThreadFinished (Capability *cap STG_UNUSED, Task *task, StgTSO *t)
     debugTrace(DEBUG_sched, "--++ thread %lu (%s) finished", 
               (unsigned long)t->id, whatNext_strs[t->what_next]);
 
     debugTrace(DEBUG_sched, "--++ thread %lu (%s) finished", 
               (unsigned long)t->id, whatNext_strs[t->what_next]);
 
-    /* Inform the Hpc that a thread has finished */
-    hs_hpc_thread_finished_event(t);
-
-#if defined(GRAN)
-      endThread(t, CurrentProc); // clean-up the thread
-#elif defined(PARALLEL_HASKELL)
-      /* For now all are advisory -- HWL */
-      //if(t->priority==AdvisoryPriority) ??
-      advisory_thread_count--; // JB: Caution with this counter, buggy!
-      
-# if defined(DIST)
-      if(t->dist.priority==RevalPriority)
-       FinishReval(t);
-# endif
-    
-# if defined(EDENOLD)
-      // the thread could still have an outport... (BUG)
-      if (t->eden.outport != -1) {
-      // delete the outport for the tso which has finished...
-       IF_PAR_DEBUG(eden_ports,
-                  debugBelch("WARNING: Scheduler removes outport %d for TSO %d.\n",
-                             t->eden.outport, t->id));
-       deleteOPT(t);
-      }
-      // thread still in the process (HEAVY BUG! since outport has just been closed...)
-      if (t->eden.epid != -1) {
-       IF_PAR_DEBUG(eden_ports,
-                  debugBelch("WARNING: Scheduler removes TSO %d from process %d .\n",
-                          t->id, t->eden.epid));
-       removeTSOfromProcess(t);
-      }
-# endif 
-
-# if defined(PAR)
-      if (RtsFlags.ParFlags.ParStats.Full &&
-         !RtsFlags.ParFlags.ParStats.Suppressed) 
-       DumpEndEvent(CURRENT_PROC, t, rtsFalse /* not mandatory */);
-
-      //  t->par only contains statistics: left out for now...
-      IF_PAR_DEBUG(fish,
-                  debugBelch("**** end thread: ended sparked thread %d (%lx); sparkname: %lx\n",
-                             t->id,t,t->par.sparkname));
-# endif
-#endif // PARALLEL_HASKELL
-
       //
       // Check whether the thread that just completed was a bound
       // thread, and if so return with the result.  
       //
       // Check whether the thread that just completed was a bound
       // thread, and if so return with the result.  
@@ -1881,7 +1340,7 @@ scheduleHandleThreadFinished (Capability *cap STG_UNUSED, Task *task, StgTSO *t)
              // point where we can deal with this.  Leaving it on the run
              // queue also ensures that the garbage collector knows about
              // this thread and its return value (it gets dropped from the
              // point where we can deal with this.  Leaving it on the run
              // queue also ensures that the garbage collector knows about
              // this thread and its return value (it gets dropped from the
-             // all_threads list so there's no other way to find it).
+             // step->threads list so there's no other way to find it).
              appendToRunQueue(cap,t);
              return rtsFalse;
 #else
@@ -1919,36 +1378,21 @@ scheduleHandleThreadFinished (Capability *cap STG_UNUSED, Task *task, StgTSO *t)
 }
 
 /* -----------------------------------------------------------------------------
- * Perform a heap census, if PROFILING
+ * Perform a heap census
  * -------------------------------------------------------------------------- */
 
 static rtsBool
-scheduleDoHeapProfile( rtsBool ready_to_gc STG_UNUSED )
+scheduleNeedHeapProfile( rtsBool ready_to_gc STG_UNUSED )
 {
-#if defined(PROFILING)
     // When we have +RTS -i0 and we're heap profiling, do a census at
     // every GC.  This lets us get repeatable runs for debugging.
     if (performHeapProfile ||
        (RtsFlags.ProfFlags.profileInterval==0 &&
         RtsFlags.ProfFlags.doHeapProfile && ready_to_gc)) {
-
-       // checking black holes is necessary before GC, otherwise
-       // there may be threads that are unreachable except by the
-       // blackhole queue, which the GC will consider to be
-       // deadlocked.
-       scheduleCheckBlackHoles(&MainCapability);
-
-       debugTrace(DEBUG_sched, "garbage collecting before heap census");
-       GarbageCollect(rtsTrue);
-
-       debugTrace(DEBUG_sched, "performing heap census");
-       heapCensus();
-
-       performHeapProfile = rtsFalse;
-       return rtsTrue;  // true <=> we already GC'd
+        return rtsTrue;
+    } else {
+        return rtsFalse;
     }
-#endif
-    return rtsFalse;
 }
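
The census decision is now separated from the census itself: scheduleNeedHeapProfile() only answers whether one is due, and scheduleDoGC() (see the hunks below) forces a major collection and then walks the heap. A standalone sketch of that flow, with stub bodies standing in for the real RTS functions:

    #include <stdbool.h>
    #include <stdio.h>

    static bool performHeapProfile = true;    /* stand-in for the RTS flag */

    /* decide only; the caller performs the GC and the census */
    static bool scheduleNeedHeapProfile(bool ready_to_gc)
    {
        return performHeapProfile && ready_to_gc;  /* simplified predicate */
    }

    static void GarbageCollect(bool force_major)
    {
        printf("GC (%s)\n", force_major ? "major" : "minor");
    }

    static void heapCensus(void) { printf("census\n"); }

    static void scheduleDoGC(bool force_major)
    {
        bool heap_census = scheduleNeedHeapProfile(true);
        GarbageCollect(force_major || heap_census); /* census implies major GC */
        if (heap_census) {
            heapCensus();                   /* walk the just-collected heap */
            performHeapProfile = false;
        }
    }

    int main(void) { scheduleDoGC(false); return 0; }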
 
 /* -----------------------------------------------------------------------------
@@ -1959,8 +1403,10 @@ static Capability *
 scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major)
 {
     StgTSO *t;
+    rtsBool heap_census;
 #ifdef THREADED_RTS
 #ifdef THREADED_RTS
-    static volatile StgWord waiting_for_gc;
+    /* volatile StgWord waiting_for_gc
+       now lives inside Capability.c */
     rtsBool was_waiting;
     nat i;
 #endif
@@ -1977,6 +1423,10 @@ scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major)
     // the other tasks to sleep and stay asleep.
     //
        
+    /*  Other capabilities are prevented from running yet more Haskell
+       threads if waiting_for_gc is set. Tested inside
+       yieldCapability() and releaseCapability() in Capability.c */
+
     was_waiting = cas(&waiting_for_gc, 0, 1);
     if (was_waiting) {
        do {
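
The cas() above elects a single GC leader: the task that flips waiting_for_gc from 0 to 1 goes on to stop the world, and every other capability, seeing the flag set, stands down until the collection finishes. A self-contained analogue using C11 atomics; this is a hypothetical demo of the protocol, not the RTS source:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    static atomic_uint waiting_for_gc;

    static void *task(void *arg)
    {
        unsigned expected = 0;
        /* same shape as cas(&waiting_for_gc, 0, 1): exactly one winner */
        if (atomic_compare_exchange_strong(&waiting_for_gc, &expected, 1)) {
            printf("task %ld: leading the GC\n", (long)(intptr_t)arg);
            /* ... stop the world, collect, then reopen the gate ... */
            atomic_store(&waiting_for_gc, 0);
        } else {
            /* lost the race: yield the capability and wait for the GC */
            printf("task %ld: standing down\n", (long)(intptr_t)arg);
        }
        return NULL;
    }

    int main(void)
    {
        pthread_t t[4];
        for (long i = 0; i < 4; i++)
            pthread_create(&t[i], NULL, task, (void *)(intptr_t)i);
        for (int i = 0; i < 4; i++)
            pthread_join(t[i], NULL);
        return 0;
    }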
@@ -2007,51 +1457,6 @@ scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major)
     waiting_for_gc = rtsFalse;
 #endif
 
-    /* Kick any transactions which are invalid back to their
-     * atomically frames.  When next scheduled they will try to
-     * commit, this commit will fail and they will retry.
-     */
-    { 
-       StgTSO *next;
-
-       for (t = all_threads; t != END_TSO_QUEUE; t = next) {
-           if (t->what_next == ThreadRelocated) {
-               next = t->link;
-           } else {
-               next = t->global_link;
-               
-               // This is a good place to check for blocked
-               // exceptions.  It might be the case that a thread is
-               // blocked on delivering an exception to a thread that
-               // is also blocked - we try to ensure that this
-               // doesn't happen in throwTo(), but it's too hard (or
-               // impossible) to close all the race holes, so we
-               // accept that some might get through and deal with
-               // them here.  A GC will always happen at some point,
-               // even if the system is otherwise deadlocked.
-               maybePerformBlockedException (&capabilities[0], t);
-
-               if (t -> trec != NO_TREC && t -> why_blocked == NotBlocked) {
-                   if (!stmValidateNestOfTransactions (t -> trec)) {
-                       debugTrace(DEBUG_sched | DEBUG_stm,
-                                  "trec %p found wasting its time", t);
-                       
-                       // strip the stack back to the
-                       // ATOMICALLY_FRAME, aborting the (nested)
-                       // transaction, and saving the stack of any
-                       // partially-evaluated thunks on the heap.
-                       throwToSingleThreaded_(&capabilities[0], t, 
-                                              NULL, rtsTrue, NULL);
-                       
-#ifdef REG_R1
-                       ASSERT(get_itbl((StgClosure *)t->sp)->type == ATOMICALLY_FRAME);
-#endif
-                   }
-               }
-           }
-       }
-    }
-    
     // so this happens periodically:
     if (cap) scheduleCheckBlackHoles(cap);
     
@@ -2066,6 +1471,8 @@ scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major)
        deleteAllThreads(&capabilities[0]);
        sched_state = SCHED_SHUTTING_DOWN;
     }
+    
+    heap_census = scheduleNeedHeapProfile(rtsTrue);
 
     /* everybody back, start the GC.
      * Could do it in this thread, or signal a condition var
@@ -2075,8 +1482,14 @@ scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major)
 #if defined(THREADED_RTS)
     debugTrace(DEBUG_sched, "doing GC");
 #endif
-    GarbageCollect(force_major);
+    GarbageCollect(force_major || heap_census);
     
+    if (heap_census) {
+        debugTrace(DEBUG_sched, "performing heap census");
+        heapCensus();
+       performHeapProfile = rtsFalse;
+    }
+
 #if defined(THREADED_RTS)
     // release our stash of capabilities.
     for (i = 0; i < n_capabilities; i++) {
@@ -2092,17 +1505,6 @@ scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major)
     }
 #endif
 
-#if defined(GRAN)
-    /* add a ContinueThread event to continue execution of current thread */
-    new_event(CurrentProc, CurrentProc, CurrentTime[CurrentProc],
-             ContinueThread,
-             t, (StgClosure*)NULL, (rtsSpark*)NULL);
-    IF_GRAN_DEBUG(bq, 
-                 debugBelch("GRAN: eventq and runnableq after Garbage collection:\n\n");
-                 G_EVENTQ(0);
-                 G_CURR_THREADQ(0));
-#endif /* GRAN */
-
     return cap;
 }
 
@@ -2110,7 +1512,7 @@ scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major)
  * Singleton fork(). Do not copy any running threads.
  * ------------------------------------------------------------------------- */
 
-StgInt
+pid_t
 forkProcess(HsStablePtr *entry
 #ifndef FORKPROCESS_PRIMOP_SUPPORTED
            STG_UNUSED
@@ -2122,6 +1524,7 @@ forkProcess(HsStablePtr *entry
     pid_t pid;
     StgTSO* t,*next;
     Capability *cap;
+    nat s;
     
 #if defined(THREADED_RTS)
     if (RtsFlags.ParFlags.nNodes > 1) {
@@ -2135,25 +1538,44 @@ forkProcess(HsStablePtr *entry
     // ToDo: for SMP, we should probably acquire *all* the capabilities
     cap = rts_lock();
     
+    // no funny business: hold locks while we fork, otherwise if some
+    // other thread is holding a lock when the fork happens, the data
+    // structure protected by the lock will forever be in an
+    // inconsistent state in the child.  See also #1391.
+    ACQUIRE_LOCK(&sched_mutex);
+    ACQUIRE_LOCK(&cap->lock);
+    ACQUIRE_LOCK(&cap->running_task->lock);
+
     pid = fork();
     
     if (pid) { // parent
        
+        RELEASE_LOCK(&sched_mutex);
+        RELEASE_LOCK(&cap->lock);
+        RELEASE_LOCK(&cap->running_task->lock);
+
        // just return the pid
        rts_unlock(cap);
        return pid;
        
     } else { // child
        
+#if defined(THREADED_RTS)
+        initMutex(&sched_mutex);
+        initMutex(&cap->lock);
+        initMutex(&cap->running_task->lock);
+#endif
+
        // Now, all OS threads except the thread that forked are
        // stopped.  We need to stop all Haskell threads, including
        // those involved in foreign calls.  Also we need to delete
        // all Tasks, because they correspond to OS threads that are
        // now gone.
 
-       for (t = all_threads; t != END_TSO_QUEUE; t = next) {
+        for (s = 0; s < total_steps; s++) {
+          for (t = all_steps[s].threads; t != END_TSO_QUEUE; t = next) {
            if (t->what_next == ThreadRelocated) {
-               next = t->link;
+               next = t->_link;
            } else {
                next = t->global_link;
                // don't allow threads to catch the ThreadKilled
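
The ACQUIRE_LOCK/RELEASE_LOCK/initMutex choreography added around fork() above is the standard fork-safety discipline behind #1391: every relevant mutex is held while the address space is snapshotted, so no other thread can be mid-update; the parent then unlocks, and the child re-initialises its copies, whose recorded owner threads no longer exist. A minimal POSIX sketch of the same pattern, assuming a single protecting mutex:

    #include <pthread.h>
    #include <stdio.h>
    #include <sys/wait.h>
    #include <unistd.h>

    static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;

    int main(void)
    {
        /* hold the lock across fork(): nobody can be mid-update */
        pthread_mutex_lock(&state_lock);
        pid_t pid = fork();

        if (pid != 0) {              /* parent (or failed fork) */
            pthread_mutex_unlock(&state_lock);
            if (pid > 0) waitpid(pid, NULL, 0);
        } else {                     /* child */
            /* re-initialise rather than unlock, mirroring initMutex():
               the child's copies may name owners that are gone */
            pthread_mutex_init(&state_lock, NULL);
            printf("child: consistent state, fresh locks\n");
            _exit(0);
        }
        return 0;
    }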
@@ -2161,6 +1583,7 @@ forkProcess(HsStablePtr *entry
                // threads may be evaluating thunks that we need later.
                deleteThread_(cap,t);
            }
+          }
        }
        
        // Empty the run queue.  It seems tempting to let all the
@@ -2174,14 +1597,19 @@ forkProcess(HsStablePtr *entry
        // don't exist now:
        cap->suspended_ccalling_tasks = NULL;
 
-       // Empty the all_threads list.  Otherwise, the garbage
+       // Empty the threads lists.  Otherwise, the garbage
        // collector may attempt to resurrect some of these threads.
-       all_threads = END_TSO_QUEUE;
+        for (s = 0; s < total_steps; s++) {
+            all_steps[s].threads = END_TSO_QUEUE;
+        }
 
        // Wipe the task list, except the current Task.
        ACQUIRE_LOCK(&sched_mutex);
        for (task = all_tasks; task != NULL; task=task->all_link) {
            if (task != cap->running_task) {
+#if defined(THREADED_RTS)
+                initMutex(&task->lock); // see #1391
+#endif
                discardTask(task);
            }
        }
@@ -2195,6 +1623,11 @@ forkProcess(HsStablePtr *entry
        cap->returning_tasks_tl = NULL;
 #endif
 
+        // On Unix, all timers are reset in the child, so we need to start
+        // the timer again.
+        initTimer();
+        startTimer();
+
        cap = rts_evalStableIO(cap, entry, NULL);  // run the action
        rts_checkSchedStatus("forkProcess",cap);
        
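
A small demonstration of the behaviour that the initTimer()/startTimer() calls above compensate for: POSIX interval timers are not inherited across fork(), so a child that wants ticks must re-arm them itself. Hypothetical example, not RTS code:

    #include <signal.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/time.h>
    #include <unistd.h>

    static void on_tick(int sig) { (void)sig; write(1, ".", 1); }

    static void start_ticker(void)           /* stand-in for startTimer() */
    {
        struct sigaction sa;
        memset(&sa, 0, sizeof(sa));
        sa.sa_handler = on_tick;
        sigaction(SIGALRM, &sa, NULL);

        struct itimerval it = { { 0, 20000 }, { 0, 20000 } };  /* 20ms */
        setitimer(ITIMER_REAL, &it, NULL);
    }

    int main(void)
    {
        start_ticker();
        if (fork() == 0) {
            start_ticker();    /* fork() reset the timer: re-arm in child */
            for (int i = 0; i < 3; i++) pause();  /* wait for three ticks */
            _exit(0);
        }
        for (int i = 0; i < 3; i++) pause();
        return 0;
    }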
@@ -2218,14 +1651,18 @@ deleteAllThreads ( Capability *cap )
     // NOTE: only safe to call if we own all capabilities.
 
     StgTSO* t, *next;
+    nat s;
+
     debugTrace(DEBUG_sched,"deleting all threads");
-    for (t = all_threads; t != END_TSO_QUEUE; t = next) {
+    for (s = 0; s < total_steps; s++) {
+      for (t = all_steps[s].threads; t != END_TSO_QUEUE; t = next) {
        if (t->what_next == ThreadRelocated) {
-           next = t->link;
+           next = t->_link;
        } else {
            next = t->global_link;
            deleteThread(cap,t);
        }
+      }
     }      
 
     // The run queue now contains a bunch of ThreadKilled threads.  We
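
deleteAllThreads and the forkProcess loop above now walk one thread list per step rather than a single all_threads list, and must treat a ThreadRelocated TSO as a forwarding stub: follow _link to reach the moved thread, follow global_link to advance past a real one. A standalone sketch of that traversal over hypothetical types:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    typedef struct TSO {
        bool relocated;           /* stands in for ThreadRelocated */
        struct TSO *_link;        /* forwarding pointer when relocated */
        struct TSO *global_link;  /* next thread in this step's list */
        int id;
    } TSO;

    typedef struct { TSO *threads; } Step;

    static void for_each_thread(Step *steps, size_t n_steps, void (*f)(TSO *))
    {
        for (size_t s = 0; s < n_steps; s++) {
            for (TSO *t = steps[s].threads, *next; t != NULL; t = next) {
                if (t->relocated) {
                    next = t->_link;        /* skip the stub */
                } else {
                    next = t->global_link;
                    f(t);                   /* visit the real thread */
                }
            }
        }
    }

    static void print_tso(TSO *t) { printf("tso %d\n", t->id); }

    int main(void)
    {
        TSO real = { false, NULL, NULL, 2 };
        TSO stub = { true, &real, NULL, 1 };   /* forwards to 'real' */
        Step steps[1] = { { &stub } };
        for_each_thread(steps, 1, print_tso);  /* prints only "tso 2" */
        return 0;
    }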
@@ -2380,7 +1817,7 @@ resumeThread (void *task_)
 
     tso = task->suspended_tso;
     task->suspended_tso = NULL;
-    tso->link = END_TSO_QUEUE;
+    tso->_link = END_TSO_QUEUE; // no write barrier reqd
     debugTrace(DEBUG_sched, "thread %lu: re-entering RTS", (unsigned long)tso->id);
     
     if (tso->why_blocked == BlockedOnCCall) {
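
Renaming link to _link marks the field as one normally written through setTSOLink() (used later in this patch), which performs the generational write barrier; the direct store above is safe because END_TSO_QUEUE is a static sentinel, never a young heap object. A toy remembered-set barrier over invented types, illustrating what the real barrier guards against:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    typedef struct Obj {
        int gen;                  /* generation this object lives in */
        struct Obj *link;         /* mutable pointer field */
        bool in_rem_set;          /* already on the remembered set? */
    } Obj;

    static Obj *rem_set[16];
    static size_t rem_len;

    /* barriered store: record old-gen objects that point at younger
       ones, so a minor GC can find the pointer without scanning the
       old generations */
    static void set_link(Obj *o, Obj *target)
    {
        if (target != NULL && o->gen > target->gen && !o->in_rem_set) {
            o->in_rem_set = true;
            rem_set[rem_len++] = o;
        }
        o->link = target;
    }

    int main(void)
    {
        Obj old = { 1, NULL, false }, young = { 0, NULL, false };
        old.link = NULL;          /* storing a static sentinel: no barrier */
        set_link(&old, &young);   /* old-to-young store: must be recorded */
        printf("remembered: %zu\n", rem_len);
        return 0;
    }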
@@ -2399,7 +1836,7 @@ resumeThread (void *task_)
 #endif
 
     /* We might have GC'd, mark the TSO dirty again */
-    dirtyTSO(tso);
+    dirty_TSO(cap,tso);
 
     IF_DEBUG(sanity, checkTSO(tso));
 
@@ -2462,13 +1899,6 @@ scheduleWaitThread (StgTSO* tso, /*[out]*/HaskellObj* ret, Capability *cap)
 
     debugTrace(DEBUG_sched, "new bound thread (%lu)", (unsigned long)tso->id);
 
-#if defined(GRAN)
-    /* GranSim specific init */
-    CurrentTSO = m->tso;                // the TSO to run
-    procStatus[MainProc] = Busy;        // status of main PE
-    CurrentProc = MainProc;             // PE to run it on
-#endif
-
     cap = schedule(cap,task);
 
     ASSERT(task->stat != NoStatus);
@@ -2517,28 +1947,17 @@ workerStart(Task *task)
 void 
 initScheduler(void)
 {
-#if defined(GRAN)
-  nat i;
-  for (i=0; i<=MAX_PROC; i++) {
-    run_queue_hds[i]      = END_TSO_QUEUE;
-    run_queue_tls[i]      = END_TSO_QUEUE;
-    blocked_queue_hds[i]  = END_TSO_QUEUE;
-    blocked_queue_tls[i]  = END_TSO_QUEUE;
-    ccalling_threadss[i]  = END_TSO_QUEUE;
-    blackhole_queue[i]    = END_TSO_QUEUE;
-    sleeping_queue        = END_TSO_QUEUE;
-  }
-#elif !defined(THREADED_RTS)
+#if !defined(THREADED_RTS)
   blocked_queue_hd  = END_TSO_QUEUE;
   blocked_queue_tl  = END_TSO_QUEUE;
   sleeping_queue    = END_TSO_QUEUE;
 #endif
 
   blackhole_queue   = END_TSO_QUEUE;
-  all_threads       = END_TSO_QUEUE;
 
   context_switch = 0;
   sched_state    = SCHED_RUNNING;
+  recent_activity = ACTIVITY_YES;
 
 #if defined(THREADED_RTS)
   /* Initialise the mutex and condition variables used by
@@ -2585,7 +2004,13 @@ initScheduler(void)
 }
 
 void
-exitScheduler( void )
+exitScheduler(
+    rtsBool wait_foreign
+#if !defined(THREADED_RTS)
+                         __attribute__((unused))
+#endif
+)
+               /* see Capability.c, shutdownCapability() */
 {
     Task *task = NULL;
 
 {
     Task *task = NULL;
 
@@ -2607,7 +2032,7 @@ exitScheduler( void )
        nat i;
        
        for (i = 0; i < n_capabilities; i++) {
-           shutdownCapability(&capabilities[i], task);
+           shutdownCapability(&capabilities[i], task, wait_foreign);
        }
        boundTaskExiting(task);
        stopTaskManager();
@@ -2629,85 +2054,6 @@ freeScheduler( void )
 #endif
 }
 
-/* ---------------------------------------------------------------------------
-   Where are the roots that we know about?
-
-        - all the threads on the runnable queue
-        - all the threads on the blocked queue
-        - all the threads on the sleeping queue
-       - all the thread currently executing a _ccall_GC
-        - all the "main threads"
-     
-   ------------------------------------------------------------------------ */
-
-/* This has to be protected either by the scheduler monitor, or by the
-       garbage collection monitor (probably the latter).
-       KH @ 25/10/99
-*/
-
-void
-GetRoots( evac_fn evac )
-{
-    nat i;
-    Capability *cap;
-    Task *task;
-
-#if defined(GRAN)
-    for (i=0; i<=RtsFlags.GranFlags.proc; i++) {
-       if ((run_queue_hds[i] != END_TSO_QUEUE) && ((run_queue_hds[i] != NULL)))
-           evac((StgClosure **)&run_queue_hds[i]);
-       if ((run_queue_tls[i] != END_TSO_QUEUE) && ((run_queue_tls[i] != NULL)))
-           evac((StgClosure **)&run_queue_tls[i]);
-       
-       if ((blocked_queue_hds[i] != END_TSO_QUEUE) && ((blocked_queue_hds[i] != NULL)))
-           evac((StgClosure **)&blocked_queue_hds[i]);
-       if ((blocked_queue_tls[i] != END_TSO_QUEUE) && ((blocked_queue_tls[i] != NULL)))
-           evac((StgClosure **)&blocked_queue_tls[i]);
-       if ((ccalling_threadss[i] != END_TSO_QUEUE) && ((ccalling_threadss[i] != NULL)))
-           evac((StgClosure **)&ccalling_threads[i]);
-    }
-
-    markEventQueue();
-
-#else /* !GRAN */
-
-    for (i = 0; i < n_capabilities; i++) {
-       cap = &capabilities[i];
-       evac((StgClosure **)(void *)&cap->run_queue_hd);
-       evac((StgClosure **)(void *)&cap->run_queue_tl);
-#if defined(THREADED_RTS)
-       evac((StgClosure **)(void *)&cap->wakeup_queue_hd);
-       evac((StgClosure **)(void *)&cap->wakeup_queue_tl);
-#endif
-       for (task = cap->suspended_ccalling_tasks; task != NULL; 
-            task=task->next) {
-           debugTrace(DEBUG_sched,
-                      "evac'ing suspended TSO %lu", (unsigned long)task->suspended_tso->id);
-           evac((StgClosure **)(void *)&task->suspended_tso);
-       }
-
-    }
-    
-
-#if !defined(THREADED_RTS)
-    evac((StgClosure **)(void *)&blocked_queue_hd);
-    evac((StgClosure **)(void *)&blocked_queue_tl);
-    evac((StgClosure **)(void *)&sleeping_queue);
-#endif 
-#endif
-
-    // evac((StgClosure **)&blackhole_queue);
-
-#if defined(THREADED_RTS) || defined(PARALLEL_HASKELL) || defined(GRAN)
-    markSparkQueue(evac);
-#endif
-    
-#if defined(RTS_USER_SIGNALS)
-    // mark the signal handlers (signals should be already blocked)
-    markSignalHandlers(evac);
-#endif
-}
-
 /* -----------------------------------------------------------------------------
    performGC
 
@@ -2765,7 +2111,12 @@ threadStackOverflow(Capability *cap, StgTSO *tso)
   // while we are moving the TSO:
   lockClosure((StgClosure *)tso);
 
-  if (tso->stack_size >= tso->max_stack_size) {
+  if (tso->stack_size >= tso->max_stack_size && !(tso->flags & TSO_BLOCKEX)) {
+      // NB. never raise a StackOverflow exception if the thread is
+      // inside Control.Exception.block.  It is impractical to protect
+      // against stack overflow exceptions, since virtually anything
+      // can raise one (even 'catch'), so this is the only sensible
+      // thing to do here.  See bug #767.
 
       debugTrace(DEBUG_gc,
                 "threadStackOverflow of TSO %ld (%p): stack too large (now %ld; max is %ld)",
@@ -2795,7 +2146,7 @@ threadStackOverflow(Capability *cap, StgTSO *tso)
             "increasing stack size from %ld words to %d.",
             (long)tso->stack_size, new_stack_size);
 
-  dest = (StgTSO *)allocate(new_tso_size);
+  dest = (StgTSO *)allocateLocal(cap,new_tso_size);
   TICK_ALLOC_TSO(new_stack_size,0);
 
   /* copy the TSO block and the old stack into the new area */
@@ -2816,7 +2167,7 @@ threadStackOverflow(Capability *cap, StgTSO *tso)
    * dead TSO's stack.
    */
   tso->what_next = ThreadRelocated;
-  tso->link = dest;
+  setTSOLink(cap,tso,dest);
   tso->sp = (P_)&(tso->stack[tso->stack_size]);
   tso->why_blocked = NotBlocked;
 
@@ -2838,6 +2189,56 @@ threadStackOverflow(Capability *cap, StgTSO *tso)
   return dest;
 }
 
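+/* threadStackUnderflow
+ *
+ * The dual of threadStackOverflow: if a TSO spans at least a megablock
+ * but no more than a quarter of its stack is in use, give roughly half
+ * of the space back.  The large block is split in two; the live stack
+ * words stay where they are, the TSO header is copied to the front of
+ * the retained half, and the old header becomes a ThreadRelocated stub
+ * whose _link forwards to the new TSO.
+ */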
+static StgTSO *
+threadStackUnderflow (Task *task STG_UNUSED, StgTSO *tso)
+{
+    bdescr *bd, *new_bd;
+    lnat new_tso_size_w, tso_size_w;
+    StgTSO *new_tso;
+
+    tso_size_w = tso_sizeW(tso);
+
+    if (tso_size_w < MBLOCK_SIZE_W || 
+        (nat)(tso->stack + tso->stack_size - tso->sp) > tso->stack_size / 4) 
+    {
+        return tso;
+    }
+
+    // don't allow throwTo() to modify the blocked_exceptions queue
+    // while we are moving the TSO:
+    lockClosure((StgClosure *)tso);
+
+    new_tso_size_w = round_to_mblocks(tso_size_w/2);
+
+    debugTrace(DEBUG_sched, "thread %ld: reducing TSO size from %lu words to %lu",
+               tso->id, tso_size_w, new_tso_size_w);
+
+    bd = Bdescr((StgPtr)tso);
+    new_bd = splitLargeBlock(bd, new_tso_size_w / BLOCK_SIZE_W);
+    new_bd->free = bd->free;
+    bd->free = bd->start + TSO_STRUCT_SIZEW;
+
+    new_tso = (StgTSO *)new_bd->start;
+    memcpy(new_tso,tso,TSO_STRUCT_SIZE);
+    new_tso->stack_size = new_tso_size_w - TSO_STRUCT_SIZEW;
+
+    tso->what_next = ThreadRelocated;
+    tso->_link = new_tso; // no write barrier reqd: same generation
+
+    // The TSO attached to this Task may have moved, so update the
+    // pointer to it.
+    if (task->tso == tso) {
+        task->tso = new_tso;
+    }
+
+    unlockTSO(new_tso);
+    unlockTSO(tso);
+
+    IF_DEBUG(sanity,checkTSO(new_tso));
+
+    return new_tso;
+}
+
 /* ---------------------------------------------------------------------------
    Interrupt execution
    - usually called inside a signal handler so it mustn't do anything fancy.   
@@ -2907,7 +2308,7 @@ checkBlackHoles (Capability *cap)
     t = blackhole_queue;
     while (t != END_TSO_QUEUE) {
        ASSERT(t->why_blocked == BlockedOnBlackHole);
-       type = get_itbl(t->block_info.closure)->type;
+       type = get_itbl(UNTAG_CLOSURE(t->block_info.closure))->type;
        if (type != BLACKHOLE && type != CAF_BLACKHOLE) {
            IF_DEBUG(sanity,checkTSO(t));
            t = unblockOne(cap, t);
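
get_itbl() dereferences the closure pointer, and with pointer tagging the low bits of t->block_info.closure may carry a tag rather than address bits, so they must be masked off first; hence UNTAG_CLOSURE. A freestanding illustration; the tag-bit counts are assumptions matching GHC's scheme (2 bits on 32-bit platforms, 3 on 64-bit):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define TAG_MASK ((uintptr_t)(sizeof(void *) == 8 ? 7 : 3))

    static void *untag(void *p)         /* stand-in for UNTAG_CLOSURE */
    {
        return (void *)((uintptr_t)p & ~TAG_MASK);
    }

    int main(void)
    {
        int *payload = malloc(sizeof *payload);   /* amply aligned */
        *payload = 42;

        void *tagged = (void *)((uintptr_t)payload | 2);  /* set a tag */
        printf("%d\n", *(int *)untag(tagged));    /* untag before deref */

        free(payload);
        return 0;
    }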
@@ -2916,8 +2317,8 @@ checkBlackHoles (Capability *cap)
            *prev = t;
            any_woke_up = rtsTrue;
        } else {
-           prev = &t->link;
-           t = t->link;
+           prev = &t->_link;
+           t = t->_link;
        }
     }
 
@@ -3088,10 +2489,10 @@ findRetryFrameHelper (StgTSO *tso)
        return CATCH_RETRY_FRAME;
       
     case CATCH_STM_FRAME: {
-        debugTrace(DEBUG_stm,
-                  "found CATCH_STM_FRAME at %p during retry", p);
         StgTRecHeader *trec = tso -> trec;
        StgTRecHeader *outer = stmGetEnclosingTRec(trec);
+        debugTrace(DEBUG_stm,
+                  "found CATCH_STM_FRAME at %p during retry", p);
         debugTrace(DEBUG_stm, "trec=%p outer=%p", trec, outer);
        stmAbortTransaction(tso -> cap, trec);
        stmFreeAbortedTRec(tso -> cap, trec);
@@ -3125,11 +2526,15 @@ resurrectThreads (StgTSO *threads)
 {
     StgTSO *tso, *next;
     Capability *cap;
+    step *step;
 
     for (tso = threads; tso != END_TSO_QUEUE; tso = next) {
        next = tso->global_link;
-       tso->global_link = all_threads;
-       all_threads = tso;
+
+        step = Bdescr((P_)tso)->step;
+       tso->global_link = step->threads;
+       step->threads = tso;
+
        debugTrace(DEBUG_sched, "resurrecting thread %lu", (unsigned long)tso->id);
        
        // Wake up the thread on the Capability it was last on
@@ -3140,15 +2545,15 @@ resurrectThreads (StgTSO *threads)
        case BlockedOnException:
            /* Called by GC - sched_mutex lock is currently held. */
            throwToSingleThreaded(cap, tso,
-                                 (StgClosure *)BlockedOnDeadMVar_closure);
+                                 (StgClosure *)blockedOnDeadMVar_closure);
            break;
        case BlockedOnBlackHole:
            throwToSingleThreaded(cap, tso,
-                                 (StgClosure *)NonTermination_closure);
+                                 (StgClosure *)nonTermination_closure);
            break;
        case BlockedOnSTM:
            throwToSingleThreaded(cap, tso,
-                                 (StgClosure *)BlockedIndefinitely_closure);
+                                 (StgClosure *)blockedIndefinitely_closure);
            break;
        case NotBlocked:
            /* This might happen if the thread was blocked on a black hole
@@ -3161,3 +2566,37 @@ resurrectThreads (StgTSO *threads)
        }
     }
 }
+
+/* -----------------------------------------------------------------------------
+   performPendingThrowTos is called after garbage collection, and
+   passed a list of threads that were found to have pending throwTos
+   (tso->blocked_exceptions was not empty), and were blocked.
+   Normally this doesn't happen, because we would deliver the
+   exception directly if the target thread is blocked, but there are
+   small windows where it might occur on a multiprocessor (see
+   throwTo()).
+
+   NB. we must be holding all the capabilities at this point, just
+   like resurrectThreads().
+   -------------------------------------------------------------------------- */
+
+void
+performPendingThrowTos (StgTSO *threads)
+{
+    StgTSO *tso, *next;
+    Capability *cap;
+    step *step;
+
+    for (tso = threads; tso != END_TSO_QUEUE; tso = next) {
+       next = tso->global_link;
+
+        step = Bdescr((P_)tso)->step;
+       tso->global_link = step->threads;
+       step->threads = tso;
+
+       debugTrace(DEBUG_sched, "performing blocked throwTo to thread %lu", (unsigned long)tso->id);
+       
+       cap = tso->cap;
+        maybePerformBlockedException(cap, tso);
+    }
+}