[project @ 2004-08-19 11:27:45 by simonmar]
[ghc-hetmet.git] / ghc / rts / Schedule.c
index 4017c3b..71c3ec9 100644 (file)
@@ -1,7 +1,6 @@
 /* ---------------------------------------------------------------------------
- * $Id: Schedule.c,v 1.189 2004/02/27 12:39:16 simonmar Exp $
  *
- * (c) The GHC Team, 1998-2003
+ * (c) The GHC Team, 1998-2004
  *
  * Scheduler
  *
@@ -42,9 +41,9 @@
 #include "SchedAPI.h"
 #include "RtsUtils.h"
 #include "RtsFlags.h"
+#include "BlockAlloc.h"
 #include "Storage.h"
 #include "StgRun.h"
-#include "StgStartup.h"
 #include "Hooks.h"
 #define COMPILING_SCHEDULER
 #include "Schedule.h"
@@ -59,6 +58,8 @@
 #include "Timer.h"
 #include "Prelude.h"
 #include "ThreadLabels.h"
+#include "LdvProfile.h"
+#include "Updates.h"
 #ifdef PROFILING
 #include "Proftimer.h"
 #include "ProfHeap.h"
@@ -234,6 +235,7 @@ rtsBool emitSchedule = rtsTrue;
 
 #if DEBUG
 static char *whatNext_strs[] = {
+  "(unknown)",
   "ThreadRunGHC",
   "ThreadInterpret",
   "ThreadKilled",
@@ -259,6 +261,7 @@ static void
 taskStart(void)
 {
   ACQUIRE_LOCK(&sched_mutex);
+  startingWorkerThread = rtsFalse;
   schedule(NULL,NULL);
   RELEASE_LOCK(&sched_mutex);
 }
@@ -275,7 +278,10 @@ startSchedulerTaskIfNecessary(void)
       // just because the last one hasn't yet reached the
       // "waiting for capability" state
       startingWorkerThread = rtsTrue;
-      startTask(taskStart);
+      if(!startTask(taskStart))
+      {
+        startingWorkerThread = rtsFalse;
+      }
     }
   }
 }
@@ -321,7 +327,7 @@ schedule( StgMainThread *mainThread USED_WHEN_RTS_SUPPORTS_THREADS,
           Capability *initialCapability )
 {
   StgTSO *t;
-  Capability *cap = initialCapability;
+  Capability *cap;
   StgThreadReturnCode ret;
 #if defined(GRAN)
   rtsEvent *event;
@@ -336,10 +342,12 @@ schedule( StgMainThread *mainThread USED_WHEN_RTS_SUPPORTS_THREADS,
 # endif
 #endif
   rtsBool was_interrupted = rtsFalse;
-  StgTSOWhatNext prev_what_next;
+  nat prev_what_next;
   
   // Pre-condition: sched_mutex is held.
+  // We might have a capability, passed in as initialCapability.
+  cap = initialCapability;
+
 #if defined(RTS_SUPPORTS_THREADS)
   //
   // in the threaded case, the capability is either passed in via the
@@ -429,104 +437,6 @@ schedule( StgMainThread *mainThread USED_WHEN_RTS_SUPPORTS_THREADS,
 #endif
     }
 
-    //
-    // Go through the list of main threads and wake up any
-    // clients whose computations have finished.  ToDo: this
-    // should be done more efficiently without a linear scan
-    // of the main threads list, somehow...
-    //
-#if defined(RTS_SUPPORTS_THREADS)
-    { 
-       StgMainThread *m, **prev;
-       prev = &main_threads;
-       for (m = main_threads; m != NULL; prev = &m->link, m = m->link) {
-         if (m->tso->what_next == ThreadComplete
-             || m->tso->what_next == ThreadKilled)
-         {
-           if (m == mainThread)
-           {
-              if (m->tso->what_next == ThreadComplete)
-              {
-                if (m->ret)
-                {
-                  // NOTE: return val is tso->sp[1] (see StgStartup.hc)
-                  *(m->ret) = (StgClosure *)m->tso->sp[1]; 
-                }
-                m->stat = Success;
-              }
-              else
-              {
-                if (m->ret)
-                {
-                  *(m->ret) = NULL;
-                }
-                if (was_interrupted)
-                {
-                  m->stat = Interrupted;
-                }
-                else
-                {
-                  m->stat = Killed;
-                }
-              }
-              *prev = m->link;
-           
-#ifdef DEBUG
-             removeThreadLabel((StgWord)m->tso->id);
-#endif
-              releaseCapability(cap);
-              return;
-            }
-            else
-            {
-                // The current OS thread can not handle the fact that
-                // the Haskell thread "m" has ended.  "m" is bound;
-                // the scheduler loop in it's bound OS thread has to
-                // return, so let's pass our capability directly to
-                // that thread.
-               passCapability(&m->bound_thread_cond);
-               continue;
-            }
-          }
-       }
-    }
-    
-#else /* not threaded */
-
-# if defined(PAR)
-    /* in GUM do this only on the Main PE */
-    if (IAmMainThread)
-# endif
-    /* If our main thread has finished or been killed, return.
-     */
-    {
-      StgMainThread *m = main_threads;
-      if (m->tso->what_next == ThreadComplete
-         || m->tso->what_next == ThreadKilled) {
-#ifdef DEBUG
-       removeThreadLabel((StgWord)m->tso->id);
-#endif
-       main_threads = main_threads->link;
-       if (m->tso->what_next == ThreadComplete) {
-           // We finished successfully, fill in the return value
-           // NOTE: return val is tso->sp[1] (see StgStartup.hc)
-           if (m->ret) { *(m->ret) = (StgClosure *)m->tso->sp[1]; };
-           m->stat = Success;
-           return;
-       } else {
-         if (m->ret) { *(m->ret) = NULL; };
-         if (was_interrupted) {
-           m->stat = Interrupted;
-         } else {
-           m->stat = Killed;
-         }
-         return;
-       }
-      }
-    }
-#endif
-
-
 #if defined(RTS_USER_SIGNALS)
     // check for signals each time around the scheduler
     if (signals_pending()) {
@@ -536,19 +446,20 @@ schedule( StgMainThread *mainThread USED_WHEN_RTS_SUPPORTS_THREADS,
     }
 #endif
 
-    /* Check whether any waiting threads need to be woken up.  If the
-     * run queue is empty, and there are no other tasks running, we
-     * can wait indefinitely for something to happen.
-     */
-    if ( !EMPTY_QUEUE(blocked_queue_hd) || !EMPTY_QUEUE(sleeping_queue) 
+    //
+    // Check whether any waiting threads need to be woken up.  If the
+    // run queue is empty, and there are no other tasks running, we
+    // can wait indefinitely for something to happen.
+    //
+    if ( !EMPTY_QUEUE(blocked_queue_hd) || !EMPTY_QUEUE(sleeping_queue)
 #if defined(RTS_SUPPORTS_THREADS)
                || EMPTY_RUN_QUEUE()
 #endif
-        )
+       )
     {
       awaitEvent( EMPTY_RUN_QUEUE() );
     }
-    /* we can be interrupted while waiting for I/O... */
+    // we can be interrupted while waiting for I/O...
     if (interrupted) continue;
 
     /* 
@@ -614,11 +525,9 @@ schedule( StgMainThread *mainThread USED_WHEN_RTS_SUPPORTS_THREADS,
            m = main_threads;
            switch (m->tso->why_blocked) {
            case BlockedOnBlackHole:
-               raiseAsync(m->tso, (StgClosure *)NonTermination_closure);
-               break;
            case BlockedOnException:
            case BlockedOnMVar:
-               raiseAsync(m->tso, (StgClosure *)Deadlock_closure);
+               raiseAsync(m->tso, (StgClosure *)NonTermination_closure);
                break;
            default:
                barf("deadlock: main thread blocked in a strange way");
@@ -635,7 +544,7 @@ schedule( StgMainThread *mainThread USED_WHEN_RTS_SUPPORTS_THREADS,
 
 #if defined(RTS_SUPPORTS_THREADS)
     if ( EMPTY_RUN_QUEUE() ) {
-      continue; // nothing to do
+       continue; // nothing to do
     }
 #endif
 
@@ -924,12 +833,7 @@ schedule( StgMainThread *mainThread USED_WHEN_RTS_SUPPORTS_THREADS,
 
 #ifdef THREADED_RTS
     {
-      StgMainThread *m;
-      for(m = main_threads; m; m = m->link)
-      {
-       if(m->tso == t)
-         break;
-      }
+      StgMainThread *m = t->main;
       
       if(m)
       {
@@ -977,8 +881,6 @@ schedule( StgMainThread *mainThread USED_WHEN_RTS_SUPPORTS_THREADS,
             || blocked_queue_hd != END_TSO_QUEUE
             || sleeping_queue != END_TSO_QUEUE)))
        context_switch = 1;
-    else
-       context_switch = 0;
 
 run_thread:
 
@@ -995,23 +897,35 @@ run_thread:
     /* Run the current thread 
      */
     prev_what_next = t->what_next;
+
+    errno = t->saved_errno;
+
     switch (prev_what_next) {
+
     case ThreadKilled:
     case ThreadComplete:
        /* Thread already finished, return to scheduler. */
        ret = ThreadFinished;
        break;
+
     case ThreadRunGHC:
-       errno = t->saved_errno;
        ret = StgRun((StgFunPtr) stg_returnToStackTop, &cap->r);
-       t->saved_errno = errno;
        break;
+
     case ThreadInterpret:
        ret = interpretBCO(cap);
        break;
+
     default:
       barf("schedule: invalid what_next field");
     }
+
+    // The TSO might have moved, so find the new location:
+    t = cap->r.rCurrentTSO;
+
+    // And save the current errno in this thread.
+    t->saved_errno = errno;
+
     /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
     
     /* Costs for the scheduler are assigned to CCS_SYSTEM */
@@ -1027,7 +941,6 @@ run_thread:
 #elif !defined(GRAN) && !defined(PAR)
     IF_DEBUG(scheduler,fprintf(stderr,"sched: "););
 #endif
-    t = cap->r.rCurrentTSO;
     
 #if defined(PAR)
     /* HACK 675: if the last thread didn't yield, make sure to print a 
@@ -1047,12 +960,12 @@ run_thread:
 #endif
 
       // did the task ask for a large block?
-      if (cap->r.rHpAlloc > BLOCK_SIZE_W) {
+      if (cap->r.rHpAlloc > BLOCK_SIZE) {
          // if so, get one and push it on the front of the nursery.
          bdescr *bd;
          nat blocks;
          
-         blocks = (nat)BLOCK_ROUND_UP(cap->r.rHpAlloc * sizeof(W_)) / BLOCK_SIZE;
+         blocks = (nat)BLOCK_ROUND_UP(cap->r.rHpAlloc) / BLOCK_SIZE;
 
          IF_DEBUG(scheduler,belch("--<< thread %ld (%s) stopped: requesting a large block (size %d)", 
                                   t->id, whatNext_strs[t->what_next], blocks));
@@ -1153,7 +1066,6 @@ run_thread:
        */
       threadPaused(t);
       { 
-       StgMainThread *m;
        /* enlarge the stack */
        StgTSO *new_t = threadStackOverflow(t);
        
@@ -1161,17 +1073,22 @@ run_thread:
         * main thread stack.  It better not be on any other queues...
         * (it shouldn't be).
         */
-       for (m = main_threads; m != NULL; m = m->link) {
-         if (m->tso == t) {
-           m->tso = new_t;
-         }
+       if (t->main != NULL) {
+           t->main->tso = new_t;
        }
-       threadPaused(new_t);
        PUSH_ON_RUN_QUEUE(new_t);
       }
       break;
 
     case ThreadYielding:
+      // Reset the context switch flag.  We don't do this just before
+      // running the thread, because that would mean we would lose ticks
+      // during GC, which can lead to unfair scheduling (a thread hogs
+      // the CPU because the tick always arrives during GC).  This way
+      // penalises threads that do a lot of allocation, but that seems
+      // better than the alternative.
+      context_switch = 0;
+
 #if defined(GRAN)
       IF_DEBUG(gran, 
               DumpGranEvent(GR_DESCHEDULE, t));
@@ -1327,8 +1244,74 @@ run_thread:
          !RtsFlags.ParFlags.ParStats.Suppressed) 
        DumpEndEvent(CURRENT_PROC, t, rtsFalse /* not mandatory */);
 #endif
+
+      //
+      // Check whether the thread that just completed was a main
+      // thread, and if so return with the result.  
+      //
+      // There is an assumption here that all thread completion goes
+      // through this point; we need to make sure that if a thread
+      // ends up in the ThreadKilled state, that it stays on the run
+      // queue so it can be dealt with here.
+      //
+      if (
+#if defined(RTS_SUPPORTS_THREADS)
+         mainThread != NULL
+#else
+         mainThread->tso == t
+#endif
+         )
+      {
+         // We are a bound thread: this must be our thread that just
+         // completed.
+         ASSERT(mainThread->tso == t);
+
+         if (t->what_next == ThreadComplete) {
+             if (mainThread->ret) {
+                 // NOTE: return val is tso->sp[1] (see StgStartup.hc)
+                 *(mainThread->ret) = (StgClosure *)mainThread->tso->sp[1]; 
+             }
+             mainThread->stat = Success;
+         } else {
+             if (mainThread->ret) {
+                 *(mainThread->ret) = NULL;
+             }
+             if (was_interrupted) {
+                 mainThread->stat = Interrupted;
+             } else {
+                 mainThread->stat = Killed;
+             }
+         }
+#ifdef DEBUG
+         removeThreadLabel((StgWord)mainThread->tso->id);
+#endif
+         if (mainThread->prev == NULL) {
+             main_threads = mainThread->link;
+         } else {
+             mainThread->prev->link = mainThread->link;
+         }
+         if (mainThread->link != NULL) {
+             mainThread->link->prev = NULL;
+         }
+         releaseCapability(cap);
+         return;
+      }
+
+#ifdef RTS_SUPPORTS_THREADS
+      ASSERT(t->main == NULL);
+#else
+      if (t->main != NULL) {
+         // Must be a main thread that is not the topmost one.  Leave
+         // it on the run queue until the stack has unwound to the
+         // point where we can deal with this.  Leaving it on the run
+         // queue also ensures that the garbage collector knows about
+         // this thread and its return value (it gets dropped from the
+         // all_threads list so there's no other way to find it).
+         APPEND_TO_RUN_QUEUE(t);
+      }
+#endif
       break;
-      
+
     default:
       barf("schedule: invalid thread return code %d", (int)ret);
     }
@@ -1409,12 +1392,7 @@ StgBool
 isThreadBound(StgTSO* tso USED_IN_THREADED_RTS)
 {
 #ifdef THREADED_RTS
-  StgMainThread *m;
-  for(m = main_threads; m; m = m->link)
-  {
-    if(m->tso == tso)
-      return rtsTrue;
-  }
+  return (tso->main != NULL);
 #endif
   return rtsFalse;
 }
@@ -1423,13 +1401,22 @@ isThreadBound(StgTSO* tso USED_IN_THREADED_RTS)
  * Singleton fork(). Do not copy any running threads.
  * ------------------------------------------------------------------------- */
 
+#ifndef mingw32_TARGET_OS
+#define FORKPROCESS_PRIMOP_SUPPORTED
+#endif
+
+#ifdef FORKPROCESS_PRIMOP_SUPPORTED
 static void 
 deleteThreadImmediately(StgTSO *tso);
-
+#endif
 StgInt
-forkProcess(HsStablePtr *entry)
+forkProcess(HsStablePtr *entry
+#ifndef FORKPROCESS_PRIMOP_SUPPORTED
+           STG_UNUSED
+#endif
+           )
 {
-#ifndef mingw32_TARGET_OS
+#ifdef FORKPROCESS_PRIMOP_SUPPORTED
   pid_t pid;
   StgTSO* t,*next;
   StgMainThread *m;
@@ -1463,17 +1450,17 @@ forkProcess(HsStablePtr *entry)
       // wipe the main thread list
     while((m = main_threads) != NULL) {
       main_threads = m->link;
-#ifdef THREADED_RTS
+# ifdef THREADED_RTS
       closeCondition(&m->bound_thread_cond);
-#endif
+# endif
       stgFree(m);
     }
     
-#ifdef RTS_SUPPORTS_THREADS
+# ifdef RTS_SUPPORTS_THREADS
     resetTaskManagerAfterFork();      // tell startTask() and friends that
     startingWorkerThread = rtsFalse;  // we have no worker threads any more
     resetWorkerWakeupPipeAfterFork();
-#endif
+# endif
     
     rc = rts_evalStableIO(entry, NULL);  // run the action
     rts_checkSchedStatus("forkProcess",rc);
@@ -1483,10 +1470,10 @@ forkProcess(HsStablePtr *entry)
     hs_exit();                      // clean up and exit
     stg_exit(0);
   }
-#else /* mingw32 */
-  barf("forkProcess#: primop not implemented for mingw32, sorry!\n");
+#else /* !FORKPROCESS_PRIMOP_SUPPORTED */
+  barf("forkProcess#: primop not supported, sorry!\n");
   return -1;
-#endif /* mingw32 */
+#endif
 }
 
 /* ---------------------------------------------------------------------------
@@ -1507,9 +1494,15 @@ deleteAllThreads ( void )
       next = t->global_link;
       deleteThread(t);
   }      
-  run_queue_hd = run_queue_tl = END_TSO_QUEUE;
-  blocked_queue_hd = blocked_queue_tl = END_TSO_QUEUE;
-  sleeping_queue = END_TSO_QUEUE;
+
+  // The run queue now contains a bunch of ThreadKilled threads.  We
+  // must not throw these away: the main thread(s) will be in there
+  // somewhere, and the main scheduler loop has to deal with it.
+  // Also, the run queue is the only thing keeping these threads from
+  // being GC'd, and we don't want the "main thread has been GC'd" panic.
+
+  ASSERT(blocked_queue_hd == END_TSO_QUEUE);
+  ASSERT(sleeping_queue == END_TSO_QUEUE);
 }
 
 /* startThread and  insertThread are now in GranSim.c -- HWL */
@@ -1531,12 +1524,7 @@ deleteAllThreads ( void )
  * ------------------------------------------------------------------------- */
    
 StgInt
-suspendThread( StgRegTable *reg, 
-              rtsBool concCall
-#if !defined(DEBUG)
-              STG_UNUSED
-#endif
-              )
+suspendThread( StgRegTable *reg )
 {
   nat tok;
   Capability *cap;
@@ -1550,7 +1538,7 @@ suspendThread( StgRegTable *reg,
   ACQUIRE_LOCK(&sched_mutex);
 
   IF_DEBUG(scheduler,
-          sched_belch("thread %d did a _ccall_gc (is_concurrent: %d)", cap->r.rCurrentTSO->id,concCall));
+          sched_belch("thread %d did a _ccall_gc", cap->r.rCurrentTSO->id));
 
   // XXX this might not be necessary --SDM
   cap->r.rCurrentTSO->what_next = ThreadRunGHC;
@@ -1559,17 +1547,12 @@ suspendThread( StgRegTable *reg,
   cap->r.rCurrentTSO->link = suspended_ccalling_threads;
   suspended_ccalling_threads = cap->r.rCurrentTSO;
 
-#if defined(RTS_SUPPORTS_THREADS)
-  if(cap->r.rCurrentTSO->blocked_exceptions == NULL)
-  {
+  if(cap->r.rCurrentTSO->blocked_exceptions == NULL)  {
       cap->r.rCurrentTSO->why_blocked = BlockedOnCCall;
       cap->r.rCurrentTSO->blocked_exceptions = END_TSO_QUEUE;
-  }
-  else
-  {
+  } else {
       cap->r.rCurrentTSO->why_blocked = BlockedOnCCall_NoUnblockExc;
   }
-#endif
 
   /* Use the thread ID as the token; it should be unique */
   tok = cap->r.rCurrentTSO->id;
@@ -1593,8 +1576,7 @@ suspendThread( StgRegTable *reg,
 }
 
 StgRegTable *
-resumeThread( StgInt tok,
-             rtsBool concCall STG_UNUSED )
+resumeThread( StgInt tok )
 {
   StgTSO *tso, **prev;
   Capability *cap;
@@ -1625,13 +1607,10 @@ resumeThread( StgInt tok,
   }
   tso->link = END_TSO_QUEUE;
   
-#if defined(RTS_SUPPORTS_THREADS)
-  if(tso->why_blocked == BlockedOnCCall)
-  {
+  if(tso->why_blocked == BlockedOnCCall) {
       awakenBlockedQueueNoLock(tso->blocked_exceptions);
       tso->blocked_exceptions = NULL;
   }
-#endif
   
   /* Reset blocking status */
   tso->why_blocked  = NotBlocked;
@@ -1760,6 +1739,7 @@ createThread(nat size)
   tso->blocked_exceptions = NULL;
 
   tso->saved_errno = 0;
+  tso->main = NULL;
   
   tso->stack_size   = stack_size;
   tso->max_stack_size = round_to_mblocks(RtsFlags.GcFlags.maxStkSize) 
@@ -1773,9 +1753,10 @@ createThread(nat size)
   /* put a stop frame on the stack */
   tso->sp -= sizeofW(StgStopFrame);
   SET_HDR((StgClosure*)tso->sp,(StgInfoTable *)&stg_stop_thread_info,CCS_SYSTEM);
+  tso->link = END_TSO_QUEUE;
+
   // ToDo: check this
 #if defined(GRAN)
-  tso->link = END_TSO_QUEUE;
   /* uses more flexible routine in GranSim */
   insertThread(tso, CurrentProc);
 #else
@@ -1952,7 +1933,9 @@ void
 scheduleThread_(StgTSO *tso)
 {
   // Precondition: sched_mutex must be held.
-  PUSH_ON_RUN_QUEUE(tso);
+  // The thread goes at the *end* of the run-queue, to avoid possible
+  // starvation of any threads already on the queue.
+  APPEND_TO_RUN_QUEUE(tso);
   THREAD_RUNNABLE();
 }
 
@@ -1965,9 +1948,11 @@ scheduleThread(StgTSO* tso)
 }
 
 #if defined(RTS_SUPPORTS_THREADS)
-static Condition *bound_cond_cache = NULL;
+static Condition bound_cond_cache;
+static int bound_cond_cache_full = 0;
 #endif
 
+
 SchedulerStatus
 scheduleWaitThread(StgTSO* tso, /*[out]*/HaskellObj* ret,
                   Capability *initialCapability)
@@ -1977,15 +1962,23 @@ scheduleWaitThread(StgTSO* tso, /*[out]*/HaskellObj* ret,
 
     m = stgMallocBytes(sizeof(StgMainThread), "waitThread");
     m->tso = tso;
+    tso->main = m;
     m->ret = ret;
     m->stat = NoStatus;
+    m->link = main_threads;
+    m->prev = NULL;
+    if (main_threads != NULL) {
+       main_threads->prev = m;
+    }
+    main_threads = m;
+
 #if defined(RTS_SUPPORTS_THREADS)
     // Allocating a new condition for each thread is expensive, so we
     // cache one.  This is a pretty feeble hack, but it helps speed up
     // consecutive call-ins quite a bit.
-    if (bound_cond_cache != NULL) {
-       m->bound_thread_cond = *bound_cond_cache;
-       bound_cond_cache = NULL;
+    if (bound_cond_cache_full) {
+       m->bound_thread_cond = bound_cond_cache;
+       bound_cond_cache_full = 0;
     } else {
        initCondition(&m->bound_thread_cond);
     }
@@ -2002,10 +1995,7 @@ scheduleWaitThread(StgTSO* tso, /*[out]*/HaskellObj* ret,
     */
     IF_DEBUG(scheduler, sched_belch("waiting for thread (%d)", tso->id));
     
-    m->link = main_threads;
-    main_threads = m;
-    
-    PUSH_ON_RUN_QUEUE(tso);
+    APPEND_TO_RUN_QUEUE(tso);
     // NB. Don't call THREAD_RUNNABLE() here, because the thread is
     // bound and only runnable by *this* OS thread, so waking up other
     // workers will just slow things down.
@@ -2062,9 +2052,7 @@ initScheduler(void)
   initMutex(&term_mutex);
 #endif
   
-#if defined(RTS_SUPPORTS_THREADS)
   ACQUIRE_LOCK(&sched_mutex);
-#endif
 
   /* A capability holds the state a native thread needs in
    * order to execute STG code. At least one capability is
@@ -2082,7 +2070,6 @@ initScheduler(void)
 #endif
 
   RELEASE_LOCK(&sched_mutex);
-
 }
 
 void
@@ -2127,8 +2114,9 @@ waitThread_(StgMainThread* m, Capability *initialCapability)
 
 #if defined(RTS_SUPPORTS_THREADS)
   // Free the condition variable, returning it to the cache if possible.
-  if (bound_cond_cache == NULL) {
-      *bound_cond_cache = m->bound_thread_cond;
+  if (!bound_cond_cache_full) {
+      bound_cond_cache = m->bound_thread_cond;
+      bound_cond_cache_full = 1;
   } else {
       closeCondition(&m->bound_thread_cond);
   }
@@ -2210,25 +2198,6 @@ GetRoots( evac_fn evac )
   // mark the signal handlers (signals should be already blocked)
   markSignalHandlers(evac);
 #endif
-
-  // main threads which have completed need to be retained until they
-  // are dealt with in the main scheduler loop.  They won't be
-  // retained any other way: the GC will drop them from the
-  // all_threads list, so we have to be careful to treat them as roots
-  // here.
-  { 
-      StgMainThread *m;
-      for (m = main_threads; m != NULL; m = m->link) {
-         switch (m->tso->what_next) {
-         case ThreadComplete:
-         case ThreadKilled:
-             evac((StgClosure **)&m->tso);
-             break;
-         default:
-             break;
-         }
-      }
-  }
 }
 
 /* -----------------------------------------------------------------------------
@@ -2456,8 +2425,8 @@ unblockOneLocked(StgBlockingQueueElement *bqe, StgClosure *node)
       ASSERT(((StgTSO *)bqe)->why_blocked != NotBlocked);
       /* if it's a TSO just push it onto the run_queue */
       next = bqe->link;
-      // ((StgTSO *)bqe)->link = END_TSO_QUEUE; // debugging?
-      PUSH_ON_RUN_QUEUE((StgTSO *)bqe); 
+      ((StgTSO *)bqe)->link = END_TSO_QUEUE; // debugging?
+      APPEND_TO_RUN_QUEUE((StgTSO *)bqe); 
       THREAD_RUNNABLE();
       unblockCount(bqe, node);
       /* reset blocking status after dumping event */
@@ -2501,7 +2470,8 @@ unblockOneLocked(StgTSO *tso)
   ASSERT(tso->why_blocked != NotBlocked);
   tso->why_blocked = NotBlocked;
   next = tso->link;
-  PUSH_ON_RUN_QUEUE(tso);
+  tso->link = END_TSO_QUEUE;
+  APPEND_TO_RUN_QUEUE(tso);
   THREAD_RUNNABLE();
   IF_DEBUG(scheduler,sched_belch("waking up thread %ld", tso->id));
   return next;
@@ -2644,7 +2614,6 @@ awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node)
 
 #else   /* !GRAN && !PAR */
 
-#ifdef RTS_SUPPORTS_THREADS
 void
 awakenBlockedQueueNoLock(StgTSO *tso)
 {
@@ -2652,7 +2621,6 @@ awakenBlockedQueueNoLock(StgTSO *tso)
     tso = unblockOneLocked(tso);
   }
 }
-#endif
 
 void
 awakenBlockedQueue(StgTSO *tso)
@@ -2953,7 +2921,7 @@ unblockThread(StgTSO *tso)
   tso->link = END_TSO_QUEUE;
   tso->why_blocked = NotBlocked;
   tso->block_info.closure = NULL;
-  PUSH_ON_RUN_QUEUE(tso);
+  APPEND_TO_RUN_QUEUE(tso);
 }
 #endif
 
@@ -2997,6 +2965,7 @@ deleteThread(StgTSO *tso)
   raiseAsync(tso,NULL);
 }
 
+#ifdef FORKPROCESS_PRIMOP_SUPPORTED
 static void 
 deleteThreadImmediately(StgTSO *tso)
 { // for forkProcess only:
@@ -3005,13 +2974,15 @@ deleteThreadImmediately(StgTSO *tso)
   if (tso->what_next == ThreadComplete || tso->what_next == ThreadKilled) {
       return;
   }
-#if defined(RTS_SUPPORTS_THREADS)
-  if (tso->why_blocked != BlockedOnCCall
-      && tso->why_blocked != BlockedOnCCall_NoUnblockExc)
-#endif
+
+  if (tso->why_blocked != BlockedOnCCall &&
+      tso->why_blocked != BlockedOnCCall_NoUnblockExc) {
     unblockThread(tso);
+  }
+
   tso->what_next = ThreadKilled;
 }
+#endif
 
 void
 raiseAsyncWithLock(StgTSO *tso, StgClosure *exception)
@@ -3170,7 +3141,8 @@ raiseAsync(StgTSO *tso, StgClosure *exception)
            //
            if (!closure_IND(((StgUpdateFrame *)frame)->updatee)) {
                // revert the black hole
-               UPD_IND_NOLOCK(((StgUpdateFrame *)frame)->updatee,ap);
+               UPD_IND_NOLOCK(((StgUpdateFrame *)frame)->updatee,
+                              (StgClosure *)ap);
            }
            sp += sizeofW(StgUpdateFrame) - 1;
            sp[0] = (W_)ap; // push onto stack
@@ -3192,6 +3164,77 @@ raiseAsync(StgTSO *tso, StgClosure *exception)
 }
 
 /* -----------------------------------------------------------------------------
+   raiseExceptionHelper
+   
+   This function is called by the raise# primitve, just so that we can
+   move some of the tricky bits of raising an exception from C-- into
+   C.  Who knows, it might be a useful re-useable thing here too.
+   -------------------------------------------------------------------------- */
+
+StgWord
+raiseExceptionHelper (StgTSO *tso, StgClosure *exception)
+{
+    StgClosure *raise_closure = NULL;
+    StgPtr p, next;
+    StgRetInfoTable *info;
+    //
+    // This closure represents the expression 'raise# E' where E
+    // is the exception raise.  It is used to overwrite all the
+    // thunks which are currently under evaluataion.
+    //
+
+    //    
+    // LDV profiling: stg_raise_info has THUNK as its closure
+    // type. Since a THUNK takes at least MIN_UPD_SIZE words in its
+    // payload, MIN_UPD_SIZE is more approprate than 1.  It seems that
+    // 1 does not cause any problem unless profiling is performed.
+    // However, when LDV profiling goes on, we need to linearly scan
+    // small object pool, where raise_closure is stored, so we should
+    // use MIN_UPD_SIZE.
+    //
+    // raise_closure = (StgClosure *)RET_STGCALL1(P_,allocate,
+    //                                        sizeofW(StgClosure)+1);
+    //
+
+    //
+    // Walk up the stack, looking for the catch frame.  On the way,
+    // we update any closures pointed to from update frames with the
+    // raise closure that we just built.
+    //
+    p = tso->sp;
+    while(1) {
+       info = get_ret_itbl((StgClosure *)p);
+       next = p + stack_frame_sizeW((StgClosure *)p);
+       switch (info->i.type) {
+           
+       case UPDATE_FRAME:
+           // Only create raise_closure if we need to.
+           if (raise_closure == NULL) {
+               raise_closure = 
+                   (StgClosure *)allocate(sizeofW(StgClosure)+MIN_UPD_SIZE);
+               SET_HDR(raise_closure, &stg_raise_info, CCCS);
+               raise_closure->payload[0] = exception;
+           }
+           UPD_IND(((StgUpdateFrame *)p)->updatee,raise_closure);
+           p = next;
+           continue;
+           
+       case CATCH_FRAME:
+           tso->sp = p;
+           return CATCH_FRAME;
+           
+       case STOP_FRAME:
+           tso->sp = p;
+           return STOP_FRAME;
+
+       default:
+           p = next; 
+           continue;
+       }
+    }
+}
+
+/* -----------------------------------------------------------------------------
    resurrectThreads is called after garbage collection on the list of
    threads found to be garbage.  Each of these threads will be woken
    up and sent a signal: BlockedOnDeadMVar if the thread was blocked
@@ -3343,14 +3386,12 @@ printThreadBlockage(StgTSO *tso)
            tso->block_info.closure, info_type(tso->block_info.closure));
     break;
 #endif
-#if defined(RTS_SUPPORTS_THREADS)
   case BlockedOnCCall:
     fprintf(stderr,"is blocked on an external call");
     break;
   case BlockedOnCCall_NoUnblockExc:
     fprintf(stderr,"is blocked on an external call (exceptions were already blocked)");
     break;
-#endif
   default:
     barf("printThreadBlockage: strange tso->why_blocked: %d for TSO %d (%d)",
         tso->why_blocked, tso->id, tso);