[project @ 2004-08-19 11:27:45 by simonmar]
[ghc-hetmet.git] / ghc / rts / Schedule.c
index 4017c3b..71c3ec9 100644 (file)
@@ -1,7 +1,6 @@
 /* ---------------------------------------------------------------------------
 /* ---------------------------------------------------------------------------
- * $Id: Schedule.c,v 1.189 2004/02/27 12:39:16 simonmar Exp $
  *
  *
- * (c) The GHC Team, 1998-2003
+ * (c) The GHC Team, 1998-2004
  *
  * Scheduler
  *
  *
  * Scheduler
  *
@@ -42,9 +41,9 @@
 #include "SchedAPI.h"
 #include "RtsUtils.h"
 #include "RtsFlags.h"
 #include "SchedAPI.h"
 #include "RtsUtils.h"
 #include "RtsFlags.h"
+#include "BlockAlloc.h"
 #include "Storage.h"
 #include "StgRun.h"
 #include "Storage.h"
 #include "StgRun.h"
-#include "StgStartup.h"
 #include "Hooks.h"
 #define COMPILING_SCHEDULER
 #include "Schedule.h"
 #include "Hooks.h"
 #define COMPILING_SCHEDULER
 #include "Schedule.h"
@@ -59,6 +58,8 @@
 #include "Timer.h"
 #include "Prelude.h"
 #include "ThreadLabels.h"
 #include "Timer.h"
 #include "Prelude.h"
 #include "ThreadLabels.h"
+#include "LdvProfile.h"
+#include "Updates.h"
 #ifdef PROFILING
 #include "Proftimer.h"
 #include "ProfHeap.h"
 #ifdef PROFILING
 #include "Proftimer.h"
 #include "ProfHeap.h"
@@ -234,6 +235,7 @@ rtsBool emitSchedule = rtsTrue;
 
 #if DEBUG
 static char *whatNext_strs[] = {
 
 #if DEBUG
 static char *whatNext_strs[] = {
+  "(unknown)",
   "ThreadRunGHC",
   "ThreadInterpret",
   "ThreadKilled",
   "ThreadRunGHC",
   "ThreadInterpret",
   "ThreadKilled",
@@ -259,6 +261,7 @@ static void
 taskStart(void)
 {
   ACQUIRE_LOCK(&sched_mutex);
 taskStart(void)
 {
   ACQUIRE_LOCK(&sched_mutex);
+  startingWorkerThread = rtsFalse;
   schedule(NULL,NULL);
   RELEASE_LOCK(&sched_mutex);
 }
   schedule(NULL,NULL);
   RELEASE_LOCK(&sched_mutex);
 }
@@ -275,7 +278,10 @@ startSchedulerTaskIfNecessary(void)
       // just because the last one hasn't yet reached the
       // "waiting for capability" state
       startingWorkerThread = rtsTrue;
       // just because the last one hasn't yet reached the
       // "waiting for capability" state
       startingWorkerThread = rtsTrue;
-      startTask(taskStart);
+      if(!startTask(taskStart))
+      {
+        startingWorkerThread = rtsFalse;
+      }
     }
   }
 }
     }
   }
 }
@@ -321,7 +327,7 @@ schedule( StgMainThread *mainThread USED_WHEN_RTS_SUPPORTS_THREADS,
           Capability *initialCapability )
 {
   StgTSO *t;
           Capability *initialCapability )
 {
   StgTSO *t;
-  Capability *cap = initialCapability;
+  Capability *cap;
   StgThreadReturnCode ret;
 #if defined(GRAN)
   rtsEvent *event;
   StgThreadReturnCode ret;
 #if defined(GRAN)
   rtsEvent *event;
@@ -336,10 +342,12 @@ schedule( StgMainThread *mainThread USED_WHEN_RTS_SUPPORTS_THREADS,
 # endif
 #endif
   rtsBool was_interrupted = rtsFalse;
 # endif
 #endif
   rtsBool was_interrupted = rtsFalse;
-  StgTSOWhatNext prev_what_next;
+  nat prev_what_next;
   
   // Pre-condition: sched_mutex is held.
   
   // Pre-condition: sched_mutex is held.
+  // We might have a capability, passed in as initialCapability.
+  cap = initialCapability;
+
 #if defined(RTS_SUPPORTS_THREADS)
   //
   // in the threaded case, the capability is either passed in via the
 #if defined(RTS_SUPPORTS_THREADS)
   //
   // in the threaded case, the capability is either passed in via the
@@ -429,104 +437,6 @@ schedule( StgMainThread *mainThread USED_WHEN_RTS_SUPPORTS_THREADS,
 #endif
     }
 
 #endif
     }
 
-    //
-    // Go through the list of main threads and wake up any
-    // clients whose computations have finished.  ToDo: this
-    // should be done more efficiently without a linear scan
-    // of the main threads list, somehow...
-    //
-#if defined(RTS_SUPPORTS_THREADS)
-    { 
-       StgMainThread *m, **prev;
-       prev = &main_threads;
-       for (m = main_threads; m != NULL; prev = &m->link, m = m->link) {
-         if (m->tso->what_next == ThreadComplete
-             || m->tso->what_next == ThreadKilled)
-         {
-           if (m == mainThread)
-           {
-              if (m->tso->what_next == ThreadComplete)
-              {
-                if (m->ret)
-                {
-                  // NOTE: return val is tso->sp[1] (see StgStartup.hc)
-                  *(m->ret) = (StgClosure *)m->tso->sp[1]; 
-                }
-                m->stat = Success;
-              }
-              else
-              {
-                if (m->ret)
-                {
-                  *(m->ret) = NULL;
-                }
-                if (was_interrupted)
-                {
-                  m->stat = Interrupted;
-                }
-                else
-                {
-                  m->stat = Killed;
-                }
-              }
-              *prev = m->link;
-           
-#ifdef DEBUG
-             removeThreadLabel((StgWord)m->tso->id);
-#endif
-              releaseCapability(cap);
-              return;
-            }
-            else
-            {
-                // The current OS thread can not handle the fact that
-                // the Haskell thread "m" has ended.  "m" is bound;
-                // the scheduler loop in it's bound OS thread has to
-                // return, so let's pass our capability directly to
-                // that thread.
-               passCapability(&m->bound_thread_cond);
-               continue;
-            }
-          }
-       }
-    }
-    
-#else /* not threaded */
-
-# if defined(PAR)
-    /* in GUM do this only on the Main PE */
-    if (IAmMainThread)
-# endif
-    /* If our main thread has finished or been killed, return.
-     */
-    {
-      StgMainThread *m = main_threads;
-      if (m->tso->what_next == ThreadComplete
-         || m->tso->what_next == ThreadKilled) {
-#ifdef DEBUG
-       removeThreadLabel((StgWord)m->tso->id);
-#endif
-       main_threads = main_threads->link;
-       if (m->tso->what_next == ThreadComplete) {
-           // We finished successfully, fill in the return value
-           // NOTE: return val is tso->sp[1] (see StgStartup.hc)
-           if (m->ret) { *(m->ret) = (StgClosure *)m->tso->sp[1]; };
-           m->stat = Success;
-           return;
-       } else {
-         if (m->ret) { *(m->ret) = NULL; };
-         if (was_interrupted) {
-           m->stat = Interrupted;
-         } else {
-           m->stat = Killed;
-         }
-         return;
-       }
-      }
-    }
-#endif
-
-
 #if defined(RTS_USER_SIGNALS)
     // check for signals each time around the scheduler
     if (signals_pending()) {
 #if defined(RTS_USER_SIGNALS)
     // check for signals each time around the scheduler
     if (signals_pending()) {
@@ -536,19 +446,20 @@ schedule( StgMainThread *mainThread USED_WHEN_RTS_SUPPORTS_THREADS,
     }
 #endif
 
     }
 #endif
 
-    /* Check whether any waiting threads need to be woken up.  If the
-     * run queue is empty, and there are no other tasks running, we
-     * can wait indefinitely for something to happen.
-     */
-    if ( !EMPTY_QUEUE(blocked_queue_hd) || !EMPTY_QUEUE(sleeping_queue) 
+    //
+    // Check whether any waiting threads need to be woken up.  If the
+    // run queue is empty, and there are no other tasks running, we
+    // can wait indefinitely for something to happen.
+    //
+    if ( !EMPTY_QUEUE(blocked_queue_hd) || !EMPTY_QUEUE(sleeping_queue)
 #if defined(RTS_SUPPORTS_THREADS)
                || EMPTY_RUN_QUEUE()
 #endif
 #if defined(RTS_SUPPORTS_THREADS)
                || EMPTY_RUN_QUEUE()
 #endif
-        )
+       )
     {
       awaitEvent( EMPTY_RUN_QUEUE() );
     }
     {
       awaitEvent( EMPTY_RUN_QUEUE() );
     }
-    /* we can be interrupted while waiting for I/O... */
+    // we can be interrupted while waiting for I/O...
     if (interrupted) continue;
 
     /* 
     if (interrupted) continue;
 
     /* 
@@ -614,11 +525,9 @@ schedule( StgMainThread *mainThread USED_WHEN_RTS_SUPPORTS_THREADS,
            m = main_threads;
            switch (m->tso->why_blocked) {
            case BlockedOnBlackHole:
            m = main_threads;
            switch (m->tso->why_blocked) {
            case BlockedOnBlackHole:
-               raiseAsync(m->tso, (StgClosure *)NonTermination_closure);
-               break;
            case BlockedOnException:
            case BlockedOnMVar:
            case BlockedOnException:
            case BlockedOnMVar:
-               raiseAsync(m->tso, (StgClosure *)Deadlock_closure);
+               raiseAsync(m->tso, (StgClosure *)NonTermination_closure);
                break;
            default:
                barf("deadlock: main thread blocked in a strange way");
                break;
            default:
                barf("deadlock: main thread blocked in a strange way");
@@ -635,7 +544,7 @@ schedule( StgMainThread *mainThread USED_WHEN_RTS_SUPPORTS_THREADS,
 
 #if defined(RTS_SUPPORTS_THREADS)
     if ( EMPTY_RUN_QUEUE() ) {
 
 #if defined(RTS_SUPPORTS_THREADS)
     if ( EMPTY_RUN_QUEUE() ) {
-      continue; // nothing to do
+       continue; // nothing to do
     }
 #endif
 
     }
 #endif
 
@@ -924,12 +833,7 @@ schedule( StgMainThread *mainThread USED_WHEN_RTS_SUPPORTS_THREADS,
 
 #ifdef THREADED_RTS
     {
 
 #ifdef THREADED_RTS
     {
-      StgMainThread *m;
-      for(m = main_threads; m; m = m->link)
-      {
-       if(m->tso == t)
-         break;
-      }
+      StgMainThread *m = t->main;
       
       if(m)
       {
       
       if(m)
       {
@@ -977,8 +881,6 @@ schedule( StgMainThread *mainThread USED_WHEN_RTS_SUPPORTS_THREADS,
             || blocked_queue_hd != END_TSO_QUEUE
             || sleeping_queue != END_TSO_QUEUE)))
        context_switch = 1;
             || blocked_queue_hd != END_TSO_QUEUE
             || sleeping_queue != END_TSO_QUEUE)))
        context_switch = 1;
-    else
-       context_switch = 0;
 
 run_thread:
 
 
 run_thread:
 
@@ -995,23 +897,35 @@ run_thread:
     /* Run the current thread 
      */
     prev_what_next = t->what_next;
     /* Run the current thread 
      */
     prev_what_next = t->what_next;
+
+    errno = t->saved_errno;
+
     switch (prev_what_next) {
     switch (prev_what_next) {
+
     case ThreadKilled:
     case ThreadComplete:
        /* Thread already finished, return to scheduler. */
        ret = ThreadFinished;
        break;
     case ThreadKilled:
     case ThreadComplete:
        /* Thread already finished, return to scheduler. */
        ret = ThreadFinished;
        break;
+
     case ThreadRunGHC:
     case ThreadRunGHC:
-       errno = t->saved_errno;
        ret = StgRun((StgFunPtr) stg_returnToStackTop, &cap->r);
        ret = StgRun((StgFunPtr) stg_returnToStackTop, &cap->r);
-       t->saved_errno = errno;
        break;
        break;
+
     case ThreadInterpret:
        ret = interpretBCO(cap);
        break;
     case ThreadInterpret:
        ret = interpretBCO(cap);
        break;
+
     default:
       barf("schedule: invalid what_next field");
     }
     default:
       barf("schedule: invalid what_next field");
     }
+
+    // The TSO might have moved, so find the new location:
+    t = cap->r.rCurrentTSO;
+
+    // And save the current errno in this thread.
+    t->saved_errno = errno;
+
     /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
     
     /* Costs for the scheduler are assigned to CCS_SYSTEM */
     /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
     
     /* Costs for the scheduler are assigned to CCS_SYSTEM */
@@ -1027,7 +941,6 @@ run_thread:
 #elif !defined(GRAN) && !defined(PAR)
     IF_DEBUG(scheduler,fprintf(stderr,"sched: "););
 #endif
 #elif !defined(GRAN) && !defined(PAR)
     IF_DEBUG(scheduler,fprintf(stderr,"sched: "););
 #endif
-    t = cap->r.rCurrentTSO;
     
 #if defined(PAR)
     /* HACK 675: if the last thread didn't yield, make sure to print a 
     
 #if defined(PAR)
     /* HACK 675: if the last thread didn't yield, make sure to print a 
@@ -1047,12 +960,12 @@ run_thread:
 #endif
 
       // did the task ask for a large block?
 #endif
 
       // did the task ask for a large block?
-      if (cap->r.rHpAlloc > BLOCK_SIZE_W) {
+      if (cap->r.rHpAlloc > BLOCK_SIZE) {
          // if so, get one and push it on the front of the nursery.
          bdescr *bd;
          nat blocks;
          
          // if so, get one and push it on the front of the nursery.
          bdescr *bd;
          nat blocks;
          
-         blocks = (nat)BLOCK_ROUND_UP(cap->r.rHpAlloc * sizeof(W_)) / BLOCK_SIZE;
+         blocks = (nat)BLOCK_ROUND_UP(cap->r.rHpAlloc) / BLOCK_SIZE;
 
          IF_DEBUG(scheduler,belch("--<< thread %ld (%s) stopped: requesting a large block (size %d)", 
                                   t->id, whatNext_strs[t->what_next], blocks));
 
          IF_DEBUG(scheduler,belch("--<< thread %ld (%s) stopped: requesting a large block (size %d)", 
                                   t->id, whatNext_strs[t->what_next], blocks));
@@ -1153,7 +1066,6 @@ run_thread:
        */
       threadPaused(t);
       { 
        */
       threadPaused(t);
       { 
-       StgMainThread *m;
        /* enlarge the stack */
        StgTSO *new_t = threadStackOverflow(t);
        
        /* enlarge the stack */
        StgTSO *new_t = threadStackOverflow(t);
        
@@ -1161,17 +1073,22 @@ run_thread:
         * main thread stack.  It better not be on any other queues...
         * (it shouldn't be).
         */
         * main thread stack.  It better not be on any other queues...
         * (it shouldn't be).
         */
-       for (m = main_threads; m != NULL; m = m->link) {
-         if (m->tso == t) {
-           m->tso = new_t;
-         }
+       if (t->main != NULL) {
+           t->main->tso = new_t;
        }
        }
-       threadPaused(new_t);
        PUSH_ON_RUN_QUEUE(new_t);
       }
       break;
 
     case ThreadYielding:
        PUSH_ON_RUN_QUEUE(new_t);
       }
       break;
 
     case ThreadYielding:
+      // Reset the context switch flag.  We don't do this just before
+      // running the thread, because that would mean we would lose ticks
+      // during GC, which can lead to unfair scheduling (a thread hogs
+      // the CPU because the tick always arrives during GC).  This way
+      // penalises threads that do a lot of allocation, but that seems
+      // better than the alternative.
+      context_switch = 0;
+
 #if defined(GRAN)
       IF_DEBUG(gran, 
               DumpGranEvent(GR_DESCHEDULE, t));
 #if defined(GRAN)
       IF_DEBUG(gran, 
               DumpGranEvent(GR_DESCHEDULE, t));
@@ -1327,8 +1244,74 @@ run_thread:
          !RtsFlags.ParFlags.ParStats.Suppressed) 
        DumpEndEvent(CURRENT_PROC, t, rtsFalse /* not mandatory */);
 #endif
          !RtsFlags.ParFlags.ParStats.Suppressed) 
        DumpEndEvent(CURRENT_PROC, t, rtsFalse /* not mandatory */);
 #endif
+
+      //
+      // Check whether the thread that just completed was a main
+      // thread, and if so return with the result.  
+      //
+      // There is an assumption here that all thread completion goes
+      // through this point; we need to make sure that if a thread
+      // ends up in the ThreadKilled state, that it stays on the run
+      // queue so it can be dealt with here.
+      //
+      if (
+#if defined(RTS_SUPPORTS_THREADS)
+         mainThread != NULL
+#else
+         mainThread->tso == t
+#endif
+         )
+      {
+         // We are a bound thread: this must be our thread that just
+         // completed.
+         ASSERT(mainThread->tso == t);
+
+         if (t->what_next == ThreadComplete) {
+             if (mainThread->ret) {
+                 // NOTE: return val is tso->sp[1] (see StgStartup.hc)
+                 *(mainThread->ret) = (StgClosure *)mainThread->tso->sp[1]; 
+             }
+             mainThread->stat = Success;
+         } else {
+             if (mainThread->ret) {
+                 *(mainThread->ret) = NULL;
+             }
+             if (was_interrupted) {
+                 mainThread->stat = Interrupted;
+             } else {
+                 mainThread->stat = Killed;
+             }
+         }
+#ifdef DEBUG
+         removeThreadLabel((StgWord)mainThread->tso->id);
+#endif
+         if (mainThread->prev == NULL) {
+             main_threads = mainThread->link;
+         } else {
+             mainThread->prev->link = mainThread->link;
+         }
+         if (mainThread->link != NULL) {
+             mainThread->link->prev = NULL;
+         }
+         releaseCapability(cap);
+         return;
+      }
+
+#ifdef RTS_SUPPORTS_THREADS
+      ASSERT(t->main == NULL);
+#else
+      if (t->main != NULL) {
+         // Must be a main thread that is not the topmost one.  Leave
+         // it on the run queue until the stack has unwound to the
+         // point where we can deal with this.  Leaving it on the run
+         // queue also ensures that the garbage collector knows about
+         // this thread and its return value (it gets dropped from the
+         // all_threads list so there's no other way to find it).
+         APPEND_TO_RUN_QUEUE(t);
+      }
+#endif
       break;
       break;
-      
+
     default:
       barf("schedule: invalid thread return code %d", (int)ret);
     }
     default:
       barf("schedule: invalid thread return code %d", (int)ret);
     }
@@ -1409,12 +1392,7 @@ StgBool
 isThreadBound(StgTSO* tso USED_IN_THREADED_RTS)
 {
 #ifdef THREADED_RTS
 isThreadBound(StgTSO* tso USED_IN_THREADED_RTS)
 {
 #ifdef THREADED_RTS
-  StgMainThread *m;
-  for(m = main_threads; m; m = m->link)
-  {
-    if(m->tso == tso)
-      return rtsTrue;
-  }
+  return (tso->main != NULL);
 #endif
   return rtsFalse;
 }
 #endif
   return rtsFalse;
 }
@@ -1423,13 +1401,22 @@ isThreadBound(StgTSO* tso USED_IN_THREADED_RTS)
  * Singleton fork(). Do not copy any running threads.
  * ------------------------------------------------------------------------- */
 
  * Singleton fork(). Do not copy any running threads.
  * ------------------------------------------------------------------------- */
 
+#ifndef mingw32_TARGET_OS
+#define FORKPROCESS_PRIMOP_SUPPORTED
+#endif
+
+#ifdef FORKPROCESS_PRIMOP_SUPPORTED
 static void 
 deleteThreadImmediately(StgTSO *tso);
 static void 
 deleteThreadImmediately(StgTSO *tso);
-
+#endif
 StgInt
 StgInt
-forkProcess(HsStablePtr *entry)
+forkProcess(HsStablePtr *entry
+#ifndef FORKPROCESS_PRIMOP_SUPPORTED
+           STG_UNUSED
+#endif
+           )
 {
 {
-#ifndef mingw32_TARGET_OS
+#ifdef FORKPROCESS_PRIMOP_SUPPORTED
   pid_t pid;
   StgTSO* t,*next;
   StgMainThread *m;
   pid_t pid;
   StgTSO* t,*next;
   StgMainThread *m;
@@ -1463,17 +1450,17 @@ forkProcess(HsStablePtr *entry)
       // wipe the main thread list
     while((m = main_threads) != NULL) {
       main_threads = m->link;
       // wipe the main thread list
     while((m = main_threads) != NULL) {
       main_threads = m->link;
-#ifdef THREADED_RTS
+# ifdef THREADED_RTS
       closeCondition(&m->bound_thread_cond);
       closeCondition(&m->bound_thread_cond);
-#endif
+# endif
       stgFree(m);
     }
     
       stgFree(m);
     }
     
-#ifdef RTS_SUPPORTS_THREADS
+# ifdef RTS_SUPPORTS_THREADS
     resetTaskManagerAfterFork();      // tell startTask() and friends that
     startingWorkerThread = rtsFalse;  // we have no worker threads any more
     resetWorkerWakeupPipeAfterFork();
     resetTaskManagerAfterFork();      // tell startTask() and friends that
     startingWorkerThread = rtsFalse;  // we have no worker threads any more
     resetWorkerWakeupPipeAfterFork();
-#endif
+# endif
     
     rc = rts_evalStableIO(entry, NULL);  // run the action
     rts_checkSchedStatus("forkProcess",rc);
     
     rc = rts_evalStableIO(entry, NULL);  // run the action
     rts_checkSchedStatus("forkProcess",rc);
@@ -1483,10 +1470,10 @@ forkProcess(HsStablePtr *entry)
     hs_exit();                      // clean up and exit
     stg_exit(0);
   }
     hs_exit();                      // clean up and exit
     stg_exit(0);
   }
-#else /* mingw32 */
-  barf("forkProcess#: primop not implemented for mingw32, sorry!\n");
+#else /* !FORKPROCESS_PRIMOP_SUPPORTED */
+  barf("forkProcess#: primop not supported, sorry!\n");
   return -1;
   return -1;
-#endif /* mingw32 */
+#endif
 }
 
 /* ---------------------------------------------------------------------------
 }
 
 /* ---------------------------------------------------------------------------
@@ -1507,9 +1494,15 @@ deleteAllThreads ( void )
       next = t->global_link;
       deleteThread(t);
   }      
       next = t->global_link;
       deleteThread(t);
   }      
-  run_queue_hd = run_queue_tl = END_TSO_QUEUE;
-  blocked_queue_hd = blocked_queue_tl = END_TSO_QUEUE;
-  sleeping_queue = END_TSO_QUEUE;
+
+  // The run queue now contains a bunch of ThreadKilled threads.  We
+  // must not throw these away: the main thread(s) will be in there
+  // somewhere, and the main scheduler loop has to deal with it.
+  // Also, the run queue is the only thing keeping these threads from
+  // being GC'd, and we don't want the "main thread has been GC'd" panic.
+
+  ASSERT(blocked_queue_hd == END_TSO_QUEUE);
+  ASSERT(sleeping_queue == END_TSO_QUEUE);
 }
 
 /* startThread and  insertThread are now in GranSim.c -- HWL */
 }
 
 /* startThread and  insertThread are now in GranSim.c -- HWL */
@@ -1531,12 +1524,7 @@ deleteAllThreads ( void )
  * ------------------------------------------------------------------------- */
    
 StgInt
  * ------------------------------------------------------------------------- */
    
 StgInt
-suspendThread( StgRegTable *reg, 
-              rtsBool concCall
-#if !defined(DEBUG)
-              STG_UNUSED
-#endif
-              )
+suspendThread( StgRegTable *reg )
 {
   nat tok;
   Capability *cap;
 {
   nat tok;
   Capability *cap;
@@ -1550,7 +1538,7 @@ suspendThread( StgRegTable *reg,
   ACQUIRE_LOCK(&sched_mutex);
 
   IF_DEBUG(scheduler,
   ACQUIRE_LOCK(&sched_mutex);
 
   IF_DEBUG(scheduler,
-          sched_belch("thread %d did a _ccall_gc (is_concurrent: %d)", cap->r.rCurrentTSO->id,concCall));
+          sched_belch("thread %d did a _ccall_gc", cap->r.rCurrentTSO->id));
 
   // XXX this might not be necessary --SDM
   cap->r.rCurrentTSO->what_next = ThreadRunGHC;
 
   // XXX this might not be necessary --SDM
   cap->r.rCurrentTSO->what_next = ThreadRunGHC;
@@ -1559,17 +1547,12 @@ suspendThread( StgRegTable *reg,
   cap->r.rCurrentTSO->link = suspended_ccalling_threads;
   suspended_ccalling_threads = cap->r.rCurrentTSO;
 
   cap->r.rCurrentTSO->link = suspended_ccalling_threads;
   suspended_ccalling_threads = cap->r.rCurrentTSO;
 
-#if defined(RTS_SUPPORTS_THREADS)
-  if(cap->r.rCurrentTSO->blocked_exceptions == NULL)
-  {
+  if(cap->r.rCurrentTSO->blocked_exceptions == NULL)  {
       cap->r.rCurrentTSO->why_blocked = BlockedOnCCall;
       cap->r.rCurrentTSO->blocked_exceptions = END_TSO_QUEUE;
       cap->r.rCurrentTSO->why_blocked = BlockedOnCCall;
       cap->r.rCurrentTSO->blocked_exceptions = END_TSO_QUEUE;
-  }
-  else
-  {
+  } else {
       cap->r.rCurrentTSO->why_blocked = BlockedOnCCall_NoUnblockExc;
   }
       cap->r.rCurrentTSO->why_blocked = BlockedOnCCall_NoUnblockExc;
   }
-#endif
 
   /* Use the thread ID as the token; it should be unique */
   tok = cap->r.rCurrentTSO->id;
 
   /* Use the thread ID as the token; it should be unique */
   tok = cap->r.rCurrentTSO->id;
@@ -1593,8 +1576,7 @@ suspendThread( StgRegTable *reg,
 }
 
 StgRegTable *
 }
 
 StgRegTable *
-resumeThread( StgInt tok,
-             rtsBool concCall STG_UNUSED )
+resumeThread( StgInt tok )
 {
   StgTSO *tso, **prev;
   Capability *cap;
 {
   StgTSO *tso, **prev;
   Capability *cap;
@@ -1625,13 +1607,10 @@ resumeThread( StgInt tok,
   }
   tso->link = END_TSO_QUEUE;
   
   }
   tso->link = END_TSO_QUEUE;
   
-#if defined(RTS_SUPPORTS_THREADS)
-  if(tso->why_blocked == BlockedOnCCall)
-  {
+  if(tso->why_blocked == BlockedOnCCall) {
       awakenBlockedQueueNoLock(tso->blocked_exceptions);
       tso->blocked_exceptions = NULL;
   }
       awakenBlockedQueueNoLock(tso->blocked_exceptions);
       tso->blocked_exceptions = NULL;
   }
-#endif
   
   /* Reset blocking status */
   tso->why_blocked  = NotBlocked;
   
   /* Reset blocking status */
   tso->why_blocked  = NotBlocked;
@@ -1760,6 +1739,7 @@ createThread(nat size)
   tso->blocked_exceptions = NULL;
 
   tso->saved_errno = 0;
   tso->blocked_exceptions = NULL;
 
   tso->saved_errno = 0;
+  tso->main = NULL;
   
   tso->stack_size   = stack_size;
   tso->max_stack_size = round_to_mblocks(RtsFlags.GcFlags.maxStkSize) 
   
   tso->stack_size   = stack_size;
   tso->max_stack_size = round_to_mblocks(RtsFlags.GcFlags.maxStkSize) 
@@ -1773,9 +1753,10 @@ createThread(nat size)
   /* put a stop frame on the stack */
   tso->sp -= sizeofW(StgStopFrame);
   SET_HDR((StgClosure*)tso->sp,(StgInfoTable *)&stg_stop_thread_info,CCS_SYSTEM);
   /* put a stop frame on the stack */
   tso->sp -= sizeofW(StgStopFrame);
   SET_HDR((StgClosure*)tso->sp,(StgInfoTable *)&stg_stop_thread_info,CCS_SYSTEM);
+  tso->link = END_TSO_QUEUE;
+
   // ToDo: check this
 #if defined(GRAN)
   // ToDo: check this
 #if defined(GRAN)
-  tso->link = END_TSO_QUEUE;
   /* uses more flexible routine in GranSim */
   insertThread(tso, CurrentProc);
 #else
   /* uses more flexible routine in GranSim */
   insertThread(tso, CurrentProc);
 #else
@@ -1952,7 +1933,9 @@ void
 scheduleThread_(StgTSO *tso)
 {
   // Precondition: sched_mutex must be held.
 scheduleThread_(StgTSO *tso)
 {
   // Precondition: sched_mutex must be held.
-  PUSH_ON_RUN_QUEUE(tso);
+  // The thread goes at the *end* of the run-queue, to avoid possible
+  // starvation of any threads already on the queue.
+  APPEND_TO_RUN_QUEUE(tso);
   THREAD_RUNNABLE();
 }
 
   THREAD_RUNNABLE();
 }
 
@@ -1965,9 +1948,11 @@ scheduleThread(StgTSO* tso)
 }
 
 #if defined(RTS_SUPPORTS_THREADS)
 }
 
 #if defined(RTS_SUPPORTS_THREADS)
-static Condition *bound_cond_cache = NULL;
+static Condition bound_cond_cache;
+static int bound_cond_cache_full = 0;
 #endif
 
 #endif
 
+
 SchedulerStatus
 scheduleWaitThread(StgTSO* tso, /*[out]*/HaskellObj* ret,
                   Capability *initialCapability)
 SchedulerStatus
 scheduleWaitThread(StgTSO* tso, /*[out]*/HaskellObj* ret,
                   Capability *initialCapability)
@@ -1977,15 +1962,23 @@ scheduleWaitThread(StgTSO* tso, /*[out]*/HaskellObj* ret,
 
     m = stgMallocBytes(sizeof(StgMainThread), "waitThread");
     m->tso = tso;
 
     m = stgMallocBytes(sizeof(StgMainThread), "waitThread");
     m->tso = tso;
+    tso->main = m;
     m->ret = ret;
     m->stat = NoStatus;
     m->ret = ret;
     m->stat = NoStatus;
+    m->link = main_threads;
+    m->prev = NULL;
+    if (main_threads != NULL) {
+       main_threads->prev = m;
+    }
+    main_threads = m;
+
 #if defined(RTS_SUPPORTS_THREADS)
     // Allocating a new condition for each thread is expensive, so we
     // cache one.  This is a pretty feeble hack, but it helps speed up
     // consecutive call-ins quite a bit.
 #if defined(RTS_SUPPORTS_THREADS)
     // Allocating a new condition for each thread is expensive, so we
     // cache one.  This is a pretty feeble hack, but it helps speed up
     // consecutive call-ins quite a bit.
-    if (bound_cond_cache != NULL) {
-       m->bound_thread_cond = *bound_cond_cache;
-       bound_cond_cache = NULL;
+    if (bound_cond_cache_full) {
+       m->bound_thread_cond = bound_cond_cache;
+       bound_cond_cache_full = 0;
     } else {
        initCondition(&m->bound_thread_cond);
     }
     } else {
        initCondition(&m->bound_thread_cond);
     }
@@ -2002,10 +1995,7 @@ scheduleWaitThread(StgTSO* tso, /*[out]*/HaskellObj* ret,
     */
     IF_DEBUG(scheduler, sched_belch("waiting for thread (%d)", tso->id));
     
     */
     IF_DEBUG(scheduler, sched_belch("waiting for thread (%d)", tso->id));
     
-    m->link = main_threads;
-    main_threads = m;
-    
-    PUSH_ON_RUN_QUEUE(tso);
+    APPEND_TO_RUN_QUEUE(tso);
     // NB. Don't call THREAD_RUNNABLE() here, because the thread is
     // bound and only runnable by *this* OS thread, so waking up other
     // workers will just slow things down.
     // NB. Don't call THREAD_RUNNABLE() here, because the thread is
     // bound and only runnable by *this* OS thread, so waking up other
     // workers will just slow things down.
@@ -2062,9 +2052,7 @@ initScheduler(void)
   initMutex(&term_mutex);
 #endif
   
   initMutex(&term_mutex);
 #endif
   
-#if defined(RTS_SUPPORTS_THREADS)
   ACQUIRE_LOCK(&sched_mutex);
   ACQUIRE_LOCK(&sched_mutex);
-#endif
 
   /* A capability holds the state a native thread needs in
    * order to execute STG code. At least one capability is
 
   /* A capability holds the state a native thread needs in
    * order to execute STG code. At least one capability is
@@ -2082,7 +2070,6 @@ initScheduler(void)
 #endif
 
   RELEASE_LOCK(&sched_mutex);
 #endif
 
   RELEASE_LOCK(&sched_mutex);
-
 }
 
 void
 }
 
 void
@@ -2127,8 +2114,9 @@ waitThread_(StgMainThread* m, Capability *initialCapability)
 
 #if defined(RTS_SUPPORTS_THREADS)
   // Free the condition variable, returning it to the cache if possible.
 
 #if defined(RTS_SUPPORTS_THREADS)
   // Free the condition variable, returning it to the cache if possible.
-  if (bound_cond_cache == NULL) {
-      *bound_cond_cache = m->bound_thread_cond;
+  if (!bound_cond_cache_full) {
+      bound_cond_cache = m->bound_thread_cond;
+      bound_cond_cache_full = 1;
   } else {
       closeCondition(&m->bound_thread_cond);
   }
   } else {
       closeCondition(&m->bound_thread_cond);
   }
@@ -2210,25 +2198,6 @@ GetRoots( evac_fn evac )
   // mark the signal handlers (signals should be already blocked)
   markSignalHandlers(evac);
 #endif
   // mark the signal handlers (signals should be already blocked)
   markSignalHandlers(evac);
 #endif
-
-  // main threads which have completed need to be retained until they
-  // are dealt with in the main scheduler loop.  They won't be
-  // retained any other way: the GC will drop them from the
-  // all_threads list, so we have to be careful to treat them as roots
-  // here.
-  { 
-      StgMainThread *m;
-      for (m = main_threads; m != NULL; m = m->link) {
-         switch (m->tso->what_next) {
-         case ThreadComplete:
-         case ThreadKilled:
-             evac((StgClosure **)&m->tso);
-             break;
-         default:
-             break;
-         }
-      }
-  }
 }
 
 /* -----------------------------------------------------------------------------
 }
 
 /* -----------------------------------------------------------------------------
@@ -2456,8 +2425,8 @@ unblockOneLocked(StgBlockingQueueElement *bqe, StgClosure *node)
       ASSERT(((StgTSO *)bqe)->why_blocked != NotBlocked);
       /* if it's a TSO just push it onto the run_queue */
       next = bqe->link;
       ASSERT(((StgTSO *)bqe)->why_blocked != NotBlocked);
       /* if it's a TSO just push it onto the run_queue */
       next = bqe->link;
-      // ((StgTSO *)bqe)->link = END_TSO_QUEUE; // debugging?
-      PUSH_ON_RUN_QUEUE((StgTSO *)bqe); 
+      ((StgTSO *)bqe)->link = END_TSO_QUEUE; // debugging?
+      APPEND_TO_RUN_QUEUE((StgTSO *)bqe); 
       THREAD_RUNNABLE();
       unblockCount(bqe, node);
       /* reset blocking status after dumping event */
       THREAD_RUNNABLE();
       unblockCount(bqe, node);
       /* reset blocking status after dumping event */
@@ -2501,7 +2470,8 @@ unblockOneLocked(StgTSO *tso)
   ASSERT(tso->why_blocked != NotBlocked);
   tso->why_blocked = NotBlocked;
   next = tso->link;
   ASSERT(tso->why_blocked != NotBlocked);
   tso->why_blocked = NotBlocked;
   next = tso->link;
-  PUSH_ON_RUN_QUEUE(tso);
+  tso->link = END_TSO_QUEUE;
+  APPEND_TO_RUN_QUEUE(tso);
   THREAD_RUNNABLE();
   IF_DEBUG(scheduler,sched_belch("waking up thread %ld", tso->id));
   return next;
   THREAD_RUNNABLE();
   IF_DEBUG(scheduler,sched_belch("waking up thread %ld", tso->id));
   return next;
@@ -2644,7 +2614,6 @@ awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node)
 
 #else   /* !GRAN && !PAR */
 
 
 #else   /* !GRAN && !PAR */
 
-#ifdef RTS_SUPPORTS_THREADS
 void
 awakenBlockedQueueNoLock(StgTSO *tso)
 {
 void
 awakenBlockedQueueNoLock(StgTSO *tso)
 {
@@ -2652,7 +2621,6 @@ awakenBlockedQueueNoLock(StgTSO *tso)
     tso = unblockOneLocked(tso);
   }
 }
     tso = unblockOneLocked(tso);
   }
 }
-#endif
 
 void
 awakenBlockedQueue(StgTSO *tso)
 
 void
 awakenBlockedQueue(StgTSO *tso)
@@ -2953,7 +2921,7 @@ unblockThread(StgTSO *tso)
   tso->link = END_TSO_QUEUE;
   tso->why_blocked = NotBlocked;
   tso->block_info.closure = NULL;
   tso->link = END_TSO_QUEUE;
   tso->why_blocked = NotBlocked;
   tso->block_info.closure = NULL;
-  PUSH_ON_RUN_QUEUE(tso);
+  APPEND_TO_RUN_QUEUE(tso);
 }
 #endif
 
 }
 #endif
 
@@ -2997,6 +2965,7 @@ deleteThread(StgTSO *tso)
   raiseAsync(tso,NULL);
 }
 
   raiseAsync(tso,NULL);
 }
 
+#ifdef FORKPROCESS_PRIMOP_SUPPORTED
 static void 
 deleteThreadImmediately(StgTSO *tso)
 { // for forkProcess only:
 static void 
 deleteThreadImmediately(StgTSO *tso)
 { // for forkProcess only:
@@ -3005,13 +2974,15 @@ deleteThreadImmediately(StgTSO *tso)
   if (tso->what_next == ThreadComplete || tso->what_next == ThreadKilled) {
       return;
   }
   if (tso->what_next == ThreadComplete || tso->what_next == ThreadKilled) {
       return;
   }
-#if defined(RTS_SUPPORTS_THREADS)
-  if (tso->why_blocked != BlockedOnCCall
-      && tso->why_blocked != BlockedOnCCall_NoUnblockExc)
-#endif
+
+  if (tso->why_blocked != BlockedOnCCall &&
+      tso->why_blocked != BlockedOnCCall_NoUnblockExc) {
     unblockThread(tso);
     unblockThread(tso);
+  }
+
   tso->what_next = ThreadKilled;
 }
   tso->what_next = ThreadKilled;
 }
+#endif
 
 void
 raiseAsyncWithLock(StgTSO *tso, StgClosure *exception)
 
 void
 raiseAsyncWithLock(StgTSO *tso, StgClosure *exception)
@@ -3170,7 +3141,8 @@ raiseAsync(StgTSO *tso, StgClosure *exception)
            //
            if (!closure_IND(((StgUpdateFrame *)frame)->updatee)) {
                // revert the black hole
            //
            if (!closure_IND(((StgUpdateFrame *)frame)->updatee)) {
                // revert the black hole
-               UPD_IND_NOLOCK(((StgUpdateFrame *)frame)->updatee,ap);
+               UPD_IND_NOLOCK(((StgUpdateFrame *)frame)->updatee,
+                              (StgClosure *)ap);
            }
            sp += sizeofW(StgUpdateFrame) - 1;
            sp[0] = (W_)ap; // push onto stack
            }
            sp += sizeofW(StgUpdateFrame) - 1;
            sp[0] = (W_)ap; // push onto stack
@@ -3192,6 +3164,77 @@ raiseAsync(StgTSO *tso, StgClosure *exception)
 }
 
 /* -----------------------------------------------------------------------------
 }
 
 /* -----------------------------------------------------------------------------
+   raiseExceptionHelper
+   
+   This function is called by the raise# primitve, just so that we can
+   move some of the tricky bits of raising an exception from C-- into
+   C.  Who knows, it might be a useful re-useable thing here too.
+   -------------------------------------------------------------------------- */
+
+StgWord
+raiseExceptionHelper (StgTSO *tso, StgClosure *exception)
+{
+    StgClosure *raise_closure = NULL;
+    StgPtr p, next;
+    StgRetInfoTable *info;
+    //
+    // This closure represents the expression 'raise# E' where E
+    // is the exception raise.  It is used to overwrite all the
+    // thunks which are currently under evaluataion.
+    //
+
+    //    
+    // LDV profiling: stg_raise_info has THUNK as its closure
+    // type. Since a THUNK takes at least MIN_UPD_SIZE words in its
+    // payload, MIN_UPD_SIZE is more approprate than 1.  It seems that
+    // 1 does not cause any problem unless profiling is performed.
+    // However, when LDV profiling goes on, we need to linearly scan
+    // small object pool, where raise_closure is stored, so we should
+    // use MIN_UPD_SIZE.
+    //
+    // raise_closure = (StgClosure *)RET_STGCALL1(P_,allocate,
+    //                                        sizeofW(StgClosure)+1);
+    //
+
+    //
+    // Walk up the stack, looking for the catch frame.  On the way,
+    // we update any closures pointed to from update frames with the
+    // raise closure that we just built.
+    //
+    p = tso->sp;
+    while(1) {
+       info = get_ret_itbl((StgClosure *)p);
+       next = p + stack_frame_sizeW((StgClosure *)p);
+       switch (info->i.type) {
+           
+       case UPDATE_FRAME:
+           // Only create raise_closure if we need to.
+           if (raise_closure == NULL) {
+               raise_closure = 
+                   (StgClosure *)allocate(sizeofW(StgClosure)+MIN_UPD_SIZE);
+               SET_HDR(raise_closure, &stg_raise_info, CCCS);
+               raise_closure->payload[0] = exception;
+           }
+           UPD_IND(((StgUpdateFrame *)p)->updatee,raise_closure);
+           p = next;
+           continue;
+           
+       case CATCH_FRAME:
+           tso->sp = p;
+           return CATCH_FRAME;
+           
+       case STOP_FRAME:
+           tso->sp = p;
+           return STOP_FRAME;
+
+       default:
+           p = next; 
+           continue;
+       }
+    }
+}
+
+/* -----------------------------------------------------------------------------
    resurrectThreads is called after garbage collection on the list of
    threads found to be garbage.  Each of these threads will be woken
    up and sent a signal: BlockedOnDeadMVar if the thread was blocked
    resurrectThreads is called after garbage collection on the list of
    threads found to be garbage.  Each of these threads will be woken
    up and sent a signal: BlockedOnDeadMVar if the thread was blocked
@@ -3343,14 +3386,12 @@ printThreadBlockage(StgTSO *tso)
            tso->block_info.closure, info_type(tso->block_info.closure));
     break;
 #endif
            tso->block_info.closure, info_type(tso->block_info.closure));
     break;
 #endif
-#if defined(RTS_SUPPORTS_THREADS)
   case BlockedOnCCall:
     fprintf(stderr,"is blocked on an external call");
     break;
   case BlockedOnCCall_NoUnblockExc:
     fprintf(stderr,"is blocked on an external call (exceptions were already blocked)");
     break;
   case BlockedOnCCall:
     fprintf(stderr,"is blocked on an external call");
     break;
   case BlockedOnCCall_NoUnblockExc:
     fprintf(stderr,"is blocked on an external call (exceptions were already blocked)");
     break;
-#endif
   default:
     barf("printThreadBlockage: strange tso->why_blocked: %d for TSO %d (%d)",
         tso->why_blocked, tso->id, tso);
   default:
     barf("printThreadBlockage: strange tso->why_blocked: %d for TSO %d (%d)",
         tso->why_blocked, tso->id, tso);