FIX (partially) #2703: bug in stack overflow handling when inside block

[ghc-hetmet.git] / rts / Schedule.c
diff --git a/rts/Schedule.c b/rts/Schedule.c

index 94aac6c..626c097 100644 (file)
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -89,11 +89,6 @@ StgTSO *blackhole_queue = NULL;
   */
  rtsBool blackholes_need_checking = rtsFalse;
  
-/* flag set by signal handler to precipitate a context switch
- * LOCK: none (just an advisory flag)
- */
-int context_switch = 0;
-
  /* flag that tracks whether we have done any execution in this time slice.
   * LOCK: currently none, perhaps we should lock (but needs to be
   * updated in the fast path of the scheduler).
@@ -155,7 +150,7 @@ static rtsBool scheduleGetRemoteWork(Capability *cap);
  static void scheduleSendPendingMessages(void);
  static void scheduleActivateSpark(Capability *cap);
  #endif
-static void schedulePostRunThread(StgTSO *t);
+static void schedulePostRunThread(Capability *cap, StgTSO *t);
  static rtsBool scheduleHandleHeapOverflow( Capability *cap, StgTSO *t );
  static void scheduleHandleStackOverflow( Capability *cap, Task *task, 
                                          StgTSO *t);
@@ -504,7 +499,7 @@ schedule (Capability *initialCapability, Task *task)
       */
      if (RtsFlags.ConcFlags.ctxtSwitchTicks == 0
         && !emptyThreadQueues(cap)) {
-       context_switch = 1;
+       cap->context_switch = 1;
      }
          
  run_thread:
@@ -627,7 +622,7 @@ run_thread:
      CCCS = CCS_SYSTEM;
  #endif
      
-    schedulePostRunThread(t);
+    schedulePostRunThread(cap,t);
  
      t = threadStackUnderflow(task,t);
  
@@ -969,10 +964,10 @@ scheduleDetectDeadlock (Capability *cap, Task *task)
   * Send pending messages (PARALLEL_HASKELL only)
   * ------------------------------------------------------------------------- */
  
+#if defined(PARALLEL_HASKELL)
  static StgTSO *
  scheduleSendPendingMessages(void)
  {
-#if defined(PARALLEL_HASKELL)
  
  # if defined(PAR) // global Mem.Mgmt., omit for now
      if (PendingFetches != END_BF_QUEUE) {
@@ -985,8 +980,8 @@ scheduleSendPendingMessages(void)
         // packets which have become too old...
         sendOldBuffers(); 
      }
-#endif
  }
+#endif
  
  /* ----------------------------------------------------------------------------
   * Activate spark threads (PARALLEL_HASKELL only)
@@ -1069,7 +1064,7 @@ scheduleGetRemoteWork(Capability *cap)
   * ------------------------------------------------------------------------- */
  
  static void
-schedulePostRunThread (StgTSO *t)
+schedulePostRunThread (Capability *cap, StgTSO *t)
  {
      // We have to be able to catch transactions that are in an
      // infinite loop as a result of seeing an inconsistent view of
@@ -1090,8 +1085,7 @@ schedulePostRunThread (StgTSO *t)
              // ATOMICALLY_FRAME, aborting the (nested)
              // transaction, and saving the stack of any
              // partially-evaluated thunks on the heap.
-            throwToSingleThreaded_(&capabilities[0], t, 
-                                   NULL, rtsTrue, NULL);
+            throwToSingleThreaded_(cap, t, NULL, rtsTrue, NULL);
              
              ASSERT(get_itbl((StgClosure *)t->sp)->type == ATOMICALLY_FRAME);
          }
@@ -1179,12 +1173,12 @@ scheduleHandleHeapOverflow( Capability *cap, StgTSO *t )
                "--<< thread %ld (%s) stopped: HeapOverflow",
                (long)t->id, whatNext_strs[t->what_next]);
  
-    if (context_switch) {
+    if (cap->context_switch) {
          // Sometimes we miss a context switch, e.g. when calling
          // primitives in a tight loop, MAYBE_GC() doesn't check the
          // context switch flag, and we end up waiting for a GC.
          // See #1984, and concurrent/should_run/1984
-        context_switch = 0;
+        cap->context_switch = 0;
          addToRunQueue(cap,t);
      } else {
          pushOnRunQueue(cap,t);
@@ -1234,7 +1228,7 @@ scheduleHandleYield( Capability *cap, StgTSO *t, nat prev_what_next )
      // the CPU because the tick always arrives during GC).  This way
      // penalises threads that do a lot of allocation, but that seems
      // better than the alternative.
-    context_switch = 0;
+    cap->context_switch = 0;
      
      /* put the thread back on the run queue.  Then, if we're ready to
       * GC, check whether this is the last task to stop.  If so, wake
@@ -1402,10 +1396,10 @@ scheduleNeedHeapProfile( rtsBool ready_to_gc STG_UNUSED )
  static Capability *
  scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major)
  {
-    StgTSO *t;
      rtsBool heap_census;
  #ifdef THREADED_RTS
-    static volatile StgWord waiting_for_gc;
+    /* extern static volatile StgWord waiting_for_gc; 
+       lives inside capability.c */
      rtsBool was_waiting;
      nat i;
  #endif
@@ -1422,6 +1416,10 @@ scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major)
      // the other tasks to sleep and stay asleep.
      //
         
+    /*  Other capabilities are prevented from running yet more Haskell
+       threads if waiting_for_gc is set. Tested inside
+       yieldCapability() and releaseCapability() in Capability.c */
+
      was_waiting = cas(&waiting_for_gc, 0, 1);
      if (was_waiting) {
         do {
@@ -1431,6 +1429,7 @@ scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major)
         return cap;  // NOTE: task->cap might have changed here
      }
  
+    setContextSwitches();
      for (i=0; i < n_capabilities; i++) {
         debugTrace(DEBUG_sched, "ready_to_gc, grabbing all the capabilies (%d/%d)", i, n_capabilities);
         if (cap != &capabilities[i]) {
@@ -1441,7 +1440,6 @@ scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major)
             // all the Capabilities, but even so it's a slightly
             // unsavoury invariant.
             task->cap = pcap;
-           context_switch = 1;
             waitForReturnCapability(&pcap, task);
             if (pcap != &capabilities[i]) {
                 barf("scheduleDoGC: got the wrong capability");
@@ -1866,7 +1864,7 @@ scheduleThreadOn(Capability *cap, StgWord cpu USED_IF_THREADS, StgTSO *tso)
      if (cpu == cap->no) {
         appendToRunQueue(cap,tso);
      } else {
-       migrateThreadToCapability_lock(&capabilities[cpu],tso);
+       wakeupThreadOnCapability(cap, &capabilities[cpu], tso);
      }
  #else
      appendToRunQueue(cap,tso);
@@ -1908,7 +1906,7 @@ scheduleWaitThread (StgTSO* tso, /*[out]*/HaskellObj* ret, Capability *cap)
   * ------------------------------------------------------------------------- */
  
  #if defined(THREADED_RTS)
-void
+void OSThreadProcAttr
  workerStart(Task *task)
  {
      Capability *cap;
@@ -1950,7 +1948,6 @@ initScheduler(void)
  
    blackhole_queue   = END_TSO_QUEUE;
  
-  context_switch = 0;
    sched_state    = SCHED_RUNNING;
    recent_activity = ACTIVITY_YES;
  
@@ -2128,10 +2125,17 @@ threadStackOverflow(Capability *cap, StgTSO *tso)
    }
  
    /* Try to double the current stack size.  If that takes us over the
-   * maximum stack size for this thread, then use the maximum instead.
-   * Finally round up so the TSO ends up as a whole number of blocks.
+   * maximum stack size for this thread, then use the maximum instead
+   * (that is, unless we're already at or over the max size and we
+   * can't raise the StackOverflow exception (see above), in which
+   * case just double the size). Finally round up so the TSO ends up as
+   * a whole number of blocks.
     */
-  new_stack_size = stg_min(tso->stack_size * 2, tso->max_stack_size);
+  if (tso->stack_size >= tso->max_stack_size) {
+      new_stack_size = tso->stack_size * 2;
+  } else { 
+      new_stack_size = stg_min(tso->stack_size * 2, tso->max_stack_size);
+  }
    new_tso_size   = (lnat)BLOCK_ROUND_UP(new_stack_size * sizeof(W_) + 
                                        TSO_STRUCT_SIZE)/sizeof(W_);
    new_tso_size = round_to_mblocks(new_tso_size);  /* Be MBLOCK-friendly */
@@ -2188,7 +2192,7 @@ static StgTSO *
  threadStackUnderflow (Task *task STG_UNUSED, StgTSO *tso)
  {
      bdescr *bd, *new_bd;
-    lnat new_tso_size_w, tso_size_w;
+    lnat free_w, tso_size_w;
      StgTSO *new_tso;
  
      tso_size_w = tso_sizeW(tso);
@@ -2203,19 +2207,19 @@ threadStackUnderflow (Task *task STG_UNUSED, StgTSO *tso)
      // while we are moving the TSO:
      lockClosure((StgClosure *)tso);
  
-    new_tso_size_w = round_to_mblocks(tso_size_w/2);
-
-    debugTrace(DEBUG_sched, "thread %ld: reducing TSO size from %lu words to %lu",
-               tso->id, tso_size_w, new_tso_size_w);
+    // this is the number of words we'll free
+    free_w = round_to_mblocks(tso_size_w/2);
  
      bd = Bdescr((StgPtr)tso);
-    new_bd = splitLargeBlock(bd, new_tso_size_w / BLOCK_SIZE_W);
-    new_bd->free = bd->free;
+    new_bd = splitLargeBlock(bd, free_w / BLOCK_SIZE_W);
      bd->free = bd->start + TSO_STRUCT_SIZEW;
  
      new_tso = (StgTSO *)new_bd->start;
      memcpy(new_tso,tso,TSO_STRUCT_SIZE);
-    new_tso->stack_size = new_tso_size_w - TSO_STRUCT_SIZEW;
+    new_tso->stack_size = new_bd->free - new_tso->stack;
+
+    debugTrace(DEBUG_sched, "thread %ld: reducing TSO size from %lu words to %lu",
+               (long)tso->id, tso_size_w, tso_sizeW(new_tso));
  
      tso->what_next = ThreadRelocated;
      tso->_link = new_tso; // no write barrier reqd: same generation
@@ -2243,7 +2247,7 @@ void
  interruptStgRts(void)
  {
      sched_state = SCHED_INTERRUPTING;
-    context_switch = 1;
+    setContextSwitches();
      wakeUpRts();
  }
  
@@ -2307,8 +2311,6 @@ checkBlackHoles (Capability *cap)
         if (type != BLACKHOLE && type != CAF_BLACKHOLE) {
             IF_DEBUG(sanity,checkTSO(t));
             t = unblockOne(cap, t);
-           // urk, the threads migrate to the current capability
-           // here, but we'd like to keep them on the original one.
             *prev = t;
             any_woke_up = rtsTrue;
         } else {