pad step_workspace to 64 bytes, to speed up access to gct->steps[]
[ghc-hetmet.git] / rts / Schedule.c
index 375520d..5fa949c 100644 (file)
@@ -222,6 +222,7 @@ static Capability *scheduleDoGC(Capability *cap, Task *task,
 static rtsBool checkBlackHoles(Capability *cap);
 
 static StgTSO *threadStackOverflow(Capability *cap, StgTSO *tso);
+static StgTSO *threadStackUnderflow(Task *task, StgTSO *tso);
 
 static void deleteThread (Capability *cap, StgTSO *tso);
 static void deleteAllThreads (Capability *cap);
@@ -598,7 +599,7 @@ run_thread:
         // ACTIVITY_DONE_GC means we turned off the timer signal to
         // conserve power (see #1623).  Re-enable it here.
         nat prev;
-        prev = xchg(&recent_activity, ACTIVITY_YES);
+        prev = xchg((P_)&recent_activity, ACTIVITY_YES);
         if (prev == ACTIVITY_DONE_GC) {
             startTimer();
         }
@@ -683,6 +684,8 @@ run_thread:
     
     schedulePostRunThread();
 
+    t = threadStackUnderflow(task,t);
+
     ready_to_gc = rtsFalse;
 
     switch (ret) {
@@ -718,9 +721,6 @@ run_thread:
       cap = scheduleDoGC(cap,task,rtsFalse);
     }
   } /* end of while() */
-
-  debugTrace(PAR_DEBUG_verbose,
-            "== Leaving schedule() after having received Finish");
 }
 
 /* ----------------------------------------------------------------------------
@@ -1634,7 +1634,16 @@ scheduleHandleHeapOverflow( Capability *cap, StgTSO *t )
     }
 #endif
       
-    pushOnRunQueue(cap,t);
+    if (context_switch) {
+        // Sometimes we miss a context switch, e.g. when calling
+        // primitives in a tight loop, MAYBE_GC() doesn't check the
+        // context switch flag, and we end up waiting for a GC.
+        // See #1984, and concurrent/should_run/1984
+        context_switch = 0;
+        addToRunQueue(cap,t);
+    } else {
+        pushOnRunQueue(cap,t);
+    }
     return rtsTrue;
     /* actual GC is done at the end of the while loop in schedule() */
 }
@@ -2664,87 +2673,6 @@ freeScheduler( void )
 #endif
 }
 
-/* ---------------------------------------------------------------------------
-   Where are the roots that we know about?
-
-        - all the threads on the runnable queue
-        - all the threads on the blocked queue
-        - all the threads on the sleeping queue
-       - all the thread currently executing a _ccall_GC
-        - all the "main threads"
-     
-   ------------------------------------------------------------------------ */
-
-/* This has to be protected either by the scheduler monitor, or by the
-       garbage collection monitor (probably the latter).
-       KH @ 25/10/99
-*/
-
-void
-GetRoots( evac_fn evac )
-{
-    nat i;
-    Capability *cap;
-    Task *task;
-
-#if defined(GRAN)
-    for (i=0; i<=RtsFlags.GranFlags.proc; i++) {
-       if ((run_queue_hds[i] != END_TSO_QUEUE) && ((run_queue_hds[i] != NULL)))
-           evac((StgClosure **)&run_queue_hds[i]);
-       if ((run_queue_tls[i] != END_TSO_QUEUE) && ((run_queue_tls[i] != NULL)))
-           evac((StgClosure **)&run_queue_tls[i]);
-       
-       if ((blocked_queue_hds[i] != END_TSO_QUEUE) && ((blocked_queue_hds[i] != NULL)))
-           evac((StgClosure **)&blocked_queue_hds[i]);
-       if ((blocked_queue_tls[i] != END_TSO_QUEUE) && ((blocked_queue_tls[i] != NULL)))
-           evac((StgClosure **)&blocked_queue_tls[i]);
-       if ((ccalling_threadss[i] != END_TSO_QUEUE) && ((ccalling_threadss[i] != NULL)))
-           evac((StgClosure **)&ccalling_threads[i]);
-    }
-
-    markEventQueue();
-
-#else /* !GRAN */
-
-    for (i = 0; i < n_capabilities; i++) {
-       cap = &capabilities[i];
-       evac((StgClosure **)(void *)&cap->run_queue_hd);
-       evac((StgClosure **)(void *)&cap->run_queue_tl);
-#if defined(THREADED_RTS)
-       evac((StgClosure **)(void *)&cap->wakeup_queue_hd);
-       evac((StgClosure **)(void *)&cap->wakeup_queue_tl);
-#endif
-       for (task = cap->suspended_ccalling_tasks; task != NULL; 
-            task=task->next) {
-           debugTrace(DEBUG_sched,
-                      "evac'ing suspended TSO %lu", (unsigned long)task->suspended_tso->id);
-           evac((StgClosure **)(void *)&task->suspended_tso);
-       }
-
-    }
-    
-
-#if !defined(THREADED_RTS)
-    evac((StgClosure **)(void *)&blocked_queue_hd);
-    evac((StgClosure **)(void *)&blocked_queue_tl);
-    evac((StgClosure **)(void *)&sleeping_queue);
-#endif 
-#endif
-
-    // evac((StgClosure **)&blackhole_queue);
-
-#if defined(THREADED_RTS) || defined(PARALLEL_HASKELL) || defined(GRAN)
-    markSparkQueue(evac);
-#endif
-    
-#if defined(RTS_USER_SIGNALS)
-    // mark the signal handlers (signals should be already blocked)
-    if (RtsFlags.MiscFlags.install_signal_handlers) {
-        markSignalHandlers(evac);
-    }
-#endif
-}
-
 /* -----------------------------------------------------------------------------
    performGC
 
@@ -2837,7 +2765,7 @@ threadStackOverflow(Capability *cap, StgTSO *tso)
             "increasing stack size from %ld words to %d.",
             (long)tso->stack_size, new_stack_size);
 
-  dest = (StgTSO *)allocate(new_tso_size);
+  dest = (StgTSO *)allocateLocal(cap,new_tso_size);
   TICK_ALLOC_TSO(new_stack_size,0);
 
   /* copy the TSO block and the old stack into the new area */
@@ -2880,6 +2808,54 @@ threadStackOverflow(Capability *cap, StgTSO *tso)
   return dest;
 }
 
+static StgTSO *
+threadStackUnderflow (Task *task STG_UNUSED, StgTSO *tso)
+{
+    bdescr *bd, *new_bd;
+    lnat new_tso_size_w, tso_size_w;
+    StgTSO *new_tso;
+
+    tso_size_w = tso_sizeW(tso);
+
+    if (tso_size_w < MBLOCK_SIZE_W || 
+        (nat)(tso->stack + tso->stack_size - tso->sp) > tso->stack_size / 4) 
+    {
+        return tso;
+    }
+
+    // don't allow throwTo() to modify the blocked_exceptions queue
+    // while we are moving the TSO:
+    lockClosure((StgClosure *)tso);
+
+    new_tso_size_w = round_to_mblocks(tso_size_w/2);
+
+    debugTrace(DEBUG_sched, "thread %ld: reducing TSO size from %lu words to %lu",
+               tso->id, tso_size_w, new_tso_size_w);
+
+    bd = Bdescr((StgPtr)tso);
+    new_bd = splitLargeBlock(bd, new_tso_size_w / BLOCK_SIZE_W);
+
+    new_tso = (StgTSO *)new_bd->start;
+    memcpy(new_tso,tso,TSO_STRUCT_SIZE);
+    new_tso->stack_size = new_tso_size_w - TSO_STRUCT_SIZEW;
+
+    tso->what_next = ThreadRelocated;
+    tso->_link = new_tso; // no write barrier reqd: same generation
+
+    // The TSO attached to this Task may have moved, so update the
+    // pointer to it.
+    if (task->tso == tso) {
+        task->tso = new_tso;
+    }
+
+    unlockTSO(new_tso);
+    unlockTSO(tso);
+
+    IF_DEBUG(sanity,checkTSO(new_tso));
+
+    return new_tso;
+}
+
 /* ---------------------------------------------------------------------------
    Interrupt execution
    - usually called inside a signal handler so it mustn't do anything fancy.   
@@ -2949,7 +2925,7 @@ checkBlackHoles (Capability *cap)
     t = blackhole_queue;
     while (t != END_TSO_QUEUE) {
        ASSERT(t->why_blocked == BlockedOnBlackHole);
-       type = get_itbl(t->block_info.closure)->type;
+       type = get_itbl(UNTAG_CLOSURE(t->block_info.closure))->type;
        if (type != BLACKHOLE && type != CAF_BLACKHOLE) {
            IF_DEBUG(sanity,checkTSO(t));
            t = unblockOne(cap, t);