rts/Schedule.c

   1 /* ---------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2006
   4  *
   5  * The scheduler and thread-related functionality
   6  *
   7  * --------------------------------------------------------------------------*/
   8
   9 #include "PosixSource.h"
  10 #define KEEP_LOCKCLOSURE
  11 #include "Rts.h"
  12
  13 #include "sm/Storage.h"
  14 #include "RtsUtils.h"
  15 #include "StgRun.h"
  16 #include "Schedule.h"
  17 #include "Interpreter.h"
  18 #include "Printer.h"
  19 #include "RtsSignals.h"
  20 #include "sm/Sanity.h"
  21 #include "Stats.h"
  22 #include "STM.h"
  23 #include "Prelude.h"
  24 #include "ThreadLabels.h"
  25 #include "Updates.h"
  26 #include "Proftimer.h"
  27 #include "ProfHeap.h"
  28 #include "Weak.h"
  29 #include "sm/GC.h" // waitForGcThreads, releaseGCThreads, N
  30 #include "Sparks.h"
  31 #include "Capability.h"
  32 #include "Task.h"
  33 #include "AwaitEvent.h"
  34 #if defined(mingw32_HOST_OS)
  35 #include "win32/IOManager.h"
  36 #endif
  37 #include "Trace.h"
  38 #include "RaiseAsync.h"
  39 #include "Threads.h"
  40 #include "Timer.h"
  41 #include "ThreadPaused.h"
  42 #include "Messages.h"
  43
  44 #ifdef HAVE_SYS_TYPES_H
  45 #include <sys/types.h>
  46 #endif
  47 #ifdef HAVE_UNISTD_H
  48 #include <unistd.h>
  49 #endif
  50
  51 #include <string.h>
  52 #include <stdlib.h>
  53 #include <stdarg.h>
  54
  55 #ifdef HAVE_ERRNO_H
  56 #include <errno.h>
  57 #endif
  58
  59 #ifdef TRACING
  60 #include "eventlog/EventLog.h"
  61 #endif
  62 /* -----------------------------------------------------------------------------
  63  * Global variables
  64  * -------------------------------------------------------------------------- */
  65
  66 #if !defined(THREADED_RTS)
  67 // Blocked/sleeping thrads
  68 StgTSO *blocked_queue_hd = NULL;
  69 StgTSO *blocked_queue_tl = NULL;
  70 StgTSO *sleeping_queue = NULL;    // perhaps replace with a hash table?
  71 #endif
  72
  73 /* Set to true when the latest garbage collection failed to reclaim
  74  * enough space, and the runtime should proceed to shut itself down in
  75  * an orderly fashion (emitting profiling info etc.)
  76  */
  77 rtsBool heap_overflow = rtsFalse;
  78
  79 /* flag that tracks whether we have done any execution in this time slice.
  80  * LOCK: currently none, perhaps we should lock (but needs to be
  81  * updated in the fast path of the scheduler).
  82  *
  83  * NB. must be StgWord, we do xchg() on it.
  84  */
  85 volatile StgWord recent_activity = ACTIVITY_YES;
  86
  87 /* if this flag is set as well, give up execution
  88  * LOCK: none (changes monotonically)
  89  */
  90 volatile StgWord sched_state = SCHED_RUNNING;
  91
  92 /*  This is used in `TSO.h' and gcc 2.96 insists that this variable actually
  93  *  exists - earlier gccs apparently didn't.
  94  *  -= chak
  95  */
  96 StgTSO dummy_tso;
  97
  98 /*
  99  * Set to TRUE when entering a shutdown state (via shutdownHaskellAndExit()) --
 100  * in an MT setting, needed to signal that a worker thread shouldn't hang around
 101  * in the scheduler when it is out of work.
 102  */
 103 rtsBool shutting_down_scheduler = rtsFalse;
 104
 105 /*
 106  * This mutex protects most of the global scheduler data in
 107  * the THREADED_RTS runtime.
 108  */
 109 #if defined(THREADED_RTS)
 110 Mutex sched_mutex;
 111 #endif
 112
 113 #if !defined(mingw32_HOST_OS)
 114 #define FORKPROCESS_PRIMOP_SUPPORTED
 115 #endif
 116
 117 /* -----------------------------------------------------------------------------
 118  * static function prototypes
 119  * -------------------------------------------------------------------------- */
 120
 121 static Capability *schedule (Capability *initialCapability, Task *task);
 122
 123 //
 124 // These function all encapsulate parts of the scheduler loop, and are
 125 // abstracted only to make the structure and control flow of the
 126 // scheduler clearer.
 127 //
 128 static void schedulePreLoop (void);
 129 static void scheduleFindWork (Capability *cap);
 130 #if defined(THREADED_RTS)
 131 static void scheduleYield (Capability **pcap, Task *task);
 132 #endif
 133 static void scheduleStartSignalHandlers (Capability *cap);
 134 static void scheduleCheckBlockedThreads (Capability *cap);
 135 static void scheduleProcessInbox(Capability *cap);
 136 static void scheduleDetectDeadlock (Capability *cap, Task *task);
 137 static void schedulePushWork(Capability *cap, Task *task);
 138 #if defined(THREADED_RTS)
 139 static void scheduleActivateSpark(Capability *cap);
 140 #endif
 141 static void schedulePostRunThread(Capability *cap, StgTSO *t);
 142 static rtsBool scheduleHandleHeapOverflow( Capability *cap, StgTSO *t );
 143 static void scheduleHandleStackOverflow( Capability *cap, Task *task,
 144                                          StgTSO *t);
 145 static rtsBool scheduleHandleYield( Capability *cap, StgTSO *t,
 146                                     nat prev_what_next );
 147 static void scheduleHandleThreadBlocked( StgTSO *t );
 148 static rtsBool scheduleHandleThreadFinished( Capability *cap, Task *task,
 149                                              StgTSO *t );
 150 static rtsBool scheduleNeedHeapProfile(rtsBool ready_to_gc);
 151 static Capability *scheduleDoGC(Capability *cap, Task *task,
 152                                 rtsBool force_major);
 153
 154 static StgTSO *threadStackOverflow(Capability *cap, StgTSO *tso);
 155 static StgTSO *threadStackUnderflow(Capability *cap, Task *task, StgTSO *tso);
 156
 157 static void deleteThread (Capability *cap, StgTSO *tso);
 158 static void deleteAllThreads (Capability *cap);
 159
 160 #ifdef FORKPROCESS_PRIMOP_SUPPORTED
 161 static void deleteThread_(Capability *cap, StgTSO *tso);
 162 #endif
 163
 164 /* ---------------------------------------------------------------------------
 165    Main scheduling loop.
 166
 167    We use round-robin scheduling, each thread returning to the
 168    scheduler loop when one of these conditions is detected:
 169
 170       * out of heap space
 171       * timer expires (thread yields)
 172       * thread blocks
 173       * thread ends
 174       * stack overflow
 175
 176    GRAN version:
 177      In a GranSim setup this loop iterates over the global event queue.
 178      This revolves around the global event queue, which determines what
 179      to do next. Therefore, it's more complicated than either the
 180      concurrent or the parallel (GUM) setup.
 181   This version has been entirely removed (JB 2008/08).
 182
 183    GUM version:
 184      GUM iterates over incoming messages.
 185      It starts with nothing to do (thus CurrentTSO == END_TSO_QUEUE),
 186      and sends out a fish whenever it has nothing to do; in-between
 187      doing the actual reductions (shared code below) it processes the
 188      incoming messages and deals with delayed operations
 189      (see PendingFetches).
 190      This is not the ugliest code you could imagine, but it's bloody close.
 191
 192   (JB 2008/08) This version was formerly indicated by a PP-Flag PAR,
 193   now by PP-flag PARALLEL_HASKELL. The Eden RTS (in GHC-6.x) uses it,
 194   as well as future GUM versions. This file has been refurbished to
 195   only contain valid code, which is however incomplete, refers to
 196   invalid includes etc.
 197
 198    ------------------------------------------------------------------------ */
 199
 200 static Capability *
 201 schedule (Capability *initialCapability, Task *task)
 202 {
 203   StgTSO *t;
 204   Capability *cap;
 205   StgThreadReturnCode ret;
 206   nat prev_what_next;
 207   rtsBool ready_to_gc;
 208 #if defined(THREADED_RTS)
 209   rtsBool first = rtsTrue;
 210 #endif
 211
 212   cap = initialCapability;
 213
 214   // Pre-condition: this task owns initialCapability.
 215   // The sched_mutex is *NOT* held
 216   // NB. on return, we still hold a capability.
 217
 218   debugTrace (DEBUG_sched, "cap %d: schedule()", initialCapability->no);
 219
 220   schedulePreLoop();
 221
 222   // -----------------------------------------------------------
 223   // Scheduler loop starts here:
 224
 225   while (1) {
 226
 227     // Check whether we have re-entered the RTS from Haskell without
 228     // going via suspendThread()/resumeThread (i.e. a 'safe' foreign
 229     // call).
 230     if (cap->in_haskell) {
 231           errorBelch("schedule: re-entered unsafely.\n"
 232                      "   Perhaps a 'foreign import unsafe' should be 'safe'?");
 233           stg_exit(EXIT_FAILURE);
 234     }
 235
 236     // The interruption / shutdown sequence.
 237     //
 238     // In order to cleanly shut down the runtime, we want to:
 239     //   * make sure that all main threads return to their callers
 240     //     with the state 'Interrupted'.
 241     //   * clean up all OS threads assocated with the runtime
 242     //   * free all memory etc.
 243     //
 244     // So the sequence for ^C goes like this:
 245     //
 246     //   * ^C handler sets sched_state := SCHED_INTERRUPTING and
 247     //     arranges for some Capability to wake up
 248     //
 249     //   * all threads in the system are halted, and the zombies are
 250     //     placed on the run queue for cleaning up.  We acquire all
 251     //     the capabilities in order to delete the threads, this is
 252     //     done by scheduleDoGC() for convenience (because GC already
 253     //     needs to acquire all the capabilities).  We can't kill
 254     //     threads involved in foreign calls.
 255     //
 256     //   * somebody calls shutdownHaskell(), which calls exitScheduler()
 257     //
 258     //   * sched_state := SCHED_SHUTTING_DOWN
 259     //
 260     //   * all workers exit when the run queue on their capability
 261     //     drains.  All main threads will also exit when their TSO
 262     //     reaches the head of the run queue and they can return.
 263     //
 264     //   * eventually all Capabilities will shut down, and the RTS can
 265     //     exit.
 266     //
 267     //   * We might be left with threads blocked in foreign calls,
 268     //     we should really attempt to kill these somehow (TODO);
 269
 270     switch (sched_state) {
 271     case SCHED_RUNNING:
 272         break;
 273     case SCHED_INTERRUPTING:
 274         debugTrace(DEBUG_sched, "SCHED_INTERRUPTING");
 275 #if defined(THREADED_RTS)
 276         discardSparksCap(cap);
 277 #endif
 278         /* scheduleDoGC() deletes all the threads */
 279         cap = scheduleDoGC(cap,task,rtsFalse);
 280
 281         // after scheduleDoGC(), we must be shutting down.  Either some
 282         // other Capability did the final GC, or we did it above,
 283         // either way we can fall through to the SCHED_SHUTTING_DOWN
 284         // case now.
 285         ASSERT(sched_state == SCHED_SHUTTING_DOWN);
 286         // fall through
 287
 288     case SCHED_SHUTTING_DOWN:
 289         debugTrace(DEBUG_sched, "SCHED_SHUTTING_DOWN");
 290         // If we are a worker, just exit.  If we're a bound thread
 291         // then we will exit below when we've removed our TSO from
 292         // the run queue.
 293         if (!isBoundTask(task) && emptyRunQueue(cap)) {
 294             return cap;
 295         }
 296         break;
 297     default:
 298         barf("sched_state: %d", sched_state);
 299     }
 300
 301     scheduleFindWork(cap);
 302
 303     /* work pushing, currently relevant only for THREADED_RTS:
 304        (pushes threads, wakes up idle capabilities for stealing) */
 305     schedulePushWork(cap,task);
 306
 307     scheduleDetectDeadlock(cap,task);
 308
 309 #if defined(THREADED_RTS)
 310     cap = task->cap;    // reload cap, it might have changed
 311 #endif
 312
 313     // Normally, the only way we can get here with no threads to
 314     // run is if a keyboard interrupt received during
 315     // scheduleCheckBlockedThreads() or scheduleDetectDeadlock().
 316     // Additionally, it is not fatal for the
 317     // threaded RTS to reach here with no threads to run.
 318     //
 319     // win32: might be here due to awaitEvent() being abandoned
 320     // as a result of a console event having been delivered.
 321
 322 #if defined(THREADED_RTS)
 323     if (first)
 324     {
 325     // XXX: ToDo
 326     //     // don't yield the first time, we want a chance to run this
 327     //     // thread for a bit, even if there are others banging at the
 328     //     // door.
 329     //     first = rtsFalse;
 330     //     ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 331     }
 332
 333     scheduleYield(&cap,task);
 334
 335     if (emptyRunQueue(cap)) continue; // look for work again
 336 #endif
 337
 338 #if !defined(THREADED_RTS) && !defined(mingw32_HOST_OS)
 339     if ( emptyRunQueue(cap) ) {
 340         ASSERT(sched_state >= SCHED_INTERRUPTING);
 341     }
 342 #endif
 343
 344     //
 345     // Get a thread to run
 346     //
 347     t = popRunQueue(cap);
 348
 349     // Sanity check the thread we're about to run.  This can be
 350     // expensive if there is lots of thread switching going on...
 351     IF_DEBUG(sanity,checkTSO(t));
 352
 353 #if defined(THREADED_RTS)
 354     // Check whether we can run this thread in the current task.
 355     // If not, we have to pass our capability to the right task.
 356     {
 357         InCall *bound = t->bound;
 358
 359         if (bound) {
 360             if (bound->task == task) {
 361                 // yes, the Haskell thread is bound to the current native thread
 362             } else {
 363                 debugTrace(DEBUG_sched,
 364                            "thread %lu bound to another OS thread",
 365                            (unsigned long)t->id);
 366                 // no, bound to a different Haskell thread: pass to that thread
 367                 pushOnRunQueue(cap,t);
 368                 continue;
 369             }
 370         } else {
 371             // The thread we want to run is unbound.
 372             if (task->incall->tso) {
 373                 debugTrace(DEBUG_sched,
 374                            "this OS thread cannot run thread %lu",
 375                            (unsigned long)t->id);
 376                 // no, the current native thread is bound to a different
 377                 // Haskell thread, so pass it to any worker thread
 378                 pushOnRunQueue(cap,t);
 379                 continue;
 380             }
 381         }
 382     }
 383 #endif
 384
 385     // If we're shutting down, and this thread has not yet been
 386     // killed, kill it now.  This sometimes happens when a finalizer
 387     // thread is created by the final GC, or a thread previously
 388     // in a foreign call returns.
 389     if (sched_state >= SCHED_INTERRUPTING &&
 390         !(t->what_next == ThreadComplete || t->what_next == ThreadKilled)) {
 391         deleteThread(cap,t);
 392     }
 393
 394     /* context switches are initiated by the timer signal, unless
 395      * the user specified "context switch as often as possible", with
 396      * +RTS -C0
 397      */
 398     if (RtsFlags.ConcFlags.ctxtSwitchTicks == 0
 399         && !emptyThreadQueues(cap)) {
 400         cap->context_switch = 1;
 401     }
 402
 403 run_thread:
 404
 405     // CurrentTSO is the thread to run.  t might be different if we
 406     // loop back to run_thread, so make sure to set CurrentTSO after
 407     // that.
 408     cap->r.rCurrentTSO = t;
 409
 410     startHeapProfTimer();
 411
 412     // ----------------------------------------------------------------------
 413     // Run the current thread
 414
 415     ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 416     ASSERT(t->cap == cap);
 417     ASSERT(t->bound ? t->bound->task->cap == cap : 1);
 418
 419     prev_what_next = t->what_next;
 420
 421     errno = t->saved_errno;
 422 #if mingw32_HOST_OS
 423     SetLastError(t->saved_winerror);
 424 #endif
 425
 426     cap->in_haskell = rtsTrue;
 427
 428     dirty_TSO(cap,t);
 429
 430 #if defined(THREADED_RTS)
 431     if (recent_activity == ACTIVITY_DONE_GC) {
 432         // ACTIVITY_DONE_GC means we turned off the timer signal to
 433         // conserve power (see #1623).  Re-enable it here.
 434         nat prev;
 435         prev = xchg((P_)&recent_activity, ACTIVITY_YES);
 436         if (prev == ACTIVITY_DONE_GC) {
 437             startTimer();
 438         }
 439     } else if (recent_activity != ACTIVITY_INACTIVE) {
 440         // If we reached ACTIVITY_INACTIVE, then don't reset it until
 441         // we've done the GC.  The thread running here might just be
 442         // the IO manager thread that handle_tick() woke up via
 443         // wakeUpRts().
 444         recent_activity = ACTIVITY_YES;
 445     }
 446 #endif
 447
 448     traceEventRunThread(cap, t);
 449
 450     switch (prev_what_next) {
 451
 452     case ThreadKilled:
 453     case ThreadComplete:
 454         /* Thread already finished, return to scheduler. */
 455         ret = ThreadFinished;
 456         break;
 457
 458     case ThreadRunGHC:
 459     {
 460         StgRegTable *r;
 461         r = StgRun((StgFunPtr) stg_returnToStackTop, &cap->r);
 462         cap = regTableToCapability(r);
 463         ret = r->rRet;
 464         break;
 465     }
 466
 467     case ThreadInterpret:
 468         cap = interpretBCO(cap);
 469         ret = cap->r.rRet;
 470         break;
 471
 472     default:
 473         barf("schedule: invalid what_next field");
 474     }
 475
 476     cap->in_haskell = rtsFalse;
 477
 478     // The TSO might have moved, eg. if it re-entered the RTS and a GC
 479     // happened.  So find the new location:
 480     t = cap->r.rCurrentTSO;
 481
 482     // And save the current errno in this thread.
 483     // XXX: possibly bogus for SMP because this thread might already
 484     // be running again, see code below.
 485     t->saved_errno = errno;
 486 #if mingw32_HOST_OS
 487     // Similarly for Windows error code
 488     t->saved_winerror = GetLastError();
 489 #endif
 490
 491     traceEventStopThread(cap, t, ret);
 492
 493     ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 494     ASSERT(t->cap == cap);
 495
 496     // ----------------------------------------------------------------------
 497
 498     // Costs for the scheduler are assigned to CCS_SYSTEM
 499     stopHeapProfTimer();
 500 #if defined(PROFILING)
 501     CCCS = CCS_SYSTEM;
 502 #endif
 503
 504     schedulePostRunThread(cap,t);
 505
 506     if (ret != StackOverflow) {
 507         t = threadStackUnderflow(cap,task,t);
 508     }
 509
 510     ready_to_gc = rtsFalse;
 511
 512     switch (ret) {
 513     case HeapOverflow:
 514         ready_to_gc = scheduleHandleHeapOverflow(cap,t);
 515         break;
 516
 517     case StackOverflow:
 518         scheduleHandleStackOverflow(cap,task,t);
 519         break;
 520
 521     case ThreadYielding:
 522         if (scheduleHandleYield(cap, t, prev_what_next)) {
 523             // shortcut for switching between compiler/interpreter:
 524             goto run_thread;
 525         }
 526         break;
 527
 528     case ThreadBlocked:
 529         scheduleHandleThreadBlocked(t);
 530         break;
 531
 532     case ThreadFinished:
 533         if (scheduleHandleThreadFinished(cap, task, t)) return cap;
 534         ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 535         break;
 536
 537     default:
 538       barf("schedule: invalid thread return code %d", (int)ret);
 539     }
 540
 541     if (ready_to_gc || scheduleNeedHeapProfile(ready_to_gc)) {
 542       cap = scheduleDoGC(cap,task,rtsFalse);
 543     }
 544   } /* end of while() */
 545 }
 546
 547 /* -----------------------------------------------------------------------------
 548  * Run queue operations
 549  * -------------------------------------------------------------------------- */
 550
 551 void
 552 removeFromRunQueue (Capability *cap, StgTSO *tso)
 553 {
 554     if (tso->block_info.prev == END_TSO_QUEUE) {
 555         ASSERT(cap->run_queue_hd == tso);
 556         cap->run_queue_hd = tso->_link;
 557     } else {
 558         setTSOLink(cap, tso->block_info.prev, tso->_link);
 559     }
 560     if (tso->_link == END_TSO_QUEUE) {
 561         ASSERT(cap->run_queue_tl == tso);
 562         cap->run_queue_tl = tso->block_info.prev;
 563     } else {
 564         setTSOPrev(cap, tso->_link, tso->block_info.prev);
 565     }
 566     tso->_link = tso->block_info.prev = END_TSO_QUEUE;
 567
 568     IF_DEBUG(sanity, checkRunQueue(cap));
 569 }
 570
 571 /* ----------------------------------------------------------------------------
 572  * Setting up the scheduler loop
 573  * ------------------------------------------------------------------------- */
 574
 575 static void
 576 schedulePreLoop(void)
 577 {
 578   // initialisation for scheduler - what cannot go into initScheduler()
 579 }
 580
 581 /* -----------------------------------------------------------------------------
 582  * scheduleFindWork()
 583  *
 584  * Search for work to do, and handle messages from elsewhere.
 585  * -------------------------------------------------------------------------- */
 586
 587 static void
 588 scheduleFindWork (Capability *cap)
 589 {
 590     scheduleStartSignalHandlers(cap);
 591
 592     scheduleProcessInbox(cap);
 593
 594     scheduleCheckBlockedThreads(cap);
 595
 596 #if defined(THREADED_RTS)
 597     if (emptyRunQueue(cap)) { scheduleActivateSpark(cap); }
 598 #endif
 599 }
 600
 601 #if defined(THREADED_RTS)
 602 STATIC_INLINE rtsBool
 603 shouldYieldCapability (Capability *cap, Task *task)
 604 {
 605     // we need to yield this capability to someone else if..
 606     //   - another thread is initiating a GC
 607     //   - another Task is returning from a foreign call
 608     //   - the thread at the head of the run queue cannot be run
 609     //     by this Task (it is bound to another Task, or it is unbound
 610     //     and this task it bound).
 611     return (waiting_for_gc ||
 612             cap->returning_tasks_hd != NULL ||
 613             (!emptyRunQueue(cap) && (task->incall->tso == NULL
 614                                      ? cap->run_queue_hd->bound != NULL
 615                                      : cap->run_queue_hd->bound != task->incall)));
 616 }
 617
 618 // This is the single place where a Task goes to sleep.  There are
 619 // two reasons it might need to sleep:
 620 //    - there are no threads to run
 621 //    - we need to yield this Capability to someone else
 622 //      (see shouldYieldCapability())
 623 //
 624 // Careful: the scheduler loop is quite delicate.  Make sure you run
 625 // the tests in testsuite/concurrent (all ways) after modifying this,
 626 // and also check the benchmarks in nofib/parallel for regressions.
 627
 628 static void
 629 scheduleYield (Capability **pcap, Task *task)
 630 {
 631     Capability *cap = *pcap;
 632
 633     // if we have work, and we don't need to give up the Capability, continue.
 634     //
 635     if (!shouldYieldCapability(cap,task) &&
 636         (!emptyRunQueue(cap) ||
 637          !emptyInbox(cap) ||
 638          sched_state >= SCHED_INTERRUPTING))
 639         return;
 640
 641     // otherwise yield (sleep), and keep yielding if necessary.
 642     do {
 643         yieldCapability(&cap,task);
 644     }
 645     while (shouldYieldCapability(cap,task));
 646
 647     // note there may still be no threads on the run queue at this
 648     // point, the caller has to check.
 649
 650     *pcap = cap;
 651     return;
 652 }
 653 #endif
 654
 655 /* -----------------------------------------------------------------------------
 656  * schedulePushWork()
 657  *
 658  * Push work to other Capabilities if we have some.
 659  * -------------------------------------------------------------------------- */
 660
 661 static void
 662 schedulePushWork(Capability *cap USED_IF_THREADS,
 663                  Task *task      USED_IF_THREADS)
 664 {
 665   /* following code not for PARALLEL_HASKELL. I kept the call general,
 666      future GUM versions might use pushing in a distributed setup */
 667 #if defined(THREADED_RTS)
 668
 669     Capability *free_caps[n_capabilities], *cap0;
 670     nat i, n_free_caps;
 671
 672     // migration can be turned off with +RTS -qm
 673     if (!RtsFlags.ParFlags.migrate) return;
 674
 675     // Check whether we have more threads on our run queue, or sparks
 676     // in our pool, that we could hand to another Capability.
 677     if (cap->run_queue_hd == END_TSO_QUEUE) {
 678         if (sparkPoolSizeCap(cap) < 2) return;
 679     } else {
 680         if (cap->run_queue_hd->_link == END_TSO_QUEUE &&
 681             sparkPoolSizeCap(cap) < 1) return;
 682     }
 683
 684     // First grab as many free Capabilities as we can.
 685     for (i=0, n_free_caps=0; i < n_capabilities; i++) {
 686         cap0 = &capabilities[i];
 687         if (cap != cap0 && tryGrabCapability(cap0,task)) {
 688             if (!emptyRunQueue(cap0)
 689                 || cap->returning_tasks_hd != NULL
 690                 || cap->inbox != (Message*)END_TSO_QUEUE) {
 691                 // it already has some work, we just grabbed it at
 692                 // the wrong moment.  Or maybe it's deadlocked!
 693                 releaseCapability(cap0);
 694             } else {
 695                 free_caps[n_free_caps++] = cap0;
 696             }
 697         }
 698     }
 699
 700     // we now have n_free_caps free capabilities stashed in
 701     // free_caps[].  Share our run queue equally with them.  This is
 702     // probably the simplest thing we could do; improvements we might
 703     // want to do include:
 704     //
 705     //   - giving high priority to moving relatively new threads, on
 706     //     the gournds that they haven't had time to build up a
 707     //     working set in the cache on this CPU/Capability.
 708     //
 709     //   - giving low priority to moving long-lived threads
 710
 711     if (n_free_caps > 0) {
 712         StgTSO *prev, *t, *next;
 713         rtsBool pushed_to_all;
 714
 715         debugTrace(DEBUG_sched,
 716                    "cap %d: %s and %d free capabilities, sharing...",
 717                    cap->no,
 718                    (!emptyRunQueue(cap) && cap->run_queue_hd->_link != END_TSO_QUEUE)?
 719                    "excess threads on run queue":"sparks to share (>=2)",
 720                    n_free_caps);
 721
 722         i = 0;
 723         pushed_to_all = rtsFalse;
 724
 725         if (cap->run_queue_hd != END_TSO_QUEUE) {
 726             prev = cap->run_queue_hd;
 727             t = prev->_link;
 728             prev->_link = END_TSO_QUEUE;
 729             for (; t != END_TSO_QUEUE; t = next) {
 730                 next = t->_link;
 731                 t->_link = END_TSO_QUEUE;
 732                 if (t->what_next == ThreadRelocated
 733                     || t->bound == task->incall // don't move my bound thread
 734                     || tsoLocked(t)) {  // don't move a locked thread
 735                     setTSOLink(cap, prev, t);
 736                     setTSOPrev(cap, t, prev);
 737                     prev = t;
 738                 } else if (i == n_free_caps) {
 739                     pushed_to_all = rtsTrue;
 740                     i = 0;
 741                     // keep one for us
 742                     setTSOLink(cap, prev, t);
 743                     setTSOPrev(cap, t, prev);
 744                     prev = t;
 745                 } else {
 746                     appendToRunQueue(free_caps[i],t);
 747
 748                     traceEventMigrateThread (cap, t, free_caps[i]->no);
 749
 750                     if (t->bound) { t->bound->task->cap = free_caps[i]; }
 751                     t->cap = free_caps[i];
 752                     i++;
 753                 }
 754             }
 755             cap->run_queue_tl = prev;
 756
 757             IF_DEBUG(sanity, checkRunQueue(cap));
 758         }
 759
 760 #ifdef SPARK_PUSHING
 761         /* JB I left this code in place, it would work but is not necessary */
 762
 763         // If there are some free capabilities that we didn't push any
 764         // threads to, then try to push a spark to each one.
 765         if (!pushed_to_all) {
 766             StgClosure *spark;
 767             // i is the next free capability to push to
 768             for (; i < n_free_caps; i++) {
 769                 if (emptySparkPoolCap(free_caps[i])) {
 770                     spark = tryStealSpark(cap->sparks);
 771                     if (spark != NULL) {
 772                         debugTrace(DEBUG_sched, "pushing spark %p to capability %d", spark, free_caps[i]->no);
 773
 774             traceEventStealSpark(free_caps[i], t, cap->no);
 775
 776                         newSpark(&(free_caps[i]->r), spark);
 777                     }
 778                 }
 779             }
 780         }
 781 #endif /* SPARK_PUSHING */
 782
 783         // release the capabilities
 784         for (i = 0; i < n_free_caps; i++) {
 785             task->cap = free_caps[i];
 786             releaseAndWakeupCapability(free_caps[i]);
 787         }
 788     }
 789     task->cap = cap; // reset to point to our Capability.
 790
 791 #endif /* THREADED_RTS */
 792
 793 }
 794
 795 /* ----------------------------------------------------------------------------
 796  * Start any pending signal handlers
 797  * ------------------------------------------------------------------------- */
 798
 799 #if defined(RTS_USER_SIGNALS) && !defined(THREADED_RTS)
 800 static void
 801 scheduleStartSignalHandlers(Capability *cap)
 802 {
 803     if (RtsFlags.MiscFlags.install_signal_handlers && signals_pending()) {
 804         // safe outside the lock
 805         startSignalHandlers(cap);
 806     }
 807 }
 808 #else
 809 static void
 810 scheduleStartSignalHandlers(Capability *cap STG_UNUSED)
 811 {
 812 }
 813 #endif
 814
 815 /* ----------------------------------------------------------------------------
 816  * Check for blocked threads that can be woken up.
 817  * ------------------------------------------------------------------------- */
 818
 819 static void
 820 scheduleCheckBlockedThreads(Capability *cap USED_IF_NOT_THREADS)
 821 {
 822 #if !defined(THREADED_RTS)
 823     //
 824     // Check whether any waiting threads need to be woken up.  If the
 825     // run queue is empty, and there are no other tasks running, we
 826     // can wait indefinitely for something to happen.
 827     //
 828     if ( !emptyQueue(blocked_queue_hd) || !emptyQueue(sleeping_queue) )
 829     {
 830         awaitEvent (emptyRunQueue(cap));
 831     }
 832 #endif
 833 }
 834
 835 /* ----------------------------------------------------------------------------
 836  * Detect deadlock conditions and attempt to resolve them.
 837  * ------------------------------------------------------------------------- */
 838
 839 static void
 840 scheduleDetectDeadlock (Capability *cap, Task *task)
 841 {
 842     /*
 843      * Detect deadlock: when we have no threads to run, there are no
 844      * threads blocked, waiting for I/O, or sleeping, and all the
 845      * other tasks are waiting for work, we must have a deadlock of
 846      * some description.
 847      */
 848     if ( emptyThreadQueues(cap) )
 849     {
 850 #if defined(THREADED_RTS)
 851         /*
 852          * In the threaded RTS, we only check for deadlock if there
 853          * has been no activity in a complete timeslice.  This means
 854          * we won't eagerly start a full GC just because we don't have
 855          * any threads to run currently.
 856          */
 857         if (recent_activity != ACTIVITY_INACTIVE) return;
 858 #endif
 859
 860         debugTrace(DEBUG_sched, "deadlocked, forcing major GC...");
 861
 862         // Garbage collection can release some new threads due to
 863         // either (a) finalizers or (b) threads resurrected because
 864         // they are unreachable and will therefore be sent an
 865         // exception.  Any threads thus released will be immediately
 866         // runnable.
 867         cap = scheduleDoGC (cap, task, rtsTrue/*force major GC*/);
 868         // when force_major == rtsTrue. scheduleDoGC sets
 869         // recent_activity to ACTIVITY_DONE_GC and turns off the timer
 870         // signal.
 871
 872         if ( !emptyRunQueue(cap) ) return;
 873
 874 #if defined(RTS_USER_SIGNALS) && !defined(THREADED_RTS)
 875         /* If we have user-installed signal handlers, then wait
 876          * for signals to arrive rather then bombing out with a
 877          * deadlock.
 878          */
 879         if ( RtsFlags.MiscFlags.install_signal_handlers && anyUserHandlers() ) {
 880             debugTrace(DEBUG_sched,
 881                        "still deadlocked, waiting for signals...");
 882
 883             awaitUserSignals();
 884
 885             if (signals_pending()) {
 886                 startSignalHandlers(cap);
 887             }
 888
 889             // either we have threads to run, or we were interrupted:
 890             ASSERT(!emptyRunQueue(cap) || sched_state >= SCHED_INTERRUPTING);
 891
 892             return;
 893         }
 894 #endif
 895
 896 #if !defined(THREADED_RTS)
 897         /* Probably a real deadlock.  Send the current main thread the
 898          * Deadlock exception.
 899          */
 900         if (task->incall->tso) {
 901             switch (task->incall->tso->why_blocked) {
 902             case BlockedOnSTM:
 903             case BlockedOnBlackHole:
 904             case BlockedOnMsgThrowTo:
 905             case BlockedOnMVar:
 906                 throwToSingleThreaded(cap, task->incall->tso,
 907                                       (StgClosure *)nonTermination_closure);
 908                 return;
 909             default:
 910                 barf("deadlock: main thread blocked in a strange way");
 911             }
 912         }
 913         return;
 914 #endif
 915     }
 916 }
 917
 918
 919 /* ----------------------------------------------------------------------------
 920  * Send pending messages (PARALLEL_HASKELL only)
 921  * ------------------------------------------------------------------------- */
 922
 923 #if defined(PARALLEL_HASKELL)
 924 static void
 925 scheduleSendPendingMessages(void)
 926 {
 927
 928 # if defined(PAR) // global Mem.Mgmt., omit for now
 929     if (PendingFetches != END_BF_QUEUE) {
 930         processFetches();
 931     }
 932 # endif
 933
 934     if (RtsFlags.ParFlags.BufferTime) {
 935         // if we use message buffering, we must send away all message
 936         // packets which have become too old...
 937         sendOldBuffers();
 938     }
 939 }
 940 #endif
 941
 942 /* ----------------------------------------------------------------------------
 943  * Process message in the current Capability's inbox
 944  * ------------------------------------------------------------------------- */
 945
 946 static void
 947 scheduleProcessInbox (Capability *cap USED_IF_THREADS)
 948 {
 949 #if defined(THREADED_RTS)
 950     Message *m;
 951
 952     while (!emptyInbox(cap)) {
 953         ACQUIRE_LOCK(&cap->lock);
 954         m = cap->inbox;
 955         cap->inbox = m->link;
 956         RELEASE_LOCK(&cap->lock);
 957         executeMessage(cap, (Message *)m);
 958     }
 959 #endif
 960 }
 961
 962 /* ----------------------------------------------------------------------------
 963  * Activate spark threads (PARALLEL_HASKELL and THREADED_RTS)
 964  * ------------------------------------------------------------------------- */
 965
 966 #if defined(THREADED_RTS)
 967 static void
 968 scheduleActivateSpark(Capability *cap)
 969 {
 970     if (anySparks())
 971     {
 972         createSparkThread(cap);
 973         debugTrace(DEBUG_sched, "creating a spark thread");
 974     }
 975 }
 976 #endif // PARALLEL_HASKELL || THREADED_RTS
 977
 978 /* ----------------------------------------------------------------------------
 979  * After running a thread...
 980  * ------------------------------------------------------------------------- */
 981
 982 static void
 983 schedulePostRunThread (Capability *cap, StgTSO *t)
 984 {
 985     // We have to be able to catch transactions that are in an
 986     // infinite loop as a result of seeing an inconsistent view of
 987     // memory, e.g.
 988     //
 989     //   atomically $ do
 990     //       [a,b] <- mapM readTVar [ta,tb]
 991     //       when (a == b) loop
 992     //
 993     // and a is never equal to b given a consistent view of memory.
 994     //
 995     if (t -> trec != NO_TREC && t -> why_blocked == NotBlocked) {
 996         if (!stmValidateNestOfTransactions (t -> trec)) {
 997             debugTrace(DEBUG_sched | DEBUG_stm,
 998                        "trec %p found wasting its time", t);
 999
1000             // strip the stack back to the
1001             // ATOMICALLY_FRAME, aborting the (nested)
1002             // transaction, and saving the stack of any
1003             // partially-evaluated thunks on the heap.
1004             throwToSingleThreaded_(cap, t, NULL, rtsTrue);
1005
1006 //            ASSERT(get_itbl((StgClosure *)t->sp)->type == ATOMICALLY_FRAME);
1007         }
1008     }
1009
1010   /* some statistics gathering in the parallel case */
1011 }
1012
1013 /* -----------------------------------------------------------------------------
1014  * Handle a thread that returned to the scheduler with ThreadHeepOverflow
1015  * -------------------------------------------------------------------------- */
1016
1017 static rtsBool
1018 scheduleHandleHeapOverflow( Capability *cap, StgTSO *t )
1019 {
1020     // did the task ask for a large block?
1021     if (cap->r.rHpAlloc > BLOCK_SIZE) {
1022         // if so, get one and push it on the front of the nursery.
1023         bdescr *bd;
1024         lnat blocks;
1025
1026         blocks = (lnat)BLOCK_ROUND_UP(cap->r.rHpAlloc) / BLOCK_SIZE;
1027
1028         if (blocks > BLOCKS_PER_MBLOCK) {
1029             barf("allocation of %ld bytes too large (GHC should have complained at compile-time)", (long)cap->r.rHpAlloc);
1030         }
1031
1032         debugTrace(DEBUG_sched,
1033                    "--<< thread %ld (%s) stopped: requesting a large block (size %ld)\n",
1034                    (long)t->id, what_next_strs[t->what_next], blocks);
1035
1036         // don't do this if the nursery is (nearly) full, we'll GC first.
1037         if (cap->r.rCurrentNursery->link != NULL ||
1038             cap->r.rNursery->n_blocks == 1) {  // paranoia to prevent infinite loop
1039                                                // if the nursery has only one block.
1040
1041             bd = allocGroup_lock(blocks);
1042             cap->r.rNursery->n_blocks += blocks;
1043
1044             // link the new group into the list
1045             bd->link = cap->r.rCurrentNursery;
1046             bd->u.back = cap->r.rCurrentNursery->u.back;
1047             if (cap->r.rCurrentNursery->u.back != NULL) {
1048                 cap->r.rCurrentNursery->u.back->link = bd;
1049             } else {
1050                 cap->r.rNursery->blocks = bd;
1051             }
1052             cap->r.rCurrentNursery->u.back = bd;
1053
1054             // initialise it as a nursery block.  We initialise the
1055             // step, gen_no, and flags field of *every* sub-block in
1056             // this large block, because this is easier than making
1057             // sure that we always find the block head of a large
1058             // block whenever we call Bdescr() (eg. evacuate() and
1059             // isAlive() in the GC would both have to do this, at
1060             // least).
1061             {
1062                 bdescr *x;
1063                 for (x = bd; x < bd + blocks; x++) {
1064                     initBdescr(x,g0,g0);
1065                     x->free = x->start;
1066                     x->flags = 0;
1067                 }
1068             }
1069
1070             // This assert can be a killer if the app is doing lots
1071             // of large block allocations.
1072             IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
1073
1074             // now update the nursery to point to the new block
1075             cap->r.rCurrentNursery = bd;
1076
1077             // we might be unlucky and have another thread get on the
1078             // run queue before us and steal the large block, but in that
1079             // case the thread will just end up requesting another large
1080             // block.
1081             pushOnRunQueue(cap,t);
1082             return rtsFalse;  /* not actually GC'ing */
1083         }
1084     }
1085
1086     if (cap->r.rHpLim == NULL || cap->context_switch) {
1087         // Sometimes we miss a context switch, e.g. when calling
1088         // primitives in a tight loop, MAYBE_GC() doesn't check the
1089         // context switch flag, and we end up waiting for a GC.
1090         // See #1984, and concurrent/should_run/1984
1091         cap->context_switch = 0;
1092         appendToRunQueue(cap,t);
1093     } else {
1094         pushOnRunQueue(cap,t);
1095     }
1096     return rtsTrue;
1097     /* actual GC is done at the end of the while loop in schedule() */
1098 }
1099
1100 /* -----------------------------------------------------------------------------
1101  * Handle a thread that returned to the scheduler with ThreadStackOverflow
1102  * -------------------------------------------------------------------------- */
1103
1104 static void
1105 scheduleHandleStackOverflow (Capability *cap, Task *task, StgTSO *t)
1106 {
1107     /* just adjust the stack for this thread, then pop it back
1108      * on the run queue.
1109      */
1110     {
1111         /* enlarge the stack */
1112         StgTSO *new_t = threadStackOverflow(cap, t);
1113
1114         /* The TSO attached to this Task may have moved, so update the
1115          * pointer to it.
1116          */
1117         if (task->incall->tso == t) {
1118             task->incall->tso = new_t;
1119         }
1120         pushOnRunQueue(cap,new_t);
1121     }
1122 }
1123
1124 /* -----------------------------------------------------------------------------
1125  * Handle a thread that returned to the scheduler with ThreadYielding
1126  * -------------------------------------------------------------------------- */
1127
1128 static rtsBool
1129 scheduleHandleYield( Capability *cap, StgTSO *t, nat prev_what_next )
1130 {
1131     /* put the thread back on the run queue.  Then, if we're ready to
1132      * GC, check whether this is the last task to stop.  If so, wake
1133      * up the GC thread.  getThread will block during a GC until the
1134      * GC is finished.
1135      */
1136
1137     ASSERT(t->_link == END_TSO_QUEUE);
1138
1139     // Shortcut if we're just switching evaluators: don't bother
1140     // doing stack squeezing (which can be expensive), just run the
1141     // thread.
1142     if (cap->context_switch == 0 && t->what_next != prev_what_next) {
1143         debugTrace(DEBUG_sched,
1144                    "--<< thread %ld (%s) stopped to switch evaluators",
1145                    (long)t->id, what_next_strs[t->what_next]);
1146         return rtsTrue;
1147     }
1148
1149     // Reset the context switch flag.  We don't do this just before
1150     // running the thread, because that would mean we would lose ticks
1151     // during GC, which can lead to unfair scheduling (a thread hogs
1152     // the CPU because the tick always arrives during GC).  This way
1153     // penalises threads that do a lot of allocation, but that seems
1154     // better than the alternative.
1155     cap->context_switch = 0;
1156
1157     IF_DEBUG(sanity,
1158              //debugBelch("&& Doing sanity check on yielding TSO %ld.", t->id);
1159              checkTSO(t));
1160
1161     appendToRunQueue(cap,t);
1162
1163     return rtsFalse;
1164 }
1165
1166 /* -----------------------------------------------------------------------------
1167  * Handle a thread that returned to the scheduler with ThreadBlocked
1168  * -------------------------------------------------------------------------- */
1169
1170 static void
1171 scheduleHandleThreadBlocked( StgTSO *t
1172 #if !defined(DEBUG)
1173     STG_UNUSED
1174 #endif
1175     )
1176 {
1177
1178       // We don't need to do anything.  The thread is blocked, and it
1179       // has tidied up its stack and placed itself on whatever queue
1180       // it needs to be on.
1181
1182     // ASSERT(t->why_blocked != NotBlocked);
1183     // Not true: for example,
1184     //    - the thread may have woken itself up already, because
1185     //      threadPaused() might have raised a blocked throwTo
1186     //      exception, see maybePerformBlockedException().
1187
1188 #ifdef DEBUG
1189     traceThreadStatus(DEBUG_sched, t);
1190 #endif
1191 }
1192
1193 /* -----------------------------------------------------------------------------
1194  * Handle a thread that returned to the scheduler with ThreadFinished
1195  * -------------------------------------------------------------------------- */
1196
1197 static rtsBool
1198 scheduleHandleThreadFinished (Capability *cap STG_UNUSED, Task *task, StgTSO *t)
1199 {
1200     /* Need to check whether this was a main thread, and if so,
1201      * return with the return value.
1202      *
1203      * We also end up here if the thread kills itself with an
1204      * uncaught exception, see Exception.cmm.
1205      */
1206
1207     // blocked exceptions can now complete, even if the thread was in
1208     // blocked mode (see #2910).
1209     awakenBlockedExceptionQueue (cap, t);
1210
1211       //
1212       // Check whether the thread that just completed was a bound
1213       // thread, and if so return with the result.
1214       //
1215       // There is an assumption here that all thread completion goes
1216       // through this point; we need to make sure that if a thread
1217       // ends up in the ThreadKilled state, that it stays on the run
1218       // queue so it can be dealt with here.
1219       //
1220
1221       if (t->bound) {
1222
1223           if (t->bound != task->incall) {
1224 #if !defined(THREADED_RTS)
1225               // Must be a bound thread that is not the topmost one.  Leave
1226               // it on the run queue until the stack has unwound to the
1227               // point where we can deal with this.  Leaving it on the run
1228               // queue also ensures that the garbage collector knows about
1229               // this thread and its return value (it gets dropped from the
1230               // step->threads list so there's no other way to find it).
1231               appendToRunQueue(cap,t);
1232               return rtsFalse;
1233 #else
1234               // this cannot happen in the threaded RTS, because a
1235               // bound thread can only be run by the appropriate Task.
1236               barf("finished bound thread that isn't mine");
1237 #endif
1238           }
1239
1240           ASSERT(task->incall->tso == t);
1241
1242           if (t->what_next == ThreadComplete) {
1243               if (task->incall->ret) {
1244                   // NOTE: return val is tso->sp[1] (see StgStartup.hc)
1245                   *(task->incall->ret) = (StgClosure *)task->incall->tso->sp[1];
1246               }
1247               task->incall->stat = Success;
1248           } else {
1249               if (task->incall->ret) {
1250                   *(task->incall->ret) = NULL;
1251               }
1252               if (sched_state >= SCHED_INTERRUPTING) {
1253                   if (heap_overflow) {
1254                       task->incall->stat = HeapExhausted;
1255                   } else {
1256                       task->incall->stat = Interrupted;
1257                   }
1258               } else {
1259                   task->incall->stat = Killed;
1260               }
1261           }
1262 #ifdef DEBUG
1263           removeThreadLabel((StgWord)task->incall->tso->id);
1264 #endif
1265
1266           // We no longer consider this thread and task to be bound to
1267           // each other.  The TSO lives on until it is GC'd, but the
1268           // task is about to be released by the caller, and we don't
1269           // want anyone following the pointer from the TSO to the
1270           // defunct task (which might have already been
1271           // re-used). This was a real bug: the GC updated
1272           // tso->bound->tso which lead to a deadlock.
1273           t->bound = NULL;
1274           task->incall->tso = NULL;
1275
1276           return rtsTrue; // tells schedule() to return
1277       }
1278
1279       return rtsFalse;
1280 }
1281
1282 /* -----------------------------------------------------------------------------
1283  * Perform a heap census
1284  * -------------------------------------------------------------------------- */
1285
1286 static rtsBool
1287 scheduleNeedHeapProfile( rtsBool ready_to_gc STG_UNUSED )
1288 {
1289     // When we have +RTS -i0 and we're heap profiling, do a census at
1290     // every GC.  This lets us get repeatable runs for debugging.
1291     if (performHeapProfile ||
1292         (RtsFlags.ProfFlags.profileInterval==0 &&
1293          RtsFlags.ProfFlags.doHeapProfile && ready_to_gc)) {
1294         return rtsTrue;
1295     } else {
1296         return rtsFalse;
1297     }
1298 }
1299
1300 /* -----------------------------------------------------------------------------
1301  * Perform a garbage collection if necessary
1302  * -------------------------------------------------------------------------- */
1303
1304 static Capability *
1305 scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major)
1306 {
1307     rtsBool heap_census;
1308 #ifdef THREADED_RTS
1309     /* extern static volatile StgWord waiting_for_gc;
1310        lives inside capability.c */
1311     rtsBool gc_type, prev_pending_gc;
1312     nat i;
1313 #endif
1314
1315     if (sched_state == SCHED_SHUTTING_DOWN) {
1316         // The final GC has already been done, and the system is
1317         // shutting down.  We'll probably deadlock if we try to GC
1318         // now.
1319         return cap;
1320     }
1321
1322 #ifdef THREADED_RTS
1323     if (sched_state < SCHED_INTERRUPTING
1324         && RtsFlags.ParFlags.parGcEnabled
1325         && N >= RtsFlags.ParFlags.parGcGen
1326         && ! oldest_gen->mark)
1327     {
1328         gc_type = PENDING_GC_PAR;
1329     } else {
1330         gc_type = PENDING_GC_SEQ;
1331     }
1332
1333     // In order to GC, there must be no threads running Haskell code.
1334     // Therefore, the GC thread needs to hold *all* the capabilities,
1335     // and release them after the GC has completed.
1336     //
1337     // This seems to be the simplest way: previous attempts involved
1338     // making all the threads with capabilities give up their
1339     // capabilities and sleep except for the *last* one, which
1340     // actually did the GC.  But it's quite hard to arrange for all
1341     // the other tasks to sleep and stay asleep.
1342     //
1343
1344     /*  Other capabilities are prevented from running yet more Haskell
1345         threads if waiting_for_gc is set. Tested inside
1346         yieldCapability() and releaseCapability() in Capability.c */
1347
1348     prev_pending_gc = cas(&waiting_for_gc, 0, gc_type);
1349     if (prev_pending_gc) {
1350         do {
1351             debugTrace(DEBUG_sched, "someone else is trying to GC (%d)...",
1352                        prev_pending_gc);
1353             ASSERT(cap);
1354             yieldCapability(&cap,task);
1355         } while (waiting_for_gc);
1356         return cap;  // NOTE: task->cap might have changed here
1357     }
1358
1359     setContextSwitches();
1360
1361     // The final shutdown GC is always single-threaded, because it's
1362     // possible that some of the Capabilities have no worker threads.
1363
1364     if (gc_type == PENDING_GC_SEQ)
1365     {
1366         traceEventRequestSeqGc(cap);
1367     }
1368     else
1369     {
1370         traceEventRequestParGc(cap);
1371         debugTrace(DEBUG_sched, "ready_to_gc, grabbing GC threads");
1372     }
1373
1374     if (gc_type == PENDING_GC_SEQ)
1375     {
1376         // single-threaded GC: grab all the capabilities
1377         for (i=0; i < n_capabilities; i++) {
1378             debugTrace(DEBUG_sched, "ready_to_gc, grabbing all the capabilies (%d/%d)", i, n_capabilities);
1379             if (cap != &capabilities[i]) {
1380                 Capability *pcap = &capabilities[i];
1381                 // we better hope this task doesn't get migrated to
1382                 // another Capability while we're waiting for this one.
1383                 // It won't, because load balancing happens while we have
1384                 // all the Capabilities, but even so it's a slightly
1385                 // unsavoury invariant.
1386                 task->cap = pcap;
1387                 waitForReturnCapability(&pcap, task);
1388                 if (pcap != &capabilities[i]) {
1389                     barf("scheduleDoGC: got the wrong capability");
1390                 }
1391             }
1392         }
1393     }
1394     else
1395     {
1396         // multi-threaded GC: make sure all the Capabilities donate one
1397         // GC thread each.
1398         waitForGcThreads(cap);
1399     }
1400
1401 #endif
1402
1403     IF_DEBUG(scheduler, printAllThreads());
1404
1405 delete_threads_and_gc:
1406     /*
1407      * We now have all the capabilities; if we're in an interrupting
1408      * state, then we should take the opportunity to delete all the
1409      * threads in the system.
1410      */
1411     if (sched_state == SCHED_INTERRUPTING) {
1412         deleteAllThreads(cap);
1413         sched_state = SCHED_SHUTTING_DOWN;
1414     }
1415
1416     heap_census = scheduleNeedHeapProfile(rtsTrue);
1417
1418     traceEventGcStart(cap);
1419 #if defined(THREADED_RTS)
1420     // reset waiting_for_gc *before* GC, so that when the GC threads
1421     // emerge they don't immediately re-enter the GC.
1422     waiting_for_gc = 0;
1423     GarbageCollect(force_major || heap_census, gc_type, cap);
1424 #else
1425     GarbageCollect(force_major || heap_census, 0, cap);
1426 #endif
1427     traceEventGcEnd(cap);
1428
1429     if (recent_activity == ACTIVITY_INACTIVE && force_major)
1430     {
1431         // We are doing a GC because the system has been idle for a
1432         // timeslice and we need to check for deadlock.  Record the
1433         // fact that we've done a GC and turn off the timer signal;
1434         // it will get re-enabled if we run any threads after the GC.
1435         recent_activity = ACTIVITY_DONE_GC;
1436         stopTimer();
1437     }
1438     else
1439     {
1440         // the GC might have taken long enough for the timer to set
1441         // recent_activity = ACTIVITY_INACTIVE, but we aren't
1442         // necessarily deadlocked:
1443         recent_activity = ACTIVITY_YES;
1444     }
1445
1446 #if defined(THREADED_RTS)
1447     if (gc_type == PENDING_GC_PAR)
1448     {
1449         releaseGCThreads(cap);
1450     }
1451 #endif
1452
1453     if (heap_census) {
1454         debugTrace(DEBUG_sched, "performing heap census");
1455         heapCensus();
1456         performHeapProfile = rtsFalse;
1457     }
1458
1459     if (heap_overflow && sched_state < SCHED_INTERRUPTING) {
1460         // GC set the heap_overflow flag, so we should proceed with
1461         // an orderly shutdown now.  Ultimately we want the main
1462         // thread to return to its caller with HeapExhausted, at which
1463         // point the caller should call hs_exit().  The first step is
1464         // to delete all the threads.
1465         //
1466         // Another way to do this would be to raise an exception in
1467         // the main thread, which we really should do because it gives
1468         // the program a chance to clean up.  But how do we find the
1469         // main thread?  It should presumably be the same one that
1470         // gets ^C exceptions, but that's all done on the Haskell side
1471         // (GHC.TopHandler).
1472         sched_state = SCHED_INTERRUPTING;
1473         goto delete_threads_and_gc;
1474     }
1475
1476 #ifdef SPARKBALANCE
1477     /* JB
1478        Once we are all together... this would be the place to balance all
1479        spark pools. No concurrent stealing or adding of new sparks can
1480        occur. Should be defined in Sparks.c. */
1481     balanceSparkPoolsCaps(n_capabilities, capabilities);
1482 #endif
1483
1484 #if defined(THREADED_RTS)
1485     if (gc_type == PENDING_GC_SEQ) {
1486         // release our stash of capabilities.
1487         for (i = 0; i < n_capabilities; i++) {
1488             if (cap != &capabilities[i]) {
1489                 task->cap = &capabilities[i];
1490                 releaseCapability(&capabilities[i]);
1491             }
1492         }
1493     }
1494     if (cap) {
1495         task->cap = cap;
1496     } else {
1497         task->cap = NULL;
1498     }
1499 #endif
1500
1501     return cap;
1502 }
1503
1504 /* ---------------------------------------------------------------------------
1505  * Singleton fork(). Do not copy any running threads.
1506  * ------------------------------------------------------------------------- */
1507
1508 pid_t
1509 forkProcess(HsStablePtr *entry
1510 #ifndef FORKPROCESS_PRIMOP_SUPPORTED
1511             STG_UNUSED
1512 #endif
1513            )
1514 {
1515 #ifdef FORKPROCESS_PRIMOP_SUPPORTED
1516     pid_t pid;
1517     StgTSO* t,*next;
1518     Capability *cap;
1519     nat g;
1520
1521 #if defined(THREADED_RTS)
1522     if (RtsFlags.ParFlags.nNodes > 1) {
1523         errorBelch("forking not supported with +RTS -N<n> greater than 1");
1524         stg_exit(EXIT_FAILURE);
1525     }
1526 #endif
1527
1528     debugTrace(DEBUG_sched, "forking!");
1529
1530     // ToDo: for SMP, we should probably acquire *all* the capabilities
1531     cap = rts_lock();
1532
1533     // no funny business: hold locks while we fork, otherwise if some
1534     // other thread is holding a lock when the fork happens, the data
1535     // structure protected by the lock will forever be in an
1536     // inconsistent state in the child.  See also #1391.
1537     ACQUIRE_LOCK(&sched_mutex);
1538     ACQUIRE_LOCK(&cap->lock);
1539     ACQUIRE_LOCK(&cap->running_task->lock);
1540
1541     stopTimer(); // See #4074
1542
1543 #if defined(TRACING)
1544     flushEventLog(); // so that child won't inherit dirty file buffers
1545 #endif
1546
1547     pid = fork();
1548
1549     if (pid) { // parent
1550
1551         startTimer(); // #4074
1552
1553         RELEASE_LOCK(&sched_mutex);
1554         RELEASE_LOCK(&cap->lock);
1555         RELEASE_LOCK(&cap->running_task->lock);
1556
1557         // just return the pid
1558         rts_unlock(cap);
1559         return pid;
1560
1561     } else { // child
1562
1563 #if defined(THREADED_RTS)
1564         initMutex(&sched_mutex);
1565         initMutex(&cap->lock);
1566         initMutex(&cap->running_task->lock);
1567 #endif
1568
1569 #if defined(TRACING)
1570         abortEventLogging(); // abort eventlog inherited from parent
1571         initEventLogging(); // child starts its own eventlog
1572 #endif
1573         // Now, all OS threads except the thread that forked are
1574         // stopped.  We need to stop all Haskell threads, including
1575         // those involved in foreign calls.  Also we need to delete
1576         // all Tasks, because they correspond to OS threads that are
1577         // now gone.
1578
1579         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1580           for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) {
1581             if (t->what_next == ThreadRelocated) {
1582                 next = t->_link;
1583             } else {
1584                 next = t->global_link;
1585                 // don't allow threads to catch the ThreadKilled
1586                 // exception, but we do want to raiseAsync() because these
1587                 // threads may be evaluating thunks that we need later.
1588                 deleteThread_(cap,t);
1589
1590                 // stop the GC from updating the InCall to point to
1591                 // the TSO.  This is only necessary because the
1592                 // OSThread bound to the TSO has been killed, and
1593                 // won't get a chance to exit in the usual way (see
1594                 // also scheduleHandleThreadFinished).
1595                 t->bound = NULL;
1596             }
1597           }
1598         }
1599
1600         // Empty the run queue.  It seems tempting to let all the
1601         // killed threads stay on the run queue as zombies to be
1602         // cleaned up later, but some of them correspond to bound
1603         // threads for which the corresponding Task does not exist.
1604         cap->run_queue_hd = END_TSO_QUEUE;
1605         cap->run_queue_tl = END_TSO_QUEUE;
1606
1607         // Any suspended C-calling Tasks are no more, their OS threads
1608         // don't exist now:
1609         cap->suspended_ccalls = NULL;
1610
1611         // Empty the threads lists.  Otherwise, the garbage
1612         // collector may attempt to resurrect some of these threads.
1613         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1614             generations[g].threads = END_TSO_QUEUE;
1615         }
1616
1617         discardTasksExcept(cap->running_task);
1618
1619 #if defined(THREADED_RTS)
1620         // Wipe our spare workers list, they no longer exist.  New
1621         // workers will be created if necessary.
1622         cap->spare_workers = NULL;
1623         cap->n_spare_workers = 0;
1624         cap->returning_tasks_hd = NULL;
1625         cap->returning_tasks_tl = NULL;
1626 #endif
1627
1628         // On Unix, all timers are reset in the child, so we need to start
1629         // the timer again.
1630         initTimer();
1631         startTimer();
1632
1633 #if defined(THREADED_RTS)
1634         cap = ioManagerStartCap(cap);
1635 #endif
1636
1637         cap = rts_evalStableIO(cap, entry, NULL);  // run the action
1638         rts_checkSchedStatus("forkProcess",cap);
1639
1640         rts_unlock(cap);
1641         hs_exit();                      // clean up and exit
1642         stg_exit(EXIT_SUCCESS);
1643     }
1644 #else /* !FORKPROCESS_PRIMOP_SUPPORTED */
1645     barf("forkProcess#: primop not supported on this platform, sorry!\n");
1646 #endif
1647 }
1648
1649 /* ---------------------------------------------------------------------------
1650  * Delete all the threads in the system
1651  * ------------------------------------------------------------------------- */
1652
1653 static void
1654 deleteAllThreads ( Capability *cap )
1655 {
1656     // NOTE: only safe to call if we own all capabilities.
1657
1658     StgTSO* t, *next;
1659     nat g;
1660
1661     debugTrace(DEBUG_sched,"deleting all threads");
1662     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1663         for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) {
1664             if (t->what_next == ThreadRelocated) {
1665                 next = t->_link;
1666             } else {
1667                 next = t->global_link;
1668                 deleteThread(cap,t);
1669             }
1670         }
1671     }
1672
1673     // The run queue now contains a bunch of ThreadKilled threads.  We
1674     // must not throw these away: the main thread(s) will be in there
1675     // somewhere, and the main scheduler loop has to deal with it.
1676     // Also, the run queue is the only thing keeping these threads from
1677     // being GC'd, and we don't want the "main thread has been GC'd" panic.
1678
1679 #if !defined(THREADED_RTS)
1680     ASSERT(blocked_queue_hd == END_TSO_QUEUE);
1681     ASSERT(sleeping_queue == END_TSO_QUEUE);
1682 #endif
1683 }
1684
1685 /* -----------------------------------------------------------------------------
1686    Managing the suspended_ccalls list.
1687    Locks required: sched_mutex
1688    -------------------------------------------------------------------------- */
1689
1690 STATIC_INLINE void
1691 suspendTask (Capability *cap, Task *task)
1692 {
1693     InCall *incall;
1694
1695     incall = task->incall;
1696     ASSERT(incall->next == NULL && incall->prev == NULL);
1697     incall->next = cap->suspended_ccalls;
1698     incall->prev = NULL;
1699     if (cap->suspended_ccalls) {
1700         cap->suspended_ccalls->prev = incall;
1701     }
1702     cap->suspended_ccalls = incall;
1703 }
1704
1705 STATIC_INLINE void
1706 recoverSuspendedTask (Capability *cap, Task *task)
1707 {
1708     InCall *incall;
1709
1710     incall = task->incall;
1711     if (incall->prev) {
1712         incall->prev->next = incall->next;
1713     } else {
1714         ASSERT(cap->suspended_ccalls == incall);
1715         cap->suspended_ccalls = incall->next;
1716     }
1717     if (incall->next) {
1718         incall->next->prev = incall->prev;
1719     }
1720     incall->next = incall->prev = NULL;
1721 }
1722
1723 /* ---------------------------------------------------------------------------
1724  * Suspending & resuming Haskell threads.
1725  *
1726  * When making a "safe" call to C (aka _ccall_GC), the task gives back
1727  * its capability before calling the C function.  This allows another
1728  * task to pick up the capability and carry on running Haskell
1729  * threads.  It also means that if the C call blocks, it won't lock
1730  * the whole system.
1731  *
1732  * The Haskell thread making the C call is put to sleep for the
1733  * duration of the call, on the suspended_ccalling_threads queue.  We
1734  * give out a token to the task, which it can use to resume the thread
1735  * on return from the C function.
1736  *
1737  * If this is an interruptible C call, this means that the FFI call may be
1738  * unceremoniously terminated and should be scheduled on an
1739  * unbound worker thread.
1740  * ------------------------------------------------------------------------- */
1741
1742 void *
1743 suspendThread (StgRegTable *reg, rtsBool interruptible)
1744 {
1745   Capability *cap;
1746   int saved_errno;
1747   StgTSO *tso;
1748   Task *task;
1749 #if mingw32_HOST_OS
1750   StgWord32 saved_winerror;
1751 #endif
1752
1753   saved_errno = errno;
1754 #if mingw32_HOST_OS
1755   saved_winerror = GetLastError();
1756 #endif
1757
1758   /* assume that *reg is a pointer to the StgRegTable part of a Capability.
1759    */
1760   cap = regTableToCapability(reg);
1761
1762   task = cap->running_task;
1763   tso = cap->r.rCurrentTSO;
1764
1765   traceEventStopThread(cap, tso, THREAD_SUSPENDED_FOREIGN_CALL);
1766
1767   // XXX this might not be necessary --SDM
1768   tso->what_next = ThreadRunGHC;
1769
1770   threadPaused(cap,tso);
1771
1772   if (interruptible) {
1773     tso->why_blocked = BlockedOnCCall_Interruptible;
1774   } else {
1775     tso->why_blocked = BlockedOnCCall;
1776   }
1777
1778   // Hand back capability
1779   task->incall->suspended_tso = tso;
1780   task->incall->suspended_cap = cap;
1781
1782   ACQUIRE_LOCK(&cap->lock);
1783
1784   suspendTask(cap,task);
1785   cap->in_haskell = rtsFalse;
1786   releaseCapability_(cap,rtsFalse);
1787
1788   RELEASE_LOCK(&cap->lock);
1789
1790   errno = saved_errno;
1791 #if mingw32_HOST_OS
1792   SetLastError(saved_winerror);
1793 #endif
1794   return task;
1795 }
1796
1797 StgRegTable *
1798 resumeThread (void *task_)
1799 {
1800     StgTSO *tso;
1801     InCall *incall;
1802     Capability *cap;
1803     Task *task = task_;
1804     int saved_errno;
1805 #if mingw32_HOST_OS
1806     StgWord32 saved_winerror;
1807 #endif
1808
1809     saved_errno = errno;
1810 #if mingw32_HOST_OS
1811     saved_winerror = GetLastError();
1812 #endif
1813
1814     incall = task->incall;
1815     cap = incall->suspended_cap;
1816     task->cap = cap;
1817
1818     // Wait for permission to re-enter the RTS with the result.
1819     waitForReturnCapability(&cap,task);
1820     // we might be on a different capability now... but if so, our
1821     // entry on the suspended_ccalls list will also have been
1822     // migrated.
1823
1824     // Remove the thread from the suspended list
1825     recoverSuspendedTask(cap,task);
1826
1827     tso = incall->suspended_tso;
1828     incall->suspended_tso = NULL;
1829     incall->suspended_cap = NULL;
1830     tso->_link = END_TSO_QUEUE; // no write barrier reqd
1831
1832     traceEventRunThread(cap, tso);
1833
1834     /* Reset blocking status */
1835     tso->why_blocked  = NotBlocked;
1836
1837     if ((tso->flags & TSO_BLOCKEX) == 0) {
1838         // avoid locking the TSO if we don't have to
1839         if (tso->blocked_exceptions != END_BLOCKED_EXCEPTIONS_QUEUE) {
1840             maybePerformBlockedException(cap,tso);
1841         }
1842     }
1843
1844     cap->r.rCurrentTSO = tso;
1845     cap->in_haskell = rtsTrue;
1846     errno = saved_errno;
1847 #if mingw32_HOST_OS
1848     SetLastError(saved_winerror);
1849 #endif
1850
1851     /* We might have GC'd, mark the TSO dirty again */
1852     dirty_TSO(cap,tso);
1853
1854     IF_DEBUG(sanity, checkTSO(tso));
1855
1856     return &cap->r;
1857 }
1858
1859 /* ---------------------------------------------------------------------------
1860  * scheduleThread()
1861  *
1862  * scheduleThread puts a thread on the end  of the runnable queue.
1863  * This will usually be done immediately after a thread is created.
1864  * The caller of scheduleThread must create the thread using e.g.
1865  * createThread and push an appropriate closure
1866  * on this thread's stack before the scheduler is invoked.
1867  * ------------------------------------------------------------------------ */
1868
1869 void
1870 scheduleThread(Capability *cap, StgTSO *tso)
1871 {
1872     // The thread goes at the *end* of the run-queue, to avoid possible
1873     // starvation of any threads already on the queue.
1874     appendToRunQueue(cap,tso);
1875 }
1876
1877 void
1878 scheduleThreadOn(Capability *cap, StgWord cpu USED_IF_THREADS, StgTSO *tso)
1879 {
1880 #if defined(THREADED_RTS)
1881     tso->flags |= TSO_LOCKED; // we requested explicit affinity; don't
1882                               // move this thread from now on.
1883     cpu %= RtsFlags.ParFlags.nNodes;
1884     if (cpu == cap->no) {
1885         appendToRunQueue(cap,tso);
1886     } else {
1887         migrateThread(cap, tso, &capabilities[cpu]);
1888     }
1889 #else
1890     appendToRunQueue(cap,tso);
1891 #endif
1892 }
1893
1894 Capability *
1895 scheduleWaitThread (StgTSO* tso, /*[out]*/HaskellObj* ret, Capability *cap)
1896 {
1897     Task *task;
1898     StgThreadID id;
1899
1900     // We already created/initialised the Task
1901     task = cap->running_task;
1902
1903     // This TSO is now a bound thread; make the Task and TSO
1904     // point to each other.
1905     tso->bound = task->incall;
1906     tso->cap = cap;
1907
1908     task->incall->tso = tso;
1909     task->incall->ret = ret;
1910     task->incall->stat = NoStatus;
1911
1912     appendToRunQueue(cap,tso);
1913
1914     id = tso->id;
1915     debugTrace(DEBUG_sched, "new bound thread (%lu)", (unsigned long)id);
1916
1917     cap = schedule(cap,task);
1918
1919     ASSERT(task->incall->stat != NoStatus);
1920     ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
1921
1922     debugTrace(DEBUG_sched, "bound thread (%lu) finished", (unsigned long)id);
1923     return cap;
1924 }
1925
1926 /* ----------------------------------------------------------------------------
1927  * Starting Tasks
1928  * ------------------------------------------------------------------------- */
1929
1930 #if defined(THREADED_RTS)
1931 void scheduleWorker (Capability *cap, Task *task)
1932 {
1933     // schedule() runs without a lock.
1934     cap = schedule(cap,task);
1935
1936     // On exit from schedule(), we have a Capability, but possibly not
1937     // the same one we started with.
1938
1939     // During shutdown, the requirement is that after all the
1940     // Capabilities are shut down, all workers that are shutting down
1941     // have finished workerTaskStop().  This is why we hold on to
1942     // cap->lock until we've finished workerTaskStop() below.
1943     //
1944     // There may be workers still involved in foreign calls; those
1945     // will just block in waitForReturnCapability() because the
1946     // Capability has been shut down.
1947     //
1948     ACQUIRE_LOCK(&cap->lock);
1949     releaseCapability_(cap,rtsFalse);
1950     workerTaskStop(task);
1951     RELEASE_LOCK(&cap->lock);
1952 }
1953 #endif
1954
1955 /* ---------------------------------------------------------------------------
1956  * initScheduler()
1957  *
1958  * Initialise the scheduler.  This resets all the queues - if the
1959  * queues contained any threads, they'll be garbage collected at the
1960  * next pass.
1961  *
1962  * ------------------------------------------------------------------------ */
1963
1964 void
1965 initScheduler(void)
1966 {
1967 #if !defined(THREADED_RTS)
1968   blocked_queue_hd  = END_TSO_QUEUE;
1969   blocked_queue_tl  = END_TSO_QUEUE;
1970   sleeping_queue    = END_TSO_QUEUE;
1971 #endif
1972
1973   sched_state    = SCHED_RUNNING;
1974   recent_activity = ACTIVITY_YES;
1975
1976 #if defined(THREADED_RTS)
1977   /* Initialise the mutex and condition variables used by
1978    * the scheduler. */
1979   initMutex(&sched_mutex);
1980 #endif
1981
1982   ACQUIRE_LOCK(&sched_mutex);
1983
1984   /* A capability holds the state a native thread needs in
1985    * order to execute STG code. At least one capability is
1986    * floating around (only THREADED_RTS builds have more than one).
1987    */
1988   initCapabilities();
1989
1990   initTaskManager();
1991
1992 #if defined(THREADED_RTS)
1993   initSparkPools();
1994 #endif
1995
1996   RELEASE_LOCK(&sched_mutex);
1997
1998 #if defined(THREADED_RTS)
1999   /*
2000    * Eagerly start one worker to run each Capability, except for
2001    * Capability 0.  The idea is that we're probably going to start a
2002    * bound thread on Capability 0 pretty soon, so we don't want a
2003    * worker task hogging it.
2004    */
2005   {
2006       nat i;
2007       Capability *cap;
2008       for (i = 1; i < n_capabilities; i++) {
2009           cap = &capabilities[i];
2010           ACQUIRE_LOCK(&cap->lock);
2011           startWorkerTask(cap);
2012           RELEASE_LOCK(&cap->lock);
2013       }
2014   }
2015 #endif
2016 }
2017
2018 void
2019 exitScheduler (rtsBool wait_foreign USED_IF_THREADS)
2020                /* see Capability.c, shutdownCapability() */
2021 {
2022     Task *task = NULL;
2023
2024     task = newBoundTask();
2025
2026     // If we haven't killed all the threads yet, do it now.
2027     if (sched_state < SCHED_SHUTTING_DOWN) {
2028         sched_state = SCHED_INTERRUPTING;
2029         waitForReturnCapability(&task->cap,task);
2030         scheduleDoGC(task->cap,task,rtsFalse);
2031         ASSERT(task->incall->tso == NULL);
2032         releaseCapability(task->cap);
2033     }
2034     sched_state = SCHED_SHUTTING_DOWN;
2035
2036 #if defined(THREADED_RTS)
2037     {
2038         nat i;
2039
2040         for (i = 0; i < n_capabilities; i++) {
2041             ASSERT(task->incall->tso == NULL);
2042             shutdownCapability(&capabilities[i], task, wait_foreign);
2043         }
2044     }
2045 #endif
2046
2047     boundTaskExiting(task);
2048 }
2049
2050 void
2051 freeScheduler( void )
2052 {
2053     nat still_running;
2054
2055     ACQUIRE_LOCK(&sched_mutex);
2056     still_running = freeTaskManager();
2057     // We can only free the Capabilities if there are no Tasks still
2058     // running.  We might have a Task about to return from a foreign
2059     // call into waitForReturnCapability(), for example (actually,
2060     // this should be the *only* thing that a still-running Task can
2061     // do at this point, and it will block waiting for the
2062     // Capability).
2063     if (still_running == 0) {
2064         freeCapabilities();
2065         if (n_capabilities != 1) {
2066             stgFree(capabilities);
2067         }
2068     }
2069     RELEASE_LOCK(&sched_mutex);
2070 #if defined(THREADED_RTS)
2071     closeMutex(&sched_mutex);
2072 #endif
2073 }
2074
2075 /* -----------------------------------------------------------------------------
2076    performGC
2077
2078    This is the interface to the garbage collector from Haskell land.
2079    We provide this so that external C code can allocate and garbage
2080    collect when called from Haskell via _ccall_GC.
2081    -------------------------------------------------------------------------- */
2082
2083 static void
2084 performGC_(rtsBool force_major)
2085 {
2086     Task *task;
2087
2088     // We must grab a new Task here, because the existing Task may be
2089     // associated with a particular Capability, and chained onto the
2090     // suspended_ccalls queue.
2091     task = newBoundTask();
2092
2093     waitForReturnCapability(&task->cap,task);
2094     scheduleDoGC(task->cap,task,force_major);
2095     releaseCapability(task->cap);
2096     boundTaskExiting(task);
2097 }
2098
2099 void
2100 performGC(void)
2101 {
2102     performGC_(rtsFalse);
2103 }
2104
2105 void
2106 performMajorGC(void)
2107 {
2108     performGC_(rtsTrue);
2109 }
2110
2111 /* -----------------------------------------------------------------------------
2112    Stack overflow
2113
2114    If the thread has reached its maximum stack size, then raise the
2115    StackOverflow exception in the offending thread.  Otherwise
2116    relocate the TSO into a larger chunk of memory and adjust its stack
2117    size appropriately.
2118    -------------------------------------------------------------------------- */
2119
2120 static StgTSO *
2121 threadStackOverflow(Capability *cap, StgTSO *tso)
2122 {
2123   nat new_stack_size, stack_words;
2124   lnat new_tso_size;
2125   StgPtr new_sp;
2126   StgTSO *dest;
2127
2128   IF_DEBUG(sanity,checkTSO(tso));
2129
2130   if (tso->stack_size >= tso->max_stack_size
2131       && !(tso->flags & TSO_BLOCKEX)) {
2132       // NB. never raise a StackOverflow exception if the thread is
2133       // inside Control.Exceptino.block.  It is impractical to protect
2134       // against stack overflow exceptions, since virtually anything
2135       // can raise one (even 'catch'), so this is the only sensible
2136       // thing to do here.  See bug #767.
2137       //
2138
2139       if (tso->flags & TSO_SQUEEZED) {
2140           return tso;
2141       }
2142       // #3677: In a stack overflow situation, stack squeezing may
2143       // reduce the stack size, but we don't know whether it has been
2144       // reduced enough for the stack check to succeed if we try
2145       // again.  Fortunately stack squeezing is idempotent, so all we
2146       // need to do is record whether *any* squeezing happened.  If we
2147       // are at the stack's absolute -K limit, and stack squeezing
2148       // happened, then we try running the thread again.  The
2149       // TSO_SQUEEZED flag is set by threadPaused() to tell us whether
2150       // squeezing happened or not.
2151
2152       debugTrace(DEBUG_gc,
2153                  "threadStackOverflow of TSO %ld (%p): stack too large (now %ld; max is %ld)",
2154                  (long)tso->id, tso, (long)tso->stack_size, (long)tso->max_stack_size);
2155       IF_DEBUG(gc,
2156                /* If we're debugging, just print out the top of the stack */
2157                printStackChunk(tso->sp, stg_min(tso->stack+tso->stack_size,
2158                                                 tso->sp+64)));
2159
2160       // Send this thread the StackOverflow exception
2161       throwToSingleThreaded(cap, tso, (StgClosure *)stackOverflow_closure);
2162       return tso;
2163   }
2164
2165
2166   // We also want to avoid enlarging the stack if squeezing has
2167   // already released some of it.  However, we don't want to get into
2168   // a pathalogical situation where a thread has a nearly full stack
2169   // (near its current limit, but not near the absolute -K limit),
2170   // keeps allocating a little bit, squeezing removes a little bit,
2171   // and then it runs again.  So to avoid this, if we squeezed *and*
2172   // there is still less than BLOCK_SIZE_W words free, then we enlarge
2173   // the stack anyway.
2174   if ((tso->flags & TSO_SQUEEZED) &&
2175       ((W_)(tso->sp - tso->stack) >= BLOCK_SIZE_W)) {
2176       return tso;
2177   }
2178
2179   /* Try to double the current stack size.  If that takes us over the
2180    * maximum stack size for this thread, then use the maximum instead
2181    * (that is, unless we're already at or over the max size and we
2182    * can't raise the StackOverflow exception (see above), in which
2183    * case just double the size). Finally round up so the TSO ends up as
2184    * a whole number of blocks.
2185    */
2186   if (tso->stack_size >= tso->max_stack_size) {
2187       new_stack_size = tso->stack_size * 2;
2188   } else {
2189       new_stack_size = stg_min(tso->stack_size * 2, tso->max_stack_size);
2190   }
2191   new_tso_size   = (lnat)BLOCK_ROUND_UP(new_stack_size * sizeof(W_) +
2192                                        TSO_STRUCT_SIZE)/sizeof(W_);
2193   new_tso_size = round_to_mblocks(new_tso_size);  /* Be MBLOCK-friendly */
2194   new_stack_size = new_tso_size - TSO_STRUCT_SIZEW;
2195
2196   debugTrace(DEBUG_sched,
2197              "increasing stack size from %ld words to %d.",
2198              (long)tso->stack_size, new_stack_size);
2199
2200   dest = (StgTSO *)allocate(cap,new_tso_size);
2201   TICK_ALLOC_TSO(new_stack_size,0);
2202
2203   /* copy the TSO block and the old stack into the new area */
2204   memcpy(dest,tso,TSO_STRUCT_SIZE);
2205   stack_words = tso->stack + tso->stack_size - tso->sp;
2206   new_sp = (P_)dest + new_tso_size - stack_words;
2207   memcpy(new_sp, tso->sp, stack_words * sizeof(W_));
2208
2209   /* relocate the stack pointers... */
2210   dest->sp         = new_sp;
2211   dest->stack_size = new_stack_size;
2212
2213   /* Mark the old TSO as relocated.  We have to check for relocated
2214    * TSOs in the garbage collector and any primops that deal with TSOs.
2215    *
2216    * It's important to set the sp value to just beyond the end
2217    * of the stack, so we don't attempt to scavenge any part of the
2218    * dead TSO's stack.
2219    */
2220   setTSOLink(cap,tso,dest);
2221   write_barrier(); // other threads seeing ThreadRelocated will look at _link
2222   tso->what_next = ThreadRelocated;
2223   tso->sp = (P_)&(tso->stack[tso->stack_size]);
2224   tso->why_blocked = NotBlocked;
2225
2226   IF_DEBUG(sanity,checkTSO(dest));
2227 #if 0
2228   IF_DEBUG(scheduler,printTSO(dest));
2229 #endif
2230
2231   return dest;
2232 }
2233
2234 static StgTSO *
2235 threadStackUnderflow (Capability *cap, Task *task, StgTSO *tso)
2236 {
2237     bdescr *bd, *new_bd;
2238     lnat free_w, tso_size_w;
2239     StgTSO *new_tso;
2240
2241     tso_size_w = tso_sizeW(tso);
2242
2243     if (tso_size_w < MBLOCK_SIZE_W ||
2244           // TSO is less than 2 mblocks (since the first mblock is
2245           // shorter than MBLOCK_SIZE_W)
2246         (tso_size_w - BLOCKS_PER_MBLOCK*BLOCK_SIZE_W) % MBLOCK_SIZE_W != 0 ||
2247           // or TSO is not a whole number of megablocks (ensuring
2248           // precondition of splitLargeBlock() below)
2249         (tso_size_w <= round_up_to_mblocks(RtsFlags.GcFlags.initialStkSize)) ||
2250           // or TSO is smaller than the minimum stack size (rounded up)
2251         (nat)(tso->stack + tso->stack_size - tso->sp) > tso->stack_size / 4)
2252           // or stack is using more than 1/4 of the available space
2253     {
2254         // then do nothing
2255         return tso;
2256     }
2257
2258     // this is the number of words we'll free
2259     free_w = round_to_mblocks(tso_size_w/2);
2260
2261     bd = Bdescr((StgPtr)tso);
2262     new_bd = splitLargeBlock(bd, free_w / BLOCK_SIZE_W);
2263     bd->free = bd->start + TSO_STRUCT_SIZEW;
2264
2265     new_tso = (StgTSO *)new_bd->start;
2266     memcpy(new_tso,tso,TSO_STRUCT_SIZE);
2267     new_tso->stack_size = new_bd->free - new_tso->stack;
2268
2269     // The original TSO was dirty and probably on the mutable
2270     // list. The new TSO is not yet on the mutable list, so we better
2271     // put it there.
2272     new_tso->dirty = 0;
2273     new_tso->flags &= ~TSO_LINK_DIRTY;
2274     dirty_TSO(cap, new_tso);
2275
2276     debugTrace(DEBUG_sched, "thread %ld: reducing TSO size from %lu words to %lu",
2277                (long)tso->id, tso_size_w, tso_sizeW(new_tso));
2278
2279     tso->_link = new_tso; // no write barrier reqd: same generation
2280     write_barrier(); // other threads seeing ThreadRelocated will look at _link
2281     tso->what_next = ThreadRelocated;
2282
2283     // The TSO attached to this Task may have moved, so update the
2284     // pointer to it.
2285     if (task->incall->tso == tso) {
2286         task->incall->tso = new_tso;
2287     }
2288
2289     IF_DEBUG(sanity,checkTSO(new_tso));
2290
2291     return new_tso;
2292 }
2293
2294 /* ---------------------------------------------------------------------------
2295    Interrupt execution
2296    - usually called inside a signal handler so it mustn't do anything fancy.
2297    ------------------------------------------------------------------------ */
2298
2299 void
2300 interruptStgRts(void)
2301 {
2302     sched_state = SCHED_INTERRUPTING;
2303     setContextSwitches();
2304 #if defined(THREADED_RTS)
2305     wakeUpRts();
2306 #endif
2307 }
2308
2309 /* -----------------------------------------------------------------------------
2310    Wake up the RTS
2311
2312    This function causes at least one OS thread to wake up and run the
2313    scheduler loop.  It is invoked when the RTS might be deadlocked, or
2314    an external event has arrived that may need servicing (eg. a
2315    keyboard interrupt).
2316
2317    In the single-threaded RTS we don't do anything here; we only have
2318    one thread anyway, and the event that caused us to want to wake up
2319    will have interrupted any blocking system call in progress anyway.
2320    -------------------------------------------------------------------------- */
2321
2322 #if defined(THREADED_RTS)
2323 void wakeUpRts(void)
2324 {
2325     // This forces the IO Manager thread to wakeup, which will
2326     // in turn ensure that some OS thread wakes up and runs the
2327     // scheduler loop, which will cause a GC and deadlock check.
2328     ioManagerWakeup();
2329 }
2330 #endif
2331
2332 /* -----------------------------------------------------------------------------
2333    Deleting threads
2334
2335    This is used for interruption (^C) and forking, and corresponds to
2336    raising an exception but without letting the thread catch the
2337    exception.
2338    -------------------------------------------------------------------------- */
2339
2340 static void
2341 deleteThread (Capability *cap STG_UNUSED, StgTSO *tso)
2342 {
2343     // NOTE: must only be called on a TSO that we have exclusive
2344     // access to, because we will call throwToSingleThreaded() below.
2345     // The TSO must be on the run queue of the Capability we own, or
2346     // we must own all Capabilities.
2347
2348     if (tso->why_blocked != BlockedOnCCall &&
2349         tso->why_blocked != BlockedOnCCall_Interruptible) {
2350         throwToSingleThreaded(tso->cap,tso,NULL);
2351     }
2352 }
2353
2354 #ifdef FORKPROCESS_PRIMOP_SUPPORTED
2355 static void
2356 deleteThread_(Capability *cap, StgTSO *tso)
2357 { // for forkProcess only:
2358   // like deleteThread(), but we delete threads in foreign calls, too.
2359
2360     if (tso->why_blocked == BlockedOnCCall ||
2361         tso->why_blocked == BlockedOnCCall_Interruptible) {
2362         tso->what_next = ThreadKilled;
2363         appendToRunQueue(tso->cap, tso);
2364     } else {
2365         deleteThread(cap,tso);
2366     }
2367 }
2368 #endif
2369
2370 /* -----------------------------------------------------------------------------
2371    raiseExceptionHelper
2372
2373    This function is called by the raise# primitve, just so that we can
2374    move some of the tricky bits of raising an exception from C-- into
2375    C.  Who knows, it might be a useful re-useable thing here too.
2376    -------------------------------------------------------------------------- */
2377
2378 StgWord
2379 raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception)
2380 {
2381     Capability *cap = regTableToCapability(reg);
2382     StgThunk *raise_closure = NULL;
2383     StgPtr p, next;
2384     StgRetInfoTable *info;
2385     //
2386     // This closure represents the expression 'raise# E' where E
2387     // is the exception raise.  It is used to overwrite all the
2388     // thunks which are currently under evaluataion.
2389     //
2390
2391     // OLD COMMENT (we don't have MIN_UPD_SIZE now):
2392     // LDV profiling: stg_raise_info has THUNK as its closure
2393     // type. Since a THUNK takes at least MIN_UPD_SIZE words in its
2394     // payload, MIN_UPD_SIZE is more approprate than 1.  It seems that
2395     // 1 does not cause any problem unless profiling is performed.
2396     // However, when LDV profiling goes on, we need to linearly scan
2397     // small object pool, where raise_closure is stored, so we should
2398     // use MIN_UPD_SIZE.
2399     //
2400     // raise_closure = (StgClosure *)RET_STGCALL1(P_,allocate,
2401     //                                 sizeofW(StgClosure)+1);
2402     //
2403
2404     //
2405     // Walk up the stack, looking for the catch frame.  On the way,
2406     // we update any closures pointed to from update frames with the
2407     // raise closure that we just built.
2408     //
2409     p = tso->sp;
2410     while(1) {
2411         info = get_ret_itbl((StgClosure *)p);
2412         next = p + stack_frame_sizeW((StgClosure *)p);
2413         switch (info->i.type) {
2414
2415         case UPDATE_FRAME:
2416             // Only create raise_closure if we need to.
2417             if (raise_closure == NULL) {
2418                 raise_closure =
2419                     (StgThunk *)allocate(cap,sizeofW(StgThunk)+1);
2420                 SET_HDR(raise_closure, &stg_raise_info, CCCS);
2421                 raise_closure->payload[0] = exception;
2422             }
2423             updateThunk(cap, tso, ((StgUpdateFrame *)p)->updatee,
2424                         (StgClosure *)raise_closure);
2425             p = next;
2426             continue;
2427
2428         case ATOMICALLY_FRAME:
2429             debugTrace(DEBUG_stm, "found ATOMICALLY_FRAME at %p", p);
2430             tso->sp = p;
2431             return ATOMICALLY_FRAME;
2432
2433         case CATCH_FRAME:
2434             tso->sp = p;
2435             return CATCH_FRAME;
2436
2437         case CATCH_STM_FRAME:
2438             debugTrace(DEBUG_stm, "found CATCH_STM_FRAME at %p", p);
2439             tso->sp = p;
2440             return CATCH_STM_FRAME;
2441
2442         case STOP_FRAME:
2443             tso->sp = p;
2444             return STOP_FRAME;
2445
2446         case CATCH_RETRY_FRAME:
2447         default:
2448             p = next;
2449             continue;
2450         }
2451     }
2452 }
2453
2454
2455 /* -----------------------------------------------------------------------------
2456    findRetryFrameHelper
2457
2458    This function is called by the retry# primitive.  It traverses the stack
2459    leaving tso->sp referring to the frame which should handle the retry.
2460
2461    This should either be a CATCH_RETRY_FRAME (if the retry# is within an orElse#)
2462    or should be a ATOMICALLY_FRAME (if the retry# reaches the top level).
2463
2464    We skip CATCH_STM_FRAMEs (aborting and rolling back the nested tx that they
2465    create) because retries are not considered to be exceptions, despite the
2466    similar implementation.
2467
2468    We should not expect to see CATCH_FRAME or STOP_FRAME because those should
2469    not be created within memory transactions.
2470    -------------------------------------------------------------------------- */
2471
2472 StgWord
2473 findRetryFrameHelper (StgTSO *tso)
2474 {
2475   StgPtr           p, next;
2476   StgRetInfoTable *info;
2477
2478   p = tso -> sp;
2479   while (1) {
2480     info = get_ret_itbl((StgClosure *)p);
2481     next = p + stack_frame_sizeW((StgClosure *)p);
2482     switch (info->i.type) {
2483
2484     case ATOMICALLY_FRAME:
2485         debugTrace(DEBUG_stm,
2486                    "found ATOMICALLY_FRAME at %p during retry", p);
2487         tso->sp = p;
2488         return ATOMICALLY_FRAME;
2489
2490     case CATCH_RETRY_FRAME:
2491         debugTrace(DEBUG_stm,
2492                    "found CATCH_RETRY_FRAME at %p during retrry", p);
2493         tso->sp = p;
2494         return CATCH_RETRY_FRAME;
2495
2496     case CATCH_STM_FRAME: {
2497         StgTRecHeader *trec = tso -> trec;
2498         StgTRecHeader *outer = trec -> enclosing_trec;
2499         debugTrace(DEBUG_stm,
2500                    "found CATCH_STM_FRAME at %p during retry", p);
2501         debugTrace(DEBUG_stm, "trec=%p outer=%p", trec, outer);
2502         stmAbortTransaction(tso -> cap, trec);
2503         stmFreeAbortedTRec(tso -> cap, trec);
2504         tso -> trec = outer;
2505         p = next;
2506         continue;
2507     }
2508
2509
2510     default:
2511       ASSERT(info->i.type != CATCH_FRAME);
2512       ASSERT(info->i.type != STOP_FRAME);
2513       p = next;
2514       continue;
2515     }
2516   }
2517 }
2518
2519 /* -----------------------------------------------------------------------------
2520    resurrectThreads is called after garbage collection on the list of
2521    threads found to be garbage.  Each of these threads will be woken
2522    up and sent a signal: BlockedOnDeadMVar if the thread was blocked
2523    on an MVar, or NonTermination if the thread was blocked on a Black
2524    Hole.
2525
2526    Locks: assumes we hold *all* the capabilities.
2527    -------------------------------------------------------------------------- */
2528
2529 void
2530 resurrectThreads (StgTSO *threads)
2531 {
2532     StgTSO *tso, *next;
2533     Capability *cap;
2534     generation *gen;
2535
2536     for (tso = threads; tso != END_TSO_QUEUE; tso = next) {
2537         next = tso->global_link;
2538
2539         gen = Bdescr((P_)tso)->gen;
2540         tso->global_link = gen->threads;
2541         gen->threads = tso;
2542
2543         debugTrace(DEBUG_sched, "resurrecting thread %lu", (unsigned long)tso->id);
2544
2545         // Wake up the thread on the Capability it was last on
2546         cap = tso->cap;
2547
2548         switch (tso->why_blocked) {
2549         case BlockedOnMVar:
2550             /* Called by GC - sched_mutex lock is currently held. */
2551             throwToSingleThreaded(cap, tso,
2552                                   (StgClosure *)blockedIndefinitelyOnMVar_closure);
2553             break;
2554         case BlockedOnBlackHole:
2555             throwToSingleThreaded(cap, tso,
2556                                   (StgClosure *)nonTermination_closure);
2557             break;
2558         case BlockedOnSTM:
2559             throwToSingleThreaded(cap, tso,
2560                                   (StgClosure *)blockedIndefinitelyOnSTM_closure);
2561             break;
2562         case NotBlocked:
2563             /* This might happen if the thread was blocked on a black hole
2564              * belonging to a thread that we've just woken up (raiseAsync
2565              * can wake up threads, remember...).
2566              */
2567             continue;
2568         case BlockedOnMsgThrowTo:
2569             // This can happen if the target is masking, blocks on a
2570             // black hole, and then is found to be unreachable.  In
2571             // this case, we want to let the target wake up and carry
2572             // on, and do nothing to this thread.
2573             continue;
2574         default:
2575             barf("resurrectThreads: thread blocked in a strange way: %d",
2576                  tso->why_blocked);
2577         }
2578     }
2579 }