rts/Schedule.c

   1 /* ---------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2006
   4  *
   5  * The scheduler and thread-related functionality
   6  *
   7  * --------------------------------------------------------------------------*/
   8
   9 #include "PosixSource.h"
  10 #define KEEP_LOCKCLOSURE
  11 #include "Rts.h"
  12
  13 #include "sm/Storage.h"
  14 #include "RtsUtils.h"
  15 #include "StgRun.h"
  16 #include "Schedule.h"
  17 #include "Interpreter.h"
  18 #include "Printer.h"
  19 #include "RtsSignals.h"
  20 #include "sm/Sanity.h"
  21 #include "Stats.h"
  22 #include "STM.h"
  23 #include "Prelude.h"
  24 #include "ThreadLabels.h"
  25 #include "Updates.h"
  26 #include "Proftimer.h"
  27 #include "ProfHeap.h"
  28 #include "Weak.h"
  29 #include "sm/GC.h" // waitForGcThreads, releaseGCThreads, N
  30 #include "Sparks.h"
  31 #include "Capability.h"
  32 #include "Task.h"
  33 #include "AwaitEvent.h"
  34 #if defined(mingw32_HOST_OS)
  35 #include "win32/IOManager.h"
  36 #endif
  37 #include "Trace.h"
  38 #include "RaiseAsync.h"
  39 #include "Threads.h"
  40 #include "Timer.h"
  41 #include "ThreadPaused.h"
  42 #include "Messages.h"
  43
  44 #ifdef HAVE_SYS_TYPES_H
  45 #include <sys/types.h>
  46 #endif
  47 #ifdef HAVE_UNISTD_H
  48 #include <unistd.h>
  49 #endif
  50
  51 #include <string.h>
  52 #include <stdlib.h>
  53 #include <stdarg.h>
  54
  55 #ifdef HAVE_ERRNO_H
  56 #include <errno.h>
  57 #endif
  58
  59 #ifdef TRACING
  60 #include "eventlog/EventLog.h"
  61 #endif
  62 /* -----------------------------------------------------------------------------
  63  * Global variables
  64  * -------------------------------------------------------------------------- */
  65
  66 #if !defined(THREADED_RTS)
  67 // Blocked/sleeping threads
     // (these queues are only compiled in when THREADED_RTS is not defined)
  68 StgTSO *blocked_queue_hd = NULL;
  69 StgTSO *blocked_queue_tl = NULL;
  70 StgTSO *sleeping_queue = NULL;    // perhaps replace with a hash table?
  71 #endif
  72
  73 /* Set to true when the latest garbage collection failed to reclaim
  74  * enough space, and the runtime should proceed to shut itself down in
  75  * an orderly fashion (emitting profiling info etc.)
  76  */
  77 rtsBool heap_overflow = rtsFalse;
  78
  79 /* flag that tracks whether we have done any execution in this time slice.
  80  * LOCK: currently none, perhaps we should lock (but needs to be
  81  * updated in the fast path of the scheduler).
  82  *
  83  * NB. must be StgWord, we do xchg() on it.
  84  */
  85 volatile StgWord recent_activity = ACTIVITY_YES;
  86
  87 /* if this flag is set as well, give up execution
  88  * LOCK: none (changes monotonically)
      *
      * Starts out as SCHED_RUNNING.  schedule() inspects it at the top of
      * every loop iteration and also handles SCHED_INTERRUPTING and
      * SCHED_SHUTTING_DOWN there.
  89  */
  90 volatile StgWord sched_state = SCHED_RUNNING;
  91
  92 /*  This is used in `TSO.h' and gcc 2.96 insists that this variable actually
  93  *  exists - earlier gccs apparently didn't.
  94  *  -= chak
  95  */
  96 StgTSO dummy_tso;
  97
  98 /*
  99  * Set to TRUE when entering a shutdown state (via shutdownHaskellAndExit()) --
 100  * in an MT setting, needed to signal that a worker thread shouldn't hang around
 101  * in the scheduler when it is out of work.
 102  */
 103 rtsBool shutting_down_scheduler = rtsFalse;
 104
 105 /*
 106  * This mutex protects most of the global scheduler data in
 107  * the THREADED_RTS runtime.
 108  */
 109 #if defined(THREADED_RTS)
 110 Mutex sched_mutex;
 111 #endif
 112
     // FORKPROCESS_PRIMOP_SUPPORTED is defined on every host OS except mingw32.
     // In this file it guards the prototype of deleteThread_().
 113 #if !defined(mingw32_HOST_OS)
 114 #define FORKPROCESS_PRIMOP_SUPPORTED
 115 #endif
 116
 117 /* -----------------------------------------------------------------------------
 118  * static function prototypes
 119  * -------------------------------------------------------------------------- */
 120
 121 static Capability *schedule (Capability *initialCapability, Task *task);
 122
 123 //
 124 // These functions all encapsulate parts of the scheduler loop, and are
 125 // abstracted only to make the structure and control flow of the
 126 // scheduler clearer.
 127 //
     // Steps that run before a thread is picked from the run queue:
 128 static void schedulePreLoop (void);
 129 static void scheduleFindWork (Capability *cap);
 130 #if defined(THREADED_RTS)
 131 static void scheduleYield (Capability **pcap, Task *task);
 132 #endif
 133 static void scheduleStartSignalHandlers (Capability *cap);
 134 static void scheduleCheckBlockedThreads (Capability *cap);
 135 static void scheduleProcessInbox(Capability *cap);
 136 static void scheduleDetectDeadlock (Capability *cap, Task *task);
 137 static void schedulePushWork(Capability *cap, Task *task);
 138 #if defined(THREADED_RTS)
 139 static void scheduleActivateSpark(Capability *cap);
 140 #endif
     // Steps that run after a thread has returned to the scheduler; schedule()
     // picks the handler according to the thread's return code:
 141 static void schedulePostRunThread(Capability *cap, StgTSO *t);
 142 static rtsBool scheduleHandleHeapOverflow( Capability *cap, StgTSO *t );
 143 static rtsBool scheduleHandleYield( Capability *cap, StgTSO *t,
 144                                     nat prev_what_next );
 145 static void scheduleHandleThreadBlocked( StgTSO *t );
 146 static rtsBool scheduleHandleThreadFinished( Capability *cap, Task *task,
 147                                              StgTSO *t );
 148 static rtsBool scheduleNeedHeapProfile(rtsBool ready_to_gc);
     // Returns a Capability; callers in this file always assign the result
     // back to their own `cap`.
 149 static Capability *scheduleDoGC(Capability *cap, Task *task,
 150                                 rtsBool force_major);
 151
 152 static void deleteThread (Capability *cap, StgTSO *tso);
 153 static void deleteAllThreads (Capability *cap);
 154
     // Only declared where FORKPROCESS_PRIMOP_SUPPORTED is defined.
 155 #ifdef FORKPROCESS_PRIMOP_SUPPORTED
 156 static void deleteThread_(Capability *cap, StgTSO *tso);
 157 #endif
 158
 159 /* ---------------------------------------------------------------------------
 160    Main scheduling loop.
 161
 162    We use round-robin scheduling, each thread returning to the
 163    scheduler loop when one of these conditions is detected:
 164
 165       * out of heap space
 166       * timer expires (thread yields)
 167       * thread blocks
 168       * thread ends
 169       * stack overflow
 170
 171    GRAN version:
 172      In a GranSim setup this loop iterates over the global event queue.
 173      This revolves around the global event queue, which determines what
 174      to do next. Therefore, it's more complicated than either the
 175      concurrent or the parallel (GUM) setup.
 176   This version has been entirely removed (JB 2008/08).
 177
 178    GUM version:
 179      GUM iterates over incoming messages.
 180      It starts with nothing to do (thus CurrentTSO == END_TSO_QUEUE),
 181      and sends out a fish whenever it has nothing to do; in-between
 182      doing the actual reductions (shared code below) it processes the
 183      incoming messages and deals with delayed operations
 184      (see PendingFetches).
 185      This is not the ugliest code you could imagine, but it's bloody close.
 186
 187   (JB 2008/08) This version was formerly indicated by a PP-Flag PAR,
 188   now by PP-flag PARALLEL_HASKELL. The Eden RTS (in GHC-6.x) uses it,
 189   as well as future GUM versions. This file has been refurbished to
 190   only contain valid code, which is however incomplete, refers to
 191   invalid includes etc.
 192
        Interface of schedule(), as it appears in the code below:

          initialCapability  the Capability the calling Task owns on entry.
          task               the Task executing this loop.
          result             the Capability held when the loop is left.

        The loop is left in exactly two places:
          * during shutdown, when the Task is not bound and the run queue
            of its Capability is empty;
          * when scheduleHandleThreadFinished() returns true for the thread
            that has just finished.

        Each iteration: check sched_state, look for work, push work to
        other Capabilities, check for deadlock, (threaded RTS only) yield
        the Capability if required, pop a thread, run it, and then act on
        the thread's return code.
 193    ------------------------------------------------------------------------ */
 194
 195 static Capability *
 196 schedule (Capability *initialCapability, Task *task)
 197 {
 198   StgTSO *t;
 199   Capability *cap;
 200   StgThreadReturnCode ret;
 201   nat prev_what_next;
 202   rtsBool ready_to_gc;
 203 #if defined(THREADED_RTS)
 204   rtsBool first = rtsTrue;
 205 #endif
 206
 207   cap = initialCapability;
 208
 209   // Pre-condition: this task owns initialCapability.
 210   // The sched_mutex is *NOT* held
 211   // NB. on return, we still hold a capability.
 212
 213   debugTrace (DEBUG_sched, "cap %d: schedule()", initialCapability->no);
 214
 215   schedulePreLoop();
 216
 217   // -----------------------------------------------------------
 218   // Scheduler loop starts here:
 219
 220   while (1) {
 221
 222     // Check whether we have re-entered the RTS from Haskell without
 223     // going via suspendThread()/resumeThread (i.e. a 'safe' foreign
 224     // call).
         // cap->in_haskell is set just before a thread is run and cleared
         // again as soon as it returns (see further down in this loop).
 225     if (cap->in_haskell) {
 226           errorBelch("schedule: re-entered unsafely.\n"
 227                      "   Perhaps a 'foreign import unsafe' should be 'safe'?");
 228           stg_exit(EXIT_FAILURE);
 229     }
 230
 231     // The interruption / shutdown sequence.
 232     //
 233     // In order to cleanly shut down the runtime, we want to:
 234     //   * make sure that all main threads return to their callers
 235     //     with the state 'Interrupted'.
 236     //   * clean up all OS threads associated with the runtime
 237     //   * free all memory etc.
 238     //
 239     // So the sequence for ^C goes like this:
 240     //
 241     //   * ^C handler sets sched_state := SCHED_INTERRUPTING and
 242     //     arranges for some Capability to wake up
 243     //
 244     //   * all threads in the system are halted, and the zombies are
 245     //     placed on the run queue for cleaning up.  We acquire all
 246     //     the capabilities in order to delete the threads, this is
 247     //     done by scheduleDoGC() for convenience (because GC already
 248     //     needs to acquire all the capabilities).  We can't kill
 249     //     threads involved in foreign calls.
 250     //
 251     //   * somebody calls shutdownHaskell(), which calls exitScheduler()
 252     //
 253     //   * sched_state := SCHED_SHUTTING_DOWN
 254     //
 255     //   * all workers exit when the run queue on their capability
 256     //     drains.  All main threads will also exit when their TSO
 257     //     reaches the head of the run queue and they can return.
 258     //
 259     //   * eventually all Capabilities will shut down, and the RTS can
 260     //     exit.
 261     //
 262     //   * We might be left with threads blocked in foreign calls,
 263     //     we should really attempt to kill these somehow (TODO);
 264
 265     switch (sched_state) {
 266     case SCHED_RUNNING:
 267         break;
 268     case SCHED_INTERRUPTING:
 269         debugTrace(DEBUG_sched, "SCHED_INTERRUPTING");
 270 #if defined(THREADED_RTS)
 271         discardSparksCap(cap);
 272 #endif
 273         /* scheduleDoGC() deletes all the threads */
 274         cap = scheduleDoGC(cap,task,rtsFalse);
 275
 276         // after scheduleDoGC(), we must be shutting down.  Either some
 277         // other Capability did the final GC, or we did it above,
 278         // either way we can fall through to the SCHED_SHUTTING_DOWN
 279         // case now.
 280         ASSERT(sched_state == SCHED_SHUTTING_DOWN);
 281         // fall through
 282
 283     case SCHED_SHUTTING_DOWN:
 284         debugTrace(DEBUG_sched, "SCHED_SHUTTING_DOWN");
 285         // If we are a worker, just exit.  If we're a bound thread
 286         // then we will exit below when we've removed our TSO from
 287         // the run queue.
 288         if (!isBoundTask(task) && emptyRunQueue(cap)) {
 289             return cap;
 290         }
 291         break;
 292     default:
             // NOTE(review): sched_state is declared as a volatile StgWord
             // but is formatted with %d here -- confirm that the conversion
             // matches the width of StgWord on all supported platforms.
 293         barf("sched_state: %d", sched_state);
 294     }
 295
 296     scheduleFindWork(cap);
 297
 298     /* work pushing, currently relevant only for THREADED_RTS:
 299        (pushes threads, wakes up idle capabilities for stealing) */
 300     schedulePushWork(cap,task);
 301
 302     scheduleDetectDeadlock(cap,task);
 303
 304 #if defined(THREADED_RTS)
 305     cap = task->cap;    // reload cap, it might have changed
 306 #endif
 307
 308     // Normally, the only way we can get here with no threads to
 309     // run is if a keyboard interrupt received during
 310     // scheduleCheckBlockedThreads() or scheduleDetectDeadlock().
 311     // Additionally, it is not fatal for the
 312     // threaded RTS to reach here with no threads to run.
 313     //
 314     // win32: might be here due to awaitEvent() being abandoned
 315     // as a result of a console event having been delivered.
 316
 317 #if defined(THREADED_RTS)
         // NB. everything between the braces below is commented out, so this
         // test currently does nothing and `first` is never cleared.
 318     if (first)
 319     {
 320     // XXX: ToDo
 321     //     // don't yield the first time, we want a chance to run this
 322     //     // thread for a bit, even if there are others banging at the
 323     //     // door.
 324     //     first = rtsFalse;
 325     //     ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 326     }
 327
         // scheduleYield() may hand us a different Capability; it writes the
         // one we now hold back through &cap.
 328     scheduleYield(&cap,task);
 329
 330     if (emptyRunQueue(cap)) continue; // look for work again
 331 #endif
 332
 333 #if !defined(THREADED_RTS) && !defined(mingw32_HOST_OS)
 334     if ( emptyRunQueue(cap) ) {
 335         ASSERT(sched_state >= SCHED_INTERRUPTING);
 336     }
 337 #endif
 338
 339     //
 340     // Get a thread to run
 341     //
 342     t = popRunQueue(cap);
 343
 344     // Sanity check the thread we're about to run.  This can be
 345     // expensive if there is lots of thread switching going on...
 346     IF_DEBUG(sanity,checkTSO(t));
 347
 348 #if defined(THREADED_RTS)
 349     // Check whether we can run this thread in the current task.
 350     // If not, we have to pass our capability to the right task.
         // In both "cannot run it" cases the thread is put back with
         // pushOnRunQueue() and the loop starts over.
 351     {
 352         InCall *bound = t->bound;
 353
 354         if (bound) {
 355             if (bound->task == task) {
 356                 // yes, the Haskell thread is bound to the current native thread
 357             } else {
 358                 debugTrace(DEBUG_sched,
 359                            "thread %lu bound to another OS thread",
 360                            (unsigned long)t->id);
 361                 // no, bound to a different Haskell thread: pass to that thread
 362                 pushOnRunQueue(cap,t);
 363                 continue;
 364             }
 365         } else {
 366             // The thread we want to run is unbound.
 367             if (task->incall->tso) {
 368                 debugTrace(DEBUG_sched,
 369                            "this OS thread cannot run thread %lu",
 370                            (unsigned long)t->id);
 371                 // no, the current native thread is bound to a different
 372                 // Haskell thread, so pass it to any worker thread
 373                 pushOnRunQueue(cap,t);
 374                 continue;
 375             }
 376         }
 377     }
 378 #endif
 379
 380     // If we're shutting down, and this thread has not yet been
 381     // killed, kill it now.  This sometimes happens when a finalizer
 382     // thread is created by the final GC, or a thread previously
 383     // in a foreign call returns.
 384     if (sched_state >= SCHED_INTERRUPTING &&
 385         !(t->what_next == ThreadComplete || t->what_next == ThreadKilled)) {
 386         deleteThread(cap,t);
 387     }
 388
 389     /* context switches are initiated by the timer signal, unless
 390      * the user specified "context switch as often as possible", with
 391      * +RTS -C0
 392      */
 393     if (RtsFlags.ConcFlags.ctxtSwitchTicks == 0
 394         && !emptyThreadQueues(cap)) {
 395         cap->context_switch = 1;
 396     }
 397
 398 run_thread:
 399
 400     // CurrentTSO is the thread to run.  t might be different if we
 401     // loop back to run_thread, so make sure to set CurrentTSO after
 402     // that.
 403     cap->r.rCurrentTSO = t;
 404
 405     startHeapProfTimer();
 406
 407     // ----------------------------------------------------------------------
 408     // Run the current thread
 409
 410     ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 411     ASSERT(t->cap == cap);
 412     ASSERT(t->bound ? t->bound->task->cap == cap : 1);
 413
         // Remember how the thread is to be resumed; the value is used for the
         // dispatch below and passed to scheduleHandleYield() afterwards.
 414     prev_what_next = t->what_next;
 415
         // Install the errno (and, on mingw32, the Windows last-error code)
         // stored in the TSO; both are written back after the thread returns.
 416     errno = t->saved_errno;
 417 #if mingw32_HOST_OS
 418     SetLastError(t->saved_winerror);
 419 #endif
 420
 421     cap->in_haskell = rtsTrue;
 422
 423     dirty_TSO(cap,t);
 424     dirty_STACK(cap,t->stackobj);
 425
 426 #if defined(THREADED_RTS)
 427     if (recent_activity == ACTIVITY_DONE_GC) {
 428         // ACTIVITY_DONE_GC means we turned off the timer signal to
 429         // conserve power (see #1623).  Re-enable it here.
             // The xchg returns the previous value, so the timer is only
             // restarted if the flag still read ACTIVITY_DONE_GC at the
             // moment of the swap.
 430         nat prev;
 431         prev = xchg((P_)&recent_activity, ACTIVITY_YES);
 432         if (prev == ACTIVITY_DONE_GC) {
 433             startTimer();
 434         }
 435     } else if (recent_activity != ACTIVITY_INACTIVE) {
 436         // If we reached ACTIVITY_INACTIVE, then don't reset it until
 437         // we've done the GC.  The thread running here might just be
 438         // the IO manager thread that handle_tick() woke up via
 439         // wakeUpRts().
 440         recent_activity = ACTIVITY_YES;
 441     }
 442 #endif
 443
 444     traceEventRunThread(cap, t);
 445
         // Dispatch on how the thread is to be resumed.  Threads that are
         // already killed or complete are not run at all and are reported as
         // ThreadFinished; compiled code goes through StgRun() and
         // byte-code through interpretBCO().  Both of the latter reassign
         // cap, so only the new value may be used afterwards.
 446     switch (prev_what_next) {
 447
 448     case ThreadKilled:
 449     case ThreadComplete:
 450         /* Thread already finished, return to scheduler. */
 451         ret = ThreadFinished;
 452         break;
 453
 454     case ThreadRunGHC:
 455     {
 456         StgRegTable *r;
 457         r = StgRun((StgFunPtr) stg_returnToStackTop, &cap->r);
 458         cap = regTableToCapability(r);
 459         ret = r->rRet;
 460         break;
 461     }
 462
 463     case ThreadInterpret:
 464         cap = interpretBCO(cap);
 465         ret = cap->r.rRet;
 466         break;
 467
 468     default:
 469         barf("schedule: invalid what_next field");
 470     }
 471
 472     cap->in_haskell = rtsFalse;
 473
 474     // The TSO might have moved, eg. if it re-entered the RTS and a GC
 475     // happened.  So find the new location:
 476     t = cap->r.rCurrentTSO;
 477
 478     // And save the current errno in this thread.
 479     // XXX: possibly bogus for SMP because this thread might already
 480     // be running again, see code below.
 481     t->saved_errno = errno;
 482 #if mingw32_HOST_OS
 483     // Similarly for Windows error code
 484     t->saved_winerror = GetLastError();
 485 #endif
 486
         // Emit the "stop thread" trace event.  For blocked threads the
         // status passed is why_blocked + 6, and for a thread blocked on a
         // black hole the owner's thread id (or 0 if there is none) is
         // passed as well.
         // NOTE(review): the constant 6 presumably shifts why_blocked values
         // into the event-log status numbering -- confirm against the
         // event-log definitions and consider naming it.
 487     if (ret == ThreadBlocked) {
 488         if (t->why_blocked == BlockedOnBlackHole) {
 489             StgTSO *owner = blackHoleOwner(t->block_info.bh->bh);
 490             traceEventStopThread(cap, t, t->why_blocked + 6,
 491                                  owner != NULL ? owner->id : 0);
 492         } else {
 493             traceEventStopThread(cap, t, t->why_blocked + 6, 0);
 494         }
 495     } else {
 496         traceEventStopThread(cap, t, ret, 0);
 497     }
 498
 499     ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 500     ASSERT(t->cap == cap);
 501
 502     // ----------------------------------------------------------------------
 503
 504     // Costs for the scheduler are assigned to CCS_SYSTEM
 505     stopHeapProfTimer();
 506 #if defined(PROFILING)
 507     CCCS = CCS_SYSTEM;
 508 #endif
 509
 510     schedulePostRunThread(cap,t);
 511
 512     ready_to_gc = rtsFalse;
 513
         // Act on the reason the thread came back to the scheduler.
 514     switch (ret) {
 515     case HeapOverflow:
 516         ready_to_gc = scheduleHandleHeapOverflow(cap,t);
 517         break;
 518
 519     case StackOverflow:
 520         // just adjust the stack for this thread, then pop it back
 521         // on the run queue.
 522         threadStackOverflow(cap, t);
 523         pushOnRunQueue(cap,t);
 524         break;
 525
 526     case ThreadYielding:
 527         if (scheduleHandleYield(cap, t, prev_what_next)) {
 528             // shortcut for switching between compiler/interpreter:
 529             goto run_thread;
 530         }
 531         break;
 532
 533     case ThreadBlocked:
 534         scheduleHandleThreadBlocked(t);
 535         break;
 536
 537     case ThreadFinished:
             // A true result means this invocation of schedule() is done.
 538         if (scheduleHandleThreadFinished(cap, task, t)) return cap;
 539         ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 540         break;
 541
 542     default:
 543       barf("schedule: invalid thread return code %d", (int)ret);
 544     }
 545
         // Because of short-circuit evaluation, scheduleNeedHeapProfile() is
         // only called when ready_to_gc is false.
 546     if (ready_to_gc || scheduleNeedHeapProfile(ready_to_gc)) {
 547       cap = scheduleDoGC(cap,task,rtsFalse);
 548     }
 549   } /* end of while() */
 550 }
 551
 552 /* -----------------------------------------------------------------------------
 553  * Run queue operations
      *
      * removeFromRunQueue() unlinks tso from the run queue of cap, wherever
      * in the queue it sits.  The code treats the queue as doubly linked:
      * tso->_link is the next element, tso->block_info.prev the previous
      * one, and END_TSO_QUEUE terminates the queue in both directions.
      * On return both of tso's link fields are END_TSO_QUEUE.
 554  * -------------------------------------------------------------------------- */
 555
 556 void
 557 removeFromRunQueue (Capability *cap, StgTSO *tso)
 558 {
         // Fix up whatever pointed forward at tso: the queue head if tso was
         // first, otherwise the predecessor's _link.
 559     if (tso->block_info.prev == END_TSO_QUEUE) {
 560         ASSERT(cap->run_queue_hd == tso);
 561         cap->run_queue_hd = tso->_link;
 562     } else {
 563         setTSOLink(cap, tso->block_info.prev, tso->_link);
 564     }
         // Fix up whatever pointed back at tso: the queue tail if tso was
         // last, otherwise the successor's prev field.
 565     if (tso->_link == END_TSO_QUEUE) {
 566         ASSERT(cap->run_queue_tl == tso);
 567         cap->run_queue_tl = tso->block_info.prev;
 568     } else {
 569         setTSOPrev(cap, tso->_link, tso->block_info.prev);
 570     }
         // Detach tso itself.
 571     tso->_link = tso->block_info.prev = END_TSO_QUEUE;
 572
 573     IF_DEBUG(sanity, checkRunQueue(cap));
 574 }
 575
 576 /* ----------------------------------------------------------------------------
 577  * Setting up the scheduler loop
      *
      * schedulePreLoop() is called once by schedule(), before its loop is
      * entered.  The body is currently empty.
 578  * ------------------------------------------------------------------------- */
 579
 580 static void
 581 schedulePreLoop(void)
 582 {
 583   // initialisation for scheduler - what cannot go into initScheduler()
 584 }
 585
 586 /* -----------------------------------------------------------------------------
 587  * scheduleFindWork()
 588  *
 589  * Search for work to do, and handle messages from elsewhere.
      *
      * Runs, in this order: pending signal handlers, the Capability's
      * inbox, and the check for blocked threads.  In the threaded RTS,
      * scheduleActivateSpark() is called afterwards if the run queue is
      * still empty.
 590  * -------------------------------------------------------------------------- */
 591
 592 static void
 593 scheduleFindWork (Capability *cap)
 594 {
 595     scheduleStartSignalHandlers(cap);
 596
 597     scheduleProcessInbox(cap);
 598
 599     scheduleCheckBlockedThreads(cap);
 600
 601 #if defined(THREADED_RTS)
 602     if (emptyRunQueue(cap)) { scheduleActivateSpark(cap); }
 603 #endif
 604 }
 605
 606 #if defined(THREADED_RTS)
 607 STATIC_INLINE rtsBool
 608 shouldYieldCapability (Capability *cap, Task *task)
 609 {
 610     // we need to yield this capability to someone else if..
 611     //   - another thread is initiating a GC
 612     //   - another Task is returning from a foreign call
 613     //   - the thread at the head of the run queue cannot be run
 614     //     by this Task (it is bound to another Task, or it is unbound
 615     //     and this task is bound).
         //
         // The third test is only made when the run queue is non-empty:
         //   - if this Task has no TSO of its own (task->incall->tso == NULL),
         //     it cannot run a head thread that has any binding;
         //   - otherwise it can only run a head thread whose binding is this
         //     Task's own incall, so an unbound head thread (bound == NULL)
         //     also makes us yield.
 616     return (waiting_for_gc ||
 617             cap->returning_tasks_hd != NULL ||
 618             (!emptyRunQueue(cap) && (task->incall->tso == NULL
 619                                      ? cap->run_queue_hd->bound != NULL
 620                                      : cap->run_queue_hd->bound != task->incall)));
 621 }
 622
 623 // This is the single place where a Task goes to sleep.  There are
 624 // two reasons it might need to sleep:
 625 //    - there are no threads to run
 626 //    - we need to yield this Capability to someone else
 627 //      (see shouldYieldCapability())
 628 //
 629 // Careful: the scheduler loop is quite delicate.  Make sure you run
 630 // the tests in testsuite/concurrent (all ways) after modifying this,
 631 // and also check the benchmarks in nofib/parallel for regressions.
     //
     // pcap is an in/out parameter: it names the Capability held on entry
     // and, if the Task yielded, is updated to the Capability held on
     // return.  On the early-return path *pcap is left untouched.
 632
 633 static void
 634 scheduleYield (Capability **pcap, Task *task)
 635 {
 636     Capability *cap = *pcap;
 637
 638     // if we have work, and we don't need to give up the Capability, continue.
 639     //
         // "Work" here means a non-empty run queue, a non-empty inbox, or a
         // scheduler state of SCHED_INTERRUPTING or beyond.
 640     if (!shouldYieldCapability(cap,task) &&
 641         (!emptyRunQueue(cap) ||
 642          !emptyInbox(cap) ||
 643          sched_state >= SCHED_INTERRUPTING))
 644         return;
 645
 646     // otherwise yield (sleep), and keep yielding if necessary.
         // yieldCapability() runs at least once and may replace cap; the
         // yield test is then repeated against the Capability we got back.
 647     do {
 648         yieldCapability(&cap,task);
 649     }
 650     while (shouldYieldCapability(cap,task));
 651
 652     // note there may still be no threads on the run queue at this
 653     // point, the caller has to check.
 654
 655     *pcap = cap;
 656     return;
 657 }
 658 #endif
 659
 660 /* -----------------------------------------------------------------------------
 661  * schedulePushWork()
 662  *
 663  * Push work to other Capabilities if we have some.
      *
      * Only does anything in the threaded RTS, and only when migration is
      * enabled.  If this Capability has surplus work (more than one thread
      * on the run queue, or sparks to spare), it tries to grab every other
      * Capability, keeps those that turn out to be idle, and deals the
      * threads after the head of its run queue out to them round-robin,
      * keeping one for itself on each round.  Threads bound to this Task
      * and locked threads are never moved.  All grabbed Capabilities are
      * released (and woken) before returning, and task->cap is reset to cap.
      *
      *   cap   the Capability owned by the calling Task.
      *   task  the calling Task.
 664  * -------------------------------------------------------------------------- */
 665
 666 static void
 667 schedulePushWork(Capability *cap USED_IF_THREADS,
 668                  Task *task      USED_IF_THREADS)
 669 {
 670   /* following code not for PARALLEL_HASKELL. I kept the call general,
 671      future GUM versions might use pushing in a distributed setup */
 672 #if defined(THREADED_RTS)
 673
 674     Capability *free_caps[n_capabilities], *cap0;
 675     nat i, n_free_caps;
 676
 677     // migration can be turned off with +RTS -qm
 678     if (!RtsFlags.ParFlags.migrate) return;
 679
 680     // Check whether we have more threads on our run queue, or sparks
 681     // in our pool, that we could hand to another Capability.
         // (Empty run queue: need at least 2 sparks.  Exactly one thread:
         // need at least 1 spark.  Two or more threads: always continue.)
 682     if (cap->run_queue_hd == END_TSO_QUEUE) {
 683         if (sparkPoolSizeCap(cap) < 2) return;
 684     } else {
 685         if (cap->run_queue_hd->_link == END_TSO_QUEUE &&
 686             sparkPoolSizeCap(cap) < 1) return;
 687     }
 688
 689     // First grab as many free Capabilities as we can.
 690     for (i=0, n_free_caps=0; i < n_capabilities; i++) {
 691         cap0 = &capabilities[i];
 692         if (cap != cap0 && tryGrabCapability(cap0,task)) {
                 // All three tests must look at the Capability we have just
                 // grabbed (cap0), not at our own (cap): we want to know
                 // whether cap0 has threads, returning tasks or inbox
                 // messages of its own to deal with.
 693             if (!emptyRunQueue(cap0)
 694                 || cap0->returning_tasks_hd != NULL
 695                 || cap0->inbox != (Message*)END_TSO_QUEUE) {
 696                 // it already has some work, we just grabbed it at
 697                 // the wrong moment.  Or maybe it's deadlocked!
 698                 releaseCapability(cap0);
 699             } else {
 700                 free_caps[n_free_caps++] = cap0;
 701             }
 702         }
 703     }
 704
 705     // we now have n_free_caps free capabilities stashed in
 706     // free_caps[].  Share our run queue equally with them.  This is
 707     // probably the simplest thing we could do; improvements we might
 708     // want to do include:
 709     //
 710     //   - giving high priority to moving relatively new threads, on
 711     //     the grounds that they haven't had time to build up a
 712     //     working set in the cache on this CPU/Capability.
 713     //
 714     //   - giving low priority to moving long-lived threads
 715
 716     if (n_free_caps > 0) {
 717         StgTSO *prev, *t, *next;
 718         rtsBool pushed_to_all;
 719
 720         debugTrace(DEBUG_sched,
 721                    "cap %d: %s and %d free capabilities, sharing...",
 722                    cap->no,
 723                    (!emptyRunQueue(cap) && cap->run_queue_hd->_link != END_TSO_QUEUE)?
 724                    "excess threads on run queue":"sparks to share (>=2)",
 725                    n_free_caps);
 726
 727         i = 0;
 728         pushed_to_all = rtsFalse;
 729
 730         if (cap->run_queue_hd != END_TSO_QUEUE) {
                 // The head thread always stays here.  The rest of the queue
                 // is detached and walked once; every thread is either
                 // re-linked behind `prev` (kept) or appended to the run
                 // queue of free_caps[i] (moved).
 731             prev = cap->run_queue_hd;
 732             t = prev->_link;
 733             prev->_link = END_TSO_QUEUE;
 734             for (; t != END_TSO_QUEUE; t = next) {
 735                 next = t->_link;
 736                 t->_link = END_TSO_QUEUE;
 737                 if (t->bound == task->incall // don't move my bound thread
 738                     || tsoLocked(t)) {  // don't move a locked thread
 739                     setTSOLink(cap, prev, t);
 740                     setTSOPrev(cap, t, prev);
 741                     prev = t;
 742                 } else if (i == n_free_caps) {
                         // every free Capability has received a thread in
                         // this round: start the next round
 743                     pushed_to_all = rtsTrue;
 744                     i = 0;
 745                     // keep one for us
 746                     setTSOLink(cap, prev, t);
 747                     setTSOPrev(cap, t, prev);
 748                     prev = t;
 749                 } else {
 750                     appendToRunQueue(free_caps[i],t);
 751
 752                     traceEventMigrateThread (cap, t, free_caps[i]->no);
 753
                         // a bound thread takes its Task along to the new
                         // Capability
 754                     if (t->bound) { t->bound->task->cap = free_caps[i]; }
 755                     t->cap = free_caps[i];
 756                     i++;
 757                 }
 758             }
 759             cap->run_queue_tl = prev;
 760
 761             IF_DEBUG(sanity, checkRunQueue(cap));
 762         }
 763
 764 #ifdef SPARK_PUSHING
 765         /* JB I left this code in place, it would work but is not necessary */
 766
 767         // If there are some free capabilities that we didn't push any
 768         // threads to, then try to push a spark to each one.
 769         if (!pushed_to_all) {
 770             StgClosure *spark;
 771             // i is the next free capability to push to
 772             for (; i < n_free_caps; i++) {
 773                 if (emptySparkPoolCap(free_caps[i])) {
 774                     spark = tryStealSpark(cap->sparks);
 775                     if (spark != NULL) {
 776                         debugTrace(DEBUG_sched, "pushing spark %p to capability %d", spark, free_caps[i]->no);
 777
                 // NOTE(review): at this point `t` is either END_TSO_QUEUE
                 // (run queue was non-empty, loop above finished) or was
                 // never assigned (run queue was empty).  Neither looks like
                 // a meaningful argument -- check before enabling
                 // SPARK_PUSHING.
 778             traceEventStealSpark(free_caps[i], t, cap->no);
 779
 780                         newSpark(&(free_caps[i]->r), spark);
 781                     }
 782                 }
 783             }
 784         }
 785 #endif /* SPARK_PUSHING */
 786
 787         // release the capabilities
             // task->cap is pointed at each Capability in turn before it is
             // released, and restored to our own Capability afterwards.
 788         for (i = 0; i < n_free_caps; i++) {
 789             task->cap = free_caps[i];
 790             releaseAndWakeupCapability(free_caps[i]);
 791         }
 792     }
 793     task->cap = cap; // reset to point to our Capability.
 794
 795 #endif /* THREADED_RTS */
 796
 797 }
 798
 799 /* ----------------------------------------------------------------------------
 800  * Start any pending signal handlers
      *
      * Two variants are compiled.  With RTS_USER_SIGNALS and without
      * THREADED_RTS, pending signals are handed to startSignalHandlers(),
      * provided the install_signal_handlers flag is set.  In every other
      * configuration the function is an empty stub.
 801  * ------------------------------------------------------------------------- */
 802
 803 #if defined(RTS_USER_SIGNALS) && !defined(THREADED_RTS)
 804 static void
 805 scheduleStartSignalHandlers(Capability *cap)
 806 {
 807     if (RtsFlags.MiscFlags.install_signal_handlers && signals_pending()) {
 808         // safe outside the lock
 809         startSignalHandlers(cap);
 810     }
 811 }
 812 #else
 813 static void
 814 scheduleStartSignalHandlers(Capability *cap STG_UNUSED)
 815 {
         // nothing to do in this configuration
 816 }
 817 #endif
 818
 819 /* ----------------------------------------------------------------------------
 820  * Check for blocked threads that can be woken up.
      *
      * Does nothing in the threaded RTS.  Otherwise, if either the blocked
      * queue or the sleeping queue is non-empty, awaitEvent() is called.
 821  * ------------------------------------------------------------------------- */
 822
 823 static void
 824 scheduleCheckBlockedThreads(Capability *cap USED_IF_NOT_THREADS)
 825 {
 826 #if !defined(THREADED_RTS)
 827     //
 828     // Check whether any waiting threads need to be woken up.  If the
 829     // run queue is empty, and there are no other tasks running, we
 830     // can wait indefinitely for something to happen.
 831     //
         // The argument passed to awaitEvent() is true exactly when the run
         // queue is empty.
         // NOTE(review): presumably this is a "may block" flag, matching the
         // comment above -- confirm against awaitEvent().
 832     if ( !emptyQueue(blocked_queue_hd) || !emptyQueue(sleeping_queue) )
 833     {
 834         awaitEvent (emptyRunQueue(cap));
 835     }
 836 #endif
 837 }
 838
 839 /* ----------------------------------------------------------------------------
 840  * Detect deadlock conditions and attempt to resolve them.
 841  *
 842  * If all the thread queues of 'cap' are empty, force a major GC, which
 843  * may release some threads.  If the run queue is still empty after
 844  * that, the non-threaded RTS either waits for a user signal or throws
 845  * nonTermination_closure to the main thread of 'task'.
 846  * ------------------------------------------------------------------------- */
 847
 848 static void
 849 scheduleDetectDeadlock (Capability *cap, Task *task)
 850 {
 851     // We can only be deadlocked when there are no threads to run and
 852     // no threads blocked, waiting for I/O, or sleeping.
 853     if (!emptyThreadQueues(cap)) {
 854         return;
 855     }
 856
 857 #if defined(THREADED_RTS)
 858     // In the threaded RTS the check is made only after a complete
 859     // timeslice without any activity, so that we don't eagerly start
 860     // a full GC just because nothing happens to be runnable right now.
 861     if (recent_activity != ACTIVITY_INACTIVE) {
 862         return;
 863     }
 864 #endif
 865
 866     debugTrace(DEBUG_sched, "deadlocked, forcing major GC...");
 867
 868     // The GC can release new threads, through (a) finalizers or
 869     // (b) unreachable threads that are resurrected in order to be
 870     // sent an exception; such threads are immediately runnable.
 871     // When called with force_major == rtsTrue while the system is
 872     // inactive, scheduleDoGC also sets recent_activity to
 873     // ACTIVITY_DONE_GC and turns off the timer signal.
 874     cap = scheduleDoGC(cap, task, rtsTrue /* force major GC */);
 875
 876     if (!emptyRunQueue(cap)) {
 877         return;
 878     }
 879
 880 #if defined(RTS_USER_SIGNALS) && !defined(THREADED_RTS)
 881     // If there are user-installed signal handlers, wait for a signal
 882     // to arrive rather than bombing out with a deadlock.
 883     if (RtsFlags.MiscFlags.install_signal_handlers && anyUserHandlers()) {
 884         debugTrace(DEBUG_sched,
 885                    "still deadlocked, waiting for signals...");
 886
 887         awaitUserSignals();
 888         if (signals_pending()) {
 889             startSignalHandlers(cap);
 890         }
 891
 892         // either we have threads to run, or we were interrupted:
 893         ASSERT(!emptyRunQueue(cap) || sched_state >= SCHED_INTERRUPTING);
 894         return;
 895     }
 896 #endif
 897
 898 #if !defined(THREADED_RTS)
 899     // Probably a real deadlock.  If this Task has a main thread, send
 900     // it the Deadlock exception (nonTermination_closure).
 901     {
 902         StgTSO *main_tso = task->incall->tso;
 903
 904         if (main_tso != NULL) {
 905             switch (main_tso->why_blocked) {
 906             case BlockedOnSTM:
 907             case BlockedOnBlackHole:
 908             case BlockedOnMsgThrowTo:
 909             case BlockedOnMVar:
 910                 throwToSingleThreaded(cap, main_tso,
 911                                       (StgClosure *)nonTermination_closure);
 912                 break;
 913             default:
 914                 // any other blocking reason is unexpected here
 915                 barf("deadlock: main thread blocked in a strange way");
 916             }
 917         }
 918     }
 919 #endif
 920 }
 921
 922
 923 /* ----------------------------------------------------------------------------
 924  * Send pending messages.  Compiled only when PARALLEL_HASKELL is defined.
 925  * ------------------------------------------------------------------------- */
 926
 927 #if defined(PARALLEL_HASKELL)
 928 static void
 929 scheduleSendPendingMessages(void)
 930 {
 931 # if defined(PAR) // global Mem.Mgmt., omit for now
 932     // Process the pending fetches whenever their queue is non-empty.
 933     if (PendingFetches != END_BF_QUEUE) {
 934         processFetches();
 935     }
 936 # endif
 937
 938     // With message buffering in use (BufferTime is non-zero), all
 939     // message packets that have become too old must be sent away.
 940     if (RtsFlags.ParFlags.BufferTime != 0) {
 941         sendOldBuffers();
 942     }
 943 }
 944 #endif
 945
 946 /* ----------------------------------------------------------------------------
 947  * Process every message in the current Capability's inbox (THREADED_RTS)
 948  * ------------------------------------------------------------------------- */
 949 static void scheduleProcessInbox (Capability *cap USED_IF_THREADS)
 950 {
 951 #if defined(THREADED_RTS)
 952     Message *msg;
 953
 954     while (!emptyInbox(cap)) {
 955         // Unlink the message at the head of the inbox under cap->lock...
 956         ACQUIRE_LOCK(&cap->lock);
 957         msg = cap->inbox;
 958         cap->inbox = msg->link;
 959         RELEASE_LOCK(&cap->lock);
 960         // ...and execute it only once the lock has been released.
 961         executeMessage(cap, msg);
 962     }
 963 #endif
 964 }
 965
 966 /* ----------------------------------------------------------------------------
 967  * Activate spark threads (THREADED_RTS only): when anySparks() reports
 968  * sparks, create a spark thread on this Capability.
 969  * ------------------------------------------------------------------------- */
 970 #if defined(THREADED_RTS)
 971 static void
 972 scheduleActivateSpark(Capability *cap)
 973 {
 974     if (!anySparks()) {
 975         return;
 976     }
 977     createSparkThread(cap);
 978     debugTrace(DEBUG_sched, "creating a spark thread");
 979 }
 980 #endif // THREADED_RTS
 981
 982 /* ----------------------------------------------------------------------------
 983  * After running a thread: validate the transaction of a thread that is
 984  * inside an STM transaction and is not blocked.
 985  * ------------------------------------------------------------------------- */
 986 static void
 987 schedulePostRunThread (Capability *cap, StgTSO *t)
 988 {
 989     // Only an unblocked thread inside a transaction needs checking.
 990     if (t->trec == NO_TREC || t->why_blocked != NotBlocked) {
 991         return;
 992     }
 993
 994     // We have to be able to catch transactions that are stuck in an
 995     // infinite loop as a result of seeing an inconsistent view of
 996     // memory, e.g.
 997     //
 998     //   atomically $ do
 999     //       [a,b] <- mapM readTVar [ta,tb]
1000     //       when (a == b) loop
1001     //
1002     // where a is never equal to b given a consistent view of memory.
1003     // A transaction that still validates is left alone.
1004     if (stmValidateNestOfTransactions(t->trec)) {
1005         return;
1006     }
1007
1008     debugTrace(DEBUG_sched | DEBUG_stm,
1009                "trec %p found wasting its time", t);
1010
1011     // Strip the stack back to the ATOMICALLY_FRAME, aborting the
1012     // (nested) transaction, and saving the stack of any
1013     // partially-evaluated thunks on the heap.
1014     throwToSingleThreaded_(cap, t, NULL, rtsTrue);
1015 }
1016
1017 /* -----------------------------------------------------------------------------
1018  * Handle a thread that returned to the scheduler because of a heap overflow.
1019  *
1020  * Returns rtsFalse if the request was satisfied by adding a large block to
1021  * the nursery (no GC needed), rtsTrue if the caller should go on to GC.
1022  * In both cases the thread has been put back on the run queue.
1023  * -------------------------------------------------------------------------- */
1024
1025 static rtsBool
1026 scheduleHandleHeapOverflow( Capability *cap, StgTSO *t )
1027 {
1028     // did the task ask for a large block?
1029     if (cap->r.rHpAlloc > BLOCK_SIZE) {
1030         bdescr *cur = cap->r.rCurrentNursery;
1031         bdescr *grp, *b;
1032         lnat blocks = (lnat)BLOCK_ROUND_UP(cap->r.rHpAlloc) / BLOCK_SIZE;
1033
1034         if (blocks > BLOCKS_PER_MBLOCK) {
1035             barf("allocation of %ld bytes too large (GHC should have complained at compile-time)", (long)cap->r.rHpAlloc);
1036         }
1037
1038         debugTrace(DEBUG_sched,
1039                    "--<< thread %ld (%s) stopped: requesting a large block (size %ld)\n",
1040                    (long)t->id, what_next_strs[t->what_next], blocks);
1041
1042         // Only take the shortcut if the current nursery block is not the
1043         // last one; if the nursery is (nearly) full we'll GC first.  The
1044         // n_blocks == 1 test is paranoia to prevent an infinite loop
1045         // when the nursery has only one block.
1046         if (cur->link != NULL || cap->r.rNursery->n_blocks == 1) {
1047             grp = allocGroup_lock(blocks);
1048             cap->r.rNursery->n_blocks += blocks;
1049
1050             // splice the new group into the list just in front of the
1051             // current nursery block
1052             grp->link = cur;
1053             grp->u.back = cur->u.back;
1054             if (cur->u.back == NULL) {
1055                 cap->r.rNursery->blocks = grp;
1056             } else {
1057                 cur->u.back->link = grp;
1058             }
1059             cur->u.back = grp;
1060
1061             // Initialise it as a nursery block.  The step, gen_no and
1062             // flags fields of *every* sub-block of the group are set,
1063             // because that is easier than making sure we always find
1064             // the block head of a large block whenever Bdescr() is
1065             // called (evacuate() and isAlive() in the GC would both
1066             // have to do this, at least).
1067             for (b = grp; b < grp + blocks; b++) {
1068                 initBdescr(b, g0, g0);
1069                 b->free = b->start;
1070                 b->flags = 0;
1071             }
1072
1073             // This check can be a killer if the app is doing lots of
1074             // large block allocations.
1075             IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
1076
1077             // now make the new group the current nursery block
1078             cap->r.rCurrentNursery = grp;
1079
1080             // We might be unlucky and have another thread get on the run
1081             // queue before us and steal the large block, but in that case
1082             // the thread will just end up requesting another large block.
1083             pushOnRunQueue(cap, t);
1084             return rtsFalse;  /* not actually GC'ing */
1085         }
1086     }
1087
1088     // A GC is needed: put the thread back on the run queue first.
1089     if (cap->r.rHpLim != NULL && !cap->context_switch) {
1090         pushOnRunQueue(cap, t);
1091     } else {
1092         // Sometimes we miss a context switch, e.g. when calling
1093         // primitives in a tight loop, MAYBE_GC() doesn't check the
1094         // context switch flag, and we end up waiting for a GC.
1095         // See #1984, and concurrent/should_run/1984
1096         cap->context_switch = 0;
1097         appendToRunQueue(cap, t);
1098     }
1099
1100     /* actual GC is done at the end of the while loop in schedule() */
1101     return rtsTrue;
1102 }
1103
1104 /* -----------------------------------------------------------------------------
1105  * Handle a thread that returned to the scheduler with ThreadYielding.
1106  *
1107  * Returns rtsTrue if the thread only stopped to switch evaluators while
1108  * no context switch was pending; in that case it is NOT put back on the
1109  * run queue here.  Otherwise the context switch flag is reset, the
1110  * thread is appended to the run queue and rtsFalse is returned.
1111  * -------------------------------------------------------------------------- */
1112
1113 static rtsBool
1114 scheduleHandleYield( Capability *cap, StgTSO *t, nat prev_what_next )
1115 {
1116     // the yielding thread is expected not to be linked to another TSO
1117     ASSERT(t->_link == END_TSO_QUEUE);
1118
1119     if (cap->context_switch != 0 || t->what_next == prev_what_next) {
1120         // A genuine yield.  Reset the context switch flag here rather
1121         // than just before running the thread: doing it there would
1122         // lose ticks that arrive during GC, which can lead to unfair
1123         // scheduling (a thread hogs the CPU because the tick always
1124         // arrives during GC).  This way penalises threads that do a
1125         // lot of allocation, but that seems better than the
1126         // alternative.
1127         cap->context_switch = 0;
1128
1129         IF_DEBUG(sanity, checkTSO(t));
1130
1131         // put the thread back on the run queue
1132         appendToRunQueue(cap, t);
1133         return rtsFalse;
1134     }
1135
1136     // Shortcut: we're just switching evaluators, so don't bother
1137     // doing stack squeezing (which can be expensive), just run the
1138     // thread.
1139     debugTrace(DEBUG_sched,
1140                "--<< thread %ld (%s) stopped to switch evaluators",
1141                (long)t->id, what_next_strs[t->what_next]);
1142
1143     return rtsTrue;
1144 }
1145
1146 /* -----------------------------------------------------------------------------
1147  * Handle a thread that returned to the scheduler with ThreadBlocked.
1148  *
1149  * There is nothing to do: the thread is blocked, and it has tidied up
1150  * its stack and placed itself on whatever queue it needs to be on.
1151  * When DEBUG is defined the thread's status is traced.
1152  * -------------------------------------------------------------------------- */
1153
1154 static void
1155 scheduleHandleThreadBlocked( StgTSO *t
1156 #ifndef DEBUG
1157     STG_UNUSED
1158 #endif
1159     )
1160 {
1161     // We deliberately do not
1162     //     ASSERT(t->why_blocked != NotBlocked);
1163     // because that is not always true.  For example, the thread may
1164     // have woken itself up already, because threadPaused() might
1165     // have raised a blocked throwTo exception, see
1166     // maybePerformBlockedException().
1167
1168 #if defined(DEBUG)
1169     traceThreadStatus(DEBUG_sched, t);
1170 #endif
1171 }
1172
1173 /* -----------------------------------------------------------------------------
1174  * Handle a thread that returned to the scheduler with ThreadFinished
1175  * -------------------------------------------------------------------------- */
1176
1177 static rtsBool
1178 scheduleHandleThreadFinished (Capability *cap STG_UNUSED, Task *task, StgTSO *t)
1179 {
1180     /* Need to check whether this was a main thread, and if so,
1181      * return with the return value.
1182      *
1183      * We also end up here if the thread kills itself with an
1184      * uncaught exception, see Exception.cmm.
1185      */
1186
1187     // blocked exceptions can now complete, even if the thread was in
1188     // blocked mode (see #2910).
1189     awakenBlockedExceptionQueue (cap, t);
1190
1191       //
1192       // Check whether the thread that just completed was a bound
1193       // thread, and if so return with the result.
1194       //
1195       // There is an assumption here that all thread completion goes
1196       // through this point; we need to make sure that if a thread
1197       // ends up in the ThreadKilled state, that it stays on the run
1198       // queue so it can be dealt with here.
1199       //
1200
1201       if (t->bound) {
1202
1203           if (t->bound != task->incall) {
1204 #if !defined(THREADED_RTS)
1205               // Must be a bound thread that is not the topmost one.  Leave
1206               // it on the run queue until the stack has unwound to the
1207               // point where we can deal with this.  Leaving it on the run
1208               // queue also ensures that the garbage collector knows about
1209               // this thread and its return value (it gets dropped from the
1210               // step->threads list so there's no other way to find it).
1211               appendToRunQueue(cap,t);
1212               return rtsFalse;
1213 #else
1214               // this cannot happen in the threaded RTS, because a
1215               // bound thread can only be run by the appropriate Task.
1216               barf("finished bound thread that isn't mine");
1217 #endif
1218           }
1219
1220           ASSERT(task->incall->tso == t);
1221
1222           if (t->what_next == ThreadComplete) {
1223               if (task->incall->ret) {
1224                   // NOTE: return val is stack->sp[1] (see StgStartup.hc)
1225                   *(task->incall->ret) = (StgClosure *)task->incall->tso->stackobj->sp[1];
1226               }
1227               task->incall->stat = Success;
1228           } else {
1229               if (task->incall->ret) {
1230                   *(task->incall->ret) = NULL;
1231               }
1232               if (sched_state >= SCHED_INTERRUPTING) {
1233                   if (heap_overflow) {
1234                       task->incall->stat = HeapExhausted;
1235                   } else {
1236                       task->incall->stat = Interrupted;
1237                   }
1238               } else {
1239                   task->incall->stat = Killed;
1240               }
1241           }
1242 #ifdef DEBUG
1243           removeThreadLabel((StgWord)task->incall->tso->id);
1244 #endif
1245
1246           // We no longer consider this thread and task to be bound to
1247           // each other.  The TSO lives on until it is GC'd, but the
1248           // task is about to be released by the caller, and we don't
1249           // want anyone following the pointer from the TSO to the
1250           // defunct task (which might have already been
1251           // re-used). This was a real bug: the GC updated
1252           // tso->bound->tso which lead to a deadlock.
1253           t->bound = NULL;
1254           task->incall->tso = NULL;
1255
1256           return rtsTrue; // tells schedule() to return
1257       }
1258
1259       return rtsFalse;
1260 }
1261
1262 /* -----------------------------------------------------------------------------
1263  * Decide whether a heap census is needed at this GC.
1264  *
1265  * Returns rtsTrue if performHeapProfile is set, or if we are heap
1266  * profiling with +RTS -i0 (profileInterval == 0) and 'ready_to_gc' is
1267  * true; the latter takes a census at every GC, which gives repeatable
1268  * runs for debugging.  Returns rtsFalse otherwise.
1269  * -------------------------------------------------------------------------- */
1270 static rtsBool
1271 scheduleNeedHeapProfile( rtsBool ready_to_gc )
1272 {
1273     rtsBool census_every_gc = RtsFlags.ProfFlags.profileInterval == 0 &&
1274                               RtsFlags.ProfFlags.doHeapProfile;
1275
1276     return (performHeapProfile || (census_every_gc && ready_to_gc))
1277            ? rtsTrue : rtsFalse;
1278 }
1279
1280 /* -----------------------------------------------------------------------------
1281  * Perform a garbage collection if necessary.
1282  *
1283  *   cap          the Capability held by the caller
1284  *   task         the current Task (only used by the threaded RTS)
1285  *   force_major  rtsTrue to force a major collection
1286  *
1287  * Returns the Capability held on return.  In the threaded RTS this may
1288  * differ from 'cap' if another GC was already pending and we had to
1289  * yield.  Does nothing once sched_state is SCHED_SHUTTING_DOWN.
1290  * -------------------------------------------------------------------------- */
1291
1292 static Capability *
1293 scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major)
1294 {
1295     rtsBool heap_census;
1296 #ifdef THREADED_RTS
1297     /* extern static volatile StgWord waiting_for_gc;
1298        lives inside capability.c */
1299     // These hold PENDING_GC_* codes (or 0), not booleans.
1300     nat gc_type, prev_pending_gc;
1301     nat i;
1302 #endif
1303
1304     if (sched_state == SCHED_SHUTTING_DOWN) {
1305         // The final GC has already been done, and the system is
1306         // shutting down.  We'll probably deadlock if we try to GC
1307         // now.
1308         return cap;
1309     }
1310
1311 #ifdef THREADED_RTS
1312     if (sched_state < SCHED_INTERRUPTING
1313         && RtsFlags.ParFlags.parGcEnabled
1314         && N >= RtsFlags.ParFlags.parGcGen
1315         && ! oldest_gen->mark)
1316     {
1317         gc_type = PENDING_GC_PAR;
1318     } else {
1319         gc_type = PENDING_GC_SEQ;
1320     }
1321
1322     // In order to GC, there must be no threads running Haskell code.
1323     // Therefore, the GC thread needs to hold *all* the capabilities,
1324     // and release them after the GC has completed.
1325     //
1326     // This seems to be the simplest way: previous attempts involved
1327     // making all the threads with capabilities give up their
1328     // capabilities and sleep except for the *last* one, which
1329     // actually did the GC.  But it's quite hard to arrange for all
1330     // the other tasks to sleep and stay asleep.
1331     //
1332
1333     /*  Other capabilities are prevented from running yet more Haskell
1334         threads if waiting_for_gc is set. Tested inside
1335         yieldCapability() and releaseCapability() in Capability.c */
1336
1337     prev_pending_gc = cas(&waiting_for_gc, 0, gc_type);
1338     if (prev_pending_gc) {
1339         do {
1340             debugTrace(DEBUG_sched, "someone else is trying to GC (%d)...",
1341                        prev_pending_gc);
1342             ASSERT(cap);
1343             yieldCapability(&cap,task);
1344         } while (waiting_for_gc);
1345         return cap;  // NOTE: task->cap might have changed here
1346     }
1347
1348     setContextSwitches();
1349
1350     // The final shutdown GC is always single-threaded, because it's
1351     // possible that some of the Capabilities have no worker threads.
1352
1353     if (gc_type == PENDING_GC_SEQ)
1354     {
1355         traceEventRequestSeqGc(cap);
1356
1357         // single-threaded GC: grab all the capabilities
1358         for (i=0; i < n_capabilities; i++) {
1359             debugTrace(DEBUG_sched, "ready_to_gc, grabbing all the capabilies (%d/%d)", i, n_capabilities);
1360             if (cap != &capabilities[i]) {
1361                 Capability *pcap = &capabilities[i];
1362                 // we better hope this task doesn't get migrated to
1363                 // another Capability while we're waiting for this one.
1364                 // It won't, because load balancing happens while we have
1365                 // all the Capabilities, but even so it's a slightly
1366                 // unsavoury invariant.
1367                 task->cap = pcap;
1368                 waitForReturnCapability(&pcap, task);
1369                 if (pcap != &capabilities[i]) {
1370                     barf("scheduleDoGC: got the wrong capability");
1371                 }
1372             }
1373         }
1374     }
1375     else
1376     {
1377         traceEventRequestParGc(cap);
1378         debugTrace(DEBUG_sched, "ready_to_gc, grabbing GC threads");
1379
1380         // multi-threaded GC: make sure all the Capabilities donate one
1381         // GC thread each.
1382         waitForGcThreads(cap);
1383     }
1384 #endif
1385
1386     IF_DEBUG(scheduler, printAllThreads());
1387
1388 delete_threads_and_gc:
1389     /*
1390      * We now have all the capabilities; if we're in an interrupting
1391      * state, then we should take the opportunity to delete all the
1392      * threads in the system.
1393      */
1394     if (sched_state == SCHED_INTERRUPTING) {
1395         deleteAllThreads(cap);
1396         sched_state = SCHED_SHUTTING_DOWN;
1397     }
1398
1399     heap_census = scheduleNeedHeapProfile(rtsTrue);
1400
1401     traceEventGcStart(cap);
1402 #if defined(THREADED_RTS)
1403     // reset waiting_for_gc *before* GC, so that when the GC threads
1404     // emerge they don't immediately re-enter the GC.
1405     waiting_for_gc = 0;
1406     GarbageCollect(force_major || heap_census, gc_type, cap);
1407 #else
1408     GarbageCollect(force_major || heap_census, 0, cap);
1409 #endif
1410     traceEventGcEnd(cap);
1411
1412     if (recent_activity == ACTIVITY_INACTIVE && force_major)
1413     {
1414         // We are doing a GC because the system has been idle for a
1415         // timeslice and we need to check for deadlock.  Record the
1416         // fact that we've done a GC and turn off the timer signal;
1417         // it will get re-enabled if we run any threads after the GC.
1418         recent_activity = ACTIVITY_DONE_GC;
1419         stopTimer();
1420     }
1421     else
1422     {
1423         // the GC might have taken long enough for the timer to set
1424         // recent_activity = ACTIVITY_INACTIVE, but we aren't
1425         // necessarily deadlocked:
1426         recent_activity = ACTIVITY_YES;
1427     }
1428
1429 #if defined(THREADED_RTS)
1430     if (gc_type == PENDING_GC_PAR)
1431     {
1432         releaseGCThreads(cap);
1433     }
1434 #endif
1435
1436     if (heap_census) {
1437         debugTrace(DEBUG_sched, "performing heap census");
1438         heapCensus();
1439         performHeapProfile = rtsFalse;
1440     }
1441
1442     if (heap_overflow && sched_state < SCHED_INTERRUPTING) {
1443         // GC set the heap_overflow flag, so we should proceed with
1444         // an orderly shutdown now.  Ultimately we want the main
1445         // thread to return to its caller with HeapExhausted, at which
1446         // point the caller should call hs_exit().  The first step is
1447         // to delete all the threads.
1448         //
1449         // Another way to do this would be to raise an exception in
1450         // the main thread, which we really should do because it gives
1451         // the program a chance to clean up.  But how do we find the
1452         // main thread?  It should presumably be the same one that
1453         // gets ^C exceptions, but that's all done on the Haskell side
1454         // (GHC.TopHandler).
1455         sched_state = SCHED_INTERRUPTING;
1456         goto delete_threads_and_gc;
1457     }
1458
1459 #ifdef SPARKBALANCE
1460     /* JB
1461        Once we are all together... this would be the place to balance all
1462        spark pools. No concurrent stealing or adding of new sparks can
1463        occur. Should be defined in Sparks.c. */
1464     balanceSparkPoolsCaps(n_capabilities, capabilities);
1465 #endif
1466
1467 #if defined(THREADED_RTS)
1468     if (gc_type == PENDING_GC_SEQ) {
1469         // release our stash of capabilities.
1470         for (i = 0; i < n_capabilities; i++) {
1471             if (cap != &capabilities[i]) {
1472                 task->cap = &capabilities[i];
1473                 releaseCapability(&capabilities[i]);
1474             }
1475         }
1476     }
1477     // point the Task back at the Capability we hand to the caller
1478     task->cap = cap;
1479 #endif
1480
1481     return cap;
1482 }
1483
1484 /* ---------------------------------------------------------------------------
1485  * Singleton fork().  Do not copy any running threads.
1486  *
1487  * The parent just returns the result of fork().  The child deletes all
1488  * Haskell threads, discards every Task except the running one, runs the
1489  * action in 'entry', and finally calls hs_exit() and stg_exit().  Without
1490  * FORKPROCESS_PRIMOP_SUPPORTED the function only barf()s.
1491  * ------------------------------------------------------------------------- */
1492
1493 pid_t
1494 forkProcess(HsStablePtr *entry
1495 #ifndef FORKPROCESS_PRIMOP_SUPPORTED
1496             STG_UNUSED
1497 #endif
1498            )
1499 {
1500 #ifdef FORKPROCESS_PRIMOP_SUPPORTED
1501     pid_t child_pid;
1502     StgTSO *tso, *next_tso;
1503     Capability *cap;
1504     nat gen;
1505
1506 #if defined(THREADED_RTS)
1507     if (RtsFlags.ParFlags.nNodes > 1) {
1508         errorBelch("forking not supported with +RTS -N<n> greater than 1");
1509         stg_exit(EXIT_FAILURE);
1510     }
1511 #endif
1512
1513     debugTrace(DEBUG_sched, "forking!");
1514
1515     // ToDo: for SMP, we should probably acquire *all* the capabilities
1516     cap = rts_lock();
1517
1518     // no funny business: hold locks while we fork, otherwise if some
1519     // other thread is holding a lock when the fork happens, the data
1520     // structure protected by the lock will forever be in an
1521     // inconsistent state in the child.  See also #1391.
1522     ACQUIRE_LOCK(&sched_mutex);
1523     ACQUIRE_LOCK(&cap->lock);
1524     ACQUIRE_LOCK(&cap->running_task->lock);
1525
1526     stopTimer(); // See #4074
1527
1528 #if defined(TRACING)
1529     flushEventLog(); // so that child won't inherit dirty file buffers
1530 #endif
1531
1532     child_pid = fork();
1533     if (child_pid != 0) {
1534         // Parent (this branch is also taken if fork() failed): restart
1535         // the timer, drop the locks taken above and return the pid.
1536         startTimer(); // #4074
1537         RELEASE_LOCK(&sched_mutex);
1538         RELEASE_LOCK(&cap->lock);
1539         RELEASE_LOCK(&cap->running_task->lock);
1540         rts_unlock(cap);
1541         return child_pid;
1542     }
1543
1544     // From here on we are running in the child.
1545 #if defined(THREADED_RTS)
1546     initMutex(&sched_mutex);
1547     initMutex(&cap->lock);
1548     initMutex(&cap->running_task->lock);
1549 #endif
1550
1551 #ifdef TRACING
1552     resetTracing();
1553 #endif
1554
1555     // Now, all OS threads except the thread that forked are
1556     // stopped.  We need to stop all Haskell threads, including
1557     // those involved in foreign calls.  Also we need to delete
1558     // all Tasks, because they correspond to OS threads that are
1559     // now gone.
1560     for (gen = 0; gen < RtsFlags.GcFlags.generations; gen++) {
1561         for (tso = generations[gen].threads; tso != END_TSO_QUEUE; tso = next_tso) {
1562             next_tso = tso->global_link;
1563             // don't allow threads to catch the ThreadKilled
1564             // exception, but we do want to raiseAsync() because these
1565             // threads may be evaluating thunks that we need later.
1566             deleteThread_(cap, tso);
1567
1568             // stop the GC from updating the InCall to point to
1569             // the TSO.  This is only necessary because the
1570             // OSThread bound to the TSO has been killed, and
1571             // won't get a chance to exit in the usual way (see
1572             // also scheduleHandleThreadFinished).
1573             tso->bound = NULL;
1574         }
1575     }
1576
1577     // Empty the run queue.  It seems tempting to let all the
1578     // killed threads stay on the run queue as zombies to be
1579     // cleaned up later, but some of them correspond to bound
1580     // threads for which the corresponding Task does not exist.
1581     cap->run_queue_hd = END_TSO_QUEUE;
1582     cap->run_queue_tl = END_TSO_QUEUE;
1583
1584     // Any suspended C-calling Tasks are no more, their OS threads
1585     // don't exist now:
1586     cap->suspended_ccalls = NULL;
1587
1588     // Empty the threads lists.  Otherwise, the garbage
1589     // collector may attempt to resurrect some of these threads.
1590     for (gen = 0; gen < RtsFlags.GcFlags.generations; gen++) {
1591         generations[gen].threads = END_TSO_QUEUE;
1592     }
1593
1594     discardTasksExcept(cap->running_task);
1595
1596 #if defined(THREADED_RTS)
1597     // Wipe our spare workers list, they no longer exist.  New
1598     // workers will be created if necessary.
1599     cap->spare_workers = NULL;
1600     cap->n_spare_workers = 0;
1601     cap->returning_tasks_hd = NULL;
1602     cap->returning_tasks_tl = NULL;
1603 #endif
1604
1605     // On Unix, all timers are reset in the child, so we need to start
1606     // the timer again.
1607     initTimer();
1608     startTimer();
1609
1610 #if defined(THREADED_RTS)
1611     cap = ioManagerStartCap(cap);
1612 #endif
1613
1614     cap = rts_evalStableIO(cap, entry, NULL);  // run the action
1615     rts_checkSchedStatus("forkProcess",cap);
1616
1617     rts_unlock(cap);
1618     hs_exit();                      // clean up and exit
1619     stg_exit(EXIT_SUCCESS);
1620 #else /* !FORKPROCESS_PRIMOP_SUPPORTED */
1621     barf("forkProcess#: primop not supported on this platform, sorry!\n");
1622 #endif
1623 }
1624
1625 /* ---------------------------------------------------------------------------
1626  * Delete all the threads in the system
1627  * ------------------------------------------------------------------------- */
1628
1629 static void
1630 deleteAllThreads ( Capability *cap )
1631 {
1632     // NOTE: only safe to call if we own all capabilities.
1633
1634     StgTSO* t, *next;
1635     nat g;
1636
1637     debugTrace(DEBUG_sched,"deleting all threads");
1638     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1639         for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) {
1640                 next = t->global_link;
1641                 deleteThread(cap,t);
1642         }
1643     }
1644
1645     // The run queue now contains a bunch of ThreadKilled threads.  We
1646     // must not throw these away: the main thread(s) will be in there
1647     // somewhere, and the main scheduler loop has to deal with it.
1648     // Also, the run queue is the only thing keeping these threads from
1649     // being GC'd, and we don't want the "main thread has been GC'd" panic.
1650
1651 #if !defined(THREADED_RTS)
1652     ASSERT(blocked_queue_hd == END_TSO_QUEUE);
1653     ASSERT(sleeping_queue == END_TSO_QUEUE);
1654 #endif
1655 }
1656
1657 /* -----------------------------------------------------------------------------
1658    Managing the suspended_ccalls list.
1659    Locks required: sched_mutex
1660    -------------------------------------------------------------------------- */
1661
1662 // Push task->incall onto the front of cap->suspended_ccalls.
1663 STATIC_INLINE void suspendTask (Capability *cap, Task *task)
1664 {
1665     InCall *self = task->incall;
1666     InCall *old_head = cap->suspended_ccalls;
1667
1668     ASSERT(self->next == NULL && self->prev == NULL);
1669     self->prev = NULL;
1670     self->next = old_head;
1671     if (old_head != NULL) {
1672         old_head->prev = self;
1673     }
1674     cap->suspended_ccalls = self;
1675 }
1676
1677 STATIC_INLINE void
1678 recoverSuspendedTask (Capability *cap, Task *task)
1679 {
1680     InCall *incall;
1681
1682     incall = task->incall;
1683     if (incall->prev) {
1684         incall->prev->next = incall->next;
1685     } else {
1686         ASSERT(cap->suspended_ccalls == incall);
1687         cap->suspended_ccalls = incall->next;
1688     }
1689     if (incall->next) {
1690         incall->next->prev = incall->prev;
1691     }
1692     incall->next = incall->prev = NULL;
1693 }
1694
1695 /* ---------------------------------------------------------------------------
1696  * Suspending & resuming Haskell threads.
1697  *
1698  * When making a "safe" call to C (aka _ccall_GC), the task gives back
1699  * its capability before calling the C function.  This allows another
1700  * task to pick up the capability and carry on running Haskell
1701  * threads.  It also means that if the C call blocks, it won't lock
1702  * the whole system.
1703  *
1704  * The Haskell thread making the C call is put to sleep for the
1705  * duration of the call, on the Capability's suspended_ccalls list.  We
1706  * give out a token to the task, which it can use to resume the thread
1707  * on return from the C function.
1708  *
1709  * If this is an interruptible C call, this means that the FFI call may be
1710  * unceremoniously terminated and should be scheduled on an
1711  * unbound worker thread.
1712  * ------------------------------------------------------------------------- */
1713
1714 void *
1715 suspendThread (StgRegTable *reg, rtsBool interruptible)
1716 {
1717   Capability *cap;
1718   int saved_errno;
1719   StgTSO *tso;
1720   Task *task;
1721 #if mingw32_HOST_OS
1722   StgWord32 saved_winerror;
1723 #endif
1724
1725   saved_errno = errno;
1726 #if mingw32_HOST_OS
1727   saved_winerror = GetLastError();
1728 #endif
1729
1730   /* assume that *reg is a pointer to the StgRegTable part of a Capability.
1731    */
1732   cap = regTableToCapability(reg);
1733
1734   task = cap->running_task;
1735   tso = cap->r.rCurrentTSO;
1736
1737   traceEventStopThread(cap, tso, THREAD_SUSPENDED_FOREIGN_CALL, 0);
1738
1739   // XXX this might not be necessary --SDM
1740   tso->what_next = ThreadRunGHC;
1741
1742   threadPaused(cap,tso);
1743
1744   if (interruptible) {
1745     tso->why_blocked = BlockedOnCCall_Interruptible;
1746   } else {
1747     tso->why_blocked = BlockedOnCCall;
1748   }
1749
1750   // Hand back capability
1751   task->incall->suspended_tso = tso;
1752   task->incall->suspended_cap = cap;
1753
1754   ACQUIRE_LOCK(&cap->lock);
1755
1756   suspendTask(cap,task);
1757   cap->in_haskell = rtsFalse;
1758   releaseCapability_(cap,rtsFalse);
1759
1760   RELEASE_LOCK(&cap->lock);
1761
1762   errno = saved_errno;
1763 #if mingw32_HOST_OS
1764   SetLastError(saved_winerror);
1765 #endif
1766   return task;
1767 }
1768
1769 StgRegTable *
1770 resumeThread (void *task_)
1771 {
1772     StgTSO *tso;
1773     InCall *incall;
1774     Capability *cap;
1775     Task *task = task_;
1776     int saved_errno;
1777 #if mingw32_HOST_OS
1778     StgWord32 saved_winerror;
1779 #endif
1780
1781     saved_errno = errno;
1782 #if mingw32_HOST_OS
1783     saved_winerror = GetLastError();
1784 #endif
1785
1786     incall = task->incall;
1787     cap = incall->suspended_cap;
1788     task->cap = cap;
1789
1790     // Wait for permission to re-enter the RTS with the result.
1791     waitForReturnCapability(&cap,task);
1792     // we might be on a different capability now... but if so, our
1793     // entry on the suspended_ccalls list will also have been
1794     // migrated.
1795
1796     // Remove the thread from the suspended list
1797     recoverSuspendedTask(cap,task);
1798
1799     tso = incall->suspended_tso;
1800     incall->suspended_tso = NULL;
1801     incall->suspended_cap = NULL;
1802     tso->_link = END_TSO_QUEUE; // no write barrier reqd
1803
1804     traceEventRunThread(cap, tso);
1805
1806     /* Reset blocking status */
1807     tso->why_blocked  = NotBlocked;
1808
1809     if ((tso->flags & TSO_BLOCKEX) == 0) {
1810         // avoid locking the TSO if we don't have to
1811         if (tso->blocked_exceptions != END_BLOCKED_EXCEPTIONS_QUEUE) {
1812             maybePerformBlockedException(cap,tso);
1813         }
1814     }
1815
1816     cap->r.rCurrentTSO = tso;
1817     cap->in_haskell = rtsTrue;
1818     errno = saved_errno;
1819 #if mingw32_HOST_OS
1820     SetLastError(saved_winerror);
1821 #endif
1822
1823     /* We might have GC'd, mark the TSO dirty again */
1824     dirty_TSO(cap,tso);
1825     dirty_STACK(cap,tso->stackobj);
1826
1827     IF_DEBUG(sanity, checkTSO(tso));
1828
1829     return &cap->r;
1830 }
1831
1832 /* ---------------------------------------------------------------------------
1833  * scheduleThread()
1834  *
1835  * scheduleThread puts a thread on the end  of the runnable queue.
1836  * This will usually be done immediately after a thread is created.
1837  * The caller of scheduleThread must create the thread using e.g.
1838  * createThread and push an appropriate closure
1839  * on this thread's stack before the scheduler is invoked.
1840  * ------------------------------------------------------------------------ */
1841
1842 void
1843 scheduleThread(Capability *cap, StgTSO *tso)
1844 {
1845     // The thread goes at the *end* of the run-queue, to avoid possible
1846     // starvation of any threads already on the queue.
1847     appendToRunQueue(cap,tso);
1848 }
1849
1850 void
1851 scheduleThreadOn(Capability *cap, StgWord cpu USED_IF_THREADS, StgTSO *tso)
1852 {
1853 #if defined(THREADED_RTS)
1854     tso->flags |= TSO_LOCKED; // we requested explicit affinity; don't
1855                               // move this thread from now on.
1856     cpu %= RtsFlags.ParFlags.nNodes;
1857     if (cpu == cap->no) {
1858         appendToRunQueue(cap,tso);
1859     } else {
1860         migrateThread(cap, tso, &capabilities[cpu]);
1861     }
1862 #else
1863     appendToRunQueue(cap,tso);
1864 #endif
1865 }
1866
1867 Capability *
1868 scheduleWaitThread (StgTSO* tso, /*[out]*/HaskellObj* ret, Capability *cap)
1869 {
1870     Task *task;
1871     StgThreadID id;
1872
1873     // We already created/initialised the Task
1874     task = cap->running_task;
1875
1876     // This TSO is now a bound thread; make the Task and TSO
1877     // point to each other.
1878     tso->bound = task->incall;
1879     tso->cap = cap;
1880
1881     task->incall->tso = tso;
1882     task->incall->ret = ret;
1883     task->incall->stat = NoStatus;
1884
1885     appendToRunQueue(cap,tso);
1886
1887     id = tso->id;
1888     debugTrace(DEBUG_sched, "new bound thread (%lu)", (unsigned long)id);
1889
1890     cap = schedule(cap,task);
1891
1892     ASSERT(task->incall->stat != NoStatus);
1893     ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
1894
1895     debugTrace(DEBUG_sched, "bound thread (%lu) finished", (unsigned long)id);
1896     return cap;
1897 }
1898
1899 /* ----------------------------------------------------------------------------
1900  * Starting Tasks
1901  * ------------------------------------------------------------------------- */
1902
1903 #if defined(THREADED_RTS)
1904 void scheduleWorker (Capability *cap, Task *task)
1905 {
1906     // schedule() runs without a lock.
1907     cap = schedule(cap,task);
1908
1909     // On exit from schedule(), we have a Capability, but possibly not
1910     // the same one we started with.
1911
1912     // During shutdown, the requirement is that after all the
1913     // Capabilities are shut down, all workers that are shutting down
1914     // have finished workerTaskStop().  This is why we hold on to
1915     // cap->lock until we've finished workerTaskStop() below.
1916     //
1917     // There may be workers still involved in foreign calls; those
1918     // will just block in waitForReturnCapability() because the
1919     // Capability has been shut down.
1920     //
1921     ACQUIRE_LOCK(&cap->lock);
1922     releaseCapability_(cap,rtsFalse);
1923     workerTaskStop(task);
1924     RELEASE_LOCK(&cap->lock);
1925 }
1926 #endif
1927
1928 /* ---------------------------------------------------------------------------
1929  * initScheduler()
1930  *
1931  * Initialise the scheduler.  This resets all the queues - if the
1932  * queues contained any threads, they'll be garbage collected at the
1933  * next pass.
1934  *
1935  * ------------------------------------------------------------------------ */
1936
1937 void
1938 initScheduler(void)
1939 {
1940 #if !defined(THREADED_RTS)
1941   blocked_queue_hd  = END_TSO_QUEUE;
1942   blocked_queue_tl  = END_TSO_QUEUE;
1943   sleeping_queue    = END_TSO_QUEUE;
1944 #endif
1945
1946   sched_state    = SCHED_RUNNING;
1947   recent_activity = ACTIVITY_YES;
1948
1949 #if defined(THREADED_RTS)
1950   /* Initialise the mutex and condition variables used by
1951    * the scheduler. */
1952   initMutex(&sched_mutex);
1953 #endif
1954
1955   ACQUIRE_LOCK(&sched_mutex);
1956
1957   /* A capability holds the state a native thread needs in
1958    * order to execute STG code. At least one capability is
1959    * floating around (only THREADED_RTS builds have more than one).
1960    */
1961   initCapabilities();
1962
1963   initTaskManager();
1964
1965 #if defined(THREADED_RTS)
1966   initSparkPools();
1967 #endif
1968
1969   RELEASE_LOCK(&sched_mutex);
1970
1971 #if defined(THREADED_RTS)
1972   /*
1973    * Eagerly start one worker to run each Capability, except for
1974    * Capability 0.  The idea is that we're probably going to start a
1975    * bound thread on Capability 0 pretty soon, so we don't want a
1976    * worker task hogging it.
1977    */
1978   {
1979       nat i;
1980       Capability *cap;
1981       for (i = 1; i < n_capabilities; i++) {
1982           cap = &capabilities[i];
1983           ACQUIRE_LOCK(&cap->lock);
1984           startWorkerTask(cap);
1985           RELEASE_LOCK(&cap->lock);
1986       }
1987   }
1988 #endif
1989 }
1990
1991 void
1992 exitScheduler (rtsBool wait_foreign USED_IF_THREADS)
1993                /* see Capability.c, shutdownCapability() */
1994 {
1995     Task *task = NULL;
1996
1997     task = newBoundTask();
1998
1999     // If we haven't killed all the threads yet, do it now.
2000     if (sched_state < SCHED_SHUTTING_DOWN) {
2001         sched_state = SCHED_INTERRUPTING;
2002         waitForReturnCapability(&task->cap,task);
2003         scheduleDoGC(task->cap,task,rtsFalse);
2004         ASSERT(task->incall->tso == NULL);
2005         releaseCapability(task->cap);
2006     }
2007     sched_state = SCHED_SHUTTING_DOWN;
2008
2009 #if defined(THREADED_RTS)
2010     {
2011         nat i;
2012
2013         for (i = 0; i < n_capabilities; i++) {
2014             ASSERT(task->incall->tso == NULL);
2015             shutdownCapability(&capabilities[i], task, wait_foreign);
2016         }
2017     }
2018 #endif
2019
2020     boundTaskExiting(task);
2021 }
2022
2023 void
2024 freeScheduler( void )
2025 {
2026     nat still_running;
2027
2028     ACQUIRE_LOCK(&sched_mutex);
2029     still_running = freeTaskManager();
2030     // We can only free the Capabilities if there are no Tasks still
2031     // running.  We might have a Task about to return from a foreign
2032     // call into waitForReturnCapability(), for example (actually,
2033     // this should be the *only* thing that a still-running Task can
2034     // do at this point, and it will block waiting for the
2035     // Capability).
2036     if (still_running == 0) {
2037         freeCapabilities();
2038         if (n_capabilities != 1) {
2039             stgFree(capabilities);
2040         }
2041     }
2042     RELEASE_LOCK(&sched_mutex);
2043 #if defined(THREADED_RTS)
2044     closeMutex(&sched_mutex);
2045 #endif
2046 }
2047
2048 /* -----------------------------------------------------------------------------
2049    performGC
2050
2051    This is the interface to the garbage collector from Haskell land.
2052    We provide this so that external C code can allocate and garbage
2053    collect when called from Haskell via _ccall_GC.
2054    -------------------------------------------------------------------------- */
2055
2056 static void
2057 performGC_(rtsBool force_major)
2058 {
2059     Task *task;
2060
2061     // We must grab a new Task here, because the existing Task may be
2062     // associated with a particular Capability, and chained onto the
2063     // suspended_ccalls queue.
2064     task = newBoundTask();
2065
2066     waitForReturnCapability(&task->cap,task);
2067     scheduleDoGC(task->cap,task,force_major);
2068     releaseCapability(task->cap);
2069     boundTaskExiting(task);
2070 }
2071
2072 void
2073 performGC(void)
2074 {
2075     performGC_(rtsFalse);
2076 }
2077
2078 void
2079 performMajorGC(void)
2080 {
2081     performGC_(rtsTrue);
2082 }
2083
2084 /* ---------------------------------------------------------------------------
2085    Interrupt execution
2086    - usually called inside a signal handler so it mustn't do anything fancy.
2087    ------------------------------------------------------------------------ */
2088
2089 void
2090 interruptStgRts(void)
2091 {
2092     sched_state = SCHED_INTERRUPTING;
2093     setContextSwitches();
2094 #if defined(THREADED_RTS)
2095     wakeUpRts();
2096 #endif
2097 }
2098
2099 /* -----------------------------------------------------------------------------
2100    Wake up the RTS
2101
2102    This function causes at least one OS thread to wake up and run the
2103    scheduler loop.  It is invoked when the RTS might be deadlocked, or
2104    an external event has arrived that may need servicing (eg. a
2105    keyboard interrupt).
2106
2107    In the single-threaded RTS we don't do anything here; we only have
2108    one thread anyway, and the event that caused us to want to wake up
2109    will have interrupted any blocking system call in progress anyway.
2110    -------------------------------------------------------------------------- */
2111
2112 #if defined(THREADED_RTS)
void wakeUpRts(void)
{
    // Poke the IO Manager thread.  Waking it in turn ensures that
    // some OS thread wakes up and runs the scheduler loop, which
    // will cause a GC and deadlock check.
    ioManagerWakeup();
}
2120 #endif
2121
2122 /* -----------------------------------------------------------------------------
2123    Deleting threads
2124
2125    This is used for interruption (^C) and forking, and corresponds to
2126    raising an exception but without letting the thread catch the
2127    exception.
2128    -------------------------------------------------------------------------- */
2129
2130 static void
2131 deleteThread (Capability *cap STG_UNUSED, StgTSO *tso)
2132 {
2133     // NOTE: must only be called on a TSO that we have exclusive
2134     // access to, because we will call throwToSingleThreaded() below.
2135     // The TSO must be on the run queue of the Capability we own, or
2136     // we must own all Capabilities.
2137
2138     if (tso->why_blocked != BlockedOnCCall &&
2139         tso->why_blocked != BlockedOnCCall_Interruptible) {
2140         throwToSingleThreaded(tso->cap,tso,NULL);
2141     }
2142 }
2143
2144 #ifdef FORKPROCESS_PRIMOP_SUPPORTED
2145 static void
2146 deleteThread_(Capability *cap, StgTSO *tso)
2147 { // for forkProcess only:
2148   // like deleteThread(), but we delete threads in foreign calls, too.
2149
2150     if (tso->why_blocked == BlockedOnCCall ||
2151         tso->why_blocked == BlockedOnCCall_Interruptible) {
2152         tso->what_next = ThreadKilled;
2153         appendToRunQueue(tso->cap, tso);
2154     } else {
2155         deleteThread(cap,tso);
2156     }
2157 }
2158 #endif
2159
2160 /* -----------------------------------------------------------------------------
2161    raiseExceptionHelper
2162
2163    This function is called by the raise# primitive, just so that we can
2164    move some of the tricky bits of raising an exception from C-- into
2165    C.  Who knows, it might be a useful reusable thing here too.
2166    -------------------------------------------------------------------------- */
2167
2168 StgWord
2169 raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception)
2170 {
2171     Capability *cap = regTableToCapability(reg);
2172     StgThunk *raise_closure = NULL;
2173     StgPtr p, next;
2174     StgRetInfoTable *info;
2175     //
2176     // This closure represents the expression 'raise# E' where E
2177     // is the exception raise.  It is used to overwrite all the
2178     // thunks which are currently under evaluataion.
2179     //
2180
2181     // OLD COMMENT (we don't have MIN_UPD_SIZE now):
2182     // LDV profiling: stg_raise_info has THUNK as its closure
2183     // type. Since a THUNK takes at least MIN_UPD_SIZE words in its
2184     // payload, MIN_UPD_SIZE is more approprate than 1.  It seems that
2185     // 1 does not cause any problem unless profiling is performed.
2186     // However, when LDV profiling goes on, we need to linearly scan
2187     // small object pool, where raise_closure is stored, so we should
2188     // use MIN_UPD_SIZE.
2189     //
2190     // raise_closure = (StgClosure *)RET_STGCALL1(P_,allocate,
2191     //                                 sizeofW(StgClosure)+1);
2192     //
2193
2194     //
2195     // Walk up the stack, looking for the catch frame.  On the way,
2196     // we update any closures pointed to from update frames with the
2197     // raise closure that we just built.
2198     //
2199     p = tso->stackobj->sp;
2200     while(1) {
2201         info = get_ret_itbl((StgClosure *)p);
2202         next = p + stack_frame_sizeW((StgClosure *)p);
2203         switch (info->i.type) {
2204
2205         case UPDATE_FRAME:
2206             // Only create raise_closure if we need to.
2207             if (raise_closure == NULL) {
2208                 raise_closure =
2209                     (StgThunk *)allocate(cap,sizeofW(StgThunk)+1);
2210                 SET_HDR(raise_closure, &stg_raise_info, CCCS);
2211                 raise_closure->payload[0] = exception;
2212             }
2213             updateThunk(cap, tso, ((StgUpdateFrame *)p)->updatee,
2214                         (StgClosure *)raise_closure);
2215             p = next;
2216             continue;
2217
2218         case ATOMICALLY_FRAME:
2219             debugTrace(DEBUG_stm, "found ATOMICALLY_FRAME at %p", p);
2220             tso->stackobj->sp = p;
2221             return ATOMICALLY_FRAME;
2222
2223         case CATCH_FRAME:
2224             tso->stackobj->sp = p;
2225             return CATCH_FRAME;
2226
2227         case CATCH_STM_FRAME:
2228             debugTrace(DEBUG_stm, "found CATCH_STM_FRAME at %p", p);
2229             tso->stackobj->sp = p;
2230             return CATCH_STM_FRAME;
2231
2232         case UNDERFLOW_FRAME:
2233             tso->stackobj->sp = p;
2234             threadStackUnderflow(cap,tso);
2235             p = tso->stackobj->sp;
2236             continue;
2237
2238         case STOP_FRAME:
2239             tso->stackobj->sp = p;
2240             return STOP_FRAME;
2241
2242         case CATCH_RETRY_FRAME:
2243         default:
2244             p = next;
2245             continue;
2246         }
2247     }
2248 }
2249
2250
2251 /* -----------------------------------------------------------------------------
2252    findRetryFrameHelper
2253
2254    This function is called by the retry# primitive.  It traverses the stack
2255    leaving tso->stackobj->sp referring to the frame which should handle the retry.
2256
2257    This should either be a CATCH_RETRY_FRAME (if the retry# is within an orElse#)
2258    or should be an ATOMICALLY_FRAME (if the retry# reaches the top level).
2259
2260    We skip CATCH_STM_FRAMEs (aborting and rolling back the nested tx that they
2261    create) because retries are not considered to be exceptions, despite the
2262    similar implementation.
2263
2264    We should not expect to see CATCH_FRAME or STOP_FRAME because those should
2265    not be created within memory transactions.
2266    -------------------------------------------------------------------------- */
2267
2268 StgWord
2269 findRetryFrameHelper (Capability *cap, StgTSO *tso)
2270 {
2271   StgPtr           p, next;
2272   StgRetInfoTable *info;
2273
2274   p = tso->stackobj->sp;
2275   while (1) {
2276     info = get_ret_itbl((StgClosure *)p);
2277     next = p + stack_frame_sizeW((StgClosure *)p);
2278     switch (info->i.type) {
2279
2280     case ATOMICALLY_FRAME:
2281         debugTrace(DEBUG_stm,
2282                    "found ATOMICALLY_FRAME at %p during retry", p);
2283         tso->stackobj->sp = p;
2284         return ATOMICALLY_FRAME;
2285
2286     case CATCH_RETRY_FRAME:
2287         debugTrace(DEBUG_stm,
2288                    "found CATCH_RETRY_FRAME at %p during retrry", p);
2289         tso->stackobj->sp = p;
2290         return CATCH_RETRY_FRAME;
2291
2292     case CATCH_STM_FRAME: {
2293         StgTRecHeader *trec = tso -> trec;
2294         StgTRecHeader *outer = trec -> enclosing_trec;
2295         debugTrace(DEBUG_stm,
2296                    "found CATCH_STM_FRAME at %p during retry", p);
2297         debugTrace(DEBUG_stm, "trec=%p outer=%p", trec, outer);
2298         stmAbortTransaction(cap, trec);
2299         stmFreeAbortedTRec(cap, trec);
2300         tso -> trec = outer;
2301         p = next;
2302         continue;
2303     }
2304
2305     case UNDERFLOW_FRAME:
2306         threadStackUnderflow(cap,tso);
2307         p = tso->stackobj->sp;
2308         continue;
2309
2310     default:
2311       ASSERT(info->i.type != CATCH_FRAME);
2312       ASSERT(info->i.type != STOP_FRAME);
2313       p = next;
2314       continue;
2315     }
2316   }
2317 }
2318
2319 /* -----------------------------------------------------------------------------
2320    resurrectThreads is called after garbage collection on the list of
2321    threads found to be garbage.  Each of these threads will be woken
2322    up and sent an exception: BlockedIndefinitelyOnMVar if it was blocked
2323    on an MVar, BlockedIndefinitelyOnSTM if it was blocked on STM, or
2324    NonTermination if it was blocked on a black hole.
2325
2326    Locks: assumes we hold *all* the capabilities.
2327    -------------------------------------------------------------------------- */
2328
2329 void
2330 resurrectThreads (StgTSO *threads)
2331 {
2332     StgTSO *tso, *next;
2333     Capability *cap;
2334     generation *gen;
2335
2336     for (tso = threads; tso != END_TSO_QUEUE; tso = next) {
2337         next = tso->global_link;
2338
2339         gen = Bdescr((P_)tso)->gen;
2340         tso->global_link = gen->threads;
2341         gen->threads = tso;
2342
2343         debugTrace(DEBUG_sched, "resurrecting thread %lu", (unsigned long)tso->id);
2344
2345         // Wake up the thread on the Capability it was last on
2346         cap = tso->cap;
2347
2348         switch (tso->why_blocked) {
2349         case BlockedOnMVar:
2350             /* Called by GC - sched_mutex lock is currently held. */
2351             throwToSingleThreaded(cap, tso,
2352                                   (StgClosure *)blockedIndefinitelyOnMVar_closure);
2353             break;
2354         case BlockedOnBlackHole:
2355             throwToSingleThreaded(cap, tso,
2356                                   (StgClosure *)nonTermination_closure);
2357             break;
2358         case BlockedOnSTM:
2359             throwToSingleThreaded(cap, tso,
2360                                   (StgClosure *)blockedIndefinitelyOnSTM_closure);
2361             break;
2362         case NotBlocked:
2363             /* This might happen if the thread was blocked on a black hole
2364              * belonging to a thread that we've just woken up (raiseAsync
2365              * can wake up threads, remember...).
2366              */
2367             continue;
2368         case BlockedOnMsgThrowTo:
2369             // This can happen if the target is masking, blocks on a
2370             // black hole, and then is found to be unreachable.  In
2371             // this case, we want to let the target wake up and carry
2372             // on, and do nothing to this thread.
2373             continue;
2374         default:
2375             barf("resurrectThreads: thread blocked in a strange way: %d",
2376                  tso->why_blocked);
2377         }
2378     }
2379 }