rts/Schedule.c

   1 /* ---------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2006
   4  *
   5  * The scheduler and thread-related functionality
   6  *
   7  * --------------------------------------------------------------------------*/
   8
   9 #include "PosixSource.h"
  10 #define KEEP_LOCKCLOSURE
  11 #include "Rts.h"
  12
  13 #include "sm/Storage.h"
  14 #include "RtsUtils.h"
  15 #include "StgRun.h"
  16 #include "Schedule.h"
  17 #include "Interpreter.h"
  18 #include "Printer.h"
  19 #include "RtsSignals.h"
  20 #include "sm/Sanity.h"
  21 #include "Stats.h"
  22 #include "STM.h"
  23 #include "Prelude.h"
  24 #include "ThreadLabels.h"
  25 #include "Updates.h"
  26 #include "Proftimer.h"
  27 #include "ProfHeap.h"
  28 #include "Weak.h"
  29 #include "sm/GC.h" // waitForGcThreads, releaseGCThreads, N
  30 #include "Sparks.h"
  31 #include "Capability.h"
  32 #include "Task.h"
  33 #include "AwaitEvent.h"
  34 #if defined(mingw32_HOST_OS)
  35 #include "win32/IOManager.h"
  36 #endif
  37 #include "Trace.h"
  38 #include "RaiseAsync.h"
  39 #include "Threads.h"
  40 #include "Timer.h"
  41 #include "ThreadPaused.h"
  42 #include "Messages.h"
  43
  44 #ifdef HAVE_SYS_TYPES_H
  45 #include <sys/types.h>
  46 #endif
  47 #ifdef HAVE_UNISTD_H
  48 #include <unistd.h>
  49 #endif
  50
  51 #include <string.h>
  52 #include <stdlib.h>
  53 #include <stdarg.h>
  54
  55 #ifdef HAVE_ERRNO_H
  56 #include <errno.h>
  57 #endif
  58
  59 #ifdef TRACING
  60 #include "eventlog/EventLog.h"
  61 #endif
  62 /* -----------------------------------------------------------------------------
  63  * Global variables
  64  * -------------------------------------------------------------------------- */
  65
  66 #if !defined(THREADED_RTS)
  67 // Blocked/sleeping threads
     // (these queues are only defined when THREADED_RTS is not defined;
     // scheduleCheckBlockedThreads() polls them before calling awaitEvent())
  68 StgTSO *blocked_queue_hd = NULL;
  69 StgTSO *blocked_queue_tl = NULL;
  70 StgTSO *sleeping_queue = NULL;    // perhaps replace with a hash table?
  71 #endif
  72
  73 /* Set to true when the latest garbage collection failed to reclaim
  74  * enough space, and the runtime should proceed to shut itself down in
  75  * an orderly fashion (emitting profiling info etc.)
  76  */
  77 rtsBool heap_overflow = rtsFalse;
  78
  79 /* flag that tracks whether we have done any execution in this time slice.
  80  * LOCK: currently none, perhaps we should lock (but needs to be
  81  * updated in the fast path of the scheduler).
  82  *
  83  * NB. must be StgWord, we do xchg() on it.
  84  */
  85 volatile StgWord recent_activity = ACTIVITY_YES;
  86
  87 /* if this flag is set as well, give up execution
  88  * LOCK: none (changes monotonically)
      *
      * schedule() switches on this value at the top of every iteration; the
      * states it handles are SCHED_RUNNING, SCHED_INTERRUPTING and
      * SCHED_SHUTTING_DOWN (anything else is a fatal error there).
  89  */
  90 volatile StgWord sched_state = SCHED_RUNNING;
  91
  92 /*  This is used in `TSO.h' and gcc 2.96 insists that this variable actually
  93  *  exists - earlier gccs apparently didn't.
  94  *  -= chak
  95  */
  96 StgTSO dummy_tso;
  97
  98 /*
  99  * Set to TRUE when entering a shutdown state (via shutdownHaskellAndExit()) --
 100  * in an MT setting, needed to signal that a worker thread shouldn't hang around
 101  * in the scheduler when it is out of work.
 102  */
 103 rtsBool shutting_down_scheduler = rtsFalse;
 104
 105 /*
 106  * This mutex protects most of the global scheduler data in
 107  * the THREADED_RTS runtime.
 108  */
 109 #if defined(THREADED_RTS)
 110 Mutex sched_mutex;
 111 #endif
 112
     /*
      * FORKPROCESS_PRIMOP_SUPPORTED is defined on every host except mingw32.
      * In this file it gates the deleteThread_() prototype.
      */
 113 #if !defined(mingw32_HOST_OS)
 114 #define FORKPROCESS_PRIMOP_SUPPORTED
 115 #endif
 116
 117 /* -----------------------------------------------------------------------------
 118  * static function prototypes
 119  * -------------------------------------------------------------------------- */
 120
     // The scheduler loop itself.  Returns the Capability the calling Task
     // still holds when the loop is left.
 121 static Capability *schedule (Capability *initialCapability, Task *task);
 122
 123 //
 124 // These functions all encapsulate parts of the scheduler loop, and are
 125 // abstracted only to make the structure and control flow of the
 126 // scheduler clearer.
 127 //
     // schedulePreLoop() is called once before the loop; scheduleFindWork(),
     // schedulePushWork() and scheduleDetectDeadlock() are called on every
     // iteration before a thread is picked to run.
 128 static void schedulePreLoop (void);
 129 static void scheduleFindWork (Capability *cap);
 130 #if defined(THREADED_RTS)
     // May hand back a different Capability through *pcap.
 131 static void scheduleYield (Capability **pcap, Task *task);
 132 #endif
 133 static void scheduleStartSignalHandlers (Capability *cap);
 134 static void scheduleCheckBlockedThreads (Capability *cap);
 135 static void scheduleProcessInbox(Capability *cap);
 136 static void scheduleDetectDeadlock (Capability *cap, Task *task);
 137 static void schedulePushWork(Capability *cap, Task *task);
 138 #if defined(THREADED_RTS)
 139 static void scheduleActivateSpark(Capability *cap);
 140 #endif
     // The following are called by schedule() after a thread has stopped
     // running, according to the reason it returned to the scheduler.
 141 static void schedulePostRunThread(Capability *cap, StgTSO *t);
 142 static rtsBool scheduleHandleHeapOverflow( Capability *cap, StgTSO *t );
 143 static rtsBool scheduleHandleYield( Capability *cap, StgTSO *t,
 144                                     nat prev_what_next );
 145 static void scheduleHandleThreadBlocked( StgTSO *t );
 146 static rtsBool scheduleHandleThreadFinished( Capability *cap, Task *task,
 147                                              StgTSO *t );
 148 static rtsBool scheduleNeedHeapProfile(rtsBool ready_to_gc);
     // Returns a Capability; callers assign the result back to their `cap`.
 149 static Capability *scheduleDoGC(Capability *cap, Task *task,
 150                                 rtsBool force_major);
 151
 152 static void deleteThread (Capability *cap, StgTSO *tso);
 153 static void deleteAllThreads (Capability *cap);
 154
     // Only declared where FORKPROCESS_PRIMOP_SUPPORTED is defined.
 155 #ifdef FORKPROCESS_PRIMOP_SUPPORTED
 156 static void deleteThread_(Capability *cap, StgTSO *tso);
 157 #endif
 158
 159 /* ---------------------------------------------------------------------------
 160    Main scheduling loop.
 161
 162    We use round-robin scheduling, each thread returning to the
 163    scheduler loop when one of these conditions is detected:
 164
 165       * out of heap space
 166       * timer expires (thread yields)
 167       * thread blocks
 168       * thread ends
 169       * stack overflow
 170
 171    GRAN version:
 172      In a GranSim setup this loop iterates over the global event queue.
 173      This revolves around the global event queue, which determines what
 174      to do next. Therefore, it's more complicated than either the
 175      concurrent or the parallel (GUM) setup.
 176   This version has been entirely removed (JB 2008/08).
 177
 178    GUM version:
 179      GUM iterates over incoming messages.
 180      It starts with nothing to do (thus CurrentTSO == END_TSO_QUEUE),
 181      and sends out a fish whenever it has nothing to do; in-between
 182      doing the actual reductions (shared code below) it processes the
 183      incoming messages and deals with delayed operations
 184      (see PendingFetches).
 185      This is not the ugliest code you could imagine, but it's bloody close.
 186
 187   (JB 2008/08) This version was formerly indicated by a PP-Flag PAR,
 188   now by PP-flag PARALLEL_HASKELL. The Eden RTS (in GHC-6.x) uses it,
 189   as well as future GUM versions. This file has been refurbished to
 190   only contain valid code, which is however incomplete, refers to
 191   invalid includes etc.
 192
 193    ------------------------------------------------------------------------ */
 194
 195 static Capability *
 196 schedule (Capability *initialCapability, Task *task)
 197 {
 198   StgTSO *t;
 199   Capability *cap;
 200   StgThreadReturnCode ret;
 201   nat prev_what_next;
 202   rtsBool ready_to_gc;
 203 #if defined(THREADED_RTS)
 204   rtsBool first = rtsTrue;
 205 #endif
 206
 207   cap = initialCapability;
 208
 209   // Pre-condition: this task owns initialCapability.
 210   // The sched_mutex is *NOT* held
 211   // NB. on return, we still hold a capability.
 212
 213   debugTrace (DEBUG_sched, "cap %d: schedule()", initialCapability->no);
 214
 215   schedulePreLoop();
 216
 217   // -----------------------------------------------------------
 218   // Scheduler loop starts here:
 219
 220   while (1) {
 221
 222     // Check whether we have re-entered the RTS from Haskell without
 223     // going via suspendThread()/resumeThread (i.e. a 'safe' foreign
 224     // call).
 225     if (cap->in_haskell) {
 226           errorBelch("schedule: re-entered unsafely.\n"
 227                      "   Perhaps a 'foreign import unsafe' should be 'safe'?");
 228           stg_exit(EXIT_FAILURE);
 229     }
 230
 231     // The interruption / shutdown sequence.
 232     //
 233     // In order to cleanly shut down the runtime, we want to:
 234     //   * make sure that all main threads return to their callers
 235     //     with the state 'Interrupted'.
 236     //   * clean up all OS threads assocated with the runtime
 237     //   * free all memory etc.
 238     //
 239     // So the sequence for ^C goes like this:
 240     //
 241     //   * ^C handler sets sched_state := SCHED_INTERRUPTING and
 242     //     arranges for some Capability to wake up
 243     //
 244     //   * all threads in the system are halted, and the zombies are
 245     //     placed on the run queue for cleaning up.  We acquire all
 246     //     the capabilities in order to delete the threads, this is
 247     //     done by scheduleDoGC() for convenience (because GC already
 248     //     needs to acquire all the capabilities).  We can't kill
 249     //     threads involved in foreign calls.
 250     //
 251     //   * somebody calls shutdownHaskell(), which calls exitScheduler()
 252     //
 253     //   * sched_state := SCHED_SHUTTING_DOWN
 254     //
 255     //   * all workers exit when the run queue on their capability
 256     //     drains.  All main threads will also exit when their TSO
 257     //     reaches the head of the run queue and they can return.
 258     //
 259     //   * eventually all Capabilities will shut down, and the RTS can
 260     //     exit.
 261     //
 262     //   * We might be left with threads blocked in foreign calls,
 263     //     we should really attempt to kill these somehow (TODO);
 264
 265     switch (sched_state) {
 266     case SCHED_RUNNING:
 267         break;
 268     case SCHED_INTERRUPTING:
 269         debugTrace(DEBUG_sched, "SCHED_INTERRUPTING");
 270 #if defined(THREADED_RTS)
 271         discardSparksCap(cap);
 272 #endif
 273         /* scheduleDoGC() deletes all the threads */
 274         cap = scheduleDoGC(cap,task,rtsFalse);
 275
 276         // after scheduleDoGC(), we must be shutting down.  Either some
 277         // other Capability did the final GC, or we did it above,
 278         // either way we can fall through to the SCHED_SHUTTING_DOWN
 279         // case now.
 280         ASSERT(sched_state == SCHED_SHUTTING_DOWN);
 281         // fall through
 282
 283     case SCHED_SHUTTING_DOWN:
 284         debugTrace(DEBUG_sched, "SCHED_SHUTTING_DOWN");
 285         // If we are a worker, just exit.  If we're a bound thread
 286         // then we will exit below when we've removed our TSO from
 287         // the run queue.
 288         if (!isBoundTask(task) && emptyRunQueue(cap)) {
 289             return cap;
 290         }
 291         break;
 292     default:
 293         barf("sched_state: %d", sched_state);
 294     }
 295
 296     scheduleFindWork(cap);
 297
 298     /* work pushing, currently relevant only for THREADED_RTS:
 299        (pushes threads, wakes up idle capabilities for stealing) */
 300     schedulePushWork(cap,task);
 301
 302     scheduleDetectDeadlock(cap,task);
 303
 304 #if defined(THREADED_RTS)
 305     cap = task->cap;    // reload cap, it might have changed
 306 #endif
 307
 308     // Normally, the only way we can get here with no threads to
 309     // run is if a keyboard interrupt received during
 310     // scheduleCheckBlockedThreads() or scheduleDetectDeadlock().
 311     // Additionally, it is not fatal for the
 312     // threaded RTS to reach here with no threads to run.
 313     //
 314     // win32: might be here due to awaitEvent() being abandoned
 315     // as a result of a console event having been delivered.
 316
 317 #if defined(THREADED_RTS)
 318     if (first)
 319     {
 320     // XXX: ToDo
 321     //     // don't yield the first time, we want a chance to run this
 322     //     // thread for a bit, even if there are others banging at the
 323     //     // door.
 324     //     first = rtsFalse;
 325     //     ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 326     }
 327
 328     scheduleYield(&cap,task);
 329
 330     if (emptyRunQueue(cap)) continue; // look for work again
 331 #endif
 332
 333 #if !defined(THREADED_RTS) && !defined(mingw32_HOST_OS)
 334     if ( emptyRunQueue(cap) ) {
 335         ASSERT(sched_state >= SCHED_INTERRUPTING);
 336     }
 337 #endif
 338
 339     //
 340     // Get a thread to run
 341     //
 342     t = popRunQueue(cap);
 343
 344     // Sanity check the thread we're about to run.  This can be
 345     // expensive if there is lots of thread switching going on...
 346     IF_DEBUG(sanity,checkTSO(t));
 347
 348 #if defined(THREADED_RTS)
 349     // Check whether we can run this thread in the current task.
 350     // If not, we have to pass our capability to the right task.
 351     {
 352         InCall *bound = t->bound;
 353
 354         if (bound) {
 355             if (bound->task == task) {
 356                 // yes, the Haskell thread is bound to the current native thread
 357             } else {
 358                 debugTrace(DEBUG_sched,
 359                            "thread %lu bound to another OS thread",
 360                            (unsigned long)t->id);
 361                 // no, bound to a different Haskell thread: pass to that thread
 362                 pushOnRunQueue(cap,t);
 363                 continue;
 364             }
 365         } else {
 366             // The thread we want to run is unbound.
 367             if (task->incall->tso) {
 368                 debugTrace(DEBUG_sched,
 369                            "this OS thread cannot run thread %lu",
 370                            (unsigned long)t->id);
 371                 // no, the current native thread is bound to a different
 372                 // Haskell thread, so pass it to any worker thread
 373                 pushOnRunQueue(cap,t);
 374                 continue;
 375             }
 376         }
 377     }
 378 #endif
 379
 380     // If we're shutting down, and this thread has not yet been
 381     // killed, kill it now.  This sometimes happens when a finalizer
 382     // thread is created by the final GC, or a thread previously
 383     // in a foreign call returns.
 384     if (sched_state >= SCHED_INTERRUPTING &&
 385         !(t->what_next == ThreadComplete || t->what_next == ThreadKilled)) {
 386         deleteThread(cap,t);
 387     }
 388
 389     /* context switches are initiated by the timer signal, unless
 390      * the user specified "context switch as often as possible", with
 391      * +RTS -C0
 392      */
 393     if (RtsFlags.ConcFlags.ctxtSwitchTicks == 0
 394         && !emptyThreadQueues(cap)) {
 395         cap->context_switch = 1;
 396     }
 397
 398 run_thread:
 399
 400     // CurrentTSO is the thread to run.  t might be different if we
 401     // loop back to run_thread, so make sure to set CurrentTSO after
 402     // that.
 403     cap->r.rCurrentTSO = t;
 404
 405     startHeapProfTimer();
 406
 407     // ----------------------------------------------------------------------
 408     // Run the current thread
 409
 410     ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 411     ASSERT(t->cap == cap);
 412     ASSERT(t->bound ? t->bound->task->cap == cap : 1);
 413
 414     prev_what_next = t->what_next;
 415
 416     errno = t->saved_errno;
 417 #if mingw32_HOST_OS
 418     SetLastError(t->saved_winerror);
 419 #endif
 420
 421     cap->in_haskell = rtsTrue;
 422
 423     dirty_TSO(cap,t);
 424     dirty_STACK(cap,t->stackobj);
 425
 426 #if defined(THREADED_RTS)
 427     if (recent_activity == ACTIVITY_DONE_GC) {
 428         // ACTIVITY_DONE_GC means we turned off the timer signal to
 429         // conserve power (see #1623).  Re-enable it here.
 430         nat prev;
 431         prev = xchg((P_)&recent_activity, ACTIVITY_YES);
 432         if (prev == ACTIVITY_DONE_GC) {
 433             startTimer();
 434         }
 435     } else if (recent_activity != ACTIVITY_INACTIVE) {
 436         // If we reached ACTIVITY_INACTIVE, then don't reset it until
 437         // we've done the GC.  The thread running here might just be
 438         // the IO manager thread that handle_tick() woke up via
 439         // wakeUpRts().
 440         recent_activity = ACTIVITY_YES;
 441     }
 442 #endif
 443
 444     traceEventRunThread(cap, t);
 445
 446     switch (prev_what_next) {
 447
 448     case ThreadKilled:
 449     case ThreadComplete:
 450         /* Thread already finished, return to scheduler. */
 451         ret = ThreadFinished;
 452         break;
 453
 454     case ThreadRunGHC:
 455     {
 456         StgRegTable *r;
 457         r = StgRun((StgFunPtr) stg_returnToStackTop, &cap->r);
 458         cap = regTableToCapability(r);
 459         ret = r->rRet;
 460         break;
 461     }
 462
 463     case ThreadInterpret:
 464         cap = interpretBCO(cap);
 465         ret = cap->r.rRet;
 466         break;
 467
 468     default:
 469         barf("schedule: invalid what_next field");
 470     }
 471
 472     cap->in_haskell = rtsFalse;
 473
 474     // The TSO might have moved, eg. if it re-entered the RTS and a GC
 475     // happened.  So find the new location:
 476     t = cap->r.rCurrentTSO;
 477
 478     // And save the current errno in this thread.
 479     // XXX: possibly bogus for SMP because this thread might already
 480     // be running again, see code below.
 481     t->saved_errno = errno;
 482 #if mingw32_HOST_OS
 483     // Similarly for Windows error code
 484     t->saved_winerror = GetLastError();
 485 #endif
 486
 487     traceEventStopThread(cap, t, ret);
 488
 489     ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 490     ASSERT(t->cap == cap);
 491
 492     // ----------------------------------------------------------------------
 493
 494     // Costs for the scheduler are assigned to CCS_SYSTEM
 495     stopHeapProfTimer();
 496 #if defined(PROFILING)
 497     CCCS = CCS_SYSTEM;
 498 #endif
 499
 500     schedulePostRunThread(cap,t);
 501
 502     ready_to_gc = rtsFalse;
 503
 504     switch (ret) {
 505     case HeapOverflow:
 506         ready_to_gc = scheduleHandleHeapOverflow(cap,t);
 507         break;
 508
 509     case StackOverflow:
 510         // just adjust the stack for this thread, then pop it back
 511         // on the run queue.
 512         threadStackOverflow(cap, t);
 513         pushOnRunQueue(cap,t);
 514         break;
 515
 516     case ThreadYielding:
 517         if (scheduleHandleYield(cap, t, prev_what_next)) {
 518             // shortcut for switching between compiler/interpreter:
 519             goto run_thread;
 520         }
 521         break;
 522
 523     case ThreadBlocked:
 524         scheduleHandleThreadBlocked(t);
 525         break;
 526
 527     case ThreadFinished:
 528         if (scheduleHandleThreadFinished(cap, task, t)) return cap;
 529         ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 530         break;
 531
 532     default:
 533       barf("schedule: invalid thread return code %d", (int)ret);
 534     }
 535
 536     if (ready_to_gc || scheduleNeedHeapProfile(ready_to_gc)) {
 537       cap = scheduleDoGC(cap,task,rtsFalse);
 538     }
 539   } /* end of while() */
 540 }
 541
 542 /* -----------------------------------------------------------------------------
 543  * Run queue operations
 544  * -------------------------------------------------------------------------- */
 545
 546 void
 547 removeFromRunQueue (Capability *cap, StgTSO *tso)
 548 {
 549     if (tso->block_info.prev == END_TSO_QUEUE) {
 550         ASSERT(cap->run_queue_hd == tso);
 551         cap->run_queue_hd = tso->_link;
 552     } else {
 553         setTSOLink(cap, tso->block_info.prev, tso->_link);
 554     }
 555     if (tso->_link == END_TSO_QUEUE) {
 556         ASSERT(cap->run_queue_tl == tso);
 557         cap->run_queue_tl = tso->block_info.prev;
 558     } else {
 559         setTSOPrev(cap, tso->_link, tso->block_info.prev);
 560     }
 561     tso->_link = tso->block_info.prev = END_TSO_QUEUE;
 562
 563     IF_DEBUG(sanity, checkRunQueue(cap));
 564 }
 565
 566 /* ----------------------------------------------------------------------------
 567  * Setting up the scheduler loop
 568  * ------------------------------------------------------------------------- */
 569
 570 static void
 571 schedulePreLoop(void)
 572 {
 573   // initialisation for scheduler - what cannot go into initScheduler()
 574 }
 575
 576 /* -----------------------------------------------------------------------------
 577  * scheduleFindWork()
 578  *
 579  * Search for work to do, and handle messages from elsewhere.
 580  * -------------------------------------------------------------------------- */
 581
 582 static void
 583 scheduleFindWork (Capability *cap)
 584 {
 585     scheduleStartSignalHandlers(cap);
 586
 587     scheduleProcessInbox(cap);
 588
 589     scheduleCheckBlockedThreads(cap);
 590
 591 #if defined(THREADED_RTS)
 592     if (emptyRunQueue(cap)) { scheduleActivateSpark(cap); }
 593 #endif
 594 }
 595
 596 #if defined(THREADED_RTS)
 597 STATIC_INLINE rtsBool
 598 shouldYieldCapability (Capability *cap, Task *task)
 599 {
 600     // we need to yield this capability to someone else if..
 601     //   - another thread is initiating a GC
 602     //   - another Task is returning from a foreign call
 603     //   - the thread at the head of the run queue cannot be run
 604     //     by this Task (it is bound to another Task, or it is unbound
 605     //     and this task it bound).
 606     return (waiting_for_gc ||
 607             cap->returning_tasks_hd != NULL ||
 608             (!emptyRunQueue(cap) && (task->incall->tso == NULL
 609                                      ? cap->run_queue_hd->bound != NULL
 610                                      : cap->run_queue_hd->bound != task->incall)));
 611 }
 612
 613 // This is the single place where a Task goes to sleep.  There are
 614 // two reasons it might need to sleep:
 615 //    - there are no threads to run
 616 //    - we need to yield this Capability to someone else
 617 //      (see shouldYieldCapability())
 618 //
 619 // Careful: the scheduler loop is quite delicate.  Make sure you run
 620 // the tests in testsuite/concurrent (all ways) after modifying this,
 621 // and also check the benchmarks in nofib/parallel for regressions.
 622
 623 static void
 624 scheduleYield (Capability **pcap, Task *task)
 625 {
 626     Capability *cap = *pcap;
 627
 628     // if we have work, and we don't need to give up the Capability, continue.
 629     //
 630     if (!shouldYieldCapability(cap,task) &&
 631         (!emptyRunQueue(cap) ||
 632          !emptyInbox(cap) ||
 633          sched_state >= SCHED_INTERRUPTING))
 634         return;
 635
 636     // otherwise yield (sleep), and keep yielding if necessary.
 637     do {
 638         yieldCapability(&cap,task);
 639     }
 640     while (shouldYieldCapability(cap,task));
 641
 642     // note there may still be no threads on the run queue at this
 643     // point, the caller has to check.
 644
 645     *pcap = cap;
 646     return;
 647 }
 648 #endif
 649
 650 /* -----------------------------------------------------------------------------
 651  * schedulePushWork()
 652  *
 653  * Push work to other Capabilities if we have some.
 654  * -------------------------------------------------------------------------- */
 655
 656 static void
 657 schedulePushWork(Capability *cap USED_IF_THREADS,
 658                  Task *task      USED_IF_THREADS)
 659 {
 660   /* following code not for PARALLEL_HASKELL. I kept the call general,
 661      future GUM versions might use pushing in a distributed setup */
 662 #if defined(THREADED_RTS)
 663
 664     Capability *free_caps[n_capabilities], *cap0;
 665     nat i, n_free_caps;
 666
 667     // migration can be turned off with +RTS -qm
 668     if (!RtsFlags.ParFlags.migrate) return;
 669
 670     // Check whether we have more threads on our run queue, or sparks
 671     // in our pool, that we could hand to another Capability.
 672     if (cap->run_queue_hd == END_TSO_QUEUE) {
 673         if (sparkPoolSizeCap(cap) < 2) return;
 674     } else {
 675         if (cap->run_queue_hd->_link == END_TSO_QUEUE &&
 676             sparkPoolSizeCap(cap) < 1) return;
 677     }
 678
 679     // First grab as many free Capabilities as we can.
 680     for (i=0, n_free_caps=0; i < n_capabilities; i++) {
 681         cap0 = &capabilities[i];
 682         if (cap != cap0 && tryGrabCapability(cap0,task)) {
 683             if (!emptyRunQueue(cap0)
 684                 || cap->returning_tasks_hd != NULL
 685                 || cap->inbox != (Message*)END_TSO_QUEUE) {
 686                 // it already has some work, we just grabbed it at
 687                 // the wrong moment.  Or maybe it's deadlocked!
 688                 releaseCapability(cap0);
 689             } else {
 690                 free_caps[n_free_caps++] = cap0;
 691             }
 692         }
 693     }
 694
 695     // we now have n_free_caps free capabilities stashed in
 696     // free_caps[].  Share our run queue equally with them.  This is
 697     // probably the simplest thing we could do; improvements we might
 698     // want to do include:
 699     //
 700     //   - giving high priority to moving relatively new threads, on
 701     //     the gournds that they haven't had time to build up a
 702     //     working set in the cache on this CPU/Capability.
 703     //
 704     //   - giving low priority to moving long-lived threads
 705
 706     if (n_free_caps > 0) {
 707         StgTSO *prev, *t, *next;
 708         rtsBool pushed_to_all;
 709
 710         debugTrace(DEBUG_sched,
 711                    "cap %d: %s and %d free capabilities, sharing...",
 712                    cap->no,
 713                    (!emptyRunQueue(cap) && cap->run_queue_hd->_link != END_TSO_QUEUE)?
 714                    "excess threads on run queue":"sparks to share (>=2)",
 715                    n_free_caps);
 716
 717         i = 0;
 718         pushed_to_all = rtsFalse;
 719
 720         if (cap->run_queue_hd != END_TSO_QUEUE) {
 721             prev = cap->run_queue_hd;
 722             t = prev->_link;
 723             prev->_link = END_TSO_QUEUE;
 724             for (; t != END_TSO_QUEUE; t = next) {
 725                 next = t->_link;
 726                 t->_link = END_TSO_QUEUE;
 727                 if (t->bound == task->incall // don't move my bound thread
 728                     || tsoLocked(t)) {  // don't move a locked thread
 729                     setTSOLink(cap, prev, t);
 730                     setTSOPrev(cap, t, prev);
 731                     prev = t;
 732                 } else if (i == n_free_caps) {
 733                     pushed_to_all = rtsTrue;
 734                     i = 0;
 735                     // keep one for us
 736                     setTSOLink(cap, prev, t);
 737                     setTSOPrev(cap, t, prev);
 738                     prev = t;
 739                 } else {
 740                     appendToRunQueue(free_caps[i],t);
 741
 742                     traceEventMigrateThread (cap, t, free_caps[i]->no);
 743
 744                     if (t->bound) { t->bound->task->cap = free_caps[i]; }
 745                     t->cap = free_caps[i];
 746                     i++;
 747                 }
 748             }
 749             cap->run_queue_tl = prev;
 750
 751             IF_DEBUG(sanity, checkRunQueue(cap));
 752         }
 753
 754 #ifdef SPARK_PUSHING
 755         /* JB I left this code in place, it would work but is not necessary */
 756
 757         // If there are some free capabilities that we didn't push any
 758         // threads to, then try to push a spark to each one.
 759         if (!pushed_to_all) {
 760             StgClosure *spark;
 761             // i is the next free capability to push to
 762             for (; i < n_free_caps; i++) {
 763                 if (emptySparkPoolCap(free_caps[i])) {
 764                     spark = tryStealSpark(cap->sparks);
 765                     if (spark != NULL) {
 766                         debugTrace(DEBUG_sched, "pushing spark %p to capability %d", spark, free_caps[i]->no);
 767
 768             traceEventStealSpark(free_caps[i], t, cap->no);
 769
 770                         newSpark(&(free_caps[i]->r), spark);
 771                     }
 772                 }
 773             }
 774         }
 775 #endif /* SPARK_PUSHING */
 776
 777         // release the capabilities
 778         for (i = 0; i < n_free_caps; i++) {
 779             task->cap = free_caps[i];
 780             releaseAndWakeupCapability(free_caps[i]);
 781         }
 782     }
 783     task->cap = cap; // reset to point to our Capability.
 784
 785 #endif /* THREADED_RTS */
 786
 787 }
 788
 789 /* ----------------------------------------------------------------------------
 790  * Start any pending signal handlers
 791  * ------------------------------------------------------------------------- */
 792
 793 #if defined(RTS_USER_SIGNALS) && !defined(THREADED_RTS)
 794 static void
 795 scheduleStartSignalHandlers(Capability *cap)
 796 {
 797     if (RtsFlags.MiscFlags.install_signal_handlers && signals_pending()) {
 798         // safe outside the lock
 799         startSignalHandlers(cap);
 800     }
 801 }
 802 #else
 803 static void
 804 scheduleStartSignalHandlers(Capability *cap STG_UNUSED)
 805 {
 806 }
 807 #endif
 808
 809 /* ----------------------------------------------------------------------------
 810  * Check for blocked threads that can be woken up.
 811  * ------------------------------------------------------------------------- */
 812
 813 static void
 814 scheduleCheckBlockedThreads(Capability *cap USED_IF_NOT_THREADS)
 815 {
 816 #if !defined(THREADED_RTS)
 817     //
 818     // Check whether any waiting threads need to be woken up.  If the
 819     // run queue is empty, and there are no other tasks running, we
 820     // can wait indefinitely for something to happen.
 821     //
 822     if ( !emptyQueue(blocked_queue_hd) || !emptyQueue(sleeping_queue) )
 823     {
 824         awaitEvent (emptyRunQueue(cap));
 825     }
 826 #endif
 827 }
 828
 829 /* ----------------------------------------------------------------------------
 830  * Detect deadlock conditions and attempt to resolve them.
 831  * ------------------------------------------------------------------------- */
 832
 833 static void
 834 scheduleDetectDeadlock (Capability *cap, Task *task)
 835 {
 836     /*
 837      * Detect deadlock: when we have no threads to run, there are no
 838      * threads blocked, waiting for I/O, or sleeping, and all the
 839      * other tasks are waiting for work, we must have a deadlock of
 840      * some description.
 841      */
 842     if ( emptyThreadQueues(cap) )
 843     {
 844 #if defined(THREADED_RTS)
 845         /*
 846          * In the threaded RTS, we only check for deadlock if there
 847          * has been no activity in a complete timeslice.  This means
 848          * we won't eagerly start a full GC just because we don't have
 849          * any threads to run currently.
 850          */
 851         if (recent_activity != ACTIVITY_INACTIVE) return;
 852 #endif
 853
 854         debugTrace(DEBUG_sched, "deadlocked, forcing major GC...");
 855
 856         // Garbage collection can release some new threads due to
 857         // either (a) finalizers or (b) threads resurrected because
 858         // they are unreachable and will therefore be sent an
 859         // exception.  Any threads thus released will be immediately
 860         // runnable.
 861         cap = scheduleDoGC (cap, task, rtsTrue/*force major GC*/);
 862         // when force_major == rtsTrue. scheduleDoGC sets
 863         // recent_activity to ACTIVITY_DONE_GC and turns off the timer
 864         // signal.
 865
 866         if ( !emptyRunQueue(cap) ) return;
 867
 868 #if defined(RTS_USER_SIGNALS) && !defined(THREADED_RTS)
 869         /* If we have user-installed signal handlers, then wait
 870          * for signals to arrive rather then bombing out with a
 871          * deadlock.
 872          */
 873         if ( RtsFlags.MiscFlags.install_signal_handlers && anyUserHandlers() ) {
 874             debugTrace(DEBUG_sched,
 875                        "still deadlocked, waiting for signals...");
 876
 877             awaitUserSignals();
 878
 879             if (signals_pending()) {
 880                 startSignalHandlers(cap);
 881             }
 882
 883             // either we have threads to run, or we were interrupted:
 884             ASSERT(!emptyRunQueue(cap) || sched_state >= SCHED_INTERRUPTING);
 885
 886             return;
 887         }
 888 #endif
 889
 890 #if !defined(THREADED_RTS)
 891         /* Probably a real deadlock.  Send the current main thread the
 892          * Deadlock exception.
 893          */
 894         if (task->incall->tso) {
 895             switch (task->incall->tso->why_blocked) {
 896             case BlockedOnSTM:
 897             case BlockedOnBlackHole:
 898             case BlockedOnMsgThrowTo:
 899             case BlockedOnMVar:
 900                 throwToSingleThreaded(cap, task->incall->tso,
 901                                       (StgClosure *)nonTermination_closure);
 902                 return;
 903             default:
 904                 barf("deadlock: main thread blocked in a strange way");
 905             }
 906         }
 907         return;
 908 #endif
 909     }
 910 }
 911
 912
 913 /* ----------------------------------------------------------------------------
 914  * Send pending messages (PARALLEL_HASKELL only)
 915  * ------------------------------------------------------------------------- */
 916
 917 #if defined(PARALLEL_HASKELL)
 918 static void
 919 scheduleSendPendingMessages(void)
 920 {
 921
 922 # if defined(PAR) // global Mem.Mgmt., omit for now
 923     if (PendingFetches != END_BF_QUEUE) {
 924         processFetches();
 925     }
 926 # endif
 927
 928     if (RtsFlags.ParFlags.BufferTime) {
 929         // if we use message buffering, we must send away all message
 930         // packets which have become too old...
 931         sendOldBuffers();
 932     }
 933 }
 934 #endif
 935
 936 /* ----------------------------------------------------------------------------
 937  * Process messages in the current Capability's inbox
 938  * ------------------------------------------------------------------------- */
 939
 940 static void
 941 scheduleProcessInbox (Capability *cap USED_IF_THREADS)
 942 {
 943 #if defined(THREADED_RTS)
 944     Message *m;
 945
 946     while (!emptyInbox(cap)) {
 947         ACQUIRE_LOCK(&cap->lock);
 948         m = cap->inbox;
 949         cap->inbox = m->link;
 950         RELEASE_LOCK(&cap->lock);
 951         executeMessage(cap, (Message *)m);
 952     }
 953 #endif
 954 }
 955
 956 /* ----------------------------------------------------------------------------
 957  * Activate spark threads (PARALLEL_HASKELL and THREADED_RTS)
 958  * ------------------------------------------------------------------------- */
 959
 960 #if defined(THREADED_RTS)
 961 static void
 962 scheduleActivateSpark(Capability *cap)
 963 {
 964     if (anySparks())
 965     {
 966         createSparkThread(cap);
 967         debugTrace(DEBUG_sched, "creating a spark thread");
 968     }
 969 }
 970 #endif // PARALLEL_HASKELL || THREADED_RTS
 971
 972 /* ----------------------------------------------------------------------------
 973  * After running a thread...
 974  * ------------------------------------------------------------------------- */
 975
 976 static void
 977 schedulePostRunThread (Capability *cap, StgTSO *t)
 978 {
 979     // We have to be able to catch transactions that are in an
 980     // infinite loop as a result of seeing an inconsistent view of
 981     // memory, e.g.
 982     //
 983     //   atomically $ do
 984     //       [a,b] <- mapM readTVar [ta,tb]
 985     //       when (a == b) loop
 986     //
 987     // and a is never equal to b given a consistent view of memory.
 988     //
 989     if (t -> trec != NO_TREC && t -> why_blocked == NotBlocked) {
 990         if (!stmValidateNestOfTransactions (t -> trec)) {
 991             debugTrace(DEBUG_sched | DEBUG_stm,
 992                        "trec %p found wasting its time", t);
 993
 994             // strip the stack back to the
 995             // ATOMICALLY_FRAME, aborting the (nested)
 996             // transaction, and saving the stack of any
 997             // partially-evaluated thunks on the heap.
 998             throwToSingleThreaded_(cap, t, NULL, rtsTrue);
 999
1000 //            ASSERT(get_itbl((StgClosure *)t->sp)->type == ATOMICALLY_FRAME);
1001         }
1002     }
1003
1004   /* some statistics gathering in the parallel case */
1005 }
1006
1007 /* -----------------------------------------------------------------------------
1008  * Handle a thread that returned to the scheduler with ThreadHeapOverflow
1009  * -------------------------------------------------------------------------- */
1010
1011 static rtsBool
1012 scheduleHandleHeapOverflow( Capability *cap, StgTSO *t )
1013 {
1014     // did the task ask for a large block?
1015     if (cap->r.rHpAlloc > BLOCK_SIZE) {
1016         // if so, get one and push it on the front of the nursery.
1017         bdescr *bd;
1018         lnat blocks;
1019
1020         blocks = (lnat)BLOCK_ROUND_UP(cap->r.rHpAlloc) / BLOCK_SIZE;
1021
1022         if (blocks > BLOCKS_PER_MBLOCK) {
1023             barf("allocation of %ld bytes too large (GHC should have complained at compile-time)", (long)cap->r.rHpAlloc);
1024         }
1025
1026         debugTrace(DEBUG_sched,
1027                    "--<< thread %ld (%s) stopped: requesting a large block (size %ld)\n",
1028                    (long)t->id, what_next_strs[t->what_next], blocks);
1029
1030         // don't do this if the nursery is (nearly) full, we'll GC first.
1031         if (cap->r.rCurrentNursery->link != NULL ||
1032             cap->r.rNursery->n_blocks == 1) {  // paranoia to prevent infinite loop
1033                                                // if the nursery has only one block.
1034
1035             bd = allocGroup_lock(blocks);
1036             cap->r.rNursery->n_blocks += blocks;
1037
1038             // link the new group into the list
1039             bd->link = cap->r.rCurrentNursery;
1040             bd->u.back = cap->r.rCurrentNursery->u.back;
1041             if (cap->r.rCurrentNursery->u.back != NULL) {
1042                 cap->r.rCurrentNursery->u.back->link = bd;
1043             } else {
1044                 cap->r.rNursery->blocks = bd;
1045             }
1046             cap->r.rCurrentNursery->u.back = bd;
1047
1048             // initialise it as a nursery block.  We initialise the
1049             // step, gen_no, and flags field of *every* sub-block in
1050             // this large block, because this is easier than making
1051             // sure that we always find the block head of a large
1052             // block whenever we call Bdescr() (eg. evacuate() and
1053             // isAlive() in the GC would both have to do this, at
1054             // least).
1055             {
1056                 bdescr *x;
1057                 for (x = bd; x < bd + blocks; x++) {
1058                     initBdescr(x,g0,g0);
1059                     x->free = x->start;
1060                     x->flags = 0;
1061                 }
1062             }
1063
1064             // This assert can be a killer if the app is doing lots
1065             // of large block allocations.
1066             IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
1067
1068             // now update the nursery to point to the new block
1069             cap->r.rCurrentNursery = bd;
1070
1071             // we might be unlucky and have another thread get on the
1072             // run queue before us and steal the large block, but in that
1073             // case the thread will just end up requesting another large
1074             // block.
1075             pushOnRunQueue(cap,t);
1076             return rtsFalse;  /* not actually GC'ing */
1077         }
1078     }
1079
1080     if (cap->r.rHpLim == NULL || cap->context_switch) {
1081         // Sometimes we miss a context switch, e.g. when calling
1082         // primitives in a tight loop, MAYBE_GC() doesn't check the
1083         // context switch flag, and we end up waiting for a GC.
1084         // See #1984, and concurrent/should_run/1984
1085         cap->context_switch = 0;
1086         appendToRunQueue(cap,t);
1087     } else {
1088         pushOnRunQueue(cap,t);
1089     }
1090     return rtsTrue;
1091     /* actual GC is done at the end of the while loop in schedule() */
1092 }
1093
1094 /* -----------------------------------------------------------------------------
1095  * Handle a thread that returned to the scheduler with ThreadYielding
1096  * -------------------------------------------------------------------------- */
1097
1098 static rtsBool
1099 scheduleHandleYield( Capability *cap, StgTSO *t, nat prev_what_next )
1100 {
1101     /* put the thread back on the run queue.  Then, if we're ready to
1102      * GC, check whether this is the last task to stop.  If so, wake
1103      * up the GC thread.  getThread will block during a GC until the
1104      * GC is finished.
1105      */
1106
1107     ASSERT(t->_link == END_TSO_QUEUE);
1108
1109     // Shortcut if we're just switching evaluators: don't bother
1110     // doing stack squeezing (which can be expensive), just run the
1111     // thread.
1112     if (cap->context_switch == 0 && t->what_next != prev_what_next) {
1113         debugTrace(DEBUG_sched,
1114                    "--<< thread %ld (%s) stopped to switch evaluators",
1115                    (long)t->id, what_next_strs[t->what_next]);
1116         return rtsTrue;
1117     }
1118
1119     // Reset the context switch flag.  We don't do this just before
1120     // running the thread, because that would mean we would lose ticks
1121     // during GC, which can lead to unfair scheduling (a thread hogs
1122     // the CPU because the tick always arrives during GC).  This way
1123     // penalises threads that do a lot of allocation, but that seems
1124     // better than the alternative.
1125     cap->context_switch = 0;
1126
1127     IF_DEBUG(sanity,
1128              //debugBelch("&& Doing sanity check on yielding TSO %ld.", t->id);
1129              checkTSO(t));
1130
1131     appendToRunQueue(cap,t);
1132
1133     return rtsFalse;
1134 }
1135
1136 /* -----------------------------------------------------------------------------
1137  * Handle a thread that returned to the scheduler with ThreadBlocked
1138  * -------------------------------------------------------------------------- */
1139
1140 static void
1141 scheduleHandleThreadBlocked( StgTSO *t
1142 #if !defined(DEBUG)
1143     STG_UNUSED
1144 #endif
1145     )
1146 {
1147
1148       // We don't need to do anything.  The thread is blocked, and it
1149       // has tidied up its stack and placed itself on whatever queue
1150       // it needs to be on.
1151
1152     // ASSERT(t->why_blocked != NotBlocked);
1153     // Not true: for example,
1154     //    - the thread may have woken itself up already, because
1155     //      threadPaused() might have raised a blocked throwTo
1156     //      exception, see maybePerformBlockedException().
1157
1158 #ifdef DEBUG
1159     traceThreadStatus(DEBUG_sched, t);
1160 #endif
1161 }
1162
1163 /* -----------------------------------------------------------------------------
1164  * Handle a thread that returned to the scheduler with ThreadFinished
1165  * -------------------------------------------------------------------------- */
1166
1167 static rtsBool
1168 scheduleHandleThreadFinished (Capability *cap STG_UNUSED, Task *task, StgTSO *t)
1169 {
1170     /* Need to check whether this was a main thread, and if so,
1171      * return with the return value.
1172      *
1173      * We also end up here if the thread kills itself with an
1174      * uncaught exception, see Exception.cmm.
1175      */
1176
1177     // blocked exceptions can now complete, even if the thread was in
1178     // blocked mode (see #2910).
1179     awakenBlockedExceptionQueue (cap, t);
1180
1181       //
1182       // Check whether the thread that just completed was a bound
1183       // thread, and if so return with the result.
1184       //
1185       // There is an assumption here that all thread completion goes
1186       // through this point; we need to make sure that if a thread
1187       // ends up in the ThreadKilled state, that it stays on the run
1188       // queue so it can be dealt with here.
1189       //
1190
1191       if (t->bound) {
1192
1193           if (t->bound != task->incall) {
1194 #if !defined(THREADED_RTS)
1195               // Must be a bound thread that is not the topmost one.  Leave
1196               // it on the run queue until the stack has unwound to the
1197               // point where we can deal with this.  Leaving it on the run
1198               // queue also ensures that the garbage collector knows about
1199               // this thread and its return value (it gets dropped from the
1200               // step->threads list so there's no other way to find it).
1201               appendToRunQueue(cap,t);
1202               return rtsFalse;
1203 #else
1204               // this cannot happen in the threaded RTS, because a
1205               // bound thread can only be run by the appropriate Task.
1206               barf("finished bound thread that isn't mine");
1207 #endif
1208           }
1209
1210           ASSERT(task->incall->tso == t);
1211
1212           if (t->what_next == ThreadComplete) {
1213               if (task->incall->ret) {
1214                   // NOTE: return val is stack->sp[1] (see StgStartup.hc)
1215                   *(task->incall->ret) = (StgClosure *)task->incall->tso->stackobj->sp[1];
1216               }
1217               task->incall->stat = Success;
1218           } else {
1219               if (task->incall->ret) {
1220                   *(task->incall->ret) = NULL;
1221               }
1222               if (sched_state >= SCHED_INTERRUPTING) {
1223                   if (heap_overflow) {
1224                       task->incall->stat = HeapExhausted;
1225                   } else {
1226                       task->incall->stat = Interrupted;
1227                   }
1228               } else {
1229                   task->incall->stat = Killed;
1230               }
1231           }
1232 #ifdef DEBUG
1233           removeThreadLabel((StgWord)task->incall->tso->id);
1234 #endif
1235
1236           // We no longer consider this thread and task to be bound to
1237           // each other.  The TSO lives on until it is GC'd, but the
1238           // task is about to be released by the caller, and we don't
1239           // want anyone following the pointer from the TSO to the
1240           // defunct task (which might have already been
1241           // re-used). This was a real bug: the GC updated
1242           // tso->bound->tso which lead to a deadlock.
1243           t->bound = NULL;
1244           task->incall->tso = NULL;
1245
1246           return rtsTrue; // tells schedule() to return
1247       }
1248
1249       return rtsFalse;
1250 }
1251
1252 /* -----------------------------------------------------------------------------
1253  * Perform a heap census
1254  * -------------------------------------------------------------------------- */
1255
1256 static rtsBool
1257 scheduleNeedHeapProfile( rtsBool ready_to_gc STG_UNUSED )
1258 {
1259     // When we have +RTS -i0 and we're heap profiling, do a census at
1260     // every GC.  This lets us get repeatable runs for debugging.
1261     if (performHeapProfile ||
1262         (RtsFlags.ProfFlags.profileInterval==0 &&
1263          RtsFlags.ProfFlags.doHeapProfile && ready_to_gc)) {
1264         return rtsTrue;
1265     } else {
1266         return rtsFalse;
1267     }
1268 }
1269
1270 /* -----------------------------------------------------------------------------
1271  * Perform a garbage collection if necessary
1272  * -------------------------------------------------------------------------- */
1273
1274 static Capability *
1275 scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major)
1276 {
1277     rtsBool heap_census;
1278 #ifdef THREADED_RTS
1279     /* extern static volatile StgWord waiting_for_gc;
1280        lives inside capability.c */
1281     rtsBool gc_type, prev_pending_gc;
1282     nat i;
1283 #endif
1284
1285     if (sched_state == SCHED_SHUTTING_DOWN) {
1286         // The final GC has already been done, and the system is
1287         // shutting down.  We'll probably deadlock if we try to GC
1288         // now.
1289         return cap;
1290     }
1291
1292 #ifdef THREADED_RTS
1293     if (sched_state < SCHED_INTERRUPTING
1294         && RtsFlags.ParFlags.parGcEnabled
1295         && N >= RtsFlags.ParFlags.parGcGen
1296         && ! oldest_gen->mark)
1297     {
1298         gc_type = PENDING_GC_PAR;
1299     } else {
1300         gc_type = PENDING_GC_SEQ;
1301     }
1302
1303     // In order to GC, there must be no threads running Haskell code.
1304     // Therefore, the GC thread needs to hold *all* the capabilities,
1305     // and release them after the GC has completed.
1306     //
1307     // This seems to be the simplest way: previous attempts involved
1308     // making all the threads with capabilities give up their
1309     // capabilities and sleep except for the *last* one, which
1310     // actually did the GC.  But it's quite hard to arrange for all
1311     // the other tasks to sleep and stay asleep.
1312     //
1313
1314     /*  Other capabilities are prevented from running yet more Haskell
1315         threads if waiting_for_gc is set. Tested inside
1316         yieldCapability() and releaseCapability() in Capability.c */
1317
1318     prev_pending_gc = cas(&waiting_for_gc, 0, gc_type);
1319     if (prev_pending_gc) {
1320         do {
1321             debugTrace(DEBUG_sched, "someone else is trying to GC (%d)...",
1322                        prev_pending_gc);
1323             ASSERT(cap);
1324             yieldCapability(&cap,task);
1325         } while (waiting_for_gc);
1326         return cap;  // NOTE: task->cap might have changed here
1327     }
1328
1329     setContextSwitches();
1330
1331     // The final shutdown GC is always single-threaded, because it's
1332     // possible that some of the Capabilities have no worker threads.
1333
1334     if (gc_type == PENDING_GC_SEQ)
1335     {
1336         traceEventRequestSeqGc(cap);
1337     }
1338     else
1339     {
1340         traceEventRequestParGc(cap);
1341         debugTrace(DEBUG_sched, "ready_to_gc, grabbing GC threads");
1342     }
1343
1344     if (gc_type == PENDING_GC_SEQ)
1345     {
1346         // single-threaded GC: grab all the capabilities
1347         for (i=0; i < n_capabilities; i++) {
1348             debugTrace(DEBUG_sched, "ready_to_gc, grabbing all the capabilies (%d/%d)", i, n_capabilities);
1349             if (cap != &capabilities[i]) {
1350                 Capability *pcap = &capabilities[i];
1351                 // we better hope this task doesn't get migrated to
1352                 // another Capability while we're waiting for this one.
1353                 // It won't, because load balancing happens while we have
1354                 // all the Capabilities, but even so it's a slightly
1355                 // unsavoury invariant.
1356                 task->cap = pcap;
1357                 waitForReturnCapability(&pcap, task);
1358                 if (pcap != &capabilities[i]) {
1359                     barf("scheduleDoGC: got the wrong capability");
1360                 }
1361             }
1362         }
1363     }
1364     else
1365     {
1366         // multi-threaded GC: make sure all the Capabilities donate one
1367         // GC thread each.
1368         waitForGcThreads(cap);
1369     }
1370
1371 #endif
1372
1373     IF_DEBUG(scheduler, printAllThreads());
1374
1375 delete_threads_and_gc:
1376     /*
1377      * We now have all the capabilities; if we're in an interrupting
1378      * state, then we should take the opportunity to delete all the
1379      * threads in the system.
1380      */
1381     if (sched_state == SCHED_INTERRUPTING) {
1382         deleteAllThreads(cap);
1383         sched_state = SCHED_SHUTTING_DOWN;
1384     }
1385
1386     heap_census = scheduleNeedHeapProfile(rtsTrue);
1387
1388     traceEventGcStart(cap);
1389 #if defined(THREADED_RTS)
1390     // reset waiting_for_gc *before* GC, so that when the GC threads
1391     // emerge they don't immediately re-enter the GC.
1392     waiting_for_gc = 0;
1393     GarbageCollect(force_major || heap_census, gc_type, cap);
1394 #else
1395     GarbageCollect(force_major || heap_census, 0, cap);
1396 #endif
1397     traceEventGcEnd(cap);
1398
1399     if (recent_activity == ACTIVITY_INACTIVE && force_major)
1400     {
1401         // We are doing a GC because the system has been idle for a
1402         // timeslice and we need to check for deadlock.  Record the
1403         // fact that we've done a GC and turn off the timer signal;
1404         // it will get re-enabled if we run any threads after the GC.
1405         recent_activity = ACTIVITY_DONE_GC;
1406         stopTimer();
1407     }
1408     else
1409     {
1410         // the GC might have taken long enough for the timer to set
1411         // recent_activity = ACTIVITY_INACTIVE, but we aren't
1412         // necessarily deadlocked:
1413         recent_activity = ACTIVITY_YES;
1414     }
1415
1416 #if defined(THREADED_RTS)
1417     if (gc_type == PENDING_GC_PAR)
1418     {
1419         releaseGCThreads(cap);
1420     }
1421 #endif
1422
1423     if (heap_census) {
1424         debugTrace(DEBUG_sched, "performing heap census");
1425         heapCensus();
1426         performHeapProfile = rtsFalse;
1427     }
1428
1429     if (heap_overflow && sched_state < SCHED_INTERRUPTING) {
1430         // GC set the heap_overflow flag, so we should proceed with
1431         // an orderly shutdown now.  Ultimately we want the main
1432         // thread to return to its caller with HeapExhausted, at which
1433         // point the caller should call hs_exit().  The first step is
1434         // to delete all the threads.
1435         //
1436         // Another way to do this would be to raise an exception in
1437         // the main thread, which we really should do because it gives
1438         // the program a chance to clean up.  But how do we find the
1439         // main thread?  It should presumably be the same one that
1440         // gets ^C exceptions, but that's all done on the Haskell side
1441         // (GHC.TopHandler).
1442         sched_state = SCHED_INTERRUPTING;
1443         goto delete_threads_and_gc;
1444     }
1445
1446 #ifdef SPARKBALANCE
1447     /* JB
1448        Once we are all together... this would be the place to balance all
1449        spark pools. No concurrent stealing or adding of new sparks can
1450        occur. Should be defined in Sparks.c. */
1451     balanceSparkPoolsCaps(n_capabilities, capabilities);
1452 #endif
1453
1454 #if defined(THREADED_RTS)
1455     if (gc_type == PENDING_GC_SEQ) {
1456         // release our stash of capabilities.
1457         for (i = 0; i < n_capabilities; i++) {
1458             if (cap != &capabilities[i]) {
1459                 task->cap = &capabilities[i];
1460                 releaseCapability(&capabilities[i]);
1461             }
1462         }
1463     }
1464     if (cap) {
1465         task->cap = cap;
1466     } else {
1467         task->cap = NULL;
1468     }
1469 #endif
1470
1471     return cap;
1472 }
1473
1474 /* ---------------------------------------------------------------------------
1475  * Singleton fork(). Do not copy any running threads.
1476  * ------------------------------------------------------------------------- */
1477
1478 pid_t
1479 forkProcess(HsStablePtr *entry
1480 #ifndef FORKPROCESS_PRIMOP_SUPPORTED
1481             STG_UNUSED
1482 #endif
1483            )
1484 {
1485 #ifdef FORKPROCESS_PRIMOP_SUPPORTED
1486     pid_t pid;
1487     StgTSO* t,*next;
1488     Capability *cap;
1489     nat g;
1490
1491 #if defined(THREADED_RTS)
1492     if (RtsFlags.ParFlags.nNodes > 1) {
1493         errorBelch("forking not supported with +RTS -N<n> greater than 1");
1494         stg_exit(EXIT_FAILURE);
1495     }
1496 #endif
1497
1498     debugTrace(DEBUG_sched, "forking!");
1499
1500     // ToDo: for SMP, we should probably acquire *all* the capabilities
1501     cap = rts_lock();
1502
1503     // no funny business: hold locks while we fork, otherwise if some
1504     // other thread is holding a lock when the fork happens, the data
1505     // structure protected by the lock will forever be in an
1506     // inconsistent state in the child.  See also #1391.
1507     ACQUIRE_LOCK(&sched_mutex);
1508     ACQUIRE_LOCK(&cap->lock);
1509     ACQUIRE_LOCK(&cap->running_task->lock);
1510
1511     stopTimer(); // See #4074
1512
1513 #if defined(TRACING)
1514     flushEventLog(); // so that child won't inherit dirty file buffers
1515 #endif
1516
1517     pid = fork();
1518
1519     if (pid) { // parent
1520
1521         startTimer(); // #4074
1522
1523         RELEASE_LOCK(&sched_mutex);
1524         RELEASE_LOCK(&cap->lock);
1525         RELEASE_LOCK(&cap->running_task->lock);
1526
1527         // just return the pid
1528         rts_unlock(cap);
1529         return pid;
1530
1531     } else { // child
1532
1533 #if defined(THREADED_RTS)
1534         initMutex(&sched_mutex);
1535         initMutex(&cap->lock);
1536         initMutex(&cap->running_task->lock);
1537 #endif
1538
1539 #ifdef TRACING
1540         resetTracing();
1541 #endif
1542
1543         // Now, all OS threads except the thread that forked are
1544         // stopped.  We need to stop all Haskell threads, including
1545         // those involved in foreign calls.  Also we need to delete
1546         // all Tasks, because they correspond to OS threads that are
1547         // now gone.
1548
1549         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1550           for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) {
1551                 next = t->global_link;
1552                 // don't allow threads to catch the ThreadKilled
1553                 // exception, but we do want to raiseAsync() because these
1554                 // threads may be evaluating thunks that we need later.
1555                 deleteThread_(cap,t);
1556
1557                 // stop the GC from updating the InCall to point to
1558                 // the TSO.  This is only necessary because the
1559                 // OSThread bound to the TSO has been killed, and
1560                 // won't get a chance to exit in the usual way (see
1561                 // also scheduleHandleThreadFinished).
1562                 t->bound = NULL;
1563           }
1564         }
1565
1566         // Empty the run queue.  It seems tempting to let all the
1567         // killed threads stay on the run queue as zombies to be
1568         // cleaned up later, but some of them correspond to bound
1569         // threads for which the corresponding Task does not exist.
1570         cap->run_queue_hd = END_TSO_QUEUE;
1571         cap->run_queue_tl = END_TSO_QUEUE;
1572
1573         // Any suspended C-calling Tasks are no more, their OS threads
1574         // don't exist now:
1575         cap->suspended_ccalls = NULL;
1576
1577         // Empty the threads lists.  Otherwise, the garbage
1578         // collector may attempt to resurrect some of these threads.
1579         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1580             generations[g].threads = END_TSO_QUEUE;
1581         }
1582
1583         discardTasksExcept(cap->running_task);
1584
1585 #if defined(THREADED_RTS)
1586         // Wipe our spare workers list, they no longer exist.  New
1587         // workers will be created if necessary.
1588         cap->spare_workers = NULL;
1589         cap->n_spare_workers = 0;
1590         cap->returning_tasks_hd = NULL;
1591         cap->returning_tasks_tl = NULL;
1592 #endif
1593
1594         // On Unix, all timers are reset in the child, so we need to start
1595         // the timer again.
1596         initTimer();
1597         startTimer();
1598
1599 #if defined(THREADED_RTS)
1600         cap = ioManagerStartCap(cap);
1601 #endif
1602
1603         cap = rts_evalStableIO(cap, entry, NULL);  // run the action
1604         rts_checkSchedStatus("forkProcess",cap);
1605
1606         rts_unlock(cap);
1607         hs_exit();                      // clean up and exit
1608         stg_exit(EXIT_SUCCESS);
1609     }
1610 #else /* !FORKPROCESS_PRIMOP_SUPPORTED */
1611     barf("forkProcess#: primop not supported on this platform, sorry!\n");
1612 #endif
1613 }
1614
1615 /* ---------------------------------------------------------------------------
1616  * Delete all the threads in the system
1617  * ------------------------------------------------------------------------- */
1618
1619 static void
1620 deleteAllThreads ( Capability *cap )
1621 {
1622     // NOTE: only safe to call if we own all capabilities.
1623
1624     StgTSO* t, *next;
1625     nat g;
1626
1627     debugTrace(DEBUG_sched,"deleting all threads");
1628     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1629         for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) {
1630                 next = t->global_link;
1631                 deleteThread(cap,t);
1632         }
1633     }
1634
1635     // The run queue now contains a bunch of ThreadKilled threads.  We
1636     // must not throw these away: the main thread(s) will be in there
1637     // somewhere, and the main scheduler loop has to deal with it.
1638     // Also, the run queue is the only thing keeping these threads from
1639     // being GC'd, and we don't want the "main thread has been GC'd" panic.
1640
1641 #if !defined(THREADED_RTS)
1642     ASSERT(blocked_queue_hd == END_TSO_QUEUE);
1643     ASSERT(sleeping_queue == END_TSO_QUEUE);
1644 #endif
1645 }
1646
1647 /* -----------------------------------------------------------------------------
1648    Managing the suspended_ccalls list.
1649    Locks required: sched_mutex
1650    -------------------------------------------------------------------------- */
1651
1652 STATIC_INLINE void
1653 suspendTask (Capability *cap, Task *task)
1654 {
1655     InCall *incall;
1656
1657     incall = task->incall;
1658     ASSERT(incall->next == NULL && incall->prev == NULL);
1659     incall->next = cap->suspended_ccalls;
1660     incall->prev = NULL;
1661     if (cap->suspended_ccalls) {
1662         cap->suspended_ccalls->prev = incall;
1663     }
1664     cap->suspended_ccalls = incall;
1665 }
1666
1667 STATIC_INLINE void
1668 recoverSuspendedTask (Capability *cap, Task *task)
1669 {
1670     InCall *incall;
1671
1672     incall = task->incall;
1673     if (incall->prev) {
1674         incall->prev->next = incall->next;
1675     } else {
1676         ASSERT(cap->suspended_ccalls == incall);
1677         cap->suspended_ccalls = incall->next;
1678     }
1679     if (incall->next) {
1680         incall->next->prev = incall->prev;
1681     }
1682     incall->next = incall->prev = NULL;
1683 }
1684
/* ---------------------------------------------------------------------------
 * Suspending & resuming Haskell threads.
 *
 * When making a "safe" call to C (aka _ccall_GC), the task gives back
 * its capability before calling the C function.  This allows another
 * task to pick up the capability and carry on running Haskell
 * threads.  It also means that if the C call blocks, it won't lock
 * the whole system.
 *
 * The Haskell thread making the C call is put to sleep for the
 * duration of the call, and the task's InCall is placed on the
 * capability's suspended_ccalls list.  We give out a token to the
 * task, which it can use to resume the thread on return from the C
 * function.
 *
 * If this is an interruptible C call, the FFI call may be
 * unceremoniously terminated, and so it should be scheduled on an
 * unbound worker thread.
 * ------------------------------------------------------------------------- */
1703
1704 void *
1705 suspendThread (StgRegTable *reg, rtsBool interruptible)
1706 {
1707   Capability *cap;
1708   int saved_errno;
1709   StgTSO *tso;
1710   Task *task;
1711 #if mingw32_HOST_OS
1712   StgWord32 saved_winerror;
1713 #endif
1714
1715   saved_errno = errno;
1716 #if mingw32_HOST_OS
1717   saved_winerror = GetLastError();
1718 #endif
1719
1720   /* assume that *reg is a pointer to the StgRegTable part of a Capability.
1721    */
1722   cap = regTableToCapability(reg);
1723
1724   task = cap->running_task;
1725   tso = cap->r.rCurrentTSO;
1726
1727   traceEventStopThread(cap, tso, THREAD_SUSPENDED_FOREIGN_CALL);
1728
1729   // XXX this might not be necessary --SDM
1730   tso->what_next = ThreadRunGHC;
1731
1732   threadPaused(cap,tso);
1733
1734   if (interruptible) {
1735     tso->why_blocked = BlockedOnCCall_Interruptible;
1736   } else {
1737     tso->why_blocked = BlockedOnCCall;
1738   }
1739
1740   // Hand back capability
1741   task->incall->suspended_tso = tso;
1742   task->incall->suspended_cap = cap;
1743
1744   ACQUIRE_LOCK(&cap->lock);
1745
1746   suspendTask(cap,task);
1747   cap->in_haskell = rtsFalse;
1748   releaseCapability_(cap,rtsFalse);
1749
1750   RELEASE_LOCK(&cap->lock);
1751
1752   errno = saved_errno;
1753 #if mingw32_HOST_OS
1754   SetLastError(saved_winerror);
1755 #endif
1756   return task;
1757 }
1758
1759 StgRegTable *
1760 resumeThread (void *task_)
1761 {
1762     StgTSO *tso;
1763     InCall *incall;
1764     Capability *cap;
1765     Task *task = task_;
1766     int saved_errno;
1767 #if mingw32_HOST_OS
1768     StgWord32 saved_winerror;
1769 #endif
1770
1771     saved_errno = errno;
1772 #if mingw32_HOST_OS
1773     saved_winerror = GetLastError();
1774 #endif
1775
1776     incall = task->incall;
1777     cap = incall->suspended_cap;
1778     task->cap = cap;
1779
1780     // Wait for permission to re-enter the RTS with the result.
1781     waitForReturnCapability(&cap,task);
1782     // we might be on a different capability now... but if so, our
1783     // entry on the suspended_ccalls list will also have been
1784     // migrated.
1785
1786     // Remove the thread from the suspended list
1787     recoverSuspendedTask(cap,task);
1788
1789     tso = incall->suspended_tso;
1790     incall->suspended_tso = NULL;
1791     incall->suspended_cap = NULL;
1792     tso->_link = END_TSO_QUEUE; // no write barrier reqd
1793
1794     traceEventRunThread(cap, tso);
1795
1796     /* Reset blocking status */
1797     tso->why_blocked  = NotBlocked;
1798
1799     if ((tso->flags & TSO_BLOCKEX) == 0) {
1800         // avoid locking the TSO if we don't have to
1801         if (tso->blocked_exceptions != END_BLOCKED_EXCEPTIONS_QUEUE) {
1802             maybePerformBlockedException(cap,tso);
1803         }
1804     }
1805
1806     cap->r.rCurrentTSO = tso;
1807     cap->in_haskell = rtsTrue;
1808     errno = saved_errno;
1809 #if mingw32_HOST_OS
1810     SetLastError(saved_winerror);
1811 #endif
1812
1813     /* We might have GC'd, mark the TSO dirty again */
1814     dirty_TSO(cap,tso);
1815     dirty_STACK(cap,tso->stackobj);
1816
1817     IF_DEBUG(sanity, checkTSO(tso));
1818
1819     return &cap->r;
1820 }
1821
1822 /* ---------------------------------------------------------------------------
1823  * scheduleThread()
1824  *
1825  * scheduleThread puts a thread on the end  of the runnable queue.
1826  * This will usually be done immediately after a thread is created.
1827  * The caller of scheduleThread must create the thread using e.g.
1828  * createThread and push an appropriate closure
1829  * on this thread's stack before the scheduler is invoked.
1830  * ------------------------------------------------------------------------ */
1831
1832 void
1833 scheduleThread(Capability *cap, StgTSO *tso)
1834 {
1835     // The thread goes at the *end* of the run-queue, to avoid possible
1836     // starvation of any threads already on the queue.
1837     appendToRunQueue(cap,tso);
1838 }
1839
1840 void
1841 scheduleThreadOn(Capability *cap, StgWord cpu USED_IF_THREADS, StgTSO *tso)
1842 {
1843 #if defined(THREADED_RTS)
1844     tso->flags |= TSO_LOCKED; // we requested explicit affinity; don't
1845                               // move this thread from now on.
1846     cpu %= RtsFlags.ParFlags.nNodes;
1847     if (cpu == cap->no) {
1848         appendToRunQueue(cap,tso);
1849     } else {
1850         migrateThread(cap, tso, &capabilities[cpu]);
1851     }
1852 #else
1853     appendToRunQueue(cap,tso);
1854 #endif
1855 }
1856
1857 Capability *
1858 scheduleWaitThread (StgTSO* tso, /*[out]*/HaskellObj* ret, Capability *cap)
1859 {
1860     Task *task;
1861     StgThreadID id;
1862
1863     // We already created/initialised the Task
1864     task = cap->running_task;
1865
1866     // This TSO is now a bound thread; make the Task and TSO
1867     // point to each other.
1868     tso->bound = task->incall;
1869     tso->cap = cap;
1870
1871     task->incall->tso = tso;
1872     task->incall->ret = ret;
1873     task->incall->stat = NoStatus;
1874
1875     appendToRunQueue(cap,tso);
1876
1877     id = tso->id;
1878     debugTrace(DEBUG_sched, "new bound thread (%lu)", (unsigned long)id);
1879
1880     cap = schedule(cap,task);
1881
1882     ASSERT(task->incall->stat != NoStatus);
1883     ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
1884
1885     debugTrace(DEBUG_sched, "bound thread (%lu) finished", (unsigned long)id);
1886     return cap;
1887 }
1888
1889 /* ----------------------------------------------------------------------------
1890  * Starting Tasks
1891  * ------------------------------------------------------------------------- */
1892
#if defined(THREADED_RTS)
/* Body of a worker Task: run the scheduler loop, then release the
 * Capability and stop the worker. */
void scheduleWorker (Capability *cap, Task *task)
{
    // schedule() runs without a lock.  When it returns we hold a
    // Capability, though not necessarily the one we started with.
    cap = schedule(cap,task);

    // Shutdown requires that, once every Capability has been shut down,
    // every worker that is shutting down has completed workerTaskStop().
    // That is why cap->lock stays held until workerTaskStop() is done.
    //
    // Workers still inside foreign calls are not a problem: they will
    // simply block in waitForReturnCapability() because the Capability
    // has been shut down.
    ACQUIRE_LOCK(&cap->lock);
    releaseCapability_(cap,rtsFalse);
    workerTaskStop(task);
    RELEASE_LOCK(&cap->lock);
}
#endif
1917
1918 /* ---------------------------------------------------------------------------
1919  * initScheduler()
1920  *
1921  * Initialise the scheduler.  This resets all the queues - if the
1922  * queues contained any threads, they'll be garbage collected at the
1923  * next pass.
1924  *
1925  * ------------------------------------------------------------------------ */
1926
1927 void
1928 initScheduler(void)
1929 {
1930 #if !defined(THREADED_RTS)
1931   blocked_queue_hd  = END_TSO_QUEUE;
1932   blocked_queue_tl  = END_TSO_QUEUE;
1933   sleeping_queue    = END_TSO_QUEUE;
1934 #endif
1935
1936   sched_state    = SCHED_RUNNING;
1937   recent_activity = ACTIVITY_YES;
1938
1939 #if defined(THREADED_RTS)
1940   /* Initialise the mutex and condition variables used by
1941    * the scheduler. */
1942   initMutex(&sched_mutex);
1943 #endif
1944
1945   ACQUIRE_LOCK(&sched_mutex);
1946
1947   /* A capability holds the state a native thread needs in
1948    * order to execute STG code. At least one capability is
1949    * floating around (only THREADED_RTS builds have more than one).
1950    */
1951   initCapabilities();
1952
1953   initTaskManager();
1954
1955 #if defined(THREADED_RTS)
1956   initSparkPools();
1957 #endif
1958
1959   RELEASE_LOCK(&sched_mutex);
1960
1961 #if defined(THREADED_RTS)
1962   /*
1963    * Eagerly start one worker to run each Capability, except for
1964    * Capability 0.  The idea is that we're probably going to start a
1965    * bound thread on Capability 0 pretty soon, so we don't want a
1966    * worker task hogging it.
1967    */
1968   {
1969       nat i;
1970       Capability *cap;
1971       for (i = 1; i < n_capabilities; i++) {
1972           cap = &capabilities[i];
1973           ACQUIRE_LOCK(&cap->lock);
1974           startWorkerTask(cap);
1975           RELEASE_LOCK(&cap->lock);
1976       }
1977   }
1978 #endif
1979 }
1980
1981 void
1982 exitScheduler (rtsBool wait_foreign USED_IF_THREADS)
1983                /* see Capability.c, shutdownCapability() */
1984 {
1985     Task *task = NULL;
1986
1987     task = newBoundTask();
1988
1989     // If we haven't killed all the threads yet, do it now.
1990     if (sched_state < SCHED_SHUTTING_DOWN) {
1991         sched_state = SCHED_INTERRUPTING;
1992         waitForReturnCapability(&task->cap,task);
1993         scheduleDoGC(task->cap,task,rtsFalse);
1994         ASSERT(task->incall->tso == NULL);
1995         releaseCapability(task->cap);
1996     }
1997     sched_state = SCHED_SHUTTING_DOWN;
1998
1999 #if defined(THREADED_RTS)
2000     {
2001         nat i;
2002
2003         for (i = 0; i < n_capabilities; i++) {
2004             ASSERT(task->incall->tso == NULL);
2005             shutdownCapability(&capabilities[i], task, wait_foreign);
2006         }
2007     }
2008 #endif
2009
2010     boundTaskExiting(task);
2011 }
2012
2013 void
2014 freeScheduler( void )
2015 {
2016     nat still_running;
2017
2018     ACQUIRE_LOCK(&sched_mutex);
2019     still_running = freeTaskManager();
2020     // We can only free the Capabilities if there are no Tasks still
2021     // running.  We might have a Task about to return from a foreign
2022     // call into waitForReturnCapability(), for example (actually,
2023     // this should be the *only* thing that a still-running Task can
2024     // do at this point, and it will block waiting for the
2025     // Capability).
2026     if (still_running == 0) {
2027         freeCapabilities();
2028         if (n_capabilities != 1) {
2029             stgFree(capabilities);
2030         }
2031     }
2032     RELEASE_LOCK(&sched_mutex);
2033 #if defined(THREADED_RTS)
2034     closeMutex(&sched_mutex);
2035 #endif
2036 }
2037
2038 /* -----------------------------------------------------------------------------
2039    performGC
2040
2041    This is the interface to the garbage collector from Haskell land.
2042    We provide this so that external C code can allocate and garbage
2043    collect when called from Haskell via _ccall_GC.
2044    -------------------------------------------------------------------------- */
2045
2046 static void
2047 performGC_(rtsBool force_major)
2048 {
2049     Task *task;
2050
2051     // We must grab a new Task here, because the existing Task may be
2052     // associated with a particular Capability, and chained onto the
2053     // suspended_ccalls queue.
2054     task = newBoundTask();
2055
2056     waitForReturnCapability(&task->cap,task);
2057     scheduleDoGC(task->cap,task,force_major);
2058     releaseCapability(task->cap);
2059     boundTaskExiting(task);
2060 }
2061
2062 void
2063 performGC(void)
2064 {
2065     performGC_(rtsFalse);
2066 }
2067
2068 void
2069 performMajorGC(void)
2070 {
2071     performGC_(rtsTrue);
2072 }
2073
2074 /* ---------------------------------------------------------------------------
2075    Interrupt execution
2076    - usually called inside a signal handler so it mustn't do anything fancy.
2077    ------------------------------------------------------------------------ */
2078
2079 void
2080 interruptStgRts(void)
2081 {
2082     sched_state = SCHED_INTERRUPTING;
2083     setContextSwitches();
2084 #if defined(THREADED_RTS)
2085     wakeUpRts();
2086 #endif
2087 }
2088
2089 /* -----------------------------------------------------------------------------
2090    Wake up the RTS
2091
2092    This function causes at least one OS thread to wake up and run the
2093    scheduler loop.  It is invoked when the RTS might be deadlocked, or
2094    an external event has arrived that may need servicing (eg. a
2095    keyboard interrupt).
2096
2097    In the single-threaded RTS we don't do anything here; we only have
2098    one thread anyway, and the event that caused us to want to wake up
2099    will have interrupted any blocking system call in progress anyway.
2100    -------------------------------------------------------------------------- */
2101
#if defined(THREADED_RTS)
void wakeUpRts(void)
{
    // Poke the IO Manager thread.  Waking it ensures that some OS thread
    // wakes up and runs the scheduler loop, which in turn triggers a GC
    // and a deadlock check.
    ioManagerWakeup();
}
#endif
2111
2112 /* -----------------------------------------------------------------------------
2113    Deleting threads
2114
2115    This is used for interruption (^C) and forking, and corresponds to
2116    raising an exception but without letting the thread catch the
2117    exception.
2118    -------------------------------------------------------------------------- */
2119
2120 static void
2121 deleteThread (Capability *cap STG_UNUSED, StgTSO *tso)
2122 {
2123     // NOTE: must only be called on a TSO that we have exclusive
2124     // access to, because we will call throwToSingleThreaded() below.
2125     // The TSO must be on the run queue of the Capability we own, or
2126     // we must own all Capabilities.
2127
2128     if (tso->why_blocked != BlockedOnCCall &&
2129         tso->why_blocked != BlockedOnCCall_Interruptible) {
2130         throwToSingleThreaded(tso->cap,tso,NULL);
2131     }
2132 }
2133
#ifdef FORKPROCESS_PRIMOP_SUPPORTED
// For forkProcess only: behaves like deleteThread(), except that threads
// inside foreign calls are deleted as well.
static void
deleteThread_(Capability *cap, StgTSO *tso)
{
    switch (tso->why_blocked) {
    case BlockedOnCCall:
    case BlockedOnCCall_Interruptible:
        // In a foreign call: mark the thread killed and put it on the
        // run queue of its own capability.
        tso->what_next = ThreadKilled;
        appendToRunQueue(tso->cap, tso);
        break;

    default:
        deleteThread(cap,tso);
        break;
    }
}
#endif
2149
/* -----------------------------------------------------------------------------
   raiseExceptionHelper

   This function is called by the raise# primitive, just so that we can
   move some of the tricky bits of raising an exception from C-- into
   C.  Who knows, it might be a useful reusable thing here too.
   -------------------------------------------------------------------------- */
2157
2158 StgWord
2159 raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception)
2160 {
2161     Capability *cap = regTableToCapability(reg);
2162     StgThunk *raise_closure = NULL;
2163     StgPtr p, next;
2164     StgRetInfoTable *info;
2165     //
2166     // This closure represents the expression 'raise# E' where E
2167     // is the exception raise.  It is used to overwrite all the
2168     // thunks which are currently under evaluataion.
2169     //
2170
2171     // OLD COMMENT (we don't have MIN_UPD_SIZE now):
2172     // LDV profiling: stg_raise_info has THUNK as its closure
2173     // type. Since a THUNK takes at least MIN_UPD_SIZE words in its
2174     // payload, MIN_UPD_SIZE is more approprate than 1.  It seems that
2175     // 1 does not cause any problem unless profiling is performed.
2176     // However, when LDV profiling goes on, we need to linearly scan
2177     // small object pool, where raise_closure is stored, so we should
2178     // use MIN_UPD_SIZE.
2179     //
2180     // raise_closure = (StgClosure *)RET_STGCALL1(P_,allocate,
2181     //                                 sizeofW(StgClosure)+1);
2182     //
2183
2184     //
2185     // Walk up the stack, looking for the catch frame.  On the way,
2186     // we update any closures pointed to from update frames with the
2187     // raise closure that we just built.
2188     //
2189     p = tso->stackobj->sp;
2190     while(1) {
2191         info = get_ret_itbl((StgClosure *)p);
2192         next = p + stack_frame_sizeW((StgClosure *)p);
2193         switch (info->i.type) {
2194
2195         case UPDATE_FRAME:
2196             // Only create raise_closure if we need to.
2197             if (raise_closure == NULL) {
2198                 raise_closure =
2199                     (StgThunk *)allocate(cap,sizeofW(StgThunk)+1);
2200                 SET_HDR(raise_closure, &stg_raise_info, CCCS);
2201                 raise_closure->payload[0] = exception;
2202             }
2203             updateThunk(cap, tso, ((StgUpdateFrame *)p)->updatee,
2204                         (StgClosure *)raise_closure);
2205             p = next;
2206             continue;
2207
2208         case ATOMICALLY_FRAME:
2209             debugTrace(DEBUG_stm, "found ATOMICALLY_FRAME at %p", p);
2210             tso->stackobj->sp = p;
2211             return ATOMICALLY_FRAME;
2212
2213         case CATCH_FRAME:
2214             tso->stackobj->sp = p;
2215             return CATCH_FRAME;
2216
2217         case CATCH_STM_FRAME:
2218             debugTrace(DEBUG_stm, "found CATCH_STM_FRAME at %p", p);
2219             tso->stackobj->sp = p;
2220             return CATCH_STM_FRAME;
2221
2222         case UNDERFLOW_FRAME:
2223             tso->stackobj->sp = p;
2224             threadStackUnderflow(cap,tso);
2225             p = tso->stackobj->sp;
2226             continue;
2227
2228         case STOP_FRAME:
2229             tso->stackobj->sp = p;
2230             return STOP_FRAME;
2231
2232         case CATCH_RETRY_FRAME:
2233         default:
2234             p = next;
2235             continue;
2236         }
2237     }
2238 }
2239
2240
/* -----------------------------------------------------------------------------
   findRetryFrameHelper

   This function is called by the retry# primitive.  It traverses the stack
   leaving tso->stackobj->sp referring to the frame which should handle the
   retry.

   This should either be a CATCH_RETRY_FRAME (if the retry# is within an orElse#)
   or should be an ATOMICALLY_FRAME (if the retry# reaches the top level).

   We skip CATCH_STM_FRAMEs (aborting and rolling back the nested tx that they
   create) because retries are not considered to be exceptions, despite the
   similar implementation.

   We should not expect to see CATCH_FRAME or STOP_FRAME because those should
   not be created within memory transactions.
   -------------------------------------------------------------------------- */
2257
2258 StgWord
2259 findRetryFrameHelper (Capability *cap, StgTSO *tso)
2260 {
2261   StgPtr           p, next;
2262   StgRetInfoTable *info;
2263
2264   p = tso->stackobj->sp;
2265   while (1) {
2266     info = get_ret_itbl((StgClosure *)p);
2267     next = p + stack_frame_sizeW((StgClosure *)p);
2268     switch (info->i.type) {
2269
2270     case ATOMICALLY_FRAME:
2271         debugTrace(DEBUG_stm,
2272                    "found ATOMICALLY_FRAME at %p during retry", p);
2273         tso->stackobj->sp = p;
2274         return ATOMICALLY_FRAME;
2275
2276     case CATCH_RETRY_FRAME:
2277         debugTrace(DEBUG_stm,
2278                    "found CATCH_RETRY_FRAME at %p during retrry", p);
2279         tso->stackobj->sp = p;
2280         return CATCH_RETRY_FRAME;
2281
2282     case CATCH_STM_FRAME: {
2283         StgTRecHeader *trec = tso -> trec;
2284         StgTRecHeader *outer = trec -> enclosing_trec;
2285         debugTrace(DEBUG_stm,
2286                    "found CATCH_STM_FRAME at %p during retry", p);
2287         debugTrace(DEBUG_stm, "trec=%p outer=%p", trec, outer);
2288         stmAbortTransaction(cap, trec);
2289         stmFreeAbortedTRec(cap, trec);
2290         tso -> trec = outer;
2291         p = next;
2292         continue;
2293     }
2294
2295     case UNDERFLOW_FRAME:
2296         threadStackUnderflow(cap,tso);
2297         p = tso->stackobj->sp;
2298         continue;
2299
2300     default:
2301       ASSERT(info->i.type != CATCH_FRAME);
2302       ASSERT(info->i.type != STOP_FRAME);
2303       p = next;
2304       continue;
2305     }
2306   }
2307 }
2308
/* -----------------------------------------------------------------------------
   resurrectThreads is called after garbage collection on the list of
   threads found to be garbage.  Each of these threads is put back on
   the thread list of its generation and, depending on why it was
   blocked, is sent an exception: blockedIndefinitelyOnMVar if it was
   blocked on an MVar, nonTermination if it was blocked on a black
   hole, or blockedIndefinitelyOnSTM if it was blocked on STM.  Threads
   that are NotBlocked or BlockedOnMsgThrowTo are left alone.

   Locks: assumes we hold *all* the capabilities.
   -------------------------------------------------------------------------- */
2318
2319 void
2320 resurrectThreads (StgTSO *threads)
2321 {
2322     StgTSO *tso, *next;
2323     Capability *cap;
2324     generation *gen;
2325
2326     for (tso = threads; tso != END_TSO_QUEUE; tso = next) {
2327         next = tso->global_link;
2328
2329         gen = Bdescr((P_)tso)->gen;
2330         tso->global_link = gen->threads;
2331         gen->threads = tso;
2332
2333         debugTrace(DEBUG_sched, "resurrecting thread %lu", (unsigned long)tso->id);
2334
2335         // Wake up the thread on the Capability it was last on
2336         cap = tso->cap;
2337
2338         switch (tso->why_blocked) {
2339         case BlockedOnMVar:
2340             /* Called by GC - sched_mutex lock is currently held. */
2341             throwToSingleThreaded(cap, tso,
2342                                   (StgClosure *)blockedIndefinitelyOnMVar_closure);
2343             break;
2344         case BlockedOnBlackHole:
2345             throwToSingleThreaded(cap, tso,
2346                                   (StgClosure *)nonTermination_closure);
2347             break;
2348         case BlockedOnSTM:
2349             throwToSingleThreaded(cap, tso,
2350                                   (StgClosure *)blockedIndefinitelyOnSTM_closure);
2351             break;
2352         case NotBlocked:
2353             /* This might happen if the thread was blocked on a black hole
2354              * belonging to a thread that we've just woken up (raiseAsync
2355              * can wake up threads, remember...).
2356              */
2357             continue;
2358         case BlockedOnMsgThrowTo:
2359             // This can happen if the target is masking, blocks on a
2360             // black hole, and then is found to be unreachable.  In
2361             // this case, we want to let the target wake up and carry
2362             // on, and do nothing to this thread.
2363             continue;
2364         default:
2365             barf("resurrectThreads: thread blocked in a strange way: %d",
2366                  tso->why_blocked);
2367         }
2368     }
2369 }