rts/Schedule.c

   1 /* ---------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2006
   4  *
   5  * The scheduler and thread-related functionality
   6  *
   7  * --------------------------------------------------------------------------*/
   8
   9 #include "PosixSource.h"
  10 #define KEEP_LOCKCLOSURE
  11 #include "Rts.h"
  12
  13 #include "sm/Storage.h"
  14 #include "RtsUtils.h"
  15 #include "StgRun.h"
  16 #include "Schedule.h"
  17 #include "Interpreter.h"
  18 #include "Printer.h"
  19 #include "RtsSignals.h"
  20 #include "sm/Sanity.h"
  21 #include "Stats.h"
  22 #include "STM.h"
  23 #include "Prelude.h"
  24 #include "ThreadLabels.h"
  25 #include "Updates.h"
  26 #include "Proftimer.h"
  27 #include "ProfHeap.h"
  28 #include "Weak.h"
  29 #include "sm/GC.h" // waitForGcThreads, releaseGCThreads, N
  30 #include "Sparks.h"
  31 #include "Capability.h"
  32 #include "Task.h"
  33 #include "AwaitEvent.h"
  34 #if defined(mingw32_HOST_OS)
  35 #include "win32/IOManager.h"
  36 #endif
  37 #include "Trace.h"
  38 #include "RaiseAsync.h"
  39 #include "Threads.h"
  40 #include "Timer.h"
  41 #include "ThreadPaused.h"
  42 #include "Messages.h"
  43
  44 #ifdef HAVE_SYS_TYPES_H
  45 #include <sys/types.h>
  46 #endif
  47 #ifdef HAVE_UNISTD_H
  48 #include <unistd.h>
  49 #endif
  50
  51 #include <string.h>
  52 #include <stdlib.h>
  53 #include <stdarg.h>
  54
  55 #ifdef HAVE_ERRNO_H
  56 #include <errno.h>
  57 #endif
  58
  59 /* -----------------------------------------------------------------------------
  60  * Global variables
  61  * -------------------------------------------------------------------------- */
  62
  63 #if !defined(THREADED_RTS)
  64 // Blocked/sleeping thrads
  65 StgTSO *blocked_queue_hd = NULL;
  66 StgTSO *blocked_queue_tl = NULL;
  67 StgTSO *sleeping_queue = NULL;    // perhaps replace with a hash table?
  68 #endif
  69
  70 /* Set to true when the latest garbage collection failed to reclaim
  71  * enough space, and the runtime should proceed to shut itself down in
  72  * an orderly fashion (emitting profiling info etc.)
  73  */
  74 rtsBool heap_overflow = rtsFalse;
  75
  76 /* flag that tracks whether we have done any execution in this time slice.
  77  * LOCK: currently none, perhaps we should lock (but needs to be
  78  * updated in the fast path of the scheduler).
  79  *
  80  * NB. must be StgWord, we do xchg() on it.
  81  */
  82 volatile StgWord recent_activity = ACTIVITY_YES;
  83
  84 /* if this flag is set as well, give up execution
  85  * LOCK: none (changes monotonically)
  86  */
  87 volatile StgWord sched_state = SCHED_RUNNING;
  88
  89 /*  This is used in `TSO.h' and gcc 2.96 insists that this variable actually
  90  *  exists - earlier gccs apparently didn't.
  91  *  -= chak
  92  */
  93 StgTSO dummy_tso;
  94
  95 /*
  96  * Set to TRUE when entering a shutdown state (via shutdownHaskellAndExit()) --
  97  * in an MT setting, needed to signal that a worker thread shouldn't hang around
  98  * in the scheduler when it is out of work.
  99  */
 100 rtsBool shutting_down_scheduler = rtsFalse;
 101
 102 /*
 103  * This mutex protects most of the global scheduler data in
 104  * the THREADED_RTS runtime.
 105  */
 106 #if defined(THREADED_RTS)
 107 Mutex sched_mutex;
 108 #endif
 109
 110 #if !defined(mingw32_HOST_OS)
 111 #define FORKPROCESS_PRIMOP_SUPPORTED
 112 #endif
 113
 114 /* -----------------------------------------------------------------------------
 115  * static function prototypes
 116  * -------------------------------------------------------------------------- */
 117
 118 static Capability *schedule (Capability *initialCapability, Task *task);
 119
 120 //
 121 // These function all encapsulate parts of the scheduler loop, and are
 122 // abstracted only to make the structure and control flow of the
 123 // scheduler clearer.
 124 //
 125 static void schedulePreLoop (void);
 126 static void scheduleFindWork (Capability *cap);
 127 #if defined(THREADED_RTS)
 128 static void scheduleYield (Capability **pcap, Task *task);
 129 #endif
 130 static void scheduleStartSignalHandlers (Capability *cap);
 131 static void scheduleCheckBlockedThreads (Capability *cap);
 132 static void scheduleProcessInbox(Capability *cap);
 133 static void scheduleDetectDeadlock (Capability *cap, Task *task);
 134 static void schedulePushWork(Capability *cap, Task *task);
 135 #if defined(THREADED_RTS)
 136 static void scheduleActivateSpark(Capability *cap);
 137 #endif
 138 static void schedulePostRunThread(Capability *cap, StgTSO *t);
 139 static rtsBool scheduleHandleHeapOverflow( Capability *cap, StgTSO *t );
 140 static void scheduleHandleStackOverflow( Capability *cap, Task *task,
 141                                          StgTSO *t);
 142 static rtsBool scheduleHandleYield( Capability *cap, StgTSO *t,
 143                                     nat prev_what_next );
 144 static void scheduleHandleThreadBlocked( StgTSO *t );
 145 static rtsBool scheduleHandleThreadFinished( Capability *cap, Task *task,
 146                                              StgTSO *t );
 147 static rtsBool scheduleNeedHeapProfile(rtsBool ready_to_gc);
 148 static Capability *scheduleDoGC(Capability *cap, Task *task,
 149                                 rtsBool force_major);
 150
 151 static StgTSO *threadStackOverflow(Capability *cap, StgTSO *tso);
 152 static StgTSO *threadStackUnderflow(Capability *cap, Task *task, StgTSO *tso);
 153
 154 static void deleteThread (Capability *cap, StgTSO *tso);
 155 static void deleteAllThreads (Capability *cap);
 156
 157 #ifdef FORKPROCESS_PRIMOP_SUPPORTED
 158 static void deleteThread_(Capability *cap, StgTSO *tso);
 159 #endif
 160
 161 /* ---------------------------------------------------------------------------
 162    Main scheduling loop.
 163
 164    We use round-robin scheduling, each thread returning to the
 165    scheduler loop when one of these conditions is detected:
 166
 167       * out of heap space
 168       * timer expires (thread yields)
 169       * thread blocks
 170       * thread ends
 171       * stack overflow
 172
 173    GRAN version:
 174      In a GranSim setup this loop iterates over the global event queue.
 175      This revolves around the global event queue, which determines what
 176      to do next. Therefore, it's more complicated than either the
 177      concurrent or the parallel (GUM) setup.
 178   This version has been entirely removed (JB 2008/08).
 179
 180    GUM version:
 181      GUM iterates over incoming messages.
 182      It starts with nothing to do (thus CurrentTSO == END_TSO_QUEUE),
 183      and sends out a fish whenever it has nothing to do; in-between
 184      doing the actual reductions (shared code below) it processes the
 185      incoming messages and deals with delayed operations
 186      (see PendingFetches).
 187      This is not the ugliest code you could imagine, but it's bloody close.
 188
 189   (JB 2008/08) This version was formerly indicated by a PP-Flag PAR,
 190   now by PP-flag PARALLEL_HASKELL. The Eden RTS (in GHC-6.x) uses it,
 191   as well as future GUM versions. This file has been refurbished to
 192   only contain valid code, which is however incomplete, refers to
 193   invalid includes etc.
 194
 195    ------------------------------------------------------------------------ */
 196
 197 static Capability *
 198 schedule (Capability *initialCapability, Task *task)
 199 {
 200   StgTSO *t;
 201   Capability *cap;
 202   StgThreadReturnCode ret;
 203   nat prev_what_next;
 204   rtsBool ready_to_gc;
 205 #if defined(THREADED_RTS)
 206   rtsBool first = rtsTrue;
 207 #endif
 208
 209   cap = initialCapability;
 210
 211   // Pre-condition: this task owns initialCapability.
 212   // The sched_mutex is *NOT* held
 213   // NB. on return, we still hold a capability.
 214
 215   debugTrace (DEBUG_sched, "cap %d: schedule()", initialCapability->no);
 216
 217   schedulePreLoop();
 218
 219   // -----------------------------------------------------------
 220   // Scheduler loop starts here:
 221
 222   while (1) {
 223
 224     // Check whether we have re-entered the RTS from Haskell without
 225     // going via suspendThread()/resumeThread (i.e. a 'safe' foreign
 226     // call).
 227     if (cap->in_haskell) {
 228           errorBelch("schedule: re-entered unsafely.\n"
 229                      "   Perhaps a 'foreign import unsafe' should be 'safe'?");
 230           stg_exit(EXIT_FAILURE);
 231     }
 232
 233     // The interruption / shutdown sequence.
 234     //
 235     // In order to cleanly shut down the runtime, we want to:
 236     //   * make sure that all main threads return to their callers
 237     //     with the state 'Interrupted'.
 238     //   * clean up all OS threads assocated with the runtime
 239     //   * free all memory etc.
 240     //
 241     // So the sequence for ^C goes like this:
 242     //
 243     //   * ^C handler sets sched_state := SCHED_INTERRUPTING and
 244     //     arranges for some Capability to wake up
 245     //
 246     //   * all threads in the system are halted, and the zombies are
 247     //     placed on the run queue for cleaning up.  We acquire all
 248     //     the capabilities in order to delete the threads, this is
 249     //     done by scheduleDoGC() for convenience (because GC already
 250     //     needs to acquire all the capabilities).  We can't kill
 251     //     threads involved in foreign calls.
 252     //
 253     //   * somebody calls shutdownHaskell(), which calls exitScheduler()
 254     //
 255     //   * sched_state := SCHED_SHUTTING_DOWN
 256     //
 257     //   * all workers exit when the run queue on their capability
 258     //     drains.  All main threads will also exit when their TSO
 259     //     reaches the head of the run queue and they can return.
 260     //
 261     //   * eventually all Capabilities will shut down, and the RTS can
 262     //     exit.
 263     //
 264     //   * We might be left with threads blocked in foreign calls,
 265     //     we should really attempt to kill these somehow (TODO);
 266
 267     switch (sched_state) {
 268     case SCHED_RUNNING:
 269         break;
 270     case SCHED_INTERRUPTING:
 271         debugTrace(DEBUG_sched, "SCHED_INTERRUPTING");
 272 #if defined(THREADED_RTS)
 273         discardSparksCap(cap);
 274 #endif
 275         /* scheduleDoGC() deletes all the threads */
 276         cap = scheduleDoGC(cap,task,rtsFalse);
 277
 278         // after scheduleDoGC(), we must be shutting down.  Either some
 279         // other Capability did the final GC, or we did it above,
 280         // either way we can fall through to the SCHED_SHUTTING_DOWN
 281         // case now.
 282         ASSERT(sched_state == SCHED_SHUTTING_DOWN);
 283         // fall through
 284
 285     case SCHED_SHUTTING_DOWN:
 286         debugTrace(DEBUG_sched, "SCHED_SHUTTING_DOWN");
 287         // If we are a worker, just exit.  If we're a bound thread
 288         // then we will exit below when we've removed our TSO from
 289         // the run queue.
 290         if (!isBoundTask(task) && emptyRunQueue(cap)) {
 291             return cap;
 292         }
 293         break;
 294     default:
 295         barf("sched_state: %d", sched_state);
 296     }
 297
 298     scheduleFindWork(cap);
 299
 300     /* work pushing, currently relevant only for THREADED_RTS:
 301        (pushes threads, wakes up idle capabilities for stealing) */
 302     schedulePushWork(cap,task);
 303
 304     scheduleDetectDeadlock(cap,task);
 305
 306 #if defined(THREADED_RTS)
 307     cap = task->cap;    // reload cap, it might have changed
 308 #endif
 309
 310     // Normally, the only way we can get here with no threads to
 311     // run is if a keyboard interrupt received during
 312     // scheduleCheckBlockedThreads() or scheduleDetectDeadlock().
 313     // Additionally, it is not fatal for the
 314     // threaded RTS to reach here with no threads to run.
 315     //
 316     // win32: might be here due to awaitEvent() being abandoned
 317     // as a result of a console event having been delivered.
 318
 319 #if defined(THREADED_RTS)
 320     if (first)
 321     {
 322     // XXX: ToDo
 323     //     // don't yield the first time, we want a chance to run this
 324     //     // thread for a bit, even if there are others banging at the
 325     //     // door.
 326     //     first = rtsFalse;
 327     //     ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 328     }
 329
 330     scheduleYield(&cap,task);
 331
 332     if (emptyRunQueue(cap)) continue; // look for work again
 333 #endif
 334
 335 #if !defined(THREADED_RTS) && !defined(mingw32_HOST_OS)
 336     if ( emptyRunQueue(cap) ) {
 337         ASSERT(sched_state >= SCHED_INTERRUPTING);
 338     }
 339 #endif
 340
 341     //
 342     // Get a thread to run
 343     //
 344     t = popRunQueue(cap);
 345
 346     // Sanity check the thread we're about to run.  This can be
 347     // expensive if there is lots of thread switching going on...
 348     IF_DEBUG(sanity,checkTSO(t));
 349
 350 #if defined(THREADED_RTS)
 351     // Check whether we can run this thread in the current task.
 352     // If not, we have to pass our capability to the right task.
 353     {
 354         InCall *bound = t->bound;
 355
 356         if (bound) {
 357             if (bound->task == task) {
 358                 // yes, the Haskell thread is bound to the current native thread
 359             } else {
 360                 debugTrace(DEBUG_sched,
 361                            "thread %lu bound to another OS thread",
 362                            (unsigned long)t->id);
 363                 // no, bound to a different Haskell thread: pass to that thread
 364                 pushOnRunQueue(cap,t);
 365                 continue;
 366             }
 367         } else {
 368             // The thread we want to run is unbound.
 369             if (task->incall->tso) {
 370                 debugTrace(DEBUG_sched,
 371                            "this OS thread cannot run thread %lu",
 372                            (unsigned long)t->id);
 373                 // no, the current native thread is bound to a different
 374                 // Haskell thread, so pass it to any worker thread
 375                 pushOnRunQueue(cap,t);
 376                 continue;
 377             }
 378         }
 379     }
 380 #endif
 381
 382     // If we're shutting down, and this thread has not yet been
 383     // killed, kill it now.  This sometimes happens when a finalizer
 384     // thread is created by the final GC, or a thread previously
 385     // in a foreign call returns.
 386     if (sched_state >= SCHED_INTERRUPTING &&
 387         !(t->what_next == ThreadComplete || t->what_next == ThreadKilled)) {
 388         deleteThread(cap,t);
 389     }
 390
 391     /* context switches are initiated by the timer signal, unless
 392      * the user specified "context switch as often as possible", with
 393      * +RTS -C0
 394      */
 395     if (RtsFlags.ConcFlags.ctxtSwitchTicks == 0
 396         && !emptyThreadQueues(cap)) {
 397         cap->context_switch = 1;
 398     }
 399
 400 run_thread:
 401
 402     // CurrentTSO is the thread to run.  t might be different if we
 403     // loop back to run_thread, so make sure to set CurrentTSO after
 404     // that.
 405     cap->r.rCurrentTSO = t;
 406
 407     startHeapProfTimer();
 408
 409     // ----------------------------------------------------------------------
 410     // Run the current thread
 411
 412     ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 413     ASSERT(t->cap == cap);
 414     ASSERT(t->bound ? t->bound->task->cap == cap : 1);
 415
 416     prev_what_next = t->what_next;
 417
 418     errno = t->saved_errno;
 419 #if mingw32_HOST_OS
 420     SetLastError(t->saved_winerror);
 421 #endif
 422
 423     cap->in_haskell = rtsTrue;
 424
 425     dirty_TSO(cap,t);
 426
 427 #if defined(THREADED_RTS)
 428     if (recent_activity == ACTIVITY_DONE_GC) {
 429         // ACTIVITY_DONE_GC means we turned off the timer signal to
 430         // conserve power (see #1623).  Re-enable it here.
 431         nat prev;
 432         prev = xchg((P_)&recent_activity, ACTIVITY_YES);
 433         if (prev == ACTIVITY_DONE_GC) {
 434             startTimer();
 435         }
 436     } else if (recent_activity != ACTIVITY_INACTIVE) {
 437         // If we reached ACTIVITY_INACTIVE, then don't reset it until
 438         // we've done the GC.  The thread running here might just be
 439         // the IO manager thread that handle_tick() woke up via
 440         // wakeUpRts().
 441         recent_activity = ACTIVITY_YES;
 442     }
 443 #endif
 444
 445     traceEventRunThread(cap, t);
 446
 447     switch (prev_what_next) {
 448
 449     case ThreadKilled:
 450     case ThreadComplete:
 451         /* Thread already finished, return to scheduler. */
 452         ret = ThreadFinished;
 453         break;
 454
 455     case ThreadRunGHC:
 456     {
 457         StgRegTable *r;
 458         r = StgRun((StgFunPtr) stg_returnToStackTop, &cap->r);
 459         cap = regTableToCapability(r);
 460         ret = r->rRet;
 461         break;
 462     }
 463
 464     case ThreadInterpret:
 465         cap = interpretBCO(cap);
 466         ret = cap->r.rRet;
 467         break;
 468
 469     default:
 470         barf("schedule: invalid what_next field");
 471     }
 472
 473     cap->in_haskell = rtsFalse;
 474
 475     // The TSO might have moved, eg. if it re-entered the RTS and a GC
 476     // happened.  So find the new location:
 477     t = cap->r.rCurrentTSO;
 478
 479     // And save the current errno in this thread.
 480     // XXX: possibly bogus for SMP because this thread might already
 481     // be running again, see code below.
 482     t->saved_errno = errno;
 483 #if mingw32_HOST_OS
 484     // Similarly for Windows error code
 485     t->saved_winerror = GetLastError();
 486 #endif
 487
 488     traceEventStopThread(cap, t, ret);
 489
 490     ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 491     ASSERT(t->cap == cap);
 492
 493     // ----------------------------------------------------------------------
 494
 495     // Costs for the scheduler are assigned to CCS_SYSTEM
 496     stopHeapProfTimer();
 497 #if defined(PROFILING)
 498     CCCS = CCS_SYSTEM;
 499 #endif
 500
 501     schedulePostRunThread(cap,t);
 502
 503     if (ret != StackOverflow) {
 504         t = threadStackUnderflow(cap,task,t);
 505     }
 506
 507     ready_to_gc = rtsFalse;
 508
 509     switch (ret) {
 510     case HeapOverflow:
 511         ready_to_gc = scheduleHandleHeapOverflow(cap,t);
 512         break;
 513
 514     case StackOverflow:
 515         scheduleHandleStackOverflow(cap,task,t);
 516         break;
 517
 518     case ThreadYielding:
 519         if (scheduleHandleYield(cap, t, prev_what_next)) {
 520             // shortcut for switching between compiler/interpreter:
 521             goto run_thread;
 522         }
 523         break;
 524
 525     case ThreadBlocked:
 526         scheduleHandleThreadBlocked(t);
 527         break;
 528
 529     case ThreadFinished:
 530         if (scheduleHandleThreadFinished(cap, task, t)) return cap;
 531         ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 532         break;
 533
 534     default:
 535       barf("schedule: invalid thread return code %d", (int)ret);
 536     }
 537
 538     if (ready_to_gc || scheduleNeedHeapProfile(ready_to_gc)) {
 539       cap = scheduleDoGC(cap,task,rtsFalse);
 540     }
 541   } /* end of while() */
 542 }
 543
 544 /* -----------------------------------------------------------------------------
 545  * Run queue operations
 546  * -------------------------------------------------------------------------- */
 547
 548 void
 549 removeFromRunQueue (Capability *cap, StgTSO *tso)
 550 {
 551     if (tso->block_info.prev == END_TSO_QUEUE) {
 552         ASSERT(cap->run_queue_hd == tso);
 553         cap->run_queue_hd = tso->_link;
 554     } else {
 555         setTSOLink(cap, tso->block_info.prev, tso->_link);
 556     }
 557     if (tso->_link == END_TSO_QUEUE) {
 558         ASSERT(cap->run_queue_tl == tso);
 559         cap->run_queue_tl = tso->block_info.prev;
 560     } else {
 561         setTSOPrev(cap, tso->_link, tso->block_info.prev);
 562     }
 563     tso->_link = tso->block_info.prev = END_TSO_QUEUE;
 564
 565     IF_DEBUG(sanity, checkRunQueue(cap));
 566 }
 567
 568 /* ----------------------------------------------------------------------------
 569  * Setting up the scheduler loop
 570  * ------------------------------------------------------------------------- */
 571
 572 static void
 573 schedulePreLoop(void)
 574 {
 575   // initialisation for scheduler - what cannot go into initScheduler()
 576 }
 577
 578 /* -----------------------------------------------------------------------------
 579  * scheduleFindWork()
 580  *
 581  * Search for work to do, and handle messages from elsewhere.
 582  * -------------------------------------------------------------------------- */
 583
 584 static void
 585 scheduleFindWork (Capability *cap)
 586 {
 587     scheduleStartSignalHandlers(cap);
 588
 589     scheduleProcessInbox(cap);
 590
 591     scheduleCheckBlockedThreads(cap);
 592
 593 #if defined(THREADED_RTS)
 594     if (emptyRunQueue(cap)) { scheduleActivateSpark(cap); }
 595 #endif
 596 }
 597
 598 #if defined(THREADED_RTS)
 599 STATIC_INLINE rtsBool
 600 shouldYieldCapability (Capability *cap, Task *task)
 601 {
 602     // we need to yield this capability to someone else if..
 603     //   - another thread is initiating a GC
 604     //   - another Task is returning from a foreign call
 605     //   - the thread at the head of the run queue cannot be run
 606     //     by this Task (it is bound to another Task, or it is unbound
 607     //     and this task it bound).
 608     return (waiting_for_gc ||
 609             cap->returning_tasks_hd != NULL ||
 610             (!emptyRunQueue(cap) && (task->incall->tso == NULL
 611                                      ? cap->run_queue_hd->bound != NULL
 612                                      : cap->run_queue_hd->bound != task->incall)));
 613 }
 614
 615 // This is the single place where a Task goes to sleep.  There are
 616 // two reasons it might need to sleep:
 617 //    - there are no threads to run
 618 //    - we need to yield this Capability to someone else
 619 //      (see shouldYieldCapability())
 620 //
 621 // Careful: the scheduler loop is quite delicate.  Make sure you run
 622 // the tests in testsuite/concurrent (all ways) after modifying this,
 623 // and also check the benchmarks in nofib/parallel for regressions.
 624
 625 static void
 626 scheduleYield (Capability **pcap, Task *task)
 627 {
 628     Capability *cap = *pcap;
 629
 630     // if we have work, and we don't need to give up the Capability, continue.
 631     //
 632     if (!shouldYieldCapability(cap,task) &&
 633         (!emptyRunQueue(cap) ||
 634          !emptyInbox(cap) ||
 635          sched_state >= SCHED_INTERRUPTING))
 636         return;
 637
 638     // otherwise yield (sleep), and keep yielding if necessary.
 639     do {
 640         yieldCapability(&cap,task);
 641     }
 642     while (shouldYieldCapability(cap,task));
 643
 644     // note there may still be no threads on the run queue at this
 645     // point, the caller has to check.
 646
 647     *pcap = cap;
 648     return;
 649 }
 650 #endif
 651
 652 /* -----------------------------------------------------------------------------
 653  * schedulePushWork()
 654  *
 655  * Push work to other Capabilities if we have some.
 656  * -------------------------------------------------------------------------- */
 657
 658 static void
 659 schedulePushWork(Capability *cap USED_IF_THREADS,
 660                  Task *task      USED_IF_THREADS)
 661 {
 662   /* following code not for PARALLEL_HASKELL. I kept the call general,
 663      future GUM versions might use pushing in a distributed setup */
 664 #if defined(THREADED_RTS)
 665
 666     Capability *free_caps[n_capabilities], *cap0;
 667     nat i, n_free_caps;
 668
 669     // migration can be turned off with +RTS -qm
 670     if (!RtsFlags.ParFlags.migrate) return;
 671
 672     // Check whether we have more threads on our run queue, or sparks
 673     // in our pool, that we could hand to another Capability.
 674     if (cap->run_queue_hd == END_TSO_QUEUE) {
 675         if (sparkPoolSizeCap(cap) < 2) return;
 676     } else {
 677         if (cap->run_queue_hd->_link == END_TSO_QUEUE &&
 678             sparkPoolSizeCap(cap) < 1) return;
 679     }
 680
 681     // First grab as many free Capabilities as we can.
 682     for (i=0, n_free_caps=0; i < n_capabilities; i++) {
 683         cap0 = &capabilities[i];
 684         if (cap != cap0 && tryGrabCapability(cap0,task)) {
 685             if (!emptyRunQueue(cap0)
 686                 || cap->returning_tasks_hd != NULL
 687                 || cap->inbox != (Message*)END_TSO_QUEUE) {
 688                 // it already has some work, we just grabbed it at
 689                 // the wrong moment.  Or maybe it's deadlocked!
 690                 releaseCapability(cap0);
 691             } else {
 692                 free_caps[n_free_caps++] = cap0;
 693             }
 694         }
 695     }
 696
 697     // we now have n_free_caps free capabilities stashed in
 698     // free_caps[].  Share our run queue equally with them.  This is
 699     // probably the simplest thing we could do; improvements we might
 700     // want to do include:
 701     //
 702     //   - giving high priority to moving relatively new threads, on
 703     //     the gournds that they haven't had time to build up a
 704     //     working set in the cache on this CPU/Capability.
 705     //
 706     //   - giving low priority to moving long-lived threads
 707
 708     if (n_free_caps > 0) {
 709         StgTSO *prev, *t, *next;
 710         rtsBool pushed_to_all;
 711
 712         debugTrace(DEBUG_sched,
 713                    "cap %d: %s and %d free capabilities, sharing...",
 714                    cap->no,
 715                    (!emptyRunQueue(cap) && cap->run_queue_hd->_link != END_TSO_QUEUE)?
 716                    "excess threads on run queue":"sparks to share (>=2)",
 717                    n_free_caps);
 718
 719         i = 0;
 720         pushed_to_all = rtsFalse;
 721
 722         if (cap->run_queue_hd != END_TSO_QUEUE) {
 723             prev = cap->run_queue_hd;
 724             t = prev->_link;
 725             prev->_link = END_TSO_QUEUE;
 726             for (; t != END_TSO_QUEUE; t = next) {
 727                 next = t->_link;
 728                 t->_link = END_TSO_QUEUE;
 729                 if (t->what_next == ThreadRelocated
 730                     || t->bound == task->incall // don't move my bound thread
 731                     || tsoLocked(t)) {  // don't move a locked thread
 732                     setTSOLink(cap, prev, t);
 733                     setTSOPrev(cap, t, prev);
 734                     prev = t;
 735                 } else if (i == n_free_caps) {
 736                     pushed_to_all = rtsTrue;
 737                     i = 0;
 738                     // keep one for us
 739                     setTSOLink(cap, prev, t);
 740                     setTSOPrev(cap, t, prev);
 741                     prev = t;
 742                 } else {
 743                     appendToRunQueue(free_caps[i],t);
 744
 745                     traceEventMigrateThread (cap, t, free_caps[i]->no);
 746
 747                     if (t->bound) { t->bound->task->cap = free_caps[i]; }
 748                     t->cap = free_caps[i];
 749                     i++;
 750                 }
 751             }
 752             cap->run_queue_tl = prev;
 753
 754             IF_DEBUG(sanity, checkRunQueue(cap));
 755         }
 756
 757 #ifdef SPARK_PUSHING
 758         /* JB I left this code in place, it would work but is not necessary */
 759
 760         // If there are some free capabilities that we didn't push any
 761         // threads to, then try to push a spark to each one.
 762         if (!pushed_to_all) {
 763             StgClosure *spark;
 764             // i is the next free capability to push to
 765             for (; i < n_free_caps; i++) {
 766                 if (emptySparkPoolCap(free_caps[i])) {
 767                     spark = tryStealSpark(cap->sparks);
 768                     if (spark != NULL) {
 769                         debugTrace(DEBUG_sched, "pushing spark %p to capability %d", spark, free_caps[i]->no);
 770
 771             traceEventStealSpark(free_caps[i], t, cap->no);
 772
 773                         newSpark(&(free_caps[i]->r), spark);
 774                     }
 775                 }
 776             }
 777         }
 778 #endif /* SPARK_PUSHING */
 779
 780         // release the capabilities
 781         for (i = 0; i < n_free_caps; i++) {
 782             task->cap = free_caps[i];
 783             releaseAndWakeupCapability(free_caps[i]);
 784         }
 785     }
 786     task->cap = cap; // reset to point to our Capability.
 787
 788 #endif /* THREADED_RTS */
 789
 790 }
 791
 792 /* ----------------------------------------------------------------------------
 793  * Start any pending signal handlers
 794  * ------------------------------------------------------------------------- */
 795
 796 #if defined(RTS_USER_SIGNALS) && !defined(THREADED_RTS)
 797 static void
 798 scheduleStartSignalHandlers(Capability *cap)
 799 {
 800     if (RtsFlags.MiscFlags.install_signal_handlers && signals_pending()) {
 801         // safe outside the lock
 802         startSignalHandlers(cap);
 803     }
 804 }
 805 #else
 806 static void
 807 scheduleStartSignalHandlers(Capability *cap STG_UNUSED)
 808 {
 809 }
 810 #endif
 811
 812 /* ----------------------------------------------------------------------------
 813  * Check for blocked threads that can be woken up.
 814  * ------------------------------------------------------------------------- */
 815
 816 static void
 817 scheduleCheckBlockedThreads(Capability *cap USED_IF_NOT_THREADS)
 818 {
 819 #if !defined(THREADED_RTS)
 820     //
 821     // Check whether any waiting threads need to be woken up.  If the
 822     // run queue is empty, and there are no other tasks running, we
 823     // can wait indefinitely for something to happen.
 824     //
 825     if ( !emptyQueue(blocked_queue_hd) || !emptyQueue(sleeping_queue) )
 826     {
 827         awaitEvent (emptyRunQueue(cap));
 828     }
 829 #endif
 830 }
 831
 832 /* ----------------------------------------------------------------------------
 833  * Detect deadlock conditions and attempt to resolve them.
 834  * ------------------------------------------------------------------------- */
 835
 836 static void
 837 scheduleDetectDeadlock (Capability *cap, Task *task)
 838 {
 839     /*
 840      * Detect deadlock: when we have no threads to run, there are no
 841      * threads blocked, waiting for I/O, or sleeping, and all the
 842      * other tasks are waiting for work, we must have a deadlock of
 843      * some description.
 844      */
 845     if ( emptyThreadQueues(cap) )
 846     {
 847 #if defined(THREADED_RTS)
 848         /*
 849          * In the threaded RTS, we only check for deadlock if there
 850          * has been no activity in a complete timeslice.  This means
 851          * we won't eagerly start a full GC just because we don't have
 852          * any threads to run currently.
 853          */
 854         if (recent_activity != ACTIVITY_INACTIVE) return;
 855 #endif
 856
 857         debugTrace(DEBUG_sched, "deadlocked, forcing major GC...");
 858
 859         // Garbage collection can release some new threads due to
 860         // either (a) finalizers or (b) threads resurrected because
 861         // they are unreachable and will therefore be sent an
 862         // exception.  Any threads thus released will be immediately
 863         // runnable.
 864         cap = scheduleDoGC (cap, task, rtsTrue/*force major GC*/);
 865         // when force_major == rtsTrue. scheduleDoGC sets
 866         // recent_activity to ACTIVITY_DONE_GC and turns off the timer
 867         // signal.
 868
 869         if ( !emptyRunQueue(cap) ) return;
 870
 871 #if defined(RTS_USER_SIGNALS) && !defined(THREADED_RTS)
 872         /* If we have user-installed signal handlers, then wait
 873          * for signals to arrive rather then bombing out with a
 874          * deadlock.
 875          */
 876         if ( RtsFlags.MiscFlags.install_signal_handlers && anyUserHandlers() ) {
 877             debugTrace(DEBUG_sched,
 878                        "still deadlocked, waiting for signals...");
 879
 880             awaitUserSignals();
 881
 882             if (signals_pending()) {
 883                 startSignalHandlers(cap);
 884             }
 885
 886             // either we have threads to run, or we were interrupted:
 887             ASSERT(!emptyRunQueue(cap) || sched_state >= SCHED_INTERRUPTING);
 888
 889             return;
 890         }
 891 #endif
 892
 893 #if !defined(THREADED_RTS)
 894         /* Probably a real deadlock.  Send the current main thread the
 895          * Deadlock exception.
 896          */
 897         if (task->incall->tso) {
 898             switch (task->incall->tso->why_blocked) {
 899             case BlockedOnSTM:
 900             case BlockedOnBlackHole:
 901             case BlockedOnMsgThrowTo:
 902             case BlockedOnMVar:
 903                 throwToSingleThreaded(cap, task->incall->tso,
 904                                       (StgClosure *)nonTermination_closure);
 905                 return;
 906             default:
 907                 barf("deadlock: main thread blocked in a strange way");
 908             }
 909         }
 910         return;
 911 #endif
 912     }
 913 }
 914
 915
 916 /* ----------------------------------------------------------------------------
 917  * Send pending messages (PARALLEL_HASKELL only)
 918  * ------------------------------------------------------------------------- */
 919
 920 #if defined(PARALLEL_HASKELL)
 921 static void
 922 scheduleSendPendingMessages(void)
 923 {
 924
 925 # if defined(PAR) // global Mem.Mgmt., omit for now
 926     if (PendingFetches != END_BF_QUEUE) {
 927         processFetches();
 928     }
 929 # endif
 930
 931     if (RtsFlags.ParFlags.BufferTime) {
 932         // if we use message buffering, we must send away all message
 933         // packets which have become too old...
 934         sendOldBuffers();
 935     }
 936 }
 937 #endif
 938
 939 /* ----------------------------------------------------------------------------
 940  * Process message in the current Capability's inbox
 941  * ------------------------------------------------------------------------- */
 942
 943 static void
 944 scheduleProcessInbox (Capability *cap USED_IF_THREADS)
 945 {
 946 #if defined(THREADED_RTS)
 947     Message *m;
 948
 949     while (!emptyInbox(cap)) {
 950         ACQUIRE_LOCK(&cap->lock);
 951         m = cap->inbox;
 952         cap->inbox = m->link;
 953         RELEASE_LOCK(&cap->lock);
 954         executeMessage(cap, (Message *)m);
 955     }
 956 #endif
 957 }
 958
 959 /* ----------------------------------------------------------------------------
 960  * Activate spark threads (PARALLEL_HASKELL and THREADED_RTS)
 961  * ------------------------------------------------------------------------- */
 962
 963 #if defined(THREADED_RTS)
 964 static void
 965 scheduleActivateSpark(Capability *cap)
 966 {
 967     if (anySparks())
 968     {
 969         createSparkThread(cap);
 970         debugTrace(DEBUG_sched, "creating a spark thread");
 971     }
 972 }
 973 #endif // PARALLEL_HASKELL || THREADED_RTS
 974
 975 /* ----------------------------------------------------------------------------
 976  * After running a thread...
 977  * ------------------------------------------------------------------------- */
 978
 979 static void
 980 schedulePostRunThread (Capability *cap, StgTSO *t)
 981 {
 982     // We have to be able to catch transactions that are in an
 983     // infinite loop as a result of seeing an inconsistent view of
 984     // memory, e.g.
 985     //
 986     //   atomically $ do
 987     //       [a,b] <- mapM readTVar [ta,tb]
 988     //       when (a == b) loop
 989     //
 990     // and a is never equal to b given a consistent view of memory.
 991     //
 992     if (t -> trec != NO_TREC && t -> why_blocked == NotBlocked) {
 993         if (!stmValidateNestOfTransactions (t -> trec)) {
 994             debugTrace(DEBUG_sched | DEBUG_stm,
 995                        "trec %p found wasting its time", t);
 996
 997             // strip the stack back to the
 998             // ATOMICALLY_FRAME, aborting the (nested)
 999             // transaction, and saving the stack of any
1000             // partially-evaluated thunks on the heap.
1001             throwToSingleThreaded_(cap, t, NULL, rtsTrue);
1002
1003 //            ASSERT(get_itbl((StgClosure *)t->sp)->type == ATOMICALLY_FRAME);
1004         }
1005     }
1006
1007   /* some statistics gathering in the parallel case */
1008 }
1009
1010 /* -----------------------------------------------------------------------------
1011  * Handle a thread that returned to the scheduler with ThreadHeepOverflow
1012  * -------------------------------------------------------------------------- */
1013
1014 static rtsBool
1015 scheduleHandleHeapOverflow( Capability *cap, StgTSO *t )
1016 {
1017     // did the task ask for a large block?
1018     if (cap->r.rHpAlloc > BLOCK_SIZE) {
1019         // if so, get one and push it on the front of the nursery.
1020         bdescr *bd;
1021         lnat blocks;
1022
1023         blocks = (lnat)BLOCK_ROUND_UP(cap->r.rHpAlloc) / BLOCK_SIZE;
1024
1025         debugTrace(DEBUG_sched,
1026                    "--<< thread %ld (%s) stopped: requesting a large block (size %ld)\n",
1027                    (long)t->id, what_next_strs[t->what_next], blocks);
1028
1029         // don't do this if the nursery is (nearly) full, we'll GC first.
1030         if (cap->r.rCurrentNursery->link != NULL ||
1031             cap->r.rNursery->n_blocks == 1) {  // paranoia to prevent infinite loop
1032                                                // if the nursery has only one block.
1033
1034             bd = allocGroup_lock(blocks);
1035             cap->r.rNursery->n_blocks += blocks;
1036
1037             // link the new group into the list
1038             bd->link = cap->r.rCurrentNursery;
1039             bd->u.back = cap->r.rCurrentNursery->u.back;
1040             if (cap->r.rCurrentNursery->u.back != NULL) {
1041                 cap->r.rCurrentNursery->u.back->link = bd;
1042             } else {
1043                 cap->r.rNursery->blocks = bd;
1044             }
1045             cap->r.rCurrentNursery->u.back = bd;
1046
1047             // initialise it as a nursery block.  We initialise the
1048             // step, gen_no, and flags field of *every* sub-block in
1049             // this large block, because this is easier than making
1050             // sure that we always find the block head of a large
1051             // block whenever we call Bdescr() (eg. evacuate() and
1052             // isAlive() in the GC would both have to do this, at
1053             // least).
1054             {
1055                 bdescr *x;
1056                 for (x = bd; x < bd + blocks; x++) {
1057                     initBdescr(x,g0,g0);
1058                     x->free = x->start;
1059                     x->flags = 0;
1060                 }
1061             }
1062
1063             // This assert can be a killer if the app is doing lots
1064             // of large block allocations.
1065             IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
1066
1067             // now update the nursery to point to the new block
1068             cap->r.rCurrentNursery = bd;
1069
1070             // we might be unlucky and have another thread get on the
1071             // run queue before us and steal the large block, but in that
1072             // case the thread will just end up requesting another large
1073             // block.
1074             pushOnRunQueue(cap,t);
1075             return rtsFalse;  /* not actually GC'ing */
1076         }
1077     }
1078
1079     if (cap->r.rHpLim == NULL || cap->context_switch) {
1080         // Sometimes we miss a context switch, e.g. when calling
1081         // primitives in a tight loop, MAYBE_GC() doesn't check the
1082         // context switch flag, and we end up waiting for a GC.
1083         // See #1984, and concurrent/should_run/1984
1084         cap->context_switch = 0;
1085         appendToRunQueue(cap,t);
1086     } else {
1087         pushOnRunQueue(cap,t);
1088     }
1089     return rtsTrue;
1090     /* actual GC is done at the end of the while loop in schedule() */
1091 }
1092
1093 /* -----------------------------------------------------------------------------
1094  * Handle a thread that returned to the scheduler with ThreadStackOverflow
1095  * -------------------------------------------------------------------------- */
1096
1097 static void
1098 scheduleHandleStackOverflow (Capability *cap, Task *task, StgTSO *t)
1099 {
1100     /* just adjust the stack for this thread, then pop it back
1101      * on the run queue.
1102      */
1103     {
1104         /* enlarge the stack */
1105         StgTSO *new_t = threadStackOverflow(cap, t);
1106
1107         /* The TSO attached to this Task may have moved, so update the
1108          * pointer to it.
1109          */
1110         if (task->incall->tso == t) {
1111             task->incall->tso = new_t;
1112         }
1113         pushOnRunQueue(cap,new_t);
1114     }
1115 }
1116
1117 /* -----------------------------------------------------------------------------
1118  * Handle a thread that returned to the scheduler with ThreadYielding
1119  * -------------------------------------------------------------------------- */
1120
1121 static rtsBool
1122 scheduleHandleYield( Capability *cap, StgTSO *t, nat prev_what_next )
1123 {
1124     /* put the thread back on the run queue.  Then, if we're ready to
1125      * GC, check whether this is the last task to stop.  If so, wake
1126      * up the GC thread.  getThread will block during a GC until the
1127      * GC is finished.
1128      */
1129
1130     ASSERT(t->_link == END_TSO_QUEUE);
1131
1132     // Shortcut if we're just switching evaluators: don't bother
1133     // doing stack squeezing (which can be expensive), just run the
1134     // thread.
1135     if (cap->context_switch == 0 && t->what_next != prev_what_next) {
1136         debugTrace(DEBUG_sched,
1137                    "--<< thread %ld (%s) stopped to switch evaluators",
1138                    (long)t->id, what_next_strs[t->what_next]);
1139         return rtsTrue;
1140     }
1141
1142     // Reset the context switch flag.  We don't do this just before
1143     // running the thread, because that would mean we would lose ticks
1144     // during GC, which can lead to unfair scheduling (a thread hogs
1145     // the CPU because the tick always arrives during GC).  This way
1146     // penalises threads that do a lot of allocation, but that seems
1147     // better than the alternative.
1148     cap->context_switch = 0;
1149
1150     IF_DEBUG(sanity,
1151              //debugBelch("&& Doing sanity check on yielding TSO %ld.", t->id);
1152              checkTSO(t));
1153
1154     appendToRunQueue(cap,t);
1155
1156     return rtsFalse;
1157 }
1158
1159 /* -----------------------------------------------------------------------------
1160  * Handle a thread that returned to the scheduler with ThreadBlocked
1161  * -------------------------------------------------------------------------- */
1162
1163 static void
1164 scheduleHandleThreadBlocked( StgTSO *t
1165 #if !defined(DEBUG)
1166     STG_UNUSED
1167 #endif
1168     )
1169 {
1170
1171       // We don't need to do anything.  The thread is blocked, and it
1172       // has tidied up its stack and placed itself on whatever queue
1173       // it needs to be on.
1174
1175     // ASSERT(t->why_blocked != NotBlocked);
1176     // Not true: for example,
1177     //    - the thread may have woken itself up already, because
1178     //      threadPaused() might have raised a blocked throwTo
1179     //      exception, see maybePerformBlockedException().
1180
1181 #ifdef DEBUG
1182     traceThreadStatus(DEBUG_sched, t);
1183 #endif
1184 }
1185
1186 /* -----------------------------------------------------------------------------
1187  * Handle a thread that returned to the scheduler with ThreadFinished
1188  * -------------------------------------------------------------------------- */
1189
1190 static rtsBool
1191 scheduleHandleThreadFinished (Capability *cap STG_UNUSED, Task *task, StgTSO *t)
1192 {
1193     /* Need to check whether this was a main thread, and if so,
1194      * return with the return value.
1195      *
1196      * We also end up here if the thread kills itself with an
1197      * uncaught exception, see Exception.cmm.
1198      */
1199
1200     // blocked exceptions can now complete, even if the thread was in
1201     // blocked mode (see #2910).
1202     awakenBlockedExceptionQueue (cap, t);
1203
1204       //
1205       // Check whether the thread that just completed was a bound
1206       // thread, and if so return with the result.
1207       //
1208       // There is an assumption here that all thread completion goes
1209       // through this point; we need to make sure that if a thread
1210       // ends up in the ThreadKilled state, that it stays on the run
1211       // queue so it can be dealt with here.
1212       //
1213
1214       if (t->bound) {
1215
1216           if (t->bound != task->incall) {
1217 #if !defined(THREADED_RTS)
1218               // Must be a bound thread that is not the topmost one.  Leave
1219               // it on the run queue until the stack has unwound to the
1220               // point where we can deal with this.  Leaving it on the run
1221               // queue also ensures that the garbage collector knows about
1222               // this thread and its return value (it gets dropped from the
1223               // step->threads list so there's no other way to find it).
1224               appendToRunQueue(cap,t);
1225               return rtsFalse;
1226 #else
1227               // this cannot happen in the threaded RTS, because a
1228               // bound thread can only be run by the appropriate Task.
1229               barf("finished bound thread that isn't mine");
1230 #endif
1231           }
1232
1233           ASSERT(task->incall->tso == t);
1234
1235           if (t->what_next == ThreadComplete) {
1236               if (task->incall->ret) {
1237                   // NOTE: return val is tso->sp[1] (see StgStartup.hc)
1238                   *(task->incall->ret) = (StgClosure *)task->incall->tso->sp[1];
1239               }
1240               task->incall->stat = Success;
1241           } else {
1242               if (task->incall->ret) {
1243                   *(task->incall->ret) = NULL;
1244               }
1245               if (sched_state >= SCHED_INTERRUPTING) {
1246                   if (heap_overflow) {
1247                       task->incall->stat = HeapExhausted;
1248                   } else {
1249                       task->incall->stat = Interrupted;
1250                   }
1251               } else {
1252                   task->incall->stat = Killed;
1253               }
1254           }
1255 #ifdef DEBUG
1256           removeThreadLabel((StgWord)task->incall->tso->id);
1257 #endif
1258
1259           // We no longer consider this thread and task to be bound to
1260           // each other.  The TSO lives on until it is GC'd, but the
1261           // task is about to be released by the caller, and we don't
1262           // want anyone following the pointer from the TSO to the
1263           // defunct task (which might have already been
1264           // re-used). This was a real bug: the GC updated
1265           // tso->bound->tso which lead to a deadlock.
1266           t->bound = NULL;
1267           task->incall->tso = NULL;
1268
1269           return rtsTrue; // tells schedule() to return
1270       }
1271
1272       return rtsFalse;
1273 }
1274
1275 /* -----------------------------------------------------------------------------
1276  * Perform a heap census
1277  * -------------------------------------------------------------------------- */
1278
1279 static rtsBool
1280 scheduleNeedHeapProfile( rtsBool ready_to_gc STG_UNUSED )
1281 {
1282     // When we have +RTS -i0 and we're heap profiling, do a census at
1283     // every GC.  This lets us get repeatable runs for debugging.
1284     if (performHeapProfile ||
1285         (RtsFlags.ProfFlags.profileInterval==0 &&
1286          RtsFlags.ProfFlags.doHeapProfile && ready_to_gc)) {
1287         return rtsTrue;
1288     } else {
1289         return rtsFalse;
1290     }
1291 }
1292
1293 /* -----------------------------------------------------------------------------
1294  * Perform a garbage collection if necessary
1295  * -------------------------------------------------------------------------- */
1296
1297 static Capability *
1298 scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major)
1299 {
1300     rtsBool heap_census;
1301 #ifdef THREADED_RTS
1302     /* extern static volatile StgWord waiting_for_gc;
1303        lives inside capability.c */
1304     rtsBool gc_type, prev_pending_gc;
1305     nat i;
1306 #endif
1307
1308     if (sched_state == SCHED_SHUTTING_DOWN) {
1309         // The final GC has already been done, and the system is
1310         // shutting down.  We'll probably deadlock if we try to GC
1311         // now.
1312         return cap;
1313     }
1314
1315 #ifdef THREADED_RTS
1316     if (sched_state < SCHED_INTERRUPTING
1317         && RtsFlags.ParFlags.parGcEnabled
1318         && N >= RtsFlags.ParFlags.parGcGen
1319         && ! oldest_gen->mark)
1320     {
1321         gc_type = PENDING_GC_PAR;
1322     } else {
1323         gc_type = PENDING_GC_SEQ;
1324     }
1325
1326     // In order to GC, there must be no threads running Haskell code.
1327     // Therefore, the GC thread needs to hold *all* the capabilities,
1328     // and release them after the GC has completed.
1329     //
1330     // This seems to be the simplest way: previous attempts involved
1331     // making all the threads with capabilities give up their
1332     // capabilities and sleep except for the *last* one, which
1333     // actually did the GC.  But it's quite hard to arrange for all
1334     // the other tasks to sleep and stay asleep.
1335     //
1336
1337     /*  Other capabilities are prevented from running yet more Haskell
1338         threads if waiting_for_gc is set. Tested inside
1339         yieldCapability() and releaseCapability() in Capability.c */
1340
1341     prev_pending_gc = cas(&waiting_for_gc, 0, gc_type);
1342     if (prev_pending_gc) {
1343         do {
1344             debugTrace(DEBUG_sched, "someone else is trying to GC (%d)...",
1345                        prev_pending_gc);
1346             ASSERT(cap);
1347             yieldCapability(&cap,task);
1348         } while (waiting_for_gc);
1349         return cap;  // NOTE: task->cap might have changed here
1350     }
1351
1352     setContextSwitches();
1353
1354     // The final shutdown GC is always single-threaded, because it's
1355     // possible that some of the Capabilities have no worker threads.
1356
1357     if (gc_type == PENDING_GC_SEQ)
1358     {
1359         traceEventRequestSeqGc(cap);
1360     }
1361     else
1362     {
1363         traceEventRequestParGc(cap);
1364         debugTrace(DEBUG_sched, "ready_to_gc, grabbing GC threads");
1365     }
1366
1367     if (gc_type == PENDING_GC_SEQ)
1368     {
1369         // single-threaded GC: grab all the capabilities
1370         for (i=0; i < n_capabilities; i++) {
1371             debugTrace(DEBUG_sched, "ready_to_gc, grabbing all the capabilies (%d/%d)", i, n_capabilities);
1372             if (cap != &capabilities[i]) {
1373                 Capability *pcap = &capabilities[i];
1374                 // we better hope this task doesn't get migrated to
1375                 // another Capability while we're waiting for this one.
1376                 // It won't, because load balancing happens while we have
1377                 // all the Capabilities, but even so it's a slightly
1378                 // unsavoury invariant.
1379                 task->cap = pcap;
1380                 waitForReturnCapability(&pcap, task);
1381                 if (pcap != &capabilities[i]) {
1382                     barf("scheduleDoGC: got the wrong capability");
1383                 }
1384             }
1385         }
1386     }
1387     else
1388     {
1389         // multi-threaded GC: make sure all the Capabilities donate one
1390         // GC thread each.
1391         waitForGcThreads(cap);
1392     }
1393
1394 #endif
1395
1396     IF_DEBUG(scheduler, printAllThreads());
1397
1398 delete_threads_and_gc:
1399     /*
1400      * We now have all the capabilities; if we're in an interrupting
1401      * state, then we should take the opportunity to delete all the
1402      * threads in the system.
1403      */
1404     if (sched_state == SCHED_INTERRUPTING) {
1405         deleteAllThreads(cap);
1406         sched_state = SCHED_SHUTTING_DOWN;
1407     }
1408
1409     heap_census = scheduleNeedHeapProfile(rtsTrue);
1410
1411     traceEventGcStart(cap);
1412 #if defined(THREADED_RTS)
1413     // reset waiting_for_gc *before* GC, so that when the GC threads
1414     // emerge they don't immediately re-enter the GC.
1415     waiting_for_gc = 0;
1416     GarbageCollect(force_major || heap_census, gc_type, cap);
1417 #else
1418     GarbageCollect(force_major || heap_census, 0, cap);
1419 #endif
1420     traceEventGcEnd(cap);
1421
1422     if (recent_activity == ACTIVITY_INACTIVE && force_major)
1423     {
1424         // We are doing a GC because the system has been idle for a
1425         // timeslice and we need to check for deadlock.  Record the
1426         // fact that we've done a GC and turn off the timer signal;
1427         // it will get re-enabled if we run any threads after the GC.
1428         recent_activity = ACTIVITY_DONE_GC;
1429         stopTimer();
1430     }
1431     else
1432     {
1433         // the GC might have taken long enough for the timer to set
1434         // recent_activity = ACTIVITY_INACTIVE, but we aren't
1435         // necessarily deadlocked:
1436         recent_activity = ACTIVITY_YES;
1437     }
1438
1439 #if defined(THREADED_RTS)
1440     if (gc_type == PENDING_GC_PAR)
1441     {
1442         releaseGCThreads(cap);
1443     }
1444 #endif
1445
1446     if (heap_census) {
1447         debugTrace(DEBUG_sched, "performing heap census");
1448         heapCensus();
1449         performHeapProfile = rtsFalse;
1450     }
1451
1452     if (heap_overflow && sched_state < SCHED_INTERRUPTING) {
1453         // GC set the heap_overflow flag, so we should proceed with
1454         // an orderly shutdown now.  Ultimately we want the main
1455         // thread to return to its caller with HeapExhausted, at which
1456         // point the caller should call hs_exit().  The first step is
1457         // to delete all the threads.
1458         //
1459         // Another way to do this would be to raise an exception in
1460         // the main thread, which we really should do because it gives
1461         // the program a chance to clean up.  But how do we find the
1462         // main thread?  It should presumably be the same one that
1463         // gets ^C exceptions, but that's all done on the Haskell side
1464         // (GHC.TopHandler).
1465         sched_state = SCHED_INTERRUPTING;
1466         goto delete_threads_and_gc;
1467     }
1468
1469 #ifdef SPARKBALANCE
1470     /* JB
1471        Once we are all together... this would be the place to balance all
1472        spark pools. No concurrent stealing or adding of new sparks can
1473        occur. Should be defined in Sparks.c. */
1474     balanceSparkPoolsCaps(n_capabilities, capabilities);
1475 #endif
1476
1477 #if defined(THREADED_RTS)
1478     if (gc_type == PENDING_GC_SEQ) {
1479         // release our stash of capabilities.
1480         for (i = 0; i < n_capabilities; i++) {
1481             if (cap != &capabilities[i]) {
1482                 task->cap = &capabilities[i];
1483                 releaseCapability(&capabilities[i]);
1484             }
1485         }
1486     }
1487     if (cap) {
1488         task->cap = cap;
1489     } else {
1490         task->cap = NULL;
1491     }
1492 #endif
1493
1494     return cap;
1495 }
1496
1497 /* ---------------------------------------------------------------------------
1498  * Singleton fork(). Do not copy any running threads.
1499  * ------------------------------------------------------------------------- */
1500
1501 pid_t
1502 forkProcess(HsStablePtr *entry
1503 #ifndef FORKPROCESS_PRIMOP_SUPPORTED
1504             STG_UNUSED
1505 #endif
1506            )
1507 {
1508 #ifdef FORKPROCESS_PRIMOP_SUPPORTED
1509     pid_t pid;
1510     StgTSO* t,*next;
1511     Capability *cap;
1512     nat g;
1513
1514 #if defined(THREADED_RTS)
1515     if (RtsFlags.ParFlags.nNodes > 1) {
1516         errorBelch("forking not supported with +RTS -N<n> greater than 1");
1517         stg_exit(EXIT_FAILURE);
1518     }
1519 #endif
1520
1521     debugTrace(DEBUG_sched, "forking!");
1522
1523     // ToDo: for SMP, we should probably acquire *all* the capabilities
1524     cap = rts_lock();
1525
1526     // no funny business: hold locks while we fork, otherwise if some
1527     // other thread is holding a lock when the fork happens, the data
1528     // structure protected by the lock will forever be in an
1529     // inconsistent state in the child.  See also #1391.
1530     ACQUIRE_LOCK(&sched_mutex);
1531     ACQUIRE_LOCK(&cap->lock);
1532     ACQUIRE_LOCK(&cap->running_task->lock);
1533
1534     stopTimer(); // See #4074
1535
1536     pid = fork();
1537
1538     if (pid) { // parent
1539
1540         startTimer(); // #4074
1541
1542         RELEASE_LOCK(&sched_mutex);
1543         RELEASE_LOCK(&cap->lock);
1544         RELEASE_LOCK(&cap->running_task->lock);
1545
1546         // just return the pid
1547         rts_unlock(cap);
1548         return pid;
1549
1550     } else { // child
1551
1552 #if defined(THREADED_RTS)
1553         initMutex(&sched_mutex);
1554         initMutex(&cap->lock);
1555         initMutex(&cap->running_task->lock);
1556 #endif
1557
1558         // Now, all OS threads except the thread that forked are
1559         // stopped.  We need to stop all Haskell threads, including
1560         // those involved in foreign calls.  Also we need to delete
1561         // all Tasks, because they correspond to OS threads that are
1562         // now gone.
1563
1564         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1565           for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) {
1566             if (t->what_next == ThreadRelocated) {
1567                 next = t->_link;
1568             } else {
1569                 next = t->global_link;
1570                 // don't allow threads to catch the ThreadKilled
1571                 // exception, but we do want to raiseAsync() because these
1572                 // threads may be evaluating thunks that we need later.
1573                 deleteThread_(cap,t);
1574
1575                 // stop the GC from updating the InCall to point to
1576                 // the TSO.  This is only necessary because the
1577                 // OSThread bound to the TSO has been killed, and
1578                 // won't get a chance to exit in the usual way (see
1579                 // also scheduleHandleThreadFinished).
1580                 t->bound = NULL;
1581             }
1582           }
1583         }
1584
1585         // Empty the run queue.  It seems tempting to let all the
1586         // killed threads stay on the run queue as zombies to be
1587         // cleaned up later, but some of them correspond to bound
1588         // threads for which the corresponding Task does not exist.
1589         cap->run_queue_hd = END_TSO_QUEUE;
1590         cap->run_queue_tl = END_TSO_QUEUE;
1591
1592         // Any suspended C-calling Tasks are no more, their OS threads
1593         // don't exist now:
1594         cap->suspended_ccalls = NULL;
1595
1596         // Empty the threads lists.  Otherwise, the garbage
1597         // collector may attempt to resurrect some of these threads.
1598         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1599             generations[g].threads = END_TSO_QUEUE;
1600         }
1601
1602         discardTasksExcept(cap->running_task);
1603
1604 #if defined(THREADED_RTS)
1605         // Wipe our spare workers list, they no longer exist.  New
1606         // workers will be created if necessary.
1607         cap->spare_workers = NULL;
1608         cap->n_spare_workers = 0;
1609         cap->returning_tasks_hd = NULL;
1610         cap->returning_tasks_tl = NULL;
1611 #endif
1612
1613         // On Unix, all timers are reset in the child, so we need to start
1614         // the timer again.
1615         initTimer();
1616         startTimer();
1617
1618 #if defined(THREADED_RTS)
1619         cap = ioManagerStartCap(cap);
1620 #endif
1621
1622         cap = rts_evalStableIO(cap, entry, NULL);  // run the action
1623         rts_checkSchedStatus("forkProcess",cap);
1624
1625         rts_unlock(cap);
1626         hs_exit();                      // clean up and exit
1627         stg_exit(EXIT_SUCCESS);
1628     }
1629 #else /* !FORKPROCESS_PRIMOP_SUPPORTED */
1630     barf("forkProcess#: primop not supported on this platform, sorry!\n");
1631 #endif
1632 }
1633
1634 /* ---------------------------------------------------------------------------
1635  * Delete all the threads in the system
1636  * ------------------------------------------------------------------------- */
1637
1638 static void
1639 deleteAllThreads ( Capability *cap )
1640 {
1641     // NOTE: only safe to call if we own all capabilities.
1642
1643     StgTSO* t, *next;
1644     nat g;
1645
1646     debugTrace(DEBUG_sched,"deleting all threads");
1647     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1648         for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) {
1649             if (t->what_next == ThreadRelocated) {
1650                 next = t->_link;
1651             } else {
1652                 next = t->global_link;
1653                 deleteThread(cap,t);
1654             }
1655         }
1656     }
1657
1658     // The run queue now contains a bunch of ThreadKilled threads.  We
1659     // must not throw these away: the main thread(s) will be in there
1660     // somewhere, and the main scheduler loop has to deal with it.
1661     // Also, the run queue is the only thing keeping these threads from
1662     // being GC'd, and we don't want the "main thread has been GC'd" panic.
1663
1664 #if !defined(THREADED_RTS)
1665     ASSERT(blocked_queue_hd == END_TSO_QUEUE);
1666     ASSERT(sleeping_queue == END_TSO_QUEUE);
1667 #endif
1668 }
1669
1670 /* -----------------------------------------------------------------------------
1671    Managing the suspended_ccalls list.
1672    Locks required: sched_mutex
1673    -------------------------------------------------------------------------- */
1674
1675 STATIC_INLINE void
1676 suspendTask (Capability *cap, Task *task)
1677 {
1678     InCall *incall;
1679
1680     incall = task->incall;
1681     ASSERT(incall->next == NULL && incall->prev == NULL);
1682     incall->next = cap->suspended_ccalls;
1683     incall->prev = NULL;
1684     if (cap->suspended_ccalls) {
1685         cap->suspended_ccalls->prev = incall;
1686     }
1687     cap->suspended_ccalls = incall;
1688 }
1689
1690 STATIC_INLINE void
1691 recoverSuspendedTask (Capability *cap, Task *task)
1692 {
1693     InCall *incall;
1694
1695     incall = task->incall;
1696     if (incall->prev) {
1697         incall->prev->next = incall->next;
1698     } else {
1699         ASSERT(cap->suspended_ccalls == incall);
1700         cap->suspended_ccalls = incall->next;
1701     }
1702     if (incall->next) {
1703         incall->next->prev = incall->prev;
1704     }
1705     incall->next = incall->prev = NULL;
1706 }
1707
1708 /* ---------------------------------------------------------------------------
1709  * Suspending & resuming Haskell threads.
1710  *
1711  * When making a "safe" call to C (aka _ccall_GC), the task gives back
1712  * its capability before calling the C function.  This allows another
1713  * task to pick up the capability and carry on running Haskell
1714  * threads.  It also means that if the C call blocks, it won't lock
1715  * the whole system.
1716  *
1717  * The Haskell thread making the C call is put to sleep for the
1718  * duration of the call, on the suspended_ccalling_threads queue.  We
1719  * give out a token to the task, which it can use to resume the thread
1720  * on return from the C function.
1721  *
1722  * If this is an interruptible C call, this means that the FFI call may be
1723  * unceremoniously terminated and should be scheduled on an
1724  * unbound worker thread.
1725  * ------------------------------------------------------------------------- */
1726
1727 void *
1728 suspendThread (StgRegTable *reg, rtsBool interruptible)
1729 {
1730   Capability *cap;
1731   int saved_errno;
1732   StgTSO *tso;
1733   Task *task;
1734 #if mingw32_HOST_OS
1735   StgWord32 saved_winerror;
1736 #endif
1737
1738   saved_errno = errno;
1739 #if mingw32_HOST_OS
1740   saved_winerror = GetLastError();
1741 #endif
1742
1743   /* assume that *reg is a pointer to the StgRegTable part of a Capability.
1744    */
1745   cap = regTableToCapability(reg);
1746
1747   task = cap->running_task;
1748   tso = cap->r.rCurrentTSO;
1749
1750   traceEventStopThread(cap, tso, THREAD_SUSPENDED_FOREIGN_CALL);
1751
1752   // XXX this might not be necessary --SDM
1753   tso->what_next = ThreadRunGHC;
1754
1755   threadPaused(cap,tso);
1756
1757   if (interruptible) {
1758     tso->why_blocked = BlockedOnCCall_Interruptible;
1759   } else {
1760     tso->why_blocked = BlockedOnCCall;
1761   }
1762
1763   // Hand back capability
1764   task->incall->suspended_tso = tso;
1765   task->incall->suspended_cap = cap;
1766
1767   ACQUIRE_LOCK(&cap->lock);
1768
1769   suspendTask(cap,task);
1770   cap->in_haskell = rtsFalse;
1771   releaseCapability_(cap,rtsFalse);
1772
1773   RELEASE_LOCK(&cap->lock);
1774
1775   errno = saved_errno;
1776 #if mingw32_HOST_OS
1777   SetLastError(saved_winerror);
1778 #endif
1779   return task;
1780 }
1781
1782 StgRegTable *
1783 resumeThread (void *task_)
1784 {
1785     StgTSO *tso;
1786     InCall *incall;
1787     Capability *cap;
1788     Task *task = task_;
1789     int saved_errno;
1790 #if mingw32_HOST_OS
1791     StgWord32 saved_winerror;
1792 #endif
1793
1794     saved_errno = errno;
1795 #if mingw32_HOST_OS
1796     saved_winerror = GetLastError();
1797 #endif
1798
1799     incall = task->incall;
1800     cap = incall->suspended_cap;
1801     task->cap = cap;
1802
1803     // Wait for permission to re-enter the RTS with the result.
1804     waitForReturnCapability(&cap,task);
1805     // we might be on a different capability now... but if so, our
1806     // entry on the suspended_ccalls list will also have been
1807     // migrated.
1808
1809     // Remove the thread from the suspended list
1810     recoverSuspendedTask(cap,task);
1811
1812     tso = incall->suspended_tso;
1813     incall->suspended_tso = NULL;
1814     incall->suspended_cap = NULL;
1815     tso->_link = END_TSO_QUEUE; // no write barrier reqd
1816
1817     traceEventRunThread(cap, tso);
1818
1819     /* Reset blocking status */
1820     tso->why_blocked  = NotBlocked;
1821
1822     if ((tso->flags & TSO_BLOCKEX) == 0) {
1823         // avoid locking the TSO if we don't have to
1824         if (tso->blocked_exceptions != END_BLOCKED_EXCEPTIONS_QUEUE) {
1825             maybePerformBlockedException(cap,tso);
1826         }
1827     }
1828
1829     cap->r.rCurrentTSO = tso;
1830     cap->in_haskell = rtsTrue;
1831     errno = saved_errno;
1832 #if mingw32_HOST_OS
1833     SetLastError(saved_winerror);
1834 #endif
1835
1836     /* We might have GC'd, mark the TSO dirty again */
1837     dirty_TSO(cap,tso);
1838
1839     IF_DEBUG(sanity, checkTSO(tso));
1840
1841     return &cap->r;
1842 }
1843
1844 /* ---------------------------------------------------------------------------
1845  * scheduleThread()
1846  *
1847  * scheduleThread puts a thread on the end  of the runnable queue.
1848  * This will usually be done immediately after a thread is created.
1849  * The caller of scheduleThread must create the thread using e.g.
1850  * createThread and push an appropriate closure
1851  * on this thread's stack before the scheduler is invoked.
1852  * ------------------------------------------------------------------------ */
1853
1854 void
1855 scheduleThread(Capability *cap, StgTSO *tso)
1856 {
1857     // The thread goes at the *end* of the run-queue, to avoid possible
1858     // starvation of any threads already on the queue.
1859     appendToRunQueue(cap,tso);
1860 }
1861
1862 void
1863 scheduleThreadOn(Capability *cap, StgWord cpu USED_IF_THREADS, StgTSO *tso)
1864 {
1865 #if defined(THREADED_RTS)
1866     tso->flags |= TSO_LOCKED; // we requested explicit affinity; don't
1867                               // move this thread from now on.
1868     cpu %= RtsFlags.ParFlags.nNodes;
1869     if (cpu == cap->no) {
1870         appendToRunQueue(cap,tso);
1871     } else {
1872         migrateThread(cap, tso, &capabilities[cpu]);
1873     }
1874 #else
1875     appendToRunQueue(cap,tso);
1876 #endif
1877 }
1878
1879 Capability *
1880 scheduleWaitThread (StgTSO* tso, /*[out]*/HaskellObj* ret, Capability *cap)
1881 {
1882     Task *task;
1883     StgThreadID id;
1884
1885     // We already created/initialised the Task
1886     task = cap->running_task;
1887
1888     // This TSO is now a bound thread; make the Task and TSO
1889     // point to each other.
1890     tso->bound = task->incall;
1891     tso->cap = cap;
1892
1893     task->incall->tso = tso;
1894     task->incall->ret = ret;
1895     task->incall->stat = NoStatus;
1896
1897     appendToRunQueue(cap,tso);
1898
1899     id = tso->id;
1900     debugTrace(DEBUG_sched, "new bound thread (%lu)", (unsigned long)id);
1901
1902     cap = schedule(cap,task);
1903
1904     ASSERT(task->incall->stat != NoStatus);
1905     ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
1906
1907     debugTrace(DEBUG_sched, "bound thread (%lu) finished", (unsigned long)id);
1908     return cap;
1909 }
1910
1911 /* ----------------------------------------------------------------------------
1912  * Starting Tasks
1913  * ------------------------------------------------------------------------- */
1914
1915 #if defined(THREADED_RTS)
1916 void scheduleWorker (Capability *cap, Task *task)
1917 {
1918     // schedule() runs without a lock.
1919     cap = schedule(cap,task);
1920
1921     // On exit from schedule(), we have a Capability, but possibly not
1922     // the same one we started with.
1923
1924     // During shutdown, the requirement is that after all the
1925     // Capabilities are shut down, all workers that are shutting down
1926     // have finished workerTaskStop().  This is why we hold on to
1927     // cap->lock until we've finished workerTaskStop() below.
1928     //
1929     // There may be workers still involved in foreign calls; those
1930     // will just block in waitForReturnCapability() because the
1931     // Capability has been shut down.
1932     //
1933     ACQUIRE_LOCK(&cap->lock);
1934     releaseCapability_(cap,rtsFalse);
1935     workerTaskStop(task);
1936     RELEASE_LOCK(&cap->lock);
1937 }
1938 #endif
1939
1940 /* ---------------------------------------------------------------------------
1941  * initScheduler()
1942  *
1943  * Initialise the scheduler.  This resets all the queues - if the
1944  * queues contained any threads, they'll be garbage collected at the
1945  * next pass.
1946  *
1947  * ------------------------------------------------------------------------ */
1948
1949 void
1950 initScheduler(void)
1951 {
1952 #if !defined(THREADED_RTS)
1953   blocked_queue_hd  = END_TSO_QUEUE;
1954   blocked_queue_tl  = END_TSO_QUEUE;
1955   sleeping_queue    = END_TSO_QUEUE;
1956 #endif
1957
1958   sched_state    = SCHED_RUNNING;
1959   recent_activity = ACTIVITY_YES;
1960
1961 #if defined(THREADED_RTS)
1962   /* Initialise the mutex and condition variables used by
1963    * the scheduler. */
1964   initMutex(&sched_mutex);
1965 #endif
1966
1967   ACQUIRE_LOCK(&sched_mutex);
1968
1969   /* A capability holds the state a native thread needs in
1970    * order to execute STG code. At least one capability is
1971    * floating around (only THREADED_RTS builds have more than one).
1972    */
1973   initCapabilities();
1974
1975   initTaskManager();
1976
1977 #if defined(THREADED_RTS)
1978   initSparkPools();
1979 #endif
1980
1981   RELEASE_LOCK(&sched_mutex);
1982
1983 #if defined(THREADED_RTS)
1984   /*
1985    * Eagerly start one worker to run each Capability, except for
1986    * Capability 0.  The idea is that we're probably going to start a
1987    * bound thread on Capability 0 pretty soon, so we don't want a
1988    * worker task hogging it.
1989    */
1990   {
1991       nat i;
1992       Capability *cap;
1993       for (i = 1; i < n_capabilities; i++) {
1994           cap = &capabilities[i];
1995           ACQUIRE_LOCK(&cap->lock);
1996           startWorkerTask(cap);
1997           RELEASE_LOCK(&cap->lock);
1998       }
1999   }
2000 #endif
2001 }
2002
2003 void
2004 exitScheduler (rtsBool wait_foreign USED_IF_THREADS)
2005                /* see Capability.c, shutdownCapability() */
2006 {
2007     Task *task = NULL;
2008
2009     task = newBoundTask();
2010
2011     // If we haven't killed all the threads yet, do it now.
2012     if (sched_state < SCHED_SHUTTING_DOWN) {
2013         sched_state = SCHED_INTERRUPTING;
2014         waitForReturnCapability(&task->cap,task);
2015         scheduleDoGC(task->cap,task,rtsFalse);
2016         ASSERT(task->incall->tso == NULL);
2017         releaseCapability(task->cap);
2018     }
2019     sched_state = SCHED_SHUTTING_DOWN;
2020
2021 #if defined(THREADED_RTS)
2022     {
2023         nat i;
2024
2025         for (i = 0; i < n_capabilities; i++) {
2026             ASSERT(task->incall->tso == NULL);
2027             shutdownCapability(&capabilities[i], task, wait_foreign);
2028         }
2029     }
2030 #endif
2031
2032     boundTaskExiting(task);
2033 }
2034
2035 void
2036 freeScheduler( void )
2037 {
2038     nat still_running;
2039
2040     ACQUIRE_LOCK(&sched_mutex);
2041     still_running = freeTaskManager();
2042     // We can only free the Capabilities if there are no Tasks still
2043     // running.  We might have a Task about to return from a foreign
2044     // call into waitForReturnCapability(), for example (actually,
2045     // this should be the *only* thing that a still-running Task can
2046     // do at this point, and it will block waiting for the
2047     // Capability).
2048     if (still_running == 0) {
2049         freeCapabilities();
2050         if (n_capabilities != 1) {
2051             stgFree(capabilities);
2052         }
2053     }
2054     RELEASE_LOCK(&sched_mutex);
2055 #if defined(THREADED_RTS)
2056     closeMutex(&sched_mutex);
2057 #endif
2058 }
2059
2060 /* -----------------------------------------------------------------------------
2061    performGC
2062
2063    This is the interface to the garbage collector from Haskell land.
2064    We provide this so that external C code can allocate and garbage
2065    collect when called from Haskell via _ccall_GC.
2066    -------------------------------------------------------------------------- */
2067
2068 static void
2069 performGC_(rtsBool force_major)
2070 {
2071     Task *task;
2072
2073     // We must grab a new Task here, because the existing Task may be
2074     // associated with a particular Capability, and chained onto the
2075     // suspended_ccalls queue.
2076     task = newBoundTask();
2077
2078     waitForReturnCapability(&task->cap,task);
2079     scheduleDoGC(task->cap,task,force_major);
2080     releaseCapability(task->cap);
2081     boundTaskExiting(task);
2082 }
2083
2084 void
2085 performGC(void)
2086 {
2087     performGC_(rtsFalse);
2088 }
2089
2090 void
2091 performMajorGC(void)
2092 {
2093     performGC_(rtsTrue);
2094 }
2095
2096 /* -----------------------------------------------------------------------------
2097    Stack overflow
2098
2099    If the thread has reached its maximum stack size, then raise the
2100    StackOverflow exception in the offending thread.  Otherwise
2101    relocate the TSO into a larger chunk of memory and adjust its stack
2102    size appropriately.
2103    -------------------------------------------------------------------------- */
2104
2105 static StgTSO *
2106 threadStackOverflow(Capability *cap, StgTSO *tso)
2107 {
2108   nat new_stack_size, stack_words;
2109   lnat new_tso_size;
2110   StgPtr new_sp;
2111   StgTSO *dest;
2112
2113   IF_DEBUG(sanity,checkTSO(tso));
2114
2115   if (tso->stack_size >= tso->max_stack_size
2116       && !(tso->flags & TSO_BLOCKEX)) {
2117       // NB. never raise a StackOverflow exception if the thread is
2118       // inside Control.Exceptino.block.  It is impractical to protect
2119       // against stack overflow exceptions, since virtually anything
2120       // can raise one (even 'catch'), so this is the only sensible
2121       // thing to do here.  See bug #767.
2122       //
2123
2124       if (tso->flags & TSO_SQUEEZED) {
2125           return tso;
2126       }
2127       // #3677: In a stack overflow situation, stack squeezing may
2128       // reduce the stack size, but we don't know whether it has been
2129       // reduced enough for the stack check to succeed if we try
2130       // again.  Fortunately stack squeezing is idempotent, so all we
2131       // need to do is record whether *any* squeezing happened.  If we
2132       // are at the stack's absolute -K limit, and stack squeezing
2133       // happened, then we try running the thread again.  The
2134       // TSO_SQUEEZED flag is set by threadPaused() to tell us whether
2135       // squeezing happened or not.
2136
2137       debugTrace(DEBUG_gc,
2138                  "threadStackOverflow of TSO %ld (%p): stack too large (now %ld; max is %ld)",
2139                  (long)tso->id, tso, (long)tso->stack_size, (long)tso->max_stack_size);
2140       IF_DEBUG(gc,
2141                /* If we're debugging, just print out the top of the stack */
2142                printStackChunk(tso->sp, stg_min(tso->stack+tso->stack_size,
2143                                                 tso->sp+64)));
2144
2145       // Send this thread the StackOverflow exception
2146       throwToSingleThreaded(cap, tso, (StgClosure *)stackOverflow_closure);
2147       return tso;
2148   }
2149
2150
2151   // We also want to avoid enlarging the stack if squeezing has
2152   // already released some of it.  However, we don't want to get into
2153   // a pathalogical situation where a thread has a nearly full stack
2154   // (near its current limit, but not near the absolute -K limit),
2155   // keeps allocating a little bit, squeezing removes a little bit,
2156   // and then it runs again.  So to avoid this, if we squeezed *and*
2157   // there is still less than BLOCK_SIZE_W words free, then we enlarge
2158   // the stack anyway.
2159   if ((tso->flags & TSO_SQUEEZED) &&
2160       ((W_)(tso->sp - tso->stack) >= BLOCK_SIZE_W)) {
2161       return tso;
2162   }
2163
2164   /* Try to double the current stack size.  If that takes us over the
2165    * maximum stack size for this thread, then use the maximum instead
2166    * (that is, unless we're already at or over the max size and we
2167    * can't raise the StackOverflow exception (see above), in which
2168    * case just double the size). Finally round up so the TSO ends up as
2169    * a whole number of blocks.
2170    */
2171   if (tso->stack_size >= tso->max_stack_size) {
2172       new_stack_size = tso->stack_size * 2;
2173   } else {
2174       new_stack_size = stg_min(tso->stack_size * 2, tso->max_stack_size);
2175   }
2176   new_tso_size   = (lnat)BLOCK_ROUND_UP(new_stack_size * sizeof(W_) +
2177                                        TSO_STRUCT_SIZE)/sizeof(W_);
2178   new_tso_size = round_to_mblocks(new_tso_size);  /* Be MBLOCK-friendly */
2179   new_stack_size = new_tso_size - TSO_STRUCT_SIZEW;
2180
2181   debugTrace(DEBUG_sched,
2182              "increasing stack size from %ld words to %d.",
2183              (long)tso->stack_size, new_stack_size);
2184
2185   dest = (StgTSO *)allocate(cap,new_tso_size);
2186   TICK_ALLOC_TSO(new_stack_size,0);
2187
2188   /* copy the TSO block and the old stack into the new area */
2189   memcpy(dest,tso,TSO_STRUCT_SIZE);
2190   stack_words = tso->stack + tso->stack_size - tso->sp;
2191   new_sp = (P_)dest + new_tso_size - stack_words;
2192   memcpy(new_sp, tso->sp, stack_words * sizeof(W_));
2193
2194   /* relocate the stack pointers... */
2195   dest->sp         = new_sp;
2196   dest->stack_size = new_stack_size;
2197
2198   /* Mark the old TSO as relocated.  We have to check for relocated
2199    * TSOs in the garbage collector and any primops that deal with TSOs.
2200    *
2201    * It's important to set the sp value to just beyond the end
2202    * of the stack, so we don't attempt to scavenge any part of the
2203    * dead TSO's stack.
2204    */
2205   setTSOLink(cap,tso,dest);
2206   write_barrier(); // other threads seeing ThreadRelocated will look at _link
2207   tso->what_next = ThreadRelocated;
2208   tso->sp = (P_)&(tso->stack[tso->stack_size]);
2209   tso->why_blocked = NotBlocked;
2210
2211   IF_DEBUG(sanity,checkTSO(dest));
2212 #if 0
2213   IF_DEBUG(scheduler,printTSO(dest));
2214 #endif
2215
2216   return dest;
2217 }
2218
2219 static StgTSO *
2220 threadStackUnderflow (Capability *cap, Task *task, StgTSO *tso)
2221 {
2222     bdescr *bd, *new_bd;
2223     lnat free_w, tso_size_w;
2224     StgTSO *new_tso;
2225
2226     tso_size_w = tso_sizeW(tso);
2227
2228     if (tso_size_w < MBLOCK_SIZE_W ||
2229           // TSO is less than 2 mblocks (since the first mblock is
2230           // shorter than MBLOCK_SIZE_W)
2231         (tso_size_w - BLOCKS_PER_MBLOCK*BLOCK_SIZE_W) % MBLOCK_SIZE_W != 0 ||
2232           // or TSO is not a whole number of megablocks (ensuring
2233           // precondition of splitLargeBlock() below)
2234         (tso_size_w <= round_up_to_mblocks(RtsFlags.GcFlags.initialStkSize)) ||
2235           // or TSO is smaller than the minimum stack size (rounded up)
2236         (nat)(tso->stack + tso->stack_size - tso->sp) > tso->stack_size / 4)
2237           // or stack is using more than 1/4 of the available space
2238     {
2239         // then do nothing
2240         return tso;
2241     }
2242
2243     // this is the number of words we'll free
2244     free_w = round_to_mblocks(tso_size_w/2);
2245
2246     bd = Bdescr((StgPtr)tso);
2247     new_bd = splitLargeBlock(bd, free_w / BLOCK_SIZE_W);
2248     bd->free = bd->start + TSO_STRUCT_SIZEW;
2249
2250     new_tso = (StgTSO *)new_bd->start;
2251     memcpy(new_tso,tso,TSO_STRUCT_SIZE);
2252     new_tso->stack_size = new_bd->free - new_tso->stack;
2253
2254     // The original TSO was dirty and probably on the mutable
2255     // list. The new TSO is not yet on the mutable list, so we better
2256     // put it there.
2257     new_tso->dirty = 0;
2258     new_tso->flags &= ~TSO_LINK_DIRTY;
2259     dirty_TSO(cap, new_tso);
2260
2261     debugTrace(DEBUG_sched, "thread %ld: reducing TSO size from %lu words to %lu",
2262                (long)tso->id, tso_size_w, tso_sizeW(new_tso));
2263
2264     tso->_link = new_tso; // no write barrier reqd: same generation
2265     write_barrier(); // other threads seeing ThreadRelocated will look at _link
2266     tso->what_next = ThreadRelocated;
2267
2268     // The TSO attached to this Task may have moved, so update the
2269     // pointer to it.
2270     if (task->incall->tso == tso) {
2271         task->incall->tso = new_tso;
2272     }
2273
2274     IF_DEBUG(sanity,checkTSO(new_tso));
2275
2276     return new_tso;
2277 }
2278
2279 /* ---------------------------------------------------------------------------
2280    Interrupt execution
2281    - usually called inside a signal handler so it mustn't do anything fancy.
2282    ------------------------------------------------------------------------ */
2283
2284 void
2285 interruptStgRts(void)
2286 {
2287     sched_state = SCHED_INTERRUPTING;
2288     setContextSwitches();
2289 #if defined(THREADED_RTS)
2290     wakeUpRts();
2291 #endif
2292 }
2293
2294 /* -----------------------------------------------------------------------------
2295    Wake up the RTS
2296
2297    This function causes at least one OS thread to wake up and run the
2298    scheduler loop.  It is invoked when the RTS might be deadlocked, or
2299    an external event has arrived that may need servicing (eg. a
2300    keyboard interrupt).
2301
2302    In the single-threaded RTS we don't do anything here; we only have
2303    one thread anyway, and the event that caused us to want to wake up
2304    will have interrupted any blocking system call in progress anyway.
2305    -------------------------------------------------------------------------- */
2306
2307 #if defined(THREADED_RTS)
2308 void wakeUpRts(void)
2309 {
2310     // This forces the IO Manager thread to wakeup, which will
2311     // in turn ensure that some OS thread wakes up and runs the
2312     // scheduler loop, which will cause a GC and deadlock check.
2313     ioManagerWakeup();
2314 }
2315 #endif
2316
2317 /* -----------------------------------------------------------------------------
2318    Deleting threads
2319
2320    This is used for interruption (^C) and forking, and corresponds to
2321    raising an exception but without letting the thread catch the
2322    exception.
2323    -------------------------------------------------------------------------- */
2324
2325 static void
2326 deleteThread (Capability *cap STG_UNUSED, StgTSO *tso)
2327 {
2328     // NOTE: must only be called on a TSO that we have exclusive
2329     // access to, because we will call throwToSingleThreaded() below.
2330     // The TSO must be on the run queue of the Capability we own, or
2331     // we must own all Capabilities.
2332
2333     if (tso->why_blocked != BlockedOnCCall &&
2334         tso->why_blocked != BlockedOnCCall_Interruptible) {
2335         throwToSingleThreaded(tso->cap,tso,NULL);
2336     }
2337 }
2338
2339 #ifdef FORKPROCESS_PRIMOP_SUPPORTED
2340 static void
2341 deleteThread_(Capability *cap, StgTSO *tso)
2342 { // for forkProcess only:
2343   // like deleteThread(), but we delete threads in foreign calls, too.
2344
2345     if (tso->why_blocked == BlockedOnCCall ||
2346         tso->why_blocked == BlockedOnCCall_Interruptible) {
2347         tso->what_next = ThreadKilled;
2348         appendToRunQueue(tso->cap, tso);
2349     } else {
2350         deleteThread(cap,tso);
2351     }
2352 }
2353 #endif
2354
2355 /* -----------------------------------------------------------------------------
2356    raiseExceptionHelper
2357
2358    This function is called by the raise# primitve, just so that we can
2359    move some of the tricky bits of raising an exception from C-- into
2360    C.  Who knows, it might be a useful re-useable thing here too.
2361    -------------------------------------------------------------------------- */
2362
2363 StgWord
2364 raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception)
2365 {
2366     Capability *cap = regTableToCapability(reg);
2367     StgThunk *raise_closure = NULL;
2368     StgPtr p, next;
2369     StgRetInfoTable *info;
2370     //
2371     // This closure represents the expression 'raise# E' where E
2372     // is the exception raise.  It is used to overwrite all the
2373     // thunks which are currently under evaluataion.
2374     //
2375
2376     // OLD COMMENT (we don't have MIN_UPD_SIZE now):
2377     // LDV profiling: stg_raise_info has THUNK as its closure
2378     // type. Since a THUNK takes at least MIN_UPD_SIZE words in its
2379     // payload, MIN_UPD_SIZE is more approprate than 1.  It seems that
2380     // 1 does not cause any problem unless profiling is performed.
2381     // However, when LDV profiling goes on, we need to linearly scan
2382     // small object pool, where raise_closure is stored, so we should
2383     // use MIN_UPD_SIZE.
2384     //
2385     // raise_closure = (StgClosure *)RET_STGCALL1(P_,allocate,
2386     //                                 sizeofW(StgClosure)+1);
2387     //
2388
2389     //
2390     // Walk up the stack, looking for the catch frame.  On the way,
2391     // we update any closures pointed to from update frames with the
2392     // raise closure that we just built.
2393     //
2394     p = tso->sp;
2395     while(1) {
2396         info = get_ret_itbl((StgClosure *)p);
2397         next = p + stack_frame_sizeW((StgClosure *)p);
2398         switch (info->i.type) {
2399
2400         case UPDATE_FRAME:
2401             // Only create raise_closure if we need to.
2402             if (raise_closure == NULL) {
2403                 raise_closure =
2404                     (StgThunk *)allocate(cap,sizeofW(StgThunk)+1);
2405                 SET_HDR(raise_closure, &stg_raise_info, CCCS);
2406                 raise_closure->payload[0] = exception;
2407             }
2408             updateThunk(cap, tso, ((StgUpdateFrame *)p)->updatee,
2409                         (StgClosure *)raise_closure);
2410             p = next;
2411             continue;
2412
2413         case ATOMICALLY_FRAME:
2414             debugTrace(DEBUG_stm, "found ATOMICALLY_FRAME at %p", p);
2415             tso->sp = p;
2416             return ATOMICALLY_FRAME;
2417
2418         case CATCH_FRAME:
2419             tso->sp = p;
2420             return CATCH_FRAME;
2421
2422         case CATCH_STM_FRAME:
2423             debugTrace(DEBUG_stm, "found CATCH_STM_FRAME at %p", p);
2424             tso->sp = p;
2425             return CATCH_STM_FRAME;
2426
2427         case STOP_FRAME:
2428             tso->sp = p;
2429             return STOP_FRAME;
2430
2431         case CATCH_RETRY_FRAME:
2432         default:
2433             p = next;
2434             continue;
2435         }
2436     }
2437 }
2438
2439
2440 /* -----------------------------------------------------------------------------
2441    findRetryFrameHelper
2442
2443    This function is called by the retry# primitive.  It traverses the stack
2444    leaving tso->sp referring to the frame which should handle the retry.
2445
2446    This should either be a CATCH_RETRY_FRAME (if the retry# is within an orElse#)
2447    or should be a ATOMICALLY_FRAME (if the retry# reaches the top level).
2448
2449    We skip CATCH_STM_FRAMEs (aborting and rolling back the nested tx that they
2450    create) because retries are not considered to be exceptions, despite the
2451    similar implementation.
2452
2453    We should not expect to see CATCH_FRAME or STOP_FRAME because those should
2454    not be created within memory transactions.
2455    -------------------------------------------------------------------------- */
2456
2457 StgWord
2458 findRetryFrameHelper (StgTSO *tso)
2459 {
2460   StgPtr           p, next;
2461   StgRetInfoTable *info;
2462
2463   p = tso -> sp;
2464   while (1) {
2465     info = get_ret_itbl((StgClosure *)p);
2466     next = p + stack_frame_sizeW((StgClosure *)p);
2467     switch (info->i.type) {
2468
2469     case ATOMICALLY_FRAME:
2470         debugTrace(DEBUG_stm,
2471                    "found ATOMICALLY_FRAME at %p during retry", p);
2472         tso->sp = p;
2473         return ATOMICALLY_FRAME;
2474
2475     case CATCH_RETRY_FRAME:
2476         debugTrace(DEBUG_stm,
2477                    "found CATCH_RETRY_FRAME at %p during retrry", p);
2478         tso->sp = p;
2479         return CATCH_RETRY_FRAME;
2480
2481     case CATCH_STM_FRAME: {
2482         StgTRecHeader *trec = tso -> trec;
2483         StgTRecHeader *outer = trec -> enclosing_trec;
2484         debugTrace(DEBUG_stm,
2485                    "found CATCH_STM_FRAME at %p during retry", p);
2486         debugTrace(DEBUG_stm, "trec=%p outer=%p", trec, outer);
2487         stmAbortTransaction(tso -> cap, trec);
2488         stmFreeAbortedTRec(tso -> cap, trec);
2489         tso -> trec = outer;
2490         p = next;
2491         continue;
2492     }
2493
2494
2495     default:
2496       ASSERT(info->i.type != CATCH_FRAME);
2497       ASSERT(info->i.type != STOP_FRAME);
2498       p = next;
2499       continue;
2500     }
2501   }
2502 }
2503
2504 /* -----------------------------------------------------------------------------
2505    resurrectThreads is called after garbage collection on the list of
2506    threads found to be garbage.  Each of these threads will be woken
2507    up and sent a signal: BlockedOnDeadMVar if the thread was blocked
2508    on an MVar, or NonTermination if the thread was blocked on a Black
2509    Hole.
2510
2511    Locks: assumes we hold *all* the capabilities.
2512    -------------------------------------------------------------------------- */
2513
2514 void
2515 resurrectThreads (StgTSO *threads)
2516 {
2517     StgTSO *tso, *next;
2518     Capability *cap;
2519     generation *gen;
2520
2521     for (tso = threads; tso != END_TSO_QUEUE; tso = next) {
2522         next = tso->global_link;
2523
2524         gen = Bdescr((P_)tso)->gen;
2525         tso->global_link = gen->threads;
2526         gen->threads = tso;
2527
2528         debugTrace(DEBUG_sched, "resurrecting thread %lu", (unsigned long)tso->id);
2529
2530         // Wake up the thread on the Capability it was last on
2531         cap = tso->cap;
2532
2533         switch (tso->why_blocked) {
2534         case BlockedOnMVar:
2535             /* Called by GC - sched_mutex lock is currently held. */
2536             throwToSingleThreaded(cap, tso,
2537                                   (StgClosure *)blockedIndefinitelyOnMVar_closure);
2538             break;
2539         case BlockedOnBlackHole:
2540             throwToSingleThreaded(cap, tso,
2541                                   (StgClosure *)nonTermination_closure);
2542             break;
2543         case BlockedOnSTM:
2544             throwToSingleThreaded(cap, tso,
2545                                   (StgClosure *)blockedIndefinitelyOnSTM_closure);
2546             break;
2547         case NotBlocked:
2548             /* This might happen if the thread was blocked on a black hole
2549              * belonging to a thread that we've just woken up (raiseAsync
2550              * can wake up threads, remember...).
2551              */
2552             continue;
2553         case BlockedOnMsgThrowTo:
2554             // This can happen if the target is masking, blocks on a
2555             // black hole, and then is found to be unreachable.  In
2556             // this case, we want to let the target wake up and carry
2557             // on, and do nothing to this thread.
2558             continue;
2559         default:
2560             barf("resurrectThreads: thread blocked in a strange way: %d",
2561                  tso->why_blocked);
2562         }
2563     }
2564 }