ghc/rts/Schedule.c

   1 /* ---------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2005
   4  *
   5  * The scheduler and thread-related functionality
   6  *
   7  * --------------------------------------------------------------------------*/
   8
   9 #include "PosixSource.h"
  10 #include "Rts.h"
  11 #include "SchedAPI.h"
  12 #include "RtsUtils.h"
  13 #include "RtsFlags.h"
  14 #include "BlockAlloc.h"
  15 #include "OSThreads.h"
  16 #include "Storage.h"
  17 #include "StgRun.h"
  18 #include "Hooks.h"
  19 #include "Schedule.h"
  20 #include "StgMiscClosures.h"
  21 #include "Interpreter.h"
  22 #include "Exception.h"
  23 #include "Printer.h"
  24 #include "RtsSignals.h"
  25 #include "Sanity.h"
  26 #include "Stats.h"
  27 #include "STM.h"
  28 #include "Timer.h"
  29 #include "Prelude.h"
  30 #include "ThreadLabels.h"
  31 #include "LdvProfile.h"
  32 #include "Updates.h"
  33 #ifdef PROFILING
  34 #include "Proftimer.h"
  35 #include "ProfHeap.h"
  36 #endif
  37 #if defined(GRAN) || defined(PARALLEL_HASKELL)
  38 # include "GranSimRts.h"
  39 # include "GranSim.h"
  40 # include "ParallelRts.h"
  41 # include "Parallel.h"
  42 # include "ParallelDebug.h"
  43 # include "FetchMe.h"
  44 # include "HLC.h"
  45 #endif
  46 #include "Sparks.h"
  47 #include "Capability.h"
  48 #include "Task.h"
  49 #include "AwaitEvent.h"
  50 #if defined(mingw32_HOST_OS)
  51 #include "win32/IOManager.h"
  52 #endif
  53
  54 #ifdef HAVE_SYS_TYPES_H
  55 #include <sys/types.h>
  56 #endif
  57 #ifdef HAVE_UNISTD_H
  58 #include <unistd.h>
  59 #endif
  60
  61 #include <string.h>
  62 #include <stdlib.h>
  63 #include <stdarg.h>
  64
  65 #ifdef HAVE_ERRNO_H
  66 #include <errno.h>
  67 #endif
  68
  69 // Turn off inlining when debugging - it obfuscates things
  70 #ifdef DEBUG
  71 # undef  STATIC_INLINE
  72 # define STATIC_INLINE static
  73 #endif
  74
  75 /* -----------------------------------------------------------------------------
  76  * Global variables
  77  * -------------------------------------------------------------------------- */
  78
  79 #if defined(GRAN)
  80
  81 StgTSO* ActiveTSO = NULL; /* for assigning system costs; GranSim-Light only */
  82 /* rtsTime TimeOfNextEvent, EndOfTimeSlice;            now in GranSim.c */
  83
  84 /*
  85    In GranSim we have a runnable and a blocked queue for each processor.
  86    In order to minimise code changes new arrays run_queue_hds/tls
  87    are created. run_queue_hd is then a short cut (macro) for
  88    run_queue_hds[CurrentProc] (see GranSim.h).
  89    -- HWL
  90 */
  91 StgTSO *run_queue_hds[MAX_PROC], *run_queue_tls[MAX_PROC];
  92 StgTSO *blocked_queue_hds[MAX_PROC], *blocked_queue_tls[MAX_PROC];
  93 StgTSO *ccalling_threadss[MAX_PROC];
  94 /* We use the same global list of threads (all_threads) in GranSim as in
  95    the std RTS (i.e. we are cheating). However, we don't use this list in
  96    the GranSim specific code at the moment (so we are only potentially
  97    cheating).  */
  98
  99 #else /* !GRAN */
 100
 101 #if !defined(THREADED_RTS)
 102 // Blocked/sleeping thrads
 103 StgTSO *blocked_queue_hd = NULL;
 104 StgTSO *blocked_queue_tl = NULL;
 105 StgTSO *sleeping_queue = NULL;    // perhaps replace with a hash table?
 106 #endif
 107
 108 /* Threads blocked on blackholes.
 109  * LOCK: sched_mutex+capability, or all capabilities
 110  */
 111 StgTSO *blackhole_queue = NULL;
 112 #endif
 113
 114 /* The blackhole_queue should be checked for threads to wake up.  See
 115  * Schedule.h for more thorough comment.
 116  * LOCK: none (doesn't matter if we miss an update)
 117  */
 118 rtsBool blackholes_need_checking = rtsFalse;
 119
 120 /* Linked list of all threads.
 121  * Used for detecting garbage collected threads.
 122  * LOCK: sched_mutex+capability, or all capabilities
 123  */
 124 StgTSO *all_threads = NULL;
 125
 126 /* flag set by signal handler to precipitate a context switch
 127  * LOCK: none (just an advisory flag)
 128  */
 129 int context_switch = 0;
 130
 131 /* flag that tracks whether we have done any execution in this time slice.
 132  * LOCK: currently none, perhaps we should lock (but needs to be
 133  * updated in the fast path of the scheduler).
 134  */
 135 nat recent_activity = ACTIVITY_YES;
 136
 137 /* if this flag is set as well, give up execution
 138  * LOCK: none (changes once, from false->true)
 139  */
 140 rtsBool sched_state = SCHED_RUNNING;
 141
 142 /* Next thread ID to allocate.
 143  * LOCK: sched_mutex
 144  */
 145 static StgThreadID next_thread_id = 1;
 146
 147 /* The smallest stack size that makes any sense is:
 148  *    RESERVED_STACK_WORDS    (so we can get back from the stack overflow)
 149  *  + sizeofW(StgStopFrame)   (the stg_stop_thread_info frame)
 150  *  + 1                       (the closure to enter)
 151  *  + 1                       (stg_ap_v_ret)
 152  *  + 1                       (spare slot req'd by stg_ap_v_ret)
 153  *
 154  * A thread with this stack will bomb immediately with a stack
 155  * overflow, which will increase its stack size.
 156  */
 157 #define MIN_STACK_WORDS (RESERVED_STACK_WORDS + sizeofW(StgStopFrame) + 3)
 158
 159 #if defined(GRAN)
 160 StgTSO *CurrentTSO;
 161 #endif
 162
 163 /*  This is used in `TSO.h' and gcc 2.96 insists that this variable actually
 164  *  exists - earlier gccs apparently didn't.
 165  *  -= chak
 166  */
 167 StgTSO dummy_tso;
 168
 169 /*
 170  * Set to TRUE when entering a shutdown state (via shutdownHaskellAndExit()) --
 171  * in an MT setting, needed to signal that a worker thread shouldn't hang around
 172  * in the scheduler when it is out of work.
 173  */
 174 rtsBool shutting_down_scheduler = rtsFalse;
 175
 176 /*
 177  * This mutex protects most of the global scheduler data in
 178  * the THREADED_RTS runtime.
 179  */
 180 #if defined(THREADED_RTS)
 181 Mutex sched_mutex;
 182 #endif
 183
 184 #if defined(PARALLEL_HASKELL)
 185 StgTSO *LastTSO;
 186 rtsTime TimeOfLastYield;
 187 rtsBool emitSchedule = rtsTrue;
 188 #endif
 189
 190 /* -----------------------------------------------------------------------------
 191  * static function prototypes
 192  * -------------------------------------------------------------------------- */
 193
 194 static Capability *schedule (Capability *initialCapability, Task *task);
 195
 196 //
 197 // These function all encapsulate parts of the scheduler loop, and are
 198 // abstracted only to make the structure and control flow of the
 199 // scheduler clearer.
 200 //
 201 static void schedulePreLoop (void);
 202 #if defined(THREADED_RTS)
 203 static void schedulePushWork(Capability *cap, Task *task);
 204 #endif
 205 static void scheduleStartSignalHandlers (Capability *cap);
 206 static void scheduleCheckBlockedThreads (Capability *cap);
 207 static void scheduleCheckBlackHoles (Capability *cap);
 208 static void scheduleDetectDeadlock (Capability *cap, Task *task);
 209 #if defined(GRAN)
 210 static StgTSO *scheduleProcessEvent(rtsEvent *event);
 211 #endif
 212 #if defined(PARALLEL_HASKELL)
 213 static StgTSO *scheduleSendPendingMessages(void);
 214 static void scheduleActivateSpark(void);
 215 static rtsBool scheduleGetRemoteWork(rtsBool *receivedFinish);
 216 #endif
 217 #if defined(PAR) || defined(GRAN)
 218 static void scheduleGranParReport(void);
 219 #endif
 220 static void schedulePostRunThread(void);
 221 static rtsBool scheduleHandleHeapOverflow( Capability *cap, StgTSO *t );
 222 static void scheduleHandleStackOverflow( Capability *cap, Task *task,
 223                                          StgTSO *t);
 224 static rtsBool scheduleHandleYield( Capability *cap, StgTSO *t,
 225                                     nat prev_what_next );
 226 static void scheduleHandleThreadBlocked( StgTSO *t );
 227 static rtsBool scheduleHandleThreadFinished( Capability *cap, Task *task,
 228                                              StgTSO *t );
 229 static rtsBool scheduleDoHeapProfile(rtsBool ready_to_gc);
 230 static Capability *scheduleDoGC(Capability *cap, Task *task,
 231                                 rtsBool force_major,
 232                                 void (*get_roots)(evac_fn));
 233
 234 static void unblockThread(Capability *cap, StgTSO *tso);
 235 static rtsBool checkBlackHoles(Capability *cap);
 236 static void AllRoots(evac_fn evac);
 237
 238 static StgTSO *threadStackOverflow(Capability *cap, StgTSO *tso);
 239
 240 static void raiseAsync_(Capability *cap, StgTSO *tso, StgClosure *exception,
 241                         rtsBool stop_at_atomically, StgPtr stop_here);
 242
 243 static void deleteThread (Capability *cap, StgTSO *tso);
 244 static void deleteAllThreads (Capability *cap);
 245
 246 #ifdef DEBUG
 247 static void printThreadBlockage(StgTSO *tso);
 248 static void printThreadStatus(StgTSO *tso);
 249 void printThreadQueue(StgTSO *tso);
 250 #endif
 251
 252 #if defined(PARALLEL_HASKELL)
 253 StgTSO * createSparkThread(rtsSpark spark);
 254 StgTSO * activateSpark (rtsSpark spark);
 255 #endif
 256
 257 #ifdef DEBUG
 258 static char *whatNext_strs[] = {
 259   "(unknown)",
 260   "ThreadRunGHC",
 261   "ThreadInterpret",
 262   "ThreadKilled",
 263   "ThreadRelocated",
 264   "ThreadComplete"
 265 };
 266 #endif
 267
 268 /* -----------------------------------------------------------------------------
 269  * Putting a thread on the run queue: different scheduling policies
 270  * -------------------------------------------------------------------------- */
 271
 272 STATIC_INLINE void
 273 addToRunQueue( Capability *cap, StgTSO *t )
 274 {
 275 #if defined(PARALLEL_HASKELL)
 276     if (RtsFlags.ParFlags.doFairScheduling) {
 277         // this does round-robin scheduling; good for concurrency
 278         appendToRunQueue(cap,t);
 279     } else {
 280         // this does unfair scheduling; good for parallelism
 281         pushOnRunQueue(cap,t);
 282     }
 283 #else
 284     // this does round-robin scheduling; good for concurrency
 285     appendToRunQueue(cap,t);
 286 #endif
 287 }
 288
 289 /* ---------------------------------------------------------------------------
 290    Main scheduling loop.
 291
 292    We use round-robin scheduling, each thread returning to the
 293    scheduler loop when one of these conditions is detected:
 294
 295       * out of heap space
 296       * timer expires (thread yields)
 297       * thread blocks
 298       * thread ends
 299       * stack overflow
 300
 301    GRAN version:
 302      In a GranSim setup this loop iterates over the global event queue.
 303      This revolves around the global event queue, which determines what
 304      to do next. Therefore, it's more complicated than either the
 305      concurrent or the parallel (GUM) setup.
 306
 307    GUM version:
 308      GUM iterates over incoming messages.
 309      It starts with nothing to do (thus CurrentTSO == END_TSO_QUEUE),
 310      and sends out a fish whenever it has nothing to do; in-between
 311      doing the actual reductions (shared code below) it processes the
 312      incoming messages and deals with delayed operations
 313      (see PendingFetches).
 314      This is not the ugliest code you could imagine, but it's bloody close.
 315
 316    ------------------------------------------------------------------------ */
 317
 318 static Capability *
 319 schedule (Capability *initialCapability, Task *task)
 320 {
 321   StgTSO *t;
 322   Capability *cap;
 323   StgThreadReturnCode ret;
 324 #if defined(GRAN)
 325   rtsEvent *event;
 326 #elif defined(PARALLEL_HASKELL)
 327   StgTSO *tso;
 328   GlobalTaskId pe;
 329   rtsBool receivedFinish = rtsFalse;
 330 # if defined(DEBUG)
 331   nat tp_size, sp_size; // stats only
 332 # endif
 333 #endif
 334   nat prev_what_next;
 335   rtsBool ready_to_gc;
 336 #if defined(THREADED_RTS)
 337   rtsBool first = rtsTrue;
 338 #endif
 339
 340   cap = initialCapability;
 341
 342   // Pre-condition: this task owns initialCapability.
 343   // The sched_mutex is *NOT* held
 344   // NB. on return, we still hold a capability.
 345
 346   IF_DEBUG(scheduler,
 347            sched_belch("### NEW SCHEDULER LOOP (task: %p, cap: %p)",
 348                        task, initialCapability);
 349       );
 350
 351   schedulePreLoop();
 352
 353   // -----------------------------------------------------------
 354   // Scheduler loop starts here:
 355
 356 #if defined(PARALLEL_HASKELL)
 357 #define TERMINATION_CONDITION        (!receivedFinish)
 358 #elif defined(GRAN)
 359 #define TERMINATION_CONDITION        ((event = get_next_event()) != (rtsEvent*)NULL)
 360 #else
 361 #define TERMINATION_CONDITION        rtsTrue
 362 #endif
 363
 364   while (TERMINATION_CONDITION) {
 365
 366 #if defined(GRAN)
 367       /* Choose the processor with the next event */
 368       CurrentProc = event->proc;
 369       CurrentTSO = event->tso;
 370 #endif
 371
 372 #if defined(THREADED_RTS)
 373       if (first) {
 374           // don't yield the first time, we want a chance to run this
 375           // thread for a bit, even if there are others banging at the
 376           // door.
 377           first = rtsFalse;
 378           ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 379       } else {
 380           // Yield the capability to higher-priority tasks if necessary.
 381           yieldCapability(&cap, task);
 382       }
 383 #endif
 384
 385 #if defined(THREADED_RTS)
 386       schedulePushWork(cap,task);
 387 #endif
 388
 389     // Check whether we have re-entered the RTS from Haskell without
 390     // going via suspendThread()/resumeThread (i.e. a 'safe' foreign
 391     // call).
 392     if (cap->in_haskell) {
 393           errorBelch("schedule: re-entered unsafely.\n"
 394                      "   Perhaps a 'foreign import unsafe' should be 'safe'?");
 395           stg_exit(EXIT_FAILURE);
 396     }
 397
 398     // The interruption / shutdown sequence.
 399     //
 400     // In order to cleanly shut down the runtime, we want to:
 401     //   * make sure that all main threads return to their callers
 402     //     with the state 'Interrupted'.
 403     //   * clean up all OS threads assocated with the runtime
 404     //   * free all memory etc.
 405     //
 406     // So the sequence for ^C goes like this:
 407     //
 408     //   * ^C handler sets sched_state := SCHED_INTERRUPTING and
 409     //     arranges for some Capability to wake up
 410     //
 411     //   * all threads in the system are halted, and the zombies are
 412     //     placed on the run queue for cleaning up.  We acquire all
 413     //     the capabilities in order to delete the threads, this is
 414     //     done by scheduleDoGC() for convenience (because GC already
 415     //     needs to acquire all the capabilities).  We can't kill
 416     //     threads involved in foreign calls.
 417     //
 418     //   * sched_state := SCHED_INTERRUPTED
 419     //
 420     //   * somebody calls shutdownHaskell(), which calls exitScheduler()
 421     //
 422     //   * sched_state := SCHED_SHUTTING_DOWN
 423     //
 424     //   * all workers exit when the run queue on their capability
 425     //     drains.  All main threads will also exit when their TSO
 426     //     reaches the head of the run queue and they can return.
 427     //
 428     //   * eventually all Capabilities will shut down, and the RTS can
 429     //     exit.
 430     //
 431     //   * We might be left with threads blocked in foreign calls,
 432     //     we should really attempt to kill these somehow (TODO);
 433
 434     switch (sched_state) {
 435     case SCHED_RUNNING:
 436         break;
 437     case SCHED_INTERRUPTING:
 438         IF_DEBUG(scheduler, sched_belch("SCHED_INTERRUPTING"));
 439 #if defined(THREADED_RTS)
 440         discardSparksCap(cap);
 441 #endif
 442         /* scheduleDoGC() deletes all the threads */
 443         cap = scheduleDoGC(cap,task,rtsFalse,GetRoots);
 444         break;
 445     case SCHED_INTERRUPTED:
 446         IF_DEBUG(scheduler, sched_belch("SCHED_INTERRUPTED"));
 447         break;
 448     case SCHED_SHUTTING_DOWN:
 449         IF_DEBUG(scheduler, sched_belch("SCHED_SHUTTING_DOWN"));
 450         // If we are a worker, just exit.  If we're a bound thread
 451         // then we will exit below when we've removed our TSO from
 452         // the run queue.
 453         if (task->tso == NULL && emptyRunQueue(cap)) {
 454             return cap;
 455         }
 456         break;
 457     default:
 458         barf("sched_state: %d", sched_state);
 459     }
 460
 461 #if defined(THREADED_RTS)
 462     // If the run queue is empty, take a spark and turn it into a thread.
 463     {
 464         if (emptyRunQueue(cap)) {
 465             StgClosure *spark;
 466             spark = findSpark(cap);
 467             if (spark != NULL) {
 468                 IF_DEBUG(scheduler,
 469                          sched_belch("turning spark of closure %p into a thread",
 470                                      (StgClosure *)spark));
 471                 createSparkThread(cap,spark);
 472             }
 473         }
 474     }
 475 #endif // THREADED_RTS
 476
 477     scheduleStartSignalHandlers(cap);
 478
 479     // Only check the black holes here if we've nothing else to do.
 480     // During normal execution, the black hole list only gets checked
 481     // at GC time, to avoid repeatedly traversing this possibly long
 482     // list each time around the scheduler.
 483     if (emptyRunQueue(cap)) { scheduleCheckBlackHoles(cap); }
 484
 485     scheduleCheckBlockedThreads(cap);
 486
 487     scheduleDetectDeadlock(cap,task);
 488 #if defined(THREADED_RTS)
 489     cap = task->cap;    // reload cap, it might have changed
 490 #endif
 491
 492     // Normally, the only way we can get here with no threads to
 493     // run is if a keyboard interrupt received during
 494     // scheduleCheckBlockedThreads() or scheduleDetectDeadlock().
 495     // Additionally, it is not fatal for the
 496     // threaded RTS to reach here with no threads to run.
 497     //
 498     // win32: might be here due to awaitEvent() being abandoned
 499     // as a result of a console event having been delivered.
 500     if ( emptyRunQueue(cap) ) {
 501 #if !defined(THREADED_RTS) && !defined(mingw32_HOST_OS)
 502         ASSERT(sched_state >= SCHED_INTERRUPTING);
 503 #endif
 504         continue; // nothing to do
 505     }
 506
 507 #if defined(PARALLEL_HASKELL)
 508     scheduleSendPendingMessages();
 509     if (emptyRunQueue(cap) && scheduleActivateSpark())
 510         continue;
 511
 512 #if defined(SPARKS)
 513     ASSERT(next_fish_to_send_at==0);  // i.e. no delayed fishes left!
 514 #endif
 515
 516     /* If we still have no work we need to send a FISH to get a spark
 517        from another PE */
 518     if (emptyRunQueue(cap)) {
 519         if (!scheduleGetRemoteWork(&receivedFinish)) continue;
 520         ASSERT(rtsFalse); // should not happen at the moment
 521     }
 522     // from here: non-empty run queue.
 523     //  TODO: merge above case with this, only one call processMessages() !
 524     if (PacketsWaiting()) {  /* process incoming messages, if
 525                                 any pending...  only in else
 526                                 because getRemoteWork waits for
 527                                 messages as well */
 528         receivedFinish = processMessages();
 529     }
 530 #endif
 531
 532 #if defined(GRAN)
 533     scheduleProcessEvent(event);
 534 #endif
 535
 536     //
 537     // Get a thread to run
 538     //
 539     t = popRunQueue(cap);
 540
 541 #if defined(GRAN) || defined(PAR)
 542     scheduleGranParReport(); // some kind of debuging output
 543 #else
 544     // Sanity check the thread we're about to run.  This can be
 545     // expensive if there is lots of thread switching going on...
 546     IF_DEBUG(sanity,checkTSO(t));
 547 #endif
 548
 549 #if defined(THREADED_RTS)
 550     // Check whether we can run this thread in the current task.
 551     // If not, we have to pass our capability to the right task.
 552     {
 553         Task *bound = t->bound;
 554
 555         if (bound) {
 556             if (bound == task) {
 557                 IF_DEBUG(scheduler,
 558                          sched_belch("### Running thread %d in bound thread",
 559                                      t->id));
 560                 // yes, the Haskell thread is bound to the current native thread
 561             } else {
 562                 IF_DEBUG(scheduler,
 563                          sched_belch("### thread %d bound to another OS thread",
 564                                      t->id));
 565                 // no, bound to a different Haskell thread: pass to that thread
 566                 pushOnRunQueue(cap,t);
 567                 continue;
 568             }
 569         } else {
 570             // The thread we want to run is unbound.
 571             if (task->tso) {
 572                 IF_DEBUG(scheduler,
 573                          sched_belch("### this OS thread cannot run thread %d", t->id));
 574                 // no, the current native thread is bound to a different
 575                 // Haskell thread, so pass it to any worker thread
 576                 pushOnRunQueue(cap,t);
 577                 continue;
 578             }
 579         }
 580     }
 581 #endif
 582
 583     cap->r.rCurrentTSO = t;
 584
 585     /* context switches are initiated by the timer signal, unless
 586      * the user specified "context switch as often as possible", with
 587      * +RTS -C0
 588      */
 589     if (RtsFlags.ConcFlags.ctxtSwitchTicks == 0
 590         && !emptyThreadQueues(cap)) {
 591         context_switch = 1;
 592     }
 593
 594 run_thread:
 595
 596     IF_DEBUG(scheduler, sched_belch("-->> running thread %ld %s ...",
 597                               (long)t->id, whatNext_strs[t->what_next]));
 598
 599 #if defined(PROFILING)
 600     startHeapProfTimer();
 601 #endif
 602
 603     // ----------------------------------------------------------------------
 604     // Run the current thread
 605
 606     ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 607
 608     prev_what_next = t->what_next;
 609
 610     errno = t->saved_errno;
 611     cap->in_haskell = rtsTrue;
 612
 613     dirtyTSO(t);
 614
 615     recent_activity = ACTIVITY_YES;
 616
 617     switch (prev_what_next) {
 618
 619     case ThreadKilled:
 620     case ThreadComplete:
 621         /* Thread already finished, return to scheduler. */
 622         ret = ThreadFinished;
 623         break;
 624
 625     case ThreadRunGHC:
 626     {
 627         StgRegTable *r;
 628         r = StgRun((StgFunPtr) stg_returnToStackTop, &cap->r);
 629         cap = regTableToCapability(r);
 630         ret = r->rRet;
 631         break;
 632     }
 633
 634     case ThreadInterpret:
 635         cap = interpretBCO(cap);
 636         ret = cap->r.rRet;
 637         break;
 638
 639     default:
 640         barf("schedule: invalid what_next field");
 641     }
 642
 643     cap->in_haskell = rtsFalse;
 644
 645     // The TSO might have moved, eg. if it re-entered the RTS and a GC
 646     // happened.  So find the new location:
 647     t = cap->r.rCurrentTSO;
 648
 649     // We have run some Haskell code: there might be blackhole-blocked
 650     // threads to wake up now.
 651     // Lock-free test here should be ok, we're just setting a flag.
 652     if ( blackhole_queue != END_TSO_QUEUE ) {
 653         blackholes_need_checking = rtsTrue;
 654     }
 655
 656     // And save the current errno in this thread.
 657     // XXX: possibly bogus for SMP because this thread might already
 658     // be running again, see code below.
 659     t->saved_errno = errno;
 660
 661 #if defined(THREADED_RTS)
 662     // If ret is ThreadBlocked, and this Task is bound to the TSO that
 663     // blocked, we are in limbo - the TSO is now owned by whatever it
 664     // is blocked on, and may in fact already have been woken up,
 665     // perhaps even on a different Capability.  It may be the case
 666     // that task->cap != cap.  We better yield this Capability
 667     // immediately and return to normaility.
 668     if (ret == ThreadBlocked) {
 669         IF_DEBUG(scheduler,
 670                  sched_belch("--<< thread %d (%s) stopped: blocked\n",
 671                              t->id, whatNext_strs[t->what_next]));
 672         continue;
 673     }
 674 #endif
 675
 676     ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 677
 678     // ----------------------------------------------------------------------
 679
 680     // Costs for the scheduler are assigned to CCS_SYSTEM
 681 #if defined(PROFILING)
 682     stopHeapProfTimer();
 683     CCCS = CCS_SYSTEM;
 684 #endif
 685
 686 #if defined(THREADED_RTS)
 687     IF_DEBUG(scheduler,debugBelch("sched (task %p): ", (void *)(unsigned long)(unsigned int)osThreadId()););
 688 #elif !defined(GRAN) && !defined(PARALLEL_HASKELL)
 689     IF_DEBUG(scheduler,debugBelch("sched: "););
 690 #endif
 691
 692     schedulePostRunThread();
 693
 694     ready_to_gc = rtsFalse;
 695
 696     switch (ret) {
 697     case HeapOverflow:
 698         ready_to_gc = scheduleHandleHeapOverflow(cap,t);
 699         break;
 700
 701     case StackOverflow:
 702         scheduleHandleStackOverflow(cap,task,t);
 703         break;
 704
 705     case ThreadYielding:
 706         if (scheduleHandleYield(cap, t, prev_what_next)) {
 707             // shortcut for switching between compiler/interpreter:
 708             goto run_thread;
 709         }
 710         break;
 711
 712     case ThreadBlocked:
 713         scheduleHandleThreadBlocked(t);
 714         break;
 715
 716     case ThreadFinished:
 717         if (scheduleHandleThreadFinished(cap, task, t)) return cap;
 718         ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
 719         break;
 720
 721     default:
 722       barf("schedule: invalid thread return code %d", (int)ret);
 723     }
 724
 725     if (scheduleDoHeapProfile(ready_to_gc)) { ready_to_gc = rtsFalse; }
 726     if (ready_to_gc) {
 727       cap = scheduleDoGC(cap,task,rtsFalse,GetRoots);
 728     }
 729   } /* end of while() */
 730
 731   IF_PAR_DEBUG(verbose,
 732                debugBelch("== Leaving schedule() after having received Finish\n"));
 733 }
 734
 735 /* ----------------------------------------------------------------------------
 736  * Setting up the scheduler loop
 737  * ------------------------------------------------------------------------- */
 738
 739 static void
 740 schedulePreLoop(void)
 741 {
 742 #if defined(GRAN)
 743     /* set up first event to get things going */
 744     /* ToDo: assign costs for system setup and init MainTSO ! */
 745     new_event(CurrentProc, CurrentProc, CurrentTime[CurrentProc],
 746               ContinueThread,
 747               CurrentTSO, (StgClosure*)NULL, (rtsSpark*)NULL);
 748
 749     IF_DEBUG(gran,
 750              debugBelch("GRAN: Init CurrentTSO (in schedule) = %p\n",
 751                         CurrentTSO);
 752              G_TSO(CurrentTSO, 5));
 753
 754     if (RtsFlags.GranFlags.Light) {
 755         /* Save current time; GranSim Light only */
 756         CurrentTSO->gran.clock = CurrentTime[CurrentProc];
 757     }
 758 #endif
 759 }
 760
 761 /* -----------------------------------------------------------------------------
 762  * schedulePushWork()
 763  *
 764  * Push work to other Capabilities if we have some.
 765  * -------------------------------------------------------------------------- */
 766
 767 #if defined(THREADED_RTS)
 768 static void
 769 schedulePushWork(Capability *cap USED_IF_THREADS,
 770                  Task *task      USED_IF_THREADS)
 771 {
 772     Capability *free_caps[n_capabilities], *cap0;
 773     nat i, n_free_caps;
 774
 775     // Check whether we have more threads on our run queue, or sparks
 776     // in our pool, that we could hand to another Capability.
 777     if ((emptyRunQueue(cap) || cap->run_queue_hd->link == END_TSO_QUEUE)
 778         && sparkPoolSizeCap(cap) < 2) {
 779         return;
 780     }
 781
 782     // First grab as many free Capabilities as we can.
 783     for (i=0, n_free_caps=0; i < n_capabilities; i++) {
 784         cap0 = &capabilities[i];
 785         if (cap != cap0 && tryGrabCapability(cap0,task)) {
 786             if (!emptyRunQueue(cap0) || cap->returning_tasks_hd != NULL) {
 787                 // it already has some work, we just grabbed it at
 788                 // the wrong moment.  Or maybe it's deadlocked!
 789                 releaseCapability(cap0);
 790             } else {
 791                 free_caps[n_free_caps++] = cap0;
 792             }
 793         }
 794     }
 795
 796     // we now have n_free_caps free capabilities stashed in
 797     // free_caps[].  Share our run queue equally with them.  This is
 798     // probably the simplest thing we could do; improvements we might
 799     // want to do include:
 800     //
 801     //   - giving high priority to moving relatively new threads, on
 802     //     the gournds that they haven't had time to build up a
 803     //     working set in the cache on this CPU/Capability.
 804     //
 805     //   - giving low priority to moving long-lived threads
 806
 807     if (n_free_caps > 0) {
 808         StgTSO *prev, *t, *next;
 809         rtsBool pushed_to_all;
 810
 811         IF_DEBUG(scheduler, sched_belch("excess threads on run queue and %d free capabilities, sharing...", n_free_caps));
 812
 813         i = 0;
 814         pushed_to_all = rtsFalse;
 815
 816         if (cap->run_queue_hd != END_TSO_QUEUE) {
 817             prev = cap->run_queue_hd;
 818             t = prev->link;
 819             prev->link = END_TSO_QUEUE;
 820             for (; t != END_TSO_QUEUE; t = next) {
 821                 next = t->link;
 822                 t->link = END_TSO_QUEUE;
 823                 if (t->what_next == ThreadRelocated
 824                     || t->bound == task) { // don't move my bound thread
 825                     prev->link = t;
 826                     prev = t;
 827                 } else if (i == n_free_caps) {
 828                     pushed_to_all = rtsTrue;
 829                     i = 0;
 830                     // keep one for us
 831                     prev->link = t;
 832                     prev = t;
 833                 } else {
 834                     IF_DEBUG(scheduler, sched_belch("pushing thread %d to capability %d", t->id, free_caps[i]->no));
 835                     appendToRunQueue(free_caps[i],t);
 836                     if (t->bound) { t->bound->cap = free_caps[i]; }
 837                     i++;
 838                 }
 839             }
 840             cap->run_queue_tl = prev;
 841         }
 842
 843         // If there are some free capabilities that we didn't push any
 844         // threads to, then try to push a spark to each one.
 845         if (!pushed_to_all) {
 846             StgClosure *spark;
 847             // i is the next free capability to push to
 848             for (; i < n_free_caps; i++) {
 849                 if (emptySparkPoolCap(free_caps[i])) {
 850                     spark = findSpark(cap);
 851                     if (spark != NULL) {
 852                         IF_DEBUG(scheduler, sched_belch("pushing spark %p to capability %d", spark, free_caps[i]->no));
 853                         newSpark(&(free_caps[i]->r), spark);
 854                     }
 855                 }
 856             }
 857         }
 858
 859         // release the capabilities
 860         for (i = 0; i < n_free_caps; i++) {
 861             task->cap = free_caps[i];
 862             releaseCapability(free_caps[i]);
 863         }
 864     }
 865     task->cap = cap; // reset to point to our Capability.
 866 }
 867 #endif
 868
 869 /* ----------------------------------------------------------------------------
 870  * Start any pending signal handlers
 871  * ------------------------------------------------------------------------- */
 872
 873 #if defined(RTS_USER_SIGNALS) && (!defined(THREADED_RTS) || defined(mingw32_HOST_OS))
 874 static void
 875 scheduleStartSignalHandlers(Capability *cap)
 876 {
 877     if (signals_pending()) { // safe outside the lock
 878         startSignalHandlers(cap);
 879     }
 880 }
 881 #else
 882 static void
 883 scheduleStartSignalHandlers(Capability *cap STG_UNUSED)
 884 {
 885 }
 886 #endif
 887
 888 /* ----------------------------------------------------------------------------
 889  * Check for blocked threads that can be woken up.
 890  * ------------------------------------------------------------------------- */
 891
 892 static void
 893 scheduleCheckBlockedThreads(Capability *cap USED_IF_NOT_THREADS)
 894 {
 895 #if !defined(THREADED_RTS)
 896     //
 897     // Check whether any waiting threads need to be woken up.  If the
 898     // run queue is empty, and there are no other tasks running, we
 899     // can wait indefinitely for something to happen.
 900     //
 901     if ( !emptyQueue(blocked_queue_hd) || !emptyQueue(sleeping_queue) )
 902     {
 903         awaitEvent( emptyRunQueue(cap) && !blackholes_need_checking );
 904     }
 905 #endif
 906 }
 907
 908
 909 /* ----------------------------------------------------------------------------
 910  * Check for threads blocked on BLACKHOLEs that can be woken up
 911  * ------------------------------------------------------------------------- */
 912 static void
 913 scheduleCheckBlackHoles (Capability *cap)
 914 {
 915     if ( blackholes_need_checking ) // check without the lock first
 916     {
 917         ACQUIRE_LOCK(&sched_mutex);
 918         if ( blackholes_need_checking ) {
 919             checkBlackHoles(cap);
 920             blackholes_need_checking = rtsFalse;
 921         }
 922         RELEASE_LOCK(&sched_mutex);
 923     }
 924 }
 925
 926 /* ----------------------------------------------------------------------------
 927  * Detect deadlock conditions and attempt to resolve them.
 928  * ------------------------------------------------------------------------- */
 929
 930 static void
 931 scheduleDetectDeadlock (Capability *cap, Task *task)
 932 {
 933
 934 #if defined(PARALLEL_HASKELL)
 935     // ToDo: add deadlock detection in GUM (similar to THREADED_RTS) -- HWL
 936     return;
 937 #endif
 938
 939     /*
 940      * Detect deadlock: when we have no threads to run, there are no
 941      * threads blocked, waiting for I/O, or sleeping, and all the
 942      * other tasks are waiting for work, we must have a deadlock of
 943      * some description.
 944      */
 945     if ( emptyThreadQueues(cap) )
 946     {
 947 #if defined(THREADED_RTS)
 948         /*
 949          * In the threaded RTS, we only check for deadlock if there
 950          * has been no activity in a complete timeslice.  This means
 951          * we won't eagerly start a full GC just because we don't have
 952          * any threads to run currently.
 953          */
 954         if (recent_activity != ACTIVITY_INACTIVE) return;
 955 #endif
 956
 957         IF_DEBUG(scheduler, sched_belch("deadlocked, forcing major GC..."));
 958
 959         // Garbage collection can release some new threads due to
 960         // either (a) finalizers or (b) threads resurrected because
 961         // they are unreachable and will therefore be sent an
 962         // exception.  Any threads thus released will be immediately
 963         // runnable.
 964         cap = scheduleDoGC (cap, task, rtsTrue/*force  major GC*/, GetRoots);
 965
 966         recent_activity = ACTIVITY_DONE_GC;
 967
 968         if ( !emptyRunQueue(cap) ) return;
 969
 970 #if defined(RTS_USER_SIGNALS) && (!defined(THREADED_RTS) || defined(mingw32_HOST_OS))
 971         /* If we have user-installed signal handlers, then wait
 972          * for signals to arrive rather then bombing out with a
 973          * deadlock.
 974          */
 975         if ( anyUserHandlers() ) {
 976             IF_DEBUG(scheduler,
 977                      sched_belch("still deadlocked, waiting for signals..."));
 978
 979             awaitUserSignals();
 980
 981             if (signals_pending()) {
 982                 startSignalHandlers(cap);
 983             }
 984
 985             // either we have threads to run, or we were interrupted:
 986             ASSERT(!emptyRunQueue(cap) || sched_state >= SCHED_INTERRUPTING);
 987         }
 988 #endif
 989
 990 #if !defined(THREADED_RTS)
 991         /* Probably a real deadlock.  Send the current main thread the
 992          * Deadlock exception.
 993          */
 994         if (task->tso) {
 995             switch (task->tso->why_blocked) {
 996             case BlockedOnSTM:
 997             case BlockedOnBlackHole:
 998             case BlockedOnException:
 999             case BlockedOnMVar:
1000                 raiseAsync(cap, task->tso, (StgClosure *)NonTermination_closure);
1001                 return;
1002             default:
1003                 barf("deadlock: main thread blocked in a strange way");
1004             }
1005         }
1006         return;
1007 #endif
1008     }
1009 }
1010
1011 /* ----------------------------------------------------------------------------
1012  * Process an event (GRAN only)
1013  * ------------------------------------------------------------------------- */
1014
1015 #if defined(GRAN)
1016 static StgTSO *
1017 scheduleProcessEvent(rtsEvent *event)
1018 {
1019     StgTSO *t;
1020
1021     if (RtsFlags.GranFlags.Light)
1022       GranSimLight_enter_system(event, &ActiveTSO); // adjust ActiveTSO etc
1023
1024     /* adjust time based on time-stamp */
1025     if (event->time > CurrentTime[CurrentProc] &&
1026         event->evttype != ContinueThread)
1027       CurrentTime[CurrentProc] = event->time;
1028
1029     /* Deal with the idle PEs (may issue FindWork or MoveSpark events) */
1030     if (!RtsFlags.GranFlags.Light)
1031       handleIdlePEs();
1032
1033     IF_DEBUG(gran, debugBelch("GRAN: switch by event-type\n"));
1034
1035     /* main event dispatcher in GranSim */
1036     switch (event->evttype) {
1037       /* Should just be continuing execution */
1038     case ContinueThread:
1039       IF_DEBUG(gran, debugBelch("GRAN: doing ContinueThread\n"));
1040       /* ToDo: check assertion
1041       ASSERT(run_queue_hd != (StgTSO*)NULL &&
1042              run_queue_hd != END_TSO_QUEUE);
1043       */
1044       /* Ignore ContinueThreads for fetching threads (if synchr comm) */
1045       if (!RtsFlags.GranFlags.DoAsyncFetch &&
1046           procStatus[CurrentProc]==Fetching) {
1047         debugBelch("ghuH: Spurious ContinueThread while Fetching ignored; TSO %d (%p) [PE %d]\n",
1048               CurrentTSO->id, CurrentTSO, CurrentProc);
1049         goto next_thread;
1050       }
1051       /* Ignore ContinueThreads for completed threads */
1052       if (CurrentTSO->what_next == ThreadComplete) {
1053         debugBelch("ghuH: found a ContinueThread event for completed thread %d (%p) [PE %d] (ignoring ContinueThread)\n",
1054               CurrentTSO->id, CurrentTSO, CurrentProc);
1055         goto next_thread;
1056       }
1057       /* Ignore ContinueThreads for threads that are being migrated */
1058       if (PROCS(CurrentTSO)==Nowhere) {
1059         debugBelch("ghuH: trying to run the migrating TSO %d (%p) [PE %d] (ignoring ContinueThread)\n",
1060               CurrentTSO->id, CurrentTSO, CurrentProc);
1061         goto next_thread;
1062       }
1063       /* The thread should be at the beginning of the run queue */
1064       if (CurrentTSO!=run_queue_hds[CurrentProc]) {
1065         debugBelch("ghuH: TSO %d (%p) [PE %d] is not at the start of the run_queue when doing a ContinueThread\n",
1066               CurrentTSO->id, CurrentTSO, CurrentProc);
1067         break; // run the thread anyway
1068       }
1069       /*
1070       new_event(proc, proc, CurrentTime[proc],
1071                 FindWork,
1072                 (StgTSO*)NULL, (StgClosure*)NULL, (rtsSpark*)NULL);
1073       goto next_thread;
1074       */ /* Catches superfluous CONTINUEs -- should be unnecessary */
1075       break; // now actually run the thread; DaH Qu'vam yImuHbej
1076
1077     case FetchNode:
1078       do_the_fetchnode(event);
1079       goto next_thread;             /* handle next event in event queue  */
1080
1081     case GlobalBlock:
1082       do_the_globalblock(event);
1083       goto next_thread;             /* handle next event in event queue  */
1084
1085     case FetchReply:
1086       do_the_fetchreply(event);
1087       goto next_thread;             /* handle next event in event queue  */
1088
1089     case UnblockThread:   /* Move from the blocked queue to the tail of */
1090       do_the_unblock(event);
1091       goto next_thread;             /* handle next event in event queue  */
1092
1093     case ResumeThread:  /* Move from the blocked queue to the tail of */
1094       /* the runnable queue ( i.e. Qu' SImqa'lu') */
1095       event->tso->gran.blocktime +=
1096         CurrentTime[CurrentProc] - event->tso->gran.blockedat;
1097       do_the_startthread(event);
1098       goto next_thread;             /* handle next event in event queue  */
1099
1100     case StartThread:
1101       do_the_startthread(event);
1102       goto next_thread;             /* handle next event in event queue  */
1103
1104     case MoveThread:
1105       do_the_movethread(event);
1106       goto next_thread;             /* handle next event in event queue  */
1107
1108     case MoveSpark:
1109       do_the_movespark(event);
1110       goto next_thread;             /* handle next event in event queue  */
1111
1112     case FindWork:
1113       do_the_findwork(event);
1114       goto next_thread;             /* handle next event in event queue  */
1115
1116     default:
1117       barf("Illegal event type %u\n", event->evttype);
1118     }  /* switch */
1119
1120     /* This point was scheduler_loop in the old RTS */
1121
1122     IF_DEBUG(gran, debugBelch("GRAN: after main switch\n"));
1123
1124     TimeOfLastEvent = CurrentTime[CurrentProc];
1125     TimeOfNextEvent = get_time_of_next_event();
1126     IgnoreEvents=(TimeOfNextEvent==0); // HWL HACK
1127     // CurrentTSO = ThreadQueueHd;
1128
1129     IF_DEBUG(gran, debugBelch("GRAN: time of next event is: %ld\n",
1130                          TimeOfNextEvent));
1131
1132     if (RtsFlags.GranFlags.Light)
1133       GranSimLight_leave_system(event, &ActiveTSO);
1134
1135     EndOfTimeSlice = CurrentTime[CurrentProc]+RtsFlags.GranFlags.time_slice;
1136
1137     IF_DEBUG(gran,
1138              debugBelch("GRAN: end of time-slice is %#lx\n", EndOfTimeSlice));
1139
1140     /* in a GranSim setup the TSO stays on the run queue */
1141     t = CurrentTSO;
1142     /* Take a thread from the run queue. */
1143     POP_RUN_QUEUE(t); // take_off_run_queue(t);
1144
1145     IF_DEBUG(gran,
1146              debugBelch("GRAN: About to run current thread, which is\n");
1147              G_TSO(t,5));
1148
1149     context_switch = 0; // turned on via GranYield, checking events and time slice
1150
1151     IF_DEBUG(gran,
1152              DumpGranEvent(GR_SCHEDULE, t));
1153
1154     procStatus[CurrentProc] = Busy;
1155 }
1156 #endif // GRAN
1157
1158 /* ----------------------------------------------------------------------------
1159  * Send pending messages (PARALLEL_HASKELL only)
1160  * ------------------------------------------------------------------------- */
1161
1162 #if defined(PARALLEL_HASKELL)
1163 static StgTSO *
1164 scheduleSendPendingMessages(void)
1165 {
1166     StgSparkPool *pool;
1167     rtsSpark spark;
1168     StgTSO *t;
1169
1170 # if defined(PAR) // global Mem.Mgmt., omit for now
1171     if (PendingFetches != END_BF_QUEUE) {
1172         processFetches();
1173     }
1174 # endif
1175
1176     if (RtsFlags.ParFlags.BufferTime) {
1177         // if we use message buffering, we must send away all message
1178         // packets which have become too old...
1179         sendOldBuffers();
1180     }
1181 }
1182 #endif
1183
1184 /* ----------------------------------------------------------------------------
1185  * Activate spark threads (PARALLEL_HASKELL only)
1186  * ------------------------------------------------------------------------- */
1187
1188 #if defined(PARALLEL_HASKELL)
1189 static void
1190 scheduleActivateSpark(void)
1191 {
1192 #if defined(SPARKS)
1193   ASSERT(emptyRunQueue());
1194 /* We get here if the run queue is empty and want some work.
1195    We try to turn a spark into a thread, and add it to the run queue,
1196    from where it will be picked up in the next iteration of the scheduler
1197    loop.
1198 */
1199
1200       /* :-[  no local threads => look out for local sparks */
1201       /* the spark pool for the current PE */
1202       pool = &(cap.r.rSparks); // JB: cap = (old) MainCap
1203       if (advisory_thread_count < RtsFlags.ParFlags.maxThreads &&
1204           pool->hd < pool->tl) {
1205         /*
1206          * ToDo: add GC code check that we really have enough heap afterwards!!
1207          * Old comment:
1208          * If we're here (no runnable threads) and we have pending
1209          * sparks, we must have a space problem.  Get enough space
1210          * to turn one of those pending sparks into a
1211          * thread...
1212          */
1213
1214         spark = findSpark(rtsFalse);            /* get a spark */
1215         if (spark != (rtsSpark) NULL) {
1216           tso = createThreadFromSpark(spark);       /* turn the spark into a thread */
1217           IF_PAR_DEBUG(fish, // schedule,
1218                        debugBelch("==== schedule: Created TSO %d (%p); %d threads active\n",
1219                              tso->id, tso, advisory_thread_count));
1220
1221           if (tso==END_TSO_QUEUE) { /* failed to activate spark->back to loop */
1222             IF_PAR_DEBUG(fish, // schedule,
1223                          debugBelch("==^^ failed to create thread from spark @ %lx\n",
1224                             spark));
1225             return rtsFalse; /* failed to generate a thread */
1226           }                  /* otherwise fall through & pick-up new tso */
1227         } else {
1228           IF_PAR_DEBUG(fish, // schedule,
1229                        debugBelch("==^^ no local sparks (spark pool contains only NFs: %d)\n",
1230                              spark_queue_len(pool)));
1231           return rtsFalse;  /* failed to generate a thread */
1232         }
1233         return rtsTrue;  /* success in generating a thread */
1234   } else { /* no more threads permitted or pool empty */
1235     return rtsFalse;  /* failed to generateThread */
1236   }
1237 #else
1238   tso = NULL; // avoid compiler warning only
1239   return rtsFalse;  /* dummy in non-PAR setup */
1240 #endif // SPARKS
1241 }
1242 #endif // PARALLEL_HASKELL
1243
1244 /* ----------------------------------------------------------------------------
1245  * Get work from a remote node (PARALLEL_HASKELL only)
1246  * ------------------------------------------------------------------------- */
1247
1248 #if defined(PARALLEL_HASKELL)
1249 static rtsBool
1250 scheduleGetRemoteWork(rtsBool *receivedFinish)
1251 {
1252   ASSERT(emptyRunQueue());
1253
1254   if (RtsFlags.ParFlags.BufferTime) {
1255         IF_PAR_DEBUG(verbose,
1256                 debugBelch("...send all pending data,"));
1257         {
1258           nat i;
1259           for (i=1; i<=nPEs; i++)
1260             sendImmediately(i); // send all messages away immediately
1261         }
1262   }
1263 # ifndef SPARKS
1264         //++EDEN++ idle() , i.e. send all buffers, wait for work
1265         // suppress fishing in EDEN... just look for incoming messages
1266         // (blocking receive)
1267   IF_PAR_DEBUG(verbose,
1268                debugBelch("...wait for incoming messages...\n"));
1269   *receivedFinish = processMessages(); // blocking receive...
1270
1271         // and reenter scheduling loop after having received something
1272         // (return rtsFalse below)
1273
1274 # else /* activate SPARKS machinery */
1275 /* We get here, if we have no work, tried to activate a local spark, but still
1276    have no work. We try to get a remote spark, by sending a FISH message.
1277    Thread migration should be added here, and triggered when a sequence of
1278    fishes returns without work. */
1279         delay = (RtsFlags.ParFlags.fishDelay!=0ll ? RtsFlags.ParFlags.fishDelay : 0ll);
1280
1281       /* =8-[  no local sparks => look for work on other PEs */
1282         /*
1283          * We really have absolutely no work.  Send out a fish
1284          * (there may be some out there already), and wait for
1285          * something to arrive.  We clearly can't run any threads
1286          * until a SCHEDULE or RESUME arrives, and so that's what
1287          * we're hoping to see.  (Of course, we still have to
1288          * respond to other types of messages.)
1289          */
1290         rtsTime now = msTime() /*CURRENT_TIME*/;
1291         IF_PAR_DEBUG(verbose,
1292                      debugBelch("--  now=%ld\n", now));
1293         IF_PAR_DEBUG(fish, // verbose,
1294              if (outstandingFishes < RtsFlags.ParFlags.maxFishes &&
1295                  (last_fish_arrived_at!=0 &&
1296                   last_fish_arrived_at+delay > now)) {
1297                debugBelch("--$$ <%llu> delaying FISH until %llu (last fish %llu, delay %llu)\n",
1298                      now, last_fish_arrived_at+delay,
1299                      last_fish_arrived_at,
1300                      delay);
1301              });
1302
1303         if (outstandingFishes < RtsFlags.ParFlags.maxFishes &&
1304             advisory_thread_count < RtsFlags.ParFlags.maxThreads) { // send a FISH, but when?
1305           if (last_fish_arrived_at==0 ||
1306               (last_fish_arrived_at+delay <= now)) {           // send FISH now!
1307             /* outstandingFishes is set in sendFish, processFish;
1308                avoid flooding system with fishes via delay */
1309     next_fish_to_send_at = 0;
1310   } else {
1311     /* ToDo: this should be done in the main scheduling loop to avoid the
1312              busy wait here; not so bad if fish delay is very small  */
1313     int iq = 0; // DEBUGGING -- HWL
1314     next_fish_to_send_at = last_fish_arrived_at+delay; // remember when to send
1315     /* send a fish when ready, but process messages that arrive in the meantime */
1316     do {
1317       if (PacketsWaiting()) {
1318         iq++; // DEBUGGING
1319         *receivedFinish = processMessages();
1320       }
1321       now = msTime();
1322     } while (!*receivedFinish || now<next_fish_to_send_at);
1323     // JB: This means the fish could become obsolete, if we receive
1324     // work. Better check for work again?
1325     // last line: while (!receivedFinish || !haveWork || now<...)
1326     // next line: if (receivedFinish || haveWork )
1327
1328     if (*receivedFinish) // no need to send a FISH if we are finishing anyway
1329       return rtsFalse;  // NB: this will leave scheduler loop
1330                         // immediately after return!
1331
1332     IF_PAR_DEBUG(fish, // verbose,
1333                debugBelch("--$$ <%llu> sent delayed fish (%d processMessages); active/total threads=%d/%d\n",now,iq,run_queue_len(),advisory_thread_count));
1334
1335   }
1336
1337     // JB: IMHO, this should all be hidden inside sendFish(...)
1338     /* pe = choosePE();
1339        sendFish(pe, thisPE, NEW_FISH_AGE, NEW_FISH_HISTORY,
1340                 NEW_FISH_HUNGER);
1341
1342     // Global statistics: count no. of fishes
1343     if (RtsFlags.ParFlags.ParStats.Global &&
1344          RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
1345            globalParStats.tot_fish_mess++;
1346            }
1347     */
1348
1349   /* delayed fishes must have been sent by now! */
1350   next_fish_to_send_at = 0;
1351   }
1352
1353   *receivedFinish = processMessages();
1354 # endif /* SPARKS */
1355
1356  return rtsFalse;
1357  /* NB: this function always returns rtsFalse, meaning the scheduler
1358     loop continues with the next iteration;
1359     rationale:
1360       return code means success in finding work; we enter this function
1361       if there is no local work, thus have to send a fish which takes
1362       time until it arrives with work; in the meantime we should process
1363       messages in the main loop;
1364  */
1365 }
1366 #endif // PARALLEL_HASKELL
1367
1368 /* ----------------------------------------------------------------------------
1369  * PAR/GRAN: Report stats & debugging info(?)
1370  * ------------------------------------------------------------------------- */
1371
1372 #if defined(PAR) || defined(GRAN)
1373 static void
1374 scheduleGranParReport(void)
1375 {
1376   ASSERT(run_queue_hd != END_TSO_QUEUE);
1377
1378   /* Take a thread from the run queue, if we have work */
1379   POP_RUN_QUEUE(t);  // take_off_run_queue(END_TSO_QUEUE);
1380
1381     /* If this TSO has got its outport closed in the meantime,
1382      *   it mustn't be run. Instead, we have to clean it up as if it was finished.
1383      * It has to be marked as TH_DEAD for this purpose.
1384      * If it is TH_TERM instead, it is supposed to have finished in the normal way.
1385
1386 JB: TODO: investigate wether state change field could be nuked
1387      entirely and replaced by the normal tso state (whatnext
1388      field). All we want to do is to kill tsos from outside.
1389      */
1390
1391     /* ToDo: write something to the log-file
1392     if (RTSflags.ParFlags.granSimStats && !sameThread)
1393         DumpGranEvent(GR_SCHEDULE, RunnableThreadsHd);
1394
1395     CurrentTSO = t;
1396     */
1397     /* the spark pool for the current PE */
1398     pool = &(cap.r.rSparks); //  cap = (old) MainCap
1399
1400     IF_DEBUG(scheduler,
1401              debugBelch("--=^ %d threads, %d sparks on [%#x]\n",
1402                    run_queue_len(), spark_queue_len(pool), CURRENT_PROC));
1403
1404     IF_PAR_DEBUG(fish,
1405              debugBelch("--=^ %d threads, %d sparks on [%#x]\n",
1406                    run_queue_len(), spark_queue_len(pool), CURRENT_PROC));
1407
1408     if (RtsFlags.ParFlags.ParStats.Full &&
1409         (t->par.sparkname != (StgInt)0) && // only log spark generated threads
1410         (emitSchedule || // forced emit
1411          (t && LastTSO && t->id != LastTSO->id))) {
1412       /*
1413          we are running a different TSO, so write a schedule event to log file
1414          NB: If we use fair scheduling we also have to write  a deschedule
1415              event for LastTSO; with unfair scheduling we know that the
1416              previous tso has blocked whenever we switch to another tso, so
1417              we don't need it in GUM for now
1418       */
1419       IF_PAR_DEBUG(fish, // schedule,
1420                    debugBelch("____ scheduling spark generated thread %d (%lx) (%lx) via a forced emit\n",t->id,t,t->par.sparkname));
1421
1422       DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC,
1423                        GR_SCHEDULE, t, (StgClosure *)NULL, 0, 0);
1424       emitSchedule = rtsFalse;
1425     }
1426 }
1427 #endif
1428
1429 /* ----------------------------------------------------------------------------
1430  * After running a thread...
1431  * ------------------------------------------------------------------------- */
1432
1433 static void
1434 schedulePostRunThread(void)
1435 {
1436 #if defined(PAR)
1437     /* HACK 675: if the last thread didn't yield, make sure to print a
1438        SCHEDULE event to the log file when StgRunning the next thread, even
1439        if it is the same one as before */
1440     LastTSO = t;
1441     TimeOfLastYield = CURRENT_TIME;
1442 #endif
1443
1444   /* some statistics gathering in the parallel case */
1445
1446 #if defined(GRAN) || defined(PAR) || defined(EDEN)
1447   switch (ret) {
1448     case HeapOverflow:
1449 # if defined(GRAN)
1450       IF_DEBUG(gran, DumpGranEvent(GR_DESCHEDULE, t));
1451       globalGranStats.tot_heapover++;
1452 # elif defined(PAR)
1453       globalParStats.tot_heapover++;
1454 # endif
1455       break;
1456
1457      case StackOverflow:
1458 # if defined(GRAN)
1459       IF_DEBUG(gran,
1460                DumpGranEvent(GR_DESCHEDULE, t));
1461       globalGranStats.tot_stackover++;
1462 # elif defined(PAR)
1463       // IF_DEBUG(par,
1464       // DumpGranEvent(GR_DESCHEDULE, t);
1465       globalParStats.tot_stackover++;
1466 # endif
1467       break;
1468
1469     case ThreadYielding:
1470 # if defined(GRAN)
1471       IF_DEBUG(gran,
1472                DumpGranEvent(GR_DESCHEDULE, t));
1473       globalGranStats.tot_yields++;
1474 # elif defined(PAR)
1475       // IF_DEBUG(par,
1476       // DumpGranEvent(GR_DESCHEDULE, t);
1477       globalParStats.tot_yields++;
1478 # endif
1479       break;
1480
1481     case ThreadBlocked:
1482 # if defined(GRAN)
1483       IF_DEBUG(scheduler,
1484                debugBelch("--<< thread %ld (%p; %s) stopped, blocking on node %p [PE %d] with BQ: ",
1485                           t->id, t, whatNext_strs[t->what_next], t->block_info.closure,
1486                           (t->block_info.closure==(StgClosure*)NULL ? 99 : where_is(t->block_info.closure)));
1487                if (t->block_info.closure!=(StgClosure*)NULL)
1488                  print_bq(t->block_info.closure);
1489                debugBelch("\n"));
1490
1491       // ??? needed; should emit block before
1492       IF_DEBUG(gran,
1493                DumpGranEvent(GR_DESCHEDULE, t));
1494       prune_eventq(t, (StgClosure *)NULL); // prune ContinueThreads for t
1495       /*
1496         ngoq Dogh!
1497       ASSERT(procStatus[CurrentProc]==Busy ||
1498               ((procStatus[CurrentProc]==Fetching) &&
1499               (t->block_info.closure!=(StgClosure*)NULL)));
1500       if (run_queue_hds[CurrentProc] == END_TSO_QUEUE &&
1501           !(!RtsFlags.GranFlags.DoAsyncFetch &&
1502             procStatus[CurrentProc]==Fetching))
1503         procStatus[CurrentProc] = Idle;
1504       */
1505 # elif defined(PAR)
1506 //++PAR++  blockThread() writes the event (change?)
1507 # endif
1508     break;
1509
1510   case ThreadFinished:
1511     break;
1512
1513   default:
1514     barf("parGlobalStats: unknown return code");
1515     break;
1516     }
1517 #endif
1518 }
1519
1520 /* -----------------------------------------------------------------------------
1521  * Handle a thread that returned to the scheduler with ThreadHeepOverflow
1522  * -------------------------------------------------------------------------- */
1523
1524 static rtsBool
1525 scheduleHandleHeapOverflow( Capability *cap, StgTSO *t )
1526 {
1527     // did the task ask for a large block?
1528     if (cap->r.rHpAlloc > BLOCK_SIZE) {
1529         // if so, get one and push it on the front of the nursery.
1530         bdescr *bd;
1531         lnat blocks;
1532
1533         blocks = (lnat)BLOCK_ROUND_UP(cap->r.rHpAlloc) / BLOCK_SIZE;
1534
1535         IF_DEBUG(scheduler,
1536                  debugBelch("--<< thread %ld (%s) stopped: requesting a large block (size %ld)\n",
1537                             (long)t->id, whatNext_strs[t->what_next], blocks));
1538
1539         // don't do this if the nursery is (nearly) full, we'll GC first.
1540         if (cap->r.rCurrentNursery->link != NULL ||
1541             cap->r.rNursery->n_blocks == 1) {  // paranoia to prevent infinite loop
1542                                                // if the nursery has only one block.
1543
1544             ACQUIRE_SM_LOCK
1545             bd = allocGroup( blocks );
1546             RELEASE_SM_LOCK
1547             cap->r.rNursery->n_blocks += blocks;
1548
1549             // link the new group into the list
1550             bd->link = cap->r.rCurrentNursery;
1551             bd->u.back = cap->r.rCurrentNursery->u.back;
1552             if (cap->r.rCurrentNursery->u.back != NULL) {
1553                 cap->r.rCurrentNursery->u.back->link = bd;
1554             } else {
1555 #if !defined(THREADED_RTS)
1556                 ASSERT(g0s0->blocks == cap->r.rCurrentNursery &&
1557                        g0s0 == cap->r.rNursery);
1558 #endif
1559                 cap->r.rNursery->blocks = bd;
1560             }
1561             cap->r.rCurrentNursery->u.back = bd;
1562
1563             // initialise it as a nursery block.  We initialise the
1564             // step, gen_no, and flags field of *every* sub-block in
1565             // this large block, because this is easier than making
1566             // sure that we always find the block head of a large
1567             // block whenever we call Bdescr() (eg. evacuate() and
1568             // isAlive() in the GC would both have to do this, at
1569             // least).
1570             {
1571                 bdescr *x;
1572                 for (x = bd; x < bd + blocks; x++) {
1573                     x->step = cap->r.rNursery;
1574                     x->gen_no = 0;
1575                     x->flags = 0;
1576                 }
1577             }
1578
1579             // This assert can be a killer if the app is doing lots
1580             // of large block allocations.
1581             IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
1582
1583             // now update the nursery to point to the new block
1584             cap->r.rCurrentNursery = bd;
1585
1586             // we might be unlucky and have another thread get on the
1587             // run queue before us and steal the large block, but in that
1588             // case the thread will just end up requesting another large
1589             // block.
1590             pushOnRunQueue(cap,t);
1591             return rtsFalse;  /* not actually GC'ing */
1592         }
1593     }
1594
1595     IF_DEBUG(scheduler,
1596              debugBelch("--<< thread %ld (%s) stopped: HeapOverflow\n",
1597                         (long)t->id, whatNext_strs[t->what_next]));
1598 #if defined(GRAN)
1599     ASSERT(!is_on_queue(t,CurrentProc));
1600 #elif defined(PARALLEL_HASKELL)
1601     /* Currently we emit a DESCHEDULE event before GC in GUM.
1602        ToDo: either add separate event to distinguish SYSTEM time from rest
1603        or just nuke this DESCHEDULE (and the following SCHEDULE) */
1604     if (0 && RtsFlags.ParFlags.ParStats.Full) {
1605         DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC,
1606                          GR_DESCHEDULE, t, (StgClosure *)NULL, 0, 0);
1607         emitSchedule = rtsTrue;
1608     }
1609 #endif
1610
1611     pushOnRunQueue(cap,t);
1612     return rtsTrue;
1613     /* actual GC is done at the end of the while loop in schedule() */
1614 }
1615
1616 /* -----------------------------------------------------------------------------
1617  * Handle a thread that returned to the scheduler with ThreadStackOverflow
1618  * -------------------------------------------------------------------------- */
1619
1620 static void
1621 scheduleHandleStackOverflow (Capability *cap, Task *task, StgTSO *t)
1622 {
1623     IF_DEBUG(scheduler,debugBelch("--<< thread %ld (%s) stopped, StackOverflow\n",
1624                                   (long)t->id, whatNext_strs[t->what_next]));
1625     /* just adjust the stack for this thread, then pop it back
1626      * on the run queue.
1627      */
1628     {
1629         /* enlarge the stack */
1630         StgTSO *new_t = threadStackOverflow(cap, t);
1631
1632         /* The TSO attached to this Task may have moved, so update the
1633          * pointer to it.
1634          */
1635         if (task->tso == t) {
1636             task->tso = new_t;
1637         }
1638         pushOnRunQueue(cap,new_t);
1639     }
1640 }
1641
1642 /* -----------------------------------------------------------------------------
1643  * Handle a thread that returned to the scheduler with ThreadYielding
1644  * -------------------------------------------------------------------------- */
1645
1646 static rtsBool
1647 scheduleHandleYield( Capability *cap, StgTSO *t, nat prev_what_next )
1648 {
1649     // Reset the context switch flag.  We don't do this just before
1650     // running the thread, because that would mean we would lose ticks
1651     // during GC, which can lead to unfair scheduling (a thread hogs
1652     // the CPU because the tick always arrives during GC).  This way
1653     // penalises threads that do a lot of allocation, but that seems
1654     // better than the alternative.
1655     context_switch = 0;
1656
1657     /* put the thread back on the run queue.  Then, if we're ready to
1658      * GC, check whether this is the last task to stop.  If so, wake
1659      * up the GC thread.  getThread will block during a GC until the
1660      * GC is finished.
1661      */
1662     IF_DEBUG(scheduler,
1663              if (t->what_next != prev_what_next) {
1664                  debugBelch("--<< thread %ld (%s) stopped to switch evaluators\n",
1665                             (long)t->id, whatNext_strs[t->what_next]);
1666              } else {
1667                  debugBelch("--<< thread %ld (%s) stopped, yielding\n",
1668                             (long)t->id, whatNext_strs[t->what_next]);
1669              }
1670         );
1671
1672     IF_DEBUG(sanity,
1673              //debugBelch("&& Doing sanity check on yielding TSO %ld.", t->id);
1674              checkTSO(t));
1675     ASSERT(t->link == END_TSO_QUEUE);
1676
1677     // Shortcut if we're just switching evaluators: don't bother
1678     // doing stack squeezing (which can be expensive), just run the
1679     // thread.
1680     if (t->what_next != prev_what_next) {
1681         return rtsTrue;
1682     }
1683
1684 #if defined(GRAN)
1685     ASSERT(!is_on_queue(t,CurrentProc));
1686
1687     IF_DEBUG(sanity,
1688              //debugBelch("&& Doing sanity check on all ThreadQueues (and their TSOs).");
1689              checkThreadQsSanity(rtsTrue));
1690
1691 #endif
1692
1693     addToRunQueue(cap,t);
1694
1695 #if defined(GRAN)
1696     /* add a ContinueThread event to actually process the thread */
1697     new_event(CurrentProc, CurrentProc, CurrentTime[CurrentProc],
1698               ContinueThread,
1699               t, (StgClosure*)NULL, (rtsSpark*)NULL);
1700     IF_GRAN_DEBUG(bq,
1701                   debugBelch("GRAN: eventq and runnableq after adding yielded thread to queue again:\n");
1702                   G_EVENTQ(0);
1703                   G_CURR_THREADQ(0));
1704 #endif
1705     return rtsFalse;
1706 }
1707
1708 /* -----------------------------------------------------------------------------
1709  * Handle a thread that returned to the scheduler with ThreadBlocked
1710  * -------------------------------------------------------------------------- */
1711
1712 static void
1713 scheduleHandleThreadBlocked( StgTSO *t
1714 #if !defined(GRAN) && !defined(DEBUG)
1715     STG_UNUSED
1716 #endif
1717     )
1718 {
1719 #if defined(GRAN)
1720     IF_DEBUG(scheduler,
1721              debugBelch("--<< thread %ld (%p; %s) stopped, blocking on node %p [PE %d] with BQ: \n",
1722                         t->id, t, whatNext_strs[t->what_next], t->block_info.closure, (t->block_info.closure==(StgClosure*)NULL ? 99 : where_is(t->block_info.closure)));
1723              if (t->block_info.closure!=(StgClosure*)NULL) print_bq(t->block_info.closure));
1724
1725     // ??? needed; should emit block before
1726     IF_DEBUG(gran,
1727              DumpGranEvent(GR_DESCHEDULE, t));
1728     prune_eventq(t, (StgClosure *)NULL); // prune ContinueThreads for t
1729     /*
1730       ngoq Dogh!
1731       ASSERT(procStatus[CurrentProc]==Busy ||
1732       ((procStatus[CurrentProc]==Fetching) &&
1733       (t->block_info.closure!=(StgClosure*)NULL)));
1734       if (run_queue_hds[CurrentProc] == END_TSO_QUEUE &&
1735       !(!RtsFlags.GranFlags.DoAsyncFetch &&
1736       procStatus[CurrentProc]==Fetching))
1737       procStatus[CurrentProc] = Idle;
1738     */
1739 #elif defined(PAR)
1740     IF_DEBUG(scheduler,
1741              debugBelch("--<< thread %ld (%p; %s) stopped, blocking on node %p with BQ: \n",
1742                         t->id, t, whatNext_strs[t->what_next], t->block_info.closure));
1743     IF_PAR_DEBUG(bq,
1744
1745                  if (t->block_info.closure!=(StgClosure*)NULL)
1746                  print_bq(t->block_info.closure));
1747
1748     /* Send a fetch (if BlockedOnGA) and dump event to log file */
1749     blockThread(t);
1750
1751     /* whatever we schedule next, we must log that schedule */
1752     emitSchedule = rtsTrue;
1753
1754 #else /* !GRAN */
1755
1756       // We don't need to do anything.  The thread is blocked, and it
1757       // has tidied up its stack and placed itself on whatever queue
1758       // it needs to be on.
1759
1760 #if !defined(THREADED_RTS)
1761     ASSERT(t->why_blocked != NotBlocked);
1762              // This might not be true under THREADED_RTS: we don't have
1763              // exclusive access to this TSO, so someone might have
1764              // woken it up by now.  This actually happens: try
1765              // conc023 +RTS -N2.
1766 #endif
1767
1768     IF_DEBUG(scheduler,
1769              debugBelch("--<< thread %d (%s) stopped: ",
1770                         t->id, whatNext_strs[t->what_next]);
1771              printThreadBlockage(t);
1772              debugBelch("\n"));
1773
1774     /* Only for dumping event to log file
1775        ToDo: do I need this in GranSim, too?
1776        blockThread(t);
1777     */
1778 #endif
1779 }
1780
1781 /* -----------------------------------------------------------------------------
1782  * Handle a thread that returned to the scheduler with ThreadFinished
1783  * -------------------------------------------------------------------------- */
1784
1785 static rtsBool
1786 scheduleHandleThreadFinished (Capability *cap STG_UNUSED, Task *task, StgTSO *t)
1787 {
1788     /* Need to check whether this was a main thread, and if so,
1789      * return with the return value.
1790      *
1791      * We also end up here if the thread kills itself with an
1792      * uncaught exception, see Exception.cmm.
1793      */
1794     IF_DEBUG(scheduler,debugBelch("--++ thread %d (%s) finished\n",
1795                                   t->id, whatNext_strs[t->what_next]));
1796
1797 #if defined(GRAN)
1798       endThread(t, CurrentProc); // clean-up the thread
1799 #elif defined(PARALLEL_HASKELL)
1800       /* For now all are advisory -- HWL */
1801       //if(t->priority==AdvisoryPriority) ??
1802       advisory_thread_count--; // JB: Caution with this counter, buggy!
1803
1804 # if defined(DIST)
1805       if(t->dist.priority==RevalPriority)
1806         FinishReval(t);
1807 # endif
1808
1809 # if defined(EDENOLD)
1810       // the thread could still have an outport... (BUG)
1811       if (t->eden.outport != -1) {
1812       // delete the outport for the tso which has finished...
1813         IF_PAR_DEBUG(eden_ports,
1814                    debugBelch("WARNING: Scheduler removes outport %d for TSO %d.\n",
1815                               t->eden.outport, t->id));
1816         deleteOPT(t);
1817       }
1818       // thread still in the process (HEAVY BUG! since outport has just been closed...)
1819       if (t->eden.epid != -1) {
1820         IF_PAR_DEBUG(eden_ports,
1821                    debugBelch("WARNING: Scheduler removes TSO %d from process %d .\n",
1822                            t->id, t->eden.epid));
1823         removeTSOfromProcess(t);
1824       }
1825 # endif
1826
1827 # if defined(PAR)
1828       if (RtsFlags.ParFlags.ParStats.Full &&
1829           !RtsFlags.ParFlags.ParStats.Suppressed)
1830         DumpEndEvent(CURRENT_PROC, t, rtsFalse /* not mandatory */);
1831
1832       //  t->par only contains statistics: left out for now...
1833       IF_PAR_DEBUG(fish,
1834                    debugBelch("**** end thread: ended sparked thread %d (%lx); sparkname: %lx\n",
1835                               t->id,t,t->par.sparkname));
1836 # endif
1837 #endif // PARALLEL_HASKELL
1838
1839       //
1840       // Check whether the thread that just completed was a bound
1841       // thread, and if so return with the result.
1842       //
1843       // There is an assumption here that all thread completion goes
1844       // through this point; we need to make sure that if a thread
1845       // ends up in the ThreadKilled state, that it stays on the run
1846       // queue so it can be dealt with here.
1847       //
1848
1849       if (t->bound) {
1850
1851           if (t->bound != task) {
1852 #if !defined(THREADED_RTS)
1853               // Must be a bound thread that is not the topmost one.  Leave
1854               // it on the run queue until the stack has unwound to the
1855               // point where we can deal with this.  Leaving it on the run
1856               // queue also ensures that the garbage collector knows about
1857               // this thread and its return value (it gets dropped from the
1858               // all_threads list so there's no other way to find it).
1859               appendToRunQueue(cap,t);
1860               return rtsFalse;
1861 #else
1862               // this cannot happen in the threaded RTS, because a
1863               // bound thread can only be run by the appropriate Task.
1864               barf("finished bound thread that isn't mine");
1865 #endif
1866           }
1867
1868           ASSERT(task->tso == t);
1869
1870           if (t->what_next == ThreadComplete) {
1871               if (task->ret) {
1872                   // NOTE: return val is tso->sp[1] (see StgStartup.hc)
1873                   *(task->ret) = (StgClosure *)task->tso->sp[1];
1874               }
1875               task->stat = Success;
1876           } else {
1877               if (task->ret) {
1878                   *(task->ret) = NULL;
1879               }
1880               if (sched_state >= SCHED_INTERRUPTING) {
1881                   task->stat = Interrupted;
1882               } else {
1883                   task->stat = Killed;
1884               }
1885           }
1886 #ifdef DEBUG
1887           removeThreadLabel((StgWord)task->tso->id);
1888 #endif
1889           return rtsTrue; // tells schedule() to return
1890       }
1891
1892       return rtsFalse;
1893 }
1894
1895 /* -----------------------------------------------------------------------------
1896  * Perform a heap census, if PROFILING
1897  * -------------------------------------------------------------------------- */
1898
1899 static rtsBool
1900 scheduleDoHeapProfile( rtsBool ready_to_gc STG_UNUSED )
1901 {
1902 #if defined(PROFILING)
1903     // When we have +RTS -i0 and we're heap profiling, do a census at
1904     // every GC.  This lets us get repeatable runs for debugging.
1905     if (performHeapProfile ||
1906         (RtsFlags.ProfFlags.profileInterval==0 &&
1907          RtsFlags.ProfFlags.doHeapProfile && ready_to_gc)) {
1908
1909         // checking black holes is necessary before GC, otherwise
1910         // there may be threads that are unreachable except by the
1911         // blackhole queue, which the GC will consider to be
1912         // deadlocked.
1913         scheduleCheckBlackHoles(&MainCapability);
1914
1915         IF_DEBUG(scheduler, sched_belch("garbage collecting before heap census"));
1916         GarbageCollect(GetRoots, rtsTrue);
1917
1918         IF_DEBUG(scheduler, sched_belch("performing heap census"));
1919         heapCensus();
1920
1921         performHeapProfile = rtsFalse;
1922         return rtsTrue;  // true <=> we already GC'd
1923     }
1924 #endif
1925     return rtsFalse;
1926 }
1927
1928 /* -----------------------------------------------------------------------------
1929  * Perform a garbage collection if necessary
1930  * -------------------------------------------------------------------------- */
1931
1932 static Capability *
1933 scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS,
1934               rtsBool force_major, void (*get_roots)(evac_fn))
1935 {
1936     StgTSO *t;
1937 #ifdef THREADED_RTS
1938     static volatile StgWord waiting_for_gc;
1939     rtsBool was_waiting;
1940     nat i;
1941 #endif
1942
1943 #ifdef THREADED_RTS
1944     // In order to GC, there must be no threads running Haskell code.
1945     // Therefore, the GC thread needs to hold *all* the capabilities,
1946     // and release them after the GC has completed.
1947     //
1948     // This seems to be the simplest way: previous attempts involved
1949     // making all the threads with capabilities give up their
1950     // capabilities and sleep except for the *last* one, which
1951     // actually did the GC.  But it's quite hard to arrange for all
1952     // the other tasks to sleep and stay asleep.
1953     //
1954
1955     was_waiting = cas(&waiting_for_gc, 0, 1);
1956     if (was_waiting) {
1957         do {
1958             IF_DEBUG(scheduler, sched_belch("someone else is trying to GC..."));
1959             if (cap) yieldCapability(&cap,task);
1960         } while (waiting_for_gc);
1961         return cap;  // NOTE: task->cap might have changed here
1962     }
1963
1964     for (i=0; i < n_capabilities; i++) {
1965         IF_DEBUG(scheduler, sched_belch("ready_to_gc, grabbing all the capabilies (%d/%d)", i, n_capabilities));
1966         if (cap != &capabilities[i]) {
1967             Capability *pcap = &capabilities[i];
1968             // we better hope this task doesn't get migrated to
1969             // another Capability while we're waiting for this one.
1970             // It won't, because load balancing happens while we have
1971             // all the Capabilities, but even so it's a slightly
1972             // unsavoury invariant.
1973             task->cap = pcap;
1974             context_switch = 1;
1975             waitForReturnCapability(&pcap, task);
1976             if (pcap != &capabilities[i]) {
1977                 barf("scheduleDoGC: got the wrong capability");
1978             }
1979         }
1980     }
1981
1982     waiting_for_gc = rtsFalse;
1983 #endif
1984
1985     /* Kick any transactions which are invalid back to their
1986      * atomically frames.  When next scheduled they will try to
1987      * commit, this commit will fail and they will retry.
1988      */
1989     {
1990         StgTSO *next;
1991
1992         for (t = all_threads; t != END_TSO_QUEUE; t = next) {
1993             if (t->what_next == ThreadRelocated) {
1994                 next = t->link;
1995             } else {
1996                 next = t->global_link;
1997                 if (t -> trec != NO_TREC && t -> why_blocked == NotBlocked) {
1998                     if (!stmValidateNestOfTransactions (t -> trec)) {
1999                         IF_DEBUG(stm, sched_belch("trec %p found wasting its time", t));
2000
2001                         // strip the stack back to the
2002                         // ATOMICALLY_FRAME, aborting the (nested)
2003                         // transaction, and saving the stack of any
2004                         // partially-evaluated thunks on the heap.
2005                         raiseAsync_(&capabilities[0], t, NULL, rtsTrue, NULL);
2006
2007 #ifdef REG_R1
2008                         ASSERT(get_itbl((StgClosure *)t->sp)->type == ATOMICALLY_FRAME);
2009 #endif
2010                     }
2011                 }
2012             }
2013         }
2014     }
2015
2016     // so this happens periodically:
2017     if (cap) scheduleCheckBlackHoles(cap);
2018
2019     IF_DEBUG(scheduler, printAllThreads());
2020
2021     /*
2022      * We now have all the capabilities; if we're in an interrupting
2023      * state, then we should take the opportunity to delete all the
2024      * threads in the system.
2025      */
2026     if (sched_state >= SCHED_INTERRUPTING) {
2027         deleteAllThreads(&capabilities[0]);
2028         sched_state = SCHED_INTERRUPTED;
2029     }
2030
2031     /* everybody back, start the GC.
2032      * Could do it in this thread, or signal a condition var
2033      * to do it in another thread.  Either way, we need to
2034      * broadcast on gc_pending_cond afterward.
2035      */
2036 #if defined(THREADED_RTS)
2037     IF_DEBUG(scheduler,sched_belch("doing GC"));
2038 #endif
2039     GarbageCollect(get_roots, force_major);
2040
2041 #if defined(THREADED_RTS)
2042     // release our stash of capabilities.
2043     for (i = 0; i < n_capabilities; i++) {
2044         if (cap != &capabilities[i]) {
2045             task->cap = &capabilities[i];
2046             releaseCapability(&capabilities[i]);
2047         }
2048     }
2049     if (cap) {
2050         task->cap = cap;
2051     } else {
2052         task->cap = NULL;
2053     }
2054 #endif
2055
2056 #if defined(GRAN)
2057     /* add a ContinueThread event to continue execution of current thread */
2058     new_event(CurrentProc, CurrentProc, CurrentTime[CurrentProc],
2059               ContinueThread,
2060               t, (StgClosure*)NULL, (rtsSpark*)NULL);
2061     IF_GRAN_DEBUG(bq,
2062                   debugBelch("GRAN: eventq and runnableq after Garbage collection:\n\n");
2063                   G_EVENTQ(0);
2064                   G_CURR_THREADQ(0));
2065 #endif /* GRAN */
2066
2067     return cap;
2068 }
2069
2070 /* ---------------------------------------------------------------------------
2071  * rtsSupportsBoundThreads(): is the RTS built to support bound threads?
2072  * used by Control.Concurrent for error checking.
2073  * ------------------------------------------------------------------------- */
2074
2075 StgBool
2076 rtsSupportsBoundThreads(void)
2077 {
2078 #if defined(THREADED_RTS)
2079   return rtsTrue;
2080 #else
2081   return rtsFalse;
2082 #endif
2083 }
2084
2085 /* ---------------------------------------------------------------------------
2086  * isThreadBound(tso): check whether tso is bound to an OS thread.
2087  * ------------------------------------------------------------------------- */
2088
2089 StgBool
2090 isThreadBound(StgTSO* tso USED_IF_THREADS)
2091 {
2092 #if defined(THREADED_RTS)
2093   return (tso->bound != NULL);
2094 #endif
2095   return rtsFalse;
2096 }
2097
2098 /* ---------------------------------------------------------------------------
2099  * Singleton fork(). Do not copy any running threads.
2100  * ------------------------------------------------------------------------- */
2101
2102 #if !defined(mingw32_HOST_OS)
2103 #define FORKPROCESS_PRIMOP_SUPPORTED
2104 #endif
2105
2106 #ifdef FORKPROCESS_PRIMOP_SUPPORTED
2107 static void
2108 deleteThreadImmediately(Capability *cap, StgTSO *tso);
2109 #endif
2110 StgInt
2111 forkProcess(HsStablePtr *entry
2112 #ifndef FORKPROCESS_PRIMOP_SUPPORTED
2113             STG_UNUSED
2114 #endif
2115            )
2116 {
2117 #ifdef FORKPROCESS_PRIMOP_SUPPORTED
2118     Task *task;
2119     pid_t pid;
2120     StgTSO* t,*next;
2121     Capability *cap;
2122
2123 #if defined(THREADED_RTS)
2124     if (RtsFlags.ParFlags.nNodes > 1) {
2125         errorBelch("forking not supported with +RTS -N<n> greater than 1");
2126         stg_exit(EXIT_FAILURE);
2127     }
2128 #endif
2129
2130     IF_DEBUG(scheduler,sched_belch("forking!"));
2131
2132     // ToDo: for SMP, we should probably acquire *all* the capabilities
2133     cap = rts_lock();
2134
2135     pid = fork();
2136
2137     if (pid) { // parent
2138
2139         // just return the pid
2140         rts_unlock(cap);
2141         return pid;
2142
2143     } else { // child
2144
2145         // delete all threads
2146         cap->run_queue_hd = END_TSO_QUEUE;
2147         cap->run_queue_tl = END_TSO_QUEUE;
2148
2149         for (t = all_threads; t != END_TSO_QUEUE; t = next) {
2150             next = t->link;
2151
2152             // don't allow threads to catch the ThreadKilled exception
2153             deleteThreadImmediately(cap,t);
2154         }
2155
2156         // wipe the task list
2157         ACQUIRE_LOCK(&sched_mutex);
2158         for (task = all_tasks; task != NULL; task=task->all_link) {
2159             if (task != cap->running_task) discardTask(task);
2160         }
2161         RELEASE_LOCK(&sched_mutex);
2162
2163         cap->suspended_ccalling_tasks = NULL;
2164
2165 #if defined(THREADED_RTS)
2166         // wipe our spare workers list.
2167         cap->spare_workers = NULL;
2168         cap->returning_tasks_hd = NULL;
2169         cap->returning_tasks_tl = NULL;
2170 #endif
2171
2172         cap = rts_evalStableIO(cap, entry, NULL);  // run the action
2173         rts_checkSchedStatus("forkProcess",cap);
2174
2175         rts_unlock(cap);
2176         hs_exit();                      // clean up and exit
2177         stg_exit(EXIT_SUCCESS);
2178     }
2179 #else /* !FORKPROCESS_PRIMOP_SUPPORTED */
2180     barf("forkProcess#: primop not supported on this platform, sorry!\n");
2181     return -1;
2182 #endif
2183 }
2184
2185 /* ---------------------------------------------------------------------------
2186  * Delete all the threads in the system
2187  * ------------------------------------------------------------------------- */
2188
2189 static void
2190 deleteAllThreads ( Capability *cap )
2191 {
2192   StgTSO* t, *next;
2193   IF_DEBUG(scheduler,sched_belch("deleting all threads"));
2194   for (t = all_threads; t != END_TSO_QUEUE; t = next) {
2195       if (t->what_next == ThreadRelocated) {
2196           next = t->link;
2197       } else {
2198           next = t->global_link;
2199           deleteThread(cap,t);
2200       }
2201   }
2202
2203   // The run queue now contains a bunch of ThreadKilled threads.  We
2204   // must not throw these away: the main thread(s) will be in there
2205   // somewhere, and the main scheduler loop has to deal with it.
2206   // Also, the run queue is the only thing keeping these threads from
2207   // being GC'd, and we don't want the "main thread has been GC'd" panic.
2208
2209 #if !defined(THREADED_RTS)
2210   ASSERT(blocked_queue_hd == END_TSO_QUEUE);
2211   ASSERT(sleeping_queue == END_TSO_QUEUE);
2212 #endif
2213 }
2214
2215 /* -----------------------------------------------------------------------------
2216    Managing the suspended_ccalling_tasks list.
2217    Locks required: sched_mutex
2218    -------------------------------------------------------------------------- */
2219
2220 STATIC_INLINE void
2221 suspendTask (Capability *cap, Task *task)
2222 {
2223     ASSERT(task->next == NULL && task->prev == NULL);
2224     task->next = cap->suspended_ccalling_tasks;
2225     task->prev = NULL;
2226     if (cap->suspended_ccalling_tasks) {
2227         cap->suspended_ccalling_tasks->prev = task;
2228     }
2229     cap->suspended_ccalling_tasks = task;
2230 }
2231
2232 STATIC_INLINE void
2233 recoverSuspendedTask (Capability *cap, Task *task)
2234 {
2235     if (task->prev) {
2236         task->prev->next = task->next;
2237     } else {
2238         ASSERT(cap->suspended_ccalling_tasks == task);
2239         cap->suspended_ccalling_tasks = task->next;
2240     }
2241     if (task->next) {
2242         task->next->prev = task->prev;
2243     }
2244     task->next = task->prev = NULL;
2245 }
2246
2247 /* ---------------------------------------------------------------------------
2248  * Suspending & resuming Haskell threads.
2249  *
2250  * When making a "safe" call to C (aka _ccall_GC), the task gives back
2251  * its capability before calling the C function.  This allows another
2252  * task to pick up the capability and carry on running Haskell
2253  * threads.  It also means that if the C call blocks, it won't lock
2254  * the whole system.
2255  *
2256  * The Haskell thread making the C call is put to sleep for the
2257  * duration of the call, on the susepended_ccalling_threads queue.  We
2258  * give out a token to the task, which it can use to resume the thread
2259  * on return from the C function.
2260  * ------------------------------------------------------------------------- */
2261
2262 void *
2263 suspendThread (StgRegTable *reg)
2264 {
2265   Capability *cap;
2266   int saved_errno = errno;
2267   StgTSO *tso;
2268   Task *task;
2269
2270   /* assume that *reg is a pointer to the StgRegTable part of a Capability.
2271    */
2272   cap = regTableToCapability(reg);
2273
2274   task = cap->running_task;
2275   tso = cap->r.rCurrentTSO;
2276
2277   IF_DEBUG(scheduler,
2278            sched_belch("thread %d did a safe foreign call", cap->r.rCurrentTSO->id));
2279
2280   // XXX this might not be necessary --SDM
2281   tso->what_next = ThreadRunGHC;
2282
2283   threadPaused(cap,tso);
2284
2285   if(tso->blocked_exceptions == NULL)  {
2286       tso->why_blocked = BlockedOnCCall;
2287       tso->blocked_exceptions = END_TSO_QUEUE;
2288   } else {
2289       tso->why_blocked = BlockedOnCCall_NoUnblockExc;
2290   }
2291
2292   // Hand back capability
2293   task->suspended_tso = tso;
2294
2295   ACQUIRE_LOCK(&cap->lock);
2296
2297   suspendTask(cap,task);
2298   cap->in_haskell = rtsFalse;
2299   releaseCapability_(cap);
2300
2301   RELEASE_LOCK(&cap->lock);
2302
2303 #if defined(THREADED_RTS)
2304   /* Preparing to leave the RTS, so ensure there's a native thread/task
2305      waiting to take over.
2306   */
2307   IF_DEBUG(scheduler, sched_belch("thread %d: leaving RTS", tso->id));
2308 #endif
2309
2310   errno = saved_errno;
2311   return task;
2312 }
2313
2314 StgRegTable *
2315 resumeThread (void *task_)
2316 {
2317     StgTSO *tso;
2318     Capability *cap;
2319     int saved_errno = errno;
2320     Task *task = task_;
2321
2322     cap = task->cap;
2323     // Wait for permission to re-enter the RTS with the result.
2324     waitForReturnCapability(&cap,task);
2325     // we might be on a different capability now... but if so, our
2326     // entry on the suspended_ccalling_tasks list will also have been
2327     // migrated.
2328
2329     // Remove the thread from the suspended list
2330     recoverSuspendedTask(cap,task);
2331
2332     tso = task->suspended_tso;
2333     task->suspended_tso = NULL;
2334     tso->link = END_TSO_QUEUE;
2335     IF_DEBUG(scheduler, sched_belch("thread %d: re-entering RTS", tso->id));
2336
2337     if (tso->why_blocked == BlockedOnCCall) {
2338         awakenBlockedQueue(cap,tso->blocked_exceptions);
2339         tso->blocked_exceptions = NULL;
2340     }
2341
2342     /* Reset blocking status */
2343     tso->why_blocked  = NotBlocked;
2344
2345     cap->r.rCurrentTSO = tso;
2346     cap->in_haskell = rtsTrue;
2347     errno = saved_errno;
2348
2349     /* We might have GC'd, mark the TSO dirty again */
2350     dirtyTSO(tso);
2351
2352     IF_DEBUG(sanity, checkTSO(tso));
2353
2354     return &cap->r;
2355 }
2356
2357 /* ---------------------------------------------------------------------------
2358  * Comparing Thread ids.
2359  *
2360  * This is used from STG land in the implementation of the
2361  * instances of Eq/Ord for ThreadIds.
2362  * ------------------------------------------------------------------------ */
2363
2364 int
2365 cmp_thread(StgPtr tso1, StgPtr tso2)
2366 {
2367   StgThreadID id1 = ((StgTSO *)tso1)->id;
2368   StgThreadID id2 = ((StgTSO *)tso2)->id;
2369
2370   if (id1 < id2) return (-1);
2371   if (id1 > id2) return 1;
2372   return 0;
2373 }
2374
2375 /* ---------------------------------------------------------------------------
2376  * Fetching the ThreadID from an StgTSO.
2377  *
2378  * This is used in the implementation of Show for ThreadIds.
2379  * ------------------------------------------------------------------------ */
2380 int
2381 rts_getThreadId(StgPtr tso)
2382 {
2383   return ((StgTSO *)tso)->id;
2384 }
2385
2386 #ifdef DEBUG
2387 void
2388 labelThread(StgPtr tso, char *label)
2389 {
2390   int len;
2391   void *buf;
2392
2393   /* Caveat: Once set, you can only set the thread name to "" */
2394   len = strlen(label)+1;
2395   buf = stgMallocBytes(len * sizeof(char), "Schedule.c:labelThread()");
2396   strncpy(buf,label,len);
2397   /* Update will free the old memory for us */
2398   updateThreadLabel(((StgTSO *)tso)->id,buf);
2399 }
2400 #endif /* DEBUG */
2401
2402 /* ---------------------------------------------------------------------------
2403    Create a new thread.
2404
2405    The new thread starts with the given stack size.  Before the
2406    scheduler can run, however, this thread needs to have a closure
2407    (and possibly some arguments) pushed on its stack.  See
2408    pushClosure() in Schedule.h.
2409
2410    createGenThread() and createIOThread() (in SchedAPI.h) are
2411    convenient packaged versions of this function.
2412
2413    currently pri (priority) is only used in a GRAN setup -- HWL
2414    ------------------------------------------------------------------------ */
2415 #if defined(GRAN)
2416 /*   currently pri (priority) is only used in a GRAN setup -- HWL */
2417 StgTSO *
2418 createThread(nat size, StgInt pri)
2419 #else
2420 StgTSO *
2421 createThread(Capability *cap, nat size)
2422 #endif
2423 {
2424     StgTSO *tso;
2425     nat stack_size;
2426
2427     /* sched_mutex is *not* required */
2428
2429     /* First check whether we should create a thread at all */
2430 #if defined(PARALLEL_HASKELL)
2431     /* check that no more than RtsFlags.ParFlags.maxThreads threads are created */
2432     if (advisory_thread_count >= RtsFlags.ParFlags.maxThreads) {
2433         threadsIgnored++;
2434         debugBelch("{createThread}Daq ghuH: refusing to create another thread; no more than %d threads allowed (currently %d)\n",
2435                    RtsFlags.ParFlags.maxThreads, advisory_thread_count);
2436         return END_TSO_QUEUE;
2437     }
2438     threadsCreated++;
2439 #endif
2440
2441 #if defined(GRAN)
2442     ASSERT(!RtsFlags.GranFlags.Light || CurrentProc==0);
2443 #endif
2444
2445     // ToDo: check whether size = stack_size - TSO_STRUCT_SIZEW
2446
2447     /* catch ridiculously small stack sizes */
2448     if (size < MIN_STACK_WORDS + TSO_STRUCT_SIZEW) {
2449         size = MIN_STACK_WORDS + TSO_STRUCT_SIZEW;
2450     }
2451
2452     stack_size = size - TSO_STRUCT_SIZEW;
2453
2454     tso = (StgTSO *)allocateLocal(cap, size);
2455     TICK_ALLOC_TSO(stack_size, 0);
2456
2457     SET_HDR(tso, &stg_TSO_info, CCS_SYSTEM);
2458 #if defined(GRAN)
2459     SET_GRAN_HDR(tso, ThisPE);
2460 #endif
2461
2462     // Always start with the compiled code evaluator
2463     tso->what_next = ThreadRunGHC;
2464
2465     tso->why_blocked  = NotBlocked;
2466     tso->blocked_exceptions = NULL;
2467     tso->flags = TSO_DIRTY;
2468
2469     tso->saved_errno = 0;
2470     tso->bound = NULL;
2471
2472     tso->stack_size     = stack_size;
2473     tso->max_stack_size = round_to_mblocks(RtsFlags.GcFlags.maxStkSize)
2474                           - TSO_STRUCT_SIZEW;
2475     tso->sp             = (P_)&(tso->stack) + stack_size;
2476
2477     tso->trec = NO_TREC;
2478
2479 #ifdef PROFILING
2480     tso->prof.CCCS = CCS_MAIN;
2481 #endif
2482
2483   /* put a stop frame on the stack */
2484     tso->sp -= sizeofW(StgStopFrame);
2485     SET_HDR((StgClosure*)tso->sp,(StgInfoTable *)&stg_stop_thread_info,CCS_SYSTEM);
2486     tso->link = END_TSO_QUEUE;
2487
2488   // ToDo: check this
2489 #if defined(GRAN)
2490     /* uses more flexible routine in GranSim */
2491     insertThread(tso, CurrentProc);
2492 #else
2493     /* In a non-GranSim setup the pushing of a TSO onto the runq is separated
2494      * from its creation
2495      */
2496 #endif
2497
2498 #if defined(GRAN)
2499     if (RtsFlags.GranFlags.GranSimStats.Full)
2500         DumpGranEvent(GR_START,tso);
2501 #elif defined(PARALLEL_HASKELL)
2502     if (RtsFlags.ParFlags.ParStats.Full)
2503         DumpGranEvent(GR_STARTQ,tso);
2504     /* HACk to avoid SCHEDULE
2505        LastTSO = tso; */
2506 #endif
2507
2508     /* Link the new thread on the global thread list.
2509      */
2510     ACQUIRE_LOCK(&sched_mutex);
2511     tso->id = next_thread_id++;  // while we have the mutex
2512     tso->global_link = all_threads;
2513     all_threads = tso;
2514     RELEASE_LOCK(&sched_mutex);
2515
2516 #if defined(DIST)
2517     tso->dist.priority = MandatoryPriority; //by default that is...
2518 #endif
2519
2520 #if defined(GRAN)
2521     tso->gran.pri = pri;
2522 # if defined(DEBUG)
2523     tso->gran.magic = TSO_MAGIC; // debugging only
2524 # endif
2525     tso->gran.sparkname   = 0;
2526     tso->gran.startedat   = CURRENT_TIME;
2527     tso->gran.exported    = 0;
2528     tso->gran.basicblocks = 0;
2529     tso->gran.allocs      = 0;
2530     tso->gran.exectime    = 0;
2531     tso->gran.fetchtime   = 0;
2532     tso->gran.fetchcount  = 0;
2533     tso->gran.blocktime   = 0;
2534     tso->gran.blockcount  = 0;
2535     tso->gran.blockedat   = 0;
2536     tso->gran.globalsparks = 0;
2537     tso->gran.localsparks  = 0;
2538     if (RtsFlags.GranFlags.Light)
2539         tso->gran.clock  = Now; /* local clock */
2540     else
2541         tso->gran.clock  = 0;
2542
2543     IF_DEBUG(gran,printTSO(tso));
2544 #elif defined(PARALLEL_HASKELL)
2545 # if defined(DEBUG)
2546     tso->par.magic = TSO_MAGIC; // debugging only
2547 # endif
2548     tso->par.sparkname   = 0;
2549     tso->par.startedat   = CURRENT_TIME;
2550     tso->par.exported    = 0;
2551     tso->par.basicblocks = 0;
2552     tso->par.allocs      = 0;
2553     tso->par.exectime    = 0;
2554     tso->par.fetchtime   = 0;
2555     tso->par.fetchcount  = 0;
2556     tso->par.blocktime   = 0;
2557     tso->par.blockcount  = 0;
2558     tso->par.blockedat   = 0;
2559     tso->par.globalsparks = 0;
2560     tso->par.localsparks  = 0;
2561 #endif
2562
2563 #if defined(GRAN)
2564     globalGranStats.tot_threads_created++;
2565     globalGranStats.threads_created_on_PE[CurrentProc]++;
2566     globalGranStats.tot_sq_len += spark_queue_len(CurrentProc);
2567     globalGranStats.tot_sq_probes++;
2568 #elif defined(PARALLEL_HASKELL)
2569     // collect parallel global statistics (currently done together with GC stats)
2570     if (RtsFlags.ParFlags.ParStats.Global &&
2571         RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
2572         //debugBelch("Creating thread %d @ %11.2f\n", tso->id, usertime());
2573         globalParStats.tot_threads_created++;
2574     }
2575 #endif
2576
2577 #if defined(GRAN)
2578     IF_GRAN_DEBUG(pri,
2579                   sched_belch("==__ schedule: Created TSO %d (%p);",
2580                               CurrentProc, tso, tso->id));
2581 #elif defined(PARALLEL_HASKELL)
2582     IF_PAR_DEBUG(verbose,
2583                  sched_belch("==__ schedule: Created TSO %d (%p); %d threads active",
2584                              (long)tso->id, tso, advisory_thread_count));
2585 #else
2586     IF_DEBUG(scheduler,sched_belch("created thread %ld, stack size = %lx words",
2587                                    (long)tso->id, (long)tso->stack_size));
2588 #endif
2589     return tso;
2590 }
2591
2592 #if defined(PAR)
2593 /* RFP:
2594    all parallel thread creation calls should fall through the following routine.
2595 */
2596 StgTSO *
2597 createThreadFromSpark(rtsSpark spark)
2598 { StgTSO *tso;
2599   ASSERT(spark != (rtsSpark)NULL);
2600 // JB: TAKE CARE OF THIS COUNTER! BUGGY
2601   if (advisory_thread_count >= RtsFlags.ParFlags.maxThreads)
2602   { threadsIgnored++;
2603     barf("{createSparkThread}Daq ghuH: refusing to create another thread; no more than %d threads allowed (currently %d)",
2604           RtsFlags.ParFlags.maxThreads, advisory_thread_count);
2605     return END_TSO_QUEUE;
2606   }
2607   else
2608   { threadsCreated++;
2609     tso = createThread(RtsFlags.GcFlags.initialStkSize);
2610     if (tso==END_TSO_QUEUE)
2611       barf("createSparkThread: Cannot create TSO");
2612 #if defined(DIST)
2613     tso->priority = AdvisoryPriority;
2614 #endif
2615     pushClosure(tso,spark);
2616     addToRunQueue(tso);
2617     advisory_thread_count++;  // JB: TAKE CARE OF THIS COUNTER! BUGGY
2618   }
2619   return tso;
2620 }
2621 #endif
2622
2623 /*
2624   Turn a spark into a thread.
2625   ToDo: fix for SMP (needs to acquire SCHED_MUTEX!)
2626 */
2627 #if 0
2628 StgTSO *
2629 activateSpark (rtsSpark spark)
2630 {
2631   StgTSO *tso;
2632
2633   tso = createSparkThread(spark);
2634   if (RtsFlags.ParFlags.ParStats.Full) {
2635     //ASSERT(run_queue_hd == END_TSO_QUEUE); // I think ...
2636       IF_PAR_DEBUG(verbose,
2637                    debugBelch("==^^ activateSpark: turning spark of closure %p (%s) into a thread\n",
2638                               (StgClosure *)spark, info_type((StgClosure *)spark)));
2639   }
2640   // ToDo: fwd info on local/global spark to thread -- HWL
2641   // tso->gran.exported =  spark->exported;
2642   // tso->gran.locked =   !spark->global;
2643   // tso->gran.sparkname = spark->name;
2644
2645   return tso;
2646 }
2647 #endif
2648
2649 /* ---------------------------------------------------------------------------
2650  * scheduleThread()
2651  *
2652  * scheduleThread puts a thread on the end  of the runnable queue.
2653  * This will usually be done immediately after a thread is created.
2654  * The caller of scheduleThread must create the thread using e.g.
2655  * createThread and push an appropriate closure
2656  * on this thread's stack before the scheduler is invoked.
2657  * ------------------------------------------------------------------------ */
2658
2659 void
2660 scheduleThread(Capability *cap, StgTSO *tso)
2661 {
2662     // The thread goes at the *end* of the run-queue, to avoid possible
2663     // starvation of any threads already on the queue.
2664     appendToRunQueue(cap,tso);
2665 }
2666
2667 Capability *
2668 scheduleWaitThread (StgTSO* tso, /*[out]*/HaskellObj* ret, Capability *cap)
2669 {
2670     Task *task;
2671
2672     // We already created/initialised the Task
2673     task = cap->running_task;
2674
2675     // This TSO is now a bound thread; make the Task and TSO
2676     // point to each other.
2677     tso->bound = task;
2678
2679     task->tso = tso;
2680     task->ret = ret;
2681     task->stat = NoStatus;
2682
2683     appendToRunQueue(cap,tso);
2684
2685     IF_DEBUG(scheduler, sched_belch("new bound thread (%d)", tso->id));
2686
2687 #if defined(GRAN)
2688     /* GranSim specific init */
2689     CurrentTSO = m->tso;                // the TSO to run
2690     procStatus[MainProc] = Busy;        // status of main PE
2691     CurrentProc = MainProc;             // PE to run it on
2692 #endif
2693
2694     cap = schedule(cap,task);
2695
2696     ASSERT(task->stat != NoStatus);
2697     ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
2698
2699     IF_DEBUG(scheduler, sched_belch("bound thread (%d) finished", task->tso->id));
2700     return cap;
2701 }
2702
2703 /* ----------------------------------------------------------------------------
2704  * Starting Tasks
2705  * ------------------------------------------------------------------------- */
2706
2707 #if defined(THREADED_RTS)
2708 void
2709 workerStart(Task *task)
2710 {
2711     Capability *cap;
2712
2713     // See startWorkerTask().
2714     ACQUIRE_LOCK(&task->lock);
2715     cap = task->cap;
2716     RELEASE_LOCK(&task->lock);
2717
2718     // set the thread-local pointer to the Task:
2719     taskEnter(task);
2720
2721     // schedule() runs without a lock.
2722     cap = schedule(cap,task);
2723
2724     // On exit from schedule(), we have a Capability.
2725     releaseCapability(cap);
2726     taskStop(task);
2727 }
2728 #endif
2729
2730 /* ---------------------------------------------------------------------------
2731  * initScheduler()
2732  *
2733  * Initialise the scheduler.  This resets all the queues - if the
2734  * queues contained any threads, they'll be garbage collected at the
2735  * next pass.
2736  *
2737  * ------------------------------------------------------------------------ */
2738
2739 void
2740 initScheduler(void)
2741 {
2742 #if defined(GRAN)
2743   nat i;
2744   for (i=0; i<=MAX_PROC; i++) {
2745     run_queue_hds[i]      = END_TSO_QUEUE;
2746     run_queue_tls[i]      = END_TSO_QUEUE;
2747     blocked_queue_hds[i]  = END_TSO_QUEUE;
2748     blocked_queue_tls[i]  = END_TSO_QUEUE;
2749     ccalling_threadss[i]  = END_TSO_QUEUE;
2750     blackhole_queue[i]    = END_TSO_QUEUE;
2751     sleeping_queue        = END_TSO_QUEUE;
2752   }
2753 #elif !defined(THREADED_RTS)
2754   blocked_queue_hd  = END_TSO_QUEUE;
2755   blocked_queue_tl  = END_TSO_QUEUE;
2756   sleeping_queue    = END_TSO_QUEUE;
2757 #endif
2758
2759   blackhole_queue   = END_TSO_QUEUE;
2760   all_threads       = END_TSO_QUEUE;
2761
2762   context_switch = 0;
2763   sched_state    = SCHED_RUNNING;
2764
2765   RtsFlags.ConcFlags.ctxtSwitchTicks =
2766       RtsFlags.ConcFlags.ctxtSwitchTime / TICK_MILLISECS;
2767
2768 #if defined(THREADED_RTS)
2769   /* Initialise the mutex and condition variables used by
2770    * the scheduler. */
2771   initMutex(&sched_mutex);
2772 #endif
2773
2774   ACQUIRE_LOCK(&sched_mutex);
2775
2776   /* A capability holds the state a native thread needs in
2777    * order to execute STG code. At least one capability is
2778    * floating around (only THREADED_RTS builds have more than one).
2779    */
2780   initCapabilities();
2781
2782   initTaskManager();
2783
2784 #if defined(THREADED_RTS) || defined(PARALLEL_HASKELL)
2785   initSparkPools();
2786 #endif
2787
2788 #if defined(THREADED_RTS)
2789   /*
2790    * Eagerly start one worker to run each Capability, except for
2791    * Capability 0.  The idea is that we're probably going to start a
2792    * bound thread on Capability 0 pretty soon, so we don't want a
2793    * worker task hogging it.
2794    */
2795   {
2796       nat i;
2797       Capability *cap;
2798       for (i = 1; i < n_capabilities; i++) {
2799           cap = &capabilities[i];
2800           ACQUIRE_LOCK(&cap->lock);
2801           startWorkerTask(cap, workerStart);
2802           RELEASE_LOCK(&cap->lock);
2803       }
2804   }
2805 #endif
2806
2807   RELEASE_LOCK(&sched_mutex);
2808 }
2809
2810 void
2811 exitScheduler( void )
2812 {
2813     Task *task = NULL;
2814
2815 #if defined(THREADED_RTS)
2816     ACQUIRE_LOCK(&sched_mutex);
2817     task = newBoundTask();
2818     RELEASE_LOCK(&sched_mutex);
2819 #endif
2820
2821     // If we haven't killed all the threads yet, do it now.
2822     if (sched_state < SCHED_INTERRUPTED) {
2823         sched_state = SCHED_INTERRUPTING;
2824         scheduleDoGC(NULL,task,rtsFalse,GetRoots);
2825     }
2826     sched_state = SCHED_SHUTTING_DOWN;
2827
2828 #if defined(THREADED_RTS)
2829     {
2830         nat i;
2831
2832         for (i = 0; i < n_capabilities; i++) {
2833             shutdownCapability(&capabilities[i], task);
2834         }
2835         boundTaskExiting(task);
2836         stopTaskManager();
2837     }
2838 #endif
2839 }
2840
2841 /* ---------------------------------------------------------------------------
2842    Where are the roots that we know about?
2843
2844         - all the threads on the runnable queue
2845         - all the threads on the blocked queue
2846         - all the threads on the sleeping queue
2847         - all the thread currently executing a _ccall_GC
2848         - all the "main threads"
2849
2850    ------------------------------------------------------------------------ */
2851
2852 /* This has to be protected either by the scheduler monitor, or by the
2853         garbage collection monitor (probably the latter).
2854         KH @ 25/10/99
2855 */
2856
2857 void
2858 GetRoots( evac_fn evac )
2859 {
2860     nat i;
2861     Capability *cap;
2862     Task *task;
2863
2864 #if defined(GRAN)
2865     for (i=0; i<=RtsFlags.GranFlags.proc; i++) {
2866         if ((run_queue_hds[i] != END_TSO_QUEUE) && ((run_queue_hds[i] != NULL)))
2867             evac((StgClosure **)&run_queue_hds[i]);
2868         if ((run_queue_tls[i] != END_TSO_QUEUE) && ((run_queue_tls[i] != NULL)))
2869             evac((StgClosure **)&run_queue_tls[i]);
2870
2871         if ((blocked_queue_hds[i] != END_TSO_QUEUE) && ((blocked_queue_hds[i] != NULL)))
2872             evac((StgClosure **)&blocked_queue_hds[i]);
2873         if ((blocked_queue_tls[i] != END_TSO_QUEUE) && ((blocked_queue_tls[i] != NULL)))
2874             evac((StgClosure **)&blocked_queue_tls[i]);
2875         if ((ccalling_threadss[i] != END_TSO_QUEUE) && ((ccalling_threadss[i] != NULL)))
2876             evac((StgClosure **)&ccalling_threads[i]);
2877     }
2878
2879     markEventQueue();
2880
2881 #else /* !GRAN */
2882
2883     for (i = 0; i < n_capabilities; i++) {
2884         cap = &capabilities[i];
2885         evac((StgClosure **)&cap->run_queue_hd);
2886         evac((StgClosure **)&cap->run_queue_tl);
2887
2888         for (task = cap->suspended_ccalling_tasks; task != NULL;
2889              task=task->next) {
2890             evac((StgClosure **)&task->suspended_tso);
2891         }
2892     }
2893
2894 #if !defined(THREADED_RTS)
2895     evac((StgClosure **)(void *)&blocked_queue_hd);
2896     evac((StgClosure **)(void *)&blocked_queue_tl);
2897     evac((StgClosure **)(void *)&sleeping_queue);
2898 #endif
2899 #endif
2900
2901     // evac((StgClosure **)&blackhole_queue);
2902
2903 #if defined(THREADED_RTS) || defined(PARALLEL_HASKELL) || defined(GRAN)
2904     markSparkQueue(evac);
2905 #endif
2906
2907 #if defined(RTS_USER_SIGNALS)
2908     // mark the signal handlers (signals should be already blocked)
2909     markSignalHandlers(evac);
2910 #endif
2911 }
2912
2913 /* -----------------------------------------------------------------------------
2914    performGC
2915
2916    This is the interface to the garbage collector from Haskell land.
2917    We provide this so that external C code can allocate and garbage
2918    collect when called from Haskell via _ccall_GC.
2919
2920    It might be useful to provide an interface whereby the programmer
2921    can specify more roots (ToDo).
2922
2923    This needs to be protected by the GC condition variable above.  KH.
2924    -------------------------------------------------------------------------- */
2925
2926 static void (*extra_roots)(evac_fn);
2927
2928 static void
2929 performGC_(rtsBool force_major, void (*get_roots)(evac_fn))
2930 {
2931     Task *task = myTask();
2932
2933     if (task == NULL) {
2934         ACQUIRE_LOCK(&sched_mutex);
2935         task = newBoundTask();
2936         RELEASE_LOCK(&sched_mutex);
2937         scheduleDoGC(NULL,task,force_major, get_roots);
2938         boundTaskExiting(task);
2939     } else {
2940         scheduleDoGC(NULL,task,force_major, get_roots);
2941     }
2942 }
2943
2944 void
2945 performGC(void)
2946 {
2947     performGC_(rtsFalse, GetRoots);
2948 }
2949
2950 void
2951 performMajorGC(void)
2952 {
2953     performGC_(rtsTrue, GetRoots);
2954 }
2955
2956 static void
2957 AllRoots(evac_fn evac)
2958 {
2959     GetRoots(evac);             // the scheduler's roots
2960     extra_roots(evac);          // the user's roots
2961 }
2962
2963 void
2964 performGCWithRoots(void (*get_roots)(evac_fn))
2965 {
2966     extra_roots = get_roots;
2967     performGC_(rtsFalse, AllRoots);
2968 }
2969
2970 /* -----------------------------------------------------------------------------
2971    Stack overflow
2972
2973    If the thread has reached its maximum stack size, then raise the
2974    StackOverflow exception in the offending thread.  Otherwise
2975    relocate the TSO into a larger chunk of memory and adjust its stack
2976    size appropriately.
2977    -------------------------------------------------------------------------- */
2978
2979 static StgTSO *
2980 threadStackOverflow(Capability *cap, StgTSO *tso)
2981 {
2982   nat new_stack_size, stack_words;
2983   lnat new_tso_size;
2984   StgPtr new_sp;
2985   StgTSO *dest;
2986
2987   IF_DEBUG(sanity,checkTSO(tso));
2988   if (tso->stack_size >= tso->max_stack_size) {
2989
2990     IF_DEBUG(gc,
2991              debugBelch("@@ threadStackOverflow of TSO %ld (%p): stack too large (now %ld; max is %ld)\n",
2992                    (long)tso->id, tso, (long)tso->stack_size, (long)tso->max_stack_size);
2993              /* If we're debugging, just print out the top of the stack */
2994              printStackChunk(tso->sp, stg_min(tso->stack+tso->stack_size,
2995                                               tso->sp+64)));
2996
2997     /* Send this thread the StackOverflow exception */
2998     raiseAsync(cap, tso, (StgClosure *)stackOverflow_closure);
2999     return tso;
3000   }
3001
3002   /* Try to double the current stack size.  If that takes us over the
3003    * maximum stack size for this thread, then use the maximum instead.
3004    * Finally round up so the TSO ends up as a whole number of blocks.
3005    */
3006   new_stack_size = stg_min(tso->stack_size * 2, tso->max_stack_size);
3007   new_tso_size   = (lnat)BLOCK_ROUND_UP(new_stack_size * sizeof(W_) +
3008                                        TSO_STRUCT_SIZE)/sizeof(W_);
3009   new_tso_size = round_to_mblocks(new_tso_size);  /* Be MBLOCK-friendly */
3010   new_stack_size = new_tso_size - TSO_STRUCT_SIZEW;
3011
3012   IF_DEBUG(scheduler, sched_belch("increasing stack size from %ld words to %d.\n", (long)tso->stack_size, new_stack_size));
3013
3014   dest = (StgTSO *)allocate(new_tso_size);
3015   TICK_ALLOC_TSO(new_stack_size,0);
3016
3017   /* copy the TSO block and the old stack into the new area */
3018   memcpy(dest,tso,TSO_STRUCT_SIZE);
3019   stack_words = tso->stack + tso->stack_size - tso->sp;
3020   new_sp = (P_)dest + new_tso_size - stack_words;
3021   memcpy(new_sp, tso->sp, stack_words * sizeof(W_));
3022
3023   /* relocate the stack pointers... */
3024   dest->sp         = new_sp;
3025   dest->stack_size = new_stack_size;
3026
3027   /* Mark the old TSO as relocated.  We have to check for relocated
3028    * TSOs in the garbage collector and any primops that deal with TSOs.
3029    *
3030    * It's important to set the sp value to just beyond the end
3031    * of the stack, so we don't attempt to scavenge any part of the
3032    * dead TSO's stack.
3033    */
3034   tso->what_next = ThreadRelocated;
3035   tso->link = dest;
3036   tso->sp = (P_)&(tso->stack[tso->stack_size]);
3037   tso->why_blocked = NotBlocked;
3038
3039   IF_PAR_DEBUG(verbose,
3040                debugBelch("@@ threadStackOverflow of TSO %d (now at %p): stack size increased to %ld\n",
3041                      tso->id, tso, tso->stack_size);
3042                /* If we're debugging, just print out the top of the stack */
3043                printStackChunk(tso->sp, stg_min(tso->stack+tso->stack_size,
3044                                                 tso->sp+64)));
3045
3046   IF_DEBUG(sanity,checkTSO(tso));
3047 #if 0
3048   IF_DEBUG(scheduler,printTSO(dest));
3049 #endif
3050
3051   return dest;
3052 }
3053
3054 /* ---------------------------------------------------------------------------
3055    Wake up a queue that was blocked on some resource.
3056    ------------------------------------------------------------------------ */
3057
3058 #if defined(GRAN)
3059 STATIC_INLINE void
3060 unblockCount ( StgBlockingQueueElement *bqe, StgClosure *node )
3061 {
3062 }
3063 #elif defined(PARALLEL_HASKELL)
3064 STATIC_INLINE void
3065 unblockCount ( StgBlockingQueueElement *bqe, StgClosure *node )
3066 {
3067   /* write RESUME events to log file and
3068      update blocked and fetch time (depending on type of the orig closure) */
3069   if (RtsFlags.ParFlags.ParStats.Full) {
3070     DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC,
3071                      GR_RESUMEQ, ((StgTSO *)bqe), ((StgTSO *)bqe)->block_info.closure,
3072                      0, 0 /* spark_queue_len(ADVISORY_POOL) */);
3073     if (emptyRunQueue())
3074       emitSchedule = rtsTrue;
3075
3076     switch (get_itbl(node)->type) {
3077         case FETCH_ME_BQ:
3078           ((StgTSO *)bqe)->par.fetchtime += CURRENT_TIME-((StgTSO *)bqe)->par.blockedat;
3079           break;
3080         case RBH:
3081         case FETCH_ME:
3082         case BLACKHOLE_BQ:
3083           ((StgTSO *)bqe)->par.blocktime += CURRENT_TIME-((StgTSO *)bqe)->par.blockedat;
3084           break;
3085 #ifdef DIST
3086         case MVAR:
3087           break;
3088 #endif
3089         default:
3090           barf("{unblockOne}Daq Qagh: unexpected closure in blocking queue");
3091         }
3092       }
3093 }
3094 #endif
3095
3096 #if defined(GRAN)
3097 StgBlockingQueueElement *
3098 unblockOne(StgBlockingQueueElement *bqe, StgClosure *node)
3099 {
3100     StgTSO *tso;
3101     PEs node_loc, tso_loc;
3102
3103     node_loc = where_is(node); // should be lifted out of loop
3104     tso = (StgTSO *)bqe;  // wastes an assignment to get the type right
3105     tso_loc = where_is((StgClosure *)tso);
3106     if (IS_LOCAL_TO(PROCS(node),tso_loc)) { // TSO is local
3107       /* !fake_fetch => TSO is on CurrentProc is same as IS_LOCAL_TO */
3108       ASSERT(CurrentProc!=node_loc || tso_loc==CurrentProc);
3109       CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.lunblocktime;
3110       // insertThread(tso, node_loc);
3111       new_event(tso_loc, tso_loc, CurrentTime[CurrentProc],
3112                 ResumeThread,
3113                 tso, node, (rtsSpark*)NULL);
3114       tso->link = END_TSO_QUEUE; // overwrite link just to be sure
3115       // len_local++;
3116       // len++;
3117     } else { // TSO is remote (actually should be FMBQ)
3118       CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mpacktime +
3119                                   RtsFlags.GranFlags.Costs.gunblocktime +
3120                                   RtsFlags.GranFlags.Costs.latency;
3121       new_event(tso_loc, CurrentProc, CurrentTime[CurrentProc],
3122                 UnblockThread,
3123                 tso, node, (rtsSpark*)NULL);
3124       tso->link = END_TSO_QUEUE; // overwrite link just to be sure
3125       // len++;
3126     }
3127     /* the thread-queue-overhead is accounted for in either Resume or UnblockThread */
3128     IF_GRAN_DEBUG(bq,
3129                   debugBelch(" %s TSO %d (%p) [PE %d] (block_info.closure=%p) (next=%p) ,",
3130                           (node_loc==tso_loc ? "Local" : "Global"),
3131                           tso->id, tso, CurrentProc, tso->block_info.closure, tso->link));
3132     tso->block_info.closure = NULL;
3133     IF_DEBUG(scheduler,debugBelch("-- Waking up thread %ld (%p)\n",
3134                              tso->id, tso));
3135 }
3136 #elif defined(PARALLEL_HASKELL)
3137 StgBlockingQueueElement *
3138 unblockOne(StgBlockingQueueElement *bqe, StgClosure *node)
3139 {
3140     StgBlockingQueueElement *next;
3141
3142     switch (get_itbl(bqe)->type) {
3143     case TSO:
3144       ASSERT(((StgTSO *)bqe)->why_blocked != NotBlocked);
3145       /* if it's a TSO just push it onto the run_queue */
3146       next = bqe->link;
3147       ((StgTSO *)bqe)->link = END_TSO_QUEUE; // debugging?
3148       APPEND_TO_RUN_QUEUE((StgTSO *)bqe);
3149       threadRunnable();
3150       unblockCount(bqe, node);
3151       /* reset blocking status after dumping event */
3152       ((StgTSO *)bqe)->why_blocked = NotBlocked;
3153       break;
3154
3155     case BLOCKED_FETCH:
3156       /* if it's a BLOCKED_FETCH put it on the PendingFetches list */
3157       next = bqe->link;
3158       bqe->link = (StgBlockingQueueElement *)PendingFetches;
3159       PendingFetches = (StgBlockedFetch *)bqe;
3160       break;
3161
3162 # if defined(DEBUG)
3163       /* can ignore this case in a non-debugging setup;
3164          see comments on RBHSave closures above */
3165     case CONSTR:
3166       /* check that the closure is an RBHSave closure */
3167       ASSERT(get_itbl((StgClosure *)bqe) == &stg_RBH_Save_0_info ||
3168              get_itbl((StgClosure *)bqe) == &stg_RBH_Save_1_info ||
3169              get_itbl((StgClosure *)bqe) == &stg_RBH_Save_2_info);
3170       break;
3171
3172     default:
3173       barf("{unblockOne}Daq Qagh: Unexpected IP (%#lx; %s) in blocking queue at %#lx\n",
3174            get_itbl((StgClosure *)bqe), info_type((StgClosure *)bqe),
3175            (StgClosure *)bqe);
3176 # endif
3177     }
3178   IF_PAR_DEBUG(bq, debugBelch(", %p (%s)\n", bqe, info_type((StgClosure*)bqe)));
3179   return next;
3180 }
3181 #endif
3182
3183 StgTSO *
3184 unblockOne(Capability *cap, StgTSO *tso)
3185 {
3186   StgTSO *next;
3187
3188   ASSERT(get_itbl(tso)->type == TSO);
3189   ASSERT(tso->why_blocked != NotBlocked);
3190   tso->why_blocked = NotBlocked;
3191   next = tso->link;
3192   tso->link = END_TSO_QUEUE;
3193
3194   // We might have just migrated this TSO to our Capability:
3195   if (tso->bound) {
3196       tso->bound->cap = cap;
3197   }
3198
3199   appendToRunQueue(cap,tso);
3200
3201   // we're holding a newly woken thread, make sure we context switch
3202   // quickly so we can migrate it if necessary.
3203   context_switch = 1;
3204   IF_DEBUG(scheduler,sched_belch("waking up thread %ld", (long)tso->id));
3205   return next;
3206 }
3207
3208
3209 #if defined(GRAN)
3210 void
3211 awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node)
3212 {
3213   StgBlockingQueueElement *bqe;
3214   PEs node_loc;
3215   nat len = 0;
3216
3217   IF_GRAN_DEBUG(bq,
3218                 debugBelch("##-_ AwBQ for node %p on PE %d @ %ld by TSO %d (%p): \n", \
3219                       node, CurrentProc, CurrentTime[CurrentProc],
3220                       CurrentTSO->id, CurrentTSO));
3221
3222   node_loc = where_is(node);
3223
3224   ASSERT(q == END_BQ_QUEUE ||
3225          get_itbl(q)->type == TSO ||   // q is either a TSO or an RBHSave
3226          get_itbl(q)->type == CONSTR); // closure (type constructor)
3227   ASSERT(is_unique(node));
3228
3229   /* FAKE FETCH: magically copy the node to the tso's proc;
3230      no Fetch necessary because in reality the node should not have been
3231      moved to the other PE in the first place
3232   */
3233   if (CurrentProc!=node_loc) {
3234     IF_GRAN_DEBUG(bq,
3235                   debugBelch("## node %p is on PE %d but CurrentProc is %d (TSO %d); assuming fake fetch and adjusting bitmask (old: %#x)\n",
3236                         node, node_loc, CurrentProc, CurrentTSO->id,
3237                         // CurrentTSO, where_is(CurrentTSO),
3238                         node->header.gran.procs));
3239     node->header.gran.procs = (node->header.gran.procs) | PE_NUMBER(CurrentProc);
3240     IF_GRAN_DEBUG(bq,
3241                   debugBelch("## new bitmask of node %p is %#x\n",
3242                         node, node->header.gran.procs));
3243     if (RtsFlags.GranFlags.GranSimStats.Global) {
3244       globalGranStats.tot_fake_fetches++;
3245     }
3246   }
3247
3248   bqe = q;
3249   // ToDo: check: ASSERT(CurrentProc==node_loc);
3250   while (get_itbl(bqe)->type==TSO) { // q != END_TSO_QUEUE) {
3251     //next = bqe->link;
3252     /*
3253        bqe points to the current element in the queue
3254        next points to the next element in the queue
3255     */
3256     //tso = (StgTSO *)bqe;  // wastes an assignment to get the type right
3257     //tso_loc = where_is(tso);
3258     len++;
3259     bqe = unblockOne(bqe, node);
3260   }
3261
3262   /* if this is the BQ of an RBH, we have to put back the info ripped out of
3263      the closure to make room for the anchor of the BQ */
3264   if (bqe!=END_BQ_QUEUE) {
3265     ASSERT(get_itbl(node)->type == RBH && get_itbl(bqe)->type == CONSTR);
3266     /*
3267     ASSERT((info_ptr==&RBH_Save_0_info) ||
3268            (info_ptr==&RBH_Save_1_info) ||
3269            (info_ptr==&RBH_Save_2_info));
3270     */
3271     /* cf. convertToRBH in RBH.c for writing the RBHSave closure */
3272     ((StgRBH *)node)->blocking_queue = (StgBlockingQueueElement *)((StgRBHSave *)bqe)->payload[0];
3273     ((StgRBH *)node)->mut_link       = (StgMutClosure *)((StgRBHSave *)bqe)->payload[1];
3274
3275     IF_GRAN_DEBUG(bq,
3276                   debugBelch("## Filled in RBH_Save for %p (%s) at end of AwBQ\n",
3277                         node, info_type(node)));
3278   }
3279
3280   /* statistics gathering */
3281   if (RtsFlags.GranFlags.GranSimStats.Global) {
3282     // globalGranStats.tot_bq_processing_time += bq_processing_time;
3283     globalGranStats.tot_bq_len += len;      // total length of all bqs awakened
3284     // globalGranStats.tot_bq_len_local += len_local;  // same for local TSOs only
3285     globalGranStats.tot_awbq++;             // total no. of bqs awakened
3286   }
3287   IF_GRAN_DEBUG(bq,
3288                 debugBelch("## BQ Stats of %p: [%d entries] %s\n",
3289                         node, len, (bqe!=END_BQ_QUEUE) ? "RBH" : ""));
3290 }
3291 #elif defined(PARALLEL_HASKELL)
3292 void
3293 awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node)
3294 {
3295   StgBlockingQueueElement *bqe;
3296
3297   IF_PAR_DEBUG(verbose,
3298                debugBelch("##-_ AwBQ for node %p on [%x]: \n",
3299                      node, mytid));
3300 #ifdef DIST
3301   //RFP
3302   if(get_itbl(q)->type == CONSTR || q==END_BQ_QUEUE) {
3303     IF_PAR_DEBUG(verbose, debugBelch("## ... nothing to unblock so lets just return. RFP (BUG?)\n"));
3304     return;
3305   }
3306 #endif
3307
3308   ASSERT(q == END_BQ_QUEUE ||
3309          get_itbl(q)->type == TSO ||
3310          get_itbl(q)->type == BLOCKED_FETCH ||
3311          get_itbl(q)->type == CONSTR);
3312
3313   bqe = q;
3314   while (get_itbl(bqe)->type==TSO ||
3315          get_itbl(bqe)->type==BLOCKED_FETCH) {
3316     bqe = unblockOne(bqe, node);
3317   }
3318 }
3319
3320 #else   /* !GRAN && !PARALLEL_HASKELL */
3321
3322 void
3323 awakenBlockedQueue(Capability *cap, StgTSO *tso)
3324 {
3325     if (tso == NULL) return; // hack; see bug #1235728, and comments in
3326                              // Exception.cmm
3327     while (tso != END_TSO_QUEUE) {
3328         tso = unblockOne(cap,tso);
3329     }
3330 }
3331 #endif
3332
3333 /* ---------------------------------------------------------------------------
3334    Interrupt execution
3335    - usually called inside a signal handler so it mustn't do anything fancy.
3336    ------------------------------------------------------------------------ */
3337
3338 void
3339 interruptStgRts(void)
3340 {
3341     sched_state = SCHED_INTERRUPTING;
3342     context_switch = 1;
3343 #if defined(THREADED_RTS)
3344     prodAllCapabilities();
3345 #endif
3346 }
3347
3348 /* -----------------------------------------------------------------------------
3349    Unblock a thread
3350
3351    This is for use when we raise an exception in another thread, which
3352    may be blocked.
3353    This has nothing to do with the UnblockThread event in GranSim. -- HWL
3354    -------------------------------------------------------------------------- */
3355
3356 #if defined(GRAN) || defined(PARALLEL_HASKELL)
3357 /*
3358   NB: only the type of the blocking queue is different in GranSim and GUM
3359       the operations on the queue-elements are the same
3360       long live polymorphism!
3361
3362   Locks: sched_mutex is held upon entry and exit.
3363
3364 */
3365 static void
3366 unblockThread(Capability *cap, StgTSO *tso)
3367 {
3368   StgBlockingQueueElement *t, **last;
3369
3370   switch (tso->why_blocked) {
3371
3372   case NotBlocked:
3373     return;  /* not blocked */
3374
3375   case BlockedOnSTM:
3376     // Be careful: nothing to do here!  We tell the scheduler that the thread
3377     // is runnable and we leave it to the stack-walking code to abort the
3378     // transaction while unwinding the stack.  We should perhaps have a debugging
3379     // test to make sure that this really happens and that the 'zombie' transaction
3380     // does not get committed.
3381     goto done;
3382
3383   case BlockedOnMVar:
3384     ASSERT(get_itbl(tso->block_info.closure)->type == MVAR);
3385     {
3386       StgBlockingQueueElement *last_tso = END_BQ_QUEUE;
3387       StgMVar *mvar = (StgMVar *)(tso->block_info.closure);
3388
3389       last = (StgBlockingQueueElement **)&mvar->head;
3390       for (t = (StgBlockingQueueElement *)mvar->head;
3391            t != END_BQ_QUEUE;
3392            last = &t->link, last_tso = t, t = t->link) {
3393         if (t == (StgBlockingQueueElement *)tso) {
3394           *last = (StgBlockingQueueElement *)tso->link;
3395           if (mvar->tail == tso) {
3396             mvar->tail = (StgTSO *)last_tso;
3397           }
3398           goto done;
3399         }
3400       }
3401       barf("unblockThread (MVAR): TSO not found");
3402     }
3403
3404   case BlockedOnBlackHole:
3405     ASSERT(get_itbl(tso->block_info.closure)->type == BLACKHOLE_BQ);
3406     {
3407       StgBlockingQueue *bq = (StgBlockingQueue *)(tso->block_info.closure);
3408
3409       last = &bq->blocking_queue;
3410       for (t = bq->blocking_queue;
3411            t != END_BQ_QUEUE;
3412            last = &t->link, t = t->link) {
3413         if (t == (StgBlockingQueueElement *)tso) {
3414           *last = (StgBlockingQueueElement *)tso->link;
3415           goto done;
3416         }
3417       }
3418       barf("unblockThread (BLACKHOLE): TSO not found");
3419     }
3420
3421   case BlockedOnException:
3422     {
3423       StgTSO *target  = tso->block_info.tso;
3424
3425       ASSERT(get_itbl(target)->type == TSO);
3426
3427       if (target->what_next == ThreadRelocated) {
3428           target = target->link;
3429           ASSERT(get_itbl(target)->type == TSO);
3430       }
3431
3432       ASSERT(target->blocked_exceptions != NULL);
3433
3434       last = (StgBlockingQueueElement **)&target->blocked_exceptions;
3435       for (t = (StgBlockingQueueElement *)target->blocked_exceptions;
3436            t != END_BQ_QUEUE;
3437            last = &t->link, t = t->link) {
3438         ASSERT(get_itbl(t)->type == TSO);
3439         if (t == (StgBlockingQueueElement *)tso) {
3440           *last = (StgBlockingQueueElement *)tso->link;
3441           goto done;
3442         }
3443       }
3444       barf("unblockThread (Exception): TSO not found");
3445     }
3446
3447   case BlockedOnRead:
3448   case BlockedOnWrite:
3449 #if defined(mingw32_HOST_OS)
3450   case BlockedOnDoProc:
3451 #endif
3452     {
3453       /* take TSO off blocked_queue */
3454       StgBlockingQueueElement *prev = NULL;
3455       for (t = (StgBlockingQueueElement *)blocked_queue_hd; t != END_BQ_QUEUE;
3456            prev = t, t = t->link) {
3457         if (t == (StgBlockingQueueElement *)tso) {
3458           if (prev == NULL) {
3459             blocked_queue_hd = (StgTSO *)t->link;
3460             if ((StgBlockingQueueElement *)blocked_queue_tl == t) {
3461               blocked_queue_tl = END_TSO_QUEUE;
3462             }
3463           } else {
3464             prev->link = t->link;
3465             if ((StgBlockingQueueElement *)blocked_queue_tl == t) {
3466               blocked_queue_tl = (StgTSO *)prev;
3467             }
3468           }
3469 #if defined(mingw32_HOST_OS)
3470           /* (Cooperatively) signal that the worker thread should abort
3471            * the request.
3472            */
3473           abandonWorkRequest(tso->block_info.async_result->reqID);
3474 #endif
3475           goto done;
3476         }
3477       }
3478       barf("unblockThread (I/O): TSO not found");
3479     }
3480
3481   case BlockedOnDelay:
3482     {
3483       /* take TSO off sleeping_queue */
3484       StgBlockingQueueElement *prev = NULL;
3485       for (t = (StgBlockingQueueElement *)sleeping_queue; t != END_BQ_QUEUE;
3486            prev = t, t = t->link) {
3487         if (t == (StgBlockingQueueElement *)tso) {
3488           if (prev == NULL) {
3489             sleeping_queue = (StgTSO *)t->link;
3490           } else {
3491             prev->link = t->link;
3492           }
3493           goto done;
3494         }
3495       }
3496       barf("unblockThread (delay): TSO not found");
3497     }
3498
3499   default:
3500     barf("unblockThread");
3501   }
3502
3503  done:
3504   tso->link = END_TSO_QUEUE;
3505   tso->why_blocked = NotBlocked;
3506   tso->block_info.closure = NULL;
3507   pushOnRunQueue(cap,tso);
3508 }
3509 #else
3510 static void
3511 unblockThread(Capability *cap, StgTSO *tso)
3512 {
3513   StgTSO *t, **last;
3514
3515   /* To avoid locking unnecessarily. */
3516   if (tso->why_blocked == NotBlocked) {
3517     return;
3518   }
3519
3520   switch (tso->why_blocked) {
3521
3522   case BlockedOnSTM:
3523     // Be careful: nothing to do here!  We tell the scheduler that the thread
3524     // is runnable and we leave it to the stack-walking code to abort the
3525     // transaction while unwinding the stack.  We should perhaps have a debugging
3526     // test to make sure that this really happens and that the 'zombie' transaction
3527     // does not get committed.
3528     goto done;
3529
3530   case BlockedOnMVar:
3531     ASSERT(get_itbl(tso->block_info.closure)->type == MVAR);
3532     {
3533       StgTSO *last_tso = END_TSO_QUEUE;
3534       StgMVar *mvar = (StgMVar *)(tso->block_info.closure);
3535
3536       last = &mvar->head;
3537       for (t = mvar->head; t != END_TSO_QUEUE;
3538            last = &t->link, last_tso = t, t = t->link) {
3539         if (t == tso) {
3540           *last = tso->link;
3541           if (mvar->tail == tso) {
3542             mvar->tail = last_tso;
3543           }
3544           goto done;
3545         }
3546       }
3547       barf("unblockThread (MVAR): TSO not found");
3548     }
3549
3550   case BlockedOnBlackHole:
3551     {
3552       last = &blackhole_queue;
3553       for (t = blackhole_queue; t != END_TSO_QUEUE;
3554            last = &t->link, t = t->link) {
3555         if (t == tso) {
3556           *last = tso->link;
3557           goto done;
3558         }
3559       }
3560       barf("unblockThread (BLACKHOLE): TSO not found");
3561     }
3562
3563   case BlockedOnException:
3564     {
3565       StgTSO *target  = tso->block_info.tso;
3566
3567       ASSERT(get_itbl(target)->type == TSO);
3568
3569       while (target->what_next == ThreadRelocated) {
3570           target = target->link;
3571           ASSERT(get_itbl(target)->type == TSO);
3572       }
3573
3574       ASSERT(target->blocked_exceptions != NULL);
3575
3576       last = &target->blocked_exceptions;
3577       for (t = target->blocked_exceptions; t != END_TSO_QUEUE;
3578            last = &t->link, t = t->link) {
3579         ASSERT(get_itbl(t)->type == TSO);
3580         if (t == tso) {
3581           *last = tso->link;
3582           goto done;
3583         }
3584       }
3585       barf("unblockThread (Exception): TSO not found");
3586     }
3587
3588 #if !defined(THREADED_RTS)
3589   case BlockedOnRead:
3590   case BlockedOnWrite:
3591 #if defined(mingw32_HOST_OS)
3592   case BlockedOnDoProc:
3593 #endif
3594     {
3595       StgTSO *prev = NULL;
3596       for (t = blocked_queue_hd; t != END_TSO_QUEUE;
3597            prev = t, t = t->link) {
3598         if (t == tso) {
3599           if (prev == NULL) {
3600             blocked_queue_hd = t->link;
3601             if (blocked_queue_tl == t) {
3602               blocked_queue_tl = END_TSO_QUEUE;
3603             }
3604           } else {
3605             prev->link = t->link;
3606             if (blocked_queue_tl == t) {
3607               blocked_queue_tl = prev;
3608             }
3609           }
3610 #if defined(mingw32_HOST_OS)
3611           /* (Cooperatively) signal that the worker thread should abort
3612            * the request.
3613            */
3614           abandonWorkRequest(tso->block_info.async_result->reqID);
3615 #endif
3616           goto done;
3617         }
3618       }
3619       barf("unblockThread (I/O): TSO not found");
3620     }
3621
3622   case BlockedOnDelay:
3623     {
3624       StgTSO *prev = NULL;
3625       for (t = sleeping_queue; t != END_TSO_QUEUE;
3626            prev = t, t = t->link) {
3627         if (t == tso) {
3628           if (prev == NULL) {
3629             sleeping_queue = t->link;
3630           } else {
3631             prev->link = t->link;
3632           }
3633           goto done;
3634         }
3635       }
3636       barf("unblockThread (delay): TSO not found");
3637     }
3638 #endif
3639
3640   default:
3641     barf("unblockThread");
3642   }
3643
3644  done:
3645   tso->link = END_TSO_QUEUE;
3646   tso->why_blocked = NotBlocked;
3647   tso->block_info.closure = NULL;
3648   appendToRunQueue(cap,tso);
3649
3650   // We might have just migrated this TSO to our Capability:
3651   if (tso->bound) {
3652       tso->bound->cap = cap;
3653   }
3654 }
3655 #endif
3656
3657 /* -----------------------------------------------------------------------------
3658  * checkBlackHoles()
3659  *
3660  * Check the blackhole_queue for threads that can be woken up.  We do
3661  * this periodically: before every GC, and whenever the run queue is
3662  * empty.
3663  *
3664  * An elegant solution might be to just wake up all the blocked
3665  * threads with awakenBlockedQueue occasionally: they'll go back to
3666  * sleep again if the object is still a BLACKHOLE.  Unfortunately this
3667  * doesn't give us a way to tell whether we've actually managed to
3668  * wake up any threads, so we would be busy-waiting.
3669  *
3670  * -------------------------------------------------------------------------- */
3671
3672 static rtsBool
3673 checkBlackHoles (Capability *cap)
3674 {
3675     StgTSO **prev, *t;
3676     rtsBool any_woke_up = rtsFalse;
3677     StgHalfWord type;
3678
3679     // blackhole_queue is global:
3680     ASSERT_LOCK_HELD(&sched_mutex);
3681
3682     IF_DEBUG(scheduler, sched_belch("checking threads blocked on black holes"));
3683
3684     // ASSUMES: sched_mutex
3685     prev = &blackhole_queue;
3686     t = blackhole_queue;
3687     while (t != END_TSO_QUEUE) {
3688         ASSERT(t->why_blocked == BlockedOnBlackHole);
3689         type = get_itbl(t->block_info.closure)->type;
3690         if (type != BLACKHOLE && type != CAF_BLACKHOLE) {
3691             IF_DEBUG(sanity,checkTSO(t));
3692             t = unblockOne(cap, t);
3693             // urk, the threads migrate to the current capability
3694             // here, but we'd like to keep them on the original one.
3695             *prev = t;
3696             any_woke_up = rtsTrue;
3697         } else {
3698             prev = &t->link;
3699             t = t->link;
3700         }
3701     }
3702
3703     return any_woke_up;
3704 }
3705
3706 /* -----------------------------------------------------------------------------
3707  * raiseAsync()
3708  *
3709  * The following function implements the magic for raising an
3710  * asynchronous exception in an existing thread.
3711  *
3712  * We first remove the thread from any queue on which it might be
3713  * blocked.  The possible blockages are MVARs and BLACKHOLE_BQs.
3714  *
3715  * We strip the stack down to the innermost CATCH_FRAME, building
3716  * thunks in the heap for all the active computations, so they can
3717  * be restarted if necessary.  When we reach a CATCH_FRAME, we build
3718  * an application of the handler to the exception, and push it on
3719  * the top of the stack.
3720  *
3721  * How exactly do we save all the active computations?  We create an
3722  * AP_STACK for every UpdateFrame on the stack.  Entering one of these
3723  * AP_STACKs pushes everything from the corresponding update frame
3724  * upwards onto the stack.  (Actually, it pushes everything up to the
3725  * next update frame plus a pointer to the next AP_STACK object.
3726  * Entering the next AP_STACK object pushes more onto the stack until we
3727  * reach the last AP_STACK object - at which point the stack should look
3728  * exactly as it did when we killed the TSO and we can continue
3729  * execution by entering the closure on top of the stack.
3730  *
3731  * We can also kill a thread entirely - this happens if either (a) the
3732  * exception passed to raiseAsync is NULL, or (b) there's no
3733  * CATCH_FRAME on the stack.  In either case, we strip the entire
3734  * stack and replace the thread with a zombie.
3735  *
3736  * ToDo: in THREADED_RTS mode, this function is only safe if either
3737  * (a) we hold all the Capabilities (eg. in GC, or if there is only
3738  * one Capability), or (b) we own the Capability that the TSO is
3739  * currently blocked on or on the run queue of.
3740  *
3741  * -------------------------------------------------------------------------- */
3742
3743 void
3744 raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception)
3745 {
3746     raiseAsync_(cap, tso, exception, rtsFalse, NULL);
3747 }
3748
3749 void
3750 suspendComputation(Capability *cap, StgTSO *tso, StgPtr stop_here)
3751 {
3752     raiseAsync_(cap, tso, NULL, rtsFalse, stop_here);
3753 }
3754
3755 static void
3756 raiseAsync_(Capability *cap, StgTSO *tso, StgClosure *exception,
3757             rtsBool stop_at_atomically, StgPtr stop_here)
3758 {
3759     StgRetInfoTable *info;
3760     StgPtr sp, frame;
3761     nat i;
3762
3763     // Thread already dead?
3764     if (tso->what_next == ThreadComplete || tso->what_next == ThreadKilled) {
3765         return;
3766     }
3767
3768     IF_DEBUG(scheduler,
3769              sched_belch("raising exception in thread %ld.", (long)tso->id));
3770
3771     // Remove it from any blocking queues
3772     unblockThread(cap,tso);
3773
3774     // mark it dirty; we're about to change its stack.
3775     dirtyTSO(tso);
3776
3777     sp = tso->sp;
3778
3779     // The stack freezing code assumes there's a closure pointer on
3780     // the top of the stack, so we have to arrange that this is the case...
3781     //
3782     if (sp[0] == (W_)&stg_enter_info) {
3783         sp++;
3784     } else {
3785         sp--;
3786         sp[0] = (W_)&stg_dummy_ret_closure;
3787     }
3788
3789     frame = sp + 1;
3790     while (stop_here == NULL || frame < stop_here) {
3791
3792         // 1. Let the top of the stack be the "current closure"
3793         //
3794         // 2. Walk up the stack until we find either an UPDATE_FRAME or a
3795         // CATCH_FRAME.
3796         //
3797         // 3. If it's an UPDATE_FRAME, then make an AP_STACK containing the
3798         // current closure applied to the chunk of stack up to (but not
3799         // including) the update frame.  This closure becomes the "current
3800         // closure".  Go back to step 2.
3801         //
3802         // 4. If it's a CATCH_FRAME, then leave the exception handler on
3803         // top of the stack applied to the exception.
3804         //
3805         // 5. If it's a STOP_FRAME, then kill the thread.
3806         //
3807         // NB: if we pass an ATOMICALLY_FRAME then abort the associated
3808         // transaction
3809
3810         info = get_ret_itbl((StgClosure *)frame);
3811
3812         switch (info->i.type) {
3813
3814         case UPDATE_FRAME:
3815         {
3816             StgAP_STACK * ap;
3817             nat words;
3818
3819             // First build an AP_STACK consisting of the stack chunk above the
3820             // current update frame, with the top word on the stack as the
3821             // fun field.
3822             //
3823             words = frame - sp - 1;
3824             ap = (StgAP_STACK *)allocateLocal(cap,AP_STACK_sizeW(words));
3825
3826             ap->size = words;
3827             ap->fun  = (StgClosure *)sp[0];
3828             sp++;
3829             for(i=0; i < (nat)words; ++i) {
3830                 ap->payload[i] = (StgClosure *)*sp++;
3831             }
3832
3833             SET_HDR(ap,&stg_AP_STACK_info,
3834                     ((StgClosure *)frame)->header.prof.ccs /* ToDo */);
3835             TICK_ALLOC_UP_THK(words+1,0);
3836
3837             IF_DEBUG(scheduler,
3838                      debugBelch("sched: Updating ");
3839                      printPtr((P_)((StgUpdateFrame *)frame)->updatee);
3840                      debugBelch(" with ");
3841                      printObj((StgClosure *)ap);
3842                 );
3843
3844             // Replace the updatee with an indirection
3845             //
3846             // Warning: if we're in a loop, more than one update frame on
3847             // the stack may point to the same object.  Be careful not to
3848             // overwrite an IND_OLDGEN in this case, because we'll screw
3849             // up the mutable lists.  To be on the safe side, don't
3850             // overwrite any kind of indirection at all.  See also
3851             // threadSqueezeStack in GC.c, where we have to make a similar
3852             // check.
3853             //
3854             if (!closure_IND(((StgUpdateFrame *)frame)->updatee)) {
3855                 // revert the black hole
3856                 UPD_IND_NOLOCK(((StgUpdateFrame *)frame)->updatee,
3857                                (StgClosure *)ap);
3858             }
3859             sp += sizeofW(StgUpdateFrame) - 1;
3860             sp[0] = (W_)ap; // push onto stack
3861             frame = sp + 1;
3862             continue; //no need to bump frame
3863         }
3864
3865         case STOP_FRAME:
3866             // We've stripped the entire stack, the thread is now dead.
3867             tso->what_next = ThreadKilled;
3868             tso->sp = frame + sizeofW(StgStopFrame);
3869             return;
3870
3871         case CATCH_FRAME:
3872             // If we find a CATCH_FRAME, and we've got an exception to raise,
3873             // then build the THUNK raise(exception), and leave it on
3874             // top of the CATCH_FRAME ready to enter.
3875             //
3876         {
3877 #ifdef PROFILING
3878             StgCatchFrame *cf = (StgCatchFrame *)frame;
3879 #endif
3880             StgThunk *raise;
3881
3882             if (exception == NULL) break;
3883
3884             // we've got an exception to raise, so let's pass it to the
3885             // handler in this frame.
3886             //
3887             raise = (StgThunk *)allocateLocal(cap,sizeofW(StgThunk)+1);
3888             TICK_ALLOC_SE_THK(1,0);
3889             SET_HDR(raise,&stg_raise_info,cf->header.prof.ccs);
3890             raise->payload[0] = exception;
3891
3892             // throw away the stack from Sp up to the CATCH_FRAME.
3893             //
3894             sp = frame - 1;
3895
3896             /* Ensure that async excpetions are blocked now, so we don't get
3897              * a surprise exception before we get around to executing the
3898              * handler.
3899              */
3900             if (tso->blocked_exceptions == NULL) {
3901                 tso->blocked_exceptions = END_TSO_QUEUE;
3902             }
3903
3904             /* Put the newly-built THUNK on top of the stack, ready to execute
3905              * when the thread restarts.
3906              */
3907             sp[0] = (W_)raise;
3908             sp[-1] = (W_)&stg_enter_info;
3909             tso->sp = sp-1;
3910             tso->what_next = ThreadRunGHC;
3911             IF_DEBUG(sanity, checkTSO(tso));
3912             return;
3913         }
3914
3915         case ATOMICALLY_FRAME:
3916             if (stop_at_atomically) {
3917                 ASSERT(stmGetEnclosingTRec(tso->trec) == NO_TREC);
3918                 stmCondemnTransaction(cap, tso -> trec);
3919 #ifdef REG_R1
3920                 tso->sp = frame;
3921 #else
3922                 // R1 is not a register: the return convention for IO in
3923                 // this case puts the return value on the stack, so we
3924                 // need to set up the stack to return to the atomically
3925                 // frame properly...
3926                 tso->sp = frame - 2;
3927                 tso->sp[1] = (StgWord) &stg_NO_FINALIZER_closure; // why not?
3928                 tso->sp[0] = (StgWord) &stg_ut_1_0_unreg_info;
3929 #endif
3930                 tso->what_next = ThreadRunGHC;
3931                 return;
3932             }
3933             // Not stop_at_atomically... fall through and abort the
3934             // transaction.
3935
3936         case CATCH_RETRY_FRAME:
3937             // IF we find an ATOMICALLY_FRAME then we abort the
3938             // current transaction and propagate the exception.  In
3939             // this case (unlike ordinary exceptions) we do not care
3940             // whether the transaction is valid or not because its
3941             // possible validity cannot have caused the exception
3942             // and will not be visible after the abort.
3943             IF_DEBUG(stm,
3944                      debugBelch("Found atomically block delivering async exception\n"));
3945             StgTRecHeader *trec = tso -> trec;
3946             StgTRecHeader *outer = stmGetEnclosingTRec(trec);
3947             stmAbortTransaction(cap, trec);
3948             tso -> trec = outer;
3949             break;
3950
3951         default:
3952             break;
3953         }
3954
3955         // move on to the next stack frame
3956         frame += stack_frame_sizeW((StgClosure *)frame);
3957     }
3958
3959     // if we got here, then we stopped at stop_here
3960     ASSERT(stop_here != NULL);
3961 }
3962
3963 /* -----------------------------------------------------------------------------
3964    Deleting threads
3965
3966    This is used for interruption (^C) and forking, and corresponds to
3967    raising an exception but without letting the thread catch the
3968    exception.
3969    -------------------------------------------------------------------------- */
3970
3971 static void
3972 deleteThread (Capability *cap, StgTSO *tso)
3973 {
3974   if (tso->why_blocked != BlockedOnCCall &&
3975       tso->why_blocked != BlockedOnCCall_NoUnblockExc) {
3976       raiseAsync(cap,tso,NULL);
3977   }
3978 }
3979
3980 #ifdef FORKPROCESS_PRIMOP_SUPPORTED
3981 static void
3982 deleteThreadImmediately(Capability *cap, StgTSO *tso)
3983 { // for forkProcess only:
3984   // delete thread without giving it a chance to catch the KillThread exception
3985
3986   if (tso->what_next == ThreadComplete || tso->what_next == ThreadKilled) {
3987       return;
3988   }
3989
3990   if (tso->why_blocked != BlockedOnCCall &&
3991       tso->why_blocked != BlockedOnCCall_NoUnblockExc) {
3992       unblockThread(cap,tso);
3993   }
3994
3995   tso->what_next = ThreadKilled;
3996 }
3997 #endif
3998
3999 /* -----------------------------------------------------------------------------
4000    raiseExceptionHelper
4001
4002    This function is called by the raise# primitve, just so that we can
4003    move some of the tricky bits of raising an exception from C-- into
4004    C.  Who knows, it might be a useful re-useable thing here too.
4005    -------------------------------------------------------------------------- */
4006
4007 StgWord
4008 raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception)
4009 {
4010     Capability *cap = regTableToCapability(reg);
4011     StgThunk *raise_closure = NULL;
4012     StgPtr p, next;
4013     StgRetInfoTable *info;
4014     //
4015     // This closure represents the expression 'raise# E' where E
4016     // is the exception raise.  It is used to overwrite all the
4017     // thunks which are currently under evaluataion.
4018     //
4019
4020     // OLD COMMENT (we don't have MIN_UPD_SIZE now):
4021     // LDV profiling: stg_raise_info has THUNK as its closure
4022     // type. Since a THUNK takes at least MIN_UPD_SIZE words in its
4023     // payload, MIN_UPD_SIZE is more approprate than 1.  It seems that
4024     // 1 does not cause any problem unless profiling is performed.
4025     // However, when LDV profiling goes on, we need to linearly scan
4026     // small object pool, where raise_closure is stored, so we should
4027     // use MIN_UPD_SIZE.
4028     //
4029     // raise_closure = (StgClosure *)RET_STGCALL1(P_,allocate,
4030     //                                 sizeofW(StgClosure)+1);
4031     //
4032
4033     //
4034     // Walk up the stack, looking for the catch frame.  On the way,
4035     // we update any closures pointed to from update frames with the
4036     // raise closure that we just built.
4037     //
4038     p = tso->sp;
4039     while(1) {
4040         info = get_ret_itbl((StgClosure *)p);
4041         next = p + stack_frame_sizeW((StgClosure *)p);
4042         switch (info->i.type) {
4043
4044         case UPDATE_FRAME:
4045             // Only create raise_closure if we need to.
4046             if (raise_closure == NULL) {
4047                 raise_closure =
4048                     (StgThunk *)allocateLocal(cap,sizeofW(StgThunk)+1);
4049                 SET_HDR(raise_closure, &stg_raise_info, CCCS);
4050                 raise_closure->payload[0] = exception;
4051             }
4052             UPD_IND(((StgUpdateFrame *)p)->updatee,(StgClosure *)raise_closure);
4053             p = next;
4054             continue;
4055
4056         case ATOMICALLY_FRAME:
4057             IF_DEBUG(stm, debugBelch("Found ATOMICALLY_FRAME at %p\n", p));
4058             tso->sp = p;
4059             return ATOMICALLY_FRAME;
4060
4061         case CATCH_FRAME:
4062             tso->sp = p;
4063             return CATCH_FRAME;
4064
4065         case CATCH_STM_FRAME:
4066             IF_DEBUG(stm, debugBelch("Found CATCH_STM_FRAME at %p\n", p));
4067             tso->sp = p;
4068             return CATCH_STM_FRAME;
4069
4070         case STOP_FRAME:
4071             tso->sp = p;
4072             return STOP_FRAME;
4073
4074         case CATCH_RETRY_FRAME:
4075         default:
4076             p = next;
4077             continue;
4078         }
4079     }
4080 }
4081
4082
4083 /* -----------------------------------------------------------------------------
4084    findRetryFrameHelper
4085
4086    This function is called by the retry# primitive.  It traverses the stack
4087    leaving tso->sp referring to the frame which should handle the retry.
4088
4089    This should either be a CATCH_RETRY_FRAME (if the retry# is within an orElse#)
4090    or should be a ATOMICALLY_FRAME (if the retry# reaches the top level).
4091
4092    We skip CATCH_STM_FRAMEs because retries are not considered to be exceptions,
4093    despite the similar implementation.
4094
4095    We should not expect to see CATCH_FRAME or STOP_FRAME because those should
4096    not be created within memory transactions.
4097    -------------------------------------------------------------------------- */
4098
4099 StgWord
4100 findRetryFrameHelper (StgTSO *tso)
4101 {
4102   StgPtr           p, next;
4103   StgRetInfoTable *info;
4104
4105   p = tso -> sp;
4106   while (1) {
4107     info = get_ret_itbl((StgClosure *)p);
4108     next = p + stack_frame_sizeW((StgClosure *)p);
4109     switch (info->i.type) {
4110
4111     case ATOMICALLY_FRAME:
4112       IF_DEBUG(stm, debugBelch("Found ATOMICALLY_FRAME at %p during retrry\n", p));
4113       tso->sp = p;
4114       return ATOMICALLY_FRAME;
4115
4116     case CATCH_RETRY_FRAME:
4117       IF_DEBUG(stm, debugBelch("Found CATCH_RETRY_FRAME at %p during retrry\n", p));
4118       tso->sp = p;
4119       return CATCH_RETRY_FRAME;
4120
4121     case CATCH_STM_FRAME:
4122     default:
4123       ASSERT(info->i.type != CATCH_FRAME);
4124       ASSERT(info->i.type != STOP_FRAME);
4125       p = next;
4126       continue;
4127     }
4128   }
4129 }
4130
4131 /* -----------------------------------------------------------------------------
4132    resurrectThreads is called after garbage collection on the list of
4133    threads found to be garbage.  Each of these threads will be woken
4134    up and sent a signal: BlockedOnDeadMVar if the thread was blocked
4135    on an MVar, or NonTermination if the thread was blocked on a Black
4136    Hole.
4137
4138    Locks: assumes we hold *all* the capabilities.
4139    -------------------------------------------------------------------------- */
4140
4141 void
4142 resurrectThreads (StgTSO *threads)
4143 {
4144     StgTSO *tso, *next;
4145     Capability *cap;
4146
4147     for (tso = threads; tso != END_TSO_QUEUE; tso = next) {
4148         next = tso->global_link;
4149         tso->global_link = all_threads;
4150         all_threads = tso;
4151         IF_DEBUG(scheduler, sched_belch("resurrecting thread %d", tso->id));
4152
4153         // Wake up the thread on the Capability it was last on for a
4154         // bound thread, or last_free_capability otherwise.
4155         if (tso->bound) {
4156             cap = tso->bound->cap;
4157         } else {
4158             cap = last_free_capability;
4159         }
4160
4161         switch (tso->why_blocked) {
4162         case BlockedOnMVar:
4163         case BlockedOnException:
4164             /* Called by GC - sched_mutex lock is currently held. */
4165             raiseAsync(cap, tso,(StgClosure *)BlockedOnDeadMVar_closure);
4166             break;
4167         case BlockedOnBlackHole:
4168             raiseAsync(cap, tso,(StgClosure *)NonTermination_closure);
4169             break;
4170         case BlockedOnSTM:
4171             raiseAsync(cap, tso,(StgClosure *)BlockedIndefinitely_closure);
4172             break;
4173         case NotBlocked:
4174             /* This might happen if the thread was blocked on a black hole
4175              * belonging to a thread that we've just woken up (raiseAsync
4176              * can wake up threads, remember...).
4177              */
4178             continue;
4179         default:
4180             barf("resurrectThreads: thread blocked in a strange way");
4181         }
4182     }
4183 }
4184
4185 /* ----------------------------------------------------------------------------
4186  * Debugging: why is a thread blocked
4187  * [Also provides useful information when debugging threaded programs
4188  *  at the Haskell source code level, so enable outside of DEBUG. --sof 7/02]
4189    ------------------------------------------------------------------------- */
4190
4191 #if DEBUG
4192 static void
4193 printThreadBlockage(StgTSO *tso)
4194 {
4195   switch (tso->why_blocked) {
4196   case BlockedOnRead:
4197     debugBelch("is blocked on read from fd %d", (int)(tso->block_info.fd));
4198     break;
4199   case BlockedOnWrite:
4200     debugBelch("is blocked on write to fd %d", (int)(tso->block_info.fd));
4201     break;
4202 #if defined(mingw32_HOST_OS)
4203     case BlockedOnDoProc:
4204     debugBelch("is blocked on proc (request: %ld)", tso->block_info.async_result->reqID);
4205     break;
4206 #endif
4207   case BlockedOnDelay:
4208     debugBelch("is blocked until %ld", (long)(tso->block_info.target));
4209     break;
4210   case BlockedOnMVar:
4211     debugBelch("is blocked on an MVar @ %p", tso->block_info.closure);
4212     break;
4213   case BlockedOnException:
4214     debugBelch("is blocked on delivering an exception to thread %d",
4215             tso->block_info.tso->id);
4216     break;
4217   case BlockedOnBlackHole:
4218     debugBelch("is blocked on a black hole");
4219     break;
4220   case NotBlocked:
4221     debugBelch("is not blocked");
4222     break;
4223 #if defined(PARALLEL_HASKELL)
4224   case BlockedOnGA:
4225     debugBelch("is blocked on global address; local FM_BQ is %p (%s)",
4226             tso->block_info.closure, info_type(tso->block_info.closure));
4227     break;
4228   case BlockedOnGA_NoSend:
4229     debugBelch("is blocked on global address (no send); local FM_BQ is %p (%s)",
4230             tso->block_info.closure, info_type(tso->block_info.closure));
4231     break;
4232 #endif
4233   case BlockedOnCCall:
4234     debugBelch("is blocked on an external call");
4235     break;
4236   case BlockedOnCCall_NoUnblockExc:
4237     debugBelch("is blocked on an external call (exceptions were already blocked)");
4238     break;
4239   case BlockedOnSTM:
4240     debugBelch("is blocked on an STM operation");
4241     break;
4242   default:
4243     barf("printThreadBlockage: strange tso->why_blocked: %d for TSO %d (%d)",
4244          tso->why_blocked, tso->id, tso);
4245   }
4246 }
4247
4248 void
4249 printThreadStatus(StgTSO *t)
4250 {
4251     debugBelch("\tthread %4d @ %p ", t->id, (void *)t);
4252     {
4253       void *label = lookupThreadLabel(t->id);
4254       if (label) debugBelch("[\"%s\"] ",(char *)label);
4255     }
4256     if (t->what_next == ThreadRelocated) {
4257         debugBelch("has been relocated...\n");
4258     } else {
4259         switch (t->what_next) {
4260         case ThreadKilled:
4261             debugBelch("has been killed");
4262             break;
4263         case ThreadComplete:
4264             debugBelch("has completed");
4265             break;
4266         default:
4267             printThreadBlockage(t);
4268         }
4269         debugBelch("\n");
4270     }
4271 }
4272
4273 void
4274 printAllThreads(void)
4275 {
4276   StgTSO *t, *next;
4277   nat i;
4278   Capability *cap;
4279
4280 # if defined(GRAN)
4281   char time_string[TIME_STR_LEN], node_str[NODE_STR_LEN];
4282   ullong_format_string(TIME_ON_PROC(CurrentProc),
4283                        time_string, rtsFalse/*no commas!*/);
4284
4285   debugBelch("all threads at [%s]:\n", time_string);
4286 # elif defined(PARALLEL_HASKELL)
4287   char time_string[TIME_STR_LEN], node_str[NODE_STR_LEN];
4288   ullong_format_string(CURRENT_TIME,
4289                        time_string, rtsFalse/*no commas!*/);
4290
4291   debugBelch("all threads at [%s]:\n", time_string);
4292 # else
4293   debugBelch("all threads:\n");
4294 # endif
4295
4296   for (i = 0; i < n_capabilities; i++) {
4297       cap = &capabilities[i];
4298       debugBelch("threads on capability %d:\n", cap->no);
4299       for (t = cap->run_queue_hd; t != END_TSO_QUEUE; t = t->link) {
4300           printThreadStatus(t);
4301       }
4302   }
4303
4304   debugBelch("other threads:\n");
4305   for (t = all_threads; t != END_TSO_QUEUE; t = next) {
4306       if (t->why_blocked != NotBlocked) {
4307           printThreadStatus(t);
4308       }
4309       if (t->what_next == ThreadRelocated) {
4310           next = t->link;
4311       } else {
4312           next = t->global_link;
4313       }
4314   }
4315 }
4316
4317 // useful from gdb
4318 void
4319 printThreadQueue(StgTSO *t)
4320 {
4321     nat i = 0;
4322     for (; t != END_TSO_QUEUE; t = t->link) {
4323         printThreadStatus(t);
4324         i++;
4325     }
4326     debugBelch("%d threads on queue\n", i);
4327 }
4328
4329 /*
4330    Print a whole blocking queue attached to node (debugging only).
4331 */
4332 # if defined(PARALLEL_HASKELL)
4333 void
4334 print_bq (StgClosure *node)
4335 {
4336   StgBlockingQueueElement *bqe;
4337   StgTSO *tso;
4338   rtsBool end;
4339
4340   debugBelch("## BQ of closure %p (%s): ",
4341           node, info_type(node));
4342
4343   /* should cover all closures that may have a blocking queue */
4344   ASSERT(get_itbl(node)->type == BLACKHOLE_BQ ||
4345          get_itbl(node)->type == FETCH_ME_BQ ||
4346          get_itbl(node)->type == RBH ||
4347          get_itbl(node)->type == MVAR);
4348
4349   ASSERT(node!=(StgClosure*)NULL);         // sanity check
4350
4351   print_bqe(((StgBlockingQueue*)node)->blocking_queue);
4352 }
4353
4354 /*
4355    Print a whole blocking queue starting with the element bqe.
4356 */
4357 void
4358 print_bqe (StgBlockingQueueElement *bqe)
4359 {
4360   rtsBool end;
4361
4362   /*
4363      NB: In a parallel setup a BQ of an RBH must end with an RBH_Save closure;
4364   */
4365   for (end = (bqe==END_BQ_QUEUE);
4366        !end; // iterate until bqe points to a CONSTR
4367        end = (get_itbl(bqe)->type == CONSTR) || (bqe->link==END_BQ_QUEUE),
4368        bqe = end ? END_BQ_QUEUE : bqe->link) {
4369     ASSERT(bqe != END_BQ_QUEUE);                               // sanity check
4370     ASSERT(bqe != (StgBlockingQueueElement *)NULL);            // sanity check
4371     /* types of closures that may appear in a blocking queue */
4372     ASSERT(get_itbl(bqe)->type == TSO ||
4373            get_itbl(bqe)->type == BLOCKED_FETCH ||
4374            get_itbl(bqe)->type == CONSTR);
4375     /* only BQs of an RBH end with an RBH_Save closure */
4376     //ASSERT(get_itbl(bqe)->type != CONSTR || get_itbl(node)->type == RBH);
4377
4378     switch (get_itbl(bqe)->type) {
4379     case TSO:
4380       debugBelch(" TSO %u (%x),",
4381               ((StgTSO *)bqe)->id, ((StgTSO *)bqe));
4382       break;
4383     case BLOCKED_FETCH:
4384       debugBelch(" BF (node=%p, ga=((%x, %d, %x)),",
4385               ((StgBlockedFetch *)bqe)->node,
4386               ((StgBlockedFetch *)bqe)->ga.payload.gc.gtid,
4387               ((StgBlockedFetch *)bqe)->ga.payload.gc.slot,
4388               ((StgBlockedFetch *)bqe)->ga.weight);
4389       break;
4390     case CONSTR:
4391       debugBelch(" %s (IP %p),",
4392               (get_itbl(bqe) == &stg_RBH_Save_0_info ? "RBH_Save_0" :
4393                get_itbl(bqe) == &stg_RBH_Save_1_info ? "RBH_Save_1" :
4394                get_itbl(bqe) == &stg_RBH_Save_2_info ? "RBH_Save_2" :
4395                "RBH_Save_?"), get_itbl(bqe));
4396       break;
4397     default:
4398       barf("Unexpected closure type %s in blocking queue", // of %p (%s)",
4399            info_type((StgClosure *)bqe)); // , node, info_type(node));
4400       break;
4401     }
4402   } /* for */
4403   debugBelch("\n");
4404 }
4405 # elif defined(GRAN)
4406 void
4407 print_bq (StgClosure *node)
4408 {
4409   StgBlockingQueueElement *bqe;
4410   PEs node_loc, tso_loc;
4411   rtsBool end;
4412
4413   /* should cover all closures that may have a blocking queue */
4414   ASSERT(get_itbl(node)->type == BLACKHOLE_BQ ||
4415          get_itbl(node)->type == FETCH_ME_BQ ||
4416          get_itbl(node)->type == RBH);
4417
4418   ASSERT(node!=(StgClosure*)NULL);         // sanity check
4419   node_loc = where_is(node);
4420
4421   debugBelch("## BQ of closure %p (%s) on [PE %d]: ",
4422           node, info_type(node), node_loc);
4423
4424   /*
4425      NB: In a parallel setup a BQ of an RBH must end with an RBH_Save closure;
4426   */
4427   for (bqe = ((StgBlockingQueue*)node)->blocking_queue, end = (bqe==END_BQ_QUEUE);
4428        !end; // iterate until bqe points to a CONSTR
4429        end = (get_itbl(bqe)->type == CONSTR) || (bqe->link==END_BQ_QUEUE), bqe = end ? END_BQ_QUEUE : bqe->link) {
4430     ASSERT(bqe != END_BQ_QUEUE);             // sanity check
4431     ASSERT(bqe != (StgBlockingQueueElement *)NULL);  // sanity check
4432     /* types of closures that may appear in a blocking queue */
4433     ASSERT(get_itbl(bqe)->type == TSO ||
4434            get_itbl(bqe)->type == CONSTR);
4435     /* only BQs of an RBH end with an RBH_Save closure */
4436     ASSERT(get_itbl(bqe)->type != CONSTR || get_itbl(node)->type == RBH);
4437
4438     tso_loc = where_is((StgClosure *)bqe);
4439     switch (get_itbl(bqe)->type) {
4440     case TSO:
4441       debugBelch(" TSO %d (%p) on [PE %d],",
4442               ((StgTSO *)bqe)->id, (StgTSO *)bqe, tso_loc);
4443       break;
4444     case CONSTR:
4445       debugBelch(" %s (IP %p),",
4446               (get_itbl(bqe) == &stg_RBH_Save_0_info ? "RBH_Save_0" :
4447                get_itbl(bqe) == &stg_RBH_Save_1_info ? "RBH_Save_1" :
4448                get_itbl(bqe) == &stg_RBH_Save_2_info ? "RBH_Save_2" :
4449                "RBH_Save_?"), get_itbl(bqe));
4450       break;
4451     default:
4452       barf("Unexpected closure type %s in blocking queue of %p (%s)",
4453            info_type((StgClosure *)bqe), node, info_type(node));
4454       break;
4455     }
4456   } /* for */
4457   debugBelch("\n");
4458 }
4459 # endif
4460
4461 #if defined(PARALLEL_HASKELL)
4462 static nat
4463 run_queue_len(void)
4464 {
4465     nat i;
4466     StgTSO *tso;
4467
4468     for (i=0, tso=run_queue_hd;
4469          tso != END_TSO_QUEUE;
4470          i++, tso=tso->link) {
4471         /* nothing */
4472     }
4473
4474     return i;
4475 }
4476 #endif
4477
4478 void
4479 sched_belch(char *s, ...)
4480 {
4481     va_list ap;
4482     va_start(ap,s);
4483 #ifdef THREADED_RTS
4484     debugBelch("sched (task %p): ", (void *)(unsigned long)(unsigned int)osThreadId());
4485 #elif defined(PARALLEL_HASKELL)
4486     debugBelch("== ");
4487 #else
4488     debugBelch("sched: ");
4489 #endif
4490     vdebugBelch(s, ap);
4491     debugBelch("\n");
4492     va_end(ap);
4493 }
4494
4495 #endif /* DEBUG */