# define STATIC_INLINE static
#endif
-#ifdef THREADED_RTS
-#define USED_WHEN_THREADED_RTS
-#define USED_WHEN_NON_THREADED_RTS STG_UNUSED
-#else
-#define USED_WHEN_THREADED_RTS STG_UNUSED
-#define USED_WHEN_NON_THREADED_RTS
-#endif
-
-#ifdef SMP
-#define USED_WHEN_SMP
-#else
-#define USED_WHEN_SMP STG_UNUSED
-#endif
-
/* -----------------------------------------------------------------------------
* Global variables
* -------------------------------------------------------------------------- */
/*
* This mutex protects most of the global scheduler data in
- * the THREADED_RTS and (inc. SMP) runtime.
+ * the THREADED_RTS runtime.
*/
#if defined(THREADED_RTS)
Mutex sched_mutex;
// scheduler clearer.
//
static void schedulePreLoop (void);
-#if defined(SMP)
+#if defined(THREADED_RTS)
static void schedulePushWork(Capability *cap, Task *task);
#endif
static void scheduleStartSignalHandlers (Capability *cap);
static rtsBool scheduleHandleThreadFinished( Capability *cap, Task *task,
StgTSO *t );
static rtsBool scheduleDoHeapProfile(rtsBool ready_to_gc);
-static void scheduleDoGC(Capability *cap, Task *task, rtsBool force_major);
+static void scheduleDoGC(Capability *cap, Task *task, rtsBool force_major,
+ void (*get_roots)(evac_fn));
static void unblockThread(Capability *cap, StgTSO *tso);
static rtsBool checkBlackHoles(Capability *cap);
}
#endif
-#ifdef SMP
+#if defined(THREADED_RTS)
schedulePushWork(cap,task);
#endif
//
if (interrupted) {
deleteRunQueue(cap);
-#if defined(SMP)
+#if defined(THREADED_RTS)
discardSparksCap(cap);
#endif
if (shutting_down_scheduler) {
}
}
-#if defined(SMP)
+#if defined(THREADED_RTS)
// If the run queue is empty, take a spark and turn it into a thread.
{
if (emptyRunQueue(cap)) {
}
}
}
-#endif // SMP
+#endif // THREADED_RTS
scheduleStartSignalHandlers(cap);
scheduleCheckBlockedThreads(cap);
scheduleDetectDeadlock(cap,task);
+#if defined(THREADED_RTS)
+ cap = task->cap; // reload cap, it might have changed
+#endif
// Normally, the only way we can get here with no threads to
// run is if a keyboard interrupt received during
errno = t->saved_errno;
cap->in_haskell = rtsTrue;
+ dirtyTSO(t);
+
recent_activity = ACTIVITY_YES;
switch (prev_what_next) {
}
if (scheduleDoHeapProfile(ready_to_gc)) { ready_to_gc = rtsFalse; }
- if (ready_to_gc) { scheduleDoGC(cap,task,rtsFalse); }
+ if (ready_to_gc) {
+ scheduleDoGC(cap,task,rtsFalse,GetRoots);
+#if defined(THREADED_RTS)
+ cap = task->cap; // reload cap, it might have changed
+#endif
+ }
} /* end of while() */
IF_PAR_DEBUG(verbose,
* Push work to other Capabilities if we have some.
* -------------------------------------------------------------------------- */
-#ifdef SMP
+#if defined(THREADED_RTS)
static void
-schedulePushWork(Capability *cap USED_WHEN_SMP,
- Task *task USED_WHEN_SMP)
+schedulePushWork(Capability *cap USED_IF_THREADS,
+ Task *task USED_IF_THREADS)
{
Capability *free_caps[n_capabilities], *cap0;
nat i, n_free_caps;
* ------------------------------------------------------------------------- */
static void
-scheduleCheckBlockedThreads(Capability *cap USED_WHEN_NON_THREADED_RTS)
+scheduleCheckBlockedThreads(Capability *cap USED_IF_NOT_THREADS)
{
#if !defined(THREADED_RTS)
//
{
#if defined(PARALLEL_HASKELL)
- // ToDo: add deadlock detection in GUM (similar to SMP) -- HWL
+ // ToDo: add deadlock detection in GUM (similar to THREADED_RTS) -- HWL
return;
#endif
// they are unreachable and will therefore be sent an
// exception. Any threads thus released will be immediately
// runnable.
- scheduleDoGC( cap, task, rtsTrue/*force major GC*/ );
+ scheduleDoGC( cap, task, rtsTrue/*force major GC*/, GetRoots );
+#if defined(THREADED_RTS)
+ cap = task->cap; // reload cap, it might have changed
+#endif
+
recent_activity = ACTIVITY_DONE_GC;
if ( !emptyRunQueue(cap) ) return;
if (cap->r.rCurrentNursery->u.back != NULL) {
cap->r.rCurrentNursery->u.back->link = bd;
} else {
-#if !defined(SMP)
+#if !defined(THREADED_RTS)
ASSERT(g0s0->blocks == cap->r.rCurrentNursery &&
g0s0 == cap->r.rNursery);
#endif
// has tidied up its stack and placed itself on whatever queue
// it needs to be on.
-#if !defined(SMP)
+#if !defined(THREADED_RTS)
ASSERT(t->why_blocked != NotBlocked);
- // This might not be true under SMP: we don't have
+ // This might not be true under THREADED_RTS: we don't have
// exclusive access to this TSO, so someone might have
// woken it up by now. This actually happens: try
// conc023 +RTS -N2.
if (performHeapProfile ||
(RtsFlags.ProfFlags.profileInterval==0 &&
RtsFlags.ProfFlags.doHeapProfile && ready_to_gc)) {
+
+ // checking black holes is necessary before GC, otherwise
+ // there may be threads that are unreachable except by the
+ // blackhole queue, which the GC will consider to be
+ // deadlocked.
+ scheduleCheckBlackHoles(&MainCapability);
+
+ IF_DEBUG(scheduler, sched_belch("garbage collecting before heap census"));
GarbageCollect(GetRoots, rtsTrue);
+
+ IF_DEBUG(scheduler, sched_belch("performing heap census"));
heapCensus();
+
performHeapProfile = rtsFalse;
return rtsTrue; // true <=> we already GC'd
}
* -------------------------------------------------------------------------- */
static void
-scheduleDoGC( Capability *cap, Task *task USED_WHEN_SMP, rtsBool force_major )
+scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS,
+ rtsBool force_major, void (*get_roots)(evac_fn))
{
StgTSO *t;
-#ifdef SMP
+#ifdef THREADED_RTS
static volatile StgWord waiting_for_gc;
rtsBool was_waiting;
nat i;
#endif
-#ifdef SMP
+#ifdef THREADED_RTS
// In order to GC, there must be no threads running Haskell code.
// Therefore, the GC thread needs to hold *all* the capabilities,
// and release them after the GC has completed.
if (was_waiting) {
do {
IF_DEBUG(scheduler, sched_belch("someone else is trying to GC..."));
- yieldCapability(&cap,task);
+ if (cap) yieldCapability(&cap,task);
} while (waiting_for_gc);
- return;
+ return; // NOTE: task->cap might have changed here
}
for (i=0; i < n_capabilities; i++) {
// ATOMICALLY_FRAME, aborting the (nested)
// transaction, and saving the stack of any
// partially-evaluated thunks on the heap.
- raiseAsync_(cap, t, NULL, rtsTrue, NULL);
+ raiseAsync_(&capabilities[0], t, NULL, rtsTrue, NULL);
#ifdef REG_R1
ASSERT(get_itbl((StgClosure *)t->sp)->type == ATOMICALLY_FRAME);
}
// so this happens periodically:
- scheduleCheckBlackHoles(cap);
+ if (cap) scheduleCheckBlackHoles(cap);
IF_DEBUG(scheduler, printAllThreads());
#if defined(THREADED_RTS)
IF_DEBUG(scheduler,sched_belch("doing GC"));
#endif
- GarbageCollect(GetRoots, force_major);
+ GarbageCollect(get_roots, force_major);
-#if defined(SMP)
+#if defined(THREADED_RTS)
// release our stash of capabilities.
for (i = 0; i < n_capabilities; i++) {
if (cap != &capabilities[i]) {
releaseCapability(&capabilities[i]);
}
}
- task->cap = cap;
+ if (cap) {
+ task->cap = cap;
+ } else {
+ task->cap = NULL;
+ }
#endif
#if defined(GRAN)
* ------------------------------------------------------------------------- */
StgBool
-isThreadBound(StgTSO* tso USED_WHEN_THREADED_RTS)
+isThreadBound(StgTSO* tso USED_IF_THREADS)
{
#if defined(THREADED_RTS)
return (tso->bound != NULL);
* Singleton fork(). Do not copy any running threads.
* ------------------------------------------------------------------------- */
-#if !defined(mingw32_HOST_OS) && !defined(SMP)
+#if !defined(mingw32_HOST_OS)
#define FORKPROCESS_PRIMOP_SUPPORTED
#endif
StgTSO* t,*next;
Capability *cap;
+#if defined(THREADED_RTS)
+ if (RtsFlags.ParFlags.nNodes > 1) {
+ errorBelch("forking not supported with +RTS -N<n> greater than 1");
+ stg_exit(EXIT_FAILURE);
+ }
+#endif
+
IF_DEBUG(scheduler,sched_belch("forking!"));
// ToDo: for SMP, we should probably acquire *all* the capabilities
cap->in_haskell = rtsTrue;
errno = saved_errno;
+ /* We might have GC'd, mark the TSO dirty again */
+ dirtyTSO(tso);
+
return &cap->r;
}
tso->why_blocked = NotBlocked;
tso->blocked_exceptions = NULL;
+ tso->flags = TSO_DIRTY;
tso->saved_errno = 0;
tso->bound = NULL;
/* A capability holds the state a native thread needs in
* order to execute STG code. At least one capability is
- * floating around (only SMP builds have more than one).
+ * floating around (only THREADED_RTS builds have more than one).
*/
initCapabilities();
initTaskManager();
-#if defined(SMP) || defined(PARALLEL_HASKELL)
+#if defined(THREADED_RTS) || defined(PARALLEL_HASKELL)
initSparkPools();
#endif
-#if defined(SMP)
+#if defined(THREADED_RTS)
/*
* Eagerly start one worker to run each Capability, except for
* Capability 0. The idea is that we're probably going to start a
}
#if !defined(THREADED_RTS)
- evac((StgClosure **)&blocked_queue_hd);
- evac((StgClosure **)&blocked_queue_tl);
- evac((StgClosure **)&sleeping_queue);
+ evac((StgClosure **)(void *)&blocked_queue_hd);
+ evac((StgClosure **)(void *)&blocked_queue_tl);
+ evac((StgClosure **)(void *)&sleeping_queue);
#endif
#endif
- evac((StgClosure **)&blackhole_queue);
+    // blackhole_queue is deliberately no longer marked as a GC root here;
+    // presumably its threads are now reached via scheduleCheckBlackHoles()
+    // before GC (see the heap-census path) — confirm before re-enabling.
+    // evac((StgClosure **)&blackhole_queue);
-#if defined(SMP) || defined(PARALLEL_HASKELL) || defined(GRAN)
+#if defined(THREADED_RTS) || defined(PARALLEL_HASKELL) || defined(GRAN)
markSparkQueue(evac);
#endif
static void (*extra_roots)(evac_fn);
+static void
+performGC_(rtsBool force_major, void (*get_roots)(evac_fn))
+{
+ Task *task = myTask();
+
+ if (task == NULL) {
+ ACQUIRE_LOCK(&sched_mutex);
+ task = newBoundTask();
+ RELEASE_LOCK(&sched_mutex);
+ scheduleDoGC(NULL,task,force_major, get_roots);
+ boundTaskExiting(task);
+ } else {
+ scheduleDoGC(NULL,task,force_major, get_roots);
+ }
+}
+
void
performGC(void)
{
-#ifdef THREADED_RTS
- // ToDo: we have to grab all the capabilities here.
- errorBelch("performGC not supported in threaded RTS (yet)");
- stg_exit(EXIT_FAILURE);
-#endif
- /* Obligated to hold this lock upon entry */
- GarbageCollect(GetRoots,rtsFalse);
+ performGC_(rtsFalse, GetRoots);
}
void
performMajorGC(void)
{
-#ifdef THREADED_RTS
- errorBelch("performMayjorGC not supported in threaded RTS (yet)");
- stg_exit(EXIT_FAILURE);
-#endif
- GarbageCollect(GetRoots,rtsTrue);
+ performGC_(rtsTrue, GetRoots);
}
static void
void
performGCWithRoots(void (*get_roots)(evac_fn))
{
-#ifdef THREADED_RTS
- errorBelch("performGCWithRoots not supported in threaded RTS (yet)");
- stg_exit(EXIT_FAILURE);
-#endif
extra_roots = get_roots;
- GarbageCollect(AllRoots,rtsFalse);
+ performGC_(rtsFalse, AllRoots);
}
/* -----------------------------------------------------------------------------
* CATCH_FRAME on the stack. In either case, we strip the entire
* stack and replace the thread with a zombie.
*
- * ToDo: in SMP mode, this function is only safe if either (a) we hold
- * all the Capabilities (eg. in GC), or (b) we own the Capability that
- * the TSO is currently blocked on or on the run queue of.
+ * ToDo: in THREADED_RTS mode, this function is only safe if either
+ * (a) we hold all the Capabilities (eg. in GC, or if there is only
+ * one Capability), or (b) we own the Capability that the TSO is
+ * currently blocked on or on the run queue of.
*
* -------------------------------------------------------------------------- */
// Remove it from any blocking queues
unblockThread(cap,tso);
+ // mark it dirty; we're about to change its stack.
+ dirtyTSO(tso);
+
sp = tso->sp;
// The stack freezing code assumes there's a closure pointer on
// we've got an exception to raise, so let's pass it to the
// handler in this frame.
//
- raise = (StgThunk *)allocateLocal(cap,sizeofW(StgThunk)+MIN_UPD_SIZE);
+ raise = (StgThunk *)allocateLocal(cap,sizeofW(StgThunk)+1);
TICK_ALLOC_SE_THK(1,0);
SET_HDR(raise,&stg_raise_info,cf->header.prof.ccs);
raise->payload[0] = exception;
// thunks which are currently under evaluataion.
//
- //
+ // OLD COMMENT (we don't have MIN_UPD_SIZE now):
// LDV profiling: stg_raise_info has THUNK as its closure
// type. Since a THUNK takes at least MIN_UPD_SIZE words in its
// payload, MIN_UPD_SIZE is more approprate than 1. It seems that
// Only create raise_closure if we need to.
if (raise_closure == NULL) {
raise_closure =
- (StgThunk *)allocateLocal(cap,sizeofW(StgThunk)+MIN_UPD_SIZE);
+ (StgThunk *)allocateLocal(cap,sizeofW(StgThunk)+1);
SET_HDR(raise_closure, &stg_raise_info, CCCS);
raise_closure->payload[0] = exception;
}