Implement stack chunks and separate TSO/STACK objects

author Simon Marlow <marlowsd@gmail.com>

Wed, 15 Dec 2010 12:08:43 +0000 (12:08 +0000)

committer Simon Marlow <marlowsd@gmail.com>

Wed, 15 Dec 2010 12:08:43 +0000 (12:08 +0000)
author Simon Marlow <marlowsd@gmail.com>
Wed, 15 Dec 2010 12:08:43 +0000 (12:08 +0000)
committer Simon Marlow <marlowsd@gmail.com>
Wed, 15 Dec 2010 12:08:43 +0000 (12:08 +0000)
diff --git a/compiler/cmm/CmmCPSGen.hs b/compiler/cmm/CmmCPSGen.hs

index 924ce9d..45d0aeb 100644 (file)
--- a/compiler/cmm/CmmCPSGen.hs
+++ b/compiler/cmm/CmmCPSGen.hs
@@ -331,8 +331,8 @@ nursery_bdescr_start  = cmmOffset stgCurrentNursery oFFSET_bdescr_start
  nursery_bdescr_blocks = cmmOffset stgCurrentNursery oFFSET_bdescr_blocks
  
  tso_SP, tso_STACK, tso_CCCS :: ByteOff
-tso_SP    = tsoFieldB     oFFSET_StgTSO_sp
-tso_STACK = tsoFieldB     oFFSET_StgTSO_stack
+tso_SP    = tsoFieldB     undefined --oFFSET_StgTSO_sp
+tso_STACK = tsoFieldB     undefined --oFFSET_StgTSO_stack
  tso_CCCS  = tsoProfFieldB oFFSET_StgTSO_CCCS
  
  -- The TSO struct has a variable header, and an optional StgTSOProfInfo in
diff --git a/compiler/codeGen/CgForeignCall.hs b/compiler/codeGen/CgForeignCall.hs

index 8e8e34d..cdaccc9 100644 (file)
--- a/compiler/codeGen/CgForeignCall.hs
+++ b/compiler/codeGen/CgForeignCall.hs
@@ -202,8 +202,9 @@ maybe_assign_temp e
  
  emitSaveThreadState :: Code
  emitSaveThreadState = do
-  -- CurrentTSO->sp = Sp;
-  stmtC $ CmmStore (cmmOffset stgCurrentTSO tso_SP) stgSp
+  -- CurrentTSO->stackobj->sp = Sp;
+  stmtC $ CmmStore (cmmOffset (CmmLoad (cmmOffset stgCurrentTSO tso_stackobj) bWord)
+                              stack_SP) stgSp
    emitCloseNursery
    -- and save the current cost centre stack in the TSO when profiling:
    when opt_SccProfilingOn $
@@ -216,14 +217,17 @@ emitCloseNursery = stmtC $ CmmStore nursery_bdescr_free (cmmOffsetW stgHp 1)
  emitLoadThreadState :: Code
  emitLoadThreadState = do
    tso <- newTemp bWord -- TODO FIXME NOW
+  stack <- newTemp bWord -- TODO FIXME NOW
    stmtsC [
-       -- tso = CurrentTSO;
-       CmmAssign (CmmLocal tso) stgCurrentTSO,
-       -- Sp = tso->sp;
-       CmmAssign sp (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_SP)
-                             bWord),
-       -- SpLim = tso->stack + RESERVED_STACK_WORDS;
-       CmmAssign spLim (cmmOffsetW (cmmOffset (CmmReg (CmmLocal tso)) tso_STACK)
+        -- tso = CurrentTSO
+        CmmAssign (CmmLocal tso) stgCurrentTSO,
+        -- stack = tso->stackobj
+        CmmAssign (CmmLocal stack) (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_stackobj) bWord),
+        -- Sp = stack->sp;
+        CmmAssign sp (CmmLoad (cmmOffset (CmmReg (CmmLocal stack)) stack_SP)
+                              bWord),
+        -- SpLim = stack->stack + RESERVED_STACK_WORDS;
+        CmmAssign spLim (cmmOffsetW (cmmOffset (CmmReg (CmmLocal stack)) stack_STACK)
                                     rESERVED_STACK_WORDS),
          -- HpAlloc = 0;
          --   HpAlloc is assumed to be set to non-zero only by a failed
@@ -234,7 +238,7 @@ emitLoadThreadState = do
    -- and load the current cost centre stack from the TSO when profiling:
    when opt_SccProfilingOn $
         stmtC (CmmStore curCCSAddr 
-               (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) bWord))
+                (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) bWord))
  
  emitOpenNursery :: Code
  emitOpenNursery = stmtsC [
@@ -262,20 +266,14 @@ nursery_bdescr_free   = cmmOffset stgCurrentNursery oFFSET_bdescr_free
  nursery_bdescr_start  = cmmOffset stgCurrentNursery oFFSET_bdescr_start
  nursery_bdescr_blocks = cmmOffset stgCurrentNursery oFFSET_bdescr_blocks
  
-tso_SP, tso_STACK, tso_CCCS :: ByteOff
-tso_SP    = tsoFieldB     oFFSET_StgTSO_sp
-tso_STACK = tsoFieldB     oFFSET_StgTSO_stack
-tso_CCCS  = tsoProfFieldB oFFSET_StgTSO_CCCS
+tso_stackobj, tso_CCCS, stack_STACK, stack_SP :: ByteOff
+tso_stackobj = closureField oFFSET_StgTSO_stackobj
+tso_CCCS     = closureField oFFSET_StgTSO_CCCS
+stack_STACK  = closureField oFFSET_StgStack_stack
+stack_SP     = closureField oFFSET_StgStack_sp
  
--- The TSO struct has a variable header, and an optional StgTSOProfInfo in
--- the middle.  The fields we're interested in are after the StgTSOProfInfo.
-tsoFieldB :: ByteOff -> ByteOff
-tsoFieldB off
-  | opt_SccProfilingOn = off + sIZEOF_StgTSOProfInfo + fixedHdrSize * wORD_SIZE
-  | otherwise          = off + fixedHdrSize * wORD_SIZE
-
-tsoProfFieldB :: ByteOff -> ByteOff
-tsoProfFieldB off = off + fixedHdrSize * wORD_SIZE
+closureField :: ByteOff -> ByteOff
+closureField off = off + fixedHdrSize * wORD_SIZE
  
  stgSp, stgHp, stgCurrentTSO, stgCurrentNursery :: CmmExpr
  stgSp            = CmmReg sp
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs

index 83c4301..7ddf597 100644 (file)
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -243,10 +243,12 @@ nursery_bdescr_start  = cmmOffset stgCurrentNursery oFFSET_bdescr_start
  nursery_bdescr_blocks = cmmOffset stgCurrentNursery oFFSET_bdescr_blocks
  
  tso_SP, tso_STACK, tso_CCCS :: ByteOff
-tso_SP    = tsoFieldB     oFFSET_StgTSO_sp
-tso_STACK = tsoFieldB     oFFSET_StgTSO_stack
  tso_CCCS  = tsoProfFieldB oFFSET_StgTSO_CCCS
  
+ --ToDo: needs merging with changes to CgForeign
+tso_STACK = tsoFieldB     undefined
+tso_SP    = tsoFieldB     undefined
+
  -- The TSO struct has a variable header, and an optional StgTSOProfInfo in
  -- the middle.  The fields we're interested in are after the StgTSOProfInfo.
  tsoFieldB :: ByteOff -> ByteOff
diff --git a/docs/users_guide/runtime_control.xml b/docs/users_guide/runtime_control.xml

index 4040913..0e13f0e 100644 (file)
--- a/docs/users_guide/runtime_control.xml
+++ b/docs/users_guide/runtime_control.xml
@@ -424,22 +424,88 @@
  
        <varlistentry>
         <term>
-         <option>-k</option><replaceable>size</replaceable>
+         <option>-ki</option><replaceable>size</replaceable>
           <indexterm><primary><option>-k</option></primary><secondary>RTS option</secondary></indexterm>
-         <indexterm><primary>stack, minimum size</primary></indexterm>
+         <indexterm><primary>stack, initial size</primary></indexterm>
          </term>
         <listitem>
-         <para>&lsqb;Default: 1k&rsqb; Set the initial stack size for
-          new threads.  Thread stacks (including the main thread's
-          stack) live on the heap, and grow as required.  The default
-          value is good for concurrent applications with lots of small
-          threads; if your program doesn't fit this model then
-          increasing this option may help performance.</para>
-
-         <para>The main thread is normally started with a slightly
-          larger heap to cut down on unnecessary stack growth while
-          the program is starting up.</para>
-       </listitem>
+          <para>
+            &lsqb;Default: 1k&rsqb; Set the initial stack size for new
+            threads.  (Note: this flag used to be
+            simply <option>-k</option>, but was renamed
+            to <option>-ki</option> in GHC 7.2.1.  The old name is
+            still accepted for backwards compatibility, but it may
+            be removed in a future version.)
+          </para>
+
+          <para>
+            Thread stacks (including the main thread's stack) live on
+            the heap.  As the stack grows, new stack chunks are added
+            as required; if the stack shrinks again, these extra stack
+            chunks are reclaimed by the garbage collector.  The
+            default initial stack size is deliberately small, in order
+            to keep the time and space overhead for thread creation to
+            a minimum, and to make it practical to spawn threads for
+            even tiny pieces of work.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <option>-kc</option><replaceable>size</replaceable>
+          <indexterm><primary><option>-kc</option></primary><secondary>RTS
+          option</secondary></indexterm>
+          <indexterm><primary>stack</primary><secondary>chunk size</secondary></indexterm>
+        </term>
+        <listitem>
+          <para>
+            &lsqb;Default: 32k&rsqb; Set the size of &ldquo;stack
+            chunks&rdquo;.  When a thread's current stack overflows, a
+            new stack chunk is created and added to the thread's
+            stack, until the limit set by <option>-K</option> is
+            reached.
+          </para>
+
+          <para>
+            The advantage of smaller stack chunks is that the garbage
+            collector can avoid traversing stack chunks if they are
+            known to be unmodified since the last collection, so
+            reducing the chunk size means that the garbage collector
+            can identify more stack as unmodified, and the GC overhead
+            might be reduced.  On the other hand, making stack chunks
+            too small adds some overhead as there will be more
+            overflow/underflow between chunks.  The default setting of
+            32k appears to be a reasonable compromise in most cases.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <option>-kb</option><replaceable>size</replaceable>
+          <indexterm><primary><option>-kb</option></primary><secondary>RTS
+          option</secondary></indexterm>
+          <indexterm><primary>stack</primary><secondary>chunk buffer size</secondary></indexterm>
+        </term>
+        <listitem>
+          <para>
+            &lsqb;Default: 1k&rsqb; Sets the stack chunk buffer size.
+            When a stack chunk overflows and a new stack chunk is
+            created, some of the data from the previous stack chunk is
+            moved into the new chunk, to avoid an immediate underflow
+            and repeated overflow/underflow at the boundary.  The
+            amount of stack moved is set by the <option>-kb</option>
+            option.
+          </para>
+          <para>
+            Note that to avoid wasting space, this value should
+            typically be less than 10&percnt; of the size of a stack
+            chunk (<option>-kc</option>), because in a chain of stack
+            chunks, each chunk will have a gap of unused space of this
+            size.
+          </para>
+        </listitem>
        </varlistentry>
  
        <varlistentry>
@@ -451,9 +517,14 @@
         <listitem>
           <para>&lsqb;Default: 8M&rsqb; Set the maximum stack size for
            an individual thread to <replaceable>size</replaceable>
-          bytes.  This option is there purely to stop the program
-          eating up all the available memory in the machine if it gets
-          into an infinite loop.</para>
+          bytes.  If the thread attempts to exceed this limit, it will
+            be sent the <literal>StackOverflow</literal> exception.
+          </para>
+          <para>
+            This option is there mainly to stop the program eating up
+            all the available memory in the machine if it gets into an
+            infinite loop.
+          </para>
         </listitem>
        </varlistentry>
  
diff --git a/includes/Cmm.h b/includes/Cmm.h

index 0088c1a..6abe760 100644 (file)
--- a/includes/Cmm.h
+++ b/includes/Cmm.h
@@ -467,6 +467,12 @@
  #define mutArrPtrsCardWords(n) \
      ROUNDUP_BYTES_TO_WDS(((n) + (1 << MUT_ARR_PTRS_CARD_BITS) - 1) >> MUT_ARR_PTRS_CARD_BITS)
  
+#if defined(PROFILING) || (!defined(THREADED_RTS) && defined(DEBUG))
+#define OVERWRITING_CLOSURE(c) foreign "C" overwritingClosure(c "ptr")
+#else
+#define OVERWRITING_CLOSURE(c) /* nothing */
+#endif
+
  /* -----------------------------------------------------------------------------
     Voluntary Yields/Blocks
  
diff --git a/includes/mkDerivedConstants.c b/includes/mkDerivedConstants.c

index ade104a..0ed7ec6 100644 (file)
--- a/includes/mkDerivedConstants.c
+++ b/includes/mkDerivedConstants.c
@@ -296,9 +296,12 @@ main(int argc, char *argv[])
      closure_field(StgTSO, dirty);
      closure_field(StgTSO, bq);
      closure_field_("StgTSO_CCCS", StgTSO, prof.CCCS);
-    tso_field(StgTSO, sp);
-    tso_field_offset(StgTSO, stack);
-    tso_field(StgTSO, stack_size);
+    closure_field(StgTSO, stackobj);
+
+    closure_field(StgStack, sp);
+    closure_field_offset(StgStack, stack);
+    closure_field(StgStack, stack_size);
+    closure_field(StgStack, dirty);
  
      struct_size(StgTSOProfInfo);
  
diff --git a/includes/rts/Constants.h b/includes/rts/Constants.h

index e21a893..a4114ab 100644 (file)
--- a/includes/rts/Constants.h
+++ b/includes/rts/Constants.h
@@ -198,8 +198,7 @@
  #define ThreadRunGHC    1      /* return to address on top of stack */
  #define ThreadInterpret 2      /* interpret this thread */
  #define ThreadKilled   3       /* thread has died, don't run it */
-#define ThreadRelocated        4       /* thread has moved, link points to new locn */
-#define ThreadComplete 5       /* thread has finished */
+#define ThreadComplete  4       /* thread has finished */
  
  /*
   * Constants for the why_blocked field of a TSO
@@ -266,11 +265,6 @@
  #define TSO_STOPPED_ON_BREAKPOINT 16 
  
  /*
- * TSO_LINK_DIRTY is set when a TSO's link field is modified
- */
-#define TSO_LINK_DIRTY 32
-
-/*
   * Used by the sanity checker to check whether TSOs are on the correct
   * mutable list.
   */
diff --git a/includes/rts/Flags.h b/includes/rts/Flags.h

index 8bfadaa..75525d8 100644 (file)
--- a/includes/rts/Flags.h
+++ b/includes/rts/Flags.h
@@ -29,6 +29,8 @@ struct GC_FLAGS {
  
      nat     maxStkSize;         /* in *words* */
      nat     initialStkSize;     /* in *words* */
+    nat     stkChunkSize;       /* in *words* */
+    nat     stkChunkBufferSize; /* in *words* */
  
      nat            maxHeapSize;        /* in *blocks* */
      nat     minAllocAreaSize;   /* in *blocks* */
diff --git a/includes/rts/prof/LDV.h b/includes/rts/prof/LDV.h

index 77d873c..6426691 100644 (file)
--- a/includes/rts/prof/LDV.h
+++ b/includes/rts/prof/LDV.h
@@ -31,25 +31,16 @@
  
  #ifdef CMINUSMINUS
  
-#define LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(c) \
-  foreign "C" LDV_recordDead_FILL_SLOP_DYNAMIC(c "ptr")
-
  #else
  
  #define LDV_RECORD_CREATE(c)   \
    LDVW((c)) = ((StgWord)RTS_DEREF(era) << LDV_SHIFT) | LDV_STATE_CREATE
  
-void LDV_recordDead_FILL_SLOP_DYNAMIC( StgClosure *p );
-
-#define LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(c) \
-  LDV_recordDead_FILL_SLOP_DYNAMIC(c)
-
  #endif
  
  #else  /* !PROFILING */
  
  #define LDV_RECORD_CREATE(c)   /* nothing */
-#define LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(c)  /* nothing */
  
  #endif /* PROFILING */
  
diff --git a/includes/rts/storage/ClosureMacros.h b/includes/rts/storage/ClosureMacros.h

index aead2ed..7123c20 100644 (file)
--- a/includes/rts/storage/ClosureMacros.h
+++ b/includes/rts/storage/ClosureMacros.h
@@ -131,9 +131,9 @@
  
  // Use when changing a closure from one kind to another
  #define OVERWRITE_INFO(c, new_info)                             \
-   LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC((StgClosure *)(c));       \
-   SET_INFO((c), (new_info));                                  \
-   LDV_RECORD_CREATE(c);
+    OVERWRITING_CLOSURE((StgClosure *)(c));                     \
+    SET_INFO((c), (new_info));                                  \
+    LDV_RECORD_CREATE(c);
  
  /* -----------------------------------------------------------------------------
     How to get hold of the static link field for a static closure.
@@ -289,8 +289,8 @@ INLINE_HEADER StgOffset arr_words_sizeW( StgArrWords* x )
  INLINE_HEADER StgOffset mut_arr_ptrs_sizeW( StgMutArrPtrs* x )
  { return sizeofW(StgMutArrPtrs) + x->size; }
  
-INLINE_HEADER StgWord tso_sizeW ( StgTSO *tso )
-{ return TSO_STRUCT_SIZEW + tso->stack_size; }
+INLINE_HEADER StgWord stack_sizeW ( StgStack *stack )
+{ return sizeofW(StgStack) + stack->stack_size; }
  
  INLINE_HEADER StgWord bco_sizeW ( StgBCO *bco )
  { return bco->size; }
@@ -339,7 +339,9 @@ closure_sizeW_ (StgClosure *p, StgInfoTable *info)
      case MUT_ARR_PTRS_FROZEN0:
         return mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
      case TSO:
-       return tso_sizeW((StgTSO *)p);
+        return sizeofW(StgTSO);
+    case STACK:
+        return stack_sizeW((StgStack*)p);
      case BCO:
         return bco_sizeW((StgBCO *)p);
      case TREC_CHUNK:
@@ -417,4 +419,62 @@ INLINE_HEADER StgWord8 *mutArrPtrsCard (StgMutArrPtrs *a, lnat n)
      return ((StgWord8 *)&(a->payload[a->ptrs]) + n);
  }
  
+/* -----------------------------------------------------------------------------
+   Replacing a closure with a different one.  We must call
+   OVERWRITING_CLOSURE(p) on the old closure that is about to be
+   overwritten.
+
+   In PROFILING mode, LDV profiling requires that we fill the slop
+   with zeroes, and record the old closure as dead (LDV_recordDead()).
+
+   In DEBUG mode, we must overwrite the slop with zeroes, because the
+   sanity checker wants to walk through the heap checking all the
+   pointers.
+
+   In multicore mode, we *cannot* overwrite slop with zeroes, because
+   another thread might be reading it.  So,
+
+      PROFILING is not compatible with  +RTS -N<n> (for n > 1)
+
+      THREADED_RTS can be used with DEBUG, but full heap sanity
+      checking is disabled.
+
+   -------------------------------------------------------------------------- */
+
+#if defined(PROFILING) || (!defined(THREADED_RTS) && defined(DEBUG))
+#define OVERWRITING_CLOSURE(c) overwritingClosure(c)
+#else
+#define OVERWRITING_CLOSURE(c) /* nothing */
+#endif
+
+#ifdef PROFILING
+void LDV_recordDead (StgClosure *c, nat size);
+#endif
+
+#ifdef KEEP_INLINES
+void overwritingClosure (StgClosure *p);
+#else
+INLINE_HEADER
+#endif
+void
+overwritingClosure (StgClosure *p)
+{
+    nat size, i;
+
+#if defined(PROFILING)
+    if (era <= 0) return;
+#endif
+
+    size = closure_sizeW(p);
+
+    // For LDV profiling, we need to record the closure as dead
+#if defined(PROFILING)
+    LDV_recordDead((StgClosure *)(p), size);
+#endif
+
+    for (i = 0; i < size - sizeofW(StgThunkHeader); i++) {
+        ((StgThunk *)(p))->payload[i] = 0;
+    }
+}
+
  #endif /* RTS_STORAGE_CLOSUREMACROS_H */
diff --git a/includes/rts/storage/ClosureTypes.h b/includes/rts/storage/ClosureTypes.h

index b7489c9..75ec08b 100644 (file)
--- a/includes/rts/storage/ClosureTypes.h
+++ b/includes/rts/storage/ClosureTypes.h
@@ -56,27 +56,29 @@
  #define RET_FUN                 35
  #define UPDATE_FRAME           36
  #define CATCH_FRAME            37
-#define STOP_FRAME             38
-#define BLOCKING_QUEUE         39
-#define BLACKHOLE              40
-#define MVAR_CLEAN             41
-#define MVAR_DIRTY             42
-#define ARR_WORDS              43
-#define MUT_ARR_PTRS_CLEAN      44
-#define MUT_ARR_PTRS_DIRTY      45
-#define MUT_ARR_PTRS_FROZEN0    46
-#define MUT_ARR_PTRS_FROZEN     47
-#define MUT_VAR_CLEAN          48
-#define MUT_VAR_DIRTY          49
-#define WEAK                   50
-#define PRIM                   51
-#define MUT_PRIM                52
-#define TSO                    53
-#define TREC_CHUNK              54
-#define ATOMICALLY_FRAME        55
-#define CATCH_RETRY_FRAME       56
-#define CATCH_STM_FRAME         57
-#define WHITEHOLE               58
-#define N_CLOSURE_TYPES         59
+#define UNDERFLOW_FRAME         38
+#define STOP_FRAME              39
+#define BLOCKING_QUEUE         40
+#define BLACKHOLE              41
+#define MVAR_CLEAN             42
+#define MVAR_DIRTY             43
+#define ARR_WORDS              44
+#define MUT_ARR_PTRS_CLEAN      45
+#define MUT_ARR_PTRS_DIRTY      46
+#define MUT_ARR_PTRS_FROZEN0    47
+#define MUT_ARR_PTRS_FROZEN     48
+#define MUT_VAR_CLEAN          49
+#define MUT_VAR_DIRTY          50
+#define WEAK                   51
+#define PRIM                   52
+#define MUT_PRIM                53
+#define TSO                    54
+#define STACK                   55
+#define TREC_CHUNK              56
+#define ATOMICALLY_FRAME        57
+#define CATCH_RETRY_FRAME       58
+#define CATCH_STM_FRAME         59
+#define WHITEHOLE               60
+#define N_CLOSURE_TYPES         61
  
  #endif /* RTS_STORAGE_CLOSURETYPES_H */
diff --git a/includes/rts/storage/Closures.h b/includes/rts/storage/Closures.h

index 2683ce7..f3929ee 100644 (file)
--- a/includes/rts/storage/Closures.h
+++ b/includes/rts/storage/Closures.h
@@ -166,6 +166,11 @@ typedef struct {
  } StgCatchFrame;
  
  typedef struct {
+    const StgInfoTable* info;
+    struct StgStack_ *next_chunk;
+} StgUnderflowFrame;
+
+typedef struct {
      StgHeader  header;
  } StgStopFrame;  
  
diff --git a/includes/rts/storage/TSO.h b/includes/rts/storage/TSO.h

index 0e9883f..04e673f 100644 (file)
--- a/includes/rts/storage/TSO.h
+++ b/includes/rts/storage/TSO.h
@@ -83,7 +83,7 @@ typedef struct StgTSO_ {
        Currently used for linking TSOs on:
        * cap->run_queue_{hd,tl}
        * (non-THREADED_RTS); the blocked_queue
-      * and pointing to the relocated version of a ThreadRelocated
+      * and pointing to the next chunk for a ThreadOldStack
  
         NOTE!!!  do not modify _link directly, it is subject to
         a write barrier for generational GC.  Instead use the
@@ -97,7 +97,11 @@ typedef struct StgTSO_ {
      struct StgTSO_*         global_link;    // Links threads on the
                                              // generation->threads lists
      
-    StgWord                 dirty;          /* non-zero => dirty */
+    /*
+     * The thread's stack
+     */
+    struct StgStack_       *stackobj;
+
      /*
       * The tso->dirty flag indicates that this TSO's stack should be
       * scanned during garbage collection.  It also indicates that this
@@ -110,10 +114,6 @@ typedef struct StgTSO_ {
       *
       * tso->dirty is set by dirty_TSO(), and unset by the garbage
       * collector (only).
-     *
-     * The link field has a separate dirty bit of its own, namely the
-     * bit TSO_LINK_DIRTY in the tso->flags field, set by
-     * setTSOLink().
       */
  
      StgWord16               what_next;      // Values defined in Constants.h
@@ -121,21 +121,21 @@ typedef struct StgTSO_ {
      StgWord32               flags;          // Values defined in Constants.h
      StgTSOBlockInfo         block_info;
      StgThreadID             id;
-    int                     saved_errno;
+    StgWord32               saved_errno;
+    StgWord32               dirty;          /* non-zero => dirty */
      struct InCall_*         bound;
      struct Capability_*     cap;
+
      struct StgTRecHeader_ * trec;       /* STM transaction record */
  
      /*
-       A list of threads blocked on this TSO waiting to throw
-       exceptions.  In order to access this field, the TSO must be
-       locked using lockClosure/unlockClosure (see SMP.h).
+     * A list of threads blocked on this TSO waiting to throw exceptions.
      */
      struct MessageThrowTo_ * blocked_exceptions;
  
      /*
-      A list of StgBlockingQueue objects, representing threads blocked
-      on thunks that are under evaluation by this thread.
+     * A list of StgBlockingQueue objects, representing threads
+     * blocked on thunks that are under evaluation by this thread.
      */
      struct StgBlockingQueue_ *bq;
  
@@ -149,14 +149,36 @@ typedef struct StgTSO_ {
      StgWord32 saved_winerror;
  #endif
  
-    /* The thread stack... */
-    StgWord32         stack_size;     /* stack size in *words* */
-    StgWord32          max_stack_size; /* maximum stack size in *words* */
-    StgPtr             sp;
-    
-    StgWord            stack[FLEXIBLE_ARRAY];
+    /*
+     * sum of the sizes of all stack chunks (in words), used to decide
+     * whether to throw the StackOverflow exception when the stack
+     * overflows, or whether to just chain on another stack chunk.
+     *
+     * Note that this overestimates the real stack size, because each
+     * chunk will have a gap at the end, of +RTS -kb<size> words.
+     * This means stack overflows are not entirely accurate, because
+     * the more gaps there are, the sooner the stack will run into the
+     * hard +RTS -K<size> limit.
+     */
+    StgWord32  tot_stack_size;
+
  } *StgTSOPtr;
  
+typedef struct StgStack_ {
+    StgHeader  header;
+    StgWord32  stack_size;     // stack size in *words*
+    StgWord32  dirty;          // non-zero => dirty
+    StgPtr     sp;             // current stack pointer
+    StgWord    stack[FLEXIBLE_ARRAY];
+} StgStack;
+
+// Calculate SpLim from a TSO (reads tso->stackobj, but no fields from
+// the stackobj itself).
+INLINE_HEADER StgPtr tso_SpLim (StgTSO* tso)
+{
+    return tso->stackobj->stack + RESERVED_STACK_WORDS;
+}
+
  /* -----------------------------------------------------------------------------
     functions
     -------------------------------------------------------------------------- */
@@ -165,17 +187,7 @@ void dirty_TSO  (Capability *cap, StgTSO *tso);
  void setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target);
  void setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target);
  
-// Apply to a TSO before looking at it if you are not sure whether it
-// might be ThreadRelocated or not (basically, that's most of the time
-// unless the TSO is the current TSO).
-//
-INLINE_HEADER StgTSO * deRefTSO(StgTSO *tso)
-{
-    while (tso->what_next == ThreadRelocated) {
-       tso = tso->_link;
-    }
-    return tso;
-}
+void dirty_STACK (Capability *cap, StgStack *stack);
  
  /* -----------------------------------------------------------------------------
     Invariants:
@@ -232,18 +244,6 @@ INLINE_HEADER StgTSO * deRefTSO(StgTSO *tso)
  
   ---------------------------------------------------------------------------- */
  
-/* Workaround for a bug/quirk in gcc on certain architectures.
- * symptom is that (&tso->stack - &tso->header) /=  sizeof(StgTSO)
- * in other words, gcc pads the structure at the end.
- */
-
-extern StgTSO dummy_tso;
-
-#define TSO_STRUCT_SIZE \
-   ((char *)&dummy_tso.stack - (char *)&dummy_tso.header)
-
-#define TSO_STRUCT_SIZEW (TSO_STRUCT_SIZE / sizeof(W_))
-
  /* this is the NIL ptr for a TSO queue (e.g. runnable queue) */
  #define END_TSO_QUEUE  ((StgTSO *)(void*)&stg_END_TSO_QUEUE_closure)
  
diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h

index 8a1b84a..c52a3c9 100644 (file)
--- a/includes/stg/MiscClosures.h
+++ b/includes/stg/MiscClosures.h
@@ -61,6 +61,7 @@ RTS_RET(stg_catch_stm_frame);
  RTS_RET(stg_unmaskAsyncExceptionszh_ret);
  RTS_RET(stg_maskUninterruptiblezh_ret);
  RTS_RET(stg_maskAsyncExceptionszh_ret);
+RTS_RET(stg_stack_underflow_frame);
  
  // RTS_FUN(stg_interp_constr_entry);
  //
@@ -100,6 +101,7 @@ RTS_ENTRY(stg_STABLE_NAME);
  RTS_ENTRY(stg_MVAR_CLEAN);
  RTS_ENTRY(stg_MVAR_DIRTY);
  RTS_ENTRY(stg_TSO);
+RTS_ENTRY(stg_STACK);
  RTS_ENTRY(stg_ARR_WORDS);
  RTS_ENTRY(stg_MUT_ARR_WORDS);
  RTS_ENTRY(stg_MUT_ARR_PTRS_CLEAN);
@@ -119,6 +121,7 @@ RTS_ENTRY(stg_PAP);
  RTS_ENTRY(stg_AP);
  RTS_ENTRY(stg_AP_NOUPD);
  RTS_ENTRY(stg_AP_STACK);
+RTS_ENTRY(stg_AP_STACK_NOUPD);
  RTS_ENTRY(stg_dummy_ret);
  RTS_ENTRY(stg_raise);
  RTS_ENTRY(stg_raise_ret);
diff --git a/includes/stg/Ticky.h b/includes/stg/Ticky.h

index 2ede8eb..a811aec 100644 (file)
--- a/includes/stg/Ticky.h
+++ b/includes/stg/Ticky.h
@@ -190,7 +190,8 @@ EXTERN StgInt RET_SEMI_loads_avoided INIT(0);
  #define TICK_UPD_SQUEEZED()
  #define TICK_ALLOC_HEAP_NOCTR(x)
  #define TICK_GC_FAILED_PROMOTION()
-#define TICK_ALLOC_TSO(g,s)
+#define TICK_ALLOC_TSO()
+#define TICK_ALLOC_STACK(g)
  #define TICK_ALLOC_UP_THK(g,s)
  #define TICK_ALLOC_SE_THK(g,s)
  
diff --git a/rts/Apply.cmm b/rts/Apply.cmm

index 9af9b11..f9ac3b3 100644 (file)
--- a/rts/Apply.cmm
+++ b/rts/Apply.cmm
@@ -350,3 +350,56 @@ for:
  
    ENTER();
  }
+
+/* -----------------------------------------------------------------------------
+   AP_STACK_NOUPD - exactly like AP_STACK, but doesn't push an update frame.
+   -------------------------------------------------------------------------- */
+
+INFO_TABLE(stg_AP_STACK_NOUPD,/*special layout*/0,0,AP_STACK,
+                                        "AP_STACK_NOUPD","AP_STACK_NOUPD")
+{
+  W_ Words;
+  W_ ap;
+
+  ap = R1;
+  
+  Words = StgAP_STACK_size(ap);
+
+  /* 
+   * Check for stack overflow.  IMPORTANT: use a _NP check here,
+   * because if the check fails, we might end up blackholing this very
+   * closure, in which case we must enter the blackhole on return rather
+   * than continuing to evaluate the now-defunct closure.
+   */
+  STK_CHK_NP(WDS(Words) + WDS(AP_STACK_SPLIM));
+  /* ensure there is at least AP_STACK_SPLIM words of headroom available
+   * after unpacking the AP_STACK. See bug #1466 */
+
+  Sp = Sp - WDS(Words);
+
+  TICK_ENT_AP();
+  LDV_ENTER(ap);
+
+  // Enter PAP cost centre
+  ENTER_CCS_PAP_CL(ap);   // ToDo: ENTER_CC_AP_CL 
+
+  // Reload the stack
+  W_ i;
+  W_ p;
+  p = ap + SIZEOF_StgHeader + OFFSET_StgAP_STACK_payload;
+  i = 0;
+for:
+  if (i < Words) {
+    Sp(i) = W_[p];
+    p = p + WDS(1);
+    i = i + 1;
+    goto for;
+  }
+
+  // Off we go!
+  TICK_ENT_VIA_NODE();
+
+  R1 = StgAP_STACK_fun(ap);
+
+  ENTER();
+}
diff --git a/rts/ClosureFlags.c b/rts/ClosureFlags.c

index d5181ca..41810f4 100644 (file)
--- a/rts/ClosureFlags.c
+++ b/rts/ClosureFlags.c
@@ -59,8 +59,9 @@ StgWord16 closure_flags[] = {
   [RET_FUN]             =  ( 0                                         ),
   [UPDATE_FRAME]                =  (     _BTM                                  ),
   [CATCH_FRAME]         =  (     _BTM                                  ),
- [STOP_FRAME]          =  (     _BTM                                  ),
- [BLACKHOLE]           =  (          _NS|              _UPT           ),
+ [UNDERFLOW_FRAME]      =  (     _BTM                                  ),
+ [STOP_FRAME]           =  (     _BTM                                  ),
+ [BLACKHOLE]            =  (          _NS|              _UPT           ),
   [BLOCKING_QUEUE]      =  (          _NS|         _MUT|_UPT           ),
   [MVAR_CLEAN]          =  (_HNF|     _NS|         _MUT|_UPT           ),
   [MVAR_DIRTY]          =  (_HNF|     _NS|         _MUT|_UPT           ),
@@ -74,7 +75,8 @@ StgWord16 closure_flags[] = {
   [WEAK]                        =  (_HNF|     _NS|              _UPT           ),
   [PRIM]                =  (_HNF|     _NS|              _UPT           ),
   [MUT_PRIM]            =  (_HNF|     _NS|         _MUT|_UPT           ),
- [TSO]                         =  (_HNF|     _NS|         _MUT|_UPT           ),
+ [TSO]                  =  (_HNF|     _NS|         _MUT|_UPT           ),
+ [STACK]                =  (_HNF|     _NS|         _MUT|_UPT           ),
   [TREC_CHUNK]           =  (          _NS|         _MUT|_UPT           ),
   [ATOMICALLY_FRAME]     =  (     _BTM                                  ),
   [CATCH_RETRY_FRAME]    =  (     _BTM                                  ),
@@ -82,6 +84,6 @@ StgWord16 closure_flags[] = {
   [WHITEHOLE]           =  ( 0                                         )
  };
  
-#if N_CLOSURE_TYPES != 59
+#if N_CLOSURE_TYPES != 61
  #error Closure types changed: update ClosureFlags.c!
  #endif
diff --git a/rts/Exception.cmm b/rts/Exception.cmm

index 581dafd..24da1c6 100644 (file)
--- a/rts/Exception.cmm
+++ b/rts/Exception.cmm
@@ -283,11 +283,6 @@ stg_killThreadzh
       * If the exception went to a catch frame, we'll just continue from
       * the handler.
       */
-  loop:
-    if (StgTSO_what_next(target) == ThreadRelocated::I16) {
-        target = StgTSO__link(target);
-        goto loop;
-    }
      if (target == CurrentTSO) {
          /*
           * So what should happen if a thread calls "throwTo self" inside
@@ -436,9 +431,9 @@ stg_raisezh
  #endif
      
  retry_pop_stack:
-    StgTSO_sp(CurrentTSO) = Sp;
+    SAVE_THREAD_STATE();
      (frame_type) = foreign "C" raiseExceptionHelper(BaseReg "ptr", CurrentTSO "ptr", exception "ptr") [];
-    Sp = StgTSO_sp(CurrentTSO);
+    LOAD_THREAD_STATE();
      if (frame_type == ATOMICALLY_FRAME) {
        /* The exception has reached the edge of a memory transaction.  Check that 
         * the transaction is valid.  If not then perhaps the exception should
@@ -511,8 +506,10 @@ retry_pop_stack:
          * We will leave the stack in a GC'able state, see the stg_stop_thread
          * entry code in StgStartup.cmm.
          */
-       Sp = CurrentTSO + TSO_OFFSET_StgTSO_stack 
-               + WDS(TO_W_(StgTSO_stack_size(CurrentTSO))) - WDS(2);
+        W_ stack;
+        stack = StgTSO_stackobj(CurrentTSO);
+        Sp = stack + OFFSET_StgStack_stack
+                + WDS(TO_W_(StgStack_stack_size(stack))) - WDS(2);
         Sp(1) = exception;      /* save the exception */
         Sp(0) = stg_enter_info; /* so that GC can traverse this stack */
         StgTSO_what_next(CurrentTSO) = ThreadKilled::I16;
diff --git a/rts/Interpreter.c b/rts/Interpreter.c

index fa4a46f..ade4ad1 100644 (file)
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -65,13 +65,13 @@
  #define BCO_LIT(n)    literals[n]
  
  #define LOAD_STACK_POINTERS                                    \
-    Sp = cap->r.rCurrentTSO->sp;                               \
+    Sp = cap->r.rCurrentTSO->stackobj->sp;                      \
      /* We don't change this ... */                             \
-    SpLim = cap->r.rCurrentTSO->stack + RESERVED_STACK_WORDS;
+    SpLim = tso_SpLim(cap->r.rCurrentTSO);
  
  #define SAVE_STACK_POINTERS                    \
      ASSERT(Sp > SpLim); \
-    cap->r.rCurrentTSO->sp = Sp
+    cap->r.rCurrentTSO->stackobj->sp = Sp
  
  #define RETURN_TO_SCHEDULER(todo,retcode)      \
     SAVE_STACK_POINTERS;                                \
@@ -266,7 +266,7 @@ eval_obj:
               debugBelch("Sp = %p\n", Sp);
               debugBelch("\n" );
  
-             printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size);
+             printStackChunk(Sp,cap->r.rCurrentTSO->stackobj->stack+cap->r.rCurrentTSO->stackobj->stack_size);
               debugBelch("\n\n");
              );
  
@@ -381,11 +381,11 @@ do_return:
               debugBelch("Returning: "); printObj(obj);
               debugBelch("Sp = %p\n", Sp);
               debugBelch("\n" );
-             printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size);
+             printStackChunk(Sp,cap->r.rCurrentTSO->stackobj->stack+cap->r.rCurrentTSO->stackobj->stack_size);
               debugBelch("\n\n");
              );
  
-    IF_DEBUG(sanity,checkStackChunk(Sp, cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size));
+    IF_DEBUG(sanity,checkStackChunk(Sp, cap->r.rCurrentTSO->stackobj->stack+cap->r.rCurrentTSO->stackobj->stack_size));
  
      switch (get_itbl((StgClosure *)Sp)->type) {
  
@@ -466,7 +466,7 @@ do_return:
         INTERP_TICK(it_retto_other);
         IF_DEBUG(interpreter,
                  debugBelch("returning to unknown frame -- yielding to sched\n"); 
-                printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size);
+                 printStackChunk(Sp,cap->r.rCurrentTSO->stackobj->stack+cap->r.rCurrentTSO->stackobj->stack_size);
             );
         Sp -= 2;
         Sp[1] = (W_)tagged_obj;
@@ -529,8 +529,8 @@ do_return_unboxed:
             INTERP_TICK(it_retto_other);
             IF_DEBUG(interpreter,
                      debugBelch("returning to unknown frame -- yielding to sched\n"); 
-                    printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size);
-               );
+                     printStackChunk(Sp,cap->r.rCurrentTSO->stackobj->stack+cap->r.rCurrentTSO->stackobj->stack_size);
+                );
             RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
         }
         }
diff --git a/rts/LdvProfile.c b/rts/LdvProfile.c

index 021ecf0..acec057 100644 (file)
--- a/rts/LdvProfile.c
+++ b/rts/LdvProfile.c
@@ -168,6 +168,7 @@ processHeapClosureForDead( StgClosure *c )
         // stack objects
      case UPDATE_FRAME:
      case CATCH_FRAME:
+    case UNDERFLOW_FRAME:
      case STOP_FRAME:
      case RET_DYN:
      case RET_BCO:
diff --git a/rts/Messages.c b/rts/Messages.c

index 5e0fa25..1730278 100644 (file)
--- a/rts/Messages.c
+++ b/rts/Messages.c
@@ -98,11 +98,13 @@ loop:
          r = throwToMsg(cap, t);
  
          switch (r) {
-        case THROWTO_SUCCESS:
+        case THROWTO_SUCCESS: {
              // this message is done
-            unlockClosure((StgClosure*)m, &stg_MSG_NULL_info);
-            tryWakeupThread(cap, t->source);
+            StgTSO *source = t->source;
+            doneWithMsgThrowTo(t);
+            tryWakeupThread(cap, source);
              break;
+        }
          case THROWTO_BLOCKED:
              // unlock the message
              unlockClosure((StgClosure*)m, &stg_MSG_THROWTO_info);
@@ -203,7 +205,7 @@ loop:
  
      else if (info == &stg_TSO_info)
      {
-        owner = deRefTSO((StgTSO *)p);
+        owner = (StgTSO*)p;
  
  #ifdef THREADED_RTS
          if (owner->cap != cap) {
@@ -265,7 +267,7 @@ loop:
  
          ASSERT(bq->bh == bh);
  
-        owner = deRefTSO(bq->owner);
+        owner = bq->owner;
  
          ASSERT(owner != END_TSO_QUEUE);
  
diff --git a/rts/Messages.h b/rts/Messages.h

index 54650fd..febb839 100644 (file)
--- a/rts/Messages.h
+++ b/rts/Messages.h
@@ -15,4 +15,15 @@ void executeMessage (Capability *cap, Message *m);
  void sendMessage    (Capability *from_cap, Capability *to_cap, Message *msg);
  #endif
  
+#include "Capability.h"
+#include "Updates.h" // for DEBUG_FILL_SLOP
+
+INLINE_HEADER void
+doneWithMsgThrowTo (MessageThrowTo *m) // Retire a throwTo message: unlock it and revoke it (mark it MSG_NULL) in one step.
+{
+    OVERWRITING_CLOSURE((StgClosure*)m); // NOTE(review): presumably tells the debug/profiling machinery from Updates.h that this closure is about to be overwritten -- confirm
+    unlockClosure((StgClosure*)m, &stg_MSG_NULL_info); // unlock by installing the MSG_NULL info pointer; queue walkers in this patch skip messages whose info is already MSG_NULL
+    LDV_RECORD_CREATE(m); // NOTE(review): presumably restarts LDV profiling bookkeeping for the closure under its new identity -- confirm
+} // Call sites in this patch that need m->source read it *before* calling this helper.
+
  #include "EndPrivate.h"
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm

index 0cf26b2..4f6c252 100644 (file)
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -634,11 +634,6 @@ stg_threadStatuszh
      W_ ret;
  
      tso = R1;
-    loop:
-      if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
-          tso = StgTSO__link(tso);
-          goto loop;
-      }
  
      what_next   = TO_W_(StgTSO_what_next(tso));
      why_blocked = TO_W_(StgTSO_why_blocked(tso));
@@ -939,9 +934,9 @@ stg_retryzh
  
    // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
  retry_pop_stack:
-  StgTSO_sp(CurrentTSO) = Sp;
-  (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
-  Sp = StgTSO_sp(CurrentTSO);
+  SAVE_THREAD_STATE();
+  (frame_type) = foreign "C" findRetryFrameHelper(MyCapability(), CurrentTSO "ptr") [];
+  LOAD_THREAD_STATE();
    frame = Sp;
    trec = StgTSO_trec(CurrentTSO);
    outer  = StgTRecHeader_enclosing_trec(trec);
@@ -1138,13 +1133,13 @@ stg_newMVarzh
  }
  
  
-#define PerformTake(tso, value)                                \
-    W_[StgTSO_sp(tso) + WDS(1)] = value;               \
-    W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
+#define PerformTake(stack, value)                       \
+    W_[StgStack_sp(stack) + WDS(1)] = value;            \
+    W_[StgStack_sp(stack) + WDS(0)] = stg_gc_unpt_r1_info;
  
-#define PerformPut(tso,lval)                   \
-    StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);  \
-    lval = W_[StgTSO_sp(tso) - WDS(1)];
+#define PerformPut(stack,lval)                      \
+    StgStack_sp(stack) = StgStack_sp(stack) + WDS(3);   \
+    lval = W_[StgStack_sp(stack) - WDS(1)];
  
  stg_takeMVarzh
  {
@@ -1224,24 +1219,20 @@ loop:
          StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
      }
  
-loop2:
-    if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
-        tso = StgTSO__link(tso);
-        goto loop2;
-    }
-
      ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
      ASSERT(StgTSO_block_info(tso) == mvar);
  
      // actually perform the putMVar for the thread that we just woke up
-    PerformPut(tso,StgMVar_value(mvar));
+    W_ stack;
+    stack = StgTSO_stackobj(tso);
+    PerformPut(stack, StgMVar_value(mvar));
  
      // indicate that the MVar operation has now completed.
      StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
      
      // no need to mark the TSO dirty, we have only written END_TSO_QUEUE.
  
-    foreign "C" tryWakeupThread_(MyCapability() "ptr", tso) [];
+    foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
      
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
      RET_P(val);
@@ -1303,24 +1294,20 @@ loop:
          StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
      }
  
-loop2:
-    if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
-        tso = StgTSO__link(tso);
-        goto loop2;
-    }
-
      ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
      ASSERT(StgTSO_block_info(tso) == mvar);
  
      // actually perform the putMVar for the thread that we just woke up
-    PerformPut(tso,StgMVar_value(mvar));
+    W_ stack;
+    stack = StgTSO_stackobj(tso);
+    PerformPut(stack, StgMVar_value(mvar));
  
      // indicate that the MVar operation has now completed.
      StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
      
      // no need to mark the TSO dirty, we have only written END_TSO_QUEUE.
  
-    foreign "C" tryWakeupThread_(MyCapability() "ptr", tso) [];
+    foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
      
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
      RET_NP(1,val);
@@ -1395,26 +1382,22 @@ loop:
          StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
      }
  
-loop2:
-    if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
-        tso = StgTSO__link(tso);
-        goto loop2;
-    }
-
      ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
      ASSERT(StgTSO_block_info(tso) == mvar);
  
      // actually perform the takeMVar
-    PerformTake(tso, val);
+    W_ stack;
+    stack = StgTSO_stackobj(tso);
+    PerformTake(stack, val);
  
      // indicate that the MVar operation has now completed.
      StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
-    
-    if (TO_W_(StgTSO_dirty(tso)) == 0) {
-        foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
+
+    if (TO_W_(StgStack_dirty(stack)) == 0) {
+        foreign "C" dirty_STACK(MyCapability() "ptr", stack "ptr") [];
      }
      
-    foreign "C" tryWakeupThread_(MyCapability() "ptr", tso) [];
+    foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
  
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
      jump %ENTRY_CODE(Sp(0));
@@ -1468,26 +1451,22 @@ loop:
          StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
      }
  
-loop2:
-    if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
-        tso = StgTSO__link(tso);
-        goto loop2;
-    }
-
      ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
      ASSERT(StgTSO_block_info(tso) == mvar);
  
      // actually perform the takeMVar
-    PerformTake(tso, val);
+    W_ stack;
+    stack = StgTSO_stackobj(tso);
+    PerformTake(stack, val);
  
      // indicate that the MVar operation has now completed.
      StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
      
-    if (TO_W_(StgTSO_dirty(tso)) == 0) {
-        foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
+    if (TO_W_(StgStack_dirty(stack)) == 0) {
+        foreign "C" dirty_STACK(MyCapability() "ptr", stack "ptr") [];
      }
      
-    foreign "C" tryWakeupThread_(MyCapability() "ptr", tso) [];
+    foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];
  
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
      RET_N(1);
diff --git a/rts/Printer.c b/rts/Printer.c

index 565a11e..fcc483d 100644 (file)
--- a/rts/Printer.c
+++ b/rts/Printer.c
@@ -276,6 +276,15 @@ printClosure( StgClosure *obj )
              break;
          }
  
+    case UNDERFLOW_FRAME:
+        {
+            StgUnderflowFrame* u = (StgUnderflowFrame*)obj;
+            debugBelch("UNDERFLOW_FRAME(");
+            printPtr((StgPtr)u->next_chunk);
+            debugBelch(")\n"); 
+            break;
+        }
+
      case STOP_FRAME:
          {
              StgStopFrame* u = (StgStopFrame*)obj;
@@ -461,13 +470,11 @@ printStackChunk( StgPtr sp, StgPtr spBottom )
             
         case UPDATE_FRAME:
         case CATCH_FRAME:
-           printObj((StgClosure*)sp);
+        case UNDERFLOW_FRAME:
+        case STOP_FRAME:
+            printObj((StgClosure*)sp);
             continue;
  
-       case STOP_FRAME:
-           printObj((StgClosure*)sp);
-           return;
-
         case RET_DYN:
         { 
             StgRetDyn* r;
@@ -559,7 +566,8 @@ printStackChunk( StgPtr sp, StgPtr spBottom )
  
  void printTSO( StgTSO *tso )
  {
-    printStackChunk( tso->sp, tso->stack+tso->stack_size);
+    printStackChunk( tso->stackobj->sp,
+                     tso->stackobj->stack+tso->stackobj->stack_size);
  }
  
  /* --------------------------------------------------------------------------
@@ -1039,7 +1047,6 @@ char *what_next_strs[] = {
    [ThreadRunGHC]    = "ThreadRunGHC",
    [ThreadInterpret] = "ThreadInterpret",
    [ThreadKilled]    = "ThreadKilled",
-  [ThreadRelocated] = "ThreadRelocated",
    [ThreadComplete]  = "ThreadComplete"
  };
  
@@ -1102,6 +1109,7 @@ char *closure_type_names[] = {
   [RET_FUN]               = "RET_FUN",
   [UPDATE_FRAME]          = "UPDATE_FRAME",
   [CATCH_FRAME]           = "CATCH_FRAME",
+ [UNDERFLOW_FRAME]       = "UNDERFLOW_FRAME",
   [STOP_FRAME]            = "STOP_FRAME",
   [BLACKHOLE]             = "BLACKHOLE",
   [BLOCKING_QUEUE]        = "BLOCKING_QUEUE",
@@ -1118,6 +1126,7 @@ char *closure_type_names[] = {
   [PRIM]                         = "PRIM",
   [MUT_PRIM]              = "MUT_PRIM",
   [TSO]                   = "TSO",
+ [STACK]                 = "STACK",
   [TREC_CHUNK]            = "TREC_CHUNK",
   [ATOMICALLY_FRAME]      = "ATOMICALLY_FRAME",
   [CATCH_RETRY_FRAME]     = "CATCH_RETRY_FRAME",
diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c

index d398afd..39b64d4 100644 (file)
--- a/rts/ProfHeap.c
+++ b/rts/ProfHeap.c
@@ -947,19 +947,35 @@ heapCensusChain( Census *census, bdescr *bd )
                 prim = rtsTrue;
  #ifdef PROFILING
                 if (RtsFlags.ProfFlags.includeTSOs) {
-                   size = tso_sizeW((StgTSO *)p);
+                    size = sizeofW(StgTSO);
                     break;
                 } else {
                     // Skip this TSO and move on to the next object
-                   p += tso_sizeW((StgTSO *)p);
+                    p += sizeofW(StgTSO);
                     continue;
                 }
  #else
-               size = tso_sizeW((StgTSO *)p);
+                size = sizeofW(StgTSO);
                 break;
  #endif
  
-           case TREC_CHUNK:
+            case STACK:
+               prim = rtsTrue;
+#ifdef PROFILING
+               if (RtsFlags.ProfFlags.includeTSOs) {
+                    size = stack_sizeW((StgStack*)p);
+                    break;
+               } else {
+                   // Skip this STACK and move on to the next object
+                    p += stack_sizeW((StgStack*)p);
+                   continue;
+               }
+#else
+                size = stack_sizeW((StgStack*)p);
+               break;
+#endif
+
+            case TREC_CHUNK:
                 prim = rtsTrue;
                 size = sizeofW(StgTRecChunk);
                 break;
diff --git a/rts/ProfHeap.h b/rts/ProfHeap.h

index 48b5baf..c4a92e2 100644 (file)
--- a/rts/ProfHeap.h
+++ b/rts/ProfHeap.h
@@ -14,7 +14,6 @@
  void    heapCensus         (void);
  nat     initHeapProfiling  (void);
  void    endHeapProfiling   (void);
-void    LDV_recordDead     (StgClosure *c, nat size);
  rtsBool strMatchesSelector (char* str, char* sel);
  
  #include "EndPrivate.h"
diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c

index 7abccde..550f703 100644 (file)
--- a/rts/RaiseAsync.c
+++ b/rts/RaiseAsync.c
@@ -23,11 +23,11 @@
  #include "win32/IOManager.h"
  #endif
  
-static void raiseAsync (Capability *cap,
-                       StgTSO *tso,
-                       StgClosure *exception, 
-                       rtsBool stop_at_atomically,
-                       StgUpdateFrame *stop_here);
+static StgTSO* raiseAsync (Capability *cap,
+                           StgTSO *tso,
+                           StgClosure *exception,
+                           rtsBool stop_at_atomically,
+                           StgUpdateFrame *stop_here);
  
  static void removeFromQueues(Capability *cap, StgTSO *tso);
  
@@ -61,11 +61,9 @@ static void
  throwToSingleThreaded__ (Capability *cap, StgTSO *tso, StgClosure *exception, 
                           rtsBool stop_at_atomically, StgUpdateFrame *stop_here)
  {
-    tso = deRefTSO(tso);
-
      // Thread already dead?
      if (tso->what_next == ThreadComplete || tso->what_next == ThreadKilled) {
-       return;
+        return;
      }
  
      // Remove it from any blocking queues
@@ -81,13 +79,13 @@ throwToSingleThreaded (Capability *cap, StgTSO *tso, StgClosure *exception)
  }
  
  void
-throwToSingleThreaded_ (Capability *cap, StgTSO *tso, StgClosure *exception, 
+throwToSingleThreaded_ (Capability *cap, StgTSO *tso, StgClosure *exception,
                          rtsBool stop_at_atomically)
  {
      throwToSingleThreaded__ (cap, tso, exception, stop_at_atomically, NULL);
  }
  
-void
+void // cannot return a different TSO
  suspendComputation (Capability *cap, StgTSO *tso, StgUpdateFrame *stop_here)
  {
      throwToSingleThreaded__ (cap, tso, NULL, rtsFalse, stop_here);
@@ -192,9 +190,6 @@ retry:
  check_target:
      ASSERT(target != END_TSO_QUEUE);
  
-    // follow ThreadRelocated links in the target first
-    target = deRefTSO(target);
-
      // Thread already dead?
      if (target->what_next == ThreadComplete 
         || target->what_next == ThreadKilled) {
@@ -268,7 +263,7 @@ check_target:
              // might as well just do it now.  The message will
              // be a no-op when it arrives.
              unlockClosure((StgClosure*)m, i);
-            tryWakeupThread_(cap, target);
+            tryWakeupThread(cap, target);
              goto retry;
          }
  
@@ -286,7 +281,7 @@ check_target:
          }
  
          // nobody else can wake up this TSO after we claim the message
-        unlockClosure((StgClosure*)m, &stg_MSG_NULL_info);
+        doneWithMsgThrowTo(m);
  
          raiseAsync(cap, target, msg->exception, rtsFalse, NULL);
          return THROWTO_SUCCESS;
@@ -315,12 +310,7 @@ check_target:
  
         info = lockClosure((StgClosure *)mvar);
  
-       if (target->what_next == ThreadRelocated) {
-           target = target->_link;
-           unlockClosure((StgClosure *)mvar,info);
-           goto retry;
-       }
-       // we have the MVar, let's check whether the thread
+        // we have the MVar, let's check whether the thread
         // is still blocked on the same MVar.
         if (target->why_blocked != BlockedOnMVar
             || (StgMVar *)target->block_info.closure != mvar) {
@@ -334,7 +324,7 @@ check_target:
              // thread now anyway and ignore the message when it
              // arrives.
             unlockClosure((StgClosure *)mvar, info);
-            tryWakeupThread_(cap, target);
+            tryWakeupThread(cap, target);
              goto retry;
          }
  
@@ -505,7 +495,8 @@ maybePerformBlockedException (Capability *cap, StgTSO *tso)
  {
      MessageThrowTo *msg;
      const StgInfoTable *i;
-    
+    StgTSO *source;
+
      if (tso->what_next == ThreadComplete || tso->what_next == ThreadFinished) {
          if (tso->blocked_exceptions != END_BLOCKED_EXCEPTIONS_QUEUE) {
              awakenBlockedExceptionQueue(cap,tso);
@@ -537,8 +528,9 @@ maybePerformBlockedException (Capability *cap, StgTSO *tso)
          }
  
          throwToSingleThreaded(cap, msg->target, msg->exception);
-        unlockClosure((StgClosure*)msg,&stg_MSG_NULL_info);
-        tryWakeupThread(cap, msg->source);
+        source = msg->source;
+        doneWithMsgThrowTo(msg);
+        tryWakeupThread(cap, source);
          return 1;
      }
      return 0;
@@ -552,13 +544,15 @@ awakenBlockedExceptionQueue (Capability *cap, StgTSO *tso)
  {
      MessageThrowTo *msg;
      const StgInfoTable *i;
+    StgTSO *source;
  
      for (msg = tso->blocked_exceptions; msg != END_BLOCKED_EXCEPTIONS_QUEUE;
           msg = (MessageThrowTo*)msg->link) {
          i = lockClosure((StgClosure *)msg);
          if (i != &stg_MSG_NULL_info) {
-            unlockClosure((StgClosure *)msg,&stg_MSG_NULL_info);
-            tryWakeupThread(cap, msg->source);
+            source = msg->source;
+            doneWithMsgThrowTo(msg);
+            tryWakeupThread(cap, source);
          } else {
              unlockClosure((StgClosure *)msg,i);
          }
@@ -653,7 +647,7 @@ removeFromQueues(Capability *cap, StgTSO *tso)
        // ASSERT(m->header.info == &stg_WHITEHOLE_info);
  
        // unlock and revoke it at the same time
-      unlockClosure((StgClosure*)m,&stg_MSG_NULL_info);
+      doneWithMsgThrowTo(m);
        break;
    }
  
@@ -724,7 +718,7 @@ removeFromQueues(Capability *cap, StgTSO *tso)
   *
   * -------------------------------------------------------------------------- */
  
-static void
+static StgTSO *
  raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, 
            rtsBool stop_at_atomically, StgUpdateFrame *stop_here)
  {
@@ -732,6 +726,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
      StgPtr sp, frame;
      StgClosure *updatee;
      nat i;
+    StgStack *stack;
  
      debugTraceCap(DEBUG_sched, cap,
                    "raising exception in thread %ld.", (long)tso->id);
@@ -747,25 +742,21 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
          fprintCCS_stderr(tso->prof.CCCS);
      }
  #endif
-    // ASSUMES: the thread is not already complete or dead, or
-    // ThreadRelocated.  Upper layers should deal with that.
+    // ASSUMES: the thread is not already complete or dead
+    // Upper layers should deal with that.
      ASSERT(tso->what_next != ThreadComplete && 
-           tso->what_next != ThreadKilled && 
-           tso->what_next != ThreadRelocated);
+           tso->what_next != ThreadKilled);
  
      // only if we own this TSO (except that deleteThread() calls this 
      ASSERT(tso->cap == cap);
  
-    // wake it up
-    if (tso->why_blocked != NotBlocked) {
-        tso->why_blocked = NotBlocked;
-        appendToRunQueue(cap,tso);
-    }        
+    stack = tso->stackobj;
  
      // mark it dirty; we're about to change its stack.
      dirty_TSO(cap, tso);
+    dirty_STACK(cap, stack);
  
-    sp = tso->sp;
+    sp = stack->sp;
      
      if (stop_here != NULL) {
          updatee = stop_here->updatee;
@@ -801,10 +792,13 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
         // 
         // 5. If it's a STOP_FRAME, then kill the thread.
          // 
-        // NB: if we pass an ATOMICALLY_FRAME then abort the associated 
+        // 6. If it's an UNDERFLOW_FRAME, then continue with the next
+        //    stack chunk.
+        //
+        // NB: if we pass an ATOMICALLY_FRAME then abort the associated
          // transaction
         
-       info = get_ret_itbl((StgClosure *)frame);
+        info = get_ret_itbl((StgClosure *)frame);
  
         switch (info->i.type) {
  
@@ -859,12 +853,46 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
             continue; //no need to bump frame
         }
  
-       case STOP_FRAME:
+        case UNDERFLOW_FRAME:
+        {
+           StgAP_STACK * ap;
+           nat words;
+           
+           // First build an AP_STACK consisting of the stack chunk above the
+           // current underflow frame, with the top word on the stack as the
+           // fun field.
+           //
+           words = frame - sp - 1;
+           ap = (StgAP_STACK *)allocate(cap,AP_STACK_sizeW(words));
+           
+           ap->size = words;
+           ap->fun  = (StgClosure *)sp[0];
+           sp++;
+           for(i=0; i < (nat)words; ++i) {
+               ap->payload[i] = (StgClosure *)*sp++;
+           }
+           
+            SET_HDR(ap,&stg_AP_STACK_NOUPD_info,
+                   ((StgClosure *)frame)->header.prof.ccs /* ToDo */); 
+            TICK_ALLOC_SE_THK(words+1,0);
+
+            stack->sp = sp;
+            threadStackUnderflow(cap,tso);
+            stack = tso->stackobj;
+            sp = stack->sp;
+
+            sp--;
+            sp[0] = (W_)ap;
+            frame = sp + 1;
+            continue;
+        }
+
+        case STOP_FRAME:
         {
             // We've stripped the entire stack, the thread is now dead.
             tso->what_next = ThreadKilled;
-           tso->sp = frame + sizeofW(StgStopFrame);
-           return;
+            stack->sp = frame + sizeofW(StgStopFrame);
+            goto done;
         }
  
         case CATCH_FRAME:
@@ -906,17 +934,16 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
              */
             sp[0] = (W_)raise;
             sp[-1] = (W_)&stg_enter_info;
-           tso->sp = sp-1;
+            stack->sp = sp-1;
             tso->what_next = ThreadRunGHC;
-           IF_DEBUG(sanity, checkTSO(tso));
-           return;
+            goto done;
         }
             
         case ATOMICALLY_FRAME:
             if (stop_at_atomically) {
                 ASSERT(tso->trec->enclosing_trec == NO_TREC);
                 stmCondemnTransaction(cap, tso -> trec);
-               tso->sp = frame - 2;
+                stack->sp = frame - 2;
                  // The ATOMICALLY_FRAME expects to be returned a
                  // result from the transaction, which it stores in the
                  // stack frame.  Hence we arrange to return a dummy
@@ -925,10 +952,10 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
                  // ATOMICALLY_FRAME instance for condemned
                  // transactions, but I don't fully understand the
                  // interaction with STM invariants.
-                tso->sp[1] = (W_)&stg_NO_TREC_closure;
-                tso->sp[0] = (W_)&stg_gc_unpt_r1_info;
-               tso->what_next = ThreadRunGHC;
-               return;
+                stack->sp[1] = (W_)&stg_NO_TREC_closure;
+                stack->sp[0] = (W_)&stg_gc_unpt_r1_info;
+                tso->what_next = ThreadRunGHC;
+                goto done;
             }
             // Not stop_at_atomically... fall through and abort the
             // transaction.
@@ -950,7 +977,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
              stmAbortTransaction(cap, trec);
             stmFreeAbortedTRec(cap, trec);
              tso -> trec = outer;
-           break;
+            break;
             };
             
         default:
@@ -961,8 +988,16 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
         frame += stack_frame_sizeW((StgClosure *)frame);
      }
  
-    // if we got here, then we stopped at stop_here
-    ASSERT(stop_here != NULL);
+done:
+    IF_DEBUG(sanity, checkTSO(tso));
+
+    // wake it up
+    if (tso->why_blocked != NotBlocked) {
+        tso->why_blocked = NotBlocked;
+        appendToRunQueue(cap,tso);
+    }        
+
+    return tso;
  }
  
  
diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c

index c5a7bf7..48473d2 100644 (file)
--- a/rts/RetainerProfile.c
+++ b/rts/RetainerProfile.c
@@ -597,11 +597,13 @@ push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
      case AP:
      case AP_STACK:
      case TSO:
+    case STACK:
      case IND_STATIC:
      case CONSTR_NOCAF_STATIC:
         // stack objects
      case UPDATE_FRAME:
      case CATCH_FRAME:
+    case UNDERFLOW_FRAME:
      case STOP_FRAME:
      case RET_DYN:
      case RET_BCO:
@@ -925,13 +927,15 @@ pop( StgClosure **c, StgClosure **cp, retainer *r )
         case AP:
         case AP_STACK:
         case TSO:
-       case IND_STATIC:
+        case STACK:
+        case IND_STATIC:
         case CONSTR_NOCAF_STATIC:
             // stack objects
         case RET_DYN:
         case UPDATE_FRAME:
         case CATCH_FRAME:
-       case STOP_FRAME:
+        case UNDERFLOW_FRAME:
+        case STOP_FRAME:
         case RET_BCO:
         case RET_SMALL:
         case RET_BIG:
@@ -1001,6 +1005,7 @@ isRetainer( StgClosure *c )
         //
         // TSOs MUST be retainers: they constitute the set of roots.
      case TSO:
+    case STACK:
  
         // mutable objects
      case MUT_PRIM:
@@ -1080,6 +1085,7 @@ isRetainer( StgClosure *c )
         // legal objects during retainer profiling.
      case UPDATE_FRAME:
      case CATCH_FRAME:
+    case UNDERFLOW_FRAME:
      case STOP_FRAME:
      case RET_DYN:
      case RET_BCO:
@@ -1257,8 +1263,8 @@ retainSRT (StgClosure **srt, nat srt_bitmap, StgClosure *c, retainer c_child_r)
   *    RSET(c) and RSET(c_child_r) are valid, i.e., their
   *    interpretation conforms to the current value of flip (even when they
   *    are interpreted to be NULL).
- *    If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete,
- *    or ThreadKilled, which means that its stack is ready to process.
+ *    If *c is TSO, its state is neither ThreadComplete nor ThreadKilled,
+ *    which means that its stack is ready to process.
   *  Note:
   *    This code was almost plagiarzied from GC.c! For each pointer,
   *    retainClosure() is invoked instead of evacuate().
@@ -1291,11 +1297,8 @@ retainStack( StgClosure *c, retainer c_child_r,
      // debugBelch("retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
  #endif
  
-    ASSERT(get_itbl(c)->type != TSO || 
-          (((StgTSO *)c)->what_next != ThreadRelocated &&
-           ((StgTSO *)c)->what_next != ThreadComplete &&
-           ((StgTSO *)c)->what_next != ThreadKilled));
-    
+    ASSERT(get_itbl(c)->type == STACK);
+
      p = stackStart;
      while (p < stackEnd) {
         info = get_ret_itbl((StgClosure *)p);
@@ -1307,7 +1310,8 @@ retainStack( StgClosure *c, retainer c_child_r,
             p += sizeofW(StgUpdateFrame);
             continue;
  
-       case STOP_FRAME:
+        case UNDERFLOW_FRAME:
+        case STOP_FRAME:
         case CATCH_FRAME:
         case CATCH_STM_FRAME:
         case CATCH_RETRY_FRAME:
@@ -1560,14 +1564,7 @@ inner_loop:
  #endif
             goto loop;
         }
-       if (((StgTSO *)c)->what_next == ThreadRelocated) {
-#ifdef DEBUG_RETAINER
-           debugBelch("ThreadRelocated encountered in retainClosure()\n");
-#endif
-           c = (StgClosure *)((StgTSO *)c)->_link;
-           goto inner_loop;
-       }
-       break;
+        break;
  
      case IND_STATIC:
         // We just skip IND_STATIC, so its retainer set is never computed.
@@ -1681,10 +1678,10 @@ inner_loop:
      // than attempting to save the current position, because doing so
      // would be hard.
      switch (typeOfc) {
-    case TSO:
+    case STACK:
         retainStack(c, c_child_r,
-                   ((StgTSO *)c)->sp,
-                   ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
+                    ((StgStack *)c)->sp,
+                    ((StgStack *)c)->stack + ((StgStack *)c)->stack_size);
         goto loop;
  
      case PAP:
diff --git a/rts/RtsAPI.c b/rts/RtsAPI.c

index 53628dc..8fcf8ce 100644 (file)
--- a/rts/RtsAPI.c
+++ b/rts/RtsAPI.c
@@ -375,8 +375,8 @@ rts_getBool (HaskellObj p)
     -------------------------------------------------------------------------- */
  
  INLINE_HEADER void pushClosure   (StgTSO *tso, StgWord c) {
-  tso->sp--;
-  tso->sp[0] = (W_) c;
+  tso->stackobj->sp--;
+  tso->stackobj->sp[0] = (W_) c;
  }
  
  StgTSO *
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c

index c11cc3e..b0dd42b 100644 (file)
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -69,6 +69,8 @@ void initRtsFlagsDefaults(void)
  
      RtsFlags.GcFlags.maxStkSize                = (8 * 1024 * 1024) / sizeof(W_);
      RtsFlags.GcFlags.initialStkSize    = 1024 / sizeof(W_);
+    RtsFlags.GcFlags.stkChunkSize       = (32 * 1024) / sizeof(W_);
+    RtsFlags.GcFlags.stkChunkBufferSize = (1 * 1024) / sizeof(W_);
  
      RtsFlags.GcFlags.minAllocAreaSize   = (512 * 1024)        / BLOCK_SIZE;
      RtsFlags.GcFlags.minOldGenSize      = (1024 * 1024)       / BLOCK_SIZE;
@@ -194,7 +196,9 @@ usage_text[] = {
  "  --info   Print information about the RTS used by this program",
  "",
  "  -K<size> Sets the maximum stack size (default 8M)  Egs: -K32k   -K512k",
-"  -k<size> Sets the initial thread stack size (default 1k)  Egs: -k4k   -k2m",
+"  -ki<size> Sets the initial thread stack size (default 1k)  Egs: -ki4k -ki2m",
+"  -kc<size> Sets the stack chunk size (default 32k)",
+"  -kb<size> Sets the stack chunk buffer size (default 1k)",
  "",
  "  -A<size> Sets the minimum allocation area size (default 512k) Egs: -A1m -A10k",
  "  -M<size> Sets the maximum heap size (default unlimited)  Egs: -M256k -M1G",
@@ -693,15 +697,31 @@ error = rtsTrue;
  
               case 'K':
                    RtsFlags.GcFlags.maxStkSize =
-                      decodeSize(rts_argv[arg], 2, 1, HS_WORD_MAX) / sizeof(W_);
+                      decodeSize(rts_argv[arg], 2, sizeof(W_), HS_WORD_MAX) / sizeof(W_);
                    break;
  
               case 'k':
+               switch(rts_argv[arg][2]) {
+                case 'c':
+                  RtsFlags.GcFlags.stkChunkSize =
+                      decodeSize(rts_argv[arg], 3, sizeof(W_), HS_WORD_MAX) / sizeof(W_);
+                  break;
+                case 'b':
+                  RtsFlags.GcFlags.stkChunkBufferSize =
+                      decodeSize(rts_argv[arg], 3, sizeof(W_), HS_WORD_MAX) / sizeof(W_);
+                  break;
+                case 'i':
+                  RtsFlags.GcFlags.initialStkSize =
+                      decodeSize(rts_argv[arg], 3, sizeof(W_), HS_WORD_MAX) / sizeof(W_);
+                  break;
+                default:
                    RtsFlags.GcFlags.initialStkSize =
-                      decodeSize(rts_argv[arg], 2, 1, HS_WORD_MAX) / sizeof(W_);
+                      decodeSize(rts_argv[arg], 2, sizeof(W_), HS_WORD_MAX) / sizeof(W_);
                    break;
+                }
+                break;
  
-             case 'M':
+              case 'M':
                    RtsFlags.GcFlags.maxHeapSize =
                        decodeSize(rts_argv[arg], 2, BLOCK_SIZE, HS_WORD_MAX) / BLOCK_SIZE;
                    /* user give size in *bytes* but "maxHeapSize" is in *blocks* */
@@ -1203,6 +1223,12 @@ error = rtsTrue;
          RtsFlags.ProfFlags.profileIntervalTicks = 0;
      }
  
+    if (RtsFlags.GcFlags.stkChunkBufferSize >
+        RtsFlags.GcFlags.stkChunkSize / 2) {
+        errorBelch("stack chunk buffer size (-kb) must be less than 50%% of the stack chunk size (-kc)");
+        error = rtsTrue;
+    }
+
      if (error) {
         const char **p;
  
diff --git a/rts/Schedule.c b/rts/Schedule.c

index 0b1dec4..c115d2b 100644 (file)
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -140,9 +140,7 @@ static void scheduleActivateSpark(Capability *cap);
  #endif
  static void schedulePostRunThread(Capability *cap, StgTSO *t);
  static rtsBool scheduleHandleHeapOverflow( Capability *cap, StgTSO *t );
-static void scheduleHandleStackOverflow( Capability *cap, Task *task, 
-                                        StgTSO *t);
-static rtsBool scheduleHandleYield( Capability *cap, StgTSO *t, 
+static rtsBool scheduleHandleYield( Capability *cap, StgTSO *t,
                                     nat prev_what_next );
  static void scheduleHandleThreadBlocked( StgTSO *t );
  static rtsBool scheduleHandleThreadFinished( Capability *cap, Task *task,
@@ -151,9 +149,6 @@ static rtsBool scheduleNeedHeapProfile(rtsBool ready_to_gc);
  static Capability *scheduleDoGC(Capability *cap, Task *task,
                                 rtsBool force_major);
  
-static StgTSO *threadStackOverflow(Capability *cap, StgTSO *tso);
-static StgTSO *threadStackUnderflow(Capability *cap, Task *task, StgTSO *tso);
-
  static void deleteThread (Capability *cap, StgTSO *tso);
  static void deleteAllThreads (Capability *cap);
  
@@ -426,6 +421,7 @@ run_thread:
      cap->in_haskell = rtsTrue;
  
      dirty_TSO(cap,t);
+    dirty_STACK(cap,t->stackobj);
  
  #if defined(THREADED_RTS)
      if (recent_activity == ACTIVITY_DONE_GC) {
@@ -503,10 +499,6 @@ run_thread:
      
      schedulePostRunThread(cap,t);
  
-    if (ret != StackOverflow) {
-        t = threadStackUnderflow(cap,task,t);
-    }
-
      ready_to_gc = rtsFalse;
  
      switch (ret) {
@@ -515,8 +507,11 @@ run_thread:
         break;
  
      case StackOverflow:
-       scheduleHandleStackOverflow(cap,task,t);
-       break;
+        // just adjust the stack for this thread, then push it back
+        // on the run queue.
+        threadStackOverflow(cap, t);
+        pushOnRunQueue(cap,t);
+        break;
  
      case ThreadYielding:
         if (scheduleHandleYield(cap, t, prev_what_next)) {
@@ -729,8 +724,7 @@ schedulePushWork(Capability *cap USED_IF_THREADS,
             for (; t != END_TSO_QUEUE; t = next) {
                 next = t->_link;
                 t->_link = END_TSO_QUEUE;
-               if (t->what_next == ThreadRelocated
-                   || t->bound == task->incall // don't move my bound thread
+                if (t->bound == task->incall // don't move my bound thread
                     || tsoLocked(t)) {  // don't move a locked thread
                     setTSOLink(cap, prev, t);
                      setTSOPrev(cap, t, prev);
@@ -1098,30 +1092,6 @@ scheduleHandleHeapOverflow( Capability *cap, StgTSO *t )
  }
  
  /* -----------------------------------------------------------------------------
- * Handle a thread that returned to the scheduler with ThreadStackOverflow
- * -------------------------------------------------------------------------- */
-
-static void
-scheduleHandleStackOverflow (Capability *cap, Task *task, StgTSO *t)
-{
-    /* just adjust the stack for this thread, then pop it back
-     * on the run queue.
-     */
-    { 
-       /* enlarge the stack */
-       StgTSO *new_t = threadStackOverflow(cap, t);
-       
-       /* The TSO attached to this Task may have moved, so update the
-        * pointer to it.
-        */
-       if (task->incall->tso == t) {
-           task->incall->tso = new_t;
-       }
-       pushOnRunQueue(cap,new_t);
-    }
-}
-
-/* -----------------------------------------------------------------------------
   * Handle a thread that returned to the scheduler with ThreadYielding
   * -------------------------------------------------------------------------- */
  
@@ -1241,8 +1211,8 @@ scheduleHandleThreadFinished (Capability *cap STG_UNUSED, Task *task, StgTSO *t)
  
           if (t->what_next == ThreadComplete) {
               if (task->incall->ret) {
-                 // NOTE: return val is tso->sp[1] (see StgStartup.hc)
-                 *(task->incall->ret) = (StgClosure *)task->incall->tso->sp[1]; 
+                  // NOTE: return val is stack->sp[1] (see StgStartup.hc)
+                  *(task->incall->ret) = (StgClosure *)task->incall->tso->stackobj->sp[1];
               }
               task->incall->stat = Success;
           } else {
@@ -1578,10 +1548,7 @@ forkProcess(HsStablePtr *entry
  
          for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
            for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) {
-           if (t->what_next == ThreadRelocated) {
-               next = t->_link;
-           } else {
-               next = t->global_link;
+                next = t->global_link;
                 // don't allow threads to catch the ThreadKilled
                 // exception, but we do want to raiseAsync() because these
                 // threads may be evaluating thunks that we need later.
@@ -1593,7 +1560,6 @@ forkProcess(HsStablePtr *entry
                  // won't get a chance to exit in the usual way (see
                  // also scheduleHandleThreadFinished).
                  t->bound = NULL;
-           }
            }
         }
         
@@ -1661,12 +1627,8 @@ deleteAllThreads ( Capability *cap )
      debugTrace(DEBUG_sched,"deleting all threads");
      for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
          for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) {
-            if (t->what_next == ThreadRelocated) {
-                next = t->_link;
-            } else {
                  next = t->global_link;
                  deleteThread(cap,t);
-            }
          }
      }
  
@@ -1850,6 +1812,7 @@ resumeThread (void *task_)
  
      /* We might have GC'd, mark the TSO dirty again */
      dirty_TSO(cap,tso);
+    dirty_STACK(cap,tso->stackobj);
  
      IF_DEBUG(sanity, checkTSO(tso));
  
@@ -2108,189 +2071,6 @@ performMajorGC(void)
      performGC_(rtsTrue);
  }
  
-/* -----------------------------------------------------------------------------
-   Stack overflow
-
-   If the thread has reached its maximum stack size, then raise the
-   StackOverflow exception in the offending thread.  Otherwise
-   relocate the TSO into a larger chunk of memory and adjust its stack
-   size appropriately.
-   -------------------------------------------------------------------------- */
-
-static StgTSO *
-threadStackOverflow(Capability *cap, StgTSO *tso)
-{
-  nat new_stack_size, stack_words;
-  lnat new_tso_size;
-  StgPtr new_sp;
-  StgTSO *dest;
-
-  IF_DEBUG(sanity,checkTSO(tso));
-
-  if (tso->stack_size >= tso->max_stack_size
-      && !(tso->flags & TSO_BLOCKEX)) {
-      // NB. never raise a StackOverflow exception if the thread is
-      // inside Control.Exceptino.block.  It is impractical to protect
-      // against stack overflow exceptions, since virtually anything
-      // can raise one (even 'catch'), so this is the only sensible
-      // thing to do here.  See bug #767.
-      //
-
-      if (tso->flags & TSO_SQUEEZED) {
-          return tso;
-      }
-      // #3677: In a stack overflow situation, stack squeezing may
-      // reduce the stack size, but we don't know whether it has been
-      // reduced enough for the stack check to succeed if we try
-      // again.  Fortunately stack squeezing is idempotent, so all we
-      // need to do is record whether *any* squeezing happened.  If we
-      // are at the stack's absolute -K limit, and stack squeezing
-      // happened, then we try running the thread again.  The
-      // TSO_SQUEEZED flag is set by threadPaused() to tell us whether
-      // squeezing happened or not.
-
-      debugTrace(DEBUG_gc,
-                "threadStackOverflow of TSO %ld (%p): stack too large (now %ld; max is %ld)",
-                (long)tso->id, tso, (long)tso->stack_size, (long)tso->max_stack_size);
-      IF_DEBUG(gc,
-              /* If we're debugging, just print out the top of the stack */
-              printStackChunk(tso->sp, stg_min(tso->stack+tso->stack_size, 
-                                               tso->sp+64)));
-
-      // Send this thread the StackOverflow exception
-      throwToSingleThreaded(cap, tso, (StgClosure *)stackOverflow_closure);
-      return tso;
-  }
-
-
-  // We also want to avoid enlarging the stack if squeezing has
-  // already released some of it.  However, we don't want to get into
-  // a pathalogical situation where a thread has a nearly full stack
-  // (near its current limit, but not near the absolute -K limit),
-  // keeps allocating a little bit, squeezing removes a little bit,
-  // and then it runs again.  So to avoid this, if we squeezed *and*
-  // there is still less than BLOCK_SIZE_W words free, then we enlarge
-  // the stack anyway.
-  if ((tso->flags & TSO_SQUEEZED) && 
-      ((W_)(tso->sp - tso->stack) >= BLOCK_SIZE_W)) {
-      return tso;
-  }
-
-  /* Try to double the current stack size.  If that takes us over the
-   * maximum stack size for this thread, then use the maximum instead
-   * (that is, unless we're already at or over the max size and we
-   * can't raise the StackOverflow exception (see above), in which
-   * case just double the size). Finally round up so the TSO ends up as
-   * a whole number of blocks.
-   */
-  if (tso->stack_size >= tso->max_stack_size) {
-      new_stack_size = tso->stack_size * 2;
-  } else { 
-      new_stack_size = stg_min(tso->stack_size * 2, tso->max_stack_size);
-  }
-  new_tso_size   = (lnat)BLOCK_ROUND_UP(new_stack_size * sizeof(W_) + 
-                                      TSO_STRUCT_SIZE)/sizeof(W_);
-  new_tso_size = round_to_mblocks(new_tso_size);  /* Be MBLOCK-friendly */
-  new_stack_size = new_tso_size - TSO_STRUCT_SIZEW;
-
-  debugTrace(DEBUG_sched, 
-            "increasing stack size from %ld words to %d.",
-            (long)tso->stack_size, new_stack_size);
-
-  dest = (StgTSO *)allocate(cap,new_tso_size);
-  TICK_ALLOC_TSO(new_stack_size,0);
-
-  /* copy the TSO block and the old stack into the new area */
-  memcpy(dest,tso,TSO_STRUCT_SIZE);
-  stack_words = tso->stack + tso->stack_size - tso->sp;
-  new_sp = (P_)dest + new_tso_size - stack_words;
-  memcpy(new_sp, tso->sp, stack_words * sizeof(W_));
-
-  /* relocate the stack pointers... */
-  dest->sp         = new_sp;
-  dest->stack_size = new_stack_size;
-       
-  /* Mark the old TSO as relocated.  We have to check for relocated
-   * TSOs in the garbage collector and any primops that deal with TSOs.
-   *
-   * It's important to set the sp value to just beyond the end
-   * of the stack, so we don't attempt to scavenge any part of the
-   * dead TSO's stack.
-   */
-  setTSOLink(cap,tso,dest);
-  write_barrier(); // other threads seeing ThreadRelocated will look at _link
-  tso->what_next = ThreadRelocated;
-  tso->sp = (P_)&(tso->stack[tso->stack_size]);
-  tso->why_blocked = NotBlocked;
-
-  IF_DEBUG(sanity,checkTSO(dest));
-#if 0
-  IF_DEBUG(scheduler,printTSO(dest));
-#endif
-
-  return dest;
-}
-
-static StgTSO *
-threadStackUnderflow (Capability *cap, Task *task, StgTSO *tso)
-{
-    bdescr *bd, *new_bd;
-    lnat free_w, tso_size_w;
-    StgTSO *new_tso;
-
-    tso_size_w = tso_sizeW(tso);
-
-    if (tso_size_w < MBLOCK_SIZE_W ||
-          // TSO is less than 2 mblocks (since the first mblock is
-          // shorter than MBLOCK_SIZE_W)
-        (tso_size_w - BLOCKS_PER_MBLOCK*BLOCK_SIZE_W) % MBLOCK_SIZE_W != 0 ||
-          // or TSO is not a whole number of megablocks (ensuring
-          // precondition of splitLargeBlock() below)
-        (tso_size_w <= round_up_to_mblocks(RtsFlags.GcFlags.initialStkSize)) ||
-          // or TSO is smaller than the minimum stack size (rounded up)
-        (nat)(tso->stack + tso->stack_size - tso->sp) > tso->stack_size / 4) 
-          // or stack is using more than 1/4 of the available space
-    {
-        // then do nothing
-        return tso;
-    }
-
-    // this is the number of words we'll free
-    free_w = round_to_mblocks(tso_size_w/2);
-
-    bd = Bdescr((StgPtr)tso);
-    new_bd = splitLargeBlock(bd, free_w / BLOCK_SIZE_W);
-    bd->free = bd->start + TSO_STRUCT_SIZEW;
-
-    new_tso = (StgTSO *)new_bd->start;
-    memcpy(new_tso,tso,TSO_STRUCT_SIZE);
-    new_tso->stack_size = new_bd->free - new_tso->stack;
-
-    // The original TSO was dirty and probably on the mutable
-    // list. The new TSO is not yet on the mutable list, so we better
-    // put it there.
-    new_tso->dirty = 0;
-    new_tso->flags &= ~TSO_LINK_DIRTY;
-    dirty_TSO(cap, new_tso);
-
-    debugTrace(DEBUG_sched, "thread %ld: reducing TSO size from %lu words to %lu",
-               (long)tso->id, tso_size_w, tso_sizeW(new_tso));
-
-    tso->_link = new_tso; // no write barrier reqd: same generation
-    write_barrier(); // other threads seeing ThreadRelocated will look at _link
-    tso->what_next = ThreadRelocated;
-
-    // The TSO attached to this Task may have moved, so update the
-    // pointer to it.
-    if (task->incall->tso == tso) {
-        task->incall->tso = new_tso;
-    }
-
-    IF_DEBUG(sanity,checkTSO(new_tso));
-
-    return new_tso;
-}
-
  /* ---------------------------------------------------------------------------
     Interrupt execution
     - usually called inside a signal handler so it mustn't do anything fancy.   
@@ -2337,7 +2117,7 @@ void wakeUpRts(void)
     exception.
     -------------------------------------------------------------------------- */
  
-static void 
+static void
  deleteThread (Capability *cap STG_UNUSED, StgTSO *tso)
  {
      // NOTE: must only be called on a TSO that we have exclusive
@@ -2347,12 +2127,12 @@ deleteThread (Capability *cap STG_UNUSED, StgTSO *tso)
  
      if (tso->why_blocked != BlockedOnCCall &&
         tso->why_blocked != BlockedOnCCall_Interruptible) {
-       throwToSingleThreaded(tso->cap,tso,NULL);
+        throwToSingleThreaded(tso->cap,tso,NULL);
      }
  }
  
  #ifdef FORKPROCESS_PRIMOP_SUPPORTED
-static void 
+static void
  deleteThread_(Capability *cap, StgTSO *tso)
  { // for forkProcess only:
    // like deleteThread(), but we delete threads in foreign calls, too.
@@ -2406,7 +2186,7 @@ raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception)
      // we update any closures pointed to from update frames with the
      // raise closure that we just built.
      //
-    p = tso->sp;
+    p = tso->stackobj->sp;
      while(1) {
         info = get_ret_itbl((StgClosure *)p);
         next = p + stack_frame_sizeW((StgClosure *)p);
@@ -2427,20 +2207,25 @@ raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception)
  
          case ATOMICALLY_FRAME:
             debugTrace(DEBUG_stm, "found ATOMICALLY_FRAME at %p", p);
-            tso->sp = p;
+            tso->stackobj->sp = p;
              return ATOMICALLY_FRAME;
             
         case CATCH_FRAME:
-           tso->sp = p;
+            tso->stackobj->sp = p;
             return CATCH_FRAME;
  
          case CATCH_STM_FRAME:
             debugTrace(DEBUG_stm, "found CATCH_STM_FRAME at %p", p);
-            tso->sp = p;
+            tso->stackobj->sp = p;
              return CATCH_STM_FRAME;
             
-       case STOP_FRAME:
-           tso->sp = p;
+        case UNDERFLOW_FRAME:
+            threadStackUnderflow(cap,tso);
+            p = tso->stackobj->sp;
+            continue;
+
+        case STOP_FRAME:
+            tso->stackobj->sp = p;
             return STOP_FRAME;
  
          case CATCH_RETRY_FRAME:
@@ -2470,12 +2255,12 @@ raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception)
     -------------------------------------------------------------------------- */
  
  StgWord
-findRetryFrameHelper (StgTSO *tso)
+findRetryFrameHelper (Capability *cap, StgTSO *tso)
  {
    StgPtr           p, next;
    StgRetInfoTable *info;
  
-  p = tso -> sp;
+  p = tso->stackobj->sp;
    while (1) {
      info = get_ret_itbl((StgClosure *)p);
      next = p + stack_frame_sizeW((StgClosure *)p);
@@ -2484,13 +2269,13 @@ findRetryFrameHelper (StgTSO *tso)
      case ATOMICALLY_FRAME:
         debugTrace(DEBUG_stm,
                    "found ATOMICALLY_FRAME at %p during retry", p);
-       tso->sp = p;
+        tso->stackobj->sp = p;
         return ATOMICALLY_FRAME;
        
      case CATCH_RETRY_FRAME:
         debugTrace(DEBUG_stm,
                    "found CATCH_RETRY_FRAME at %p during retrry", p);
-       tso->sp = p;
+        tso->stackobj->sp = p;
         return CATCH_RETRY_FRAME;
        
      case CATCH_STM_FRAME: {
@@ -2499,13 +2284,17 @@ findRetryFrameHelper (StgTSO *tso)
          debugTrace(DEBUG_stm,
                    "found CATCH_STM_FRAME at %p during retry", p);
          debugTrace(DEBUG_stm, "trec=%p outer=%p", trec, outer);
-       stmAbortTransaction(tso -> cap, trec);
-       stmFreeAbortedTRec(tso -> cap, trec);
+        stmAbortTransaction(cap, trec);
+        stmFreeAbortedTRec(cap, trec);
         tso -> trec = outer;
          p = next; 
          continue;
      }
        
+    case UNDERFLOW_FRAME:
+        threadStackUnderflow(cap,tso);
+        p = tso->stackobj->sp;
+        continue;
  
      default:
        ASSERT(info->i.type != CATCH_FRAME);
diff --git a/rts/Schedule.h b/rts/Schedule.h

index a00d81a..edba8f5 100644 (file)
--- a/rts/Schedule.h
+++ b/rts/Schedule.h
@@ -44,7 +44,7 @@ void wakeUpRts(void);
  StgWord raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception);
  
  /* findRetryFrameHelper */
-StgWord findRetryFrameHelper (StgTSO *tso);
+StgWord findRetryFrameHelper (Capability *cap, StgTSO *tso);
  
  /* Entry point for a new worker */
  void scheduleWorker (Capability *cap, Task *task);
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm

index c981cbe..b4a037d 100644 (file)
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -19,6 +19,23 @@ import EnterCriticalSection;
  import LeaveCriticalSection;
  
  /* ----------------------------------------------------------------------------
+   Stack underflow
+   ------------------------------------------------------------------------- */
+
+INFO_TABLE_RET (stg_stack_underflow_frame, UNDERFLOW_FRAME, P_ unused)
+{
+    W_ new_tso;
+    W_ ret_off;
+
+    SAVE_THREAD_STATE();
+    ("ptr" ret_off) = foreign "C" threadStackUnderflow(MyCapability(),
+                                                       CurrentTSO);
+    LOAD_THREAD_STATE();
+
+    jump %ENTRY_CODE(Sp(ret_off));
+}
+
+/* ----------------------------------------------------------------------------
     Support for the bytecode interpreter.
     ------------------------------------------------------------------------- */
  
@@ -353,6 +370,9 @@ loop:
  INFO_TABLE(stg_TSO, 0,0,TSO, "TSO", "TSO")
  { foreign "C" barf("TSO object entered!") never returns; }
  
+INFO_TABLE(stg_STACK, 0,0, STACK, "STACK", "STACK")
+{ foreign "C" barf("STACK object entered!") never returns; }
+
  /* ----------------------------------------------------------------------------
     Weak pointers
  
diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c

index 94a5a15..aeae1d4 100644 (file)
--- a/rts/ThreadPaused.c
+++ b/rts/ThreadPaused.c
@@ -44,13 +44,13 @@ stackSqueeze(Capability *cap, StgTSO *tso, StgPtr bottom)
      //    contains two values: the size of the gap, and the distance
      //    to the next gap (or the stack top).
  
-    frame = tso->sp;
+    frame = tso->stackobj->sp;
  
      ASSERT(frame < bottom);
      
      prev_was_update_frame = rtsFalse;
      current_gap_size = 0;
-    gap = (struct stack_gap *) (tso->sp - sizeofW(StgUpdateFrame));
+    gap = (struct stack_gap *) (frame - sizeofW(StgUpdateFrame));
  
      while (frame <= bottom) {
         
@@ -150,7 +150,7 @@ stackSqueeze(Capability *cap, StgTSO *tso, StgPtr bottom)
         next_gap_start = (StgWord8*)gap + sizeof(StgUpdateFrame);
         sp = next_gap_start;
  
-       while ((StgPtr)gap > tso->sp) {
+        while ((StgPtr)gap > tso->stackobj->sp) {
  
             // we're working in *bytes* now...
             gap_start = next_gap_start;
@@ -164,7 +164,7 @@ stackSqueeze(Capability *cap, StgTSO *tso, StgPtr bottom)
             memmove(sp, next_gap_start, chunk_size);
         }
  
-       tso->sp = (StgPtr)sp;
+        tso->stackobj->sp = (StgPtr)sp;
      }
  }    
  
@@ -201,27 +201,27 @@ threadPaused(Capability *cap, StgTSO *tso)
      // blackholing, or eager blackholing consistently.  See Note
      // [upd-black-hole] in sm/Scav.c.
  
-    stack_end = &tso->stack[tso->stack_size];
+    stack_end = tso->stackobj->stack + tso->stackobj->stack_size;
      
-    frame = (StgClosure *)tso->sp;
+    frame = (StgClosure *)tso->stackobj->sp;
  
-    while (1) {
-       // If we've already marked this frame, then stop here.
-       if (frame->header.info == (StgInfoTable *)&stg_marked_upd_frame_info) {
-           if (prev_was_update_frame) {
-               words_to_squeeze += sizeofW(StgUpdateFrame);
-               weight += weight_pending;
-               weight_pending = 0;
-           }
-           goto end;
-       }
-
-       info = get_ret_itbl(frame);
+    while ((P_)frame < stack_end) {
+        info = get_ret_itbl(frame);
         
         switch (info->i.type) {
-           
+
         case UPDATE_FRAME:
  
+            // If we've already marked this frame, then stop here.
+            if (frame->header.info == (StgInfoTable *)&stg_marked_upd_frame_info) {
+                if (prev_was_update_frame) {
+                    words_to_squeeze += sizeofW(StgUpdateFrame);
+                    weight += weight_pending;
+                    weight_pending = 0;
+                }
+                goto end;
+            }
+
             SET_INFO(frame, (StgInfoTable *)&stg_marked_upd_frame_info);
  
             bh = ((StgUpdateFrame *)frame)->updatee;
@@ -235,7 +235,7 @@ threadPaused(Capability *cap, StgTSO *tso)
              {
                 debugTrace(DEBUG_squeeze,
                            "suspending duplicate work: %ld words of stack",
-                          (long)((StgPtr)frame - tso->sp));
+                           (long)((StgPtr)frame - tso->stackobj->sp));
  
                 // If this closure is already an indirection, then
                 // suspend the computation up to this point.
@@ -245,25 +245,22 @@ threadPaused(Capability *cap, StgTSO *tso)
  
                 // Now drop the update frame, and arrange to return
                 // the value to the frame underneath:
-               tso->sp = (StgPtr)frame + sizeofW(StgUpdateFrame) - 2;
-               tso->sp[1] = (StgWord)bh;
+                tso->stackobj->sp = (StgPtr)frame + sizeofW(StgUpdateFrame) - 2;
+                tso->stackobj->sp[1] = (StgWord)bh;
                  ASSERT(bh->header.info != &stg_TSO_info);
-               tso->sp[0] = (W_)&stg_enter_info;
+                tso->stackobj->sp[0] = (W_)&stg_enter_info;
  
                 // And continue with threadPaused; there might be
                 // yet more computation to suspend.
-                frame = (StgClosure *)(tso->sp + 2);
+                frame = (StgClosure *)(tso->stackobj->sp + 2);
                  prev_was_update_frame = rtsFalse;
                  continue;
             }
  
+
              // zero out the slop so that the sanity checker can tell
              // where the next closure is.
-            DEBUG_FILL_SLOP(bh);
-
-            // @LDV profiling
-            // We pretend that bh is now dead.
-            LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC((StgClosure *)bh);
+            OVERWRITING_CLOSURE(bh);
  
              // an EAGER_BLACKHOLE or CAF_BLACKHOLE gets turned into a
              // BLACKHOLE here.
@@ -301,7 +298,8 @@ threadPaused(Capability *cap, StgTSO *tso)
             prev_was_update_frame = rtsTrue;
             break;
             
-       case STOP_FRAME:
+        case UNDERFLOW_FRAME:
+        case STOP_FRAME:
             goto end;
             
             // normal stack frames; do nothing except advance the pointer
diff --git a/rts/Threads.c b/rts/Threads.c

index f6b1bac..d6fe0e7 100644 (file)
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -18,8 +18,14 @@
  #include "ThreadLabels.h"
  #include "Updates.h"
  #include "Messages.h"
+#include "RaiseAsync.h"
+#include "Prelude.h"
+#include "Printer.h"
+#include "sm/Sanity.h"
  #include "sm/Storage.h"
  
+#include <string.h>
+
  /* Next thread ID to allocate.
   * LOCK: sched_mutex
   */
@@ -54,57 +60,67 @@ StgTSO *
  createThread(Capability *cap, nat size)
  {
      StgTSO *tso;
+    StgStack *stack;
      nat stack_size;
  
      /* sched_mutex is *not* required */
  
-    /* First check whether we should create a thread at all */
-
-    // ToDo: check whether size = stack_size - TSO_STRUCT_SIZEW
-
      /* catch ridiculously small stack sizes */
-    if (size < MIN_STACK_WORDS + TSO_STRUCT_SIZEW) {
-       size = MIN_STACK_WORDS + TSO_STRUCT_SIZEW;
+    if (size < MIN_STACK_WORDS + sizeofW(StgStack)) {
+        size = MIN_STACK_WORDS + sizeofW(StgStack);
      }
  
-    size = round_to_mblocks(size);
-    tso = (StgTSO *)allocate(cap, size);
-
-    stack_size = size - TSO_STRUCT_SIZEW;
-    TICK_ALLOC_TSO(stack_size, 0);
-
+    /* The size argument we are given includes all the per-thread
+     * overheads:
+     *
+     *    - The TSO structure
+     *    - The STACK header
+     *
+     * This is so that we can use a nice round power of 2 for the
+     * default stack size (e.g. 1k), and if we're allocating lots of
+     * threads back-to-back they'll fit nicely in a block.  It's a bit
+     * of a benchmark hack, but it doesn't do any harm.
+     */
+    stack_size = round_to_mblocks(size - sizeofW(StgTSO));
+    stack = (StgStack *)allocate(cap, stack_size);
+    TICK_ALLOC_STACK(stack_size);
+    SET_HDR(stack, &stg_STACK_info, CCS_SYSTEM);
+    stack->stack_size   = stack_size - sizeofW(StgStack);
+    stack->sp           = stack->stack + stack->stack_size;
+    stack->dirty        = 1;
+
+    tso = (StgTSO *)allocate(cap, sizeofW(StgTSO));
+    TICK_ALLOC_TSO();
      SET_HDR(tso, &stg_TSO_info, CCS_SYSTEM);
  
      // Always start with the compiled code evaluator
      tso->what_next = ThreadRunGHC;
-
      tso->why_blocked  = NotBlocked;
      tso->block_info.closure = (StgClosure *)END_TSO_QUEUE;
      tso->blocked_exceptions = END_BLOCKED_EXCEPTIONS_QUEUE;
      tso->bq = (StgBlockingQueue *)END_TSO_QUEUE;
      tso->flags = 0;
      tso->dirty = 1;
-    
+    tso->_link = END_TSO_QUEUE;
+
      tso->saved_errno = 0;
      tso->bound = NULL;
      tso->cap = cap;
      
-    tso->stack_size     = stack_size;
-    tso->max_stack_size = round_to_mblocks(RtsFlags.GcFlags.maxStkSize) 
-                         - TSO_STRUCT_SIZEW;
-    tso->sp             = (P_)&(tso->stack) + stack_size;
+    tso->stackobj       = stack;
+    tso->tot_stack_size = stack->stack_size;
  
      tso->trec = NO_TREC;
-    
+
  #ifdef PROFILING
      tso->prof.CCCS = CCS_MAIN;
  #endif
      
-  /* put a stop frame on the stack */
-    tso->sp -= sizeofW(StgStopFrame);
-    SET_HDR((StgClosure*)tso->sp,(StgInfoTable *)&stg_stop_thread_info,CCS_SYSTEM);
-    tso->_link = END_TSO_QUEUE;
-    
+    // put a stop frame on the stack
+    stack->sp -= sizeofW(StgStopFrame);
+    SET_HDR((StgClosure*)stack->sp,
+            (StgInfoTable *)&stg_stop_thread_info,CCS_SYSTEM);
+
      /* Link the new thread on the global thread list.
       */
      ACQUIRE_LOCK(&sched_mutex);
@@ -220,12 +236,6 @@ removeThreadFromDeQueue (Capability *cap,
  void
  tryWakeupThread (Capability *cap, StgTSO *tso)
  {
-    tryWakeupThread_(cap, deRefTSO(tso));
-}
-
-void
-tryWakeupThread_ (Capability *cap, StgTSO *tso)
-{
      traceEventThreadWakeup (cap, tso, tso->cap->no);
  
  #ifdef THREADED_RTS
@@ -267,8 +277,8 @@ tryWakeupThread_ (Capability *cap, StgTSO *tso)
          }
  
          // remove the block frame from the stack
-        ASSERT(tso->sp[0] == (StgWord)&stg_block_throwto_info);
-        tso->sp += 3;
+        ASSERT(tso->stackobj->sp[0] == (StgWord)&stg_block_throwto_info);
+        tso->stackobj->sp += 3;
          goto unblock;
      }
  
@@ -416,7 +426,7 @@ updateThunk (Capability *cap, StgTSO *tso, StgClosure *thunk, StgClosure *val)
  
      i = v->header.info;
      if (i == &stg_TSO_info) {
-        owner = deRefTSO((StgTSO*)v);
+        owner = (StgTSO*)v;
          if (owner != tso) {
              checkBlockingQueues(cap, tso);
          }
@@ -429,7 +439,7 @@ updateThunk (Capability *cap, StgTSO *tso, StgClosure *thunk, StgClosure *val)
          return;
      }
  
-    owner = deRefTSO(((StgBlockingQueue*)v)->owner);
+    owner = ((StgBlockingQueue*)v)->owner;
  
      if (owner != tso) {
          checkBlockingQueues(cap, tso);
@@ -466,6 +476,202 @@ isThreadBound(StgTSO* tso USED_IF_THREADS)
    return rtsFalse;
  }
  
+/* -----------------------------------------------------------------------------
+   Stack overflow
+
+   If the thread has reached its maximum stack size, then raise the
+   StackOverflow exception in the offending thread.  Otherwise
+   allocate a new stack chunk, move the topmost frames of the old chunk
+   into it, and make it the thread's current stack object.
+   -------------------------------------------------------------------------- */
+
+void
+threadStackOverflow (Capability *cap, StgTSO *tso)
+{
+    StgStack *new_stack, *old_stack;
+    StgUnderflowFrame *frame;
+
+    IF_DEBUG(sanity,checkTSO(tso));
+
+    if (tso->tot_stack_size >= RtsFlags.GcFlags.maxStkSize
+        && !(tso->flags & TSO_BLOCKEX)) {
+        // NB. never raise a StackOverflow exception if the thread is
+        // inside Control.Exception.block.  It is impractical to protect
+        // against stack overflow exceptions, since virtually anything
+        // can raise one (even 'catch'), so this is the only sensible
+        // thing to do here.  See bug #767.
+        //
+
+        if (tso->flags & TSO_SQUEEZED) {
+            return;
+        }
+        // #3677: In a stack overflow situation, stack squeezing may
+        // reduce the stack size, but we don't know whether it has been
+        // reduced enough for the stack check to succeed if we try
+        // again.  Fortunately stack squeezing is idempotent, so all we
+        // need to do is record whether *any* squeezing happened.  If we
+        // are at the stack's absolute -K limit, and stack squeezing
+        // happened, then we try running the thread again.  The
+        // TSO_SQUEEZED flag is set by threadPaused() to tell us whether
+        // squeezing happened or not.
+
+        debugTrace(DEBUG_gc,
+                   "threadStackOverflow of TSO %ld (%p): stack too large (now %ld; max is %ld)",
+                   (long)tso->id, tso, (long)tso->tot_stack_size,
+                   (long)RtsFlags.GcFlags.maxStkSize);
+        IF_DEBUG(gc,
+                 /* If we're debugging, just print out the top of the stack */
+                 printStackChunk(tso->stackobj->sp,
+                                 stg_min(tso->stackobj->stack + tso->stackobj->stack_size,
+                                         tso->stackobj->sp+64)));
+
+        // Send this thread the StackOverflow exception; do NOT grow the stack
+        throwToSingleThreaded(cap, tso, (StgClosure *)stackOverflow_closure);
+        return;
+    }
+
+    // We also want to avoid enlarging the stack if squeezing has
+    // already released some of it.  However, we don't want to get into
+    // a pathological situation where a thread has a nearly full stack
+    // (near its current limit, but not near the absolute -K limit),
+    // keeps allocating a little bit, squeezing removes a little bit,
+    // and then it runs again.  So to avoid this, if we squeezed *and*
+    // there is still less than BLOCK_SIZE_W words free, then we enlarge
+    // the stack anyway.
+    if ((tso->flags & TSO_SQUEEZED) && 
+        ((W_)(tso->stackobj->sp - tso->stackobj->stack) >= BLOCK_SIZE_W)) {
+        return;
+    }
+
+    debugTraceCap(DEBUG_sched, cap,
+                  "allocating new stack chunk of size %d bytes",
+                  (int)(RtsFlags.GcFlags.stkChunkSize * sizeof(W_)));
+
+    old_stack = tso->stackobj;
+
+    new_stack = (StgStack*) allocate(cap, RtsFlags.GcFlags.stkChunkSize);
+    SET_HDR(new_stack, &stg_STACK_info, CCS_SYSTEM);
+    TICK_ALLOC_STACK(RtsFlags.GcFlags.stkChunkSize);
+
+    new_stack->dirty = 0; // begin clean, we'll mark it dirty below
+    new_stack->stack_size = RtsFlags.GcFlags.stkChunkSize - sizeofW(StgStack);
+    new_stack->sp = new_stack->stack + new_stack->stack_size;
+
+    tso->tot_stack_size += new_stack->stack_size;
+
+    new_stack->sp -= sizeofW(StgUnderflowFrame);
+    frame = (StgUnderflowFrame*)new_stack->sp;
+    frame->info = &stg_stack_underflow_frame_info;
+    frame->next_chunk  = old_stack;
+
+    {
+        StgWord *sp;
+        nat chunk_words, size;
+
+        // find the boundary of the chunk of old stack we're going to
+        // copy to the new stack.  We skip over stack frames until we
+        // reach the smaller of
+        //
+        //   * the chunk buffer size (+RTS -kb)
+        //   * the end of the old stack
+        //
+        for (sp = old_stack->sp;
+             sp < stg_min(old_stack->sp + RtsFlags.GcFlags.stkChunkBufferSize,
+                          old_stack->stack + old_stack->stack_size); )
+        {
+            size = stack_frame_sizeW((StgClosure*)sp);
+
+            // if including this frame would exceed the size of the
+            // new stack (taking into account the underflow frame),
+            // then stop at the previous frame.
+            if (sp + size > old_stack->sp + (new_stack->stack_size -
+                                             sizeofW(StgUnderflowFrame))) {
+                break;
+            }
+            sp += size;
+        }
+
+        // copy the frames between old_stack->sp and sp to the new
+        // stack, at the addresses just below the underflow frame
+        chunk_words = sp - old_stack->sp;
+
+        memcpy(/* dest   */ new_stack->sp - chunk_words,
+               /* source */ old_stack->sp,
+               /* size   */ chunk_words * sizeof(W_));
+
+        old_stack->sp += chunk_words;
+        new_stack->sp -= chunk_words;
+    }
+
+    // if the old stack chunk is now empty, discard it: the underflow
+    // frame stops referring to it.  With the default settings,
+    // -ki1k -kb1k, this means the first stack chunk is discarded after
+    // the first overflow, being replaced by a non-moving 32k chunk.
+    if (old_stack->sp == old_stack->stack + old_stack->stack_size) {
+        frame->next_chunk = new_stack;
+    }
+
+    tso->stackobj = new_stack;
+
+    // we're about to run it, better mark it dirty
+    dirty_STACK(cap, new_stack);
+
+    IF_DEBUG(sanity,checkTSO(tso));
+    // IF_DEBUG(scheduler,printTSO(new_tso));
+}
+
+
+/* ---------------------------------------------------------------------------
+   Stack underflow - called from the stg_stack_underflow_info frame
+   ------------------------------------------------------------------------ */
+// Switches tso to the chunk named by the underflow frame of its current chunk.
+nat // returns offset to the return address
+threadStackUnderflow (Capability *cap, StgTSO *tso)
+{
+    StgStack *new_stack, *old_stack;
+    StgUnderflowFrame *frame;
+    nat retvals;
+
+    debugTraceCap(DEBUG_sched, cap, "stack underflow");
+
+    old_stack = tso->stackobj;
+    // the underflow frame occupies the last words of the current chunk
+    frame = (StgUnderflowFrame*)(old_stack->stack + old_stack->stack_size
+                                 - sizeofW(StgUnderflowFrame));
+    ASSERT(frame->info == &stg_stack_underflow_frame_info);
+    // make the chunk recorded in the frame the thread's current stack
+    new_stack = (StgStack*)frame->next_chunk;
+    tso->stackobj = new_stack;
+    // number of words lying between sp and the underflow frame
+    retvals = (P_)frame - old_stack->sp;
+    if (retvals != 0)
+    {
+        // we have some return values to copy to the chunk we return to
+        if ((new_stack->sp - new_stack->stack) < retvals)
+        {
+            barf("threadStackUnderflow: not enough space for return values");
+        }
+
+        new_stack->sp -= retvals;
+
+        memcpy(/* dest */ new_stack->sp,
+               /* src  */ old_stack->sp,
+               /* size */ retvals * sizeof(W_));
+    }
+
+    // empty the old stack.  The GC may still visit this object
+    // because it is on the mutable list.
+    old_stack->sp = old_stack->stack + old_stack->stack_size;
+
+    // the chunk we just left no longer counts towards tot_stack_size
+    tso->tot_stack_size -= old_stack->stack_size;
+
+    // we're about to run it, better mark it dirty
+    dirty_STACK(cap, new_stack);
+
+    return retvals;
+}
+
  /* ----------------------------------------------------------------------------
   * Debugging: why is a thread blocked
   * ------------------------------------------------------------------------- */
@@ -529,10 +735,7 @@ printThreadStatus(StgTSO *t)
        void *label = lookupThreadLabel(t->id);
        if (label) debugBelch("[\"%s\"] ",(char *)label);
      }
-    if (t->what_next == ThreadRelocated) {
-       debugBelch("has been relocated...\n");
-    } else {
-       switch (t->what_next) {
+        switch (t->what_next) {
         case ThreadKilled:
             debugBelch("has been killed");
             break;
@@ -544,11 +747,8 @@ printThreadStatus(StgTSO *t)
         }
          if (t->dirty) {
              debugBelch(" (TSO_DIRTY)");
-        } else if (t->flags & TSO_LINK_DIRTY) {
-            debugBelch(" (TSO_LINK_DIRTY)");
          }
         debugBelch("\n");
-    }
  }
  
  void
@@ -574,11 +774,7 @@ printAllThreads(void)
        if (t->why_blocked != NotBlocked) {
           printThreadStatus(t);
        }
-      if (t->what_next == ThreadRelocated) {
-         next = t->_link;
-      } else {
-         next = t->global_link;
-      }
+      next = t->global_link;
      }
    }
  }
diff --git a/rts/Threads.h b/rts/Threads.h

index 776dd93..857658a 100644 (file)
--- a/rts/Threads.h
+++ b/rts/Threads.h
@@ -21,9 +21,6 @@ void wakeBlockingQueue   (Capability *cap, StgBlockingQueue *bq);
  void tryWakeupThread     (Capability *cap, StgTSO *tso);
  void migrateThread       (Capability *from, StgTSO *tso, Capability *to);
  
-// like tryWakeupThread(), but assumes the TSO is not ThreadRelocated
-void tryWakeupThread_    (Capability *cap, StgTSO *tso);
-
  // Wakes up a thread on a Capability (probably a different Capability
  // from the one held by the current Task).
  //
@@ -41,6 +38,10 @@ rtsBool removeThreadFromDeQueue   (Capability *cap, StgTSO **head, StgTSO **tail
  
  StgBool isThreadBound (StgTSO* tso);
  
+// Stack overflow/underflow
+void threadStackOverflow  (Capability *cap, StgTSO *tso);
+nat  threadStackUnderflow (Capability *cap, StgTSO *tso);
+
  #ifdef DEBUG
  void printThreadBlockage (StgTSO *tso);
  void printThreadStatus (StgTSO *t);
diff --git a/rts/Trace.h b/rts/Trace.h

index 97d9514..27de60e 100644 (file)
--- a/rts/Trace.h
+++ b/rts/Trace.h
@@ -265,7 +265,7 @@ void dtraceUserMsgWrapper(Capability *cap, char *msg);
  INLINE_HEADER void traceEventCreateThread(Capability *cap STG_UNUSED, 
                                            StgTSO     *tso STG_UNUSED)
  {
-    traceSchedEvent(cap, EVENT_CREATE_THREAD, tso, tso->stack_size);
+    traceSchedEvent(cap, EVENT_CREATE_THREAD, tso, tso->stackobj->stack_size);
      dtraceCreateThread((EventCapNo)cap->no, (EventThreadID)tso->id);
  }
  
diff --git a/rts/Updates.h b/rts/Updates.h

index 2258c98..954f02a 100644 (file)
--- a/rts/Updates.h
+++ b/rts/Updates.h
@@ -18,101 +18,12 @@
     -------------------------------------------------------------------------- */
  
  /* LDV profiling:
- * We call LDV_recordDead_FILL_SLOP_DYNAMIC(p1) regardless of the generation in 
- * which p1 resides.
- *
- * Note: 
   *   After all, we do *NOT* need to call LDV_RECORD_CREATE() for IND
   *   closures because they are inherently used. But, it corrupts
   *   the invariants that every closure keeps its creation time in the profiling
   *  field. So, we call LDV_RECORD_CREATE().
   */
  
-/* In the DEBUG case, we also zero out the slop of the old closure,
- * so that the sanity checker can tell where the next closure is.
- *
- * Two important invariants: we should never try to update a closure
- * to point to itself, and the closure being updated should not
- * already have been updated (the mutable list will get messed up
- * otherwise).
- *
- * NB. We do *not* do this in THREADED_RTS mode, because when we have the
- * possibility of multiple threads entering the same closure, zeroing
- * the slop in one of the threads would have a disastrous effect on
- * the other (seen in the wild!).
- */
-#ifdef CMINUSMINUS
-
-#define FILL_SLOP(p)                                                   \
-  W_ inf;                                                              \
-  W_ sz;                                                               \
-  W_ i;                                                                        \
-  inf = %GET_STD_INFO(p);                                              \
-  if (%INFO_TYPE(inf) != HALF_W_(BLACKHOLE)) {                         \
-      if (%INFO_TYPE(inf) == HALF_W_(THUNK_SELECTOR)) {                        \
-         sz = BYTES_TO_WDS(SIZEOF_StgSelector_NoThunkHdr);             \
-     } else {                                                          \
-          if (%INFO_TYPE(inf) == HALF_W_(AP_STACK)) {                  \
-              sz = StgAP_STACK_size(p) + BYTES_TO_WDS(SIZEOF_StgAP_STACK_NoThunkHdr); \
-          } else {                                                     \
-              if (%INFO_TYPE(inf) == HALF_W_(AP)) {                    \
-                 sz = TO_W_(StgAP_n_args(p)) +  BYTES_TO_WDS(SIZEOF_StgAP_NoThunkHdr); \
-              } else {                                                 \
-                  sz = TO_W_(%INFO_PTRS(inf)) + TO_W_(%INFO_NPTRS(inf)); \
-             }                                                         \
-          }                                                            \
-      }                                                                        \
-      i = 0;                                                           \
-      for:                                                             \
-        if (i < sz) {                                                  \
-          StgThunk_payload(p,i) = 0;                                   \
-          i = i + 1;                                                   \
-          goto for;                                                    \
-        }                                                              \
-  }
-
-#else /* !CMINUSMINUS */
-
-INLINE_HEADER void
-FILL_SLOP(StgClosure *p)
-{                                              
-    StgInfoTable *inf = get_itbl(p);           
-    nat i, sz;
-
-    switch (inf->type) {
-    case BLACKHOLE:
-       goto no_slop;
-       // we already filled in the slop when we overwrote the thunk
-       // with BLACKHOLE, and also an evacuated BLACKHOLE is only the
-       // size of an IND.
-    case THUNK_SELECTOR:
-       sz = sizeofW(StgSelector) - sizeofW(StgThunkHeader);
-       break;
-    case AP:
-       sz = ((StgAP *)p)->n_args + sizeofW(StgAP) - sizeofW(StgThunkHeader);
-       break;
-    case AP_STACK:
-       sz = ((StgAP_STACK *)p)->size + sizeofW(StgAP_STACK) - sizeofW(StgThunkHeader);
-       break;
-    default:
-       sz = inf->layout.payload.ptrs + inf->layout.payload.nptrs;
-        break;
-    }
-    for (i = 0; i < sz; i++) {
-       ((StgThunk *)p)->payload[i] = 0;
-    }
-no_slop:
-    ;
-}
-
-#endif /* CMINUSMINUS */
-
-#if !defined(DEBUG) || defined(THREADED_RTS)
-#define DEBUG_FILL_SLOP(p) /* do nothing */
-#else
-#define DEBUG_FILL_SLOP(p) FILL_SLOP(p)
-#endif
-
  /* We have two versions of this macro (sadly), one for use in C-- code,
   * and the other for C.
   *
@@ -128,9 +39,8 @@ no_slop:
  #define updateWithIndirection(p1, p2, and_then)        \
      W_ bd;                                                     \
                                                                 \
-    DEBUG_FILL_SLOP(p1);                                       \
-    LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1);                     \
-    StgInd_indirectee(p1) = p2;                                        \
+    OVERWRITING_CLOSURE(p1);                                    \
+    StgInd_indirectee(p1) = p2;                                 \
      prim %write_barrier() [];                                  \
      SET_INFO(p1, stg_BLACKHOLE_info);                           \
      LDV_RECORD_CREATE(p1);                                      \
@@ -155,8 +65,7 @@ INLINE_HEADER void updateWithIndirection (Capability *cap,
      ASSERT( (P_)p1 != (P_)p2 );
      /* not necessarily true: ASSERT( !closure_IND(p1) ); */
      /* occurs in RaiseAsync.c:raiseAsync() */
-    DEBUG_FILL_SLOP(p1);
-    LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1);
+    OVERWRITING_CLOSURE(p1);
      ((StgInd *)p1)->indirectee = p2;
      write_barrier();
      SET_INFO(p1, &stg_BLACKHOLE_info);
diff --git a/rts/posix/Select.c b/rts/posix/Select.c

index 0127b3c..3c87fbd 100644 (file)
--- a/rts/posix/Select.c
+++ b/rts/posix/Select.c
@@ -63,10 +63,6 @@ wakeUpSleepingThreads(lnat ticks)
  
      while (sleeping_queue != END_TSO_QUEUE) {
         tso = sleeping_queue;
-        if (tso->what_next == ThreadRelocated) {
-            sleeping_queue = tso->_link;
-            continue;
-        }
          if (((long)ticks - (long)tso->block_info.target) < 0) {
              break;
          }
@@ -259,11 +255,7 @@ awaitEvent(rtsBool wait)
           for(tso = blocked_queue_hd; tso != END_TSO_QUEUE; tso = next) {
               next = tso->_link;
  
-              if (tso->what_next == ThreadRelocated) {
-                  continue;
-              }
-
-             switch (tso->why_blocked) {
+              switch (tso->why_blocked) {
               case BlockedOnRead:
                   ready = unblock_all || FD_ISSET(tso->block_info.fd, &rfd);
                   break;
diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c

index 75c8832..bf0c5e6 100644 (file)
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -577,48 +577,6 @@ freeChain_lock(bdescr *bd)
      RELEASE_SM_LOCK;
  }
  
-// splitBlockGroup(bd,B) splits bd in two.  Afterward, bd will have B
-// blocks, and a new block descriptor pointing to the remainder is
-// returned.
-bdescr *
-splitBlockGroup (bdescr *bd, nat blocks)
-{
-    bdescr *new_bd;
-
-    if (bd->blocks <= blocks) {
-        barf("splitLargeBlock: too small");
-    }
-
-    if (bd->blocks > BLOCKS_PER_MBLOCK) {
-        nat low_mblocks, high_mblocks;
-        void *new_mblock;
-        if ((blocks - BLOCKS_PER_MBLOCK) % (MBLOCK_SIZE / BLOCK_SIZE) != 0) {
-            barf("splitLargeBlock: not a multiple of a megablock");
-        }
-        low_mblocks = 1 + (blocks - BLOCKS_PER_MBLOCK) / (MBLOCK_SIZE / BLOCK_SIZE);
-        high_mblocks = (bd->blocks - blocks) / (MBLOCK_SIZE / BLOCK_SIZE);
-
-        new_mblock = (void *) ((P_)MBLOCK_ROUND_DOWN(bd) + (W_)low_mblocks * MBLOCK_SIZE_W);
-        initMBlock(new_mblock);
-        new_bd = FIRST_BDESCR(new_mblock);
-        new_bd->blocks = MBLOCK_GROUP_BLOCKS(high_mblocks);
-
-        ASSERT(blocks + new_bd->blocks == 
-               bd->blocks + BLOCKS_PER_MBLOCK - MBLOCK_SIZE/BLOCK_SIZE);
-    }
-    else
-    {
-        // NB. we're not updating all the bdescrs in the split groups to
-        // point to the new heads, so this can only be used for large
-        // objects which do not start in the non-head block.
-        new_bd = bd + blocks;
-        new_bd->blocks = bd->blocks - blocks;
-    }
-    bd->blocks = blocks;
-
-    return new_bd;
-}
-
  static void
  initMBlock(void *mblock)
  {
diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c

index 977e31d..4f3dcf2 100644 (file)
--- a/rts/sm/Compact.c
+++ b/rts/sm/Compact.c
@@ -335,8 +335,9 @@ thread_stack(StgPtr p, StgPtr stack_end)
          case CATCH_STM_FRAME:
          case ATOMICALLY_FRAME:
         case UPDATE_FRAME:
-       case STOP_FRAME:
-       case CATCH_FRAME:
+        case UNDERFLOW_FRAME:
+        case STOP_FRAME:
+        case CATCH_FRAME:
         case RET_SMALL:
             bitmap = BITMAP_BITS(info->i.layout.bitmap);
             size   = BITMAP_SIZE(info->i.layout.bitmap);
@@ -480,8 +481,8 @@ thread_TSO (StgTSO *tso)
      
      thread_(&tso->trec);
  
-    thread_stack(tso->sp, &(tso->stack[tso->stack_size]));
-    return (StgPtr)tso + tso_sizeW(tso);
+    thread_(&tso->stackobj);
+    return (StgPtr)tso + sizeofW(StgTSO);
  }
  
  
@@ -521,9 +522,12 @@ update_fwd_large( bdescr *bd )
            continue;
        }
  
-    case TSO:
-       thread_TSO((StgTSO *)p);
-       continue;
+    case STACK:
+    {
+        StgStack *stack = (StgStack*)p;
+        thread_stack(stack->sp, stack->stack + stack->stack_size);
+        continue;
+    }
  
      case AP_STACK:
         thread_AP_STACK((StgAP_STACK *)p);
@@ -706,6 +710,13 @@ thread_obj (StgInfoTable *info, StgPtr p)
      case TSO:
         return thread_TSO((StgTSO *)p);
      
+    case STACK:
+    {
+        StgStack *stack = (StgStack*)p;
+        thread_stack(stack->sp, stack->stack + stack->stack_size);
+        return p + stack_sizeW(stack);
+    }
+
      case TREC_CHUNK:
      {
          StgWord i;
@@ -899,8 +910,8 @@ update_bkwd_compact( generation *gen )
             }
  
             // relocate TSOs
-           if (info->type == TSO) {
-               move_TSO((StgTSO *)p, (StgTSO *)free);
+            if (info->type == STACK) {
+                move_STACK((StgStack *)p, (StgStack *)free);
             }
  
             free += size;
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c

index 61cf10b..65da076 100644 (file)
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -485,14 +485,7 @@ loop:
        /* evacuate large objects by re-linking them onto a different list.
         */
        if (bd->flags & BF_LARGE) {
-         info = get_itbl(q);
-         if (info->type == TSO && 
-             ((StgTSO *)q)->what_next == ThreadRelocated) {
-             q = (StgClosure *)((StgTSO *)q)->_link;
-              *p = q;
-             goto loop;
-         }
-         evacuate_large((P_)q);
+          evacuate_large((P_)q);
           return;
        }
        
@@ -675,6 +668,7 @@ loop:
    case RET_BIG:
    case RET_DYN:
    case UPDATE_FRAME:
+  case UNDERFLOW_FRAME:
    case STOP_FRAME:
    case CATCH_FRAME:
    case CATCH_STM_FRAME:
@@ -709,31 +703,28 @@ loop:
        return;
  
    case TSO:
-    {
-      StgTSO *tso = (StgTSO *)q;
+      copy(p,info,q,sizeofW(StgTSO),gen);
+      evacuate((StgClosure**)&(((StgTSO*)(*p))->stackobj));
+      return;
  
-      /* Deal with redirected TSOs (a TSO that's had its stack enlarged).
-       */
-      if (tso->what_next == ThreadRelocated) {
-       q = (StgClosure *)tso->_link;
-       *p = q;
-       goto loop;
-      }
+  case STACK:
+    {
+      StgStack *stack = (StgStack *)q;
  
-      /* To evacuate a small TSO, we need to adjust the stack pointer
+      /* To evacuate a small STACK, we need to adjust the stack pointer
         */
        {
-         StgTSO *new_tso;
+          StgStack *new_stack;
           StgPtr r, s;
            rtsBool mine;
  
-         mine = copyPart(p,(StgClosure *)tso, tso_sizeW(tso), 
-                          sizeofW(StgTSO), gen);
+          mine = copyPart(p,(StgClosure *)stack, stack_sizeW(stack),
+                          sizeofW(StgStack), gen);
            if (mine) {
-              new_tso = (StgTSO *)*p;
-              move_TSO(tso, new_tso);
-              for (r = tso->sp, s = new_tso->sp;
-                   r < tso->stack+tso->stack_size;) {
+              new_stack = (StgStack *)*p;
+              move_STACK(stack, new_stack);
+              for (r = stack->sp, s = new_stack->sp;
+                   r < stack->stack + stack->stack_size;) {
                    *s++ = *r++;
                }
            }
@@ -952,7 +943,7 @@ selector_loop:
                // For the purposes of LDV profiling, we have destroyed
                // the original selector thunk, p.
                SET_INFO(p, (StgInfoTable *)info_ptr);
-              LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC((StgClosure *)p);
+              OVERWRITING_CLOSURE(p);
                SET_INFO(p, &stg_WHITEHOLE_info);
  #endif
  
diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c

index f69c81d..97af17a 100644 (file)
--- a/rts/sm/GCAux.c
+++ b/rts/sm/GCAux.c
@@ -67,12 +67,7 @@ isAlive(StgClosure *p)
  
      // large objects use the evacuated flag
      if (bd->flags & BF_LARGE) {
-        if (get_itbl(q)->type == TSO &&
-            ((StgTSO *)p)->what_next == ThreadRelocated) {
-            p = (StgClosure *)((StgTSO *)p)->_link;
-            continue;
-        }
-       return NULL;
+        return NULL;
      }
  
      // check the mark bit for compacted steps
@@ -98,13 +93,6 @@ isAlive(StgClosure *p)
        p = ((StgInd *)q)->indirectee;
        continue;
  
-    case TSO:
-      if (((StgTSO *)q)->what_next == ThreadRelocated) {
-       p = (StgClosure *)((StgTSO *)q)->_link;
-       continue;
-      } 
-      return NULL;
-
      default:
        // dead. 
        return NULL;
diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c

index aadd575..72f0ade 100644 (file)
--- a/rts/sm/MarkWeak.c
+++ b/rts/sm/MarkWeak.c
@@ -260,12 +260,6 @@ static rtsBool tidyThreadList (generation *gen)
          }
          
          ASSERT(get_itbl(t)->type == TSO);
-        if (t->what_next == ThreadRelocated) {
-            next = t->_link;
-            *prev = next;
-            continue;
-        }
-        
          next = t->global_link;
          
          // if the thread is not masking exceptions but there are
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c

index dfa9865..22b7f64 100644 (file)
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -35,6 +35,7 @@
  static void      checkSmallBitmap    ( StgPtr payload, StgWord bitmap, nat );
  static void      checkLargeBitmap    ( StgPtr payload, StgLargeBitmap*, nat );
  static void      checkClosureShallow ( StgClosure * );
+static void      checkSTACK          (StgStack *stack);
  
  /* -----------------------------------------------------------------------------
     Check stack sanity
@@ -139,6 +140,7 @@ checkStackFrame( StgPtr c )
      case CATCH_STM_FRAME:
      case CATCH_FRAME:
        // small bitmap cases (<= 32 entries)
+    case UNDERFLOW_FRAME:
      case STOP_FRAME:
      case RET_SMALL:
         size = BITMAP_SIZE(info->i.layout.bitmap);
@@ -331,7 +333,7 @@ checkClosure( StgClosure* p )
  
          ASSERT(get_itbl(bq->owner)->type == TSO);
          ASSERT(bq->queue == (MessageBlackHole*)END_TSO_QUEUE 
-               || get_itbl(bq->queue)->type == TSO);
+               || bq->queue->header.info == &stg_MSG_BLACKHOLE_info);
          ASSERT(bq->link == (StgBlockingQueue*)END_TSO_QUEUE || 
                 get_itbl(bq->link)->type == IND ||
                 get_itbl(bq->link)->type == BLOCKING_QUEUE);
@@ -384,6 +386,7 @@ checkClosure( StgClosure* p )
      case RET_BIG:
      case RET_DYN:
      case UPDATE_FRAME:
+    case UNDERFLOW_FRAME:
      case STOP_FRAME:
      case CATCH_FRAME:
      case ATOMICALLY_FRAME:
@@ -431,7 +434,11 @@ checkClosure( StgClosure* p )
  
      case TSO:
          checkTSO((StgTSO *)p);
-        return tso_sizeW((StgTSO *)p);
+        return sizeofW(StgTSO);
+
+    case STACK:
+        checkSTACK((StgStack*)p);
+        return stack_sizeW((StgStack*)p);
  
      case TREC_CHUNK:
        {
@@ -514,19 +521,21 @@ checkLargeObjects(bdescr *bd)
    }
  }
  
-void
-checkTSO(StgTSO *tso)
+static void
+checkSTACK (StgStack *stack)
  {
-    StgPtr sp = tso->sp;
-    StgPtr stack = tso->stack;
-    StgOffset stack_size = tso->stack_size;
-    StgPtr stack_end = stack + stack_size;
+    StgPtr sp = stack->sp;
+    StgOffset stack_size = stack->stack_size;
+    StgPtr stack_end = stack->stack + stack_size;
  
-    if (tso->what_next == ThreadRelocated) {
-      checkTSO(tso->_link);
-      return;
-    }
+    ASSERT(stack->stack <= sp && sp <= stack_end);
  
+    checkStackChunk(sp, stack_end);
+}
+
+void
+checkTSO(StgTSO *tso)
+{
      if (tso->what_next == ThreadKilled) {
        /* The garbage collector doesn't bother following any pointers
         * from dead threads, so don't check sanity here.  
@@ -537,16 +546,24 @@ checkTSO(StgTSO *tso)
      ASSERT(tso->_link == END_TSO_QUEUE || 
             tso->_link->header.info == &stg_MVAR_TSO_QUEUE_info ||
             tso->_link->header.info == &stg_TSO_info);
-    ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->block_info.closure));
+
+    if (   tso->why_blocked == BlockedOnMVar
+       || tso->why_blocked == BlockedOnBlackHole
+       || tso->why_blocked == BlockedOnMsgThrowTo
+        || tso->why_blocked == NotBlocked
+       ) {
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->block_info.closure));
+    }
+
      ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->bq));
      ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->blocked_exceptions));
+    ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->stackobj));
  
-    ASSERT(stack <= sp && sp < stack_end);
-
-    checkStackChunk(sp, stack_end);
+    // XXX are we checking the stack twice?
+    checkSTACK(tso->stackobj);
  }
  
-/* 
+/*
     Check that all TSOs have been evacuated.
     Optionally also check the sanity of the TSOs.
  */
@@ -564,11 +581,9 @@ checkGlobalTSOList (rtsBool checkTSOs)
            if (checkTSOs)
                checkTSO(tso);
  
-          tso = deRefTSO(tso);
-
            // If this TSO is dirty and in an old generation, it better
            // be on the mutable list.
-          if (tso->dirty || (tso->flags & TSO_LINK_DIRTY)) {
+          if (tso->dirty) {
                ASSERT(Bdescr((P_)tso)->gen_no == 0 || (tso->flags & TSO_MARKED));
                tso->flags &= ~TSO_MARKED;
            }
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c

index d01442b..d7e16ea 100644 (file)
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -51,14 +51,6 @@ scavengeTSO (StgTSO *tso)
  {
      rtsBool saved_eager;
  
-    if (tso->what_next == ThreadRelocated) {
-        // the only way this can happen is if the old TSO was on the
-        // mutable list.  We might have other links to this defunct
-        // TSO, so we must update its link field.
-        evacuate((StgClosure**)&tso->_link);
-        return;
-    }
-
      debugTrace(DEBUG_gc,"scavenging thread %d",(int)tso->id);
  
      // update the pointer from the Task.
@@ -69,17 +61,13 @@ scavengeTSO (StgTSO *tso)
      saved_eager = gct->eager_promotion;
      gct->eager_promotion = rtsFalse;
  
-
      evacuate((StgClosure **)&tso->blocked_exceptions);
      evacuate((StgClosure **)&tso->bq);
      
      // scavange current transaction record
      evacuate((StgClosure **)&tso->trec);
-    
-    // scavenge this thread's stack 
-    scavenge_stack(tso->sp, &(tso->stack[tso->stack_size]));
  
-    tso->dirty = gct->failed_to_evac;
+    evacuate((StgClosure **)&tso->stackobj);
  
      evacuate((StgClosure **)&tso->_link);
      if (   tso->why_blocked == BlockedOnMVar
@@ -99,11 +87,7 @@ scavengeTSO (StgTSO *tso)
      }
  #endif
  
-    if (tso->dirty == 0 && gct->failed_to_evac) {
-        tso->flags |= TSO_LINK_DIRTY;
-    } else {
-        tso->flags &= ~TSO_LINK_DIRTY;
-    }
+    tso->dirty = gct->failed_to_evac;
  
      gct->eager_promotion = saved_eager;
  }
@@ -661,12 +645,25 @@ scavenge_block (bdescr *bd)
  
      case TSO:
      { 
-       StgTSO *tso = (StgTSO *)p;
-        scavengeTSO(tso);
-       p += tso_sizeW(tso);
+        scavengeTSO((StgTSO *)p);
+        p += sizeofW(StgTSO);
         break;
      }
  
+    case STACK:
+    {
+        StgStack *stack = (StgStack*)p;
+
+        gct->eager_promotion = rtsFalse;
+
+        scavenge_stack(stack->sp, stack->stack + stack->stack_size);
+        stack->dirty = gct->failed_to_evac;
+        p += stack_sizeW(stack);
+
+        gct->eager_promotion = saved_eager_promotion;
+        break;
+    }
+
      case MUT_PRIM:
        {
         StgPtr end;
@@ -991,6 +988,19 @@ scavenge_mark_stack(void)
             break;
         }
  
+        case STACK:
+        {
+            StgStack *stack = (StgStack*)p;
+
+            gct->eager_promotion = rtsFalse;
+
+            scavenge_stack(stack->sp, stack->stack + stack->stack_size);
+            stack->dirty = gct->failed_to_evac;
+
+            gct->eager_promotion = saved_eager_promotion;
+            break;
+        }
+
          case MUT_PRIM:
          {
              StgPtr end;
@@ -1227,6 +1237,19 @@ scavenge_one(StgPtr p)
         break;
      }
    
+    case STACK:
+    {
+        StgStack *stack = (StgStack*)p;
+
+        gct->eager_promotion = rtsFalse;
+
+        scavenge_stack(stack->sp, stack->stack + stack->stack_size);
+        stack->dirty = gct->failed_to_evac;
+
+        gct->eager_promotion = saved_eager_promotion;
+        break;
+    }
+
      case MUT_PRIM:
      {
         StgPtr end;
@@ -1374,33 +1397,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
                 recordMutableGen_GC((StgClosure *)p,gen->no);
                 continue;
              }
-           case TSO: {
-               StgTSO *tso = (StgTSO *)p;
-               if (tso->dirty == 0) {
-                    // Should be on the mutable list because its link
-                    // field is dirty.  However, in parallel GC we may
-                    // have a thread on multiple mutable lists, so
-                    // this assertion would be invalid:
-                    // ASSERT(tso->flags & TSO_LINK_DIRTY);
-
-                    evacuate((StgClosure **)&tso->_link);
-                    if (   tso->why_blocked == BlockedOnMVar
-                        || tso->why_blocked == BlockedOnBlackHole
-                        || tso->why_blocked == BlockedOnMsgThrowTo
-                        || tso->why_blocked == NotBlocked
-                        ) {
-                        evacuate((StgClosure **)&tso->block_info.prev);
-                    }
-                    if (gct->failed_to_evac) {
-                        recordMutableGen_GC((StgClosure *)p,gen->no);
-                        gct->failed_to_evac = rtsFalse;
-                    } else {
-                        tso->flags &= ~TSO_LINK_DIRTY;
-                    }
-                   continue;
-               }
-           }
-           default:
+            default:
                 ;
             }
  
@@ -1643,6 +1640,7 @@ scavenge_stack(StgPtr p, StgPtr stack_end)
      case CATCH_STM_FRAME:
      case CATCH_RETRY_FRAME:
      case ATOMICALLY_FRAME:
+    case UNDERFLOW_FRAME:
      case STOP_FRAME:
      case CATCH_FRAME:
      case RET_SMALL:
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c

index 2172f9b..4247d28 100644 (file)
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -495,12 +495,12 @@ resizeNurseries (nat blocks)
  
  
  /* -----------------------------------------------------------------------------
-   move_TSO is called to update the TSO structure after it has been
+   move_STACK is called to update the StgStack structure after it has been
     moved from one place to another.
     -------------------------------------------------------------------------- */
  
  void
-move_TSO (StgTSO *src, StgTSO *dest)
+move_STACK (StgStack *src, StgStack *dest)
  {
      ptrdiff_t diff;
  
@@ -510,45 +510,6 @@ move_TSO (StgTSO *src, StgTSO *dest)
  }
  
  /* -----------------------------------------------------------------------------
-   split N blocks off the front of the given bdescr, returning the
-   new block group.  We add the remainder to the large_blocks list
-   in the same step as the original block.
-   -------------------------------------------------------------------------- */
-
-bdescr *
-splitLargeBlock (bdescr *bd, nat blocks)
-{
-    bdescr *new_bd;
-
-    ACQUIRE_SM_LOCK;
-
-    ASSERT(countBlocks(bd->gen->large_objects) == bd->gen->n_large_blocks);
-
-    // subtract the original number of blocks from the counter first
-    bd->gen->n_large_blocks -= bd->blocks;
-
-    new_bd = splitBlockGroup (bd, blocks);
-    initBdescr(new_bd, bd->gen, bd->gen->to);
-    new_bd->flags   = BF_LARGE | (bd->flags & BF_EVACUATED); 
-    // if new_bd is in an old generation, we have to set BF_EVACUATED
-    new_bd->free    = bd->free;
-    dbl_link_onto(new_bd, &bd->gen->large_objects);
-
-    ASSERT(new_bd->free <= new_bd->start + new_bd->blocks * BLOCK_SIZE_W);
-
-    // add the new number of blocks to the counter.  Due to the gaps
-    // for block descriptors, new_bd->blocks + bd->blocks might not be
-    // equal to the original bd->blocks, which is why we do it this way.
-    bd->gen->n_large_blocks += bd->blocks + new_bd->blocks;
-
-    ASSERT(countBlocks(bd->gen->large_objects) == bd->gen->n_large_blocks);
-
-    RELEASE_SM_LOCK;
-
-    return new_bd;
-}
-
-/* -----------------------------------------------------------------------------
     allocate()
  
     This allocates memory in the current thread - it is intended for
@@ -731,8 +692,8 @@ dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
  void
  setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
  {
-    if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) {
-        tso->flags |= TSO_LINK_DIRTY;
+    if (tso->dirty == 0) {
+        tso->dirty = 1;
          recordClosureMutated(cap,(StgClosure*)tso);
      }
      tso->_link = target;
@@ -741,8 +702,8 @@ setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
  void
  setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
  {
-    if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) {
-        tso->flags |= TSO_LINK_DIRTY;
+    if (tso->dirty == 0) {
+        tso->dirty = 1;
          recordClosureMutated(cap,(StgClosure*)tso);
      }
      tso->block_info.prev = target;
@@ -751,10 +712,19 @@ setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
  void
  dirty_TSO (Capability *cap, StgTSO *tso)
  {
-    if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) {
+    if (tso->dirty == 0) {
+        tso->dirty = 1;
          recordClosureMutated(cap,(StgClosure*)tso);
      }
-    tso->dirty = 1;
+}
+
+void
+dirty_STACK (Capability *cap, StgStack *stack)
+{
+    if (stack->dirty == 0) {
+        stack->dirty = 1;
+        recordClosureMutated(cap,(StgClosure*)stack);
+    }
  }
  
  /*
diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h

index e541193..3ff3380 100644 (file)
--- a/rts/sm/Storage.h
+++ b/rts/sm/Storage.h
@@ -146,7 +146,7 @@ extern bdescr *exec_block;
  
  #define END_OF_STATIC_LIST ((StgClosure*)1)
  
-void move_TSO  (StgTSO *src, StgTSO *dest);
+void move_STACK  (StgStack *src, StgStack *dest);
  
  extern StgClosure * caf_list;
  extern StgClosure * revertible_caf_list;
diff --git a/rts/win32/AsyncIO.c b/rts/win32/AsyncIO.c

index 5dedee0..ff2e1a2 100644 (file)
--- a/rts/win32/AsyncIO.c
+++ b/rts/win32/AsyncIO.c
@@ -276,20 +276,7 @@ start:
             prev = NULL;
             for(tso = blocked_queue_hd ; tso != END_TSO_QUEUE; tso = tso->_link) {
         
-                if (tso->what_next == ThreadRelocated) {
-                    /* Drop the TSO from blocked_queue */
-                    if (prev) {
-                        setTSOLink(&MainCapability, prev, tso->_link);
-                    } else {
-                        blocked_queue_hd = tso->_link;
-                    }
-                    if (blocked_queue_tl == tso) {
-                        blocked_queue_tl = prev ? prev : END_TSO_QUEUE;
-                    }
-                    continue;
-                }
-
-               switch(tso->why_blocked) {
+                switch(tso->why_blocked) {
                 case BlockedOnRead:
                 case BlockedOnWrite:
                 case BlockedOnDoProc:
author	Simon Marlow <marlowsd@gmail.com>
	Wed, 15 Dec 2010 12:08:43 +0000 (12:08 +0000)
committer	Simon Marlow <marlowsd@gmail.com>
	Wed, 15 Dec 2010 12:08:43 +0000 (12:08 +0000)
compiler/cmm/CmmCPSGen.hs		patch \| blob \| history
compiler/codeGen/CgForeignCall.hs		patch \| blob \| history
compiler/codeGen/StgCmmForeign.hs		patch \| blob \| history
docs/users_guide/runtime_control.xml		patch \| blob \| history
includes/Cmm.h		patch \| blob \| history
includes/mkDerivedConstants.c		patch \| blob \| history
includes/rts/Constants.h		patch \| blob \| history
includes/rts/Flags.h		patch \| blob \| history
includes/rts/prof/LDV.h		patch \| blob \| history
includes/rts/storage/ClosureMacros.h		patch \| blob \| history
includes/rts/storage/ClosureTypes.h		patch \| blob \| history
includes/rts/storage/Closures.h		patch \| blob \| history
includes/rts/storage/TSO.h		patch \| blob \| history
includes/stg/MiscClosures.h		patch \| blob \| history
includes/stg/Ticky.h		patch \| blob \| history
rts/Apply.cmm		patch \| blob \| history
rts/ClosureFlags.c		patch \| blob \| history
rts/Exception.cmm		patch \| blob \| history
rts/Interpreter.c		patch \| blob \| history
rts/LdvProfile.c		patch \| blob \| history
rts/Messages.c		patch \| blob \| history
rts/Messages.h		patch \| blob \| history
rts/PrimOps.cmm		patch \| blob \| history
rts/Printer.c		patch \| blob \| history
rts/ProfHeap.c		patch \| blob \| history
rts/ProfHeap.h		patch \| blob \| history
rts/RaiseAsync.c		patch \| blob \| history
rts/RetainerProfile.c		patch \| blob \| history
rts/RtsAPI.c		patch \| blob \| history
rts/RtsFlags.c		patch \| blob \| history
rts/Schedule.c		patch \| blob \| history
rts/Schedule.h		patch \| blob \| history
rts/StgMiscClosures.cmm		patch \| blob \| history
rts/ThreadPaused.c		patch \| blob \| history
rts/Threads.c		patch \| blob \| history
rts/Threads.h		patch \| blob \| history
rts/Trace.h		patch \| blob \| history
rts/Updates.h		patch \| blob \| history
rts/posix/Select.c		patch \| blob \| history
rts/sm/BlockAlloc.c		patch \| blob \| history
rts/sm/Compact.c		patch \| blob \| history
rts/sm/Evac.c		patch \| blob \| history
rts/sm/GCAux.c		patch \| blob \| history
rts/sm/MarkWeak.c		patch \| blob \| history
rts/sm/Sanity.c		patch \| blob \| history
rts/sm/Scav.c		patch \| blob \| history
rts/sm/Storage.c		patch \| blob \| history
rts/sm/Storage.h		patch \| blob \| history
rts/win32/AsyncIO.c		patch \| blob \| history