[project @ 2004-02-12 02:04:59 by mthomas]
[ghc-hetmet.git] / ghc / includes / StgMacros.h
index 4c7be2b..bb1fcf6 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
 /* -----------------------------------------------------------------------------
- * $Id: StgMacros.h,v 1.10 1999/05/11 16:47:41 keithw Exp $
+ * $Id: StgMacros.h,v 1.57 2003/11/12 17:27:04 sof Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
  *
  * (c) The GHC Team, 1998-1999
  *
 #define EXTFUN(f)      extern StgFunPtr f(void)
 #define EXTFUN_RTS(f)  extern DLL_IMPORT_RTS StgFunPtr f(void)
 #define FN_(f)         F_ f(void)
 #define EXTFUN(f)      extern StgFunPtr f(void)
 #define EXTFUN_RTS(f)  extern DLL_IMPORT_RTS StgFunPtr f(void)
 #define FN_(f)         F_ f(void)
-#define IFN_(f)                static F_ f(void)
 #define IF_(f)         static F_ f(void)
 #define EF_(f)         extern F_ f(void)
 #define EDF_(f)                extern DLLIMPORT F_ f(void)
 
 #define IF_(f)         static F_ f(void)
 #define EF_(f)         extern F_ f(void)
 #define EDF_(f)                extern DLLIMPORT F_ f(void)
 
+#define EXTINFO_RTS    extern DLL_IMPORT_RTS const StgInfoTable
+#define ETI_RTS                extern DLL_IMPORT_RTS const StgThunkInfoTable
+
+// Info tables as generated by the compiler are simply arrays of words.
+typedef StgWord StgWordArray[];
+
 #define ED_            extern
 #define ED_            extern
-#define EDD_           extern DLLIMPORT 
+#define EDD_           extern DLLIMPORT
 #define ED_RO_         extern const
 #define ED_RO_         extern const
-#define ID_            extern
-#define ID_RO_         extern const
-#define EI_             extern const StgInfoTable
-#define EDI_            extern DLLIMPORT const StgInfoTable
-#define II_             extern const StgInfoTable
+#define ID_            static
+#define ID_RO_         static const
+#define EI_             extern StgWordArray
+#define ERI_            extern const StgRetInfoTable
+#define II_             static StgWordArray
+#define IRI_            static const StgRetInfoTable
 #define EC_            extern StgClosure
 #define EDC_           extern DLLIMPORT StgClosure
 #define EC_            extern StgClosure
 #define EDC_           extern DLLIMPORT StgClosure
-#define IC_            extern StgClosure
+#define IC_            static StgClosure
 #define ECP_(x)                extern const StgClosure *(x)[]
 #define EDCP_(x)       extern DLLIMPORT StgClosure *(x)[]
 #define ECP_(x)                extern const StgClosure *(x)[]
 #define EDCP_(x)       extern DLLIMPORT StgClosure *(x)[]
-#define ICP_(x)                extern const StgClosure *(x)[]
+#define ICP_(x)                static const StgClosure *(x)[]
 
 /* -----------------------------------------------------------------------------
 
 /* -----------------------------------------------------------------------------
-   Stack Tagging.
-
-   For a  block of non-pointer words on the stack, we precede the
-   block with a small-integer tag giving the number of non-pointer
-   words in the block.
-   -------------------------------------------------------------------------- */
-
-#ifndef DEBUG_EXTRA
-#define ARGTAG_MAX 16          /* probably arbitrary */
-#define ARG_TAG(n)  (n)
-#define ARG_SIZE(n) stgCast(StgWord,n)
-
-typedef enum {
-    REALWORLD_TAG = 0,
-    INT_TAG    = sizeofW(StgInt), 
-    INT64_TAG  = sizeofW(StgInt64), 
-    WORD_TAG   = sizeofW(StgWord), 
-    ADDR_TAG   = sizeofW(StgAddr), 
-    CHAR_TAG   = sizeofW(StgChar),
-    FLOAT_TAG  = sizeofW(StgFloat), 
-    DOUBLE_TAG = sizeofW(StgDouble), 
-    STABLE_TAG = sizeofW(StgWord), 
-} StackTag;
-
-#else /* DEBUG_EXTRA */
-
-typedef enum {
-    ILLEGAL_TAG,
-    REALWORLD_TAG,
-    INT_TAG    ,
-    INT64_TAG  ,
-    WORD_TAG   ,
-    ADDR_TAG   ,
-    CHAR_TAG   ,
-    FLOAT_TAG  ,
-    DOUBLE_TAG ,
-    STABLE_TAG ,
-    ARGTAG_MAX = DOUBLE_TAG
-} StackTag;
-
-/* putting this in a .h file generates many copies - but its only a 
- * debugging build.
- */
-static StgWord stg_arg_size[] = {
-    [REALWORLD_TAG] = 0,
-    [INT_TAG   ] = sizeofW(StgInt), 
-    [INT64_TAG ] = sizeofW(StgInt64), 
-    [WORD_TAG  ] = sizeofW(StgWord), 
-    [ADDR_TAG  ] = sizeofW(StgAddr), 
-    [CHAR_TAG  ] = sizeofW(StgChar),
-    [FLOAT_TAG ] = sizeofW(StgFloat), 
-    [DOUBLE_TAG] = sizeofW(StgDouble),
-    [STABLE_TAG] = sizeofW(StgWord)
-};
-
-#define ARG_SIZE(tag) stg_arg_size[stgCast(StgWord,tag)]
+   Entering 
 
 
-#endif /* DEBUG_EXTRA */
-
-static inline int IS_ARG_TAG( StgWord p );
-static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
-
-/* -----------------------------------------------------------------------------
-   Argument checks.
-   
-   If (Sp + <n_args>) > Su { JMP_(stg_update_PAP); }
-   
-   Sp points to the topmost used word on the stack, and Su points to
-   the most recently pushed update frame.
-
-   Remember that <n_args> must include any tagging of unboxed values.
-
-   ARGS_CHK_LOAD_NODE is for top-level functions, whose entry
-   convention doesn't require that Node is loaded with a pointer to
-   the closure.  Thus we must load node before calling stg_updatePAP if
-   the argument check fails. 
+   It isn't safe to "enter" every closure.  Functions in particular
+   have no entry code as such; their entry point contains the code to
+   apply the function.
    -------------------------------------------------------------------------- */
 
    -------------------------------------------------------------------------- */
 
-#define ARGS_CHK(n)                            \
-        if ((P_)(Sp + (n)) > (P_)Su) {         \
-               JMP_(stg_update_PAP);           \
-       }
-
-#define ARGS_CHK_LOAD_NODE(n,closure)          \
-        if ((P_)(Sp + (n)) > (P_)Su) {         \
-               R1.p = (P_)closure;             \
-               JMP_(stg_update_PAP);           \
-       }
+#define ENTER()                                        \
+ {                                             \
+ again:                                                \
+  switch (get_itbl(R1.cl)->type) {             \
+  case IND:                                    \
+  case IND_OLDGEN:                             \
+  case IND_PERM:                               \
+  case IND_OLDGEN_PERM:                                \
+  case IND_STATIC:                             \
+      R1.cl = ((StgInd *)R1.cl)->indirectee;    \
+      goto again;                              \
+  case BCO:                                    \
+  case FUN:                                    \
+  case FUN_1_0:                                        \
+  case FUN_0_1:                                        \
+  case FUN_2_0:                                        \
+  case FUN_1_1:                                        \
+  case FUN_0_2:                                        \
+  case FUN_STATIC:                             \
+  case PAP:                                    \
+      JMP_(ENTRY_CODE(Sp[0]));                 \
+  default:                                     \
+      JMP_(GET_ENTRY(R1.cl));                  \
+  }                                            \
+ }
 
 /* -----------------------------------------------------------------------------
    Heap/Stack Checks.
 
 /* -----------------------------------------------------------------------------
    Heap/Stack Checks.
@@ -165,31 +115,31 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
    in the meantime.
    ------------------------------------------------------------------------- */
 
    in the meantime.
    ------------------------------------------------------------------------- */
 
-#define STK_CHK(headroom,ret,r,layout,tag_assts)               \
-       if (Sp - headroom < SpLim) {                            \
-           EXTFUN_RTS(stg_chk_##layout);                       \
-           tag_assts                                           \
-           (r) = (P_)ret;                                      \
-           JMP_(stg_chk_##layout);                             \
+#define STK_CHK_FUN(headroom,assts)            \
+       if (Sp - headroom < SpLim) {            \
+           assts                               \
+           JMP_(stg_gc_fun);                   \
        }
        }
-       
-#define HP_CHK(headroom,ret,r,layout,tag_assts)                        \
-       if ((Hp += headroom) > HpLim) {                         \
-           EXTFUN_RTS(stg_chk_##layout);                       \
-           tag_assts                                           \
-           (r) = (P_)ret;                                      \
-           JMP_(stg_chk_##layout);                             \
-       }                                                       \
-        TICK_ALLOC_HEAP(headroom);
 
 
-#define HP_STK_CHK(stk_headroom,hp_headroom,ret,r,layout,tag_assts) \
+#define HP_CHK_FUN(headroom,assts)                                     \
+        DO_GRAN_ALLOCATE(headroom)                                     \
+       if ((Hp += headroom) > HpLim) {                                 \
+            HpAlloc = (headroom);                                      \
+           assts                                                       \
+           JMP_(stg_gc_fun);                                           \
+       }
+
+// When doing both a heap and a stack check, don't move the heap
+// pointer unless the stack check succeeds.  Otherwise we might end up
+// with slop at the end of the current block, which can confuse the
+// LDV profiler.
+#define HP_STK_CHK_FUN(stk_headroom,hp_headroom,assts)                 \
+        DO_GRAN_ALLOCATE(hp_headroom)                                  \
        if (Sp - stk_headroom < SpLim || (Hp += hp_headroom) > HpLim) { \
        if (Sp - stk_headroom < SpLim || (Hp += hp_headroom) > HpLim) { \
-           EXTFUN_RTS(stg_chk_##layout);                       \
-           tag_assts                                           \
-           (r) = (P_)ret;                                      \
-           JMP_(stg_chk_##layout);                             \
-       }                                                       \
-        TICK_ALLOC_HEAP(hp_headroom);
+            HpAlloc = (hp_headroom);                                   \
+           assts                                                       \
+           JMP_(stg_gc_fun);                                           \
+       }
 
 /* -----------------------------------------------------------------------------
    A Heap Check in a case alternative are much simpler: everything is
 
 /* -----------------------------------------------------------------------------
    A Heap Check in a case alternative are much simpler: everything is
@@ -203,49 +153,46 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
    functions.  In all these cases, node points to a closure that we
    can just enter to restart the heap check (the NP stands for 'node points').
 
    functions.  In all these cases, node points to a closure that we
    can just enter to restart the heap check (the NP stands for 'node points').
 
+   In the NP case GranSim absolutely has to check whether the current node 
+   resides on the current processor. Otherwise a FETCH event has to be
+   scheduled. All that is done in GranSimFetch. -- HWL
+
    HpLim points to the LAST WORD of valid allocation space.
    -------------------------------------------------------------------------- */
 
    HpLim points to the LAST WORD of valid allocation space.
    -------------------------------------------------------------------------- */
 
-#define STK_CHK_NP(headroom,ptrs,tag_assts)                    \
-       if ((Sp - (headroom)) < SpLim) {                        \
-           EXTFUN_RTS(stg_gc_enter_##ptrs);                    \
-            tag_assts                                          \
-           JMP_(stg_gc_enter_##ptrs);                          \
+#define STK_CHK_NP(headroom,tag_assts)         \
+       if ((Sp - (headroom)) < SpLim) {        \
+            tag_assts                          \
+           JMP_(stg_gc_enter_1);               \
        }
 
        }
 
-#define HP_CHK_NP(headroom,ptrs,tag_assts)                     \
-       if ((Hp += (headroom)) > HpLim) {                       \
-           EXTFUN_RTS(stg_gc_enter_##ptrs);                    \
-            tag_assts                                          \
-           JMP_(stg_gc_enter_##ptrs);                          \
-       }                                                       \
-        TICK_ALLOC_HEAP(headroom);
-
-#define HP_CHK_SEQ_NP(headroom,ptrs,tag_assts)                 \
-       if ((Hp += (headroom)) > HpLim) {                       \
-           EXTFUN_RTS(stg_gc_seq_##ptrs);                      \
-            tag_assts                                          \
-           JMP_(stg_gc_seq_##ptrs);                            \
-       }                                                       \
-        TICK_ALLOC_HEAP(headroom);
-
-#define HP_STK_CHK_NP(stk_headroom, hp_headroom, ptrs, tag_assts) \
+#define HP_CHK_NP(headroom,tag_assts)                                  \
+        DO_GRAN_ALLOCATE(headroom)                                     \
+       if ((Hp += (headroom)) > HpLim) {                               \
+            HpAlloc = (headroom);                                      \
+            tag_assts                                                  \
+           JMP_(stg_gc_enter_1);                                       \
+       }                                                       
+
+// See comment on HP_STK_CHK_FUN above.
+#define HP_STK_CHK_NP(stk_headroom, hp_headroom, tag_assts) \
+        DO_GRAN_ALLOCATE(hp_headroom)                              \
        if ((Sp - (stk_headroom)) < SpLim || (Hp += (hp_headroom)) > HpLim) { \
        if ((Sp - (stk_headroom)) < SpLim || (Hp += (hp_headroom)) > HpLim) { \
-           EXTFUN_RTS(stg_gc_enter_##ptrs);                    \
+            HpAlloc = (hp_headroom);                           \
             tag_assts                                          \
             tag_assts                                          \
-           JMP_(stg_gc_enter_##ptrs);                          \
-       }                                                       \
-        TICK_ALLOC_HEAP(hp_headroom);
+           JMP_(stg_gc_enter_1);                               \
+       }                                                       
+
 
 /* Heap checks for branches of a primitive case / unboxed tuple return */
 
 #define GEN_HP_CHK_ALT(headroom,lbl,tag_assts)                 \
 
 /* Heap checks for branches of a primitive case / unboxed tuple return */
 
 #define GEN_HP_CHK_ALT(headroom,lbl,tag_assts)                 \
+        DO_GRAN_ALLOCATE(headroom)                              \
        if ((Hp += (headroom)) > HpLim) {                       \
        if ((Hp += (headroom)) > HpLim) {                       \
-           EXTFUN_RTS(lbl);                                    \
+            HpAlloc = (headroom);                              \
             tag_assts                                          \
            JMP_(lbl);                                          \
             tag_assts                                          \
            JMP_(lbl);                                          \
-       }                                                       \
-        TICK_ALLOC_HEAP(headroom);
+       }                                                       
 
 #define HP_CHK_NOREGS(headroom,tag_assts) \
     GEN_HP_CHK_ALT(headroom,stg_gc_noregs,tag_assts);
 
 #define HP_CHK_NOREGS(headroom,tag_assts) \
     GEN_HP_CHK_ALT(headroom,stg_gc_noregs,tag_assts);
@@ -257,13 +204,8 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
     GEN_HP_CHK_ALT(headroom,stg_gc_f1,tag_assts);
 #define HP_CHK_D1(headroom,tag_assts)       \
     GEN_HP_CHK_ALT(headroom,stg_gc_d1,tag_assts);
     GEN_HP_CHK_ALT(headroom,stg_gc_f1,tag_assts);
 #define HP_CHK_D1(headroom,tag_assts)       \
     GEN_HP_CHK_ALT(headroom,stg_gc_d1,tag_assts);
-
 #define HP_CHK_L1(headroom,tag_assts)       \
 #define HP_CHK_L1(headroom,tag_assts)       \
-    GEN_HP_CHK_ALT(headroom,stg_gc_d1,tag_assts);
-
-#define HP_CHK_UT_ALT(headroom, ptrs, nptrs, r, ret, tag_assts) \
-    GEN_HP_CHK_ALT(headroom, stg_gc_ut_##ptrs##_##nptrs, \
-                    tag_assts r = (P_)ret;)
+    GEN_HP_CHK_ALT(headroom,stg_gc_l1,tag_assts);
 
 /* -----------------------------------------------------------------------------
    Generic Heap checks.
 
 /* -----------------------------------------------------------------------------
    Generic Heap checks.
@@ -286,32 +228,55 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
 
        - primitives (no SRT required).
 
 
        - primitives (no SRT required).
 
-   The stack layout is like this:
-
-          DblReg1-2
-         FltReg1-4
-         R1-8
-         return address
-         liveness mask
-         stg_gen_chk_info
-
-   so the liveness mask depends on the size of an StgDouble (FltRegs
-   and R<n> are guaranteed to be 1 word in size).
-
+   The stack frame layout for a RET_DYN is like this:
+
+          some pointers         |-- GET_PTRS(liveness) words
+          some nonpointers      |-- GET_NONPTRS(liveness) words
+                              
+         L1                    \
+          D1-2                  |-- RET_DYN_NONPTR_REGS_SIZE words
+         F1-4                  /
+                              
+         R1-8                  |-- RET_DYN_BITMAP_SIZE words
+                              
+         return address        \
+         liveness mask         |-- StgRetDyn structure
+         stg_gen_chk_info      /
+
+   we assume that the size of a double is always 2 pointers (wasting a
+   word when it is only one pointer, but avoiding lots of #ifdefs).
+
+   NOTE: if you change the layout of RET_DYN stack frames, then you
+   might also need to adjust the value of RESERVED_STACK_WORDS in
+   Constants.h.
    -------------------------------------------------------------------------- */
 
    -------------------------------------------------------------------------- */
 
-/* VERY MAGIC CONSTANTS! 
- * must agree with code in HeapStackCheck.c, stg_gen_chk
- */
-
-#if SIZEOF_DOUBLE > SIZEOF_VOID_P
-#define ALL_NON_PTRS   0xffff
-#else /* SIZEOF_DOUBLE == SIZEOF_VOID_P */
-#define ALL_NON_PTRS   0x3fff
+// VERY MAGIC CONSTANTS! 
+// must agree with code in HeapStackCheck.c, stg_gen_chk, and
+// RESERVED_STACK_WORDS in Constants.h.
+//
+#define RET_DYN_BITMAP_SIZE 8
+#define RET_DYN_NONPTR_REGS_SIZE 10
+#define ALL_NON_PTRS 0xff
+
+// Sanity check that RESERVED_STACK_WORDS is reasonable.  We can't
+// just derive RESERVED_STACK_WORDS because it's used in Haskell code
+// too.
+#if RESERVED_STACK_WORDS != (3 + RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE)
+#error RESERVED_STACK_WORDS may be wrong!
 #endif
 
 #define LIVENESS_MASK(ptr_regs)  (ALL_NON_PTRS ^ (ptr_regs))
 
 #endif
 
 #define LIVENESS_MASK(ptr_regs)  (ALL_NON_PTRS ^ (ptr_regs))
 
+// We can have up to 255 pointers and 255 nonpointers in the stack
+// frame.
+#define N_NONPTRS(n)  ((n)<<16)
+#define N_PTRS(n)     ((n)<<24)
+
+#define GET_NONPTRS(l) ((l)>>16 & 0xff)
+#define GET_PTRS(l)    ((l)>>24 & 0xff)
+#define GET_LIVENESS(l) ((l) & 0xffff)
+
 #define NO_PTRS   0
 #define R1_PTR   1<<0
 #define R2_PTR   1<<1
 #define NO_PTRS   0
 #define R1_PTR   1<<0
 #define R2_PTR   1<<1
@@ -321,31 +286,39 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
 #define R6_PTR   1<<5
 #define R7_PTR   1<<6
 #define R8_PTR   1<<7
 #define R6_PTR   1<<5
 #define R7_PTR   1<<6
 #define R8_PTR   1<<7
-#define HP_CHK_GEN(headroom,liveness,reentry,tag_assts)        \
+
+#define HP_CHK_UNBX_TUPLE(headroom,liveness,code)      \
    if ((Hp += (headroom)) > HpLim ) {                  \
    if ((Hp += (headroom)) > HpLim ) {                  \
-       EF_(stg_gen_chk);                               \
-        tag_assts                                      \
+        HpAlloc = (headroom);                          \
+        code                                           \
        R9.w = (W_)LIVENESS_MASK(liveness);             \
        R9.w = (W_)LIVENESS_MASK(liveness);             \
-        R10.w = (W_)reentry;                           \
-        JMP_(stg_gen_chk);                             \
-   }                                                   \
-   TICK_ALLOC_HEAP(headroom);
+        JMP_(stg_gc_ut);                               \
+    }                                                       
+
+#define HP_CHK_GEN(headroom,liveness,reentry)                  \
+   if ((Hp += (headroom)) > HpLim ) {                          \
+        HpAlloc = (headroom);                                  \
+       R9.w = (W_)LIVENESS_MASK(liveness);                     \
+        R10.w = (W_)reentry;                                   \
+        JMP_(stg_gc_gen);                                      \
+    }                                                       
 
 
-#define STK_CHK_GEN(headroom,liveness,reentry,tag_assts)       \
+#define HP_CHK_GEN_TICKY(headroom,liveness,reentry)    \
+   HP_CHK_GEN(headroom,liveness,reentry);              \
+   TICK_ALLOC_HEAP_NOCTR(headroom)
+
+#define STK_CHK_GEN(headroom,liveness,reentry) \
    if ((Sp - (headroom)) < SpLim) {                            \
    if ((Sp - (headroom)) < SpLim) {                            \
-       EF_(stg_gen_chk);                                       \
-        tag_assts                                              \
        R9.w = (W_)LIVENESS_MASK(liveness);                     \
         R10.w = (W_)reentry;                                   \
        R9.w = (W_)LIVENESS_MASK(liveness);                     \
         R10.w = (W_)reentry;                                   \
-        JMP_(stg_gen_chk);                                     \
+        JMP_(stg_gc_gen);                                      \
    }
 
 #define MAYBE_GC(liveness,reentry)             \
    if (doYouWantToGC()) {                      \
    }
 
 #define MAYBE_GC(liveness,reentry)             \
    if (doYouWantToGC()) {                      \
-       EF_(stg_gen_hp);                        \
        R9.w = (W_)LIVENESS_MASK(liveness);     \
         R10.w = (W_)reentry;                   \
        R9.w = (W_)LIVENESS_MASK(liveness);     \
         R10.w = (W_)reentry;                   \
-        JMP_(stg_gen_hp);                      \
+        JMP_(stg_gc_gen_hp);                   \
    }
 
 /* -----------------------------------------------------------------------------
    }
 
 /* -----------------------------------------------------------------------------
@@ -355,8 +328,8 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
    out to be slowing us down we can make specialised ones.
    -------------------------------------------------------------------------- */
 
    out to be slowing us down we can make specialised ones.
    -------------------------------------------------------------------------- */
 
-EF_(stg_gen_yield);
-EF_(stg_gen_block);
+EXTFUN_RTS(stg_gen_yield);
+EXTFUN_RTS(stg_gen_block);
 
 #define YIELD(liveness,reentry)                        \
   {                                            \
 
 #define YIELD(liveness,reentry)                        \
   {                                            \
@@ -374,10 +347,29 @@ EF_(stg_gen_block);
 
 #define BLOCK_NP(ptrs)                         \
   {                                            \
 
 #define BLOCK_NP(ptrs)                         \
   {                                            \
-    EF_(stg_block_##ptrs);                     \
+    EXTFUN_RTS(stg_block_##ptrs);                      \
     JMP_(stg_block_##ptrs);                    \
   }
 
     JMP_(stg_block_##ptrs);                    \
   }
 
+#if defined(PAR)
+/*
+  Similar to BLOCK_NP but separates the saving of the thread state from the
+  actual jump via an StgReturn
+*/
+
+#define SAVE_THREAD_STATE(ptrs)                  \
+  ASSERT(ptrs==1);                               \
+  Sp -= 1;                                       \
+  Sp[0] = R1.w;                                  \
+  SaveThreadState();                             
+
+#define THREAD_RETURN(ptrs)                      \
+  ASSERT(ptrs==1);                               \
+  CurrentTSO->what_next = ThreadEnterGHC;        \
+  R1.i = ThreadBlocked;                          \
+  JMP_(StgReturn);                               
+#endif
+
 /* -----------------------------------------------------------------------------
    CCall_GC needs to push a dummy stack frame containing the contents
    of volatile registers and variables.  
 /* -----------------------------------------------------------------------------
    CCall_GC needs to push a dummy stack frame containing the contents
    of volatile registers and variables.  
@@ -385,11 +377,6 @@ EF_(stg_gen_block);
    We use a RET_DYN frame the same as for a dynamic heap check.
    ------------------------------------------------------------------------- */
 
    We use a RET_DYN frame the same as for a dynamic heap check.
    ------------------------------------------------------------------------- */
 
-#if COMPILING_RTS
-EI_(stg_gen_chk_info);
-#else
-EDI_(stg_gen_chk_info);
-#endif
 /* -----------------------------------------------------------------------------
    Vectored Returns
 
 /* -----------------------------------------------------------------------------
    Vectored Returns
 
@@ -399,30 +386,87 @@ EDI_(stg_gen_chk_info);
 
    Return vectors are placed in *reverse order* immediately before the info
    table for the return address.  Hence the formula for computing the
 
    Return vectors are placed in *reverse order* immediately before the info
    table for the return address.  Hence the formula for computing the
-   actual return address is (addr - sizeof(InfoTable) - tag - 1).
+   actual return address is (addr - sizeof(RetInfoTable) - tag - 1).
    The extra subtraction of one word is because tags start at zero.
    -------------------------------------------------------------------------- */
 
    The extra subtraction of one word is because tags start at zero.
    -------------------------------------------------------------------------- */
 
-#ifdef USE_MINIINTERPRETER
-#define RET_VEC(p,t) ((*(stgCast(StgInfoTable*,p)->vector))[t])
+#ifdef TABLES_NEXT_TO_CODE
+#define RET_VEC(p,t) (*((P_)(p) - sizeofW(StgRetInfoTable) - t - 1))
 #else
 #else
-#define RET_VEC(p,t) (*((P_)(p) - sizeofW(StgInfoTable) - t - 1))
+#define RET_VEC(p,t) (((StgRetInfoTable *)p)->vector[t])
 #endif
 
 /* -----------------------------------------------------------------------------
    Misc
    -------------------------------------------------------------------------- */
 
 #endif
 
 /* -----------------------------------------------------------------------------
    Misc
    -------------------------------------------------------------------------- */
 
+
 /* set the tag register (if we have one) */
 #define SET_TAG(t)  /* nothing */
 
 #ifdef EAGER_BLACKHOLING
 /* set the tag register (if we have one) */
 #define SET_TAG(t)  /* nothing */
 
 #ifdef EAGER_BLACKHOLING
-#  define UPD_BH_UPDATABLE(thunk)                        \
-        TICK_UPD_BH_UPDATABLE();                         \
-        SET_INFO((StgClosure *)thunk,&BLACKHOLE_info)
-#  define UPD_BH_SINGLE_ENTRY(thunk)                     \
-        TICK_UPD_BH_SINGLE_ENTRY();                      \
-        SET_INFO((StgClosure *)thunk,&SE_BLACKHOLE_info)
+#  ifdef SMP
+#    define UPD_BH_UPDATABLE(info)                             \
+        TICK_UPD_BH_UPDATABLE();                               \
+        {                                                      \
+         bdescr *bd = Bdescr(R1.p);                            \
+          if (bd->u.back != (bdescr *)BaseReg) {               \
+             if (bd->gen_no >= 1 || bd->step->no >= 1) {       \
+                LOCK_THUNK(info);                              \
+             } else {                                          \
+                EXTFUN_RTS(stg_gc_enter_1_hponly);             \
+                JMP_(stg_gc_enter_1_hponly);                   \
+             }                                                 \
+          }                                                    \
+       }                                                       \
+        SET_INFO(R1.cl,&stg_BLACKHOLE_info)
+#    define UPD_BH_SINGLE_ENTRY(info)                          \
+        TICK_UPD_BH_SINGLE_ENTRY();                            \
+        {                                                      \
+         bdescr *bd = Bdescr(R1.p);                            \
+          if (bd->u.back != (bdescr *)BaseReg) {               \
+             if (bd->gen_no >= 1 || bd->step->no >= 1) {       \
+                LOCK_THUNK(info);                              \
+             } else {                                          \
+                EXTFUN_RTS(stg_gc_enter_1_hponly);             \
+                JMP_(stg_gc_enter_1_hponly);                   \
+             }                                                 \
+          }                                                    \
+       }                                                       \
+        SET_INFO(R1.cl,&stg_BLACKHOLE_info)
+#  else
+#   ifndef PROFILING
+#    define UPD_BH_UPDATABLE(info)             \
+        TICK_UPD_BH_UPDATABLE();               \
+        SET_INFO(R1.cl,&stg_BLACKHOLE_info)
+#    define UPD_BH_SINGLE_ENTRY(info)          \
+        TICK_UPD_BH_SINGLE_ENTRY();            \
+        SET_INFO(R1.cl,&stg_SE_BLACKHOLE_info)
+#   else
+// An object is replaced by a blackhole, so we fill the slop with zeros.
+// 
+// This looks like it can't work - we're overwriting the contents of
+// the THUNK with slop!  Perhaps this never worked??? --SDM
+// The problem is that with eager-black-holing we currently perform
+// the black-holing operation at the *beginning* of the basic block,
+// when we still need the contents of the thunk.
+// Perhaps the thing to do is to overwrite it at the *end* of the
+// basic block, when we've already sucked out the thunk's contents? -- SLPJ
+//
+// Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
+// 
+#    define UPD_BH_UPDATABLE(info)             \
+        TICK_UPD_BH_UPDATABLE();               \
+        LDV_recordDead_FILL_SLOP_DYNAMIC(R1.cl);               \
+        SET_INFO(R1.cl,&stg_BLACKHOLE_info);    \
+        LDV_recordCreate(R1.cl)
+#    define UPD_BH_SINGLE_ENTRY(info)          \
+        TICK_UPD_BH_SINGLE_ENTRY();            \
+        LDV_recordDead_FILL_SLOP_DYNAMIC(R1.cl);               \
+        SET_INFO(R1.cl,&stg_SE_BLACKHOLE_info)  \
+        LDV_recordCreate(R1.cl)
+#   endif /* PROFILING */
+#  endif
 #else /* !EAGER_BLACKHOLING */
 #  define UPD_BH_UPDATABLE(thunk)    /* nothing */
 #  define UPD_BH_SINGLE_ENTRY(thunk) /* nothing */
 #else /* !EAGER_BLACKHOLING */
 #  define UPD_BH_UPDATABLE(thunk)    /* nothing */
 #  define UPD_BH_SINGLE_ENTRY(thunk) /* nothing */
@@ -433,30 +477,30 @@ EDI_(stg_gen_chk_info);
 
    ASSIGN_FLT is for assigning a float to memory (usually the
               stack/heap).  The memory address is guaranteed to be
 
    ASSIGN_FLT is for assigning a float to memory (usually the
               stack/heap).  The memory address is guaranteed to be
-             StgWord aligned (currently == sizeof(long)).
+             StgWord aligned (currently == sizeof(void *)).
 
    PK_FLT     is for pulling a float out of memory.  The memory is
               guaranteed to be StgWord aligned.
    -------------------------------------------------------------------------- */
 
 
    PK_FLT     is for pulling a float out of memory.  The memory is
               guaranteed to be StgWord aligned.
    -------------------------------------------------------------------------- */
 
-static inline void       ASSIGN_FLT (W_ [], StgFloat);
-static inline StgFloat    PK_FLT     (W_ []);
+INLINE_HEADER void       ASSIGN_FLT (W_ [], StgFloat);
+INLINE_HEADER StgFloat    PK_FLT     (W_ []);
 
 #if ALIGNMENT_FLOAT <= ALIGNMENT_LONG
 
 
 #if ALIGNMENT_FLOAT <= ALIGNMENT_LONG
 
-static inline void     ASSIGN_FLT(W_ p_dest[], StgFloat src) { *(StgFloat *)p_dest = src; }
-static inline StgFloat PK_FLT    (W_ p_src[])                { return *(StgFloat *)p_src; }
+INLINE_HEADER void     ASSIGN_FLT(W_ p_dest[], StgFloat src) { *(StgFloat *)p_dest = src; }
+INLINE_HEADER StgFloat PK_FLT    (W_ p_src[])                { return *(StgFloat *)p_src; }
 
 #else  /* ALIGNMENT_FLOAT > ALIGNMENT_UNSIGNED_INT */
 
 
 #else  /* ALIGNMENT_FLOAT > ALIGNMENT_UNSIGNED_INT */
 
-static inline void ASSIGN_FLT(W_ p_dest[], StgFloat src)
+INLINE_HEADER void ASSIGN_FLT(W_ p_dest[], StgFloat src)
 {
     float_thing y;
     y.f = src;
     *p_dest = y.fu;
 }
 
 {
     float_thing y;
     y.f = src;
     *p_dest = y.fu;
 }
 
-static inline StgFloat PK_FLT(W_ p_src[])
+INLINE_HEADER StgFloat PK_FLT(W_ p_src[])
 {
     float_thing y;
     y.fu = *p_src;
 {
     float_thing y;
     y.fu = *p_src;
@@ -467,11 +511,11 @@ static inline StgFloat PK_FLT(W_ p_src[])
 
 #if ALIGNMENT_DOUBLE <= ALIGNMENT_LONG
 
 
 #if ALIGNMENT_DOUBLE <= ALIGNMENT_LONG
 
-static inline void       ASSIGN_DBL (W_ [], StgDouble);
-static inline StgDouble   PK_DBL     (W_ []);
+INLINE_HEADER void       ASSIGN_DBL (W_ [], StgDouble);
+INLINE_HEADER StgDouble   PK_DBL     (W_ []);
 
 
-static inline void      ASSIGN_DBL(W_ p_dest[], StgDouble src) { *(StgDouble *)p_dest = src; }
-static inline StgDouble PK_DBL    (W_ p_src[])                 { return *(StgDouble *)p_src; }
+INLINE_HEADER void      ASSIGN_DBL(W_ p_dest[], StgDouble src) { *(StgDouble *)p_dest = src; }
+INLINE_HEADER StgDouble PK_DBL    (W_ p_src[])                 { return *(StgDouble *)p_src; }
 
 #else  /* ALIGNMENT_DOUBLE > ALIGNMENT_LONG */
 
 
 #else  /* ALIGNMENT_DOUBLE > ALIGNMENT_LONG */
 
@@ -497,8 +541,8 @@ static inline StgDouble PK_DBL    (W_ p_src[])                 { return *(StgDou
 
 #else /* ! sparc_TARGET_ARCH */
 
 
 #else /* ! sparc_TARGET_ARCH */
 
-static inline void       ASSIGN_DBL (W_ [], StgDouble);
-static inline StgDouble   PK_DBL     (W_ []);
+INLINE_HEADER void       ASSIGN_DBL (W_ [], StgDouble);
+INLINE_HEADER StgDouble   PK_DBL     (W_ []);
 
 typedef struct
   { StgWord dhi;
 
 typedef struct
   { StgWord dhi;
@@ -510,7 +554,7 @@ typedef union
     unpacked_double du;
   } double_thing;
 
     unpacked_double du;
   } double_thing;
 
-static inline void ASSIGN_DBL(W_ p_dest[], StgDouble src)
+INLINE_HEADER void ASSIGN_DBL(W_ p_dest[], StgDouble src)
 {
     double_thing y;
     y.d = src;
 {
     double_thing y;
     y.d = src;
@@ -526,7 +570,7 @@ static inline void ASSIGN_DBL(W_ p_dest[], StgDouble src)
        *(p_dest+1) = ((double_thing) src).du.dlo \
 */
 
        *(p_dest+1) = ((double_thing) src).du.dlo \
 */
 
-static inline StgDouble PK_DBL(W_ p_src[])
+INLINE_HEADER StgDouble PK_DBL(W_ p_src[])
 {
     double_thing y;
     y.du.dhi = p_src[0];
 {
     double_thing y;
     y.du.dhi = p_src[0];
@@ -555,7 +599,7 @@ typedef union
     unpacked_double_word wu;
   } word64_thing;
 
     unpacked_double_word wu;
   } word64_thing;
 
-static inline void ASSIGN_Word64(W_ p_dest[], StgWord64 src)
+INLINE_HEADER void ASSIGN_Word64(W_ p_dest[], StgWord64 src)
 {
     word64_thing y;
     y.w = src;
 {
     word64_thing y;
     y.w = src;
@@ -563,7 +607,7 @@ static inline void ASSIGN_Word64(W_ p_dest[], StgWord64 src)
     p_dest[1] = y.wu.dlo;
 }
 
     p_dest[1] = y.wu.dlo;
 }
 
-static inline StgWord64 PK_Word64(W_ p_src[])
+INLINE_HEADER StgWord64 PK_Word64(W_ p_src[])
 {
     word64_thing y;
     y.wu.dhi = p_src[0];
 {
     word64_thing y;
     y.wu.dhi = p_src[0];
@@ -571,7 +615,7 @@ static inline StgWord64 PK_Word64(W_ p_src[])
     return(y.w);
 }
 
     return(y.w);
 }
 
-static inline void ASSIGN_Int64(W_ p_dest[], StgInt64 src)
+INLINE_HEADER void ASSIGN_Int64(W_ p_dest[], StgInt64 src)
 {
     int64_thing y;
     y.i = src;
 {
     int64_thing y;
     y.i = src;
@@ -579,59 +623,76 @@ static inline void ASSIGN_Int64(W_ p_dest[], StgInt64 src)
     p_dest[1] = y.iu.dlo;
 }
 
     p_dest[1] = y.iu.dlo;
 }
 
-static inline StgInt64 PK_Int64(W_ p_src[])
+INLINE_HEADER StgInt64 PK_Int64(W_ p_src[])
 {
     int64_thing y;
     y.iu.dhi = p_src[0];
     y.iu.dlo = p_src[1];
     return(y.i);
 }
 {
     int64_thing y;
     y.iu.dhi = p_src[0];
     y.iu.dlo = p_src[1];
     return(y.i);
 }
-#endif
 
 
-/* -----------------------------------------------------------------------------
-   Catch frames
-   -------------------------------------------------------------------------- */
+#elif SIZEOF_VOID_P == 8
 
 
-extern DLL_IMPORT_DATA const StgPolyInfoTable catch_frame_info;
+INLINE_HEADER void ASSIGN_Word64(W_ p_dest[], StgWord64 src)
+{
+       p_dest[0] = src;
+}
 
 
-/* -----------------------------------------------------------------------------
-   Seq frames
+INLINE_HEADER StgWord64 PK_Word64(W_ p_src[])
+{
+    return p_src[0];
+}
 
 
-   A seq frame is very like an update frame, except that it doesn't do
-   an update...
-   -------------------------------------------------------------------------- */
+INLINE_HEADER void ASSIGN_Int64(W_ p_dest[], StgInt64 src)
+{
+    p_dest[0] = src;
+}
 
 
-extern DLL_IMPORT_DATA const StgPolyInfoTable seq_frame_info;
+INLINE_HEADER StgInt64 PK_Int64(W_ p_src[])
+{
+    return p_src[0];
+}
 
 
-#define PUSH_SEQ_FRAME(sp)                                     \
-       {                                                       \
-               StgSeqFrame *__frame;                           \
-               TICK_SEQF_PUSHED();                             \
-               __frame = (StgSeqFrame *)(sp);                  \
-               SET_HDR_(__frame,&seq_frame_info,CCCS);         \
-               __frame->link = Su;                             \
-               Su = (StgUpdateFrame *)__frame;                 \
-       }
+#endif
+
+/* -----------------------------------------------------------------------------
+   Catch frames
+   -------------------------------------------------------------------------- */
+
+extern DLL_IMPORT_RTS const StgPolyInfoTable stg_catch_frame_info;
 
 /* -----------------------------------------------------------------------------
    Split markers
    -------------------------------------------------------------------------- */
 
 #if defined(USE_SPLIT_MARKERS)
 
 /* -----------------------------------------------------------------------------
    Split markers
    -------------------------------------------------------------------------- */
 
 #if defined(USE_SPLIT_MARKERS)
-#define __STG_SPLIT_MARKER(n) FN_(__stg_split_marker##n) { }
+#if defined(LEADING_UNDERSCORE)
+#define __STG_SPLIT_MARKER __asm__("\n___stg_split_marker:");
+#else
+#define __STG_SPLIT_MARKER __asm__("\n__stg_split_marker:");
+#endif
 #else
 #else
-#define __STG_SPLIT_MARKER(n) /* nothing */
+#define __STG_SPLIT_MARKER /* nothing */
 #endif
 
 /* -----------------------------------------------------------------------------
    Closure and Info Macros with casting.
 
    We don't want to mess around with casts in the generated C code, so
 #endif
 
 /* -----------------------------------------------------------------------------
    Closure and Info Macros with casting.
 
    We don't want to mess around with casts in the generated C code, so
-   we use these casting versions of the closure/info tables macros.
+   we use this casting versions of the closure macro.
+
+   This version of SET_HDR also includes CCS_ALLOC for profiling - the
+   reason we don't use two separate macros is that the cost centre
+   field is sometimes a non-simple expression and we want to share its
+   value between SET_HDR and CCS_ALLOC.
    -------------------------------------------------------------------------- */
 
    -------------------------------------------------------------------------- */
 
-#define SET_HDR_(c,info,ccs) \
-   SET_HDR((StgClosure *)(c),(StgInfoTable *)(info),ccs)
+#define SET_HDR_(c,info,ccs,size)                              \
+  {                                                            \
+      CostCentreStack *tmp = (ccs);                            \
+      SET_HDR((StgClosure *)(c),(StgInfoTable *)(info),tmp);   \
+      CCS_ALLOC(tmp,size);                                     \
+  }
 
 /* -----------------------------------------------------------------------------
    Saving context for exit from the STG world, and loading up context
 
 /* -----------------------------------------------------------------------------
    Saving context for exit from the STG world, and loading up context
@@ -640,41 +701,151 @@ extern DLL_IMPORT_DATA const StgPolyInfoTable seq_frame_info;
    We save all the STG registers (that is, the ones that are mapped to
    machine registers) in their places in the TSO.  
 
    We save all the STG registers (that is, the ones that are mapped to
    machine registers) in their places in the TSO.  
 
-   The stack registers go into the current stack object, and the heap
-   registers are saved in global locations.
+   The stack registers go into the current stack object, and the
+   current nursery is updated from the heap pointer.
+
+   These functions assume that BaseReg is loaded appropriately (if
+   we have one).
    -------------------------------------------------------------------------- */
 
    -------------------------------------------------------------------------- */
 
-static __inline__ void
+#if IN_STG_CODE
+
+INLINE_HEADER void
 SaveThreadState(void)
 {
 SaveThreadState(void)
 {
+  StgTSO *tso;
+
   /* Don't need to save REG_Base, it won't have changed. */
 
   /* Don't need to save REG_Base, it won't have changed. */
 
-  CurrentTSO->sp       = Sp;
-  CurrentTSO->su       = Su;
-  CurrentTSO->splim    = SpLim;
+  tso = CurrentTSO;
+  tso->sp       = Sp;
   CloseNursery(Hp);
 
   CloseNursery(Hp);
 
+#ifdef REG_CurrentTSO
+  SAVE_CurrentTSO = tso;
+#endif
+#ifdef REG_CurrentNursery
+  SAVE_CurrentNursery = CurrentNursery;
+#endif
 #if defined(PROFILING)
   CurrentTSO->prof.CCCS = CCCS;
 #endif
 }
 
 #if defined(PROFILING)
   CurrentTSO->prof.CCCS = CCCS;
 #endif
 }
 
-static __inline__ void 
+INLINE_HEADER void 
 LoadThreadState (void)
 {
 LoadThreadState (void)
 {
-#ifdef REG_Base
-  BaseReg = (StgRegTable*)&MainRegTable;
+  StgTSO *tso;
+
+#ifdef REG_CurrentTSO
+  CurrentTSO = SAVE_CurrentTSO;
 #endif
 
 #endif
 
-  Sp    = CurrentTSO->sp;
-  Su    = CurrentTSO->su;
-  SpLim = CurrentTSO->splim;
+  tso = CurrentTSO;
+  Sp    = tso->sp;
+  SpLim = (P_)&(tso->stack) + RESERVED_STACK_WORDS;
   OpenNursery(Hp,HpLim);
 
   OpenNursery(Hp,HpLim);
 
+#ifdef REG_CurrentNursery
+  CurrentNursery = SAVE_CurrentNursery;
+#endif
 # if defined(PROFILING)
   CCCS = CurrentTSO->prof.CCCS;
 # endif
 }
 
 # if defined(PROFILING)
   CCCS = CurrentTSO->prof.CCCS;
 # endif
 }
 
+#endif
+
+/* -----------------------------------------------------------------------------
+   Module initialisation
+
+   The module initialisation code looks like this, roughly:
+
+       FN(__stginit_Foo) {
+         JMP_(__stginit_Foo_1_p)
+       }
+
+       FN(__stginit_Foo_1_p) {
+       ...
+       }
+
+   We have one version of the init code with a module version and the
+   'way' attached to it.  The version number helps to catch cases
+   where modules are not compiled in dependency order before being
+   linked: if a module has been compiled since any modules which depend on
+   it, then the latter modules will refer to a different version in their
+   init blocks and a link error will ensue.
+
+   The 'way' suffix helps to catch cases where modules compiled in different
+   ways are linked together (eg. profiled and non-profiled).
+
+   We provide a plain, unadorned, version of the module init code
+   which just jumps to the version with the label and way attached.  The
+   reason for this is that when using foreign exports, the caller of
+   startupHaskell() must supply the name of the init function for the "top"
+   module in the program, and we don't want to require that this name
+   has the version and way info appended to it.
+   -------------------------------------------------------------------------- */
+
+#define PUSH_INIT_STACK(reg_function)          \
+       *(Sp++) = (W_)reg_function
+
+#define POP_INIT_STACK()                       \
+       *(--Sp)
+
+#define MOD_INIT_WRAPPER(label,real_init)      \
+
+
+#define START_MOD_INIT(plain_lbl, real_lbl)    \
+       static int _module_registered = 0;      \
+       EF_(real_lbl);                          \
+       FN_(plain_lbl) {                        \
+            FB_                                        \
+            JMP_(real_lbl);                    \
+           FE_                                 \
+        }                                      \
+       FN_(real_lbl) {                 \
+           FB_;                                \
+           if (! _module_registered) {         \
+               _module_registered = 1;         \
+               { 
+           /* extern decls go here, followed by init code */
+
+#define REGISTER_FOREIGN_EXPORT(reg_fe_binder) \
+        STGCALL1(getStablePtr,reg_fe_binder)
+       
+#define REGISTER_IMPORT(reg_mod_name)          \
+        PUSH_INIT_STACK(reg_mod_name)
+
+#define END_MOD_INIT()                         \
+        }};                                    \
+       JMP_(POP_INIT_STACK());                 \
+       FE_ }
+
+/* -----------------------------------------------------------------------------
+   Support for _ccall_GC_ and _casm_GC.
+   -------------------------------------------------------------------------- */
+
+/* 
+ * Suspending/resuming threads for doing external C-calls (_ccall_GC).
+ * These functions are defined in rts/Schedule.c.
+ */
+StgInt        suspendThread ( StgRegTable *, rtsBool);
+StgRegTable * resumeThread  ( StgInt, rtsBool );
+
+#define SUSPEND_THREAD(token,threaded)         \
+   SaveThreadState();                          \
+   token = suspendThread(BaseReg,threaded);
+
+#ifdef SMP
+#define RESUME_THREAD(token,threaded)          \
+    BaseReg = resumeThread(token,threaded);    \
+    LoadThreadState();
+#else
+#define RESUME_THREAD(token,threaded)          \
+   (void)resumeThread(token,threaded);         \
+   LoadThreadState();
+#endif
+
 #endif /* STGMACROS_H */
 
 #endif /* STGMACROS_H */