[project @ 2004-02-12 02:04:59 by mthomas]
[ghc-hetmet.git] / ghc / includes / StgMacros.h
index 4c7be2b..bb1fcf6 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: StgMacros.h,v 1.10 1999/05/11 16:47:41 keithw Exp $
+ * $Id: StgMacros.h,v 1.57 2003/11/12 17:27:04 sof Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
 #define EXTFUN(f)      extern StgFunPtr f(void)
 #define EXTFUN_RTS(f)  extern DLL_IMPORT_RTS StgFunPtr f(void)
 #define FN_(f)         F_ f(void)
-#define IFN_(f)                static F_ f(void)
 #define IF_(f)         static F_ f(void)
 #define EF_(f)         extern F_ f(void)
 #define EDF_(f)                extern DLLIMPORT F_ f(void)
 
+#define EXTINFO_RTS    extern DLL_IMPORT_RTS const StgInfoTable
+#define ETI_RTS                extern DLL_IMPORT_RTS const StgThunkInfoTable
+
+// Info tables as generated by the compiler are simply arrays of words.
+typedef StgWord StgWordArray[];
+
 #define ED_            extern
-#define EDD_           extern DLLIMPORT 
+#define EDD_           extern DLLIMPORT
 #define ED_RO_         extern const
-#define ID_            extern
-#define ID_RO_         extern const
-#define EI_             extern const StgInfoTable
-#define EDI_            extern DLLIMPORT const StgInfoTable
-#define II_             extern const StgInfoTable
+#define ID_            static
+#define ID_RO_         static const
+#define EI_             extern StgWordArray
+#define ERI_            extern const StgRetInfoTable
+#define II_             static StgWordArray
+#define IRI_            static const StgRetInfoTable
 #define EC_            extern StgClosure
 #define EDC_           extern DLLIMPORT StgClosure
-#define IC_            extern StgClosure
+#define IC_            static StgClosure
 #define ECP_(x)                extern const StgClosure *(x)[]
 #define EDCP_(x)       extern DLLIMPORT StgClosure *(x)[]
-#define ICP_(x)                extern const StgClosure *(x)[]
+#define ICP_(x)                static const StgClosure *(x)[]
 
 /* -----------------------------------------------------------------------------
-   Stack Tagging.
-
-   For a  block of non-pointer words on the stack, we precede the
-   block with a small-integer tag giving the number of non-pointer
-   words in the block.
-   -------------------------------------------------------------------------- */
-
-#ifndef DEBUG_EXTRA
-#define ARGTAG_MAX 16          /* probably arbitrary */
-#define ARG_TAG(n)  (n)
-#define ARG_SIZE(n) stgCast(StgWord,n)
-
-typedef enum {
-    REALWORLD_TAG = 0,
-    INT_TAG    = sizeofW(StgInt), 
-    INT64_TAG  = sizeofW(StgInt64), 
-    WORD_TAG   = sizeofW(StgWord), 
-    ADDR_TAG   = sizeofW(StgAddr), 
-    CHAR_TAG   = sizeofW(StgChar),
-    FLOAT_TAG  = sizeofW(StgFloat), 
-    DOUBLE_TAG = sizeofW(StgDouble), 
-    STABLE_TAG = sizeofW(StgWord), 
-} StackTag;
-
-#else /* DEBUG_EXTRA */
-
-typedef enum {
-    ILLEGAL_TAG,
-    REALWORLD_TAG,
-    INT_TAG    ,
-    INT64_TAG  ,
-    WORD_TAG   ,
-    ADDR_TAG   ,
-    CHAR_TAG   ,
-    FLOAT_TAG  ,
-    DOUBLE_TAG ,
-    STABLE_TAG ,
-    ARGTAG_MAX = DOUBLE_TAG
-} StackTag;
-
-/* putting this in a .h file generates many copies - but its only a 
- * debugging build.
- */
-static StgWord stg_arg_size[] = {
-    [REALWORLD_TAG] = 0,
-    [INT_TAG   ] = sizeofW(StgInt), 
-    [INT64_TAG ] = sizeofW(StgInt64), 
-    [WORD_TAG  ] = sizeofW(StgWord), 
-    [ADDR_TAG  ] = sizeofW(StgAddr), 
-    [CHAR_TAG  ] = sizeofW(StgChar),
-    [FLOAT_TAG ] = sizeofW(StgFloat), 
-    [DOUBLE_TAG] = sizeofW(StgDouble),
-    [STABLE_TAG] = sizeofW(StgWord)
-};
-
-#define ARG_SIZE(tag) stg_arg_size[stgCast(StgWord,tag)]
+   Entering 
 
-#endif /* DEBUG_EXTRA */
-
-static inline int IS_ARG_TAG( StgWord p );
-static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
-
-/* -----------------------------------------------------------------------------
-   Argument checks.
-   
-   If (Sp + <n_args>) > Su { JMP_(stg_update_PAP); }
-   
-   Sp points to the topmost used word on the stack, and Su points to
-   the most recently pushed update frame.
-
-   Remember that <n_args> must include any tagging of unboxed values.
-
-   ARGS_CHK_LOAD_NODE is for top-level functions, whose entry
-   convention doesn't require that Node is loaded with a pointer to
-   the closure.  Thus we must load node before calling stg_updatePAP if
-   the argument check fails. 
+   It isn't safe to "enter" every closure.  Functions in particular
+   have no entry code as such; their entry point contains the code to
+   apply the function.
    -------------------------------------------------------------------------- */
 
-#define ARGS_CHK(n)                            \
-        if ((P_)(Sp + (n)) > (P_)Su) {         \
-               JMP_(stg_update_PAP);           \
-       }
-
-#define ARGS_CHK_LOAD_NODE(n,closure)          \
-        if ((P_)(Sp + (n)) > (P_)Su) {         \
-               R1.p = (P_)closure;             \
-               JMP_(stg_update_PAP);           \
-       }
+#define ENTER()                                        \
+ {                                             \
+ again:                                                \
+  switch (get_itbl(R1.cl)->type) {             \
+  case IND:                                    \
+  case IND_OLDGEN:                             \
+  case IND_PERM:                               \
+  case IND_OLDGEN_PERM:                                \
+  case IND_STATIC:                             \
+      R1.cl = ((StgInd *)R1.cl)->indirectee;    \
+      goto again;                              \
+  case BCO:                                    \
+  case FUN:                                    \
+  case FUN_1_0:                                        \
+  case FUN_0_1:                                        \
+  case FUN_2_0:                                        \
+  case FUN_1_1:                                        \
+  case FUN_0_2:                                        \
+  case FUN_STATIC:                             \
+  case PAP:                                    \
+      JMP_(ENTRY_CODE(Sp[0]));                 \
+  default:                                     \
+      JMP_(GET_ENTRY(R1.cl));                  \
+  }                                            \
+ }
 
 /* -----------------------------------------------------------------------------
    Heap/Stack Checks.
@@ -165,31 +115,31 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
    in the meantime.
    ------------------------------------------------------------------------- */
 
-#define STK_CHK(headroom,ret,r,layout,tag_assts)               \
-       if (Sp - headroom < SpLim) {                            \
-           EXTFUN_RTS(stg_chk_##layout);                       \
-           tag_assts                                           \
-           (r) = (P_)ret;                                      \
-           JMP_(stg_chk_##layout);                             \
+#define STK_CHK_FUN(headroom,assts)            \
+       if (Sp - headroom < SpLim) {            \
+           assts                               \
+           JMP_(stg_gc_fun);                   \
        }
-       
-#define HP_CHK(headroom,ret,r,layout,tag_assts)                        \
-       if ((Hp += headroom) > HpLim) {                         \
-           EXTFUN_RTS(stg_chk_##layout);                       \
-           tag_assts                                           \
-           (r) = (P_)ret;                                      \
-           JMP_(stg_chk_##layout);                             \
-       }                                                       \
-        TICK_ALLOC_HEAP(headroom);
 
-#define HP_STK_CHK(stk_headroom,hp_headroom,ret,r,layout,tag_assts) \
+#define HP_CHK_FUN(headroom,assts)                                     \
+        DO_GRAN_ALLOCATE(headroom)                                     \
+       if ((Hp += headroom) > HpLim) {                                 \
+            HpAlloc = (headroom);                                      \
+           assts                                                       \
+           JMP_(stg_gc_fun);                                           \
+       }
+
+// When doing both a heap and a stack check, don't move the heap
+// pointer unless the stack check succeeds.  Otherwise we might end up
+// with slop at the end of the current block, which can confuse the
+// LDV profiler.
+#define HP_STK_CHK_FUN(stk_headroom,hp_headroom,assts)                 \
+        DO_GRAN_ALLOCATE(hp_headroom)                                  \
        if (Sp - stk_headroom < SpLim || (Hp += hp_headroom) > HpLim) { \
-           EXTFUN_RTS(stg_chk_##layout);                       \
-           tag_assts                                           \
-           (r) = (P_)ret;                                      \
-           JMP_(stg_chk_##layout);                             \
-       }                                                       \
-        TICK_ALLOC_HEAP(hp_headroom);
+            HpAlloc = (hp_headroom);                                   \
+           assts                                                       \
+           JMP_(stg_gc_fun);                                           \
+       }
 
 /* -----------------------------------------------------------------------------
    A Heap Check in a case alternative are much simpler: everything is
@@ -203,49 +153,46 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
    functions.  In all these cases, node points to a closure that we
    can just enter to restart the heap check (the NP stands for 'node points').
 
+   In the NP case GranSim absolutely has to check whether the current node 
+   resides on the current processor. Otherwise a FETCH event has to be
+   scheduled. All that is done in GranSimFetch. -- HWL
+
    HpLim points to the LAST WORD of valid allocation space.
    -------------------------------------------------------------------------- */
 
-#define STK_CHK_NP(headroom,ptrs,tag_assts)                    \
-       if ((Sp - (headroom)) < SpLim) {                        \
-           EXTFUN_RTS(stg_gc_enter_##ptrs);                    \
-            tag_assts                                          \
-           JMP_(stg_gc_enter_##ptrs);                          \
+#define STK_CHK_NP(headroom,tag_assts)         \
+       if ((Sp - (headroom)) < SpLim) {        \
+            tag_assts                          \
+           JMP_(stg_gc_enter_1);               \
        }
 
-#define HP_CHK_NP(headroom,ptrs,tag_assts)                     \
-       if ((Hp += (headroom)) > HpLim) {                       \
-           EXTFUN_RTS(stg_gc_enter_##ptrs);                    \
-            tag_assts                                          \
-           JMP_(stg_gc_enter_##ptrs);                          \
-       }                                                       \
-        TICK_ALLOC_HEAP(headroom);
-
-#define HP_CHK_SEQ_NP(headroom,ptrs,tag_assts)                 \
-       if ((Hp += (headroom)) > HpLim) {                       \
-           EXTFUN_RTS(stg_gc_seq_##ptrs);                      \
-            tag_assts                                          \
-           JMP_(stg_gc_seq_##ptrs);                            \
-       }                                                       \
-        TICK_ALLOC_HEAP(headroom);
-
-#define HP_STK_CHK_NP(stk_headroom, hp_headroom, ptrs, tag_assts) \
+#define HP_CHK_NP(headroom,tag_assts)                                  \
+        DO_GRAN_ALLOCATE(headroom)                                     \
+       if ((Hp += (headroom)) > HpLim) {                               \
+            HpAlloc = (headroom);                                      \
+            tag_assts                                                  \
+           JMP_(stg_gc_enter_1);                                       \
+       }                                                       
+
+// See comment on HP_STK_CHK_FUN above.
+#define HP_STK_CHK_NP(stk_headroom, hp_headroom, tag_assts) \
+        DO_GRAN_ALLOCATE(hp_headroom)                              \
        if ((Sp - (stk_headroom)) < SpLim || (Hp += (hp_headroom)) > HpLim) { \
-           EXTFUN_RTS(stg_gc_enter_##ptrs);                    \
+            HpAlloc = (hp_headroom);                           \
             tag_assts                                          \
-           JMP_(stg_gc_enter_##ptrs);                          \
-       }                                                       \
-        TICK_ALLOC_HEAP(hp_headroom);
+           JMP_(stg_gc_enter_1);                               \
+       }                                                       
+
 
 /* Heap checks for branches of a primitive case / unboxed tuple return */
 
 #define GEN_HP_CHK_ALT(headroom,lbl,tag_assts)                 \
+        DO_GRAN_ALLOCATE(headroom)                              \
        if ((Hp += (headroom)) > HpLim) {                       \
-           EXTFUN_RTS(lbl);                                    \
+            HpAlloc = (headroom);                              \
             tag_assts                                          \
            JMP_(lbl);                                          \
-       }                                                       \
-        TICK_ALLOC_HEAP(headroom);
+       }                                                       
 
 #define HP_CHK_NOREGS(headroom,tag_assts) \
     GEN_HP_CHK_ALT(headroom,stg_gc_noregs,tag_assts);
@@ -257,13 +204,8 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
     GEN_HP_CHK_ALT(headroom,stg_gc_f1,tag_assts);
 #define HP_CHK_D1(headroom,tag_assts)       \
     GEN_HP_CHK_ALT(headroom,stg_gc_d1,tag_assts);
-
 #define HP_CHK_L1(headroom,tag_assts)       \
-    GEN_HP_CHK_ALT(headroom,stg_gc_d1,tag_assts);
-
-#define HP_CHK_UT_ALT(headroom, ptrs, nptrs, r, ret, tag_assts) \
-    GEN_HP_CHK_ALT(headroom, stg_gc_ut_##ptrs##_##nptrs, \
-                    tag_assts r = (P_)ret;)
+    GEN_HP_CHK_ALT(headroom,stg_gc_l1,tag_assts);
 
 /* -----------------------------------------------------------------------------
    Generic Heap checks.
@@ -286,32 +228,55 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
 
        - primitives (no SRT required).
 
-   The stack layout is like this:
-
-          DblReg1-2
-         FltReg1-4
-         R1-8
-         return address
-         liveness mask
-         stg_gen_chk_info
-
-   so the liveness mask depends on the size of an StgDouble (FltRegs
-   and R<n> are guaranteed to be 1 word in size).
-
+   The stack frame layout for a RET_DYN is like this:
+
+          some pointers         |-- GET_PTRS(liveness) words
+          some nonpointers      |-- GET_NONPTRS(liveness) words
+                              
+         L1                    \
+          D1-2                  |-- RET_DYN_NONPTR_REGS_SIZE words
+         F1-4                  /
+                              
+         R1-8                  |-- RET_DYN_BITMAP_SIZE words
+                              
+         return address        \
+         liveness mask         |-- StgRetDyn structure
+         stg_gen_chk_info      /
+
+   we assume that the size of a double is always 2 pointers (wasting a
+   word when it is only one pointer, but avoiding lots of #ifdefs).
+
+   NOTE: if you change the layout of RET_DYN stack frames, then you
+   might also need to adjust the value of RESERVED_STACK_WORDS in
+   Constants.h.
    -------------------------------------------------------------------------- */
 
-/* VERY MAGIC CONSTANTS! 
- * must agree with code in HeapStackCheck.c, stg_gen_chk
- */
-
-#if SIZEOF_DOUBLE > SIZEOF_VOID_P
-#define ALL_NON_PTRS   0xffff
-#else /* SIZEOF_DOUBLE == SIZEOF_VOID_P */
-#define ALL_NON_PTRS   0x3fff
+// VERY MAGIC CONSTANTS! 
+// must agree with code in HeapStackCheck.c, stg_gen_chk, and
+// RESERVED_STACK_WORDS in Constants.h.
+//
+#define RET_DYN_BITMAP_SIZE 8
+#define RET_DYN_NONPTR_REGS_SIZE 10
+#define ALL_NON_PTRS 0xff
+
+// Sanity check that RESERVED_STACK_WORDS is reasonable.  We can't
+// just derive RESERVED_STACK_WORDS because it's used in Haskell code
+// too.
+#if RESERVED_STACK_WORDS != (3 + RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE)
+#error RESERVED_STACK_WORDS may be wrong!
 #endif
 
 #define LIVENESS_MASK(ptr_regs)  (ALL_NON_PTRS ^ (ptr_regs))
 
+// We can have up to 255 pointers and 255 nonpointers in the stack
+// frame.
+#define N_NONPTRS(n)  ((n)<<16)
+#define N_PTRS(n)     ((n)<<24)
+
+#define GET_NONPTRS(l) ((l)>>16 & 0xff)
+#define GET_PTRS(l)    ((l)>>24 & 0xff)
+#define GET_LIVENESS(l) ((l) & 0xffff)
+
 #define NO_PTRS   0
 #define R1_PTR   1<<0
 #define R2_PTR   1<<1
@@ -321,31 +286,39 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
 #define R6_PTR   1<<5
 #define R7_PTR   1<<6
 #define R8_PTR   1<<7
-#define HP_CHK_GEN(headroom,liveness,reentry,tag_assts)        \
+
+#define HP_CHK_UNBX_TUPLE(headroom,liveness,code)      \
    if ((Hp += (headroom)) > HpLim ) {                  \
-       EF_(stg_gen_chk);                               \
-        tag_assts                                      \
+        HpAlloc = (headroom);                          \
+        code                                           \
        R9.w = (W_)LIVENESS_MASK(liveness);             \
-        R10.w = (W_)reentry;                           \
-        JMP_(stg_gen_chk);                             \
-   }                                                   \
-   TICK_ALLOC_HEAP(headroom);
+        JMP_(stg_gc_ut);                               \
+    }                                                       
+
+#define HP_CHK_GEN(headroom,liveness,reentry)                  \
+   if ((Hp += (headroom)) > HpLim ) {                          \
+        HpAlloc = (headroom);                                  \
+       R9.w = (W_)LIVENESS_MASK(liveness);                     \
+        R10.w = (W_)reentry;                                   \
+        JMP_(stg_gc_gen);                                      \
+    }                                                       
 
-#define STK_CHK_GEN(headroom,liveness,reentry,tag_assts)       \
+#define HP_CHK_GEN_TICKY(headroom,liveness,reentry)    \
+   HP_CHK_GEN(headroom,liveness,reentry);              \
+   TICK_ALLOC_HEAP_NOCTR(headroom)
+
+#define STK_CHK_GEN(headroom,liveness,reentry) \
    if ((Sp - (headroom)) < SpLim) {                            \
-       EF_(stg_gen_chk);                                       \
-        tag_assts                                              \
        R9.w = (W_)LIVENESS_MASK(liveness);                     \
         R10.w = (W_)reentry;                                   \
-        JMP_(stg_gen_chk);                                     \
+        JMP_(stg_gc_gen);                                      \
    }
 
 #define MAYBE_GC(liveness,reentry)             \
    if (doYouWantToGC()) {                      \
-       EF_(stg_gen_hp);                        \
        R9.w = (W_)LIVENESS_MASK(liveness);     \
         R10.w = (W_)reentry;                   \
-        JMP_(stg_gen_hp);                      \
+        JMP_(stg_gc_gen_hp);                   \
    }
 
 /* -----------------------------------------------------------------------------
@@ -355,8 +328,8 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
    out to be slowing us down we can make specialised ones.
    -------------------------------------------------------------------------- */
 
-EF_(stg_gen_yield);
-EF_(stg_gen_block);
+EXTFUN_RTS(stg_gen_yield);
+EXTFUN_RTS(stg_gen_block);
 
 #define YIELD(liveness,reentry)                        \
   {                                            \
@@ -374,10 +347,29 @@ EF_(stg_gen_block);
 
 #define BLOCK_NP(ptrs)                         \
   {                                            \
-    EF_(stg_block_##ptrs);                     \
+    EXTFUN_RTS(stg_block_##ptrs);                      \
     JMP_(stg_block_##ptrs);                    \
   }
 
+#if defined(PAR)
+/*
+  Similar to BLOCK_NP but separates the saving of the thread state from the
+  actual jump via an StgReturn
+*/
+
+#define SAVE_THREAD_STATE(ptrs)                  \
+  ASSERT(ptrs==1);                               \
+  Sp -= 1;                                       \
+  Sp[0] = R1.w;                                  \
+  SaveThreadState();                             
+
+#define THREAD_RETURN(ptrs)                      \
+  ASSERT(ptrs==1);                               \
+  CurrentTSO->what_next = ThreadEnterGHC;        \
+  R1.i = ThreadBlocked;                          \
+  JMP_(StgReturn);                               
+#endif
+
 /* -----------------------------------------------------------------------------
    CCall_GC needs to push a dummy stack frame containing the contents
    of volatile registers and variables.  
@@ -385,11 +377,6 @@ EF_(stg_gen_block);
    We use a RET_DYN frame the same as for a dynamic heap check.
    ------------------------------------------------------------------------- */
 
-#if COMPILING_RTS
-EI_(stg_gen_chk_info);
-#else
-EDI_(stg_gen_chk_info);
-#endif
 /* -----------------------------------------------------------------------------
    Vectored Returns
 
@@ -399,30 +386,87 @@ EDI_(stg_gen_chk_info);
 
    Return vectors are placed in *reverse order* immediately before the info
    table for the return address.  Hence the formula for computing the
-   actual return address is (addr - sizeof(InfoTable) - tag - 1).
+   actual return address is (addr - sizeof(RetInfoTable) - tag - 1).
    The extra subtraction of one word is because tags start at zero.
    -------------------------------------------------------------------------- */
 
-#ifdef USE_MINIINTERPRETER
-#define RET_VEC(p,t) ((*(stgCast(StgInfoTable*,p)->vector))[t])
+#ifdef TABLES_NEXT_TO_CODE
+#define RET_VEC(p,t) (*((P_)(p) - sizeofW(StgRetInfoTable) - t - 1))
 #else
-#define RET_VEC(p,t) (*((P_)(p) - sizeofW(StgInfoTable) - t - 1))
+#define RET_VEC(p,t) (((StgRetInfoTable *)p)->vector[t])
 #endif
 
 /* -----------------------------------------------------------------------------
    Misc
    -------------------------------------------------------------------------- */
 
+
 /* set the tag register (if we have one) */
 #define SET_TAG(t)  /* nothing */
 
 #ifdef EAGER_BLACKHOLING
-#  define UPD_BH_UPDATABLE(thunk)                        \
-        TICK_UPD_BH_UPDATABLE();                         \
-        SET_INFO((StgClosure *)thunk,&BLACKHOLE_info)
-#  define UPD_BH_SINGLE_ENTRY(thunk)                     \
-        TICK_UPD_BH_SINGLE_ENTRY();                      \
-        SET_INFO((StgClosure *)thunk,&SE_BLACKHOLE_info)
+#  ifdef SMP
+#    define UPD_BH_UPDATABLE(info)                             \
+        TICK_UPD_BH_UPDATABLE();                               \
+        {                                                      \
+         bdescr *bd = Bdescr(R1.p);                            \
+          if (bd->u.back != (bdescr *)BaseReg) {               \
+             if (bd->gen_no >= 1 || bd->step->no >= 1) {       \
+                LOCK_THUNK(info);                              \
+             } else {                                          \
+                EXTFUN_RTS(stg_gc_enter_1_hponly);             \
+                JMP_(stg_gc_enter_1_hponly);                   \
+             }                                                 \
+          }                                                    \
+       }                                                       \
+        SET_INFO(R1.cl,&stg_BLACKHOLE_info)
+#    define UPD_BH_SINGLE_ENTRY(info)                          \
+        TICK_UPD_BH_SINGLE_ENTRY();                            \
+        {                                                      \
+         bdescr *bd = Bdescr(R1.p);                            \
+          if (bd->u.back != (bdescr *)BaseReg) {               \
+             if (bd->gen_no >= 1 || bd->step->no >= 1) {       \
+                LOCK_THUNK(info);                              \
+             } else {                                          \
+                EXTFUN_RTS(stg_gc_enter_1_hponly);             \
+                JMP_(stg_gc_enter_1_hponly);                   \
+             }                                                 \
+          }                                                    \
+       }                                                       \
+        SET_INFO(R1.cl,&stg_BLACKHOLE_info)
+#  else
+#   ifndef PROFILING
+#    define UPD_BH_UPDATABLE(info)             \
+        TICK_UPD_BH_UPDATABLE();               \
+        SET_INFO(R1.cl,&stg_BLACKHOLE_info)
+#    define UPD_BH_SINGLE_ENTRY(info)          \
+        TICK_UPD_BH_SINGLE_ENTRY();            \
+        SET_INFO(R1.cl,&stg_SE_BLACKHOLE_info)
+#   else
+// An object is replaced by a blackhole, so we fill the slop with zeros.
+// 
+// This looks like it can't work - we're overwriting the contents of
+// the THUNK with slop!  Perhaps this never worked??? --SDM
+// The problem is that with eager-black-holing we currently perform
+// the black-holing operation at the *beginning* of the basic block,
+// when we still need the contents of the thunk.
+// Perhaps the thing to do is to overwrite it at the *end* of the
+// basic block, when we've already sucked out the thunk's contents? -- SLPJ
+//
+// Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
+// 
+#    define UPD_BH_UPDATABLE(info)             \
+        TICK_UPD_BH_UPDATABLE();               \
+        LDV_recordDead_FILL_SLOP_DYNAMIC(R1.cl);               \
+        SET_INFO(R1.cl,&stg_BLACKHOLE_info);    \
+        LDV_recordCreate(R1.cl)
+#    define UPD_BH_SINGLE_ENTRY(info)          \
+        TICK_UPD_BH_SINGLE_ENTRY();            \
+        LDV_recordDead_FILL_SLOP_DYNAMIC(R1.cl);               \
+        SET_INFO(R1.cl,&stg_SE_BLACKHOLE_info)  \
+        LDV_recordCreate(R1.cl)
+#   endif /* PROFILING */
+#  endif
 #else /* !EAGER_BLACKHOLING */
 #  define UPD_BH_UPDATABLE(thunk)    /* nothing */
 #  define UPD_BH_SINGLE_ENTRY(thunk) /* nothing */
@@ -433,30 +477,30 @@ EDI_(stg_gen_chk_info);
 
    ASSIGN_FLT is for assigning a float to memory (usually the
               stack/heap).  The memory address is guaranteed to be
-             StgWord aligned (currently == sizeof(long)).
+             StgWord aligned (currently == sizeof(void *)).
 
    PK_FLT     is for pulling a float out of memory.  The memory is
               guaranteed to be StgWord aligned.
    -------------------------------------------------------------------------- */
 
-static inline void       ASSIGN_FLT (W_ [], StgFloat);
-static inline StgFloat    PK_FLT     (W_ []);
+INLINE_HEADER void       ASSIGN_FLT (W_ [], StgFloat);
+INLINE_HEADER StgFloat    PK_FLT     (W_ []);
 
 #if ALIGNMENT_FLOAT <= ALIGNMENT_LONG
 
-static inline void     ASSIGN_FLT(W_ p_dest[], StgFloat src) { *(StgFloat *)p_dest = src; }
-static inline StgFloat PK_FLT    (W_ p_src[])                { return *(StgFloat *)p_src; }
+INLINE_HEADER void     ASSIGN_FLT(W_ p_dest[], StgFloat src) { *(StgFloat *)p_dest = src; }
+INLINE_HEADER StgFloat PK_FLT    (W_ p_src[])                { return *(StgFloat *)p_src; }
 
 #else  /* ALIGNMENT_FLOAT > ALIGNMENT_UNSIGNED_INT */
 
-static inline void ASSIGN_FLT(W_ p_dest[], StgFloat src)
+INLINE_HEADER void ASSIGN_FLT(W_ p_dest[], StgFloat src)
 {
     float_thing y;
     y.f = src;
     *p_dest = y.fu;
 }
 
-static inline StgFloat PK_FLT(W_ p_src[])
+INLINE_HEADER StgFloat PK_FLT(W_ p_src[])
 {
     float_thing y;
     y.fu = *p_src;
@@ -467,11 +511,11 @@ static inline StgFloat PK_FLT(W_ p_src[])
 
 #if ALIGNMENT_DOUBLE <= ALIGNMENT_LONG
 
-static inline void       ASSIGN_DBL (W_ [], StgDouble);
-static inline StgDouble   PK_DBL     (W_ []);
+INLINE_HEADER void       ASSIGN_DBL (W_ [], StgDouble);
+INLINE_HEADER StgDouble   PK_DBL     (W_ []);
 
-static inline void      ASSIGN_DBL(W_ p_dest[], StgDouble src) { *(StgDouble *)p_dest = src; }
-static inline StgDouble PK_DBL    (W_ p_src[])                 { return *(StgDouble *)p_src; }
+INLINE_HEADER void      ASSIGN_DBL(W_ p_dest[], StgDouble src) { *(StgDouble *)p_dest = src; }
+INLINE_HEADER StgDouble PK_DBL    (W_ p_src[])                 { return *(StgDouble *)p_src; }
 
 #else  /* ALIGNMENT_DOUBLE > ALIGNMENT_LONG */
 
@@ -497,8 +541,8 @@ static inline StgDouble PK_DBL    (W_ p_src[])                 { return *(StgDou
 
 #else /* ! sparc_TARGET_ARCH */
 
-static inline void       ASSIGN_DBL (W_ [], StgDouble);
-static inline StgDouble   PK_DBL     (W_ []);
+INLINE_HEADER void       ASSIGN_DBL (W_ [], StgDouble);
+INLINE_HEADER StgDouble   PK_DBL     (W_ []);
 
 typedef struct
   { StgWord dhi;
@@ -510,7 +554,7 @@ typedef union
     unpacked_double du;
   } double_thing;
 
-static inline void ASSIGN_DBL(W_ p_dest[], StgDouble src)
+INLINE_HEADER void ASSIGN_DBL(W_ p_dest[], StgDouble src)
 {
     double_thing y;
     y.d = src;
@@ -526,7 +570,7 @@ static inline void ASSIGN_DBL(W_ p_dest[], StgDouble src)
        *(p_dest+1) = ((double_thing) src).du.dlo \
 */
 
-static inline StgDouble PK_DBL(W_ p_src[])
+INLINE_HEADER StgDouble PK_DBL(W_ p_src[])
 {
     double_thing y;
     y.du.dhi = p_src[0];
@@ -555,7 +599,7 @@ typedef union
     unpacked_double_word wu;
   } word64_thing;
 
-static inline void ASSIGN_Word64(W_ p_dest[], StgWord64 src)
+INLINE_HEADER void ASSIGN_Word64(W_ p_dest[], StgWord64 src)
 {
     word64_thing y;
     y.w = src;
@@ -563,7 +607,7 @@ static inline void ASSIGN_Word64(W_ p_dest[], StgWord64 src)
     p_dest[1] = y.wu.dlo;
 }
 
-static inline StgWord64 PK_Word64(W_ p_src[])
+INLINE_HEADER StgWord64 PK_Word64(W_ p_src[])
 {
     word64_thing y;
     y.wu.dhi = p_src[0];
@@ -571,7 +615,7 @@ static inline StgWord64 PK_Word64(W_ p_src[])
     return(y.w);
 }
 
-static inline void ASSIGN_Int64(W_ p_dest[], StgInt64 src)
+INLINE_HEADER void ASSIGN_Int64(W_ p_dest[], StgInt64 src)
 {
     int64_thing y;
     y.i = src;
@@ -579,59 +623,76 @@ static inline void ASSIGN_Int64(W_ p_dest[], StgInt64 src)
     p_dest[1] = y.iu.dlo;
 }
 
-static inline StgInt64 PK_Int64(W_ p_src[])
+INLINE_HEADER StgInt64 PK_Int64(W_ p_src[])
 {
     int64_thing y;
     y.iu.dhi = p_src[0];
     y.iu.dlo = p_src[1];
     return(y.i);
 }
-#endif
 
-/* -----------------------------------------------------------------------------
-   Catch frames
-   -------------------------------------------------------------------------- */
+#elif SIZEOF_VOID_P == 8
 
-extern DLL_IMPORT_DATA const StgPolyInfoTable catch_frame_info;
+INLINE_HEADER void ASSIGN_Word64(W_ p_dest[], StgWord64 src)
+{
+       p_dest[0] = src;
+}
 
-/* -----------------------------------------------------------------------------
-   Seq frames
+INLINE_HEADER StgWord64 PK_Word64(W_ p_src[])
+{
+    return p_src[0];
+}
 
-   A seq frame is very like an update frame, except that it doesn't do
-   an update...
-   -------------------------------------------------------------------------- */
+INLINE_HEADER void ASSIGN_Int64(W_ p_dest[], StgInt64 src)
+{
+    p_dest[0] = src;
+}
 
-extern DLL_IMPORT_DATA const StgPolyInfoTable seq_frame_info;
+INLINE_HEADER StgInt64 PK_Int64(W_ p_src[])
+{
+    return p_src[0];
+}
 
-#define PUSH_SEQ_FRAME(sp)                                     \
-       {                                                       \
-               StgSeqFrame *__frame;                           \
-               TICK_SEQF_PUSHED();                             \
-               __frame = (StgSeqFrame *)(sp);                  \
-               SET_HDR_(__frame,&seq_frame_info,CCCS);         \
-               __frame->link = Su;                             \
-               Su = (StgUpdateFrame *)__frame;                 \
-       }
+#endif
+
+/* -----------------------------------------------------------------------------
+   Catch frames
+   -------------------------------------------------------------------------- */
+
+extern DLL_IMPORT_RTS const StgPolyInfoTable stg_catch_frame_info;
 
 /* -----------------------------------------------------------------------------
    Split markers
    -------------------------------------------------------------------------- */
 
 #if defined(USE_SPLIT_MARKERS)
-#define __STG_SPLIT_MARKER(n) FN_(__stg_split_marker##n) { }
+#if defined(LEADING_UNDERSCORE)
+#define __STG_SPLIT_MARKER __asm__("\n___stg_split_marker:");
+#else
+#define __STG_SPLIT_MARKER __asm__("\n__stg_split_marker:");
+#endif
 #else
-#define __STG_SPLIT_MARKER(n) /* nothing */
+#define __STG_SPLIT_MARKER /* nothing */
 #endif
 
 /* -----------------------------------------------------------------------------
    Closure and Info Macros with casting.
 
    We don't want to mess around with casts in the generated C code, so
-   we use these casting versions of the closure/info tables macros.
+   we use this casting versions of the closure macro.
+
+   This version of SET_HDR also includes CCS_ALLOC for profiling - the
+   reason we don't use two separate macros is that the cost centre
+   field is sometimes a non-simple expression and we want to share its
+   value between SET_HDR and CCS_ALLOC.
    -------------------------------------------------------------------------- */
 
-#define SET_HDR_(c,info,ccs) \
-   SET_HDR((StgClosure *)(c),(StgInfoTable *)(info),ccs)
+#define SET_HDR_(c,info,ccs,size)                              \
+  {                                                            \
+      CostCentreStack *tmp = (ccs);                            \
+      SET_HDR((StgClosure *)(c),(StgInfoTable *)(info),tmp);   \
+      CCS_ALLOC(tmp,size);                                     \
+  }
 
 /* -----------------------------------------------------------------------------
    Saving context for exit from the STG world, and loading up context
@@ -640,41 +701,151 @@ extern DLL_IMPORT_DATA const StgPolyInfoTable seq_frame_info;
    We save all the STG registers (that is, the ones that are mapped to
    machine registers) in their places in the TSO.  
 
-   The stack registers go into the current stack object, and the heap
-   registers are saved in global locations.
+   The stack registers go into the current stack object, and the
+   current nursery is updated from the heap pointer.
+
+   These functions assume that BaseReg is loaded appropriately (if
+   we have one).
    -------------------------------------------------------------------------- */
 
-static __inline__ void
+#if IN_STG_CODE
+
+INLINE_HEADER void
 SaveThreadState(void)
 {
+  StgTSO *tso;
+
   /* Don't need to save REG_Base, it won't have changed. */
 
-  CurrentTSO->sp       = Sp;
-  CurrentTSO->su       = Su;
-  CurrentTSO->splim    = SpLim;
+  tso = CurrentTSO;
+  tso->sp       = Sp;
   CloseNursery(Hp);
 
+#ifdef REG_CurrentTSO
+  SAVE_CurrentTSO = tso;
+#endif
+#ifdef REG_CurrentNursery
+  SAVE_CurrentNursery = CurrentNursery;
+#endif
 #if defined(PROFILING)
   CurrentTSO->prof.CCCS = CCCS;
 #endif
 }
 
-static __inline__ void 
+INLINE_HEADER void 
 LoadThreadState (void)
 {
-#ifdef REG_Base
-  BaseReg = (StgRegTable*)&MainRegTable;
+  StgTSO *tso;
+
+#ifdef REG_CurrentTSO
+  CurrentTSO = SAVE_CurrentTSO;
 #endif
 
-  Sp    = CurrentTSO->sp;
-  Su    = CurrentTSO->su;
-  SpLim = CurrentTSO->splim;
+  tso = CurrentTSO;
+  Sp    = tso->sp;
+  SpLim = (P_)&(tso->stack) + RESERVED_STACK_WORDS;
   OpenNursery(Hp,HpLim);
 
+#ifdef REG_CurrentNursery
+  CurrentNursery = SAVE_CurrentNursery;
+#endif
 # if defined(PROFILING)
   CCCS = CurrentTSO->prof.CCCS;
 # endif
 }
 
+#endif
+
+/* -----------------------------------------------------------------------------
+   Module initialisation
+
+   The module initialisation code looks like this, roughly:
+
+       FN(__stginit_Foo) {
+         JMP_(__stginit_Foo_1_p)
+       }
+
+       FN(__stginit_Foo_1_p) {
+       ...
+       }
+
+   We have one version of the init code with a module version and the
+   'way' attached to it.  The version number helps to catch cases
+   where modules are not compiled in dependency order before being
+   linked: if a module has been compiled since any modules which depend on
+   it, then the latter modules will refer to a different version in their
+   init blocks and a link error will ensue.
+
+   The 'way' suffix helps to catch cases where modules compiled in different
+   ways are linked together (eg. profiled and non-profiled).
+
+   We provide a plain, unadorned, version of the module init code
+   which just jumps to the version with the label and way attached.  The
+   reason for this is that when using foreign exports, the caller of
+   startupHaskell() must supply the name of the init function for the "top"
+   module in the program, and we don't want to require that this name
+   has the version and way info appended to it.
+   -------------------------------------------------------------------------- */
+
+#define PUSH_INIT_STACK(reg_function)          \
+       *(Sp++) = (W_)reg_function
+
+#define POP_INIT_STACK()                       \
+       *(--Sp)
+
+#define MOD_INIT_WRAPPER(label,real_init)      \
+
+
+#define START_MOD_INIT(plain_lbl, real_lbl)    \
+       static int _module_registered = 0;      \
+       EF_(real_lbl);                          \
+       FN_(plain_lbl) {                        \
+            FB_                                        \
+            JMP_(real_lbl);                    \
+           FE_                                 \
+        }                                      \
+       FN_(real_lbl) {                 \
+           FB_;                                \
+           if (! _module_registered) {         \
+               _module_registered = 1;         \
+               { 
+           /* extern decls go here, followed by init code */
+
+#define REGISTER_FOREIGN_EXPORT(reg_fe_binder) \
+        STGCALL1(getStablePtr,reg_fe_binder)
+       
+#define REGISTER_IMPORT(reg_mod_name)          \
+        PUSH_INIT_STACK(reg_mod_name)
+
+#define END_MOD_INIT()                         \
+        }};                                    \
+       JMP_(POP_INIT_STACK());                 \
+       FE_ }
+
+/* -----------------------------------------------------------------------------
+   Support for _ccall_GC_ and _casm_GC.
+   -------------------------------------------------------------------------- */
+
+/* 
+ * Suspending/resuming threads for doing external C-calls (_ccall_GC).
+ * These functions are defined in rts/Schedule.c.
+ */
+StgInt        suspendThread ( StgRegTable *, rtsBool);
+StgRegTable * resumeThread  ( StgInt, rtsBool );
+
+#define SUSPEND_THREAD(token,threaded)         \
+   SaveThreadState();                          \
+   token = suspendThread(BaseReg,threaded);
+
+#ifdef SMP
+#define RESUME_THREAD(token,threaded)          \
+    BaseReg = resumeThread(token,threaded);    \
+    LoadThreadState();
+#else
+#define RESUME_THREAD(token,threaded)          \
+   (void)resumeThread(token,threaded);         \
+   LoadThreadState();
+#endif
+
 #endif /* STGMACROS_H */