X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fincludes%2FPrimOps.h;h=76f392af70f5542683c8a52dee8077d0cfc8f4d2;hb=efdcccf6b431cbc54a1c4da33bb1ac80c579bea4;hp=4ccbd278b30bf66f2498004dcdc80df73215ec20;hpb=b41f38a42b197dfb166ebfe78476b24982379e19;p=ghc-hetmet.git

diff --git a/ghc/includes/PrimOps.h b/ghc/includes/PrimOps.h
index 4ccbd27..76f392a 100644
--- a/ghc/includes/PrimOps.h
+++ b/ghc/includes/PrimOps.h
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: PrimOps.h,v 1.17 1999/02/11 14:22:57 simonm Exp $
+ * $Id: PrimOps.h,v 1.54 2000/05/12 20:01:28 panne Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
@@ -58,9 +58,6 @@
 #define zlzhzh(r,a,b)	r=(I_)((a) <(b))
 #define zlzezhzh(r,a,b)	r=(I_)((a)<=(b))
 
-/*  used by returning comparison primops, defined in Prims.hc. */
-extern const StgClosure *PrelBase_Bool_closure_tbl[];
-
 /* -----------------------------------------------------------------------------
    Char# PrimOps.
    -------------------------------------------------------------------------- */
@@ -82,15 +79,64 @@ I_ stg_div (I_ a, I_ b);
 #define remIntzh(r,a,b)		r=(a)%(b)
 #define negateIntzh(r,a)	r=-(a)
 
-/* The following operations are the standard add,subtract and multiply
- * except that they return a carry if the operation overflows.
+/* -----------------------------------------------------------------------------
+ * Int operations with carry.
+ * -------------------------------------------------------------------------- */
+
+/* With some bit-twiddling, we can define int{Add,Sub}Czh portably in
+ * C, and without needing any comparisons.  This may not be the
+ * fastest way to do it - if you have better code, please send it! --SDM
+ *
+ * Return : r = a + b,  c = 0 if no overflow, 1 on overflow.
+ *
+ * We currently don't make use of the r value if c is != 0 (i.e. 
+ * overflow), we just convert to big integers and try again.  This
+ * could be improved by making r and c the correct values for
+ * plugging into a new J#.  
+ */
+#define addIntCzh(r,c,a,b)	/* r = a+b (wrapping); c = 1 iff signed overflow */ \
+{ (r) = (I_)((StgWord)(a) + (StgWord)(b));	/* unsigned add: no signed-overflow UB */ \
+  (c) = ((StgWord)(~((a)^(b)) & ((a)^(r))))	/* operands agree in sign, result differs */ \
+    >> (BITS_PER_BYTE * sizeof(I_) - 1);	\
+}
+
+
+#define subIntCzh(r,c,a,b)	/* r = a-b (wrapping); c = 1 iff signed overflow */ \
+{ (r) = (I_)((StgWord)(a) - (StgWord)(b));	/* unsigned subtract: no signed-overflow UB */ \
+  (c) = ((StgWord)(((a)^(b)) & ((a)^(r))))	/* operands differ in sign, result sign != a's */ \
+    >> (BITS_PER_BYTE * sizeof(I_) - 1);	\
+}
+
+/* Multiply with overflow checking.
  *
- * They are all defined in terms of 32-bit integers and use the GCC
- * 'long long' extension to get a 64-bit result.  We'd like to use
- * 64-bit integers on 64-bit architectures, but it seems that gcc's
- * 'long long' type is set at 64-bits even on a 64-bit machine.  
+ * This is slightly more tricky - the usual sign rules for add/subtract
+ * don't apply.  
+ *
+ * On x86 hardware we use a hand-crafted assembly fragment to do the job.
+ *
+ * On other 32-bit machines we use gcc's 'long long' types, finding
+ * overflow with some careful bit-twiddling.
+ *
+ * On 64-bit machines where gcc's 'long long' type is also 64-bits,
+ * we use a crude approximation, testing whether either operand is
+ * larger than 32-bits; if neither is, then we go ahead with the
+ * multiplication.
  */
 
+#if i386_TARGET_ARCH
+
+#define mulIntCzh(r,c,a,b)				\
+{							\
+  __asm__("xorl %1,%1\n\t				\
+	   imull %2,%3\n\t				\
+	   jno 1f\n\t					\
+	   movl $1,%1\n\t				\
+	   1:" 						\
+	: "=r" (r), "=&r" (c) : "r" (a), "0" (b));	\
+}
+
+#elif SIZEOF_VOID_P == 4
+
 #ifdef WORDS_BIGENDIAN
 #define C 0
 #define R 1
@@ -104,27 +150,38 @@ typedef union {
     StgInt32 i[2];
 } long_long_u ;
 
-#define addWithCarryzh(r,c,a,b)			\
-{ long_long_u z;				\
-  z.l = a + b;					\
+#define mulIntCzh(r,c,a,b)			\
+{						\
+  long_long_u z;				\
+  z.l = (StgInt64)a * (StgInt64)b;		\
   r = z.i[R];					\
   c = z.i[C];					\
+  if (c == 0 || c == -1) {			\
+    c = ((StgWord)((a^b) ^ r))			\
+      >> (BITS_PER_BYTE * sizeof(I_) - 1);	\
+  }						\
 }
+/* Careful: the carry calculation above is extremely delicate.  Make sure
+ * you test it thoroughly after changing it.
+ */
 
+#else
 
-#define subWithCarryzh(r,c,a,b)			\
-{ long_long_u z;				\
-  z.l = a + b;					\
-  r = z.i[R];					\
-  c = z.i[C];					\
-}
+#define HALF_INT  (((I_)1) << ((BITS_PER_BYTE * sizeof(I_) - 1) / 2))	/* I_-typed shift (no int overflow); operands below this cannot overflow a signed product */
 
-#define mulWithCarryzh(r,c,a,b)			\
-{ long_long_u z;				\
-  z.l = a * b;					\
-  r = z.i[R];					\
-  c = z.i[C];					\
+#define stg_abs(a) ((a) < 0 ? -(a) : (a))
+
+#define mulIntCzh(r,c,a,b)	/* c = 1: may overflow (r not set); c = 0: r = a*b */ \
+{						\
+  if (stg_abs(a) >= HALF_INT ||			\
+      stg_abs(b) >= HALF_INT) {			\
+    (c) = 1;	/* conservative: operand too large to prove a*b fits */ \
+  } else {					\
+    (r) = (a) * (b);				\
+    (c) = 0;					\
+  }						\
 }
+#endif
 
 /* -----------------------------------------------------------------------------
    Word PrimOps.
@@ -138,15 +195,21 @@ typedef union {
 #define xorzh(r,a,b)            r=(a)^(b)
 #define notzh(r,a)		r=~(a)
 
-#define shiftLzh(r,a,b)	  	r=(a)<<(b)
-#define shiftRLzh(r,a,b)  	r=(a)>>(b)
-#define iShiftLzh(r,a,b)  	r=(a)<<(b)
+/* The extra tests below properly define the behaviour when shifting
+ * by offsets larger than the width of the value being shifted.  Doing
+ * so is undefined in C (and in fact gives different answers depending
+ * on whether the operation is constant folded or not with gcc on x86!)
+ */
+
+#define shiftLzh(r,a,b)	  	r=((b) >= BITS_IN(W_)) ? 0 : (a)<<(b)
+#define shiftRLzh(r,a,b)  	r=((b) >= BITS_IN(W_)) ? 0 : (a)>>(b)
+#define iShiftLzh(r,a,b)  	r=((b) >= BITS_IN(W_)) ? 0 : (a)<<(b)
 /* Right shifting of signed quantities is not portable in C, so
    the behaviour you'll get from using these primops depends
    on the whatever your C compiler is doing. ToDo: fix/document. -- sof 8/98
 */
-#define iShiftRAzh(r,a,b) 	r=(a)>>(b)
-#define iShiftRLzh(r,a,b) 	r=(a)>>(b)
+#define iShiftRAzh(r,a,b) 	r=((b) >= BITS_IN(I_)) ? (((a) < 0) ? -1 : 0) : (a)>>(b)
+#define iShiftRLzh(r,a,b) 	r=((b) >= BITS_IN(I_)) ? 0 : ((W_)(a))>>(b)
 
 #define int2Wordzh(r,a) 	r=(W_)(a)
 #define word2Intzh(r,a) 	r=(I_)(a)
@@ -158,15 +221,16 @@ typedef union {
 #define int2Addrzh(r,a) 	r=(A_)(a)
 #define addr2Intzh(r,a) 	r=(I_)(a)
 
-#define indexCharOffAddrzh(r,a,i)   r= ((C_ *)(a))[i]
-#define indexIntOffAddrzh(r,a,i)    r= ((I_ *)(a))[i]
-#define indexAddrOffAddrzh(r,a,i)   r= ((PP_)(a))[i]
-#define indexFloatOffAddrzh(r,a,i)  r= PK_FLT((P_) (((StgFloat *)(a)) + i))
-#define indexDoubleOffAddrzh(r,a,i) r= PK_DBL((P_) (((StgDouble *)(a)) + i))
-#define indexStablePtrOffAddrzh(r,a,i)    r= ((StgStablePtr *)(a))[i]
+#define readCharOffAddrzh(r,a,i)   	r= ((C_ *)(a))[i]
+#define readIntOffAddrzh(r,a,i)    	r= ((I_ *)(a))[i]
+#define readWordOffAddrzh(r,a,i)    	r= ((W_ *)(a))[i]
+#define readAddrOffAddrzh(r,a,i)   	r= ((PP_)(a))[i]
+#define readFloatOffAddrzh(r,a,i)  	r= PK_FLT((P_) (((StgFloat *)(a)) + i))
+#define readDoubleOffAddrzh(r,a,i) 	r= PK_DBL((P_) (((StgDouble *)(a)) + i))
+#define readStablePtrOffAddrzh(r,a,i)   r= ((StgStablePtr *)(a))[i]
 #ifdef SUPPORT_LONG_LONGS
-#define indexInt64OffAddrzh(r,a,i)  r= ((LI_ *)(a))[i]
-#define indexWord64OffAddrzh(r,a,i) r= ((LW_ *)(a))[i]
+#define readInt64OffAddrzh(r,a,i)  	r= ((LI_ *)(a))[i]
+#define readWord64OffAddrzh(r,a,i) 	r= ((LW_ *)(a))[i]
 #endif
 
 #define writeCharOffAddrzh(a,i,v)       ((C_ *)(a))[i] = (v)
@@ -182,6 +246,18 @@ typedef union {
 #define writeWord64OffAddrzh(a,i,v)  ((LW_ *)(a))[i] = (v)
 #endif
 
+#define indexCharOffAddrzh(r,a,i)   	r= ((C_ *)(a))[i]
+#define indexIntOffAddrzh(r,a,i)    	r= ((I_ *)(a))[i]
+#define indexWordOffAddrzh(r,a,i)   	r= ((W_ *)(a))[i]
+#define indexAddrOffAddrzh(r,a,i)   	r= ((PP_)(a))[i]
+#define indexFloatOffAddrzh(r,a,i)  	r= PK_FLT((P_) (((StgFloat *)(a)) + i))
+#define indexDoubleOffAddrzh(r,a,i) 	r= PK_DBL((P_) (((StgDouble *)(a)) + i))
+#define indexStablePtrOffAddrzh(r,a,i)  r= ((StgStablePtr *)(a))[i]
+#ifdef SUPPORT_LONG_LONGS
+#define indexInt64OffAddrzh(r,a,i)  	r= ((LI_ *)(a))[i]
+#define indexWord64OffAddrzh(r,a,i) 	r= ((LW_ *)(a))[i]
+#endif
+
 /* -----------------------------------------------------------------------------
    Float PrimOps.
    -------------------------------------------------------------------------- */
@@ -246,54 +322,66 @@ typedef union {
 
 /* We can do integer2Int and cmpInteger inline, since they don't need
  * to allocate any memory.
+ *
+ * integer2Int# is now modular.
  */
 
-#define integer2Intzh(r, aa,sa,da)					\
-{ MP_INT arg;								\
-									\
-  arg._mp_alloc	= (aa);							\
-  arg._mp_size	= (sa);							\
-  arg._mp_d	= (unsigned long int *) (BYTE_ARR_CTS(da));		\
-									\
-  (r) = RET_PRIM_STGCALL1(I_,mpz_get_si,&arg);				\
+#define integer2Intzh(r, sa,da)	/* r = low limb of the Integer, negated when size < 0 */ \
+{ StgWord word0 = ((StgWord *)BYTE_ARR_CTS(da))[0];	\
+  int size = (sa);					\
+							\
+  (r) =							\
+    ( size == 0 ) ?					\
+       0 :						\
+       ( size < 0 && word0 != ((StgWord)1 << (BITS_PER_BYTE * sizeof(I_) - 1)) ) ? \
+         -(I_)word0 :	/* skipped for the min-int bit pattern, where negation would overflow */ \
+	  (I_)word0;					\
 }
 
-#define integer2Wordzh(r, aa,sa,da)					\
-{ MP_INT arg;								\
-									\
-  arg._mp_alloc	= (aa);							\
-  arg._mp_size	= (sa);							\
-  arg._mp_d	= (unsigned long int *) (BYTE_ARR_CTS(da));		\
-									\
-  (r) = RET_PRIM_STGCALL1(I_,mpz_get_ui,&arg);				\
+#define integer2Wordzh(r, sa,da)			\
+{ StgWord word0 = ((StgWord *)BYTE_ARR_CTS(da))[0];	\
+  int size = sa;                                        \
+  (r) = ( size == 0 ) ? 0 : word0 ;                     \
 }
 
-#define cmpIntegerzh(r, a1,s1,d1, a2,s2,d2)				\
-{ MP_INT arg1;								\
-  MP_INT arg2;								\
-									\
-  arg1._mp_alloc= (a1);							\
-  arg1._mp_size	= (s1);							\
-  arg1._mp_d	= (unsigned long int *) (BYTE_ARR_CTS(d1));		\
-  arg2._mp_alloc= (a2);							\
-  arg2._mp_size	= (s2);							\
-  arg2._mp_d	= (unsigned long int *) (BYTE_ARR_CTS(d2));		\
-									\
-  (r) = RET_PRIM_STGCALL2(I_,mpz_cmp,&arg1,&arg2);			\
+#define cmpIntegerzh(r, s1,d1, s2,d2)				\
+{ MP_INT arg1;							\
+  MP_INT arg2;							\
+								\
+  arg1._mp_size	= (s1);						\
+  arg1._mp_alloc= ((StgArrWords *)d1)->words;			\
+  arg1._mp_d	= (unsigned long int *) (BYTE_ARR_CTS(d1));	\
+  arg2._mp_size	= (s2);						\
+  arg2._mp_alloc= ((StgArrWords *)d2)->words;			\
+  arg2._mp_d	= (unsigned long int *) (BYTE_ARR_CTS(d2));	\
+								\
+  (r) = RET_PRIM_STGCALL2(I_,mpz_cmp,&arg1,&arg2);		\
 }
 
-/* A glorious hack: calling mpz_neg would entail allocation and
- * copying, but by looking at what mpz_neg actually does, we can
- * derive a better version:
- */
+#define cmpIntegerIntzh(r, s,d, i)				\
+{ MP_INT arg;							\
+								\
+  arg._mp_size	= (s);						\
+  arg._mp_alloc = ((StgArrWords *)d)->words;			\
+  arg._mp_d	= (unsigned long int *) (BYTE_ARR_CTS(d));	\
+								\
+  (r) = RET_PRIM_STGCALL2(I_,mpz_cmp_si,&arg,i);		\
+}
 
-#define negateIntegerzh(ra, rs, rd, a, s, d)				\
-{ 									\
-  (ra) = (a);								\
-  (rs) = -(s);								\
-  (rd) = d;								\
+/* I think mp_limb_t must be the same size as StgInt for this to work
+ * properly --SDM
+ */
+#define gcdIntzh(r,a,b) /* r = gcd(a,b) via mpn_gcd_1; if either is 0, r = |other| */ \
+{ StgInt aa = (a); \
+  (r) = (aa) ? (b) ? \
+        RET_STGCALL3(StgInt, mpn_gcd_1, (mp_limb_t *)(&aa), 1, (mp_limb_t)(b)) \
+        : ((aa) < 0 ? -(aa) : (aa)) /* not abs(): it takes int and would truncate a wider StgInt */ \
+      : ((b) < 0 ? -(b) : (b)); \
 }
 
+#define gcdIntegerIntzh(r,a,sb,b) /* NOTE(review): b is used as the limb pointer without BYTE_ARR_CTS -- confirm against callers */ \
+  (r) = RET_STGCALL3(StgInt, mpn_gcd_1, (unsigned long int *) (b), (sb), (mp_limb_t)(a))
+
 /* The rest are all out-of-line: -------- */
 
 /* Integer arithmetic */
@@ -302,6 +390,9 @@ EF_(minusIntegerzh_fast);
 EF_(timesIntegerzh_fast);
 EF_(gcdIntegerzh_fast);
 EF_(quotRemIntegerzh_fast);
+EF_(quotIntegerzh_fast);
+EF_(remIntegerzh_fast);
+EF_(divExactIntegerzh_fast);
 EF_(divModIntegerzh_fast);
 
 /* Conversions */
@@ -309,11 +400,8 @@ EF_(int2Integerzh_fast);
 EF_(word2Integerzh_fast);
 EF_(addr2Integerzh_fast);
 
-/* Floating-point encodings/decodings */
-EF_(encodeFloatzh_fast);
+/* Floating-point decodings */
 EF_(decodeFloatzh_fast);
-
-EF_(encodeDoublezh_fast);
 EF_(decodeDoublezh_fast);
 
 /* -----------------------------------------------------------------------------
@@ -322,37 +410,41 @@ EF_(decodeDoublezh_fast);
 
 #ifdef SUPPORT_LONG_LONGS
 
-#define integerToWord64zh(r, aa,sa,da)					\
-{ unsigned long int* d; 						\
-  StgNat64 res;								\
-									\
-  d		= (unsigned long int *) (BYTE_ARR_CTS(da));		\
-  if ( (aa) == 0 ) {							\
-     res = (LW_)0;							\
-  } else if ( (aa) == 1) {						\
-     res = (LW_)d[0];							\
-  } else {								\
-     res = (LW_)d[0] + (LW_)d[1] * 0x100000000ULL;			\
-  }									\
-  (r) = res;								\
+#define integerToWord64zh(r, sa,da)	/* r = low 64 bits of the magnitude; sign of sa is ignored */ \
+{ unsigned long int* d;					\
+  I_ s;							\
+  StgWord64 res;					\
+							\
+  d = (unsigned long int *) (BYTE_ARR_CTS(da));		\
+  s = (sa);						\
+  if ( s == 0 ) {					\
+     res = (LW_)0;					\
+  } else if ( s == 1 || s == -1 ) { /* single limb: d[1] is not part of the value */ \
+     res = (LW_)d[0];					\
+  } else {						\
+     res = (LW_)d[0] + (LW_)d[1] * 0x100000000ULL;	\
+  }							\
+  (r) = res;						\
 }
 
-#define integerToInt64zh(r, aa,sa,da)					\
-{ unsigned long int* d; 						\
-  StgInt64 res;								\
-									\
-  d		= (unsigned long int *) (BYTE_ARR_CTS(da));		\
-  if ( (aa) == 0 ) {							\
-     res = (LI_)0;							\
-  } else if ( (aa) == 1) {						\
-     res = (LI_)d[0];							\
-  } else {								\
-     res = (LI_)d[0] + (LI_)d[1] * 0x100000000LL;			\
-     if ( sa < 0 ) {                                                    \
-	   res = (LI_)-res;                                             \
-     }                                                                  \
-  }									\
-  (r) = res;						                \
+#define integerToInt64zh(r, sa,da)	/* r = Integer as Int64, built from at most two limbs */ \
+{ unsigned long int* d;	/* limbs, d[0] least significant */ \
+  I_ s;			/* limb count; negative means the value is negative */ \
+  StgInt64 res;						\
+							\
+  d = (unsigned long int *) (BYTE_ARR_CTS(da));		\
+  s = (sa);						\
+  if ( s == 0 ) {					\
+     res = (LI_)0;					\
+  } else if ( s == 1 ) {				\
+     res = (LI_)d[0];					\
+  } else if ( s == -1 ) { /* one limb, negative: d[1] is not part of the value */ \
+     res = -(LI_)d[0];					\
+  } else {						\
+     res = (LI_)d[0] + (LI_)d[1] * 0x100000000LL;	\
+     if ( s < 0 ) { res = (LI_)-res; }			\
+  }							\
+  (r) = res;						\
 }
 
 /* Conversions */
@@ -361,12 +453,12 @@ EF_(word64ToIntegerzh_fast);
 
 /* The rest are (way!) out of line, implemented via C entry points.
  */
-I_ stg_gtWord64 (StgNat64, StgNat64);
-I_ stg_geWord64 (StgNat64, StgNat64);
-I_ stg_eqWord64 (StgNat64, StgNat64);
-I_ stg_neWord64 (StgNat64, StgNat64);
-I_ stg_ltWord64 (StgNat64, StgNat64);
-I_ stg_leWord64 (StgNat64, StgNat64);
+I_ stg_gtWord64 (StgWord64, StgWord64);
+I_ stg_geWord64 (StgWord64, StgWord64);
+I_ stg_eqWord64 (StgWord64, StgWord64);
+I_ stg_neWord64 (StgWord64, StgWord64);
+I_ stg_ltWord64 (StgWord64, StgWord64);
+I_ stg_leWord64 (StgWord64, StgWord64);
 
 I_ stg_gtInt64 (StgInt64, StgInt64);
 I_ stg_geInt64 (StgInt64, StgInt64);
@@ -375,8 +467,8 @@ I_ stg_neInt64 (StgInt64, StgInt64);
 I_ stg_ltInt64 (StgInt64, StgInt64);
 I_ stg_leInt64 (StgInt64, StgInt64);
 
-LW_ stg_remWord64  (StgNat64, StgNat64);
-LW_ stg_quotWord64 (StgNat64, StgNat64);
+LW_ stg_remWord64  (StgWord64, StgWord64);
+LW_ stg_quotWord64 (StgWord64, StgWord64);
 
 LI_ stg_remInt64    (StgInt64, StgInt64);
 LI_ stg_quotInt64   (StgInt64, StgInt64);
@@ -385,13 +477,13 @@ LI_ stg_plusInt64   (StgInt64, StgInt64);
 LI_ stg_minusInt64  (StgInt64, StgInt64);
 LI_ stg_timesInt64  (StgInt64, StgInt64);
 
-LW_ stg_and64  (StgNat64, StgNat64);
-LW_ stg_or64   (StgNat64, StgNat64);
-LW_ stg_xor64  (StgNat64, StgNat64);
-LW_ stg_not64  (StgNat64);
+LW_ stg_and64  (StgWord64, StgWord64);
+LW_ stg_or64   (StgWord64, StgWord64);
+LW_ stg_xor64  (StgWord64, StgWord64);
+LW_ stg_not64  (StgWord64);
 
-LW_ stg_shiftL64   (StgNat64, StgInt);
-LW_ stg_shiftRL64  (StgNat64, StgInt);
+LW_ stg_shiftL64   (StgWord64, StgInt);
+LW_ stg_shiftRL64  (StgWord64, StgInt);
 LI_ stg_iShiftL64  (StgInt64, StgInt);
 LI_ stg_iShiftRL64 (StgInt64, StgInt);
 LI_ stg_iShiftRA64 (StgInt64, StgInt);
@@ -401,8 +493,8 @@ I_ stg_int64ToInt     (StgInt64);
 LW_ stg_int64ToWord64 (StgInt64);
 
 LW_ stg_wordToWord64  (StgWord);
-W_  stg_word64ToWord  (StgNat64);
-LI_ stg_word64ToInt64 (StgNat64);
+W_  stg_word64ToWord  (StgWord64);
+LI_ stg_word64ToInt64 (StgWord64);
 #endif
 
 /* -----------------------------------------------------------------------------
@@ -417,11 +509,11 @@ LI_ stg_word64ToInt64 (StgNat64);
 
 #ifdef DEBUG
 #define BYTE_ARR_CTS(a)				  \
- ({ ASSERT(GET_INFO(a) == &ARR_WORDS_info); 	  \
+ ({ ASSERT(GET_INFO((StgArrWords *)(a)) == &ARR_WORDS_info); 	  \
     REAL_BYTE_ARR_CTS(a); })
 #define PTRS_ARR_CTS(a)				  \
- ({ ASSERT((GET_INFO(a) == &ARR_PTRS_info)	  \
-	|| (GET_INFO(a) == &MUT_ARR_PTRS_info));  \
+ ({ ASSERT((GET_INFO((StgMutArrPtrs  *)(a)) == &MUT_ARR_PTRS_FROZEN_info)	  \
+	|| (GET_INFO((StgMutArrPtrs  *)(a)) == &MUT_ARR_PTRS_info));  \
     REAL_PTRS_ARR_CTS(a); })
 #else
 #define BYTE_ARR_CTS(a)		REAL_BYTE_ARR_CTS(a)
@@ -481,29 +573,6 @@ extern I_ resetGenSymZh(void);
 #define indexWord64Arrayzh(r,a,i) indexWord64OffAddrzh(r,BYTE_ARR_CTS(a),i)
 #endif
 
-#define indexCharOffForeignObjzh(r,fo,i)   indexCharOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i)
-#define indexIntOffForeignObjzh(r,fo,i)    indexIntOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i)
-#define indexWordOffForeignObjzh(r,fo,i)   indexWordOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i)
-#define indexAddrOffForeignObjzh(r,fo,i)   indexAddrOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i)
-#define indexFloatOffForeignObjzh(r,fo,i)  indexFloatOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i)
-#define indexDoubleOffForeignObjzh(r,fo,i) indexDoubleOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i)
-#define indexStablePtrOffForeignObjzh(r,fo,i)  indexStablePtrOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i)
-#ifdef SUPPORT_LONG_LONGS
-#define indexInt64OffForeignObjzh(r,fo,i)  indexInt64OffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i)
-#define indexWord64OffForeignObjzh(r,fo,i) indexWord64OffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i)
-#endif
-
-#define indexCharOffAddrzh(r,a,i)   r= ((C_ *)(a))[i]
-#define indexIntOffAddrzh(r,a,i)    r= ((I_ *)(a))[i]
-#define indexWordOffAddrzh(r,a,i)   r= ((W_ *)(a))[i]
-#define indexAddrOffAddrzh(r,a,i)   r= ((PP_)(a))[i]
-#define indexFloatOffAddrzh(r,a,i)  r= PK_FLT((P_) (((StgFloat *)(a)) + i))
-#define indexDoubleOffAddrzh(r,a,i) r= PK_DBL((P_) (((StgDouble *)(a)) + i))
-#ifdef SUPPORT_LONG_LONGS
-#define indexInt64OffAddrzh(r,a,i)  r= ((LI_ *)(a))[i]
-#define indexWord64OffAddrzh(r,a,i) r= ((LW_ *)(a))[i]
-#endif
-
 /* Freezing arrays-of-ptrs requires changing an info table, for the
    benefit of the generational collector.  It needs to scavenge mutable
    objects, even if they are in old space.  When they become immutable,
@@ -517,6 +586,8 @@ extern I_ resetGenSymZh(void);
 
 #define unsafeFreezzeByteArrayzh(r,a)	r=(a)
 
+EF_(unsafeThawArrayzh_fast);
+
 #define sizzeofByteArrayzh(r,a) \
      r = (((StgArrWords *)(a))->words * sizeof(W_))
 #define sizzeofMutableByteArrayzh(r,a) \
@@ -538,36 +609,9 @@ EF_(newArrayzh_fast);
 /* We only support IEEE floating point format */
 #include "ieee-flpt.h"
 
-#if FLOATS_AS_DOUBLES  /* i.e. 64-bit machines */
-#define encodeFloatzh(r, aa,sa,da, expon)   encodeDoublezh(r, aa,sa,da, expon)
-#else
-#define encodeFloatzh(r, aa,sa,da, expon)	\
-{ MP_INT arg;					\
-  /* Does not allocate memory */		\
-						\
-  arg._mp_alloc	= aa;				\
-  arg._mp_size	= sa;				\
-  arg._mp_d	= (unsigned long int *) (BYTE_ARR_CTS(da)); \
-						\
-  r = RET_PRIM_STGCALL2(StgFloat, __encodeFloat,&arg,(expon));\
-}
-#endif /* FLOATS_AS_DOUBLES */
-
-#define encodeDoublezh(r, aa,sa,da, expon)	\
-{ MP_INT arg;					\
-  /* Does not allocate memory */		\
-						\
-  arg._mp_alloc	= aa;				\
-  arg._mp_size	= sa;				\
-  arg._mp_d	= (unsigned long int *) (BYTE_ARR_CTS(da)); \
-						\
-  r = RET_PRIM_STGCALL2(StgDouble, __encodeDouble,&arg,(expon));\
-}
-
 /* The decode operations are out-of-line because they need to allocate
  * a byte array.
  */
- 
 #ifdef FLOATS_AS_DOUBLES
 #define decodeFloatzh_fast decodeDoublezh_fast
 #else
@@ -578,8 +622,12 @@ EF_(decodeDoublezh_fast);
 
 /* grimy low-level support functions defined in StgPrimFloat.c */
 
-extern StgDouble __encodeDouble (MP_INT *s, I_ e);
-extern StgFloat  __encodeFloat  (MP_INT *s, I_ e);
+extern StgDouble __encodeDouble (I_ size, StgByteArray arr, I_ e);
+extern StgDouble __int_encodeDouble (I_ j, I_ e);
+#ifndef FLOATS_AS_DOUBLES
+extern StgFloat  __encodeFloat (I_ size, StgByteArray arr, I_ e);
+extern StgFloat  __int_encodeFloat (I_ j, I_ e);
+#endif
 extern void      __decodeDouble (MP_INT *man, I_ *_exp, StgDouble dbl);
 extern void      __decodeFloat  (MP_INT *man, I_ *_exp, StgFloat flt);
 extern StgInt    isDoubleNaN(StgDouble d);
@@ -614,6 +662,7 @@ EF_(newMutVarzh_fast);
 #define isEmptyMVarzh(r,a)       r=(I_)((GET_INFO((StgMVar*)(a))) == &EMPTY_MVAR_info )
 EF_(newMVarzh_fast);
 EF_(takeMVarzh_fast);
+EF_(tryTakeMVarzh_fast);
 EF_(putMVarzh_fast);
 
 
@@ -621,7 +670,9 @@ EF_(putMVarzh_fast);
    Delay/Wait PrimOps
    -------------------------------------------------------------------------- */
 
-/* Hmm, I'll think about these later. */
+EF_(waitReadzh_fast);
+EF_(waitWritezh_fast);
+EF_(delayzh_fast);
 
 /* -----------------------------------------------------------------------------
    Primitive I/O, error-handling PrimOps
@@ -649,24 +700,122 @@ EF_(makeStableNamezh_fast);
    r = RET_STGCALL1(StgStablePtr,getStablePtr,a)
 
 #define deRefStablePtrzh(r,sp) do {		\
-  ASSERT(stable_ptr_table[sp & ~STABLEPTR_WEIGHT_MASK].weight > 0);	\
-  r = stable_ptr_table[sp & ~STABLEPTR_WEIGHT_MASK].addr; \
+  ASSERT(stable_ptr_table[stgCast(StgWord,sp) & ~STABLEPTR_WEIGHT_MASK].weight > 0);	\
+  r = stable_ptr_table[stgCast(StgWord,sp) & ~STABLEPTR_WEIGHT_MASK].addr; \
 } while (0);
 
 #define eqStablePtrzh(r,sp1,sp2) \
-    (r = ((sp1 & ~STABLEPTR_WEIGHT_MASK) == (sp2 & ~STABLEPTR_WEIGHT_MASK)))
+    (r = ((stgCast(StgWord,sp1) & ~STABLEPTR_WEIGHT_MASK) == (stgCast(StgWord,sp2) & ~STABLEPTR_WEIGHT_MASK)))
 
 #endif
 
 /* -----------------------------------------------------------------------------
-   Parallel PrimOps.
+   Concurrency/Exception PrimOps.
    -------------------------------------------------------------------------- */
 
 EF_(forkzh_fast);
+EF_(yieldzh_fast);
 EF_(killThreadzh_fast);
 EF_(seqzh_fast);
+EF_(blockAsyncExceptionszh_fast);
+EF_(unblockAsyncExceptionszh_fast);
+
+#define myThreadIdzh(t) (t = CurrentTSO)
+
+extern int cmp_thread(const StgTSO *tso1, const StgTSO *tso2);
+
+/* ------------------------------------------------------------------------
+   Parallel PrimOps
+
+   A par in the Haskell code is ultimately translated to a parzh macro
+   (with a case wrapped around it to guarantee that the macro is actually 
+    executed; see compiler/prelude/PrimOps.lhs)
+   In GUM and SMP we only add a pointer to the spark pool.
+   In GranSim we call an RTS fct, forwarding additional parameters which
+   supply info on granularity of the computation, size of the result value
+   and the degree of parallelism in the sparked expression.
+   ---------------------------------------------------------------------- */
+
+#if defined(GRAN)
+//@cindex _par_
+#define parzh(r,node)             PAR(r,node,1,0,0,0,0,0)
+
+//@cindex _parAt_
+#define parAtzh(r,node,where,identifier,gran_info,size_info,par_info,rest) \
+	parAT(r,node,where,identifier,gran_info,size_info,par_info,rest,1)
+
+//@cindex _parAtAbs_
+#define parAtAbszh(r,node,proc,identifier,gran_info,size_info,par_info,rest) \
+	parAT(r,node,proc,identifier,gran_info,size_info,par_info,rest,2)
+
+//@cindex _parAtRel_
+#define parAtRelzh(r,node,proc,identifier,gran_info,size_info,par_info,rest) \
+	parAT(r,node,proc,identifier,gran_info,size_info,par_info,rest,3)
+
+//@cindex _parAtForNow_
+#define parAtForNowzh(r,node,where,identifier,gran_info,size_info,par_info,rest)	\
+	parAT(r,node,where,identifier,gran_info,size_info,par_info,rest,0)
+
+#define parAT(r,node,where,identifier,gran_info,size_info,par_info,rest,local)	\
+{							        \
+  if (closure_SHOULD_SPARK((StgClosure*)node)) {		\
+    rtsSparkQ result;	/* spark returned by newSpark, handed to GranSim below */ \
+    PEs p;                                                      \
+                                                                \
+    result = RET_STGCALL6(rtsSparkQ, newSpark, node,identifier,gran_info,size_info,par_info,local); /* NOTE(review): assumes newSpark returns rtsSparkQ -- confirm */ \
+    switch (local) {                                                        \
+      case 2: p = where;  /* parAtAbs means absolute PE no. expected */     \
+              break;                                                        \
+      case 3: p = CurrentProc+where; /* parAtRel means rel PE no. expected */\
+              break;                                                        \
+      default: p = where_is(where); /* parAt means closure expected */      \
+              break;                                                        \
+    }                                                                       \
+    /* update GranSim state according to this spark */                      \
+    STGCALL3(GranSimSparkAtAbs, result, (I_)p, identifier);                 \
+  }                                                                         \
+}
+
+//@cindex _parLocal_
+#define parLocalzh(r,node,identifier,gran_info,size_info,par_info,rest)	\
+	PAR(r,node,rest,identifier,gran_info,size_info,par_info,1)
+
+//@cindex _parGlobal_
+#define parGlobalzh(r,node,identifier,gran_info,size_info,par_info,rest) \
+	PAR(r,node,rest,identifier,gran_info,size_info,par_info,0)
+
+#define PAR(r,node,rest,identifier,gran_info,size_info,par_info,local) \
+{                                                                        \
+  if (closure_SHOULD_SPARK((StgClosure*)node)) {                         \
+    rtsSpark *result;						         \
+    result = RET_STGCALL6(rtsSpark*, newSpark,                           \
+                          node,identifier,gran_info,size_info,par_info,local);\
+    STGCALL1(add_to_spark_queue,result); 				\
+    STGCALL2(GranSimSpark, local,(P_)node);	                        \
+  }							                \
+}
+
+#define copyablezh(r,node)				\
+  /* copyable not yet implemented!! */
+
+#define noFollowzh(r,node)				\
+  /* noFollow not yet implemented!! */
+
+#elif defined(SMP) || defined(PAR)
+
+#define parzh(r,node)					\
+{							\
+  extern unsigned int context_switch; 			\
+  if (closure_SHOULD_SPARK((StgClosure *)node) &&	\
+      SparkTl < SparkLim) {				\
+    *SparkTl++ = (StgClosure *)(node);			\
+  }							\
+  r = context_switch = 1;				\
+}
+#else /* !GRAN && !SMP && !PAR */
+#define parzh(r,node) r = 1
+#endif
 
-/* Hmm, I'll think about these later. */
 /* -----------------------------------------------------------------------------
    Pointer equality
    -------------------------------------------------------------------------- */
@@ -686,7 +835,7 @@ EF_(seqzh_fast);
 #ifndef PAR
 
 EF_(mkWeakzh_fast);
-EF_(finalizeWeakzh_fast);
+EF_(finalizzeWeakzh_fast);
 
 #define deRefWeakzh(code,val,w)				\
   if (((StgWeak *)w)->header.info == &WEAK_info) {	\
@@ -709,16 +858,35 @@ EF_(finalizeWeakzh_fast);
 
 #define ForeignObj_CLOSURE_DATA(c)  (((StgForeignObj *)c)->data)
 
-EF_(makeForeignObjzh_fast);
+EF_(mkForeignObjzh_fast);
 
 #define writeForeignObjzh(res,datum) \
    (ForeignObj_CLOSURE_DATA(res) = (P_)(datum))
 
 #define eqForeignObj(f1,f2)  ((f1)==(f2))
 
+#define indexCharOffForeignObjzh(r,fo,i)   indexCharOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i)
+#define indexIntOffForeignObjzh(r,fo,i)    indexIntOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i)
+#define indexWordOffForeignObjzh(r,fo,i)   indexWordOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i)
+#define indexAddrOffForeignObjzh(r,fo,i)   indexAddrOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i)
+#define indexFloatOffForeignObjzh(r,fo,i)  indexFloatOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i)
+#define indexDoubleOffForeignObjzh(r,fo,i) indexDoubleOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i)
+#define indexStablePtrOffForeignObjzh(r,fo,i)  indexStablePtrOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i)
+#ifdef SUPPORT_LONG_LONGS
+#define indexInt64OffForeignObjzh(r,fo,i)  indexInt64OffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i)
+#define indexWord64OffForeignObjzh(r,fo,i) indexWord64OffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i)
+#endif
+
 #endif
 
 /* -----------------------------------------------------------------------------
+   Constructor tags
+   -------------------------------------------------------------------------- */
+
+#define dataToTagzh(r,a)  r=(GET_TAG(((StgClosure *)a)->header.info))
+/*  tagToEnum# is handled directly by the code generator. */
+
+/* -----------------------------------------------------------------------------
    Signal processing.  Not really primops, but called directly from
    Haskell. 
    -------------------------------------------------------------------------- */
@@ -733,4 +901,4 @@ extern StgInt sig_install (StgInt, StgInt, StgStablePtr, sigset_t *);
 #define stg_sig_ignore(sig,mask) sig_install(sig,STG_SIG_IGN,0,(sigset_t *)mask)
 #define stg_sig_catch(sig,ptr,mask) sig_install(sig,STG_SIG_HAN,ptr,(sigset_t *)mask)
 
-#endif PRIMOPS_H
+#endif /* PRIMOPS_H */