/* -----------------------------------------------------------------------------
- * $Id: PrimOps.h,v 1.17 1999/02/11 14:22:57 simonm Exp $
+ * $Id: PrimOps.h,v 1.41 1999/12/08 14:21:54 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
#define zlzhzh(r,a,b) r=(I_)((a) <(b))
#define zlzezhzh(r,a,b) r=(I_)((a)<=(b))
-/* used by returning comparison primops, defined in Prims.hc. */
-extern const StgClosure *PrelBase_Bool_closure_tbl[];
-
/* -----------------------------------------------------------------------------
Char# PrimOps.
-------------------------------------------------------------------------- */
#define remIntzh(r,a,b) r=(a)%(b)
#define negateIntzh(r,a) r=-(a)
-/* The following operations are the standard add,subtract and multiply
- * except that they return a carry if the operation overflows.
+/* -----------------------------------------------------------------------------
+ * Int operations with carry.
+ * -------------------------------------------------------------------------- */
+
+/* With some bit-twiddling, we can define int{Add,Sub}Czh portably in
+ * C, and without needing any comparisons. This may not be the
+ * fastest way to do it - if you have better code, please send it! --SDM
+ *
+ * Return : r = a + b, c = 0 if no overflow, 1 on overflow.
+ *
+ * We currently don't make use of the r value if c is != 0 (i.e.
+ * overflow), we just convert to big integers and try again. This
+ * could be improved by making r and c the correct values for
+ * plugging into a new J#.
+ */
+#define addIntCzh(r,c,a,b) \
+{ r = a + b; \
+ c = ((StgWord)(~(a^b) & (a^r))) \
+ >> (BITS_PER_BYTE * sizeof(I_) - 1); \
+}
+
+
+#define subIntCzh(r,c,a,b) \
+{ r = a - b; \
+ c = ((StgWord)((a^b) & (a^r))) \
+ >> (BITS_PER_BYTE * sizeof(I_) - 1); \
+}
+
+/* Multiply with overflow checking.
+ *
+ * This is slightly more tricky - the usual sign rules for add/subtract
+ * don't apply.
*
- * They are all defined in terms of 32-bit integers and use the GCC
- * 'long long' extension to get a 64-bit result. We'd like to use
- * 64-bit integers on 64-bit architectures, but it seems that gcc's
- * 'long long' type is set at 64-bits even on a 64-bit machine.
+ * On x86 hardware we use a hand-crafted assembly fragment to do the job.
+ *
+ * On other 32-bit machines we use gcc's 'long long' types, finding
+ * overflow with some careful bit-twiddling.
+ *
+ * On 64-bit machines where gcc's 'long long' type is also 64-bits,
+ * we use a crude approximation, testing whether either operand is
+ * larger than 32-bits; if neither is, then we go ahead with the
+ * multiplication.
*/
+#if i386_TARGET_ARCH
+
+#define mulIntCzh(r,c,a,b) \
+{ \
+ __asm__("xorl %1,%1\n\t \
+ imull %2,%3\n\t \
+ jno 1f\n\t \
+ movl $1,%1\n\t \
+ 1:" \
+ : "=r" (r), "=&r" (c) : "r" (a), "0" (b)); \
+}
+
+#elif SIZEOF_VOID_P == 4
+
#ifdef WORDS_BIGENDIAN
#define C 0
#define R 1
StgInt32 i[2];
} long_long_u ;
-#define addWithCarryzh(r,c,a,b) \
-{ long_long_u z; \
- z.l = a + b; \
+#define mulIntCzh(r,c,a,b) \
+{ \
+ long_long_u z; \
+ z.l = (StgInt64)a * (StgInt64)b; \
r = z.i[R]; \
c = z.i[C]; \
+ if (c == 0 || c == -1) { \
+ c = ((StgWord)((a^b) ^ r)) \
+ >> (BITS_PER_BYTE * sizeof(I_) - 1); \
+ } \
}
+/* Careful: the carry calculation above is extremely delicate. Make sure
+ * you test it thoroughly after changing it.
+ */
+#else
-#define subWithCarryzh(r,c,a,b) \
-{ long_long_u z; \
- z.l = a + b; \
- r = z.i[R]; \
- c = z.i[C]; \
-}
+#define HALF_INT (((I_)1) << (BITS_PER_BYTE * sizeof(I_) / 2))
-#define mulWithCarryzh(r,c,a,b) \
-{ long_long_u z; \
- z.l = a * b; \
- r = z.i[R]; \
- c = z.i[C]; \
+#define stg_abs(a) ((a) < 0 ? -(a) : (a))
+
+#define mulIntCzh(r,c,a,b) \
+{ \
+  if (stg_abs(a) >= HALF_INT ||			\
+      stg_abs(b) >= HALF_INT) {		\
+ c = 1; \
+ } else { \
+ r = a * b; \
+ c = 0; \
+ } \
}
+#endif
/* -----------------------------------------------------------------------------
Word PrimOps.
#define xorzh(r,a,b) r=(a)^(b)
#define notzh(r,a) r=~(a)
-#define shiftLzh(r,a,b) r=(a)<<(b)
-#define shiftRLzh(r,a,b) r=(a)>>(b)
-#define iShiftLzh(r,a,b) r=(a)<<(b)
+/* The extra tests below properly define the behaviour when shifting
+ * by offsets larger than the width of the value being shifted. Doing
+ * so is undefined in C (and in fact gives different answers depending
+ * on whether the operation is constant folded or not with gcc on x86!)
+ */
+
+#define shiftLzh(r,a,b) r=((b) >= BITS_IN(W_)) ? 0 : (a)<<(b)
+#define shiftRLzh(r,a,b) r=((b) >= BITS_IN(W_)) ? 0 : (a)>>(b)
+#define iShiftLzh(r,a,b) r=((b) >= BITS_IN(W_)) ? 0 : (a)<<(b)
/* Right shifting of signed quantities is not portable in C, so
the behaviour you'll get from using these primops depends
on the whatever your C compiler is doing. ToDo: fix/document. -- sof 8/98
*/
-#define iShiftRAzh(r,a,b) r=(a)>>(b)
-#define iShiftRLzh(r,a,b) r=(a)>>(b)
+#define iShiftRAzh(r,a,b) r=((b) >= BITS_IN(I_)) ? (((a) < 0) ? -1 : 0) : (a)>>(b)
+#define iShiftRLzh(r,a,b) r=((b) >= BITS_IN(I_)) ? 0 : ((W_)(a))>>(b)
#define int2Wordzh(r,a) r=(W_)(a)
#define word2Intzh(r,a) r=(I_)(a)
/* We can do integer2Int and cmpInteger inline, since they don't need
* to allocate any memory.
+ *
+ * integer2Int# is now modular.
*/
-#define integer2Intzh(r, aa,sa,da) \
-{ MP_INT arg; \
- \
- arg._mp_alloc = (aa); \
- arg._mp_size = (sa); \
- arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
- \
- (r) = RET_PRIM_STGCALL1(I_,mpz_get_si,&arg); \
+#define integer2Intzh(r, sa,da) \
+{ StgWord word0 = ((StgWord *)BYTE_ARR_CTS(da))[0]; \
+ int size = sa; \
+ \
+ (r) = \
+ ( size == 0 ) ? \
+ 0 : \
+    ( size < 0 && word0 != 0x80000000 ) ?	\
+ -(I_)word0 : \
+ (I_)word0; \
}
-#define integer2Wordzh(r, aa,sa,da) \
-{ MP_INT arg; \
- \
- arg._mp_alloc = (aa); \
- arg._mp_size = (sa); \
- arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
- \
- (r) = RET_PRIM_STGCALL1(I_,mpz_get_ui,&arg); \
+#define integer2Wordzh(r, sa,da) \
+{ StgWord word0 = ((StgWord *)BYTE_ARR_CTS(da))[0]; \
+ int size = sa; \
+ (r) = ( size == 0 ) ? 0 : word0 ; \
}
-#define cmpIntegerzh(r, a1,s1,d1, a2,s2,d2) \
-{ MP_INT arg1; \
- MP_INT arg2; \
- \
- arg1._mp_alloc= (a1); \
- arg1._mp_size = (s1); \
- arg1._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d1)); \
- arg2._mp_alloc= (a2); \
- arg2._mp_size = (s2); \
- arg2._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d2)); \
- \
- (r) = RET_PRIM_STGCALL2(I_,mpz_cmp,&arg1,&arg2); \
+#define cmpIntegerzh(r, s1,d1, s2,d2) \
+{ MP_INT arg1; \
+ MP_INT arg2; \
+ \
+ arg1._mp_size = (s1); \
+ arg1._mp_alloc= ((StgArrWords *)d1)->words; \
+ arg1._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d1)); \
+ arg2._mp_size = (s2); \
+ arg2._mp_alloc= ((StgArrWords *)d2)->words; \
+ arg2._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d2)); \
+ \
+ (r) = RET_PRIM_STGCALL2(I_,mpz_cmp,&arg1,&arg2); \
}
-/* A glorious hack: calling mpz_neg would entail allocation and
- * copying, but by looking at what mpz_neg actually does, we can
- * derive a better version:
- */
+#define cmpIntegerIntzh(r, s,d, i) \
+{ MP_INT arg; \
+ \
+ arg._mp_size = (s); \
+ arg._mp_alloc = ((StgArrWords *)d)->words; \
+ arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d)); \
+ \
+ (r) = RET_PRIM_STGCALL2(I_,mpz_cmp_si,&arg,i); \
+}
-#define negateIntegerzh(ra, rs, rd, a, s, d) \
-{ \
- (ra) = (a); \
- (rs) = -(s); \
- (rd) = d; \
+/* I think mp_limb_t must be the same size as StgInt for this to work
+ * properly --SDM
+ */
+#define gcdIntzh(r,a,b) \
+{ StgInt aa = a; \
+ r = (aa) ? (b) ? \
+ RET_STGCALL3(StgInt, mpn_gcd_1, (mp_limb_t *)(&aa), 1, (mp_limb_t)(b)) \
+ : abs(aa) \
+ : abs(b); \
}
+#define gcdIntegerIntzh_fast(r,a,sb,b) \
+  r = RET_STGCALL3(StgInt, mpn_gcd_1, (unsigned long int *) b, sb, (mp_limb_t)(a))
+
/* The rest are all out-of-line: -------- */
/* Integer arithmetic */
EF_(timesIntegerzh_fast);
EF_(gcdIntegerzh_fast);
EF_(quotRemIntegerzh_fast);
+EF_(quotIntegerzh_fast);
+EF_(remIntegerzh_fast);
+EF_(divExactIntegerzh_fast);
EF_(divModIntegerzh_fast);
/* Conversions */
EF_(word2Integerzh_fast);
EF_(addr2Integerzh_fast);
-/* Floating-point encodings/decodings */
-EF_(encodeFloatzh_fast);
+/* Floating-point decodings */
EF_(decodeFloatzh_fast);
-
-EF_(encodeDoublezh_fast);
EF_(decodeDoublezh_fast);
/* -----------------------------------------------------------------------------
#ifdef SUPPORT_LONG_LONGS
-#define integerToWord64zh(r, aa,sa,da) \
-{ unsigned long int* d; \
- StgNat64 res; \
- \
- d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
- if ( (aa) == 0 ) { \
- res = (LW_)0; \
- } else if ( (aa) == 1) { \
- res = (LW_)d[0]; \
- } else { \
- res = (LW_)d[0] + (LW_)d[1] * 0x100000000ULL; \
- } \
- (r) = res; \
+#define integerToWord64zh(r, sa,da) \
+{ unsigned long int* d; \
+ I_ aa; \
+ StgWord64 res; \
+ \
+ d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
+ aa = ((StgArrWords *)da)->words; \
+ if ( (aa) == 0 ) { \
+ res = (LW_)0; \
+ } else if ( (aa) == 1) { \
+ res = (LW_)d[0]; \
+ } else { \
+ res = (LW_)d[0] + (LW_)d[1] * 0x100000000ULL; \
+ } \
+ (r) = res; \
}
-#define integerToInt64zh(r, aa,sa,da) \
-{ unsigned long int* d; \
- StgInt64 res; \
- \
- d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
- if ( (aa) == 0 ) { \
- res = (LI_)0; \
- } else if ( (aa) == 1) { \
- res = (LI_)d[0]; \
- } else { \
- res = (LI_)d[0] + (LI_)d[1] * 0x100000000LL; \
- if ( sa < 0 ) { \
- res = (LI_)-res; \
- } \
- } \
- (r) = res; \
+#define integerToInt64zh(r, sa,da) \
+{ unsigned long int* d; \
+ I_ aa; \
+ StgInt64 res; \
+ \
+ d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
+ aa = ((StgArrWords *)da)->words; \
+ if ( (aa) == 0 ) { \
+ res = (LI_)0; \
+ } else if ( (aa) == 1) { \
+ res = (LI_)d[0]; \
+ } else { \
+ res = (LI_)d[0] + (LI_)d[1] * 0x100000000LL; \
+ if ( sa < 0 ) { \
+ res = (LI_)-res; \
+ } \
+ } \
+ (r) = res; \
}
/* Conversions */
/* The rest are (way!) out of line, implemented via C entry points.
*/
-I_ stg_gtWord64 (StgNat64, StgNat64);
-I_ stg_geWord64 (StgNat64, StgNat64);
-I_ stg_eqWord64 (StgNat64, StgNat64);
-I_ stg_neWord64 (StgNat64, StgNat64);
-I_ stg_ltWord64 (StgNat64, StgNat64);
-I_ stg_leWord64 (StgNat64, StgNat64);
+I_ stg_gtWord64 (StgWord64, StgWord64);
+I_ stg_geWord64 (StgWord64, StgWord64);
+I_ stg_eqWord64 (StgWord64, StgWord64);
+I_ stg_neWord64 (StgWord64, StgWord64);
+I_ stg_ltWord64 (StgWord64, StgWord64);
+I_ stg_leWord64 (StgWord64, StgWord64);
I_ stg_gtInt64 (StgInt64, StgInt64);
I_ stg_geInt64 (StgInt64, StgInt64);
I_ stg_ltInt64 (StgInt64, StgInt64);
I_ stg_leInt64 (StgInt64, StgInt64);
-LW_ stg_remWord64 (StgNat64, StgNat64);
-LW_ stg_quotWord64 (StgNat64, StgNat64);
+LW_ stg_remWord64 (StgWord64, StgWord64);
+LW_ stg_quotWord64 (StgWord64, StgWord64);
LI_ stg_remInt64 (StgInt64, StgInt64);
LI_ stg_quotInt64 (StgInt64, StgInt64);
LI_ stg_minusInt64 (StgInt64, StgInt64);
LI_ stg_timesInt64 (StgInt64, StgInt64);
-LW_ stg_and64 (StgNat64, StgNat64);
-LW_ stg_or64 (StgNat64, StgNat64);
-LW_ stg_xor64 (StgNat64, StgNat64);
-LW_ stg_not64 (StgNat64);
+LW_ stg_and64 (StgWord64, StgWord64);
+LW_ stg_or64 (StgWord64, StgWord64);
+LW_ stg_xor64 (StgWord64, StgWord64);
+LW_ stg_not64 (StgWord64);
-LW_ stg_shiftL64 (StgNat64, StgInt);
-LW_ stg_shiftRL64 (StgNat64, StgInt);
+LW_ stg_shiftL64 (StgWord64, StgInt);
+LW_ stg_shiftRL64 (StgWord64, StgInt);
LI_ stg_iShiftL64 (StgInt64, StgInt);
LI_ stg_iShiftRL64 (StgInt64, StgInt);
LI_ stg_iShiftRA64 (StgInt64, StgInt);
LW_ stg_int64ToWord64 (StgInt64);
LW_ stg_wordToWord64 (StgWord);
-W_ stg_word64ToWord (StgNat64);
-LI_ stg_word64ToInt64 (StgNat64);
+W_ stg_word64ToWord (StgWord64);
+LI_ stg_word64ToInt64 (StgWord64);
#endif
/* -----------------------------------------------------------------------------
#ifdef DEBUG
#define BYTE_ARR_CTS(a) \
- ({ ASSERT(GET_INFO(a) == &ARR_WORDS_info); \
+ ({ ASSERT(GET_INFO((StgArrWords *)(a)) == &ARR_WORDS_info); \
REAL_BYTE_ARR_CTS(a); })
#define PTRS_ARR_CTS(a) \
- ({ ASSERT((GET_INFO(a) == &ARR_PTRS_info) \
- || (GET_INFO(a) == &MUT_ARR_PTRS_info)); \
+ ({ ASSERT((GET_INFO((StgMutArrPtrs *)(a)) == &MUT_ARR_PTRS_FROZEN_info) \
+ || (GET_INFO((StgMutArrPtrs *)(a)) == &MUT_ARR_PTRS_info)); \
REAL_PTRS_ARR_CTS(a); })
#else
#define BYTE_ARR_CTS(a) REAL_BYTE_ARR_CTS(a)
}
#define unsafeFreezzeByteArrayzh(r,a) r=(a)
+#define unsafeThawByteArrayzh(r,a) r=(a)
+
+EF_(unsafeThawArrayzh_fast);
#define sizzeofByteArrayzh(r,a) \
r = (((StgArrWords *)(a))->words * sizeof(W_))
/* We only support IEEE floating point format */
#include "ieee-flpt.h"
-#if FLOATS_AS_DOUBLES /* i.e. 64-bit machines */
-#define encodeFloatzh(r, aa,sa,da, expon) encodeDoublezh(r, aa,sa,da, expon)
-#else
-#define encodeFloatzh(r, aa,sa,da, expon) \
-{ MP_INT arg; \
- /* Does not allocate memory */ \
- \
- arg._mp_alloc = aa; \
- arg._mp_size = sa; \
- arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
- \
- r = RET_PRIM_STGCALL2(StgFloat, __encodeFloat,&arg,(expon));\
-}
-#endif /* FLOATS_AS_DOUBLES */
-
-#define encodeDoublezh(r, aa,sa,da, expon) \
-{ MP_INT arg; \
- /* Does not allocate memory */ \
- \
- arg._mp_alloc = aa; \
- arg._mp_size = sa; \
- arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
- \
- r = RET_PRIM_STGCALL2(StgDouble, __encodeDouble,&arg,(expon));\
-}
-
/* The decode operations are out-of-line because they need to allocate
* a byte array.
*/
-
#ifdef FLOATS_AS_DOUBLES
#define decodeFloatzh_fast decodeDoublezh_fast
#else
/* grimy low-level support functions defined in StgPrimFloat.c */
-extern StgDouble __encodeDouble (MP_INT *s, I_ e);
-extern StgFloat __encodeFloat (MP_INT *s, I_ e);
+extern StgDouble __encodeDouble (I_ size, StgByteArray arr, I_ e);
+extern StgDouble __int_encodeDouble (I_ j, I_ e);
+#ifndef FLOATS_AS_DOUBLES
+extern StgFloat __encodeFloat (I_ size, StgByteArray arr, I_ e);
+extern StgFloat __int_encodeFloat (I_ j, I_ e);
+#endif
extern void __decodeDouble (MP_INT *man, I_ *_exp, StgDouble dbl);
extern void __decodeFloat (MP_INT *man, I_ *_exp, StgFloat flt);
extern StgInt isDoubleNaN(StgDouble d);
Delay/Wait PrimOps
-------------------------------------------------------------------------- */
-/* Hmm, I'll think about these later. */
+EF_(waitReadzh_fast);
+EF_(waitWritezh_fast);
+EF_(delayzh_fast);
/* -----------------------------------------------------------------------------
Primitive I/O, error-handling PrimOps
#endif
/* -----------------------------------------------------------------------------
- Parallel PrimOps.
+ Concurrency/Exception PrimOps.
-------------------------------------------------------------------------- */
EF_(forkzh_fast);
+EF_(yieldzh_fast);
EF_(killThreadzh_fast);
EF_(seqzh_fast);
+EF_(blockAsyncExceptionszh_fast);
+EF_(unblockAsyncExceptionszh_fast);
+
+#define myThreadIdzh(t) (t = CurrentTSO)
+
+extern int cmp_thread(const StgTSO *tso1, const StgTSO *tso2);
/* Hmm, I'll think about these later. */
/* -----------------------------------------------------------------------------
#ifndef PAR
EF_(mkWeakzh_fast);
-EF_(finalizeWeakzh_fast);
+EF_(finalizzeWeakzh_fast);
#define deRefWeakzh(code,val,w) \
if (((StgWeak *)w)->header.info == &WEAK_info) { \
#endif
/* -----------------------------------------------------------------------------
+ Constructor tags
+ -------------------------------------------------------------------------- */
+
+#define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
+/* tagToEnum# is handled directly by the code generator. */
+
+/* -----------------------------------------------------------------------------
Signal processing. Not really primops, but called directly from
Haskell.
-------------------------------------------------------------------------- */