X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fincludes%2FPrimOps.h;h=76f392af70f5542683c8a52dee8077d0cfc8f4d2;hb=efdcccf6b431cbc54a1c4da33bb1ac80c579bea4;hp=4ccbd278b30bf66f2498004dcdc80df73215ec20;hpb=b41f38a42b197dfb166ebfe78476b24982379e19;p=ghc-hetmet.git diff --git a/ghc/includes/PrimOps.h b/ghc/includes/PrimOps.h index 4ccbd27..76f392a 100644 --- a/ghc/includes/PrimOps.h +++ b/ghc/includes/PrimOps.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: PrimOps.h,v 1.17 1999/02/11 14:22:57 simonm Exp $ + * $Id: PrimOps.h,v 1.54 2000/05/12 20:01:28 panne Exp $ * * (c) The GHC Team, 1998-1999 * @@ -58,9 +58,6 @@ #define zlzhzh(r,a,b) r=(I_)((a) <(b)) #define zlzezhzh(r,a,b) r=(I_)((a)<=(b)) -/* used by returning comparison primops, defined in Prims.hc. */ -extern const StgClosure *PrelBase_Bool_closure_tbl[]; - /* ----------------------------------------------------------------------------- Char# PrimOps. -------------------------------------------------------------------------- */ @@ -82,15 +79,64 @@ I_ stg_div (I_ a, I_ b); #define remIntzh(r,a,b) r=(a)%(b) #define negateIntzh(r,a) r=-(a) -/* The following operations are the standard add,subtract and multiply - * except that they return a carry if the operation overflows. +/* ----------------------------------------------------------------------------- + * Int operations with carry. + * -------------------------------------------------------------------------- */ + +/* With some bit-twiddling, we can define int{Add,Sub}Czh portably in + * C, and without needing any comparisons. This may not be the + * fastest way to do it - if you have better code, please send it! --SDM + * + * Return : r = a + b, c = 0 if no overflow, 1 on overflow. + * + * We currently don't make use of the r value if c is != 0 (i.e. + * overflow), we just convert to big integers and try again. 
This + * could be improved by making r and c the correct values for + * plugging into a new J#. + */ +#define addIntCzh(r,c,a,b) \ +{ r = a + b; \ + c = ((StgWord)(~(a^b) & (a^r))) \ + >> (BITS_PER_BYTE * sizeof(I_) - 1); \ +} + + +#define subIntCzh(r,c,a,b) \ +{ r = a - b; \ + c = ((StgWord)((a^b) & (a^r))) \ + >> (BITS_PER_BYTE * sizeof(I_) - 1); \ +} + +/* Multiply with overflow checking. * - * They are all defined in terms of 32-bit integers and use the GCC - * 'long long' extension to get a 64-bit result. We'd like to use - * 64-bit integers on 64-bit architectures, but it seems that gcc's - * 'long long' type is set at 64-bits even on a 64-bit machine. + * This is slightly more tricky - the usual sign rules for add/subtract + * don't apply. + * + * On x86 hardware we use a hand-crafted assembly fragment to do the job. + * + * On other 32-bit machines we use gcc's 'long long' types, finding + * overflow with some careful bit-twiddling. + * + * On 64-bit machines where gcc's 'long long' type is also 64-bits, + * we use a crude approximation, testing whether either operand is + * larger than 32-bits; if neither is, then we go ahead with the + * multiplication. */ +#if i386_TARGET_ARCH + +#define mulIntCzh(r,c,a,b) \ +{ \ + __asm__("xorl %1,%1\n\t \ + imull %2,%3\n\t \ + jno 1f\n\t \ + movl $1,%1\n\t \ + 1:" \ + : "=r" (r), "=&r" (c) : "r" (a), "0" (b)); \ +} + +#elif SIZEOF_VOID_P == 4 + #ifdef WORDS_BIGENDIAN #define C 0 #define R 1 @@ -104,27 +150,38 @@ typedef union { StgInt32 i[2]; } long_long_u ; -#define addWithCarryzh(r,c,a,b) \ -{ long_long_u z; \ - z.l = a + b; \ +#define mulIntCzh(r,c,a,b) \ +{ \ + long_long_u z; \ + z.l = (StgInt64)a * (StgInt64)b; \ r = z.i[R]; \ c = z.i[C]; \ + if (c == 0 || c == -1) { \ + c = ((StgWord)((a^b) ^ r)) \ + >> (BITS_PER_BYTE * sizeof(I_) - 1); \ + } \ } +/* Careful: the carry calculation above is extremely delicate. Make sure + * you test it thoroughly after changing it. 
+ */ +#else -#define subWithCarryzh(r,c,a,b) \ -{ long_long_u z; \ - z.l = a + b; \ - r = z.i[R]; \ - c = z.i[C]; \ -} +#define HALF_INT ((I_)1 << (BITS_PER_BYTE * sizeof(I_) / 2)) /* 2^(half the bits of I_); the cast keeps the shift from being evaluated in plain int */ -#define mulWithCarryzh(r,c,a,b) \ -{ long_long_u z; \ - z.l = a * b; \ - r = z.i[R]; \ - c = z.i[C]; \ +#define stg_abs(a) ((a) < 0 ? -(a) : (a)) +/* Conservative overflow test: report overflow (c = 1, r left unset) when either operand's magnitude is >= HALF_INT; otherwise multiply and clear c. */ +#define mulIntCzh(r,c,a,b) \ +{ \ + if (stg_abs(a) >= HALF_INT || \ + stg_abs(b) >= HALF_INT) { \ + c = 1; \ + } else { \ + r = (a) * (b); \ + c = 0; \ + } \ } +#endif /* ----------------------------------------------------------------------------- Word PrimOps. @@ -138,15 +195,21 @@ typedef union { #define xorzh(r,a,b) r=(a)^(b) #define notzh(r,a) r=~(a) -#define shiftLzh(r,a,b) r=(a)<<(b) -#define shiftRLzh(r,a,b) r=(a)>>(b) -#define iShiftLzh(r,a,b) r=(a)<<(b) +/* The extra tests below properly define the behaviour when shifting + * by offsets larger than the width of the value being shifted. Doing + * so is undefined in C (and in fact gives different answers depending + * on whether the operation is constant folded or not with gcc on x86!) + */ + +#define shiftLzh(r,a,b) r=((b) >= BITS_IN(W_)) ? 0 : (a)<<(b) +#define shiftRLzh(r,a,b) r=((b) >= BITS_IN(W_)) ? 0 : (a)>>(b) +#define iShiftLzh(r,a,b) r=((b) >= BITS_IN(W_)) ? 0 : (a)<<(b) /* Right shifting of signed quantities is not portable in C, so the behaviour you'll get from using these primops depends on the whatever your C compiler is doing. ToDo: fix/document. -- sof 8/98 */ -#define iShiftRAzh(r,a,b) r=(a)>>(b) -#define iShiftRLzh(r,a,b) r=(a)>>(b) +#define iShiftRAzh(r,a,b) r=((b) >= BITS_IN(I_)) ? (((a) < 0) ? -1 : 0) : (a)>>(b) +#define iShiftRLzh(r,a,b) r=((b) >= BITS_IN(I_)) ? 
0 : ((W_)(a))>>(b) #define int2Wordzh(r,a) r=(W_)(a) #define word2Intzh(r,a) r=(I_)(a) @@ -158,15 +221,16 @@ typedef union { #define int2Addrzh(r,a) r=(A_)(a) #define addr2Intzh(r,a) r=(I_)(a) -#define indexCharOffAddrzh(r,a,i) r= ((C_ *)(a))[i] -#define indexIntOffAddrzh(r,a,i) r= ((I_ *)(a))[i] -#define indexAddrOffAddrzh(r,a,i) r= ((PP_)(a))[i] -#define indexFloatOffAddrzh(r,a,i) r= PK_FLT((P_) (((StgFloat *)(a)) + i)) -#define indexDoubleOffAddrzh(r,a,i) r= PK_DBL((P_) (((StgDouble *)(a)) + i)) -#define indexStablePtrOffAddrzh(r,a,i) r= ((StgStablePtr *)(a))[i] +#define readCharOffAddrzh(r,a,i) r= ((C_ *)(a))[i] +#define readIntOffAddrzh(r,a,i) r= ((I_ *)(a))[i] +#define readWordOffAddrzh(r,a,i) r= ((W_ *)(a))[i] +#define readAddrOffAddrzh(r,a,i) r= ((PP_)(a))[i] +#define readFloatOffAddrzh(r,a,i) r= PK_FLT((P_) (((StgFloat *)(a)) + i)) +#define readDoubleOffAddrzh(r,a,i) r= PK_DBL((P_) (((StgDouble *)(a)) + i)) +#define readStablePtrOffAddrzh(r,a,i) r= ((StgStablePtr *)(a))[i] #ifdef SUPPORT_LONG_LONGS -#define indexInt64OffAddrzh(r,a,i) r= ((LI_ *)(a))[i] -#define indexWord64OffAddrzh(r,a,i) r= ((LW_ *)(a))[i] +#define readInt64OffAddrzh(r,a,i) r= ((LI_ *)(a))[i] +#define readWord64OffAddrzh(r,a,i) r= ((LW_ *)(a))[i] #endif #define writeCharOffAddrzh(a,i,v) ((C_ *)(a))[i] = (v) @@ -182,6 +246,18 @@ typedef union { #define writeWord64OffAddrzh(a,i,v) ((LW_ *)(a))[i] = (v) #endif +#define indexCharOffAddrzh(r,a,i) r= ((C_ *)(a))[i] +#define indexIntOffAddrzh(r,a,i) r= ((I_ *)(a))[i] +#define indexWordOffAddrzh(r,a,i) r= ((W_ *)(a))[i] +#define indexAddrOffAddrzh(r,a,i) r= ((PP_)(a))[i] +#define indexFloatOffAddrzh(r,a,i) r= PK_FLT((P_) (((StgFloat *)(a)) + i)) +#define indexDoubleOffAddrzh(r,a,i) r= PK_DBL((P_) (((StgDouble *)(a)) + i)) +#define indexStablePtrOffAddrzh(r,a,i) r= ((StgStablePtr *)(a))[i] +#ifdef SUPPORT_LONG_LONGS +#define indexInt64OffAddrzh(r,a,i) r= ((LI_ *)(a))[i] +#define indexWord64OffAddrzh(r,a,i) r= ((LW_ *)(a))[i] +#endif + /* 
----------------------------------------------------------------------------- Float PrimOps. -------------------------------------------------------------------------- */ @@ -246,54 +322,66 @@ typedef union { /* We can do integer2Int and cmpInteger inline, since they don't need * to allocate any memory. + * + * integer2Int# is now modular. */ -#define integer2Intzh(r, aa,sa,da) \ -{ MP_INT arg; \ - \ - arg._mp_alloc = (aa); \ - arg._mp_size = (sa); \ - arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ - \ - (r) = RET_PRIM_STGCALL1(I_,mpz_get_si,&arg); \ +#define integer2Intzh(r, sa,da) \ +{ StgWord word0 = ((StgWord *)BYTE_ARR_CTS(da))[0]; \ + int size = sa; \ + /* size == 0 gives 0; a negative size negates the low limb, except when that limb is exactly the top bit (already the most negative I_, so negating would overflow) */ \ + (r) = \ + ( size == 0 ) ? \ + 0 : \ + ( size < 0 && word0 != ((StgWord)1 << (BITS_PER_BYTE * sizeof(I_) - 1)) ) ? \ + -(I_)word0 : \ + (I_)word0; \ } -#define integer2Wordzh(r, aa,sa,da) \ -{ MP_INT arg; \ - \ - arg._mp_alloc = (aa); \ - arg._mp_size = (sa); \ - arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ - \ - (r) = RET_PRIM_STGCALL1(I_,mpz_get_ui,&arg); \ +#define integer2Wordzh(r, sa,da) \ +{ StgWord word0 = ((StgWord *)BYTE_ARR_CTS(da))[0]; \ + int size = sa; \ + (r) = ( size == 0 ) ? 
0 : word0 ; \ } -#define cmpIntegerzh(r, a1,s1,d1, a2,s2,d2) \ -{ MP_INT arg1; \ - MP_INT arg2; \ - \ - arg1._mp_alloc= (a1); \ - arg1._mp_size = (s1); \ - arg1._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d1)); \ - arg2._mp_alloc= (a2); \ - arg2._mp_size = (s2); \ - arg2._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d2)); \ - \ - (r) = RET_PRIM_STGCALL2(I_,mpz_cmp,&arg1,&arg2); \ +#define cmpIntegerzh(r, s1,d1, s2,d2) \ +{ MP_INT arg1; \ + MP_INT arg2; \ + \ + arg1._mp_size = (s1); \ + arg1._mp_alloc= ((StgArrWords *)d1)->words; \ + arg1._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d1)); \ + arg2._mp_size = (s2); \ + arg2._mp_alloc= ((StgArrWords *)d2)->words; \ + arg2._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d2)); \ + \ + (r) = RET_PRIM_STGCALL2(I_,mpz_cmp,&arg1,&arg2); \ } -/* A glorious hack: calling mpz_neg would entail allocation and - * copying, but by looking at what mpz_neg actually does, we can - * derive a better version: - */ +#define cmpIntegerIntzh(r, s,d, i) \ +{ MP_INT arg; \ + \ + arg._mp_size = (s); \ + arg._mp_alloc = ((StgArrWords *)d)->words; \ + arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d)); \ + \ + (r) = RET_PRIM_STGCALL2(I_,mpz_cmp_si,&arg,i); \ +} -#define negateIntegerzh(ra, rs, rd, a, s, d) \ -{ \ - (ra) = (a); \ - (rs) = -(s); \ - (rd) = d; \ +/* I think mp_limb_t must be the same size as StgInt for this to work + * properly --SDM + */ +#define gcdIntzh(r,a,b) \ +{ StgInt aa = a; \ + r = (aa) ? (b) ? 
\ + RET_STGCALL3(StgInt, mpn_gcd_1, (mp_limb_t *)(&aa), 1, (mp_limb_t)(b)) \ + : abs(aa) \ + : abs(b); \ } +#define gcdIntegerIntzh(r,a,sb,b) \ + r = RET_STGCALL3(StgInt, mpn_gcd_1, (unsigned long int *) b, sb, (mp_limb_t)(a)) +/* NOTE(review): gcdIntegerIntzh now stores the mpn_gcd_1 result in r (it was previously discarded). Unlike the other Integer macros it casts b directly instead of going through BYTE_ARR_CTS -- confirm the argument order and representation against the callers. */ /* The rest are all out-of-line: -------- */ /* Integer arithmetic */ @@ -302,6 +390,9 @@ EF_(minusIntegerzh_fast); EF_(timesIntegerzh_fast); EF_(gcdIntegerzh_fast); EF_(quotRemIntegerzh_fast); +EF_(quotIntegerzh_fast); +EF_(remIntegerzh_fast); +EF_(divExactIntegerzh_fast); EF_(divModIntegerzh_fast); /* Conversions */ @@ -309,11 +400,8 @@ EF_(int2Integerzh_fast); EF_(word2Integerzh_fast); EF_(addr2Integerzh_fast); -/* Floating-point encodings/decodings */ -EF_(encodeFloatzh_fast); +/* Floating-point decodings */ EF_(decodeFloatzh_fast); - -EF_(encodeDoublezh_fast); EF_(decodeDoublezh_fast); /* ----------------------------------------------------------------------------- @@ -322,37 +410,41 @@ EF_(decodeDoublezh_fast); #ifdef SUPPORT_LONG_LONGS -#define integerToWord64zh(r, aa,sa,da) \ -{ unsigned long int* d; \ - StgNat64 res; \ - \ - d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ - if ( (aa) == 0 ) { \ - res = (LW_)0; \ - } else if ( (aa) == 1) { \ - res = (LW_)d[0]; \ - } else { \ - res = (LW_)d[0] + (LW_)d[1] * 0x100000000ULL; \ - } \ - (r) = res; \ +#define integerToWord64zh(r, sa,da) \ +{ unsigned long int* d; \ + I_ s; \ + StgWord64 res; \ + /* s is the signed limb count: with |s| == 1 only d[0] exists, so d[1] must not be read; the sign is ignored here, as in the multi-limb branch */ \ + d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ + s = (sa); \ + if ( s == 0 ) { \ + res = (LW_)0; \ + } else if ( s == 1 || s == -1 ) { \ + res = (LW_)d[0]; \ + } else { \ + res = (LW_)d[0] + (LW_)d[1] * 0x100000000ULL; \ + } \ + (r) = res; \ } -#define integerToInt64zh(r, aa,sa,da) \ -{ unsigned long int* d; \ - StgInt64 res; \ - \ - d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ - if ( (aa) == 0 ) { \ - res = (LI_)0; \ - } else if ( (aa) == 1) { \ - res = (LI_)d[0]; \ - } else { \ - res = (LI_)d[0] + (LI_)d[1] * 0x100000000LL; \ - if ( sa < 0 ) { \ - res = (LI_)-res; \ - } \ - } \ - (r) = res; \ +#define 
integerToInt64zh(r, sa,da) \ +{ unsigned long int* d; \ + I_ s; \ + StgInt64 res; \ + /* s is the signed limb count: a single-limb value (s == 1 or s == -1) uses d[0] only, negated when s < 0; d[1] is read only for larger sizes */ \ + d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ + s = (sa); \ + if ( s == 0 ) { \ + res = (LI_)0; \ + } else if ( s == 1 || s == -1 ) { \ + res = ( s < 0 ) ? -(LI_)d[0] : (LI_)d[0]; \ + } else { \ + res = (LI_)d[0] + (LI_)d[1] * 0x100000000LL; \ + if ( s < 0 ) { \ + res = (LI_)-res; \ + } \ + } \ + (r) = res; \ } /* Conversions */ @@ -361,12 +453,12 @@ EF_(word64ToIntegerzh_fast); /* The rest are (way!) out of line, implemented via C entry points. */ -I_ stg_gtWord64 (StgNat64, StgNat64); -I_ stg_geWord64 (StgNat64, StgNat64); -I_ stg_eqWord64 (StgNat64, StgNat64); -I_ stg_neWord64 (StgNat64, StgNat64); -I_ stg_ltWord64 (StgNat64, StgNat64); -I_ stg_leWord64 (StgNat64, StgNat64); +I_ stg_gtWord64 (StgWord64, StgWord64); +I_ stg_geWord64 (StgWord64, StgWord64); +I_ stg_eqWord64 (StgWord64, StgWord64); +I_ stg_neWord64 (StgWord64, StgWord64); +I_ stg_ltWord64 (StgWord64, StgWord64); +I_ stg_leWord64 (StgWord64, StgWord64); I_ stg_gtInt64 (StgInt64, StgInt64); I_ stg_geInt64 (StgInt64, StgInt64); @@ -375,8 +467,8 @@ I_ stg_neInt64 (StgInt64, StgInt64); I_ stg_ltInt64 (StgInt64, StgInt64); I_ stg_leInt64 (StgInt64, StgInt64); -LW_ stg_remWord64 (StgNat64, StgNat64); -LW_ stg_quotWord64 (StgNat64, StgNat64); +LW_ stg_remWord64 (StgWord64, StgWord64); +LW_ stg_quotWord64 (StgWord64, StgWord64); LI_ stg_remInt64 (StgInt64, StgInt64); LI_ stg_quotInt64 (StgInt64, StgInt64); @@ -385,13 +477,13 @@ LI_ stg_plusInt64 (StgInt64, StgInt64); LI_ stg_minusInt64 (StgInt64, StgInt64); LI_ stg_timesInt64 (StgInt64, StgInt64); -LW_ stg_and64 (StgNat64, StgNat64); -LW_ stg_or64 (StgNat64, StgNat64); -LW_ stg_xor64 (StgNat64, StgNat64); -LW_ stg_not64 (StgNat64); +LW_ stg_and64 (StgWord64, StgWord64); +LW_ stg_or64 (StgWord64, StgWord64); +LW_ stg_xor64 (StgWord64, StgWord64); +LW_ stg_not64 (StgWord64); -LW_ stg_shiftL64 (StgNat64, StgInt); -LW_ stg_shiftRL64 (StgNat64, StgInt); +LW_ stg_shiftL64 (StgWord64, StgInt); +LW_ 
stg_shiftRL64 (StgWord64, StgInt); LI_ stg_iShiftL64 (StgInt64, StgInt); LI_ stg_iShiftRL64 (StgInt64, StgInt); LI_ stg_iShiftRA64 (StgInt64, StgInt); @@ -401,8 +493,8 @@ I_ stg_int64ToInt (StgInt64); LW_ stg_int64ToWord64 (StgInt64); LW_ stg_wordToWord64 (StgWord); -W_ stg_word64ToWord (StgNat64); -LI_ stg_word64ToInt64 (StgNat64); +W_ stg_word64ToWord (StgWord64); +LI_ stg_word64ToInt64 (StgWord64); #endif /* ----------------------------------------------------------------------------- @@ -417,11 +509,11 @@ LI_ stg_word64ToInt64 (StgNat64); #ifdef DEBUG #define BYTE_ARR_CTS(a) \ - ({ ASSERT(GET_INFO(a) == &ARR_WORDS_info); \ + ({ ASSERT(GET_INFO((StgArrWords *)(a)) == &ARR_WORDS_info); \ REAL_BYTE_ARR_CTS(a); }) #define PTRS_ARR_CTS(a) \ - ({ ASSERT((GET_INFO(a) == &ARR_PTRS_info) \ - || (GET_INFO(a) == &MUT_ARR_PTRS_info)); \ + ({ ASSERT((GET_INFO((StgMutArrPtrs *)(a)) == &MUT_ARR_PTRS_FROZEN_info) \ + || (GET_INFO((StgMutArrPtrs *)(a)) == &MUT_ARR_PTRS_info)); \ REAL_PTRS_ARR_CTS(a); }) #else #define BYTE_ARR_CTS(a) REAL_BYTE_ARR_CTS(a) @@ -481,29 +573,6 @@ extern I_ resetGenSymZh(void); #define indexWord64Arrayzh(r,a,i) indexWord64OffAddrzh(r,BYTE_ARR_CTS(a),i) #endif -#define indexCharOffForeignObjzh(r,fo,i) indexCharOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i) -#define indexIntOffForeignObjzh(r,fo,i) indexIntOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i) -#define indexWordOffForeignObjzh(r,fo,i) indexWordOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i) -#define indexAddrOffForeignObjzh(r,fo,i) indexAddrOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i) -#define indexFloatOffForeignObjzh(r,fo,i) indexFloatOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i) -#define indexDoubleOffForeignObjzh(r,fo,i) indexDoubleOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i) -#define indexStablePtrOffForeignObjzh(r,fo,i) indexStablePtrOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i) -#ifdef SUPPORT_LONG_LONGS -#define indexInt64OffForeignObjzh(r,fo,i) indexInt64OffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i) -#define 
indexWord64OffForeignObjzh(r,fo,i) indexWord64OffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i) -#endif - -#define indexCharOffAddrzh(r,a,i) r= ((C_ *)(a))[i] -#define indexIntOffAddrzh(r,a,i) r= ((I_ *)(a))[i] -#define indexWordOffAddrzh(r,a,i) r= ((W_ *)(a))[i] -#define indexAddrOffAddrzh(r,a,i) r= ((PP_)(a))[i] -#define indexFloatOffAddrzh(r,a,i) r= PK_FLT((P_) (((StgFloat *)(a)) + i)) -#define indexDoubleOffAddrzh(r,a,i) r= PK_DBL((P_) (((StgDouble *)(a)) + i)) -#ifdef SUPPORT_LONG_LONGS -#define indexInt64OffAddrzh(r,a,i) r= ((LI_ *)(a))[i] -#define indexWord64OffAddrzh(r,a,i) r= ((LW_ *)(a))[i] -#endif - /* Freezing arrays-of-ptrs requires changing an info table, for the benefit of the generational collector. It needs to scavenge mutable objects, even if they are in old space. When they become immutable, @@ -517,6 +586,8 @@ extern I_ resetGenSymZh(void); #define unsafeFreezzeByteArrayzh(r,a) r=(a) +EF_(unsafeThawArrayzh_fast); + #define sizzeofByteArrayzh(r,a) \ r = (((StgArrWords *)(a))->words * sizeof(W_)) #define sizzeofMutableByteArrayzh(r,a) \ @@ -538,36 +609,9 @@ EF_(newArrayzh_fast); /* We only support IEEE floating point format */ #include "ieee-flpt.h" -#if FLOATS_AS_DOUBLES /* i.e. 64-bit machines */ -#define encodeFloatzh(r, aa,sa,da, expon) encodeDoublezh(r, aa,sa,da, expon) -#else -#define encodeFloatzh(r, aa,sa,da, expon) \ -{ MP_INT arg; \ - /* Does not allocate memory */ \ - \ - arg._mp_alloc = aa; \ - arg._mp_size = sa; \ - arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ - \ - r = RET_PRIM_STGCALL2(StgFloat, __encodeFloat,&arg,(expon));\ -} -#endif /* FLOATS_AS_DOUBLES */ - -#define encodeDoublezh(r, aa,sa,da, expon) \ -{ MP_INT arg; \ - /* Does not allocate memory */ \ - \ - arg._mp_alloc = aa; \ - arg._mp_size = sa; \ - arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ - \ - r = RET_PRIM_STGCALL2(StgDouble, __encodeDouble,&arg,(expon));\ -} - /* The decode operations are out-of-line because they need to allocate * a byte array. 
*/ - #ifdef FLOATS_AS_DOUBLES #define decodeFloatzh_fast decodeDoublezh_fast #else @@ -578,8 +622,12 @@ EF_(decodeDoublezh_fast); /* grimy low-level support functions defined in StgPrimFloat.c */ -extern StgDouble __encodeDouble (MP_INT *s, I_ e); -extern StgFloat __encodeFloat (MP_INT *s, I_ e); +extern StgDouble __encodeDouble (I_ size, StgByteArray arr, I_ e); +extern StgDouble __int_encodeDouble (I_ j, I_ e); +#ifndef FLOATS_AS_DOUBLES +extern StgFloat __encodeFloat (I_ size, StgByteArray arr, I_ e); +extern StgFloat __int_encodeFloat (I_ j, I_ e); +#endif extern void __decodeDouble (MP_INT *man, I_ *_exp, StgDouble dbl); extern void __decodeFloat (MP_INT *man, I_ *_exp, StgFloat flt); extern StgInt isDoubleNaN(StgDouble d); @@ -614,6 +662,7 @@ EF_(newMutVarzh_fast); #define isEmptyMVarzh(r,a) r=(I_)((GET_INFO((StgMVar*)(a))) == &EMPTY_MVAR_info ) EF_(newMVarzh_fast); EF_(takeMVarzh_fast); +EF_(tryTakeMVarzh_fast); EF_(putMVarzh_fast); @@ -621,7 +670,9 @@ EF_(putMVarzh_fast); Delay/Wait PrimOps -------------------------------------------------------------------------- */ -/* Hmm, I'll think about these later. 
*/ +EF_(waitReadzh_fast); +EF_(waitWritezh_fast); +EF_(delayzh_fast); /* ----------------------------------------------------------------------------- Primitive I/O, error-handling PrimOps @@ -649,24 +700,122 @@ EF_(makeStableNamezh_fast); r = RET_STGCALL1(StgStablePtr,getStablePtr,a) #define deRefStablePtrzh(r,sp) do { \ - ASSERT(stable_ptr_table[sp & ~STABLEPTR_WEIGHT_MASK].weight > 0); \ - r = stable_ptr_table[sp & ~STABLEPTR_WEIGHT_MASK].addr; \ + ASSERT(stable_ptr_table[stgCast(StgWord,sp) & ~STABLEPTR_WEIGHT_MASK].weight > 0); \ + r = stable_ptr_table[stgCast(StgWord,sp) & ~STABLEPTR_WEIGHT_MASK].addr; \ } while (0); #define eqStablePtrzh(r,sp1,sp2) \ - (r = ((sp1 & ~STABLEPTR_WEIGHT_MASK) == (sp2 & ~STABLEPTR_WEIGHT_MASK))) + (r = ((stgCast(StgWord,sp1) & ~STABLEPTR_WEIGHT_MASK) == (stgCast(StgWord,sp2) & ~STABLEPTR_WEIGHT_MASK))) #endif /* ----------------------------------------------------------------------------- - Parallel PrimOps. + Concurrency/Exception PrimOps. -------------------------------------------------------------------------- */ EF_(forkzh_fast); +EF_(yieldzh_fast); EF_(killThreadzh_fast); EF_(seqzh_fast); +EF_(blockAsyncExceptionszh_fast); +EF_(unblockAsyncExceptionszh_fast); + +#define myThreadIdzh(t) (t = CurrentTSO) + +extern int cmp_thread(const StgTSO *tso1, const StgTSO *tso2); + +/* ------------------------------------------------------------------------ + Parallel PrimOps + + A par in the Haskell code is ultimately translated to a parzh macro + (with a case wrapped around it to guarantee that the macro is actually + executed; see compiler/prelude/PrimOps.lhs) + In GUM and SMP we only add a pointer to the spark pool. + In GranSim we call an RTS fct, forwarding additional parameters which + supply info on granularity of the computation, size of the result value + and the degree of parallelism in the sparked expression. 
+ ---------------------------------------------------------------------- */ + +#if defined(GRAN) +//@cindex _par_ +#define parzh(r,node) PAR(r,node,1,0,0,0,0,0) + +//@cindex _parAt_ +#define parAtzh(r,node,where,identifier,gran_info,size_info,par_info,rest) \ + parAT(r,node,where,identifier,gran_info,size_info,par_info,rest,1) + +//@cindex _parAtAbs_ +#define parAtAbszh(r,node,proc,identifier,gran_info,size_info,par_info,rest) \ + parAT(r,node,proc,identifier,gran_info,size_info,par_info,rest,2) + +//@cindex _parAtRel_ +#define parAtRelzh(r,node,proc,identifier,gran_info,size_info,par_info,rest) \ + parAT(r,node,proc,identifier,gran_info,size_info,par_info,rest,3) + +//@cindex _parAtForNow_ +#define parAtForNowzh(r,node,where,identifier,gran_info,size_info,par_info,rest) \ + parAT(r,node,where,identifier,gran_info,size_info,par_info,rest,0) + +#define parAT(r,node,where,identifier,gran_info,size_info,par_info,rest,local) \ +{ \ + if (closure_SHOULD_SPARK((StgClosure*)node)) { \ + rtsSparkQ result; \ + PEs p; \ + /* keep the spark that newSpark returns: result is handed to GranSimSparkAtAbs at the end of this macro */ \ + result = RET_STGCALL6(rtsSparkQ, newSpark, node,identifier,gran_info,size_info,par_info,local); \ + switch (local) { \ + case 2: p = where; /* parAtAbs means absolute PE no. expected */ \ + break; \ + case 3: p = CurrentProc+where; /* parAtRel means rel PE no. 
expected */\ + break; \ + default: p = where_is(where); /* parAt means closure expected */ \ + break; \ + } \ + /* update GranSim state according to this spark */ \ + STGCALL3(GranSimSparkAtAbs, result, (I_)p, identifier); \ + } \ +} + +//@cindex _parLocal_ +#define parLocalzh(r,node,identifier,gran_info,size_info,par_info,rest) \ + PAR(r,node,rest,identifier,gran_info,size_info,par_info,1) + +//@cindex _parGlobal_ +#define parGlobalzh(r,node,identifier,gran_info,size_info,par_info,rest) \ + PAR(r,node,rest,identifier,gran_info,size_info,par_info,0) + +#define PAR(r,node,rest,identifier,gran_info,size_info,par_info,local) \ +{ \ + if (closure_SHOULD_SPARK((StgClosure*)node)) { \ + rtsSpark *result; \ + result = RET_STGCALL6(rtsSpark*, newSpark, \ + node,identifier,gran_info,size_info,par_info,local);\ + STGCALL1(add_to_spark_queue,result); \ + STGCALL2(GranSimSpark, local,(P_)node); \ + } \ +} + +#define copyablezh(r,node) \ + /* copyable not yet implemented!! */ + +#define noFollowzh(r,node) \ + /* noFollow not yet implemented!! */ + +#elif defined(SMP) || defined(PAR) + +#define parzh(r,node) \ +{ \ + extern unsigned int context_switch; \ + if (closure_SHOULD_SPARK((StgClosure *)node) && \ + SparkTl < SparkLim) { \ + *SparkTl++ = (StgClosure *)(node); \ + } \ + r = context_switch = 1; \ +} +#else /* !GRAN && !SMP && !PAR */ +#define parzh(r,node) r = 1 +#endif -/* Hmm, I'll think about these later. 
*/ /* ----------------------------------------------------------------------------- Pointer equality -------------------------------------------------------------------------- */ @@ -686,7 +835,7 @@ EF_(seqzh_fast); #ifndef PAR EF_(mkWeakzh_fast); -EF_(finalizeWeakzh_fast); +EF_(finalizzeWeakzh_fast); #define deRefWeakzh(code,val,w) \ if (((StgWeak *)w)->header.info == &WEAK_info) { \ @@ -709,16 +858,35 @@ EF_(finalizeWeakzh_fast); #define ForeignObj_CLOSURE_DATA(c) (((StgForeignObj *)c)->data) -EF_(makeForeignObjzh_fast); +EF_(mkForeignObjzh_fast); #define writeForeignObjzh(res,datum) \ (ForeignObj_CLOSURE_DATA(res) = (P_)(datum)) #define eqForeignObj(f1,f2) ((f1)==(f2)) +#define indexCharOffForeignObjzh(r,fo,i) indexCharOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i) +#define indexIntOffForeignObjzh(r,fo,i) indexIntOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i) +#define indexWordOffForeignObjzh(r,fo,i) indexWordOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i) +#define indexAddrOffForeignObjzh(r,fo,i) indexAddrOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i) +#define indexFloatOffForeignObjzh(r,fo,i) indexFloatOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i) +#define indexDoubleOffForeignObjzh(r,fo,i) indexDoubleOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i) +#define indexStablePtrOffForeignObjzh(r,fo,i) indexStablePtrOffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i) +#ifdef SUPPORT_LONG_LONGS +#define indexInt64OffForeignObjzh(r,fo,i) indexInt64OffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i) +#define indexWord64OffForeignObjzh(r,fo,i) indexWord64OffAddrzh(r,ForeignObj_CLOSURE_DATA(fo),i) +#endif + #endif /* ----------------------------------------------------------------------------- + Constructor tags + -------------------------------------------------------------------------- */ + +#define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info)) +/* tagToEnum# is handled directly by the code generator. 
*/ + +/* ----------------------------------------------------------------------------- Signal processing. Not really primops, but called directly from Haskell. -------------------------------------------------------------------------- */ @@ -733,4 +901,4 @@ extern StgInt sig_install (StgInt, StgInt, StgStablePtr, sigset_t *); #define stg_sig_ignore(sig,mask) sig_install(sig,STG_SIG_IGN,0,(sigset_t *)mask) #define stg_sig_catch(sig,ptr,mask) sig_install(sig,STG_SIG_HAN,ptr,(sigset_t *)mask) -#endif PRIMOPS_H +#endif /* PRIMOPS_H */