X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2FPrimOps.cmm;h=9efc9f1ed87355ce18cc4542f387cb709127472f;hb=7bb3d1fc79521d591cd9f824893963141a7997b6;hp=f2ce41551ad29ada1d30e518dc50bbdad0cc432e;hpb=2378b2325df64a5ccc5b2e038ac3dbb848dea5f7;p=ghc-hetmet.git diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index f2ce415..9efc9f1 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -28,30 +28,13 @@ #include "Cmm.h" #ifdef __PIC__ -#ifndef mingw32_HOST_OS -import __gmpz_init; -import __gmpz_add; -import __gmpz_sub; -import __gmpz_mul; -import __gmpz_gcd; -import __gmpn_gcd_1; -import __gmpn_cmp; -import __gmpz_tdiv_q; -import __gmpz_tdiv_r; -import __gmpz_tdiv_qr; -import __gmpz_fdiv_qr; -import __gmpz_divexact; -import __gmpz_and; -import __gmpz_xor; -import __gmpz_ior; -import __gmpz_com; -#endif import pthread_mutex_lock; import pthread_mutex_unlock; #endif import base_ControlziExceptionziBase_nestedAtomically_closure; import EnterCriticalSection; import LeaveCriticalSection; +import ghczmprim_GHCziBool_False_closure; /*----------------------------------------------------------------------------- Array Primitives @@ -82,32 +65,65 @@ newByteArrayzh_fast RET_P(p); } +#define BA_ALIGN 16 +#define BA_MASK (BA_ALIGN-1) + newPinnedByteArrayzh_fast { - W_ words, payload_words, n, p; + W_ words, bytes, payload_words, p; MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast); - n = R1; - payload_words = ROUNDUP_BYTES_TO_WDS(n); + bytes = R1; + /* payload_words is what we will tell the profiler we had to allocate */ + payload_words = ROUNDUP_BYTES_TO_WDS(bytes); + /* When we actually allocate memory, we need to allow space for the + header: */ + bytes = bytes + SIZEOF_StgArrWords; + /* And we want to align to BA_ALIGN bytes, so we need to allow space + to shift up to BA_ALIGN - 1 bytes: */ + bytes = bytes + BA_ALIGN - 1; + /* Now we convert to a number of words: */ + words = ROUNDUP_BYTES_TO_WDS(bytes); - // We want an 8-byte aligned array. allocatePinned() gives us - // 8-byte aligned memory by default, but we want to align the - // *goods* inside the ArrWords object, so we have to check the - // size of the ArrWords header and adjust our size accordingly. - words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - if ((SIZEOF_StgArrWords & 7) != 0) { - words = words + 1; - } + ("ptr" p) = foreign "C" allocatePinned(words) []; + TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); + + /* Now we need to move p forward so that the payload is aligned + to BA_ALIGN bytes: */ + p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK); + + SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); + StgArrWords_words(p) = payload_words; + RET_P(p); +} + +newAlignedPinnedByteArrayzh_fast +{ + W_ words, bytes, payload_words, p, alignment; + + MAYBE_GC(NO_PTRS,newAlignedPinnedByteArrayzh_fast); + bytes = R1; + alignment = R2; + + /* payload_words is what we will tell the profiler we had to allocate */ + payload_words = ROUNDUP_BYTES_TO_WDS(bytes); + + /* When we actually allocate memory, we need to allow space for the + header: */ + bytes = bytes + SIZEOF_StgArrWords; + /* And we want to align to bytes, so we need to allow space + to shift up to bytes: */ + bytes = bytes + alignment - 1; + /* Now we convert to a number of words: */ + words = ROUNDUP_BYTES_TO_WDS(bytes); ("ptr" p) = foreign "C" allocatePinned(words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); - // Again, if the ArrWords header isn't a multiple of 8 bytes, we - // have to push the object forward one word so that the goods - // fall on an 8-byte boundary. - if ((SIZEOF_StgArrWords & 7) != 0) { - p = p + WDS(1); - } + /* Now we need to move p forward so that the payload is aligned + to bytes. Note that we are assuming that + is a power of 2, which is technically not guaranteed */ + p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1)); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); StgArrWords_words(p) = payload_words; @@ -193,7 +209,7 @@ newMutVarzh_fast atomicModifyMutVarzh_fast { - W_ mv, z, x, y, r; + W_ mv, f, z, x, y, r, h; /* Args: R1 :: MutVar#, R2 :: a -> (a,b) */ /* If x is the current contents of the MutVar#, then @@ -232,19 +248,15 @@ atomicModifyMutVarzh_fast HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast); -#if defined(THREADED_RTS) - ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2]; -#endif - - x = StgMutVar_var(R1); + mv = R1; + f = R2; TICK_ALLOC_THUNK_2(); CCCS_ALLOC(THUNK_2_SIZE); z = Hp - THUNK_2_SIZE + WDS(1); SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]); LDV_RECORD_CREATE(z); - StgThunk_payload(z,0) = R2; - StgThunk_payload(z,1) = x; + StgThunk_payload(z,0) = f; TICK_ALLOC_THUNK_1(); CCCS_ALLOC(THUNK_1_SIZE); @@ -253,9 +265,6 @@ atomicModifyMutVarzh_fast LDV_RECORD_CREATE(y); StgThunk_payload(y,0) = z; - StgMutVar_var(R1) = y; - foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1]; - TICK_ALLOC_THUNK_1(); CCCS_ALLOC(THUNK_1_SIZE); r = y - THUNK_1_SIZE; @@ -263,10 +272,20 @@ atomicModifyMutVarzh_fast LDV_RECORD_CREATE(r); StgThunk_payload(r,0) = z; -#if defined(THREADED_RTS) - RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") []; + retry: + x = StgMutVar_var(mv); + StgThunk_payload(z,1) = x; +#ifdef THREADED_RTS + (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) []; + if (h != x) { goto retry; } +#else + StgMutVar_var(mv) = y; #endif + if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) { + foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") []; + } + RET_P(r); } @@ -293,9 +312,14 @@ mkWeakzh_fast w = Hp - SIZEOF_StgWeak + WDS(1); SET_HDR(w, stg_WEAK_info, W_[CCCS]); - StgWeak_key(w) = R1; - StgWeak_value(w) = R2; - StgWeak_finalizer(w) = R3; + // We don't care about cfinalizer here. + // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or + // something else? + + StgWeak_key(w) = R1; + StgWeak_value(w) = R2; + StgWeak_finalizer(w) = R3; + StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure; StgWeak_link(w) = W_[weak_ptr_list]; W_[weak_ptr_list] = w; @@ -305,12 +329,65 @@ mkWeakzh_fast RET_P(w); } +mkWeakForeignEnvzh_fast +{ + /* R1 = key + R2 = value + R3 = finalizer + R4 = pointer + R5 = has environment (0 or 1) + R6 = environment + */ + W_ w, payload_words, words, p; + + W_ key, val, fptr, ptr, flag, eptr; + + key = R1; + val = R2; + fptr = R3; + ptr = R4; + flag = R5; + eptr = R6; + + ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, mkWeakForeignEnvzh_fast ); + + w = Hp - SIZEOF_StgWeak + WDS(1); + SET_HDR(w, stg_WEAK_info, W_[CCCS]); + + payload_words = 4; + words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; + ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr", words) []; + + TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); + SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); + + StgArrWords_words(p) = payload_words; + StgArrWords_payload(p,0) = fptr; + StgArrWords_payload(p,1) = ptr; + StgArrWords_payload(p,2) = eptr; + StgArrWords_payload(p,3) = flag; + + // We don't care about the value here. + // Should StgWeak_value(w) be stg_NO_FINALIZER_closure or something else? + + StgWeak_key(w) = key; + StgWeak_value(w) = val; + StgWeak_finalizer(w) = stg_NO_FINALIZER_closure; + StgWeak_cfinalizer(w) = p; + + StgWeak_link(w) = W_[weak_ptr_list]; + W_[weak_ptr_list] = w; + + IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []); + + RET_P(w); +} finalizzeWeakzh_fast { /* R1 = weak ptr */ - W_ w, f; + W_ w, f, arr; w = R1; @@ -338,9 +415,18 @@ finalizzeWeakzh_fast SET_INFO(w,stg_DEAD_WEAK_info); LDV_RECORD_CREATE(w); - f = StgWeak_finalizer(w); + f = StgWeak_finalizer(w); + arr = StgWeak_cfinalizer(w); + StgDeadWeak_link(w) = StgWeak_link(w); + if (arr != stg_NO_FINALIZER_closure) { + foreign "C" runCFinalizer(StgArrWords_payload(arr,0), + StgArrWords_payload(arr,1), + StgArrWords_payload(arr,2), + StgArrWords_payload(arr,3)) []; + } + /* return the finalizer */ if (f == stg_NO_FINALIZER_closure) { RET_NP(0,stg_NO_FINALIZER_closure); @@ -366,522 +452,20 @@ deRefWeakzh_fast } /* ----------------------------------------------------------------------------- - Arbitrary-precision Integer operations. - - There are some assumptions in this code that mp_limb_t == W_. This is - the case for all the platforms that GHC supports, currently. + Floating point operations. -------------------------------------------------------------------------- */ -int2Integerzh_fast -{ - /* arguments: R1 = Int# */ - - W_ val, s, p; /* to avoid aliasing */ - - val = R1; - ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast ); - - p = Hp - SIZEOF_StgArrWords; - SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = 1; - - /* mpz_set_si is inlined here, makes things simpler */ - if (%lt(val,0)) { - s = -1; - Hp(0) = -val; - } else { - if (%gt(val,0)) { - s = 1; - Hp(0) = val; - } else { - s = 0; - } - } - - /* returns (# size :: Int#, - data :: ByteArray# - #) - */ - RET_NP(s,p); -} - -word2Integerzh_fast -{ - /* arguments: R1 = Word# */ - - W_ val, s, p; /* to avoid aliasing */ - - val = R1; - - ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast); - - p = Hp - SIZEOF_StgArrWords; - SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = 1; - - if (val != 0) { - s = 1; - W_[Hp] = val; - } else { - s = 0; - } - - /* returns (# size :: Int#, - data :: ByteArray# #) - */ - RET_NP(s,p); -} - - -/* - * 'long long' primops for converting to/from Integers. - */ - -#ifdef SUPPORT_LONG_LONGS - -int64ToIntegerzh_fast -{ - /* arguments: L1 = Int64# */ - - L_ val; - W_ hi, lo, s, neg, words_needed, p; - - val = L1; - neg = 0; - - hi = TO_W_(val >> 32); - lo = TO_W_(val); - - if ( hi == 0 || (hi == 0xFFFFFFFF && lo != 0) ) { - // minimum is one word - words_needed = 1; - } else { - words_needed = 2; - } - - ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed), - NO_PTRS, int64ToIntegerzh_fast ); - - p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1); - SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = words_needed; - - if ( %lt(hi,0) ) { - neg = 1; - lo = -lo; - if(lo == 0) { - hi = -hi; - } else { - hi = -hi - 1; - } - } - - if ( words_needed == 2 ) { - s = 2; - Hp(-1) = lo; - Hp(0) = hi; - } else { - if ( lo != 0 ) { - s = 1; - Hp(0) = lo; - } else /* val==0 */ { - s = 0; - } - } - if ( neg != 0 ) { - s = -s; - } - - /* returns (# size :: Int#, - data :: ByteArray# #) - */ - RET_NP(s,p); -} -word64ToIntegerzh_fast -{ - /* arguments: L1 = Word64# */ - - L_ val; - W_ hi, lo, s, words_needed, p; - - val = L1; - hi = TO_W_(val >> 32); - lo = TO_W_(val); - - if ( hi != 0 ) { - words_needed = 2; - } else { - words_needed = 1; - } - - ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed), - NO_PTRS, word64ToIntegerzh_fast ); - - p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1); - SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = words_needed; - - if ( hi != 0 ) { - s = 2; - Hp(-1) = lo; - Hp(0) = hi; - } else { - if ( lo != 0 ) { - s = 1; - Hp(0) = lo; - } else /* val==0 */ { - s = 0; - } - } - - /* returns (# size :: Int#, - data :: ByteArray# #) - */ - RET_NP(s,p); -} - - - -#endif /* SUPPORT_LONG_LONGS */ - -/* ToDo: this is shockingly inefficient */ - -#ifndef THREADED_RTS -section "bss" { - mp_tmp1: - bits8 [SIZEOF_MP_INT]; -} - -section "bss" { - mp_tmp2: - bits8 [SIZEOF_MP_INT]; -} - -section "bss" { - mp_result1: - bits8 [SIZEOF_MP_INT]; -} - -section "bss" { - mp_result2: - bits8 [SIZEOF_MP_INT]; -} -#endif - -#ifdef THREADED_RTS -#define FETCH_MP_TEMP(X) \ -W_ X; \ -X = BaseReg + (OFFSET_StgRegTable_r ## X); -#else -#define FETCH_MP_TEMP(X) /* Nothing */ -#endif - -#define GMP_TAKE2_RET1(name,mp_fun) \ -name \ -{ \ - CInt s1, s2; \ - W_ d1, d2; \ - FETCH_MP_TEMP(mp_tmp1); \ - FETCH_MP_TEMP(mp_tmp2); \ - FETCH_MP_TEMP(mp_result1) \ - FETCH_MP_TEMP(mp_result2); \ - \ - /* call doYouWantToGC() */ \ - MAYBE_GC(R2_PTR & R4_PTR, name); \ - \ - s1 = W_TO_INT(R1); \ - d1 = R2; \ - s2 = W_TO_INT(R3); \ - d2 = R4; \ - \ - MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1)); \ - MP_INT__mp_size(mp_tmp1) = (s1); \ - MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(d1); \ - MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2)); \ - MP_INT__mp_size(mp_tmp2) = (s2); \ - MP_INT__mp_d(mp_tmp2) = BYTE_ARR_CTS(d2); \ - \ - foreign "C" __gmpz_init(mp_result1 "ptr") []; \ - \ - /* Perform the operation */ \ - foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") []; \ - \ - RET_NP(TO_W_(MP_INT__mp_size(mp_result1)), \ - MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords); \ -} - -#define GMP_TAKE1_RET1(name,mp_fun) \ -name \ -{ \ - CInt s1; \ - W_ d1; \ - FETCH_MP_TEMP(mp_tmp1); \ - FETCH_MP_TEMP(mp_result1) \ - \ - /* call doYouWantToGC() */ \ - MAYBE_GC(R2_PTR, name); \ - \ - d1 = R2; \ - s1 = W_TO_INT(R1); \ - \ - MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1)); \ - MP_INT__mp_size(mp_tmp1) = (s1); \ - MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(d1); \ - \ - foreign "C" __gmpz_init(mp_result1 "ptr") []; \ - \ - /* Perform the operation */ \ - foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") []; \ - \ - RET_NP(TO_W_(MP_INT__mp_size(mp_result1)), \ - MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords); \ -} - -#define GMP_TAKE2_RET2(name,mp_fun) \ -name \ -{ \ - CInt s1, s2; \ - W_ d1, d2; \ - FETCH_MP_TEMP(mp_tmp1); \ - FETCH_MP_TEMP(mp_tmp2); \ - FETCH_MP_TEMP(mp_result1) \ - FETCH_MP_TEMP(mp_result2) \ - \ - /* call doYouWantToGC() */ \ - MAYBE_GC(R2_PTR & R4_PTR, name); \ - \ - s1 = W_TO_INT(R1); \ - d1 = R2; \ - s2 = W_TO_INT(R3); \ - d2 = R4; \ - \ - MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1)); \ - MP_INT__mp_size(mp_tmp1) = (s1); \ - MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(d1); \ - MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2)); \ - MP_INT__mp_size(mp_tmp2) = (s2); \ - MP_INT__mp_d(mp_tmp2) = BYTE_ARR_CTS(d2); \ - \ - foreign "C" __gmpz_init(mp_result1 "ptr") []; \ - foreign "C" __gmpz_init(mp_result2 "ptr") []; \ - \ - /* Perform the operation */ \ - foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") []; \ - \ - RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)), \ - MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords, \ - TO_W_(MP_INT__mp_size(mp_result2)), \ - MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords); \ -} - -GMP_TAKE2_RET1(plusIntegerzh_fast, __gmpz_add) -GMP_TAKE2_RET1(minusIntegerzh_fast, __gmpz_sub) -GMP_TAKE2_RET1(timesIntegerzh_fast, __gmpz_mul) -GMP_TAKE2_RET1(gcdIntegerzh_fast, __gmpz_gcd) -GMP_TAKE2_RET1(quotIntegerzh_fast, __gmpz_tdiv_q) -GMP_TAKE2_RET1(remIntegerzh_fast, __gmpz_tdiv_r) -GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact) -GMP_TAKE2_RET1(andIntegerzh_fast, __gmpz_and) -GMP_TAKE2_RET1(orIntegerzh_fast, __gmpz_ior) -GMP_TAKE2_RET1(xorIntegerzh_fast, __gmpz_xor) -GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com) - -GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr) -GMP_TAKE2_RET2(divModIntegerzh_fast, __gmpz_fdiv_qr) - -#ifndef THREADED_RTS -section "bss" { - mp_tmp_w: W_; // NB. mp_tmp_w is really an here mp_limb_t -} -#endif - -gcdIntzh_fast -{ - /* R1 = the first Int#; R2 = the second Int# */ - W_ r; - FETCH_MP_TEMP(mp_tmp_w); - - W_[mp_tmp_w] = R1; - (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) []; - - R1 = r; - /* Result parked in R1, return via info-pointer at TOS */ - jump %ENTRY_CODE(Sp(0)); -} - - -gcdIntegerIntzh_fast -{ - /* R1 = s1; R2 = d1; R3 = the int */ - W_ s1; - (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) []; - R1 = s1; - - /* Result parked in R1, return via info-pointer at TOS */ - jump %ENTRY_CODE(Sp(0)); -} - - -cmpIntegerIntzh_fast -{ - /* R1 = s1; R2 = d1; R3 = the int */ - W_ usize, vsize, v_digit, u_digit; - - usize = R1; - vsize = 0; - v_digit = R3; - - // paraphrased from __gmpz_cmp_si() in the GMP sources - if (%gt(v_digit,0)) { - vsize = 1; - } else { - if (%lt(v_digit,0)) { - vsize = -1; - v_digit = -v_digit; - } - } - - if (usize != vsize) { - R1 = usize - vsize; - jump %ENTRY_CODE(Sp(0)); - } - - if (usize == 0) { - R1 = 0; - jump %ENTRY_CODE(Sp(0)); - } - - u_digit = W_[BYTE_ARR_CTS(R2)]; - - if (u_digit == v_digit) { - R1 = 0; - jump %ENTRY_CODE(Sp(0)); - } - - if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's - R1 = usize; - } else { - R1 = -usize; - } - - jump %ENTRY_CODE(Sp(0)); -} - -cmpIntegerzh_fast -{ - /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */ - W_ usize, vsize, size, up, vp; - CInt cmp; - - // paraphrased from __gmpz_cmp() in the GMP sources - usize = R1; - vsize = R3; - - if (usize != vsize) { - R1 = usize - vsize; - jump %ENTRY_CODE(Sp(0)); - } - - if (usize == 0) { - R1 = 0; - jump %ENTRY_CODE(Sp(0)); - } - - if (%lt(usize,0)) { // NB. not <, which is unsigned - size = -usize; - } else { - size = usize; - } - - up = BYTE_ARR_CTS(R2); - vp = BYTE_ARR_CTS(R4); - - (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) []; - - if (cmp == 0 :: CInt) { - R1 = 0; - jump %ENTRY_CODE(Sp(0)); - } - - if (%lt(cmp,0 :: CInt) == %lt(usize,0)) { - R1 = 1; - } else { - R1 = (-1); - } - /* Result parked in R1, return via info-pointer at TOS */ - jump %ENTRY_CODE(Sp(0)); -} - -integer2Intzh_fast -{ - /* R1 = s; R2 = d */ - W_ r, s; - - s = R1; - if (s == 0) { - r = 0; - } else { - r = W_[R2 + SIZEOF_StgArrWords]; - if (%lt(s,0)) { - r = -r; - } - } - /* Result parked in R1, return via info-pointer at TOS */ - R1 = r; - jump %ENTRY_CODE(Sp(0)); -} - -integer2Wordzh_fast -{ - /* R1 = s; R2 = d */ - W_ r, s; - - s = R1; - if (s == 0) { - r = 0; - } else { - r = W_[R2 + SIZEOF_StgArrWords]; - if (%lt(s,0)) { - r = -r; - } - } - /* Result parked in R1, return via info-pointer at TOS */ - R1 = r; - jump %ENTRY_CODE(Sp(0)); -} - -decodeFloatzh_fast -{ - W_ p; - F_ arg; - FETCH_MP_TEMP(mp_tmp1); - FETCH_MP_TEMP(mp_tmp_w); - - /* arguments: F1 = Float# */ - arg = F1; - - ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast ); - - /* Be prepared to tell Lennart-coded __decodeFloat - where mantissa._mp_d can be put (it does not care about the rest) */ - p = Hp - SIZEOF_StgArrWords; - SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]); - StgArrWords_words(p) = 1; - MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p); - - /* Perform the operation */ - foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) []; - - /* returns: (Int# (expn), Int#, ByteArray#) */ - RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p); -} - decodeFloatzuIntzh_fast { W_ p; F_ arg; - FETCH_MP_TEMP(mp_tmp1); - FETCH_MP_TEMP(mp_tmp_w); + W_ mp_tmp1; + W_ mp_tmp_w; + + STK_CHK_GEN( WDS(2), NO_PTRS, decodeFloatzuIntzh_fast ); + + mp_tmp1 = Sp - WDS(1); + mp_tmp_w = Sp - WDS(2); /* arguments: F1 = Float# */ arg = F1; @@ -893,43 +477,21 @@ decodeFloatzuIntzh_fast RET_NN(W_[mp_tmp1], W_[mp_tmp_w]); } -#define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE -#define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE) - -decodeDoublezh_fast +decodeDoublezu2Intzh_fast { D_ arg; W_ p; - FETCH_MP_TEMP(mp_tmp1); - FETCH_MP_TEMP(mp_tmp_w); + W_ mp_tmp1; + W_ mp_tmp2; + W_ mp_result1; + W_ mp_result2; - /* arguments: D1 = Double# */ - arg = D1; - - ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast ); - - /* Be prepared to tell Lennart-coded __decodeDouble - where mantissa.d can be put (it does not care about the rest) */ - p = Hp - ARR_SIZE + WDS(1); - SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE); - MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p); - - /* Perform the operation */ - foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) []; - - /* returns: (Int# (expn), Int#, ByteArray#) */ - RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p); -} + STK_CHK_GEN( WDS(4), NO_PTRS, decodeDoublezu2Intzh_fast ); -decodeDoublezu2Intzh_fast -{ - D_ arg; - W_ p; - FETCH_MP_TEMP(mp_tmp1); - FETCH_MP_TEMP(mp_tmp2); - FETCH_MP_TEMP(mp_result1); - FETCH_MP_TEMP(mp_result2); + mp_tmp1 = Sp - WDS(1); + mp_tmp2 = Sp - WDS(2); + mp_result1 = Sp - WDS(3); + mp_result2 = Sp - WDS(4); /* arguments: D1 = Double# */ arg = D1; @@ -969,8 +531,9 @@ forkzh_fast foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") []; - // switch at the earliest opportunity - CInt[context_switch] = 1 :: CInt; + // context switch soon, but not immediately: we don't want every + // forkIO to force a context-switch. + Capability_context_switch(MyCapability()) = 1 :: CInt; RET_P(threadid); } @@ -998,8 +561,9 @@ forkOnzh_fast foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") []; - // switch at the earliest opportunity - CInt[context_switch] = 1 :: CInt; + // context switch soon, but not immediately: we don't want every + // forkIO to force a context-switch. + Capability_context_switch(MyCapability()) = 1 :: CInt; RET_P(threadid); } @@ -1080,7 +644,7 @@ INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - W_ unused3, "ptr" W_ unused4, "ptr" W_ unused5) + W_ unused3, P_ unused4, P_ unused5) { W_ r, frame, trec, outer; @@ -1114,18 +678,20 @@ INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - "ptr" W_ unused3, "ptr" W_ unused4) + P_ code, P_ next_invariant_to_check, P_ result) { W_ frame, trec, valid, next_invariant, q, outer; - frame = Sp; - trec = StgTSO_trec(CurrentTSO); + frame = Sp; + trec = StgTSO_trec(CurrentTSO); + result = R1; ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") []; if (outer == NO_TREC) { /* First time back at the atomically frame -- pick up invariants */ ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") []; StgAtomicallyFrame_next_invariant_to_check(frame) = q; + StgAtomicallyFrame_result(frame) = result; } else { /* Second/subsequent time back at the atomically frame -- abort the @@ -1159,6 +725,7 @@ INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME, if (valid != 0) { /* Transaction was valid: commit succeeded */ StgTSO_trec(CurrentTSO) = NO_TREC; + R1 = StgAtomicallyFrame_result(frame); Sp = Sp + SIZEOF_StgAtomicallyFrame; jump %ENTRY_CODE(Sp(SP_OFF)); } else { @@ -1176,7 +743,7 @@ INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - "ptr" W_ unused3, "ptr" W_ unused4) + P_ code, P_ next_invariant_to_check, P_ result) { W_ frame, trec, valid; @@ -1210,7 +777,7 @@ INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - "ptr" W_ unused3, "ptr" W_ unused4) + P_ unused3, P_ unused4) { W_ r, frame, trec, outer; frame = Sp; @@ -1261,6 +828,7 @@ atomicallyzh_fast SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]); StgAtomicallyFrame_code(frame) = R1; + StgAtomicallyFrame_result(frame) = NO_TREC; StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE; /* Start the memory transcation */ @@ -1452,6 +1020,17 @@ readTVarzh_fast RET_P(result); } +readTVarIOzh_fast +{ + W_ result; + +again: + result = StgTVar_current_value(R1); + if (%INFO_PTR(result) == stg_TREC_HEADER_info) { + goto again; + } + RET_P(result); +} writeTVarzh_fast { @@ -1569,8 +1148,11 @@ takeMVarzh_fast CurrentTSO) []; } StgTSO__link(CurrentTSO) = stg_END_TSO_QUEUE_closure; - StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; StgTSO_block_info(CurrentTSO) = mvar; + // write barrier for throwTo(), which looks at block_info + // if why_blocked==BlockedOnMVar. + prim %write_barrier() []; + StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; StgMVar_tail(mvar) = CurrentTSO; R1 = mvar; @@ -1726,8 +1308,11 @@ putMVarzh_fast CurrentTSO) []; } StgTSO__link(CurrentTSO) = stg_END_TSO_QUEUE_closure; - StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; StgTSO_block_info(CurrentTSO) = mvar; + // write barrier for throwTo(), which looks at block_info + // if why_blocked==BlockedOnMVar. + prim %write_barrier() []; + StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; StgMVar_tail(mvar) = CurrentTSO; R1 = mvar; @@ -2273,3 +1858,39 @@ getApStackValzh_fast } RET_NP(ok,val); } + +/* ----------------------------------------------------------------------------- + Misc. primitives + -------------------------------------------------------------------------- */ + +// Write the cost center stack of the first argument on stderr; return +// the second. Possibly only makes sense for already evaluated +// things? +traceCcszh_fast +{ + W_ ccs; + +#ifdef PROFILING + ccs = StgHeader_ccs(UNTAG(R1)); + foreign "C" fprintCCS_stderr(ccs "ptr") [R2]; +#endif + + R1 = R2; + ENTER(); +} + +getSparkzh_fast +{ + W_ spark; + +#ifndef THREADED_RTS + RET_NP(0,ghczmprim_GHCziBool_False_closure); +#else + (spark) = foreign "C" findSpark(MyCapability()); + if (spark != 0) { + RET_NP(1,spark); + } else { + RET_NP(0,ghczmprim_GHCziBool_False_closure); + } +#endif +}