X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=rts%2FPrimOps.cmm;h=e17c6fb3f870b7b923c79570cfe45a087bd787ee;hp=501a719b33b14ab4a0f854e9fd65f4a0cd2930da;hb=e5c3b478b3cd1707cf122833822f44b2ac09b8e9;hpb=3e180332e69eb3a3eb2c0cb052c39e648887fe49 diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index 501a719..e17c6fb 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------------- * - * (c) The GHC Team, 1998-2004 + * (c) The GHC Team, 1998-2011 * * Out-of-line primitive operations * @@ -28,31 +28,16 @@ #include "Cmm.h" #ifdef __PIC__ -#ifndef mingw32_HOST_OS -import __gmpz_init; -import __gmpz_add; -import __gmpz_sub; -import __gmpz_mul; -import __gmpz_gcd; -import __gmpn_gcd_1; -import __gmpn_cmp; -import __gmpz_tdiv_q; -import __gmpz_tdiv_r; -import __gmpz_tdiv_qr; -import __gmpz_fdiv_qr; -import __gmpz_divexact; -import __gmpz_and; -import __gmpz_xor; -import __gmpz_ior; -import __gmpz_com; -#endif import pthread_mutex_lock; import pthread_mutex_unlock; #endif import base_ControlziExceptionziBase_nestedAtomically_closure; import EnterCriticalSection; import LeaveCriticalSection; -import ghczmprim_GHCziBool_False_closure; +import ghczmprim_GHCziTypes_False_closure; +#if !defined(mingw32_HOST_OS) +import sm_mutex; +#endif /*----------------------------------------------------------------------------- Array Primitives @@ -69,29 +54,30 @@ import ghczmprim_GHCziBool_False_closure; * round up to the nearest word for the size of the array. */ -newByteArrayzh_fast +stg_newByteArrayzh { W_ words, payload_words, n, p; - MAYBE_GC(NO_PTRS,newByteArrayzh_fast); + MAYBE_GC(NO_PTRS,stg_newByteArrayzh); n = R1; payload_words = ROUNDUP_BYTES_TO_WDS(n); words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) []; + ("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = payload_words; + StgArrWords_bytes(p) = n; RET_P(p); } #define BA_ALIGN 16 #define BA_MASK (BA_ALIGN-1) -newPinnedByteArrayzh_fast +stg_newPinnedByteArrayzh { - W_ words, bytes, payload_words, p; + W_ words, n, bytes, payload_words, p; - MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast); - bytes = R1; + MAYBE_GC(NO_PTRS,stg_newPinnedByteArrayzh); + n = R1; + bytes = n; /* payload_words is what we will tell the profiler we had to allocate */ payload_words = ROUNDUP_BYTES_TO_WDS(bytes); /* When we actually allocate memory, we need to allow space for the @@ -103,7 +89,7 @@ newPinnedByteArrayzh_fast /* Now we convert to a number of words: */ words = ROUNDUP_BYTES_TO_WDS(bytes); - ("ptr" p) = foreign "C" allocatePinned(words) []; + ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); /* Now we need to move p forward so that the payload is aligned @@ -111,18 +97,25 @@ newPinnedByteArrayzh_fast p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = payload_words; + StgArrWords_bytes(p) = n; RET_P(p); } -newAlignedPinnedByteArrayzh_fast +stg_newAlignedPinnedByteArrayzh { - W_ words, bytes, payload_words, p, alignment; + W_ words, n, bytes, payload_words, p, alignment; - MAYBE_GC(NO_PTRS,newAlignedPinnedByteArrayzh_fast); - bytes = R1; + MAYBE_GC(NO_PTRS,stg_newAlignedPinnedByteArrayzh); + n = R1; alignment = R2; + /* we always supply at least word-aligned memory, so there's no + need to allow extra space for alignment if the requirement is less + than a word. This also prevents mischief with alignment == 0. */ + if (alignment <= SIZEOF_W) { alignment = 1; } + + bytes = n; + /* payload_words is what we will tell the profiler we had to allocate */ payload_words = ROUNDUP_BYTES_TO_WDS(bytes); @@ -135,7 +128,7 @@ newAlignedPinnedByteArrayzh_fast /* Now we convert to a number of words: */ words = ROUNDUP_BYTES_TO_WDS(bytes); - ("ptr" p) = foreign "C" allocatePinned(words) []; + ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); /* Now we need to move p forward so that the payload is aligned @@ -144,24 +137,29 @@ newAlignedPinnedByteArrayzh_fast p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1)); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = payload_words; + StgArrWords_bytes(p) = n; RET_P(p); } -newArrayzh_fast +stg_newArrayzh { - W_ words, n, init, arr, p; + W_ words, n, init, arr, p, size; /* Args: R1 = words, R2 = initialisation value */ n = R1; - MAYBE_GC(R2_PTR,newArrayzh_fast); - - words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n; - ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2]; + MAYBE_GC(R2_PTR,stg_newArrayzh); + + // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words + // in the array, making sure we round up, and then rounding up to a whole + // number of words. + size = n + mutArrPtrsCardWords(n); + words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size; + ("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2]; TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0); SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]); StgMutArrPtrs_ptrs(arr) = n; + StgMutArrPtrs_size(arr) = size; // Initialise all elements of the the array with the value in R2 init = R2; @@ -172,18 +170,25 @@ newArrayzh_fast p = p + WDS(1); goto for; } + // Initialise the mark bits with 0 + for2: + if (p < arr + WDS(size)) { + W_[p] = 0; + p = p + WDS(1); + goto for2; + } RET_P(arr); } -unsafeThawArrayzh_fast +stg_unsafeThawArrayzh { // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST // // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN // normally doesn't. However, when we freeze a MUT_ARR_PTRS, we leave // it on the mutable list for the GC to remove (removing something from - // the mutable list is not easy, because the mut_list is only singly-linked). + // the mutable list is not easy). // // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list, // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0 @@ -207,16 +212,17 @@ unsafeThawArrayzh_fast } } + /* ----------------------------------------------------------------------------- MutVar primitives -------------------------------------------------------------------------- */ -newMutVarzh_fast +stg_newMutVarzh { W_ mv; /* Args: R1 = initialisation value */ - ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast); + ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, stg_newMutVarzh); mv = Hp - SIZEOF_StgMutVar + WDS(1); SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]); @@ -225,7 +231,26 @@ newMutVarzh_fast RET_P(mv); } -atomicModifyMutVarzh_fast +stg_casMutVarzh + /* MutVar# s a -> a -> a -> State# s -> (# State#, Int#, a #) */ +{ + W_ mv, old, new, h; + + mv = R1; + old = R2; + new = R3; + + (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, + old, new) []; + if (h != old) { + RET_NP(1,h); + } else { + RET_NP(0,h); + } +} + + +stg_atomicModifyMutVarzh { W_ mv, f, z, x, y, r, h; /* Args: R1 :: MutVar#, R2 :: a -> (a,b) */ @@ -264,7 +289,7 @@ atomicModifyMutVarzh_fast #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE) - HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast); + HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, stg_atomicModifyMutVarzh); mv = R1; f = R2; @@ -313,7 +338,7 @@ atomicModifyMutVarzh_fast STRING(stg_weak_msg,"New weak pointer at %p\n") -mkWeakzh_fast +stg_mkWeakzh { /* R1 = key R2 = value @@ -325,7 +350,7 @@ mkWeakzh_fast R3 = stg_NO_FINALIZER_closure; } - ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast ); + ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, stg_mkWeakzh ); w = Hp - SIZEOF_StgWeak + WDS(1); SET_HDR(w, stg_WEAK_info, W_[CCCS]); @@ -339,15 +364,17 @@ mkWeakzh_fast StgWeak_finalizer(w) = R3; StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure; + ACQUIRE_LOCK(sm_mutex); StgWeak_link(w) = W_[weak_ptr_list]; W_[weak_ptr_list] = w; + RELEASE_LOCK(sm_mutex); IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []); RET_P(w); } -mkWeakForeignEnvzh_fast +stg_mkWeakForeignEnvzh { /* R1 = key R2 = value @@ -367,19 +394,19 @@ mkWeakForeignEnvzh_fast flag = R5; eptr = R6; - ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, mkWeakForeignEnvzh_fast ); + ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, stg_mkWeakForeignEnvzh ); w = Hp - SIZEOF_StgWeak + WDS(1); SET_HDR(w, stg_WEAK_info, W_[CCCS]); payload_words = 4; words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr", words) []; + ("ptr" p) = foreign "C" allocate(MyCapability() "ptr", words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = payload_words; + StgArrWords_bytes(p) = WDS(payload_words); StgArrWords_payload(p,0) = fptr; StgArrWords_payload(p,1) = ptr; StgArrWords_payload(p,2) = eptr; @@ -393,15 +420,17 @@ mkWeakForeignEnvzh_fast StgWeak_finalizer(w) = stg_NO_FINALIZER_closure; StgWeak_cfinalizer(w) = p; + ACQUIRE_LOCK(sm_mutex); StgWeak_link(w) = W_[weak_ptr_list]; W_[weak_ptr_list] = w; + RELEASE_LOCK(sm_mutex); IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []); RET_P(w); } -finalizzeWeakzh_fast +stg_finalizzeWeakzh { /* R1 = weak ptr */ @@ -453,7 +482,7 @@ finalizzeWeakzh_fast } } -deRefWeakzh_fast +stg_deRefWeakzh { /* R1 = weak ptr */ W_ w, code, val; @@ -470,522 +499,20 @@ deRefWeakzh_fast } /* ----------------------------------------------------------------------------- - Arbitrary-precision Integer operations. - - There are some assumptions in this code that mp_limb_t == W_. This is - the case for all the platforms that GHC supports, currently. + Floating point operations. -------------------------------------------------------------------------- */ -int2Integerzh_fast -{ - /* arguments: R1 = Int# */ - - W_ val, s, p; /* to avoid aliasing */ - - val = R1; - ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast ); - - p = Hp - SIZEOF_StgArrWords; - SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = 1; - - /* mpz_set_si is inlined here, makes things simpler */ - if (%lt(val,0)) { - s = -1; - Hp(0) = -val; - } else { - if (%gt(val,0)) { - s = 1; - Hp(0) = val; - } else { - s = 0; - } - } - - /* returns (# size :: Int#, - data :: ByteArray# - #) - */ - RET_NP(s,p); -} - -word2Integerzh_fast -{ - /* arguments: R1 = Word# */ - - W_ val, s, p; /* to avoid aliasing */ - - val = R1; - - ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast); - - p = Hp - SIZEOF_StgArrWords; - SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = 1; - - if (val != 0) { - s = 1; - W_[Hp] = val; - } else { - s = 0; - } - - /* returns (# size :: Int#, - data :: ByteArray# #) - */ - RET_NP(s,p); -} - - -/* - * 'long long' primops for converting to/from Integers. - */ - -#ifdef SUPPORT_LONG_LONGS - -int64ToIntegerzh_fast -{ - /* arguments: L1 = Int64# */ - - L_ val; - W_ hi, lo, s, neg, words_needed, p; - - val = L1; - neg = 0; - - hi = TO_W_(val >> 32); - lo = TO_W_(val); - - if ( hi == 0 || (hi == 0xFFFFFFFF && lo != 0) ) { - // minimum is one word - words_needed = 1; - } else { - words_needed = 2; - } - - ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed), - NO_PTRS, int64ToIntegerzh_fast ); - - p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1); - SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = words_needed; - - if ( %lt(hi,0) ) { - neg = 1; - lo = -lo; - if(lo == 0) { - hi = -hi; - } else { - hi = -hi - 1; - } - } - - if ( words_needed == 2 ) { - s = 2; - Hp(-1) = lo; - Hp(0) = hi; - } else { - if ( lo != 0 ) { - s = 1; - Hp(0) = lo; - } else /* val==0 */ { - s = 0; - } - } - if ( neg != 0 ) { - s = -s; - } - - /* returns (# size :: Int#, - data :: ByteArray# #) - */ - RET_NP(s,p); -} -word64ToIntegerzh_fast -{ - /* arguments: L1 = Word64# */ - - L_ val; - W_ hi, lo, s, words_needed, p; - - val = L1; - hi = TO_W_(val >> 32); - lo = TO_W_(val); - - if ( hi != 0 ) { - words_needed = 2; - } else { - words_needed = 1; - } - - ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed), - NO_PTRS, word64ToIntegerzh_fast ); - - p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1); - SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = words_needed; - - if ( hi != 0 ) { - s = 2; - Hp(-1) = lo; - Hp(0) = hi; - } else { - if ( lo != 0 ) { - s = 1; - Hp(0) = lo; - } else /* val==0 */ { - s = 0; - } - } - - /* returns (# size :: Int#, - data :: ByteArray# #) - */ - RET_NP(s,p); -} - - - -#endif /* SUPPORT_LONG_LONGS */ - -/* ToDo: this is shockingly inefficient */ - -#ifndef THREADED_RTS -section "bss" { - mp_tmp1: - bits8 [SIZEOF_MP_INT]; -} - -section "bss" { - mp_tmp2: - bits8 [SIZEOF_MP_INT]; -} - -section "bss" { - mp_result1: - bits8 [SIZEOF_MP_INT]; -} - -section "bss" { - mp_result2: - bits8 [SIZEOF_MP_INT]; -} -#endif - -#ifdef THREADED_RTS -#define FETCH_MP_TEMP(X) \ -W_ X; \ -X = BaseReg + (OFFSET_StgRegTable_r ## X); -#else -#define FETCH_MP_TEMP(X) /* Nothing */ -#endif - -#define GMP_TAKE2_RET1(name,mp_fun) \ -name \ -{ \ - CInt s1, s2; \ - W_ d1, d2; \ - FETCH_MP_TEMP(mp_tmp1); \ - FETCH_MP_TEMP(mp_tmp2); \ - FETCH_MP_TEMP(mp_result1) \ - FETCH_MP_TEMP(mp_result2); \ - \ - /* call doYouWantToGC() */ \ - MAYBE_GC(R2_PTR & R4_PTR, name); \ - \ - s1 = W_TO_INT(R1); \ - d1 = R2; \ - s2 = W_TO_INT(R3); \ - d2 = R4; \ - \ - MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1)); \ - MP_INT__mp_size(mp_tmp1) = (s1); \ - MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(d1); \ - MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2)); \ - MP_INT__mp_size(mp_tmp2) = (s2); \ - MP_INT__mp_d(mp_tmp2) = BYTE_ARR_CTS(d2); \ - \ - foreign "C" __gmpz_init(mp_result1 "ptr") []; \ - \ - /* Perform the operation */ \ - foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") []; \ - \ - RET_NP(TO_W_(MP_INT__mp_size(mp_result1)), \ - MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords); \ -} - -#define GMP_TAKE1_RET1(name,mp_fun) \ -name \ -{ \ - CInt s1; \ - W_ d1; \ - FETCH_MP_TEMP(mp_tmp1); \ - FETCH_MP_TEMP(mp_result1) \ - \ - /* call doYouWantToGC() */ \ - MAYBE_GC(R2_PTR, name); \ - \ - d1 = R2; \ - s1 = W_TO_INT(R1); \ - \ - MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1)); \ - MP_INT__mp_size(mp_tmp1) = (s1); \ - MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(d1); \ - \ - foreign "C" __gmpz_init(mp_result1 "ptr") []; \ - \ - /* Perform the operation */ \ - foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") []; \ - \ - RET_NP(TO_W_(MP_INT__mp_size(mp_result1)), \ - MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords); \ -} - -#define GMP_TAKE2_RET2(name,mp_fun) \ -name \ -{ \ - CInt s1, s2; \ - W_ d1, d2; \ - FETCH_MP_TEMP(mp_tmp1); \ - FETCH_MP_TEMP(mp_tmp2); \ - FETCH_MP_TEMP(mp_result1) \ - FETCH_MP_TEMP(mp_result2) \ - \ - /* call doYouWantToGC() */ \ - MAYBE_GC(R2_PTR & R4_PTR, name); \ - \ - s1 = W_TO_INT(R1); \ - d1 = R2; \ - s2 = W_TO_INT(R3); \ - d2 = R4; \ - \ - MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1)); \ - MP_INT__mp_size(mp_tmp1) = (s1); \ - MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(d1); \ - MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2)); \ - MP_INT__mp_size(mp_tmp2) = (s2); \ - MP_INT__mp_d(mp_tmp2) = BYTE_ARR_CTS(d2); \ - \ - foreign "C" __gmpz_init(mp_result1 "ptr") []; \ - foreign "C" __gmpz_init(mp_result2 "ptr") []; \ - \ - /* Perform the operation */ \ - foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") []; \ - \ - RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)), \ - MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords, \ - TO_W_(MP_INT__mp_size(mp_result2)), \ - MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords); \ -} - -GMP_TAKE2_RET1(plusIntegerzh_fast, __gmpz_add) -GMP_TAKE2_RET1(minusIntegerzh_fast, __gmpz_sub) -GMP_TAKE2_RET1(timesIntegerzh_fast, __gmpz_mul) -GMP_TAKE2_RET1(gcdIntegerzh_fast, __gmpz_gcd) -GMP_TAKE2_RET1(quotIntegerzh_fast, __gmpz_tdiv_q) -GMP_TAKE2_RET1(remIntegerzh_fast, __gmpz_tdiv_r) -GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact) -GMP_TAKE2_RET1(andIntegerzh_fast, __gmpz_and) -GMP_TAKE2_RET1(orIntegerzh_fast, __gmpz_ior) -GMP_TAKE2_RET1(xorIntegerzh_fast, __gmpz_xor) -GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com) - -GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr) -GMP_TAKE2_RET2(divModIntegerzh_fast, __gmpz_fdiv_qr) - -#ifndef THREADED_RTS -section "bss" { - mp_tmp_w: W_; // NB. mp_tmp_w is really an here mp_limb_t -} -#endif - -gcdIntzh_fast -{ - /* R1 = the first Int#; R2 = the second Int# */ - W_ r; - FETCH_MP_TEMP(mp_tmp_w); - - W_[mp_tmp_w] = R1; - (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) []; - - R1 = r; - /* Result parked in R1, return via info-pointer at TOS */ - jump %ENTRY_CODE(Sp(0)); -} - - -gcdIntegerIntzh_fast -{ - /* R1 = s1; R2 = d1; R3 = the int */ - W_ s1; - (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) []; - R1 = s1; - - /* Result parked in R1, return via info-pointer at TOS */ - jump %ENTRY_CODE(Sp(0)); -} - - -cmpIntegerIntzh_fast -{ - /* R1 = s1; R2 = d1; R3 = the int */ - W_ usize, vsize, v_digit, u_digit; - - usize = R1; - vsize = 0; - v_digit = R3; - - // paraphrased from __gmpz_cmp_si() in the GMP sources - if (%gt(v_digit,0)) { - vsize = 1; - } else { - if (%lt(v_digit,0)) { - vsize = -1; - v_digit = -v_digit; - } - } - - if (usize != vsize) { - R1 = usize - vsize; - jump %ENTRY_CODE(Sp(0)); - } - - if (usize == 0) { - R1 = 0; - jump %ENTRY_CODE(Sp(0)); - } - - u_digit = W_[BYTE_ARR_CTS(R2)]; - - if (u_digit == v_digit) { - R1 = 0; - jump %ENTRY_CODE(Sp(0)); - } - - if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's - R1 = usize; - } else { - R1 = -usize; - } - - jump %ENTRY_CODE(Sp(0)); -} - -cmpIntegerzh_fast -{ - /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */ - W_ usize, vsize, size, up, vp; - CInt cmp; - - // paraphrased from __gmpz_cmp() in the GMP sources - usize = R1; - vsize = R3; - - if (usize != vsize) { - R1 = usize - vsize; - jump %ENTRY_CODE(Sp(0)); - } - - if (usize == 0) { - R1 = 0; - jump %ENTRY_CODE(Sp(0)); - } - - if (%lt(usize,0)) { // NB. not <, which is unsigned - size = -usize; - } else { - size = usize; - } - - up = BYTE_ARR_CTS(R2); - vp = BYTE_ARR_CTS(R4); - - (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) []; - - if (cmp == 0 :: CInt) { - R1 = 0; - jump %ENTRY_CODE(Sp(0)); - } - - if (%lt(cmp,0 :: CInt) == %lt(usize,0)) { - R1 = 1; - } else { - R1 = (-1); - } - /* Result parked in R1, return via info-pointer at TOS */ - jump %ENTRY_CODE(Sp(0)); -} - -integer2Intzh_fast -{ - /* R1 = s; R2 = d */ - W_ r, s; - - s = R1; - if (s == 0) { - r = 0; - } else { - r = W_[R2 + SIZEOF_StgArrWords]; - if (%lt(s,0)) { - r = -r; - } - } - /* Result parked in R1, return via info-pointer at TOS */ - R1 = r; - jump %ENTRY_CODE(Sp(0)); -} - -integer2Wordzh_fast -{ - /* R1 = s; R2 = d */ - W_ r, s; - - s = R1; - if (s == 0) { - r = 0; - } else { - r = W_[R2 + SIZEOF_StgArrWords]; - if (%lt(s,0)) { - r = -r; - } - } - /* Result parked in R1, return via info-pointer at TOS */ - R1 = r; - jump %ENTRY_CODE(Sp(0)); -} - -decodeFloatzh_fast +stg_decodeFloatzuIntzh { W_ p; F_ arg; - FETCH_MP_TEMP(mp_tmp1); - FETCH_MP_TEMP(mp_tmp_w); - - /* arguments: F1 = Float# */ - arg = F1; - - ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast ); - - /* Be prepared to tell Lennart-coded __decodeFloat - where mantissa._mp_d can be put (it does not care about the rest) */ - p = Hp - SIZEOF_StgArrWords; - SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]); - StgArrWords_words(p) = 1; - MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p); - - /* Perform the operation */ - foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) []; - - /* returns: (Int# (expn), Int#, ByteArray#) */ - RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p); -} + W_ mp_tmp1; + W_ mp_tmp_w; -decodeFloatzuIntzh_fast -{ - W_ p; - F_ arg; - FETCH_MP_TEMP(mp_tmp1); - FETCH_MP_TEMP(mp_tmp_w); + STK_CHK_GEN( WDS(2), NO_PTRS, stg_decodeFloatzuIntzh ); + + mp_tmp1 = Sp - WDS(1); + mp_tmp_w = Sp - WDS(2); /* arguments: F1 = Float# */ arg = F1; @@ -997,43 +524,21 @@ decodeFloatzuIntzh_fast RET_NN(W_[mp_tmp1], W_[mp_tmp_w]); } -#define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE -#define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE) - -decodeDoublezh_fast +stg_decodeDoublezu2Intzh { D_ arg; W_ p; - FETCH_MP_TEMP(mp_tmp1); - FETCH_MP_TEMP(mp_tmp_w); + W_ mp_tmp1; + W_ mp_tmp2; + W_ mp_result1; + W_ mp_result2; - /* arguments: D1 = Double# */ - arg = D1; + STK_CHK_GEN( WDS(4), NO_PTRS, stg_decodeDoublezu2Intzh ); - ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast ); - - /* Be prepared to tell Lennart-coded __decodeDouble - where mantissa.d can be put (it does not care about the rest) */ - p = Hp - ARR_SIZE + WDS(1); - SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE); - MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p); - - /* Perform the operation */ - foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) []; - - /* returns: (Int# (expn), Int#, ByteArray#) */ - RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p); -} - -decodeDoublezu2Intzh_fast -{ - D_ arg; - W_ p; - FETCH_MP_TEMP(mp_tmp1); - FETCH_MP_TEMP(mp_tmp2); - FETCH_MP_TEMP(mp_result1); - FETCH_MP_TEMP(mp_result2); + mp_tmp1 = Sp - WDS(1); + mp_tmp2 = Sp - WDS(2); + mp_result1 = Sp - WDS(3); + mp_result2 = Sp - WDS(4); /* arguments: D1 = Double# */ arg = D1; @@ -1052,11 +557,11 @@ decodeDoublezu2Intzh_fast * Concurrency primitives * -------------------------------------------------------------------------- */ -forkzh_fast +stg_forkzh { /* args: R1 = closure to spark */ - MAYBE_GC(R1_PTR, forkzh_fast); + MAYBE_GC(R1_PTR, stg_forkzh); W_ closure; W_ threadid; @@ -1067,9 +572,9 @@ forkzh_fast closure "ptr") []; /* start blocked if the current thread is blocked */ - StgTSO_flags(threadid) = - StgTSO_flags(threadid) | (StgTSO_flags(CurrentTSO) & - (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32)); + StgTSO_flags(threadid) = %lobits16( + TO_W_(StgTSO_flags(threadid)) | + TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE)); foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") []; @@ -1080,11 +585,11 @@ forkzh_fast RET_P(threadid); } -forkOnzh_fast +stg_forkOnzh { /* args: R1 = cpu, R2 = closure to spark */ - MAYBE_GC(R2_PTR, forkOnzh_fast); + MAYBE_GC(R2_PTR, stg_forkOnzh); W_ cpu; W_ closure; @@ -1097,9 +602,9 @@ forkOnzh_fast closure "ptr") []; /* start blocked if the current thread is blocked */ - StgTSO_flags(threadid) = - StgTSO_flags(threadid) | (StgTSO_flags(CurrentTSO) & - (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32)); + StgTSO_flags(threadid) = %lobits16( + TO_W_(StgTSO_flags(threadid)) | + TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE)); foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") []; @@ -1110,18 +615,18 @@ forkOnzh_fast RET_P(threadid); } -yieldzh_fast +stg_yieldzh { jump stg_yield_noregs; } -myThreadIdzh_fast +stg_myThreadIdzh { /* no args. */ RET_P(CurrentTSO); } -labelThreadzh_fast +stg_labelThreadzh { /* args: R1 = ThreadId# @@ -1132,7 +637,7 @@ labelThreadzh_fast jump %ENTRY_CODE(Sp(0)); } -isCurrentThreadBoundzh_fast +stg_isCurrentThreadBoundzh { /* no args */ W_ r; @@ -1140,20 +645,15 @@ isCurrentThreadBoundzh_fast RET_N(r); } -threadStatuszh_fast +stg_threadStatuszh { /* args: R1 :: ThreadId# */ W_ tso; W_ why_blocked; W_ what_next; - W_ ret; + W_ ret, cap, locked; tso = R1; - loop: - if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) { - tso = StgTSO__link(tso); - goto loop; - } what_next = TO_W_(StgTSO_what_next(tso)); why_blocked = TO_W_(StgTSO_why_blocked(tso)); @@ -1171,7 +671,16 @@ threadStatuszh_fast ret = why_blocked; } } - RET_N(ret); + + cap = TO_W_(Capability_no(StgTSO_cap(tso))); + + if ((TO_W_(StgTSO_flags(tso)) & TSO_LOCKED) != 0) { + locked = 1; + } else { + locked = 0; + } + + RET_NNN(ret,cap,locked); } /* ----------------------------------------------------------------------------- @@ -1192,7 +701,7 @@ INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME, frame = Sp; trec = StgTSO_trec(CurrentTSO); - ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") []; + outer = StgTRecHeader_enclosing_trec(trec); (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") []; if (r != 0) { /* Succeeded (either first branch or second branch) */ @@ -1220,18 +729,20 @@ INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - P_ unused3, P_ unused4) + P_ code, P_ next_invariant_to_check, P_ result) { W_ frame, trec, valid, next_invariant, q, outer; - frame = Sp; - trec = StgTSO_trec(CurrentTSO); - ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") []; + frame = Sp; + trec = StgTSO_trec(CurrentTSO); + result = R1; + outer = StgTRecHeader_enclosing_trec(trec); if (outer == NO_TREC) { /* First time back at the atomically frame -- pick up invariants */ ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") []; StgAtomicallyFrame_next_invariant_to_check(frame) = q; + StgAtomicallyFrame_result(frame) = result; } else { /* Second/subsequent time back at the atomically frame -- abort the @@ -1265,6 +776,7 @@ INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME, if (valid != 0) { /* Transaction was valid: commit succeeded */ StgTSO_trec(CurrentTSO) = NO_TREC; + R1 = StgAtomicallyFrame_result(frame); Sp = Sp + SIZEOF_StgAtomicallyFrame; jump %ENTRY_CODE(Sp(SP_OFF)); } else { @@ -1282,7 +794,7 @@ INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - P_ unused3, P_ unused4) + P_ code, P_ next_invariant_to_check, P_ result) { W_ frame, trec, valid; @@ -1321,7 +833,7 @@ INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME, W_ r, frame, trec, outer; frame = Sp; trec = StgTSO_trec(CurrentTSO); - ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") []; + outer = StgTRecHeader_enclosing_trec(trec); (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") []; if (r != 0) { /* Commit succeeded */ @@ -1341,24 +853,24 @@ INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME, // Primop definition ------------------------------------------------------------ -atomicallyzh_fast +stg_atomicallyzh { W_ frame; W_ old_trec; W_ new_trec; // stmStartTransaction may allocate - MAYBE_GC (R1_PTR, atomicallyzh_fast); + MAYBE_GC (R1_PTR, stg_atomicallyzh); /* Args: R1 = m :: STM a */ - STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast); + STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, stg_atomicallyzh); old_trec = StgTSO_trec(CurrentTSO); /* Nested transactions are not allowed; raise an exception */ if (old_trec != NO_TREC) { R1 = base_ControlziExceptionziBase_nestedAtomically_closure; - jump raisezh_fast; + jump stg_raisezh; } /* Set up the atomically frame */ @@ -1367,6 +879,7 @@ atomicallyzh_fast SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]); StgAtomicallyFrame_code(frame) = R1; + StgAtomicallyFrame_result(frame) = NO_TREC; StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE; /* Start the memory transcation */ @@ -1378,13 +891,13 @@ atomicallyzh_fast } -catchSTMzh_fast +stg_catchSTMzh { W_ frame; /* Args: R1 :: STM a */ /* Args: R2 :: Exception -> STM a */ - STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast); + STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, stg_catchSTMzh); /* Set up the catch frame */ Sp = Sp - SIZEOF_StgCatchSTMFrame; @@ -1406,18 +919,18 @@ catchSTMzh_fast } -catchRetryzh_fast +stg_catchRetryzh { W_ frame; W_ new_trec; W_ trec; // stmStartTransaction may allocate - MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); + MAYBE_GC (R1_PTR & R2_PTR, stg_catchRetryzh); /* Args: R1 :: STM a */ /* Args: R2 :: STM a */ - STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast); + STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, stg_catchRetryzh); /* Start a nested transaction within which to run the first code */ trec = StgTSO_trec(CurrentTSO); @@ -1438,7 +951,7 @@ catchRetryzh_fast } -retryzh_fast +stg_retryzh { W_ frame_type; W_ frame; @@ -1446,16 +959,16 @@ retryzh_fast W_ outer; W_ r; - MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate + MAYBE_GC (NO_PTRS, stg_retryzh); // STM operations may allocate // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME retry_pop_stack: - StgTSO_sp(CurrentTSO) = Sp; - (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") []; - Sp = StgTSO_sp(CurrentTSO); + SAVE_THREAD_STATE(); + (frame_type) = foreign "C" findRetryFrameHelper(MyCapability(), CurrentTSO "ptr") []; + LOAD_THREAD_STATE(); frame = Sp; trec = StgTSO_trec(CurrentTSO); - ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") []; + outer = StgTRecHeader_enclosing_trec(trec); if (frame_type == CATCH_RETRY_FRAME) { // The retry reaches a CATCH_RETRY_FRAME before the atomic frame @@ -1488,7 +1001,7 @@ retry_pop_stack: foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") []; trec = outer; StgTSO_trec(CurrentTSO) = trec; - ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") []; + outer = StgTRecHeader_enclosing_trec(trec); } ASSERT(outer == NO_TREC); @@ -1511,12 +1024,12 @@ retry_pop_stack: } -checkzh_fast +stg_checkzh { W_ trec, closure; /* Args: R1 = invariant closure */ - MAYBE_GC (R1_PTR, checkzh_fast); + MAYBE_GC (R1_PTR, stg_checkzh); trec = StgTSO_trec(CurrentTSO); closure = R1; @@ -1528,21 +1041,21 @@ checkzh_fast } -newTVarzh_fast +stg_newTVarzh { W_ tv; W_ new_value; /* Args: R1 = initialisation value */ - MAYBE_GC (R1_PTR, newTVarzh_fast); + MAYBE_GC (R1_PTR, stg_newTVarzh); new_value = R1; ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") []; RET_P(tv); } -readTVarzh_fast +stg_readTVarzh { W_ trec; W_ tvar; @@ -1550,7 +1063,7 @@ readTVarzh_fast /* Args: R1 = TVar closure */ - MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate + MAYBE_GC (R1_PTR, stg_readTVarzh); // Call to stmReadTVar may allocate trec = StgTSO_trec(CurrentTSO); tvar = R1; ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") []; @@ -1558,7 +1071,7 @@ readTVarzh_fast RET_P(result); } -readTVarIOzh_fast +stg_readTVarIOzh { W_ result; @@ -1570,7 +1083,7 @@ again: RET_P(result); } -writeTVarzh_fast +stg_writeTVarzh { W_ trec; W_ tvar; @@ -1579,7 +1092,7 @@ writeTVarzh_fast /* Args: R1 = TVar closure */ /* R2 = New value */ - MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate + MAYBE_GC (R1_PTR & R2_PTR, stg_writeTVarzh); // Call to stmWriteTVar may allocate trec = StgTSO_trec(CurrentTSO); tvar = R1; new_value = R2; @@ -1621,7 +1134,7 @@ writeTVarzh_fast * * -------------------------------------------------------------------------- */ -isEmptyMVarzh_fast +stg_isEmptyMVarzh { /* args: R1 = MVar closure */ @@ -1632,12 +1145,12 @@ isEmptyMVarzh_fast } } -newMVarzh_fast +stg_newMVarzh { /* args: none */ W_ mvar; - ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast ); + ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, stg_newMVarzh ); mvar = Hp - SIZEOF_StgMVar + WDS(1); SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]); @@ -1649,17 +1162,21 @@ newMVarzh_fast } -#define PerformTake(tso, value) \ - W_[StgTSO_sp(tso) + WDS(1)] = value; \ - W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info; +#define PerformTake(stack, value) \ + W_ sp; \ + sp = StgStack_sp(stack); \ + W_[sp + WDS(1)] = value; \ + W_[sp + WDS(0)] = stg_gc_unpt_r1_info; -#define PerformPut(tso,lval) \ - StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3); \ - lval = W_[StgTSO_sp(tso) - WDS(1)]; +#define PerformPut(stack,lval) \ + W_ sp; \ + sp = StgStack_sp(stack) + WDS(3); \ + StgStack_sp(stack) = sp; \ + lval = W_[sp - WDS(1)]; -takeMVarzh_fast +stg_takeMVarzh { - W_ mvar, val, info, tso; + W_ mvar, val, info, tso, q; /* args: R1 = MVar closure */ mvar = R1; @@ -1678,80 +1195,88 @@ takeMVarzh_fast * and wait until we're woken up. */ if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) { + + // Note [mvar-heap-check] We want to do the heap check in the + // branch here, to avoid the conditional in the common case. + // However, we've already locked the MVar above, so we better + // be careful to unlock it again if the the heap check fails. + // Unfortunately we don't have an easy way to inject any code + // into the heap check generated by the code generator, so we + // have to do it in stg_gc_gen (see HeapStackCheck.cmm). + HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR, stg_takeMVarzh); + + q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1); + + SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM); + StgMVarTSOQueue_link(q) = END_TSO_QUEUE; + StgMVarTSOQueue_tso(q) = CurrentTSO; + if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { - StgMVar_head(mvar) = CurrentTSO; + StgMVar_head(mvar) = q; } else { - foreign "C" setTSOLink(MyCapability() "ptr", - StgMVar_tail(mvar) "ptr", - CurrentTSO) []; + StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q; + foreign "C" recordClosureMutated(MyCapability() "ptr", + StgMVar_tail(mvar)) []; } - StgTSO__link(CurrentTSO) = stg_END_TSO_QUEUE_closure; + StgTSO__link(CurrentTSO) = q; StgTSO_block_info(CurrentTSO) = mvar; - // write barrier for throwTo(), which looks at block_info - // if why_blocked==BlockedOnMVar. - prim %write_barrier() []; StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; - StgMVar_tail(mvar) = CurrentTSO; + StgMVar_tail(mvar) = q; R1 = mvar; jump stg_block_takemvar; - } - - /* we got the value... */ - val = StgMVar_value(mvar); - - if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) - { - /* There are putMVar(s) waiting... - * wake up the first thread on the queue - */ - ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16); - - /* actually perform the putMVar for the thread that we just woke up */ - tso = StgMVar_head(mvar); - PerformPut(tso,StgMVar_value(mvar)); - - if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) { - foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; - } + } + + /* we got the value... */ + val = StgMVar_value(mvar); + + q = StgMVar_head(mvar); +loop: + if (q == stg_END_TSO_QUEUE_closure) { + /* No further putMVars, MVar is now empty */ + StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; + unlockClosure(mvar, stg_MVAR_DIRTY_info); + RET_P(val); + } + if (StgHeader_info(q) == stg_IND_info || + StgHeader_info(q) == stg_MSG_NULL_info) { + q = StgInd_indirectee(q); + goto loop; + } + + // There are putMVar(s) waiting... wake up the first thread on the queue + + tso = StgMVarTSOQueue_tso(q); + StgMVar_head(mvar) = StgMVarTSOQueue_link(q); + if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { + StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; + } - ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", - StgMVar_head(mvar) "ptr", 1) []; - StgMVar_head(mvar) = tso; + ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16); + ASSERT(StgTSO_block_info(tso) == mvar); - if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { - StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; - } + // actually perform the putMVar for the thread that we just woke up + W_ stack; + stack = StgTSO_stackobj(tso); + PerformPut(stack, StgMVar_value(mvar)); -#if defined(THREADED_RTS) - unlockClosure(mvar, stg_MVAR_DIRTY_info); -#else - SET_INFO(mvar,stg_MVAR_DIRTY_info); -#endif - RET_P(val); - } - else - { - /* No further putMVars, MVar is now empty */ - StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; - -#if defined(THREADED_RTS) - unlockClosure(mvar, stg_MVAR_DIRTY_info); -#else - SET_INFO(mvar,stg_MVAR_DIRTY_info); -#endif + // indicate that the MVar operation has now completed. + StgTSO__link(tso) = stg_END_TSO_QUEUE_closure; + + // no need to mark the TSO dirty, we have only written END_TSO_QUEUE. - RET_P(val); - } + foreign "C" tryWakeupThread(MyCapability() "ptr", tso) []; + + unlockClosure(mvar, stg_MVAR_DIRTY_info); + RET_P(val); } -tryTakeMVarzh_fast +stg_tryTakeMVarzh { - W_ mvar, val, info, tso; + W_ mvar, val, info, tso, q; /* args: R1 = MVar closure */ - mvar = R1; #if defined(THREADED_RTS) @@ -1759,7 +1284,10 @@ tryTakeMVarzh_fast #else info = GET_INFO(mvar); #endif - + + /* If the MVar is empty, put ourselves on its blocking queue, + * and wait until we're woken up. + */ if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) { #if defined(THREADED_RTS) unlockClosure(mvar, info); @@ -1769,59 +1297,59 @@ tryTakeMVarzh_fast */ RET_NP(0, stg_NO_FINALIZER_closure); } - + if (info == stg_MVAR_CLEAN_info) { - foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr"); + foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") []; } /* we got the value... */ val = StgMVar_value(mvar); + + q = StgMVar_head(mvar); +loop: + if (q == stg_END_TSO_QUEUE_closure) { + /* No further putMVars, MVar is now empty */ + StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; + unlockClosure(mvar, stg_MVAR_DIRTY_info); + RET_NP(1, val); + } + if (StgHeader_info(q) == stg_IND_info || + StgHeader_info(q) == stg_MSG_NULL_info) { + q = StgInd_indirectee(q); + goto loop; + } + + // There are putMVar(s) waiting... wake up the first thread on the queue + + tso = StgMVarTSOQueue_tso(q); + StgMVar_head(mvar) = StgMVarTSOQueue_link(q); + if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { + StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; + } - if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) { - - /* There are putMVar(s) waiting... - * wake up the first thread on the queue - */ - ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16); + ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16); + ASSERT(StgTSO_block_info(tso) == mvar); - /* actually perform the putMVar for the thread that we just woke up */ - tso = StgMVar_head(mvar); - PerformPut(tso,StgMVar_value(mvar)); - if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) { - foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; - } + // actually perform the putMVar for the thread that we just woke up + W_ stack; + stack = StgTSO_stackobj(tso); + PerformPut(stack, StgMVar_value(mvar)); - ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", - StgMVar_head(mvar) "ptr", 1) []; - StgMVar_head(mvar) = tso; + // indicate that the MVar operation has now completed. + StgTSO__link(tso) = stg_END_TSO_QUEUE_closure; + + // no need to mark the TSO dirty, we have only written END_TSO_QUEUE. - if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { - StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; - } -#if defined(THREADED_RTS) - unlockClosure(mvar, stg_MVAR_DIRTY_info); -#else - SET_INFO(mvar,stg_MVAR_DIRTY_info); -#endif - } - else - { - /* No further putMVars, MVar is now empty */ - StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; -#if defined(THREADED_RTS) - unlockClosure(mvar, stg_MVAR_DIRTY_info); -#else - SET_INFO(mvar,stg_MVAR_DIRTY_info); -#endif - } + foreign "C" tryWakeupThread(MyCapability() "ptr", tso) []; - RET_NP(1, val); + unlockClosure(mvar, stg_MVAR_DIRTY_info); + RET_NP(1,val); } -putMVarzh_fast +stg_putMVarzh { - W_ mvar, val, info, tso; + W_ mvar, val, info, tso, q; /* args: R1 = MVar, R2 = value */ mvar = R1; @@ -1838,84 +1366,95 @@ putMVarzh_fast } if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) { + + // see Note [mvar-heap-check] above + HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR & R2_PTR, stg_putMVarzh); + + q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1); + + SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM); + StgMVarTSOQueue_link(q) = END_TSO_QUEUE; + StgMVarTSOQueue_tso(q) = CurrentTSO; + if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { - StgMVar_head(mvar) = CurrentTSO; + StgMVar_head(mvar) = q; } else { - foreign "C" setTSOLink(MyCapability() "ptr", - StgMVar_tail(mvar) "ptr", - CurrentTSO) []; + StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q; + foreign "C" recordClosureMutated(MyCapability() "ptr", + StgMVar_tail(mvar)) []; } - StgTSO__link(CurrentTSO) = stg_END_TSO_QUEUE_closure; + StgTSO__link(CurrentTSO) = q; StgTSO_block_info(CurrentTSO) = mvar; - // write barrier for throwTo(), which looks at block_info - // if why_blocked==BlockedOnMVar. - prim %write_barrier() []; StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; - StgMVar_tail(mvar) = CurrentTSO; - + StgMVar_tail(mvar) = q; + R1 = mvar; R2 = val; jump stg_block_putmvar; } - if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) { + q = StgMVar_head(mvar); +loop: + if (q == stg_END_TSO_QUEUE_closure) { + /* No further takes, the MVar is now full. */ + StgMVar_value(mvar) = val; + unlockClosure(mvar, stg_MVAR_DIRTY_info); + jump %ENTRY_CODE(Sp(0)); + } + if (StgHeader_info(q) == stg_IND_info || + StgHeader_info(q) == stg_MSG_NULL_info) { + q = StgInd_indirectee(q); + goto loop; + } - /* There are takeMVar(s) waiting: wake up the first one - */ - ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16); + // There are takeMVar(s) waiting: wake up the first one + + tso = StgMVarTSOQueue_tso(q); + StgMVar_head(mvar) = StgMVarTSOQueue_link(q); + if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { + StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; + } - /* actually perform the takeMVar */ - tso = StgMVar_head(mvar); - PerformTake(tso, val); - if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) { - foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; - } - - ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", - StgMVar_head(mvar) "ptr", 1) []; - StgMVar_head(mvar) = tso; + ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16); + ASSERT(StgTSO_block_info(tso) == mvar); - if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { - StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; - } + // actually perform the takeMVar + W_ stack; + stack = StgTSO_stackobj(tso); + PerformTake(stack, val); -#if defined(THREADED_RTS) - unlockClosure(mvar, stg_MVAR_DIRTY_info); -#else - SET_INFO(mvar,stg_MVAR_DIRTY_info); -#endif - jump %ENTRY_CODE(Sp(0)); - } - else - { - /* No further takes, the MVar is now full. */ - StgMVar_value(mvar) = val; + // indicate that the MVar operation has now completed. + StgTSO__link(tso) = stg_END_TSO_QUEUE_closure; -#if defined(THREADED_RTS) - unlockClosure(mvar, stg_MVAR_DIRTY_info); -#else - SET_INFO(mvar,stg_MVAR_DIRTY_info); -#endif - jump %ENTRY_CODE(Sp(0)); + if (TO_W_(StgStack_dirty(stack)) == 0) { + foreign "C" dirty_STACK(MyCapability() "ptr", stack "ptr") []; } - /* ToDo: yield afterward for better communication performance? */ + foreign "C" tryWakeupThread(MyCapability() "ptr", tso) []; + + unlockClosure(mvar, stg_MVAR_DIRTY_info); + jump %ENTRY_CODE(Sp(0)); } -tryPutMVarzh_fast +stg_tryPutMVarzh { - W_ mvar, info, tso; + W_ mvar, val, info, tso, q; /* args: R1 = MVar, R2 = value */ mvar = R1; + val = R2; #if defined(THREADED_RTS) - ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2]; + ("ptr" info) = foreign "C" lockClosure(mvar "ptr") []; #else info = GET_INFO(mvar); #endif + if (info == stg_MVAR_CLEAN_info) { + foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr"); + } + if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) { #if defined(THREADED_RTS) unlockClosure(mvar, info); @@ -1923,51 +1462,47 @@ tryPutMVarzh_fast RET_N(0); } - if (info == stg_MVAR_CLEAN_info) { - foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr"); + q = StgMVar_head(mvar); +loop: + if (q == stg_END_TSO_QUEUE_closure) { + /* No further takes, the MVar is now full. */ + StgMVar_value(mvar) = val; + unlockClosure(mvar, stg_MVAR_DIRTY_info); + RET_N(1); + } + if (StgHeader_info(q) == stg_IND_info || + StgHeader_info(q) == stg_MSG_NULL_info) { + q = StgInd_indirectee(q); + goto loop; } - if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) { - - /* There are takeMVar(s) waiting: wake up the first one - */ - ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16); - - /* actually perform the takeMVar */ - tso = StgMVar_head(mvar); - PerformTake(tso, R2); - if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) { - foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; - } - - ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", - StgMVar_head(mvar) "ptr", 1) []; - StgMVar_head(mvar) = tso; + // There are takeMVar(s) waiting: wake up the first one + + tso = StgMVarTSOQueue_tso(q); + StgMVar_head(mvar) = StgMVarTSOQueue_link(q); + if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { + StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; + } - if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { - StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; - } + ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16); + ASSERT(StgTSO_block_info(tso) == mvar); -#if defined(THREADED_RTS) - unlockClosure(mvar, stg_MVAR_DIRTY_info); -#else - SET_INFO(mvar,stg_MVAR_DIRTY_info); -#endif - } - else - { - /* No further takes, the MVar is now full. */ - StgMVar_value(mvar) = R2; + // actually perform the takeMVar + W_ stack; + stack = StgTSO_stackobj(tso); + PerformTake(stack, val); -#if defined(THREADED_RTS) - unlockClosure(mvar, stg_MVAR_DIRTY_info); -#else - SET_INFO(mvar,stg_MVAR_DIRTY_info); -#endif + // indicate that the MVar operation has now completed. + StgTSO__link(tso) = stg_END_TSO_QUEUE_closure; + + if (TO_W_(StgStack_dirty(stack)) == 0) { + foreign "C" dirty_STACK(MyCapability() "ptr", stack "ptr") []; } + foreign "C" tryWakeupThread(MyCapability() "ptr", tso) []; + + unlockClosure(mvar, stg_MVAR_DIRTY_info); RET_N(1); - /* ToDo: yield afterward for better communication performance? */ } @@ -1975,11 +1510,11 @@ tryPutMVarzh_fast Stable pointer primitives ------------------------------------------------------------------------- */ -makeStableNamezh_fast +stg_makeStableNamezh { W_ index, sn_obj; - ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast ); + ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, stg_makeStableNamezh ); (index) = foreign "C" lookupStableName(R1 "ptr") []; @@ -1999,16 +1534,16 @@ makeStableNamezh_fast } -makeStablePtrzh_fast +stg_makeStablePtrzh { /* Args: R1 = a */ W_ sp; - MAYBE_GC(R1_PTR, makeStablePtrzh_fast); + MAYBE_GC(R1_PTR, stg_makeStablePtrzh); ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") []; RET_N(sp); } -deRefStablePtrzh_fast +stg_deRefStablePtrzh { /* Args: R1 = the stable ptr */ W_ r, sp; @@ -2021,7 +1556,7 @@ deRefStablePtrzh_fast Bytecode object primitives ------------------------------------------------------------------------- */ -newBCOzh_fast +stg_newBCOzh { /* R1 = instrs R2 = literals @@ -2033,10 +1568,10 @@ newBCOzh_fast bitmap_arr = R5; - words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr); + words = BYTES_TO_WDS(SIZEOF_StgBCO) + BYTE_ARR_WDS(bitmap_arr); bytes = WDS(words); - ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast ); + ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, stg_newBCOzh ); bco = Hp - bytes + WDS(1); SET_HDR(bco, stg_BCO_info, W_[CCCS]); @@ -2051,7 +1586,7 @@ newBCOzh_fast W_ i; i = 0; for: - if (i < StgArrWords_words(bitmap_arr)) { + if (i < BYTE_ARR_WDS(bitmap_arr)) { StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i); i = i + 1; goto for; @@ -2061,7 +1596,7 @@ for: } -mkApUpd0zh_fast +stg_mkApUpd0zh { // R1 = the BCO# for the AP // @@ -2073,7 +1608,7 @@ mkApUpd0zh_fast ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) && StgBCO_arity(R1) == HALF_W_(0)); - HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast); + HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, stg_mkApUpd0zh); TICK_ALLOC_UP_THK(0, 0); CCCS_ALLOC(SIZEOF_StgAP); @@ -2086,7 +1621,7 @@ mkApUpd0zh_fast RET_P(ap); } -unpackClosurezh_fast +stg_unpackClosurezh { /* args: R1 = closure to analyze */ // TODO: Consider the absence of ptrs or nonptrs as a special case ? @@ -2116,11 +1651,12 @@ unpackClosurezh_fast }} out: - W_ ptrs_arr_sz, nptrs_arr_sz; + W_ ptrs_arr_sz, ptrs_arr_cards, nptrs_arr_sz; nptrs_arr_sz = SIZEOF_StgArrWords + WDS(nptrs); - ptrs_arr_sz = SIZEOF_StgMutArrPtrs + WDS(ptrs); + ptrs_arr_cards = mutArrPtrsCardWords(ptrs); + ptrs_arr_sz = SIZEOF_StgMutArrPtrs + WDS(ptrs) + WDS(ptrs_arr_cards); - ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast); + ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, stg_unpackClosurezh); W_ clos; clos = UNTAG(R1); @@ -2130,6 +1666,8 @@ out: SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]); StgMutArrPtrs_ptrs(ptrs_arr) = ptrs; + StgMutArrPtrs_size(ptrs_arr) = ptrs + ptrs_arr_cards; + p = 0; for: if(p < ptrs) { @@ -2137,9 +1675,12 @@ for: p = p + 1; goto for; } + /* We can leave the card table uninitialised, since the array is + allocated in the nursery. The GC will fill it in if/when the array + is promoted. */ SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(nptrs_arr) = nptrs; + StgArrWords_bytes(nptrs_arr) = WDS(nptrs); p = 0; for2: if(p < nptrs) { @@ -2166,7 +1707,7 @@ for2: } \ W_[blocked_queue_tl] = tso; -waitReadzh_fast +stg_waitReadzh { /* args: R1 */ #ifdef THREADED_RTS @@ -2183,7 +1724,7 @@ waitReadzh_fast #endif } -waitWritezh_fast +stg_waitWritezh { /* args: R1 */ #ifdef THREADED_RTS @@ -2201,8 +1742,8 @@ waitWritezh_fast } -STRING(stg_delayzh_malloc_str, "delayzh_fast") -delayzh_fast +STRING(stg_delayzh_malloc_str, "stg_delayzh") +stg_delayzh { #ifdef mingw32_HOST_OS W_ ares; @@ -2275,8 +1816,8 @@ while: #ifdef mingw32_HOST_OS -STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast") -asyncReadzh_fast +STRING(stg_asyncReadzh_malloc_str, "stg_asyncReadzh") +stg_asyncReadzh { W_ ares; CInt reqID; @@ -2303,8 +1844,8 @@ asyncReadzh_fast #endif } -STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast") -asyncWritezh_fast +STRING(stg_asyncWritezh_malloc_str, "stg_asyncWritezh") +stg_asyncWritezh { W_ ares; CInt reqID; @@ -2331,8 +1872,8 @@ asyncWritezh_fast #endif } -STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast") -asyncDoProczh_fast +STRING(stg_asyncDoProczh_malloc_str, "stg_asyncDoProczh") +stg_asyncDoProczh { W_ ares; CInt reqID; @@ -2360,12 +1901,71 @@ asyncDoProczh_fast } #endif -// noDuplicate# tries to ensure that none of the thunks under -// evaluation by the current thread are also under evaluation by -// another thread. It relies on *both* threads doing noDuplicate#; -// the second one will get blocked if they are duplicating some work. -noDuplicatezh_fast +/* ----------------------------------------------------------------------------- + * noDuplicate# + * + * noDuplicate# tries to ensure that none of the thunks under + * evaluation by the current thread are also under evaluation by + * another thread. It relies on *both* threads doing noDuplicate#; + * the second one will get blocked if they are duplicating some work. + * + * The idea is that noDuplicate# is used within unsafePerformIO to + * ensure that the IO operation is performed at most once. + * noDuplicate# calls threadPaused which acquires an exclusive lock on + * all the thunks currently under evaluation by the current thread. + * + * Consider the following scenario. There is a thunk A, whose + * evaluation requires evaluating thunk B, where thunk B is an + * unsafePerformIO. Two threads, 1 and 2, bother enter A. Thread 2 + * is pre-empted before it enters B, and claims A by blackholing it + * (in threadPaused). Thread 1 now enters B, and calls noDuplicate#. + * + * thread 1 thread 2 + * +-----------+ +---------------+ + * | -------+-----> A <-------+------- | + * | update | BLACKHOLE | marked_update | + * +-----------+ +---------------+ + * | | | | + * ... ... + * | | +---------------+ + * +-----------+ + * | ------+-----> B + * | update | BLACKHOLE + * +-----------+ + * + * At this point: A is a blackhole, owned by thread 2. noDuplicate# + * calls threadPaused, which walks up the stack and + * - claims B on behalf of thread 1 + * - then it reaches the update frame for A, which it sees is already + * a BLACKHOLE and is therefore owned by another thread. Since + * thread 1 is duplicating work, the computation up to the update + * frame for A is suspended, including thunk B. + * - thunk B, which is an unsafePerformIO, has now been reverted to + * an AP_STACK which could be duplicated - BAD! + * - The solution is as follows: before calling threadPaused, we + * leave a frame on the stack (stg_noDuplicate_info) that will call + * noDuplicate# again if the current computation is suspended and + * restarted. + * + * See the test program in concurrent/prog003 for a way to demonstrate + * this. It needs to be run with +RTS -N3 or greater, and the bug + * only manifests occasionally (once very 10 runs or so). + * -------------------------------------------------------------------------- */ + +INFO_TABLE_RET(stg_noDuplicate, RET_SMALL) +{ + Sp_adj(1); + jump stg_noDuplicatezh; +} + +stg_noDuplicatezh { + STK_CHK_GEN( WDS(1), NO_PTRS, stg_noDuplicatezh ); + // leave noDuplicate frame in case the current + // computation is suspended and restarted (see above). + Sp_adj(-1); + Sp(0) = stg_noDuplicate_info; + SAVE_THREAD_STATE(); ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16); foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") []; @@ -2375,11 +1975,19 @@ noDuplicatezh_fast } else { LOAD_THREAD_STATE(); ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16); + // remove the stg_noDuplicate frame if it is still there. + if (Sp(0) == stg_noDuplicate_info) { + Sp_adj(1); + } jump %ENTRY_CODE(Sp(0)); } } -getApStackValzh_fast +/* ----------------------------------------------------------------------------- + Misc. primitives + -------------------------------------------------------------------------- */ + +stg_getApStackValzh { W_ ap_stack, offset, val, ok; @@ -2397,14 +2005,10 @@ getApStackValzh_fast RET_NP(ok,val); } -/* ----------------------------------------------------------------------------- - Misc. primitives - -------------------------------------------------------------------------- */ - // Write the cost center stack of the first argument on stderr; return // the second. Possibly only makes sense for already evaluated // things? -traceCcszh_fast +stg_traceCcszh { W_ ccs; @@ -2417,18 +2021,67 @@ traceCcszh_fast ENTER(); } -getSparkzh_fast +stg_getSparkzh { W_ spark; #ifndef THREADED_RTS - RET_NP(0,ghczmprim_GHCziBool_False_closure); + RET_NP(0,ghczmprim_GHCziTypes_False_closure); #else (spark) = foreign "C" findSpark(MyCapability()); if (spark != 0) { RET_NP(1,spark); } else { - RET_NP(0,ghczmprim_GHCziBool_False_closure); + RET_NP(0,ghczmprim_GHCziTypes_False_closure); + } +#endif +} + +stg_numSparkszh +{ + W_ n; +#ifdef THREADED_RTS + (n) = foreign "C" dequeElements(Capability_sparks(MyCapability())); +#else + n = 0; +#endif + RET_N(n); +} + +stg_traceEventzh +{ + W_ msg; + msg = R1; + +#if defined(TRACING) || defined(DEBUG) + + foreign "C" traceUserMsg(MyCapability() "ptr", msg "ptr") []; + +#elif defined(DTRACE) + + W_ enabled; + + // We should go through the macro HASKELLEVENT_USER_MSG_ENABLED from + // RtsProbes.h, but that header file includes unistd.h, which doesn't + // work in Cmm +#if !defined(solaris2_TARGET_OS) + (enabled) = foreign "C" __dtrace_isenabled$HaskellEvent$user__msg$v1() []; +#else + // Solaris' DTrace can't handle the + // __dtrace_isenabled$HaskellEvent$user__msg$v1 + // call above. This call is just for testing whether the user__msg + // probe is enabled, and is here for just performance optimization. + // Since preparation for the probe is not that complex I disable usage of + // this test above for Solaris and enable the probe usage manually + // here. Please note that this does not mean that the probe will be + // used during the runtime! You still need to enable it by consumption + // in your dtrace script as you do with any other probe. + enabled = 1; +#endif + if (enabled != 0) { + foreign "C" dtraceUserMsgWrapper(MyCapability() "ptr", msg "ptr") []; } + #endif + jump %ENTRY_CODE(Sp(0)); }