X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=rts%2FPrimOps.cmm;h=5c575f695bd576b8de19397e03837450cb781891;hp=153baaaae52d803638d4c2567d3353f4cc7e1211;hb=5d52d9b64c21dcf77849866584744722f8121389;hpb=73566e25d75588185b0581722406da5c48965c51 diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index 153baaa..5c575f6 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -27,6 +27,18 @@ #include "Cmm.h" +#ifdef __PIC__ +import pthread_mutex_lock; +import pthread_mutex_unlock; +#endif +import base_ControlziExceptionziBase_nestedAtomically_closure; +import EnterCriticalSection; +import LeaveCriticalSection; +import ghczmprim_GHCziBool_False_closure; +#if !defined(mingw32_HOST_OS) +import sm_mutex; +#endif + /*----------------------------------------------------------------------------- Array Primitives @@ -42,66 +54,104 @@ * round up to the nearest word for the size of the array. */ -newByteArrayzh_fast +stg_newByteArrayzh { W_ words, payload_words, n, p; - MAYBE_GC(NO_PTRS,newByteArrayzh_fast); + MAYBE_GC(NO_PTRS,stg_newByteArrayzh); n = R1; payload_words = ROUNDUP_BYTES_TO_WDS(n); words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - "ptr" p = foreign "C" allocateLocal(MyCapability() "ptr",words) []; + ("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); StgArrWords_words(p) = payload_words; RET_P(p); } -newPinnedByteArrayzh_fast +#define BA_ALIGN 16 +#define BA_MASK (BA_ALIGN-1) + +stg_newPinnedByteArrayzh { - W_ words, payload_words, n, p; + W_ words, bytes, payload_words, p; + + MAYBE_GC(NO_PTRS,stg_newPinnedByteArrayzh); + bytes = R1; + /* payload_words is what we will tell the profiler we had to allocate */ + payload_words = ROUNDUP_BYTES_TO_WDS(bytes); + /* When we actually allocate memory, we need to allow space for the + header: */ + bytes = bytes + SIZEOF_StgArrWords; + /* And we want to align to BA_ALIGN bytes, so we need to allow space + to shift up to BA_ALIGN - 1 bytes: */ + bytes = bytes + BA_ALIGN - 1; + /* Now we convert to a number of words: */ + words = ROUNDUP_BYTES_TO_WDS(bytes); + + ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) []; + TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); - MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast); - n = R1; - payload_words = ROUNDUP_BYTES_TO_WDS(n); + /* Now we need to move p forward so that the payload is aligned + to BA_ALIGN bytes: */ + p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK); - // We want an 8-byte aligned array. allocatePinned() gives us - // 8-byte aligned memory by default, but we want to align the - // *goods* inside the ArrWords object, so we have to check the - // size of the ArrWords header and adjust our size accordingly. 
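Both the 8-byte scheme being retired here and the new BA_ALIGN code added above do the same thing: bump the object pointer so that the payload following the StgArrWords header lands on an aligned address. A minimal standalone C sketch of that mask arithmetic (assuming, as the Cmm does, that the alignment is a power of two; the names are illustrative, not RTS API):

    #include <stdint.h>
    #include <assert.h>

    /* Advance p so that the payload, which starts hdr bytes after p,
       lands on an align-byte boundary: (-(p + hdr)) & (align - 1) is the
       distance from p + hdr up to the next multiple of align. */
    static uintptr_t align_payload(uintptr_t p, uintptr_t hdr, uintptr_t align)
    {
        return p + ((-p - hdr) & (align - 1));
    }

    int main(void)
    {
        uintptr_t p = align_payload(0x1003, 16, 16);  /* e.g. a 16-byte header */
        assert((p + 16) % 16 == 0);                   /* payload is aligned    */
        return 0;
    }

The allocation sites above over-allocate by BA_ALIGN - 1 (respectively alignment - 1) bytes precisely so that this forward shift always stays inside the allocated block.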
- words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - if ((SIZEOF_StgArrWords & 7) != 0) { - words = words + 1; - } + SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); + StgArrWords_words(p) = payload_words; + RET_P(p); +} - "ptr" p = foreign "C" allocatePinned(words) []; +stg_newAlignedPinnedByteArrayzh +{ + W_ words, bytes, payload_words, p, alignment; + + MAYBE_GC(NO_PTRS,stg_newAlignedPinnedByteArrayzh); + bytes = R1; + alignment = R2; + + /* payload_words is what we will tell the profiler we had to allocate */ + payload_words = ROUNDUP_BYTES_TO_WDS(bytes); + + /* When we actually allocate memory, we need to allow space for the + header: */ + bytes = bytes + SIZEOF_StgArrWords; + /* And we want to align to bytes, so we need to allow space + to shift up to bytes: */ + bytes = bytes + alignment - 1; + /* Now we convert to a number of words: */ + words = ROUNDUP_BYTES_TO_WDS(bytes); + + ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); - // Again, if the ArrWords header isn't a multiple of 8 bytes, we - // have to push the object forward one word so that the goods - // fall on an 8-byte boundary. - if ((SIZEOF_StgArrWords & 7) != 0) { - p = p + WDS(1); - } + /* Now we need to move p forward so that the payload is aligned + to bytes. Note that we are assuming that + is a power of 2, which is technically not guaranteed */ + p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1)); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); StgArrWords_words(p) = payload_words; RET_P(p); } -newArrayzh_fast +stg_newArrayzh { - W_ words, n, init, arr, p; + W_ words, n, init, arr, p, size; /* Args: R1 = words, R2 = initialisation value */ n = R1; - MAYBE_GC(R2_PTR,newArrayzh_fast); - - words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n; - "ptr" arr = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2]; + MAYBE_GC(R2_PTR,stg_newArrayzh); + + // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words + // in the array, making sure we round up, and then rounding up to a whole + // number of words. + size = n + mutArrPtrsCardWords(n); + words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size; + ("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2]; TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0); SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]); StgMutArrPtrs_ptrs(arr) = n; + StgMutArrPtrs_size(arr) = size; // Initialise all elements of the the array with the value in R2 init = R2; @@ -112,18 +162,25 @@ newArrayzh_fast p = p + WDS(1); goto for; } + // Initialise the mark bits with 0 + for2: + if (p < arr + WDS(size)) { + W_[p] = 0; + p = p + WDS(1); + goto for2; + } RET_P(arr); } -unsafeThawArrayzh_fast +stg_unsafeThawArrayzh { // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST // // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN // normally doesn't. However, when we freeze a MUT_ARR_PTRS, we leave // it on the mutable list for the GC to remove (removing something from - // the mutable list is not easy, because the mut_list is only singly-linked). + // the mutable list is not easy). 
// // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list, // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0 @@ -151,12 +208,12 @@ unsafeThawArrayzh_fast MutVar primitives -------------------------------------------------------------------------- */ -newMutVarzh_fast +stg_newMutVarzh { W_ mv; /* Args: R1 = initialisation value */ - ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast); + ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, stg_newMutVarzh); mv = Hp - SIZEOF_StgMutVar + WDS(1); SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]); @@ -165,9 +222,9 @@ newMutVarzh_fast RET_P(mv); } -atomicModifyMutVarzh_fast +stg_atomicModifyMutVarzh { - W_ mv, z, x, y, r; + W_ mv, f, z, x, y, r, h; /* Args: R1 :: MutVar#, R2 :: a -> (a,b) */ /* If x is the current contents of the MutVar#, then @@ -204,21 +261,17 @@ atomicModifyMutVarzh_fast #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE) - HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast); + HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, stg_atomicModifyMutVarzh); -#if defined(THREADED_RTS) - foreign "C" ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2]; -#endif - - x = StgMutVar_var(R1); + mv = R1; + f = R2; TICK_ALLOC_THUNK_2(); CCCS_ALLOC(THUNK_2_SIZE); z = Hp - THUNK_2_SIZE + WDS(1); SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]); LDV_RECORD_CREATE(z); - StgThunk_payload(z,0) = R2; - StgThunk_payload(z,1) = x; + StgThunk_payload(z,0) = f; TICK_ALLOC_THUNK_1(); CCCS_ALLOC(THUNK_1_SIZE); @@ -227,9 +280,6 @@ atomicModifyMutVarzh_fast LDV_RECORD_CREATE(y); StgThunk_payload(y,0) = z; - StgMutVar_var(R1) = y; - foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1]; - TICK_ALLOC_THUNK_1(); CCCS_ALLOC(THUNK_1_SIZE); r = y - THUNK_1_SIZE; @@ -237,10 +287,20 @@ atomicModifyMutVarzh_fast LDV_RECORD_CREATE(r); StgThunk_payload(r,0) = z; -#if defined(THREADED_RTS) - foreign "C" RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") []; + retry: + x = StgMutVar_var(mv); + StgThunk_payload(z,1) = x; +#ifdef THREADED_RTS + (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) []; + if (h != x) { goto retry; } +#else + StgMutVar_var(mv) = y; #endif + if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) { + foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") []; + } + RET_P(r); } @@ -250,7 +310,7 @@ atomicModifyMutVarzh_fast STRING(stg_weak_msg,"New weak pointer at %p\n") -mkWeakzh_fast +stg_mkWeakzh { /* R1 = key R2 = value @@ -262,29 +322,91 @@ mkWeakzh_fast R3 = stg_NO_FINALIZER_closure; } - ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast ); + ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, stg_mkWeakzh ); w = Hp - SIZEOF_StgWeak + WDS(1); SET_HDR(w, stg_WEAK_info, W_[CCCS]); - StgWeak_key(w) = R1; - StgWeak_value(w) = R2; - StgWeak_finalizer(w) = R3; + // We don't care about cfinalizer here. + // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or + // something else? 
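Stepping back to stg_atomicModifyMutVarzh above: on THREADED_RTS it no longer takes a global lock; it allocates the result thunks first and then publishes the new contents with a cas() retry loop. The same read/patch/compare-and-swap pattern, sketched with C11 atomics (the types and names here are illustrative stand-ins, not the real RTS structures):

    #include <stdatomic.h>

    typedef struct { void *payload[2]; } Thunk;   /* stands in for the "f x" thunk */

    static void atomic_modify(_Atomic(void *) *var, Thunk *z, void *y)
    {
        void *x;
        do {
            x = atomic_load(var);      /* read the current contents              */
            z->payload[1] = x;         /* patch the thunk before trying to swap  */
            /* publish y only if the cell still holds x; otherwise go round again */
        } while (!atomic_compare_exchange_strong(var, &x, y));
    }

Because the thunks exist before the loop starts, a failed CAS only costs re-reading the MutVar and overwriting one payload field, which is why no lock is needed.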
+ StgWeak_key(w) = R1; + StgWeak_value(w) = R2; + StgWeak_finalizer(w) = R3; + StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure; + + ACQUIRE_LOCK(sm_mutex); StgWeak_link(w) = W_[weak_ptr_list]; W_[weak_ptr_list] = w; + RELEASE_LOCK(sm_mutex); IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []); RET_P(w); } +stg_mkWeakForeignEnvzh +{ + /* R1 = key + R2 = value + R3 = finalizer + R4 = pointer + R5 = has environment (0 or 1) + R6 = environment + */ + W_ w, payload_words, words, p; + + W_ key, val, fptr, ptr, flag, eptr; + + key = R1; + val = R2; + fptr = R3; + ptr = R4; + flag = R5; + eptr = R6; -finalizzeWeakzh_fast + ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, stg_mkWeakForeignEnvzh ); + + w = Hp - SIZEOF_StgWeak + WDS(1); + SET_HDR(w, stg_WEAK_info, W_[CCCS]); + + payload_words = 4; + words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; + ("ptr" p) = foreign "C" allocate(MyCapability() "ptr", words) []; + + TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); + SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); + + StgArrWords_words(p) = payload_words; + StgArrWords_payload(p,0) = fptr; + StgArrWords_payload(p,1) = ptr; + StgArrWords_payload(p,2) = eptr; + StgArrWords_payload(p,3) = flag; + + // We don't care about the value here. + // Should StgWeak_value(w) be stg_NO_FINALIZER_closure or something else? + + StgWeak_key(w) = key; + StgWeak_value(w) = val; + StgWeak_finalizer(w) = stg_NO_FINALIZER_closure; + StgWeak_cfinalizer(w) = p; + + ACQUIRE_LOCK(sm_mutex); + StgWeak_link(w) = W_[weak_ptr_list]; + W_[weak_ptr_list] = w; + RELEASE_LOCK(sm_mutex); + + IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []); + + RET_P(w); +} + +stg_finalizzeWeakzh { /* R1 = weak ptr */ - W_ w, f; + W_ w, f, arr; w = R1; @@ -312,9 +434,18 @@ finalizzeWeakzh_fast SET_INFO(w,stg_DEAD_WEAK_info); LDV_RECORD_CREATE(w); - f = StgWeak_finalizer(w); + f = StgWeak_finalizer(w); + arr = StgWeak_cfinalizer(w); + StgDeadWeak_link(w) = StgWeak_link(w); + if (arr != stg_NO_FINALIZER_closure) { + foreign "C" runCFinalizer(StgArrWords_payload(arr,0), + StgArrWords_payload(arr,1), + StgArrWords_payload(arr,2), + StgArrWords_payload(arr,3)) []; + } + /* return the finalizer */ if (f == stg_NO_FINALIZER_closure) { RET_NP(0,stg_NO_FINALIZER_closure); @@ -323,7 +454,7 @@ finalizzeWeakzh_fast } } -deRefWeakzh_fast +stg_deRefWeakzh { /* R1 = weak ptr */ W_ w, code, val; @@ -340,565 +471,97 @@ deRefWeakzh_fast } /* ----------------------------------------------------------------------------- - Arbitrary-precision Integer operations. - - There are some assumptions in this code that mp_limb_t == W_. This is - the case for all the platforms that GHC supports, currently. + Floating point operations. 
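Referring back to the weak-pointer code above: the four words that stg_mkWeakForeignEnvzh stores in the cfinalizer ArrWords, and that stg_finalizzeWeakzh hands to runCFinalizer, are the finalizer function pointer, its argument, an optional environment pointer, and a has-environment flag. A hedged C sketch of the call convention this implies (the body of run_cfinalizer is an assumption for illustration, not copied from the RTS):

    typedef void (*fin_plain)(void *);
    typedef void (*fin_env)(void *, void *);

    static void run_cfinalizer(void *fn, void *ptr, void *env, unsigned long flag)
    {
        if (flag) {
            ((fin_env)fn)(env, ptr);   /* finalizer registered with an environment */
        } else {
            ((fin_plain)fn)(ptr);      /* plain finalizer, argument only           */
        }
    }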
-------------------------------------------------------------------------- */ -int2Integerzh_fast -{ - /* arguments: R1 = Int# */ - - W_ val, s, p; /* to avoid aliasing */ - - val = R1; - ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast ); - - p = Hp - SIZEOF_StgArrWords; - SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = 1; - - /* mpz_set_si is inlined here, makes things simpler */ - if (%lt(val,0)) { - s = -1; - Hp(0) = -val; - } else { - if (%gt(val,0)) { - s = 1; - Hp(0) = val; - } else { - s = 0; - } - } - - /* returns (# size :: Int#, - data :: ByteArray# - #) - */ - RET_NP(s,p); -} - -word2Integerzh_fast -{ - /* arguments: R1 = Word# */ - - W_ val, s, p; /* to avoid aliasing */ - - val = R1; - - ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast); - - p = Hp - SIZEOF_StgArrWords; - SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = 1; - - if (val != 0) { - s = 1; - W_[Hp] = val; - } else { - s = 0; - } - - /* returns (# size :: Int#, - data :: ByteArray# #) - */ - RET_NP(s,p); -} - - -/* - * 'long long' primops for converting to/from Integers. - */ - -#ifdef SUPPORT_LONG_LONGS - -int64ToIntegerzh_fast -{ - /* arguments: L1 = Int64# */ - - L_ val; - W_ hi, s, neg, words_needed, p; - - val = L1; - neg = 0; - - if ( %ge(val,0x100000000::L_) || %le(val,-0x100000000::L_) ) { - words_needed = 2; - } else { - // minimum is one word - words_needed = 1; - } - - ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed), - NO_PTRS, int64ToIntegerzh_fast ); - - p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1); - SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = words_needed; - - if ( %lt(val,0::L_) ) { - neg = 1; - val = -val; - } - - hi = TO_W_(val >> 32); - - if ( words_needed == 2 ) { - s = 2; - Hp(-1) = TO_W_(val); - Hp(0) = hi; - } else { - if ( val != 0::L_ ) { - s = 1; - Hp(0) = TO_W_(val); - } else /* val==0 */ { - s = 0; - } - } - if ( neg != 0 ) { - s = -s; - } - - /* returns (# size :: Int#, - data :: ByteArray# #) - */ - RET_NP(s,p); -} - -word64ToIntegerzh_fast -{ - /* arguments: L1 = Word64# */ - - L_ val; - W_ hi, s, words_needed, p; - - val = L1; - if ( val >= 0x100000000::L_ ) { - words_needed = 2; - } else { - words_needed = 1; - } - - ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed), - NO_PTRS, word64ToIntegerzh_fast ); - - p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1); - SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = words_needed; - - hi = TO_W_(val >> 32); - if ( val >= 0x100000000::L_ ) { - s = 2; - Hp(-1) = TO_W_(val); - Hp(0) = hi; - } else { - if ( val != 0::L_ ) { - s = 1; - Hp(0) = TO_W_(val); - } else /* val==0 */ { - s = 0; - } - } - - /* returns (# size :: Int#, - data :: ByteArray# #) - */ - RET_NP(s,p); -} - - -#endif /* SUPPORT_LONG_LONGS */ - -/* ToDo: this is shockingly inefficient */ - -#ifndef THREADED_RTS -section "bss" { - mp_tmp1: - bits8 [SIZEOF_MP_INT]; -} - -section "bss" { - mp_tmp2: - bits8 [SIZEOF_MP_INT]; -} - -section "bss" { - mp_result1: - bits8 [SIZEOF_MP_INT]; -} - -section "bss" { - mp_result2: - bits8 [SIZEOF_MP_INT]; -} -#endif - -#ifdef THREADED_RTS -#define FETCH_MP_TEMP(X) \ -W_ X; \ -X = BaseReg + (OFFSET_StgRegTable_r ## X); -#else -#define FETCH_MP_TEMP(X) /* Nothing */ -#endif - -#define GMP_TAKE2_RET1(name,mp_fun) \ -name \ -{ \ - CInt s1, s2; \ - W_ d1, d2; \ - FETCH_MP_TEMP(mp_tmp1); \ - FETCH_MP_TEMP(mp_tmp2); \ - FETCH_MP_TEMP(mp_result1) \ - FETCH_MP_TEMP(mp_result2); \ - \ - /* call doYouWantToGC() */ \ - 
MAYBE_GC(R2_PTR & R4_PTR, name); \ - \ - s1 = W_TO_INT(R1); \ - d1 = R2; \ - s2 = W_TO_INT(R3); \ - d2 = R4; \ - \ - MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1)); \ - MP_INT__mp_size(mp_tmp1) = (s1); \ - MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(d1); \ - MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2)); \ - MP_INT__mp_size(mp_tmp2) = (s2); \ - MP_INT__mp_d(mp_tmp2) = BYTE_ARR_CTS(d2); \ - \ - foreign "C" __gmpz_init(mp_result1 "ptr") []; \ - \ - /* Perform the operation */ \ - foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") []; \ - \ - RET_NP(TO_W_(MP_INT__mp_size(mp_result1)), \ - MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords); \ -} - -#define GMP_TAKE1_RET1(name,mp_fun) \ -name \ -{ \ - CInt s1; \ - W_ d1; \ - FETCH_MP_TEMP(mp_tmp1); \ - FETCH_MP_TEMP(mp_result1) \ - \ - /* call doYouWantToGC() */ \ - MAYBE_GC(R2_PTR, name); \ - \ - d1 = R2; \ - s1 = W_TO_INT(R1); \ - \ - MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1)); \ - MP_INT__mp_size(mp_tmp1) = (s1); \ - MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(d1); \ - \ - foreign "C" __gmpz_init(mp_result1 "ptr") []; \ - \ - /* Perform the operation */ \ - foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") []; \ - \ - RET_NP(TO_W_(MP_INT__mp_size(mp_result1)), \ - MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords); \ -} - -#define GMP_TAKE2_RET2(name,mp_fun) \ -name \ -{ \ - CInt s1, s2; \ - W_ d1, d2; \ - FETCH_MP_TEMP(mp_tmp1); \ - FETCH_MP_TEMP(mp_tmp2); \ - FETCH_MP_TEMP(mp_result1) \ - FETCH_MP_TEMP(mp_result2) \ - \ - /* call doYouWantToGC() */ \ - MAYBE_GC(R2_PTR & R4_PTR, name); \ - \ - s1 = W_TO_INT(R1); \ - d1 = R2; \ - s2 = W_TO_INT(R3); \ - d2 = R4; \ - \ - MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1)); \ - MP_INT__mp_size(mp_tmp1) = (s1); \ - MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(d1); \ - MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2)); \ - MP_INT__mp_size(mp_tmp2) = (s2); \ - MP_INT__mp_d(mp_tmp2) = BYTE_ARR_CTS(d2); \ - \ - foreign "C" __gmpz_init(mp_result1 "ptr") []; \ - foreign "C" __gmpz_init(mp_result2 "ptr") []; \ - \ - /* Perform the operation */ \ - foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") []; \ - \ - RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)), \ - MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords, \ - TO_W_(MP_INT__mp_size(mp_result2)), \ - MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords); \ -} - -GMP_TAKE2_RET1(plusIntegerzh_fast, __gmpz_add) -GMP_TAKE2_RET1(minusIntegerzh_fast, __gmpz_sub) -GMP_TAKE2_RET1(timesIntegerzh_fast, __gmpz_mul) -GMP_TAKE2_RET1(gcdIntegerzh_fast, __gmpz_gcd) -GMP_TAKE2_RET1(quotIntegerzh_fast, __gmpz_tdiv_q) -GMP_TAKE2_RET1(remIntegerzh_fast, __gmpz_tdiv_r) -GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact) -GMP_TAKE2_RET1(andIntegerzh_fast, __gmpz_and) -GMP_TAKE2_RET1(orIntegerzh_fast, __gmpz_ior) -GMP_TAKE2_RET1(xorIntegerzh_fast, __gmpz_xor) -GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com) - -GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr) -GMP_TAKE2_RET2(divModIntegerzh_fast, __gmpz_fdiv_qr) - -#ifndef THREADED_RTS -section "bss" { - mp_tmp_w: W_; // NB. 
mp_tmp_w is really an here mp_limb_t -} -#endif - -gcdIntzh_fast -{ - /* R1 = the first Int#; R2 = the second Int# */ - W_ r; - FETCH_MP_TEMP(mp_tmp_w); - - W_[mp_tmp_w] = R1; - r = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) []; - - R1 = r; - /* Result parked in R1, return via info-pointer at TOS */ - jump %ENTRY_CODE(Sp(0)); -} - - -gcdIntegerIntzh_fast -{ - /* R1 = s1; R2 = d1; R3 = the int */ - R1 = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) []; - - /* Result parked in R1, return via info-pointer at TOS */ - jump %ENTRY_CODE(Sp(0)); -} - - -cmpIntegerIntzh_fast -{ - /* R1 = s1; R2 = d1; R3 = the int */ - W_ usize, vsize, v_digit, u_digit; - - usize = R1; - vsize = 0; - v_digit = R3; - - // paraphrased from __gmpz_cmp_si() in the GMP sources - if (%gt(v_digit,0)) { - vsize = 1; - } else { - if (%lt(v_digit,0)) { - vsize = -1; - v_digit = -v_digit; - } - } - - if (usize != vsize) { - R1 = usize - vsize; - jump %ENTRY_CODE(Sp(0)); - } - - if (usize == 0) { - R1 = 0; - jump %ENTRY_CODE(Sp(0)); - } - - u_digit = W_[BYTE_ARR_CTS(R2)]; - - if (u_digit == v_digit) { - R1 = 0; - jump %ENTRY_CODE(Sp(0)); - } - - if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's - R1 = usize; - } else { - R1 = -usize; - } - - jump %ENTRY_CODE(Sp(0)); -} - -cmpIntegerzh_fast -{ - /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */ - W_ usize, vsize, size, up, vp; - CInt cmp; - - // paraphrased from __gmpz_cmp() in the GMP sources - usize = R1; - vsize = R3; - - if (usize != vsize) { - R1 = usize - vsize; - jump %ENTRY_CODE(Sp(0)); - } - - if (usize == 0) { - R1 = 0; - jump %ENTRY_CODE(Sp(0)); - } - - if (%lt(usize,0)) { // NB. not <, which is unsigned - size = -usize; - } else { - size = usize; - } - - up = BYTE_ARR_CTS(R2); - vp = BYTE_ARR_CTS(R4); - - cmp = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) []; - - if (cmp == 0 :: CInt) { - R1 = 0; - jump %ENTRY_CODE(Sp(0)); - } - - if (%lt(cmp,0 :: CInt) == %lt(usize,0)) { - R1 = 1; - } else { - R1 = (-1); - } - /* Result parked in R1, return via info-pointer at TOS */ - jump %ENTRY_CODE(Sp(0)); -} - -integer2Intzh_fast -{ - /* R1 = s; R2 = d */ - W_ r, s; - - s = R1; - if (s == 0) { - r = 0; - } else { - r = W_[R2 + SIZEOF_StgArrWords]; - if (%lt(s,0)) { - r = -r; - } - } - /* Result parked in R1, return via info-pointer at TOS */ - R1 = r; - jump %ENTRY_CODE(Sp(0)); -} - -integer2Wordzh_fast -{ - /* R1 = s; R2 = d */ - W_ r, s; - - s = R1; - if (s == 0) { - r = 0; - } else { - r = W_[R2 + SIZEOF_StgArrWords]; - if (%lt(s,0)) { - r = -r; - } - } - /* Result parked in R1, return via info-pointer at TOS */ - R1 = r; - jump %ENTRY_CODE(Sp(0)); -} - -decodeFloatzh_fast +stg_decodeFloatzuIntzh { W_ p; F_ arg; - FETCH_MP_TEMP(mp_tmp1); - FETCH_MP_TEMP(mp_tmp_w); + W_ mp_tmp1; + W_ mp_tmp_w; + + STK_CHK_GEN( WDS(2), NO_PTRS, stg_decodeFloatzuIntzh ); + + mp_tmp1 = Sp - WDS(1); + mp_tmp_w = Sp - WDS(2); /* arguments: F1 = Float# */ arg = F1; - ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast ); - - /* Be prepared to tell Lennart-coded __decodeFloat - where mantissa._mp_d can be put (it does not care about the rest) */ - p = Hp - SIZEOF_StgArrWords; - SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]); - StgArrWords_words(p) = 1; - MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p); - /* Perform the operation */ - foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) []; + foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) []; - /* returns: (Int# (expn), Int#, ByteArray#) */ - RET_NNP(W_[mp_tmp_w], 
TO_W_(MP_INT__mp_size(mp_tmp1)), p); + /* returns: (Int# (mantissa), Int# (exponent)) */ + RET_NN(W_[mp_tmp1], W_[mp_tmp_w]); } -#define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE -#define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE) - -decodeDoublezh_fast +stg_decodeDoublezu2Intzh { D_ arg; W_ p; - FETCH_MP_TEMP(mp_tmp1); - FETCH_MP_TEMP(mp_tmp_w); + W_ mp_tmp1; + W_ mp_tmp2; + W_ mp_result1; + W_ mp_result2; + + STK_CHK_GEN( WDS(4), NO_PTRS, stg_decodeDoublezu2Intzh ); + + mp_tmp1 = Sp - WDS(1); + mp_tmp2 = Sp - WDS(2); + mp_result1 = Sp - WDS(3); + mp_result2 = Sp - WDS(4); /* arguments: D1 = Double# */ arg = D1; - ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast ); - - /* Be prepared to tell Lennart-coded __decodeDouble - where mantissa.d can be put (it does not care about the rest) */ - p = Hp - ARR_SIZE + WDS(1); - SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE); - MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p); - /* Perform the operation */ - foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) []; - - /* returns: (Int# (expn), Int#, ByteArray#) */ - RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p); + foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr", + mp_result1 "ptr", mp_result2 "ptr", + arg) []; + + /* returns: + (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */ + RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]); } /* ----------------------------------------------------------------------------- * Concurrency primitives * -------------------------------------------------------------------------- */ -forkzh_fast +stg_forkzh { /* args: R1 = closure to spark */ - MAYBE_GC(R1_PTR, forkzh_fast); + MAYBE_GC(R1_PTR, stg_forkzh); W_ closure; W_ threadid; closure = R1; - "ptr" threadid = foreign "C" createIOThread( MyCapability() "ptr", + ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", RtsFlags_GcFlags_initialStkSize(RtsFlags), closure "ptr") []; + + /* start blocked if the current thread is blocked */ + StgTSO_flags(threadid) = %lobits16( + TO_W_(StgTSO_flags(threadid)) | + TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE)); + foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") []; - // switch at the earliest opportunity - CInt[context_switch] = 1 :: CInt; + // context switch soon, but not immediately: we don't want every + // forkIO to force a context-switch. + Capability_context_switch(MyCapability()) = 1 :: CInt; RET_P(threadid); } -forkOnzh_fast +stg_forkOnzh { /* args: R1 = cpu, R2 = closure to spark */ - MAYBE_GC(R2_PTR, forkOnzh_fast); + MAYBE_GC(R2_PTR, stg_forkOnzh); W_ cpu; W_ closure; @@ -906,29 +569,36 @@ forkOnzh_fast cpu = R1; closure = R2; - "ptr" threadid = foreign "C" createIOThread( MyCapability() "ptr", + ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", RtsFlags_GcFlags_initialStkSize(RtsFlags), closure "ptr") []; + + /* start blocked if the current thread is blocked */ + StgTSO_flags(threadid) = %lobits16( + TO_W_(StgTSO_flags(threadid)) | + TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE)); + foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") []; - // switch at the earliest opportunity - CInt[context_switch] = 1 :: CInt; + // context switch soon, but not immediately: we don't want every + // forkIO to force a context-switch. 
+ Capability_context_switch(MyCapability()) = 1 :: CInt; RET_P(threadid); } -yieldzh_fast +stg_yieldzh { jump stg_yield_noregs; } -myThreadIdzh_fast +stg_myThreadIdzh { /* no args. */ RET_P(CurrentTSO); } -labelThreadzh_fast +stg_labelThreadzh { /* args: R1 = ThreadId# @@ -939,185 +609,173 @@ labelThreadzh_fast jump %ENTRY_CODE(Sp(0)); } -isCurrentThreadBoundzh_fast +stg_isCurrentThreadBoundzh { /* no args */ W_ r; - r = foreign "C" isThreadBound(CurrentTSO) []; + (r) = foreign "C" isThreadBound(CurrentTSO) []; RET_N(r); } +stg_threadStatuszh +{ + /* args: R1 :: ThreadId# */ + W_ tso; + W_ why_blocked; + W_ what_next; + W_ ret; + + tso = R1; + loop: + if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) { + tso = StgTSO__link(tso); + goto loop; + } + + what_next = TO_W_(StgTSO_what_next(tso)); + why_blocked = TO_W_(StgTSO_why_blocked(tso)); + // Note: these two reads are not atomic, so they might end up + // being inconsistent. It doesn't matter, since we + // only return one or the other. If we wanted to return the + // contents of block_info too, then we'd have to do some synchronisation. + + if (what_next == ThreadComplete) { + ret = 16; // NB. magic, matches up with GHC.Conc.threadStatus + } else { + if (what_next == ThreadKilled) { + ret = 17; + } else { + ret = why_blocked; + } + } + RET_N(ret); +} /* ----------------------------------------------------------------------------- * TVar primitives * -------------------------------------------------------------------------- */ -#ifdef REG_R1 #define SP_OFF 0 -#define IF_NOT_REG_R1(x) -#else -#define SP_OFF 1 -#define IF_NOT_REG_R1(x) x -#endif // Catch retry frame ------------------------------------------------------------ -#define CATCH_RETRY_FRAME_ERROR(label) \ - label { foreign "C" barf("catch_retry_frame incorrectly entered!"); } - -CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret) -CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret) -CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret) -CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret) -CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret) -CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret) -CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret) -CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret) - -#if MAX_VECTORED_RTN > 8 -#error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too. 
-#endif - +INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME, #if defined(PROFILING) -#define CATCH_RETRY_FRAME_BITMAP 7 -#define CATCH_RETRY_FRAME_WORDS 6 -#else -#define CATCH_RETRY_FRAME_BITMAP 1 -#define CATCH_RETRY_FRAME_WORDS 4 + W_ unused1, W_ unused2, #endif - -INFO_TABLE_RET(stg_catch_retry_frame, - CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP, - CATCH_RETRY_FRAME, - stg_catch_retry_frame_0_ret, - stg_catch_retry_frame_1_ret, - stg_catch_retry_frame_2_ret, - stg_catch_retry_frame_3_ret, - stg_catch_retry_frame_4_ret, - stg_catch_retry_frame_5_ret, - stg_catch_retry_frame_6_ret, - stg_catch_retry_frame_7_ret) + W_ unused3, P_ unused4, P_ unused5) { W_ r, frame, trec, outer; - IF_NOT_REG_R1(W_ rval; rval = Sp(0); Sp_adj(1); ) frame = Sp; trec = StgTSO_trec(CurrentTSO); - "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") []; - r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") []; + outer = StgTRecHeader_enclosing_trec(trec); + (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") []; if (r != 0) { /* Succeeded (either first branch or second branch) */ StgTSO_trec(CurrentTSO) = outer; Sp = Sp + SIZEOF_StgCatchRetryFrame; - IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;) jump %ENTRY_CODE(Sp(SP_OFF)); } else { - /* Did not commit: retry */ + /* Did not commit: re-execute */ W_ new_trec; - "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") []; + ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") []; StgTSO_trec(CurrentTSO) = new_trec; if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) { R1 = StgCatchRetryFrame_alt_code(frame); } else { R1 = StgCatchRetryFrame_first_code(frame); - StgCatchRetryFrame_first_code_trec(frame) = new_trec; } jump stg_ap_v_fast; } } -// Atomically frame ------------------------------------------------------------- - - -#define ATOMICALLY_FRAME_ERROR(label) \ - label { foreign "C" barf("atomically_frame incorrectly entered!"); } - -ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret) -ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret) -ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret) -ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret) -ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret) -ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret) -ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret) -ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret) - -#if MAX_VECTORED_RTN > 8 -#error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too. 
-#endif +// Atomically frame ------------------------------------------------------------ +INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME, #if defined(PROFILING) -#define ATOMICALLY_FRAME_BITMAP 3 -#define ATOMICALLY_FRAME_WORDS 3 -#else -#define ATOMICALLY_FRAME_BITMAP 0 -#define ATOMICALLY_FRAME_WORDS 1 + W_ unused1, W_ unused2, #endif - - -INFO_TABLE_RET(stg_atomically_frame, - ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP, - ATOMICALLY_FRAME, - stg_atomically_frame_0_ret, - stg_atomically_frame_1_ret, - stg_atomically_frame_2_ret, - stg_atomically_frame_3_ret, - stg_atomically_frame_4_ret, - stg_atomically_frame_5_ret, - stg_atomically_frame_6_ret, - stg_atomically_frame_7_ret) + P_ code, P_ next_invariant_to_check, P_ result) { - W_ frame, trec, valid; - IF_NOT_REG_R1(W_ rval; rval = Sp(0); Sp_adj(1); ) + W_ frame, trec, valid, next_invariant, q, outer; - frame = Sp; - trec = StgTSO_trec(CurrentTSO); + frame = Sp; + trec = StgTSO_trec(CurrentTSO); + result = R1; + outer = StgTRecHeader_enclosing_trec(trec); + + if (outer == NO_TREC) { + /* First time back at the atomically frame -- pick up invariants */ + ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") []; + StgAtomicallyFrame_next_invariant_to_check(frame) = q; + StgAtomicallyFrame_result(frame) = result; - /* The TSO is not currently waiting: try to commit the transaction */ - valid = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") []; - if (valid != 0) { - /* Transaction was valid: commit succeeded */ - StgTSO_trec(CurrentTSO) = NO_TREC; - Sp = Sp + SIZEOF_StgAtomicallyFrame; - IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;) - jump %ENTRY_CODE(Sp(SP_OFF)); } else { - /* Transaction was not valid: try again */ - "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") []; + /* Second/subsequent time back at the atomically frame -- abort the + * tx that's checking the invariant and move on to the next one */ + StgTSO_trec(CurrentTSO) = outer; + q = StgAtomicallyFrame_next_invariant_to_check(frame); + StgInvariantCheckQueue_my_execution(q) = trec; + foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") []; + /* Don't free trec -- it's linked from q and will be stashed in the + * invariant if we eventually commit. 
*/ + q = StgInvariantCheckQueue_next_queue_entry(q); + StgAtomicallyFrame_next_invariant_to_check(frame) = q; + trec = outer; + } + + q = StgAtomicallyFrame_next_invariant_to_check(frame); + + if (q != END_INVARIANT_CHECK_QUEUE) { + /* We can't commit yet: another invariant to check */ + ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") []; StgTSO_trec(CurrentTSO) = trec; - R1 = StgAtomicallyFrame_code(frame); + + next_invariant = StgInvariantCheckQueue_invariant(q); + R1 = StgAtomicInvariant_code(next_invariant); jump stg_ap_v_fast; + + } else { + + /* We've got no more invariants to check, try to commit */ + (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") []; + if (valid != 0) { + /* Transaction was valid: commit succeeded */ + StgTSO_trec(CurrentTSO) = NO_TREC; + R1 = StgAtomicallyFrame_result(frame); + Sp = Sp + SIZEOF_StgAtomicallyFrame; + jump %ENTRY_CODE(Sp(SP_OFF)); + } else { + /* Transaction was not valid: try again */ + ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") []; + StgTSO_trec(CurrentTSO) = trec; + StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE; + R1 = StgAtomicallyFrame_code(frame); + jump stg_ap_v_fast; + } } } -INFO_TABLE_RET(stg_atomically_waiting_frame, - ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP, - ATOMICALLY_FRAME, - stg_atomically_frame_0_ret, - stg_atomically_frame_1_ret, - stg_atomically_frame_2_ret, - stg_atomically_frame_3_ret, - stg_atomically_frame_4_ret, - stg_atomically_frame_5_ret, - stg_atomically_frame_6_ret, - stg_atomically_frame_7_ret) +INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME, +#if defined(PROFILING) + W_ unused1, W_ unused2, +#endif + P_ code, P_ next_invariant_to_check, P_ result) { W_ frame, trec, valid; - IF_NOT_REG_R1(W_ rval; rval = Sp(0); Sp_adj(1); ) frame = Sp; /* The TSO is currently waiting: should we stop waiting? 
*/ - valid = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") []; + (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") []; if (valid != 0) { /* Previous attempt is still valid: no point trying again yet */ - IF_NOT_REG_R1(Sp_adj(-2); - Sp(1) = stg_NO_FINALIZER_closure; - Sp(0) = stg_ut_1_0_unreg_info;) jump stg_block_noregs; } else { /* Previous attempt is no longer valid: try again */ - "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") []; + ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") []; StgTSO_trec(CurrentTSO) = trec; StgHeader_info(frame) = stg_atomically_frame_info; R1 = StgAtomicallyFrame_code(frame); @@ -1127,81 +785,60 @@ INFO_TABLE_RET(stg_atomically_waiting_frame, // STM catch frame -------------------------------------------------------------- -#define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret) \ - label \ - { \ - IF_NOT_REG_R1(W_ rval; rval = Sp(0); Sp_adj(1); ) \ - Sp = Sp + SIZEOF_StgCatchSTMFrame; \ - IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;) \ - jump ret; \ - } - -#ifdef REG_R1 #define SP_OFF 0 -#else -#define SP_OFF 1 -#endif - -CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0)) -CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1)) -CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2)) -CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3)) -CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4)) -CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5)) -CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6)) -CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7)) - -#if MAX_VECTORED_RTN > 8 -#error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too. -#endif - -#if defined(PROFILING) -#define CATCH_STM_FRAME_BITMAP 3 -#define CATCH_STM_FRAME_WORDS 3 -#else -#define CATCH_STM_FRAME_BITMAP 0 -#define CATCH_STM_FRAME_WORDS 1 -#endif /* Catch frames are very similar to update frames, but when entering * one we just pop the frame off the stack and perform the correct * kind of return to the activation record underneath us on the stack. 
*/ -INFO_TABLE_RET(stg_catch_stm_frame, - CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP, - CATCH_STM_FRAME, - stg_catch_stm_frame_0_ret, - stg_catch_stm_frame_1_ret, - stg_catch_stm_frame_2_ret, - stg_catch_stm_frame_3_ret, - stg_catch_stm_frame_4_ret, - stg_catch_stm_frame_5_ret, - stg_catch_stm_frame_6_ret, - stg_catch_stm_frame_7_ret) -CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF))) +INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME, +#if defined(PROFILING) + W_ unused1, W_ unused2, +#endif + P_ unused3, P_ unused4) + { + W_ r, frame, trec, outer; + frame = Sp; + trec = StgTSO_trec(CurrentTSO); + outer = StgTRecHeader_enclosing_trec(trec); + (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") []; + if (r != 0) { + /* Commit succeeded */ + StgTSO_trec(CurrentTSO) = outer; + Sp = Sp + SIZEOF_StgCatchSTMFrame; + jump Sp(SP_OFF); + } else { + /* Commit failed */ + W_ new_trec; + ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") []; + StgTSO_trec(CurrentTSO) = new_trec; + R1 = StgCatchSTMFrame_code(frame); + jump stg_ap_v_fast; + } + } // Primop definition ------------------------------------------------------------ -atomicallyzh_fast +stg_atomicallyzh { W_ frame; W_ old_trec; W_ new_trec; // stmStartTransaction may allocate - MAYBE_GC (R1_PTR, atomicallyzh_fast); + MAYBE_GC (R1_PTR, stg_atomicallyzh); /* Args: R1 = m :: STM a */ - STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast); + STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, stg_atomicallyzh); old_trec = StgTSO_trec(CurrentTSO); /* Nested transactions are not allowed; raise an exception */ if (old_trec != NO_TREC) { - R1 = GHCziIOBase_NestedAtomically_closure; - jump raisezh_fast; + R1 = base_ControlziExceptionziBase_nestedAtomically_closure; + jump stg_raisezh; } /* Set up the atomically frame */ @@ -1210,9 +847,11 @@ atomicallyzh_fast SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]); StgAtomicallyFrame_code(frame) = R1; + StgAtomicallyFrame_result(frame) = NO_TREC; + StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE; /* Start the memory transcation */ - "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1]; + ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1]; StgTSO_trec(CurrentTSO) = new_trec; /* Apply R1 to the realworld token */ @@ -1220,13 +859,13 @@ atomicallyzh_fast } -catchSTMzh_fast +stg_catchSTMzh { W_ frame; /* Args: R1 :: STM a */ /* Args: R2 :: Exception -> STM a */ - STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast); + STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, stg_catchSTMzh); /* Set up the catch frame */ Sp = Sp - SIZEOF_StgCatchSTMFrame; @@ -1234,28 +873,36 @@ catchSTMzh_fast SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]); StgCatchSTMFrame_handler(frame) = R2; + StgCatchSTMFrame_code(frame) = R1; + + /* Start a nested transaction to run the body of the try block in */ + W_ cur_trec; + W_ new_trec; + cur_trec = StgTSO_trec(CurrentTSO); + ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr"); + StgTSO_trec(CurrentTSO) = new_trec; /* Apply R1 to the realworld token */ jump stg_ap_v_fast; } -catchRetryzh_fast +stg_catchRetryzh { W_ frame; W_ new_trec; W_ trec; // stmStartTransaction may allocate - MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); + MAYBE_GC (R1_PTR & R2_PTR, stg_catchRetryzh); /* Args: R1 :: STM a */ /* 
Args: R2 :: STM a */ - STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast); + STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, stg_catchRetryzh); /* Start a nested transaction within which to run the first code */ trec = StgTSO_trec(CurrentTSO); - "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2]; + ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2]; StgTSO_trec(CurrentTSO) = new_trec; /* Set up the catch-retry frame */ @@ -1266,14 +913,13 @@ catchRetryzh_fast StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false; StgCatchRetryFrame_first_code(frame) = R1; StgCatchRetryFrame_alt_code(frame) = R2; - StgCatchRetryFrame_first_code_trec(frame) = new_trec; /* Apply R1 to the realworld token */ jump stg_ap_v_fast; } -retryzh_fast +stg_retryzh { W_ frame_type; W_ frame; @@ -1281,72 +927,63 @@ retryzh_fast W_ outer; W_ r; - MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate + MAYBE_GC (NO_PTRS, stg_retryzh); // STM operations may allocate // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME retry_pop_stack: - trec = StgTSO_trec(CurrentTSO); - "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") []; StgTSO_sp(CurrentTSO) = Sp; - frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") []; + (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") []; Sp = StgTSO_sp(CurrentTSO); frame = Sp; + trec = StgTSO_trec(CurrentTSO); + outer = StgTRecHeader_enclosing_trec(trec); if (frame_type == CATCH_RETRY_FRAME) { // The retry reaches a CATCH_RETRY_FRAME before the atomic frame ASSERT(outer != NO_TREC); + // Abort the transaction attempting the current branch + foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") []; + foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") []; if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) { - // Retry in the first code: try the alternative - "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") []; + // Retry in the first branch: try the alternative + ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") []; StgTSO_trec(CurrentTSO) = trec; StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true; R1 = StgCatchRetryFrame_alt_code(frame); jump stg_ap_v_fast; } else { - // Retry in the alternative code: propagate - W_ other_trec; - other_trec = StgCatchRetryFrame_first_code_trec(frame); - r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", other_trec "ptr") []; - if (r != 0) { - r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") []; - } else { - foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") []; - } - if (r != 0) { - // Merge between siblings succeeded: commit it back to enclosing transaction - // and then propagate the retry - StgTSO_trec(CurrentTSO) = outer; - Sp = Sp + SIZEOF_StgCatchRetryFrame; - goto retry_pop_stack; - } else { - // Merge failed: we musn't propagate the retry. Try both paths again. 
- "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") []; - StgCatchRetryFrame_first_code_trec(frame) = trec; - StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false; - StgTSO_trec(CurrentTSO) = trec; - R1 = StgCatchRetryFrame_first_code(frame); - jump stg_ap_v_fast; - } + // Retry in the alternative code: propagate the retry + StgTSO_trec(CurrentTSO) = outer; + Sp = Sp + SIZEOF_StgCatchRetryFrame; + goto retry_pop_stack; } } // We've reached the ATOMICALLY_FRAME: attempt to wait ASSERT(frame_type == ATOMICALLY_FRAME); + if (outer != NO_TREC) { + // We called retry while checking invariants, so abort the current + // invariant check (merging its TVar accesses into the parents read + // set so we'll wait on them) + foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") []; + foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") []; + trec = outer; + StgTSO_trec(CurrentTSO) = trec; + outer = StgTRecHeader_enclosing_trec(trec); + } ASSERT(outer == NO_TREC); - r = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") []; + + (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") []; if (r != 0) { // Transaction was valid: stmWait put us on the TVars' queues, we now block StgHeader_info(frame) = stg_atomically_waiting_frame_info; Sp = frame; // Fix up the stack in the unregisterised case: the return convention is different. - IF_NOT_REG_R1(Sp_adj(-2); - Sp(1) = stg_NO_FINALIZER_closure; - Sp(0) = stg_ut_1_0_unreg_info;) R3 = trec; // passing to stmWaitUnblock() jump stg_block_stmwait; } else { // Transaction was not valid: retry immediately - "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") []; + ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") []; StgTSO_trec(CurrentTSO) = trec; R1 = StgAtomicallyFrame_code(frame); Sp = frame; @@ -1355,21 +992,38 @@ retry_pop_stack: } -newTVarzh_fast +stg_checkzh +{ + W_ trec, closure; + + /* Args: R1 = invariant closure */ + MAYBE_GC (R1_PTR, stg_checkzh); + + trec = StgTSO_trec(CurrentTSO); + closure = R1; + foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", + trec "ptr", + closure "ptr") []; + + jump %ENTRY_CODE(Sp(0)); +} + + +stg_newTVarzh { W_ tv; W_ new_value; /* Args: R1 = initialisation value */ - MAYBE_GC (R1_PTR, newTVarzh_fast); + MAYBE_GC (R1_PTR, stg_newTVarzh); new_value = R1; - "ptr" tv = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") []; + ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") []; RET_P(tv); } -readTVarzh_fast +stg_readTVarzh { W_ trec; W_ tvar; @@ -1377,16 +1031,27 @@ readTVarzh_fast /* Args: R1 = TVar closure */ - MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate + MAYBE_GC (R1_PTR, stg_readTVarzh); // Call to stmReadTVar may allocate trec = StgTSO_trec(CurrentTSO); tvar = R1; - "ptr" result = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") []; + ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") []; RET_P(result); } +stg_readTVarIOzh +{ + W_ result; -writeTVarzh_fast +again: + result = StgTVar_current_value(R1); + if (%INFO_PTR(result) == stg_TREC_HEADER_info) { + goto again; + } + RET_P(result); +} + +stg_writeTVarzh { W_ trec; W_ tvar; @@ -1395,7 +1060,7 @@ writeTVarzh_fast /* Args: R1 = TVar closure */ /* R2 = New value */ - MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate + MAYBE_GC (R1_PTR & R2_PTR, 
stg_writeTVarzh); // Call to stmWriteTVar may allocate trec = StgTSO_trec(CurrentTSO); tvar = R1; new_value = R2; @@ -1437,26 +1102,27 @@ writeTVarzh_fast * * -------------------------------------------------------------------------- */ -isEmptyMVarzh_fast +stg_isEmptyMVarzh { /* args: R1 = MVar closure */ - if (GET_INFO(R1) == stg_EMPTY_MVAR_info) { + if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) { RET_N(1); } else { RET_N(0); } } -newMVarzh_fast +stg_newMVarzh { /* args: none */ W_ mvar; - ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast ); + ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, stg_newMVarzh ); mvar = Hp - SIZEOF_StgMVar + WDS(1); - SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]); + SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]); + // MVARs start dirty: generation 0 has no mutable list StgMVar_head(mvar) = stg_END_TSO_QUEUE_closure; StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; @@ -1464,22 +1130,15 @@ newMVarzh_fast } -/* If R1 isn't available, pass it on the stack */ -#ifdef REG_R1 #define PerformTake(tso, value) \ W_[StgTSO_sp(tso) + WDS(1)] = value; \ W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info; -#else -#define PerformTake(tso, value) \ - W_[StgTSO_sp(tso) + WDS(1)] = value; \ - W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info; -#endif #define PerformPut(tso,lval) \ StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3); \ lval = W_[StgTSO_sp(tso) - WDS(1)]; -takeMVarzh_fast +stg_takeMVarzh { W_ mvar, val, info, tso; @@ -1487,25 +1146,35 @@ takeMVarzh_fast mvar = R1; #if defined(THREADED_RTS) - "ptr" info = foreign "C" lockClosure(mvar "ptr") []; + ("ptr" info) = foreign "C" lockClosure(mvar "ptr") []; #else info = GET_INFO(mvar); #endif + + if (info == stg_MVAR_CLEAN_info) { + foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") []; + } /* If the MVar is empty, put ourselves on its blocking queue, * and wait until we're woken up. */ - if (info == stg_EMPTY_MVAR_info) { + if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) { if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { StgMVar_head(mvar) = CurrentTSO; } else { - StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO; + foreign "C" setTSOLink(MyCapability() "ptr", + StgMVar_tail(mvar) "ptr", + CurrentTSO) []; } - StgTSO_link(CurrentTSO) = stg_END_TSO_QUEUE_closure; - StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; + StgTSO__link(CurrentTSO) = stg_END_TSO_QUEUE_closure; StgTSO_block_info(CurrentTSO) = mvar; + // write barrier for throwTo(), which looks at block_info + // if why_blocked==BlockedOnMVar. 
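The write barrier noted above (and the matching one in stg_putMVarzh below) makes sure block_info is visible before why_blocked changes to BlockedOnMVar, since throwTo() on another capability reads the two fields in the opposite order. A C11 sketch of the same publication pattern (the struct and the BlockedOnMVar value are illustrative, not the real TSO layout):

    #include <stdatomic.h>

    struct tso {
        void              *block_info;    /* what the thread is blocked on      */
        _Atomic(unsigned)  why_blocked;   /* inspected by throwTo on other caps */
    };

    static void block_on_mvar(struct tso *t, void *mvar, unsigned blocked_on_mvar)
    {
        t->block_info = mvar;                        /* write the payload first   */
        atomic_thread_fence(memory_order_release);   /* the %write_barrier() step */
        atomic_store_explicit(&t->why_blocked, blocked_on_mvar,
                              memory_order_relaxed);
    }

A reader that loads why_blocked with acquire ordering (or issues its own acquire fence after the load) and sees BlockedOnMVar is then guaranteed to also see the MVar in block_info.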
+ prim %write_barrier() []; + StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; StgMVar_tail(mvar) = CurrentTSO; + R1 = mvar; jump stg_block_takemvar; } @@ -1522,25 +1191,20 @@ takeMVarzh_fast /* actually perform the putMVar for the thread that we just woke up */ tso = StgMVar_head(mvar); PerformPut(tso,StgMVar_value(mvar)); - dirtyTSO(tso); -#if defined(GRAN) || defined(PAR) - /* ToDo: check 2nd arg (mvar) is right */ - "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar),mvar) []; - StgMVar_head(mvar) = tso; -#else - "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", - StgMVar_head(mvar) "ptr") []; + if (TO_W_(StgTSO_dirty(tso)) == 0) { + foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; + } + + ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", + StgMVar_head(mvar) "ptr", 1) []; StgMVar_head(mvar) = tso; -#endif if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; } -#if defined(THREADED_RTS) - foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) []; -#endif + unlockClosure(mvar, stg_MVAR_DIRTY_info); RET_P(val); } else @@ -1548,18 +1212,14 @@ takeMVarzh_fast /* No further putMVars, MVar is now empty */ StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; -#if defined(THREADED_RTS) - foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) []; -#else - SET_INFO(mvar,stg_EMPTY_MVAR_info); -#endif + unlockClosure(mvar, stg_MVAR_DIRTY_info); RET_P(val); } } -tryTakeMVarzh_fast +stg_tryTakeMVarzh { W_ mvar, val, info, tso; @@ -1568,14 +1228,14 @@ tryTakeMVarzh_fast mvar = R1; #if defined(THREADED_RTS) - "ptr" info = foreign "C" lockClosure(mvar "ptr") []; + ("ptr" info) = foreign "C" lockClosure(mvar "ptr") []; #else info = GET_INFO(mvar); #endif - if (info == stg_EMPTY_MVAR_info) { + if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) { #if defined(THREADED_RTS) - foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) []; + unlockClosure(mvar, info); #endif /* HACK: we need a pointer to pass back, * so we abuse NO_FINALIZER_closure @@ -1583,6 +1243,10 @@ tryTakeMVarzh_fast RET_NP(0, stg_NO_FINALIZER_closure); } + if (info == stg_MVAR_CLEAN_info) { + foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr"); + } + /* we got the value... 
*/ val = StgMVar_value(mvar); @@ -1596,64 +1260,66 @@ tryTakeMVarzh_fast /* actually perform the putMVar for the thread that we just woke up */ tso = StgMVar_head(mvar); PerformPut(tso,StgMVar_value(mvar)); - dirtyTSO(tso); + if (TO_W_(StgTSO_dirty(tso)) == 0) { + foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; + } -#if defined(GRAN) || defined(PAR) - /* ToDo: check 2nd arg (mvar) is right */ - "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") []; + ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", + StgMVar_head(mvar) "ptr", 1) []; StgMVar_head(mvar) = tso; -#else - "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", - StgMVar_head(mvar) "ptr") []; - StgMVar_head(mvar) = tso; -#endif if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; } -#if defined(THREADED_RTS) - foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) []; -#endif + unlockClosure(mvar, stg_MVAR_DIRTY_info); } else { /* No further putMVars, MVar is now empty */ StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; -#if defined(THREADED_RTS) - foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) []; -#else - SET_INFO(mvar,stg_EMPTY_MVAR_info); -#endif + unlockClosure(mvar, stg_MVAR_DIRTY_info); } RET_NP(1, val); } -putMVarzh_fast +stg_putMVarzh { - W_ mvar, info, tso; + W_ mvar, val, info, tso; /* args: R1 = MVar, R2 = value */ mvar = R1; + val = R2; #if defined(THREADED_RTS) - "ptr" info = foreign "C" lockClosure(mvar "ptr") [R2]; + ("ptr" info) = foreign "C" lockClosure(mvar "ptr") []; #else info = GET_INFO(mvar); #endif - if (info == stg_FULL_MVAR_info) { + if (info == stg_MVAR_CLEAN_info) { + foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr"); + } + + if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) { if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { StgMVar_head(mvar) = CurrentTSO; } else { - StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO; + foreign "C" setTSOLink(MyCapability() "ptr", + StgMVar_tail(mvar) "ptr", + CurrentTSO) []; } - StgTSO_link(CurrentTSO) = stg_END_TSO_QUEUE_closure; - StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; + StgTSO__link(CurrentTSO) = stg_END_TSO_QUEUE_closure; StgTSO_block_info(CurrentTSO) = mvar; + // write barrier for throwTo(), which looks at block_info + // if why_blocked==BlockedOnMVar. + prim %write_barrier() []; + StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; StgMVar_tail(mvar) = CurrentTSO; + R1 = mvar; + R2 = val; jump stg_block_putmvar; } @@ -1665,37 +1331,28 @@ putMVarzh_fast /* actually perform the takeMVar */ tso = StgMVar_head(mvar); - PerformTake(tso, R2); - dirtyTSO(tso); + PerformTake(tso, val); + if (TO_W_(StgTSO_dirty(tso)) == 0) { + foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; + } -#if defined(GRAN) || defined(PAR) - /* ToDo: check 2nd arg (mvar) is right */ - "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") []; + ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", + StgMVar_head(mvar) "ptr", 1) []; StgMVar_head(mvar) = tso; -#else - "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") []; - StgMVar_head(mvar) = tso; -#endif if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; } -#if defined(THREADED_RTS) - foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) []; -#endif + unlockClosure(mvar, stg_MVAR_DIRTY_info); jump %ENTRY_CODE(Sp(0)); } else { /* No further takes, the MVar is now full. 
*/ - StgMVar_value(mvar) = R2; + StgMVar_value(mvar) = val; -#if defined(THREADED_RTS) - foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) []; -#else - SET_INFO(mvar,stg_FULL_MVAR_info); -#endif + unlockClosure(mvar, stg_MVAR_DIRTY_info); jump %ENTRY_CODE(Sp(0)); } @@ -1703,7 +1360,7 @@ putMVarzh_fast } -tryPutMVarzh_fast +stg_tryPutMVarzh { W_ mvar, info, tso; @@ -1711,18 +1368,22 @@ tryPutMVarzh_fast mvar = R1; #if defined(THREADED_RTS) - "ptr" info = foreign "C" lockClosure(mvar "ptr") [R2]; + ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2]; #else info = GET_INFO(mvar); #endif - if (info == stg_FULL_MVAR_info) { + if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) { #if defined(THREADED_RTS) - foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) []; + unlockClosure(mvar, info); #endif RET_N(0); } + if (info == stg_MVAR_CLEAN_info) { + foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr"); + } + if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) { /* There are takeMVar(s) waiting: wake up the first one @@ -1732,35 +1393,26 @@ tryPutMVarzh_fast /* actually perform the takeMVar */ tso = StgMVar_head(mvar); PerformTake(tso, R2); - dirtyTSO(tso); + if (TO_W_(StgTSO_dirty(tso)) == 0) { + foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; + } -#if defined(GRAN) || defined(PAR) - /* ToDo: check 2nd arg (mvar) is right */ - "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") []; - StgMVar_head(mvar) = tso; -#else - "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") []; + ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", + StgMVar_head(mvar) "ptr", 1) []; StgMVar_head(mvar) = tso; -#endif if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; } -#if defined(THREADED_RTS) - foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) []; -#endif + unlockClosure(mvar, stg_MVAR_DIRTY_info); } else { /* No further takes, the MVar is now full. */ StgMVar_value(mvar) = R2; -#if defined(THREADED_RTS) - foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) []; -#else - SET_INFO(mvar,stg_FULL_MVAR_info); -#endif + unlockClosure(mvar, stg_MVAR_DIRTY_info); } RET_N(1); @@ -1772,13 +1424,13 @@ tryPutMVarzh_fast Stable pointer primitives ------------------------------------------------------------------------- */ -makeStableNamezh_fast +stg_makeStableNamezh { W_ index, sn_obj; - ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast ); + ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, stg_makeStableNamezh ); - index = foreign "C" lookupStableName(R1 "ptr") []; + (index) = foreign "C" lookupStableName(R1 "ptr") []; /* Is there already a StableName for this heap object? * stable_ptr_table is a pointer to an array of snEntry structs. 
@@ -1796,16 +1448,16 @@ makeStableNamezh_fast } -makeStablePtrzh_fast +stg_makeStablePtrzh { /* Args: R1 = a */ W_ sp; - MAYBE_GC(R1_PTR, makeStablePtrzh_fast); - "ptr" sp = foreign "C" getStablePtr(R1 "ptr") []; + MAYBE_GC(R1_PTR, stg_makeStablePtrzh); + ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") []; RET_N(sp); } -deRefStablePtrzh_fast +stg_deRefStablePtrzh { /* Args: R1 = the stable ptr */ W_ r, sp; @@ -1818,22 +1470,22 @@ deRefStablePtrzh_fast Bytecode object primitives ------------------------------------------------------------------------- */ -newBCOzh_fast +stg_newBCOzh { /* R1 = instrs R2 = literals R3 = ptrs - R4 = itbls - R5 = arity - R6 = bitmap array + R4 = arity + R5 = bitmap array */ W_ bco, bitmap_arr, bytes, words; - bitmap_arr = R6; + bitmap_arr = R5; + words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr); bytes = WDS(words); - ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast ); + ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, stg_newBCOzh ); bco = Hp - bytes + WDS(1); SET_HDR(bco, stg_BCO_info, W_[CCCS]); @@ -1841,8 +1493,7 @@ newBCOzh_fast StgBCO_instrs(bco) = R1; StgBCO_literals(bco) = R2; StgBCO_ptrs(bco) = R3; - StgBCO_itbls(bco) = R4; - StgBCO_arity(bco) = HALF_W_(R5); + StgBCO_arity(bco) = HALF_W_(R4); StgBCO_size(bco) = HALF_W_(words); // Copy the arity/bitmap info into the BCO @@ -1859,7 +1510,7 @@ for: } -mkApUpd0zh_fast +stg_mkApUpd0zh { // R1 = the BCO# for the AP // @@ -1871,7 +1522,7 @@ mkApUpd0zh_fast ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) && StgBCO_arity(R1) == HALF_W_(0)); - HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast); + HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, stg_mkApUpd0zh); TICK_ALLOC_UP_THK(0, 0); CCCS_ALLOC(SIZEOF_StgAP); @@ -1884,6 +1535,76 @@ mkApUpd0zh_fast RET_P(ap); } +stg_unpackClosurezh +{ +/* args: R1 = closure to analyze */ +// TODO: Consider the absence of ptrs or nonptrs as a special case ? + + W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr; + info = %GET_STD_INFO(UNTAG(R1)); + + // Some closures have non-standard layout, so we omit those here. + W_ type; + type = TO_W_(%INFO_TYPE(info)); + switch [0 .. N_CLOSURE_TYPES] type { + case THUNK_SELECTOR : { + ptrs = 1; + nptrs = 0; + goto out; + } + case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, + THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : { + ptrs = 0; + nptrs = 0; + goto out; + } + default: { + ptrs = TO_W_(%INFO_PTRS(info)); + nptrs = TO_W_(%INFO_NPTRS(info)); + goto out; + }} +out: + + W_ ptrs_arr_sz, ptrs_arr_cards, nptrs_arr_sz; + nptrs_arr_sz = SIZEOF_StgArrWords + WDS(nptrs); + ptrs_arr_cards = mutArrPtrsCardWords(ptrs); + ptrs_arr_sz = SIZEOF_StgMutArrPtrs + WDS(ptrs) + WDS(ptrs_arr_cards); + + ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, stg_unpackClosurezh); + + W_ clos; + clos = UNTAG(R1); + + ptrs_arr = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1); + nptrs_arr = Hp - nptrs_arr_sz + WDS(1); + + SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]); + StgMutArrPtrs_ptrs(ptrs_arr) = ptrs; + StgMutArrPtrs_size(ptrs_arr) = ptrs + ptrs_arr_cards; + + p = 0; +for: + if(p < ptrs) { + W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p); + p = p + 1; + goto for; + } + /* We can leave the card table uninitialised, since the array is + allocated in the nursery. The GC will fill it in if/when the array + is promoted. 
*/ + + SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]); + StgArrWords_words(nptrs_arr) = nptrs; + p = 0; +for2: + if(p < nptrs) { + W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs); + p = p + 1; + goto for2; + } + RET_NPP(info, ptrs_arr, nptrs_arr); +} + /* ----------------------------------------------------------------------------- Thread I/O blocking primitives -------------------------------------------------------------------------- */ @@ -1892,19 +1613,19 @@ mkApUpd0zh_fast * macro in Schedule.h). */ #define APPEND_TO_BLOCKED_QUEUE(tso) \ - ASSERT(StgTSO_link(tso) == END_TSO_QUEUE); \ + ASSERT(StgTSO__link(tso) == END_TSO_QUEUE); \ if (W_[blocked_queue_hd] == END_TSO_QUEUE) { \ W_[blocked_queue_hd] = tso; \ } else { \ - StgTSO_link(W_[blocked_queue_tl]) = tso; \ + foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \ } \ W_[blocked_queue_tl] = tso; -waitReadzh_fast +stg_waitReadzh { /* args: R1 */ #ifdef THREADED_RTS - foreign "C" barf("waitRead# on threaded RTS"); + foreign "C" barf("waitRead# on threaded RTS") never returns; #else ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16); @@ -1917,11 +1638,11 @@ waitReadzh_fast #endif } -waitWritezh_fast +stg_waitWritezh { /* args: R1 */ #ifdef THREADED_RTS - foreign "C" barf("waitWrite# on threaded RTS"); + foreign "C" barf("waitWrite# on threaded RTS") never returns; #else ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16); @@ -1935,8 +1656,8 @@ waitWritezh_fast } -STRING(stg_delayzh_malloc_str, "delayzh_fast") -delayzh_fast +STRING(stg_delayzh_malloc_str, "stg_delayzh") +stg_delayzh { #ifdef mingw32_HOST_OS W_ ares; @@ -1946,7 +1667,7 @@ delayzh_fast #endif #ifdef THREADED_RTS - foreign "C" barf("delay# on threaded RTS"); + foreign "C" barf("delay# on threaded RTS") never returns; #else /* args: R1 (microsecond delay amount) */ @@ -1956,9 +1677,9 @@ delayzh_fast #ifdef mingw32_HOST_OS /* could probably allocate this on the heap instead */ - "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult, + ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult, stg_delayzh_malloc_str); - reqID = foreign "C" addDelayRequest(R1); + (reqID) = foreign "C" addDelayRequest(R1); StgAsyncIOResult_reqID(ares) = reqID; StgAsyncIOResult_len(ares) = 0; StgAsyncIOResult_errCode(ares) = 0; @@ -1975,8 +1696,15 @@ delayzh_fast #else W_ time; - time = foreign "C" getourtimeofday(); - target = (R1 / (TICK_MILLISECS*1000)) + time; + W_ divisor; + (time) = foreign "C" getourtimeofday() [R1]; + divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags)); + if (divisor == 0) { + divisor = 50; + } + divisor = divisor * 1000; + target = ((R1 + divisor - 1) / divisor) /* divide rounding up */ + + time + 1; /* Add 1 as getourtimeofday rounds down */ StgTSO_block_info(CurrentTSO) = target; /* Insert the new thread in the sleeping queue. 
*/ @@ -1985,15 +1713,15 @@ delayzh_fast while: if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) { prev = t; - t = StgTSO_link(t); + t = StgTSO__link(t); goto while; } - StgTSO_link(CurrentTSO) = t; + StgTSO__link(CurrentTSO) = t; if (prev == NULL) { W_[sleeping_queue] = CurrentTSO; } else { - StgTSO_link(prev) = CurrentTSO; + foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) []; } jump stg_block_noregs; #endif @@ -2002,14 +1730,14 @@ while: #ifdef mingw32_HOST_OS -STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast") -asyncReadzh_fast +STRING(stg_asyncReadzh_malloc_str, "stg_asyncReadzh") +stg_asyncReadzh { W_ ares; CInt reqID; #ifdef THREADED_RTS - foreign "C" barf("asyncRead# on threaded RTS"); + foreign "C" barf("asyncRead# on threaded RTS") never returns; #else /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */ @@ -2017,10 +1745,10 @@ asyncReadzh_fast StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16; /* could probably allocate this on the heap instead */ - "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult, + ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult, stg_asyncReadzh_malloc_str) [R1,R2,R3,R4]; - reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") []; + (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") []; StgAsyncIOResult_reqID(ares) = reqID; StgAsyncIOResult_len(ares) = 0; StgAsyncIOResult_errCode(ares) = 0; @@ -2030,24 +1758,24 @@ asyncReadzh_fast #endif } -STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast") -asyncWritezh_fast +STRING(stg_asyncWritezh_malloc_str, "stg_asyncWritezh") +stg_asyncWritezh { W_ ares; CInt reqID; #ifdef THREADED_RTS - foreign "C" barf("asyncWrite# on threaded RTS"); + foreign "C" barf("asyncWrite# on threaded RTS") never returns; #else /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */ ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16); StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16; - "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult, + ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult, stg_asyncWritezh_malloc_str) [R1,R2,R3,R4]; - reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") []; + (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") []; StgAsyncIOResult_reqID(ares) = reqID; StgAsyncIOResult_len(ares) = 0; @@ -2058,14 +1786,14 @@ asyncWritezh_fast #endif } -STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast") -asyncDoProczh_fast +STRING(stg_asyncDoProczh_malloc_str, "stg_asyncDoProczh") +stg_asyncDoProczh { W_ ares; CInt reqID; #ifdef THREADED_RTS - foreign "C" barf("asyncDoProc# on threaded RTS"); + foreign "C" barf("asyncDoProc# on threaded RTS") never returns; #else /* args: R1 = proc, R2 = param */ @@ -2073,10 +1801,10 @@ asyncDoProczh_fast StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16; /* could probably allocate this on the heap instead */ - "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult, + ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult, stg_asyncDoProczh_malloc_str) [R1,R2]; - reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") []; + (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") []; StgAsyncIOResult_reqID(ares) = reqID; StgAsyncIOResult_len(ares) = 0; StgAsyncIOResult_errCode(ares) = 0; @@ -2088,19 +1816,162 @@ asyncDoProczh_fast #endif /* ----------------------------------------------------------------------------- - ** temporary ** + * noDuplicate# + * + * noDuplicate# tries to ensure that none of the thunks 
under
+ * evaluation by the current thread are also under evaluation by
+ * another thread.  It relies on *both* threads doing noDuplicate#;
+ * the second one will get blocked if they are duplicating some work.
+ *
+ * The idea is that noDuplicate# is used within unsafePerformIO to
+ * ensure that the IO operation is performed at most once.
+ * noDuplicate# calls threadPaused which acquires an exclusive lock on
+ * all the thunks currently under evaluation by the current thread.
+ *
+ * Consider the following scenario.  There is a thunk A, whose
+ * evaluation requires evaluating thunk B, where thunk B is an
+ * unsafePerformIO.  Two threads, 1 and 2, both enter A.  Thread 2
+ * is pre-empted before it enters B, and claims A by blackholing it
+ * (in threadPaused).  Thread 1 now enters B, and calls noDuplicate#.
+ *
+ *      thread 1                      thread 2
+ *   +-----------+                 +---------------+
+ *   |    -------+-----> A <-------+-------        |
+ *   |  update   |   BLACKHOLE     | marked_update |
+ *   +-----------+                 +---------------+
+ *   |           |                 |               |
+ *        ...                            ...
+ *   |           |                 +---------------+
+ *   +-----------+
+ *   |     ------+-----> B
+ *   |  update   |   BLACKHOLE
+ *   +-----------+
+ *
+ * At this point: A is a blackhole, owned by thread 2.  noDuplicate#
+ * calls threadPaused, which walks up the stack and
+ *  - claims B on behalf of thread 1
+ *  - then it reaches the update frame for A, which it sees is already
+ *    a BLACKHOLE and is therefore owned by another thread.  Since
+ *    thread 1 is duplicating work, the computation up to the update
+ *    frame for A is suspended, including thunk B.
+ *  - thunk B, which is an unsafePerformIO, has now been reverted to
+ *    an AP_STACK which could be duplicated - BAD!
+ *  - The solution is as follows: before calling threadPaused, we
+ *    leave a frame on the stack (stg_noDuplicate_info) that will call
+ *    noDuplicate# again if the current computation is suspended and
+ *    restarted.
+ *
+ * See the test program in concurrent/prog003 for a way to demonstrate
+ * this.  It needs to be run with +RTS -N3 or greater, and the bug
+ * only manifests occasionally (once every 10 runs or so).
+ * -------------------------------------------------------------------------- */
+
+INFO_TABLE_RET(stg_noDuplicate, RET_SMALL)
+{
+    Sp_adj(1);
+    jump stg_noDuplicatezh;
+}
+
+stg_noDuplicatezh
+{
+    STK_CHK_GEN( WDS(1), NO_PTRS, stg_noDuplicatezh );
+    // leave noDuplicate frame in case the current
+    // computation is suspended and restarted (see above).
+    Sp_adj(-1);
+    Sp(0) = stg_noDuplicate_info;
+
+    SAVE_THREAD_STATE();
+    ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
+    foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
+
+    if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
+        jump stg_threadFinished;
+    } else {
+        LOAD_THREAD_STATE();
+        ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
+        // remove the stg_noDuplicate frame if it is still there.
+        if (Sp(0) == stg_noDuplicate_info) {
+            Sp_adj(1);
+        }
+        jump %ENTRY_CODE(Sp(0));
+    }
+}
+
+/* -----------------------------------------------------------------------------
+   Misc. primitives
+   -------------------------------------------------------------------------- */
+
+stg_getApStackValzh
+{
+    W_ ap_stack, offset, val, ok;
 
-   classes CCallable and CReturnable don't really exist, but the
-   compiler insists on generating dictionaries containing references
-   to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
-   for these.  Some C compilers can't cope with zero-length static arrays,
-   so we have to make these one element long.
- --------------------------------------------------------------------------- */ + /* args: R1 = AP_STACK, R2 = offset */ + ap_stack = R1; + offset = R2; -section "rodata" { - GHC_ZCCCallable_static_info: W_ 0; + if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) { + ok = 1; + val = StgAP_STACK_payload(ap_stack,offset); + } else { + ok = 0; + val = R1; + } + RET_NP(ok,val); +} + +// Write the cost center stack of the first argument on stderr; return +// the second. Possibly only makes sense for already evaluated +// things? +stg_traceCcszh +{ + W_ ccs; + +#ifdef PROFILING + ccs = StgHeader_ccs(UNTAG(R1)); + foreign "C" fprintCCS_stderr(ccs "ptr") [R2]; +#endif + + R1 = R2; + ENTER(); } -section "rodata" { - GHC_ZCCReturnable_static_info: W_ 0; +stg_getSparkzh +{ + W_ spark; + +#ifndef THREADED_RTS + RET_NP(0,ghczmprim_GHCziBool_False_closure); +#else + (spark) = foreign "C" findSpark(MyCapability()); + if (spark != 0) { + RET_NP(1,spark); + } else { + RET_NP(0,ghczmprim_GHCziBool_False_closure); + } +#endif +} + +stg_traceEventzh +{ + W_ msg; + msg = R1; + +#if defined(TRACING) || defined(DEBUG) + + foreign "C" traceUserMsg(MyCapability() "ptr", msg "ptr") []; + +#elif defined(DTRACE) + + W_ enabled; + + // We should go through the macro HASKELLEVENT_USER_MSG_ENABLED from + // RtsProbes.h, but that header file includes unistd.h, which doesn't + // work in Cmm + (enabled) = foreign "C" __dtrace_isenabled$HaskellEvent$user__msg$v1() []; + if (enabled != 0) { + foreign "C" dtraceUserMsgWrapper(MyCapability() "ptr", msg "ptr") []; + } + +#endif + jump %ENTRY_CODE(Sp(0)); }
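
An illustrative aside, not part of the patch: the noDuplicate#/threadPaused machinery described in the comment above is what separates unsafePerformIO from unsafeDupablePerformIO at the library level. A minimal Haskell sketch of the user-visible difference follows; note that the export location of unsafeDupablePerformIO has moved between GHC.IO and System.IO.Unsafe across GHC versions.

import Data.IORef (IORef, newIORef, atomicModifyIORef)
import System.IO.Unsafe (unsafeDupablePerformIO, unsafePerformIO)

-- unsafePerformIO runs noDuplicate# before the action, so even if two
-- threads race into this CAF, the IORef is created at most once.
counter :: IORef Int
counter = unsafePerformIO (newIORef 0)
{-# NOINLINE counter #-}

nextTicket :: IO Int
nextTicket = atomicModifyIORef counter (\n -> (n + 1, n))

-- unsafeDupablePerformIO omits the noDuplicate# call: cheaper, but the
-- action may run more than once if two threads enter the thunk at the
-- same time, so it is only suitable for idempotent, harmless IO.
fortyTwo :: Int
fortyTwo = unsafeDupablePerformIO (return 42)
{-# NOINLINE fortyTwo #-}
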