/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2004
 *
 * Out-of-line primitive operations
 *
 * This file contains the implementations of all the primitive
 * operations ("primops") which are not expanded inline.  See
 * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
 * this file contains code for most of those with the attribute
 * out_of_line=True.
 *
 * Entry convention: the entry convention for a primop is that all the
 * args are in Stg registers (R1, R2, etc.).  This is to make writing
 * the primops easier.  (See compiler/codeGen/CgCallConv.hs.)
 *
 * Return convention: results from a primop are generally returned
 * using the ordinary unboxed tuple return convention.  The C-- parser
 * implements the RET_xxxx() macros to perform unboxed-tuple returns
 * based on the prevailing return convention.
 *
 * This file is written in a subset of C--, extended with various
 * features specific to GHC.  It is compiled by GHC directly.  For the
 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
 *
 * ---------------------------------------------------------------------------*/

#include "Cmm.h"

#ifdef __PIC__
#ifndef mingw32_HOST_OS
import __gmpz_init;
import __gmpz_add;
import __gmpz_sub;
import __gmpz_mul;
import __gmpz_gcd;
import __gmpn_gcd_1;
import __gmpn_cmp;
import __gmpz_tdiv_q;
import __gmpz_tdiv_r;
import __gmpz_tdiv_qr;
import __gmpz_fdiv_qr;
import __gmpz_divexact;
import __gmpz_and;
import __gmpz_xor;
import __gmpz_ior;
import __gmpz_com;
#endif
import pthread_mutex_lock;
import pthread_mutex_unlock;
#endif
import base_GHCziIOBase_NestedAtomically_closure;
import EnterCriticalSection;
import LeaveCriticalSection;

/*-----------------------------------------------------------------------------
  Array Primitives

  Basically just new*Array - the others are all inline macros.

  The size arg is always passed in R1, and the result is returned in R1.

  The slow entry point is for returning from a heap check; the saved
  size argument must be re-loaded from the stack.
  -------------------------------------------------------------------------- */

/* for objects that are *less* than the size of a word, make sure we
 * round up to the nearest word for the size of the array.
 */

newByteArrayzh_fast
{
    W_ words, payload_words, n, p;
    MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}

newPinnedByteArrayzh_fast
{
    W_ words, payload_words, n, p;

    MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);

    // We want an 8-byte aligned array.  allocatePinned() gives us
    // 8-byte aligned memory by default, but we want to align the
    // *goods* inside the ArrWords object, so we have to check the
    // size of the ArrWords header and adjust our size accordingly.
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    if ((SIZEOF_StgArrWords & 7) != 0) {
        words = words + 1;
    }

    ("ptr" p) = foreign "C" allocatePinned(words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    // Again, if the ArrWords header isn't a multiple of 8 bytes, we
    // have to push the object forward one word so that the goods
    // fall on an 8-byte boundary.
    if ((SIZEOF_StgArrWords & 7) != 0) {
        p = p + WDS(1);
    }

    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}
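/* A worked example of the alignment adjustment above (the figures are
 * illustrative, not taken from any particular build): suppose
 * SIZEOF_StgArrWords were 12 on a 32-bit configuration, so that
 * (SIZEOF_StgArrWords & 7) != 0.  We then reserve one extra word, and
 * after allocatePinned() hands back an 8-byte aligned p, bumping p by
 * WDS(1) makes the payload start at p + 4 + 12 = p + 16, i.e. back on
 * an 8-byte boundary.  When the header is already a multiple of 8,
 * both adjustments are no-ops.
 */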
newArrayzh_fast
{
    W_ words, n, init, arr, p;
    /* Args: R1 = words, R2 = initialisation value */

    n = R1;
    MAYBE_GC(R2_PTR,newArrayzh_fast);

    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
    ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(arr) = n;

    // Initialise all elements of the array with the value in R2
    init = R2;
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + WDS(words)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    RET_P(arr);
}

unsafeThawArrayzh_fast
{
  // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
  //
  // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
  // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
  // it on the mutable list for the GC to remove (removing something from
  // the mutable list is not easy, because the mut_list is only singly-linked).
  //
  // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
  // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
  // to indicate that it is still on the mutable list.
  //
  // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
  // either it is on a mut_list, or it isn't.  We adopt the convention that
  // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
  // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
  // we put it on the mutable list more than once, but it would get scavenged
  // multiple times during GC, which would be unnecessarily slow.
  //
  if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(R1, R1);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        RET_P(R1);
  } else {
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        RET_P(R1);
  }
}
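/* Recap of the convention described in unsafeThawArrayzh_fast, as an
 * info-pointer state table (a summary of the comment above, not new
 * behaviour):
 *
 *    MUT_ARR_PTRS_DIRTY    -- mutable, on the mutable list
 *    MUT_ARR_PTRS_FROZEN0  -- frozen, but still on the mutable list
 *    MUT_ARR_PTRS_FROZEN   -- frozen, off the mutable list
 *
 *    freeze:  DIRTY   -> FROZEN0  (left on mut_list; the GC drops it later)
 *    thaw:    FROZEN0 -> DIRTY    (already on mut_list, nothing to record)
 *             FROZEN  -> DIRTY    (must call recordMutable)
 */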
/* -----------------------------------------------------------------------------
   MutVar primitives
   -------------------------------------------------------------------------- */

newMutVarzh_fast
{
    W_ mv;
    /* Args: R1 = initialisation value */

    ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);

    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
    StgMutVar_var(mv) = R1;

    RET_P(mv);
}

atomicModifyMutVarzh_fast
{
    W_ mv, z, x, y, r;
    /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */

    /* If x is the current contents of the MutVar#, then we want to make
       the new contents point to

             (sel_0 (f x))

       and the return value is

             (sel_1 (f x)),

       where, obviously, we can share (f x):

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
         y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */

#if MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);

#if defined(THREADED_RTS)
    ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
#endif

    x = StgMutVar_var(R1);

    TICK_ALLOC_THUNK_2();
    CCCS_ALLOC(THUNK_2_SIZE);
    z = Hp - THUNK_2_SIZE + WDS(1);
    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
    LDV_RECORD_CREATE(z);
    StgThunk_payload(z,0) = R2;
    StgThunk_payload(z,1) = x;

    TICK_ALLOC_THUNK_1();
    CCCS_ALLOC(THUNK_1_SIZE);
    y = z - THUNK_1_SIZE;
    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
    LDV_RECORD_CREATE(y);
    StgThunk_payload(y,0) = z;

    StgMutVar_var(R1) = y;
    foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];

    TICK_ALLOC_THUNK_1();
    CCCS_ALLOC(THUNK_1_SIZE);
    r = y - THUNK_1_SIZE;
    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
    LDV_RECORD_CREATE(r);
    StgThunk_payload(r,0) = z;

#if defined(THREADED_RTS)
    RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
#endif

    RET_P(r);
}
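/* Illustrative sketch only (a Haskell-level view, not part of the RTS):
 * the three thunks built above give atomicModifyMutVar# the semantics of
 *
 *    atomicModifyIORef :: IORef a -> (a -> (a, b)) -> IO b
 *    -- behaves as if we wrote, with z shared by both selections:
 *    --    let z = f x; y = fst z; r = snd z
 *    -- the MutVar is swung from x to y, and r is returned.
 *
 * Nothing is evaluated here: f x, fst and snd are all built as updatable
 * thunks, so the work happens later, when y or r is demanded.
 */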
/* -----------------------------------------------------------------------------
   Weak Pointer Primitives
   -------------------------------------------------------------------------- */

STRING(stg_weak_msg,"New weak pointer at %p\n")

mkWeakzh_fast
{
  /* R1 = key
     R2 = value
     R3 = finalizer (or NULL)
  */
  W_ w;

  if (R3 == NULL) {
    R3 = stg_NO_FINALIZER_closure;
  }

  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, W_[CCCS]);

  StgWeak_key(w)       = R1;
  StgWeak_value(w)     = R2;
  StgWeak_finalizer(w) = R3;

  StgWeak_link(w)      = W_[weak_ptr_list];
  W_[weak_ptr_list]    = w;

  IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);

  RET_P(w);
}

finalizzeWeakzh_fast
{
  /* R1 = weak ptr
   */
  W_ w, f;

  w = R1;

  // already dead?
  if (GET_INFO(w) == stg_DEAD_WEAK_info) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  }

  // kill it
#ifdef PROFILING
  // @LDV profiling
  // A weak pointer is inherently used, so we do not need to call
  // LDV_recordDead_FILL_SLOP_DYNAMIC():
  //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
  // or, LDV_recordDead():
  //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
  // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
  // large as weak pointers, so there is no need to fill the slop, either.
  // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
#endif

  //
  // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
  //
  SET_INFO(w,stg_DEAD_WEAK_info);
  LDV_RECORD_CREATE(w);

  f = StgWeak_finalizer(w);
  StgDeadWeak_link(w) = StgWeak_link(w);

  /* return the finalizer */
  if (f == stg_NO_FINALIZER_closure) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  } else {
      RET_NP(1,f);
  }
}

deRefWeakzh_fast
{
  /* R1 = weak ptr */
  W_ w, code, val;

  w = R1;
  if (GET_INFO(w) == stg_WEAK_info) {
    code = 1;
    val = StgWeak_value(w);
  } else {
    code = 0;
    val = w;
  }
  RET_NP(code,val);
}
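/* Illustrative sketch only: these three primops are the machinery behind
 * System.Mem.Weak, e.g.
 *
 *    import System.Mem.Weak (mkWeakPtr, deRefWeak)
 *
 *    demo key = do
 *      w <- mkWeakPtr key (Just (putStrLn "finalized"))  -- mkWeak#
 *      r <- deRefWeak w                                  -- deRefWeak#
 *      case r of
 *        Just k  -> putStrLn "key alive"
 *        Nothing -> putStrLn "key dead"  -- finalizer has run or will run
 */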
/* -----------------------------------------------------------------------------
   Arbitrary-precision Integer operations.

   There are some assumptions in this code that mp_limb_t == W_.  This is
   the case for all the platforms that GHC supports, currently.
   -------------------------------------------------------------------------- */

int2Integerzh_fast
{
    /* arguments: R1 = Int# */

    W_ val, s, p;       /* to avoid aliasing */

    val = R1;
    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );

    p = Hp - SIZEOF_StgArrWords;
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = 1;

    /* mpz_set_si is inlined here, makes things simpler */
    if (%lt(val,0)) {
        s  = -1;
        Hp(0) = -val;
    } else {
      if (%gt(val,0)) {
        s = 1;
        Hp(0) = val;
      } else {
        s = 0;
      }
    }

    /* returns (# size :: Int#, data :: ByteArray# #) */
    RET_NP(s,p);
}

word2Integerzh_fast
{
    /* arguments: R1 = Word# */

    W_ val, s, p;       /* to avoid aliasing */

    val = R1;

    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);

    p = Hp - SIZEOF_StgArrWords;
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = 1;

    if (val != 0) {
        s = 1;
        W_[Hp] = val;
    } else {
        s = 0;
    }

    /* returns (# size :: Int#, data :: ByteArray# #) */
    RET_NP(s,p);
}


/*
 * 'long long' primops for converting to/from Integers.
 */

#ifdef SUPPORT_LONG_LONGS

int64ToIntegerzh_fast
{
    /* arguments: L1 = Int64# */

    L_ val;
    W_ hi, lo, s, neg, words_needed, p;

    val = L1;
    neg = 0;

    hi = TO_W_(val >> 32);
    lo = TO_W_(val);

    if ( hi == 0 || (hi == 0xFFFFFFFF && lo != 0) )  {
        // minimum is one word
        words_needed = 1;
    } else {
        words_needed = 2;
    }

    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
                NO_PTRS, int64ToIntegerzh_fast );

    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = words_needed;

    if ( %lt(hi,0) ) {
        neg = 1;
        lo = -lo;
        if(lo == 0) {
            hi = -hi;
        } else {
            hi = -hi - 1;
        }
    }

    if ( words_needed == 2 )  {
        s = 2;
        Hp(-1) = lo;
        Hp(0) = hi;
    } else {
        if ( lo != 0 ) {
            s = 1;
            Hp(0) = lo;
        } else /* val==0 */  {
            s = 0;
        }
    }
    if ( neg != 0 ) {
        s = -s;
    }

    /* returns (# size :: Int#, data :: ByteArray# #) */
    RET_NP(s,p);
}

word64ToIntegerzh_fast
{
    /* arguments: L1 = Word64# */

    L_ val;
    W_ hi, lo, s, words_needed, p;

    val = L1;
    hi = TO_W_(val >> 32);
    lo = TO_W_(val);

    if ( hi != 0 ) {
        words_needed = 2;
    } else {
        words_needed = 1;
    }

    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
                NO_PTRS, word64ToIntegerzh_fast );

    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = words_needed;

    if ( hi != 0 ) {
        s = 2;
        Hp(-1) = lo;
        Hp(0)  = hi;
    } else {
        if ( lo != 0 ) {
            s = 1;
            Hp(0) = lo;
        } else /* val==0 */  {
            s = 0;
        }
    }

    /* returns (# size :: Int#, data :: ByteArray# #) */
    RET_NP(s,p);
}

#endif /* SUPPORT_LONG_LONGS */

/* ToDo: this is shockingly inefficient */

#ifndef THREADED_RTS
section "bss" { mp_tmp1:    bits8 [SIZEOF_MP_INT]; }
section "bss" { mp_tmp2:    bits8 [SIZEOF_MP_INT]; }
section "bss" { mp_result1: bits8 [SIZEOF_MP_INT]; }
section "bss" { mp_result2: bits8 [SIZEOF_MP_INT]; }
#endif

#ifdef THREADED_RTS
#define FETCH_MP_TEMP(X) \
W_ X; \
X = BaseReg + (OFFSET_StgRegTable_r ## X);
#else
#define FETCH_MP_TEMP(X) /* Nothing */
#endif

#define GMP_TAKE2_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1, s2;                                                          \
  W_ d1, d2;                                                            \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_tmp2);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
  FETCH_MP_TEMP(mp_result2);                                            \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
                                                                        \
  s1 = W_TO_INT(R1);                                                    \
  d1 = R2;                                                              \
  s2 = W_TO_INT(R3);                                                    \
  d2 = R4;                                                              \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
  MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
  MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
  MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
  MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
  MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                         \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];  \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}

#define GMP_TAKE1_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1;                                                              \
  W_ d1;                                                                \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR, name);                                               \
                                                                        \
  d1 = R2;                                                              \
  s1 = W_TO_INT(R1);                                                    \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
  MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
  MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                         \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}

#define GMP_TAKE2_RET2(name,mp_fun)                                         \
name                                                                        \
{                                                                           \
  CInt s1, s2;                                                              \
  W_ d1, d2;                                                                \
  FETCH_MP_TEMP(mp_tmp1);                                                   \
  FETCH_MP_TEMP(mp_tmp2);                                                   \
  FETCH_MP_TEMP(mp_result1)                                                 \
  FETCH_MP_TEMP(mp_result2)                                                 \
                                                                            \
  /* call doYouWantToGC() */                                                \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                          \
                                                                            \
  s1 = W_TO_INT(R1);                                                        \
  d1 = R2;                                                                  \
  s2 = W_TO_INT(R3);                                                        \
  d2 = R4;                                                                  \
                                                                            \
  MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));              \
  MP_INT__mp_size(mp_tmp1)  = (s1);                                         \
  MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                             \
  MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));              \
  MP_INT__mp_size(mp_tmp2)  = (s2);                                         \
  MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                             \
                                                                            \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                             \
  foreign "C" __gmpz_init(mp_result2 "ptr") [];                             \
                                                                            \
  /* Perform the operation */                                               \
  foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") []; \
                                                                            \
  RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                              \
           MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                   \
           TO_W_(MP_INT__mp_size(mp_result2)),                              \
           MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                  \
}

GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)

GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
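/* Note on the RET_NP(..., MP_INT__mp_d(result) - SIZEOF_StgArrWords)
 * idiom used by the macros above: at RTS startup the storage manager
 * points GMP at the GHC heap via mp_set_memory_functions(), so the limb
 * buffer (_mp_d) of every mpz_t result is the payload of a heap-allocated
 * ArrWords closure.  Subtracting the ArrWords header size therefore
 * recovers the ByteArray# closure to hand back to Haskell, while
 * _mp_size supplies the sign/size word of the Integer.
 */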
#ifndef THREADED_RTS
section "bss" {
  mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
}
#endif

gcdIntzh_fast
{
    /* R1 = the first Int#; R2 = the second Int# */
    W_ r;
    FETCH_MP_TEMP(mp_tmp_w);

    W_[mp_tmp_w] = R1;
    (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];

    R1 = r;
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}


gcdIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ s1;
    (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
    R1 = s1;

    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}


cmpIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ usize, vsize, v_digit, u_digit;

    usize = R1;
    vsize = 0;
    v_digit = R3;

    // paraphrased from __gmpz_cmp_si() in the GMP sources
    if (%gt(v_digit,0)) {
        vsize = 1;
    } else {
        if (%lt(v_digit,0)) {
            vsize = -1;
            v_digit = -v_digit;
        }
    }

    if (usize != vsize) {
        R1 = usize - vsize;
        jump %ENTRY_CODE(Sp(0));
    }

    if (usize == 0) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    u_digit = W_[BYTE_ARR_CTS(R2)];

    if (u_digit == v_digit) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
        R1 = usize;
    } else {
        R1 = -usize;
    }

    jump %ENTRY_CODE(Sp(0));
}

cmpIntegerzh_fast
{
    /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
    W_ usize, vsize, size, up, vp;
    CInt cmp;

    // paraphrased from __gmpz_cmp() in the GMP sources
    usize = R1;
    vsize = R3;

    if (usize != vsize) {
        R1 = usize - vsize;
        jump %ENTRY_CODE(Sp(0));
    }

    if (usize == 0) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    if (%lt(usize,0)) { // NB. not <, which is unsigned
        size = -usize;
    } else {
        size = usize;
    }

    up = BYTE_ARR_CTS(R2);
    vp = BYTE_ARR_CTS(R4);

    (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];

    if (cmp == 0 :: CInt) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
        R1 = 1;
    } else {
        R1 = (-1);
    }
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}

integer2Intzh_fast
{
    /* R1 = s; R2 = d */
    W_ r, s;

    s = R1;
    if (s == 0) {
        r = 0;
    } else {
        r = W_[R2 + SIZEOF_StgArrWords];
        if (%lt(s,0)) {
            r = -r;
        }
    }
    /* Result parked in R1, return via info-pointer at TOS */
    R1 = r;
    jump %ENTRY_CODE(Sp(0));
}

integer2Wordzh_fast
{
    /* R1 = s; R2 = d */
    W_ r, s;

    s = R1;
    if (s == 0) {
        r = 0;
    } else {
        r = W_[R2 + SIZEOF_StgArrWords];
        if (%lt(s,0)) {
            r = -r;
        }
    }
    /* Result parked in R1, return via info-pointer at TOS */
    R1 = r;
    jump %ENTRY_CODE(Sp(0));
}

decodeFloatzh_fast
{
    W_ p;
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: F1 = Float# */
    arg = F1;

    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );

    /* Be prepared to tell Lennart-coded __decodeFloat
       where mantissa._mp_d can be put (it does not care about the rest) */
    p = Hp - SIZEOF_StgArrWords;
    SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
    StgArrWords_words(p) = 1;
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);

    /* Perform the operation */
    foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];

    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}

decodeFloatzuIntzh_fast
{
    W_ p;
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: F1 = Float# */
    arg = F1;

    /* Perform the operation */
    foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];

    /* returns: (Int# (mantissa), Int# (exponent)) */
    RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
}
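/* Worked example (illustrative): __decodeFloat_Int splits a Float into
 * mantissa m and exponent e with f == m * 2^e, so for f = 0.5 :: Float
 * it yields m = 8388608 (= 2^23) and e = -24, since 2^23 * 2^-24 == 0.5.
 * This is what GHC.Float.decodeFloat eventually boils down to.
 */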
#define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
#define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)

decodeDoublezh_fast
{
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: D1 = Double# */
    arg = D1;

    ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );

    /* Be prepared to tell Lennart-coded __decodeDouble
       where mantissa.d can be put (it does not care about the rest) */
    p = Hp - ARR_SIZE + WDS(1);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);

    /* Perform the operation */
    foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];

    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}

decodeDoublezu2Intzh_fast
{
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp2);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: D1 = Double# */
    arg = D1;

    /* Perform the operation */
    foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
                                    mp_tmp_w "ptr", arg) [];

    /* returns: (Int# (mant high), Int# (mant low), Int# (expn)) */
    RET_NNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_tmp_w]);
}

/* -----------------------------------------------------------------------------
 * Concurrency primitives
 * -------------------------------------------------------------------------- */

forkzh_fast
{
  /* args: R1 = closure to spark */

  MAYBE_GC(R1_PTR, forkzh_fast);

  W_ closure;
  W_ threadid;
  closure = R1;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
                                RtsFlags_GcFlags_initialStkSize(RtsFlags),
                                closure "ptr") [];

  /* start blocked if the current thread is blocked */
  StgTSO_flags(threadid) =
     StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO)
                                & (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));

  foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];

  // switch at the earliest opportunity
  CInt[context_switch] = 1 :: CInt;

  RET_P(threadid);
}

forkOnzh_fast
{
  /* args: R1 = cpu, R2 = closure to spark */

  MAYBE_GC(R2_PTR, forkOnzh_fast);

  W_ cpu;
  W_ closure;
  W_ threadid;
  cpu = R1;
  closure = R2;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
                                RtsFlags_GcFlags_initialStkSize(RtsFlags),
                                closure "ptr") [];

  /* start blocked if the current thread is blocked */
  StgTSO_flags(threadid) =
     StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO)
                                & (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));

  foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];

  // switch at the earliest opportunity
  CInt[context_switch] = 1 :: CInt;

  RET_P(threadid);
}

yieldzh_fast
{
  jump stg_yield_noregs;
}
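/* Illustrative note: fork# and forkOn# are the primops behind forkIO and
 * forkOn.  The TSO_BLOCKEX/TSO_INTERRUPTIBLE copy above is what makes a
 * child thread inherit the parent's exception-blocking state, e.g.
 *
 *    block (forkIO body)   -- body starts with async exceptions blocked
 *
 * and setting context_switch forces a reschedule at the next heap check,
 * giving the new thread a prompt chance to run.
 */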
myThreadIdzh_fast
{
  /* no args. */
  RET_P(CurrentTSO);
}

labelThreadzh_fast
{
  /* args:
        R1 = ThreadId#
        R2 = Addr# */
#ifdef DEBUG
  foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
#endif
  jump %ENTRY_CODE(Sp(0));
}

isCurrentThreadBoundzh_fast
{
  /* no args */
  W_ r;
  (r) = foreign "C" isThreadBound(CurrentTSO) [];
  RET_N(r);
}


/* -----------------------------------------------------------------------------
 * TVar primitives
 * -------------------------------------------------------------------------- */

#ifdef REG_R1
#define SP_OFF 0
#define IF_NOT_REG_R1(x)
#else
#define SP_OFF 1
#define IF_NOT_REG_R1(x) x
#endif

// Catch retry frame ------------------------------------------------------------

INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  W_ unused3, "ptr" W_ unused4, "ptr" W_ unused5)
{
   W_ r, frame, trec, outer;
   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
   (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
   if (r != 0) {
      /* Succeeded (either first branch or second branch) */
      StgTSO_trec(CurrentTSO) = outer;
      Sp = Sp + SIZEOF_StgCatchRetryFrame;
      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
      jump %ENTRY_CODE(Sp(SP_OFF));
   } else {
      /* Did not commit: re-execute */
      W_ new_trec;
      ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
      StgTSO_trec(CurrentTSO) = new_trec;
      if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
          R1 = StgCatchRetryFrame_alt_code(frame);
      } else {
          R1 = StgCatchRetryFrame_first_code(frame);
      }
      jump stg_ap_v_fast;
   }
}
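/* Illustrative note: CATCH_RETRY_FRAME is the stack frame behind STM's
 * orElse.  Roughly, atomically (a `orElse` b) pushes this frame and runs
 * `a` in a nested transaction; if `a` calls retry, retryzh_fast (below)
 * finds this frame and runs the alt code `b` instead, and whichever
 * branch completes has its nested transaction committed here.
 */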
// Atomically frame ------------------------------------------------------------

INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
{
  W_ frame, trec, valid, next_invariant, q, outer;
  IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (outer == NO_TREC) {
    /* First time back at the atomically frame -- pick up invariants */
    ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;

  } else {
    /* Second/subsequent time back at the atomically frame -- abort the
     * tx that's checking the invariant and move on to the next one */
    StgTSO_trec(CurrentTSO) = outer;
    q = StgAtomicallyFrame_next_invariant_to_check(frame);
    StgInvariantCheckQueue_my_execution(q) = trec;
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    /* Don't free trec -- it's linked from q and will be stashed in the
     * invariant if we eventually commit. */
    q = StgInvariantCheckQueue_next_queue_entry(q);
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
    trec = outer;
  }

  q = StgAtomicallyFrame_next_invariant_to_check(frame);

  if (q != END_INVARIANT_CHECK_QUEUE) {
    /* We can't commit yet: another invariant to check */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;

    next_invariant = StgInvariantCheckQueue_invariant(q);
    R1 = StgAtomicInvariant_code(next_invariant);
    jump stg_ap_v_fast;

  } else {

    /* We've got no more invariants to check, try to commit */
    (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
    if (valid != 0) {
      /* Transaction was valid: commit succeeded */
      StgTSO_trec(CurrentTSO) = NO_TREC;
      Sp = Sp + SIZEOF_StgAtomicallyFrame;
      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
      jump %ENTRY_CODE(Sp(SP_OFF));
    } else {
      /* Transaction was not valid: try again */
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
      R1 = StgAtomicallyFrame_code(frame);
      jump stg_ap_v_fast;
    }
  }
}

INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
{
  W_ frame, trec, valid;
  IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

  frame = Sp;

  /* The TSO is currently waiting: should we stop waiting? */
  (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
  if (valid != 0) {
    /* Previous attempt is still valid: no point trying again yet */
    IF_NOT_REG_R1(Sp_adj(-2);
                  Sp(1) = stg_NO_FINALIZER_closure;
                  Sp(0) = stg_ut_1_0_unreg_info;)
    jump stg_block_noregs;
  } else {
    /* Previous attempt is no longer valid: try again */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    StgHeader_info(frame) = stg_atomically_frame_info;
    R1 = StgAtomicallyFrame_code(frame);
    jump stg_ap_v_fast;
  }
}
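/* Illustrative note on the control flow above: the atomically frame is
 * entered once when the transaction body finishes, and then once more
 * per invariant added with check#.  Each invariant runs in its own
 * nested transaction, which is aborted on return (but kept linked from
 * the queue so commit can validate what it read); only when the queue
 * is empty do we attempt to commit the outer transaction.
 */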
// STM catch frame --------------------------------------------------------------

#ifdef REG_R1
#define SP_OFF 0
#else
#define SP_OFF 1
#endif

/* Catch frames are very similar to update frames, but when entering
 * one we just pop the frame off the stack and perform the correct
 * kind of return to the activation record underneath us on the stack.
 */

INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
{
   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
   W_ r, frame, trec, outer;
   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
   (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
   if (r != 0) {
        /* Commit succeeded */
        StgTSO_trec(CurrentTSO) = outer;
        Sp = Sp + SIZEOF_StgCatchSTMFrame;
        IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
        jump Sp(SP_OFF);
   } else {
        /* Commit failed */
        W_ new_trec;
        ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
        StgTSO_trec(CurrentTSO) = new_trec;
        R1 = StgCatchSTMFrame_code(frame);
        jump stg_ap_v_fast;
   }
}


// Primop definition ------------------------------------------------------------

atomicallyzh_fast
{
  W_ frame;
  W_ old_trec;
  W_ new_trec;

  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR, atomicallyzh_fast);

  /* Args: R1 = m :: STM a */
  STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);

  old_trec = StgTSO_trec(CurrentTSO);

  /* Nested transactions are not allowed; raise an exception */
  if (old_trec != NO_TREC) {
     R1 = base_GHCziIOBase_NestedAtomically_closure;
     jump raisezh_fast;
  }

  /* Set up the atomically frame */
  Sp = Sp - SIZEOF_StgAtomicallyFrame;
  frame = Sp;

  SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
  StgAtomicallyFrame_code(frame) = R1;
  StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;

  /* Start the memory transaction */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}


catchSTMzh_fast
{
  W_ frame;

  /* Args: R1 :: STM a */
  /* Args: R2 :: Exception -> STM a */
  STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);

  /* Set up the catch frame */
  Sp = Sp - SIZEOF_StgCatchSTMFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
  StgCatchSTMFrame_handler(frame) = R2;
  StgCatchSTMFrame_code(frame) = R1;

  /* Start a nested transaction to run the body of the try block in */
  W_ cur_trec;
  W_ new_trec;
  cur_trec = StgTSO_trec(CurrentTSO);
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}


catchRetryzh_fast
{
  W_ frame;
  W_ new_trec;
  W_ trec;

  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Args: R1 :: STM a */
  /* Args: R2 :: STM a */
  STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Start a nested transaction within which to run the first code */
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Set up the catch-retry frame */
  Sp = Sp - SIZEOF_StgCatchRetryFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
  StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
  StgCatchRetryFrame_first_code(frame) = R1;
  StgCatchRetryFrame_alt_code(frame) = R2;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
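/* Illustrative sketch only: the NO_TREC test in atomicallyzh_fast above
 * is what turns a nested transaction, e.g.
 *
 *    atomically (unsafeIOToSTM (atomically ...))
 *
 * into a thrown NestedAtomically exception (the GHC.IOBase closure
 * imported at the top of this file) rather than undefined behaviour.
 */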
retryzh_fast
{
  W_ frame_type;
  W_ frame;
  W_ trec;
  W_ outer;
  W_ r;

  MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate

  // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
  StgTSO_sp(CurrentTSO) = Sp;
  (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
  Sp = StgTSO_sp(CurrentTSO);
  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (frame_type == CATCH_RETRY_FRAME) {
    // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
    ASSERT(outer != NO_TREC);
    // Abort the transaction attempting the current branch
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    if (StgCatchRetryFrame_running_alt_code(frame) == 0::I32) {
      // Retry in the first branch: try the alternative
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
      R1 = StgCatchRetryFrame_alt_code(frame);
      jump stg_ap_v_fast;
    } else {
      // Retry in the alternative code: propagate the retry
      StgTSO_trec(CurrentTSO) = outer;
      Sp = Sp + SIZEOF_StgCatchRetryFrame;
      goto retry_pop_stack;
    }
  }

  // We've reached the ATOMICALLY_FRAME: attempt to wait
  ASSERT(frame_type == ATOMICALLY_FRAME);
  if (outer != NO_TREC) {
    // We called retry while checking invariants, so abort the current
    // invariant check (merging its TVar accesses into the parent's read
    // set so we'll wait on them)
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    trec = outer;
    StgTSO_trec(CurrentTSO) = trec;
    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
  }
  ASSERT(outer == NO_TREC);

  (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
  if (r != 0) {
    // Transaction was valid: stmWait put us on the TVars' queues, we now block
    StgHeader_info(frame) = stg_atomically_waiting_frame_info;
    Sp = frame;
    // Fix up the stack in the unregisterised case: the return convention is different.
    IF_NOT_REG_R1(Sp_adj(-2);
                  Sp(1) = stg_NO_FINALIZER_closure;
                  Sp(0) = stg_ut_1_0_unreg_info;)
    R3 = trec; // passing to stmWaitUnblock()
    jump stg_block_stmwait;
  } else {
    // Transaction was not valid: retry immediately
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    R1 = StgAtomicallyFrame_code(frame);
    Sp = frame;
    jump stg_ap_v_fast;
  }
}
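/* Illustrative sketch only: the stmWait() path above is what blocks
 *
 *    atomically $ do b <- readTVar flag
 *                    unless b retry
 *
 * stmWait() enqueues the TSO on the watch queue of every TVar in the
 * transaction's read set, so the thread is woken (and re-runs from the
 * atomically frame) only after some other transaction writes one of them.
 */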
checkzh_fast
{
  W_ trec, closure;

  /* Args: R1 = invariant closure */
  MAYBE_GC (R1_PTR, checkzh_fast);

  trec = StgTSO_trec(CurrentTSO);
  closure = R1;
  foreign "C" stmAddInvariantToCheck(MyCapability() "ptr",
                                     trec "ptr",
                                     closure "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}


newTVarzh_fast
{
  W_ tv;
  W_ new_value;

  /* Args: R1 = initialisation value */

  MAYBE_GC (R1_PTR, newTVarzh_fast);
  new_value = R1;
  ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
  RET_P(tv);
}


readTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ result;

  /* Args: R1 = TVar closure */

  MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];

  RET_P(result);
}


writeTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ new_value;

  /* Args: R1 = TVar closure */
  /*       R2 = New value    */

  MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  new_value = R2;
  foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}


/* -----------------------------------------------------------------------------
 * MVar primitives
 *
 * take & putMVar work as follows.  Firstly, an important invariant:
 *
 *    If the MVar is full, then the blocking queue contains only
 *    threads blocked on putMVar, and if the MVar is empty then the
 *    blocking queue contains only threads blocked on takeMVar.
 *
 * takeMVar:
 *    MVar empty : then add ourselves to the blocking queue
 *    MVar full  : remove the value from the MVar, and
 *                 blocking queue empty     : return
 *                 blocking queue non-empty : perform the first blocked putMVar
 *                                            from the queue, and wake up the
 *                                            thread (MVar is now full again)
 *
 * putMVar is just the dual of the above algorithm.
 *
 * How do we "perform a putMVar"?  Well, we have to fiddle around with
 * the stack of the thread waiting to do the putMVar.  See
 * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
 * the stack layout, and the PerformPut and PerformTake macros below.
 *
 * It is important that a blocked take or put is woken up with the
 * take/put already performed, because otherwise there would be a
 * small window of vulnerability where the thread could receive an
 * exception and never perform its take or put, and we'd end up with a
 * deadlock.
 *
 * -------------------------------------------------------------------------- */
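/* Illustrative sketch only: the invariant above is why, e.g.
 *
 *    m <- newEmptyMVar
 *    forkIO (takeMVar m >>= print)  -- blocks, queued on m
 *    putMVar m 42                   -- completes the blocked take itself
 *
 * finishes the child's takeMVar inside putMVar# (via PerformTake below)
 * before waking it, instead of merely waking the thread to try again.
 */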
isEmptyMVarzh_fast
{
    /* args: R1 = MVar closure */

    if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) {
        RET_N(1);
    } else {
        RET_N(0);
    }
}

newMVarzh_fast
{
    /* args: none */
    W_ mvar;

    ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );

    mvar = Hp - SIZEOF_StgMVar + WDS(1);
    SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
        // MVARs start dirty: generation 0 has no mutable list
    StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
    RET_P(mvar);
}


/* If R1 isn't available, pass it on the stack */
#ifdef REG_R1
#define PerformTake(tso, value)                         \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
#else
#define PerformTake(tso, value)                         \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
#endif

#define PerformPut(tso,lval)                            \
    StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);           \
    lval = W_[StgTSO_sp(tso) - WDS(1)];
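/* Note on the macros above (summarising the stack layouts set up by
 * stg_block_takemvar/stg_block_putmvar in HeapStackCheck.c): a thread
 * blocked in takeMVar# sits under a two-word return frame, so
 * PerformTake writes a "return a pointer" info table to Sp[0] and the
 * value to Sp[1], as if its takeMVar had just returned.  A thread
 * blocked in putMVar# sits under a three-word frame holding the value
 * it wanted to store; PerformPut pops the frame and reads that value out.
 */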
takeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* If the MVar is empty, put ourselves on its blocking queue,
     * and wait until we're woken up.
     */
    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
        }
        StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;

        jump stg_block_takemvar;
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
    {
        /* There are putMVar(s) waiting...
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));
        dirtyTSO(tso);

#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar),mvar) [];
        StgMVar_head(mvar) = tso;
#else
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr",
                                             StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        RET_P(val);
    }
    else
    {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        RET_P(val);
    }
}


tryTakeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        unlockClosure(mvar, info);
#endif
        /* HACK: we need a pointer to pass back,
         * so we abuse NO_FINALIZER_closure
         */
        RET_NP(0, stg_NO_FINALIZER_closure);
    }

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are putMVar(s) waiting...
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));
        dirtyTSO(tso);

#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr",
                                             StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else
    {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }

    RET_NP(1, val);
}
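/* Illustrative note: the (0, stg_NO_FINALIZER_closure) / (1, val) pairs
 * returned above are decoded by the libraries into Maybe:
 *
 *    tryTakeMVar m  =>  Nothing  -- tag 0; the closure really is a dummy,
 *                                -- abused only so we have *a* pointer
 *                   =>  Just x   -- tag 1
 */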
putMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
        }
        StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;

        jump stg_block_putmvar;
    }

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        dirtyTSO(tso);

#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr",
                                             StgMVar_head(mvar) "ptr",mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr",
                                             StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }

    /* ToDo: yield afterward for better communication performance? */
}


tryPutMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        unlockClosure(mvar, info);
#endif
        RET_N(0);
    }

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        dirtyTSO(tso);

#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr",
                                             StgMVar_head(mvar) "ptr",mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr",
                                             StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }

    RET_N(1);
    /* ToDo: yield afterward for better communication performance? */
}
/* -----------------------------------------------------------------------------
   Stable pointer primitives
   -------------------------------------------------------------------------  */

makeStableNamezh_fast
{
    W_ index, sn_obj;

    ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );

    (index) = foreign "C" lookupStableName(R1 "ptr") [];

    /* Is there already a StableName for this heap object?
     *  stable_ptr_table is a pointer to an array of snEntry structs.
     */
    if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
        sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
        SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
        StgStableName_sn(sn_obj) = index;
        snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
    } else {
        sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    }

    RET_P(sn_obj);
}


makeStablePtrzh_fast
{
    /* Args: R1 = a */
    W_ sp;
    MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
    ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
    RET_N(sp);
}

deRefStablePtrzh_fast
{
    /* Args: R1 = the stable ptr */
    W_ r, sp;
    sp = R1;
    r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
    RET_P(r);
}
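/* Illustrative sketch only: these primops underlie System.Mem.StableName
 * and Foreign.StablePtr, e.g.
 *
 *    sn1 <- makeStableName x     -- makeStableName#
 *    sn2 <- makeStableName x
 *    print (sn1 == sn2)          -- typically True: same snEntry index
 *
 * The sn_obj test in makeStableNamezh_fast is what makes the second call
 * return the very same StgStableName closure as the first.
 */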
/* -----------------------------------------------------------------------------
   Bytecode object primitives
   -------------------------------------------------------------------------  */

newBCOzh_fast
{
    /* R1 = instrs
       R2 = literals
       R3 = ptrs
       R4 = arity
       R5 = bitmap array
    */
    W_ bco, bitmap_arr, bytes, words;

    bitmap_arr = R5;

    words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
    bytes = WDS(words);

    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );

    bco = Hp - bytes + WDS(1);
    SET_HDR(bco, stg_BCO_info, W_[CCCS]);

    StgBCO_instrs(bco)   = R1;
    StgBCO_literals(bco) = R2;
    StgBCO_ptrs(bco)     = R3;
    StgBCO_arity(bco)    = HALF_W_(R4);
    StgBCO_size(bco)     = HALF_W_(words);

    // Copy the arity/bitmap info into the BCO
    W_ i;
    i = 0;
for:
    if (i < StgArrWords_words(bitmap_arr)) {
        StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
        i = i + 1;
        goto for;
    }

    RET_P(bco);
}


mkApUpd0zh_fast
{
    // R1 = the BCO# for the AP
    //
    W_ ap;

    // This function is *only* used to wrap zero-arity BCOs in an
    // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
    // saturated and always points directly to a FUN or BCO.
    ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
           StgBCO_arity(R1) == HALF_W_(0));

    HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
    TICK_ALLOC_UP_THK(0, 0);
    CCCS_ALLOC(SIZEOF_StgAP);

    ap = Hp - SIZEOF_StgAP + WDS(1);
    SET_HDR(ap, stg_AP_info, W_[CCCS]);

    StgAP_n_args(ap) = HALF_W_(0);
    StgAP_fun(ap) = R1;

    RET_P(ap);
}

unpackClosurezh_fast
{
/* args: R1 = closure to analyze */
// TODO: Consider the absence of ptrs or nonptrs as a special case ?

    W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
    info  = %GET_STD_INFO(UNTAG(R1));

    // Some closures have non-standard layout, so we omit those here.
    W_ type;
    type = TO_W_(%INFO_TYPE(info));
    switch [0 .. N_CLOSURE_TYPES] type {
    case THUNK_SELECTOR : {
        ptrs = 1;
        nptrs = 0;
        goto out;
    }
    case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1,
         THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
        ptrs = 0;
        nptrs = 0;
        goto out;
    }
    default: {
        ptrs  = TO_W_(%INFO_PTRS(info));
        nptrs = TO_W_(%INFO_NPTRS(info));
        goto out;
    }}
out:

    W_ ptrs_arr_sz, nptrs_arr_sz;
    nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
    ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);

    ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);

    W_ clos;
    clos = UNTAG(R1);

    ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
    nptrs_arr = Hp - nptrs_arr_sz + WDS(1);

    SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
    p = 0;
for:
    if(p < ptrs) {
        W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
        p = p + 1;
        goto for;
    }

    SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(nptrs_arr) = nptrs;
    p = 0;
for2:
    if(p < nptrs) {
        W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
        p = p + 1;
        goto for2;
    }
    RET_NPP(info, ptrs_arr, nptrs_arr);
}

/* -----------------------------------------------------------------------------
   Thread I/O blocking primitives
   -------------------------------------------------------------------------- */

/* Add a thread to the end of the blocked queue. (C-- version of the C
 * macro in Schedule.h).
 */
#define APPEND_TO_BLOCKED_QUEUE(tso)                    \
    ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
    if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
      W_[blocked_queue_hd] = tso;                       \
    } else {                                            \
      StgTSO_link(W_[blocked_queue_tl]) = tso;          \
    }                                                   \
    W_[blocked_queue_tl] = tso;

waitReadzh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitRead# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}

waitWritezh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitWrite# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
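/* Illustrative note: waitRead#/waitWrite# implement threadWaitRead and
 * threadWaitWrite on the non-threaded RTS only: the scheduler's
 * select()-based awaitEvent() polls the file descriptors of everything
 * on blocked_queue and wakes the TSOs whose fd becomes ready.  The
 * threaded RTS makes blocking foreign calls instead, hence the barf()s
 * above.
 */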
STRING(stg_delayzh_malloc_str, "delayzh_fast")
delayzh_fast
{
#ifdef mingw32_HOST_OS
    W_ ares;
    CInt reqID;
#else
    W_ t, prev, target;
#endif

#ifdef THREADED_RTS
    foreign "C" barf("delay# on threaded RTS") never returns;
#else

    /* args: R1 (microsecond delay amount) */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;

#ifdef mingw32_HOST_OS

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                              stg_delayzh_malloc_str);
    (reqID) = foreign "C" addDelayRequest(R1);
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;

    /* Having all async-blocked threads reside on the blocked_queue
     * simplifies matters, so change the status to OnDoProc and put the
     * delayed thread on the blocked_queue.
     */
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async_void;

#else

    W_ time;
    W_ divisor;
    (time) = foreign "C" getourtimeofday() [R1];
    divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags));
    if (divisor == 0) {
        divisor = 50;
    }
    divisor = divisor * 1000;
    target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
           + time + 1; /* Add 1 as getourtimeofday rounds down */
    StgTSO_block_info(CurrentTSO) = target;

    /* Insert the new thread in the sleeping queue. */
    prev = NULL;
    t = W_[sleeping_queue];
while:
    if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
        prev = t;
        t = StgTSO_link(t);
        goto while;
    }

    StgTSO_link(CurrentTSO) = t;
    if (prev == NULL) {
        W_[sleeping_queue] = CurrentTSO;
    } else {
        StgTSO_link(prev) = CurrentTSO;
    }
    jump stg_block_noregs;
#endif
#endif /* !THREADED_RTS */
}


#ifdef mingw32_HOST_OS
STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
asyncReadzh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncRead# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                              stg_asyncReadzh_malloc_str)
                        [R1,R2,R3,R4];
    (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}

STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
asyncWritezh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncWrite# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;

    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                              stg_asyncWritezh_malloc_str)
                        [R1,R2,R3,R4];
    (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];

    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}

STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
asyncDoProczh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
#else

    /* args: R1 = proc, R2 = param */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                              stg_asyncDoProczh_malloc_str)
                        [R1,R2];
    (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO) = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
#endif
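/* Worked example for the sleeping-queue arithmetic in delayzh_fast above
 * (illustrative): with a 50ms tick (the fallback used when tickInterval
 * is 0), divisor = 50 * 1000 = 50000 microseconds per tick, so
 * threadDelay 100000 computes
 *
 *    target = (100000 + 49999) / 50000 + time + 1 = time + 3
 *
 * i.e. the delay is rounded *up* to whole ticks, plus one extra tick
 * because getourtimeofday() rounds down.
 */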
// noDuplicate# tries to ensure that none of the thunks under
// evaluation by the current thread are also under evaluation by
// another thread.  It relies on *both* threads doing noDuplicate#;
// the second one will get blocked if they are duplicating some work.
noDuplicatezh_fast
{
    SAVE_THREAD_STATE();
    ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
    foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];

    if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
        jump stg_threadFinished;
    } else {
        LOAD_THREAD_STATE();
        ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
        jump %ENTRY_CODE(Sp(0));
    }
}

getApStackValzh_fast
{
    W_ ap_stack, offset, val, ok;

    /* args: R1 = AP_STACK, R2 = offset */
    ap_stack = R1;
    offset   = R2;

    if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
        ok = 1;
        val = StgAP_STACK_payload(ap_stack,offset);
    } else {
        ok = 0;
        val = R1;
    }
    RET_NP(ok,val);
}
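/* Illustrative note: noDuplicate# is the primop behind the library-level
 * noDuplicate used by unsafePerformIO, roughly
 *
 *    unsafePerformIO m = unsafeDupablePerformIO (noDuplicate >> m)
 *
 * (modulo the exact base version), so that two threads racing to
 * evaluate the same unsafePerformIO thunk don't both run the effect:
 * threadPaused() claims the thunks under evaluation, and the loser
 * blocks on (or restarts from) the winner's black holes.
 */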