/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2011
 *
 * Out-of-line primitive operations
 *
 * This file contains the implementations of all the primitive
 * operations ("primops") which are not expanded inline.  See
 * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
 * this file contains code for most of those with the attribute
 * out_of_line=True.
 *
 * Entry convention: the entry convention for a primop is that all the
 * args are in Stg registers (R1, R2, etc.).  This is to make writing
 * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
 *
 * Return convention: results from a primop are generally returned
 * using the ordinary unboxed tuple return convention.  The C-- parser
 * implements the RET_xxxx() macros to perform unboxed-tuple returns
 * based on the prevailing return convention.
 *
 * This file is written in a subset of C--, extended with various
 * features specific to GHC.  It is compiled by GHC directly.  For the
 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
 *
 * ---------------------------------------------------------------------------*/

#include "Cmm.h"

#ifdef __PIC__
import pthread_mutex_lock;
import pthread_mutex_unlock;
#endif
import base_ControlziExceptionziBase_nestedAtomically_closure;
import EnterCriticalSection;
import LeaveCriticalSection;
import ghczmprim_GHCziTypes_False_closure;
#if !defined(mingw32_HOST_OS)
import sm_mutex;
#endif

/*-----------------------------------------------------------------------------
  Array Primitives

  Basically just new*Array - the others are all inline macros.

  The size arg is always passed in R1, and the result returned in R1.

  The slow entry point is for returning from a heap check; the saved
  size argument must be re-loaded from the stack.
  -------------------------------------------------------------------------- */

/* for objects that are *less* than the size of a word, make sure we
 * round up to the nearest word for the size of the array.
 */
stg_newByteArrayzh
{
    W_ words, payload_words, n, p;
    MAYBE_GC(NO_PTRS,stg_newByteArrayzh);
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    ("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_bytes(p) = n;
    RET_P(p);
}

#define BA_ALIGN 16
#define BA_MASK  (BA_ALIGN-1)

stg_newPinnedByteArrayzh
{
    W_ words, n, bytes, payload_words, p;

    MAYBE_GC(NO_PTRS,stg_newPinnedByteArrayzh);
    n = R1;
    bytes = n;
    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrWords;
    /* And we want to align to BA_ALIGN bytes, so we need to allow space
       to shift up to BA_ALIGN - 1 bytes: */
    bytes = bytes + BA_ALIGN - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to BA_ALIGN bytes: */
    p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);

    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_bytes(p) = n;
    RET_P(p);
}

stg_newAlignedPinnedByteArrayzh
{
    W_ words, n, bytes, payload_words, p, alignment;

    MAYBE_GC(NO_PTRS,stg_newAlignedPinnedByteArrayzh);
    n = R1;
    alignment = R2;

    /* we always supply at least word-aligned memory, so there's no
       need to allow extra space for alignment if the requirement is less
       than a word.  This also prevents mischief with alignment == 0. */
    if (alignment <= SIZEOF_W) { alignment = 1; }

    bytes = n;

    /* payload_words is what we will tell the profiler we had to allocate */
    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);

    /* When we actually allocate memory, we need to allow space for the
       header: */
    bytes = bytes + SIZEOF_StgArrWords;
    /* And we want to align to <alignment> bytes, so we need to allow space
       to shift up to <alignment - 1> bytes: */
    bytes = bytes + alignment - 1;
    /* Now we convert to a number of words: */
    words = ROUNDUP_BYTES_TO_WDS(bytes);

    ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    /* Now we need to move p forward so that the payload is aligned
       to <alignment> bytes.  Note that we are assuming that <alignment>
       is a power of 2, which is technically not guaranteed */
    p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));

    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_bytes(p) = n;
    RET_P(p);
}
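
/* Illustrative note, not part of the RTS: the alignment trick used above,
 *
 *     p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));
 *
 * rounds the payload address up to the next multiple of the (power-of-two)
 * alignment.  A minimal Haskell sketch of the same arithmetic (the names
 * here are ours, for illustration only):
 *
 *     import Data.Bits ((.&.))
 *
 *     -- Address of the first payload byte, given object address p and
 *     -- header size hdr: round p + hdr up to a multiple of align.
 *     payloadAddr :: Int -> Int -> Int -> Int
 *     payloadAddr p hdr align = p + ((negate p - hdr) .&. (align - 1)) + hdr
 *
 * e.g. payloadAddr 8 8 16 == 16 and payloadAddr 24 8 16 == 32.
 */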
stg_newArrayzh
{
    W_ words, n, init, arr, p, size;
    /* Args: R1 = words, R2 = initialisation value */

    n = R1;
    MAYBE_GC(R2_PTR,stg_newArrayzh);

    // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words
    // in the array, making sure we round up, and then rounding up to a whole
    // number of words.
    size = n + mutArrPtrsCardWords(n);
    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
    ("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2];
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(arr) = n;
    StgMutArrPtrs_size(arr) = size;

    // Initialise all elements of the array with the value in R2
    init = R2;
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + WDS(words)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }
    // Initialise the mark bits with 0
  for2:
    if (p < arr + WDS(size)) {
        W_[p] = 0;
        p = p + WDS(1);
        goto for2;
    }

    RET_P(arr);
}

stg_unsafeThawArrayzh
{
    // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
    //
    // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
    // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
    // it on the mutable list for the GC to remove (removing something from
    // the mutable list is not easy).
    //
    // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable
    // list, when we freeze it we set the info ptr to be
    // MUT_ARR_PTRS_FROZEN0 to indicate that it is still on the mutable list.
    //
    // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
    // either it is on a mut_list, or it isn't.  We adopt the convention that
    // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
    // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
    // we put it on the mutable list more than once, but it would get scavenged
    // multiple times during GC, which would be unnecessarily slow.
    //
    if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(R1, R1);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        RET_P(R1);
    } else {
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        RET_P(R1);
    }
}

/* -----------------------------------------------------------------------------
   MutVar primitives
   -------------------------------------------------------------------------- */

stg_newMutVarzh
{
    W_ mv;
    /* Args: R1 = initialisation value */

    ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, stg_newMutVarzh);

    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
    StgMutVar_var(mv) = R1;

    RET_P(mv);
}

stg_casMutVarzh
 /* MutVar# s a -> a -> a -> State# s -> (# State#, Int#, a #) */
{
    W_ mv, old, new, h;

    mv  = R1;
    old = R2;
    new = R3;

    (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var,
                          old, new) [];
    if (h != old) {
        RET_NP(1,h);
    } else {
        RET_NP(0,h);
    }
}
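
/* Illustrative note, not part of the RTS: stg_casMutVarzh returns (0, old)
 * on success and (1, current) on failure; stg_atomicModifyMutVarzh below
 * spins on exactly that CAS.  At the Haskell level this machinery backs
 * atomicModifyIORef (MutVar# is the representation of IORef), e.g.:
 *
 *     import Data.IORef
 *
 *     -- Atomically increment a counter, returning the old value.
 *     bump :: IORef Int -> IO Int
 *     bump r = atomicModifyIORef r (\x -> (x + 1, x))
 */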
stg_atomicModifyMutVarzh
{
    W_ mv, f, z, x, y, r, h;
    /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */

    /* If x is the current contents of the MutVar#, then
       we want to make the new contents point to

         (sel_0 (f x))

       and the return value is

         (sel_1 (f x))

       obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
         y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */

#if MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, stg_atomicModifyMutVarzh);

    mv = R1;
    f = R2;

    TICK_ALLOC_THUNK_2();
    CCCS_ALLOC(THUNK_2_SIZE);
    z = Hp - THUNK_2_SIZE + WDS(1);
    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
    LDV_RECORD_CREATE(z);
    StgThunk_payload(z,0) = f;

    TICK_ALLOC_THUNK_1();
    CCCS_ALLOC(THUNK_1_SIZE);
    y = z - THUNK_1_SIZE;
    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
    LDV_RECORD_CREATE(y);
    StgThunk_payload(y,0) = z;

    TICK_ALLOC_THUNK_1();
    CCCS_ALLOC(THUNK_1_SIZE);
    r = y - THUNK_1_SIZE;
    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
    LDV_RECORD_CREATE(r);
    StgThunk_payload(r,0) = z;

  retry:
    x = StgMutVar_var(mv);
    StgThunk_payload(z,1) = x;
#ifdef THREADED_RTS
    (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) [];
    if (h != x) { goto retry; }
#else
    StgMutVar_var(mv) = y;
#endif

    if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
        foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") [];
    }

    RET_P(r);
}

/* -----------------------------------------------------------------------------
   Weak Pointer Primitives
   -------------------------------------------------------------------------- */

STRING(stg_weak_msg,"New weak pointer at %p\n")

stg_mkWeakzh
{
    /* R1 = key
       R2 = value
       R3 = finalizer (or NULL)
    */
    W_ w;

    if (R3 == NULL) {
        R3 = stg_NO_FINALIZER_closure;
    }

    ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, stg_mkWeakzh );

    w = Hp - SIZEOF_StgWeak + WDS(1);
    SET_HDR(w, stg_WEAK_info, W_[CCCS]);

    // We don't care about cfinalizer here.
    // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or
    // something else?

    StgWeak_key(w)        = R1;
    StgWeak_value(w)      = R2;
    StgWeak_finalizer(w)  = R3;
    StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure;

    ACQUIRE_LOCK(sm_mutex);
    StgWeak_link(w)   = W_[weak_ptr_list];
    W_[weak_ptr_list] = w;
    RELEASE_LOCK(sm_mutex);

    IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);

    RET_P(w);
}
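
/* Illustrative note, not part of the RTS: stg_mkWeakzh above and
 * stg_finalizzeWeakzh/stg_deRefWeakzh below surface as System.Mem.Weak.
 * A toy sketch (real code must ensure the key stays reachable for as long
 * as the value should live):
 *
 *     import System.Mem.Weak
 *
 *     demo :: IO ()
 *     demo = do
 *         let key = [1 .. 10] :: [Int]
 *         w <- mkWeak key (sum key) (Just (putStrLn "finalized"))
 *         v <- deRefWeak w     -- Just 55 while key is still reachable
 *         print v
 *         finalize w           -- run the finalizer now (finalizeWeak#)
 */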
stg_mkWeakForeignEnvzh
{
    /* R1 = key
       R2 = value
       R3 = finalizer
       R4 = pointer
       R5 = has environment (0 or 1)
       R6 = environment
    */
    W_ w, payload_words, words, p;
    W_ key, val, fptr, ptr, flag, eptr;

    key  = R1;
    val  = R2;
    fptr = R3;
    ptr  = R4;
    flag = R5;
    eptr = R6;

    ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, stg_mkWeakForeignEnvzh );

    w = Hp - SIZEOF_StgWeak + WDS(1);
    SET_HDR(w, stg_WEAK_info, W_[CCCS]);

    payload_words = 4;
    words         = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    ("ptr" p)     = foreign "C" allocate(MyCapability() "ptr", words) [];

    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);

    StgArrWords_bytes(p)     = WDS(payload_words);
    StgArrWords_payload(p,0) = fptr;
    StgArrWords_payload(p,1) = ptr;
    StgArrWords_payload(p,2) = eptr;
    StgArrWords_payload(p,3) = flag;

    // We don't care about the value here.
    // Should StgWeak_value(w) be stg_NO_FINALIZER_closure or something else?

    StgWeak_key(w)        = key;
    StgWeak_value(w)      = val;
    StgWeak_finalizer(w)  = stg_NO_FINALIZER_closure;
    StgWeak_cfinalizer(w) = p;

    ACQUIRE_LOCK(sm_mutex);
    StgWeak_link(w)   = W_[weak_ptr_list];
    W_[weak_ptr_list] = w;
    RELEASE_LOCK(sm_mutex);

    IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);

    RET_P(w);
}

stg_finalizzeWeakzh
{
    /* R1 = weak ptr */
    W_ w, f, arr;
    w = R1;

    // already dead?
    if (GET_INFO(w) == stg_DEAD_WEAK_info) {
        RET_NP(0,stg_NO_FINALIZER_closure);
    }

    // kill it
#ifdef PROFILING
    // @LDV profiling
    // A weak pointer is inherently used, so we do not need to call
    // LDV_recordDead_FILL_SLOP_DYNAMIC():
    //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
    // or, LDV_recordDead():
    //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
    // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
    // large as weak pointers, so there is no need to fill the slop, either.
    // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
#endif

    //
    // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
    //
    SET_INFO(w,stg_DEAD_WEAK_info);
    LDV_RECORD_CREATE(w);

    f   = StgWeak_finalizer(w);
    arr = StgWeak_cfinalizer(w);

    StgDeadWeak_link(w) = StgWeak_link(w);

    if (arr != stg_NO_FINALIZER_closure) {
        foreign "C" runCFinalizer(StgArrWords_payload(arr,0),
                                  StgArrWords_payload(arr,1),
                                  StgArrWords_payload(arr,2),
                                  StgArrWords_payload(arr,3)) [];
    }

    /* return the finalizer */
    if (f == stg_NO_FINALIZER_closure) {
        RET_NP(0,stg_NO_FINALIZER_closure);
    } else {
        RET_NP(1,f);
    }
}

stg_deRefWeakzh
{
    /* R1 = weak ptr */
    W_ w, code, val;

    w = R1;
    if (GET_INFO(w) == stg_WEAK_info) {
        code = 1;
        val = StgWeak_value(w);
    } else {
        code = 0;
        val = w;
    }
    RET_NP(code,val);
}
/* -----------------------------------------------------------------------------
   Floating point operations.
   -------------------------------------------------------------------------- */

stg_decodeFloatzuIntzh
{
    W_ p;
    F_ arg;
    W_ mp_tmp1;
    W_ mp_tmp_w;

    STK_CHK_GEN( WDS(2), NO_PTRS, stg_decodeFloatzuIntzh );

    mp_tmp1  = Sp - WDS(1);
    mp_tmp_w = Sp - WDS(2);

    /* arguments: F1 = Float# */
    arg = F1;

    /* Perform the operation */
    foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];

    /* returns: (Int# (mantissa), Int# (exponent)) */
    RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
}

stg_decodeDoublezu2Intzh
{
    D_ arg;
    W_ p;
    W_ mp_tmp1;
    W_ mp_tmp2;
    W_ mp_result1;
    W_ mp_result2;

    STK_CHK_GEN( WDS(4), NO_PTRS, stg_decodeDoublezu2Intzh );

    mp_tmp1    = Sp - WDS(1);
    mp_tmp2    = Sp - WDS(2);
    mp_result1 = Sp - WDS(3);
    mp_result2 = Sp - WDS(4);

    /* arguments: D1 = Double# */
    arg = D1;

    /* Perform the operation */
    foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
                                    mp_result1 "ptr", mp_result2 "ptr",
                                    arg) [];

    /* returns:
       (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
    RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
}

/* -----------------------------------------------------------------------------
 * Concurrency primitives
 * -------------------------------------------------------------------------- */

stg_forkzh
{
    /* args: R1 = closure to fork */

    MAYBE_GC(R1_PTR, stg_forkzh);

    W_ closure;
    W_ threadid;
    closure = R1;

    ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
                                  RtsFlags_GcFlags_initialStkSize(RtsFlags),
                                  closure "ptr") [];

    /* start blocked if the current thread is blocked */
    StgTSO_flags(threadid) = %lobits16(
        TO_W_(StgTSO_flags(threadid)) |
        TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));

    foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];

    // context switch soon, but not immediately: we don't want every
    // forkIO to force a context-switch.
    Capability_context_switch(MyCapability()) = 1 :: CInt;

    RET_P(threadid);
}

stg_forkOnzh
{
    /* args: R1 = cpu, R2 = closure to fork */

    MAYBE_GC(R2_PTR, stg_forkOnzh);

    W_ cpu;
    W_ closure;
    W_ threadid;
    cpu = R1;
    closure = R2;

    ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
                                  RtsFlags_GcFlags_initialStkSize(RtsFlags),
                                  closure "ptr") [];

    /* start blocked if the current thread is blocked */
    StgTSO_flags(threadid) = %lobits16(
        TO_W_(StgTSO_flags(threadid)) |
        TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE));

    foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];

    // context switch soon, but not immediately: we don't want every
    // forkIO to force a context-switch.
    Capability_context_switch(MyCapability()) = 1 :: CInt;

    RET_P(threadid);
}

stg_yieldzh
{
    jump stg_yield_noregs;
}

stg_myThreadIdzh
{
    /* no args. */
    RET_P(CurrentTSO);
}

stg_labelThreadzh
{
    /* args:
          R1 = ThreadId#
          R2 = Addr# */
#ifdef DEBUG
    foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
#endif
    jump %ENTRY_CODE(Sp(0));
}

stg_isCurrentThreadBoundzh
{
    /* no args */
    W_ r;
    (r) = foreign "C" isThreadBound(CurrentTSO) [];
    RET_N(r);
}
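
/* Illustrative note, not part of the RTS: the codes returned by
 * stg_threadStatuszh below (16 = ThreadComplete, 17 = ThreadKilled,
 * anything else = the why_blocked value) are decoded by
 * GHC.Conc.threadStatus.  A hedged usage sketch:
 *
 *     import Control.Concurrent
 *     import GHC.Conc (threadStatus)
 *
 *     demo :: IO ()
 *     demo = do
 *         tid <- forkIO (threadDelay 1000000)
 *         st  <- threadStatus tid
 *         print st   -- e.g. ThreadRunning, or ThreadBlocked <reason>
 */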
stg_threadStatuszh
{
    /* args: R1 :: ThreadId# */
    W_ tso;
    W_ why_blocked;
    W_ what_next;
    W_ ret, cap, locked;

    tso = R1;

    what_next   = TO_W_(StgTSO_what_next(tso));
    why_blocked = TO_W_(StgTSO_why_blocked(tso));
    // Note: these two reads are not atomic, so they might end up
    // being inconsistent.  It doesn't matter, since we
    // only return one or the other.  If we wanted to return the
    // contents of block_info too, then we'd have to do some synchronisation.

    if (what_next == ThreadComplete) {
        ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
    } else {
        if (what_next == ThreadKilled) {
            ret = 17;
        } else {
            ret = why_blocked;
        }
    }

    cap = TO_W_(Capability_no(StgTSO_cap(tso)));

    if ((TO_W_(StgTSO_flags(tso)) & TSO_LOCKED) != 0) {
        locked = 1;
    } else {
        locked = 0;
    }

    RET_NNN(ret,cap,locked);
}

/* -----------------------------------------------------------------------------
 * TVar primitives
 * -------------------------------------------------------------------------- */

#define SP_OFF 0

// Catch retry frame ------------------------------------------------------------

INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  W_ unused3, P_ unused4, P_ unused5)
{
    W_ r, frame, trec, outer;

    frame = Sp;
    trec = StgTSO_trec(CurrentTSO);
    outer  = StgTRecHeader_enclosing_trec(trec);
    (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
    if (r != 0) {
        /* Succeeded (either first branch or second branch) */
        StgTSO_trec(CurrentTSO) = outer;
        Sp = Sp + SIZEOF_StgCatchRetryFrame;
        jump %ENTRY_CODE(Sp(SP_OFF));
    } else {
        /* Did not commit: re-execute */
        W_ new_trec;
        ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
        StgTSO_trec(CurrentTSO) = new_trec;
        if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
            R1 = StgCatchRetryFrame_alt_code(frame);
        } else {
            R1 = StgCatchRetryFrame_first_code(frame);
        }
        jump stg_ap_v_fast;
    }
}
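
/* Illustrative note, not part of the RTS: the CATCH_RETRY_FRAME above is
 * pushed by stg_catchRetryzh, i.e. by STM's orElse.  A minimal Haskell
 * sketch of the two-branch behaviour it implements (assumes the stm
 * package):
 *
 *     import Control.Concurrent.STM
 *
 *     -- retry in the first branch makes the frame run the alternative.
 *     takeEither :: TMVar a -> TMVar a -> STM a
 *     takeEither a b = takeTMVar a `orElse` takeTMVar b
 */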
// Atomically frame ------------------------------------------------------------

INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ code, P_ next_invariant_to_check, P_ result)
{
    W_ frame, trec, valid, next_invariant, q, outer;

    frame  = Sp;
    trec   = StgTSO_trec(CurrentTSO);
    result = R1;
    outer  = StgTRecHeader_enclosing_trec(trec);

    if (outer == NO_TREC) {
        /* First time back at the atomically frame -- pick up invariants */
        ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
        StgAtomicallyFrame_next_invariant_to_check(frame) = q;
        StgAtomicallyFrame_result(frame) = result;

    } else {
        /* Second/subsequent time back at the atomically frame -- abort the
         * tx that's checking the invariant and move on to the next one */
        StgTSO_trec(CurrentTSO) = outer;
        q = StgAtomicallyFrame_next_invariant_to_check(frame);
        StgInvariantCheckQueue_my_execution(q) = trec;
        foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
        /* Don't free trec -- it's linked from q and will be stashed in the
         * invariant if we eventually commit. */
        q = StgInvariantCheckQueue_next_queue_entry(q);
        StgAtomicallyFrame_next_invariant_to_check(frame) = q;
        trec = outer;
    }

    q = StgAtomicallyFrame_next_invariant_to_check(frame);

    if (q != END_INVARIANT_CHECK_QUEUE) {
        /* We can't commit yet: another invariant to check */
        ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
        StgTSO_trec(CurrentTSO) = trec;

        next_invariant = StgInvariantCheckQueue_invariant(q);
        R1 = StgAtomicInvariant_code(next_invariant);
        jump stg_ap_v_fast;

    } else {

        /* We've got no more invariants to check, try to commit */
        (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
        if (valid != 0) {
            /* Transaction was valid: commit succeeded */
            StgTSO_trec(CurrentTSO) = NO_TREC;
            R1 = StgAtomicallyFrame_result(frame);
            Sp = Sp + SIZEOF_StgAtomicallyFrame;
            jump %ENTRY_CODE(Sp(SP_OFF));
        } else {
            /* Transaction was not valid: try again */
            ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
            StgTSO_trec(CurrentTSO) = trec;
            StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
            R1 = StgAtomicallyFrame_code(frame);
            jump stg_ap_v_fast;
        }
    }
}

INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ code, P_ next_invariant_to_check, P_ result)
{
    W_ frame, trec, valid;

    frame = Sp;

    /* The TSO is currently waiting: should we stop waiting? */
    (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
    if (valid != 0) {
        /* Previous attempt is still valid: no point trying again yet */
        jump stg_block_noregs;
    } else {
        /* Previous attempt is no longer valid: try again */
        ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
        StgTSO_trec(CurrentTSO) = trec;
        StgHeader_info(frame) = stg_atomically_frame_info;
        R1 = StgAtomicallyFrame_code(frame);
        jump stg_ap_v_fast;
    }
}
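
/* Illustrative note, not part of the RTS: the invariant queue that the
 * atomically frame drains is filled by stg_checkzh (check#), which is
 * exposed as GHC.Conc.always.  A sketch, assuming this era's
 * STM-invariants API in base:
 *
 *     import GHC.Conc
 *
 *     demo :: IO ()
 *     demo = do
 *         tv <- atomically (newTVar (0 :: Int))
 *         atomically (always (fmap (>= 0) (readTVar tv)))
 *         atomically (writeTVar tv 1)  -- invariant re-checked at commit
 */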
// STM catch frame --------------------------------------------------------------

#define SP_OFF 0

/* Catch frames are very similar to update frames, but when entering
 * one we just pop the frame off the stack and perform the correct
 * kind of return to the activation record underneath us on the stack.
 */

INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  P_ unused3, P_ unused4)
{
    W_ r, frame, trec, outer;
    frame = Sp;
    trec = StgTSO_trec(CurrentTSO);
    outer  = StgTRecHeader_enclosing_trec(trec);
    (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
    if (r != 0) {
        /* Commit succeeded */
        StgTSO_trec(CurrentTSO) = outer;
        Sp = Sp + SIZEOF_StgCatchSTMFrame;
        jump Sp(SP_OFF);
    } else {
        /* Commit failed */
        W_ new_trec;
        ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
        StgTSO_trec(CurrentTSO) = new_trec;
        R1 = StgCatchSTMFrame_code(frame);
        jump stg_ap_v_fast;
    }
}


// Primop definition ------------------------------------------------------------

stg_atomicallyzh
{
    W_ frame;
    W_ old_trec;
    W_ new_trec;

    // stmStartTransaction may allocate
    MAYBE_GC (R1_PTR, stg_atomicallyzh);

    /* Args: R1 = m :: STM a */
    STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, stg_atomicallyzh);

    old_trec = StgTSO_trec(CurrentTSO);

    /* Nested transactions are not allowed; raise an exception */
    if (old_trec != NO_TREC) {
        R1 = base_ControlziExceptionziBase_nestedAtomically_closure;
        jump stg_raisezh;
    }

    /* Set up the atomically frame */
    Sp = Sp - SIZEOF_StgAtomicallyFrame;
    frame = Sp;

    SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
    StgAtomicallyFrame_code(frame) = R1;
    StgAtomicallyFrame_result(frame) = NO_TREC;
    StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;

    /* Start the memory transaction */
    ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
    StgTSO_trec(CurrentTSO) = new_trec;

    /* Apply R1 to the realworld token */
    jump stg_ap_v_fast;
}


stg_catchSTMzh
{
    W_ frame;

    /* Args: R1 :: STM a */
    /* Args: R2 :: Exception -> STM a */
    STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, stg_catchSTMzh);

    /* Set up the catch frame */
    Sp = Sp - SIZEOF_StgCatchSTMFrame;
    frame = Sp;

    SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
    StgCatchSTMFrame_handler(frame) = R2;
    StgCatchSTMFrame_code(frame) = R1;

    /* Start a nested transaction to run the body of the try block in */
    W_ cur_trec;
    W_ new_trec;
    cur_trec = StgTSO_trec(CurrentTSO);
    ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
    StgTSO_trec(CurrentTSO) = new_trec;

    /* Apply R1 to the realworld token */
    jump stg_ap_v_fast;
}


stg_catchRetryzh
{
    W_ frame;
    W_ new_trec;
    W_ trec;

    // stmStartTransaction may allocate
    MAYBE_GC (R1_PTR & R2_PTR, stg_catchRetryzh);

    /* Args: R1 :: STM a */
    /* Args: R2 :: STM a */
    STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, stg_catchRetryzh);

    /* Start a nested transaction within which to run the first code */
    trec = StgTSO_trec(CurrentTSO);
    ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
    StgTSO_trec(CurrentTSO) = new_trec;

    /* Set up the catch-retry frame */
    Sp = Sp - SIZEOF_StgCatchRetryFrame;
    frame = Sp;

    SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
    StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
    StgCatchRetryFrame_first_code(frame) = R1;
    StgCatchRetryFrame_alt_code(frame) = R2;

    /* Apply R1 to the realworld token */
    jump stg_ap_v_fast;
}
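
/* Illustrative note, not part of the RTS: stg_catchSTMzh above implements
 * catchSTM; writes made in the nested transaction are discarded before
 * the handler runs.  A sketch, assuming the Exception-polymorphic
 * catchSTM/throwSTM signatures of a recent stm/base:
 *
 *     import Control.Monad.STM (STM, catchSTM, throwSTM)
 *     import Control.Concurrent.STM.TVar (TVar, readTVar, writeTVar)
 *     import Control.Exception (ArithException(Overflow))
 *
 *     -- The write of 42 is rolled back before the handler reads tv.
 *     demo :: TVar Int -> STM Int
 *     demo tv = catchSTM (do writeTVar tv 42
 *                            throwSTM Overflow)
 *                        handler
 *       where
 *         handler :: ArithException -> STM Int
 *         handler _ = readTVar tv
 */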
"ptr") []; LOAD_THREAD_STATE(); frame = Sp; trec = StgTSO_trec(CurrentTSO); outer = StgTRecHeader_enclosing_trec(trec); if (frame_type == CATCH_RETRY_FRAME) { // The retry reaches a CATCH_RETRY_FRAME before the atomic frame ASSERT(outer != NO_TREC); // Abort the transaction attempting the current branch foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") []; foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") []; if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) { // Retry in the first branch: try the alternative ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") []; StgTSO_trec(CurrentTSO) = trec; StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true; R1 = StgCatchRetryFrame_alt_code(frame); jump stg_ap_v_fast; } else { // Retry in the alternative code: propagate the retry StgTSO_trec(CurrentTSO) = outer; Sp = Sp + SIZEOF_StgCatchRetryFrame; goto retry_pop_stack; } } // We've reached the ATOMICALLY_FRAME: attempt to wait ASSERT(frame_type == ATOMICALLY_FRAME); if (outer != NO_TREC) { // We called retry while checking invariants, so abort the current // invariant check (merging its TVar accesses into the parents read // set so we'll wait on them) foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") []; foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") []; trec = outer; StgTSO_trec(CurrentTSO) = trec; outer = StgTRecHeader_enclosing_trec(trec); } ASSERT(outer == NO_TREC); (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") []; if (r != 0) { // Transaction was valid: stmWait put us on the TVars' queues, we now block StgHeader_info(frame) = stg_atomically_waiting_frame_info; Sp = frame; // Fix up the stack in the unregisterised case: the return convention is different. 
        R3 = trec; // passing to stmWaitUnblock()
        jump stg_block_stmwait;
    } else {
        // Transaction was not valid: retry immediately
        ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
        StgTSO_trec(CurrentTSO) = trec;
        R1 = StgAtomicallyFrame_code(frame);
        Sp = frame;
        jump stg_ap_v_fast;
    }
}


stg_checkzh
{
    W_ trec, closure;

    /* Args: R1 = invariant closure */
    MAYBE_GC (R1_PTR, stg_checkzh);

    trec = StgTSO_trec(CurrentTSO);
    closure = R1;
    foreign "C" stmAddInvariantToCheck(MyCapability() "ptr",
                                       trec "ptr",
                                       closure "ptr") [];

    jump %ENTRY_CODE(Sp(0));
}


stg_newTVarzh
{
    W_ tv;
    W_ new_value;

    /* Args: R1 = initialisation value */

    MAYBE_GC (R1_PTR, stg_newTVarzh);
    new_value = R1;
    ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
    RET_P(tv);
}


stg_readTVarzh
{
    W_ trec;
    W_ tvar;
    W_ result;

    /* Args: R1 = TVar closure */

    MAYBE_GC (R1_PTR, stg_readTVarzh); // Call to stmReadTVar may allocate
    trec = StgTSO_trec(CurrentTSO);
    tvar = R1;
    ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];

    RET_P(result);
}

stg_readTVarIOzh
{
    W_ result;

again:
    result = StgTVar_current_value(R1);
    if (%INFO_PTR(result) == stg_TREC_HEADER_info) {
        goto again;
    }
    RET_P(result);
}

stg_writeTVarzh
{
    W_ trec;
    W_ tvar;
    W_ new_value;

    /* Args: R1 = TVar closure */
    /*       R2 = New value    */

    MAYBE_GC (R1_PTR & R2_PTR, stg_writeTVarzh); // Call to stmWriteTVar may allocate
    trec = StgTSO_trec(CurrentTSO);
    tvar = R1;
    new_value = R2;
    foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];

    jump %ENTRY_CODE(Sp(0));
}


/* -----------------------------------------------------------------------------
 * MVar primitives
 *
 * take & putMVar work as follows.  Firstly, an important invariant:
 *
 *    If the MVar is full, then the blocking queue contains only
 *    threads blocked on putMVar, and if the MVar is empty then the
 *    blocking queue contains only threads blocked on takeMVar.
 *
 * takeMvar:
 *    MVar empty : then add ourselves to the blocking queue
 *    MVar full  : remove the value from the MVar, and
 *                 blocking queue empty     : return
 *                 blocking queue non-empty : perform the first blocked putMVar
 *                                            from the queue, and wake up the
 *                                            thread (MVar is now full again)
 *
 * putMVar is just the dual of the above algorithm.
 *
 * How do we "perform a putMVar"?  Well, we have to fiddle around with
 * the stack of the thread waiting to do the putMVar.  See
 * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
 * the stack layout, and the PerformPut and PerformTake macros below.
 *
 * It is important that a blocked take or put is woken up with the
 * take/put already performed, because otherwise there would be a
 * small window of vulnerability where the thread could receive an
 * exception and never perform its take or put, and we'd end up with a
 * deadlock.
 *
 * -------------------------------------------------------------------------- */
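
/* Illustrative note, not part of the RTS: the invariant above (a full
 * MVar queues only putters, an empty one only takers) is what makes the
 * direct handover in take/putMVar below safe.  Minimal usage sketch:
 *
 *     import Control.Concurrent
 *
 *     demo :: IO ()
 *     demo = do
 *         mv <- newEmptyMVar
 *         _  <- forkIO (putMVar mv (42 :: Int))
 *         v  <- takeMVar mv   -- blocks until the child's putMVar lands
 *         print v
 */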
stg_isEmptyMVarzh
{
    /* args: R1 = MVar closure */

    if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) {
        RET_N(1);
    } else {
        RET_N(0);
    }
}

stg_newMVarzh
{
    /* args: none */
    W_ mvar;

    ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, stg_newMVarzh );

    mvar = Hp - SIZEOF_StgMVar + WDS(1);
    SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
        // MVARs start dirty: generation 0 has no mutable list
    StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
    RET_P(mvar);
}


#define PerformTake(stack, value)               \
    W_ sp;                                      \
    sp = StgStack_sp(stack);                    \
    W_[sp + WDS(1)] = value;                    \
    W_[sp + WDS(0)] = stg_gc_unpt_r1_info;

#define PerformPut(stack,lval)                  \
    W_ sp;                                      \
    sp = StgStack_sp(stack) + WDS(3);           \
    StgStack_sp(stack) = sp;                    \
    lval = W_[sp - WDS(1)];


stg_takeMVarzh
{
    W_ mvar, val, info, tso, q;

    /* args: R1 = MVar closure */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
    }

    /* If the MVar is empty, put ourselves on its blocking queue,
     * and wait until we're woken up.
     */
    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {

        // Note [mvar-heap-check] We want to do the heap check in the
        // branch here, to avoid the conditional in the common case.
        // However, we've already locked the MVar above, so we better
        // be careful to unlock it again if the heap check fails.
        // Unfortunately we don't have an easy way to inject any code
        // into the heap check generated by the code generator, so we
        // have to do it in stg_gc_gen (see HeapStackCheck.cmm).
        HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR, stg_takeMVarzh);

        q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);

        SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
        StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
        StgMVarTSOQueue_tso(q)  = CurrentTSO;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = q;
        } else {
            StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q;
            foreign "C" recordClosureMutated(MyCapability() "ptr",
                                             StgMVar_tail(mvar)) [];
        }
        StgTSO__link(CurrentTSO)       = q;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgMVar_tail(mvar)             = q;

        R1 = mvar;
        jump stg_block_takemvar;
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    q = StgMVar_head(mvar);
loop:
    if (q == stg_END_TSO_QUEUE_closure) {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
        RET_P(val);
    }
    if (StgHeader_info(q) == stg_IND_info ||
        StgHeader_info(q) == stg_MSG_NULL_info) {
        q = StgInd_indirectee(q);
        goto loop;
    }

    // There are putMVar(s) waiting... wake up the first thread on the queue

    tso = StgMVarTSOQueue_tso(q);
    StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
    if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
        StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
    }

    ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
    ASSERT(StgTSO_block_info(tso) == mvar);

    // actually perform the putMVar for the thread that we just woke up
    W_ stack;
    stack = StgTSO_stackobj(tso);
    PerformPut(stack, StgMVar_value(mvar));

    // indicate that the MVar operation has now completed.
    StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;

    // no need to mark the TSO dirty, we have only written END_TSO_QUEUE.
    foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];

    unlockClosure(mvar, stg_MVAR_DIRTY_info);
    RET_P(val);
}


stg_tryTakeMVarzh
{
    W_ mvar, val, info, tso, q;

    /* args: R1 = MVar closure */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    /* If the MVar is empty, put ourselves on its blocking queue,
     * and wait until we're woken up.
     */
    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        unlockClosure(mvar, info);
#endif
        /* HACK: we need a pointer to pass back,
         * so we abuse NO_FINALIZER_closure
         */
        RET_NP(0, stg_NO_FINALIZER_closure);
    }

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    q = StgMVar_head(mvar);
loop:
    if (q == stg_END_TSO_QUEUE_closure) {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
        RET_NP(1, val);
    }
    if (StgHeader_info(q) == stg_IND_info ||
        StgHeader_info(q) == stg_MSG_NULL_info) {
        q = StgInd_indirectee(q);
        goto loop;
    }

    // There are putMVar(s) waiting... wake up the first thread on the queue

    tso = StgMVarTSOQueue_tso(q);
    StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
    if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
        StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
    }

    ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
    ASSERT(StgTSO_block_info(tso) == mvar);

    // actually perform the putMVar for the thread that we just woke up
    W_ stack;
    stack = StgTSO_stackobj(tso);
    PerformPut(stack, StgMVar_value(mvar));

    // indicate that the MVar operation has now completed.
    StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;

    // no need to mark the TSO dirty, we have only written END_TSO_QUEUE.
    foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];

    unlockClosure(mvar, stg_MVAR_DIRTY_info);
    RET_NP(1,val);
}


stg_putMVarzh
{
    W_ mvar, val, info, tso, q;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;
    val  = R2;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {

        // see Note [mvar-heap-check] above
        HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR & R2_PTR, stg_putMVarzh);

        q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);

        SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
        StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
        StgMVarTSOQueue_tso(q)  = CurrentTSO;

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = q;
        } else {
            StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q;
            foreign "C" recordClosureMutated(MyCapability() "ptr",
                                             StgMVar_tail(mvar)) [];
        }
        StgTSO__link(CurrentTSO)       = q;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgMVar_tail(mvar)             = q;

        R1 = mvar;
        R2 = val;
        jump stg_block_putmvar;
    }

    q = StgMVar_head(mvar);
loop:
    if (q == stg_END_TSO_QUEUE_closure) {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = val;
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
        jump %ENTRY_CODE(Sp(0));
    }
    if (StgHeader_info(q) == stg_IND_info ||
        StgHeader_info(q) == stg_MSG_NULL_info) {
        q = StgInd_indirectee(q);
        goto loop;
    }

    // There are takeMVar(s) waiting: wake up the first one

    tso = StgMVarTSOQueue_tso(q);
    StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
    if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
        StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
    }

    ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
    ASSERT(StgTSO_block_info(tso) == mvar);

    // actually perform the takeMVar
    W_ stack;
    stack = StgTSO_stackobj(tso);
    PerformTake(stack, val);

    // indicate that the MVar operation has now completed.
    StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;

    if (TO_W_(StgStack_dirty(stack)) == 0) {
        foreign "C" dirty_STACK(MyCapability() "ptr", stack "ptr") [];
    }

    foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];

    unlockClosure(mvar, stg_MVAR_DIRTY_info);
    jump %ENTRY_CODE(Sp(0));
}


stg_tryPutMVarzh
{
    W_ mvar, val, info, tso, q;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;
    val  = R2;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
    }

    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        unlockClosure(mvar, info);
#endif
        RET_N(0);
    }

    q = StgMVar_head(mvar);
loop:
    if (q == stg_END_TSO_QUEUE_closure) {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = val;
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
        RET_N(1);
    }
    if (StgHeader_info(q) == stg_IND_info ||
        StgHeader_info(q) == stg_MSG_NULL_info) {
        q = StgInd_indirectee(q);
        goto loop;
    }

    // There are takeMVar(s) waiting: wake up the first one

    tso = StgMVarTSOQueue_tso(q);
    StgMVar_head(mvar) = StgMVarTSOQueue_link(q);
    if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
        StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
    }

    ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16);
    ASSERT(StgTSO_block_info(tso) == mvar);

    // actually perform the takeMVar
    W_ stack;
    stack = StgTSO_stackobj(tso);
    PerformTake(stack, val);

    // indicate that the MVar operation has now completed.
    StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;

    if (TO_W_(StgStack_dirty(stack)) == 0) {
        foreign "C" dirty_STACK(MyCapability() "ptr", stack "ptr") [];
    }

    foreign "C" tryWakeupThread(MyCapability() "ptr", tso) [];

    unlockClosure(mvar, stg_MVAR_DIRTY_info);
    RET_N(1);
}


/* -----------------------------------------------------------------------------
   Stable pointer primitives
   -------------------------------------------------------------------------  */
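
/* Illustrative note, not part of the RTS: these primops surface as
 * Foreign.StablePtr and System.Mem.StableName.  Stable pointers keep
 * their referent alive across GCs (for handing to foreign code); stable
 * names don't, but give cheap equality.  Sketch:
 *
 *     import Foreign.StablePtr
 *     import System.Mem.StableName
 *
 *     demo :: IO ()
 *     demo = do
 *         let x = [1, 2, 3] :: [Int]
 *         sp <- newStablePtr x
 *         y  <- deRefStablePtr sp   -- the same object back
 *         print y
 *         freeStablePtr sp
 *         n1 <- makeStableName x
 *         n2 <- makeStableName x
 *         print (n1 == n2)          -- True: same heap object
 */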
stg_makeStableNamezh
{
    W_ index, sn_obj;

    ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, stg_makeStableNamezh );

    (index) = foreign "C" lookupStableName(R1 "ptr") [];

    /* Is there already a StableName for this heap object?
     * stable_ptr_table is a pointer to an array of snEntry structs.
     */
    if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
        sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
        SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
        StgStableName_sn(sn_obj) = index;
        snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
    } else {
        sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    }

    RET_P(sn_obj);
}


stg_makeStablePtrzh
{
    /* Args: R1 = a */
    W_ sp;
    MAYBE_GC(R1_PTR, stg_makeStablePtrzh);
    ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
    RET_N(sp);
}

stg_deRefStablePtrzh
{
    /* Args: R1 = the stable ptr */
    W_ r, sp;
    sp = R1;
    r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
    RET_P(r);
}

/* -----------------------------------------------------------------------------
   Bytecode object primitives
   -------------------------------------------------------------------------  */

stg_newBCOzh
{
    /* R1 = instrs
       R2 = literals
       R3 = ptrs
       R4 = arity
       R5 = bitmap array
    */
    W_ bco, bitmap_arr, bytes, words;

    bitmap_arr = R5;

    words = BYTES_TO_WDS(SIZEOF_StgBCO) + BYTE_ARR_WDS(bitmap_arr);
    bytes = WDS(words);

    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, stg_newBCOzh );

    bco = Hp - bytes + WDS(1);
    SET_HDR(bco, stg_BCO_info, W_[CCCS]);

    StgBCO_instrs(bco)   = R1;
    StgBCO_literals(bco) = R2;
    StgBCO_ptrs(bco)     = R3;
    StgBCO_arity(bco)    = HALF_W_(R4);
    StgBCO_size(bco)     = HALF_W_(words);

    // Copy the arity/bitmap info into the BCO
    W_ i;
    i = 0;
for:
    if (i < BYTE_ARR_WDS(bitmap_arr)) {
        StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
        i = i + 1;
        goto for;
    }

    RET_P(bco);
}


stg_mkApUpd0zh
{
    // R1 = the BCO# for the AP
    //
    W_ ap;

    // This function is *only* used to wrap zero-arity BCOs in an
    // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
    // saturated and always points directly to a FUN or BCO.
    ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
           StgBCO_arity(R1) == HALF_W_(0));

    HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, stg_mkApUpd0zh);
    TICK_ALLOC_UP_THK(0, 0);
    CCCS_ALLOC(SIZEOF_StgAP);

    ap = Hp - SIZEOF_StgAP + WDS(1);
    SET_HDR(ap, stg_AP_info, W_[CCCS]);

    StgAP_n_args(ap) = HALF_W_(0);
    StgAP_fun(ap) = R1;

    RET_P(ap);
}
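
/* Illustrative note, not part of the RTS: stg_unpackClosurezh below is the
 * basis of heap-inspection tools.  A hedged sketch of a direct call,
 * assuming this era's unpackClosure# type (the result layout changed in
 * later GHCs); real users normally go through a library instead:
 *
 *     {-# LANGUAGE MagicHash, UnboxedTuples #-}
 *     import GHC.Exts
 *
 *     -- (pointer fields, non-pointer payload bytes) of a closure.
 *     closureFields :: a -> (Int, Int)
 *     closureFields x = case unpackClosure# x of
 *         (# _info, ptrs, nptrs #) ->
 *             (I# (sizeofArray# ptrs), I# (sizeofByteArray# nptrs))
 */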
stg_unpackClosurezh
{
/* args: R1 = closure to analyze */
// TODO: Consider the absence of ptrs or nonptrs as a special case ?

    W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
    info  = %GET_STD_INFO(UNTAG(R1));

    // Some closures have non-standard layout, so we omit those here.
    W_ type;
    type = TO_W_(%INFO_TYPE(info));
    switch [0 .. N_CLOSURE_TYPES] type {
    case THUNK_SELECTOR : {
        ptrs = 1;
        nptrs = 0;
        goto out;
    }
    case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1,
         THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
        ptrs = 0;
        nptrs = 0;
        goto out;
    }
    default: {
        ptrs  = TO_W_(%INFO_PTRS(info));
        nptrs = TO_W_(%INFO_NPTRS(info));
        goto out;
    }}
out:

    W_ ptrs_arr_sz, ptrs_arr_cards, nptrs_arr_sz;
    nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
    ptrs_arr_cards = mutArrPtrsCardWords(ptrs);
    ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs) + WDS(ptrs_arr_cards);

    ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, stg_unpackClosurezh);

    W_ clos;
    clos = UNTAG(R1);

    ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
    nptrs_arr = Hp - nptrs_arr_sz + WDS(1);

    SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
    StgMutArrPtrs_size(ptrs_arr) = ptrs + ptrs_arr_cards;

    p = 0;
for:
    if(p < ptrs) {
        W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
        p = p + 1;
        goto for;
    }
    /* We can leave the card table uninitialised, since the array is
       allocated in the nursery.  The GC will fill it in if/when the array
       is promoted. */

    SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_bytes(nptrs_arr) = WDS(nptrs);
    p = 0;
for2:
    if(p < nptrs) {
        W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
        p = p + 1;
        goto for2;
    }
    RET_NPP(info, ptrs_arr, nptrs_arr);
}

/* -----------------------------------------------------------------------------
   Thread I/O blocking primitives
   -------------------------------------------------------------------------- */

/* Add a thread to the end of the blocked queue. (C-- version of the C
 * macro in Schedule.h).
 */
#define APPEND_TO_BLOCKED_QUEUE(tso)                    \
    ASSERT(StgTSO__link(tso) == END_TSO_QUEUE);         \
    if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
        W_[blocked_queue_hd] = tso;                     \
    } else {                                            \
        foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \
    }                                                   \
    W_[blocked_queue_tl] = tso;

stg_waitReadzh
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitRead# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}

stg_waitWritezh
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitWrite# on threaded RTS") never returns;
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
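
/* Illustrative note, not part of the RTS: waitRead#/waitWrite# back
 * threadWaitRead/threadWaitWrite on the non-threaded RTS (the threaded
 * RTS goes through the I/O manager instead, hence the barf above).
 * Sketch:
 *
 *     import Control.Concurrent (threadWaitRead)
 *     import System.Posix.Types (Fd(..))
 *
 *     waitStdin :: IO ()
 *     waitStdin = threadWaitRead (Fd 0)  -- block until fd 0 is readable
 */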
STRING(stg_delayzh_malloc_str, "stg_delayzh")
stg_delayzh
{
#ifdef mingw32_HOST_OS
    W_ ares;
    CInt reqID;
#else
    W_ t, prev, target;
#endif

#ifdef THREADED_RTS
    foreign "C" barf("delay# on threaded RTS") never returns;
#else

    /* args: R1 (microsecond delay amount) */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;

#ifdef mingw32_HOST_OS

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                              stg_delayzh_malloc_str);
    (reqID) = foreign "C" addDelayRequest(R1);
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;

    /* Having all async-blocked threads reside on the blocked_queue
     * simplifies matters, so change the status to OnDoProc and put the
     * delayed thread on the blocked_queue.
     */
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async_void;

#else

    W_ time;
    W_ divisor;
    (time) = foreign "C" getourtimeofday() [R1];
    divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags));
    if (divisor == 0) {
        divisor = 50;
    }
    divisor = divisor * 1000;
    target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
           + time + 1; /* Add 1 as getourtimeofday rounds down */
    StgTSO_block_info(CurrentTSO) = target;

    /* Insert the new thread in the sleeping queue. */
    prev = NULL;
    t = W_[sleeping_queue];
while:
    if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
        prev = t;
        t = StgTSO__link(t);
        goto while;
    }

    StgTSO__link(CurrentTSO) = t;
    if (prev == NULL) {
        W_[sleeping_queue] = CurrentTSO;
    } else {
        foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) [];
    }
    jump stg_block_noregs;
#endif
#endif /* !THREADED_RTS */
}


#ifdef mingw32_HOST_OS
STRING(stg_asyncReadzh_malloc_str, "stg_asyncReadzh")
stg_asyncReadzh
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncRead# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                              stg_asyncReadzh_malloc_str)
                   [R1,R2,R3,R4];
    (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}

STRING(stg_asyncWritezh_malloc_str, "stg_asyncWritezh")
stg_asyncWritezh
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncWrite# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;

    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                              stg_asyncWritezh_malloc_str)
                   [R1,R2,R3,R4];
    (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];

    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}

STRING(stg_asyncDoProczh_malloc_str, "stg_asyncDoProczh")
stg_asyncDoProczh
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
#else

    /* args: R1 = proc, R2 = param */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                              stg_asyncDoProczh_malloc_str)
                   [R1,R2];
    (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO) = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
#endif

/* -----------------------------------------------------------------------------
 * noDuplicate#
 *
 * noDuplicate# tries to ensure that none of the thunks under
 * evaluation by the current thread are also under evaluation by
 * another thread.  It relies on *both* threads doing noDuplicate#;
 * the second one will get blocked if they are duplicating some work.
 *
 * The idea is that noDuplicate# is used within unsafePerformIO to
 * ensure that the IO operation is performed at most once.
 * noDuplicate# calls threadPaused which acquires an exclusive lock on
 * all the thunks currently under evaluation by the current thread.
 *
 * Consider the following scenario.  There is a thunk A, whose
 * evaluation requires evaluating thunk B, where thunk B is an
 * unsafePerformIO.  Two threads, 1 and 2, both enter A.
 * Thread 2 is pre-empted before it enters B, and claims A by
 * blackholing it (in threadPaused).  Thread 1 now enters B, and calls
 * noDuplicate#.
 *
 *      thread 1                      thread 2
 *   +-----------+                 +---------------+
 *   |    -------+-----> A <-------+-------        |
 *   | update    |  BLACKHOLE      | marked_update |
 *   +-----------+                 +---------------+
 *   |           |                 |               |
 *        ...                            ...
 *   |           |                 +---------------+
 *   +-----------+
 *   |    ------+-----> B
 *   | update   |  BLACKHOLE
 *   +-----------+
 *
 * At this point: A is a blackhole, owned by thread 2.  noDuplicate#
 * calls threadPaused, which walks up the stack and
 *  - claims B on behalf of thread 1
 *  - then it reaches the update frame for A, which it sees is already
 *    a BLACKHOLE and is therefore owned by another thread.  Since
 *    thread 1 is duplicating work, the computation up to the update
 *    frame for A is suspended, including thunk B.
 *  - thunk B, which is an unsafePerformIO, has now been reverted to
 *    an AP_STACK which could be duplicated - BAD!
 *  - The solution is as follows: before calling threadPaused, we
 *    leave a frame on the stack (stg_noDuplicate_info) that will call
 *    noDuplicate# again if the current computation is suspended and
 *    restarted.
 *
 * See the test program in concurrent/prog003 for a way to demonstrate
 * this.  It needs to be run with +RTS -N3 or greater, and the bug
 * only manifests occasionally (once every 10 runs or so).
 * -------------------------------------------------------------------------- */

INFO_TABLE_RET(stg_noDuplicate, RET_SMALL)
{
    Sp_adj(1);
    jump stg_noDuplicatezh;
}

stg_noDuplicatezh
{
    STK_CHK_GEN( WDS(1), NO_PTRS, stg_noDuplicatezh );
    // leave noDuplicate frame in case the current
    // computation is suspended and restarted (see above).
    Sp_adj(-1);
    Sp(0) = stg_noDuplicate_info;

    SAVE_THREAD_STATE();
    ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
    foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];

    if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
        jump stg_threadFinished;
    } else {
        LOAD_THREAD_STATE();
        ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
        // remove the stg_noDuplicate frame if it is still there.
        if (Sp(0) == stg_noDuplicate_info) {
            Sp_adj(1);
        }
        jump %ENTRY_CODE(Sp(0));
    }
}
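
/* Illustrative note, not part of the RTS: noDuplicate# is precisely what
 * separates unsafePerformIO from unsafeDupablePerformIO; roughly,
 * unsafePerformIO m = unsafeDupablePerformIO (noDuplicate >> m).  Sketch:
 *
 *     import System.IO.Unsafe (unsafePerformIO)
 *     import GHC.IO (unsafeDupablePerformIO)
 *
 *     once, cheap :: IO a -> a
 *     once  = unsafePerformIO         -- protected by noDuplicate#
 *     cheap = unsafeDupablePerformIO  -- no noDuplicate#: two threads may
 *                                     -- both run the action
 */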
/* -----------------------------------------------------------------------------
   Misc. primitives
   -------------------------------------------------------------------------- */

stg_getApStackValzh
{
    W_ ap_stack, offset, val, ok;

    /* args: R1 = AP_STACK, R2 = offset */
    ap_stack = R1;
    offset   = R2;

    if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
        ok = 1;
        val = StgAP_STACK_payload(ap_stack,offset);
    } else {
        ok = 0;
        val = R1;
    }
    RET_NP(ok,val);
}

// Write the cost center stack of the first argument on stderr; return
// the second.  Possibly only makes sense for already evaluated things?
stg_traceCcszh
{
    W_ ccs;

#ifdef PROFILING
    ccs = StgHeader_ccs(UNTAG(R1));
    foreign "C" fprintCCS_stderr(ccs "ptr") [R2];
#endif

    R1 = R2;
    ENTER();
}

stg_getSparkzh
{
    W_ spark;

#ifndef THREADED_RTS
    RET_NP(0,ghczmprim_GHCziTypes_False_closure);
#else
    (spark) = foreign "C" findSpark(MyCapability());
    if (spark != 0) {
        RET_NP(1,spark);
    } else {
        RET_NP(0,ghczmprim_GHCziTypes_False_closure);
    }
#endif
}

stg_numSparkszh
{
    W_ n;
#ifdef THREADED_RTS
    (n) = foreign "C" dequeElements(Capability_sparks(MyCapability()));
#else
    n = 0;
#endif
    RET_N(n);
}

stg_traceEventzh
{
    W_ msg;
    msg = R1;

#if defined(TRACING) || defined(DEBUG)

    foreign "C" traceUserMsg(MyCapability() "ptr", msg "ptr") [];

#elif defined(DTRACE)

    W_ enabled;

    // We should go through the macro HASKELLEVENT_USER_MSG_ENABLED from
    // RtsProbes.h, but that header file includes unistd.h, which doesn't
    // work in Cmm
#if !defined(solaris2_TARGET_OS)
    (enabled) = foreign "C" __dtrace_isenabled$HaskellEvent$user__msg$v1() [];
#else
    // Solaris' DTrace can't handle the
    //     __dtrace_isenabled$HaskellEvent$user__msg$v1
    // call above.  This call is just for testing whether the user__msg
    // probe is enabled, and is here just as a performance optimization.
    // Since preparation for the probe is not that complex I disable usage of
    // this test above for Solaris and enable the probe usage manually
    // here.  Please note that this does not mean that the probe will be
    // used during the runtime!  You still need to enable it by consumption
    // in your dtrace script as you do with any other probe.
    enabled = 1;
#endif
    if (enabled != 0) {
        foreign "C" dtraceUserMsgWrapper(MyCapability() "ptr", msg "ptr") [];
    }

#endif
    jump %ENTRY_CODE(Sp(0));
}
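
/* Illustrative note, not part of the RTS: traceEvent# writes a user
 * message into the eventlog (run the program with +RTS -l and inspect
 * the .eventlog file).  Sketch, assuming base >= 4.5's Debug.Trace API:
 *
 *     import Debug.Trace (traceEventIO)
 *
 *     main :: IO ()
 *     main = do
 *         traceEventIO "compute start"
 *         print (sum [1 .. 1000 :: Int])
 *         traceEventIO "compute end"
 */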