X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2FPrimOps.cmm;h=121102c8e17f1a97dab5e546d89820bd25832166;hb=de75026f5a48d3d052135a973ab4dff76c5b20f5;hp=cb8626e5dde6c2db2bb6ded34687dd59290ec628;hpb=6015a94f9108a502150565577b66c23650796639;p=ghc-hetmet.git diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index cb8626e..121102c 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -27,6 +27,33 @@ #include "Cmm.h" +#ifdef __PIC__ +#ifndef mingw32_HOST_OS +import __gmpz_init; +import __gmpz_add; +import __gmpz_sub; +import __gmpz_mul; +import __gmpz_gcd; +import __gmpn_gcd_1; +import __gmpn_cmp; +import __gmpz_tdiv_q; +import __gmpz_tdiv_r; +import __gmpz_tdiv_qr; +import __gmpz_fdiv_qr; +import __gmpz_divexact; +import __gmpz_and; +import __gmpz_xor; +import __gmpz_ior; +import __gmpz_com; +#endif +import pthread_mutex_lock; +import pthread_mutex_unlock; +#endif +import base_ControlziExceptionziBase_nestedAtomically_closure; +import EnterCriticalSection; +import LeaveCriticalSection; +import ghczmprim_GHCziBool_False_closure; + /*----------------------------------------------------------------------------- Array Primitives @@ -56,32 +83,65 @@ newByteArrayzh_fast RET_P(p); } +#define BA_ALIGN 16 +#define BA_MASK (BA_ALIGN-1) + newPinnedByteArrayzh_fast { - W_ words, payload_words, n, p; + W_ words, bytes, payload_words, p; MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast); - n = R1; - payload_words = ROUNDUP_BYTES_TO_WDS(n); + bytes = R1; + /* payload_words is what we will tell the profiler we had to allocate */ + payload_words = ROUNDUP_BYTES_TO_WDS(bytes); + /* When we actually allocate memory, we need to allow space for the + header: */ + bytes = bytes + SIZEOF_StgArrWords; + /* And we want to align to BA_ALIGN bytes, so we need to allow space + to shift up to BA_ALIGN - 1 bytes: */ + bytes = bytes + BA_ALIGN - 1; + /* Now we convert to a number of words: */ + words = ROUNDUP_BYTES_TO_WDS(bytes); - // We want an 8-byte aligned array. allocatePinned() gives us - // 8-byte aligned memory by default, but we want to align the - // *goods* inside the ArrWords object, so we have to check the - // size of the ArrWords header and adjust our size accordingly. - words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - if ((SIZEOF_StgArrWords & 7) != 0) { - words = words + 1; - } + ("ptr" p) = foreign "C" allocatePinned(words) []; + TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); + + /* Now we need to move p forward so that the payload is aligned + to BA_ALIGN bytes: */ + p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK); + + SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); + StgArrWords_words(p) = payload_words; + RET_P(p); +} + +newAlignedPinnedByteArrayzh_fast +{ + W_ words, bytes, payload_words, p, alignment; + + MAYBE_GC(NO_PTRS,newAlignedPinnedByteArrayzh_fast); + bytes = R1; + alignment = R2; + + /* payload_words is what we will tell the profiler we had to allocate */ + payload_words = ROUNDUP_BYTES_TO_WDS(bytes); + + /* When we actually allocate memory, we need to allow space for the + header: */ + bytes = bytes + SIZEOF_StgArrWords; + /* And we want to align to bytes, so we need to allow space + to shift up to bytes: */ + bytes = bytes + alignment - 1; + /* Now we convert to a number of words: */ + words = ROUNDUP_BYTES_TO_WDS(bytes); ("ptr" p) = foreign "C" allocatePinned(words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); - // Again, if the ArrWords header isn't a multiple of 8 bytes, we - // have to push the object forward one word so that the goods - // fall on an 8-byte boundary. - if ((SIZEOF_StgArrWords & 7) != 0) { - p = p + WDS(1); - } + /* Now we need to move p forward so that the payload is aligned + to bytes. Note that we are assuming that + is a power of 2, which is technically not guaranteed */ + p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1)); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); StgArrWords_words(p) = payload_words; @@ -167,7 +227,7 @@ newMutVarzh_fast atomicModifyMutVarzh_fast { - W_ mv, z, x, y, r; + W_ mv, f, z, x, y, r, h; /* Args: R1 :: MutVar#, R2 :: a -> (a,b) */ /* If x is the current contents of the MutVar#, then @@ -206,19 +266,15 @@ atomicModifyMutVarzh_fast HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast); -#if defined(THREADED_RTS) - foreign "C" ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2]; -#endif - - x = StgMutVar_var(R1); + mv = R1; + f = R2; TICK_ALLOC_THUNK_2(); CCCS_ALLOC(THUNK_2_SIZE); z = Hp - THUNK_2_SIZE + WDS(1); SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]); LDV_RECORD_CREATE(z); - StgThunk_payload(z,0) = R2; - StgThunk_payload(z,1) = x; + StgThunk_payload(z,0) = f; TICK_ALLOC_THUNK_1(); CCCS_ALLOC(THUNK_1_SIZE); @@ -227,9 +283,6 @@ atomicModifyMutVarzh_fast LDV_RECORD_CREATE(y); StgThunk_payload(y,0) = z; - StgMutVar_var(R1) = y; - foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1]; - TICK_ALLOC_THUNK_1(); CCCS_ALLOC(THUNK_1_SIZE); r = y - THUNK_1_SIZE; @@ -237,10 +290,20 @@ atomicModifyMutVarzh_fast LDV_RECORD_CREATE(r); StgThunk_payload(r,0) = z; -#if defined(THREADED_RTS) - foreign "C" RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") []; + retry: + x = StgMutVar_var(mv); + StgThunk_payload(z,1) = x; +#ifdef THREADED_RTS + (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) []; + if (h != x) { goto retry; } +#else + StgMutVar_var(mv) = y; #endif + if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) { + foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") []; + } + RET_P(r); } @@ -267,9 +330,14 @@ mkWeakzh_fast w = Hp - SIZEOF_StgWeak + WDS(1); SET_HDR(w, stg_WEAK_info, W_[CCCS]); - StgWeak_key(w) = R1; - StgWeak_value(w) = R2; - StgWeak_finalizer(w) = R3; + // We don't care about cfinalizer here. + // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or + // something else? + + StgWeak_key(w) = R1; + StgWeak_value(w) = R2; + StgWeak_finalizer(w) = R3; + StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure; StgWeak_link(w) = W_[weak_ptr_list]; W_[weak_ptr_list] = w; @@ -279,12 +347,65 @@ mkWeakzh_fast RET_P(w); } +mkWeakForeignEnvzh_fast +{ + /* R1 = key + R2 = value + R3 = finalizer + R4 = pointer + R5 = has environment (0 or 1) + R6 = environment + */ + W_ w, payload_words, words, p; + + W_ key, val, fptr, ptr, flag, eptr; + + key = R1; + val = R2; + fptr = R3; + ptr = R4; + flag = R5; + eptr = R6; + + ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakForeignEnvzh_fast ); + + w = Hp - SIZEOF_StgWeak + WDS(1); + SET_HDR(w, stg_WEAK_info, W_[CCCS]); + + payload_words = 4; + words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; + ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr", words) []; + + TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); + SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); + + StgArrWords_words(p) = payload_words; + StgArrWords_payload(p,0) = fptr; + StgArrWords_payload(p,1) = ptr; + StgArrWords_payload(p,2) = eptr; + StgArrWords_payload(p,3) = flag; + + // We don't care about the value here. + // Should StgWeak_value(w) be stg_NO_FINALIZER_closure or something else? + + StgWeak_key(w) = key; + StgWeak_value(w) = val; + StgWeak_finalizer(w) = stg_NO_FINALIZER_closure; + StgWeak_cfinalizer(w) = p; + + StgWeak_link(w) = W_[weak_ptr_list]; + W_[weak_ptr_list] = w; + + IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []); + + RET_P(w); +} finalizzeWeakzh_fast { /* R1 = weak ptr */ - W_ w, f; + W_ w, f, arr; w = R1; @@ -312,9 +433,18 @@ finalizzeWeakzh_fast SET_INFO(w,stg_DEAD_WEAK_info); LDV_RECORD_CREATE(w); - f = StgWeak_finalizer(w); + f = StgWeak_finalizer(w); + arr = StgWeak_cfinalizer(w); + StgDeadWeak_link(w) = StgWeak_link(w); + if (arr != stg_NO_FINALIZER_closure) { + foreign "C" runCFinalizer(StgArrWords_payload(arr,0), + StgArrWords_payload(arr,1), + StgArrWords_payload(arr,2), + StgArrWords_payload(arr,3)) []; + } + /* return the finalizer */ if (f == stg_NO_FINALIZER_closure) { RET_NP(0,stg_NO_FINALIZER_closure); @@ -426,11 +556,11 @@ int64ToIntegerzh_fast hi = TO_W_(val >> 32); lo = TO_W_(val); - if ( hi != 0 && hi != 0xFFFFFFFF ) { - words_needed = 2; - } else { + if ( hi == 0 || (hi == 0xFFFFFFFF && lo != 0) ) { // minimum is one word words_needed = 1; + } else { + words_needed = 2; } ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed), @@ -850,6 +980,23 @@ decodeFloatzh_fast RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p); } +decodeFloatzuIntzh_fast +{ + W_ p; + F_ arg; + FETCH_MP_TEMP(mp_tmp1); + FETCH_MP_TEMP(mp_tmp_w); + + /* arguments: F1 = Float# */ + arg = F1; + + /* Perform the operation */ + foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) []; + + /* returns: (Int# (mantissa), Int# (exponent)) */ + RET_NN(W_[mp_tmp1], W_[mp_tmp_w]); +} + #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE) @@ -879,6 +1026,28 @@ decodeDoublezh_fast RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p); } +decodeDoublezu2Intzh_fast +{ + D_ arg; + W_ p; + FETCH_MP_TEMP(mp_tmp1); + FETCH_MP_TEMP(mp_tmp2); + FETCH_MP_TEMP(mp_result1); + FETCH_MP_TEMP(mp_result2); + + /* arguments: D1 = Double# */ + arg = D1; + + /* Perform the operation */ + foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr", + mp_result1 "ptr", mp_result2 "ptr", + arg) []; + + /* returns: + (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */ + RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]); +} + /* ----------------------------------------------------------------------------- * Concurrency primitives * -------------------------------------------------------------------------- */ @@ -896,10 +1065,17 @@ forkzh_fast ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", RtsFlags_GcFlags_initialStkSize(RtsFlags), closure "ptr") []; + + /* start blocked if the current thread is blocked */ + StgTSO_flags(threadid) = + StgTSO_flags(threadid) | (StgTSO_flags(CurrentTSO) & + (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32)); + foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") []; - // switch at the earliest opportunity - CInt[context_switch] = 1 :: CInt; + // context switch soon, but not immediately: we don't want every + // forkIO to force a context-switch. + Capability_context_switch(MyCapability()) = 1 :: CInt; RET_P(threadid); } @@ -919,10 +1095,17 @@ forkOnzh_fast ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", RtsFlags_GcFlags_initialStkSize(RtsFlags), closure "ptr") []; + + /* start blocked if the current thread is blocked */ + StgTSO_flags(threadid) = + StgTSO_flags(threadid) | (StgTSO_flags(CurrentTSO) & + (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32)); + foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") []; - // switch at the earliest opportunity - CInt[context_switch] = 1 :: CInt; + // context switch soon, but not immediately: we don't want every + // forkIO to force a context-switch. + Capability_context_switch(MyCapability()) = 1 :: CInt; RET_P(threadid); } @@ -957,18 +1140,45 @@ isCurrentThreadBoundzh_fast RET_N(r); } +threadStatuszh_fast +{ + /* args: R1 :: ThreadId# */ + W_ tso; + W_ why_blocked; + W_ what_next; + W_ ret; + + tso = R1; + loop: + if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) { + tso = StgTSO__link(tso); + goto loop; + } + + what_next = TO_W_(StgTSO_what_next(tso)); + why_blocked = TO_W_(StgTSO_why_blocked(tso)); + // Note: these two reads are not atomic, so they might end up + // being inconsistent. It doesn't matter, since we + // only return one or the other. If we wanted to return the + // contents of block_info too, then we'd have to do some synchronisation. + + if (what_next == ThreadComplete) { + ret = 16; // NB. magic, matches up with GHC.Conc.threadStatus + } else { + if (what_next == ThreadKilled) { + ret = 17; + } else { + ret = why_blocked; + } + } + RET_N(ret); +} /* ----------------------------------------------------------------------------- * TVar primitives * -------------------------------------------------------------------------- */ -#ifdef REG_R1 #define SP_OFF 0 -#define IF_NOT_REG_R1(x) -#else -#define SP_OFF 1 -#define IF_NOT_REG_R1(x) x -#endif // Catch retry frame ------------------------------------------------------------ @@ -976,10 +1186,9 @@ INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - W_ unused3, "ptr" W_ unused4, "ptr" W_ unused5) + W_ unused3, P_ unused4, P_ unused5) { W_ r, frame, trec, outer; - IF_NOT_REG_R1(W_ rval; rval = Sp(0); Sp_adj(1); ) frame = Sp; trec = StgTSO_trec(CurrentTSO); @@ -989,7 +1198,6 @@ INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME, /* Succeeded (either first branch or second branch) */ StgTSO_trec(CurrentTSO) = outer; Sp = Sp + SIZEOF_StgCatchRetryFrame; - IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;) jump %ENTRY_CODE(Sp(SP_OFF)); } else { /* Did not commit: re-execute */ @@ -1012,10 +1220,9 @@ INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - "ptr" W_ unused3, "ptr" W_ unused4) + P_ unused3, P_ unused4) { W_ frame, trec, valid, next_invariant, q, outer; - IF_NOT_REG_R1(W_ rval; rval = Sp(0); Sp_adj(1); ) frame = Sp; trec = StgTSO_trec(CurrentTSO); @@ -1059,7 +1266,6 @@ INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME, /* Transaction was valid: commit succeeded */ StgTSO_trec(CurrentTSO) = NO_TREC; Sp = Sp + SIZEOF_StgAtomicallyFrame; - IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;) jump %ENTRY_CODE(Sp(SP_OFF)); } else { /* Transaction was not valid: try again */ @@ -1076,10 +1282,9 @@ INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - "ptr" W_ unused3, "ptr" W_ unused4) + P_ unused3, P_ unused4) { W_ frame, trec, valid; - IF_NOT_REG_R1(W_ rval; rval = Sp(0); Sp_adj(1); ) frame = Sp; @@ -1087,9 +1292,6 @@ INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME, (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") []; if (valid != 0) { /* Previous attempt is still valid: no point trying again yet */ - IF_NOT_REG_R1(Sp_adj(-2); - Sp(1) = stg_NO_FINALIZER_closure; - Sp(0) = stg_ut_1_0_unreg_info;) jump stg_block_noregs; } else { /* Previous attempt is no longer valid: try again */ @@ -1103,11 +1305,7 @@ INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME, // STM catch frame -------------------------------------------------------------- -#ifdef REG_R1 #define SP_OFF 0 -#else -#define SP_OFF 1 -#endif /* Catch frames are very similar to update frames, but when entering * one we just pop the frame off the stack and perform the correct @@ -1118,9 +1316,8 @@ INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - "ptr" W_ unused3, "ptr" W_ unused4) + P_ unused3, P_ unused4) { - IF_NOT_REG_R1(W_ rval; rval = Sp(0); Sp_adj(1); ) W_ r, frame, trec, outer; frame = Sp; trec = StgTSO_trec(CurrentTSO); @@ -1130,7 +1327,6 @@ INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME, /* Commit succeeded */ StgTSO_trec(CurrentTSO) = outer; Sp = Sp + SIZEOF_StgCatchSTMFrame; - IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;) jump Sp(SP_OFF); } else { /* Commit failed */ @@ -1161,7 +1357,7 @@ atomicallyzh_fast /* Nested transactions are not allowed; raise an exception */ if (old_trec != NO_TREC) { - R1 = base_GHCziIOBase_NestedAtomically_closure; + R1 = base_ControlziExceptionziBase_nestedAtomically_closure; jump raisezh_fast; } @@ -1302,9 +1498,6 @@ retry_pop_stack: StgHeader_info(frame) = stg_atomically_waiting_frame_info; Sp = frame; // Fix up the stack in the unregisterised case: the return convention is different. - IF_NOT_REG_R1(Sp_adj(-2); - Sp(1) = stg_NO_FINALIZER_closure; - Sp(0) = stg_ut_1_0_unreg_info;) R3 = trec; // passing to stmWaitUnblock() jump stg_block_stmwait; } else { @@ -1365,6 +1558,17 @@ readTVarzh_fast RET_P(result); } +readTVarIOzh_fast +{ + W_ result; + +again: + result = StgTVar_current_value(R1); + if (%INFO_PTR(result) == stg_TREC_HEADER_info) { + goto again; + } + RET_P(result); +} writeTVarzh_fast { @@ -1421,7 +1625,7 @@ isEmptyMVarzh_fast { /* args: R1 = MVar closure */ - if (GET_INFO(R1) == stg_EMPTY_MVAR_info) { + if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) { RET_N(1); } else { RET_N(0); @@ -1436,7 +1640,8 @@ newMVarzh_fast ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast ); mvar = Hp - SIZEOF_StgMVar + WDS(1); - SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]); + SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]); + // MVARs start dirty: generation 0 has no mutable list StgMVar_head(mvar) = stg_END_TSO_QUEUE_closure; StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; @@ -1444,16 +1649,9 @@ newMVarzh_fast } -/* If R1 isn't available, pass it on the stack */ -#ifdef REG_R1 #define PerformTake(tso, value) \ W_[StgTSO_sp(tso) + WDS(1)] = value; \ W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info; -#else -#define PerformTake(tso, value) \ - W_[StgTSO_sp(tso) + WDS(1)] = value; \ - W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info; -#endif #define PerformPut(tso,lval) \ StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3); \ @@ -1471,21 +1669,31 @@ takeMVarzh_fast #else info = GET_INFO(mvar); #endif + + if (info == stg_MVAR_CLEAN_info) { + foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") []; + } /* If the MVar is empty, put ourselves on its blocking queue, * and wait until we're woken up. */ - if (info == stg_EMPTY_MVAR_info) { + if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) { if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { StgMVar_head(mvar) = CurrentTSO; } else { - StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO; + foreign "C" setTSOLink(MyCapability() "ptr", + StgMVar_tail(mvar) "ptr", + CurrentTSO) []; } - StgTSO_link(CurrentTSO) = stg_END_TSO_QUEUE_closure; - StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; + StgTSO__link(CurrentTSO) = stg_END_TSO_QUEUE_closure; StgTSO_block_info(CurrentTSO) = mvar; + // write barrier for throwTo(), which looks at block_info + // if why_blocked==BlockedOnMVar. + prim %write_barrier() []; + StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; StgMVar_tail(mvar) = CurrentTSO; + R1 = mvar; jump stg_block_takemvar; } @@ -1502,24 +1710,23 @@ takeMVarzh_fast /* actually perform the putMVar for the thread that we just woke up */ tso = StgMVar_head(mvar); PerformPut(tso,StgMVar_value(mvar)); - dirtyTSO(tso); -#if defined(GRAN) || defined(PAR) - /* ToDo: check 2nd arg (mvar) is right */ - ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar),mvar) []; - StgMVar_head(mvar) = tso; -#else - ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", - StgMVar_head(mvar) "ptr") []; + if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) { + foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; + } + + ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", + StgMVar_head(mvar) "ptr", 1) []; StgMVar_head(mvar) = tso; -#endif if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; } #if defined(THREADED_RTS) - unlockClosure(mvar, stg_FULL_MVAR_info); + unlockClosure(mvar, stg_MVAR_DIRTY_info); +#else + SET_INFO(mvar,stg_MVAR_DIRTY_info); #endif RET_P(val); } @@ -1529,9 +1736,9 @@ takeMVarzh_fast StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; #if defined(THREADED_RTS) - unlockClosure(mvar, stg_EMPTY_MVAR_info); + unlockClosure(mvar, stg_MVAR_DIRTY_info); #else - SET_INFO(mvar,stg_EMPTY_MVAR_info); + SET_INFO(mvar,stg_MVAR_DIRTY_info); #endif RET_P(val); @@ -1553,9 +1760,9 @@ tryTakeMVarzh_fast info = GET_INFO(mvar); #endif - if (info == stg_EMPTY_MVAR_info) { + if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) { #if defined(THREADED_RTS) - unlockClosure(mvar, stg_EMPTY_MVAR_info); + unlockClosure(mvar, info); #endif /* HACK: we need a pointer to pass back, * so we abuse NO_FINALIZER_closure @@ -1563,6 +1770,10 @@ tryTakeMVarzh_fast RET_NP(0, stg_NO_FINALIZER_closure); } + if (info == stg_MVAR_CLEAN_info) { + foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr"); + } + /* we got the value... */ val = StgMVar_value(mvar); @@ -1576,23 +1787,21 @@ tryTakeMVarzh_fast /* actually perform the putMVar for the thread that we just woke up */ tso = StgMVar_head(mvar); PerformPut(tso,StgMVar_value(mvar)); - dirtyTSO(tso); + if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) { + foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; + } -#if defined(GRAN) || defined(PAR) - /* ToDo: check 2nd arg (mvar) is right */ - ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") []; + ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", + StgMVar_head(mvar) "ptr", 1) []; StgMVar_head(mvar) = tso; -#else - ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", - StgMVar_head(mvar) "ptr") []; - StgMVar_head(mvar) = tso; -#endif if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; } #if defined(THREADED_RTS) - unlockClosure(mvar, stg_FULL_MVAR_info); + unlockClosure(mvar, stg_MVAR_DIRTY_info); +#else + SET_INFO(mvar,stg_MVAR_DIRTY_info); #endif } else @@ -1600,9 +1809,9 @@ tryTakeMVarzh_fast /* No further putMVars, MVar is now empty */ StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; #if defined(THREADED_RTS) - unlockClosure(mvar, stg_EMPTY_MVAR_info); + unlockClosure(mvar, stg_MVAR_DIRTY_info); #else - SET_INFO(mvar,stg_EMPTY_MVAR_info); + SET_INFO(mvar,stg_MVAR_DIRTY_info); #endif } @@ -1612,28 +1821,40 @@ tryTakeMVarzh_fast putMVarzh_fast { - W_ mvar, info, tso; + W_ mvar, val, info, tso; /* args: R1 = MVar, R2 = value */ mvar = R1; + val = R2; #if defined(THREADED_RTS) - ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2]; + ("ptr" info) = foreign "C" lockClosure(mvar "ptr") []; #else info = GET_INFO(mvar); #endif - if (info == stg_FULL_MVAR_info) { + if (info == stg_MVAR_CLEAN_info) { + foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr"); + } + + if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) { if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { StgMVar_head(mvar) = CurrentTSO; } else { - StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO; + foreign "C" setTSOLink(MyCapability() "ptr", + StgMVar_tail(mvar) "ptr", + CurrentTSO) []; } - StgTSO_link(CurrentTSO) = stg_END_TSO_QUEUE_closure; - StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; + StgTSO__link(CurrentTSO) = stg_END_TSO_QUEUE_closure; StgTSO_block_info(CurrentTSO) = mvar; + // write barrier for throwTo(), which looks at block_info + // if why_blocked==BlockedOnMVar. + prim %write_barrier() []; + StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; StgMVar_tail(mvar) = CurrentTSO; + R1 = mvar; + R2 = val; jump stg_block_putmvar; } @@ -1645,36 +1866,35 @@ putMVarzh_fast /* actually perform the takeMVar */ tso = StgMVar_head(mvar); - PerformTake(tso, R2); - dirtyTSO(tso); + PerformTake(tso, val); + if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) { + foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; + } -#if defined(GRAN) || defined(PAR) - /* ToDo: check 2nd arg (mvar) is right */ - ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") []; + ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", + StgMVar_head(mvar) "ptr", 1) []; StgMVar_head(mvar) = tso; -#else - ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") []; - StgMVar_head(mvar) = tso; -#endif if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; } #if defined(THREADED_RTS) - unlockClosure(mvar, stg_EMPTY_MVAR_info); + unlockClosure(mvar, stg_MVAR_DIRTY_info); +#else + SET_INFO(mvar,stg_MVAR_DIRTY_info); #endif jump %ENTRY_CODE(Sp(0)); } else { /* No further takes, the MVar is now full. */ - StgMVar_value(mvar) = R2; + StgMVar_value(mvar) = val; #if defined(THREADED_RTS) - unlockClosure(mvar, stg_FULL_MVAR_info); + unlockClosure(mvar, stg_MVAR_DIRTY_info); #else - SET_INFO(mvar,stg_FULL_MVAR_info); + SET_INFO(mvar,stg_MVAR_DIRTY_info); #endif jump %ENTRY_CODE(Sp(0)); } @@ -1696,13 +1916,17 @@ tryPutMVarzh_fast info = GET_INFO(mvar); #endif - if (info == stg_FULL_MVAR_info) { + if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) { #if defined(THREADED_RTS) - unlockClosure(mvar, stg_FULL_MVAR_info); + unlockClosure(mvar, info); #endif RET_N(0); } + if (info == stg_MVAR_CLEAN_info) { + foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr"); + } + if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) { /* There are takeMVar(s) waiting: wake up the first one @@ -1712,23 +1936,22 @@ tryPutMVarzh_fast /* actually perform the takeMVar */ tso = StgMVar_head(mvar); PerformTake(tso, R2); - dirtyTSO(tso); + if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) { + foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; + } -#if defined(GRAN) || defined(PAR) - /* ToDo: check 2nd arg (mvar) is right */ - ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") []; - StgMVar_head(mvar) = tso; -#else - ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") []; + ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", + StgMVar_head(mvar) "ptr", 1) []; StgMVar_head(mvar) = tso; -#endif if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; } #if defined(THREADED_RTS) - unlockClosure(mvar, stg_EMPTY_MVAR_info); + unlockClosure(mvar, stg_MVAR_DIRTY_info); +#else + SET_INFO(mvar,stg_MVAR_DIRTY_info); #endif } else @@ -1737,9 +1960,9 @@ tryPutMVarzh_fast StgMVar_value(mvar) = R2; #if defined(THREADED_RTS) - unlockClosure(mvar, stg_FULL_MVAR_info); + unlockClosure(mvar, stg_MVAR_DIRTY_info); #else - SET_INFO(mvar,stg_FULL_MVAR_info); + SET_INFO(mvar,stg_MVAR_DIRTY_info); #endif } @@ -1935,11 +2158,11 @@ for2: * macro in Schedule.h). */ #define APPEND_TO_BLOCKED_QUEUE(tso) \ - ASSERT(StgTSO_link(tso) == END_TSO_QUEUE); \ + ASSERT(StgTSO__link(tso) == END_TSO_QUEUE); \ if (W_[blocked_queue_hd] == END_TSO_QUEUE) { \ W_[blocked_queue_hd] = tso; \ } else { \ - StgTSO_link(W_[blocked_queue_tl]) = tso; \ + foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \ } \ W_[blocked_queue_tl] = tso; @@ -1947,7 +2170,7 @@ waitReadzh_fast { /* args: R1 */ #ifdef THREADED_RTS - foreign "C" barf("waitRead# on threaded RTS"); + foreign "C" barf("waitRead# on threaded RTS") never returns; #else ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16); @@ -1964,7 +2187,7 @@ waitWritezh_fast { /* args: R1 */ #ifdef THREADED_RTS - foreign "C" barf("waitWrite# on threaded RTS"); + foreign "C" barf("waitWrite# on threaded RTS") never returns; #else ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16); @@ -1989,7 +2212,7 @@ delayzh_fast #endif #ifdef THREADED_RTS - foreign "C" barf("delay# on threaded RTS"); + foreign "C" barf("delay# on threaded RTS") never returns; #else /* args: R1 (microsecond delay amount) */ @@ -2020,7 +2243,11 @@ delayzh_fast W_ time; W_ divisor; (time) = foreign "C" getourtimeofday() [R1]; - divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags))*1000; + divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags)); + if (divisor == 0) { + divisor = 50; + } + divisor = divisor * 1000; target = ((R1 + divisor - 1) / divisor) /* divide rounding up */ + time + 1; /* Add 1 as getourtimeofday rounds down */ StgTSO_block_info(CurrentTSO) = target; @@ -2031,15 +2258,15 @@ delayzh_fast while: if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) { prev = t; - t = StgTSO_link(t); + t = StgTSO__link(t); goto while; } - StgTSO_link(CurrentTSO) = t; + StgTSO__link(CurrentTSO) = t; if (prev == NULL) { W_[sleeping_queue] = CurrentTSO; } else { - StgTSO_link(prev) = CurrentTSO; + foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) []; } jump stg_block_noregs; #endif @@ -2055,7 +2282,7 @@ asyncReadzh_fast CInt reqID; #ifdef THREADED_RTS - foreign "C" barf("asyncRead# on threaded RTS"); + foreign "C" barf("asyncRead# on threaded RTS") never returns; #else /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */ @@ -2083,7 +2310,7 @@ asyncWritezh_fast CInt reqID; #ifdef THREADED_RTS - foreign "C" barf("asyncWrite# on threaded RTS"); + foreign "C" barf("asyncWrite# on threaded RTS") never returns; #else /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */ @@ -2111,7 +2338,7 @@ asyncDoProczh_fast CInt reqID; #ifdef THREADED_RTS - foreign "C" barf("asyncDoProc# on threaded RTS"); + foreign "C" barf("asyncDoProc# on threaded RTS") never returns; #else /* args: R1 = proc, R2 = param */ @@ -2169,3 +2396,39 @@ getApStackValzh_fast } RET_NP(ok,val); } + +/* ----------------------------------------------------------------------------- + Misc. primitives + -------------------------------------------------------------------------- */ + +// Write the cost center stack of the first argument on stderr; return +// the second. Possibly only makes sense for already evaluated +// things? +traceCcszh_fast +{ + W_ ccs; + +#ifdef PROFILING + ccs = StgHeader_ccs(UNTAG(R1)); + foreign "C" fprintCCS_stderr(ccs "ptr") [R2]; +#endif + + R1 = R2; + ENTER(); +} + +getSparkzh_fast +{ + W_ spark; + +#ifndef THREADED_RTS + RET_NP(0,ghczmprim_GHCziBool_False_closure); +#else + (spark) = foreign "C" findSpark(MyCapability()); + if (spark != 0) { + RET_NP(1,spark); + } else { + RET_NP(0,ghczmprim_GHCziBool_False_closure); + } +#endif +}