X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2FPrimOps.cmm;h=121102c8e17f1a97dab5e546d89820bd25832166;hb=de75026f5a48d3d052135a973ab4dff76c5b20f5;hp=b8d8ccc5a9ad51e90603d4cbe77c55567ac287d5;hpb=aa9a4f1053d3c554629a2ec25955e7530c95b892;p=ghc-hetmet.git diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index b8d8ccc..121102c 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -49,9 +49,10 @@ import __gmpz_com; import pthread_mutex_lock; import pthread_mutex_unlock; #endif -import base_ControlziException_nestedAtomically_closure; +import base_ControlziExceptionziBase_nestedAtomically_closure; import EnterCriticalSection; import LeaveCriticalSection; +import ghczmprim_GHCziBool_False_closure; /*----------------------------------------------------------------------------- Array Primitives @@ -82,32 +83,65 @@ newByteArrayzh_fast RET_P(p); } +#define BA_ALIGN 16 +#define BA_MASK (BA_ALIGN-1) + newPinnedByteArrayzh_fast { - W_ words, payload_words, n, p; + W_ words, bytes, payload_words, p; MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast); - n = R1; - payload_words = ROUNDUP_BYTES_TO_WDS(n); + bytes = R1; + /* payload_words is what we will tell the profiler we had to allocate */ + payload_words = ROUNDUP_BYTES_TO_WDS(bytes); + /* When we actually allocate memory, we need to allow space for the + header: */ + bytes = bytes + SIZEOF_StgArrWords; + /* And we want to align to BA_ALIGN bytes, so we need to allow space + to shift up to BA_ALIGN - 1 bytes: */ + bytes = bytes + BA_ALIGN - 1; + /* Now we convert to a number of words: */ + words = ROUNDUP_BYTES_TO_WDS(bytes); - // We want an 8-byte aligned array. allocatePinned() gives us - // 8-byte aligned memory by default, but we want to align the - // *goods* inside the ArrWords object, so we have to check the - // size of the ArrWords header and adjust our size accordingly. - words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - if ((SIZEOF_StgArrWords & 7) != 0) { - words = words + 1; - } + ("ptr" p) = foreign "C" allocatePinned(words) []; + TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); + + /* Now we need to move p forward so that the payload is aligned + to BA_ALIGN bytes: */ + p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK); + + SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); + StgArrWords_words(p) = payload_words; + RET_P(p); +} + +newAlignedPinnedByteArrayzh_fast +{ + W_ words, bytes, payload_words, p, alignment; + + MAYBE_GC(NO_PTRS,newAlignedPinnedByteArrayzh_fast); + bytes = R1; + alignment = R2; + + /* payload_words is what we will tell the profiler we had to allocate */ + payload_words = ROUNDUP_BYTES_TO_WDS(bytes); + + /* When we actually allocate memory, we need to allow space for the + header: */ + bytes = bytes + SIZEOF_StgArrWords; + /* And we want to align to bytes, so we need to allow space + to shift up to bytes: */ + bytes = bytes + alignment - 1; + /* Now we convert to a number of words: */ + words = ROUNDUP_BYTES_TO_WDS(bytes); ("ptr" p) = foreign "C" allocatePinned(words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); - // Again, if the ArrWords header isn't a multiple of 8 bytes, we - // have to push the object forward one word so that the goods - // fall on an 8-byte boundary. - if ((SIZEOF_StgArrWords & 7) != 0) { - p = p + WDS(1); - } + /* Now we need to move p forward so that the payload is aligned + to bytes. Note that we are assuming that + is a power of 2, which is technically not guaranteed */ + p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1)); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); StgArrWords_words(p) = payload_words; @@ -193,7 +227,7 @@ newMutVarzh_fast atomicModifyMutVarzh_fast { - W_ mv, z, x, y, r; + W_ mv, f, z, x, y, r, h; /* Args: R1 :: MutVar#, R2 :: a -> (a,b) */ /* If x is the current contents of the MutVar#, then @@ -232,19 +266,15 @@ atomicModifyMutVarzh_fast HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast); -#if defined(THREADED_RTS) - ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2]; -#endif - - x = StgMutVar_var(R1); + mv = R1; + f = R2; TICK_ALLOC_THUNK_2(); CCCS_ALLOC(THUNK_2_SIZE); z = Hp - THUNK_2_SIZE + WDS(1); SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]); LDV_RECORD_CREATE(z); - StgThunk_payload(z,0) = R2; - StgThunk_payload(z,1) = x; + StgThunk_payload(z,0) = f; TICK_ALLOC_THUNK_1(); CCCS_ALLOC(THUNK_1_SIZE); @@ -253,9 +283,6 @@ atomicModifyMutVarzh_fast LDV_RECORD_CREATE(y); StgThunk_payload(y,0) = z; - StgMutVar_var(R1) = y; - foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1]; - TICK_ALLOC_THUNK_1(); CCCS_ALLOC(THUNK_1_SIZE); r = y - THUNK_1_SIZE; @@ -263,10 +290,20 @@ atomicModifyMutVarzh_fast LDV_RECORD_CREATE(r); StgThunk_payload(r,0) = z; -#if defined(THREADED_RTS) - RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") []; + retry: + x = StgMutVar_var(mv); + StgThunk_payload(z,1) = x; +#ifdef THREADED_RTS + (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) []; + if (h != x) { goto retry; } +#else + StgMutVar_var(mv) = y; #endif + if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) { + foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") []; + } + RET_P(r); } @@ -293,9 +330,14 @@ mkWeakzh_fast w = Hp - SIZEOF_StgWeak + WDS(1); SET_HDR(w, stg_WEAK_info, W_[CCCS]); - StgWeak_key(w) = R1; - StgWeak_value(w) = R2; - StgWeak_finalizer(w) = R3; + // We don't care about cfinalizer here. + // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or + // something else? + + StgWeak_key(w) = R1; + StgWeak_value(w) = R2; + StgWeak_finalizer(w) = R3; + StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure; StgWeak_link(w) = W_[weak_ptr_list]; W_[weak_ptr_list] = w; @@ -305,12 +347,65 @@ mkWeakzh_fast RET_P(w); } +mkWeakForeignEnvzh_fast +{ + /* R1 = key + R2 = value + R3 = finalizer + R4 = pointer + R5 = has environment (0 or 1) + R6 = environment + */ + W_ w, payload_words, words, p; + + W_ key, val, fptr, ptr, flag, eptr; + + key = R1; + val = R2; + fptr = R3; + ptr = R4; + flag = R5; + eptr = R6; + + ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakForeignEnvzh_fast ); + + w = Hp - SIZEOF_StgWeak + WDS(1); + SET_HDR(w, stg_WEAK_info, W_[CCCS]); + + payload_words = 4; + words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; + ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr", words) []; + + TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); + SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); + + StgArrWords_words(p) = payload_words; + StgArrWords_payload(p,0) = fptr; + StgArrWords_payload(p,1) = ptr; + StgArrWords_payload(p,2) = eptr; + StgArrWords_payload(p,3) = flag; + + // We don't care about the value here. + // Should StgWeak_value(w) be stg_NO_FINALIZER_closure or something else? + + StgWeak_key(w) = key; + StgWeak_value(w) = val; + StgWeak_finalizer(w) = stg_NO_FINALIZER_closure; + StgWeak_cfinalizer(w) = p; + + StgWeak_link(w) = W_[weak_ptr_list]; + W_[weak_ptr_list] = w; + + IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []); + + RET_P(w); +} finalizzeWeakzh_fast { /* R1 = weak ptr */ - W_ w, f; + W_ w, f, arr; w = R1; @@ -338,9 +433,18 @@ finalizzeWeakzh_fast SET_INFO(w,stg_DEAD_WEAK_info); LDV_RECORD_CREATE(w); - f = StgWeak_finalizer(w); + f = StgWeak_finalizer(w); + arr = StgWeak_cfinalizer(w); + StgDeadWeak_link(w) = StgWeak_link(w); + if (arr != stg_NO_FINALIZER_closure) { + foreign "C" runCFinalizer(StgArrWords_payload(arr,0), + StgArrWords_payload(arr,1), + StgArrWords_payload(arr,2), + StgArrWords_payload(arr,3)) []; + } + /* return the finalizer */ if (f == stg_NO_FINALIZER_closure) { RET_NP(0,stg_NO_FINALIZER_closure); @@ -969,8 +1073,9 @@ forkzh_fast foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") []; - // switch at the earliest opportunity - CInt[context_switch] = 1 :: CInt; + // context switch soon, but not immediately: we don't want every + // forkIO to force a context-switch. + Capability_context_switch(MyCapability()) = 1 :: CInt; RET_P(threadid); } @@ -998,8 +1103,9 @@ forkOnzh_fast foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") []; - // switch at the earliest opportunity - CInt[context_switch] = 1 :: CInt; + // context switch soon, but not immediately: we don't want every + // forkIO to force a context-switch. + Capability_context_switch(MyCapability()) = 1 :: CInt; RET_P(threadid); } @@ -1080,7 +1186,7 @@ INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - W_ unused3, "ptr" W_ unused4, "ptr" W_ unused5) + W_ unused3, P_ unused4, P_ unused5) { W_ r, frame, trec, outer; @@ -1114,7 +1220,7 @@ INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - "ptr" W_ unused3, "ptr" W_ unused4) + P_ unused3, P_ unused4) { W_ frame, trec, valid, next_invariant, q, outer; @@ -1176,7 +1282,7 @@ INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - "ptr" W_ unused3, "ptr" W_ unused4) + P_ unused3, P_ unused4) { W_ frame, trec, valid; @@ -1210,7 +1316,7 @@ INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - "ptr" W_ unused3, "ptr" W_ unused4) + P_ unused3, P_ unused4) { W_ r, frame, trec, outer; frame = Sp; @@ -1251,7 +1357,7 @@ atomicallyzh_fast /* Nested transactions are not allowed; raise an exception */ if (old_trec != NO_TREC) { - R1 = base_ControlziException_nestedAtomically_closure; + R1 = base_ControlziExceptionziBase_nestedAtomically_closure; jump raisezh_fast; } @@ -1452,6 +1558,17 @@ readTVarzh_fast RET_P(result); } +readTVarIOzh_fast +{ + W_ result; + +again: + result = StgTVar_current_value(R1); + if (%INFO_PTR(result) == stg_TREC_HEADER_info) { + goto again; + } + RET_P(result); +} writeTVarzh_fast { @@ -1569,8 +1686,11 @@ takeMVarzh_fast CurrentTSO) []; } StgTSO__link(CurrentTSO) = stg_END_TSO_QUEUE_closure; - StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; StgTSO_block_info(CurrentTSO) = mvar; + // write barrier for throwTo(), which looks at block_info + // if why_blocked==BlockedOnMVar. + prim %write_barrier() []; + StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; StgMVar_tail(mvar) = CurrentTSO; R1 = mvar; @@ -1726,8 +1846,11 @@ putMVarzh_fast CurrentTSO) []; } StgTSO__link(CurrentTSO) = stg_END_TSO_QUEUE_closure; - StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; StgTSO_block_info(CurrentTSO) = mvar; + // write barrier for throwTo(), which looks at block_info + // if why_blocked==BlockedOnMVar. + prim %write_barrier() []; + StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; StgMVar_tail(mvar) = CurrentTSO; R1 = mvar; @@ -2273,3 +2396,39 @@ getApStackValzh_fast } RET_NP(ok,val); } + +/* ----------------------------------------------------------------------------- + Misc. primitives + -------------------------------------------------------------------------- */ + +// Write the cost center stack of the first argument on stderr; return +// the second. Possibly only makes sense for already evaluated +// things? +traceCcszh_fast +{ + W_ ccs; + +#ifdef PROFILING + ccs = StgHeader_ccs(UNTAG(R1)); + foreign "C" fprintCCS_stderr(ccs "ptr") [R2]; +#endif + + R1 = R2; + ENTER(); +} + +getSparkzh_fast +{ + W_ spark; + +#ifndef THREADED_RTS + RET_NP(0,ghczmprim_GHCziBool_False_closure); +#else + (spark) = foreign "C" findSpark(MyCapability()); + if (spark != 0) { + RET_NP(1,spark); + } else { + RET_NP(0,ghczmprim_GHCziBool_False_closure); + } +#endif +}