X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2FPrimOps.cmm;h=d7cc3e82ec2340f4db1ae12ac794724e61920188;hb=4180687e4fa56dd82407ba950e89bb6e09006fc3;hp=84567fee8fa2eda2cc84d28f452be0e8ab31d914;hpb=1b61c2db6a8d6627577bcd7876474a0c5bd1eedb;p=ghc-hetmet.git diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index 84567fe..d7cc3e8 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -35,6 +35,7 @@ import base_ControlziExceptionziBase_nestedAtomically_closure; import EnterCriticalSection; import LeaveCriticalSection; import ghczmprim_GHCziBool_False_closure; +import sm_mutex; /*----------------------------------------------------------------------------- Array Primitives @@ -51,14 +52,14 @@ import ghczmprim_GHCziBool_False_closure; * round up to the nearest word for the size of the array. */ -newByteArrayzh_fast +stg_newByteArrayzh { W_ words, payload_words, n, p; - MAYBE_GC(NO_PTRS,newByteArrayzh_fast); + MAYBE_GC(NO_PTRS,stg_newByteArrayzh); n = R1; payload_words = ROUNDUP_BYTES_TO_WDS(n); words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) []; + ("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); StgArrWords_words(p) = payload_words; @@ -68,11 +69,11 @@ newByteArrayzh_fast #define BA_ALIGN 16 #define BA_MASK (BA_ALIGN-1) -newPinnedByteArrayzh_fast +stg_newPinnedByteArrayzh { W_ words, bytes, payload_words, p; - MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast); + MAYBE_GC(NO_PTRS,stg_newPinnedByteArrayzh); bytes = R1; /* payload_words is what we will tell the profiler we had to allocate */ payload_words = ROUNDUP_BYTES_TO_WDS(bytes); @@ -85,7 +86,7 @@ newPinnedByteArrayzh_fast /* Now we convert to a number of words: */ words = ROUNDUP_BYTES_TO_WDS(bytes); - ("ptr" p) = foreign "C" allocatePinned(words) []; + ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); /* Now we need to move p forward so that the payload is aligned @@ -97,11 +98,11 @@ newPinnedByteArrayzh_fast RET_P(p); } -newAlignedPinnedByteArrayzh_fast +stg_newAlignedPinnedByteArrayzh { W_ words, bytes, payload_words, p, alignment; - MAYBE_GC(NO_PTRS,newAlignedPinnedByteArrayzh_fast); + MAYBE_GC(NO_PTRS,stg_newAlignedPinnedByteArrayzh); bytes = R1; alignment = R2; @@ -117,7 +118,7 @@ newAlignedPinnedByteArrayzh_fast /* Now we convert to a number of words: */ words = ROUNDUP_BYTES_TO_WDS(bytes); - ("ptr" p) = foreign "C" allocatePinned(words) []; + ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); /* Now we need to move p forward so that the payload is aligned @@ -130,20 +131,25 @@ newAlignedPinnedByteArrayzh_fast RET_P(p); } -newArrayzh_fast +stg_newArrayzh { - W_ words, n, init, arr, p; + W_ words, n, init, arr, p, size; /* Args: R1 = words, R2 = initialisation value */ n = R1; - MAYBE_GC(R2_PTR,newArrayzh_fast); - - words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n; - ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2]; + MAYBE_GC(R2_PTR,stg_newArrayzh); + + // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words + // in the array, making sure we round up, and then rounding up to a whole + // number of words. + size = n + mutArrPtrsCardWords(n); + words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size; + ("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2]; TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0); SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]); StgMutArrPtrs_ptrs(arr) = n; + StgMutArrPtrs_size(arr) = size; // Initialise all elements of the the array with the value in R2 init = R2; @@ -154,18 +160,25 @@ newArrayzh_fast p = p + WDS(1); goto for; } + // Initialise the mark bits with 0 + for2: + if (p < arr + WDS(size)) { + W_[p] = 0; + p = p + WDS(1); + goto for2; + } RET_P(arr); } -unsafeThawArrayzh_fast +stg_unsafeThawArrayzh { // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST // // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN // normally doesn't. However, when we freeze a MUT_ARR_PTRS, we leave // it on the mutable list for the GC to remove (removing something from - // the mutable list is not easy, because the mut_list is only singly-linked). + // the mutable list is not easy). // // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list, // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0 @@ -193,12 +206,12 @@ unsafeThawArrayzh_fast MutVar primitives -------------------------------------------------------------------------- */ -newMutVarzh_fast +stg_newMutVarzh { W_ mv; /* Args: R1 = initialisation value */ - ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast); + ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, stg_newMutVarzh); mv = Hp - SIZEOF_StgMutVar + WDS(1); SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]); @@ -207,7 +220,7 @@ newMutVarzh_fast RET_P(mv); } -atomicModifyMutVarzh_fast +stg_atomicModifyMutVarzh { W_ mv, f, z, x, y, r, h; /* Args: R1 :: MutVar#, R2 :: a -> (a,b) */ @@ -246,7 +259,7 @@ atomicModifyMutVarzh_fast #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE) - HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast); + HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, stg_atomicModifyMutVarzh); mv = R1; f = R2; @@ -295,7 +308,7 @@ atomicModifyMutVarzh_fast STRING(stg_weak_msg,"New weak pointer at %p\n") -mkWeakzh_fast +stg_mkWeakzh { /* R1 = key R2 = value @@ -307,7 +320,7 @@ mkWeakzh_fast R3 = stg_NO_FINALIZER_closure; } - ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast ); + ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, stg_mkWeakzh ); w = Hp - SIZEOF_StgWeak + WDS(1); SET_HDR(w, stg_WEAK_info, W_[CCCS]); @@ -321,15 +334,17 @@ mkWeakzh_fast StgWeak_finalizer(w) = R3; StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure; + ACQUIRE_LOCK(sm_mutex); StgWeak_link(w) = W_[weak_ptr_list]; W_[weak_ptr_list] = w; + RELEASE_LOCK(sm_mutex); IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []); RET_P(w); } -mkWeakForeignEnvzh_fast +stg_mkWeakForeignEnvzh { /* R1 = key R2 = value @@ -349,14 +364,14 @@ mkWeakForeignEnvzh_fast flag = R5; eptr = R6; - ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, mkWeakForeignEnvzh_fast ); + ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, stg_mkWeakForeignEnvzh ); w = Hp - SIZEOF_StgWeak + WDS(1); SET_HDR(w, stg_WEAK_info, W_[CCCS]); payload_words = 4; words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr", words) []; + ("ptr" p) = foreign "C" allocate(MyCapability() "ptr", words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); @@ -375,15 +390,17 @@ mkWeakForeignEnvzh_fast StgWeak_finalizer(w) = stg_NO_FINALIZER_closure; StgWeak_cfinalizer(w) = p; + ACQUIRE_LOCK(sm_mutex); StgWeak_link(w) = W_[weak_ptr_list]; W_[weak_ptr_list] = w; + RELEASE_LOCK(sm_mutex); IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []); RET_P(w); } -finalizzeWeakzh_fast +stg_finalizzeWeakzh { /* R1 = weak ptr */ @@ -435,7 +452,7 @@ finalizzeWeakzh_fast } } -deRefWeakzh_fast +stg_deRefWeakzh { /* R1 = weak ptr */ W_ w, code, val; @@ -455,14 +472,14 @@ deRefWeakzh_fast Floating point operations. -------------------------------------------------------------------------- */ -decodeFloatzuIntzh_fast +stg_decodeFloatzuIntzh { W_ p; F_ arg; W_ mp_tmp1; W_ mp_tmp_w; - STK_CHK_GEN( WDS(2), NO_PTRS, decodeFloatzuIntzh_fast ); + STK_CHK_GEN( WDS(2), NO_PTRS, stg_decodeFloatzuIntzh ); mp_tmp1 = Sp - WDS(1); mp_tmp_w = Sp - WDS(2); @@ -477,7 +494,7 @@ decodeFloatzuIntzh_fast RET_NN(W_[mp_tmp1], W_[mp_tmp_w]); } -decodeDoublezu2Intzh_fast +stg_decodeDoublezu2Intzh { D_ arg; W_ p; @@ -486,7 +503,7 @@ decodeDoublezu2Intzh_fast W_ mp_result1; W_ mp_result2; - STK_CHK_GEN( WDS(4), NO_PTRS, decodeDoublezu2Intzh_fast ); + STK_CHK_GEN( WDS(4), NO_PTRS, stg_decodeDoublezu2Intzh ); mp_tmp1 = Sp - WDS(1); mp_tmp2 = Sp - WDS(2); @@ -510,11 +527,11 @@ decodeDoublezu2Intzh_fast * Concurrency primitives * -------------------------------------------------------------------------- */ -forkzh_fast +stg_forkzh { /* args: R1 = closure to spark */ - MAYBE_GC(R1_PTR, forkzh_fast); + MAYBE_GC(R1_PTR, stg_forkzh); W_ closure; W_ threadid; @@ -538,11 +555,11 @@ forkzh_fast RET_P(threadid); } -forkOnzh_fast +stg_forkOnzh { /* args: R1 = cpu, R2 = closure to spark */ - MAYBE_GC(R2_PTR, forkOnzh_fast); + MAYBE_GC(R2_PTR, stg_forkOnzh); W_ cpu; W_ closure; @@ -568,18 +585,18 @@ forkOnzh_fast RET_P(threadid); } -yieldzh_fast +stg_yieldzh { jump stg_yield_noregs; } -myThreadIdzh_fast +stg_myThreadIdzh { /* no args. */ RET_P(CurrentTSO); } -labelThreadzh_fast +stg_labelThreadzh { /* args: R1 = ThreadId# @@ -590,7 +607,7 @@ labelThreadzh_fast jump %ENTRY_CODE(Sp(0)); } -isCurrentThreadBoundzh_fast +stg_isCurrentThreadBoundzh { /* no args */ W_ r; @@ -598,7 +615,7 @@ isCurrentThreadBoundzh_fast RET_N(r); } -threadStatuszh_fast +stg_threadStatuszh { /* args: R1 :: ThreadId# */ W_ tso; @@ -650,7 +667,7 @@ INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME, frame = Sp; trec = StgTSO_trec(CurrentTSO); - ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") []; + outer = StgTRecHeader_enclosing_trec(trec); (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") []; if (r != 0) { /* Succeeded (either first branch or second branch) */ @@ -678,18 +695,20 @@ INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - P_ unused3, P_ unused4) + P_ code, P_ next_invariant_to_check, P_ result) { W_ frame, trec, valid, next_invariant, q, outer; - frame = Sp; - trec = StgTSO_trec(CurrentTSO); - ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") []; + frame = Sp; + trec = StgTSO_trec(CurrentTSO); + result = R1; + outer = StgTRecHeader_enclosing_trec(trec); if (outer == NO_TREC) { /* First time back at the atomically frame -- pick up invariants */ ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") []; StgAtomicallyFrame_next_invariant_to_check(frame) = q; + StgAtomicallyFrame_result(frame) = result; } else { /* Second/subsequent time back at the atomically frame -- abort the @@ -723,6 +742,7 @@ INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME, if (valid != 0) { /* Transaction was valid: commit succeeded */ StgTSO_trec(CurrentTSO) = NO_TREC; + R1 = StgAtomicallyFrame_result(frame); Sp = Sp + SIZEOF_StgAtomicallyFrame; jump %ENTRY_CODE(Sp(SP_OFF)); } else { @@ -740,7 +760,7 @@ INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - P_ unused3, P_ unused4) + P_ code, P_ next_invariant_to_check, P_ result) { W_ frame, trec, valid; @@ -779,7 +799,7 @@ INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME, W_ r, frame, trec, outer; frame = Sp; trec = StgTSO_trec(CurrentTSO); - ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") []; + outer = StgTRecHeader_enclosing_trec(trec); (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") []; if (r != 0) { /* Commit succeeded */ @@ -799,24 +819,24 @@ INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME, // Primop definition ------------------------------------------------------------ -atomicallyzh_fast +stg_atomicallyzh { W_ frame; W_ old_trec; W_ new_trec; // stmStartTransaction may allocate - MAYBE_GC (R1_PTR, atomicallyzh_fast); + MAYBE_GC (R1_PTR, stg_atomicallyzh); /* Args: R1 = m :: STM a */ - STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast); + STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, stg_atomicallyzh); old_trec = StgTSO_trec(CurrentTSO); /* Nested transactions are not allowed; raise an exception */ if (old_trec != NO_TREC) { R1 = base_ControlziExceptionziBase_nestedAtomically_closure; - jump raisezh_fast; + jump stg_raisezh; } /* Set up the atomically frame */ @@ -825,6 +845,7 @@ atomicallyzh_fast SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]); StgAtomicallyFrame_code(frame) = R1; + StgAtomicallyFrame_result(frame) = NO_TREC; StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE; /* Start the memory transcation */ @@ -836,13 +857,13 @@ atomicallyzh_fast } -catchSTMzh_fast +stg_catchSTMzh { W_ frame; /* Args: R1 :: STM a */ /* Args: R2 :: Exception -> STM a */ - STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast); + STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, stg_catchSTMzh); /* Set up the catch frame */ Sp = Sp - SIZEOF_StgCatchSTMFrame; @@ -864,18 +885,18 @@ catchSTMzh_fast } -catchRetryzh_fast +stg_catchRetryzh { W_ frame; W_ new_trec; W_ trec; // stmStartTransaction may allocate - MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); + MAYBE_GC (R1_PTR & R2_PTR, stg_catchRetryzh); /* Args: R1 :: STM a */ /* Args: R2 :: STM a */ - STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast); + STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, stg_catchRetryzh); /* Start a nested transaction within which to run the first code */ trec = StgTSO_trec(CurrentTSO); @@ -896,7 +917,7 @@ catchRetryzh_fast } -retryzh_fast +stg_retryzh { W_ frame_type; W_ frame; @@ -904,7 +925,7 @@ retryzh_fast W_ outer; W_ r; - MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate + MAYBE_GC (NO_PTRS, stg_retryzh); // STM operations may allocate // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME retry_pop_stack: @@ -913,7 +934,7 @@ retry_pop_stack: Sp = StgTSO_sp(CurrentTSO); frame = Sp; trec = StgTSO_trec(CurrentTSO); - ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") []; + outer = StgTRecHeader_enclosing_trec(trec); if (frame_type == CATCH_RETRY_FRAME) { // The retry reaches a CATCH_RETRY_FRAME before the atomic frame @@ -946,7 +967,7 @@ retry_pop_stack: foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") []; trec = outer; StgTSO_trec(CurrentTSO) = trec; - ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") []; + outer = StgTRecHeader_enclosing_trec(trec); } ASSERT(outer == NO_TREC); @@ -969,12 +990,12 @@ retry_pop_stack: } -checkzh_fast +stg_checkzh { W_ trec, closure; /* Args: R1 = invariant closure */ - MAYBE_GC (R1_PTR, checkzh_fast); + MAYBE_GC (R1_PTR, stg_checkzh); trec = StgTSO_trec(CurrentTSO); closure = R1; @@ -986,21 +1007,21 @@ checkzh_fast } -newTVarzh_fast +stg_newTVarzh { W_ tv; W_ new_value; /* Args: R1 = initialisation value */ - MAYBE_GC (R1_PTR, newTVarzh_fast); + MAYBE_GC (R1_PTR, stg_newTVarzh); new_value = R1; ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") []; RET_P(tv); } -readTVarzh_fast +stg_readTVarzh { W_ trec; W_ tvar; @@ -1008,7 +1029,7 @@ readTVarzh_fast /* Args: R1 = TVar closure */ - MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate + MAYBE_GC (R1_PTR, stg_readTVarzh); // Call to stmReadTVar may allocate trec = StgTSO_trec(CurrentTSO); tvar = R1; ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") []; @@ -1016,7 +1037,7 @@ readTVarzh_fast RET_P(result); } -readTVarIOzh_fast +stg_readTVarIOzh { W_ result; @@ -1028,7 +1049,7 @@ again: RET_P(result); } -writeTVarzh_fast +stg_writeTVarzh { W_ trec; W_ tvar; @@ -1037,7 +1058,7 @@ writeTVarzh_fast /* Args: R1 = TVar closure */ /* R2 = New value */ - MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate + MAYBE_GC (R1_PTR & R2_PTR, stg_writeTVarzh); // Call to stmWriteTVar may allocate trec = StgTSO_trec(CurrentTSO); tvar = R1; new_value = R2; @@ -1079,7 +1100,7 @@ writeTVarzh_fast * * -------------------------------------------------------------------------- */ -isEmptyMVarzh_fast +stg_isEmptyMVarzh { /* args: R1 = MVar closure */ @@ -1090,12 +1111,12 @@ isEmptyMVarzh_fast } } -newMVarzh_fast +stg_newMVarzh { /* args: none */ W_ mvar; - ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast ); + ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, stg_newMVarzh ); mvar = Hp - SIZEOF_StgMVar + WDS(1); SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]); @@ -1115,7 +1136,7 @@ newMVarzh_fast StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3); \ lval = W_[StgTSO_sp(tso) - WDS(1)]; -takeMVarzh_fast +stg_takeMVarzh { W_ mvar, val, info, tso; @@ -1169,7 +1190,7 @@ takeMVarzh_fast tso = StgMVar_head(mvar); PerformPut(tso,StgMVar_value(mvar)); - if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) { + if (TO_W_(StgTSO_dirty(tso)) == 0) { foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; } @@ -1204,7 +1225,7 @@ takeMVarzh_fast } -tryTakeMVarzh_fast +stg_tryTakeMVarzh { W_ mvar, val, info, tso; @@ -1245,7 +1266,7 @@ tryTakeMVarzh_fast /* actually perform the putMVar for the thread that we just woke up */ tso = StgMVar_head(mvar); PerformPut(tso,StgMVar_value(mvar)); - if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) { + if (TO_W_(StgTSO_dirty(tso)) == 0) { foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; } @@ -1277,7 +1298,7 @@ tryTakeMVarzh_fast } -putMVarzh_fast +stg_putMVarzh { W_ mvar, val, info, tso; @@ -1325,7 +1346,7 @@ putMVarzh_fast /* actually perform the takeMVar */ tso = StgMVar_head(mvar); PerformTake(tso, val); - if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) { + if (TO_W_(StgTSO_dirty(tso)) == 0) { foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; } @@ -1361,7 +1382,7 @@ putMVarzh_fast } -tryPutMVarzh_fast +stg_tryPutMVarzh { W_ mvar, info, tso; @@ -1394,7 +1415,7 @@ tryPutMVarzh_fast /* actually perform the takeMVar */ tso = StgMVar_head(mvar); PerformTake(tso, R2); - if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) { + if (TO_W_(StgTSO_dirty(tso)) == 0) { foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; } @@ -1433,11 +1454,11 @@ tryPutMVarzh_fast Stable pointer primitives ------------------------------------------------------------------------- */ -makeStableNamezh_fast +stg_makeStableNamezh { W_ index, sn_obj; - ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast ); + ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, stg_makeStableNamezh ); (index) = foreign "C" lookupStableName(R1 "ptr") []; @@ -1457,16 +1478,16 @@ makeStableNamezh_fast } -makeStablePtrzh_fast +stg_makeStablePtrzh { /* Args: R1 = a */ W_ sp; - MAYBE_GC(R1_PTR, makeStablePtrzh_fast); + MAYBE_GC(R1_PTR, stg_makeStablePtrzh); ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") []; RET_N(sp); } -deRefStablePtrzh_fast +stg_deRefStablePtrzh { /* Args: R1 = the stable ptr */ W_ r, sp; @@ -1479,7 +1500,7 @@ deRefStablePtrzh_fast Bytecode object primitives ------------------------------------------------------------------------- */ -newBCOzh_fast +stg_newBCOzh { /* R1 = instrs R2 = literals @@ -1494,7 +1515,7 @@ newBCOzh_fast words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr); bytes = WDS(words); - ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast ); + ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, stg_newBCOzh ); bco = Hp - bytes + WDS(1); SET_HDR(bco, stg_BCO_info, W_[CCCS]); @@ -1519,7 +1540,7 @@ for: } -mkApUpd0zh_fast +stg_mkApUpd0zh { // R1 = the BCO# for the AP // @@ -1531,7 +1552,7 @@ mkApUpd0zh_fast ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) && StgBCO_arity(R1) == HALF_W_(0)); - HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast); + HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, stg_mkApUpd0zh); TICK_ALLOC_UP_THK(0, 0); CCCS_ALLOC(SIZEOF_StgAP); @@ -1544,7 +1565,7 @@ mkApUpd0zh_fast RET_P(ap); } -unpackClosurezh_fast +stg_unpackClosurezh { /* args: R1 = closure to analyze */ // TODO: Consider the absence of ptrs or nonptrs as a special case ? @@ -1574,11 +1595,12 @@ unpackClosurezh_fast }} out: - W_ ptrs_arr_sz, nptrs_arr_sz; + W_ ptrs_arr_sz, ptrs_arr_cards, nptrs_arr_sz; nptrs_arr_sz = SIZEOF_StgArrWords + WDS(nptrs); - ptrs_arr_sz = SIZEOF_StgMutArrPtrs + WDS(ptrs); + ptrs_arr_cards = mutArrPtrsCardWords(ptrs); + ptrs_arr_sz = SIZEOF_StgMutArrPtrs + WDS(ptrs) + WDS(ptrs_arr_cards); - ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast); + ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, stg_unpackClosurezh); W_ clos; clos = UNTAG(R1); @@ -1588,6 +1610,8 @@ out: SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]); StgMutArrPtrs_ptrs(ptrs_arr) = ptrs; + StgMutArrPtrs_size(ptrs_arr) = ptrs + ptrs_arr_cards; + p = 0; for: if(p < ptrs) { @@ -1595,6 +1619,9 @@ for: p = p + 1; goto for; } + /* We can leave the card table uninitialised, since the array is + allocated in the nursery. The GC will fill it in if/when the array + is promoted. */ SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]); StgArrWords_words(nptrs_arr) = nptrs; @@ -1624,7 +1651,7 @@ for2: } \ W_[blocked_queue_tl] = tso; -waitReadzh_fast +stg_waitReadzh { /* args: R1 */ #ifdef THREADED_RTS @@ -1641,7 +1668,7 @@ waitReadzh_fast #endif } -waitWritezh_fast +stg_waitWritezh { /* args: R1 */ #ifdef THREADED_RTS @@ -1659,8 +1686,8 @@ waitWritezh_fast } -STRING(stg_delayzh_malloc_str, "delayzh_fast") -delayzh_fast +STRING(stg_delayzh_malloc_str, "stg_delayzh") +stg_delayzh { #ifdef mingw32_HOST_OS W_ ares; @@ -1733,8 +1760,8 @@ while: #ifdef mingw32_HOST_OS -STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast") -asyncReadzh_fast +STRING(stg_asyncReadzh_malloc_str, "stg_asyncReadzh") +stg_asyncReadzh { W_ ares; CInt reqID; @@ -1761,8 +1788,8 @@ asyncReadzh_fast #endif } -STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast") -asyncWritezh_fast +STRING(stg_asyncWritezh_malloc_str, "stg_asyncWritezh") +stg_asyncWritezh { W_ ares; CInt reqID; @@ -1789,8 +1816,8 @@ asyncWritezh_fast #endif } -STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast") -asyncDoProczh_fast +STRING(stg_asyncDoProczh_malloc_str, "stg_asyncDoProczh") +stg_asyncDoProczh { W_ ares; CInt reqID; @@ -1822,7 +1849,7 @@ asyncDoProczh_fast // evaluation by the current thread are also under evaluation by // another thread. It relies on *both* threads doing noDuplicate#; // the second one will get blocked if they are duplicating some work. -noDuplicatezh_fast +stg_noDuplicatezh { SAVE_THREAD_STATE(); ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16); @@ -1837,7 +1864,7 @@ noDuplicatezh_fast } } -getApStackValzh_fast +stg_getApStackValzh { W_ ap_stack, offset, val, ok; @@ -1862,7 +1889,7 @@ getApStackValzh_fast // Write the cost center stack of the first argument on stderr; return // the second. Possibly only makes sense for already evaluated // things? -traceCcszh_fast +stg_traceCcszh { W_ ccs; @@ -1875,7 +1902,7 @@ traceCcszh_fast ENTER(); } -getSparkzh_fast +stg_getSparkzh { W_ spark; @@ -1890,3 +1917,28 @@ getSparkzh_fast } #endif } + +stg_traceEventzh +{ + W_ msg; + msg = R1; + +#if defined(TRACING) || defined(DEBUG) + + foreign "C" traceUserMsg(MyCapability() "ptr", msg "ptr") []; + +#elif defined(DTRACE) + + W_ enabled; + + // We should go through the macro HASKELLEVENT_USER_MSG_ENABLED from + // RtsProbes.h, but that header file includes unistd.h, which doesn't + // work in Cmm + (enabled) = foreign "C" __dtrace_isenabled$HaskellEvent$user__msg$v1() []; + if (enabled != 0) { + foreign "C" dtraceUserMsgWrapper(MyCapability() "ptr", msg "ptr") []; + } + +#endif + jump %ENTRY_CODE(Sp(0)); +}