X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2FPrimOps.cmm;h=5325c85d1f1313f70c7e266642d654470be1d29b;hb=d83c0339a26179567eed0a8d65d944008e3637b2;hp=d5b6c46f688d14658d576e66a47e7a96da4058e6;hpb=5407ad8e0a5c08ac5193c1a9ede2a12570baee0b;p=ghc-hetmet.git diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index d5b6c46..5325c85 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -35,6 +35,7 @@ import base_ControlziExceptionziBase_nestedAtomically_closure; import EnterCriticalSection; import LeaveCriticalSection; import ghczmprim_GHCziBool_False_closure; +import sm_mutex; /*----------------------------------------------------------------------------- Array Primitives @@ -58,7 +59,7 @@ stg_newByteArrayzh n = R1; payload_words = ROUNDUP_BYTES_TO_WDS(n); words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) []; + ("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); StgArrWords_words(p) = payload_words; @@ -85,7 +86,7 @@ stg_newPinnedByteArrayzh /* Now we convert to a number of words: */ words = ROUNDUP_BYTES_TO_WDS(bytes); - ("ptr" p) = foreign "C" allocatePinned(words) []; + ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); /* Now we need to move p forward so that the payload is aligned @@ -117,7 +118,7 @@ stg_newAlignedPinnedByteArrayzh /* Now we convert to a number of words: */ words = ROUNDUP_BYTES_TO_WDS(bytes); - ("ptr" p) = foreign "C" allocatePinned(words) []; + ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); /* Now we need to move p forward so that the payload is aligned @@ -132,18 +133,23 @@ stg_newAlignedPinnedByteArrayzh stg_newArrayzh { - W_ words, n, init, arr, p; + W_ words, n, init, arr, p, size; /* Args: R1 = words, R2 = initialisation value */ n = R1; MAYBE_GC(R2_PTR,stg_newArrayzh); - words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n; - ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2]; + // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words + // in the array, making sure we round up, and then rounding up to a whole + // number of words. + size = n + mutArrPtrsCardWords(n); + words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size; + ("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2]; TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0); SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]); StgMutArrPtrs_ptrs(arr) = n; + StgMutArrPtrs_size(arr) = size; // Initialise all elements of the the array with the value in R2 init = R2; @@ -154,6 +160,13 @@ stg_newArrayzh p = p + WDS(1); goto for; } + // Initialise the mark bits with 0 + for2: + if (p < arr + WDS(size)) { + W_[p] = 0; + p = p + WDS(1); + goto for2; + } RET_P(arr); } @@ -165,7 +178,7 @@ stg_unsafeThawArrayzh // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN // normally doesn't. However, when we freeze a MUT_ARR_PTRS, we leave // it on the mutable list for the GC to remove (removing something from - // the mutable list is not easy, because the mut_list is only singly-linked). + // the mutable list is not easy). // // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list, // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0 @@ -321,8 +334,10 @@ stg_mkWeakzh StgWeak_finalizer(w) = R3; StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure; + ACQUIRE_LOCK(sm_mutex); StgWeak_link(w) = W_[weak_ptr_list]; W_[weak_ptr_list] = w; + RELEASE_LOCK(sm_mutex); IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []); @@ -356,7 +371,7 @@ stg_mkWeakForeignEnvzh payload_words = 4; words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr", words) []; + ("ptr" p) = foreign "C" allocate(MyCapability() "ptr", words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); @@ -375,8 +390,10 @@ stg_mkWeakForeignEnvzh StgWeak_finalizer(w) = stg_NO_FINALIZER_closure; StgWeak_cfinalizer(w) = p; + ACQUIRE_LOCK(sm_mutex); StgWeak_link(w) = W_[weak_ptr_list]; W_[weak_ptr_list] = w; + RELEASE_LOCK(sm_mutex); IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []); @@ -650,7 +667,7 @@ INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME, frame = Sp; trec = StgTSO_trec(CurrentTSO); - ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") []; + outer = StgTRecHeader_enclosing_trec(trec); (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") []; if (r != 0) { /* Succeeded (either first branch or second branch) */ @@ -685,7 +702,7 @@ INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME, frame = Sp; trec = StgTSO_trec(CurrentTSO); result = R1; - ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") []; + outer = StgTRecHeader_enclosing_trec(trec); if (outer == NO_TREC) { /* First time back at the atomically frame -- pick up invariants */ @@ -782,7 +799,7 @@ INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME, W_ r, frame, trec, outer; frame = Sp; trec = StgTSO_trec(CurrentTSO); - ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") []; + outer = StgTRecHeader_enclosing_trec(trec); (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") []; if (r != 0) { /* Commit succeeded */ @@ -917,7 +934,7 @@ retry_pop_stack: Sp = StgTSO_sp(CurrentTSO); frame = Sp; trec = StgTSO_trec(CurrentTSO); - ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") []; + outer = StgTRecHeader_enclosing_trec(trec); if (frame_type == CATCH_RETRY_FRAME) { // The retry reaches a CATCH_RETRY_FRAME before the atomic frame @@ -950,7 +967,7 @@ retry_pop_stack: foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") []; trec = outer; StgTSO_trec(CurrentTSO) = trec; - ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") []; + outer = StgTRecHeader_enclosing_trec(trec); } ASSERT(outer == NO_TREC); @@ -1578,9 +1595,10 @@ stg_unpackClosurezh }} out: - W_ ptrs_arr_sz, nptrs_arr_sz; + W_ ptrs_arr_sz, ptrs_arr_cards, nptrs_arr_sz; nptrs_arr_sz = SIZEOF_StgArrWords + WDS(nptrs); - ptrs_arr_sz = SIZEOF_StgMutArrPtrs + WDS(ptrs); + ptrs_arr_cards = mutArrPtrsCardWords(ptrs); + ptrs_arr_sz = SIZEOF_StgMutArrPtrs + WDS(ptrs) + WDS(ptrs_arr_cards); ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, stg_unpackClosurezh); @@ -1592,6 +1610,8 @@ out: SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]); StgMutArrPtrs_ptrs(ptrs_arr) = ptrs; + StgMutArrPtrs_size(ptrs_arr) = ptrs + ptrs_arr_cards; + p = 0; for: if(p < ptrs) { @@ -1599,6 +1619,9 @@ for: p = p + 1; goto for; } + /* We can leave the card table uninitialised, since the array is + allocated in the nursery. The GC will fill it in if/when the array + is promoted. */ SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]); StgArrWords_words(nptrs_arr) = nptrs; @@ -1822,12 +1845,71 @@ stg_asyncDoProczh } #endif -// noDuplicate# tries to ensure that none of the thunks under -// evaluation by the current thread are also under evaluation by -// another thread. It relies on *both* threads doing noDuplicate#; -// the second one will get blocked if they are duplicating some work. +/* ----------------------------------------------------------------------------- + * noDuplicate# + * + * noDuplicate# tries to ensure that none of the thunks under + * evaluation by the current thread are also under evaluation by + * another thread. It relies on *both* threads doing noDuplicate#; + * the second one will get blocked if they are duplicating some work. + * + * The idea is that noDuplicate# is used within unsafePerformIO to + * ensure that the IO operation is performed at most once. + * noDuplicate# calls threadPaused which acquires an exclusive lock on + * all the thunks currently under evaluation by the current thread. + * + * Consider the following scenario. There is a thunk A, whose + * evaluation requires evaluating thunk B, where thunk B is an + * unsafePerformIO. Two threads, 1 and 2, bother enter A. Thread 2 + * is pre-empted before it enters B, and claims A by blackholing it + * (in threadPaused). Thread 1 now enters B, and calls noDuplicate#. + * + * thread 1 thread 2 + * +-----------+ +---------------+ + * | -------+-----> A <-------+------- | + * | update | BLACKHOLE | marked_update | + * +-----------+ +---------------+ + * | | | | + * ... ... + * | | +---------------+ + * +-----------+ + * | ------+-----> B + * | update | BLACKHOLE + * +-----------+ + * + * At this point: A is a blackhole, owned by thread 2. noDuplicate# + * calls threadPaused, which walks up the stack and + * - claims B on behalf of thread 1 + * - then it reaches the update frame for A, which it sees is already + * a BLACKHOLE and is therefore owned by another thread. Since + * thread 1 is duplicating work, the computation up to the update + * frame for A is suspended, including thunk B. + * - thunk B, which is an unsafePerformIO, has now been reverted to + * an AP_STACK which could be duplicated - BAD! + * - The solution is as follows: before calling threadPaused, we + * leave a frame on the stack (stg_noDuplicate_info) that will call + * noDuplicate# again if the current computation is suspended and + * restarted. + * + * See the test program in concurrent/prog003 for a way to demonstrate + * this. It needs to be run with +RTS -N3 or greater, and the bug + * only manifests occasionally (once very 10 runs or so). + * -------------------------------------------------------------------------- */ + +INFO_TABLE_RET(stg_noDuplicate, RET_SMALL) +{ + Sp_adj(1); + jump stg_noDuplicatezh; +} + stg_noDuplicatezh { + STK_CHK_GEN( WDS(1), NO_PTRS, stg_noDuplicatezh ); + // leave noDuplicate frame in case the current + // computation is suspended and restarted (see above). + Sp_adj(-1); + Sp(0) = stg_noDuplicate_info; + SAVE_THREAD_STATE(); ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16); foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") []; @@ -1837,10 +1919,18 @@ stg_noDuplicatezh } else { LOAD_THREAD_STATE(); ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16); + // remove the stg_noDuplicate frame if it is still there. + if (Sp(0) == stg_noDuplicate_info) { + Sp_adj(1); + } jump %ENTRY_CODE(Sp(0)); } } +/* ----------------------------------------------------------------------------- + Misc. primitives + -------------------------------------------------------------------------- */ + stg_getApStackValzh { W_ ap_stack, offset, val, ok; @@ -1859,10 +1949,6 @@ stg_getApStackValzh RET_NP(ok,val); } -/* ----------------------------------------------------------------------------- - Misc. primitives - -------------------------------------------------------------------------- */ - // Write the cost center stack of the first argument on stderr; return // the second. Possibly only makes sense for already evaluated // things? @@ -1899,8 +1985,23 @@ stg_traceEventzh { W_ msg; msg = R1; + #if defined(TRACING) || defined(DEBUG) + foreign "C" traceUserMsg(MyCapability() "ptr", msg "ptr") []; + +#elif defined(DTRACE) + + W_ enabled; + + // We should go through the macro HASKELLEVENT_USER_MSG_ENABLED from + // RtsProbes.h, but that header file includes unistd.h, which doesn't + // work in Cmm + (enabled) = foreign "C" __dtrace_isenabled$HaskellEvent$user__msg$v1() []; + if (enabled != 0) { + foreign "C" dtraceUserMsgWrapper(MyCapability() "ptr", msg "ptr") []; + } + #endif jump %ENTRY_CODE(Sp(0)); }