X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=rts%2FPrimOps.cmm;h=e17c6fb3f870b7b923c79570cfe45a087bd787ee;hp=ac6de81e1c6e03c542b7e5f9083bc74a67a68a17;hb=e5c3b478b3cd1707cf122833822f44b2ac09b8e9;hpb=0856ac59cfb455d32a3042317fdba0f5e85cab9c diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index ac6de81..e17c6fb 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------------- * - * (c) The GHC Team, 1998-2004 + * (c) The GHC Team, 1998-2011 * * Out-of-line primitive operations * @@ -34,7 +34,10 @@ import pthread_mutex_unlock; import base_ControlziExceptionziBase_nestedAtomically_closure; import EnterCriticalSection; import LeaveCriticalSection; -import ghczmprim_GHCziBool_False_closure; +import ghczmprim_GHCziTypes_False_closure; +#if !defined(mingw32_HOST_OS) +import sm_mutex; +#endif /*----------------------------------------------------------------------------- Array Primitives @@ -58,10 +61,10 @@ stg_newByteArrayzh n = R1; payload_words = ROUNDUP_BYTES_TO_WDS(n); words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) []; + ("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = payload_words; + StgArrWords_bytes(p) = n; RET_P(p); } @@ -70,10 +73,11 @@ stg_newByteArrayzh stg_newPinnedByteArrayzh { - W_ words, bytes, payload_words, p; + W_ words, n, bytes, payload_words, p; MAYBE_GC(NO_PTRS,stg_newPinnedByteArrayzh); - bytes = R1; + n = R1; + bytes = n; /* payload_words is what we will tell the profiler we had to allocate */ payload_words = ROUNDUP_BYTES_TO_WDS(bytes); /* When we actually allocate memory, we need to allow space for the @@ -85,7 +89,7 @@ stg_newPinnedByteArrayzh /* Now we convert to a number of words: */ words = ROUNDUP_BYTES_TO_WDS(bytes); - ("ptr" p) = foreign "C" allocatePinned(words) []; + ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); /* Now we need to move p forward so that the payload is aligned @@ -93,18 +97,25 @@ stg_newPinnedByteArrayzh p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = payload_words; + StgArrWords_bytes(p) = n; RET_P(p); } stg_newAlignedPinnedByteArrayzh { - W_ words, bytes, payload_words, p, alignment; + W_ words, n, bytes, payload_words, p, alignment; MAYBE_GC(NO_PTRS,stg_newAlignedPinnedByteArrayzh); - bytes = R1; + n = R1; alignment = R2; + /* we always supply at least word-aligned memory, so there's no + need to allow extra space for alignment if the requirement is less + than a word. This also prevents mischief with alignment == 0. */ + if (alignment <= SIZEOF_W) { alignment = 1; } + + bytes = n; + /* payload_words is what we will tell the profiler we had to allocate */ payload_words = ROUNDUP_BYTES_TO_WDS(bytes); @@ -117,7 +128,7 @@ stg_newAlignedPinnedByteArrayzh /* Now we convert to a number of words: */ words = ROUNDUP_BYTES_TO_WDS(bytes); - ("ptr" p) = foreign "C" allocatePinned(words) []; + ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); /* Now we need to move p forward so that the payload is aligned @@ -126,24 +137,29 @@ stg_newAlignedPinnedByteArrayzh p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1)); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = payload_words; + StgArrWords_bytes(p) = n; RET_P(p); } stg_newArrayzh { - W_ words, n, init, arr, p; + W_ words, n, init, arr, p, size; /* Args: R1 = words, R2 = initialisation value */ n = R1; MAYBE_GC(R2_PTR,stg_newArrayzh); - words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n; - ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2]; + // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words + // in the array, making sure we round up, and then rounding up to a whole + // number of words. + size = n + mutArrPtrsCardWords(n); + words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size; + ("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2]; TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0); SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]); StgMutArrPtrs_ptrs(arr) = n; + StgMutArrPtrs_size(arr) = size; // Initialise all elements of the the array with the value in R2 init = R2; @@ -154,6 +170,13 @@ stg_newArrayzh p = p + WDS(1); goto for; } + // Initialise the mark bits with 0 + for2: + if (p < arr + WDS(size)) { + W_[p] = 0; + p = p + WDS(1); + goto for2; + } RET_P(arr); } @@ -165,7 +188,7 @@ stg_unsafeThawArrayzh // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN // normally doesn't. However, when we freeze a MUT_ARR_PTRS, we leave // it on the mutable list for the GC to remove (removing something from - // the mutable list is not easy, because the mut_list is only singly-linked). + // the mutable list is not easy). // // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list, // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0 @@ -189,6 +212,7 @@ stg_unsafeThawArrayzh } } + /* ----------------------------------------------------------------------------- MutVar primitives -------------------------------------------------------------------------- */ @@ -207,6 +231,25 @@ stg_newMutVarzh RET_P(mv); } +stg_casMutVarzh + /* MutVar# s a -> a -> a -> State# s -> (# State#, Int#, a #) */ +{ + W_ mv, old, new, h; + + mv = R1; + old = R2; + new = R3; + + (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, + old, new) []; + if (h != old) { + RET_NP(1,h); + } else { + RET_NP(0,h); + } +} + + stg_atomicModifyMutVarzh { W_ mv, f, z, x, y, r, h; @@ -321,8 +364,10 @@ stg_mkWeakzh StgWeak_finalizer(w) = R3; StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure; + ACQUIRE_LOCK(sm_mutex); StgWeak_link(w) = W_[weak_ptr_list]; W_[weak_ptr_list] = w; + RELEASE_LOCK(sm_mutex); IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []); @@ -356,12 +401,12 @@ stg_mkWeakForeignEnvzh payload_words = 4; words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr", words) []; + ("ptr" p) = foreign "C" allocate(MyCapability() "ptr", words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(p) = payload_words; + StgArrWords_bytes(p) = WDS(payload_words); StgArrWords_payload(p,0) = fptr; StgArrWords_payload(p,1) = ptr; StgArrWords_payload(p,2) = eptr; @@ -375,8 +420,10 @@ stg_mkWeakForeignEnvzh StgWeak_finalizer(w) = stg_NO_FINALIZER_closure; StgWeak_cfinalizer(w) = p; + ACQUIRE_LOCK(sm_mutex); StgWeak_link(w) = W_[weak_ptr_list]; W_[weak_ptr_list] = w; + RELEASE_LOCK(sm_mutex); IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []); @@ -525,9 +572,9 @@ stg_forkzh closure "ptr") []; /* start blocked if the current thread is blocked */ - StgTSO_flags(threadid) = - StgTSO_flags(threadid) | (StgTSO_flags(CurrentTSO) & - (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32)); + StgTSO_flags(threadid) = %lobits16( + TO_W_(StgTSO_flags(threadid)) | + TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE)); foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") []; @@ -555,9 +602,9 @@ stg_forkOnzh closure "ptr") []; /* start blocked if the current thread is blocked */ - StgTSO_flags(threadid) = - StgTSO_flags(threadid) | (StgTSO_flags(CurrentTSO) & - (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32)); + StgTSO_flags(threadid) = %lobits16( + TO_W_(StgTSO_flags(threadid)) | + TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE)); foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") []; @@ -604,14 +651,9 @@ stg_threadStatuszh W_ tso; W_ why_blocked; W_ what_next; - W_ ret; + W_ ret, cap, locked; tso = R1; - loop: - if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) { - tso = StgTSO__link(tso); - goto loop; - } what_next = TO_W_(StgTSO_what_next(tso)); why_blocked = TO_W_(StgTSO_why_blocked(tso)); @@ -629,7 +671,16 @@ stg_threadStatuszh ret = why_blocked; } } - RET_N(ret); + + cap = TO_W_(Capability_no(StgTSO_cap(tso))); + + if ((TO_W_(StgTSO_flags(tso)) & TSO_LOCKED) != 0) { + locked = 1; + } else { + locked = 0; + } + + RET_NNN(ret,cap,locked); } /* ----------------------------------------------------------------------------- @@ -912,9 +963,9 @@ stg_retryzh // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME retry_pop_stack: - StgTSO_sp(CurrentTSO) = Sp; - (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") []; - Sp = StgTSO_sp(CurrentTSO); + SAVE_THREAD_STATE(); + (frame_type) = foreign "C" findRetryFrameHelper(MyCapability(), CurrentTSO "ptr") []; + LOAD_THREAD_STATE(); frame = Sp; trec = StgTSO_trec(CurrentTSO); outer = StgTRecHeader_enclosing_trec(trec); @@ -1111,17 +1162,21 @@ stg_newMVarzh } -#define PerformTake(tso, value) \ - W_[StgTSO_sp(tso) + WDS(1)] = value; \ - W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info; +#define PerformTake(stack, value) \ + W_ sp; \ + sp = StgStack_sp(stack); \ + W_[sp + WDS(1)] = value; \ + W_[sp + WDS(0)] = stg_gc_unpt_r1_info; -#define PerformPut(tso,lval) \ - StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3); \ - lval = W_[StgTSO_sp(tso) - WDS(1)]; +#define PerformPut(stack,lval) \ + W_ sp; \ + sp = StgStack_sp(stack) + WDS(3); \ + StgStack_sp(stack) = sp; \ + lval = W_[sp - WDS(1)]; stg_takeMVarzh { - W_ mvar, val, info, tso; + W_ mvar, val, info, tso, q; /* args: R1 = MVar closure */ mvar = R1; @@ -1140,80 +1195,88 @@ stg_takeMVarzh * and wait until we're woken up. */ if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) { + + // Note [mvar-heap-check] We want to do the heap check in the + // branch here, to avoid the conditional in the common case. + // However, we've already locked the MVar above, so we better + // be careful to unlock it again if the the heap check fails. + // Unfortunately we don't have an easy way to inject any code + // into the heap check generated by the code generator, so we + // have to do it in stg_gc_gen (see HeapStackCheck.cmm). + HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR, stg_takeMVarzh); + + q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1); + + SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM); + StgMVarTSOQueue_link(q) = END_TSO_QUEUE; + StgMVarTSOQueue_tso(q) = CurrentTSO; + if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { - StgMVar_head(mvar) = CurrentTSO; + StgMVar_head(mvar) = q; } else { - foreign "C" setTSOLink(MyCapability() "ptr", - StgMVar_tail(mvar) "ptr", - CurrentTSO) []; + StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q; + foreign "C" recordClosureMutated(MyCapability() "ptr", + StgMVar_tail(mvar)) []; } - StgTSO__link(CurrentTSO) = stg_END_TSO_QUEUE_closure; + StgTSO__link(CurrentTSO) = q; StgTSO_block_info(CurrentTSO) = mvar; - // write barrier for throwTo(), which looks at block_info - // if why_blocked==BlockedOnMVar. - prim %write_barrier() []; StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; - StgMVar_tail(mvar) = CurrentTSO; + StgMVar_tail(mvar) = q; R1 = mvar; jump stg_block_takemvar; - } - - /* we got the value... */ - val = StgMVar_value(mvar); - - if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) - { - /* There are putMVar(s) waiting... - * wake up the first thread on the queue - */ - ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16); - - /* actually perform the putMVar for the thread that we just woke up */ - tso = StgMVar_head(mvar); - PerformPut(tso,StgMVar_value(mvar)); - - if (TO_W_(StgTSO_dirty(tso)) == 0) { - foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; - } + } + + /* we got the value... */ + val = StgMVar_value(mvar); + + q = StgMVar_head(mvar); +loop: + if (q == stg_END_TSO_QUEUE_closure) { + /* No further putMVars, MVar is now empty */ + StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; + unlockClosure(mvar, stg_MVAR_DIRTY_info); + RET_P(val); + } + if (StgHeader_info(q) == stg_IND_info || + StgHeader_info(q) == stg_MSG_NULL_info) { + q = StgInd_indirectee(q); + goto loop; + } + + // There are putMVar(s) waiting... wake up the first thread on the queue + + tso = StgMVarTSOQueue_tso(q); + StgMVar_head(mvar) = StgMVarTSOQueue_link(q); + if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { + StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; + } - ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", - StgMVar_head(mvar) "ptr", 1) []; - StgMVar_head(mvar) = tso; + ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16); + ASSERT(StgTSO_block_info(tso) == mvar); - if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { - StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; - } + // actually perform the putMVar for the thread that we just woke up + W_ stack; + stack = StgTSO_stackobj(tso); + PerformPut(stack, StgMVar_value(mvar)); -#if defined(THREADED_RTS) - unlockClosure(mvar, stg_MVAR_DIRTY_info); -#else - SET_INFO(mvar,stg_MVAR_DIRTY_info); -#endif - RET_P(val); - } - else - { - /* No further putMVars, MVar is now empty */ - StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; - -#if defined(THREADED_RTS) - unlockClosure(mvar, stg_MVAR_DIRTY_info); -#else - SET_INFO(mvar,stg_MVAR_DIRTY_info); -#endif + // indicate that the MVar operation has now completed. + StgTSO__link(tso) = stg_END_TSO_QUEUE_closure; + + // no need to mark the TSO dirty, we have only written END_TSO_QUEUE. - RET_P(val); - } + foreign "C" tryWakeupThread(MyCapability() "ptr", tso) []; + + unlockClosure(mvar, stg_MVAR_DIRTY_info); + RET_P(val); } stg_tryTakeMVarzh { - W_ mvar, val, info, tso; + W_ mvar, val, info, tso, q; /* args: R1 = MVar closure */ - mvar = R1; #if defined(THREADED_RTS) @@ -1221,7 +1284,10 @@ stg_tryTakeMVarzh #else info = GET_INFO(mvar); #endif - + + /* If the MVar is empty, put ourselves on its blocking queue, + * and wait until we're woken up. + */ if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) { #if defined(THREADED_RTS) unlockClosure(mvar, info); @@ -1231,59 +1297,59 @@ stg_tryTakeMVarzh */ RET_NP(0, stg_NO_FINALIZER_closure); } - + if (info == stg_MVAR_CLEAN_info) { - foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr"); + foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") []; } /* we got the value... */ val = StgMVar_value(mvar); + + q = StgMVar_head(mvar); +loop: + if (q == stg_END_TSO_QUEUE_closure) { + /* No further putMVars, MVar is now empty */ + StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; + unlockClosure(mvar, stg_MVAR_DIRTY_info); + RET_NP(1, val); + } + if (StgHeader_info(q) == stg_IND_info || + StgHeader_info(q) == stg_MSG_NULL_info) { + q = StgInd_indirectee(q); + goto loop; + } + + // There are putMVar(s) waiting... wake up the first thread on the queue + + tso = StgMVarTSOQueue_tso(q); + StgMVar_head(mvar) = StgMVarTSOQueue_link(q); + if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { + StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; + } - if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) { + ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16); + ASSERT(StgTSO_block_info(tso) == mvar); - /* There are putMVar(s) waiting... - * wake up the first thread on the queue - */ - ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16); + // actually perform the putMVar for the thread that we just woke up + W_ stack; + stack = StgTSO_stackobj(tso); + PerformPut(stack, StgMVar_value(mvar)); - /* actually perform the putMVar for the thread that we just woke up */ - tso = StgMVar_head(mvar); - PerformPut(tso,StgMVar_value(mvar)); - if (TO_W_(StgTSO_dirty(tso)) == 0) { - foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; - } - - ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", - StgMVar_head(mvar) "ptr", 1) []; - StgMVar_head(mvar) = tso; + // indicate that the MVar operation has now completed. + StgTSO__link(tso) = stg_END_TSO_QUEUE_closure; + + // no need to mark the TSO dirty, we have only written END_TSO_QUEUE. - if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { - StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; - } -#if defined(THREADED_RTS) - unlockClosure(mvar, stg_MVAR_DIRTY_info); -#else - SET_INFO(mvar,stg_MVAR_DIRTY_info); -#endif - } - else - { - /* No further putMVars, MVar is now empty */ - StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; -#if defined(THREADED_RTS) - unlockClosure(mvar, stg_MVAR_DIRTY_info); -#else - SET_INFO(mvar,stg_MVAR_DIRTY_info); -#endif - } + foreign "C" tryWakeupThread(MyCapability() "ptr", tso) []; - RET_NP(1, val); + unlockClosure(mvar, stg_MVAR_DIRTY_info); + RET_NP(1,val); } stg_putMVarzh { - W_ mvar, val, info, tso; + W_ mvar, val, info, tso, q; /* args: R1 = MVar, R2 = value */ mvar = R1; @@ -1300,84 +1366,95 @@ stg_putMVarzh } if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) { + + // see Note [mvar-heap-check] above + HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR & R2_PTR, stg_putMVarzh); + + q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1); + + SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM); + StgMVarTSOQueue_link(q) = END_TSO_QUEUE; + StgMVarTSOQueue_tso(q) = CurrentTSO; + if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { - StgMVar_head(mvar) = CurrentTSO; + StgMVar_head(mvar) = q; } else { - foreign "C" setTSOLink(MyCapability() "ptr", - StgMVar_tail(mvar) "ptr", - CurrentTSO) []; + StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q; + foreign "C" recordClosureMutated(MyCapability() "ptr", + StgMVar_tail(mvar)) []; } - StgTSO__link(CurrentTSO) = stg_END_TSO_QUEUE_closure; + StgTSO__link(CurrentTSO) = q; StgTSO_block_info(CurrentTSO) = mvar; - // write barrier for throwTo(), which looks at block_info - // if why_blocked==BlockedOnMVar. - prim %write_barrier() []; StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; - StgMVar_tail(mvar) = CurrentTSO; - + StgMVar_tail(mvar) = q; + R1 = mvar; R2 = val; jump stg_block_putmvar; } - if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) { + q = StgMVar_head(mvar); +loop: + if (q == stg_END_TSO_QUEUE_closure) { + /* No further takes, the MVar is now full. */ + StgMVar_value(mvar) = val; + unlockClosure(mvar, stg_MVAR_DIRTY_info); + jump %ENTRY_CODE(Sp(0)); + } + if (StgHeader_info(q) == stg_IND_info || + StgHeader_info(q) == stg_MSG_NULL_info) { + q = StgInd_indirectee(q); + goto loop; + } - /* There are takeMVar(s) waiting: wake up the first one - */ - ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16); + // There are takeMVar(s) waiting: wake up the first one + + tso = StgMVarTSOQueue_tso(q); + StgMVar_head(mvar) = StgMVarTSOQueue_link(q); + if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { + StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; + } - /* actually perform the takeMVar */ - tso = StgMVar_head(mvar); - PerformTake(tso, val); - if (TO_W_(StgTSO_dirty(tso)) == 0) { - foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; - } - - ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", - StgMVar_head(mvar) "ptr", 1) []; - StgMVar_head(mvar) = tso; + ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16); + ASSERT(StgTSO_block_info(tso) == mvar); - if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { - StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; - } + // actually perform the takeMVar + W_ stack; + stack = StgTSO_stackobj(tso); + PerformTake(stack, val); -#if defined(THREADED_RTS) - unlockClosure(mvar, stg_MVAR_DIRTY_info); -#else - SET_INFO(mvar,stg_MVAR_DIRTY_info); -#endif - jump %ENTRY_CODE(Sp(0)); - } - else - { - /* No further takes, the MVar is now full. */ - StgMVar_value(mvar) = val; + // indicate that the MVar operation has now completed. + StgTSO__link(tso) = stg_END_TSO_QUEUE_closure; -#if defined(THREADED_RTS) - unlockClosure(mvar, stg_MVAR_DIRTY_info); -#else - SET_INFO(mvar,stg_MVAR_DIRTY_info); -#endif - jump %ENTRY_CODE(Sp(0)); + if (TO_W_(StgStack_dirty(stack)) == 0) { + foreign "C" dirty_STACK(MyCapability() "ptr", stack "ptr") []; } - /* ToDo: yield afterward for better communication performance? */ + foreign "C" tryWakeupThread(MyCapability() "ptr", tso) []; + + unlockClosure(mvar, stg_MVAR_DIRTY_info); + jump %ENTRY_CODE(Sp(0)); } stg_tryPutMVarzh { - W_ mvar, info, tso; + W_ mvar, val, info, tso, q; /* args: R1 = MVar, R2 = value */ mvar = R1; + val = R2; #if defined(THREADED_RTS) - ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2]; + ("ptr" info) = foreign "C" lockClosure(mvar "ptr") []; #else info = GET_INFO(mvar); #endif + if (info == stg_MVAR_CLEAN_info) { + foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr"); + } + if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) { #if defined(THREADED_RTS) unlockClosure(mvar, info); @@ -1385,51 +1462,47 @@ stg_tryPutMVarzh RET_N(0); } - if (info == stg_MVAR_CLEAN_info) { - foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr"); + q = StgMVar_head(mvar); +loop: + if (q == stg_END_TSO_QUEUE_closure) { + /* No further takes, the MVar is now full. */ + StgMVar_value(mvar) = val; + unlockClosure(mvar, stg_MVAR_DIRTY_info); + RET_N(1); + } + if (StgHeader_info(q) == stg_IND_info || + StgHeader_info(q) == stg_MSG_NULL_info) { + q = StgInd_indirectee(q); + goto loop; } - if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) { - - /* There are takeMVar(s) waiting: wake up the first one - */ - ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16); - - /* actually perform the takeMVar */ - tso = StgMVar_head(mvar); - PerformTake(tso, R2); - if (TO_W_(StgTSO_dirty(tso)) == 0) { - foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; - } - - ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", - StgMVar_head(mvar) "ptr", 1) []; - StgMVar_head(mvar) = tso; + // There are takeMVar(s) waiting: wake up the first one + + tso = StgMVarTSOQueue_tso(q); + StgMVar_head(mvar) = StgMVarTSOQueue_link(q); + if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { + StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; + } - if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { - StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; - } + ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16); + ASSERT(StgTSO_block_info(tso) == mvar); -#if defined(THREADED_RTS) - unlockClosure(mvar, stg_MVAR_DIRTY_info); -#else - SET_INFO(mvar,stg_MVAR_DIRTY_info); -#endif - } - else - { - /* No further takes, the MVar is now full. */ - StgMVar_value(mvar) = R2; + // actually perform the takeMVar + W_ stack; + stack = StgTSO_stackobj(tso); + PerformTake(stack, val); -#if defined(THREADED_RTS) - unlockClosure(mvar, stg_MVAR_DIRTY_info); -#else - SET_INFO(mvar,stg_MVAR_DIRTY_info); -#endif + // indicate that the MVar operation has now completed. + StgTSO__link(tso) = stg_END_TSO_QUEUE_closure; + + if (TO_W_(StgStack_dirty(stack)) == 0) { + foreign "C" dirty_STACK(MyCapability() "ptr", stack "ptr") []; } + foreign "C" tryWakeupThread(MyCapability() "ptr", tso) []; + + unlockClosure(mvar, stg_MVAR_DIRTY_info); RET_N(1); - /* ToDo: yield afterward for better communication performance? */ } @@ -1495,7 +1568,7 @@ stg_newBCOzh bitmap_arr = R5; - words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr); + words = BYTES_TO_WDS(SIZEOF_StgBCO) + BYTE_ARR_WDS(bitmap_arr); bytes = WDS(words); ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, stg_newBCOzh ); @@ -1513,7 +1586,7 @@ stg_newBCOzh W_ i; i = 0; for: - if (i < StgArrWords_words(bitmap_arr)) { + if (i < BYTE_ARR_WDS(bitmap_arr)) { StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i); i = i + 1; goto for; @@ -1578,9 +1651,10 @@ stg_unpackClosurezh }} out: - W_ ptrs_arr_sz, nptrs_arr_sz; + W_ ptrs_arr_sz, ptrs_arr_cards, nptrs_arr_sz; nptrs_arr_sz = SIZEOF_StgArrWords + WDS(nptrs); - ptrs_arr_sz = SIZEOF_StgMutArrPtrs + WDS(ptrs); + ptrs_arr_cards = mutArrPtrsCardWords(ptrs); + ptrs_arr_sz = SIZEOF_StgMutArrPtrs + WDS(ptrs) + WDS(ptrs_arr_cards); ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, stg_unpackClosurezh); @@ -1592,6 +1666,8 @@ out: SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]); StgMutArrPtrs_ptrs(ptrs_arr) = ptrs; + StgMutArrPtrs_size(ptrs_arr) = ptrs + ptrs_arr_cards; + p = 0; for: if(p < ptrs) { @@ -1599,9 +1675,12 @@ for: p = p + 1; goto for; } + /* We can leave the card table uninitialised, since the array is + allocated in the nursery. The GC will fill it in if/when the array + is promoted. */ SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]); - StgArrWords_words(nptrs_arr) = nptrs; + StgArrWords_bytes(nptrs_arr) = WDS(nptrs); p = 0; for2: if(p < nptrs) { @@ -1822,12 +1901,71 @@ stg_asyncDoProczh } #endif -// noDuplicate# tries to ensure that none of the thunks under -// evaluation by the current thread are also under evaluation by -// another thread. It relies on *both* threads doing noDuplicate#; -// the second one will get blocked if they are duplicating some work. +/* ----------------------------------------------------------------------------- + * noDuplicate# + * + * noDuplicate# tries to ensure that none of the thunks under + * evaluation by the current thread are also under evaluation by + * another thread. It relies on *both* threads doing noDuplicate#; + * the second one will get blocked if they are duplicating some work. + * + * The idea is that noDuplicate# is used within unsafePerformIO to + * ensure that the IO operation is performed at most once. + * noDuplicate# calls threadPaused which acquires an exclusive lock on + * all the thunks currently under evaluation by the current thread. + * + * Consider the following scenario. There is a thunk A, whose + * evaluation requires evaluating thunk B, where thunk B is an + * unsafePerformIO. Two threads, 1 and 2, bother enter A. Thread 2 + * is pre-empted before it enters B, and claims A by blackholing it + * (in threadPaused). Thread 1 now enters B, and calls noDuplicate#. + * + * thread 1 thread 2 + * +-----------+ +---------------+ + * | -------+-----> A <-------+------- | + * | update | BLACKHOLE | marked_update | + * +-----------+ +---------------+ + * | | | | + * ... ... + * | | +---------------+ + * +-----------+ + * | ------+-----> B + * | update | BLACKHOLE + * +-----------+ + * + * At this point: A is a blackhole, owned by thread 2. noDuplicate# + * calls threadPaused, which walks up the stack and + * - claims B on behalf of thread 1 + * - then it reaches the update frame for A, which it sees is already + * a BLACKHOLE and is therefore owned by another thread. Since + * thread 1 is duplicating work, the computation up to the update + * frame for A is suspended, including thunk B. + * - thunk B, which is an unsafePerformIO, has now been reverted to + * an AP_STACK which could be duplicated - BAD! + * - The solution is as follows: before calling threadPaused, we + * leave a frame on the stack (stg_noDuplicate_info) that will call + * noDuplicate# again if the current computation is suspended and + * restarted. + * + * See the test program in concurrent/prog003 for a way to demonstrate + * this. It needs to be run with +RTS -N3 or greater, and the bug + * only manifests occasionally (once very 10 runs or so). + * -------------------------------------------------------------------------- */ + +INFO_TABLE_RET(stg_noDuplicate, RET_SMALL) +{ + Sp_adj(1); + jump stg_noDuplicatezh; +} + stg_noDuplicatezh { + STK_CHK_GEN( WDS(1), NO_PTRS, stg_noDuplicatezh ); + // leave noDuplicate frame in case the current + // computation is suspended and restarted (see above). + Sp_adj(-1); + Sp(0) = stg_noDuplicate_info; + SAVE_THREAD_STATE(); ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16); foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") []; @@ -1837,10 +1975,18 @@ stg_noDuplicatezh } else { LOAD_THREAD_STATE(); ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16); + // remove the stg_noDuplicate frame if it is still there. + if (Sp(0) == stg_noDuplicate_info) { + Sp_adj(1); + } jump %ENTRY_CODE(Sp(0)); } } +/* ----------------------------------------------------------------------------- + Misc. primitives + -------------------------------------------------------------------------- */ + stg_getApStackValzh { W_ ap_stack, offset, val, ok; @@ -1859,10 +2005,6 @@ stg_getApStackValzh RET_NP(ok,val); } -/* ----------------------------------------------------------------------------- - Misc. primitives - -------------------------------------------------------------------------- */ - // Write the cost center stack of the first argument on stderr; return // the second. Possibly only makes sense for already evaluated // things? @@ -1884,23 +2026,62 @@ stg_getSparkzh W_ spark; #ifndef THREADED_RTS - RET_NP(0,ghczmprim_GHCziBool_False_closure); + RET_NP(0,ghczmprim_GHCziTypes_False_closure); #else (spark) = foreign "C" findSpark(MyCapability()); if (spark != 0) { RET_NP(1,spark); } else { - RET_NP(0,ghczmprim_GHCziBool_False_closure); + RET_NP(0,ghczmprim_GHCziTypes_False_closure); } #endif } +stg_numSparkszh +{ + W_ n; +#ifdef THREADED_RTS + (n) = foreign "C" dequeElements(Capability_sparks(MyCapability())); +#else + n = 0; +#endif + RET_N(n); +} + stg_traceEventzh { W_ msg; msg = R1; + #if defined(TRACING) || defined(DEBUG) + foreign "C" traceUserMsg(MyCapability() "ptr", msg "ptr") []; + +#elif defined(DTRACE) + + W_ enabled; + + // We should go through the macro HASKELLEVENT_USER_MSG_ENABLED from + // RtsProbes.h, but that header file includes unistd.h, which doesn't + // work in Cmm +#if !defined(solaris2_TARGET_OS) + (enabled) = foreign "C" __dtrace_isenabled$HaskellEvent$user__msg$v1() []; +#else + // Solaris' DTrace can't handle the + // __dtrace_isenabled$HaskellEvent$user__msg$v1 + // call above. This call is just for testing whether the user__msg + // probe is enabled, and is here for just performance optimization. + // Since preparation for the probe is not that complex I disable usage of + // this test above for Solaris and enable the probe usage manually + // here. Please note that this does not mean that the probe will be + // used during the runtime! You still need to enable it by consumption + // in your dtrace script as you do with any other probe. + enabled = 1; +#endif + if (enabled != 0) { + foreign "C" dtraceUserMsgWrapper(MyCapability() "ptr", msg "ptr") []; + } + #endif jump %ENTRY_CODE(Sp(0)); }