import pthread_mutex_lock;
import pthread_mutex_unlock;
#endif
-import base_GHCziIOBase_NestedAtomically_closure;
+import base_ControlziExceptionziBase_nestedAtomically_closure;
import EnterCriticalSection;
import LeaveCriticalSection;
+import ghczmprim_GHCziBool_False_closure;
/*-----------------------------------------------------------------------------
Array Primitives
RET_P(p);
}
+#define BA_ALIGN 16
+#define BA_MASK (BA_ALIGN-1)
+
newPinnedByteArrayzh_fast
{
- W_ words, payload_words, n, p;
+ W_ words, bytes, payload_words, p;
MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
- n = R1;
- payload_words = ROUNDUP_BYTES_TO_WDS(n);
+ bytes = R1;
+ /* payload_words is what we will tell the profiler we had to allocate */
+ payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
+ /* When we actually allocate memory, we need to allow space for the
+ header: */
+ bytes = bytes + SIZEOF_StgArrWords;
+ /* And we want to align to BA_ALIGN bytes, so we need to allow space
+ to shift up to BA_ALIGN - 1 bytes: */
+ bytes = bytes + BA_ALIGN - 1;
+ /* Now we convert to a number of words: */
+ words = ROUNDUP_BYTES_TO_WDS(bytes);
- // We want an 8-byte aligned array. allocatePinned() gives us
- // 8-byte aligned memory by default, but we want to align the
- // *goods* inside the ArrWords object, so we have to check the
- // size of the ArrWords header and adjust our size accordingly.
- words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
- if ((SIZEOF_StgArrWords & 7) != 0) {
- words = words + 1;
- }
+ ("ptr" p) = foreign "C" allocatePinned(words) [];
+ TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
+
+ /* Now we need to move p forward so that the payload is aligned
+ to BA_ALIGN bytes: */
+ p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);
+
+ SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
+ StgArrWords_words(p) = payload_words;
+ RET_P(p);
+}
+
+newAlignedPinnedByteArrayzh_fast
+{
+ W_ words, bytes, payload_words, p, alignment;
+
+ MAYBE_GC(NO_PTRS,newAlignedPinnedByteArrayzh_fast);
+ bytes = R1;
+ alignment = R2;
+
+ /* payload_words is what we will tell the profiler we had to allocate */
+ payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
+
+ /* When we actually allocate memory, we need to allow space for the
+ header: */
+ bytes = bytes + SIZEOF_StgArrWords;
+ /* And we want to align to <alignment> bytes, so we need to allow space
+ to shift up to <alignment - 1> bytes: */
+ bytes = bytes + alignment - 1;
+ /* Now we convert to a number of words: */
+ words = ROUNDUP_BYTES_TO_WDS(bytes);
("ptr" p) = foreign "C" allocatePinned(words) [];
TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
- // Again, if the ArrWords header isn't a multiple of 8 bytes, we
- // have to push the object forward one word so that the goods
- // fall on an 8-byte boundary.
- if ((SIZEOF_StgArrWords & 7) != 0) {
- p = p + WDS(1);
- }
+ /* Now we need to move p forward so that the payload is aligned
+ to <alignment> bytes. Note that we are assuming that
+ <alignment> is a power of 2, which is technically not guaranteed */
+ p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));
SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
StgArrWords_words(p) = payload_words;
atomicModifyMutVarzh_fast
{
- W_ mv, z, x, y, r;
+ W_ mv, f, z, x, y, r, h;
/* Args: R1 :: MutVar#, R2 :: a -> (a,b) */
/* If x is the current contents of the MutVar#, then
HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
-#if defined(THREADED_RTS)
- ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
-#endif
-
- x = StgMutVar_var(R1);
+ mv = R1;
+ f = R2;
TICK_ALLOC_THUNK_2();
CCCS_ALLOC(THUNK_2_SIZE);
z = Hp - THUNK_2_SIZE + WDS(1);
SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
LDV_RECORD_CREATE(z);
- StgThunk_payload(z,0) = R2;
- StgThunk_payload(z,1) = x;
+ StgThunk_payload(z,0) = f;
TICK_ALLOC_THUNK_1();
CCCS_ALLOC(THUNK_1_SIZE);
LDV_RECORD_CREATE(y);
StgThunk_payload(y,0) = z;
- StgMutVar_var(R1) = y;
- foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];
-
TICK_ALLOC_THUNK_1();
CCCS_ALLOC(THUNK_1_SIZE);
r = y - THUNK_1_SIZE;
LDV_RECORD_CREATE(r);
StgThunk_payload(r,0) = z;
-#if defined(THREADED_RTS)
- RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
+ retry:
+ x = StgMutVar_var(mv);
+ StgThunk_payload(z,1) = x;
+#ifdef THREADED_RTS
+ (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) [];
+ if (h != x) { goto retry; }
+#else
+ StgMutVar_var(mv) = y;
#endif
+ if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
+ foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") [];
+ }
+
RET_P(r);
}
w = Hp - SIZEOF_StgWeak + WDS(1);
SET_HDR(w, stg_WEAK_info, W_[CCCS]);
- StgWeak_key(w) = R1;
- StgWeak_value(w) = R2;
- StgWeak_finalizer(w) = R3;
+ // We don't care about cfinalizer here.
+ // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or
+ // something else?
+
+ StgWeak_key(w) = R1;
+ StgWeak_value(w) = R2;
+ StgWeak_finalizer(w) = R3;
+ StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure;
StgWeak_link(w) = W_[weak_ptr_list];
W_[weak_ptr_list] = w;
RET_P(w);
}
+// mkWeakForeignEnv# : create a Weak# whose finalizer is a C function
+// rather than a Haskell closure.  The C function pointer, its argument,
+// an environment pointer and a has-environment flag are packed into a
+// 4-word ArrWords object stored in the weak pointer's cfinalizer field;
+// the RTS passes those four words to runCFinalizer when the weak
+// pointer is finalized (see finalizzeWeakzh_fast).
+mkWeakForeignEnvzh_fast
+{
+  /* R1 = key
+     R2 = value
+     R3 = finalizer        -- C function pointer
+     R4 = pointer          -- argument handed to the finalizer
+     R5 = has environment (0 or 1)
+     R6 = environment      -- extra argument, meaningful only if R5 != 0
+  */
+  W_ w, payload_words, words, p;
+
+  W_ key, val, fptr, ptr, flag, eptr;
+
+  // Save the argument registers in locals; ALLOC_PRIM below may GC,
+  // and only the pointer arguments (R1, R2) are declared as roots.
+  // The others (R3-R6) are raw words/addresses, so this is safe.
+  key = R1;
+  val = R2;
+  fptr = R3;
+  ptr = R4;
+  flag = R5;
+  eptr = R6;
+
+  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, mkWeakForeignEnvzh_fast );
+
+  w = Hp - SIZEOF_StgWeak + WDS(1);
+  SET_HDR(w, stg_WEAK_info, W_[CCCS]);
+
+  // Build the ArrWords that describes the C finalizer: exactly four
+  // payload words, read back positionally by finalizzeWeakzh_fast.
+  payload_words = 4;
+  words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
+  ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr", words) [];
+
+  TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
+  SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
+
+  StgArrWords_words(p) = payload_words;
+  StgArrWords_payload(p,0) = fptr;
+  StgArrWords_payload(p,1) = ptr;
+  StgArrWords_payload(p,2) = eptr;
+  StgArrWords_payload(p,3) = flag;
+
+  // The Haskell-level finalizer slot is unused for foreign finalizers.
+  // NOTE(review): stg_NO_FINALIZER_closure is used as the "none"
+  // sentinel here, matching the check in finalizzeWeakzh_fast -- confirm
+  // nothing else inspects this field.
+
+  StgWeak_key(w) = key;
+  StgWeak_value(w) = val;
+  StgWeak_finalizer(w) = stg_NO_FINALIZER_closure;
+  StgWeak_cfinalizer(w) = p;
+
+  // Link the new weak pointer onto the global weak_ptr_list so the GC
+  // will find and process it.
+  StgWeak_link(w) = W_[weak_ptr_list];
+  W_[weak_ptr_list] = w;
+
+  IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
+
+  RET_P(w);
+}
finalizzeWeakzh_fast
{
/* R1 = weak ptr
*/
- W_ w, f;
+ W_ w, f, arr;
w = R1;
SET_INFO(w,stg_DEAD_WEAK_info);
LDV_RECORD_CREATE(w);
- f = StgWeak_finalizer(w);
+ f = StgWeak_finalizer(w);
+ arr = StgWeak_cfinalizer(w);
+
StgDeadWeak_link(w) = StgWeak_link(w);
+ if (arr != stg_NO_FINALIZER_closure) {
+ foreign "C" runCFinalizer(StgArrWords_payload(arr,0),
+ StgArrWords_payload(arr,1),
+ StgArrWords_payload(arr,2),
+ StgArrWords_payload(arr,3)) [];
+ }
+
/* return the finalizer */
if (f == stg_NO_FINALIZER_closure) {
RET_NP(0,stg_NO_FINALIZER_closure);
foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
- // switch at the earliest opportunity
- CInt[context_switch] = 1 :: CInt;
+ // context switch soon, but not immediately: we don't want every
+ // forkIO to force a context-switch.
+ Capability_context_switch(MyCapability()) = 1 :: CInt;
RET_P(threadid);
}
foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
- // switch at the earliest opportunity
- CInt[context_switch] = 1 :: CInt;
+ // context switch soon, but not immediately: we don't want every
+ // forkIO to force a context-switch.
+ Capability_context_switch(MyCapability()) = 1 :: CInt;
RET_P(threadid);
}
RET_N(r);
}
+// threadStatus# : return an Int# describing a thread's state: the raw
+// why_blocked field, or a magic value for finished threads (16 =
+// complete, 17 = killed).  The encoding must stay in sync with
+// GHC.Conc.threadStatus on the Haskell side.
+threadStatuszh_fast
+{
+  /* args: R1 :: ThreadId# */
+  W_ tso;
+  W_ why_blocked;
+  W_ what_next;
+  W_ ret;
+
+  tso = R1;
+  loop:
+    // A relocated TSO is a forwarding pointer; chase the _link field
+    // until we reach the live TSO.
+    if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
+      tso = StgTSO__link(tso);
+      goto loop;
+    }
+
+  what_next   = TO_W_(StgTSO_what_next(tso));
+  why_blocked = TO_W_(StgTSO_why_blocked(tso));
+  // Note: these two reads are not atomic, so they might end up
+  // being inconsistent.  It doesn't matter, since we
+  // only return one or the other.  If we wanted to return the
+  // contents of block_info too, then we'd have to do some synchronisation.
+
+  if (what_next == ThreadComplete) {
+      ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
+  } else {
+      if (what_next == ThreadKilled) {
+          ret = 17;  // NB. magic, see above
+      } else {
+          ret = why_blocked;
+      }
+  }
+  RET_N(ret);
+}
/* -----------------------------------------------------------------------------
* TVar primitives
* -------------------------------------------------------------------------- */
-#ifdef REG_R1
#define SP_OFF 0
-#define IF_NOT_REG_R1(x)
-#else
-#define SP_OFF 1
-#define IF_NOT_REG_R1(x) x
-#endif
// Catch retry frame ------------------------------------------------------------
#if defined(PROFILING)
W_ unused1, W_ unused2,
#endif
- W_ unused3, "ptr" W_ unused4, "ptr" W_ unused5)
+ W_ unused3, P_ unused4, P_ unused5)
{
W_ r, frame, trec, outer;
- IF_NOT_REG_R1(W_ rval; rval = Sp(0); Sp_adj(1); )
frame = Sp;
trec = StgTSO_trec(CurrentTSO);
/* Succeeded (either first branch or second branch) */
StgTSO_trec(CurrentTSO) = outer;
Sp = Sp + SIZEOF_StgCatchRetryFrame;
- IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
jump %ENTRY_CODE(Sp(SP_OFF));
} else {
/* Did not commit: re-execute */
#if defined(PROFILING)
W_ unused1, W_ unused2,
#endif
- "ptr" W_ unused3, "ptr" W_ unused4)
+ P_ unused3, P_ unused4)
{
W_ frame, trec, valid, next_invariant, q, outer;
- IF_NOT_REG_R1(W_ rval; rval = Sp(0); Sp_adj(1); )
frame = Sp;
trec = StgTSO_trec(CurrentTSO);
/* Transaction was valid: commit succeeded */
StgTSO_trec(CurrentTSO) = NO_TREC;
Sp = Sp + SIZEOF_StgAtomicallyFrame;
- IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
jump %ENTRY_CODE(Sp(SP_OFF));
} else {
/* Transaction was not valid: try again */
#if defined(PROFILING)
W_ unused1, W_ unused2,
#endif
- "ptr" W_ unused3, "ptr" W_ unused4)
+ P_ unused3, P_ unused4)
{
W_ frame, trec, valid;
- IF_NOT_REG_R1(W_ rval; rval = Sp(0); Sp_adj(1); )
frame = Sp;
(valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
if (valid != 0) {
/* Previous attempt is still valid: no point trying again yet */
- IF_NOT_REG_R1(Sp_adj(-2);
- Sp(1) = stg_NO_FINALIZER_closure;
- Sp(0) = stg_ut_1_0_unreg_info;)
jump stg_block_noregs;
} else {
/* Previous attempt is no longer valid: try again */
// STM catch frame --------------------------------------------------------------
-#ifdef REG_R1
#define SP_OFF 0
-#else
-#define SP_OFF 1
-#endif
/* Catch frames are very similar to update frames, but when entering
* one we just pop the frame off the stack and perform the correct
#if defined(PROFILING)
W_ unused1, W_ unused2,
#endif
- "ptr" W_ unused3, "ptr" W_ unused4)
+ P_ unused3, P_ unused4)
{
- IF_NOT_REG_R1(W_ rval; rval = Sp(0); Sp_adj(1); )
W_ r, frame, trec, outer;
frame = Sp;
trec = StgTSO_trec(CurrentTSO);
/* Commit succeeded */
StgTSO_trec(CurrentTSO) = outer;
Sp = Sp + SIZEOF_StgCatchSTMFrame;
- IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
jump Sp(SP_OFF);
} else {
/* Commit failed */
/* Nested transactions are not allowed; raise an exception */
if (old_trec != NO_TREC) {
- R1 = base_GHCziIOBase_NestedAtomically_closure;
+ R1 = base_ControlziExceptionziBase_nestedAtomically_closure;
jump raisezh_fast;
}
StgHeader_info(frame) = stg_atomically_waiting_frame_info;
Sp = frame;
// Fix up the stack in the unregisterised case: the return convention is different.
- IF_NOT_REG_R1(Sp_adj(-2);
- Sp(1) = stg_NO_FINALIZER_closure;
- Sp(0) = stg_ut_1_0_unreg_info;)
R3 = trec; // passing to stmWaitUnblock()
jump stg_block_stmwait;
} else {
RET_P(result);
}
+// readTVarIO# : read the current value of a TVar directly, without
+// starting or joining a transaction.
+readTVarIOzh_fast
+{
+    W_ result;
+
+    /* args: R1 = TVar closure */
+again:
+    result = StgTVar_current_value(R1);
+    // If the slot currently holds a TREC header instead of a value,
+    // the TVar is presumably mid-update by a committing transaction
+    // (NOTE(review): confirm against STM.c lock protocol) -- spin
+    // until a real value is visible.
+    if (%INFO_PTR(result) == stg_TREC_HEADER_info) {
+        goto again;
+    }
+    RET_P(result);
+}
writeTVarzh_fast
{
}
-/* If R1 isn't available, pass it on the stack */
-#ifdef REG_R1
#define PerformTake(tso, value) \
W_[StgTSO_sp(tso) + WDS(1)] = value; \
W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
-#else
-#define PerformTake(tso, value) \
- W_[StgTSO_sp(tso) + WDS(1)] = value; \
- W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
-#endif
#define PerformPut(tso,lval) \
StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3); \
CurrentTSO) [];
}
StgTSO__link(CurrentTSO) = stg_END_TSO_QUEUE_closure;
- StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
StgTSO_block_info(CurrentTSO) = mvar;
+ // write barrier for throwTo(), which looks at block_info
+ // if why_blocked==BlockedOnMVar.
+ prim %write_barrier() [];
+ StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
StgMVar_tail(mvar) = CurrentTSO;
+ R1 = mvar;
jump stg_block_takemvar;
}
tso = StgMVar_head(mvar);
PerformPut(tso,StgMVar_value(mvar));
- if (StgTSO_flags(tso) & TSO_DIRTY::I32 == 0) {
+ if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
}
/* actually perform the putMVar for the thread that we just woke up */
tso = StgMVar_head(mvar);
PerformPut(tso,StgMVar_value(mvar));
- if (StgTSO_flags(tso) & TSO_DIRTY::I32 == 0) {
+ if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
}
putMVarzh_fast
{
- W_ mvar, info, tso;
+ W_ mvar, val, info, tso;
/* args: R1 = MVar, R2 = value */
mvar = R1;
+ val = R2;
#if defined(THREADED_RTS)
- ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
+ ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
info = GET_INFO(mvar);
#endif
CurrentTSO) [];
}
StgTSO__link(CurrentTSO) = stg_END_TSO_QUEUE_closure;
- StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
StgTSO_block_info(CurrentTSO) = mvar;
+ // write barrier for throwTo(), which looks at block_info
+ // if why_blocked==BlockedOnMVar.
+ prim %write_barrier() [];
+ StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
StgMVar_tail(mvar) = CurrentTSO;
+ R1 = mvar;
+ R2 = val;
jump stg_block_putmvar;
}
/* actually perform the takeMVar */
tso = StgMVar_head(mvar);
- PerformTake(tso, R2);
- if (StgTSO_flags(tso) & TSO_DIRTY::I32 == 0) {
+ PerformTake(tso, val);
+ if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
}
else
{
/* No further takes, the MVar is now full. */
- StgMVar_value(mvar) = R2;
+ StgMVar_value(mvar) = val;
#if defined(THREADED_RTS)
unlockClosure(mvar, stg_MVAR_DIRTY_info);
/* actually perform the takeMVar */
tso = StgMVar_head(mvar);
PerformTake(tso, R2);
- if (StgTSO_flags(tso) & TSO_DIRTY::I32 == 0) {
+ if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
}
}
RET_NP(ok,val);
}
+
+/* -----------------------------------------------------------------------------
+ Misc. primitives
+ -------------------------------------------------------------------------- */
+
+// traceCcs# : write the cost-centre stack of the first argument (R1)
+// on stderr, then return the second argument (R2) by entering it.
+// In a non-profiling build the print is compiled out and this just
+// returns R2.
+// NOTE(review): reads the CCS straight out of the closure header, so
+// it presumably only makes sense for already-evaluated things -- confirm.
+traceCcszh_fast
+{
+  W_ ccs;
+
+#ifdef PROFILING
+  // UNTAG: strip the pointer tag before touching the header.
+  ccs = StgHeader_ccs(UNTAG(R1));
+  // [R2] keeps the return value live across the C call.
+  foreign "C" fprintCCS_stderr(ccs "ptr") [R2];
+#endif
+
+  R1 = R2;
+  ENTER();
+}
+
+// getSpark# : try to take a spark from the current capability's spark
+// pool.  Returns (1, spark) on success, and (0, False) when the pool
+// is empty -- or always in the non-threaded RTS, which has no sparks.
+getSparkzh_fast
+{
+   W_ spark;
+
+#ifndef THREADED_RTS
+   RET_NP(0,ghczmprim_GHCziBool_False_closure);
+#else
+   (spark) = foreign "C" findSpark(MyCapability());
+   if (spark != 0) {
+      RET_NP(1,spark);
+   } else {
+      // False is just a well-formed dummy closure for the "no spark"
+      // case; callers must inspect the Int# before using the pointer.
+      RET_NP(0,ghczmprim_GHCziBool_False_closure);
+   }
+#endif
+}