RET_P(p);
}
+#define BA_ALIGN 16
+#define BA_MASK (BA_ALIGN-1)
+
newPinnedByteArrayzh_fast
{
- W_ words, payload_words, n, p;
+ W_ words, bytes, payload_words, p;
MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
- n = R1;
- payload_words = ROUNDUP_BYTES_TO_WDS(n);
+ bytes = R1;
+ /* payload_words is what we will tell the profiler we had to allocate */
+ payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
+ /* When we actually allocate memory, we need to allow space for the
+ header: */
+ bytes = bytes + SIZEOF_StgArrWords;
+ /* And we want to align to BA_ALIGN bytes, so we need to allow space
+ to shift up to BA_ALIGN - 1 bytes: */
+ bytes = bytes + BA_ALIGN - 1;
+ /* Now we convert to a number of words: */
+ words = ROUNDUP_BYTES_TO_WDS(bytes);
+
+ ("ptr" p) = foreign "C" allocatePinned(words) [];
+ TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
+
+ /* Now we need to move p forward so that the payload (which starts
+ at p + SIZEOF_StgArrWords) is aligned to BA_ALIGN bytes: */
+ p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);
+
+ SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
+ StgArrWords_words(p) = payload_words;
+ RET_P(p);
+}
- // We want a 16-byte aligned array. allocatePinned() gives us
- // 8-byte aligned memory by default, but we want to align the
- // *goods* inside the ArrWords object, so we have to check the
- // size of the ArrWords header and adjust our size accordingly.
- words = payload_words + ((SIZEOF_StgArrWords + 15) & ~15);
+newAlignedPinnedByteArrayzh_fast
+{
+ W_ words, bytes, payload_words, p, alignment;
+
+ MAYBE_GC(NO_PTRS,newAlignedPinnedByteArrayzh_fast);
+ bytes = R1;
+ alignment = R2;
+
+ /* payload_words is what we will tell the profiler we had to allocate */
+ payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
+
+ /* When we actually allocate memory, we need to allow space for the
+ header: */
+ bytes = bytes + SIZEOF_StgArrWords;
+ /* And we want to align to <alignment> bytes, so we need to allow space
+ to shift up to <alignment - 1> bytes: */
+ bytes = bytes + alignment - 1;
+ /* Now we convert to a number of words: */
+ words = ROUNDUP_BYTES_TO_WDS(bytes);
("ptr" p) = foreign "C" allocatePinned(words) [];
TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
- // Push the pointer forward so that the goods fall on a 16-byte boundary.
- p = p + ((p + SIZEOF_StgArrWords) & 15);
+ /* Now we need to move p forward so that the payload is aligned
+ to <alignment> bytes. The mask used below is only correct when
+ <alignment> is a power of 2 (so never 0); this is assumed, not checked */
+ p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));
SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
StgArrWords_words(p) = payload_words;
flag = R5;
eptr = R6;
- ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakForeignEnvzh_fast );
+ ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, mkWeakForeignEnvzh_fast );
w = Hp - SIZEOF_StgWeak + WDS(1);
SET_HDR(w, stg_WEAK_info, W_[CCCS]);
jump %ENTRY_CODE(Sp(0));
}
-decodeFloatzh_fast
-{
- W_ p;
- F_ arg;
- FETCH_MP_TEMP(mp_tmp1);
- FETCH_MP_TEMP(mp_tmp_w);
-
- /* arguments: F1 = Float# */
- arg = F1;
-
- ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
-
- /* Be prepared to tell Lennart-coded __decodeFloat
- where mantissa._mp_d can be put (it does not care about the rest) */
- p = Hp - SIZEOF_StgArrWords;
- SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
- StgArrWords_words(p) = 1;
- MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
-
- /* Perform the operation */
- foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
-
- /* returns: (Int# (expn), Int#, ByteArray#) */
- RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
-}
-
decodeFloatzuIntzh_fast
{
W_ p;
foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
- // switch at the earliest opportunity
+ // context switch soon, but not immediately: we don't want every
+ // forkIO to force a context-switch.
Capability_context_switch(MyCapability()) = 1 :: CInt;
RET_P(threadid);
foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
- // switch at the earliest opportunity
+ // context switch soon, but not immediately: we don't want every
+ // forkIO to force a context-switch.
Capability_context_switch(MyCapability()) = 1 :: CInt;
RET_P(threadid);