X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2FPrimOps.cmm;h=121102c8e17f1a97dab5e546d89820bd25832166;hb=e8d7985d56595f6b8004546bedc41627ca70c528;hp=a6e221bc5f05a9336d943b0f9635ffd3769e976a;hpb=b220844f7776989b6c1a631112b4c8b57cdf2dd2;p=ghc-hetmet.git diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index a6e221b..121102c 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -83,32 +83,65 @@ newByteArrayzh_fast RET_P(p); } +#define BA_ALIGN 16 +#define BA_MASK (BA_ALIGN-1) + newPinnedByteArrayzh_fast { - W_ words, payload_words, n, p; + W_ words, bytes, payload_words, p; MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast); - n = R1; - payload_words = ROUNDUP_BYTES_TO_WDS(n); + bytes = R1; + /* payload_words is what we will tell the profiler we had to allocate */ + payload_words = ROUNDUP_BYTES_TO_WDS(bytes); + /* When we actually allocate memory, we need to allow space for the + header: */ + bytes = bytes + SIZEOF_StgArrWords; + /* And we want to align to BA_ALIGN bytes, so we need to allow space + to shift up to BA_ALIGN - 1 bytes: */ + bytes = bytes + BA_ALIGN - 1; + /* Now we convert to a number of words: */ + words = ROUNDUP_BYTES_TO_WDS(bytes); - // We want an 8-byte aligned array. allocatePinned() gives us - // 8-byte aligned memory by default, but we want to align the - // *goods* inside the ArrWords object, so we have to check the - // size of the ArrWords header and adjust our size accordingly. 
- words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - if ((SIZEOF_StgArrWords & 7) != 0) { - words = words + 1; - } + ("ptr" p) = foreign "C" allocatePinned(words) []; + TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); + + /* Now we need to move p forward so that the payload is aligned + to BA_ALIGN bytes: */ + p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK); + + SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); + StgArrWords_words(p) = payload_words; + RET_P(p); +} + +newAlignedPinnedByteArrayzh_fast +{ + W_ words, bytes, payload_words, p, alignment; + + MAYBE_GC(NO_PTRS,newAlignedPinnedByteArrayzh_fast); + bytes = R1; + alignment = R2; + + /* payload_words is what we will tell the profiler we had to allocate */ + payload_words = ROUNDUP_BYTES_TO_WDS(bytes); + + /* When we actually allocate memory, we need to allow space for the + header: */ + bytes = bytes + SIZEOF_StgArrWords; + /* And we want to align to <alignment> bytes, so we need to allow space + to shift up to <alignment - 1> bytes: */ + bytes = bytes + alignment - 1; + /* Now we convert to a number of words: */ + words = ROUNDUP_BYTES_TO_WDS(bytes); ("ptr" p) = foreign "C" allocatePinned(words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); - // Again, if the ArrWords header isn't a multiple of 8 bytes, we - // have to push the object forward one word so that the goods - // fall on an 8-byte boundary. - if ((SIZEOF_StgArrWords & 7) != 0) { - p = p + WDS(1); - } + /* Now we need to move p forward so that the payload is aligned + to <alignment> bytes. Note that we are assuming that + <alignment> is a power of 2, which is technically not guaranteed */ + p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1)); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); StgArrWords_words(p) = payload_words; @@ -1040,7 +1073,8 @@ forkzh_fast foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") []; - // switch at the earliest opportunity + // context switch soon, but not immediately: we don't want every + // forkIO to force a context-switch. 
Capability_context_switch(MyCapability()) = 1 :: CInt; RET_P(threadid); @@ -1069,7 +1103,8 @@ forkOnzh_fast foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") []; - // switch at the earliest opportunity + // context switch soon, but not immediately: we don't want every + // forkIO to force a context-switch. Capability_context_switch(MyCapability()) = 1 :: CInt; RET_P(threadid);