From: Simon Marlow Date: Thu, 5 Mar 2009 15:41:53 +0000 (+0000) Subject: Partial fix for #2917 X-Git-Tag: 2009-06-25~52 X-Git-Url: http://git.megacz.com/?a=commitdiff_plain;h=18ddadf0b8e35084a51f2560c3fe24ca8a2f3fea;p=ghc-base.git Partial fix for #2917 - add newAlignedPinnedByteArray# for allocating pinned BAs with arbitrary alignment - the old newPinnedByteArray# now aligns to 16 bytes Foreign.alloca will use newAlignedPinnedByteArray#, and so might end up wasting less space than before (we used to align to 8 by default). Foreign.allocaBytes and Foreign.mallocForeignPtrBytes will get 16-byte aligned memory, which is enough to avoid problems with SSE instructions on x86, for example. There was a bug in the old newPinnedByteArray#: it aligned to 8 bytes, but would have failed if the header was not a multiple of 8 (fortunately it always was, even with profiling). Also we occasionally wasted some space unnecessarily due to alignment in allocatePinned(). I haven't done anything about Foreign.malloc/mallocBytes, which will give you the same alignment guarantees as malloc() (8 bytes on Linux/x86 here). --- diff --git a/Foreign/Marshal/Alloc.hs b/Foreign/Marshal/Alloc.hs index 282791a..19cce12 100644 --- a/Foreign/Marshal/Alloc.hs +++ b/Foreign/Marshal/Alloc.hs @@ -32,7 +32,7 @@ module Foreign.Marshal.Alloc ( import Data.Maybe import Foreign.C.Types ( CSize ) -import Foreign.Storable ( Storable(sizeOf) ) +import Foreign.Storable ( Storable(sizeOf,alignment) ) #ifndef __GLASGOW_HASKELL__ import Foreign.Ptr ( Ptr, nullPtr, FunPtr ) @@ -97,7 +97,7 @@ alloca :: Storable a => (Ptr a -> IO b) -> IO b alloca = doAlloca undefined where doAlloca :: Storable a' => a' -> (Ptr a' -> IO b') -> IO b' - doAlloca dummy = allocaBytes (sizeOf dummy) + doAlloca dummy = allocaBytesAligned (sizeOf dummy) (alignment dummy) -- |@'allocaBytes' n f@ executes the computation @f@, passing as argument -- a pointer to a temporarily allocated block of memory of @n@ bytes. @@ -118,9 +118,23 @@ allocaBytes (I# size) action = IO $ \ s0 -> case touch# barr# s3 of { s4 -> (# s4, r #) }}}}} + +allocaBytesAligned :: Int -> Int -> (Ptr a -> IO b) -> IO b +allocaBytesAligned (I# size) (I# align) action = IO $ \ s0 -> + case newAlignedPinnedByteArray# size align s0 of { (# s1, mbarr# #) -> + case unsafeFreezeByteArray# mbarr# s1 of { (# s2, barr# #) -> + let addr = Ptr (byteArrayContents# barr#) in + case action addr of { IO action' -> + case action' s2 of { (# s3, r #) -> + case touch# barr# s3 of { s4 -> + (# s4, r #) + }}}}} #else allocaBytes :: Int -> (Ptr a -> IO b) -> IO b allocaBytes size = bracket (mallocBytes size) free + +allocaBytesAligned :: Int -> Int -> (Ptr a -> IO b) -> IO b +allocaBytesAligned size align = allocaBytes size -- wrong #endif -- |Resize a memory area that was allocated with 'malloc' or 'mallocBytes' diff --git a/GHC/ForeignPtr.hs b/GHC/ForeignPtr.hs index 50fa58d..9868942 100644 --- a/GHC/ForeignPtr.hs +++ b/GHC/ForeignPtr.hs @@ -152,11 +152,12 @@ mallocForeignPtr = doMalloc undefined doMalloc a = do r <- newIORef (NoFinalizers, []) IO $ \s -> - case newPinnedByteArray# size s of { (# s', mbarr# #) -> + case newAlignedPinnedByteArray# size align s of { (# s', mbarr# #) -> (# s', ForeignPtr (byteArrayContents# (unsafeCoerce# mbarr#)) (MallocPtr mbarr# r) #) } - where (I# size) = sizeOf a + where (I# size) = sizeOf a + (I# align) = alignment a -- | This function is similar to 'mallocForeignPtr', except that the -- size of the memory required is given explicitly as a number of bytes. @@ -186,11 +187,12 @@ mallocPlainForeignPtr :: Storable a => IO (ForeignPtr a) mallocPlainForeignPtr = doMalloc undefined where doMalloc :: Storable b => b -> IO (ForeignPtr b) doMalloc a = IO $ \s -> - case newPinnedByteArray# size s of { (# s', mbarr# #) -> + case newAlignedPinnedByteArray# size align s of { (# s', mbarr# #) -> (# s', ForeignPtr (byteArrayContents# (unsafeCoerce# mbarr#)) (PlainPtr mbarr#) #) } - where (I# size) = sizeOf a + where (I# size) = sizeOf a + (I# align) = alignment a -- | This function is similar to 'mallocForeignPtrBytes', except that -- the internally an optimised ForeignPtr representation with no