1 {-# OPTIONS_GHC -cpp -fglasgow-exts #-}
3 -- Module : ByteString.Base
5 -- Maintainer : dons@cse.unsw.edu.au
6 -- Stability : experimental
7 -- Portability : portable, requires ffi and cpp
8 -- Tested with : GHC 6.4.1 and Hugs March 2005
11 -- | A module containing semi-public ByteString internals. This exposes
12 -- the ByteString representation and low level construction functions.
13 -- Modules which extend the ByteString system will need to use this module
14 -- while ideally most users will be able to make do with the public interface
17 module Data.ByteString.Base (
19 -- * The @ByteString@ type and representation
20 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
23 unsafeHead, -- :: ByteString -> Word8
24 unsafeTail, -- :: ByteString -> ByteString
25 unsafeIndex, -- :: ByteString -> Int -> Word8
26 unsafeTake, -- :: Int -> ByteString -> ByteString
27 unsafeDrop, -- :: Int -> ByteString -> ByteString
29 -- * Low level introduction and elimination
30 empty, -- :: ByteString
31 create, -- :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString
32 createAndTrim, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
33 createAndTrim', -- :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a)
34 mallocByteString, -- :: Int -> IO (ForeignPtr a)
36 unsafeCreate, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
37 unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
38 unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
40 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString
41 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
43 #if defined(__GLASGOW_HASKELL__)
44 packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
45 packAddress, -- :: Addr# -> ByteString
46 unsafePackAddress, -- :: Int -> Addr# -> ByteString
47 unsafeFinalize, -- :: ByteString -> IO ()
51 inlinePerformIO, -- :: IO a -> a
52 nullForeignPtr, -- :: ForeignPtr Word8
54 countOccurrences, -- :: (Storable a, Num a) => Ptr a -> Ptr Word8 -> Int -> IO ()
56 -- * Standard C Functions
57 c_strlen, -- :: CString -> IO CInt
58 c_malloc, -- :: CInt -> IO (Ptr Word8)
59 c_free, -- :: Ptr Word8 -> IO ()
60 c_free_finalizer, -- :: FunPtr (Ptr Word8 -> IO ())
62 memchr, -- :: Ptr Word8 -> Word8 -> CSize -> IO Ptr Word8
63 memcmp, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
64 memcpy, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
65 memmove, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
66 memset, -- :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
69 c_reverse, -- :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
70 c_intersperse, -- :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
71 c_maximum, -- :: Ptr Word8 -> CInt -> IO Word8
72 c_minimum, -- :: Ptr Word8 -> CInt -> IO Word8
73 c_count, -- :: Ptr Word8 -> CInt -> Word8 -> IO CInt
75 -- * Internal GHC magic
76 #if defined(__GLASGOW_HASKELL__)
77 memcpy_ptr_baoff, -- :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
81 w2c, c2w, isSpaceWord8
85 import Foreign.ForeignPtr (ForeignPtr, newForeignPtr_, withForeignPtr)
86 import Foreign.Ptr (Ptr, FunPtr, plusPtr, castPtr)
87 import Foreign.Storable (Storable(..))
88 import Foreign.C.Types (CInt, CSize, CULong)
89 import Foreign.C.String (CString, CStringLen)
91 import Control.Exception (assert)
93 import Data.Char (ord)
94 import Data.Word (Word8)
96 #if defined(__GLASGOW_HASKELL__)
97 import qualified Foreign.ForeignPtr as FC (finalizeForeignPtr)
98 import qualified Foreign.Concurrent as FC (newForeignPtr)
100 import Data.Generics (Data(..), Typeable(..))
101 import GHC.Prim (Addr#)
102 import GHC.Ptr (Ptr(..))
103 import GHC.Base (realWorld#,unsafeChr)
104 import GHC.IOBase (IO(IO), unsafePerformIO, RawBuffer)
106 import Data.Char (chr)
107 import System.IO.Unsafe (unsafePerformIO)
110 #if __GLASGOW_HASKELL__ >= 605 && !defined(SLOW_FOREIGN_PTR)
111 import GHC.ForeignPtr (mallocPlainForeignPtrBytes)
113 import Foreign.ForeignPtr (mallocForeignPtrBytes)
116 #if __GLASGOW_HASKELL__>=605
117 import GHC.ForeignPtr (ForeignPtr(ForeignPtr))
118 import GHC.Base (nullAddr#)
120 import Foreign.Ptr (nullPtr)
123 -- CFILES stuff is Hugs only
124 {-# CFILES cbits/fpstring.c #-}
126 -- -----------------------------------------------------------------------------
128 -- Useful macros, until we have bang patterns
131 #define STRICT1(f) f a | a `seq` False = undefined
132 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
133 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
134 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
135 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
137 -- -----------------------------------------------------------------------------
139 -- | A space-efficient representation of a Word8 vector, supporting many
140 -- efficient operations. A 'ByteString' contains 8-bit characters only.
142 -- Instances of Eq, Ord, Read, Show, Data, Typeable
144 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
145 {-# UNPACK #-} !Int -- offset
146 {-# UNPACK #-} !Int -- length
148 #if defined(__GLASGOW_HASKELL__)
149 deriving (Data, Typeable)
152 -- | /O(1)/ The empty 'ByteString'
154 empty = PS nullForeignPtr 0 0
156 nullForeignPtr :: ForeignPtr Word8
157 #if __GLASGOW_HASKELL__>=605
158 nullForeignPtr = ForeignPtr nullAddr# undefined --TODO: should ForeignPtrContents be strict?
160 nullForeignPtr = unsafePerformIO $ newForeignPtr_ nullPtr
161 {-# NOINLINE nullForeignPtr #-}
164 -- ---------------------------------------------------------------------
166 -- Extensions to the basic interface
169 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
170 -- check for the empty case, so there is an obligation on the programmer
171 -- to provide a proof that the ByteString is non-empty.
172 unsafeHead :: ByteString -> Word8
173 unsafeHead (PS x s l) = assert (l > 0) $
174 inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
175 {-# INLINE unsafeHead #-}
177 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
178 -- check for the empty case. As with 'unsafeHead', the programmer must
179 -- provide a separate proof that the ByteString is non-empty.
180 unsafeTail :: ByteString -> ByteString
181 unsafeTail (PS ps s l) = assert (l > 0) $ PS ps (s+1) (l-1)
182 {-# INLINE unsafeTail #-}
184 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
185 -- This omits the bounds check, which means there is an accompanying
186 -- obligation on the programmer to ensure the bounds are checked in some
188 unsafeIndex :: ByteString -> Int -> Word8
189 unsafeIndex (PS x s l) i = assert (i >= 0 && i < l) $
190 inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
191 {-# INLINE unsafeIndex #-}
193 -- | A variety of 'take' which omits the checks on @n@ so there is an
194 -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
195 unsafeTake :: Int -> ByteString -> ByteString
196 unsafeTake n (PS x s l) = assert (0 <= n && n <= l) $ PS x s n
197 {-# INLINE unsafeTake #-}
199 -- | A variety of 'drop' which omits the checks on @n@ so there is an
200 -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
201 unsafeDrop :: Int -> ByteString -> ByteString
202 unsafeDrop n (PS x s l) = assert (0 <= n && n <= l) $ PS x (s+n) (l-n)
203 {-# INLINE unsafeDrop #-}
205 -- ---------------------------------------------------------------------
206 -- Low level constructors
208 -- | /O(1)/ Build a ByteString from a ForeignPtr
209 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
210 fromForeignPtr fp l = PS fp 0 l
212 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
213 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
214 toForeignPtr (PS ps s l) = (ps, s, l)
216 -- | A way of creating ByteStrings outside the IO monad. The @Int@
217 -- argument gives the final size of the ByteString. Unlike
218 -- 'createAndTrim' the ByteString is not reallocated if the final size
219 -- is less than the estimated size.
220 unsafeCreate :: Int -> (Ptr Word8 -> IO ()) -> ByteString
221 unsafeCreate l f = unsafePerformIO (create l f)
222 {-# INLINE unsafeCreate #-}
224 -- | Create ByteString of size @l@ and use action @f@ to fill it's contents.
225 create :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString
227 fp <- mallocByteString l
228 withForeignPtr fp $ \p -> f p
231 -- | Given the maximum size needed and a function to make the contents
232 -- of a ByteString, createAndTrim makes the 'ByteString'. The generating
233 -- function is required to return the actual final size (<= the maximum
234 -- size), and the resulting byte array is realloced to this size.
236 -- createAndTrim is the main mechanism for creating custom, efficient
237 -- ByteString functions, using Haskell or C functions to fill the space.
239 createAndTrim :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
240 createAndTrim l f = do
241 fp <- mallocByteString l
242 withForeignPtr fp $ \p -> do
244 if assert (l' <= l) $ l' >= l
245 then return $! PS fp 0 l
246 else create l' $ \p' -> memcpy p' p (fromIntegral l')
248 createAndTrim' :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a)
249 createAndTrim' l f = do
250 fp <- mallocByteString l
251 withForeignPtr fp $ \p -> do
252 (off, l', res) <- f p
253 if assert (l' <= l) $ l' >= l
254 then return $! (PS fp 0 l, res)
255 else do ps <- create l' $ \p' ->
256 memcpy p' (p `plusPtr` off) (fromIntegral l')
259 -- | Wrapper of mallocForeignPtrBytes with faster implementation
260 -- for GHC 6.5 builds newer than 06/06/06
261 mallocByteString :: Int -> IO (ForeignPtr a)
262 mallocByteString l = do
263 #if __GLASGOW_HASKELL__ >= 605 && !defined(SLOW_FOREIGN_PTR)
264 mallocPlainForeignPtrBytes l
266 mallocForeignPtrBytes l
269 #if defined(__GLASGOW_HASKELL__)
270 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
271 -- Addr\# (an arbitrary machine address assumed to point outside the
272 -- garbage-collected heap) into a @ByteString@. A much faster way to
273 -- create an Addr\# is with an unboxed string literal, than to pack a
274 -- boxed string. A unboxed string literal is compiled to a static @char
275 -- []@ by GHC. Establishing the length of the string requires a call to
276 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
277 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
278 -- if you know the length of the string statically.
282 -- > literalFS = packAddress "literal"#
284 packAddress :: Addr# -> ByteString
285 packAddress addr# = inlinePerformIO $ do
286 p <- newForeignPtr_ cstr
288 return $ PS p 0 (fromIntegral l)
291 {-# INLINE packAddress #-}
293 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
294 -- 'ByteStrings' -- which is ideal for string literals. It packs a
295 -- null-terminated sequence of bytes into a 'ByteString', given a raw
296 -- 'Addr\#' to the string, and the length of the string. Make sure the
297 -- length is correct, otherwise use the safer 'packAddress' (where the
298 -- length will be calculated once at runtime).
299 unsafePackAddress :: Int -> Addr# -> ByteString
300 unsafePackAddress len addr# = inlinePerformIO $ do
301 p <- newForeignPtr_ cstr
303 where cstr = Ptr addr#
305 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
306 -- length, and an IO action representing a finalizer. This function is
307 -- not available on Hugs.
309 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
310 packCStringFinalizer p l f = do
311 fp <- FC.newForeignPtr p f
314 -- | Explicitly run the finaliser associated with a 'ByteString'.
315 -- Further references to this value may generate invalid memory
316 -- references. This operation is unsafe, as there may be other
317 -- 'ByteStrings' referring to the same underlying pages. If you use
318 -- this, you need to have a proof of some kind that all 'ByteString's
319 -- ever generated from the underlying byte array are no longer live.
320 unsafeFinalize :: ByteString -> IO ()
321 unsafeFinalize (PS p _ _) = FC.finalizeForeignPtr p
325 ------------------------------------------------------------------------
327 -- | Conversion between 'Word8' and 'Char'. Should compile to a no-op.
329 #if !defined(__GLASGOW_HASKELL__)
330 w2c = chr . fromIntegral
332 w2c = unsafeChr . fromIntegral
336 -- | Unsafe conversion between 'Char' and 'Word8'. This is a no-op and
337 -- silently truncates to 8 bits Chars > '\255'. It is provided as
338 -- convenience for ByteString construction.
340 c2w = fromIntegral . ord
343 -- Selects white-space characters in the Latin-1 range
344 -- ordered by frequency
346 isSpaceWord8 :: Word8 -> Bool
347 isSpaceWord8 w = case w of
348 0x20 -> True -- SPACE
349 0x0A -> True -- LF, \n
350 0x09 -> True -- HT, \t
351 0x0C -> True -- FF, \f
352 0x0D -> True -- CR, \r
353 0x0B -> True -- VT, \v
354 0xA0 -> True -- spotted by QC..
356 {-# INLINE isSpaceWord8 #-}
358 ------------------------------------------------------------------------
359 -- | Just like unsafePerformIO, but we inline it. Big performance gains as
360 -- it exposes lots of things to further inlining
362 {-# INLINE inlinePerformIO #-}
363 inlinePerformIO :: IO a -> a
364 #if defined(__GLASGOW_HASKELL__)
365 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
367 inlinePerformIO = unsafePerformIO
370 -- | Count the number of occurrences of each byte.
372 {-# SPECIALIZE countOccurrences :: Ptr CSize -> Ptr Word8 -> Int -> IO () #-}
373 countOccurrences :: (Storable a, Num a) => Ptr a -> Ptr Word8 -> Int -> IO ()
374 STRICT3(countOccurrences)
375 countOccurrences counts str l = go 0
378 go i | i == l = return ()
379 | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
380 x <- peekElemOff counts k
381 pokeElemOff counts k (x + 1)
384 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a
385 -- @CString@. Warning: modifying the @CString@ will affect the
386 -- @ByteString@. Why is this function unsafe? It relies on the null
387 -- byte at the end of the ByteString to be there. Unless you can
388 -- guarantee the null byte, you should use the safe version, which will
389 -- copy the string first.
390 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
391 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
393 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a
395 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
396 unsafeUseAsCStringLen (PS ps s l) f = withForeignPtr ps $ \p -> f (castPtr p `plusPtr` s,l)
398 -- ---------------------------------------------------------------------
400 -- Standard C functions
403 foreign import ccall unsafe "string.h strlen" c_strlen
404 :: CString -> IO CSize
406 foreign import ccall unsafe "stdlib.h malloc" c_malloc
407 :: CSize -> IO (Ptr Word8)
409 foreign import ccall unsafe "static stdlib.h free" c_free
410 :: Ptr Word8 -> IO ()
412 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
413 :: FunPtr (Ptr Word8 -> IO ())
415 foreign import ccall unsafe "string.h memchr" memchr
416 :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
418 foreign import ccall unsafe "string.h memcmp" memcmp
419 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
421 foreign import ccall unsafe "string.h memcpy" memcpy
422 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
424 foreign import ccall unsafe "string.h memmove" memmove
425 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
427 foreign import ccall unsafe "string.h memset" memset
428 :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
431 -- ---------------------------------------------------------------------
436 foreign import ccall unsafe "static fpstring.h fps_reverse" c_reverse
437 :: Ptr Word8 -> Ptr Word8 -> CULong -> IO ()
439 foreign import ccall unsafe "static fpstring.h fps_intersperse" c_intersperse
440 :: Ptr Word8 -> Ptr Word8 -> CULong -> Word8 -> IO ()
442 foreign import ccall unsafe "static fpstring.h fps_maximum" c_maximum
443 :: Ptr Word8 -> CULong -> IO Word8
445 foreign import ccall unsafe "static fpstring.h fps_minimum" c_minimum
446 :: Ptr Word8 -> CULong -> IO Word8
448 foreign import ccall unsafe "static fpstring.h fps_count" c_count
449 :: Ptr Word8 -> CULong -> Word8 -> IO CULong
451 -- ---------------------------------------------------------------------
455 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
456 :: Int -> Int -> IO (Ptr Word8)
458 foreign import ccall unsafe "static unistd.h close" c_close
461 # if !defined(__OpenBSD__)
462 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
463 :: Ptr Word8 -> Int -> IO Int
467 -- ---------------------------------------------------------------------
468 -- Internal GHC Haskell magic
470 #if defined(__GLASGOW_HASKELL__)
471 foreign import ccall unsafe "__hscore_memcpy_src_off"
472 memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())