3 -- (c) The University of Glasgow 2002-2006
5 -- Binary I/O library, with special tweaks for GHC
7 -- Based on the nhc98 Binary library, which is copyright
8 -- (c) Malcolm Wallace and Colin Runciman, University of York, 1998.
9 -- Under the terms of the license for that software, we must tell you
10 -- where you can obtain the original version of the Binary library, namely
11 -- http://www.cs.york.ac.uk/fp/nhc98/
18 openBinIO, openBinIO_,
35 -- for writing instances:
43 #ifdef __GLASGOW_HASKELL__
50 UserData(..), getUserData, setUserData,
51 newReadState, newWriteState,
52 putDictionary, getDictionary, putFS,
55 #include "HsVersions.h"
57 -- The *host* architecture version:
60 import {-# SOURCE #-} Name (Name)
73 import Data.Char ( ord, chr )
74 import Control.Monad ( when )
75 import System.IO as IO
76 import System.IO.Unsafe ( unsafeInterleaveIO )
77 import System.IO.Error ( mkIOError, eofErrorType )
78 import GHC.Real ( Ratio(..) )
80 import GHC.IOBase ( IO(..) )
81 import GHC.Word ( Word8(..) )
82 #if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ < 601
83 -- openFileEx is available from the lang package, but we want to
84 -- be independent of hslibs libraries.
85 import GHC.Handle ( openFileEx, IOModeEx(..) )
87 import System.IO ( openBinaryFile )
90 #if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ < 601
91 openBinaryFile f mode = openFileEx f (BinaryMode mode)
94 type BinArray = ForeignPtr Word8
96 ---------------------------------------------------------------
98 ---------------------------------------------------------------
101 = BinMem { -- binary data stored in an unboxed array
102 bh_usr :: UserData, -- sigh, need parameterized modules :-)
103 _off_r :: !FastMutInt, -- the current offset
104 _sz_r :: !FastMutInt, -- size of the array (cached)
105 _arr_r :: !(IORef BinArray) -- the array (bounds: (0,size-1))
107 -- XXX: should really store a "high water mark" for dumping out
108 -- the binary data to a file.
110 | BinIO { -- binary data stored in a file
112 _off_r :: !FastMutInt, -- the current offset (cached)
113 _hdl :: !IO.Handle -- the file handle (must be seekable)
115 -- cache the file ptr in BinIO; using hTell is too expensive
116 -- to call repeatedly. If anyone else is modifying this Handle
117 -- at the same time, we'll be screwed.
119 getUserData :: BinHandle -> UserData
120 getUserData bh = bh_usr bh
122 setUserData :: BinHandle -> UserData -> BinHandle
123 setUserData bh us = bh { bh_usr = us }
126 ---------------------------------------------------------------
128 ---------------------------------------------------------------
130 newtype Bin a = BinPtr Int
131 deriving (Eq, Ord, Show, Bounded)
133 castBin :: Bin a -> Bin b
134 castBin (BinPtr i) = BinPtr i
136 ---------------------------------------------------------------
138 ---------------------------------------------------------------
141 put_ :: BinHandle -> a -> IO ()
142 put :: BinHandle -> a -> IO (Bin a)
143 get :: BinHandle -> IO a
145 -- define one of put_, put. Use of put_ is recommended because it
146 -- is more likely that tail-calls can kick in, and we rarely need the
147 -- position return value.
148 put_ bh a = do put bh a; return ()
149 put bh a = do p <- tellBin bh; put_ bh a; return p
151 putAt :: Binary a => BinHandle -> Bin a -> a -> IO ()
152 putAt bh p x = do seekBin bh p; put bh x; return ()
154 getAt :: Binary a => BinHandle -> Bin a -> IO a
155 getAt bh p = do seekBin bh p; get bh
157 openBinIO_ :: IO.Handle -> IO BinHandle
158 openBinIO_ h = openBinIO h
160 openBinIO :: IO.Handle -> IO BinHandle
164 return (BinIO noUserData r h)
166 openBinMem :: Int -> IO BinHandle
168 | size <= 0 = error "Data.Binary.openBinMem: size must be >= 0"
170 arr <- mallocForeignPtrBytes size
171 arr_r <- newIORef arr
172 ix_r <- newFastMutInt
173 writeFastMutInt ix_r 0
174 sz_r <- newFastMutInt
175 writeFastMutInt sz_r size
176 return (BinMem noUserData ix_r sz_r arr_r)
178 tellBin :: BinHandle -> IO (Bin a)
179 tellBin (BinIO _ r _) = do ix <- readFastMutInt r; return (BinPtr ix)
180 tellBin (BinMem _ r _ _) = do ix <- readFastMutInt r; return (BinPtr ix)
182 seekBin :: BinHandle -> Bin a -> IO ()
183 seekBin (BinIO _ ix_r h) (BinPtr p) = do
184 writeFastMutInt ix_r p
185 hSeek h AbsoluteSeek (fromIntegral p)
186 seekBin h@(BinMem _ ix_r sz_r _) (BinPtr p) = do
187 sz <- readFastMutInt sz_r
189 then do expandBin h p; writeFastMutInt ix_r p
190 else writeFastMutInt ix_r p
192 seekBy :: BinHandle -> Int -> IO ()
193 seekBy (BinIO _ ix_r h) off = do
194 ix <- readFastMutInt ix_r
196 writeFastMutInt ix_r ix'
197 hSeek h AbsoluteSeek (fromIntegral ix')
198 seekBy h@(BinMem _ ix_r sz_r _) off = do
199 sz <- readFastMutInt sz_r
200 ix <- readFastMutInt ix_r
203 then do expandBin h ix'; writeFastMutInt ix_r ix'
204 else writeFastMutInt ix_r ix'
206 isEOFBin :: BinHandle -> IO Bool
207 isEOFBin (BinMem _ ix_r sz_r _) = do
208 ix <- readFastMutInt ix_r
209 sz <- readFastMutInt sz_r
211 isEOFBin (BinIO _ _ h) = hIsEOF h
213 writeBinMem :: BinHandle -> FilePath -> IO ()
214 writeBinMem (BinIO _ _ _) _ = error "Data.Binary.writeBinMem: not a memory handle"
215 writeBinMem (BinMem _ ix_r _ arr_r) fn = do
216 h <- openBinaryFile fn WriteMode
217 arr <- readIORef arr_r
218 ix <- readFastMutInt ix_r
219 withForeignPtr arr $ \p -> hPutBuf h p ix
222 readBinMem :: FilePath -> IO BinHandle
223 -- Return a BinHandle with a totally undefined State
224 readBinMem filename = do
225 h <- openBinaryFile filename ReadMode
226 filesize' <- hFileSize h
227 let filesize = fromIntegral filesize'
228 arr <- mallocForeignPtrBytes (filesize*2)
229 count <- withForeignPtr arr $ \p -> hGetBuf h p filesize
230 when (count /= filesize) $
231 error ("Binary.readBinMem: only read " ++ show count ++ " bytes")
233 arr_r <- newIORef arr
234 ix_r <- newFastMutInt
235 writeFastMutInt ix_r 0
236 sz_r <- newFastMutInt
237 writeFastMutInt sz_r filesize
238 return (BinMem noUserData ix_r sz_r arr_r)
240 fingerprintBinMem :: BinHandle -> IO Fingerprint
241 fingerprintBinMem (BinIO _ _ _) = error "Binary.md5BinMem: not a memory handle"
242 fingerprintBinMem (BinMem _ ix_r _ arr_r) = do
243 arr <- readIORef arr_r
244 ix <- readFastMutInt ix_r
245 withForeignPtr arr $ \p -> fingerprintData p ix
247 -- expand the size of the array to include a specified offset
248 expandBin :: BinHandle -> Int -> IO ()
249 expandBin (BinMem _ _ sz_r arr_r) off = do
250 sz <- readFastMutInt sz_r
251 let sz' = head (dropWhile (<= off) (iterate (* 2) sz))
252 arr <- readIORef arr_r
253 arr' <- mallocForeignPtrBytes sz'
254 withForeignPtr arr $ \old ->
255 withForeignPtr arr' $ \new ->
257 writeFastMutInt sz_r sz'
258 writeIORef arr_r arr'
259 when False $ -- disabled
260 hPutStrLn stderr ("Binary: expanding to size: " ++ show sz')
262 expandBin (BinIO _ _ _) _ = return ()
263 -- no need to expand a file, we'll assume they expand by themselves.
265 -- -----------------------------------------------------------------------------
266 -- Low-level reading/writing of bytes
268 putWord8 :: BinHandle -> Word8 -> IO ()
269 putWord8 h@(BinMem _ ix_r sz_r arr_r) w = do
270 ix <- readFastMutInt ix_r
271 sz <- readFastMutInt sz_r
272 -- double the size of the array if it overflows
274 then do expandBin h ix
276 else do arr <- readIORef arr_r
277 withForeignPtr arr $ \p -> pokeByteOff p ix w
278 writeFastMutInt ix_r (ix+1)
280 putWord8 (BinIO _ ix_r h) w = do
281 ix <- readFastMutInt ix_r
282 hPutChar h (chr (fromIntegral w)) -- XXX not really correct
283 writeFastMutInt ix_r (ix+1)
286 getWord8 :: BinHandle -> IO Word8
287 getWord8 (BinMem _ ix_r sz_r arr_r) = do
288 ix <- readFastMutInt ix_r
289 sz <- readFastMutInt sz_r
291 ioError (mkIOError eofErrorType "Data.Binary.getWord8" Nothing Nothing)
292 arr <- readIORef arr_r
293 w <- withForeignPtr arr $ \p -> peekByteOff p ix
294 writeFastMutInt ix_r (ix+1)
296 getWord8 (BinIO _ ix_r h) = do
297 ix <- readFastMutInt ix_r
299 writeFastMutInt ix_r (ix+1)
300 return $! (fromIntegral (ord c)) -- XXX not really correct
302 putByte :: BinHandle -> Word8 -> IO ()
303 putByte bh w = put_ bh w
305 getByte :: BinHandle -> IO Word8
308 -- -----------------------------------------------------------------------------
309 -- Primitve Word writes
311 instance Binary Word8 where
315 instance Binary Word16 where
316 put_ h w = do -- XXX too slow.. inline putWord8?
317 putByte h (fromIntegral (w `shiftR` 8))
318 putByte h (fromIntegral (w .&. 0xff))
322 return $! ((fromIntegral w1 `shiftL` 8) .|. fromIntegral w2)
325 instance Binary Word32 where
327 putByte h (fromIntegral (w `shiftR` 24))
328 putByte h (fromIntegral ((w `shiftR` 16) .&. 0xff))
329 putByte h (fromIntegral ((w `shiftR` 8) .&. 0xff))
330 putByte h (fromIntegral (w .&. 0xff))
336 return $! ((fromIntegral w1 `shiftL` 24) .|.
337 (fromIntegral w2 `shiftL` 16) .|.
338 (fromIntegral w3 `shiftL` 8) .|.
341 instance Binary Word64 where
343 putByte h (fromIntegral (w `shiftR` 56))
344 putByte h (fromIntegral ((w `shiftR` 48) .&. 0xff))
345 putByte h (fromIntegral ((w `shiftR` 40) .&. 0xff))
346 putByte h (fromIntegral ((w `shiftR` 32) .&. 0xff))
347 putByte h (fromIntegral ((w `shiftR` 24) .&. 0xff))
348 putByte h (fromIntegral ((w `shiftR` 16) .&. 0xff))
349 putByte h (fromIntegral ((w `shiftR` 8) .&. 0xff))
350 putByte h (fromIntegral (w .&. 0xff))
360 return $! ((fromIntegral w1 `shiftL` 56) .|.
361 (fromIntegral w2 `shiftL` 48) .|.
362 (fromIntegral w3 `shiftL` 40) .|.
363 (fromIntegral w4 `shiftL` 32) .|.
364 (fromIntegral w5 `shiftL` 24) .|.
365 (fromIntegral w6 `shiftL` 16) .|.
366 (fromIntegral w7 `shiftL` 8) .|.
369 -- -----------------------------------------------------------------------------
370 -- Primitve Int writes
372 instance Binary Int8 where
373 put_ h w = put_ h (fromIntegral w :: Word8)
374 get h = do w <- get h; return $! (fromIntegral (w::Word8))
376 instance Binary Int16 where
377 put_ h w = put_ h (fromIntegral w :: Word16)
378 get h = do w <- get h; return $! (fromIntegral (w::Word16))
380 instance Binary Int32 where
381 put_ h w = put_ h (fromIntegral w :: Word32)
382 get h = do w <- get h; return $! (fromIntegral (w::Word32))
384 instance Binary Int64 where
385 put_ h w = put_ h (fromIntegral w :: Word64)
386 get h = do w <- get h; return $! (fromIntegral (w::Word64))
388 -- -----------------------------------------------------------------------------
389 -- Instances for standard types
391 instance Binary () where
392 put_ _ () = return ()
394 -- getF bh p = case getBitsF bh 0 p of (_,b) -> ((),b)
396 instance Binary Bool where
397 put_ bh b = putByte bh (fromIntegral (fromEnum b))
398 get bh = do x <- getWord8 bh; return $! (toEnum (fromIntegral x))
399 -- getF bh p = case getBitsF bh 1 p of (x,b) -> (toEnum x,b)
401 instance Binary Char where
402 put_ bh c = put_ bh (fromIntegral (ord c) :: Word32)
403 get bh = do x <- get bh; return $! (chr (fromIntegral (x :: Word32)))
404 -- getF bh p = case getBitsF bh 8 p of (x,b) -> (toEnum x,b)
406 instance Binary Int where
407 #if SIZEOF_HSINT == 4
408 put_ bh i = put_ bh (fromIntegral i :: Int32)
411 return $! (fromIntegral (x :: Int32))
412 #elif SIZEOF_HSINT == 8
413 put_ bh i = put_ bh (fromIntegral i :: Int64)
416 return $! (fromIntegral (x :: Int64))
418 #error "unsupported sizeof(HsInt)"
420 -- getF bh = getBitsF bh 32
422 instance Binary a => Binary [a] where
426 then putByte bh (fromIntegral len :: Word8)
427 else do putByte bh 0xff; put_ bh (fromIntegral len :: Word32)
433 else return (fromIntegral b :: Word32)
434 let loop 0 = return []
435 loop n = do a <- get bh; as <- loop (n-1); return (a:as)
438 instance (Binary a, Binary b) => Binary (a,b) where
439 put_ bh (a,b) = do put_ bh a; put_ bh b
440 get bh = do a <- get bh
444 instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
445 put_ bh (a,b,c) = do put_ bh a; put_ bh b; put_ bh c
446 get bh = do a <- get bh
451 instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
452 put_ bh (a,b,c,d) = do put_ bh a; put_ bh b; put_ bh c; put_ bh d
453 get bh = do a <- get bh
459 instance Binary a => Binary (Maybe a) where
460 put_ bh Nothing = putByte bh 0
461 put_ bh (Just a) = do putByte bh 1; put_ bh a
462 get bh = do h <- getWord8 bh
465 _ -> do x <- get bh; return (Just x)
467 instance (Binary a, Binary b) => Binary (Either a b) where
468 put_ bh (Left a) = do putByte bh 0; put_ bh a
469 put_ bh (Right b) = do putByte bh 1; put_ bh b
470 get bh = do h <- getWord8 bh
472 0 -> do a <- get bh ; return (Left a)
473 _ -> do b <- get bh ; return (Right b)
475 #if defined(__GLASGOW_HASKELL__) || 1
476 --to quote binary-0.3 on this code idea,
478 -- TODO This instance is not architecture portable. GMP stores numbers as
479 -- arrays of machine sized words, so the byte format is not portable across
480 -- architectures with different endianess and word size.
482 -- This makes it hard (impossible) to make an equivalent instance
483 -- with code that is compilable with non-GHC. Do we need any instance
484 -- Binary Integer, and if so, does it have to be blazing fast? Or can
485 -- we just change this instance to be portable like the rest of the
486 -- instances? (binary package has code to steal for that)
488 -- yes, we need Binary Integer and Binary Rational in basicTypes/Literal.lhs
490 instance Binary Integer where
491 -- XXX This is hideous
492 put_ bh i = put_ bh (show i)
493 get bh = do str <- get bh
495 [(i, "")] -> return i
496 _ -> fail ("Binary Integer: got " ++ show str)
499 put_ bh (S# i#) = do putByte bh 0; put_ bh (I# i#)
500 put_ bh (J# s# a#) = do
503 let sz# = sizeofByteArray# a# -- in *bytes*
504 put_ bh (I# sz#) -- in *bytes*
505 putByteArray bh a# sz#
510 0 -> do (I# i#) <- get bh
512 _ -> do (I# s#) <- get bh
514 (BA a#) <- getByteArray bh sz
518 -- As for the rest of this code, even though this module
519 -- exports it, it doesn't seem to be used anywhere else
522 putByteArray :: BinHandle -> ByteArray# -> Int# -> IO ()
523 putByteArray bh a s# = loop 0#
525 | n# ==# s# = return ()
527 putByte bh (indexByteArray a n#)
530 getByteArray :: BinHandle -> Int -> IO ByteArray
531 getByteArray bh (I# sz) = do
532 (MBA arr) <- newByteArray sz
534 | n ==# sz = return ()
537 writeByteArray arr n w
543 data ByteArray = BA ByteArray#
544 data MBA = MBA (MutableByteArray# RealWorld)
546 newByteArray :: Int# -> IO MBA
547 newByteArray sz = IO $ \s ->
548 case newByteArray# sz s of { (# s, arr #) ->
551 freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray
552 freezeByteArray arr = IO $ \s ->
553 case unsafeFreezeByteArray# arr s of { (# s, arr #) ->
556 writeByteArray :: MutableByteArray# RealWorld -> Int# -> Word8 -> IO ()
557 writeByteArray arr i (W8# w) = IO $ \s ->
558 case writeWord8Array# arr i w s of { s ->
561 indexByteArray :: ByteArray# -> Int# -> Word8
562 indexByteArray a# n# = W8# (indexWord8Array# a# n#)
564 instance (Integral a, Binary a) => Binary (Ratio a) where
565 put_ bh (a :% b) = do put_ bh a; put_ bh b
566 get bh = do a <- get bh; b <- get bh; return (a :% b)
569 instance Binary (Bin a) where
570 put_ bh (BinPtr i) = put_ bh i
571 get bh = do i <- get bh; return (BinPtr i)
573 -- -----------------------------------------------------------------------------
574 -- Lazy reading/writing
576 lazyPut :: Binary a => BinHandle -> a -> IO ()
578 -- output the obj with a ptr to skip over it:
580 put_ bh pre_a -- save a slot for the ptr
581 put_ bh a -- dump the object
582 q <- tellBin bh -- q = ptr to after object
583 putAt bh pre_a q -- fill in slot before a with ptr to q
584 seekBin bh q -- finally carry on writing at q
586 lazyGet :: Binary a => BinHandle -> IO a
588 p <- get bh -- a BinPtr
590 a <- unsafeInterleaveIO (getAt bh p_a)
591 seekBin bh p -- skip over the object for now
594 -- -----------------------------------------------------------------------------
596 -- -----------------------------------------------------------------------------
600 -- for *deserialising* only:
601 ud_dict :: Dictionary,
602 ud_symtab :: SymbolTable,
604 -- for *serialising* only:
605 ud_put_name :: BinHandle -> Name -> IO (),
606 ud_put_fs :: BinHandle -> FastString -> IO ()
609 newReadState :: Dictionary -> IO UserData
610 newReadState dict = do
611 return UserData { ud_dict = dict,
612 ud_symtab = undef "symtab",
613 ud_put_name = undef "put_name",
614 ud_put_fs = undef "put_fs"
617 newWriteState :: (BinHandle -> Name -> IO ())
618 -> (BinHandle -> FastString -> IO ())
620 newWriteState put_name put_fs = do
621 return UserData { ud_dict = undef "dict",
622 ud_symtab = undef "symtab",
623 ud_put_name = put_name,
628 noUserData = undef "UserData"
631 undef s = panic ("Binary.UserData: no " ++ s)
633 ---------------------------------------------------------
635 ---------------------------------------------------------
637 type Dictionary = Array Int FastString -- The dictionary
638 -- Should be 0-indexed
640 putDictionary :: BinHandle -> Int -> UniqFM (Int,FastString) -> IO ()
641 putDictionary bh sz dict = do
643 mapM_ (putFS bh) (elems (array (0,sz-1) (eltsUFM dict)))
645 getDictionary :: BinHandle -> IO Dictionary
646 getDictionary bh = do
648 elems <- sequence (take sz (repeat (getFS bh)))
649 return (listArray (0,sz-1) elems)
651 ---------------------------------------------------------
653 ---------------------------------------------------------
655 -- On disk, the symbol table is an array of IfaceExtName, when
656 -- reading it in we turn it into a SymbolTable.
658 type SymbolTable = Array Int Name
660 ---------------------------------------------------------
661 -- Reading and writing FastStrings
662 ---------------------------------------------------------
664 putFS :: BinHandle -> FastString -> IO ()
665 putFS bh (FastString _ l _ buf _) = do
667 withForeignPtr buf $ \ptr ->
669 go n | n == l = return ()
671 b <- peekElemOff ptr n
677 {- -- possible faster version, not quite there yet:
678 getFS bh@BinMem{} = do
680 arr <- readIORef (arr_r bh)
681 off <- readFastMutInt (off_r bh)
682 return $! (mkFastSubStringBA# arr off l)
684 getFS :: BinHandle -> IO FastString
687 fp <- mallocForeignPtrBytes l
688 withForeignPtr fp $ \ptr -> do
690 go n | n == l = mkFastStringForeignPtr ptr fp l
698 instance Binary FastString where
700 case getUserData bh of
701 UserData { ud_put_fs = put_fs } -> put_fs bh f
705 return $! (ud_dict (getUserData bh) ! j)
707 -- Here to avoid loop
709 instance Binary Fingerprint where
710 put_ h (Fingerprint w1 w2) = do put_ h w1; put_ h w2
711 get h = do w1 <- get h; w2 <- get h; return (Fingerprint w1 w2)