3 -- (c) The University of Glasgow 2002-2006
5 -- Binary I/O library, with special tweaks for GHC
7 -- Based on the nhc98 Binary library, which is copyright
8 -- (c) Malcolm Wallace and Colin Runciman, University of York, 1998.
9 -- Under the terms of the license for that software, we must tell you
10 -- where you can obtain the original version of the Binary library, namely
11 -- http://www.cs.york.ac.uk/fp/nhc98/
18 openBinIO, openBinIO_,
35 -- for writing instances:
43 #ifdef __GLASGOW_HASKELL__
50 UserData(..), getUserData, setUserData,
51 newReadState, newWriteState,
52 putDictionary, getDictionary, putFS,
55 #include "HsVersions.h"
57 -- The *host* architecture version:
60 import {-# SOURCE #-} Name (Name)
73 import Data.Char ( ord, chr )
74 import Control.Monad ( when )
75 import System.IO as IO
76 import System.IO.Unsafe ( unsafeInterleaveIO )
77 import System.IO.Error ( mkIOError, eofErrorType )
78 import GHC.Real ( Ratio(..) )
80 import GHC.IOBase ( IO(..) )
81 import GHC.Word ( Word8(..) )
82 import System.IO ( openBinaryFile )
84 type BinArray = ForeignPtr Word8
86 ---------------------------------------------------------------
88 ---------------------------------------------------------------
91 = BinMem { -- binary data stored in an unboxed array
92 bh_usr :: UserData, -- sigh, need parameterized modules :-)
93 _off_r :: !FastMutInt, -- the current offset
94 _sz_r :: !FastMutInt, -- size of the array (cached)
95 _arr_r :: !(IORef BinArray) -- the array (bounds: (0,size-1))
97 -- XXX: should really store a "high water mark" for dumping out
98 -- the binary data to a file.
100 | BinIO { -- binary data stored in a file
102 _off_r :: !FastMutInt, -- the current offset (cached)
103 _hdl :: !IO.Handle -- the file handle (must be seekable)
105 -- cache the file ptr in BinIO; using hTell is too expensive
106 -- to call repeatedly. If anyone else is modifying this Handle
107 -- at the same time, we'll be screwed.
109 getUserData :: BinHandle -> UserData
110 getUserData bh = bh_usr bh
112 setUserData :: BinHandle -> UserData -> BinHandle
113 setUserData bh us = bh { bh_usr = us }
116 ---------------------------------------------------------------
118 ---------------------------------------------------------------
120 newtype Bin a = BinPtr Int
121 deriving (Eq, Ord, Show, Bounded)
123 castBin :: Bin a -> Bin b
124 castBin (BinPtr i) = BinPtr i
126 ---------------------------------------------------------------
128 ---------------------------------------------------------------
131 put_ :: BinHandle -> a -> IO ()
132 put :: BinHandle -> a -> IO (Bin a)
133 get :: BinHandle -> IO a
135 -- define one of put_, put. Use of put_ is recommended because it
136 -- is more likely that tail-calls can kick in, and we rarely need the
137 -- position return value.
138 put_ bh a = do put bh a; return ()
139 put bh a = do p <- tellBin bh; put_ bh a; return p
141 putAt :: Binary a => BinHandle -> Bin a -> a -> IO ()
142 putAt bh p x = do seekBin bh p; put bh x; return ()
144 getAt :: Binary a => BinHandle -> Bin a -> IO a
145 getAt bh p = do seekBin bh p; get bh
147 openBinIO_ :: IO.Handle -> IO BinHandle
148 openBinIO_ h = openBinIO h
150 openBinIO :: IO.Handle -> IO BinHandle
154 return (BinIO noUserData r h)
156 openBinMem :: Int -> IO BinHandle
158 | size <= 0 = error "Data.Binary.openBinMem: size must be >= 0"
160 arr <- mallocForeignPtrBytes size
161 arr_r <- newIORef arr
162 ix_r <- newFastMutInt
163 writeFastMutInt ix_r 0
164 sz_r <- newFastMutInt
165 writeFastMutInt sz_r size
166 return (BinMem noUserData ix_r sz_r arr_r)
168 tellBin :: BinHandle -> IO (Bin a)
169 tellBin (BinIO _ r _) = do ix <- readFastMutInt r; return (BinPtr ix)
170 tellBin (BinMem _ r _ _) = do ix <- readFastMutInt r; return (BinPtr ix)
172 seekBin :: BinHandle -> Bin a -> IO ()
173 seekBin (BinIO _ ix_r h) (BinPtr p) = do
174 writeFastMutInt ix_r p
175 hSeek h AbsoluteSeek (fromIntegral p)
176 seekBin h@(BinMem _ ix_r sz_r _) (BinPtr p) = do
177 sz <- readFastMutInt sz_r
179 then do expandBin h p; writeFastMutInt ix_r p
180 else writeFastMutInt ix_r p
182 seekBy :: BinHandle -> Int -> IO ()
183 seekBy (BinIO _ ix_r h) off = do
184 ix <- readFastMutInt ix_r
186 writeFastMutInt ix_r ix'
187 hSeek h AbsoluteSeek (fromIntegral ix')
188 seekBy h@(BinMem _ ix_r sz_r _) off = do
189 sz <- readFastMutInt sz_r
190 ix <- readFastMutInt ix_r
193 then do expandBin h ix'; writeFastMutInt ix_r ix'
194 else writeFastMutInt ix_r ix'
196 isEOFBin :: BinHandle -> IO Bool
197 isEOFBin (BinMem _ ix_r sz_r _) = do
198 ix <- readFastMutInt ix_r
199 sz <- readFastMutInt sz_r
201 isEOFBin (BinIO _ _ h) = hIsEOF h
203 writeBinMem :: BinHandle -> FilePath -> IO ()
204 writeBinMem (BinIO _ _ _) _ = error "Data.Binary.writeBinMem: not a memory handle"
205 writeBinMem (BinMem _ ix_r _ arr_r) fn = do
206 h <- openBinaryFile fn WriteMode
207 arr <- readIORef arr_r
208 ix <- readFastMutInt ix_r
209 withForeignPtr arr $ \p -> hPutBuf h p ix
212 readBinMem :: FilePath -> IO BinHandle
213 -- Return a BinHandle with a totally undefined State
214 readBinMem filename = do
215 h <- openBinaryFile filename ReadMode
216 filesize' <- hFileSize h
217 let filesize = fromIntegral filesize'
218 arr <- mallocForeignPtrBytes (filesize*2)
219 count <- withForeignPtr arr $ \p -> hGetBuf h p filesize
220 when (count /= filesize) $
221 error ("Binary.readBinMem: only read " ++ show count ++ " bytes")
223 arr_r <- newIORef arr
224 ix_r <- newFastMutInt
225 writeFastMutInt ix_r 0
226 sz_r <- newFastMutInt
227 writeFastMutInt sz_r filesize
228 return (BinMem noUserData ix_r sz_r arr_r)
230 fingerprintBinMem :: BinHandle -> IO Fingerprint
231 fingerprintBinMem (BinIO _ _ _) = error "Binary.md5BinMem: not a memory handle"
232 fingerprintBinMem (BinMem _ ix_r _ arr_r) = do
233 arr <- readIORef arr_r
234 ix <- readFastMutInt ix_r
235 withForeignPtr arr $ \p -> fingerprintData p ix
237 -- expand the size of the array to include a specified offset
238 expandBin :: BinHandle -> Int -> IO ()
239 expandBin (BinMem _ _ sz_r arr_r) off = do
240 sz <- readFastMutInt sz_r
241 let sz' = head (dropWhile (<= off) (iterate (* 2) sz))
242 arr <- readIORef arr_r
243 arr' <- mallocForeignPtrBytes sz'
244 withForeignPtr arr $ \old ->
245 withForeignPtr arr' $ \new ->
247 writeFastMutInt sz_r sz'
248 writeIORef arr_r arr'
249 when False $ -- disabled
250 hPutStrLn stderr ("Binary: expanding to size: " ++ show sz')
252 expandBin (BinIO _ _ _) _ = return ()
253 -- no need to expand a file, we'll assume they expand by themselves.
255 -- -----------------------------------------------------------------------------
256 -- Low-level reading/writing of bytes
258 putWord8 :: BinHandle -> Word8 -> IO ()
259 putWord8 h@(BinMem _ ix_r sz_r arr_r) w = do
260 ix <- readFastMutInt ix_r
261 sz <- readFastMutInt sz_r
262 -- double the size of the array if it overflows
264 then do expandBin h ix
266 else do arr <- readIORef arr_r
267 withForeignPtr arr $ \p -> pokeByteOff p ix w
268 writeFastMutInt ix_r (ix+1)
270 putWord8 (BinIO _ ix_r h) w = do
271 ix <- readFastMutInt ix_r
272 hPutChar h (chr (fromIntegral w)) -- XXX not really correct
273 writeFastMutInt ix_r (ix+1)
276 getWord8 :: BinHandle -> IO Word8
277 getWord8 (BinMem _ ix_r sz_r arr_r) = do
278 ix <- readFastMutInt ix_r
279 sz <- readFastMutInt sz_r
281 ioError (mkIOError eofErrorType "Data.Binary.getWord8" Nothing Nothing)
282 arr <- readIORef arr_r
283 w <- withForeignPtr arr $ \p -> peekByteOff p ix
284 writeFastMutInt ix_r (ix+1)
286 getWord8 (BinIO _ ix_r h) = do
287 ix <- readFastMutInt ix_r
289 writeFastMutInt ix_r (ix+1)
290 return $! (fromIntegral (ord c)) -- XXX not really correct
292 putByte :: BinHandle -> Word8 -> IO ()
293 putByte bh w = put_ bh w
295 getByte :: BinHandle -> IO Word8
298 -- -----------------------------------------------------------------------------
299 -- Primitve Word writes
301 instance Binary Word8 where
305 instance Binary Word16 where
306 put_ h w = do -- XXX too slow.. inline putWord8?
307 putByte h (fromIntegral (w `shiftR` 8))
308 putByte h (fromIntegral (w .&. 0xff))
312 return $! ((fromIntegral w1 `shiftL` 8) .|. fromIntegral w2)
315 instance Binary Word32 where
317 putByte h (fromIntegral (w `shiftR` 24))
318 putByte h (fromIntegral ((w `shiftR` 16) .&. 0xff))
319 putByte h (fromIntegral ((w `shiftR` 8) .&. 0xff))
320 putByte h (fromIntegral (w .&. 0xff))
326 return $! ((fromIntegral w1 `shiftL` 24) .|.
327 (fromIntegral w2 `shiftL` 16) .|.
328 (fromIntegral w3 `shiftL` 8) .|.
331 instance Binary Word64 where
333 putByte h (fromIntegral (w `shiftR` 56))
334 putByte h (fromIntegral ((w `shiftR` 48) .&. 0xff))
335 putByte h (fromIntegral ((w `shiftR` 40) .&. 0xff))
336 putByte h (fromIntegral ((w `shiftR` 32) .&. 0xff))
337 putByte h (fromIntegral ((w `shiftR` 24) .&. 0xff))
338 putByte h (fromIntegral ((w `shiftR` 16) .&. 0xff))
339 putByte h (fromIntegral ((w `shiftR` 8) .&. 0xff))
340 putByte h (fromIntegral (w .&. 0xff))
350 return $! ((fromIntegral w1 `shiftL` 56) .|.
351 (fromIntegral w2 `shiftL` 48) .|.
352 (fromIntegral w3 `shiftL` 40) .|.
353 (fromIntegral w4 `shiftL` 32) .|.
354 (fromIntegral w5 `shiftL` 24) .|.
355 (fromIntegral w6 `shiftL` 16) .|.
356 (fromIntegral w7 `shiftL` 8) .|.
359 -- -----------------------------------------------------------------------------
360 -- Primitve Int writes
362 instance Binary Int8 where
363 put_ h w = put_ h (fromIntegral w :: Word8)
364 get h = do w <- get h; return $! (fromIntegral (w::Word8))
366 instance Binary Int16 where
367 put_ h w = put_ h (fromIntegral w :: Word16)
368 get h = do w <- get h; return $! (fromIntegral (w::Word16))
370 instance Binary Int32 where
371 put_ h w = put_ h (fromIntegral w :: Word32)
372 get h = do w <- get h; return $! (fromIntegral (w::Word32))
374 instance Binary Int64 where
375 put_ h w = put_ h (fromIntegral w :: Word64)
376 get h = do w <- get h; return $! (fromIntegral (w::Word64))
378 -- -----------------------------------------------------------------------------
379 -- Instances for standard types
381 instance Binary () where
382 put_ _ () = return ()
384 -- getF bh p = case getBitsF bh 0 p of (_,b) -> ((),b)
386 instance Binary Bool where
387 put_ bh b = putByte bh (fromIntegral (fromEnum b))
388 get bh = do x <- getWord8 bh; return $! (toEnum (fromIntegral x))
389 -- getF bh p = case getBitsF bh 1 p of (x,b) -> (toEnum x,b)
391 instance Binary Char where
392 put_ bh c = put_ bh (fromIntegral (ord c) :: Word32)
393 get bh = do x <- get bh; return $! (chr (fromIntegral (x :: Word32)))
394 -- getF bh p = case getBitsF bh 8 p of (x,b) -> (toEnum x,b)
396 instance Binary Int where
397 #if SIZEOF_HSINT == 4
398 put_ bh i = put_ bh (fromIntegral i :: Int32)
401 return $! (fromIntegral (x :: Int32))
402 #elif SIZEOF_HSINT == 8
403 put_ bh i = put_ bh (fromIntegral i :: Int64)
406 return $! (fromIntegral (x :: Int64))
408 #error "unsupported sizeof(HsInt)"
410 -- getF bh = getBitsF bh 32
412 instance Binary a => Binary [a] where
416 then putByte bh (fromIntegral len :: Word8)
417 else do putByte bh 0xff; put_ bh (fromIntegral len :: Word32)
423 else return (fromIntegral b :: Word32)
424 let loop 0 = return []
425 loop n = do a <- get bh; as <- loop (n-1); return (a:as)
428 instance (Binary a, Binary b) => Binary (a,b) where
429 put_ bh (a,b) = do put_ bh a; put_ bh b
430 get bh = do a <- get bh
434 instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
435 put_ bh (a,b,c) = do put_ bh a; put_ bh b; put_ bh c
436 get bh = do a <- get bh
441 instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
442 put_ bh (a,b,c,d) = do put_ bh a; put_ bh b; put_ bh c; put_ bh d
443 get bh = do a <- get bh
449 instance Binary a => Binary (Maybe a) where
450 put_ bh Nothing = putByte bh 0
451 put_ bh (Just a) = do putByte bh 1; put_ bh a
452 get bh = do h <- getWord8 bh
455 _ -> do x <- get bh; return (Just x)
457 instance (Binary a, Binary b) => Binary (Either a b) where
458 put_ bh (Left a) = do putByte bh 0; put_ bh a
459 put_ bh (Right b) = do putByte bh 1; put_ bh b
460 get bh = do h <- getWord8 bh
462 0 -> do a <- get bh ; return (Left a)
463 _ -> do b <- get bh ; return (Right b)
465 #if defined(__GLASGOW_HASKELL__) || 1
466 --to quote binary-0.3 on this code idea,
468 -- TODO This instance is not architecture portable. GMP stores numbers as
469 -- arrays of machine sized words, so the byte format is not portable across
470 -- architectures with different endianess and word size.
472 -- This makes it hard (impossible) to make an equivalent instance
473 -- with code that is compilable with non-GHC. Do we need any instance
474 -- Binary Integer, and if so, does it have to be blazing fast? Or can
475 -- we just change this instance to be portable like the rest of the
476 -- instances? (binary package has code to steal for that)
478 -- yes, we need Binary Integer and Binary Rational in basicTypes/Literal.lhs
480 instance Binary Integer where
481 -- XXX This is hideous
482 put_ bh i = put_ bh (show i)
483 get bh = do str <- get bh
485 [(i, "")] -> return i
486 _ -> fail ("Binary Integer: got " ++ show str)
489 put_ bh (S# i#) = do putByte bh 0; put_ bh (I# i#)
490 put_ bh (J# s# a#) = do
493 let sz# = sizeofByteArray# a# -- in *bytes*
494 put_ bh (I# sz#) -- in *bytes*
495 putByteArray bh a# sz#
500 0 -> do (I# i#) <- get bh
502 _ -> do (I# s#) <- get bh
504 (BA a#) <- getByteArray bh sz
508 -- As for the rest of this code, even though this module
509 -- exports it, it doesn't seem to be used anywhere else
512 putByteArray :: BinHandle -> ByteArray# -> Int# -> IO ()
513 putByteArray bh a s# = loop 0#
515 | n# ==# s# = return ()
517 putByte bh (indexByteArray a n#)
520 getByteArray :: BinHandle -> Int -> IO ByteArray
521 getByteArray bh (I# sz) = do
522 (MBA arr) <- newByteArray sz
524 | n ==# sz = return ()
527 writeByteArray arr n w
533 data ByteArray = BA ByteArray#
534 data MBA = MBA (MutableByteArray# RealWorld)
536 newByteArray :: Int# -> IO MBA
537 newByteArray sz = IO $ \s ->
538 case newByteArray# sz s of { (# s, arr #) ->
541 freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray
542 freezeByteArray arr = IO $ \s ->
543 case unsafeFreezeByteArray# arr s of { (# s, arr #) ->
546 writeByteArray :: MutableByteArray# RealWorld -> Int# -> Word8 -> IO ()
547 writeByteArray arr i (W8# w) = IO $ \s ->
548 case writeWord8Array# arr i w s of { s ->
551 indexByteArray :: ByteArray# -> Int# -> Word8
552 indexByteArray a# n# = W8# (indexWord8Array# a# n#)
554 instance (Integral a, Binary a) => Binary (Ratio a) where
555 put_ bh (a :% b) = do put_ bh a; put_ bh b
556 get bh = do a <- get bh; b <- get bh; return (a :% b)
559 instance Binary (Bin a) where
560 put_ bh (BinPtr i) = put_ bh i
561 get bh = do i <- get bh; return (BinPtr i)
563 -- -----------------------------------------------------------------------------
564 -- Lazy reading/writing
566 lazyPut :: Binary a => BinHandle -> a -> IO ()
568 -- output the obj with a ptr to skip over it:
570 put_ bh pre_a -- save a slot for the ptr
571 put_ bh a -- dump the object
572 q <- tellBin bh -- q = ptr to after object
573 putAt bh pre_a q -- fill in slot before a with ptr to q
574 seekBin bh q -- finally carry on writing at q
576 lazyGet :: Binary a => BinHandle -> IO a
578 p <- get bh -- a BinPtr
580 a <- unsafeInterleaveIO (getAt bh p_a)
581 seekBin bh p -- skip over the object for now
584 -- -----------------------------------------------------------------------------
586 -- -----------------------------------------------------------------------------
590 -- for *deserialising* only:
591 ud_dict :: Dictionary,
592 ud_symtab :: SymbolTable,
594 -- for *serialising* only:
595 ud_put_name :: BinHandle -> Name -> IO (),
596 ud_put_fs :: BinHandle -> FastString -> IO ()
599 newReadState :: Dictionary -> IO UserData
600 newReadState dict = do
601 return UserData { ud_dict = dict,
602 ud_symtab = undef "symtab",
603 ud_put_name = undef "put_name",
604 ud_put_fs = undef "put_fs"
607 newWriteState :: (BinHandle -> Name -> IO ())
608 -> (BinHandle -> FastString -> IO ())
610 newWriteState put_name put_fs = do
611 return UserData { ud_dict = undef "dict",
612 ud_symtab = undef "symtab",
613 ud_put_name = put_name,
618 noUserData = undef "UserData"
621 undef s = panic ("Binary.UserData: no " ++ s)
623 ---------------------------------------------------------
625 ---------------------------------------------------------
627 type Dictionary = Array Int FastString -- The dictionary
628 -- Should be 0-indexed
630 putDictionary :: BinHandle -> Int -> UniqFM (Int,FastString) -> IO ()
631 putDictionary bh sz dict = do
633 mapM_ (putFS bh) (elems (array (0,sz-1) (eltsUFM dict)))
635 getDictionary :: BinHandle -> IO Dictionary
636 getDictionary bh = do
638 elems <- sequence (take sz (repeat (getFS bh)))
639 return (listArray (0,sz-1) elems)
641 ---------------------------------------------------------
643 ---------------------------------------------------------
645 -- On disk, the symbol table is an array of IfaceExtName, when
646 -- reading it in we turn it into a SymbolTable.
648 type SymbolTable = Array Int Name
650 ---------------------------------------------------------
651 -- Reading and writing FastStrings
652 ---------------------------------------------------------
654 putFS :: BinHandle -> FastString -> IO ()
655 putFS bh (FastString _ l _ buf _) = do
657 withForeignPtr buf $ \ptr ->
659 go n | n == l = return ()
661 b <- peekElemOff ptr n
667 {- -- possible faster version, not quite there yet:
668 getFS bh@BinMem{} = do
670 arr <- readIORef (arr_r bh)
671 off <- readFastMutInt (off_r bh)
672 return $! (mkFastSubStringBA# arr off l)
674 getFS :: BinHandle -> IO FastString
677 fp <- mallocForeignPtrBytes l
678 withForeignPtr fp $ \ptr -> do
680 go n | n == l = mkFastStringForeignPtr ptr fp l
688 instance Binary FastString where
690 case getUserData bh of
691 UserData { ud_put_fs = put_fs } -> put_fs bh f
695 return $! (ud_dict (getUserData bh) ! j)
697 -- Here to avoid loop
699 instance Binary Fingerprint where
700 put_ h (Fingerprint w1 w2) = do put_ h w1; put_ h w2
701 get h = do w1 <- get h; w2 <- get h; return (Fingerprint w1 w2)