3 -- (c) The University of Glasgow 2002-2006
5 -- Binary I/O library, with special tweaks for GHC
7 -- Based on the nhc98 Binary library, which is copyright
8 -- (c) Malcolm Wallace and Colin Runciman, University of York, 1998.
9 -- Under the terms of the license for that software, we must tell you
10 -- where you can obtain the original version of the Binary library, namely
11 -- http://www.cs.york.ac.uk/fp/nhc98/
18 openBinIO, openBinIO_,
33 -- for writing instances:
41 #ifdef __GLASGOW_HASKELL__
48 UserData(..), getUserData, setUserData,
49 newReadState, newWriteState,
50 putDictionary, getDictionary,
53 #include "HsVersions.h"
55 -- The *host* architecture version:
58 import {-# SOURCE #-} Name (Name)
73 import Data.Char ( ord, chr )
74 import Data.Array.Base ( unsafeRead, unsafeWrite )
75 import Control.Monad ( when )
76 import System.IO as IO
77 import System.IO.Unsafe ( unsafeInterleaveIO )
78 import System.IO.Error ( mkIOError, eofErrorType )
79 import GHC.Real ( Ratio(..) )
81 import GHC.IOBase ( IO(..) )
82 import GHC.Word ( Word8(..) )
83 #if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ < 601
84 -- openFileEx is available from the lang package, but we want to
85 -- be independent of hslibs libraries.
86 import GHC.Handle ( openFileEx, IOModeEx(..) )
88 import System.IO ( openBinaryFile )
91 #if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ < 601
92 openBinaryFile f mode = openFileEx f (BinaryMode mode)
95 type BinArray = IOUArray Int Word8
97 ---------------------------------------------------------------
99 ---------------------------------------------------------------
102 = BinMem { -- binary data stored in an unboxed array
103 bh_usr :: UserData, -- sigh, need parameterized modules :-)
104 _off_r :: !FastMutInt, -- the current offset
105 _sz_r :: !FastMutInt, -- size of the array (cached)
106 _arr_r :: !(IORef BinArray) -- the array (bounds: (0,size-1))
108 -- XXX: should really store a "high water mark" for dumping out
109 -- the binary data to a file.
111 | BinIO { -- binary data stored in a file
113 _off_r :: !FastMutInt, -- the current offset (cached)
114 _hdl :: !IO.Handle -- the file handle (must be seekable)
116 -- cache the file ptr in BinIO; using hTell is too expensive
117 -- to call repeatedly. If anyone else is modifying this Handle
118 -- at the same time, we'll be screwed.
120 getUserData :: BinHandle -> UserData
121 getUserData bh = bh_usr bh
123 setUserData :: BinHandle -> UserData -> BinHandle
124 setUserData bh us = bh { bh_usr = us }
127 ---------------------------------------------------------------
129 ---------------------------------------------------------------
131 newtype Bin a = BinPtr Int
132 deriving (Eq, Ord, Show, Bounded)
134 castBin :: Bin a -> Bin b
135 castBin (BinPtr i) = BinPtr i
137 ---------------------------------------------------------------
139 ---------------------------------------------------------------
142 put_ :: BinHandle -> a -> IO ()
143 put :: BinHandle -> a -> IO (Bin a)
144 get :: BinHandle -> IO a
146 -- define one of put_, put. Use of put_ is recommended because it
147 -- is more likely that tail-calls can kick in, and we rarely need the
148 -- position return value.
149 put_ bh a = do put bh a; return ()
150 put bh a = do p <- tellBin bh; put_ bh a; return p
152 putAt :: Binary a => BinHandle -> Bin a -> a -> IO ()
153 putAt bh p x = do seekBin bh p; put bh x; return ()
155 getAt :: Binary a => BinHandle -> Bin a -> IO a
156 getAt bh p = do seekBin bh p; get bh
158 openBinIO_ :: IO.Handle -> IO BinHandle
159 openBinIO_ h = openBinIO h
161 openBinIO :: IO.Handle -> IO BinHandle
165 return (BinIO noUserData r h)
167 openBinMem :: Int -> IO BinHandle
169 | size <= 0 = error "Data.Binary.openBinMem: size must be >= 0"
171 arr <- newArray_ (0,size-1)
172 arr_r <- newIORef arr
173 ix_r <- newFastMutInt
174 writeFastMutInt ix_r 0
175 sz_r <- newFastMutInt
176 writeFastMutInt sz_r size
177 return (BinMem noUserData ix_r sz_r arr_r)
179 tellBin :: BinHandle -> IO (Bin a)
180 tellBin (BinIO _ r _) = do ix <- readFastMutInt r; return (BinPtr ix)
181 tellBin (BinMem _ r _ _) = do ix <- readFastMutInt r; return (BinPtr ix)
183 seekBin :: BinHandle -> Bin a -> IO ()
184 seekBin (BinIO _ ix_r h) (BinPtr p) = do
185 writeFastMutInt ix_r p
186 hSeek h AbsoluteSeek (fromIntegral p)
187 seekBin h@(BinMem _ ix_r sz_r _) (BinPtr p) = do
188 sz <- readFastMutInt sz_r
190 then do expandBin h p; writeFastMutInt ix_r p
191 else writeFastMutInt ix_r p
193 isEOFBin :: BinHandle -> IO Bool
194 isEOFBin (BinMem _ ix_r sz_r _) = do
195 ix <- readFastMutInt ix_r
196 sz <- readFastMutInt sz_r
198 isEOFBin (BinIO _ _ h) = hIsEOF h
200 writeBinMem :: BinHandle -> FilePath -> IO ()
201 writeBinMem (BinIO _ _ _) _ = error "Data.Binary.writeBinMem: not a memory handle"
202 writeBinMem (BinMem _ ix_r _ arr_r) fn = do
203 h <- openBinaryFile fn WriteMode
204 arr <- readIORef arr_r
205 ix <- readFastMutInt ix_r
209 readBinMem :: FilePath -> IO BinHandle
210 -- Return a BinHandle with a totally undefined State
211 readBinMem filename = do
212 h <- openBinaryFile filename ReadMode
213 filesize' <- hFileSize h
214 let filesize = fromIntegral filesize'
215 arr <- newArray_ (0,filesize-1)
216 count <- hGetArray h arr filesize
217 when (count /= filesize)
218 (error ("Binary.readBinMem: only read " ++ show count ++ " bytes"))
220 arr_r <- newIORef arr
221 ix_r <- newFastMutInt
222 writeFastMutInt ix_r 0
223 sz_r <- newFastMutInt
224 writeFastMutInt sz_r filesize
225 return (BinMem noUserData ix_r sz_r arr_r)
227 -- expand the size of the array to include a specified offset
228 expandBin :: BinHandle -> Int -> IO ()
229 expandBin (BinMem _ _ sz_r arr_r) off = do
230 sz <- readFastMutInt sz_r
231 let sz' = head (dropWhile (<= off) (iterate (* 2) sz))
232 arr <- readIORef arr_r
233 arr' <- newArray_ (0,sz'-1)
234 sequence_ [ unsafeRead arr i >>= unsafeWrite arr' i
235 | i <- [ 0 .. sz-1 ] ]
236 writeFastMutInt sz_r sz'
237 writeIORef arr_r arr'
239 hPutStrLn stderr ("Binary: expanding to size: " ++ show sz')
241 expandBin (BinIO _ _ _) _ = return ()
242 -- no need to expand a file, we'll assume they expand by themselves.
244 -- -----------------------------------------------------------------------------
245 -- Low-level reading/writing of bytes
247 putWord8 :: BinHandle -> Word8 -> IO ()
248 putWord8 h@(BinMem _ ix_r sz_r arr_r) w = do
249 ix <- readFastMutInt ix_r
250 sz <- readFastMutInt sz_r
251 -- double the size of the array if it overflows
253 then do expandBin h ix
255 else do arr <- readIORef arr_r
257 writeFastMutInt ix_r (ix+1)
259 putWord8 (BinIO _ ix_r h) w = do
260 ix <- readFastMutInt ix_r
261 hPutChar h (chr (fromIntegral w)) -- XXX not really correct
262 writeFastMutInt ix_r (ix+1)
265 getWord8 :: BinHandle -> IO Word8
266 getWord8 (BinMem _ ix_r sz_r arr_r) = do
267 ix <- readFastMutInt ix_r
268 sz <- readFastMutInt sz_r
270 ioError (mkIOError eofErrorType "Data.Binary.getWord8" Nothing Nothing)
271 arr <- readIORef arr_r
272 w <- unsafeRead arr ix
273 writeFastMutInt ix_r (ix+1)
275 getWord8 (BinIO _ ix_r h) = do
276 ix <- readFastMutInt ix_r
278 writeFastMutInt ix_r (ix+1)
279 return $! (fromIntegral (ord c)) -- XXX not really correct
281 putByte :: BinHandle -> Word8 -> IO ()
282 putByte bh w = put_ bh w
284 getByte :: BinHandle -> IO Word8
287 -- -----------------------------------------------------------------------------
288 -- Primitve Word writes
290 instance Binary Word8 where
294 instance Binary Word16 where
295 put_ h w = do -- XXX too slow.. inline putWord8?
296 putByte h (fromIntegral (w `shiftR` 8))
297 putByte h (fromIntegral (w .&. 0xff))
301 return $! ((fromIntegral w1 `shiftL` 8) .|. fromIntegral w2)
304 instance Binary Word32 where
306 putByte h (fromIntegral (w `shiftR` 24))
307 putByte h (fromIntegral ((w `shiftR` 16) .&. 0xff))
308 putByte h (fromIntegral ((w `shiftR` 8) .&. 0xff))
309 putByte h (fromIntegral (w .&. 0xff))
315 return $! ((fromIntegral w1 `shiftL` 24) .|.
316 (fromIntegral w2 `shiftL` 16) .|.
317 (fromIntegral w3 `shiftL` 8) .|.
320 instance Binary Word64 where
322 putByte h (fromIntegral (w `shiftR` 56))
323 putByte h (fromIntegral ((w `shiftR` 48) .&. 0xff))
324 putByte h (fromIntegral ((w `shiftR` 40) .&. 0xff))
325 putByte h (fromIntegral ((w `shiftR` 32) .&. 0xff))
326 putByte h (fromIntegral ((w `shiftR` 24) .&. 0xff))
327 putByte h (fromIntegral ((w `shiftR` 16) .&. 0xff))
328 putByte h (fromIntegral ((w `shiftR` 8) .&. 0xff))
329 putByte h (fromIntegral (w .&. 0xff))
339 return $! ((fromIntegral w1 `shiftL` 56) .|.
340 (fromIntegral w2 `shiftL` 48) .|.
341 (fromIntegral w3 `shiftL` 40) .|.
342 (fromIntegral w4 `shiftL` 32) .|.
343 (fromIntegral w5 `shiftL` 24) .|.
344 (fromIntegral w6 `shiftL` 16) .|.
345 (fromIntegral w7 `shiftL` 8) .|.
348 -- -----------------------------------------------------------------------------
349 -- Primitve Int writes
351 instance Binary Int8 where
352 put_ h w = put_ h (fromIntegral w :: Word8)
353 get h = do w <- get h; return $! (fromIntegral (w::Word8))
355 instance Binary Int16 where
356 put_ h w = put_ h (fromIntegral w :: Word16)
357 get h = do w <- get h; return $! (fromIntegral (w::Word16))
359 instance Binary Int32 where
360 put_ h w = put_ h (fromIntegral w :: Word32)
361 get h = do w <- get h; return $! (fromIntegral (w::Word32))
363 instance Binary Int64 where
364 put_ h w = put_ h (fromIntegral w :: Word64)
365 get h = do w <- get h; return $! (fromIntegral (w::Word64))
367 -- -----------------------------------------------------------------------------
368 -- Instances for standard types
370 instance Binary () where
371 put_ _ () = return ()
373 -- getF bh p = case getBitsF bh 0 p of (_,b) -> ((),b)
375 instance Binary Bool where
376 put_ bh b = putByte bh (fromIntegral (fromEnum b))
377 get bh = do x <- getWord8 bh; return $! (toEnum (fromIntegral x))
378 -- getF bh p = case getBitsF bh 1 p of (x,b) -> (toEnum x,b)
380 instance Binary Char where
381 put_ bh c = put_ bh (fromIntegral (ord c) :: Word32)
382 get bh = do x <- get bh; return $! (chr (fromIntegral (x :: Word32)))
383 -- getF bh p = case getBitsF bh 8 p of (x,b) -> (toEnum x,b)
385 instance Binary Int where
386 #if SIZEOF_HSINT == 4
387 put_ bh i = put_ bh (fromIntegral i :: Int32)
390 return $! (fromIntegral (x :: Int32))
391 #elif SIZEOF_HSINT == 8
392 put_ bh i = put_ bh (fromIntegral i :: Int64)
395 return $! (fromIntegral (x :: Int64))
397 #error "unsupported sizeof(HsInt)"
399 -- getF bh = getBitsF bh 32
401 instance Binary a => Binary [a] where
405 then putByte bh (fromIntegral len :: Word8)
406 else do putByte bh 0xff; put_ bh (fromIntegral len :: Word32)
412 else return (fromIntegral b :: Word32)
413 let loop 0 = return []
414 loop n = do a <- get bh; as <- loop (n-1); return (a:as)
417 instance (Binary a, Binary b) => Binary (a,b) where
418 put_ bh (a,b) = do put_ bh a; put_ bh b
419 get bh = do a <- get bh
423 instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
424 put_ bh (a,b,c) = do put_ bh a; put_ bh b; put_ bh c
425 get bh = do a <- get bh
430 instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
431 put_ bh (a,b,c,d) = do put_ bh a; put_ bh b; put_ bh c; put_ bh d
432 get bh = do a <- get bh
438 instance Binary a => Binary (Maybe a) where
439 put_ bh Nothing = putByte bh 0
440 put_ bh (Just a) = do putByte bh 1; put_ bh a
441 get bh = do h <- getWord8 bh
444 _ -> do x <- get bh; return (Just x)
446 instance (Binary a, Binary b) => Binary (Either a b) where
447 put_ bh (Left a) = do putByte bh 0; put_ bh a
448 put_ bh (Right b) = do putByte bh 1; put_ bh b
449 get bh = do h <- getWord8 bh
451 0 -> do a <- get bh ; return (Left a)
452 _ -> do b <- get bh ; return (Right b)
454 #if defined(__GLASGOW_HASKELL__) || 1
455 --to quote binary-0.3 on this code idea,
457 -- TODO This instance is not architecture portable. GMP stores numbers as
458 -- arrays of machine sized words, so the byte format is not portable across
459 -- architectures with different endianess and word size.
461 -- This makes it hard (impossible) to make an equivalent instance
462 -- with code that is compilable with non-GHC. Do we need any instance
463 -- Binary Integer, and if so, does it have to be blazing fast? Or can
464 -- we just change this instance to be portable like the rest of the
465 -- instances? (binary package has code to steal for that)
467 -- yes, we need Binary Integer and Binary Rational in basicTypes/Literal.lhs
469 instance Binary Integer where
470 -- XXX This is hideous
471 put_ bh i = put_ bh (show i)
472 get bh = do str <- get bh
474 [(i, "")] -> return i
475 _ -> fail ("Binary Integer: got " ++ show str)
478 put_ bh (S# i#) = do putByte bh 0; put_ bh (I# i#)
479 put_ bh (J# s# a#) = do
482 let sz# = sizeofByteArray# a# -- in *bytes*
483 put_ bh (I# sz#) -- in *bytes*
484 putByteArray bh a# sz#
489 0 -> do (I# i#) <- get bh
491 _ -> do (I# s#) <- get bh
493 (BA a#) <- getByteArray bh sz
497 -- As for the rest of this code, even though this module
498 -- exports it, it doesn't seem to be used anywhere else
501 putByteArray :: BinHandle -> ByteArray# -> Int# -> IO ()
502 putByteArray bh a s# = loop 0#
504 | n# ==# s# = return ()
506 putByte bh (indexByteArray a n#)
509 getByteArray :: BinHandle -> Int -> IO ByteArray
510 getByteArray bh (I# sz) = do
511 (MBA arr) <- newByteArray sz
513 | n ==# sz = return ()
516 writeByteArray arr n w
522 data ByteArray = BA ByteArray#
523 data MBA = MBA (MutableByteArray# RealWorld)
525 newByteArray :: Int# -> IO MBA
526 newByteArray sz = IO $ \s ->
527 case newByteArray# sz s of { (# s, arr #) ->
530 freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray
531 freezeByteArray arr = IO $ \s ->
532 case unsafeFreezeByteArray# arr s of { (# s, arr #) ->
535 writeByteArray :: MutableByteArray# RealWorld -> Int# -> Word8 -> IO ()
536 writeByteArray arr i (W8# w) = IO $ \s ->
537 case writeWord8Array# arr i w s of { s ->
540 indexByteArray :: ByteArray# -> Int# -> Word8
541 indexByteArray a# n# = W8# (indexWord8Array# a# n#)
543 instance (Integral a, Binary a) => Binary (Ratio a) where
544 put_ bh (a :% b) = do put_ bh a; put_ bh b
545 get bh = do a <- get bh; b <- get bh; return (a :% b)
548 instance Binary (Bin a) where
549 put_ bh (BinPtr i) = put_ bh i
550 get bh = do i <- get bh; return (BinPtr i)
552 -- -----------------------------------------------------------------------------
553 -- Lazy reading/writing
555 lazyPut :: Binary a => BinHandle -> a -> IO ()
557 -- output the obj with a ptr to skip over it:
559 put_ bh pre_a -- save a slot for the ptr
560 put_ bh a -- dump the object
561 q <- tellBin bh -- q = ptr to after object
562 putAt bh pre_a q -- fill in slot before a with ptr to q
563 seekBin bh q -- finally carry on writing at q
565 lazyGet :: Binary a => BinHandle -> IO a
567 p <- get bh -- a BinPtr
569 a <- unsafeInterleaveIO (getAt bh p_a)
570 seekBin bh p -- skip over the object for now
573 -- -----------------------------------------------------------------------------
575 -- -----------------------------------------------------------------------------
579 -- for *deserialising* only:
580 ud_dict :: Dictionary,
581 ud_symtab :: SymbolTable,
583 -- for *serialising* only:
584 ud_dict_next :: !FastMutInt, -- The next index to use
585 ud_dict_map :: !(IORef (UniqFM (Int,FastString))),
586 -- indexed by FastString
588 ud_symtab_next :: !FastMutInt, -- The next index to use
589 ud_symtab_map :: !(IORef (UniqFM (Int,Name)))
593 newReadState :: Dictionary -> IO UserData
594 newReadState dict = do
595 dict_next <- newFastMutInt
596 dict_map <- newIORef (undef "dict_map")
597 symtab_next <- newFastMutInt
598 symtab_map <- newIORef (undef "symtab_map")
599 return UserData { ud_dict = dict,
600 ud_symtab = undef "symtab",
601 ud_dict_next = dict_next,
602 ud_dict_map = dict_map,
603 ud_symtab_next = symtab_next,
604 ud_symtab_map = symtab_map
607 newWriteState :: IO UserData
609 dict_next <- newFastMutInt
610 writeFastMutInt dict_next 0
611 dict_map <- newIORef emptyUFM
612 symtab_next <- newFastMutInt
613 writeFastMutInt symtab_next 0
614 symtab_map <- newIORef emptyUFM
615 return UserData { ud_dict = undef "dict",
616 ud_symtab = undef "symtab",
617 ud_dict_next = dict_next,
618 ud_dict_map = dict_map,
619 ud_symtab_next = symtab_next,
620 ud_symtab_map = symtab_map
624 noUserData = undef "UserData"
627 undef s = panic ("Binary.UserData: no " ++ s)
629 ---------------------------------------------------------
631 ---------------------------------------------------------
633 type Dictionary = Array Int FastString -- The dictionary
634 -- Should be 0-indexed
636 putDictionary :: BinHandle -> Int -> UniqFM (Int,FastString) -> IO ()
637 putDictionary bh sz dict = do
639 mapM_ (putFS bh) (elems (array (0,sz-1) (eltsUFM dict)))
641 getDictionary :: BinHandle -> IO Dictionary
642 getDictionary bh = do
644 elems <- sequence (take sz (repeat (getFS bh)))
645 return (listArray (0,sz-1) elems)
647 ---------------------------------------------------------
649 ---------------------------------------------------------
651 -- On disk, the symbol table is an array of IfaceExtName, when
652 -- reading it in we turn it into a SymbolTable.
654 type SymbolTable = Array Int Name
656 ---------------------------------------------------------
657 -- Reading and writing FastStrings
658 ---------------------------------------------------------
660 putFS :: BinHandle -> FastString -> IO ()
661 putFS bh (FastString _ l _ buf _) = do
663 withForeignPtr buf $ \ptr ->
665 go n | n == l = return ()
667 b <- peekElemOff ptr n
673 {- -- possible faster version, not quite there yet:
674 getFS bh@BinMem{} = do
676 arr <- readIORef (arr_r bh)
677 off <- readFastMutInt (off_r bh)
678 return $! (mkFastSubStringBA# arr off l)
680 getFS :: BinHandle -> IO FastString
683 fp <- mallocForeignPtrBytes l
684 withForeignPtr fp $ \ptr -> do
686 go n | n == l = mkFastStringForeignPtr ptr fp l
694 instance Binary FastString where
696 case getUserData bh of {
697 UserData { ud_dict_next = j_r,
698 ud_dict_map = out_r} -> do
699 out <- readIORef out_r
700 let uniq = getUnique f
701 case lookupUFM out uniq of
702 Just (j, _) -> put_ bh j
704 j <- readFastMutInt j_r
706 writeFastMutInt j_r (j + 1)
707 writeIORef out_r $! addToUFM out uniq (j, f)
712 return $! (ud_dict (getUserData bh) ! j)