3 -- (c) The University of Glasgow 2002
5 -- Binary I/O library, with special tweaks for GHC
7 -- Based on the nhc98 Binary library, which is copyright
8 -- (c) Malcolm Wallace and Colin Runciman, University of York, 1998.
9 -- Under the terms of the license for that software, we must tell you
10 -- where you can obtain the original version of the Binary library, namely
11 -- http://www.cs.york.ac.uk/fp/nhc98/
18 openBinIO, openBinIO_,
33 -- for writing instances:
46 getBinFileWithDict, -- :: Binary a => FilePath -> IO a
47 putBinFileWithDict, -- :: Binary a => FilePath -> Module -> a -> IO ()
53 import {-# SOURCE #-} Module
58 #if __GLASGOW_HASKELL__ < 503
66 import GlaExts hiding (ByteArray, newByteArray, freezeByteArray)
69 import PrelIOBase ( IOError(..), IOErrorType(..)
70 #if __GLASGOW_HASKELL__ > 411
74 import PrelReal ( Ratio(..) )
75 import PrelIOBase ( IO(..) )
83 import Data.Char ( ord, chr )
84 import Data.Array.Base ( unsafeRead, unsafeWrite )
85 import Control.Monad ( when )
86 import Control.Exception ( throw )
87 import System.IO as IO
88 import System.IO.Unsafe ( unsafeInterleaveIO )
89 import System.IO.Error ( mkIOError, eofErrorType )
90 import GHC.Real ( Ratio(..) )
92 import GHC.IOBase ( IO(..) )
93 import GHC.Word ( Word8(..) )
94 import GHC.Handle ( openFileEx, IOModeEx(..) )
97 #if __GLASGOW_HASKELL__ < 503
98 type BinArray = MutableByteArray RealWorld Int
99 newArray_ bounds = stToIO (newCharArray bounds)
100 unsafeWrite arr ix e = stToIO (writeWord8Array arr ix e)
101 unsafeRead arr ix = stToIO (readWord8Array arr ix)
102 #if __GLASGOW_HASKELL__ < 411
103 newByteArray# = newCharArray#
105 hPutArray h arr sz = hPutBufBAFull h arr sz
106 hGetArray h sz = hGetBufBAFull h sz
108 mkIOError :: IOErrorType -> String -> Maybe Handle -> Maybe FilePath -> Exception
109 mkIOError t location maybe_hdl maybe_filename
110 = IOException (IOError maybe_hdl t location ""
111 #if __GLASGOW_HASKELL__ > 411
119 #define SIZEOF_HSINT INT_SIZE_IN_BYTES
122 #ifndef SIZEOF_HSWORD
123 #define SIZEOF_HSWORD WORD_SIZE_IN_BYTES
127 type BinArray = IOUArray Int Word8
131 = BinMem { -- binary data stored in an unboxed array
132 state :: BinHandleState, -- sigh, need parameterized modules :-)
133 off_r :: !FastMutInt, -- the current offset
134 sz_r :: !FastMutInt, -- size of the array (cached)
135 arr_r :: !(IORef BinArray) -- the array (bounds: (0,size-1))
137 -- XXX: should really store a "high water mark" for dumping out
138 -- the binary data to a file.
140 | BinIO { -- binary data stored in a file
141 state :: BinHandleState,
142 off_r :: !FastMutInt, -- the current offset (cached)
143 hdl :: !IO.Handle -- the file handle (must be seekable)
145 -- cache the file ptr in BinIO; using hTell is too expensive
146 -- to call repeatedly. If anyone else is modifying this Handle
147 -- at the same time, we'll be screwed.
149 newtype Bin a = BinPtr Int
150 deriving (Eq, Ord, Show, Bounded)
152 castBin :: Bin a -> Bin b
153 castBin (BinPtr i) = BinPtr i
156 put_ :: BinHandle -> a -> IO ()
157 put :: BinHandle -> a -> IO (Bin a)
158 get :: BinHandle -> IO a
160 -- define one of put_, put. Use of put_ is recommended because it
161 -- is more likely that tail-calls can kick in, and we rarely need the
162 -- position return value.
163 put_ bh a = do put bh a; return ()
164 put bh a = do p <- tellBin bh; put_ bh a; return p
166 putAt :: Binary a => BinHandle -> Bin a -> a -> IO ()
167 putAt bh p x = do seekBin bh p; put bh x; return ()
169 getAt :: Binary a => BinHandle -> Bin a -> IO a
170 getAt bh p = do seekBin bh p; get bh
172 openBinIO_ :: IO.Handle -> IO BinHandle
173 openBinIO_ h = openBinIO h noBinHandleUserData
175 openBinIO :: IO.Handle -> Module -> IO BinHandle
179 state <- newWriteState mod
180 return (BinIO state r h)
182 openBinMem :: Int -> Module -> IO BinHandle
184 | size <= 0 = error "Data.Binary.openBinMem: size must be >= 0"
186 arr <- newArray_ (0,size-1)
187 arr_r <- newIORef arr
188 ix_r <- newFastMutInt
189 writeFastMutInt ix_r 0
190 sz_r <- newFastMutInt
191 writeFastMutInt sz_r size
192 state <- newWriteState mod
193 return (BinMem state ix_r sz_r arr_r)
195 noBinHandleUserData = error "Binary.BinHandle: no user data"
197 getUserData :: BinHandle -> BinHandleState
198 getUserData bh = state bh
200 tellBin :: BinHandle -> IO (Bin a)
201 tellBin (BinIO _ r _) = do ix <- readFastMutInt r; return (BinPtr ix)
202 tellBin (BinMem _ r _ _) = do ix <- readFastMutInt r; return (BinPtr ix)
204 seekBin :: BinHandle -> Bin a -> IO ()
205 seekBin (BinIO _ ix_r h) (BinPtr p) = do
206 writeFastMutInt ix_r p
207 hSeek h AbsoluteSeek (fromIntegral p)
208 seekBin h@(BinMem _ ix_r sz_r a) (BinPtr p) = do
209 sz <- readFastMutInt sz_r
211 then do expandBin h p; writeFastMutInt ix_r p
212 else writeFastMutInt ix_r p
214 isEOFBin :: BinHandle -> IO Bool
215 isEOFBin (BinMem _ ix_r sz_r a) = do
216 ix <- readFastMutInt ix_r
217 sz <- readFastMutInt sz_r
219 isEOFBin (BinIO _ ix_r h) = hIsEOF h
221 writeBinMem :: BinHandle -> FilePath -> IO ()
222 writeBinMem (BinIO _ _ _) _ = error "Data.Binary.writeBinMem: not a memory handle"
223 writeBinMem (BinMem _ ix_r sz_r arr_r) fn = do
224 h <- openFileEx fn (BinaryMode WriteMode)
225 arr <- readIORef arr_r
226 ix <- readFastMutInt ix_r
228 #if __GLASGOW_HASKELL__ < 500
229 -- workaround a bug in ghc 4.08's implementation of hPutBuf (it doesn't
230 -- set the FILEOBJ_RW_WRITTEN flag on the file object, so the file doens't
231 -- get flushed properly). Adding an extra '\0' doens't do any harm.
236 readBinMem :: FilePath -> IO BinHandle
237 readBinMem filename = do
238 h <- openFileEx filename (BinaryMode ReadMode)
239 filesize' <- hFileSize h
240 let filesize = fromIntegral filesize'
241 arr <- newArray_ (0,filesize-1)
242 count <- hGetArray h arr filesize
243 when (count /= filesize)
244 (error ("Binary.readBinMem: only read " ++ show count ++ " bytes"))
246 arr_r <- newIORef arr
247 ix_r <- newFastMutInt
248 writeFastMutInt ix_r 0
249 sz_r <- newFastMutInt
250 writeFastMutInt sz_r filesize
251 return (BinMem initReadState ix_r sz_r arr_r)
253 -- expand the size of the array to include a specified offset
254 expandBin :: BinHandle -> Int -> IO ()
255 expandBin (BinMem _ ix_r sz_r arr_r) off = do
256 sz <- readFastMutInt sz_r
257 let sz' = head (dropWhile (<= off) (iterate (* 2) sz))
258 arr <- readIORef arr_r
259 arr' <- newArray_ (0,sz'-1)
260 sequence_ [ unsafeRead arr i >>= unsafeWrite arr' i
261 | i <- [ 0 .. sz-1 ] ]
262 writeFastMutInt sz_r sz'
263 writeIORef arr_r arr'
264 hPutStrLn stderr ("expanding to size: " ++ show sz')
266 expandBin (BinIO _ _ _) _ = return ()
267 -- no need to expand a file, we'll assume they expand by themselves.
269 -- -----------------------------------------------------------------------------
270 -- Low-level reading/writing of bytes
272 putWord8 :: BinHandle -> Word8 -> IO ()
273 putWord8 h@(BinMem _ ix_r sz_r arr_r) w = do
274 ix <- readFastMutInt ix_r
275 sz <- readFastMutInt sz_r
276 -- double the size of the array if it overflows
278 then do expandBin h ix
280 else do arr <- readIORef arr_r
282 writeFastMutInt ix_r (ix+1)
284 putWord8 (BinIO _ ix_r h) w = do
285 ix <- readFastMutInt ix_r
286 hPutChar h (chr (fromIntegral w)) -- XXX not really correct
287 writeFastMutInt ix_r (ix+1)
290 getWord8 :: BinHandle -> IO Word8
291 getWord8 (BinMem _ ix_r sz_r arr_r) = do
292 ix <- readFastMutInt ix_r
293 sz <- readFastMutInt sz_r
295 throw (mkIOError eofErrorType "Data.Binary.getWord8" Nothing Nothing)
296 arr <- readIORef arr_r
297 w <- unsafeRead arr ix
298 writeFastMutInt ix_r (ix+1)
300 getWord8 (BinIO _ ix_r h) = do
301 ix <- readFastMutInt ix_r
303 writeFastMutInt ix_r (ix+1)
304 return (fromIntegral (ord c)) -- XXX not really correct
306 putByte :: BinHandle -> Word8 -> IO ()
307 putByte bh w = put_ bh w
309 getByte :: BinHandle -> IO Word8
312 -- -----------------------------------------------------------------------------
313 -- Primitve Word writes
315 instance Binary Word8 where
319 instance Binary Word16 where
320 put_ h w = do -- XXX too slow.. inline putWord8?
321 putByte h (fromIntegral (w `shiftR` 8))
322 putByte h (fromIntegral (w .&. 0xff))
326 return ((fromIntegral w1 `shiftL` 8) .|. fromIntegral w2)
329 instance Binary Word32 where
331 putByte h (fromIntegral (w `shiftR` 24))
332 putByte h (fromIntegral ((w `shiftR` 16) .&. 0xff))
333 putByte h (fromIntegral ((w `shiftR` 8) .&. 0xff))
334 putByte h (fromIntegral (w .&. 0xff))
340 return ((fromIntegral w1 `shiftL` 24) .|.
341 (fromIntegral w2 `shiftL` 16) .|.
342 (fromIntegral w3 `shiftL` 8) .|.
346 instance Binary Word64 where
348 putByte h (fromIntegral (w `shiftR` 56))
349 putByte h (fromIntegral ((w `shiftR` 48) .&. 0xff))
350 putByte h (fromIntegral ((w `shiftR` 40) .&. 0xff))
351 putByte h (fromIntegral ((w `shiftR` 32) .&. 0xff))
352 putByte h (fromIntegral ((w `shiftR` 24) .&. 0xff))
353 putByte h (fromIntegral ((w `shiftR` 16) .&. 0xff))
354 putByte h (fromIntegral ((w `shiftR` 8) .&. 0xff))
355 putByte h (fromIntegral (w .&. 0xff))
365 return ((fromIntegral w1 `shiftL` 56) .|.
366 (fromIntegral w2 `shiftL` 48) .|.
367 (fromIntegral w3 `shiftL` 40) .|.
368 (fromIntegral w4 `shiftL` 32) .|.
369 (fromIntegral w5 `shiftL` 24) .|.
370 (fromIntegral w6 `shiftL` 16) .|.
371 (fromIntegral w7 `shiftL` 8) .|.
374 -- -----------------------------------------------------------------------------
375 -- Primitve Int writes
377 instance Binary Int8 where
378 put_ h w = put_ h (fromIntegral w :: Word8)
379 get h = do w <- get h; return (fromIntegral (w::Word8))
381 instance Binary Int16 where
382 put_ h w = put_ h (fromIntegral w :: Word16)
383 get h = do w <- get h; return (fromIntegral (w::Word16))
385 instance Binary Int32 where
386 put_ h w = put_ h (fromIntegral w :: Word32)
387 get h = do w <- get h; return (fromIntegral (w::Word32))
389 instance Binary Int64 where
390 put_ h w = put_ h (fromIntegral w :: Word64)
391 get h = do w <- get h; return (fromIntegral (w::Word64))
393 -- -----------------------------------------------------------------------------
394 -- Instances for standard types
396 instance Binary () where
397 put_ bh () = return ()
399 -- getF bh p = case getBitsF bh 0 p of (_,b) -> ((),b)
401 instance Binary Bool where
402 put_ bh b = putByte bh (fromIntegral (fromEnum b))
403 get bh = do x <- getWord8 bh; return (toEnum (fromIntegral x))
404 -- getF bh p = case getBitsF bh 1 p of (x,b) -> (toEnum x,b)
406 instance Binary Char where
407 put_ bh c = put_ bh (fromIntegral (ord c) :: Word32)
408 get bh = do x <- get bh; return (chr (fromIntegral (x :: Word32)))
409 -- getF bh p = case getBitsF bh 8 p of (x,b) -> (toEnum x,b)
411 instance Binary Int where
412 #if SIZEOF_HSINT == 4
413 put_ bh i = put_ bh (fromIntegral i :: Int32)
416 return (fromIntegral (x :: Int32))
417 #elif SIZEOF_HSINT == 8
418 put_ bh i = put_ bh (fromIntegral i :: Int64)
421 return (fromIntegral (x :: Int64))
423 #error "unsupported sizeof(HsInt)"
425 -- getF bh = getBitsF bh 32
427 instance Binary a => Binary [a] where
428 put_ bh [] = putByte bh 0
429 put_ bh (x:xs) = do putByte bh 1; put_ bh x; put_ bh xs
430 get bh = do h <- getWord8 bh
437 instance (Binary a, Binary b) => Binary (a,b) where
438 put_ bh (a,b) = do put_ bh a; put_ bh b
439 get bh = do a <- get bh
443 instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
444 put_ bh (a,b,c) = do put_ bh a; put_ bh b; put_ bh c
445 get bh = do a <- get bh
450 instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
451 put_ bh (a,b,c,d) = do put_ bh a; put_ bh b; put_ bh c; put_ bh d
452 get bh = do a <- get bh
458 instance Binary a => Binary (Maybe a) where
459 put_ bh Nothing = putByte bh 0
460 put_ bh (Just a) = do putByte bh 1; put_ bh a
461 get bh = do h <- getWord8 bh
464 _ -> do x <- get bh; return (Just x)
466 instance (Binary a, Binary b) => Binary (Either a b) where
467 put_ bh (Left a) = do putByte bh 0; put_ bh a
468 put_ bh (Right b) = do putByte bh 1; put_ bh b
469 get bh = do h <- getWord8 bh
471 0 -> do a <- get bh ; return (Left a)
472 _ -> do b <- get bh ; return (Right b)
474 #ifdef __GLASGOW_HASKELL__
475 instance Binary Integer where
476 put_ bh (S# i#) = do putByte bh 0; put_ bh (I# i#)
477 put_ bh (J# s# a#) = do
480 let sz# = sizeofByteArray# a# -- in *bytes*
481 put_ bh (I# sz#) -- in *bytes*
482 putByteArray bh a# sz#
487 0 -> do (I# i#) <- get bh
489 _ -> do (I# s#) <- get bh
491 (BA a#) <- getByteArray bh sz
494 putByteArray :: BinHandle -> ByteArray# -> Int# -> IO ()
495 putByteArray bh a s# = loop 0#
497 | n# ==# s# = return ()
499 putByte bh (indexByteArray a n#)
502 getByteArray :: BinHandle -> Int -> IO ByteArray
503 getByteArray bh (I# sz) = do
504 (MBA arr) <- newByteArray sz
506 | n ==# sz = return ()
509 writeByteArray arr n w
515 data ByteArray = BA ByteArray#
516 data MBA = MBA (MutableByteArray# RealWorld)
518 newByteArray :: Int# -> IO MBA
519 newByteArray sz = IO $ \s ->
520 case newByteArray# sz s of { (# s, arr #) ->
523 freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray
524 freezeByteArray arr = IO $ \s ->
525 case unsafeFreezeByteArray# arr s of { (# s, arr #) ->
528 writeByteArray :: MutableByteArray# RealWorld -> Int# -> Word8 -> IO ()
530 #if __GLASGOW_HASKELL__ < 503
531 writeByteArray arr i w8 = IO $ \s ->
532 case word8ToWord w8 of { W# w# ->
533 case writeCharArray# arr i (chr# (word2Int# w#)) s of { s ->
536 writeByteArray arr i (W8# w) = IO $ \s ->
537 case writeWord8Array# arr i w s of { s ->
541 #if __GLASGOW_HASKELL__ < 503
542 indexByteArray a# n# = fromIntegral (I# (ord# (indexCharArray# a# n#)))
544 indexByteArray a# n# = W8# (indexWord8Array# a# n#)
547 instance (Integral a, Binary a) => Binary (Ratio a) where
548 put_ bh (a :% b) = do put_ bh a; put_ bh b
549 get bh = do a <- get bh; b <- get bh; return (a :% b)
552 instance Binary (Bin a) where
553 put_ bh (BinPtr i) = put_ bh i
554 get bh = do i <- get bh; return (BinPtr i)
556 -- -----------------------------------------------------------------------------
557 -- unboxed mutable Ints
559 #ifdef __GLASGOW_HASKELL__
560 data FastMutInt = FastMutInt (MutableByteArray# RealWorld)
562 newFastMutInt = IO $ \s ->
563 case newByteArray# size s of { (# s, arr #) ->
564 (# s, FastMutInt arr #) }
565 where I# size = SIZEOF_HSWORD
567 readFastMutInt (FastMutInt arr) = IO $ \s ->
568 case readIntArray# arr 0# s of { (# s, i #) ->
571 writeFastMutInt (FastMutInt arr) (I# i) = IO $ \s ->
572 case writeIntArray# arr 0# i s of { s ->
576 -- -----------------------------------------------------------------------------
577 -- Lazy reading/writing
579 lazyPut :: Binary a => BinHandle -> a -> IO ()
581 -- output the obj with a ptr to skip over it:
583 put_ bh pre_a -- save a slot for the ptr
584 put_ bh a -- dump the object
585 q <- tellBin bh -- q = ptr to after object
586 putAt bh pre_a q -- fill in slot before a with ptr to q
587 seekBin bh q -- finally carry on writing at q
589 lazyGet :: Binary a => BinHandle -> IO a
591 p <- get bh -- a BinPtr
593 a <- unsafeInterleaveIO (getAt bh p_a)
594 seekBin bh p -- skip over the object for now
597 -- -----------------------------------------------------------------------------
600 type BinHandleState =
603 IORef (UniqFM (Int,FastString)),
604 Array Int FastString)
606 initReadState :: BinHandleState
607 initReadState = (undef, undef, undef, undef)
609 newWriteState :: Module -> IO BinHandleState
612 out_r <- newIORef emptyUFM
613 return (m,j_r,out_r,undef)
615 undef = error "Binary.BinHandleState"
617 -- -----------------------------------------------------------------------------
618 -- FastString binary interface
620 getBinFileWithDict :: Binary a => FilePath -> IO a
621 getBinFileWithDict file_path = do
622 bh <- Binary.readBinMem file_path
623 dict_p <- Binary.get bh -- get the dictionary ptr
626 dict <- getDictionary bh
628 let (mod, j_r, out_r, _) = state bh
629 get bh{ state = (mod,j_r,out_r,dict) }
631 initBinMemSize = (1024*1024) :: Int
633 putBinFileWithDict :: Binary a => FilePath -> Module -> a -> IO ()
634 putBinFileWithDict file_path mod a = do
635 bh <- openBinMem initBinMemSize mod
637 put_ bh p -- placeholder for ptr to dictionary
639 let (_, j_r, fm_r, _) = state bh
643 putAt bh p dict_p -- fill in the placeholder
644 seekBin bh dict_p -- seek back to the end of the file
645 putDictionary bh j (constructDictionary j fm)
646 writeBinMem bh file_path
648 type Dictionary = Array Int FastString
649 -- should be 0-indexed
651 putDictionary :: BinHandle -> Int -> Dictionary -> IO ()
652 putDictionary bh sz dict = do
654 mapM_ (putFS bh) (elems dict)
656 getDictionary :: BinHandle -> IO Dictionary
657 getDictionary bh = do
659 elems <- sequence (take sz (repeat (getFS bh)))
660 return (listArray (0,sz-1) elems)
662 constructDictionary :: Int -> UniqFM (Int,FastString) -> Dictionary
663 constructDictionary j fm = array (0,j-1) (eltsUFM fm)
665 putFS bh (FastString id l ba) = do
668 putFS bh s = error ("Binary.put_(FastString): " ++ unpackFS s)
669 -- Note: the length of the FastString is *not* the same as
670 -- the size of the ByteArray: the latter is rounded up to a
671 -- multiple of the word size.
675 (BA ba) <- getByteArray bh (I# l)
676 return (mkFastSubStringBA# ba 0# l)
677 -- XXX ToDo: one too many copies here
679 instance Binary FastString where
680 put_ bh f@(FastString id l ba) =
681 case getUserData bh of { (_, j_r, out_r, dict) -> do
682 out <- readIORef out_r
683 let uniq = getUnique f
684 case lookupUFM out uniq of
685 Just (j,f) -> put_ bh j
690 writeIORef out_r (addToUFM out uniq (j,f))
692 put_ bh s = error ("Binary.put_(FastString): " ++ show (unpackFS s))
696 case getUserData bh of (_, _, _, arr) -> return (arr ! j)