3 -- (c) The University of Glasgow 2002
5 -- Binary I/O library, with special tweaks for GHC
12 openBinIO, openBinIO_,
27 -- for writing instances:
40 getBinFileWithDict, -- :: Binary a => FilePath -> IO a
41 putBinFileWithDict, -- :: Binary a => FilePath -> Module -> a -> IO ()
47 import {-# SOURCE #-} Module
52 #if __GLASGOW_HASKELL__ < 503
60 import GlaExts hiding (ByteArray, newByteArray, freezeByteArray)
63 import PrelIOBase ( IOError(..), IOErrorType(..), IOException(..) )
64 import PrelReal ( Ratio(..) )
65 import PrelIOBase ( IO(..) )
73 import Data.Char ( ord, chr )
74 import Data.Array.Base ( unsafeRead, unsafeWrite )
75 import Control.Monad ( when )
76 import Control.Exception ( throw )
77 import System.IO as IO
78 import System.IO.Unsafe ( unsafeInterleaveIO )
79 import System.IO.Error ( mkIOError, eofErrorType )
80 import GHC.Real ( Ratio(..) )
82 import GHC.IOBase ( IO(..) )
83 import GHC.Word ( Word8(..) )
86 #if __GLASGOW_HASKELL__ < 503
87 type BinArray = MutableByteArray RealWorld Int
88 newArray_ bounds = stToIO (newCharArray bounds)
89 unsafeWrite arr ix e = stToIO (writeWord8Array arr ix e)
90 unsafeRead arr ix = stToIO (readWord8Array arr ix)
91 #if __GLASGOW_HASKELL__ < 411
92 newByteArray# = newCharArray#
94 hPutArray h arr sz = hPutBufBAFull h arr sz
95 hGetArray h sz = hGetBufBAFull h sz
97 mkIOError :: IOErrorType -> String -> Maybe Handle -> Maybe FilePath -> Exception
98 mkIOError t location maybe_hdl maybe_filename
99 = IOException (IOError maybe_hdl t location ""
100 #if __GLASGOW_HASKELL__ > 411
108 #define SIZEOF_HSINT INT_SIZE_IN_BYTES
111 #ifndef SIZEOF_HSWORD
112 #define SIZEOF_HSWORD WORD_SIZE_IN_BYTES
116 type BinArray = IOUArray Int Word8
120 = BinMem { -- binary data stored in an unboxed array
121 state :: BinHandleState, -- sigh, need parameterized modules :-)
122 off_r :: !FastMutInt, -- the current offset
123 sz_r :: !FastMutInt, -- size of the array (cached)
124 arr_r :: !(IORef BinArray) -- the array (bounds: (0,size-1))
126 -- XXX: should really store a "high water mark" for dumping out
127 -- the binary data to a file.
129 | BinIO { -- binary data stored in a file
130 state :: BinHandleState,
131 off_r :: !FastMutInt, -- the current offset (cached)
132 hdl :: !IO.Handle -- the file handle (must be seekable)
134 -- cache the file ptr in BinIO; using hTell is too expensive
135 -- to call repeatedly. If anyone else is modifying this Handle
136 -- at the same time, we'll be screwed.
138 newtype Bin a = BinPtr Int
139 deriving (Eq, Ord, Show, Bounded)
141 castBin :: Bin a -> Bin b
142 castBin (BinPtr i) = BinPtr i
145 put_ :: BinHandle -> a -> IO ()
146 put :: BinHandle -> a -> IO (Bin a)
147 get :: BinHandle -> IO a
149 -- define one of put_, put. Use of put_ is recommended because it
150 -- is more likely that tail-calls can kick in, and we rarely need the
151 -- position return value.
152 put_ bh a = do put bh a; return ()
153 put bh a = do p <- tellBin bh; put_ bh a; return p
155 putAt :: Binary a => BinHandle -> Bin a -> a -> IO ()
156 putAt bh p x = do seekBin bh p; put bh x; return ()
158 getAt :: Binary a => BinHandle -> Bin a -> IO a
159 getAt bh p = do seekBin bh p; get bh
161 openBinIO_ :: IO.Handle -> IO BinHandle
162 openBinIO_ h = openBinIO h noBinHandleUserData
164 openBinIO :: IO.Handle -> Module -> IO BinHandle
168 state <- newWriteState mod
169 return (BinIO state r h)
171 openBinMem :: Int -> Module -> IO BinHandle
173 | size <= 0 = error "Data.Binary.openBinMem: size must be >= 0"
175 arr <- newArray_ (0,size-1)
176 arr_r <- newIORef arr
177 ix_r <- newFastMutInt
178 writeFastMutInt ix_r 0
179 sz_r <- newFastMutInt
180 writeFastMutInt sz_r size
181 state <- newWriteState mod
182 return (BinMem state ix_r sz_r arr_r)
184 noBinHandleUserData = error "Binary.BinHandle: no user data"
186 getUserData :: BinHandle -> BinHandleState
187 getUserData bh = state bh
189 tellBin :: BinHandle -> IO (Bin a)
190 tellBin (BinIO _ r _) = do ix <- readFastMutInt r; return (BinPtr ix)
191 tellBin (BinMem _ r _ _) = do ix <- readFastMutInt r; return (BinPtr ix)
193 seekBin :: BinHandle -> Bin a -> IO ()
194 seekBin (BinIO _ ix_r h) (BinPtr p) = do
195 writeFastMutInt ix_r p
196 hSeek h AbsoluteSeek (fromIntegral p)
197 seekBin h@(BinMem _ ix_r sz_r a) (BinPtr p) = do
198 sz <- readFastMutInt sz_r
200 then do expandBin h p; writeFastMutInt ix_r p
201 else writeFastMutInt ix_r p
203 isEOFBin :: BinHandle -> IO Bool
204 isEOFBin (BinMem _ ix_r sz_r a) = do
205 ix <- readFastMutInt ix_r
206 sz <- readFastMutInt sz_r
208 isEOFBin (BinIO _ ix_r h) = hIsEOF h
210 writeBinMem :: BinHandle -> FilePath -> IO ()
211 writeBinMem (BinIO _ _ _) _ = error "Data.Binary.writeBinMem: not a memory handle"
212 writeBinMem (BinMem _ ix_r sz_r arr_r) fn = do
213 h <- openFile fn WriteMode
214 arr <- readIORef arr_r
215 ix <- readFastMutInt ix_r
217 #if __GLASGOW_HASKELL__ < 500
218 -- workaround a bug in ghc 4.08's implementation of hPutBuf (it doesn't
219 -- set the FILEOBJ_RW_WRITTEN flag on the file object, so the file doens't
220 -- get flushed properly). Adding an extra '\0' doens't do any harm.
225 readBinMem :: FilePath -> IO BinHandle
226 readBinMem filename = do
227 h <- openFile filename ReadMode
228 filesize' <- hFileSize h
229 let filesize = fromIntegral filesize'
230 arr <- newArray_ (0,filesize-1)
231 count <- hGetArray h arr filesize
232 when (count /= filesize)
233 (error ("Binary.readBinMem: only read " ++ show count ++ " bytes"))
235 arr_r <- newIORef arr
236 ix_r <- newFastMutInt
237 writeFastMutInt ix_r 0
238 sz_r <- newFastMutInt
239 writeFastMutInt sz_r filesize
240 return (BinMem initReadState ix_r sz_r arr_r)
242 -- expand the size of the array to include a specified offset
243 expandBin :: BinHandle -> Int -> IO ()
244 expandBin (BinMem _ ix_r sz_r arr_r) off = do
245 sz <- readFastMutInt sz_r
246 let sz' = head (dropWhile (<= off) (iterate (* 2) sz))
247 arr <- readIORef arr_r
248 arr' <- newArray_ (0,sz'-1)
249 sequence_ [ unsafeRead arr i >>= unsafeWrite arr' i
250 | i <- [ 0 .. sz-1 ] ]
251 writeFastMutInt sz_r sz'
252 writeIORef arr_r arr'
253 hPutStrLn stderr ("expanding to size: " ++ show sz')
255 expandBin (BinIO _ _ _) _ = return ()
256 -- no need to expand a file, we'll assume they expand by themselves.
258 -- -----------------------------------------------------------------------------
259 -- Low-level reading/writing of bytes
261 putWord8 :: BinHandle -> Word8 -> IO ()
262 putWord8 h@(BinMem _ ix_r sz_r arr_r) w = do
263 ix <- readFastMutInt ix_r
264 sz <- readFastMutInt sz_r
265 -- double the size of the array if it overflows
267 then do expandBin h ix
269 else do arr <- readIORef arr_r
271 writeFastMutInt ix_r (ix+1)
273 putWord8 (BinIO _ ix_r h) w = do
274 ix <- readFastMutInt ix_r
275 hPutChar h (chr (fromIntegral w)) -- XXX not really correct
276 writeFastMutInt ix_r (ix+1)
279 getWord8 :: BinHandle -> IO Word8
280 getWord8 (BinMem _ ix_r sz_r arr_r) = do
281 ix <- readFastMutInt ix_r
282 sz <- readFastMutInt sz_r
284 throw (mkIOError eofErrorType "Data.Binary.getWord8" Nothing Nothing)
285 arr <- readIORef arr_r
286 w <- unsafeRead arr ix
287 writeFastMutInt ix_r (ix+1)
289 getWord8 (BinIO _ ix_r h) = do
290 ix <- readFastMutInt ix_r
292 writeFastMutInt ix_r (ix+1)
293 return (fromIntegral (ord c)) -- XXX not really correct
295 putByte :: BinHandle -> Word8 -> IO ()
296 putByte bh w = put_ bh w
298 getByte :: BinHandle -> IO Word8
301 -- -----------------------------------------------------------------------------
302 -- Primitve Word writes
304 instance Binary Word8 where
308 instance Binary Word16 where
309 put_ h w = do -- XXX too slow.. inline putWord8?
310 putByte h (fromIntegral (w `shiftR` 8))
311 putByte h (fromIntegral (w .&. 0xff))
315 return ((fromIntegral w1 `shiftL` 8) .|. fromIntegral w2)
318 instance Binary Word32 where
320 putByte h (fromIntegral (w `shiftR` 24))
321 putByte h (fromIntegral ((w `shiftR` 16) .&. 0xff))
322 putByte h (fromIntegral ((w `shiftR` 8) .&. 0xff))
323 putByte h (fromIntegral (w .&. 0xff))
329 return ((fromIntegral w1 `shiftL` 24) .|.
330 (fromIntegral w2 `shiftL` 16) .|.
331 (fromIntegral w3 `shiftL` 8) .|.
335 instance Binary Word64 where
337 putByte h (fromIntegral (w `shiftR` 56))
338 putByte h (fromIntegral ((w `shiftR` 48) .&. 0xff))
339 putByte h (fromIntegral ((w `shiftR` 40) .&. 0xff))
340 putByte h (fromIntegral ((w `shiftR` 32) .&. 0xff))
341 putByte h (fromIntegral ((w `shiftR` 24) .&. 0xff))
342 putByte h (fromIntegral ((w `shiftR` 16) .&. 0xff))
343 putByte h (fromIntegral ((w `shiftR` 8) .&. 0xff))
344 putByte h (fromIntegral (w .&. 0xff))
354 return ((fromIntegral w1 `shiftL` 56) .|.
355 (fromIntegral w2 `shiftL` 48) .|.
356 (fromIntegral w3 `shiftL` 40) .|.
357 (fromIntegral w4 `shiftL` 32) .|.
358 (fromIntegral w5 `shiftL` 24) .|.
359 (fromIntegral w6 `shiftL` 16) .|.
360 (fromIntegral w7 `shiftL` 8) .|.
363 -- -----------------------------------------------------------------------------
364 -- Primitve Int writes
366 instance Binary Int8 where
367 put_ h w = put_ h (fromIntegral w :: Word8)
368 get h = do w <- get h; return (fromIntegral (w::Word8))
370 instance Binary Int16 where
371 put_ h w = put_ h (fromIntegral w :: Word16)
372 get h = do w <- get h; return (fromIntegral (w::Word16))
374 instance Binary Int32 where
375 put_ h w = put_ h (fromIntegral w :: Word32)
376 get h = do w <- get h; return (fromIntegral (w::Word32))
378 instance Binary Int64 where
379 put_ h w = put_ h (fromIntegral w :: Word64)
380 get h = do w <- get h; return (fromIntegral (w::Word64))
382 -- -----------------------------------------------------------------------------
383 -- Instances for standard types
385 instance Binary () where
386 put_ bh () = return ()
388 -- getF bh p = case getBitsF bh 0 p of (_,b) -> ((),b)
390 instance Binary Bool where
391 put_ bh b = putByte bh (fromIntegral (fromEnum b))
392 get bh = do x <- getWord8 bh; return (toEnum (fromIntegral x))
393 -- getF bh p = case getBitsF bh 1 p of (x,b) -> (toEnum x,b)
395 instance Binary Char where
396 put_ bh c = put_ bh (fromIntegral (ord c) :: Word32)
397 get bh = do x <- get bh; return (chr (fromIntegral (x :: Word32)))
398 -- getF bh p = case getBitsF bh 8 p of (x,b) -> (toEnum x,b)
400 instance Binary Int where
401 #if SIZEOF_HSINT == 4
402 put_ bh i = put_ bh (fromIntegral i :: Int32)
405 return (fromIntegral (x :: Int32))
406 #elif SIZEOF_HSINT == 8
407 put_ bh i = put_ bh (fromIntegral i :: Int64)
410 return (fromIntegral (x :: Int64))
412 #error "unsupported sizeof(HsInt)"
414 -- getF bh = getBitsF bh 32
416 instance Binary a => Binary [a] where
417 put_ bh [] = putByte bh 0
418 put_ bh (x:xs) = do putByte bh 1; put_ bh x; put_ bh xs
419 get bh = do h <- getWord8 bh
426 instance (Binary a, Binary b) => Binary (a,b) where
427 put_ bh (a,b) = do put_ bh a; put_ bh b
428 get bh = do a <- get bh
432 instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
433 put_ bh (a,b,c) = do put_ bh a; put_ bh b; put_ bh c
434 get bh = do a <- get bh
439 instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
440 put_ bh (a,b,c,d) = do put_ bh a; put_ bh b; put_ bh c; put_ bh d
441 get bh = do a <- get bh
447 instance Binary a => Binary (Maybe a) where
448 put_ bh Nothing = putByte bh 0
449 put_ bh (Just a) = do putByte bh 1; put_ bh a
450 get bh = do h <- getWord8 bh
453 _ -> do x <- get bh; return (Just x)
455 instance (Binary a, Binary b) => Binary (Either a b) where
456 put_ bh (Left a) = do putByte bh 0; put_ bh a
457 put_ bh (Right b) = do putByte bh 1; put_ bh b
458 get bh = do h <- getWord8 bh
460 0 -> do a <- get bh ; return (Left a)
461 _ -> do b <- get bh ; return (Right b)
463 #ifdef __GLASGOW_HASKELL__
464 instance Binary Integer where
465 put_ bh (S# i#) = do putByte bh 0; put_ bh (I# i#)
466 put_ bh (J# s# a#) = do
469 let sz# = sizeofByteArray# a# -- in *bytes*
470 put_ bh (I# sz#) -- in *bytes*
471 putByteArray bh a# sz#
476 0 -> do (I# i#) <- get bh
478 _ -> do (I# s#) <- get bh
480 (BA a#) <- getByteArray bh sz
483 putByteArray :: BinHandle -> ByteArray# -> Int# -> IO ()
484 putByteArray bh a s# = loop 0#
486 | n# ==# s# = return ()
488 putByte bh (indexByteArray a n#)
491 getByteArray :: BinHandle -> Int -> IO ByteArray
492 getByteArray bh (I# sz) = do
493 (MBA arr) <- newByteArray sz
495 | n ==# sz = return ()
498 writeByteArray arr n w
504 data ByteArray = BA ByteArray#
505 data MBA = MBA (MutableByteArray# RealWorld)
507 newByteArray :: Int# -> IO MBA
508 newByteArray sz = IO $ \s ->
509 case newByteArray# sz s of { (# s, arr #) ->
512 freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray
513 freezeByteArray arr = IO $ \s ->
514 case unsafeFreezeByteArray# arr s of { (# s, arr #) ->
517 writeByteArray :: MutableByteArray# RealWorld -> Int# -> Word8 -> IO ()
519 #if __GLASGOW_HASKELL__ < 503
520 writeByteArray arr i w8 = IO $ \s ->
521 case word8ToWord w8 of { W# w# ->
522 case writeCharArray# arr i (chr# (word2Int# w#)) s of { s ->
525 writeByteArray arr i (W8# w) = IO $ \s ->
526 case writeWord8Array# arr i w s of { s ->
530 #if __GLASGOW_HASKELL__ < 503
531 indexByteArray a# n# = fromIntegral (I# (ord# (indexCharArray# a# n#)))
533 indexByteArray a# n# = W8# (indexWord8Array# a# n#)
536 instance (Integral a, Binary a) => Binary (Ratio a) where
537 put_ bh (a :% b) = do put_ bh a; put_ bh b
538 get bh = do a <- get bh; b <- get bh; return (a :% b)
541 instance Binary (Bin a) where
542 put_ bh (BinPtr i) = put_ bh i
543 get bh = do i <- get bh; return (BinPtr i)
545 -- -----------------------------------------------------------------------------
546 -- unboxed mutable Ints
548 #ifdef __GLASGOW_HASKELL__
549 data FastMutInt = FastMutInt (MutableByteArray# RealWorld)
551 newFastMutInt = IO $ \s ->
552 case newByteArray# size s of { (# s, arr #) ->
553 (# s, FastMutInt arr #) }
554 where I# size = SIZEOF_HSWORD
556 readFastMutInt (FastMutInt arr) = IO $ \s ->
557 case readIntArray# arr 0# s of { (# s, i #) ->
560 writeFastMutInt (FastMutInt arr) (I# i) = IO $ \s ->
561 case writeIntArray# arr 0# i s of { s ->
565 -- -----------------------------------------------------------------------------
566 -- Lazy reading/writing
568 lazyPut :: Binary a => BinHandle -> a -> IO ()
570 -- output the obj with a ptr to skip over it:
572 put_ bh pre_a -- save a slot for the ptr
573 put_ bh a -- dump the object
574 q <- tellBin bh -- q = ptr to after object
575 putAt bh pre_a q -- fill in slot before a with ptr to q
576 seekBin bh q -- finally carry on writing at q
578 lazyGet :: Binary a => BinHandle -> IO a
580 p <- get bh -- a BinPtr
582 a <- unsafeInterleaveIO (getAt bh p_a)
583 seekBin bh p -- skip over the object for now
586 -- -----------------------------------------------------------------------------
589 type BinHandleState =
592 IORef (UniqFM (Int,FastString)),
593 Array Int FastString)
595 initReadState :: BinHandleState
596 initReadState = (undef, undef, undef, undef)
598 newWriteState :: Module -> IO BinHandleState
601 out_r <- newIORef emptyUFM
602 return (m,j_r,out_r,undef)
604 undef = error "Binary.BinHandleState"
606 -- -----------------------------------------------------------------------------
607 -- FastString binary interface
609 getBinFileWithDict :: Binary a => FilePath -> IO a
610 getBinFileWithDict file_path = do
611 bh <- Binary.readBinMem file_path
612 dict_p <- Binary.get bh -- get the dictionary ptr
615 dict <- getDictionary bh
617 let (mod, j_r, out_r, _) = state bh
618 get bh{ state = (mod,j_r,out_r,dict) }
620 initBinMemSize = (1024*1024) :: Int
622 putBinFileWithDict :: Binary a => FilePath -> Module -> a -> IO ()
623 putBinFileWithDict file_path mod a = do
624 bh <- openBinMem initBinMemSize mod
626 put_ bh p -- placeholder for ptr to dictionary
628 let (_, j_r, fm_r, _) = state bh
632 putAt bh p dict_p -- fill in the placeholder
633 seekBin bh dict_p -- seek back to the end of the file
634 putDictionary bh j (constructDictionary j fm)
635 writeBinMem bh file_path
637 type Dictionary = Array Int FastString
638 -- should be 0-indexed
640 putDictionary :: BinHandle -> Int -> Dictionary -> IO ()
641 putDictionary bh sz dict = do
643 mapM_ (putFS bh) (elems dict)
645 getDictionary :: BinHandle -> IO Dictionary
646 getDictionary bh = do
648 elems <- sequence (take sz (repeat (getFS bh)))
649 return (listArray (0,sz-1) elems)
651 constructDictionary :: Int -> UniqFM (Int,FastString) -> Dictionary
652 constructDictionary j fm = array (0,j-1) (eltsUFM fm)
654 putFS bh (FastString id l ba) = do
657 putFS bh s = error ("Binary.put_(FastString): " ++ unpackFS s)
658 -- Note: the length of the FastString is *not* the same as
659 -- the size of the ByteArray: the latter is rounded up to a
660 -- multiple of the word size.
664 (BA ba) <- getByteArray bh (I# l)
665 return (mkFastSubStringBA# ba 0# l)
666 -- XXX ToDo: one too many copies here
668 instance Binary FastString where
669 put_ bh f@(FastString id l ba) =
670 case getUserData bh of { (_, j_r, out_r, dict) -> do
671 out <- readIORef out_r
672 let uniq = getUnique f
673 case lookupUFM out uniq of
674 Just (j,f) -> put_ bh j
679 writeIORef out_r (addToUFM out uniq (j,f))
681 put_ bh s = error ("Binary.put_(FastString): " ++ show (unpackFS s))
685 case getUserData bh of (_, _, _, arr) -> return (arr ! j)