3 -- (c) The University of Glasgow 2002
5 -- Binary I/O library, with special tweaks for GHC
7 -- Based on the nhc98 Binary library, which is copyright
8 -- (c) Malcolm Wallace and Colin Runciman, University of York, 1998.
9 -- Under the terms of the license for that software, we must tell you
10 -- where you can obtain the original version of the Binary library, namely
11 -- http://www.cs.york.ac.uk/fp/nhc98/
18 openBinIO, openBinIO_,
31 -- for writing instances:
44 getBinFileWithDict, -- :: Binary a => FilePath -> IO a
45 putBinFileWithDict, -- :: Binary a => FilePath -> ModuleName -> a -> IO ()
49 #include "HsVersions.h"
51 -- The *host* architecture version:
59 import PackageConfig ( PackageId, packageIdFS, fsToPackageId )
68 import Data.Char ( ord, chr )
69 import Data.Array.Base ( unsafeRead, unsafeWrite )
70 import Control.Monad ( when )
71 import Control.Exception ( throwDyn )
72 import System.IO as IO
73 import System.IO.Unsafe ( unsafeInterleaveIO )
74 import System.IO.Error ( mkIOError, eofErrorType )
75 import GHC.Real ( Ratio(..) )
77 import GHC.IOBase ( IO(..) )
78 import GHC.Word ( Word8(..) )
79 #if __GLASGOW_HASKELL__ < 601
80 -- openFileEx is available from the lang package, but we want to
81 -- be independent of hslibs libraries.
82 import GHC.Handle ( openFileEx, IOModeEx(..) )
84 import System.IO ( openBinaryFile )
87 #if __GLASGOW_HASKELL__ < 601
88 openBinaryFile f mode = openFileEx f (BinaryMode mode)
91 type BinArray = IOUArray Int Word8
93 ---------------------------------------------------------------
95 ---------------------------------------------------------------
98 = BinMem { -- binary data stored in an unboxed array
99 bh_usr :: UserData, -- sigh, need parameterized modules :-)
100 off_r :: !FastMutInt, -- the current offset
101 sz_r :: !FastMutInt, -- size of the array (cached)
102 arr_r :: !(IORef BinArray) -- the array (bounds: (0,size-1))
104 -- XXX: should really store a "high water mark" for dumping out
105 -- the binary data to a file.
107 | BinIO { -- binary data stored in a file
109 off_r :: !FastMutInt, -- the current offset (cached)
110 hdl :: !IO.Handle -- the file handle (must be seekable)
112 -- cache the file ptr in BinIO; using hTell is too expensive
113 -- to call repeatedly. If anyone else is modifying this Handle
114 -- at the same time, we'll be screwed.
116 getUserData :: BinHandle -> UserData
117 getUserData bh = bh_usr bh
119 setUserData :: BinHandle -> UserData -> BinHandle
120 setUserData bh us = bh { bh_usr = us }
123 ---------------------------------------------------------------
125 ---------------------------------------------------------------
127 newtype Bin a = BinPtr Int
128 deriving (Eq, Ord, Show, Bounded)
130 castBin :: Bin a -> Bin b
131 castBin (BinPtr i) = BinPtr i
133 ---------------------------------------------------------------
135 ---------------------------------------------------------------
138 put_ :: BinHandle -> a -> IO ()
139 put :: BinHandle -> a -> IO (Bin a)
140 get :: BinHandle -> IO a
142 -- define one of put_, put. Use of put_ is recommended because it
143 -- is more likely that tail-calls can kick in, and we rarely need the
144 -- position return value.
145 put_ bh a = do put bh a; return ()
146 put bh a = do p <- tellBin bh; put_ bh a; return p
148 putAt :: Binary a => BinHandle -> Bin a -> a -> IO ()
149 putAt bh p x = do seekBin bh p; put bh x; return ()
151 getAt :: Binary a => BinHandle -> Bin a -> IO a
152 getAt bh p = do seekBin bh p; get bh
154 openBinIO_ :: IO.Handle -> IO BinHandle
155 openBinIO_ h = openBinIO h
157 openBinIO :: IO.Handle -> IO BinHandle
161 return (BinIO noUserData r h)
163 openBinMem :: Int -> IO BinHandle
165 | size <= 0 = error "Data.Binary.openBinMem: size must be >= 0"
167 arr <- newArray_ (0,size-1)
168 arr_r <- newIORef arr
169 ix_r <- newFastMutInt
170 writeFastMutInt ix_r 0
171 sz_r <- newFastMutInt
172 writeFastMutInt sz_r size
173 return (BinMem noUserData ix_r sz_r arr_r)
175 tellBin :: BinHandle -> IO (Bin a)
176 tellBin (BinIO _ r _) = do ix <- readFastMutInt r; return (BinPtr ix)
177 tellBin (BinMem _ r _ _) = do ix <- readFastMutInt r; return (BinPtr ix)
179 seekBin :: BinHandle -> Bin a -> IO ()
180 seekBin (BinIO _ ix_r h) (BinPtr p) = do
181 writeFastMutInt ix_r p
182 hSeek h AbsoluteSeek (fromIntegral p)
183 seekBin h@(BinMem _ ix_r sz_r a) (BinPtr p) = do
184 sz <- readFastMutInt sz_r
186 then do expandBin h p; writeFastMutInt ix_r p
187 else writeFastMutInt ix_r p
189 isEOFBin :: BinHandle -> IO Bool
190 isEOFBin (BinMem _ ix_r sz_r a) = do
191 ix <- readFastMutInt ix_r
192 sz <- readFastMutInt sz_r
194 isEOFBin (BinIO _ ix_r h) = hIsEOF h
196 writeBinMem :: BinHandle -> FilePath -> IO ()
197 writeBinMem (BinIO _ _ _) _ = error "Data.Binary.writeBinMem: not a memory handle"
198 writeBinMem (BinMem _ ix_r sz_r arr_r) fn = do
199 h <- openBinaryFile fn WriteMode
200 arr <- readIORef arr_r
201 ix <- readFastMutInt ix_r
203 #if __GLASGOW_HASKELL__ <= 500
204 -- workaround a bug in old implementation of hPutBuf (it doesn't
205 -- set the FILEOBJ_RW_WRITTEN flag on the file object, so the file doens't
206 -- get flushed properly). Adding an extra '\0' doens't do any harm.
211 readBinMem :: FilePath -> IO BinHandle
212 -- Return a BinHandle with a totally undefined State
213 readBinMem filename = do
214 h <- openBinaryFile filename ReadMode
215 filesize' <- hFileSize h
216 let filesize = fromIntegral filesize'
217 arr <- newArray_ (0,filesize-1)
218 count <- hGetArray h arr filesize
219 when (count /= filesize)
220 (error ("Binary.readBinMem: only read " ++ show count ++ " bytes"))
222 arr_r <- newIORef arr
223 ix_r <- newFastMutInt
224 writeFastMutInt ix_r 0
225 sz_r <- newFastMutInt
226 writeFastMutInt sz_r filesize
227 return (BinMem noUserData ix_r sz_r arr_r)
229 -- expand the size of the array to include a specified offset
230 expandBin :: BinHandle -> Int -> IO ()
231 expandBin (BinMem _ ix_r sz_r arr_r) off = do
232 sz <- readFastMutInt sz_r
233 let sz' = head (dropWhile (<= off) (iterate (* 2) sz))
234 arr <- readIORef arr_r
235 arr' <- newArray_ (0,sz'-1)
236 sequence_ [ unsafeRead arr i >>= unsafeWrite arr' i
237 | i <- [ 0 .. sz-1 ] ]
238 writeFastMutInt sz_r sz'
239 writeIORef arr_r arr'
241 hPutStrLn stderr ("Binary: expanding to size: " ++ show sz')
244 expandBin (BinIO _ _ _) _ = return ()
245 -- no need to expand a file, we'll assume they expand by themselves.
247 -- -----------------------------------------------------------------------------
248 -- Low-level reading/writing of bytes
250 putWord8 :: BinHandle -> Word8 -> IO ()
251 putWord8 h@(BinMem _ ix_r sz_r arr_r) w = do
252 ix <- readFastMutInt ix_r
253 sz <- readFastMutInt sz_r
254 -- double the size of the array if it overflows
256 then do expandBin h ix
258 else do arr <- readIORef arr_r
260 writeFastMutInt ix_r (ix+1)
262 putWord8 (BinIO _ ix_r h) w = do
263 ix <- readFastMutInt ix_r
264 hPutChar h (chr (fromIntegral w)) -- XXX not really correct
265 writeFastMutInt ix_r (ix+1)
268 getWord8 :: BinHandle -> IO Word8
269 getWord8 (BinMem _ ix_r sz_r arr_r) = do
270 ix <- readFastMutInt ix_r
271 sz <- readFastMutInt sz_r
273 #if __GLASGOW_HASKELL__ <= 408
274 throw (mkIOError eofErrorType "Data.Binary.getWord8" Nothing Nothing)
276 ioError (mkIOError eofErrorType "Data.Binary.getWord8" Nothing Nothing)
278 arr <- readIORef arr_r
279 w <- unsafeRead arr ix
280 writeFastMutInt ix_r (ix+1)
282 getWord8 (BinIO _ ix_r h) = do
283 ix <- readFastMutInt ix_r
285 writeFastMutInt ix_r (ix+1)
286 return $! (fromIntegral (ord c)) -- XXX not really correct
288 putByte :: BinHandle -> Word8 -> IO ()
289 putByte bh w = put_ bh w
291 getByte :: BinHandle -> IO Word8
294 -- -----------------------------------------------------------------------------
295 -- Primitve Word writes
297 instance Binary Word8 where
301 instance Binary Word16 where
302 put_ h w = do -- XXX too slow.. inline putWord8?
303 putByte h (fromIntegral (w `shiftR` 8))
304 putByte h (fromIntegral (w .&. 0xff))
308 return $! ((fromIntegral w1 `shiftL` 8) .|. fromIntegral w2)
311 instance Binary Word32 where
313 putByte h (fromIntegral (w `shiftR` 24))
314 putByte h (fromIntegral ((w `shiftR` 16) .&. 0xff))
315 putByte h (fromIntegral ((w `shiftR` 8) .&. 0xff))
316 putByte h (fromIntegral (w .&. 0xff))
322 return $! ((fromIntegral w1 `shiftL` 24) .|.
323 (fromIntegral w2 `shiftL` 16) .|.
324 (fromIntegral w3 `shiftL` 8) .|.
328 instance Binary Word64 where
330 putByte h (fromIntegral (w `shiftR` 56))
331 putByte h (fromIntegral ((w `shiftR` 48) .&. 0xff))
332 putByte h (fromIntegral ((w `shiftR` 40) .&. 0xff))
333 putByte h (fromIntegral ((w `shiftR` 32) .&. 0xff))
334 putByte h (fromIntegral ((w `shiftR` 24) .&. 0xff))
335 putByte h (fromIntegral ((w `shiftR` 16) .&. 0xff))
336 putByte h (fromIntegral ((w `shiftR` 8) .&. 0xff))
337 putByte h (fromIntegral (w .&. 0xff))
347 return $! ((fromIntegral w1 `shiftL` 56) .|.
348 (fromIntegral w2 `shiftL` 48) .|.
349 (fromIntegral w3 `shiftL` 40) .|.
350 (fromIntegral w4 `shiftL` 32) .|.
351 (fromIntegral w5 `shiftL` 24) .|.
352 (fromIntegral w6 `shiftL` 16) .|.
353 (fromIntegral w7 `shiftL` 8) .|.
356 -- -----------------------------------------------------------------------------
357 -- Primitve Int writes
359 instance Binary Int8 where
360 put_ h w = put_ h (fromIntegral w :: Word8)
361 get h = do w <- get h; return $! (fromIntegral (w::Word8))
363 instance Binary Int16 where
364 put_ h w = put_ h (fromIntegral w :: Word16)
365 get h = do w <- get h; return $! (fromIntegral (w::Word16))
367 instance Binary Int32 where
368 put_ h w = put_ h (fromIntegral w :: Word32)
369 get h = do w <- get h; return $! (fromIntegral (w::Word32))
371 instance Binary Int64 where
372 put_ h w = put_ h (fromIntegral w :: Word64)
373 get h = do w <- get h; return $! (fromIntegral (w::Word64))
375 -- -----------------------------------------------------------------------------
376 -- Instances for standard types
378 instance Binary () where
379 put_ bh () = return ()
381 -- getF bh p = case getBitsF bh 0 p of (_,b) -> ((),b)
383 instance Binary Bool where
384 put_ bh b = putByte bh (fromIntegral (fromEnum b))
385 get bh = do x <- getWord8 bh; return $! (toEnum (fromIntegral x))
386 -- getF bh p = case getBitsF bh 1 p of (x,b) -> (toEnum x,b)
388 instance Binary Char where
389 put_ bh c = put_ bh (fromIntegral (ord c) :: Word32)
390 get bh = do x <- get bh; return $! (chr (fromIntegral (x :: Word32)))
391 -- getF bh p = case getBitsF bh 8 p of (x,b) -> (toEnum x,b)
393 instance Binary Int where
394 #if SIZEOF_HSINT == 4
395 put_ bh i = put_ bh (fromIntegral i :: Int32)
398 return $! (fromIntegral (x :: Int32))
399 #elif SIZEOF_HSINT == 8
400 put_ bh i = put_ bh (fromIntegral i :: Int64)
403 return $! (fromIntegral (x :: Int64))
405 #error "unsupported sizeof(HsInt)"
407 -- getF bh = getBitsF bh 32
409 instance Binary a => Binary [a] where
413 then putByte bh (fromIntegral len :: Word8)
414 else do putByte bh 0xff; put_ bh (fromIntegral len :: Word32)
420 else return (fromIntegral b :: Word32)
421 let loop 0 = return []
422 loop n = do a <- get bh; as <- loop (n-1); return (a:as)
425 instance (Binary a, Binary b) => Binary (a,b) where
426 put_ bh (a,b) = do put_ bh a; put_ bh b
427 get bh = do a <- get bh
431 instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
432 put_ bh (a,b,c) = do put_ bh a; put_ bh b; put_ bh c
433 get bh = do a <- get bh
438 instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
439 put_ bh (a,b,c,d) = do put_ bh a; put_ bh b; put_ bh c; put_ bh d
440 get bh = do a <- get bh
446 instance Binary a => Binary (Maybe a) where
447 put_ bh Nothing = putByte bh 0
448 put_ bh (Just a) = do putByte bh 1; put_ bh a
449 get bh = do h <- getWord8 bh
452 _ -> do x <- get bh; return (Just x)
454 instance (Binary a, Binary b) => Binary (Either a b) where
455 put_ bh (Left a) = do putByte bh 0; put_ bh a
456 put_ bh (Right b) = do putByte bh 1; put_ bh b
457 get bh = do h <- getWord8 bh
459 0 -> do a <- get bh ; return (Left a)
460 _ -> do b <- get bh ; return (Right b)
462 #ifdef __GLASGOW_HASKELL__
463 instance Binary Integer where
464 put_ bh (S# i#) = do putByte bh 0; put_ bh (I# i#)
465 put_ bh (J# s# a#) = do
468 let sz# = sizeofByteArray# a# -- in *bytes*
469 put_ bh (I# sz#) -- in *bytes*
470 putByteArray bh a# sz#
475 0 -> do (I# i#) <- get bh
477 _ -> do (I# s#) <- get bh
479 (BA a#) <- getByteArray bh sz
482 putByteArray :: BinHandle -> ByteArray# -> Int# -> IO ()
483 putByteArray bh a s# = loop 0#
485 | n# ==# s# = return ()
487 putByte bh (indexByteArray a n#)
490 getByteArray :: BinHandle -> Int -> IO ByteArray
491 getByteArray bh (I# sz) = do
492 (MBA arr) <- newByteArray sz
494 | n ==# sz = return ()
497 writeByteArray arr n w
503 data ByteArray = BA ByteArray#
504 data MBA = MBA (MutableByteArray# RealWorld)
506 newByteArray :: Int# -> IO MBA
507 newByteArray sz = IO $ \s ->
508 case newByteArray# sz s of { (# s, arr #) ->
511 freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray
512 freezeByteArray arr = IO $ \s ->
513 case unsafeFreezeByteArray# arr s of { (# s, arr #) ->
516 writeByteArray :: MutableByteArray# RealWorld -> Int# -> Word8 -> IO ()
518 #if __GLASGOW_HASKELL__ < 503
519 writeByteArray arr i w8 = IO $ \s ->
520 case word8ToWord w8 of { W# w# ->
521 case writeCharArray# arr i (chr# (word2Int# w#)) s of { s ->
524 writeByteArray arr i (W8# w) = IO $ \s ->
525 case writeWord8Array# arr i w s of { s ->
529 #if __GLASGOW_HASKELL__ < 503
530 indexByteArray a# n# = fromIntegral (I# (ord# (indexCharArray# a# n#)))
532 indexByteArray a# n# = W8# (indexWord8Array# a# n#)
535 instance (Integral a, Binary a) => Binary (Ratio a) where
536 put_ bh (a :% b) = do put_ bh a; put_ bh b
537 get bh = do a <- get bh; b <- get bh; return (a :% b)
540 instance Binary (Bin a) where
541 put_ bh (BinPtr i) = put_ bh i
542 get bh = do i <- get bh; return (BinPtr i)
544 -- -----------------------------------------------------------------------------
545 -- Lazy reading/writing
547 lazyPut :: Binary a => BinHandle -> a -> IO ()
549 -- output the obj with a ptr to skip over it:
551 put_ bh pre_a -- save a slot for the ptr
552 put_ bh a -- dump the object
553 q <- tellBin bh -- q = ptr to after object
554 putAt bh pre_a q -- fill in slot before a with ptr to q
555 seekBin bh q -- finally carry on writing at q
557 lazyGet :: Binary a => BinHandle -> IO a
559 p <- get bh -- a BinPtr
561 a <- unsafeInterleaveIO (getAt bh p_a)
562 seekBin bh p -- skip over the object for now
565 -- --------------------------------------------------------------
566 -- Main wrappers: getBinFileWithDict, putBinFileWithDict
568 -- This layer is built on top of the stuff above,
569 -- and should not know anything about BinHandles
570 -- --------------------------------------------------------------
572 initBinMemSize = (1024*1024) :: Int
574 #if WORD_SIZE_IN_BITS == 32
575 binaryInterfaceMagic = 0x1face :: Word32
576 #elif WORD_SIZE_IN_BITS == 64
577 binaryInterfaceMagic = 0x1face64 :: Word32
580 getBinFileWithDict :: Binary a => FilePath -> IO a
581 getBinFileWithDict file_path = do
582 bh <- Binary.readBinMem file_path
584 -- Read the magic number to check that this really is a GHC .hi file
585 -- (This magic number does not change when we change
586 -- GHC interface file format)
588 when (magic /= binaryInterfaceMagic) $
589 throwDyn (ProgramError (
590 "magic number mismatch: old/corrupt interface file?"))
592 -- Read the dictionary
593 -- The next word in the file is a pointer to where the dictionary is
594 -- (probably at the end of the file)
595 dict_p <- Binary.get bh -- Get the dictionary ptr
596 data_p <- tellBin bh -- Remember where we are now
598 dict <- getDictionary bh
599 seekBin bh data_p -- Back to where we were before
601 -- Initialise the user-data field of bh
602 let bh' = setUserData bh (initReadState dict)
604 -- At last, get the thing
607 putBinFileWithDict :: Binary a => FilePath -> a -> IO ()
608 putBinFileWithDict file_path the_thing = do
609 bh <- openBinMem initBinMemSize
610 put_ bh binaryInterfaceMagic
612 -- Remember where the dictionary pointer will go
613 dict_p_p <- tellBin bh
614 put_ bh dict_p_p -- Placeholder for ptr to dictionary
616 -- Make some intial state
617 usr_state <- newWriteState
619 -- Put the main thing,
620 put_ (setUserData bh usr_state) the_thing
622 -- Get the final-state
623 j <- readIORef (ud_next usr_state)
624 fm <- readIORef (ud_map usr_state)
625 dict_p <- tellBin bh -- This is where the dictionary will start
627 -- Write the dictionary pointer at the fornt of the file
628 putAt bh dict_p_p dict_p -- Fill in the placeholder
629 seekBin bh dict_p -- Seek back to the end of the file
631 -- Write the dictionary itself
632 putDictionary bh j (constructDictionary j fm)
634 -- And send the result to the file
635 writeBinMem bh file_path
637 -- -----------------------------------------------------------------------------
639 -- -----------------------------------------------------------------------------
642 UserData { -- This field is used only when reading
643 ud_dict :: Dictionary,
645 -- The next two fields are only used when writing
646 ud_next :: IORef Int, -- The next index to use
647 ud_map :: IORef (UniqFM (Int,FastString))
650 noUserData = error "Binary.UserData: no user data"
652 initReadState :: Dictionary -> UserData
653 initReadState dict = UserData{ ud_dict = dict,
654 ud_next = undef "next",
655 ud_map = undef "map" }
657 newWriteState :: IO UserData
660 out_r <- newIORef emptyUFM
661 return (UserData { ud_dict = panic "dict",
666 undef s = panic ("Binary.UserData: no " ++ s)
668 ---------------------------------------------------------
670 ---------------------------------------------------------
672 type Dictionary = Array Int FastString -- The dictionary
673 -- Should be 0-indexed
675 putDictionary :: BinHandle -> Int -> Dictionary -> IO ()
676 putDictionary bh sz dict = do
678 mapM_ (putFS bh) (elems dict)
680 getDictionary :: BinHandle -> IO Dictionary
681 getDictionary bh = do
683 elems <- sequence (take sz (repeat (getFS bh)))
684 return (listArray (0,sz-1) elems)
686 constructDictionary :: Int -> UniqFM (Int,FastString) -> Dictionary
687 constructDictionary j fm = array (0,j-1) (eltsUFM fm)
689 ---------------------------------------------------------
690 -- Reading and writing FastStrings
691 ---------------------------------------------------------
693 putFS bh (FastString id l _ buf _) = do
695 withForeignPtr buf $ \ptr ->
697 go n | n == l = return ()
699 b <- peekElemOff ptr n
705 {- -- possible faster version, not quite there yet:
706 getFS bh@BinMem{} = do
708 arr <- readIORef (arr_r bh)
709 off <- readFastMutInt (off_r bh)
710 return $! (mkFastSubStringBA# arr off l)
714 fp <- mallocForeignPtrBytes l
715 withForeignPtr fp $ \ptr -> do
717 go n | n == l = mkFastStringForeignPtr ptr fp l
725 #if __GLASGOW_HASKELL__ < 600
726 mallocForeignPtrBytes :: Int -> IO (ForeignPtr a)
727 mallocForeignPtrBytes n = do
729 newForeignPtr r (finalizerFree r)
731 foreign import ccall unsafe "stdlib.h free"
732 finalizerFree :: Ptr a -> IO ()
735 instance Binary PackageId where
736 put_ bh pid = put_ bh (packageIdFS pid)
737 get bh = do { fs <- get bh; return (fsToPackageId fs) }
739 instance Binary FastString where
740 put_ bh f@(FastString id l _ fp _) =
741 case getUserData bh of {
742 UserData { ud_next = j_r, ud_map = out_r, ud_dict = dict} -> do
743 out <- readIORef out_r
744 let uniq = getUnique f
745 case lookupUFM out uniq of
746 Just (j,f) -> put_ bh j
751 writeIORef out_r (addToUFM out uniq (j,f))
756 return $! (ud_dict (getUserData bh) ! j)