-- (c) The University of Glasgow 2002
--
-- Binary I/O library, with special tweaks for GHC
+--
+-- Based on the nhc98 Binary library, which is copyright
+-- (c) Malcolm Wallace and Colin Runciman, University of York, 1998.
+-- Under the terms of the license for that software, we must tell you
+-- where you can obtain the original version of the Binary library, namely
+-- http://www.cs.york.ac.uk/fp/nhc98/
module Binary
( {-type-} Bin,
openBinMem,
-- closeBin,
- getUserData,
-
seekBin,
tellBin,
castBin,
putByteArray,
getBinFileWithDict, -- :: Binary a => FilePath -> IO a
- putBinFileWithDict, -- :: Binary a => FilePath -> Module -> a -> IO ()
+ putBinFileWithDict, -- :: Binary a => FilePath -> ModuleName -> a -> IO ()
) where
+#include "HsVersions.h"
+
+-- The *host* architecture version:
#include "MachDeps.h"
-import {-# SOURCE #-} Module
import FastString
import Unique
+import Panic
import UniqFM
+import FastMutInt
+import PackageConfig ( PackageId, packageIdFS, fsToPackageId )
-#if __GLASGOW_HASKELL__ < 503
-import IOExts
-import Bits
-import Int
-import Word
-import Char
-import Monad
-import Exception
-import GlaExts hiding (ByteArray, newByteArray, freezeByteArray)
-import Array
-import IO
-import PrelIOBase ( IOError(..), IOErrorType(..) )
-import PrelReal ( Ratio(..) )
-import PrelIOBase ( IO(..) )
-#else
+import Foreign
import Data.Array.IO
import Data.Array
import Data.Bits
import Data.Char ( ord, chr )
import Data.Array.Base ( unsafeRead, unsafeWrite )
import Control.Monad ( when )
-import Control.Exception ( throw )
+import Control.Exception ( throwDyn )
import System.IO as IO
import System.IO.Unsafe ( unsafeInterleaveIO )
import System.IO.Error ( mkIOError, eofErrorType )
import GHC.Exts
import GHC.IOBase ( IO(..) )
import GHC.Word ( Word8(..) )
+#if __GLASGOW_HASKELL__ < 601
+-- openFileEx is available from the lang package, but we want to
+-- be independent of hslibs libraries.
+import GHC.Handle ( openFileEx, IOModeEx(..) )
+#else
+import System.IO ( openBinaryFile )
#endif
-#if __GLASGOW_HASKELL__ < 503
-type BinArray = MutableByteArray RealWorld Int
-newArray_ bounds = stToIO (newCharArray bounds)
-unsafeWrite arr ix e = stToIO (writeWord8Array arr ix e)
-unsafeRead arr ix = stToIO (readWord8Array arr ix)
-newByteArray# = newCharArray#
-hPutArray h arr sz = hPutBufBAFull h arr sz
-hGetArray h sz = hGetBufBAFull h sz
-
-mkIOError :: IOErrorType -> String -> Maybe Handle -> Maybe FilePath -> Exception
-mkIOError t location maybe_hdl maybe_filename
- = IOException (IOError maybe_hdl t location "")
-
-eofErrorType = EOF
-
-#ifndef SIZEOF_HSINT
-#define SIZEOF_HSINT INT_SIZE_IN_BYTES
-#endif
-
-#ifndef SIZEOF_HSWORD
-#define SIZEOF_HSWORD WORD_SIZE_IN_BYTES
+#if __GLASGOW_HASKELL__ < 601
+openBinaryFile f mode = openFileEx f (BinaryMode mode)
#endif
-#else
type BinArray = IOUArray Int Word8
-#endif
+
+---------------------------------------------------------------
+-- BinHandle
+---------------------------------------------------------------
data BinHandle
= BinMem { -- binary data stored in an unboxed array
- state :: BinHandleState, -- sigh, need parameterized modules :-)
+ bh_usr :: UserData, -- sigh, need parameterized modules :-)
off_r :: !FastMutInt, -- the current offset
sz_r :: !FastMutInt, -- size of the array (cached)
arr_r :: !(IORef BinArray) -- the array (bounds: (0,size-1))
-- the binary data to a file.
| BinIO { -- binary data stored in a file
- state :: BinHandleState,
+ bh_usr :: UserData,
off_r :: !FastMutInt, -- the current offset (cached)
hdl :: !IO.Handle -- the file handle (must be seekable)
}
-- to call repeatedly. If anyone else is modifying this Handle
-- at the same time, we'll be screwed.
+getUserData :: BinHandle -> UserData
+getUserData bh = bh_usr bh
+
+setUserData :: BinHandle -> UserData -> BinHandle
+setUserData bh us = bh { bh_usr = us }
+
+
+---------------------------------------------------------------
+-- Bin
+---------------------------------------------------------------
+
newtype Bin a = BinPtr Int
deriving (Eq, Ord, Show, Bounded)
castBin :: Bin a -> Bin b
castBin (BinPtr i) = BinPtr i
+---------------------------------------------------------------
+-- class Binary
+---------------------------------------------------------------
+
class Binary a where
put_ :: BinHandle -> a -> IO ()
put :: BinHandle -> a -> IO (Bin a)
getAt bh p = do seekBin bh p; get bh
openBinIO_ :: IO.Handle -> IO BinHandle
-openBinIO_ h = openBinIO h noBinHandleUserData
+openBinIO_ h = openBinIO h
-openBinIO :: IO.Handle -> Module -> IO BinHandle
-openBinIO h mod = do
+openBinIO :: IO.Handle -> IO BinHandle
+openBinIO h = do
r <- newFastMutInt
writeFastMutInt r 0
- state <- newWriteState mod
- return (BinIO state r h)
+ return (BinIO noUserData r h)
-openBinMem :: Int -> Module -> IO BinHandle
-openBinMem size mod
+openBinMem :: Int -> IO BinHandle
+openBinMem size
| size <= 0 = error "Data.Binary.openBinMem: size must be >= 0"
| otherwise = do
arr <- newArray_ (0,size-1)
writeFastMutInt ix_r 0
sz_r <- newFastMutInt
writeFastMutInt sz_r size
- state <- newWriteState mod
- return (BinMem state ix_r sz_r arr_r)
-
-noBinHandleUserData = error "Binary.BinHandle: no user data"
-
-getUserData :: BinHandle -> BinHandleState
-getUserData bh = state bh
+ return (BinMem noUserData ix_r sz_r arr_r)
tellBin :: BinHandle -> IO (Bin a)
tellBin (BinIO _ r _) = do ix <- readFastMutInt r; return (BinPtr ix)
writeBinMem :: BinHandle -> FilePath -> IO ()
writeBinMem (BinIO _ _ _) _ = error "Data.Binary.writeBinMem: not a memory handle"
writeBinMem (BinMem _ ix_r sz_r arr_r) fn = do
- h <- openFile fn WriteMode
+ h <- openBinaryFile fn WriteMode
arr <- readIORef arr_r
ix <- readFastMutInt ix_r
hPutArray h arr ix
-#if __GLASGOW_HASKELL__ < 500
- -- workaround a bug in ghc 4.08's implementation of hPutBuf (it doesn't
+#if __GLASGOW_HASKELL__ <= 500
+ -- workaround a bug in old implementation of hPutBuf (it doesn't
-- set the FILEOBJ_RW_WRITTEN flag on the file object, so the file doens't
-- get flushed properly). Adding an extra '\0' doens't do any harm.
hPutChar h '\0'
hClose h
readBinMem :: FilePath -> IO BinHandle
+-- Return a BinHandle with a totally undefined State
readBinMem filename = do
- h <- openFile filename ReadMode
+ h <- openBinaryFile filename ReadMode
filesize' <- hFileSize h
let filesize = fromIntegral filesize'
arr <- newArray_ (0,filesize-1)
writeFastMutInt ix_r 0
sz_r <- newFastMutInt
writeFastMutInt sz_r filesize
- return (BinMem initReadState ix_r sz_r arr_r)
+ return (BinMem noUserData ix_r sz_r arr_r)
-- expand the size of the array to include a specified offset
expandBin :: BinHandle -> Int -> IO ()
| i <- [ 0 .. sz-1 ] ]
writeFastMutInt sz_r sz'
writeIORef arr_r arr'
- hPutStrLn stderr ("expanding to size: " ++ show sz')
+#ifdef DEBUG
+ hPutStrLn stderr ("Binary: expanding to size: " ++ show sz')
+#endif
return ()
expandBin (BinIO _ _ _) _ = return ()
-- no need to expand a file, we'll assume they expand by themselves.
ix <- readFastMutInt ix_r
sz <- readFastMutInt sz_r
when (ix >= sz) $
+#if __GLASGOW_HASKELL__ <= 408
throw (mkIOError eofErrorType "Data.Binary.getWord8" Nothing Nothing)
+#else
+ ioError (mkIOError eofErrorType "Data.Binary.getWord8" Nothing Nothing)
+#endif
arr <- readIORef arr_r
w <- unsafeRead arr ix
writeFastMutInt ix_r (ix+1)
ix <- readFastMutInt ix_r
c <- hGetChar h
writeFastMutInt ix_r (ix+1)
- return (fromIntegral (ord c)) -- XXX not really correct
+ return $! (fromIntegral (ord c)) -- XXX not really correct
putByte :: BinHandle -> Word8 -> IO ()
putByte bh w = put_ bh w
get h = do
w1 <- getWord8 h
w2 <- getWord8 h
- return ((fromIntegral w1 `shiftL` 8) .|. fromIntegral w2)
+ return $! ((fromIntegral w1 `shiftL` 8) .|. fromIntegral w2)
instance Binary Word32 where
w2 <- getWord8 h
w3 <- getWord8 h
w4 <- getWord8 h
- return ((fromIntegral w1 `shiftL` 24) .|.
- (fromIntegral w2 `shiftL` 16) .|.
- (fromIntegral w3 `shiftL` 8) .|.
- (fromIntegral w4))
+ return $! ((fromIntegral w1 `shiftL` 24) .|.
+ (fromIntegral w2 `shiftL` 16) .|.
+ (fromIntegral w3 `shiftL` 8) .|.
+ (fromIntegral w4))
instance Binary Word64 where
w6 <- getWord8 h
w7 <- getWord8 h
w8 <- getWord8 h
- return ((fromIntegral w1 `shiftL` 56) .|.
- (fromIntegral w2 `shiftL` 48) .|.
- (fromIntegral w3 `shiftL` 40) .|.
- (fromIntegral w4 `shiftL` 32) .|.
- (fromIntegral w5 `shiftL` 24) .|.
- (fromIntegral w6 `shiftL` 16) .|.
- (fromIntegral w7 `shiftL` 8) .|.
- (fromIntegral w8))
+ return $! ((fromIntegral w1 `shiftL` 56) .|.
+ (fromIntegral w2 `shiftL` 48) .|.
+ (fromIntegral w3 `shiftL` 40) .|.
+ (fromIntegral w4 `shiftL` 32) .|.
+ (fromIntegral w5 `shiftL` 24) .|.
+ (fromIntegral w6 `shiftL` 16) .|.
+ (fromIntegral w7 `shiftL` 8) .|.
+ (fromIntegral w8))
-- -----------------------------------------------------------------------------
-- Primitve Int writes
instance Binary Int8 where
put_ h w = put_ h (fromIntegral w :: Word8)
- get h = do w <- get h; return (fromIntegral (w::Word8))
+ get h = do w <- get h; return $! (fromIntegral (w::Word8))
instance Binary Int16 where
put_ h w = put_ h (fromIntegral w :: Word16)
- get h = do w <- get h; return (fromIntegral (w::Word16))
+ get h = do w <- get h; return $! (fromIntegral (w::Word16))
instance Binary Int32 where
put_ h w = put_ h (fromIntegral w :: Word32)
- get h = do w <- get h; return (fromIntegral (w::Word32))
+ get h = do w <- get h; return $! (fromIntegral (w::Word32))
instance Binary Int64 where
put_ h w = put_ h (fromIntegral w :: Word64)
- get h = do w <- get h; return (fromIntegral (w::Word64))
+ get h = do w <- get h; return $! (fromIntegral (w::Word64))
-- -----------------------------------------------------------------------------
-- Instances for standard types
instance Binary Bool where
put_ bh b = putByte bh (fromIntegral (fromEnum b))
- get bh = do x <- getWord8 bh; return (toEnum (fromIntegral x))
+ get bh = do x <- getWord8 bh; return $! (toEnum (fromIntegral x))
-- getF bh p = case getBitsF bh 1 p of (x,b) -> (toEnum x,b)
instance Binary Char where
put_ bh c = put_ bh (fromIntegral (ord c) :: Word32)
- get bh = do x <- get bh; return (chr (fromIntegral (x :: Word32)))
+ get bh = do x <- get bh; return $! (chr (fromIntegral (x :: Word32)))
-- getF bh p = case getBitsF bh 8 p of (x,b) -> (toEnum x,b)
instance Binary Int where
put_ bh i = put_ bh (fromIntegral i :: Int32)
get bh = do
x <- get bh
- return (fromIntegral (x :: Int32))
+ return $! (fromIntegral (x :: Int32))
#elif SIZEOF_HSINT == 8
put_ bh i = put_ bh (fromIntegral i :: Int64)
get bh = do
x <- get bh
- return (fromIntegral (x :: Int64))
+ return $! (fromIntegral (x :: Int64))
#else
#error "unsupported sizeof(HsInt)"
#endif
-- getF bh = getBitsF bh 32
instance Binary a => Binary [a] where
- put_ bh [] = putByte bh 0
- put_ bh (x:xs) = do putByte bh 1; put_ bh x; put_ bh xs
- get bh = do h <- getWord8 bh
- case h of
- 0 -> return []
- _ -> do x <- get bh
- xs <- get bh
- return (x:xs)
+ put_ bh l = do
+ let len = length l
+ if (len < 0xff)
+ then putByte bh (fromIntegral len :: Word8)
+ else do putByte bh 0xff; put_ bh (fromIntegral len :: Word32)
+ mapM_ (put_ bh) l
+ get bh = do
+ b <- getByte bh
+ len <- if b == 0xff
+ then get bh
+ else return (fromIntegral b :: Word32)
+ let loop 0 = return []
+ loop n = do a <- get bh; as <- loop (n-1); return (a:as)
+ loop len
instance (Binary a, Binary b) => Binary (a,b) where
put_ bh (a,b) = do put_ bh a; put_ bh b
get bh = do i <- get bh; return (BinPtr i)
-- -----------------------------------------------------------------------------
--- unboxed mutable Ints
-
-#ifdef __GLASGOW_HASKELL__
-data FastMutInt = FastMutInt (MutableByteArray# RealWorld)
-
-newFastMutInt = IO $ \s ->
- case newByteArray# size s of { (# s, arr #) ->
- (# s, FastMutInt arr #) }
- where I# size = SIZEOF_HSWORD
-
-readFastMutInt (FastMutInt arr) = IO $ \s ->
- case readIntArray# arr 0# s of { (# s, i #) ->
- (# s, I# i #) }
-
-writeFastMutInt (FastMutInt arr) (I# i) = IO $ \s ->
- case writeIntArray# arr 0# i s of { s ->
- (# s, () #) }
-#endif
-
--- -----------------------------------------------------------------------------
-- Lazy reading/writing
lazyPut :: Binary a => BinHandle -> a -> IO ()
seekBin bh p -- skip over the object for now
return a
--- -----------------------------------------------------------------------------
--- BinHandleState
-
-type BinHandleState =
- (Module,
- IORef Int,
- IORef (UniqFM (Int,FastString)),
- Array Int FastString)
-
-initReadState :: BinHandleState
-initReadState = (undef, undef, undef, undef)
-
-newWriteState :: Module -> IO BinHandleState
-newWriteState m = do
- j_r <- newIORef 0
- out_r <- newIORef emptyUFM
- return (m,j_r,out_r,undef)
+-- --------------------------------------------------------------
+-- Main wrappers: getBinFileWithDict, putBinFileWithDict
+--
+-- This layer is built on top of the stuff above,
+-- and should not know anything about BinHandles
+-- --------------------------------------------------------------
-undef = error "Binary.BinHandleState"
+initBinMemSize = (1024*1024) :: Int
--- -----------------------------------------------------------------------------
--- FastString binary interface
+#if WORD_SIZE_IN_BITS == 32
+binaryInterfaceMagic = 0x1face :: Word32
+#elif WORD_SIZE_IN_BITS == 64
+binaryInterfaceMagic = 0x1face64 :: Word32
+#endif
getBinFileWithDict :: Binary a => FilePath -> IO a
getBinFileWithDict file_path = do
bh <- Binary.readBinMem file_path
- dict_p <- Binary.get bh -- get the dictionary ptr
- data_p <- tellBin bh
+
+ -- Read the magic number to check that this really is a GHC .hi file
+ -- (This magic number does not change when we change
+ -- GHC interface file format)
+ magic <- get bh
+ when (magic /= binaryInterfaceMagic) $
+ throwDyn (ProgramError (
+ "magic number mismatch: old/corrupt interface file?"))
+
+ -- Read the dictionary
+ -- The next word in the file is a pointer to where the dictionary is
+ -- (probably at the end of the file)
+ dict_p <- Binary.get bh -- Get the dictionary ptr
+ data_p <- tellBin bh -- Remember where we are now
seekBin bh dict_p
dict <- getDictionary bh
- seekBin bh data_p
- let (mod, j_r, out_r, _) = state bh
- get bh{ state = (mod,j_r,out_r,dict) }
-
-initBinMemSize = (1024*1024) :: Int
-
-putBinFileWithDict :: Binary a => FilePath -> Module -> a -> IO ()
-putBinFileWithDict file_path mod a = do
- bh <- openBinMem initBinMemSize mod
- p <- tellBin bh
- put_ bh p -- placeholder for ptr to dictionary
- put_ bh a
- let (_, j_r, fm_r, _) = state bh
- j <- readIORef j_r
- fm <- readIORef fm_r
- dict_p <- tellBin bh
- putAt bh p dict_p -- fill in the placeholder
- seekBin bh dict_p -- seek back to the end of the file
+ seekBin bh data_p -- Back to where we were before
+
+ -- Initialise the user-data field of bh
+ let bh' = setUserData bh (initReadState dict)
+
+ -- At last, get the thing
+ get bh'
+
+putBinFileWithDict :: Binary a => FilePath -> a -> IO ()
+putBinFileWithDict file_path the_thing = do
+ bh <- openBinMem initBinMemSize
+ put_ bh binaryInterfaceMagic
+
+ -- Remember where the dictionary pointer will go
+ dict_p_p <- tellBin bh
+ put_ bh dict_p_p -- Placeholder for ptr to dictionary
+
+ -- Make some intial state
+ usr_state <- newWriteState
+
+ -- Put the main thing,
+ put_ (setUserData bh usr_state) the_thing
+
+ -- Get the final-state
+ j <- readIORef (ud_next usr_state)
+ fm <- readIORef (ud_map usr_state)
+ dict_p <- tellBin bh -- This is where the dictionary will start
+
+ -- Write the dictionary pointer at the fornt of the file
+ putAt bh dict_p_p dict_p -- Fill in the placeholder
+ seekBin bh dict_p -- Seek back to the end of the file
+
+ -- Write the dictionary itself
putDictionary bh j (constructDictionary j fm)
+
+ -- And send the result to the file
writeBinMem bh file_path
-type Dictionary = Array Int FastString
- -- should be 0-indexed
+-- -----------------------------------------------------------------------------
+-- UserData
+-- -----------------------------------------------------------------------------
+
+data UserData =
+ UserData { -- This field is used only when reading
+ ud_dict :: Dictionary,
+
+ -- The next two fields are only used when writing
+ ud_next :: IORef Int, -- The next index to use
+ ud_map :: IORef (UniqFM (Int,FastString))
+ }
+
+noUserData = error "Binary.UserData: no user data"
+
+initReadState :: Dictionary -> UserData
+initReadState dict = UserData{ ud_dict = dict,
+ ud_next = undef "next",
+ ud_map = undef "map" }
+
+newWriteState :: IO UserData
+newWriteState = do
+ j_r <- newIORef 0
+ out_r <- newIORef emptyUFM
+ return (UserData { ud_dict = panic "dict",
+ ud_next = j_r,
+ ud_map = out_r })
+
+
+undef s = panic ("Binary.UserData: no " ++ s)
+
+---------------------------------------------------------
+-- The Dictionary
+---------------------------------------------------------
+
+type Dictionary = Array Int FastString -- The dictionary
+ -- Should be 0-indexed
putDictionary :: BinHandle -> Int -> Dictionary -> IO ()
putDictionary bh sz dict = do
constructDictionary :: Int -> UniqFM (Int,FastString) -> Dictionary
constructDictionary j fm = array (0,j-1) (eltsUFM fm)
-putFS bh (FastString id l ba) = do
- put_ bh (I# l)
- putByteArray bh ba l
-putFS bh s = error ("Binary.put_(FastString): " ++ unpackFS s)
- -- Note: the length of the FastString is *not* the same as
- -- the size of the ByteArray: the latter is rounded up to a
- -- multiple of the word size.
+---------------------------------------------------------
+-- Reading and writing FastStrings
+---------------------------------------------------------
+
+putFS bh (FastString id l _ buf _) = do
+ put_ bh l
+ withForeignPtr buf $ \ptr ->
+ let
+ go n | n == l = return ()
+ | otherwise = do
+ b <- peekElemOff ptr n
+ putByte bh b
+ go (n+1)
+ in
+ go 0
-getFS bh = do
+{- -- possible faster version, not quite there yet:
+getFS bh@BinMem{} = do
(I# l) <- get bh
- (BA ba) <- getByteArray bh (I# l)
- return (mkFastSubStringBA# ba 0# l)
- -- XXX ToDo: one too many copies here
+ arr <- readIORef (arr_r bh)
+ off <- readFastMutInt (off_r bh)
+ return $! (mkFastSubStringBA# arr off l)
+-}
+getFS bh = do
+ l <- get bh
+ fp <- mallocForeignPtrBytes l
+ withForeignPtr fp $ \ptr -> do
+ let
+ go n | n == l = mkFastStringForeignPtr ptr fp l
+ | otherwise = do
+ b <- getByte bh
+ pokeElemOff ptr n b
+ go (n+1)
+ --
+ go 0
+
+instance Binary PackageId where
+ put_ bh pid = put_ bh (packageIdFS pid)
+ get bh = do { fs <- get bh; return (fsToPackageId fs) }
instance Binary FastString where
- put_ bh f@(FastString id l ba) =
- case getUserData bh of { (_, j_r, out_r, dict) -> do
+ put_ bh f@(FastString id l _ fp _) =
+ case getUserData bh of {
+ UserData { ud_next = j_r, ud_map = out_r, ud_dict = dict} -> do
out <- readIORef out_r
let uniq = getUnique f
case lookupUFM out uniq of
get bh = do
j <- get bh
- case getUserData bh of (_, _, _, arr) -> return (arr ! j)
+ return $! (ud_dict (getUserData bh) ! j)