X-Git-Url: http://git.megacz.com/?p=ghc-base.git;a=blobdiff_plain;f=GHC%2FIO%2FHandle%2FTypes.hs;h=bff46813980da6ffdec52591f7508de43b4a70cd;hp=f3cf717065799c70a01f751c3a1a5bd4b1f2d9c2;hb=HEAD;hpb=d2063b5b0be014545b21819172c87756efcb0b0c diff --git a/GHC/IO/Handle/Types.hs b/GHC/IO/Handle/Types.hs index f3cf717..bff4681 100644 --- a/GHC/IO/Handle/Types.hs +++ b/GHC/IO/Handle/Types.hs @@ -1,5 +1,11 @@ -{-# OPTIONS_GHC -fno-implicit-prelude -funbox-strict-fields #-} +{-# LANGUAGE CPP + , NoImplicitPrelude + , ExistentialQuantification + , DeriveDataTypeable + #-} +{-# OPTIONS_GHC -funbox-strict-fields #-} {-# OPTIONS_HADDOCK hide #-} + ----------------------------------------------------------------------------- -- | -- Module : GHC.IO.Handle.Types @@ -41,6 +47,9 @@ import GHC.Read import GHC.Word import GHC.IO.Device import Data.Typeable +#ifdef DEBUG +import Control.Monad +#endif -- --------------------------------------------------------------------------- -- Handle type @@ -86,15 +95,6 @@ import Data.Typeable -- enough information to identify the handle for debugging. A handle is -- equal according to '==' only to itself; no attempt -- is made to compare the internal state of different handles for equality. --- --- GHC note: a 'Handle' will be automatically closed when the garbage --- collector detects that it has become unreferenced by the program. --- However, relying on this behaviour is not generally recommended: --- the garbage collector is unpredictable. If possible, use explicit --- an explicit 'hClose' to close 'Handle's when they are no longer --- required. GHC does not currently attempt to free up file --- descriptors when they have run out, it is your responsibility to --- ensure that this doesn't happen. data Handle = FileHandle -- A normal handle to a file @@ -121,17 +121,18 @@ instance Eq Handle where _ == _ = False data Handle__ - = forall dev . (IODevice dev, BufferedIO dev, Typeable dev) => + = forall dev enc_state dec_state . 
(IODevice dev, BufferedIO dev, Typeable dev) => Handle__ { haDevice :: !dev, haType :: HandleType, -- type (read/write/append etc.) haByteBuffer :: !(IORef (Buffer Word8)), haBufferMode :: BufferMode, - haLastDecode :: !(IORef (Buffer Word8)), + haLastDecode :: !(IORef (dec_state, Buffer Word8)), haCharBuffer :: !(IORef (Buffer CharBufElem)), -- the current buffer haBuffers :: !(IORef (BufferList CharBufElem)), -- spare buffers - haEncoder :: Maybe TextEncoder, - haDecoder :: Maybe TextDecoder, + haEncoder :: Maybe (TextEncoder enc_state), + haDecoder :: Maybe (TextDecoder dec_state), + haCodec :: Maybe TextEncoding, haInputNL :: Newline, haOutputNL :: Newline, haOtherSide :: Maybe (MVar Handle__) -- ptr to the write side of a @@ -187,6 +188,13 @@ checkHandleInvariants h_ = do checkBuffer bbuf cbuf <- readIORef (haCharBuffer h_) checkBuffer cbuf + when (isWriteBuffer cbuf && not (isEmptyBuffer cbuf)) $ + error ("checkHandleInvariants: char write buffer non-empty: " ++ + summaryBuffer bbuf ++ ", " ++ summaryBuffer cbuf) + when (isWriteBuffer bbuf /= isWriteBuffer cbuf) $ + error ("checkHandleInvariants: buffer modes differ: " ++ + summaryBuffer bbuf ++ ", " ++ summaryBuffer cbuf) + #else checkHandleInvariants _ = return () #endif @@ -265,25 +273,46 @@ buffer, and then provide it immediately to the caller. [note Buffered Writing] -Characters are written into the Char buffer by e.g. hPutStr. When the -buffer is full, we call writeTextDevice, which encodes the Char buffer -into the byte buffer, and then immediately writes it all out to the -underlying device. The Char buffer will always be empty afterward. -This might require multiple decoding/writing cycles. +Characters are written into the Char buffer by e.g. hPutStr. At the +end of the operation, or when the char buffer is full, the buffer is +encoded to the byte buffer (see writeCharBuffer). This is so that we +can detect encoding errors at the right point. 
+ +Hence, the Char buffer is always empty between Handle operations. [note Buffer Sizing] -Since the buffer mode makes no difference when reading, we can just -use the default buffer size for both the byte and the Char buffer. -Ineed, we must have room for at least one Char in the Char buffer, -because we have to implement hLookAhead, which requires caching a Char -in the Handle. Furthermore, when doing newline translation, we need -room for at least two Chars in the read buffer, so we can spot the -\r\n sequence. +The char buffer is always a default size (dEFAULT_CHAR_BUFFER_SIZE). +The byte buffer size is chosen by the underlying device (via its +IODevice.newBuffer). Hence the size of these buffers is not under +user control. + +There are certain minimum sizes for these buffers imposed by the +library (but not checked): + + - we must be able to buffer at least one character, so that + hLookAhead can work + + - the byte buffer must be able to store at least one encoded + character in the current encoding (6 bytes?) + + - when reading, the char buffer must have room for two characters, so + that we can spot the \r\n sequence. + +How do we implement hSetBuffering? + +For reading, we have never used the user-supplied buffer size, because +there's no point: we always pass all available data to the reader +immediately. Buffering would imply waiting until a certain amount of +data is available, which has no advantages. So hSetBuffering is +essentially a no-op for read handles, except that it turns on/off raw +mode for the underlying device if necessary. -For writing, however, when the buffer mode is NoBuffering, we use a -1-element Char buffer to force flushing of the buffer after each Char -is read. +For writing, the buffering mode is handled by the write operations +themselves (hPutChar and hPutStr). Every write ends with +writeCharBuffer, which checks whether the buffer should be flushed +according to the current buffering mode. 
Additionally, we look for +newlines and flush if the mode is LineBuffering. [note Buffer Flushing] @@ -292,8 +321,7 @@ is read. We must be able to flush the Char buffer, in order to implement hSetEncoding, and things like hGetBuf which want to read raw bytes. -Flushing the Char buffer on a write Handle is easy: just call -writeTextDevice to encode and write the date. +Flushing the Char buffer on a write Handle is easy: it is always empty. Flushing the Char buffer on a read Handle involves rewinding the byte buffer to the point representing the next Char in the Char buffer. @@ -322,9 +350,9 @@ and hence it is only possible on a seekable Handle. -- Newline translation -- | The representation of a newline in the external file or stream. -data Newline = LF -- ^ "\n" - | CRLF -- ^ "\r\n" - deriving Eq +data Newline = LF -- ^ '\n' + | CRLF -- ^ '\r\n' + deriving (Eq, Ord, Read, Show) -- | Specifies the translation, if any, of newline characters between -- internal Strings and the external file or stream. Haskell Strings @@ -337,9 +365,10 @@ data NewlineMode outputNL :: Newline -- ^ the representation of newlines on output } - deriving Eq + deriving (Eq, Ord, Read, Show) --- | The native newline representation for the current platform +-- | The native newline representation for the current platform: 'LF' +-- on Unix systems, 'CRLF' on Windows. nativeNewline :: Newline #ifdef mingw32_HOST_OS nativeNewline = CRLF @@ -347,7 +376,7 @@ nativeNewline = CRLF nativeNewline = LF #endif --- | Map "\r\n" into "\n" on input, and "\n" to the native newline +-- | Map '\r\n' into '\n' on input, and '\n' to the native newline -- represetnation on output. This mode can be used on any platform, and -- works with text files using any newline convention. The downside is -- that @readFile >>= writeFile@ might yield a different file.