X-Git-Url: http://git.megacz.com/?p=ghc-base.git;a=blobdiff_plain;f=GHC%2FIO%2FHandle%2FTypes.hs;h=bff46813980da6ffdec52591f7508de43b4a70cd;hp=f3cf717065799c70a01f751c3a1a5bd4b1f2d9c2;hb=HEAD;hpb=d2063b5b0be014545b21819172c87756efcb0b0c

diff --git a/GHC/IO/Handle/Types.hs b/GHC/IO/Handle/Types.hs
index f3cf717..bff4681 100644
--- a/GHC/IO/Handle/Types.hs
+++ b/GHC/IO/Handle/Types.hs
@@ -1,5 +1,11 @@
-{-# OPTIONS_GHC -fno-implicit-prelude -funbox-strict-fields #-}
+{-# LANGUAGE CPP
+           , NoImplicitPrelude
+           , ExistentialQuantification
+           , DeriveDataTypeable
+  #-}
+{-# OPTIONS_GHC -funbox-strict-fields #-}
 {-# OPTIONS_HADDOCK hide #-}
+
 -----------------------------------------------------------------------------
 -- |
 -- Module      :  GHC.IO.Handle.Types
@@ -41,6 +47,9 @@ import GHC.Read
 import GHC.Word
 import GHC.IO.Device
 import Data.Typeable
+#ifdef DEBUG
+import Control.Monad
+#endif
 
 -- ---------------------------------------------------------------------------
 -- Handle type
@@ -86,15 +95,6 @@ import Data.Typeable
 -- enough information to identify the handle for debugging.  A handle is
 -- equal according to '==' only to itself; no attempt
 -- is made to compare the internal state of different handles for equality.
---
--- GHC note: a 'Handle' will be automatically closed when the garbage
--- collector detects that it has become unreferenced by the program.
--- However, relying on this behaviour is not generally recommended:
--- the garbage collector is unpredictable.  If possible, use explicit
--- an explicit 'hClose' to close 'Handle's when they are no longer
--- required.  GHC does not currently attempt to free up file
--- descriptors when they have run out, it is your responsibility to
--- ensure that this doesn't happen.
 
 data Handle 
   = FileHandle                          -- A normal handle to a file
@@ -121,17 +121,18 @@ instance Eq Handle where
  _ == _ = False 
 
 data Handle__
-  = forall dev . (IODevice dev, BufferedIO dev, Typeable dev) =>
+  = forall dev enc_state dec_state . (IODevice dev, BufferedIO dev, Typeable dev) =>
     Handle__ {
       haDevice      :: !dev,
       haType        :: HandleType,           -- type (read/write/append etc.)
       haByteBuffer  :: !(IORef (Buffer Word8)),
       haBufferMode  :: BufferMode,
-      haLastDecode  :: !(IORef (Buffer Word8)),
+      haLastDecode  :: !(IORef (dec_state, Buffer Word8)),
       haCharBuffer  :: !(IORef (Buffer CharBufElem)), -- the current buffer
       haBuffers     :: !(IORef (BufferList CharBufElem)),  -- spare buffers
-      haEncoder     :: Maybe TextEncoder,
-      haDecoder     :: Maybe TextDecoder,
+      haEncoder     :: Maybe (TextEncoder enc_state),
+      haDecoder     :: Maybe (TextDecoder dec_state),
+      haCodec       :: Maybe TextEncoding,
       haInputNL     :: Newline,
       haOutputNL    :: Newline,
       haOtherSide   :: Maybe (MVar Handle__) -- ptr to the write side of a 
@@ -187,6 +188,13 @@ checkHandleInvariants h_ = do
  checkBuffer bbuf
  cbuf <- readIORef (haCharBuffer h_)
  checkBuffer cbuf
+ when (isWriteBuffer cbuf && not (isEmptyBuffer cbuf)) $
+   error ("checkHandleInvariants: char write buffer non-empty: " ++
+          summaryBuffer bbuf ++ ", " ++ summaryBuffer cbuf)
+ when (isWriteBuffer bbuf /= isWriteBuffer cbuf) $
+   error ("checkHandleInvariants: buffer modes differ: " ++
+          summaryBuffer bbuf ++ ", " ++ summaryBuffer cbuf)
+
 #else
 checkHandleInvariants _ = return ()
 #endif
@@ -265,25 +273,46 @@ buffer, and then provide it immediately to the caller.
 
 [note Buffered Writing]
 
-Characters are written into the Char buffer by e.g. hPutStr.  When the
-buffer is full, we call writeTextDevice, which encodes the Char buffer
-into the byte buffer, and then immediately writes it all out to the
-underlying device.  The Char buffer will always be empty afterward.
-This might require multiple decoding/writing cycles.
+Characters are written into the Char buffer by e.g. hPutStr.  At the
+end of the operation, or when the char buffer is full, the buffer is
+decoded to the byte buffer (see writeCharBuffer).  This is so that we
+can detect encoding errors at the right point.
+
+Hence, the Char buffer is always empty between Handle operations.
 
 [note Buffer Sizing]
 
-Since the buffer mode makes no difference when reading, we can just
-use the default buffer size for both the byte and the Char buffer.
-Ineed, we must have room for at least one Char in the Char buffer,
-because we have to implement hLookAhead, which requires caching a Char
-in the Handle.  Furthermore, when doing newline translation, we need
-room for at least two Chars in the read buffer, so we can spot the
-\r\n sequence.
+The char buffer is always a default size (dEFAULT_CHAR_BUFFER_SIZE).
+The byte buffer size is chosen by the underlying device (via its
+IODevice.newBuffer).  Hence the size of these buffers is not under
+user control.
+
+There are certain minimum sizes for these buffers imposed by the
+library (but not checked):
+
+ - we must be able to buffer at least one character, so that
+   hLookAhead can work
+
+ - the byte buffer must be able to store at least one encoded
+   character in the current encoding (6 bytes?)
+
+ - when reading, the char buffer must have room for two characters, so
+   that we can spot the \r\n sequence.
+
+How do we implement hSetBuffering?
+
+For reading, we have never used the user-supplied buffer size, because
+there's no point: we always pass all available data to the reader
+immediately.  Buffering would imply waiting until a certain amount of
+data is available, which has no advantages.  So hSetBuffering is
+essentially a no-op for read handles, except that it turns on/off raw
+mode for the underlying device if necessary.
 
-For writing, however, when the buffer mode is NoBuffering, we use a
-1-element Char buffer to force flushing of the buffer after each Char
-is read.
+For writing, the buffering mode is handled by the write operations
+themselves (hPutChar and hPutStr).  Every write ends with
+writeCharBuffer, which checks whether the buffer should be flushed
+according to the current buffering mode.  Additionally, we look for
+newlines and flush if the mode is LineBuffering.
 
 [note Buffer Flushing]
 
@@ -292,8 +321,7 @@ is read.
 We must be able to flush the Char buffer, in order to implement
 hSetEncoding, and things like hGetBuf which want to read raw bytes.
 
-Flushing the Char buffer on a write Handle is easy: just call
-writeTextDevice to encode and write the date.
+Flushing the Char buffer on a write Handle is easy: it is always empty.
 
 Flushing the Char buffer on a read Handle involves rewinding the byte
 buffer to the point representing the next Char in the Char buffer.
@@ -322,9 +350,9 @@ and hence it is only possible on a seekable Handle.
 -- Newline translation
 
 -- | The representation of a newline in the external file or stream.
-data Newline = LF    -- ^ "\n"
-             | CRLF  -- ^ "\r\n"
-             deriving Eq
+data Newline = LF    -- ^ '\n'
+             | CRLF  -- ^ '\r\n'
+             deriving (Eq, Ord, Read, Show)
 
 -- | Specifies the translation, if any, of newline characters between
 -- internal Strings and the external file or stream.  Haskell Strings
@@ -337,9 +365,10 @@ data NewlineMode
                   outputNL :: Newline
                     -- ^ the representation of newlines on output
                  }
-             deriving Eq
+             deriving (Eq, Ord, Read, Show)
 
--- | The native newline representation for the current platform
+-- | The native newline representation for the current platform: 'LF'
+-- on Unix systems, 'CRLF' on Windows.
 nativeNewline :: Newline
 #ifdef mingw32_HOST_OS
 nativeNewline = CRLF
@@ -347,7 +376,7 @@ nativeNewline = CRLF
 nativeNewline = LF
 #endif
 
--- | Map "\r\n" into "\n" on input, and "\n" to the native newline
+-- | Map '\r\n' into '\n' on input, and '\n' to the native newline
 -- represetnation on output.  This mode can be used on any platform, and
 -- works with text files using any newline convention.  The downside is
 -- that @readFile >>= writeFile@ might yield a different file.