-- -----------------------------------------------------------------------------
-latin1, utf8, utf16, utf16le, utf16be, utf32, utf32le, utf32be, localeEncoding
- :: TextEncoding
-
-- | The Latin1 (ISO8859-1) encoding. This encoding maps bytes
-- directly to the first 256 Unicode code points, and is thus not a
--- complete Unicode encoding.
+-- complete Unicode encoding. An attempt to write a character greater than
+-- '\255' to a 'Handle' using the 'latin1' encoding will result in an error.
+latin1 :: TextEncoding
latin1 = Latin1.latin1_checked
--- | The UTF-8 unicode encoding
+-- | The UTF-8 Unicode encoding
+utf8 :: TextEncoding
utf8 = UTF8.utf8
--- | The UTF-16 unicode encoding (a byte-order-mark should be used to
+-- | The UTF-16 Unicode encoding (a byte-order-mark should be used to
-- indicate endianness).
+utf16 :: TextEncoding
utf16 = UTF16.utf16
--- | The UTF-16 unicode encoding (litte-endian)
+-- | The UTF-16 Unicode encoding (little-endian)
+utf16le :: TextEncoding
utf16le = UTF16.utf16le
--- | The UTF-16 unicode encoding (big-endian)
+-- | The UTF-16 Unicode encoding (big-endian)
+utf16be :: TextEncoding
utf16be = UTF16.utf16be
--- | The UTF-32 unicode encoding (a byte-order-mark should be used to
+-- | The UTF-32 Unicode encoding (a byte-order-mark should be used to
-- indicate endianness).
+utf32 :: TextEncoding
utf32 = UTF32.utf32
--- | The UTF-32 unicode encoding (litte-endian)
+-- | The UTF-32 Unicode encoding (little-endian)
+utf32le :: TextEncoding
utf32le = UTF32.utf32le
--- | The UTF-32 unicode encoding (big-endian)
+-- | The UTF-32 Unicode encoding (big-endian)
+utf32be :: TextEncoding
utf32be = UTF32.utf32be
--- | The text encoding of the current locale
+-- | The Unicode encoding of the current locale
+localeEncoding :: TextEncoding
#if !defined(mingw32_HOST_OS)
localeEncoding = Iconv.localeEncoding
#else
localeEncoding = Latin1.latin1
#endif
--- | Acquire the named text encoding
+-- | Look up the named Unicode encoding. May fail with
+--
+-- * 'isDoesNotExistError' if the encoding is unknown
+--
+-- The set of known encodings is system-dependent.
+--
mkTextEncoding :: String -> IO TextEncoding
#if !defined(mingw32_HOST_OS)
mkTextEncoding = Iconv.mkTextEncoding
mkTextEncoding "UTF-32LE" = return utf32le
mkTextEncoding "UTF-32BE" = return utf32be
mkTextEncoding e = ioException
- (IOError Nothing InvalidArgument "mkTextEncoding"
+ (IOError Nothing NoSuchThing "mkTextEncoding"
("unknown encoding:" ++ e) Nothing Nothing)
#endif
-- hSetEncoding
-- | The action 'hSetEncoding' @hdl@ @encoding@ changes the text encoding
--- for the handle @hdl@ to @encoding@. Encodings are available from the
--- module "GHC.IO.Encoding". The default encoding when a 'Handle' is
+-- for the handle @hdl@ to @encoding@. The default encoding when a 'Handle' is
-- created is 'localeEncoding', namely the default encoding for the current
-- locale.
--
-- stop further encoding or decoding on an existing 'Handle', use
-- 'hSetBinaryMode'.
--
+-- 'hSetEncoding' may need to flush buffered data in order to change
+-- the encoding.
+--
hSetEncoding :: Handle -> TextEncoding -> IO ()
hSetEncoding hdl encoding = do
withHandle "hSetEncoding" hdl $ \h_@Handle__{..} -> do
-- 'hPutBuf' ignores any text encoding that applies to the 'Handle',
-- writing the bytes directly to the underlying file or device.
--
+-- 'hPutBuf' ignores the prevailing 'TextEncoding' and
+-- 'NewlineMode' on the 'Handle', and writes bytes directly.
+--
-- This operation may fail with:
--
-- * 'ResourceVanished' if the handle is a pipe or socket, and the
-- If the handle is a pipe or socket, and the writing end
-- is closed, 'hGetBuf' will behave as if EOF was reached.
--
+-- 'hGetBuf' ignores the prevailing 'TextEncoding' and 'NewlineMode'
+-- on the 'Handle', and reads bytes directly.
hGetBuf :: Handle -> Ptr a -> Int -> IO Int
hGetBuf h ptr count
-- If the handle is a pipe or socket, and the writing end
-- is closed, 'hGetBufNonBlocking' will behave as if EOF was reached.
--
+-- 'hGetBufNonBlocking' ignores the prevailing 'TextEncoding' and
+-- 'NewlineMode' on the 'Handle', and reads bytes directly.
+
hGetBufNonBlocking :: Handle -> Ptr a -> Int -> IO Int
hGetBufNonBlocking h ptr count
| count == 0 = return 0
-- Newline translation
-- | The representation of a newline in the external file or stream.
-data Newline = LF -- ^ "\n"
- | CRLF -- ^ "\r\n"
+data Newline = LF -- ^ '\n'
+ | CRLF -- ^ '\r\n'
deriving Eq
-- | Specifies the translation, if any, of newline characters between
}
deriving Eq
--- | The native newline representation for the current platform
+-- | The native newline representation for the current platform: 'LF'
+-- on Unix systems, 'CRLF' on Windows.
nativeNewline :: Newline
#ifdef mingw32_HOST_OS
nativeNewline = CRLF
nativeNewline = LF
#endif
--- | Map "\r\n" into "\n" on input, and "\n" to the native newline
+-- | Map '\r\n' into '\n' on input, and '\n' to the native newline
-- representation on output. This mode can be used on any platform, and
-- works with text files using any newline convention. The downside is
-- that @readFile >>= writeFile@ might yield a different file.
openTempFile,
openBinaryTempFile,
+
+#if !defined(__NHC__) && !defined(__HUGS__)
+ -- * Unicode encoding\/decoding
+
+ -- | A text-mode 'Handle' has an associated 'TextEncoding', which
+ -- is used to decode bytes into Unicode characters when reading,
+ -- and encode Unicode characters into bytes when writing.
+ --
+ -- The default 'TextEncoding' is the same as the default encoding
+ -- on your system, which is also available as 'localeEncoding'.
+ -- (GHC note: on Windows, currently 'localeEncoding' is always
+ -- 'latin1'; there is no support for encoding and decoding using
+ -- the ANSI code page).
+ --
+ -- Encoding and decoding errors are always detected and reported,
+ -- except during lazy I/O ('hGetContents', 'getContents', and
+ -- 'readFile'), where a decoding error merely results in
+ -- termination of the character stream, as with other I/O errors.
+
+ hSetEncoding,
+
+ -- ** Unicode encodings
+ TextEncoding,
+ latin1,
+ utf8,
+ utf16, utf16le, utf16be,
+ utf32, utf32le, utf32be,
+ localeEncoding,
+ mkTextEncoding,
+#endif
+
+#if !defined(__NHC__) && !defined(__HUGS__)
+ -- * Newline conversion
+
+ -- | In Haskell, a newline is always represented by the character
+ -- '\n'. However, in files and external character streams, a
+ -- newline may be represented by another character sequence, such
+ -- as '\r\n'.
+ --
+ -- A text-mode 'Handle' has an associated 'NewlineMode' that
+ -- specifies how to translate newline characters. The
+ -- 'NewlineMode' specifies the input and output translation
+ -- separately, so that for instance you can translate '\r\n'
+ -- to '\n' on input, but leave newlines as '\n' on output.
+ --
+ -- The default 'NewlineMode' for a 'Handle' is
+ -- 'nativeNewlineMode', which does no translation on Unix systems,
+ -- but translates '\r\n' to '\n' and back on Windows.
+ --
+ -- Binary-mode 'Handle's do no newline translation at all.
+ --
+ hSetNewlineMode,
+ Newline(..), nativeNewline,
+ NewlineMode(..),
+ noNewlineTranslation, universalNewlineMode, nativeNewlineMode,
+#endif
) where
import Control.Exception.Base
import GHC.IO.Handle
import GHC.IORef
import GHC.IO.Exception ( userError )
--- import GHC.Exception
+import GHC.IO.Encoding
+import GHC.Exception
import GHC.Num
import Text.Read
import GHC.Show