From: Simon Marlow Date: Mon, 13 Jul 2009 11:31:04 +0000 (+0000) Subject: Export Unicode and newline functionality from System.IO; update Haddock docs X-Git-Tag: ghc-darcs-git-switchover~352 X-Git-Url: http://git.megacz.com/?a=commitdiff_plain;h=ee7be4593b1b17d4ef45c37963b8b19d53865ab6;hp=95ff1b673ad261f3fde2c1f89dd987b2a638dc78;p=ghc-base.git Export Unicode and newline functionality from System.IO; update Haddock docs --- diff --git a/GHC/IO/Encoding.hs b/GHC/IO/Encoding.hs index bb976e3..78aad98 100644 --- a/GHC/IO/Encoding.hs +++ b/GHC/IO/Encoding.hs @@ -43,45 +43,57 @@ import GHC.IO.Exception -- ----------------------------------------------------------------------------- -latin1, utf8, utf16, utf16le, utf16be, utf32, utf32le, utf32be, localeEncoding - :: TextEncoding - -- | The Latin1 (ISO8859-1) encoding. This encoding maps bytes -- directly to the first 256 Unicode code points, and is thus not a --- complete Unicode encoding. +-- complete Unicode encoding. An attempt to write a character greater than +-- '\255' to a 'Handle' using the 'latin1' encoding will result in an error. +latin1 :: TextEncoding latin1 = Latin1.latin1_checked --- | The UTF-8 unicode encoding +-- | The UTF-8 Unicode encoding +utf8 :: TextEncoding utf8 = UTF8.utf8 --- | The UTF-16 unicode encoding (a byte-order-mark should be used to +-- | The UTF-16 Unicode encoding (a byte-order-mark should be used to -- indicate endianness). +utf16 :: TextEncoding utf16 = UTF16.utf16 --- | The UTF-16 unicode encoding (litte-endian) +-- | The UTF-16 Unicode encoding (little-endian) +utf16le :: TextEncoding utf16le = UTF16.utf16le --- | The UTF-16 unicode encoding (big-endian) +-- | The UTF-16 Unicode encoding (big-endian) +utf16be :: TextEncoding utf16be = UTF16.utf16be --- | The UTF-32 unicode encoding (a byte-order-mark should be used to +-- | The UTF-32 Unicode encoding (a byte-order-mark should be used to -- indicate endianness). 
+utf32 :: TextEncoding utf32 = UTF32.utf32 --- | The UTF-32 unicode encoding (litte-endian) +-- | The UTF-32 Unicode encoding (little-endian) +utf32le :: TextEncoding utf32le = UTF32.utf32le --- | The UTF-32 unicode encoding (big-endian) +-- | The UTF-32 Unicode encoding (big-endian) +utf32be :: TextEncoding utf32be = UTF32.utf32be --- | The text encoding of the current locale +-- | The Unicode encoding of the current locale +localeEncoding :: TextEncoding #if !defined(mingw32_HOST_OS) localeEncoding = Iconv.localeEncoding #else localeEncoding = Latin1.latin1 #endif --- | Acquire the named text encoding +-- | Look up the named Unicode encoding. May fail with +-- +-- * 'isDoesNotExistError' if the encoding is unknown +-- +-- The set of known encodings is system-dependent. +-- mkTextEncoding :: String -> IO TextEncoding #if !defined(mingw32_HOST_OS) mkTextEncoding = Iconv.mkTextEncoding @@ -94,7 +106,7 @@ mkTextEncoding "UTF-32" = return utf32 mkTextEncoding "UTF-32LE" = return utf32le mkTextEncoding "UTF-32BE" = return utf32be mkTextEncoding e = ioException - (IOError Nothing InvalidArgument "mkTextEncoding" + (IOError Nothing NoSuchThing "mkTextEncoding" ("unknown encoding:" ++ e) Nothing Nothing) #endif diff --git a/GHC/IO/Handle.hs b/GHC/IO/Handle.hs index 1531b4a..969b805 100644 --- a/GHC/IO/Handle.hs +++ b/GHC/IO/Handle.hs @@ -246,8 +246,7 @@ hSetBuffering handle mode = -- hSetEncoding -- | The action 'hSetEncoding' @hdl@ @encoding@ changes the text encoding --- for the handle @hdl@ to @encoding@. Encodings are available from the --- module "GHC.IO.Encoding". The default encoding when a 'Handle' is +-- for the handle @hdl@ to @encoding@. The default encoding when a 'Handle' is -- created is 'localeEncoding', namely the default encoding for the current -- locale. -- @@ -255,6 +254,9 @@ hSetBuffering handle mode = -- stop further encoding or decoding on an existing 'Handle', use -- 'hSetBinaryMode'. 
-- +-- 'hSetEncoding' may need to flush buffered data in order to change +-- the encoding. +-- hSetEncoding :: Handle -> TextEncoding -> IO () hSetEncoding hdl encoding = do withHandle "hSetEncoding" hdl $ \h_@Handle__{..} -> do diff --git a/GHC/IO/Handle/Text.hs b/GHC/IO/Handle/Text.hs index d9e9672..754be02 100644 --- a/GHC/IO/Handle/Text.hs +++ b/GHC/IO/Handle/Text.hs @@ -680,6 +680,9 @@ commitBuffer' raw sz@(I# _) count@(I# _) flush release -- 'hPutBuf' ignores any text encoding that applies to the 'Handle', -- writing the bytes directly to the underlying file or device. -- +-- 'hPutBuf' ignores the prevailing 'TextEncoding' and +-- 'NewlineMode' on the 'Handle', and writes bytes directly. +-- -- This operation may fail with: -- -- * 'ResourceVanished' if the handle is a pipe or socket, and the @@ -784,6 +787,8 @@ writeChunkNonBlocking h_@Handle__{..} ptr bytes -- If the handle is a pipe or socket, and the writing end -- is closed, 'hGetBuf' will behave as if EOF was reached. -- +-- 'hGetBuf' ignores the prevailing 'TextEncoding' and 'NewlineMode' +-- on the 'Handle', and reads bytes directly. hGetBuf :: Handle -> Ptr a -> Int -> IO Int hGetBuf h ptr count @@ -868,6 +873,9 @@ readChunk h_@Handle__{..} ptr bytes -- If the handle is a pipe or socket, and the writing end -- is closed, 'hGetBufNonBlocking' will behave as if EOF was reached. -- +-- 'hGetBufNonBlocking' ignores the prevailing 'TextEncoding' and +-- 'NewlineMode' on the 'Handle', and reads bytes directly. + hGetBufNonBlocking :: Handle -> Ptr a -> Int -> IO Int hGetBufNonBlocking h ptr count | count == 0 = return 0 diff --git a/GHC/IO/Handle/Types.hs b/GHC/IO/Handle/Types.hs index cdde7d8..a45f298 100644 --- a/GHC/IO/Handle/Types.hs +++ b/GHC/IO/Handle/Types.hs @@ -322,8 +322,8 @@ and hence it is only possible on a seekable Handle. -- Newline translation -- | The representation of a newline in the external file or stream. 
-data Newline = LF -- ^ "\n" - | CRLF -- ^ "\r\n" +data Newline = LF -- ^ '\n' + | CRLF -- ^ '\r\n' deriving Eq -- | Specifies the translation, if any, of newline characters between @@ -339,7 +339,8 @@ data NewlineMode } deriving Eq --- | The native newline representation for the current platform +-- | The native newline representation for the current platform: 'LF' +-- on Unix systems, 'CRLF' on Windows. nativeNewline :: Newline #ifdef mingw32_HOST_OS nativeNewline = CRLF @@ -347,7 +348,7 @@ nativeNewline = CRLF nativeNewline = LF #endif --- | Map "\r\n" into "\n" on input, and "\n" to the native newline +-- | Map '\r\n' into '\n' on input, and '\n' to the native newline -- represetnation on output. This mode can be used on any platform, and -- works with text files using any newline convention. The downside is -- that @readFile >>= writeFile@ might yield a different file. diff --git a/System/IO.hs b/System/IO.hs index dfa589d..9560c26 100644 --- a/System/IO.hs +++ b/System/IO.hs @@ -159,6 +159,62 @@ module System.IO ( openTempFile, openBinaryTempFile, + +#if !defined(__NHC__) && !defined(__HUGS__) + -- * Unicode encoding\/decoding + + -- | A text-mode 'Handle' has an associated 'TextEncoding', which + -- is used to decode bytes into Unicode characters when reading, + -- and encode Unicode characters into bytes when writing. + -- + -- The default 'TextEncoding' is the same as the default encoding + -- on your system, which is also available as 'localeEncoding'. + -- (GHC note: on Windows, currently 'localeEncoding' is always + -- 'latin1'; there is no support for encoding and decoding using + -- the ANSI code page). + -- + -- Encoding and decoding errors are always detected and reported, + -- except during lazy I/O ('hGetContents', 'getContents', and + -- 'readFile'), where a decoding error merely results in + -- termination of the character stream, as with other I/O errors. 
+ + hSetEncoding, + + -- ** Unicode encodings + TextEncoding, + latin1, + utf8, + utf16, utf16le, utf16be, + utf32, utf32le, utf32be, + localeEncoding, + mkTextEncoding, +#endif + +#if !defined(__NHC__) && !defined(__HUGS__) + -- * Newline conversion + + -- | In Haskell, a newline is always represented by the character + -- '\n'. However, in files and external character streams, a + -- newline may be represented by another character sequence, such + -- as '\r\n'. + -- + -- A text-mode 'Handle' has an associated 'NewlineMode' that + -- specifies how to translate newline characters. The + -- 'NewlineMode' specifies the input and output translation + -- separately, so that for instance you can translate '\r\n' + -- to '\n' on input, but leave newlines as '\n' on output. + -- + -- The default 'NewlineMode' for a 'Handle' is + -- 'nativeNewlineMode', which does no translation on Unix systems, + -- but translates '\r\n' to '\n' and back on Windows. + -- + -- Binary-mode 'Handle's do no newline translation at all. + -- + hSetNewlineMode, + Newline(..), nativeNewline, + NewlineMode(..), + noNewlineTranslation, universalNewlineMode, nativeNewlineMode, +#endif ) where import Control.Exception.Base @@ -180,7 +236,8 @@ import GHC.IO.Handle.FD import GHC.IO.Handle import GHC.IORef import GHC.IO.Exception ( userError ) --- import GHC.Exception +import GHC.IO.Encoding +import GHC.Exception import GHC.Num import Text.Read import GHC.Show