From 245a3e3e650e1b110f620e39925bfb0cc9b93002 Mon Sep 17 00:00:00 2001 From: Simon Marlow Date: Tue, 5 Apr 2011 09:57:22 +0100 Subject: [PATCH] Add System.IO.char8, the encoding used by openBinaryFile, and correct the documentation for hSetBinaryMode which claimed that it was using the latin1 encoding when in fact it was using an unchecked modulo-256 version of it. --- GHC/IO/Encoding.hs | 13 ++++++++++++- GHC/IO/Handle.hs | 2 +- System/IO.hs | 1 + 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/GHC/IO/Encoding.hs b/GHC/IO/Encoding.hs index 505824e..92ca843 100644 --- a/GHC/IO/Encoding.hs +++ b/GHC/IO/Encoding.hs @@ -22,6 +22,7 @@ module GHC.IO.Encoding ( utf16, utf16le, utf16be, utf32, utf32le, utf32be, localeEncoding, fileSystemEncoding, foreignEncoding, + char8, mkTextEncoding, ) where @@ -125,6 +126,16 @@ fileSystemEncoding = CodePage.mkLocaleEncoding RoundtripFailure foreignEncoding = CodePage.mkLocaleEncoding IgnoreCodingFailure #endif +-- | An encoding in which Unicode code points are translated to bytes +-- by taking the code point modulo 256. When decoding, bytes are +-- translated directly into the equivalent code point. +-- +-- This encoding never fails in either direction. However, encoding +-- discards information, so encode followed by decode is not the +-- identity. +char8 :: TextEncoding +char8 = Latin1.latin1 + -- | Look up the named Unicode encoding. 
May fail with -- -- * 'isDoesNotExistError' if the encoding is unknown @@ -183,7 +194,7 @@ mkTextEncoding e = case mb_coding_failure_mode of ("unknown encoding:" ++ e) Nothing Nothing) latin1_encode :: CharBuffer -> Buffer Word8 -> IO (CharBuffer, Buffer Word8) -latin1_encode input output = fmap (\(_why,input',output') -> (input',output')) $ Latin1.latin1_encode input output -- unchecked, used for binary +latin1_encode input output = fmap (\(_why,input',output') -> (input',output')) $ Latin1.latin1_encode input output -- unchecked, used for char8 --latin1_encode = unsafePerformIO $ do mkTextEncoder Iconv.latin1 >>= return.encode latin1_decode :: Buffer Word8 -> CharBuffer -> IO (Buffer Word8, CharBuffer) diff --git a/GHC/IO/Handle.hs b/GHC/IO/Handle.hs index f42fd55..fcfa92d 100644 --- a/GHC/IO/Handle.hs +++ b/GHC/IO/Handle.hs @@ -551,7 +551,7 @@ hIsTerminalDevice handle = do -- | Select binary mode ('True') or text mode ('False') on a open handle. -- (See also 'openBinaryFile'.) -- --- This has the same effect as calling 'hSetEncoding' with 'latin1', together +-- This has the same effect as calling 'hSetEncoding' with 'char8', together -- with 'hSetNewlineMode' with 'noNewlineTranslation'. -- hSetBinaryMode :: Handle -> Bool -> IO () diff --git a/System/IO.hs b/System/IO.hs index ab52244..bf26835 100644 --- a/System/IO.hs +++ b/System/IO.hs @@ -201,6 +201,7 @@ module System.IO ( utf16, utf16le, utf16be, utf32, utf32le, utf32be, localeEncoding, + char8, mkTextEncoding, #endif -- 1.7.10.4