1 {-# OPTIONS_GHC -XNoImplicitPrelude -funbox-strict-fields #-}
2 -----------------------------------------------------------------------------
4 -- Module : GHC.IO.Encoding
5 -- Copyright : (c) The University of Glasgow, 2008-2009
6 -- License : see libraries/base/LICENSE
8 -- Maintainer : libraries@haskell.org
9 -- Stability : internal
10 -- Portability : non-portable
12 -- Text codecs for I/O
14 -----------------------------------------------------------------------------
16 module GHC.IO.Encoding (
17 BufferCodec(..), TextEncoding(..), TextEncoder, TextDecoder,
18 latin1, latin1_encode, latin1_decode,
20 utf16, utf16le, utf16be,
21 utf32, utf32le, utf32be,
29 import GHC.IO.Encoding.Types
31 #if !defined(mingw32_HOST_OS)
32 import qualified GHC.IO.Encoding.Iconv as Iconv
34 import qualified GHC.IO.Encoding.Latin1 as Latin1
35 import qualified GHC.IO.Encoding.UTF8 as UTF8
36 import qualified GHC.IO.Encoding.UTF16 as UTF16
37 import qualified GHC.IO.Encoding.UTF32 as UTF32
39 #if defined(mingw32_HOST_OS)
41 import GHC.IO.Exception
44 -- -----------------------------------------------------------------------------
46 -- | The Latin1 (ISO8859-1) encoding. This encoding maps bytes
47 -- directly to the first 256 Unicode code points, and is thus not a
48 -- complete Unicode encoding. An attempt to write a character greater than
49 -- '\255' to a 'Handle' using the 'latin1' encoding will result in an error.
50 latin1 :: TextEncoding
51 latin1 = Latin1.latin1_checked -- the checked codec; 'latin1_encode' in this module is the unchecked one
53 -- | The UTF-8 Unicode encoding
57 -- | The UTF-8 Unicode encoding, with a byte-order-mark (BOM; the byte
58 -- sequence 0xEF 0xBB 0xBF). This encoding behaves like 'utf8',
59 -- except that on input, the BOM sequence is ignored at the beginning
60 -- of the stream, and on output, the BOM sequence is prepended.
62 -- The byte-order-mark is strictly unnecessary in UTF-8, but is
63 -- sometimes used to identify the encoding of a file.
65 utf8_bom :: TextEncoding
66 utf8_bom = UTF8.utf8_bom -- alias only: the codec itself comes from GHC.IO.Encoding.UTF8 (imported as UTF8)
68 -- | The UTF-16 Unicode encoding (a byte-order-mark should be used to
69 -- indicate endianness).
73 -- | The UTF-16 Unicode encoding (little-endian)
74 utf16le :: TextEncoding
75 utf16le = UTF16.utf16le
77 -- | The UTF-16 Unicode encoding (big-endian); see 'utf16le' for the little-endian variant
78 utf16be :: TextEncoding
79 utf16be = UTF16.utf16be -- alias for the codec from GHC.IO.Encoding.UTF16 (imported as UTF16)
81 -- | The UTF-32 Unicode encoding (a byte-order-mark should be used to
82 -- indicate endianness).
86 -- | The UTF-32 Unicode encoding (little-endian)
87 utf32le :: TextEncoding
88 utf32le = UTF32.utf32le
90 -- | The UTF-32 Unicode encoding (big-endian); see 'utf32le' for the little-endian variant
91 utf32be :: TextEncoding
92 utf32be = UTF32.utf32be -- alias for the codec from GHC.IO.Encoding.UTF32 (imported as UTF32)
94 -- | The Unicode encoding of the current locale
-- When @mingw32_HOST_OS@ is not defined this is 'Iconv.localeEncoding';
-- the alternative equation binds it to the constant 'Latin1.latin1' codec
-- (note: 'Latin1.latin1', not the 'Latin1.latin1_checked' used by 'latin1').
95 localeEncoding :: TextEncoding
-- NOTE(review): the two equations below are compile-time alternatives and
-- must be separated by the CPP else/endif directives; confirm both are present.
96 #if !defined(mingw32_HOST_OS)
97 localeEncoding = Iconv.localeEncoding
99 localeEncoding = Latin1.latin1
102 -- | Look up the named Unicode encoding. May fail with
104 -- * 'isDoesNotExistError' if the encoding is unknown
106 -- The set of known encodings is system-dependent, but includes at least:
110 -- * @UTF-8@, @UTF-16@, @UTF-16BE@, @UTF-16LE@
112 -- * @UTF-32@, @UTF-32BE@, @UTF-32LE@
114 -- On systems using GNU iconv (e.g. Linux), there is additional
115 -- notation for specifying how illegal characters are handled:
117 -- * a suffix of @\/\/IGNORE@, e.g. @UTF-8\/\/IGNORE@, will cause
118 -- all illegal sequences on input to be ignored, and on output
119 -- will drop all code points that have no representation in the target encoding.
122 -- * a suffix of @\/\/TRANSLIT@ will choose a replacement character
123 -- for illegal sequences or code points.
125 mkTextEncoding :: String -> IO TextEncoding
-- NOTE(review): the point-free equation and the pattern-matching equations
-- below are compile-time alternatives; confirm the CPP else/endif directives
-- that separate them are present.
126 #if !defined(mingw32_HOST_OS)
127 mkTextEncoding = Iconv.mkTextEncoding -- mingw32_HOST_OS undefined: delegate to GHC.IO.Encoding.Iconv
129 mkTextEncoding "UTF-8" = return utf8 -- fallback equations: the name must match exactly (case-sensitive)
130 mkTextEncoding "UTF-16" = return utf16
131 mkTextEncoding "UTF-16LE" = return utf16le
132 mkTextEncoding "UTF-16BE" = return utf16be
133 mkTextEncoding "UTF-32" = return utf32
134 mkTextEncoding "UTF-32LE" = return utf32le
135 mkTextEncoding "UTF-32BE" = return utf32be
136 mkTextEncoding e = ioException -- any other name is rejected with a NoSuchThing IOError
137 (IOError Nothing NoSuchThing "mkTextEncoding"
138 ("unknown encoding:" ++ e) Nothing Nothing) -- NOTE(review): message has no space after the colon
-- | Latin-1 encoding step exposed as a plain buffer function; bound directly
-- to 'Latin1.latin1_encode'. It takes a 'CharBuffer' and a byte 'Buffer' and
-- returns a pair of the same two buffer types in 'IO' (presumably the
-- remaining input and the filled output; confirm in GHC.IO.Encoding.Latin1).
141 latin1_encode :: CharBuffer -> Buffer Word8 -> IO (CharBuffer, Buffer Word8)
142 latin1_encode = Latin1.latin1_encode -- unchecked, used for binary
143 --latin1_encode = unsafePerformIO $ do mkTextEncoder Iconv.latin1 >>= return.encode
-- | Latin-1 decoding step exposed as a plain buffer function; bound directly
-- to 'Latin1.latin1_decode'. It takes a byte 'Buffer' and a 'CharBuffer' and
-- returns a pair of the same two buffer types in 'IO' (presumably the
-- remaining input and the filled output; confirm in GHC.IO.Encoding.Latin1).
145 latin1_decode :: Buffer Word8 -> CharBuffer -> IO (Buffer Word8, CharBuffer)
146 latin1_decode = Latin1.latin1_decode
-- NOTE(review): the disabled alternative below ends in return.encode even
-- though this is the decoder; check before ever re-enabling it.
147 --latin1_decode = unsafePerformIO $ do mkTextDecoder Iconv.latin1 >>= return.encode