-{-# OPTIONS_GHC -fno-implicit-prelude -funbox-strict-fields #-}
+{-# OPTIONS_GHC -XNoImplicitPrelude -funbox-strict-fields #-}
-----------------------------------------------------------------------------
-- |
-- Module : GHC.IO.Encoding
import GHC.Word
#if !defined(mingw32_HOST_OS)
import qualified GHC.IO.Encoding.Iconv as Iconv
+#else
+import qualified GHC.IO.Encoding.CodePage as CodePage
+import Text.Read (reads)
#endif
import qualified GHC.IO.Encoding.Latin1 as Latin1
import qualified GHC.IO.Encoding.UTF8 as UTF8
-- Platform-specific choice of the locale encoding: hosts other than
-- mingw32 take it from Iconv, while the mingw32 branch switches from
-- Latin1.latin1 to CodePage.localeEncoding.
#if !defined(mingw32_HOST_OS)
localeEncoding = Iconv.localeEncoding
#else
-localeEncoding = Latin1.latin1
+localeEncoding = CodePage.localeEncoding
#endif
-- | Look up the named Unicode encoding. May fail with
--
-- * 'isDoesNotExistError' if the encoding is unknown
--
--- The set of known encodings is system-dependent.
+-- The set of known encodings is system-dependent, but includes at least:
+--
+-- * @UTF-8@
+--
+-- * @UTF-16@, @UTF-16BE@, @UTF-16LE@
+--
+-- * @UTF-32@, @UTF-32BE@, @UTF-32LE@
+--
+-- On systems using GNU iconv (e.g. Linux), there is additional
+-- notation for specifying how illegal characters are handled:
+--
+-- * a suffix of @\/\/IGNORE@, e.g. @UTF-8\/\/IGNORE@, will cause
+-- all illegal sequences on input to be ignored, and on output
+-- will drop all code points that have no representation in the
+-- target encoding.
+--
+-- * a suffix of @\/\/TRANSLIT@ will choose a replacement character
+-- for illegal sequences or code points.
+--
+-- On Windows, you can access supported code pages with the prefix
+-- @CP@; for example, @\"CP1250\"@.
--
mkTextEncoding :: String -> IO TextEncoding
#if !defined(mingw32_HOST_OS)
mkTextEncoding "UTF-32" = return utf32
mkTextEncoding "UTF-32LE" = return utf32le
mkTextEncoding "UTF-32BE" = return utf32be
+mkTextEncoding ('C':'P':n)
+ | [(cp,"")] <- reads n = return $ CodePage.codePageEncoding cp
mkTextEncoding e = ioException
(IOError Nothing NoSuchThing "mkTextEncoding"
("unknown encoding:" ++ e) Nothing Nothing)