codePageEncoding 1201 = utf16be
codePageEncoding 12000 = utf32le
codePageEncoding 12001 = utf32be
-codePageEncoding cp = maybe latin1 buildEncoding (lookup cp codePageMap)
+codePageEncoding cp = maybe latin1 (buildEncoding cp) (lookup cp codePageMap)
-buildEncoding :: CodePageArrays -> TextEncoding
-buildEncoding SingleByteCP {decoderArray = dec, encoderArray = enc}
+buildEncoding :: Word32 -> CodePageArrays -> TextEncoding
+buildEncoding cp SingleByteCP {decoderArray = dec, encoderArray = enc}
= TextEncoding {
+ textEncodingName = "CP" ++ show cp,
mkTextDecoder = return $ simpleCodec
$ decodeFromSingleByte dec
, mkTextEncoder = return $ simpleCodec $ encodeToSingleByte enc
-- | Wrap the named character set as a 'TextEncoding'.
--
-- @charset@ is stored unchanged as 'textEncodingName' and is also handed to
-- 'newIConv' for both directions: the decoder is built with
-- @newIConv charset haskellChar@ and the encoder with the two names swapped.
-- Both 'newIConv' calls are stored as the record's actions and are not run
-- here, so this body performs no check of @charset@ itself.
mkTextEncoding :: String -> IO TextEncoding
mkTextEncoding charset = do
return (TextEncoding {
+ textEncodingName = charset,
mkTextDecoder = newIConv charset haskellChar iconvDecode,
mkTextEncoder = newIConv haskellChar charset iconvEncode})
-- Latin1

-- | The Latin-1 'TextEncoding': named @ISO8859-1@, decoding with
-- 'latin1_DF' and encoding with 'latin1_EF'.  The decoder carries no
-- state (its state type is @()@, per the signature of 'latin1_DF').
latin1 :: TextEncoding
-latin1 = TextEncoding { mkTextDecoder = latin1_DF,
+latin1 = TextEncoding { textEncodingName = "ISO8859-1",
+ mkTextDecoder = latin1_DF,
mkTextEncoder = latin1_EF }
latin1_DF :: IO (TextDecoder ())
})
-- | Variant of 'latin1' that shares its decoder ('latin1_DF') but encodes
-- with 'latin1_checked_EF' instead of 'latin1_EF'.
--
-- Presumably the checked encoder rejects characters outside Latin-1 --
-- confirm against 'latin1_checked_EF', whose body is not shown here.
--
-- NOTE(review): 'textEncodingName' is documented as a string that can be
-- passed to 'mkTextEncoding'; confirm that @ISO8859-1(checked)@ is actually
-- accepted there.
latin1_checked :: TextEncoding
-latin1_checked = TextEncoding { mkTextDecoder = latin1_DF,
+latin1_checked = TextEncoding { textEncodingName = "ISO8859-1(checked)",
+ mkTextDecoder = latin1_DF,
mkTextEncoder = latin1_checked_EF }
latin1_checked_EF :: IO (TextEncoder ())
import GHC.Base
import GHC.Word
+import GHC.Show
-- import GHC.IO
import GHC.IO.Buffer
-- of bytes. The 'TextEncoding' for UTF-8 is 'utf8'.
data TextEncoding
= forall dstate estate . TextEncoding {
+ textEncodingName :: String,
+ -- ^ a string that can be passed to 'mkTextEncoding' to
+ -- create an equivalent 'TextEncoding'.
mkTextDecoder :: IO (TextDecoder dstate),
-- ^ action yielding a 'TextDecoder'; its state type @dstate@ is
-- existentially quantified, so it is not visible to users of the record.
mkTextEncoder :: IO (TextEncoder estate)
-- ^ action yielding a 'TextEncoder'; its state type @estate@ is
-- existentially quantified in the same way.
}
+
+instance Show TextEncoding where
+ -- | Returns 'textEncodingName' verbatim: the bare name, not a quoted or constructor-style rendering.
+ show te = textEncodingName te
-- The UTF-16 codec: either UTF16BE or UTF16LE with a BOM

-- | 'TextEncoding' named @UTF-16@, built from 'utf16_DF' and 'utf16_EF'.
-- Unlike the fixed-endianness variants, its decoder carries state of type
-- @Maybe DecodeBuffer@ (per the signature of 'utf16_DF').
utf16 :: TextEncoding
-utf16 = TextEncoding { mkTextDecoder = utf16_DF,
+utf16 = TextEncoding { textEncodingName = "UTF-16",
+ mkTextDecoder = utf16_DF,
mkTextEncoder = utf16_EF }
utf16_DF :: IO (TextDecoder (Maybe DecodeBuffer))
-- UTF16LE and UTF16BE

-- | 'TextEncoding' named @UTF-16BE@, built from 'utf16be_DF' and
-- 'utf16be_EF'.  The decoder is stateless (state type @()@, per the
-- signature of 'utf16be_DF').
utf16be :: TextEncoding
-utf16be = TextEncoding { mkTextDecoder = utf16be_DF,
+utf16be = TextEncoding { textEncodingName = "UTF-16BE",
+ mkTextDecoder = utf16be_DF,
mkTextEncoder = utf16be_EF }
utf16be_DF :: IO (TextDecoder ())
})
-- | The UTF-16LE 'TextEncoding', built from 'utf16le_DF' and 'utf16le_EF'.
-- The decoder is stateless (state type @()@, per the signature of
-- 'utf16le_DF').
--
-- The name is spelled @UTF-16LE@, matching the @UTF-16BE@, @UTF-32LE@ and
-- @UTF-32BE@ names of the other fixed-endianness encodings, so that it can
-- be passed back to 'mkTextEncoding' as 'textEncodingName' promises.
utf16le :: TextEncoding
-utf16le = TextEncoding { mkTextDecoder = utf16le_DF,
+utf16le = TextEncoding { textEncodingName = "UTF-16LE",
+ mkTextDecoder = utf16le_DF,
mkTextEncoder = utf16le_EF }
utf16le_DF :: IO (TextDecoder ())
-- The UTF-32 codec: either UTF-32BE or UTF-32LE with a BOM

-- | 'TextEncoding' named @UTF-32@, built from 'utf32_DF' and 'utf32_EF'.
-- Unlike the fixed-endianness variants, its decoder carries state of type
-- @Maybe DecodeBuffer@ (per the signature of 'utf32_DF').
utf32 :: TextEncoding
-utf32 = TextEncoding { mkTextDecoder = utf32_DF,
+utf32 = TextEncoding { textEncodingName = "UTF-32",
+ mkTextDecoder = utf32_DF,
mkTextEncoder = utf32_EF }
utf32_DF :: IO (TextDecoder (Maybe DecodeBuffer))
-- UTF32LE and UTF32BE

-- | 'TextEncoding' named @UTF-32BE@, built from 'utf32be_DF' and
-- 'utf32be_EF'.  The decoder is stateless (state type @()@, per the
-- signature of 'utf32be_DF').
utf32be :: TextEncoding
-utf32be = TextEncoding { mkTextDecoder = utf32be_DF,
+utf32be = TextEncoding { textEncodingName = "UTF-32BE",
+ mkTextDecoder = utf32be_DF,
mkTextEncoder = utf32be_EF }
utf32be_DF :: IO (TextDecoder ())
-- | 'TextEncoding' named @UTF-32LE@, built from 'utf32le_DF' and
-- 'utf32le_EF'.  The decoder is stateless (state type @()@, per the
-- signature of 'utf32le_DF').
utf32le :: TextEncoding
-utf32le = TextEncoding { mkTextDecoder = utf32le_DF,
+utf32le = TextEncoding { textEncodingName = "UTF-32LE",
+ mkTextDecoder = utf32le_DF,
mkTextEncoder = utf32le_EF }
utf32le_DF :: IO (TextDecoder ())
import Data.Maybe
-- | 'TextEncoding' named @UTF-8@, built from 'utf8_DF' and 'utf8_EF'.
-- The decoder is stateless (state type @()@, per the signature of
-- 'utf8_DF').
utf8 :: TextEncoding
-utf8 = TextEncoding { mkTextDecoder = utf8_DF,
+utf8 = TextEncoding { textEncodingName = "UTF-8",
+ mkTextDecoder = utf8_DF,
mkTextEncoder = utf8_EF }
utf8_DF :: IO (TextDecoder ())
})
-- | 'TextEncoding' named @UTF-8BOM@, built from 'utf8_bom_DF' and
-- 'utf8_bom_EF'.  Its decoder carries a 'Bool' of state (per the signature
-- of 'utf8_bom_DF'); presumably this tracks whether the byte-order mark
-- has been dealt with yet -- confirm against 'utf8_bom_DF'.
utf8_bom :: TextEncoding
-utf8_bom = TextEncoding { mkTextDecoder = utf8_bom_DF,
+utf8_bom = TextEncoding { textEncodingName = "UTF-8BOM",
+ mkTextDecoder = utf8_bom_DF,
mkTextEncoder = utf8_bom_EF }
utf8_bom_DF :: IO (TextDecoder Bool)