#if !defined(mingw32_HOST_OS)
) where
#else
- codePageEncoding,
- localeEncoding
+ codePageEncoding, mkCodePageEncoding,
+ localeEncoding, mkLocaleEncoding
) where
import GHC.Base
import GHC.Enum
import GHC.Word
import GHC.IO (unsafePerformIO)
+import GHC.IO.Encoding.Failure
import GHC.IO.Encoding.Types
import GHC.IO.Buffer
-import GHC.IO.Exception
import Data.Bits
import Data.Maybe
import Data.List (lookup)
import GHC.IO.Encoding.CodePage.Table
-import GHC.IO.Encoding.Latin1 (latin1)
-import GHC.IO.Encoding.UTF8 (utf8)
-import GHC.IO.Encoding.UTF16 (utf16le, utf16be)
-import GHC.IO.Encoding.UTF32 (utf32le, utf32be)
+import GHC.IO.Encoding.Latin1 (mkLatin1)
+import GHC.IO.Encoding.UTF8 (mkUTF8)
+import GHC.IO.Encoding.UTF16 (mkUTF16le, mkUTF16be)
+import GHC.IO.Encoding.UTF32 (mkUTF32le, mkUTF32be)
-- note CodePage = UInt which might not work on Win64. But the Win32 package
-- also has this issue.
foreign import stdcall unsafe "windows.h GetACP"
getACP :: IO Word32
-{-# NOINLINE localeEncoding #-}
+{-# NOINLINE currentCodePage #-}
+currentCodePage :: Word32
+currentCodePage = unsafePerformIO getCurrentCodePage
+
localeEncoding :: TextEncoding
-localeEncoding = unsafePerformIO $ fmap codePageEncoding getCurrentCodePage
-
+localeEncoding = mkLocaleEncoding ErrorOnCodingFailure
+
+mkLocaleEncoding :: CodingFailureMode -> TextEncoding
+mkLocaleEncoding cfm = mkCodePageEncoding cfm currentCodePage
+
codePageEncoding :: Word32 -> TextEncoding
-codePageEncoding 65001 = utf8
-codePageEncoding 1200 = utf16le
-codePageEncoding 1201 = utf16be
-codePageEncoding 12000 = utf32le
-codePageEncoding 12001 = utf32be
-codePageEncoding cp = maybe latin1 (buildEncoding cp) (lookup cp codePageMap)
-
-buildEncoding :: Word32 -> CodePageArrays -> TextEncoding
-buildEncoding cp SingleByteCP {decoderArray = dec, encoderArray = enc}
+codePageEncoding = mkCodePageEncoding ErrorOnCodingFailure
+
+mkCodePageEncoding :: CodingFailureMode -> Word32 -> TextEncoding
+mkCodePageEncoding cfm 65001 = mkUTF8 cfm
+mkCodePageEncoding cfm 1200 = mkUTF16le cfm
+mkCodePageEncoding cfm 1201 = mkUTF16be cfm
+mkCodePageEncoding cfm 12000 = mkUTF32le cfm
+mkCodePageEncoding cfm 12001 = mkUTF32be cfm
+mkCodePageEncoding cfm cp = maybe (mkLatin1 cfm) (buildEncoding cfm cp) (lookup cp codePageMap)
+
+buildEncoding :: CodingFailureMode -> Word32 -> CodePageArrays -> TextEncoding
+buildEncoding cfm cp SingleByteCP {decoderArray = dec, encoderArray = enc}
= TextEncoding {
- textEncodingName = "CP" ++ show cp,
- mkTextDecoder = return $ simpleCodec
- $ decodeFromSingleByte dec
- , mkTextEncoder = return $ simpleCodec $ encodeToSingleByte enc
+ textEncodingName = "CP" ++ show cp
+ , mkTextDecoder = return $ simpleCodec (recoverDecode cfm) $ decodeFromSingleByte dec
+ , mkTextEncoder = return $ simpleCodec (recoverEncode cfm) $ encodeToSingleByte enc
}
simpleCodec :: (Buffer from -> Buffer to -> IO (Buffer from, Buffer to))
+ -> (Buffer from -> Buffer to -> IO (CodingProgress, Buffer from, Buffer to))
-> BufferCodec from to ()
-simpleCodec f = BufferCodec {encode = f, close = return (), getState = return (),
- setState = return }
+simpleCodec r f = BufferCodec {
+ encode = f,
+ recover = r,
+ close = return (),
+ getState = return (),
+ setState = return
+ }
decodeFromSingleByte :: ConvArray Char -> DecodeBuffer
decodeFromSingleByte convArr
input@Buffer { bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output@Buffer { bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
= let
- done !ir !ow = return (if ir==iw then input{ bufL=0, bufR=0}
- else input{ bufL=ir},
- output {bufR=ow})
+ done why !ir !ow = return (why,
+ if ir==iw then input{ bufL=0, bufR=0}
+ else input{ bufL=ir},
+ output {bufR=ow})
loop !ir !ow
- | ow >= os || ir >= iw = done ir ow
+ | ow >= os = done OutputUnderflow ir ow
+ | ir >= iw = done InputUnderflow ir ow
| otherwise = do
b <- readWord8Buf iraw ir
let c = lookupConv convArr b
ow' <- writeCharBuf oraw ow c
loop (ir+1) ow'
where
- invalid = if ir > ir0 then done ir ow else ioe_decodingError
+ invalid = done InvalidSequence ir ow
in loop ir0 ow0
encodeToSingleByte :: CompactArray Char Word8 -> EncodeBuffer
input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
= let
- done !ir !ow = return (if ir==iw then input { bufL=0, bufR=0 }
- else input { bufL=ir },
- output {bufR=ow})
+ done why !ir !ow = return (why,
+ if ir==iw then input { bufL=0, bufR=0 }
+ else input { bufL=ir },
+ output {bufR=ow})
loop !ir !ow
- | ow >= os || ir >= iw = done ir ow
+ | ow >= os = done OutputUnderflow ir ow
+ | ir >= iw = done InputUnderflow ir ow
| otherwise = do
(c,ir') <- readCharBuf iraw ir
case lookupCompact maxChar indices values c of
writeWord8Buf oraw ow b
loop ir' (ow+1)
where
- invalid = if ir > ir0 then done ir ow else ioe_encodingError
+ invalid = done InvalidSequence ir ow
in
loop ir0 ow0
-ioe_decodingError :: IO a
-ioe_decodingError = ioException
- (IOError Nothing InvalidArgument "codePageEncoding"
- "invalid code page byte sequence" Nothing Nothing)
-
-ioe_encodingError :: IO a
-ioe_encodingError = ioException
- (IOError Nothing InvalidArgument "codePageEncoding"
- "character is not in the code page" Nothing Nothing)
-
--------------------------------------------
-- Array access functions