-- #hide
module GHC.IO.Encoding.Iconv (
#if !defined(mingw32_HOST_OS)
- mkTextEncoding,
- latin1,
- utf8,
- utf16, utf16le, utf16be,
- utf32, utf32le, utf32be,
- localeEncoding
+ iconvEncoding, mkIconvEncoding,
+ localeEncoding, mkLocaleEncoding
#endif
) where
import Data.Maybe
import GHC.Base
import GHC.IO.Buffer
+import GHC.IO.Encoding.Failure
import GHC.IO.Encoding.Types
import GHC.List (span)
import GHC.Num
| c_DEBUG_DUMP = puts s
| otherwise = return ()
-puts :: String -> IO ()
-puts s = do _ <- withCAStringLen (s ++ "\n") $ \(p, len) ->
- -- In reality should be withCString, but assume ASCII to avoid loop
- c_write 1 (castPtr p) (fromIntegral len)
- return ()
-
-- -----------------------------------------------------------------------------
-- iconv encoders/decoders
-{-# NOINLINE latin1 #-}
-latin1 :: TextEncoding
-latin1 = unsafePerformIO (mkTextEncoding "Latin1")
-
-{-# NOINLINE utf8 #-}
-utf8 :: TextEncoding
-utf8 = unsafePerformIO (mkTextEncoding "UTF8")
-
-{-# NOINLINE utf16 #-}
-utf16 :: TextEncoding
-utf16 = unsafePerformIO (mkTextEncoding "UTF16")
-
-{-# NOINLINE utf16le #-}
-utf16le :: TextEncoding
-utf16le = unsafePerformIO (mkTextEncoding "UTF16LE")
-
-{-# NOINLINE utf16be #-}
-utf16be :: TextEncoding
-utf16be = unsafePerformIO (mkTextEncoding "UTF16BE")
-
-{-# NOINLINE utf32 #-}
-utf32 :: TextEncoding
-utf32 = unsafePerformIO (mkTextEncoding "UTF32")
-
-{-# NOINLINE utf32le #-}
-utf32le :: TextEncoding
-utf32le = unsafePerformIO (mkTextEncoding "UTF32LE")
-
-{-# NOINLINE utf32be #-}
-utf32be :: TextEncoding
-utf32be = unsafePerformIO (mkTextEncoding "UTF32BE")
-
{-# NOINLINE localeEncodingName #-}
localeEncodingName :: String
localeEncodingName = unsafePerformIO $ do
cstr <- c_localeEncoding
peekCAString cstr -- Assume charset names are ASCII
-{-# NOINLINE localeEncoding #-}
localeEncoding :: TextEncoding
-localeEncoding = unsafePerformIO $ mkTextEncoding localeEncodingName
+localeEncoding = mkLocaleEncoding ErrorOnCodingFailure
+
+mkLocaleEncoding :: CodingFailureMode -> TextEncoding
+mkLocaleEncoding cfm = unsafePerformIO $ mkIconvEncoding cfm localeEncodingName
-- We hope iconv_t is a storable type. It should be, since it has at least the
-- value -1, which is a possible return value from iconv_open.
char_shift | charSize == 2 = 1
| otherwise = 2
-mkTextEncoding :: String -> IO TextEncoding
-mkTextEncoding charset = do
+iconvEncoding :: String -> IO TextEncoding
+iconvEncoding = mkIconvEncoding ErrorOnCodingFailure
+
+mkIconvEncoding :: CodingFailureMode -> String -> IO TextEncoding
+mkIconvEncoding cfm charset = do
return (TextEncoding {
textEncodingName = charset,
- mkTextDecoder = newIConv raw_charset (haskellChar ++ suffix) iconvDecode,
- mkTextEncoder = newIConv haskellChar charset iconvEncode})
+ mkTextDecoder = newIConv raw_charset (haskellChar ++ suffix) (recoverDecode cfm) iconvDecode,
+ mkTextEncoder = newIConv haskellChar charset (recoverEncode cfm) iconvEncode})
where
-- An annoying feature of GNU iconv is that the //PREFIXES only take
-- effect when they appear on the tocode parameter to iconv_open:
(raw_charset, suffix) = span (/= '/') charset
newIConv :: String -> String
- -> (IConv -> Buffer a -> Buffer b -> IO (Buffer a, Buffer b))
+ -> (Buffer a -> Buffer b -> IO (Buffer a, Buffer b))
+ -> (IConv -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b))
-> IO (BufferCodec a b ())
-newIConv from to fn =
+newIConv from to rec fn =
-- Assume charset names are ASCII
withCAString from $ \ from_str ->
withCAString to $ \ to_str -> do
let iclose = throwErrnoIfMinus1_ "Iconv.close" $ hs_iconv_close iconvt
return BufferCodec{
encode = fn iconvt,
+ recover = rec,
close = iclose,
-- iconv doesn't supply a way to save/restore the state
getState = return (),
setState = const $ return ()
}
-iconvDecode :: IConv -> Buffer Word8 -> Buffer CharBufElem
- -> IO (Buffer Word8, Buffer CharBufElem)
+iconvDecode :: IConv -> DecodeBuffer
iconvDecode iconv_t ibuf obuf = iconvRecode iconv_t ibuf 0 obuf char_shift
-iconvEncode :: IConv -> Buffer CharBufElem -> Buffer Word8
- -> IO (Buffer CharBufElem, Buffer Word8)
+iconvEncode :: IConv -> EncodeBuffer
iconvEncode iconv_t ibuf obuf = iconvRecode iconv_t ibuf char_shift obuf 0
iconvRecode :: IConv -> Buffer a -> Int -> Buffer b -> Int
- -> IO (Buffer a, Buffer b)
+ -> IO (CodingProgress, Buffer a, Buffer b)
iconvRecode iconv_t
input@Buffer{ bufRaw=iraw, bufL=ir, bufR=iw, bufSize=_ } iscale
output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow, bufSize=os } oscale
iconv_trace ("iconvRecode after, output=" ++ show (summaryBuffer new_output))
if (res /= -1)
then do -- all input translated
- return (new_input, new_output)
+ return (InputUnderflow, new_input, new_output)
else do
errno <- getErrno
case errno of
- e | e == eINVAL || e == e2BIG
- || e == eILSEQ && new_inleft' /= (iw-ir) -> do
- iconv_trace ("iconv ignoring error: " ++ show (errnoToIOError "iconv" e Nothing Nothing))
- -- Output overflow is harmless
- --
- -- Similarly, we ignore EILSEQ unless we converted no
- -- characters. Sometimes iconv reports EILSEQ for a
- -- character in the input even when there is no room
- -- in the output; in this case we might be about to
- -- change the encoding anyway, so the following bytes
- -- could very well be in a different encoding.
- -- This also helps with pinpointing EILSEQ errors: we
- -- don't report it until the rest of the characters in
- -- the buffer have been drained.
- return (new_input, new_output)
-
- e -> do
- iconv_trace ("iconv returned error: " ++ show (errnoToIOError "iconv" e Nothing Nothing))
- throwErrno "iconvRecoder"
- -- illegal sequence, or some other error
+ e | e == e2BIG -> return (OutputUnderflow, new_input, new_output)
+ | e == eINVAL -> return (InputUnderflow, new_input, new_output)
+ -- Sometimes iconv reports EILSEQ for a
+ -- character in the input even when there is no room
+ -- in the output; in this case we might be about to
+ -- change the encoding anyway, so the following bytes
+ -- could very well be in a different encoding.
+ --
+ -- Because we can only say InvalidSequence if there is at least
+ -- one element left in the output, we have to special case this.
+ | e == eILSEQ -> return (if new_outleft' == 0 then OutputUnderflow else InvalidSequence, new_input, new_output)
+ | otherwise -> do
+ iconv_trace ("iconv returned error: " ++ show (errnoToIOError "iconv" e Nothing Nothing))
+ throwErrno "iconvRecoder"
#endif /* !mingw32_HOST_OS */