3 , ForeignFunctionInterface
4 , NondecreasingIndentation
7 -----------------------------------------------------------------------------
9 -- Module : GHC.IO.Encoding.Iconv
10 -- Copyright : (c) The University of Glasgow, 2008-2009
11 -- License : see libraries/base/LICENSE
13 -- Maintainer : libraries@haskell.org
14 -- Stability : internal
15 -- Portability : non-portable
17 -- This module provides text encoding/decoding using iconv
19 -----------------------------------------------------------------------------
22 module GHC.IO.Encoding.Iconv (
23 #if !defined(mingw32_HOST_OS)
24 iconvEncoding, mkIconvEncoding,
25 localeEncoding, mkLocaleEncoding
30 #include "HsBaseConfig.h"
32 #if !defined(mingw32_HOST_OS)
34 import Foreign hiding (unsafePerformIO)
39 import GHC.IO.Encoding.Failure
40 import GHC.IO.Encoding.Types
41 import GHC.List (span)
45 import System.IO.Unsafe (unsafePerformIO)
46 import System.Posix.Internals
51 iconv_trace :: String -> IO ()
53 | c_DEBUG_DUMP = puts s
54 | otherwise = return ()
56 -- -----------------------------------------------------------------------------
57 -- iconv encoders/decoders
59 {-# NOINLINE localeEncodingName #-}
60 localeEncodingName :: String
61 localeEncodingName = unsafePerformIO $ do
62 -- Use locale_charset() or nl_langinfo(CODESET) to get the encoding
63 -- if we have either of them.
64 cstr <- c_localeEncoding
65 peekCAString cstr -- Assume charset names are ASCII
67 localeEncoding :: TextEncoding
68 localeEncoding = mkLocaleEncoding ErrorOnCodingFailure
70 mkLocaleEncoding :: CodingFailureMode -> TextEncoding
71 mkLocaleEncoding cfm = unsafePerformIO $ mkIconvEncoding cfm localeEncodingName
73 -- We hope iconv_t is a storable type. It should be, since it has at least the
74 -- value -1, which is a possible return value from iconv_open.
75 type IConv = CLong -- ToDo: (#type iconv_t)
77 foreign import ccall unsafe "hs_iconv_open"
78 hs_iconv_open :: CString -> CString -> IO IConv
80 foreign import ccall unsafe "hs_iconv_close"
81 hs_iconv_close :: IConv -> IO CInt
83 foreign import ccall unsafe "hs_iconv"
84 hs_iconv :: IConv -> Ptr CString -> Ptr CSize -> Ptr CString -> Ptr CSize
87 foreign import ccall unsafe "localeEncoding"
88 c_localeEncoding :: IO CString
91 #ifdef WORDS_BIGENDIAN
92 haskellChar | charSize == 2 = "UTF-16BE"
93 | otherwise = "UTF-32BE"
95 haskellChar | charSize == 2 = "UTF-16LE"
96 | otherwise = "UTF-32LE"
100 char_shift | charSize == 2 = 1
103 iconvEncoding :: String -> IO TextEncoding
104 iconvEncoding = mkIconvEncoding ErrorOnCodingFailure
106 mkIconvEncoding :: CodingFailureMode -> String -> IO TextEncoding
107 mkIconvEncoding cfm charset = do
108 return (TextEncoding {
109 textEncodingName = charset,
110 mkTextDecoder = newIConv raw_charset (haskellChar ++ suffix) (recoverDecode cfm) iconvDecode,
111 mkTextEncoder = newIConv haskellChar charset (recoverEncode cfm) iconvEncode})
113 -- An annoying feature of GNU iconv is that the //PREFIXES only take
114 -- effect when they appear on the tocode parameter to iconv_open:
115 (raw_charset, suffix) = span (/= '/') charset
117 newIConv :: String -> String
118 -> (Buffer a -> Buffer b -> IO (Buffer a, Buffer b))
119 -> (IConv -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b))
120 -> IO (BufferCodec a b ())
121 newIConv from to rec fn =
122 -- Assume charset names are ASCII
123 withCAString from $ \ from_str ->
124 withCAString to $ \ to_str -> do
125 iconvt <- throwErrnoIfMinus1 "mkTextEncoding" $ hs_iconv_open to_str from_str
126 let iclose = throwErrnoIfMinus1_ "Iconv.close" $ hs_iconv_close iconvt
131 -- iconv doesn't supply a way to save/restore the state
132 getState = return (),
133 setState = const $ return ()
136 iconvDecode :: IConv -> DecodeBuffer
137 iconvDecode iconv_t ibuf obuf = iconvRecode iconv_t ibuf 0 obuf char_shift
139 iconvEncode :: IConv -> EncodeBuffer
140 iconvEncode iconv_t ibuf obuf = iconvRecode iconv_t ibuf char_shift obuf 0
142 iconvRecode :: IConv -> Buffer a -> Int -> Buffer b -> Int
143 -> IO (CodingProgress, Buffer a, Buffer b)
145 input@Buffer{ bufRaw=iraw, bufL=ir, bufR=iw, bufSize=_ } iscale
146 output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow, bufSize=os } oscale
148 iconv_trace ("haskelChar=" ++ show haskellChar)
149 iconv_trace ("iconvRecode before, input=" ++ show (summaryBuffer input))
150 iconv_trace ("iconvRecode before, output=" ++ show (summaryBuffer output))
151 withRawBuffer iraw $ \ piraw -> do
152 withRawBuffer oraw $ \ poraw -> do
153 with (piraw `plusPtr` (ir `shiftL` iscale)) $ \ p_inbuf -> do
154 with (poraw `plusPtr` (ow `shiftL` oscale)) $ \ p_outbuf -> do
155 with (fromIntegral ((iw-ir) `shiftL` iscale)) $ \ p_inleft -> do
156 with (fromIntegral ((os-ow) `shiftL` oscale)) $ \ p_outleft -> do
157 res <- hs_iconv iconv_t p_inbuf p_inleft p_outbuf p_outleft
158 new_inleft <- peek p_inleft
159 new_outleft <- peek p_outleft
161 new_inleft' = fromIntegral new_inleft `shiftR` iscale
162 new_outleft' = fromIntegral new_outleft `shiftR` oscale
164 | new_inleft == 0 = input { bufL = 0, bufR = 0 }
165 | otherwise = input { bufL = iw - new_inleft' }
166 new_output = output{ bufR = os - new_outleft' }
167 iconv_trace ("iconv res=" ++ show res)
168 iconv_trace ("iconvRecode after, input=" ++ show (summaryBuffer new_input))
169 iconv_trace ("iconvRecode after, output=" ++ show (summaryBuffer new_output))
171 then do -- all input translated
172 return (InputUnderflow, new_input, new_output)
176 e | e == e2BIG -> return (OutputUnderflow, new_input, new_output)
177 | e == eINVAL -> return (InputUnderflow, new_input, new_output)
178 -- Sometimes iconv reports EILSEQ for a
179 -- character in the input even when there is no room
180 -- in the output; in this case we might be about to
181 -- change the encoding anyway, so the following bytes
182 -- could very well be in a different encoding.
184 -- Because we can only say InvalidSequence if there is at least
185 -- one element left in the output, we have to special case this.
186 | e == eILSEQ -> return (if new_outleft' == 0 then OutputUnderflow else InvalidSequence, new_input, new_output)
188 iconv_trace ("iconv returned error: " ++ show (errnoToIOError "iconv" e Nothing Nothing))
189 throwErrno "iconvRecoder"
191 #endif /* !mingw32_HOST_OS */