1 {-# OPTIONS_GHC -fno-implicit-prelude -#include "HsBase.h" #-}
2 -----------------------------------------------------------------------------
4 -- Module : GHC.IO.Encoding.Iconv
5 -- Copyright : (c) The University of Glasgow, 2008-2009
6 -- License : see libraries/base/LICENSE
8 -- Maintainer : libraries@haskell.org
9 -- Stability : internal
10 -- Portability : non-portable
12 -- This module provides text encoding/decoding using iconv
14 -----------------------------------------------------------------------------
17 module GHC.IO.Encoding.Iconv (
18 #if !defined(mingw32_HOST_OS)
22 utf16, utf16le, utf16be,
23 utf32, utf32le, utf32be,
28 #if !defined(mingw32_HOST_OS)
39 import GHC.IO.Encoding.Types
44 import System.Posix.Internals
47 iconv_trace :: String -> IO ()
51 iconv_trace s = puts s
53 puts :: String -> IO ()
54 puts s = do withCStringLen (s++"\n") $ \(p,len) ->
55 c_write 1 p (fromIntegral len)
60 iconv_trace _ = return ()
64 -- -----------------------------------------------------------------------------
65 -- iconv encoders/decoders
67 {-# NOINLINE latin1 #-}
68 latin1 :: TextEncoding
69 latin1 = unsafePerformIO (mkTextEncoding "Latin1")
73 utf8 = unsafePerformIO (mkTextEncoding "UTF8")
75 {-# NOINLINE utf16 #-}
77 utf16 = unsafePerformIO (mkTextEncoding "UTF16")
79 {-# NOINLINE utf16le #-}
80 utf16le :: TextEncoding
81 utf16le = unsafePerformIO (mkTextEncoding "UTF16LE")
83 {-# NOINLINE utf16be #-}
84 utf16be :: TextEncoding
85 utf16be = unsafePerformIO (mkTextEncoding "UTF16BE")
87 {-# NOINLINE utf32 #-}
89 utf32 = unsafePerformIO (mkTextEncoding "UTF32")
91 {-# NOINLINE utf32le #-}
92 utf32le :: TextEncoding
93 utf32le = unsafePerformIO (mkTextEncoding "UTF32LE")
95 {-# NOINLINE utf32be #-}
96 utf32be :: TextEncoding
97 utf32be = unsafePerformIO (mkTextEncoding "UTF32BE")
99 {-# NOINLINE localeEncoding #-}
100 localeEncoding :: TextEncoding
101 localeEncoding = unsafePerformIO (mkTextEncoding "")
103 -- We hope iconv_t is a storable type. It should be, since it has at least the
104 -- value -1, which is a possible return value from iconv_open.
105 type IConv = CLong -- ToDo: (#type iconv_t)
107 foreign import ccall unsafe "iconv_open"
108 iconv_open :: CString -> CString -> IO IConv
110 foreign import ccall unsafe "iconv_close"
111 iconv_close :: IConv -> IO CInt
113 foreign import ccall unsafe "iconv"
114 iconv :: IConv -> Ptr CString -> Ptr CSize -> Ptr CString -> Ptr CSize
117 haskellChar :: String
118 #ifdef WORDS_BIGENDIAN
119 haskellChar | charSize == 2 = "UTF16BE"
120 | otherwise = "UCS-4"
122 haskellChar | charSize == 2 = "UTF16LE"
123 | otherwise = "UCS-4LE"
127 char_shift | charSize == 2 = 1
130 mkTextEncoding :: String -> IO TextEncoding
131 mkTextEncoding charset = do
132 return (TextEncoding {
133 mkTextDecoder = newIConv charset haskellChar iconvDecode,
134 mkTextEncoder = newIConv haskellChar charset iconvEncode})
136 newIConv :: String -> String
137 -> (IConv -> Buffer a -> Buffer b -> IO (Buffer a, Buffer b))
138 -> IO (BufferCodec a b)
139 newIConv from to fn =
140 withCString from $ \ from_str ->
141 withCString to $ \ to_str -> do
142 iconvt <- throwErrnoIfMinus1 "mkTextEncoding" $ iconv_open to_str from_str
143 let iclose = do throwErrnoIfMinus1 "Iconv.close" $ iconv_close iconvt
150 iconvDecode :: IConv -> Buffer Word8 -> Buffer CharBufElem
151 -> IO (Buffer Word8, Buffer CharBufElem)
152 iconvDecode iconv_t ibuf obuf = iconvRecode iconv_t ibuf 0 obuf char_shift
154 iconvEncode :: IConv -> Buffer CharBufElem -> Buffer Word8
155 -> IO (Buffer CharBufElem, Buffer Word8)
156 iconvEncode iconv_t ibuf obuf = iconvRecode iconv_t ibuf char_shift obuf 0
158 iconvRecode :: IConv -> Buffer a -> Int -> Buffer b -> Int
159 -> IO (Buffer a, Buffer b)
161 input@Buffer{ bufRaw=iraw, bufL=ir, bufR=iw, bufSize=_ } iscale
162 output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow, bufSize=os } oscale
164 iconv_trace ("iconvRecode before, input=" ++ show (summaryBuffer input))
165 iconv_trace ("iconvRecode before, output=" ++ show (summaryBuffer output))
166 withRawBuffer iraw $ \ piraw -> do
167 withRawBuffer oraw $ \ poraw -> do
168 with (piraw `plusPtr` (ir `shiftL` iscale)) $ \ p_inbuf -> do
169 with (poraw `plusPtr` (ow `shiftL` oscale)) $ \ p_outbuf -> do
170 with (fromIntegral ((iw-ir) `shiftL` iscale)) $ \ p_inleft -> do
171 with (fromIntegral ((os-ow) `shiftL` oscale)) $ \ p_outleft -> do
172 res <- iconv iconv_t p_inbuf p_inleft p_outbuf p_outleft
173 new_inleft <- peek p_inleft
174 new_outleft <- peek p_outleft
176 new_inleft' = fromIntegral new_inleft `shiftR` iscale
177 new_outleft' = fromIntegral new_outleft `shiftR` oscale
179 | new_inleft == 0 = input { bufL = 0, bufR = 0 }
180 | otherwise = input { bufL = iw - new_inleft' }
181 new_output = output{ bufR = os - new_outleft' }
182 iconv_trace ("iconv res=" ++ show res)
183 iconv_trace ("iconvRecode after, input=" ++ show (summaryBuffer new_input))
184 iconv_trace ("iconvRecode after, output=" ++ show (summaryBuffer new_output))
186 then do -- all input translated
187 return (new_input, new_output)
192 || (e == e2BIG || e == eILSEQ) && new_inleft' /= (iw-ir) -> do
193 iconv_trace ("iconv ignoring error: " ++ show (errnoToIOError "iconv" e Nothing Nothing))
194 -- Output overflow is relatively harmless, unless
195 -- we made no progress at all.
197 -- Similarly, we ignore EILSEQ unless we converted no
198 -- characters. Sometimes iconv reports EILSEQ for a
199 -- character in the input even when there is no room
200 -- in the output; in this case we might be about to
201 -- change the encoding anyway, so the following bytes
202 -- could very well be in a different encoding.
203 -- This also helps with pinpointing EILSEQ errors: we
204 -- don't report it until the rest of the characters in
205 -- the buffer have been drained.
206 return (new_input, new_output)
209 throwErrno "iconvRecoder"
210 -- illegal sequence, or some other error
212 #endif /* !mingw32_HOST_OS */