1 {-# OPTIONS_GHC -fno-implicit-prelude #-}
2 -----------------------------------------------------------------------------
4 -- Module : Foreign.C.String
5 -- Copyright : (c) The FFI task force 2001
6 -- License : BSD-style (see the file libraries/base/LICENSE)
8 -- Maintainer : ffi@haskell.org
9 -- Stability : provisional
10 -- Portability : portable
12 -- Utilities for primitive marshalling of C strings.
14 -- The marshalling converts each Haskell character, representing a Unicode
15 -- code point, to one or more bytes in a manner that, by default, is
16 -- determined by the current locale. As a consequence, no guarantees
17 -- can be made about the relative length of a Haskell string and its
18 -- corresponding C string, and therefore all the marshalling routines
19 -- include memory allocation. The translation between Unicode and the
20 -- encoding of the current locale may be lossy.
22 -----------------------------------------------------------------------------
24 module Foreign.C.String ( -- representation of strings in C
28 CString, -- = Ptr CChar
29 CStringLen, -- = (Ptr CChar, Int)
31 -- ** Using a locale-dependent encoding
33 -- | Currently these functions are identical to their @CAString@ counterparts;
34 -- eventually they will use an encoding determined by the current locale.
36 -- conversion of C strings into Haskell strings
38 peekCString, -- :: CString -> IO String
39 peekCStringLen, -- :: CStringLen -> IO String
41 -- conversion of Haskell strings into C strings
43 newCString, -- :: String -> IO CString
44 newCStringLen, -- :: String -> IO CStringLen
46 -- conversion of Haskell strings into C strings using temporary storage
48 withCString, -- :: String -> (CString -> IO a) -> IO a
49 withCStringLen, -- :: String -> (CStringLen -> IO a) -> IO a
51 charIsRepresentable, -- :: Char -> IO Bool
53 -- ** Using 8-bit characters
55 -- | These variants of the above functions are for use with C libraries
56 -- that are ignorant of Unicode. These functions should be used with
57 -- care, as a loss of information can occur.
59 castCharToCChar, -- :: Char -> CChar
60 castCCharToChar, -- :: CChar -> Char
62 peekCAString, -- :: CString -> IO String
63 peekCAStringLen, -- :: CStringLen -> IO String
64 newCAString, -- :: String -> IO CString
65 newCAStringLen, -- :: String -> IO CStringLen
66 withCAString, -- :: String -> (CString -> IO a) -> IO a
67 withCAStringLen, -- :: String -> (CStringLen -> IO a) -> IO a
71 -- | These variants of the above functions are for use with C libraries
72 -- that encode Unicode using the C @wchar_t@ type in a system-dependent
73 -- way. The only encodings supported are
75 -- * UTF-32 (the C compiler defines @__STDC_ISO_10646__@), or
77 -- * UTF-16 (as used on Windows systems).
79 CWString, -- = Ptr CWchar
80 CWStringLen, -- = (Ptr CWchar, Int)
82 peekCWString, -- :: CWString -> IO String
83 peekCWStringLen, -- :: CWStringLen -> IO String
84 newCWString, -- :: String -> IO CWString
85 newCWStringLen, -- :: String -> IO CWStringLen
86 withCWString, -- :: String -> (CWString -> IO a) -> IO a
87 withCWStringLen, -- :: String -> (CWStringLen -> IO a) -> IO a
91 import Foreign.Marshal.Array
92 import Foreign.C.Types
94 import Foreign.Storable
98 #ifdef __GLASGOW_HASKELL__
105 import Data.Char ( chr, ord )
106 #define unsafeChr chr
109 -----------------------------------------------------------------------------
112 -- representation of strings in C
113 -- ------------------------------
115 -- | A C string is a reference to an array of C characters terminated by NUL.
--
-- This is a plain alias for @Ptr CChar@; the type itself records nothing
-- about who owns the memory or how long it stays valid.
116 type CString = Ptr CChar
118 -- | A string with explicit length information in bytes instead of a
119 -- terminating NUL (allowing NUL characters in the middle of the string).
--
-- The first component points at the characters, the second is their count.
120 type CStringLen = (Ptr CChar, Int)
122 -- exported functions
123 -- ------------------
125 -- * the following routines apply the default conversion when converting the
126 -- C-land character encoding into the Haskell-land character encoding
-- | Read a NUL terminated C string and return it as a Haskell string.
--
-- At present this just forwards to 'peekCAString'.
peekCString :: CString -> IO String
peekCString cp = peekCAString cp
-- | Read a C string given as a pointer plus explicit length and return it
-- as a Haskell string.
--
-- At present this just forwards to 'peekCAStringLen'.
peekCStringLen :: CStringLen -> IO String
peekCStringLen cpl = peekCAStringLen cpl
-- | Copy a Haskell string into a freshly allocated, NUL terminated C string.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * the storage is newly allocated and must be released explicitly with
--   'Foreign.Marshal.Alloc.free' or 'Foreign.Marshal.Alloc.finalizerFree'.
--
-- At present this just forwards to 'newCAString'.
newCString :: String -> IO CString
newCString str = newCAString str
-- | Copy a Haskell string into a freshly allocated C string (i.e. a
-- character array) and return it together with its explicit length.
--
-- * the storage is newly allocated and must be released explicitly with
--   'Foreign.Marshal.Alloc.free' or 'Foreign.Marshal.Alloc.finalizerFree'.
--
-- At present this just forwards to 'newCAStringLen'.
newCStringLen :: String -> IO CStringLen
newCStringLen str = newCAStringLen str
-- | Run an action on a NUL terminated C string copy of a Haskell string
-- that lives in temporary storage.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * the memory is freed when the subcomputation terminates (either
--   normally or via an exception), so the pointer to the temporary
--   storage must /not/ be used after this.
--
-- At present this just forwards to 'withCAString'.
withCString :: String -> (CString -> IO a) -> IO a
withCString str act = withCAString str act
-- | Run an action on a C string (i.e. a character array) copy of a Haskell
-- string that lives in temporary storage, passing the pointer together
-- with its explicit length.
--
-- * the memory is freed when the subcomputation terminates (either
--   normally or via an exception), so the pointer to the temporary
--   storage must /not/ be used after this.
--
-- At present this just forwards to 'withCAStringLen'.
withCStringLen :: String -> (CStringLen -> IO a) -> IO a
withCStringLen str act = withCAStringLen str act
-- | Report whether a character can be accurately encoded in a 'CString'.
--
-- Currently only Latin-1 characters (code points below 256) are reported
-- as representable.
charIsRepresentable :: Char -> IO Bool
charIsRepresentable = return . fitsInOneByte
  where
    -- code points 0..255 are the ones that fit a single byte
    fitsInOneByte ch = ord ch <= 255
190 -- single byte characters
191 -- ----------------------
193 -- ** NOTE: These routines don't handle conversions! **
-- | Turn a C byte, read as a Latin-1 character, into the matching Haskell
-- character.
castCCharToChar :: CChar -> Char
castCCharToChar = unsafeChr . fromIntegral . asByte
  where
    -- Go through an unsigned byte first so that a negative C char maps to a
    -- code point in 128..255 instead of a negative number.
    asByte :: CChar -> Word8
    asByte = fromIntegral
-- | Turn a Haskell character into a C character by taking its code point.
-- Only the first 256 characters survive this conversion intact.
castCharToCChar :: Char -> CChar
castCharToCChar = fromIntegral . ord
205 -- | Marshal a NUL terminated C string into a Haskell string.
--
-- Every C character is converted with 'castCCharToChar'; the implementation
-- is selected by CPP depending on @__GLASGOW_HASKELL__@.
207 peekCAString :: CString -> IO String
208 #ifndef __GLASGOW_HASKELL__
-- Non-GHC variant: read all elements up to the NUL terminator into a list,
-- then convert the list element by element.
-- NOTE(review): the equation head binding `cp` is not visible in this
-- excerpt -- confirm against the full file.
210 cs <- peekArray0 nUL cp
211 return (cCharsToChars cs)
-- GHC variant: find the length first, then walk from the last index down to
-- 0, consing onto an accumulator so the result is built in order.
-- NOTE(review): the `#else`, the equation head, the `where loop s i` header
-- and the `#endif` are presumably on lines missing from this excerpt.
214 l <- lengthArray0 nUL cp
215 if l <= 0 then return "" else loop "" (l-1)
218 xval <- peekElemOff cp i
219 let val = castCCharToChar xval
-- force the converted character before it is consed onto the result
220 val `seq` if i <= 0 then return (val:s) else loop (val:s) (i-1)
223 -- | Marshal a C string with explicit length into a Haskell string.
--
-- Every C character is converted with 'castCCharToChar'; the implementation
-- is selected by CPP depending on @__GLASGOW_HASKELL__@.
225 peekCAStringLen :: CStringLen -> IO String
226 #ifndef __GLASGOW_HASKELL__
-- Non-GHC variant: read exactly `len` elements into a list, then convert it.
227 peekCAStringLen (cp, len) = do
228 cs <- peekArray len cp
229 return (cCharsToChars cs)
-- GHC variant: a non-positive length yields the empty string; otherwise the
-- characters are read from index len-1 down to 0 and consed onto `acc`.
-- NOTE(review): the `#else`, the `where loop acc i = do` header and the
-- `#endif` are presumably on lines missing from this excerpt.
231 peekCAStringLen (cp, len)
232 | len <= 0 = return "" -- being (too?) nice.
233 | otherwise = loop [] (len-1)
236 xval <- peekElemOff cp i
237 let val = castCCharToChar xval
238 -- blow away the coercion ASAP.
239 if (val `seq` (i == 0))
240 then return (val:acc)
241 else loop (val:acc) (i-1)
244 -- | Marshal a Haskell string into a NUL terminated C string.
246 -- * the Haskell string may /not/ contain any NUL characters
248 -- * new storage is allocated for the C string and must be
249 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
250 -- 'Foreign.Marshal.Alloc.finalizerFree'.
--
-- Every character is converted with 'castCharToCChar'.
252 newCAString :: String -> IO CString
253 #ifndef __GLASGOW_HASKELL__
-- Non-GHC variant: convert the whole list, then let newArray0 append the
-- terminator.
254 newCAString = newArray0 nUL . charsToCChars
-- GHC variant: allocate room for the string plus terminator, then write the
-- characters one index at a time; `go` pokes the terminator once the input
-- list is exhausted.
-- NOTE(review): the `#else`, the equation head, the `where`, the call that
-- starts `go` and the `#endif` are presumably on lines missing from this
-- excerpt.
257 ptr <- mallocArray0 (length str)
259 go [] n = pokeElemOff ptr n nUL
260 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
265 -- | Marshal a Haskell string into a C string (ie, character array) with
266 -- explicit length information.
268 -- * new storage is allocated for the C string and must be
269 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
270 -- 'Foreign.Marshal.Alloc.finalizerFree'.
--
-- Every character is converted with 'castCharToCChar'.
272 newCAStringLen :: String -> IO CStringLen
273 #ifndef __GLASGOW_HASKELL__
-- Non-GHC variant: marshal the converted list and pair the pointer with the
-- length of the Haskell string.
274 newCAStringLen str = do
275 a <- newArray (charsToCChars str)
276 return (pairLength str a)
-- GHC variant: write the characters one index at a time; unlike
-- 'newCAString', `go` writes nothing once the input list is exhausted.
-- NOTE(review): the `#else`, the binding of `len`, the `where`, the call
-- that starts `go`, the returned pair and the `#endif` are presumably on
-- lines missing from this excerpt.
278 newCAStringLen str = do
279 ptr <- mallocArray0 len
281 go [] n = n `seq` return () -- make it strict in n
282 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
289 -- | Marshal a Haskell string into a NUL terminated C string using temporary
-- storage.
292 -- * the Haskell string may /not/ contain any NUL characters
294 -- * the memory is freed when the subcomputation terminates (either
295 -- normally or via an exception), so the pointer to the temporary
296 -- storage must /not/ be used after this.
--
-- Every character is converted with 'castCharToCChar'.
298 withCAString :: String -> (CString -> IO a) -> IO a
299 #ifndef __GLASGOW_HASKELL__
-- Non-GHC variant: convert the whole list, then let withArray0 append the
-- terminator.
300 withCAString = withArray0 nUL . charsToCChars
-- GHC variant: reserve temporary room for the string plus terminator, then
-- write the characters one index at a time; `go` pokes the terminator once
-- the input list is exhausted.
-- NOTE(review): the `#else`, the equation head, the `where`, the code that
-- starts `go` and runs the action, and the `#endif` are presumably on lines
-- missing from this excerpt.
303 allocaArray0 (length str) $ \ptr ->
305 go [] n = pokeElemOff ptr n nUL
306 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
312 -- | Marshal a Haskell string into a C string (i.e. character array) in temporary
-- storage, with explicit length information.
315 -- * no NUL terminator is written after the characters
317 -- * the memory is freed when the subcomputation terminates (either
318 -- normally or via an exception), so the pointer to the temporary
319 -- storage must /not/ be used after this.
--
-- Every character is converted with 'castCharToCChar'.
321 withCAStringLen :: String -> (CStringLen -> IO a) -> IO a
322 #ifndef __GLASGOW_HASKELL__
-- Non-GHC variant: marshal the converted list and hand the action the
-- pointer paired with the length of the Haskell string.
323 withCAStringLen str act = withArray (charsToCChars str) $ act . pairLength str
-- GHC variant: write the characters one index at a time into temporary
-- storage; `go` writes nothing once the input list is exhausted.
-- NOTE(review): the `#else`, the binding of `len`, the `where`, the code
-- that starts `go` and calls `f`, and the `#endif` are presumably on lines
-- missing from this excerpt.
325 withCAStringLen str f =
326 allocaArray len $ \ptr ->
328 go [] n = n `seq` return () -- make it strict in n
329 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
337 -- auxiliary definitions
338 -- ----------------------
340 -- C's end of string character
-- Tuple a marshalled value with the length of the Haskell string it was
-- produced from.
pairLength :: String -> a -> (a, Int)
pairLength str marshalled = (marshalled, length str)
350 #ifndef __GLASGOW_HASKELL__
-- Convert a list of C characters to Haskell characters, element by element.
cCharsToChars :: [CChar] -> [Char]
cCharsToChars xs = [castCCharToChar x | x <- xs]
-- Convert a list of Haskell characters to C characters, element by element.
charsToCChars :: [Char] -> [CChar]
charsToCChars xs = [castCharToCChar x | x <- xs]
362 -----------------------------------------------------------------------------
365 -- representation of wide strings in C
366 -- -----------------------------------
368 -- | A C wide string is a reference to an array of C wide characters
369 -- terminated by NUL.
--
-- This is a plain alias for @Ptr CWchar@; the type itself records nothing
-- about who owns the memory or how long it stays valid.
370 type CWString = Ptr CWchar
372 -- | A wide character string with explicit length information, counted in
373 -- 'CWchar' elements, instead of a terminating NUL (allowing NUL characters
-- in the middle of the string).
375 type CWStringLen = (Ptr CWchar, Int)
377 -- | Marshal a NUL terminated C wide string into a Haskell string.
--
-- All wide characters up to the terminator are read into a list, which is
-- then decoded with 'cWcharsToChars'.
379 peekCWString :: CWString -> IO String
-- NOTE(review): the equation head binding `cp` is not visible in this
-- excerpt -- confirm against the full file.
381 cs <- peekArray0 wNUL cp
382 return (cWcharsToChars cs)
-- | Read a C wide string given as a pointer plus explicit length and return
-- it as a Haskell string.
--
-- Exactly the requested number of wide characters is read and then decoded
-- with 'cWcharsToChars'.
peekCWStringLen :: CWStringLen -> IO String
peekCWStringLen (wptr, count) = do
  units <- peekArray count wptr
  return $ cWcharsToChars units
-- | Copy a Haskell string into a freshly allocated, NUL terminated C wide
-- string.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * the storage is newly allocated and must be released explicitly with
--   'Foreign.Marshal.Alloc.free' or 'Foreign.Marshal.Alloc.finalizerFree'.
newCWString :: String -> IO CWString
newCWString str = newArray0 wNUL (charsToCWchars str)
-- | Marshal a Haskell string into a C wide string (i.e. wide character
-- array) with explicit length information.
--
-- * the returned length is the number of 'CWchar' elements actually
--   written, which can exceed the number of Haskell characters when the
--   wide encoding needs more than one element per character (UTF-16
--   surrogate pairs)
--
-- * new storage is allocated for the C wide string and must
--   be explicitly freed using 'Foreign.Marshal.Alloc.free' or
--   'Foreign.Marshal.Alloc.finalizerFree'.
newCWStringLen :: String -> IO CWStringLen
newCWStringLen str = do
  -- Encode once and measure the encoded list: pairing the pointer with
  -- @length str@ would under-report the size whenever encoding expands.
  let wcs = charsToCWchars str
  ptr <- newArray wcs
  return (ptr, length wcs)
-- | Run an action on a NUL terminated C wide string copy of a Haskell
-- string that lives in temporary storage.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * the memory is freed when the subcomputation terminates (either
--   normally or via an exception), so the pointer to the temporary
--   storage must /not/ be used after this.
withCWString :: String -> (CWString -> IO a) -> IO a
withCWString str act = withArray0 wNUL (charsToCWchars str) act
-- | Marshal a Haskell string into a C wide string (i.e. wide character
-- array) in temporary storage, with explicit length information.
--
-- * no NUL terminator is appended
--
-- * the length passed to the action is the number of 'CWchar' elements
--   actually written, which can exceed the number of Haskell characters
--   when the wide encoding needs more than one element per character
--   (UTF-16 surrogate pairs)
--
-- * the memory is freed when the subcomputation terminates (either
--   normally or via an exception), so the pointer to the temporary
--   storage must /not/ be used after this.
withCWStringLen :: String -> (CWStringLen -> IO a) -> IO a
withCWStringLen str act =
  -- withArrayLen reports the length of the encoded list, not of `str`.
  withArrayLen (charsToCWchars str) $ \len ptr -> act (ptr, len)
438 -- auxiliary definitions
439 -- ----------------------
444 cWcharsToChars :: [CWchar] -> [Char]
445 charsToCWchars :: [Char] -> [CWchar]
447 #ifdef mingw32_HOST_OS
449 -- On Windows, wchar_t is 16 bits wide and CWString uses the UTF-16 encoding.
451 -- coding errors generate Chars in the surrogate range
-- Decoding: widen every unit, merge surrogate pairs, then turn the resulting
-- code points into characters.
452 cWcharsToChars = map chr . fromUTF16 . map fromIntegral
-- A unit in 0xd800..0xdbff followed by one in 0xdc00..0xdfff is combined
-- into a single code point at or above 0x10000; any other unit is passed
-- through unchanged.
-- NOTE(review): the `where` and the empty-list case of fromUTF16 are
-- presumably on lines missing from this excerpt.
454 fromUTF16 (c1:c2:wcs)
455 | 0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff =
456 ((c1 - 0xd800)*0x400 + (c2 - 0xdc00) + 0x10000) : fromUTF16 wcs
457 fromUTF16 (c:wcs) = c : fromUTF16 wcs
-- Encoding: code points below 0x10000 become one unit; larger ones are split
-- into a 0xd800-based unit followed by a 0xdc00-based unit.
-- NOTE(review): the `where` and the head of `utf16Char` (binding `c` and
-- `wcs`) are presumably on lines missing from this excerpt.
460 charsToCWchars = foldr utf16Char [] . map ord
463 | c < 0x10000 = fromIntegral c : wcs
464 | otherwise = let c' = c - 0x10000 in
465 fromIntegral (c' `div` 0x400 + 0xd800) :
466 fromIntegral (c' `mod` 0x400 + 0xdc00) : wcs
468 #else /* !mingw32_HOST_OS */
-- In this branch every wide character is converted one-for-one, in both
-- directions.
cWcharsToChars = map castCWcharToChar
charsToCWchars = map castCharToCWchar
473 -- These conversions only make sense if __STDC_ISO_10646__ is defined
474 -- (meaning that wchar_t is ISO 10646, aka Unicode)
-- Read a C wide character's numeric value as a code point and return the
-- corresponding Haskell character.
castCWcharToChar :: CWchar -> Char
castCWcharToChar = chr . fromIntegral
-- Store a Haskell character's code point as a C wide character.
castCharToCWchar :: Char -> CWchar
castCharToCWchar = fromIntegral . ord
482 #endif /* !mingw32_HOST_OS */