1 {-# OPTIONS_GHC -XNoImplicitPrelude #-}
2 -----------------------------------------------------------------------------
4 -- Module : Foreign.C.String
5 -- Copyright : (c) The FFI task force 2001
6 -- License : BSD-style (see the file libraries/base/LICENSE)
8 -- Maintainer : ffi@haskell.org
9 -- Stability : provisional
10 -- Portability : portable
12 -- Utilities for primitive marshalling of C strings.
14 -- The marshalling converts each Haskell character, representing a Unicode
15 -- code point, to one or more bytes in a manner that, by default, is
16 -- determined by the current locale. As a consequence, no guarantees
17 -- can be made about the relative length of a Haskell string and its
18 -- corresponding C string, and therefore all the marshalling routines
19 -- include memory allocation. The translation between Unicode and the
20 -- encoding of the current locale may be lossy.
22 -----------------------------------------------------------------------------
24 module Foreign.C.String ( -- representation of strings in C
28 CString, -- = Ptr CChar
29 CStringLen, -- = (Ptr CChar, Int)
31 -- ** Using a locale-dependent encoding
33 -- | Currently these functions are identical to their @CAString@ counterparts;
34 -- eventually they will use an encoding determined by the current locale.
36 -- conversion of C strings into Haskell strings
38 peekCString, -- :: CString -> IO String
39 peekCStringLen, -- :: CStringLen -> IO String
41 -- conversion of Haskell strings into C strings
43 newCString, -- :: String -> IO CString
44 newCStringLen, -- :: String -> IO CStringLen
46 -- conversion of Haskell strings into C strings using temporary storage
48 withCString, -- :: String -> (CString -> IO a) -> IO a
49 withCStringLen, -- :: String -> (CStringLen -> IO a) -> IO a
51 charIsRepresentable, -- :: Char -> IO Bool
53 -- ** Using 8-bit characters
55 -- | These variants of the above functions are for use with C libraries
56 -- that are ignorant of Unicode. These functions should be used with
57 -- care, as a loss of information can occur.
59 castCharToCChar, -- :: Char -> CChar
60 castCCharToChar, -- :: CChar -> Char
62 castCharToCUChar, -- :: Char -> CUChar
63 castCUCharToChar, -- :: CUChar -> Char
64 castCharToCSChar, -- :: Char -> CSChar
65 castCSCharToChar, -- :: CSChar -> Char
67 peekCAString, -- :: CString -> IO String
68 peekCAStringLen, -- :: CStringLen -> IO String
69 newCAString, -- :: String -> IO CString
70 newCAStringLen, -- :: String -> IO CStringLen
71 withCAString, -- :: String -> (CString -> IO a) -> IO a
72 withCAStringLen, -- :: String -> (CStringLen -> IO a) -> IO a
76 -- | These variants of the above functions are for use with C libraries
77 -- that encode Unicode using the C @wchar_t@ type in a system-dependent
78 -- way. The only encodings supported are
80 -- * UTF-32 (the C compiler defines @__STDC_ISO_10646__@), or
82 -- * UTF-16 (as used on Windows systems).
84 CWString, -- = Ptr CWchar
85 CWStringLen, -- = (Ptr CWchar, Int)
87 peekCWString, -- :: CWString -> IO String
88 peekCWStringLen, -- :: CWStringLen -> IO String
89 newCWString, -- :: String -> IO CWString
90 newCWStringLen, -- :: String -> IO CWStringLen
91 withCWString, -- :: String -> (CWString -> IO a) -> IO a
92 withCWStringLen, -- :: String -> (CWStringLen -> IO a) -> IO a
96 import Foreign.Marshal.Array
97 import Foreign.C.Types
99 import Foreign.Storable
103 #ifdef __GLASGOW_HASKELL__
109 import Data.Char ( chr, ord )
110 #define unsafeChr chr
113 -----------------------------------------------------------------------------
116 -- representation of strings in C
117 -- ------------------------------
119 -- | A C string is a reference to an array of C characters terminated by NUL.
120 type CString = Ptr CChar
122 -- | A string with explicit length information in bytes instead of a
123 -- terminating NUL (allowing NUL characters in the middle of the string).
124 type CStringLen = (Ptr CChar, Int)
126 -- exported functions
127 -- ------------------
129 -- * the following routines apply the default conversion when converting the
130 -- C-land character encoding into the Haskell-land character encoding
132 -- | Marshal a NUL terminated C string into a Haskell string.
134 peekCString :: CString -> IO String
135 peekCString = peekCAString
137 -- | Marshal a C string with explicit length into a Haskell string.
139 peekCStringLen :: CStringLen -> IO String
140 peekCStringLen = peekCAStringLen
142 -- | Marshal a Haskell string into a NUL terminated C string.
144 -- * the Haskell string may /not/ contain any NUL characters
146 -- * new storage is allocated for the C string and must be
147 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
148 -- 'Foreign.Marshal.Alloc.finalizerFree'.
150 newCString :: String -> IO CString
151 newCString = newCAString
153 -- | Marshal a Haskell string into a C string (ie, character array) with
154 -- explicit length information.
156 -- * new storage is allocated for the C string and must be
157 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
158 -- 'Foreign.Marshal.Alloc.finalizerFree'.
160 newCStringLen :: String -> IO CStringLen
161 newCStringLen = newCAStringLen
163 -- | Marshal a Haskell string into a NUL terminated C string using temporary
166 -- * the Haskell string may /not/ contain any NUL characters
168 -- * the memory is freed when the subcomputation terminates (either
169 -- normally or via an exception), so the pointer to the temporary
170 -- storage must /not/ be used after this.
172 withCString :: String -> (CString -> IO a) -> IO a
173 withCString = withCAString
175 -- | Marshal a Haskell string into a C string (ie, character array)
176 -- in temporary storage, with explicit length information.
178 -- * the memory is freed when the subcomputation terminates (either
179 -- normally or via an exception), so the pointer to the temporary
180 -- storage must /not/ be used after this.
182 withCStringLen :: String -> (CStringLen -> IO a) -> IO a
183 withCStringLen = withCAStringLen
185 -- | Determines whether a character can be accurately encoded in a 'CString'.
186 -- Unrepresentable characters are converted to @\'?\'@.
188 -- Currently only Latin-1 characters are representable.
189 charIsRepresentable :: Char -> IO Bool
190 charIsRepresentable c = return (ord c < 256)
192 -- single byte characters
193 -- ----------------------
195 -- ** NOTE: These routines don't handle conversions! **
197 -- | Convert a C byte, representing a Latin-1 character, to the corresponding
198 -- Haskell character.
199 castCCharToChar :: CChar -> Char
200 castCCharToChar ch = unsafeChr (fromIntegral (fromIntegral ch :: Word8))
202 -- | Convert a Haskell character to a C character.
203 -- This function is only safe on the first 256 characters.
204 castCharToCChar :: Char -> CChar
205 castCharToCChar ch = fromIntegral (ord ch)
207 -- | Convert a C @unsigned char@, representing a Latin-1 character, to
208 -- the corresponding Haskell character.
209 castCUCharToChar :: CUChar -> Char
210 castCUCharToChar ch = unsafeChr (fromIntegral (fromIntegral ch :: Word8))
212 -- | Convert a Haskell character to a C @unsigned char@.
213 -- This function is only safe on the first 256 characters.
214 castCharToCUChar :: Char -> CUChar
215 castCharToCUChar ch = fromIntegral (ord ch)
217 -- | Convert a C @signed char@, representing a Latin-1 character, to the
218 -- corresponding Haskell character.
219 castCSCharToChar :: CSChar -> Char
220 castCSCharToChar ch = unsafeChr (fromIntegral (fromIntegral ch :: Word8))
222 -- | Convert a Haskell character to a C @signed char@.
223 -- This function is only safe on the first 256 characters.
224 castCharToCSChar :: Char -> CSChar
225 castCharToCSChar ch = fromIntegral (ord ch)
227 -- | Marshal a NUL terminated C string into a Haskell string.
229 peekCAString :: CString -> IO String
230 #ifndef __GLASGOW_HASKELL__
232 cs <- peekArray0 nUL cp
233 return (cCharsToChars cs)
236 l <- lengthArray0 nUL cp
237 if l <= 0 then return "" else loop "" (l-1)
240 xval <- peekElemOff cp i
241 let val = castCCharToChar xval
242 val `seq` if i <= 0 then return (val:s) else loop (val:s) (i-1)
245 -- | Marshal a C string with explicit length into a Haskell string.
247 peekCAStringLen :: CStringLen -> IO String
248 #ifndef __GLASGOW_HASKELL__
249 peekCAStringLen (cp, len) = do
250 cs <- peekArray len cp
251 return (cCharsToChars cs)
253 peekCAStringLen (cp, len)
254 | len <= 0 = return "" -- being (too?) nice.
255 | otherwise = loop [] (len-1)
258 xval <- peekElemOff cp i
259 let val = castCCharToChar xval
260 -- blow away the coercion ASAP.
261 if (val `seq` (i == 0))
262 then return (val:acc)
263 else loop (val:acc) (i-1)
266 -- | Marshal a Haskell string into a NUL terminated C string.
268 -- * the Haskell string may /not/ contain any NUL characters
270 -- * new storage is allocated for the C string and must be
271 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
272 -- 'Foreign.Marshal.Alloc.finalizerFree'.
274 newCAString :: String -> IO CString
275 #ifndef __GLASGOW_HASKELL__
276 newCAString = newArray0 nUL . charsToCChars
279 ptr <- mallocArray0 (length str)
281 go [] n = pokeElemOff ptr n nUL
282 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
287 -- | Marshal a Haskell string into a C string (ie, character array) with
288 -- explicit length information.
290 -- * new storage is allocated for the C string and must be
291 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
292 -- 'Foreign.Marshal.Alloc.finalizerFree'.
294 newCAStringLen :: String -> IO CStringLen
295 #ifndef __GLASGOW_HASKELL__
296 newCAStringLen str = newArrayLen (charsToCChars str)
298 newCAStringLen str = do
299 ptr <- mallocArray0 len
301 go [] n = n `seq` return () -- make it strict in n
302 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
309 -- | Marshal a Haskell string into a NUL terminated C string using temporary
312 -- * the Haskell string may /not/ contain any NUL characters
314 -- * the memory is freed when the subcomputation terminates (either
315 -- normally or via an exception), so the pointer to the temporary
316 -- storage must /not/ be used after this.
318 withCAString :: String -> (CString -> IO a) -> IO a
319 #ifndef __GLASGOW_HASKELL__
320 withCAString = withArray0 nUL . charsToCChars
323 allocaArray0 (length str) $ \ptr ->
325 go [] n = pokeElemOff ptr n nUL
326 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
332 -- | Marshal a Haskell string into a C string (ie, character array)
333 -- in temporary storage, with explicit length information.
335 -- * the memory is freed when the subcomputation terminates (either
336 -- normally or via an exception), so the pointer to the temporary
337 -- storage must /not/ be used after this.
339 withCAStringLen :: String -> (CStringLen -> IO a) -> IO a
340 withCAStringLen str f =
341 #ifndef __GLASGOW_HASKELL__
342 withArrayLen (charsToCChars str) $ \ len ptr -> f (ptr, len)
344 allocaArray len $ \ptr ->
346 go [] n = n `seq` return () -- make it strict in n
347 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
355 -- auxiliary definitions
356 -- ----------------------
358 -- C's end of string character
363 -- allocate an array to hold the list and pair it with the number of elements
364 newArrayLen :: Storable a => [a] -> IO (Ptr a, Int)
367 return (a, length xs)
369 #ifndef __GLASGOW_HASKELL__
370 -- cast [CChar] to [Char]
372 cCharsToChars :: [CChar] -> [Char]
373 cCharsToChars xs = map castCCharToChar xs
375 -- cast [Char] to [CChar]
377 charsToCChars :: [Char] -> [CChar]
378 charsToCChars xs = map castCharToCChar xs
381 -----------------------------------------------------------------------------
384 -- representation of wide strings in C
385 -- -----------------------------------
387 -- | A C wide string is a reference to an array of C wide characters
388 -- terminated by NUL.
389 type CWString = Ptr CWchar
391 -- | A wide character string with explicit length information in 'CWchar's
392 -- instead of a terminating NUL (allowing NUL characters in the middle
394 type CWStringLen = (Ptr CWchar, Int)
396 -- | Marshal a NUL terminated C wide string into a Haskell string.
398 peekCWString :: CWString -> IO String
400 cs <- peekArray0 wNUL cp
401 return (cWcharsToChars cs)
403 -- | Marshal a C wide string with explicit length into a Haskell string.
405 peekCWStringLen :: CWStringLen -> IO String
406 peekCWStringLen (cp, len) = do
407 cs <- peekArray len cp
408 return (cWcharsToChars cs)
410 -- | Marshal a Haskell string into a NUL terminated C wide string.
412 -- * the Haskell string may /not/ contain any NUL characters
414 -- * new storage is allocated for the C wide string and must
415 -- be explicitly freed using 'Foreign.Marshal.Alloc.free' or
416 -- 'Foreign.Marshal.Alloc.finalizerFree'.
418 newCWString :: String -> IO CWString
419 newCWString = newArray0 wNUL . charsToCWchars
421 -- | Marshal a Haskell string into a C wide string (ie, wide character array)
422 -- with explicit length information.
424 -- * new storage is allocated for the C wide string and must
425 -- be explicitly freed using 'Foreign.Marshal.Alloc.free' or
426 -- 'Foreign.Marshal.Alloc.finalizerFree'.
428 newCWStringLen :: String -> IO CWStringLen
429 newCWStringLen str = newArrayLen (charsToCWchars str)
431 -- | Marshal a Haskell string into a NUL terminated C wide string using
432 -- temporary storage.
434 -- * the Haskell string may /not/ contain any NUL characters
436 -- * the memory is freed when the subcomputation terminates (either
437 -- normally or via an exception), so the pointer to the temporary
438 -- storage must /not/ be used after this.
440 withCWString :: String -> (CWString -> IO a) -> IO a
441 withCWString = withArray0 wNUL . charsToCWchars
443 -- | Marshal a Haskell string into a NUL terminated C wide string using
444 -- temporary storage.
446 -- * the Haskell string may /not/ contain any NUL characters
448 -- * the memory is freed when the subcomputation terminates (either
449 -- normally or via an exception), so the pointer to the temporary
450 -- storage must /not/ be used after this.
452 withCWStringLen :: String -> (CWStringLen -> IO a) -> IO a
453 withCWStringLen str f =
454 withArrayLen (charsToCWchars str) $ \ len ptr -> f (ptr, len)
456 -- auxiliary definitions
457 -- ----------------------
462 cWcharsToChars :: [CWchar] -> [Char]
463 charsToCWchars :: [Char] -> [CWchar]
465 #ifdef mingw32_HOST_OS
467 -- On Windows, wchar_t is 16 bits wide and CWString uses the UTF-16 encoding.
469 -- coding errors generate Chars in the surrogate range
470 cWcharsToChars = map chr . fromUTF16 . map fromIntegral
472 fromUTF16 (c1:c2:wcs)
473 | 0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff =
474 ((c1 - 0xd800)*0x400 + (c2 - 0xdc00) + 0x10000) : fromUTF16 wcs
475 fromUTF16 (c:wcs) = c : fromUTF16 wcs
478 charsToCWchars = foldr utf16Char [] . map ord
481 | c < 0x10000 = fromIntegral c : wcs
482 | otherwise = let c' = c - 0x10000 in
483 fromIntegral (c' `div` 0x400 + 0xd800) :
484 fromIntegral (c' `mod` 0x400 + 0xdc00) : wcs
486 #else /* !mingw32_HOST_OS */
488 cWcharsToChars xs = map castCWcharToChar xs
489 charsToCWchars xs = map castCharToCWchar xs
491 -- These conversions only make sense if __STDC_ISO_10646__ is defined
492 -- (meaning that wchar_t is ISO 10646, aka Unicode)
494 castCWcharToChar :: CWchar -> Char
495 castCWcharToChar ch = chr (fromIntegral ch )
497 castCharToCWchar :: Char -> CWchar
498 castCharToCWchar ch = fromIntegral (ord ch)
500 #endif /* !mingw32_HOST_OS */