1 {-# LANGUAGE CPP, NoImplicitPrelude #-}
3 -----------------------------------------------------------------------------
5 -- Module : Foreign.C.String
6 -- Copyright : (c) The FFI task force 2001
7 -- License : BSD-style (see the file libraries/base/LICENSE)
9 -- Maintainer : ffi@haskell.org
10 -- Stability : provisional
11 -- Portability : portable
13 -- Utilities for primitive marshalling of C strings.
15 -- The marshalling converts each Haskell character, representing a Unicode
16 -- code point, to one or more bytes in a manner that, by default, is
17 -- determined by the current locale. As a consequence, no guarantees
18 -- can be made about the relative length of a Haskell string and its
19 -- corresponding C string, and therefore all the marshalling routines
20 -- include memory allocation. The translation between Unicode and the
21 -- encoding of the current locale may be lossy.
23 -----------------------------------------------------------------------------
25 module Foreign.C.String ( -- representation of strings in C
29 CString, -- = Ptr CChar
30 CStringLen, -- = (Ptr CChar, Int)
32 -- ** Using a locale-dependent encoding
34 -- | Currently these functions are identical to their @CAString@ counterparts;
35 -- eventually they will use an encoding determined by the current locale.
37 -- conversion of C strings into Haskell strings
39 peekCString, -- :: CString -> IO String
40 peekCStringLen, -- :: CStringLen -> IO String
42 -- conversion of Haskell strings into C strings
44 newCString, -- :: String -> IO CString
45 newCStringLen, -- :: String -> IO CStringLen
47 -- conversion of Haskell strings into C strings using temporary storage
49 withCString, -- :: String -> (CString -> IO a) -> IO a
50 withCStringLen, -- :: String -> (CStringLen -> IO a) -> IO a
52 charIsRepresentable, -- :: Char -> IO Bool
54 -- ** Using 8-bit characters
56 -- | These variants of the above functions are for use with C libraries
57 -- that are ignorant of Unicode. These functions should be used with
58 -- care, as a loss of information can occur.
60 castCharToCChar, -- :: Char -> CChar
61 castCCharToChar, -- :: CChar -> Char
63 castCharToCUChar, -- :: Char -> CUChar
64 castCUCharToChar, -- :: CUChar -> Char
65 castCharToCSChar, -- :: Char -> CSChar
66 castCSCharToChar, -- :: CSChar -> Char
68 peekCAString, -- :: CString -> IO String
69 peekCAStringLen, -- :: CStringLen -> IO String
70 newCAString, -- :: String -> IO CString
71 newCAStringLen, -- :: String -> IO CStringLen
72 withCAString, -- :: String -> (CString -> IO a) -> IO a
73 withCAStringLen, -- :: String -> (CStringLen -> IO a) -> IO a
77 -- | These variants of the above functions are for use with C libraries
78 -- that encode Unicode using the C @wchar_t@ type in a system-dependent
79 -- way. The only encodings supported are
81 -- * UTF-32 (the C compiler defines @__STDC_ISO_10646__@), or
83 -- * UTF-16 (as used on Windows systems).
85 CWString, -- = Ptr CWchar
86 CWStringLen, -- = (Ptr CWchar, Int)
88 peekCWString, -- :: CWString -> IO String
89 peekCWStringLen, -- :: CWStringLen -> IO String
90 newCWString, -- :: String -> IO CWString
91 newCWStringLen, -- :: String -> IO CWStringLen
92 withCWString, -- :: String -> (CWString -> IO a) -> IO a
93 withCWStringLen, -- :: String -> (CWStringLen -> IO a) -> IO a
97 import Foreign.Marshal.Array
98 import Foreign.C.Types
100 import Foreign.Storable
104 #ifdef __GLASGOW_HASKELL__
110 import Data.Char ( chr, ord )
111 #define unsafeChr chr
114 -----------------------------------------------------------------------------
117 -- representation of strings in C
118 -- ------------------------------
-- | A C string: a pointer to an array of C characters that is terminated
-- by a NUL character.
type CString = Ptr CChar

-- | A C string paired with its length in bytes.  Because the length is
-- explicit, no terminating NUL is needed and NUL characters may occur in
-- the middle of the string.
type CStringLen = (CString, Int)
127 -- exported functions
128 -- ------------------
130 -- * the following routines apply the default conversion when converting the
131 -- C-land character encoding into the Haskell-land character encoding
-- | Read a NUL terminated C string and return its contents as a Haskell
-- string.  This currently delegates to 'peekCAString'.
peekCString :: CString -> IO String
peekCString cp = peekCAString cp
-- | Read a C string whose length is given explicitly and return its
-- contents as a Haskell string.  This currently delegates to
-- 'peekCAStringLen'.
peekCStringLen :: CStringLen -> IO String
peekCStringLen strLen = peekCAStringLen strLen
-- | Copy a Haskell string into a freshly allocated, NUL terminated C string.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * the storage for the C string is newly allocated and has to be
--   released explicitly with 'Foreign.Marshal.Alloc.free' or
--   'Foreign.Marshal.Alloc.finalizerFree'.
--
-- This currently delegates to 'newCAString'.
newCString :: String -> IO CString
newCString str = newCAString str
-- | Copy a Haskell string into a freshly allocated C character array and
-- return it together with its length.
--
-- * the storage for the C string is newly allocated and has to be
--   released explicitly with 'Foreign.Marshal.Alloc.free' or
--   'Foreign.Marshal.Alloc.finalizerFree'.
--
-- This currently delegates to 'newCAStringLen'.
newCStringLen :: String -> IO CStringLen
newCStringLen str = newCAStringLen str
-- | Run an action on a NUL terminated C string that holds a copy of the
-- given Haskell string in temporary storage.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * the temporary storage is released as soon as the action finishes,
--   whether normally or via an exception, so the pointer must /not/ be
--   used afterwards.
--
-- This currently delegates to 'withCAString'.
withCString :: String -> (CString -> IO a) -> IO a
withCString str action = withCAString str action
-- | Run an action on a C character array, paired with its length, that
-- holds a copy of the given Haskell string in temporary storage.
--
-- * the temporary storage is released as soon as the action finishes,
--   whether normally or via an exception, so the pointer must /not/ be
--   used afterwards.
--
-- This currently delegates to 'withCAStringLen'.
withCStringLen :: String -> (CStringLen -> IO a) -> IO a
withCStringLen str action = withCAStringLen str action
-- | Report whether a character can be encoded in a 'CString' without loss.
-- Characters that cannot be represented are converted to @\'?\'@.
--
-- At present exactly the Latin-1 characters (code points below 256) are
-- reported as representable.
charIsRepresentable :: Char -> IO Bool
charIsRepresentable = return . (< 256) . ord
193 -- single byte characters
194 -- ----------------------
196 -- ** NOTE: These routines don't handle conversions! **
-- | Interpret a C byte as a Latin-1 character and return the corresponding
-- Haskell character.  The byte is first reinterpreted as an unsigned 8-bit
-- value, so the resulting code point always lies in the range 0-255.
castCCharToChar :: CChar -> Char
castCCharToChar = unsafeChr . fromIntegral . toByte
  where
    toByte :: CChar -> Word8
    toByte = fromIntegral
-- | Turn a Haskell character into a C character by converting its code
-- point numerically.  No encoding is applied, so this is only safe for the
-- first 256 characters.
castCharToCChar :: Char -> CChar
castCharToCChar = fromIntegral . ord
-- | Interpret a C @unsigned char@ as a Latin-1 character and return the
-- corresponding Haskell character.  The value passes through an unsigned
-- 8-bit intermediate, so the code point always lies in the range 0-255.
castCUCharToChar :: CUChar -> Char
castCUCharToChar = unsafeChr . fromIntegral . toByte
  where
    toByte :: CUChar -> Word8
    toByte = fromIntegral
-- | Turn a Haskell character into a C @unsigned char@ by converting its
-- code point numerically.  No encoding is applied, so this is only safe
-- for the first 256 characters.
castCharToCUChar :: Char -> CUChar
castCharToCUChar = fromIntegral . ord
-- | Interpret a C @signed char@ as a Latin-1 character and return the
-- corresponding Haskell character.  The value is first reinterpreted as an
-- unsigned 8-bit value, so negative inputs map to code points 128-255.
castCSCharToChar :: CSChar -> Char
castCSCharToChar = unsafeChr . fromIntegral . toByte
  where
    toByte :: CSChar -> Word8
    toByte = fromIntegral
-- | Turn a Haskell character into a C @signed char@ by converting its code
-- point numerically.  No encoding is applied, so this is only safe for the
-- first 256 characters.
castCharToCSChar :: Char -> CSChar
castCharToCSChar = fromIntegral . ord
228 -- | Marshal a NUL terminated C string into a Haskell string.
-- Every 'CChar' is converted with 'castCCharToChar'; no decoding beyond
-- that per-element cast is performed here.
230 peekCAString :: CString -> IO String
231 #ifndef __GLASGOW_HASKELL__
-- Portable variant: read all elements up to the nUL terminator as a list
-- and convert that list in one go.
233 cs <- peekArray0 nUL cp
234 return (cCharsToChars cs)
-- NOTE(review): the lines separating the two CPP branches, and the head of
-- the local loop, are not visible in this view; the code below is
-- presumably the GHC-specific variant.
-- It measures the string first and then walks the indices downwards,
-- consing each character onto the accumulator, so the result comes out in
-- order without a final reverse.
237 l <- lengthArray0 nUL cp
238 if l <= 0 then return "" else loop "" (l-1)
-- Read element i, convert it, and force the converted character before it
-- is consed onto the accumulator s; index 0 is the last one processed.
241 xval <- peekElemOff cp i
242 let val = castCCharToChar xval
243 val `seq` if i <= 0 then return (val:s) else loop (val:s) (i-1)
246 -- | Marshal a C string with explicit length into a Haskell string.
-- Every 'CChar' is converted with 'castCCharToChar'.  In the guarded
-- variant below, a length of zero or less yields the empty string.
248 peekCAStringLen :: CStringLen -> IO String
249 #ifndef __GLASGOW_HASKELL__
-- Portable variant: read len elements as a list and convert that list.
250 peekCAStringLen (cp, len) = do
251 cs <- peekArray len cp
252 return (cCharsToChars cs)
-- NOTE(review): the lines separating the two CPP branches, and the head of
-- the local loop, are not visible in this view; the equation below is
-- presumably the GHC-specific variant.
254 peekCAStringLen (cp, len)
255 | len <= 0 = return "" -- being (too?) nice.
256 | otherwise = loop [] (len-1)
-- The loop starts at the last index and counts down to 0, consing each
-- converted character onto acc so the string is built in order.
259 xval <- peekElemOff cp i
260 let val = castCCharToChar xval
261 -- blow away the coercion ASAP.
262 if (val `seq` (i == 0))
263 then return (val:acc)
264 else loop (val:acc) (i-1)
267 -- | Marshal a Haskell string into a NUL terminated C string.
-- Every character is converted with 'castCharToCChar'.
269 -- * the Haskell string may /not/ contain any NUL characters
271 -- * new storage is allocated for the C string and must be
272 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
273 -- 'Foreign.Marshal.Alloc.finalizerFree'.
275 newCAString :: String -> IO CString
276 #ifndef __GLASGOW_HASKELL__
-- Portable variant: convert to a list of CChar and let newArray0 allocate
-- the array and append the nUL terminator.
277 newCAString = newArray0 nUL . charsToCChars
-- NOTE(review): the lines separating the two CPP branches, the equation
-- head, and the lines that start go and return ptr are not visible in this
-- view; the code below is presumably the GHC-specific variant.
-- mallocArray0 reserves one element more than the requested count, which
-- leaves room for the terminator written by go.
280 ptr <- mallocArray0 (length str)
-- go writes the characters at increasing offsets n and stores nUL at the
-- offset reached once the input list is exhausted.
282 go [] n = pokeElemOff ptr n nUL
283 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
288 -- | Marshal a Haskell string into a C string (ie, character array) with
289 -- explicit length information.
-- Every character is converted with 'castCharToCChar'.
291 -- * new storage is allocated for the C string and must be
292 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
293 -- 'Foreign.Marshal.Alloc.finalizerFree'.
295 newCAStringLen :: String -> IO CStringLen
296 #ifndef __GLASGOW_HASKELL__
-- Portable variant: convert to a list of CChar and let newArrayLen allocate
-- the array and report the element count.
297 newCAStringLen str = newArrayLen (charsToCChars str)
-- NOTE(review): the lines separating the two CPP branches, the binding of
-- len, and the lines that start go and return the result are not visible
-- in this view; the code below is presumably the GHC-specific variant.
299 newCAStringLen str = do
300 ptr <- mallocArray0 len
-- go writes the characters at increasing offsets n; unlike the NUL
-- terminated variants, the empty-list case writes nothing.
302 go [] n = n `seq` return () -- make it strict in n
303 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
310 -- | Marshal a Haskell string into a NUL terminated C string using temporary
-- storage.  Every character is converted with 'castCharToCChar'.
313 -- * the Haskell string may /not/ contain any NUL characters
315 -- * the memory is freed when the subcomputation terminates (either
316 -- normally or via an exception), so the pointer to the temporary
317 -- storage must /not/ be used after this.
319 withCAString :: String -> (CString -> IO a) -> IO a
320 #ifndef __GLASGOW_HASKELL__
-- Portable variant: convert to a list of CChar and let withArray0 provide a
-- temporary, nUL terminated array.
321 withCAString = withArray0 nUL . charsToCChars
-- NOTE(review): the lines separating the two CPP branches, the equation
-- head, and the lines that start go and invoke the continuation are not
-- visible in this view; the code below is presumably the GHC-specific
-- variant.
-- allocaArray0 reserves one element more than the requested count, which
-- leaves room for the terminator written by go.
324 allocaArray0 (length str) $ \ptr ->
-- go writes the characters at increasing offsets n and stores nUL at the
-- offset reached once the input list is exhausted.
326 go [] n = pokeElemOff ptr n nUL
327 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
333 -- | Marshal a Haskell string into a C string (ie, character array)
334 -- in temporary storage, with explicit length information.
-- Every character is converted with 'castCharToCChar'.
336 -- * the memory is freed when the subcomputation terminates (either
337 -- normally or via an exception), so the pointer to the temporary
338 -- storage must /not/ be used after this.
340 withCAStringLen :: String -> (CStringLen -> IO a) -> IO a
341 withCAStringLen str f =
342 #ifndef __GLASGOW_HASKELL__
-- Portable variant: convert to a list of CChar and let withArrayLen supply
-- both the temporary array and its element count.
343 withArrayLen (charsToCChars str) $ \ len ptr -> f (ptr, len)
-- NOTE(review): the lines separating the two CPP branches, the binding of
-- len, and the lines that start go and call f are not visible in this
-- view; the code below is presumably the GHC-specific variant.
345 allocaArray len $ \ptr ->
-- go writes the characters at increasing offsets n; the empty-list case
-- writes nothing, so no terminator is stored.
347 go [] n = n `seq` return () -- make it strict in n
348 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
356 -- auxiliary definitions
357 -- ----------------------
359 -- C's end of string character
364 -- allocate an array to hold the list and pair it with the number of elements
-- The count in the result is the length of the input list.
365 newArrayLen :: Storable a => [a] -> IO (Ptr a, Int)
-- NOTE(review): the equation head and the binding of a are not visible in
-- this view; presumably a is the freshly allocated array holding xs.
368 return (a, length xs)
370 #ifndef __GLASGOW_HASKELL__
-- Convert a list of C characters to Haskell characters, element by
-- element, using castCCharToChar.
cCharsToChars :: [CChar] -> [Char]
cCharsToChars = map castCCharToChar
-- Convert a list of Haskell characters to C characters, element by
-- element, using castCharToCChar.
charsToCChars :: [Char] -> [CChar]
charsToCChars = map castCharToCChar
382 -----------------------------------------------------------------------------
385 -- representation of wide strings in C
386 -- -----------------------------------
-- | A C wide string: a pointer to an array of C wide characters that is
-- terminated by a NUL.
type CWString = Ptr CWchar

-- | A wide character string paired with its length, counted in 'CWchar's.
-- Because the length is explicit, no terminating NUL is needed and NUL
-- characters may occur in the middle of the string.
type CWStringLen = (CWString, Int)
397 -- | Marshal a NUL terminated C wide string into a Haskell string.
-- The elements up to the wNUL terminator are read with 'peekArray0' and
-- the resulting list is converted by cWcharsToChars.
399 peekCWString :: CWString -> IO String
-- NOTE(review): the equation head that binds cp is not visible in this view.
401 cs <- peekArray0 wNUL cp
402 return (cWcharsToChars cs)
-- | Read a C wide string whose length (in 'CWchar' elements) is given
-- explicitly and return its contents as a Haskell string.
peekCWStringLen :: CWStringLen -> IO String
peekCWStringLen (cp, len) = peekArray len cp >>= return . cWcharsToChars
-- | Copy a Haskell string into a freshly allocated, NUL terminated C wide
-- string.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * the storage for the C wide string is newly allocated and has to be
--   released explicitly with 'Foreign.Marshal.Alloc.free' or
--   'Foreign.Marshal.Alloc.finalizerFree'.
newCWString :: String -> IO CWString
newCWString str = newArray0 wNUL (charsToCWchars str)
-- | Copy a Haskell string into a freshly allocated C wide character array
-- and return it together with its length.
--
-- * the storage for the C wide string is newly allocated and has to be
--   released explicitly with 'Foreign.Marshal.Alloc.free' or
--   'Foreign.Marshal.Alloc.finalizerFree'.
newCWStringLen :: String -> IO CWStringLen
newCWStringLen = newArrayLen . charsToCWchars
-- | Run an action on a NUL terminated C wide string that holds a copy of
-- the given Haskell string in temporary storage.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * the temporary storage is released as soon as the action finishes,
--   whether normally or via an exception, so the pointer must /not/ be
--   used afterwards.
withCWString :: String -> (CWString -> IO a) -> IO a
withCWString str action = withArray0 wNUL (charsToCWchars str) action
-- | Marshal a Haskell string into a C wide string (i.e. wide character
-- array) in temporary storage, with explicit length information.
--
-- * no NUL terminator is appended; the extent of the string is given by
--   the length passed to the action, counted in 'CWchar' elements.  Where
--   a character is encoded as more than one element, that count is larger
--   than the number of Haskell characters.
--
-- * the memory is freed when the subcomputation terminates (either
--   normally or via an exception), so the pointer to the temporary
--   storage must /not/ be used after this.
withCWStringLen :: String -> (CWStringLen -> IO a) -> IO a
withCWStringLen str f =
  -- withArrayLen supplies the element count first and the pointer second;
  -- swap them into the (pointer, length) shape of CWStringLen.
  withArrayLen (charsToCWchars str) $ \ len ptr -> f (ptr, len)
457 -- auxiliary definitions
458 -- ----------------------
-- Conversions between lists of C wide characters and Haskell characters.
-- Only the type signatures are shared; the defining equations are chosen
-- by the mingw32_HOST_OS conditional that follows.
463 cWcharsToChars :: [CWchar] -> [Char]
464 charsToCWchars :: [Char] -> [CWchar]
466 #ifdef mingw32_HOST_OS
468 -- On Windows, wchar_t is 16 bits wide and CWString uses the UTF-16 encoding.
470 -- coding errors generate Chars in the surrogate range
-- Decoding: widen every code unit, merge surrogate pairs with fromUTF16,
-- then turn the resulting code points into characters with chr.
471 cWcharsToChars = map chr . fromUTF16 . map fromIntegral
-- A unit in 0xd800-0xdbff followed by a unit in 0xdc00-0xdfff is combined
-- into a single code point at or above 0x10000.
473 fromUTF16 (c1:c2:wcs)
474 | 0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff =
475 ((c1 - 0xd800)*0x400 + (c2 - 0xdc00) + 0x10000) : fromUTF16 wcs
-- Any other unit, including an unpaired surrogate, is passed through
-- unchanged.
-- NOTE(review): the empty-list case of fromUTF16 is not visible in this view.
476 fromUTF16 (c:wcs) = c : fromUTF16 wcs
-- Encoding: take the code point of every character and prepend its UTF-16
-- units to the already encoded remainder.
479 charsToCWchars = foldr utf16Char [] . map ord
-- NOTE(review): the head of the utf16Char helper is not visible in this
-- view; the guards below presumably belong to it.  Code points below
-- 0x10000 become one unit, all others become two units offset from 0xd800
-- and 0xdc00.
482 | c < 0x10000 = fromIntegral c : wcs
483 | otherwise = let c' = c - 0x10000 in
484 fromIntegral (c' `div` 0x400 + 0xd800) :
485 fromIntegral (c' `mod` 0x400 + 0xdc00) : wcs
487 #else /* !mingw32_HOST_OS */
-- Element-wise cast; each wide character is taken to be one code point.
cWcharsToChars = map castCWcharToChar
-- Element-wise cast; each character becomes exactly one wide character.
charsToCWchars = map castCharToCWchar
-- These conversions only make sense if __STDC_ISO_10646__ is defined
-- (meaning that wchar_t is ISO 10646, aka Unicode)

-- Treat the numeric value of a wide character as a code point and turn it
-- into a Haskell character with chr.
castCWcharToChar :: CWchar -> Char
castCWcharToChar = chr . fromIntegral
-- Turn a Haskell character into a wide character carrying its code point.
castCharToCWchar :: Char -> CWchar
castCharToCWchar = fromIntegral . ord
501 #endif /* !mingw32_HOST_OS */