1 {-# LANGUAGE CPP, NoImplicitPrelude #-}
3 -----------------------------------------------------------------------------
5 -- Module : Foreign.C.String
6 -- Copyright : (c) The FFI task force 2001
7 -- License : BSD-style (see the file libraries/base/LICENSE)
9 -- Maintainer : ffi@haskell.org
10 -- Stability : provisional
11 -- Portability : portable
13 -- Utilities for primitive marshalling of C strings.
15 -- The marshalling converts each Haskell character, representing a Unicode
16 -- code point, to one or more bytes in a manner that, by default, is
17 -- determined by the current locale. As a consequence, no guarantees
18 -- can be made about the relative length of a Haskell string and its
19 -- corresponding C string, and therefore all the marshalling routines
20 -- include memory allocation. The translation between Unicode and the
21 -- encoding of the current locale may be lossy.
23 -----------------------------------------------------------------------------
25 module Foreign.C.String ( -- representation of strings in C
28 CString, -- = Ptr CChar
29 CStringLen, -- = (Ptr CChar, Int)
31 -- ** Using a locale-dependent encoding
33 #ifndef __GLASGOW_HASKELL__
34 -- | Currently these functions are identical to their @CAString@ counterparts;
35 -- eventually they will use an encoding determined by the current locale.
37 -- | These functions are different from their @CAString@ counterparts
38 -- in that they will use an encoding determined by the current locale,
39 -- rather than always assuming ASCII.
42 -- conversion of C strings into Haskell strings
44 peekCString, -- :: CString -> IO String
45 peekCStringLen, -- :: CStringLen -> IO String
47 -- conversion of Haskell strings into C strings
49 newCString, -- :: String -> IO CString
50 newCStringLen, -- :: String -> IO CStringLen
52 -- conversion of Haskell strings into C strings using temporary storage
54 withCString, -- :: String -> (CString -> IO a) -> IO a
55 withCStringLen, -- :: String -> (CStringLen -> IO a) -> IO a
57 charIsRepresentable, -- :: Char -> IO Bool
59 -- ** Using 8-bit characters
61 -- | These variants of the above functions are for use with C libraries
62 -- that are ignorant of Unicode. These functions should be used with
63 -- care, as a loss of information can occur.
65 castCharToCChar, -- :: Char -> CChar
66 castCCharToChar, -- :: CChar -> Char
68 castCharToCUChar, -- :: Char -> CUChar
69 castCUCharToChar, -- :: CUChar -> Char
70 castCharToCSChar, -- :: Char -> CSChar
71 castCSCharToChar, -- :: CSChar -> Char
73 peekCAString, -- :: CString -> IO String
74 peekCAStringLen, -- :: CStringLen -> IO String
75 newCAString, -- :: String -> IO CString
76 newCAStringLen, -- :: String -> IO CStringLen
77 withCAString, -- :: String -> (CString -> IO a) -> IO a
78 withCAStringLen, -- :: String -> (CStringLen -> IO a) -> IO a
82 -- | These variants of the above functions are for use with C libraries
83 -- that encode Unicode using the C @wchar_t@ type in a system-dependent
84 -- way. The only encodings supported are
86 -- * UTF-32 (the C compiler defines @__STDC_ISO_10646__@), or
88 -- * UTF-16 (as used on Windows systems).
90 CWString, -- = Ptr CWchar
91 CWStringLen, -- = (Ptr CWchar, Int)
93 peekCWString, -- :: CWString -> IO String
94 peekCWStringLen, -- :: CWStringLen -> IO String
95 newCWString, -- :: String -> IO CWString
96 newCWStringLen, -- :: String -> IO CWStringLen
97 withCWString, -- :: String -> (CWString -> IO a) -> IO a
98 withCWStringLen, -- :: String -> (CWStringLen -> IO a) -> IO a
102 import Foreign.Marshal.Array
103 import Foreign.C.Types
105 import Foreign.Storable
109 #ifdef __GLASGOW_HASKELL__
117 import {-# SOURCE #-} GHC.IO.Encoding
118 import qualified GHC.Foreign as GHC
120 import Data.Char ( chr, ord )
121 #define unsafeChr chr
124 -----------------------------------------------------------------------------
127 -- representation of strings in C
128 -- ------------------------------
130 -- | A C string is a reference to an array of C characters terminated by NUL.
131 type CString = Ptr CChar
133 -- | A string with explicit length information in bytes instead of a
134 -- terminating NUL (allowing NUL characters in the middle of the string).
135 type CStringLen = (Ptr CChar, Int)
137 -- exported functions
138 -- ------------------
140 -- * the following routines apply the default conversion when converting the
141 -- C-land character encoding into the Haskell-land character encoding
143 -- | Marshal a NUL terminated C string into a Haskell string.
145 peekCString :: CString -> IO String
146 #ifndef __GLASGOW_HASKELL__
147 peekCString = peekCAString
149 peekCString = GHC.peekCString foreignEncoding
152 -- | Marshal a C string with explicit length into a Haskell string.
154 peekCStringLen :: CStringLen -> IO String
155 #ifndef __GLASGOW_HASKELL__
156 peekCStringLen = peekCAStringLen
158 peekCStringLen = GHC.peekCStringLen foreignEncoding
161 -- | Marshal a Haskell string into a NUL terminated C string.
163 -- * the Haskell string may /not/ contain any NUL characters
165 -- * new storage is allocated for the C string and must be
166 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
167 -- 'Foreign.Marshal.Alloc.finalizerFree'.
169 newCString :: String -> IO CString
170 #ifndef __GLASGOW_HASKELL__
171 newCString = newCAString
173 newCString = GHC.newCString foreignEncoding
176 -- | Marshal a Haskell string into a C string (ie, character array) with
177 -- explicit length information.
179 -- * new storage is allocated for the C string and must be
180 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
181 -- 'Foreign.Marshal.Alloc.finalizerFree'.
183 newCStringLen :: String -> IO CStringLen
184 #ifndef __GLASGOW_HASKELL__
185 newCStringLen = newCAStringLen
187 newCStringLen = GHC.newCStringLen foreignEncoding
190 -- | Marshal a Haskell string into a NUL terminated C string using temporary
193 -- * the Haskell string may /not/ contain any NUL characters
195 -- * the memory is freed when the subcomputation terminates (either
196 -- normally or via an exception), so the pointer to the temporary
197 -- storage must /not/ be used after this.
199 withCString :: String -> (CString -> IO a) -> IO a
200 #ifndef __GLASGOW_HASKELL__
201 withCString = withCAString
203 withCString = GHC.withCString foreignEncoding
206 -- | Marshal a Haskell string into a C string (ie, character array)
207 -- in temporary storage, with explicit length information.
209 -- * the memory is freed when the subcomputation terminates (either
210 -- normally or via an exception), so the pointer to the temporary
211 -- storage must /not/ be used after this.
213 withCStringLen :: String -> (CStringLen -> IO a) -> IO a
214 #ifndef __GLASGOW_HASKELL__
215 withCStringLen = withCAStringLen
217 withCStringLen = GHC.withCStringLen foreignEncoding
221 #ifndef __GLASGOW_HASKELL__
222 -- | Determines whether a character can be accurately encoded in a 'CString'.
223 -- Unrepresentable characters are converted to @\'?\'@.
225 -- Currently only Latin-1 characters are representable.
226 charIsRepresentable :: Char -> IO Bool
227 charIsRepresentable c = return (ord c < 256)
229 -- -- | Determines whether a character can be accurately encoded in a 'CString'.
230 -- -- Unrepresentable characters are converted to '?' or their nearest visual equivalent.
231 charIsRepresentable :: Char -> IO Bool
232 charIsRepresentable = GHC.charIsRepresentable foreignEncoding
235 -- single byte characters
236 -- ----------------------
238 -- ** NOTE: These routines don't handle conversions! **
240 -- | Convert a C byte, representing a Latin-1 character, to the corresponding
241 -- Haskell character.
242 castCCharToChar :: CChar -> Char
243 castCCharToChar ch = unsafeChr (fromIntegral (fromIntegral ch :: Word8))
245 -- | Convert a Haskell character to a C character.
246 -- This function is only safe on the first 256 characters.
247 castCharToCChar :: Char -> CChar
248 castCharToCChar ch = fromIntegral (ord ch)
250 -- | Convert a C @unsigned char@, representing a Latin-1 character, to
251 -- the corresponding Haskell character.
252 castCUCharToChar :: CUChar -> Char
253 castCUCharToChar ch = unsafeChr (fromIntegral (fromIntegral ch :: Word8))
255 -- | Convert a Haskell character to a C @unsigned char@.
256 -- This function is only safe on the first 256 characters.
257 castCharToCUChar :: Char -> CUChar
258 castCharToCUChar ch = fromIntegral (ord ch)
260 -- | Convert a C @signed char@, representing a Latin-1 character, to the
261 -- corresponding Haskell character.
262 castCSCharToChar :: CSChar -> Char
263 castCSCharToChar ch = unsafeChr (fromIntegral (fromIntegral ch :: Word8))
265 -- | Convert a Haskell character to a C @signed char@.
266 -- This function is only safe on the first 256 characters.
267 castCharToCSChar :: Char -> CSChar
268 castCharToCSChar ch = fromIntegral (ord ch)
270 -- | Marshal a NUL terminated C string into a Haskell string.
272 peekCAString :: CString -> IO String
273 #ifndef __GLASGOW_HASKELL__
275 cs <- peekArray0 nUL cp
276 return (cCharsToChars cs)
279 l <- lengthArray0 nUL cp
280 if l <= 0 then return "" else loop "" (l-1)
283 xval <- peekElemOff cp i
284 let val = castCCharToChar xval
285 val `seq` if i <= 0 then return (val:s) else loop (val:s) (i-1)
288 -- | Marshal a C string with explicit length into a Haskell string.
290 peekCAStringLen :: CStringLen -> IO String
291 #ifndef __GLASGOW_HASKELL__
292 peekCAStringLen (cp, len) = do
293 cs <- peekArray len cp
294 return (cCharsToChars cs)
296 peekCAStringLen (cp, len)
297 | len <= 0 = return "" -- being (too?) nice.
298 | otherwise = loop [] (len-1)
301 xval <- peekElemOff cp i
302 let val = castCCharToChar xval
303 -- blow away the coercion ASAP.
304 if (val `seq` (i == 0))
305 then return (val:acc)
306 else loop (val:acc) (i-1)
309 -- | Marshal a Haskell string into a NUL terminated C string.
311 -- * the Haskell string may /not/ contain any NUL characters
313 -- * new storage is allocated for the C string and must be
314 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
315 -- 'Foreign.Marshal.Alloc.finalizerFree'.
317 newCAString :: String -> IO CString
318 #ifndef __GLASGOW_HASKELL__
319 newCAString = newArray0 nUL . charsToCChars
322 ptr <- mallocArray0 (length str)
324 go [] n = pokeElemOff ptr n nUL
325 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
330 -- | Marshal a Haskell string into a C string (ie, character array) with
331 -- explicit length information.
333 -- * new storage is allocated for the C string and must be
334 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
335 -- 'Foreign.Marshal.Alloc.finalizerFree'.
337 newCAStringLen :: String -> IO CStringLen
338 #ifndef __GLASGOW_HASKELL__
339 newCAStringLen str = newArrayLen (charsToCChars str)
341 newCAStringLen str = do
342 ptr <- mallocArray0 len
344 go [] n = n `seq` return () -- make it strict in n
345 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
352 -- | Marshal a Haskell string into a NUL terminated C string using temporary
355 -- * the Haskell string may /not/ contain any NUL characters
357 -- * the memory is freed when the subcomputation terminates (either
358 -- normally or via an exception), so the pointer to the temporary
359 -- storage must /not/ be used after this.
361 withCAString :: String -> (CString -> IO a) -> IO a
362 #ifndef __GLASGOW_HASKELL__
363 withCAString = withArray0 nUL . charsToCChars
366 allocaArray0 (length str) $ \ptr ->
368 go [] n = pokeElemOff ptr n nUL
369 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
375 -- | Marshal a Haskell string into a C string (ie, character array)
376 -- in temporary storage, with explicit length information.
378 -- * the memory is freed when the subcomputation terminates (either
379 -- normally or via an exception), so the pointer to the temporary
380 -- storage must /not/ be used after this.
382 withCAStringLen :: String -> (CStringLen -> IO a) -> IO a
383 withCAStringLen str f =
384 #ifndef __GLASGOW_HASKELL__
385 withArrayLen (charsToCChars str) $ \ len ptr -> f (ptr, len)
387 allocaArray len $ \ptr ->
389 go [] n = n `seq` return () -- make it strict in n
390 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
398 -- auxiliary definitions
399 -- ----------------------
401 -- C's end of string character
406 -- allocate an array to hold the list and pair it with the number of elements
407 newArrayLen :: Storable a => [a] -> IO (Ptr a, Int)
410 return (a, length xs)
412 #ifndef __GLASGOW_HASKELL__
413 -- cast [CChar] to [Char]
415 cCharsToChars :: [CChar] -> [Char]
416 cCharsToChars xs = map castCCharToChar xs
418 -- cast [Char] to [CChar]
420 charsToCChars :: [Char] -> [CChar]
421 charsToCChars xs = map castCharToCChar xs
424 -----------------------------------------------------------------------------
427 -- representation of wide strings in C
428 -- -----------------------------------
430 -- | A C wide string is a reference to an array of C wide characters
431 -- terminated by NUL.
432 type CWString = Ptr CWchar
434 -- | A wide character string with explicit length information in 'CWchar's
435 -- instead of a terminating NUL (allowing NUL characters in the middle
437 type CWStringLen = (Ptr CWchar, Int)
439 -- | Marshal a NUL terminated C wide string into a Haskell string.
441 peekCWString :: CWString -> IO String
443 cs <- peekArray0 wNUL cp
444 return (cWcharsToChars cs)
446 -- | Marshal a C wide string with explicit length into a Haskell string.
448 peekCWStringLen :: CWStringLen -> IO String
449 peekCWStringLen (cp, len) = do
450 cs <- peekArray len cp
451 return (cWcharsToChars cs)
453 -- | Marshal a Haskell string into a NUL terminated C wide string.
455 -- * the Haskell string may /not/ contain any NUL characters
457 -- * new storage is allocated for the C wide string and must
458 -- be explicitly freed using 'Foreign.Marshal.Alloc.free' or
459 -- 'Foreign.Marshal.Alloc.finalizerFree'.
461 newCWString :: String -> IO CWString
462 newCWString = newArray0 wNUL . charsToCWchars
464 -- | Marshal a Haskell string into a C wide string (ie, wide character array)
465 -- with explicit length information.
467 -- * new storage is allocated for the C wide string and must
468 -- be explicitly freed using 'Foreign.Marshal.Alloc.free' or
469 -- 'Foreign.Marshal.Alloc.finalizerFree'.
471 newCWStringLen :: String -> IO CWStringLen
472 newCWStringLen str = newArrayLen (charsToCWchars str)
474 -- | Marshal a Haskell string into a NUL terminated C wide string using
475 -- temporary storage.
477 -- * the Haskell string may /not/ contain any NUL characters
479 -- * the memory is freed when the subcomputation terminates (either
480 -- normally or via an exception), so the pointer to the temporary
481 -- storage must /not/ be used after this.
483 withCWString :: String -> (CWString -> IO a) -> IO a
484 withCWString = withArray0 wNUL . charsToCWchars
486 -- | Marshal a Haskell string into a NUL terminated C wide string using
487 -- temporary storage.
489 -- * the Haskell string may /not/ contain any NUL characters
491 -- * the memory is freed when the subcomputation terminates (either
492 -- normally or via an exception), so the pointer to the temporary
493 -- storage must /not/ be used after this.
495 withCWStringLen :: String -> (CWStringLen -> IO a) -> IO a
496 withCWStringLen str f =
497 withArrayLen (charsToCWchars str) $ \ len ptr -> f (ptr, len)
499 -- auxiliary definitions
500 -- ----------------------
505 cWcharsToChars :: [CWchar] -> [Char]
506 charsToCWchars :: [Char] -> [CWchar]
508 #ifdef mingw32_HOST_OS
510 -- On Windows, wchar_t is 16 bits wide and CWString uses the UTF-16 encoding.
512 -- coding errors generate Chars in the surrogate range
513 cWcharsToChars = map chr . fromUTF16 . map fromIntegral
515 fromUTF16 (c1:c2:wcs)
516 | 0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff =
517 ((c1 - 0xd800)*0x400 + (c2 - 0xdc00) + 0x10000) : fromUTF16 wcs
518 fromUTF16 (c:wcs) = c : fromUTF16 wcs
521 charsToCWchars = foldr utf16Char [] . map ord
524 | c < 0x10000 = fromIntegral c : wcs
525 | otherwise = let c' = c - 0x10000 in
526 fromIntegral (c' `div` 0x400 + 0xd800) :
527 fromIntegral (c' `mod` 0x400 + 0xdc00) : wcs
529 #else /* !mingw32_HOST_OS */
531 cWcharsToChars xs = map castCWcharToChar xs
532 charsToCWchars xs = map castCharToCWchar xs
534 -- These conversions only make sense if __STDC_ISO_10646__ is defined
535 -- (meaning that wchar_t is ISO 10646, aka Unicode)
537 castCWcharToChar :: CWchar -> Char
538 castCWcharToChar ch = chr (fromIntegral ch )
540 castCharToCWchar :: Char -> CWchar
541 castCharToCWchar ch = fromIntegral (ord ch)
543 #endif /* !mingw32_HOST_OS */