X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Foreign%2FC%2FString.hs;h=fdefdc67c8d7f29f0395ce6d79f7f2edf263a9dd;hb=509f28cc93b980d30aca37008cbe66c677a0d6f6;hp=6f465e5111bb3e967f0312a42f9c5f4163a349a8;hpb=9fa9bc17072a58c0bae2cce4764d38677e96ac29;p=ghc-base.git diff --git a/Foreign/C/String.hs b/Foreign/C/String.hs index 6f465e5..fdefdc6 100644 --- a/Foreign/C/String.hs +++ b/Foreign/C/String.hs @@ -1,24 +1,43 @@ -{-# OPTIONS -fno-implicit-prelude #-} +{-# LANGUAGE CPP, NoImplicitPrelude #-} + ----------------------------------------------------------------------------- -- | -- Module : Foreign.C.String -- Copyright : (c) The FFI task force 2001 --- License : BSD-style (see the file libraries/core/LICENSE) +-- License : BSD-style (see the file libraries/base/LICENSE) -- -- Maintainer : ffi@haskell.org -- Stability : provisional -- Portability : portable -- --- $Id: String.hs,v 1.5 2002/04/24 16:31:44 simonmar Exp $ +-- Utilities for primitive marshalling of C strings. -- --- Utilities for primitive marshaling +-- The marshalling converts each Haskell character, representing a Unicode +-- code point, to one or more bytes in a manner that, by default, is +-- determined by the current locale. As a consequence, no guarantees +-- can be made about the relative length of a Haskell string and its +-- corresponding C string, and therefore all the marshalling routines +-- include memory allocation. The translation between Unicode and the +-- encoding of the current locale may be lossy. -- ----------------------------------------------------------------------------- module Foreign.C.String ( -- representation of strings in C + -- * C strings CString, -- = Ptr CChar - CStringLen, -- = (CString, Int) + CStringLen, -- = (Ptr CChar, Int) + + -- ** Using a locale-dependent encoding + +#ifndef __GLASGOW_HASKELL__ + -- | Currently these functions are identical to their @CAString@ counterparts; + -- eventually they will use an encoding determined by the current locale. +#else + -- | These functions are different from their @CAString@ counterparts + -- in that they will use an encoding determined by the current locale, + -- rather than always assuming ASCII. +#endif -- conversion of C strings into Haskell strings -- @@ -35,25 +54,71 @@ module Foreign.C.String ( -- representation of strings in C withCString, -- :: String -> (CString -> IO a) -> IO a withCStringLen, -- :: String -> (CStringLen -> IO a) -> IO a - -- conversion between Haskell and C characters *ignoring* the encoding - -- + charIsRepresentable, -- :: Char -> IO Bool + + -- ** Using 8-bit characters + + -- | These variants of the above functions are for use with C libraries + -- that are ignorant of Unicode. These functions should be used with + -- care, as a loss of information can occur. + castCharToCChar, -- :: Char -> CChar castCCharToChar, -- :: CChar -> Char + castCharToCUChar, -- :: Char -> CUChar + castCUCharToChar, -- :: CUChar -> Char + castCharToCSChar, -- :: Char -> CSChar + castCSCharToChar, -- :: CSChar -> Char + + peekCAString, -- :: CString -> IO String + peekCAStringLen, -- :: CStringLen -> IO String + newCAString, -- :: String -> IO CString + newCAStringLen, -- :: String -> IO CStringLen + withCAString, -- :: String -> (CString -> IO a) -> IO a + withCAStringLen, -- :: String -> (CStringLen -> IO a) -> IO a + + -- * C wide strings + + -- | These variants of the above functions are for use with C libraries + -- that encode Unicode using the C @wchar_t@ type in a system-dependent + -- way. The only encodings supported are + -- + -- * UTF-32 (the C compiler defines @__STDC_ISO_10646__@), or + -- + -- * UTF-16 (as used on Windows systems). + + CWString, -- = Ptr CWchar + CWStringLen, -- = (Ptr CWchar, Int) + + peekCWString, -- :: CWString -> IO String + peekCWStringLen, -- :: CWStringLen -> IO String + newCWString, -- :: String -> IO CWString + newCWStringLen, -- :: String -> IO CWStringLen + withCWString, -- :: String -> (CWString -> IO a) -> IO a + withCWStringLen, -- :: String -> (CWStringLen -> IO a) -> IO a + ) where import Foreign.Marshal.Array import Foreign.C.Types import Foreign.Ptr +import Foreign.Storable import Data.Word #ifdef __GLASGOW_HASKELL__ +import Control.Monad + import GHC.List import GHC.Real import GHC.Num -import GHC.IOBase import GHC.Base + +import {-# SOURCE #-} GHC.IO.Encoding +import qualified GHC.Foreign as GHC +#else +import Data.Char ( chr, ord ) +#define unsafeChr chr #endif ----------------------------------------------------------------------------- @@ -62,71 +127,275 @@ import GHC.Base -- representation of strings in C -- ------------------------------ -type CString = Ptr CChar -- conventional NUL terminates strings -type CStringLen = (CString, Int) -- strings with explicit length +-- | A C string is a reference to an array of C characters terminated by NUL. +type CString = Ptr CChar +-- | A string with explicit length information in bytes instead of a +-- terminating NUL (allowing NUL characters in the middle of the string). +type CStringLen = (Ptr CChar, Int) -- exported functions -- ------------------ -- -- * the following routines apply the default conversion when converting the -- C-land character encoding into the Haskell-land character encoding --- --- ** NOTE: The current implementation doesn't handle conversions yet! ** --- --- * the routines using an explicit length tolerate NUL characters in the --- middle of a string --- --- marshal a NUL terminated C string into a Haskell string +-- | Marshal a NUL terminated C string into a Haskell string. -- peekCString :: CString -> IO String -peekCString cp = do cs <- peekArray0 nUL cp; return (cCharsToChars cs) +#ifndef __GLASGOW_HASKELL__ +peekCString = peekCAString +#else +peekCString = GHC.peekCString foreignEncoding +#endif --- marshal a C string with explicit length into a Haskell string +-- | Marshal a C string with explicit length into a Haskell string. -- peekCStringLen :: CStringLen -> IO String -peekCStringLen (cp, len) = do cs <- peekArray len cp; return (cCharsToChars cs) +#ifndef __GLASGOW_HASKELL__ +peekCStringLen = peekCAStringLen +#else +peekCStringLen = GHC.peekCStringLen foreignEncoding +#endif --- marshal a Haskell string into a NUL terminated C strings +-- | Marshal a Haskell string into a NUL terminated C string. -- --- * the Haskell string may *not* contain any NUL characters +-- * the Haskell string may /not/ contain any NUL characters -- --- * new storage is allocated for the C string and must be explicitly freed +-- * new storage is allocated for the C string and must be +-- explicitly freed using 'Foreign.Marshal.Alloc.free' or +-- 'Foreign.Marshal.Alloc.finalizerFree'. -- newCString :: String -> IO CString -newCString = newArray0 nUL . charsToCChars +#ifndef __GLASGOW_HASKELL__ +newCString = newCAString +#else +newCString = GHC.newCString foreignEncoding +#endif --- marshal a Haskell string into a C string (ie, character array) with --- explicit length information +-- | Marshal a Haskell string into a C string (ie, character array) with +-- explicit length information. -- --- * new storage is allocated for the C string and must be explicitly freed +-- * new storage is allocated for the C string and must be +-- explicitly freed using 'Foreign.Marshal.Alloc.free' or +-- 'Foreign.Marshal.Alloc.finalizerFree'. -- newCStringLen :: String -> IO CStringLen -newCStringLen str = do a <- newArray (charsToCChars str) - return (pairLength str a) +#ifndef __GLASGOW_HASKELL__ +newCStringLen = newCAStringLen +#else +newCStringLen = GHC.newCStringLen foreignEncoding +#endif --- marshal a Haskell string into a NUL terminated C strings using temporary --- storage +-- | Marshal a Haskell string into a NUL terminated C string using temporary +-- storage. -- --- * the Haskell string may *not* contain any NUL characters +-- * the Haskell string may /not/ contain any NUL characters -- --- * see the lifetime constraints of `MarshalAlloc.alloca' +-- * the memory is freed when the subcomputation terminates (either +-- normally or via an exception), so the pointer to the temporary +-- storage must /not/ be used after this. -- withCString :: String -> (CString -> IO a) -> IO a -withCString = withArray0 nUL . charsToCChars +#ifndef __GLASGOW_HASKELL__ +withCString = withCAString +#else +withCString = GHC.withCString foreignEncoding +#endif --- marshal a Haskell string into a NUL terminated C strings using temporary --- storage --- --- * the Haskell string may *not* contain any NUL characters +-- | Marshal a Haskell string into a C string (ie, character array) +-- in temporary storage, with explicit length information. -- --- * see the lifetime constraints of `MarshalAlloc.alloca' +-- * the memory is freed when the subcomputation terminates (either +-- normally or via an exception), so the pointer to the temporary +-- storage must /not/ be used after this. -- withCStringLen :: String -> (CStringLen -> IO a) -> IO a -withCStringLen str act = withArray (charsToCChars str) $ act . pairLength str +#ifndef __GLASGOW_HASKELL__ +withCStringLen = withCAStringLen +#else +withCStringLen = GHC.withCStringLen foreignEncoding +#endif --- auxilliary definitions + +#ifndef __GLASGOW_HASKELL__ +-- | Determines whether a character can be accurately encoded in a 'CString'. +-- Unrepresentable characters are converted to @\'?\'@. +-- +-- Currently only Latin-1 characters are representable. +charIsRepresentable :: Char -> IO Bool +charIsRepresentable c = return (ord c < 256) +#else +-- -- | Determines whether a character can be accurately encoded in a 'CString'. +-- -- Unrepresentable characters are converted to '?' or their nearest visual equivalent. +charIsRepresentable :: Char -> IO Bool +charIsRepresentable = GHC.charIsRepresentable foreignEncoding +#endif + +-- single byte characters +-- ---------------------- +-- +-- ** NOTE: These routines don't handle conversions! ** + +-- | Convert a C byte, representing a Latin-1 character, to the corresponding +-- Haskell character. +castCCharToChar :: CChar -> Char +castCCharToChar ch = unsafeChr (fromIntegral (fromIntegral ch :: Word8)) + +-- | Convert a Haskell character to a C character. +-- This function is only safe on the first 256 characters. +castCharToCChar :: Char -> CChar +castCharToCChar ch = fromIntegral (ord ch) + +-- | Convert a C @unsigned char@, representing a Latin-1 character, to +-- the corresponding Haskell character. +castCUCharToChar :: CUChar -> Char +castCUCharToChar ch = unsafeChr (fromIntegral (fromIntegral ch :: Word8)) + +-- | Convert a Haskell character to a C @unsigned char@. +-- This function is only safe on the first 256 characters. +castCharToCUChar :: Char -> CUChar +castCharToCUChar ch = fromIntegral (ord ch) + +-- | Convert a C @signed char@, representing a Latin-1 character, to the +-- corresponding Haskell character. +castCSCharToChar :: CSChar -> Char +castCSCharToChar ch = unsafeChr (fromIntegral (fromIntegral ch :: Word8)) + +-- | Convert a Haskell character to a C @signed char@. +-- This function is only safe on the first 256 characters. +castCharToCSChar :: Char -> CSChar +castCharToCSChar ch = fromIntegral (ord ch) + +-- | Marshal a NUL terminated C string into a Haskell string. +-- +peekCAString :: CString -> IO String +#ifndef __GLASGOW_HASKELL__ +peekCAString cp = do + cs <- peekArray0 nUL cp + return (cCharsToChars cs) +#else +peekCAString cp = do + l <- lengthArray0 nUL cp + if l <= 0 then return "" else loop "" (l-1) + where + loop s i = do + xval <- peekElemOff cp i + let val = castCCharToChar xval + val `seq` if i <= 0 then return (val:s) else loop (val:s) (i-1) +#endif + +-- | Marshal a C string with explicit length into a Haskell string. +-- +peekCAStringLen :: CStringLen -> IO String +#ifndef __GLASGOW_HASKELL__ +peekCAStringLen (cp, len) = do + cs <- peekArray len cp + return (cCharsToChars cs) +#else +peekCAStringLen (cp, len) + | len <= 0 = return "" -- being (too?) nice. + | otherwise = loop [] (len-1) + where + loop acc i = do + xval <- peekElemOff cp i + let val = castCCharToChar xval + -- blow away the coercion ASAP. + if (val `seq` (i == 0)) + then return (val:acc) + else loop (val:acc) (i-1) +#endif + +-- | Marshal a Haskell string into a NUL terminated C string. +-- +-- * the Haskell string may /not/ contain any NUL characters +-- +-- * new storage is allocated for the C string and must be +-- explicitly freed using 'Foreign.Marshal.Alloc.free' or +-- 'Foreign.Marshal.Alloc.finalizerFree'. +-- +newCAString :: String -> IO CString +#ifndef __GLASGOW_HASKELL__ +newCAString = newArray0 nUL . charsToCChars +#else +newCAString str = do + ptr <- mallocArray0 (length str) + let + go [] n = pokeElemOff ptr n nUL + go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1) + go str 0 + return ptr +#endif + +-- | Marshal a Haskell string into a C string (ie, character array) with +-- explicit length information. +-- +-- * new storage is allocated for the C string and must be +-- explicitly freed using 'Foreign.Marshal.Alloc.free' or +-- 'Foreign.Marshal.Alloc.finalizerFree'. +-- +newCAStringLen :: String -> IO CStringLen +#ifndef __GLASGOW_HASKELL__ +newCAStringLen str = newArrayLen (charsToCChars str) +#else +newCAStringLen str = do + ptr <- mallocArray0 len + let + go [] n = n `seq` return () -- make it strict in n + go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1) + go str 0 + return (ptr, len) + where + len = length str +#endif + +-- | Marshal a Haskell string into a NUL terminated C string using temporary +-- storage. +-- +-- * the Haskell string may /not/ contain any NUL characters +-- +-- * the memory is freed when the subcomputation terminates (either +-- normally or via an exception), so the pointer to the temporary +-- storage must /not/ be used after this. +-- +withCAString :: String -> (CString -> IO a) -> IO a +#ifndef __GLASGOW_HASKELL__ +withCAString = withArray0 nUL . charsToCChars +#else +withCAString str f = + allocaArray0 (length str) $ \ptr -> + let + go [] n = pokeElemOff ptr n nUL + go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1) + in do + go str 0 + f ptr +#endif + +-- | Marshal a Haskell string into a C string (ie, character array) +-- in temporary storage, with explicit length information. +-- +-- * the memory is freed when the subcomputation terminates (either +-- normally or via an exception), so the pointer to the temporary +-- storage must /not/ be used after this. +-- +withCAStringLen :: String -> (CStringLen -> IO a) -> IO a +withCAStringLen str f = +#ifndef __GLASGOW_HASKELL__ + withArrayLen (charsToCChars str) $ \ len ptr -> f (ptr, len) +#else + allocaArray len $ \ptr -> + let + go [] n = n `seq` return () -- make it strict in n + go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1) + in do + go str 0 + f (ptr,len) + where + len = length str +#endif + +-- auxiliary definitions -- ---------------------- -- C's end of string character @@ -134,23 +403,141 @@ withCStringLen str act = withArray (charsToCChars str) $ act . pairLength str nUL :: CChar nUL = 0 --- pair a C string with the length of the given Haskell string --- -pairLength :: String -> CString -> CStringLen -pairLength = flip (,) . length +-- allocate an array to hold the list and pair it with the number of elements +newArrayLen :: Storable a => [a] -> IO (Ptr a, Int) +newArrayLen xs = do + a <- newArray xs + return (a, length xs) +#ifndef __GLASGOW_HASKELL__ -- cast [CChar] to [Char] -- cCharsToChars :: [CChar] -> [Char] -cCharsToChars = map castCCharToChar +cCharsToChars xs = map castCCharToChar xs -- cast [Char] to [CChar] -- charsToCChars :: [Char] -> [CChar] -charsToCChars = map castCharToCChar +charsToCChars xs = map castCharToCChar xs +#endif -castCCharToChar :: CChar -> Char -castCCharToChar ch = unsafeChr (fromIntegral (fromIntegral ch :: Word8)) +----------------------------------------------------------------------------- +-- Wide strings -castCharToCChar :: Char -> CChar -castCharToCChar ch = fromIntegral (ord ch) +-- representation of wide strings in C +-- ----------------------------------- + +-- | A C wide string is a reference to an array of C wide characters +-- terminated by NUL. +type CWString = Ptr CWchar + +-- | A wide character string with explicit length information in 'CWchar's +-- instead of a terminating NUL (allowing NUL characters in the middle +-- of the string). +type CWStringLen = (Ptr CWchar, Int) + +-- | Marshal a NUL terminated C wide string into a Haskell string. +-- +peekCWString :: CWString -> IO String +peekCWString cp = do + cs <- peekArray0 wNUL cp + return (cWcharsToChars cs) + +-- | Marshal a C wide string with explicit length into a Haskell string. +-- +peekCWStringLen :: CWStringLen -> IO String +peekCWStringLen (cp, len) = do + cs <- peekArray len cp + return (cWcharsToChars cs) + +-- | Marshal a Haskell string into a NUL terminated C wide string. +-- +-- * the Haskell string may /not/ contain any NUL characters +-- +-- * new storage is allocated for the C wide string and must +-- be explicitly freed using 'Foreign.Marshal.Alloc.free' or +-- 'Foreign.Marshal.Alloc.finalizerFree'. +-- +newCWString :: String -> IO CWString +newCWString = newArray0 wNUL . charsToCWchars + +-- | Marshal a Haskell string into a C wide string (ie, wide character array) +-- with explicit length information. +-- +-- * new storage is allocated for the C wide string and must +-- be explicitly freed using 'Foreign.Marshal.Alloc.free' or +-- 'Foreign.Marshal.Alloc.finalizerFree'. +-- +newCWStringLen :: String -> IO CWStringLen +newCWStringLen str = newArrayLen (charsToCWchars str) + +-- | Marshal a Haskell string into a NUL terminated C wide string using +-- temporary storage. +-- +-- * the Haskell string may /not/ contain any NUL characters +-- +-- * the memory is freed when the subcomputation terminates (either +-- normally or via an exception), so the pointer to the temporary +-- storage must /not/ be used after this. +-- +withCWString :: String -> (CWString -> IO a) -> IO a +withCWString = withArray0 wNUL . charsToCWchars + +-- | Marshal a Haskell string into a NUL terminated C wide string using +-- temporary storage. +-- +-- * the Haskell string may /not/ contain any NUL characters +-- +-- * the memory is freed when the subcomputation terminates (either +-- normally or via an exception), so the pointer to the temporary +-- storage must /not/ be used after this. +-- +withCWStringLen :: String -> (CWStringLen -> IO a) -> IO a +withCWStringLen str f = + withArrayLen (charsToCWchars str) $ \ len ptr -> f (ptr, len) + +-- auxiliary definitions +-- ---------------------- + +wNUL :: CWchar +wNUL = 0 + +cWcharsToChars :: [CWchar] -> [Char] +charsToCWchars :: [Char] -> [CWchar] + +#ifdef mingw32_HOST_OS + +-- On Windows, wchar_t is 16 bits wide and CWString uses the UTF-16 encoding. + +-- coding errors generate Chars in the surrogate range +cWcharsToChars = map chr . fromUTF16 . map fromIntegral + where + fromUTF16 (c1:c2:wcs) + | 0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff = + ((c1 - 0xd800)*0x400 + (c2 - 0xdc00) + 0x10000) : fromUTF16 wcs + fromUTF16 (c:wcs) = c : fromUTF16 wcs + fromUTF16 [] = [] + +charsToCWchars = foldr utf16Char [] . map ord + where + utf16Char c wcs + | c < 0x10000 = fromIntegral c : wcs + | otherwise = let c' = c - 0x10000 in + fromIntegral (c' `div` 0x400 + 0xd800) : + fromIntegral (c' `mod` 0x400 + 0xdc00) : wcs + +#else /* !mingw32_HOST_OS */ + +cWcharsToChars xs = map castCWcharToChar xs +charsToCWchars xs = map castCharToCWchar xs + +-- These conversions only make sense if __STDC_ISO_10646__ is defined +-- (meaning that wchar_t is ISO 10646, aka Unicode) + +castCWcharToChar :: CWchar -> Char +castCWcharToChar ch = chr (fromIntegral ch ) + +castCharToCWchar :: Char -> CWchar +castCharToCWchar ch = fromIntegral (ord ch) + +#endif /* !mingw32_HOST_OS */