+-- representation of wide strings in C
+-- -----------------------------------
+
+-- | A C wide string is a reference to an array of C wide characters
+-- terminated by NUL.
+type CWString = Ptr CWchar
+
+-- | A wide character string with explicit length information in 'CWchar's
+-- instead of a terminating NUL (allowing NUL characters in the middle
+-- of the string).
+type CWStringLen = (Ptr CWchar, Int)
+
+-- | Marshal a NUL terminated C wide string into a Haskell string.
+--
+peekCWString :: CWString -> IO String
+peekCWString cp = do
+ cs <- peekArray0 wNUL cp
+ return (cWcharsToChars cs)
+
+-- | Marshal a C wide string with explicit length into a Haskell string.
+--
+peekCWStringLen :: CWStringLen -> IO String
+peekCWStringLen (cp, len) = do
+ cs <- peekArray len cp
+ return (cWcharsToChars cs)
+
+-- | Marshal a Haskell string into a NUL terminated C wide string.
+--
+-- * the Haskell string may /not/ contain any NUL characters
+--
+-- * new storage is allocated for the C wide string and must
+-- be explicitly freed using 'Foreign.Marshal.Alloc.free' or
+-- 'Foreign.Marshal.Alloc.finalizerFree'.
+--
+newCWString :: String -> IO CWString
+newCWString = newArray0 wNUL . charsToCWchars
+
+-- | Marshal a Haskell string into a C wide string (ie, wide character array)
+-- with explicit length information.
+--
+-- * new storage is allocated for the C wide string and must
+-- be explicitly freed using 'Foreign.Marshal.Alloc.free' or
+-- 'Foreign.Marshal.Alloc.finalizerFree'.
+--
+newCWStringLen :: String -> IO CWStringLen
+newCWStringLen str = newArrayLen (charsToCWchars str)
+
+-- | Marshal a Haskell string into a NUL terminated C wide string using
+-- temporary storage.
+--
+-- * the Haskell string may /not/ contain any NUL characters
+--
+-- * the memory is freed when the subcomputation terminates (either
+-- normally or via an exception), so the pointer to the temporary
+-- storage must /not/ be used after this.
+--
+withCWString :: String -> (CWString -> IO a) -> IO a
+withCWString = withArray0 wNUL . charsToCWchars
+
+-- | Marshal a Haskell string into a NUL terminated C wide string using
+-- temporary storage.
+--
+-- * the Haskell string may /not/ contain any NUL characters
+--
+-- * the memory is freed when the subcomputation terminates (either
+-- normally or via an exception), so the pointer to the temporary
+-- storage must /not/ be used after this.
+--
+withCWStringLen :: String -> (CWStringLen -> IO a) -> IO a
+withCWStringLen str f =
+ withArrayLen (charsToCWchars str) $ \ len ptr -> f (ptr, len)
+
+-- auxiliary definitions
+-- ----------------------
+
+wNUL :: CWchar
+wNUL = 0
+
+cWcharsToChars :: [CWchar] -> [Char]
+charsToCWchars :: [Char] -> [CWchar]
+
+#ifdef mingw32_HOST_OS
+
+-- On Windows, wchar_t is 16 bits wide and CWString uses the UTF-16 encoding.
+
+-- coding errors generate Chars in the surrogate range
+cWcharsToChars = map chr . fromUTF16 . map fromIntegral
+ where
+ fromUTF16 (c1:c2:wcs)
+ | 0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff =
+ ((c1 - 0xd800)*0x400 + (c2 - 0xdc00) + 0x10000) : fromUTF16 wcs
+ fromUTF16 (c:wcs) = c : fromUTF16 wcs
+ fromUTF16 [] = []
+
+charsToCWchars = foldr utf16Char [] . map ord
+ where
+ utf16Char c wcs
+ | c < 0x10000 = fromIntegral c : wcs
+ | otherwise = let c' = c - 0x10000 in
+ fromIntegral (c' `div` 0x400 + 0xd800) :
+ fromIntegral (c' `mod` 0x400 + 0xdc00) : wcs
+
+#else /* !mingw32_HOST_OS */
+
+cWcharsToChars xs = map castCWcharToChar xs
+charsToCWchars xs = map castCharToCWchar xs
+
+-- These conversions only make sense if __STDC_ISO_10646__ is defined
+-- (meaning that wchar_t is ISO 10646, aka Unicode)
+
+castCWcharToChar :: CWchar -> Char
+castCWcharToChar ch = chr (fromIntegral ch )
+
+castCharToCWchar :: Char -> CWchar
+castCharToCWchar ch = fromIntegral (ord ch)
+
+#endif /* !mingw32_HOST_OS */