X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=GHC%2FUnicode.hs;h=da74685c0adb8fe42c703340b8d53f1a6ed5029b;hb=62de77e9a5d1f19ab333f1796437e1b16f78d23e;hp=2d836a206b7ecd3eaf7d42acdd67c1303e0effee;hpb=057101550d65351f0e84b6a537ee5c95245aaa6e;p=ghc-base.git diff --git a/GHC/Unicode.hs b/GHC/Unicode.hs index 2d836a2..da74685 100644 --- a/GHC/Unicode.hs +++ b/GHC/Unicode.hs @@ -1,7 +1,8 @@ {-# OPTIONS -fno-implicit-prelude #-} +{-# OPTIONS -#include "WCsubst.h" #-} ----------------------------------------------------------------------------- -- | --- Module : GHC.Unicde +-- Module : GHC.Unicode -- Copyright : (c) The University of Glasgow, 2003 -- License : see libraries/base/LICENSE -- @@ -21,7 +22,8 @@ module GHC.Unicode ( isPrint, isSpace, isUpper, isLower, isAlpha, isDigit, isOctDigit, isHexDigit, isAlphaNum, - toUpper, toLower, + toUpper, toLower, toTitle, + wgencat, ) where import GHC.Base @@ -30,7 +32,7 @@ import GHC.Int import GHC.Word import GHC.Num (fromInteger) -#include "config.h" +#include "HsBaseConfig.h" -- | Selects the first 128 characters of the Unicode character set, -- corresponding to the ASCII character set. @@ -66,7 +68,8 @@ isSpace c = c == ' ' || c == '\r' || c == '\f' || c == '\v' || - c == '\xa0' + c == '\xa0' || + iswspace (fromIntegral (ord c)) /= 0 -- | Selects alphabetic Unicode characters (letters) that are not lower-case. -- (In Unicode terms, this includes letters in upper and title cases, @@ -88,6 +91,7 @@ isAlphaNum :: Char -> Bool -- | Selects ASCII digits, i.e. @\'0\'@..@\'9\'@. isDigit :: Char -> Bool +isDigit c = c >= '0' && c <= '9' -- | Selects ASCII octal digits, i.e. @\'0\'@..@\'7\'@. isOctDigit :: Char -> Bool @@ -109,24 +113,22 @@ toUpper :: Char -> Char -- equivalent is transformed. toLower :: Char -> Char --- ----------------------------------------------------------------------------- --- Win32 implementation +-- | Convert a letter to the corresponding title-case letter, leaving any +-- other character unchanged. Any Unicode letter which has a lower-case +-- equivalent is transformed. +toTitle :: Char -> Char -#if (defined(HAVE_WCTYPE_H) && HAVE_ISWSPACE && defined(HTYPE_WINT_T)) || mingw32_TARGET_OS +-- ----------------------------------------------------------------------------- +-- Implementation with the supplied auto-generated Unicode character properties +-- table (default) --- Use the wide-char classification functions if available. Glibc --- seems to implement these properly, even for chars > 0xffff, as long --- as you call setlocale() to set the locale to something other than --- "C". Therefore, we call setlocale() in hs_init(). +#if 1 --- Win32 uses UTF-16, so presumably the system-supplied iswlower() and --- friends won't work properly with characters > 0xffff. These --- characters are represented as surrogate pairs in UTF-16. +-- Regardless of the O/S and Library, use the functions contained in WCsubst.c type WInt = HTYPE_WINT_T type CInt = HTYPE_INT -isDigit c = iswdigit (fromIntegral (ord c)) /= 0 isAlpha c = iswalpha (fromIntegral (ord c)) /= 0 isAlphaNum c = iswalnum (fromIntegral (ord c)) /= 0 --isSpace c = iswspace (fromIntegral (ord c)) /= 0 @@ -137,39 +139,46 @@ isLower c = iswlower (fromIntegral (ord c)) /= 0 toLower c = chr (fromIntegral (towlower (fromIntegral (ord c)))) toUpper c = chr (fromIntegral (towupper (fromIntegral (ord c)))) +toTitle c = chr (fromIntegral (towtitle (fromIntegral (ord c)))) + +foreign import ccall unsafe "u_iswdigit" + iswdigit :: CInt -> CInt -foreign import ccall unsafe "iswdigit" - iswdigit :: WInt -> CInt +foreign import ccall unsafe "u_iswalpha" + iswalpha :: CInt -> CInt -foreign import ccall unsafe "iswalpha" - iswalpha :: WInt -> CInt +foreign import ccall unsafe "u_iswalnum" + iswalnum :: CInt -> CInt -foreign import ccall unsafe "iswalnum" - iswalnum :: WInt -> CInt +foreign import ccall unsafe "u_iswcntrl" + iswcntrl :: CInt -> CInt -foreign import ccall unsafe "iswcntrl" - iswcntrl :: WInt -> CInt +foreign import ccall unsafe "u_iswspace" + iswspace :: CInt -> CInt -foreign import ccall unsafe "iswspace" - iswspace :: WInt -> CInt +foreign import ccall unsafe "u_iswprint" + iswprint :: CInt -> CInt -foreign import ccall unsafe "iswprint" - iswprint :: WInt -> CInt +foreign import ccall unsafe "u_iswlower" + iswlower :: CInt -> CInt -foreign import ccall unsafe "iswlower" - iswlower :: WInt -> CInt +foreign import ccall unsafe "u_iswupper" + iswupper :: CInt -> CInt -foreign import ccall unsafe "iswupper" - iswupper :: WInt -> CInt +foreign import ccall unsafe "u_towlower" + towlower :: CInt -> CInt -foreign import ccall unsafe "towlower" - towlower :: WInt -> WInt +foreign import ccall unsafe "u_towupper" + towupper :: CInt -> CInt -foreign import ccall unsafe "towupper" - towupper :: WInt -> WInt +foreign import ccall unsafe "u_towtitle" + towtitle :: CInt -> CInt + +foreign import ccall unsafe "u_gencat" + wgencat :: CInt -> Int -- ----------------------------------------------------------------------------- --- No libunicode, so fall back to the ASCII-only implementation +-- No libunicode, so fall back to the ASCII-only implementation (never used, indeed) #else @@ -188,7 +197,6 @@ isLower c = c >= 'a' && c <= 'z' || c >= '\xF8' && c <= '\xFF' isAlpha c = isLower c || isUpper c -isDigit c = c >= '0' && c <= '9' isAlphaNum c = isAlpha c || isDigit c -- Case-changing operations