X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Data%2FChar.hs;h=40052a760761b9e263efe5e030e900d251c2bb59;hb=HEAD;hp=fa0e8995db604772d61b35b6676a202027e31759;hpb=62de77e9a5d1f19ab333f1796437e1b16f78d23e;p=ghc-base.git diff --git a/Data/Char.hs b/Data/Char.hs index fa0e899..40052a7 100644 --- a/Data/Char.hs +++ b/Data/Char.hs @@ -1,4 +1,5 @@ -{-# OPTIONS_GHC -fno-implicit-prelude #-} +{-# LANGUAGE CPP, NoImplicitPrelude #-} + ----------------------------------------------------------------------------- -- | -- Module : Data.Char @@ -13,25 +14,25 @@ -- ----------------------------------------------------------------------------- -module Data.Char +module Data.Char ( Char - , String - -- * Character classification -- | Unicode characters are divided into letters, numbers, marks, -- punctuation, symbols, separators (including spaces) and others -- (including control characters). - , isAscii, isLatin1, isControl, isSpace - , isLower, isUpper, isAlpha, isAlphaNum, isPrint + , isControl, isSpace + , isLower, isUpper, isAlpha, isAlphaNum, isPrint , isDigit, isOctDigit, isHexDigit - , isAsciiUpper, isAsciiLower -#ifndef __NHC__ , isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator + -- ** Subranges + , isAscii, isLatin1 + , isAsciiUpper, isAsciiLower + + -- ** Unicode general categories , GeneralCategory(..), generalCategory -#endif -- * Case conversion , toUpper, toLower, toTitle -- :: Char -> Char @@ -46,7 +47,7 @@ module Data.Char -- * String representations , showLitChar -- :: Char -> ShowS - , lexLitChar -- :: ReadS String + , lexLitChar -- :: ReadS String , readLitChar -- :: ReadS Char -- Implementation checked wrt. Haskell 98 lib report, 1/99. @@ -54,6 +55,7 @@ module Data.Char #ifdef __GLASGOW_HASKELL__ import GHC.Base +import GHC.Arr (Ix) import GHC.Real (fromIntegral) import GHC.Show import GHC.Read (Read, readLitChar, lexLitChar) @@ -63,6 +65,7 @@ import GHC.Enum #endif #ifdef __HUGS__ +import Hugs.Prelude (Ix) import Hugs.Char #endif @@ -70,6 +73,9 @@ import Hugs.Char import Prelude import Prelude(Char,String) import Char +import Ix +import NHC.FFI (CInt) +foreign import ccall unsafe "WCsubst.h u_gencat" wgencat :: CInt -> CInt #endif -- | Convert a single digit 'Char' to the corresponding 'Int'. @@ -78,10 +84,10 @@ import Char -- (i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@). digitToInt :: Char -> Int digitToInt c - | isDigit c = ord c - ord '0' + | isDigit c = ord c - ord '0' | c >= 'a' && c <= 'f' = ord c - ord 'a' + 10 | c >= 'A' && c <= 'F' = ord c - ord 'A' + 10 - | otherwise = error ("Char.digitToInt: not a digit " ++ show c) -- sigh + | otherwise = error ("Char.digitToInt: not a digit " ++ show c) -- sigh #ifndef __GLASGOW_HASKELL__ isAsciiUpper, isAsciiLower :: Char -> Bool @@ -89,7 +95,6 @@ isAsciiLower c = c >= 'a' && c <= 'z' isAsciiUpper c = c >= 'A' && c <= 'Z' #endif -#ifndef __NHC__ -- | Unicode General Categories (column 2 of the UnicodeData table) -- in the order they are listed in the Unicode standard. @@ -124,12 +129,12 @@ data GeneralCategory | Surrogate -- ^ Cs: Other, Surrogate | PrivateUse -- ^ Co: Other, Private Use | NotAssigned -- ^ Cn: Other, Not Assigned - deriving (Eq, Ord, Enum, Read, Show, Bounded) + deriving (Eq, Ord, Enum, Read, Show, Bounded, Ix) --- | Retrieves the general Unicode category of the character. +-- | The Unicode general category of the character. generalCategory :: Char -> GeneralCategory -#ifdef __GLASGOW_HASKELL__ -generalCategory c = toEnum (wgencat (fromIntegral (ord c))) +#if defined(__GLASGOW_HASKELL__) || defined(__NHC__) +generalCategory c = toEnum $ fromIntegral $ wgencat $ fromIntegral $ ord c #endif #ifdef __HUGS__ generalCategory c = toEnum (primUniGenCat c) @@ -137,6 +142,9 @@ generalCategory c = toEnum (primUniGenCat c) -- derived character classifiers +-- | Selects alphabetic Unicode characters (lower-case, upper-case and +-- title-case letters, plus letters of caseless scripts and modifiers letters). +-- This function is equivalent to 'Data.Char.isAlpha'. isLetter :: Char -> Bool isLetter c = case generalCategory c of UppercaseLetter -> True @@ -146,6 +154,8 @@ isLetter c = case generalCategory c of OtherLetter -> True _ -> False +-- | Selects Unicode mark characters, e.g. accents and the like, which +-- combine with preceding letters. isMark :: Char -> Bool isMark c = case generalCategory c of NonSpacingMark -> True @@ -153,6 +163,8 @@ isMark c = case generalCategory c of EnclosingMark -> True _ -> False +-- | Selects Unicode numeric characters, including digits from various +-- scripts, Roman numerals, etc. isNumber :: Char -> Bool isNumber c = case generalCategory c of DecimalNumber -> True @@ -160,6 +172,8 @@ isNumber c = case generalCategory c of OtherNumber -> True _ -> False +-- | Selects Unicode punctuation characters, including various kinds +-- of connectors, brackets and quotes. isPunctuation :: Char -> Bool isPunctuation c = case generalCategory c of ConnectorPunctuation -> True @@ -171,6 +185,8 @@ isPunctuation c = case generalCategory c of OtherPunctuation -> True _ -> False +-- | Selects Unicode symbol characters, including mathematical and +-- currency symbols. isSymbol :: Char -> Bool isSymbol c = case generalCategory c of MathSymbol -> True @@ -179,13 +195,13 @@ isSymbol c = case generalCategory c of OtherSymbol -> True _ -> False +-- | Selects Unicode space and separator characters. isSeparator :: Char -> Bool isSeparator c = case generalCategory c of Space -> True LineSeparator -> True ParagraphSeparator -> True _ -> False -#endif /* !__NHC__ */ #ifdef __NHC__ -- dummy implementation