-- | Unicode characters are divided into letters, numbers, marks,
-- punctuation, symbols, separators (including spaces) and others
-- (including control characters).
- , isAscii, isLatin1, isControl, isSpace
- , isLower, isUpper, isAlpha, isAlphaNum, isPrint
+ , isControl, isSpace
+ , isLower, isUpper, isAlpha, isAlphaNum, isPrint
, isDigit, isOctDigit, isHexDigit
- , isAsciiUpper, isAsciiLower
, isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator
+ -- ** Subranges
+ , isAscii, isLatin1
+ , isAsciiUpper, isAsciiLower
+
+ -- ** Unicode general categories
, GeneralCategory(..), generalCategory
-- * Case conversion
#ifdef __GLASGOW_HASKELL__
import GHC.Base
+import GHC.Arr (Ix)
import GHC.Real (fromIntegral)
import GHC.Show
import GHC.Read (Read, readLitChar, lexLitChar)
#endif
#ifdef __HUGS__
+import Hugs.Prelude (Ix)
import Hugs.Char
#endif
import Prelude
import Prelude(Char,String)
import Char
+import Ix
import NHC.FFI (CInt)
foreign import ccall unsafe "WCsubst.h u_gencat" wgencat :: CInt -> Int
#endif
| Surrogate -- ^ Cs: Other, Surrogate
| PrivateUse -- ^ Co: Other, Private Use
| NotAssigned -- ^ Cn: Other, Not Assigned
- deriving (Eq, Ord, Enum, Read, Show, Bounded)
+ deriving (Eq, Ord, Enum, Read, Show, Bounded, Ix)
--- | Retrieves the general Unicode category of the character.
+-- | The Unicode general category of the character.
generalCategory :: Char -> GeneralCategory
#if defined(__GLASGOW_HASKELL__) || defined(__NHC__)
generalCategory c = toEnum (wgencat (fromIntegral (ord c)))
-- derived character classifiers
+-- | Selects alphabetic Unicode characters (lower-case, upper-case and
+-- title-case letters, plus letters of caseless scripts and modifier letters).
+-- This function is equivalent to 'Data.Char.isAlpha'.
isLetter :: Char -> Bool
isLetter c = case generalCategory c of
UppercaseLetter -> True
OtherLetter -> True
_ -> False
+-- | Selects Unicode mark characters, e.g. accents and the like, which
+-- combine with preceding letters.
isMark :: Char -> Bool
isMark c = case generalCategory c of
NonSpacingMark -> True
EnclosingMark -> True
_ -> False
+-- | Selects Unicode numeric characters, including digits from various
+-- scripts, Roman numerals, etc.
isNumber :: Char -> Bool
isNumber c = case generalCategory c of
DecimalNumber -> True
OtherNumber -> True
_ -> False
+-- | Selects Unicode punctuation characters, including various kinds
+-- of connectors, brackets and quotes.
isPunctuation :: Char -> Bool
isPunctuation c = case generalCategory c of
ConnectorPunctuation -> True
OtherPunctuation -> True
_ -> False
+-- | Selects Unicode symbol characters, including mathematical and
+-- currency symbols.
isSymbol :: Char -> Bool
isSymbol c = case generalCategory c of
MathSymbol -> True
OtherSymbol -> True
_ -> False
+-- | Selects Unicode space and separator characters.
isSeparator :: Char -> Bool
isSeparator c = case generalCategory c of
Space -> True