-{-# OPTIONS_GHC -fno-implicit-prelude #-}
+{-# OPTIONS_GHC -XNoImplicitPrelude #-}
-----------------------------------------------------------------------------
-- |
-- Module : Data.Char
--
-----------------------------------------------------------------------------
-module Data.Char
+module Data.Char
(
Char
-- | Unicode characters are divided into letters, numbers, marks,
-- punctuation, symbols, separators (including spaces) and others
-- (including control characters).
- , isAscii, isLatin1, isControl, isSpace
- , isLower, isUpper, isAlpha, isAlphaNum, isPrint
+ , isControl, isSpace
+ , isLower, isUpper, isAlpha, isAlphaNum, isPrint
, isDigit, isOctDigit, isHexDigit
- , isAsciiUpper, isAsciiLower
-#ifndef __NHC__
, isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator
+ -- ** Subranges
+ , isAscii, isLatin1
+ , isAsciiUpper, isAsciiLower
+
+ -- ** Unicode general categories
, GeneralCategory(..), generalCategory
-#endif
-- * Case conversion
, toUpper, toLower, toTitle -- :: Char -> Char
-- * String representations
, showLitChar -- :: Char -> ShowS
- , lexLitChar -- :: ReadS String
+ , lexLitChar -- :: ReadS String
, readLitChar -- :: ReadS Char
-- Implementation checked wrt. Haskell 98 lib report, 1/99.
#ifdef __GLASGOW_HASKELL__
import GHC.Base
+import GHC.Arr (Ix)
import GHC.Real (fromIntegral)
import GHC.Show
import GHC.Read (Read, readLitChar, lexLitChar)
#endif
#ifdef __HUGS__
+import Hugs.Prelude (Ix)
import Hugs.Char
#endif
import Prelude
import Prelude(Char,String)
import Char
+import Ix
+import NHC.FFI (CInt)
+foreign import ccall unsafe "WCsubst.h u_gencat" wgencat :: CInt -> CInt
#endif
-- | Convert a single digit 'Char' to the corresponding 'Int'
-- (i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@).
-- Hexadecimal letters are accepted in either case and map to 10..15.
-- Any character matched by none of the guards falls through to 'error',
-- whose message includes the offending character.
digitToInt :: Char -> Int
digitToInt c
- | isDigit c = ord c - ord '0'
+ | isDigit c = ord c - ord '0'
| c >= 'a' && c <= 'f' = ord c - ord 'a' + 10
| c >= 'A' && c <= 'F' = ord c - ord 'A' + 10
- | otherwise = error ("Char.digitToInt: not a digit " ++ show c) -- sigh
+ | otherwise = error ("Char.digitToInt: not a digit " ++ show c) -- sigh
#ifndef __GLASGOW_HASKELL__
isAsciiUpper, isAsciiLower :: Char -> Bool
isAsciiUpper c = c >= 'A' && c <= 'Z'
#endif
-#ifndef __NHC__
-- | Unicode General Categories (column 2 of the UnicodeData table)
-- in the order they are listed in the Unicode standard.
| Surrogate -- ^ Cs: Other, Surrogate
| PrivateUse -- ^ Co: Other, Private Use
| NotAssigned -- ^ Cn: Other, Not Assigned
- deriving (Eq, Ord, Enum, Read, Show, Bounded)
+ deriving (Eq, Ord, Enum, Read, Show, Bounded, Ix)
--- | Retrieves the general Unicode category of the character.
+-- | The Unicode general category of the character.
generalCategory :: Char -> GeneralCategory
-#ifdef __GLASGOW_HASKELL__
-generalCategory c = toEnum (wgencat (fromIntegral (ord c)))
+#if defined(__GLASGOW_HASKELL__) || defined(__NHC__)
+generalCategory c = toEnum $ fromIntegral $ wgencat $ fromIntegral $ ord c
#endif
#ifdef __HUGS__
generalCategory c = toEnum (primUniGenCat c)
-- derived character classifiers
+-- | Selects alphabetic Unicode characters (lower-case, upper-case and
+-- title-case letters, plus letters of caseless scripts and modifier letters).
+-- This function is equivalent to 'Data.Char.isAlpha'.
isLetter :: Char -> Bool
isLetter c = case generalCategory c of
UppercaseLetter -> True
OtherLetter -> True
_ -> False
+-- | Selects Unicode mark characters, e.g. accents and the like, which
+-- combine with preceding letters.
isMark :: Char -> Bool
isMark c = case generalCategory c of
NonSpacingMark -> True
EnclosingMark -> True
_ -> False
+-- | Selects Unicode numeric characters, including digits from various
+-- scripts, Roman numerals, etc.
isNumber :: Char -> Bool
isNumber c = case generalCategory c of
DecimalNumber -> True
OtherNumber -> True
_ -> False
+-- | Selects Unicode punctuation characters, including various kinds
+-- of connectors, brackets and quotes.
isPunctuation :: Char -> Bool
isPunctuation c = case generalCategory c of
ConnectorPunctuation -> True
OtherPunctuation -> True
_ -> False
+-- | Selects Unicode symbol characters, including mathematical and
+-- currency symbols.
isSymbol :: Char -> Bool
isSymbol c = case generalCategory c of
MathSymbol -> True
OtherSymbol -> True
_ -> False
+-- | Selects Unicode space and separator characters.
isSeparator :: Char -> Bool
isSeparator c = inSeparatorClass (generalCategory c)
  where
    -- True for exactly the 'Space', 'LineSeparator' and
    -- 'ParagraphSeparator' categories; every other category is rejected.
    inSeparatorClass Space              = True
    inSeparatorClass LineSeparator      = True
    inSeparatorClass ParagraphSeparator = True
    inSeparatorClass _                  = False
-#endif /* !__NHC__ */
#ifdef __NHC__
-- dummy implementation