projects
/
haskell-directory.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Fix the type of wgencat
[haskell-directory.git]
/
Data
/
Char.hs
diff --git
a/Data/Char.hs
b/Data/Char.hs
index
c59ec00
..
f770999
100644
(file)
--- a/
Data/Char.hs
+++ b/
Data/Char.hs
@@
-23,12
+23,16
@@
module Data.Char
-- | Unicode characters are divided into letters, numbers, marks,
-- punctuation, symbols, separators (including spaces) and others
-- (including control characters).
-- | Unicode characters are divided into letters, numbers, marks,
-- punctuation, symbols, separators (including spaces) and others
-- (including control characters).
- , isAscii, isLatin1, isControl, isSpace
- , isLower, isUpper, isAlpha, isAlphaNum, isPrint
+ , isControl, isSpace
+ , isLower, isUpper, isAlpha, isAlphaNum, isPrint
, isDigit, isOctDigit, isHexDigit
, isDigit, isOctDigit, isHexDigit
- , isAsciiUpper, isAsciiLower
, isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator
, isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator
+ -- ** Subranges
+ , isAscii, isLatin1
+ , isAsciiUpper, isAsciiLower
+
+ -- ** Unicode general categories
, GeneralCategory(..), generalCategory
-- * Case conversion
, GeneralCategory(..), generalCategory
-- * Case conversion
@@
-52,6
+56,7
@@
module Data.Char
#ifdef __GLASGOW_HASKELL__
import GHC.Base
#ifdef __GLASGOW_HASKELL__
import GHC.Base
+import GHC.Arr (Ix)
import GHC.Real (fromIntegral)
import GHC.Show
import GHC.Read (Read, readLitChar, lexLitChar)
import GHC.Real (fromIntegral)
import GHC.Show
import GHC.Read (Read, readLitChar, lexLitChar)
@@
-61,6
+66,7
@@
import GHC.Enum
#endif
#ifdef __HUGS__
#endif
#ifdef __HUGS__
+import Hugs.Prelude (Ix)
import Hugs.Char
#endif
import Hugs.Char
#endif
@@
-68,8
+74,9
@@
import Hugs.Char
import Prelude
import Prelude(Char,String)
import Char
import Prelude
import Prelude(Char,String)
import Char
+import Ix
import NHC.FFI (CInt)
import NHC.FFI (CInt)
-foreign import ccall unsafe "WCsubst.h u_gencat" wgencat :: CInt -> Int
+foreign import ccall unsafe "WCsubst.h u_gencat" wgencat :: CInt -> CInt
#endif
-- | Convert a single digit 'Char' to the corresponding 'Int'.
#endif
-- | Convert a single digit 'Char' to the corresponding 'Int'.
@@
-123,12
+130,12
@@
data GeneralCategory
| Surrogate -- ^ Cs: Other, Surrogate
| PrivateUse -- ^ Co: Other, Private Use
| NotAssigned -- ^ Cn: Other, Not Assigned
| Surrogate -- ^ Cs: Other, Surrogate
| PrivateUse -- ^ Co: Other, Private Use
| NotAssigned -- ^ Cn: Other, Not Assigned
- deriving (Eq, Ord, Enum, Read, Show, Bounded)
+ deriving (Eq, Ord, Enum, Read, Show, Bounded, Ix)
--- | Retrieves the general Unicode category of the character.
+-- | The Unicode general category of the character.
generalCategory :: Char -> GeneralCategory
#if defined(__GLASGOW_HASKELL__) || defined(__NHC__)
generalCategory :: Char -> GeneralCategory
#if defined(__GLASGOW_HASKELL__) || defined(__NHC__)
-generalCategory c = toEnum (wgencat (fromIntegral (ord c)))
+generalCategory c = toEnum $ fromIntegral $ wgencat $ fromIntegral $ ord c
#endif
#ifdef __HUGS__
generalCategory c = toEnum (primUniGenCat c)
#endif
#ifdef __HUGS__
generalCategory c = toEnum (primUniGenCat c)
@@
-136,6
+143,9
@@
generalCategory c = toEnum (primUniGenCat c)
-- derived character classifiers
-- derived character classifiers
+-- | Selects alphabetic Unicode characters (lower-case, upper-case and
+-- title-case letters, plus letters of caseless scripts and modifier letters).
+-- This function is equivalent to 'Data.Char.isAlpha'.
isLetter :: Char -> Bool
isLetter c = case generalCategory c of
UppercaseLetter -> True
isLetter :: Char -> Bool
isLetter c = case generalCategory c of
UppercaseLetter -> True
@@
-145,6
+155,8
@@
isLetter c = case generalCategory c of
OtherLetter -> True
_ -> False
OtherLetter -> True
_ -> False
+-- | Selects Unicode mark characters, e.g. accents and the like, which
+-- combine with preceding letters.
isMark :: Char -> Bool
isMark c = case generalCategory c of
NonSpacingMark -> True
isMark :: Char -> Bool
isMark c = case generalCategory c of
NonSpacingMark -> True
@@
-152,6
+164,8
@@
isMark c = case generalCategory c of
EnclosingMark -> True
_ -> False
EnclosingMark -> True
_ -> False
+-- | Selects Unicode numeric characters, including digits from various
+-- scripts, Roman numerals, etc.
isNumber :: Char -> Bool
isNumber c = case generalCategory c of
DecimalNumber -> True
isNumber :: Char -> Bool
isNumber c = case generalCategory c of
DecimalNumber -> True
@@
-159,6
+173,8
@@
isNumber c = case generalCategory c of
OtherNumber -> True
_ -> False
OtherNumber -> True
_ -> False
+-- | Selects Unicode punctuation characters, including various kinds
+-- of connectors, brackets and quotes.
isPunctuation :: Char -> Bool
isPunctuation c = case generalCategory c of
ConnectorPunctuation -> True
isPunctuation :: Char -> Bool
isPunctuation c = case generalCategory c of
ConnectorPunctuation -> True
@@
-170,6
+186,8
@@
isPunctuation c = case generalCategory c of
OtherPunctuation -> True
_ -> False
OtherPunctuation -> True
_ -> False
+-- | Selects Unicode symbol characters, including mathematical and
+-- currency symbols.
isSymbol :: Char -> Bool
isSymbol c = case generalCategory c of
MathSymbol -> True
isSymbol :: Char -> Bool
isSymbol c = case generalCategory c of
MathSymbol -> True
@@
-178,6
+196,7
@@
isSymbol c = case generalCategory c of
OtherSymbol -> True
_ -> False
OtherSymbol -> True
_ -> False
+-- | Selects Unicode space and separator characters.
isSeparator :: Char -> Bool
isSeparator c = case generalCategory c of
Space -> True
isSeparator :: Char -> Bool
isSeparator c = case generalCategory c of
Space -> True