From: ross Date: Wed, 16 Mar 2005 13:27:04 +0000 (+0000) Subject: [project @ 2005-03-16 13:27:03 by ross] X-Git-Tag: arity-anal-branch-point~20 X-Git-Url: http://git.megacz.com/?a=commitdiff_plain;h=ca4bf46e5ccd5d1e007ac8051b6119ba99195f33;p=haskell-directory.git [project @ 2005-03-16 13:27:03 by ross] Data.Char docs, and hide GHC.Unicode --- diff --git a/Data/Char.hs b/Data/Char.hs index c59ec00..48de798 100644 --- a/Data/Char.hs +++ b/Data/Char.hs @@ -23,12 +23,16 @@ module Data.Char -- | Unicode characters are divided into letters, numbers, marks, -- punctuation, symbols, separators (including spaces) and others -- (including control characters). - , isAscii, isLatin1, isControl, isSpace - , isLower, isUpper, isAlpha, isAlphaNum, isPrint + , isControl, isSpace + , isLower, isUpper, isAlpha, isAlphaNum, isPrint , isDigit, isOctDigit, isHexDigit - , isAsciiUpper, isAsciiLower , isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator + -- ** Subranges + , isAscii, isLatin1 + , isAsciiUpper, isAsciiLower + + -- ** Unicode general categories , GeneralCategory(..), generalCategory -- * Case conversion @@ -125,7 +129,7 @@ data GeneralCategory | NotAssigned -- ^ Cn: Other, Not Assigned deriving (Eq, Ord, Enum, Read, Show, Bounded) --- | Retrieves the general Unicode category of the character. +-- | The Unicode general category of the character. generalCategory :: Char -> GeneralCategory #if defined(__GLASGOW_HASKELL__) || defined(__NHC__) generalCategory c = toEnum (wgencat (fromIntegral (ord c))) @@ -136,6 +140,9 @@ generalCategory c = toEnum (primUniGenCat c) -- derived character classifiers +-- | Selects alphabetic Unicode characters (lower-case, upper-case and +-- title-case letters, plus letters of caseless scripts and modifier letters). +-- This function is equivalent to 'Data.Char.isAlpha'.
isLetter :: Char -> Bool isLetter c = case generalCategory c of UppercaseLetter -> True @@ -145,6 +152,8 @@ isLetter c = case generalCategory c of OtherLetter -> True _ -> False +-- | Selects Unicode mark characters, e.g. accents and the like, which +-- combine with preceding letters. isMark :: Char -> Bool isMark c = case generalCategory c of NonSpacingMark -> True @@ -152,6 +161,8 @@ isMark c = case generalCategory c of EnclosingMark -> True _ -> False +-- | Selects Unicode numeric characters, including digits from various +-- scripts, Roman numerals, etc. isNumber :: Char -> Bool isNumber c = case generalCategory c of DecimalNumber -> True @@ -159,6 +170,8 @@ isNumber c = case generalCategory c of OtherNumber -> True _ -> False +-- | Selects Unicode punctuation characters, including various kinds +-- of connectors, brackets and quotes. isPunctuation :: Char -> Bool isPunctuation c = case generalCategory c of ConnectorPunctuation -> True @@ -170,6 +183,8 @@ isPunctuation c = case generalCategory c of OtherPunctuation -> True _ -> False +-- | Selects Unicode symbol characters, including mathematical and +-- currency symbols. isSymbol :: Char -> Bool isSymbol c = case generalCategory c of MathSymbol -> True @@ -178,6 +193,7 @@ isSymbol c = case generalCategory c of OtherSymbol -> True _ -> False +-- | Selects Unicode space and separator characters. isSeparator :: Char -> Bool isSeparator c = case generalCategory c of Space -> True diff --git a/GHC/Base.lhs b/GHC/Base.lhs index 5b6a676..32bf498 100644 --- a/GHC/Base.lhs +++ b/GHC/Base.lhs @@ -560,7 +560,8 @@ data Ordering = LT | EQ | GT deriving (Eq, Ord) type String = [Char] {-| The character type 'Char' is an enumeration whose values represent -Unicode (or equivalently ISO 10646) characters. +Unicode (or equivalently ISO\/IEC 10646) characters +(see <http://www.unicode.org/> for details).
This set extends the ISO 8859-1 (Latin-1) character set (the first 256 characters), which is itself an extension of the ASCII character set (the first 128 characters). diff --git a/GHC/Unicode.hs b/GHC/Unicode.hs index 50fef9a..e22fae5 100644 --- a/GHC/Unicode.hs +++ b/GHC/Unicode.hs @@ -16,6 +16,7 @@ -- ----------------------------------------------------------------------------- +-- #hide module GHC.Unicode ( isAscii, isLatin1, isControl, isAsciiUpper, isAsciiLower, @@ -44,8 +45,14 @@ isAscii c = c < '\x80' isLatin1 :: Char -> Bool isLatin1 c = c <= '\xff' -isAsciiUpper, isAsciiLower :: Char -> Bool +-- | Selects ASCII lower-case letters, +-- i.e. characters satisfying both 'isAscii' and 'isLower'. +isAsciiLower :: Char -> Bool isAsciiLower c = c >= 'a' && c <= 'z' + +-- | Selects ASCII upper-case letters, +-- i.e. characters satisfying both 'isAscii' and 'isUpper'. +isAsciiUpper :: Char -> Bool isAsciiUpper c = c >= 'A' && c <= 'Z' -- | Selects control characters, which are the non-printing characters of @@ -71,15 +78,17 @@ isSpace c = c == ' ' || c == '\xa0' || iswspace (fromIntegral (ord c)) /= 0 --- | Selects alphabetic Unicode characters (letters) that are not lower-case. --- (In Unicode terms, this includes letters in upper and title cases, --- as well as modifier letters and other letters.) +-- | Selects upper-case or title-case alphabetic Unicode characters (letters). +-- Title case is used by a small number of letter ligatures like the +-- single-character form of /Lj/. isUpper :: Char -> Bool -- | Selects lower-case alphabetic Unicode characters (letters). isLower :: Char -> Bool --- | Selects alphabetic Unicode characters (letters). +-- | Selects alphabetic Unicode characters (lower-case, upper-case and +-- title-case letters, plus letters of caseless scripts and modifier letters). +-- This function is equivalent to 'Data.Char.isLetter'. isAlpha :: Char -> Bool -- | Selects alphabetic or numeric digit Unicode characters.
@@ -103,19 +112,18 @@ isHexDigit :: Char -> Bool isHexDigit c = isDigit c || c >= 'A' && c <= 'F' || c >= 'a' && c <= 'f' --- | Convert a letter to the corresponding upper-case letter, leaving any --- other character unchanged. Any Unicode letter which has an upper-case --- equivalent is transformed. +-- | Convert a letter to the corresponding upper-case letter, if any. +-- Any other character is returned unchanged. toUpper :: Char -> Char --- | Convert a letter to the corresponding lower-case letter, leaving any --- other character unchanged. Any Unicode letter which has a lower-case --- equivalent is transformed. +-- | Convert a letter to the corresponding lower-case letter, if any. +-- Any other character is returned unchanged. toLower :: Char -> Char --- | Convert a letter to the corresponding title-case letter, leaving any --- other character unchanged. Any Unicode letter which has a lower-case --- equivalent is transformed. +-- | Convert a letter to the corresponding title-case or upper-case +-- letter, if any. (Title case differs from upper case only for a small +-- number of ligature letters.) +-- Any other character is returned unchanged. toTitle :: Char -> Char -- -----------------------------------------------------------------------------