X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=GHC%2FUnicode.hs;h=da74685c0adb8fe42c703340b8d53f1a6ed5029b;hb=62de77e9a5d1f19ab333f1796437e1b16f78d23e;hp=6a4732b6c1986ce86f9d63144da73fdab81495a4;hpb=0a41af38169035a4359c0c29bc1219af564dce64;p=ghc-base.git diff --git a/GHC/Unicode.hs b/GHC/Unicode.hs index 6a4732b..da74685 100644 --- a/GHC/Unicode.hs +++ b/GHC/Unicode.hs @@ -2,7 +2,7 @@ {-# OPTIONS -#include "WCsubst.h" #-} ----------------------------------------------------------------------------- -- | --- Module : GHC.Unicde +-- Module : GHC.Unicode -- Copyright : (c) The University of Glasgow, 2003 -- License : see libraries/base/LICENSE -- @@ -17,20 +17,13 @@ ----------------------------------------------------------------------------- module GHC.Unicode ( - GeneralCategory (..), - generalCategory, isAscii, isLatin1, isControl, isAsciiUpper, isAsciiLower, isPrint, isSpace, isUpper, isLower, isAlpha, isDigit, isOctDigit, isHexDigit, isAlphaNum, toUpper, toLower, toTitle, - isLetter, -- :: Char -> Bool - isMark, -- :: Char -> Bool - isNumber, -- :: Char -> Bool - isPunctuation, -- :: Char -> Bool - isSymbol, -- :: Char -> Bool - isSeparator, -- :: Char -> Bool + wgencat, ) where import GHC.Base @@ -38,106 +31,9 @@ import GHC.Real (fromIntegral) import GHC.Int import GHC.Word import GHC.Num (fromInteger) -import GHC.Read -import GHC.Show -import GHC.Enum #include "HsBaseConfig.h" --- | Unicode General Categories (column 2 of the UnicodeData table) --- in the order they are listed in the Unicode standard. - -data GeneralCategory - = UppercaseLetter -- Lu Letter, Uppercase - | LowercaseLetter -- Ll Letter, Lowercase - | TitlecaseLetter -- Lt Letter, Titlecase - | ModifierLetter -- Lm Letter, Modifier - | OtherLetter -- Lo Letter, Other - | NonSpacingMark -- Mn Mark, Non-Spacing - | SpacingCombiningMark -- Mc Mark, Spacing Combining - | EnclosingMark -- Me Mark, Enclosing - | DecimalNumber -- Nd Number, Decimal - | LetterNumber -- Nl Number, Letter - | OtherNumber -- No Number, Other - | ConnectorPunctuation -- Pc Punctuation, Connector - | DashPunctuation -- Pd Punctuation, Dash - | OpenPunctuation -- Ps Punctuation, Open - | ClosePunctuation -- Pe Punctuation, Close - | InitialQuote -- Pi Punctuation, Initial quote - | FinalQuote -- Pf Punctuation, Final quote - | OtherPunctuation -- Po Punctuation, Other - | MathSymbol -- Sm Symbol, Math - | CurrencySymbol -- Sc Symbol, Currency - | ModifierSymbol -- Sk Symbol, Modifier - | OtherSymbol -- So Symbol, Other - | Space -- Zs Separator, Space - | LineSeparator -- Zl Separator, Line - | ParagraphSeparator -- Zp Separator, Paragraph - | Control -- Cc Other, Control - | Format -- Cf Other, Format - | Surrogate -- Cs Other, Surrogate - | PrivateUse -- Co Other, Private Use - | NotAssigned -- Cn Other, Not Assigned - deriving (Eq, Ord, Enum, Read, Show, Bounded) - --- | Retrieves the general Unicode category of the character. -generalCategory :: Char -> GeneralCategory -generalCategory c = toEnum (wgencat (fromIntegral (ord c))) - --- ------------------------------------------------------------------------ --- These are copied from Hugs Unicode.hs - --- derived character classifiers - -isLetter :: Char -> Bool -isLetter c = case generalCategory c of - UppercaseLetter -> True - LowercaseLetter -> True - TitlecaseLetter -> True - ModifierLetter -> True - OtherLetter -> True - _ -> False - -isMark :: Char -> Bool -isMark c = case generalCategory c of - NonSpacingMark -> True - SpacingCombiningMark -> True - EnclosingMark -> True - _ -> False - -isNumber :: Char -> Bool -isNumber c = case generalCategory c of - DecimalNumber -> True - LetterNumber -> True - OtherNumber -> True - _ -> False - -isPunctuation :: Char -> Bool -isPunctuation c = case generalCategory c of - ConnectorPunctuation -> True - DashPunctuation -> True - OpenPunctuation -> True - ClosePunctuation -> True - InitialQuote -> True - FinalQuote -> True - OtherPunctuation -> True - _ -> False - -isSymbol :: Char -> Bool -isSymbol c = case generalCategory c of - MathSymbol -> True - CurrencySymbol -> True - ModifierSymbol -> True - OtherSymbol -> True - _ -> False - -isSeparator :: Char -> Bool -isSeparator c = case generalCategory c of - Space -> True - LineSeparator -> True - ParagraphSeparator -> True - _ -> False - -- | Selects the first 128 characters of the Unicode character set, -- corresponding to the ASCII character set. isAscii :: Char -> Bool