X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Flib%2Fcompat%2FCompat%2FUnicode.hs;fp=ghc%2Flib%2Fcompat%2FCompat%2FUnicode.hs;h=4765511954783d005b2d7b9c470df5bd42c9d107;hb=8e59ba46e26979cc11fa71e3f67aebbe6da4e8d6;hp=0000000000000000000000000000000000000000;hpb=080c9600d0ca63cc4125c6d2957ed8336b311c76;p=ghc-hetmet.git diff --git a/ghc/lib/compat/Compat/Unicode.hs b/ghc/lib/compat/Compat/Unicode.hs new file mode 100644 index 0000000..4765511 --- /dev/null +++ b/ghc/lib/compat/Compat/Unicode.hs @@ -0,0 +1,57 @@ +{-# OPTIONS -cpp #-} +module Compat.Unicode ( + GeneralCategory(..), generalCategory, + ) where + +#if __GLASGOW_HASKELL__ > 604 + +import Data.Char (GeneralCategory(..), generalCategory) + +#else + +import Foreign.C ( CInt ) +import Data.Char ( ord ) + +-- | Unicode General Categories (column 2 of the UnicodeData table) +-- in the order they are listed in the Unicode standard. + +data GeneralCategory + = UppercaseLetter -- Lu Letter, Uppercase + | LowercaseLetter -- Ll Letter, Lowercase + | TitlecaseLetter -- Lt Letter, Titlecase + | ModifierLetter -- Lm Letter, Modifier + | OtherLetter -- Lo Letter, Other + | NonSpacingMark -- Mn Mark, Non-Spacing + | SpacingCombiningMark -- Mc Mark, Spacing Combining + | EnclosingMark -- Me Mark, Enclosing + | DecimalNumber -- Nd Number, Decimal + | LetterNumber -- Nl Number, Letter + | OtherNumber -- No Number, Other + | ConnectorPunctuation -- Pc Punctuation, Connector + | DashPunctuation -- Pd Punctuation, Dash + | OpenPunctuation -- Ps Punctuation, Open + | ClosePunctuation -- Pe Punctuation, Close + | InitialQuote -- Pi Punctuation, Initial quote + | FinalQuote -- Pf Punctuation, Final quote + | OtherPunctuation -- Po Punctuation, Other + | MathSymbol -- Sm Symbol, Math + | CurrencySymbol -- Sc Symbol, Currency + | ModifierSymbol -- Sk Symbol, Modifier + | OtherSymbol -- So Symbol, Other + | Space -- Zs Separator, Space + | LineSeparator -- Zl Separator, Line + | ParagraphSeparator -- Zp Separator, Paragraph + | Control -- Cc Other, Control + | Format -- Cf Other, Format + | Surrogate -- Cs Other, Surrogate + | PrivateUse -- Co Other, Private Use + | NotAssigned -- Cn Other, Not Assigned + deriving (Eq, Ord, Enum, Read, Show, Bounded) + +-- | Retrieves the general Unicode category of the character. +generalCategory :: Char -> GeneralCategory +generalCategory c = toEnum (wgencat (fromIntegral (ord c))) + +foreign import ccall unsafe "u_gencat" + wgencat :: CInt -> Int +#endif