+import GHC.Read
+import GHC.Show
+import GHC.Enum
+
+#include "HsBaseConfig.h"
+
+-- | Unicode General Categories (column 2 of the UnicodeData table)
+-- in the order they are listed in the Unicode standard.
+
+data GeneralCategory
+ -- NOTE: constructor order is significant. 'generalCategory' turns a numeric
+ -- index into a constructor through the derived 'Enum' instance ('toEnum'),
+ -- so reordering, inserting or removing constructors changes which category
+ -- each index maps to. The index is produced by @wgencat@, which is not
+ -- defined in this section -- presumably it numbers categories in this same
+ -- order; confirm before changing anything here.
+ = UppercaseLetter -- ^ Lu: Letter, Uppercase
+ | LowercaseLetter -- ^ Ll: Letter, Lowercase
+ | TitlecaseLetter -- ^ Lt: Letter, Titlecase
+ | ModifierLetter -- ^ Lm: Letter, Modifier
+ | OtherLetter -- ^ Lo: Letter, Other
+ | NonSpacingMark -- ^ Mn: Mark, Non-Spacing
+ | SpacingCombiningMark -- ^ Mc: Mark, Spacing Combining
+ | EnclosingMark -- ^ Me: Mark, Enclosing
+ | DecimalNumber -- ^ Nd: Number, Decimal
+ | LetterNumber -- ^ Nl: Number, Letter
+ | OtherNumber -- ^ No: Number, Other
+ | ConnectorPunctuation -- ^ Pc: Punctuation, Connector
+ | DashPunctuation -- ^ Pd: Punctuation, Dash
+ | OpenPunctuation -- ^ Ps: Punctuation, Open
+ | ClosePunctuation -- ^ Pe: Punctuation, Close
+ | InitialQuote -- ^ Pi: Punctuation, Initial quote
+ | FinalQuote -- ^ Pf: Punctuation, Final quote
+ | OtherPunctuation -- ^ Po: Punctuation, Other
+ | MathSymbol -- ^ Sm: Symbol, Math
+ | CurrencySymbol -- ^ Sc: Symbol, Currency
+ | ModifierSymbol -- ^ Sk: Symbol, Modifier
+ | OtherSymbol -- ^ So: Symbol, Other
+ | Space -- ^ Zs: Separator, Space
+ | LineSeparator -- ^ Zl: Separator, Line
+ | ParagraphSeparator -- ^ Zp: Separator, Paragraph
+ | Control -- ^ Cc: Other, Control
+ | Format -- ^ Cf: Other, Format
+ | Surrogate -- ^ Cs: Other, Surrogate
+ | PrivateUse -- ^ Co: Other, Private Use
+ | NotAssigned -- ^ Cn: Other, Not Assigned
+ deriving (Eq, Ord, Enum, Read, Show, Bounded) -- 'Enum' and 'Ord' follow the declaration order above
+
+-- | The Unicode general category of a character.
+--
+-- The character's code point ('ord') is converted with 'fromIntegral' and
+-- handed to @wgencat@; the number that comes back is turned into a
+-- 'GeneralCategory' constructor by 'toEnum'.
+generalCategory :: Char -> GeneralCategory
+generalCategory c = toEnum categoryIndex
+  where
+    codePoint = ord c
+    categoryIndex = wgencat (fromIntegral codePoint)
+
+-- ------------------------------------------------------------------------
+-- These are copied from Hugs Unicode.hs
+
+-- derived character classifiers
+
+-- | 'True' for characters whose general category is one of the letter
+-- categories: 'UppercaseLetter', 'LowercaseLetter', 'TitlecaseLetter',
+-- 'ModifierLetter' or 'OtherLetter'. 'False' for every other category.
+isLetter :: Char -> Bool
+isLetter c = letterCategory (generalCategory c)
+  where
+    letterCategory UppercaseLetter = True
+    letterCategory LowercaseLetter = True
+    letterCategory TitlecaseLetter = True
+    letterCategory ModifierLetter = True
+    letterCategory OtherLetter = True
+    letterCategory _ = False
+
+-- | 'True' for characters whose general category is one of the mark
+-- categories: 'NonSpacingMark', 'SpacingCombiningMark' or 'EnclosingMark'.
+-- 'False' for every other category.
+isMark :: Char -> Bool
+isMark c = markCategory (generalCategory c)
+  where
+    markCategory NonSpacingMark = True
+    markCategory SpacingCombiningMark = True
+    markCategory EnclosingMark = True
+    markCategory _ = False
+
+-- | 'True' for characters whose general category is one of the number
+-- categories: 'DecimalNumber', 'LetterNumber' or 'OtherNumber'.
+-- 'False' for every other category.
+isNumber :: Char -> Bool
+isNumber c = numberCategory (generalCategory c)
+  where
+    numberCategory DecimalNumber = True
+    numberCategory LetterNumber = True
+    numberCategory OtherNumber = True
+    numberCategory _ = False
+
+-- | 'True' for characters whose general category is one of the punctuation
+-- categories: 'ConnectorPunctuation', 'DashPunctuation', 'OpenPunctuation',
+-- 'ClosePunctuation', 'InitialQuote', 'FinalQuote' or 'OtherPunctuation'.
+-- 'False' for every other category.
+isPunctuation :: Char -> Bool
+isPunctuation c = punctuationCategory (generalCategory c)
+  where
+    punctuationCategory ConnectorPunctuation = True
+    punctuationCategory DashPunctuation = True
+    punctuationCategory OpenPunctuation = True
+    punctuationCategory ClosePunctuation = True
+    punctuationCategory InitialQuote = True
+    punctuationCategory FinalQuote = True
+    punctuationCategory OtherPunctuation = True
+    punctuationCategory _ = False
+
+-- | 'True' for characters whose general category is one of the symbol
+-- categories: 'MathSymbol', 'CurrencySymbol', 'ModifierSymbol' or
+-- 'OtherSymbol'. 'False' for every other category.
+isSymbol :: Char -> Bool
+isSymbol c = symbolCategory (generalCategory c)
+  where
+    symbolCategory MathSymbol = True
+    symbolCategory CurrencySymbol = True
+    symbolCategory ModifierSymbol = True
+    symbolCategory OtherSymbol = True
+    symbolCategory _ = False
+
+-- | 'True' for characters whose general category is one of the separator
+-- categories: 'Space', 'LineSeparator' or 'ParagraphSeparator'.
+-- 'False' for every other category.
+isSeparator :: Char -> Bool
+isSeparator c = separatorCategory (generalCategory c)
+  where
+    separatorCategory Space = True
+    separatorCategory LineSeparator = True
+    separatorCategory ParagraphSeparator = True
+    separatorCategory _ = False