X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Data%2FChar.hs;h=40052a760761b9e263efe5e030e900d251c2bb59;hb=HEAD;hp=c59ec00c1d253ab1011cd6beb994f31a248f8110;hpb=052b9b84fff4bffabbd93d19cb17a2c6c6672128;p=ghc-base.git diff --git a/Data/Char.hs b/Data/Char.hs index c59ec00..40052a7 100644 --- a/Data/Char.hs +++ b/Data/Char.hs @@ -1,4 +1,5 @@ -{-# OPTIONS_GHC -fno-implicit-prelude #-} +{-# LANGUAGE CPP, NoImplicitPrelude #-} + ----------------------------------------------------------------------------- -- | -- Module : Data.Char @@ -13,22 +14,24 @@ -- ----------------------------------------------------------------------------- -module Data.Char +module Data.Char ( Char - , String - -- * Character classification -- | Unicode characters are divided into letters, numbers, marks, -- punctuation, symbols, separators (including spaces) and others -- (including control characters). - , isAscii, isLatin1, isControl, isSpace - , isLower, isUpper, isAlpha, isAlphaNum, isPrint + , isControl, isSpace + , isLower, isUpper, isAlpha, isAlphaNum, isPrint , isDigit, isOctDigit, isHexDigit - , isAsciiUpper, isAsciiLower , isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator + -- ** Subranges + , isAscii, isLatin1 + , isAsciiUpper, isAsciiLower + + -- ** Unicode general categories , GeneralCategory(..), generalCategory -- * Case conversion @@ -44,7 +47,7 @@ module Data.Char -- * String representations , showLitChar -- :: Char -> ShowS - , lexLitChar -- :: ReadS String + , lexLitChar -- :: ReadS String , readLitChar -- :: ReadS Char -- Implementation checked wrt. Haskell 98 lib report, 1/99. @@ -52,6 +55,7 @@ module Data.Char #ifdef __GLASGOW_HASKELL__ import GHC.Base +import GHC.Arr (Ix) import GHC.Real (fromIntegral) import GHC.Show import GHC.Read (Read, readLitChar, lexLitChar) @@ -61,6 +65,7 @@ import GHC.Enum #endif #ifdef __HUGS__ +import Hugs.Prelude (Ix) import Hugs.Char #endif @@ -68,8 +73,9 @@ import Hugs.Char import Prelude import Prelude(Char,String) import Char +import Ix import NHC.FFI (CInt) -foreign import ccall unsafe "WCsubst.h u_gencat" wgencat :: CInt -> Int +foreign import ccall unsafe "WCsubst.h u_gencat" wgencat :: CInt -> CInt #endif -- | Convert a single digit 'Char' to the corresponding 'Int'. @@ -78,10 +84,10 @@ foreign import ccall unsafe "WCsubst.h u_gencat" wgencat :: CInt -> Int -- (i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@). digitToInt :: Char -> Int digitToInt c - | isDigit c = ord c - ord '0' + | isDigit c = ord c - ord '0' | c >= 'a' && c <= 'f' = ord c - ord 'a' + 10 | c >= 'A' && c <= 'F' = ord c - ord 'A' + 10 - | otherwise = error ("Char.digitToInt: not a digit " ++ show c) -- sigh + | otherwise = error ("Char.digitToInt: not a digit " ++ show c) -- sigh #ifndef __GLASGOW_HASKELL__ isAsciiUpper, isAsciiLower :: Char -> Bool @@ -123,12 +129,12 @@ data GeneralCategory | Surrogate -- ^ Cs: Other, Surrogate | PrivateUse -- ^ Co: Other, Private Use | NotAssigned -- ^ Cn: Other, Not Assigned - deriving (Eq, Ord, Enum, Read, Show, Bounded) + deriving (Eq, Ord, Enum, Read, Show, Bounded, Ix) --- | Retrieves the general Unicode category of the character. +-- | The Unicode general category of the character. generalCategory :: Char -> GeneralCategory #if defined(__GLASGOW_HASKELL__) || defined(__NHC__) -generalCategory c = toEnum (wgencat (fromIntegral (ord c))) +generalCategory c = toEnum $ fromIntegral $ wgencat $ fromIntegral $ ord c #endif #ifdef __HUGS__ generalCategory c = toEnum (primUniGenCat c) @@ -136,6 +142,9 @@ generalCategory c = toEnum (primUniGenCat c) -- derived character classifiers +-- | Selects alphabetic Unicode characters (lower-case, upper-case and +-- title-case letters, plus letters of caseless scripts and modifiers letters). +-- This function is equivalent to 'Data.Char.isAlpha'. isLetter :: Char -> Bool isLetter c = case generalCategory c of UppercaseLetter -> True @@ -145,6 +154,8 @@ isLetter c = case generalCategory c of OtherLetter -> True _ -> False +-- | Selects Unicode mark characters, e.g. accents and the like, which +-- combine with preceding letters. isMark :: Char -> Bool isMark c = case generalCategory c of NonSpacingMark -> True @@ -152,6 +163,8 @@ isMark c = case generalCategory c of EnclosingMark -> True _ -> False +-- | Selects Unicode numeric characters, including digits from various +-- scripts, Roman numerals, etc. isNumber :: Char -> Bool isNumber c = case generalCategory c of DecimalNumber -> True @@ -159,6 +172,8 @@ isNumber c = case generalCategory c of OtherNumber -> True _ -> False +-- | Selects Unicode punctuation characters, including various kinds +-- of connectors, brackets and quotes. isPunctuation :: Char -> Bool isPunctuation c = case generalCategory c of ConnectorPunctuation -> True @@ -170,6 +185,8 @@ isPunctuation c = case generalCategory c of OtherPunctuation -> True _ -> False +-- | Selects Unicode symbol characters, including mathematical and +-- currency symbols. isSymbol :: Char -> Bool isSymbol c = case generalCategory c of MathSymbol -> True @@ -178,6 +195,7 @@ isSymbol c = case generalCategory c of OtherSymbol -> True _ -> False +-- | Selects Unicode space and separator characters. isSeparator :: Char -> Bool isSeparator c = case generalCategory c of Space -> True