Data/Char.hs

   1 {-# OPTIONS_GHC -fno-implicit-prelude #-}
   2 -----------------------------------------------------------------------------
   3 -- |
   4 -- Module      :  Data.Char
   5 -- Copyright   :  (c) The University of Glasgow 2001
   6 -- License     :  BSD-style (see the file libraries/base/LICENSE)
   7 --
   8 -- Maintainer  :  libraries@haskell.org
   9 -- Stability   :  stable
  10 -- Portability :  portable
  11 --
  12 -- The Char type and associated operations.
  13 --
  14 -----------------------------------------------------------------------------
  15
  16 module Data.Char
  17     (
  18       Char
  19
  20     , String
  21
  22     -- * Character classification
  23     -- | Unicode characters are divided into letters, numbers, marks,
  24     -- punctuation, symbols, separators (including spaces) and others
  25     -- (including control characters).
  26     , isControl, isSpace
  27     , isLower, isUpper, isAlpha, isAlphaNum, isPrint
  28     , isDigit, isOctDigit, isHexDigit
  29     , isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator
  30
  31     -- ** Subranges
  32     , isAscii, isLatin1
  33     , isAsciiUpper, isAsciiLower
  34
  35     -- ** Unicode general categories
  36     , GeneralCategory(..), generalCategory
  37
  38     -- * Case conversion
  39     , toUpper, toLower, toTitle  -- :: Char -> Char
  40
  41     -- * Single digit characters
  42     , digitToInt        -- :: Char -> Int
  43     , intToDigit        -- :: Int  -> Char
  44
  45     -- * Numeric representations
  46     , ord               -- :: Char -> Int
  47     , chr               -- :: Int  -> Char
  48
  49     -- * String representations
  50     , showLitChar       -- :: Char -> ShowS
  51     , lexLitChar        -- :: ReadS String
  52     , readLitChar       -- :: ReadS Char
  53
  54      -- Implementation checked wrt. Haskell 98 lib report, 1/99.
  55     ) where
  56
  57 #ifdef __GLASGOW_HASKELL__
  58 import GHC.Base
  59 import GHC.Real (fromIntegral)
  60 import GHC.Show
  61 import GHC.Read (Read, readLitChar, lexLitChar)
  62 import GHC.Unicode
  63 import GHC.Num
  64 import GHC.Enum
  65 #endif
  66
  67 #ifdef __HUGS__
  68 import Hugs.Char
  69 #endif
  70
  71 #ifdef __NHC__
  72 import Prelude
  73 import Prelude(Char,String)
  74 import Char
  75 import NHC.FFI (CInt)
  76 foreign import ccall unsafe "WCsubst.h u_gencat" wgencat :: CInt -> Int
  77 #endif
  78
  79 -- | Convert a single digit 'Char' to the corresponding 'Int'.
  80 -- This function fails unless its argument satisfies 'isHexDigit',
  81 -- but recognises both upper and lower-case hexadecimal digits
  82 -- (i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@).
  83 digitToInt :: Char -> Int
  84 digitToInt c
  85  | isDigit c            =  ord c - ord '0'
  86  | c >= 'a' && c <= 'f' =  ord c - ord 'a' + 10
  87  | c >= 'A' && c <= 'F' =  ord c - ord 'A' + 10
  88  | otherwise            =  error ("Char.digitToInt: not a digit " ++ show c) -- sigh
  89
  90 #ifndef __GLASGOW_HASKELL__
  91 isAsciiUpper, isAsciiLower :: Char -> Bool
  92 isAsciiLower c          =  c >= 'a' && c <= 'z'
  93 isAsciiUpper c          =  c >= 'A' && c <= 'Z'
  94 #endif
  95
  96 -- | Unicode General Categories (column 2 of the UnicodeData table)
  97 -- in the order they are listed in the Unicode standard.
  98
  99 data GeneralCategory
 100         = UppercaseLetter       -- ^ Lu: Letter, Uppercase
 101         | LowercaseLetter       -- ^ Ll: Letter, Lowercase
 102         | TitlecaseLetter       -- ^ Lt: Letter, Titlecase
 103         | ModifierLetter        -- ^ Lm: Letter, Modifier
 104         | OtherLetter           -- ^ Lo: Letter, Other
 105         | NonSpacingMark        -- ^ Mn: Mark, Non-Spacing
 106         | SpacingCombiningMark  -- ^ Mc: Mark, Spacing Combining
 107         | EnclosingMark         -- ^ Me: Mark, Enclosing
 108         | DecimalNumber         -- ^ Nd: Number, Decimal
 109         | LetterNumber          -- ^ Nl: Number, Letter
 110         | OtherNumber           -- ^ No: Number, Other
 111         | ConnectorPunctuation  -- ^ Pc: Punctuation, Connector
 112         | DashPunctuation       -- ^ Pd: Punctuation, Dash
 113         | OpenPunctuation       -- ^ Ps: Punctuation, Open
 114         | ClosePunctuation      -- ^ Pe: Punctuation, Close
 115         | InitialQuote          -- ^ Pi: Punctuation, Initial quote
 116         | FinalQuote            -- ^ Pf: Punctuation, Final quote
 117         | OtherPunctuation      -- ^ Po: Punctuation, Other
 118         | MathSymbol            -- ^ Sm: Symbol, Math
 119         | CurrencySymbol        -- ^ Sc: Symbol, Currency
 120         | ModifierSymbol        -- ^ Sk: Symbol, Modifier
 121         | OtherSymbol           -- ^ So: Symbol, Other
 122         | Space                 -- ^ Zs: Separator, Space
 123         | LineSeparator         -- ^ Zl: Separator, Line
 124         | ParagraphSeparator    -- ^ Zp: Separator, Paragraph
 125         | Control               -- ^ Cc: Other, Control
 126         | Format                -- ^ Cf: Other, Format
 127         | Surrogate             -- ^ Cs: Other, Surrogate
 128         | PrivateUse            -- ^ Co: Other, Private Use
 129         | NotAssigned           -- ^ Cn: Other, Not Assigned
 130         deriving (Eq, Ord, Enum, Read, Show, Bounded)
 131
 132 -- | The Unicode general category of the character.
 133 generalCategory :: Char -> GeneralCategory
 134 #if defined(__GLASGOW_HASKELL__) || defined(__NHC__)
 135 generalCategory c = toEnum (wgencat (fromIntegral (ord c)))
 136 #endif
 137 #ifdef __HUGS__
 138 generalCategory c = toEnum (primUniGenCat c)
 139 #endif
 140
 141 -- derived character classifiers
 142
 143 -- | Selects alphabetic Unicode characters (lower-case, upper-case and
 144 -- title-case letters, plus letters of caseless scripts and modifiers letters).
 145 -- This function is equivalent to 'Data.Char.isAlpha'.
 146 isLetter :: Char -> Bool
 147 isLetter c = case generalCategory c of
 148         UppercaseLetter         -> True
 149         LowercaseLetter         -> True
 150         TitlecaseLetter         -> True
 151         ModifierLetter          -> True
 152         OtherLetter             -> True
 153         _                       -> False
 154
 155 -- | Selects Unicode mark characters, e.g. accents and the like, which
 156 -- combine with preceding letters.
 157 isMark :: Char -> Bool
 158 isMark c = case generalCategory c of
 159         NonSpacingMark          -> True
 160         SpacingCombiningMark    -> True
 161         EnclosingMark           -> True
 162         _                       -> False
 163
 164 -- | Selects Unicode numeric characters, including digits from various
 165 -- scripts, Roman numerals, etc.
 166 isNumber :: Char -> Bool
 167 isNumber c = case generalCategory c of
 168         DecimalNumber           -> True
 169         LetterNumber            -> True
 170         OtherNumber             -> True
 171         _                       -> False
 172
 173 -- | Selects Unicode punctuation characters, including various kinds
 174 -- of connectors, brackets and quotes.
 175 isPunctuation :: Char -> Bool
 176 isPunctuation c = case generalCategory c of
 177         ConnectorPunctuation    -> True
 178         DashPunctuation         -> True
 179         OpenPunctuation         -> True
 180         ClosePunctuation        -> True
 181         InitialQuote            -> True
 182         FinalQuote              -> True
 183         OtherPunctuation        -> True
 184         _                       -> False
 185
 186 -- | Selects Unicode symbol characters, including mathematical and
 187 -- currency symbols.
 188 isSymbol :: Char -> Bool
 189 isSymbol c = case generalCategory c of
 190         MathSymbol              -> True
 191         CurrencySymbol          -> True
 192         ModifierSymbol          -> True
 193         OtherSymbol             -> True
 194         _                       -> False
 195
 196 -- | Selects Unicode space and separator characters.
 197 isSeparator :: Char -> Bool
 198 isSeparator c = case generalCategory c of
 199         Space                   -> True
 200         LineSeparator           -> True
 201         ParagraphSeparator      -> True
 202         _                       -> False
 203
 204 #ifdef __NHC__
 205 -- dummy implementation
 206 toTitle :: Char -> Char
 207 toTitle = toUpper
 208 #endif