Data/Char.hs

   1 {-# OPTIONS_GHC -fno-implicit-prelude #-}
   2 -----------------------------------------------------------------------------
   3 -- |
   4 -- Module      :  Data.Char
   5 -- Copyright   :  (c) The University of Glasgow 2001
   6 -- License     :  BSD-style (see the file libraries/base/LICENSE)
   7 --
   8 -- Maintainer  :  libraries@haskell.org
   9 -- Stability   :  stable
  10 -- Portability :  portable
  11 --
  12 -- The Char type and associated operations.
  13 --
  14 -----------------------------------------------------------------------------
  15
  16 module Data.Char
  17     (
  18       Char
  19
  20     , String
  21
  22     -- * Character classification
  23     -- | Unicode characters are divided into letters, numbers, marks,
  24     -- punctuation, symbols, separators (including spaces) and others
  25     -- (including control characters).
  26     , isControl, isSpace
  27     , isLower, isUpper, isAlpha, isAlphaNum, isPrint
  28     , isDigit, isOctDigit, isHexDigit
  29     , isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator
  30
  31     -- ** Subranges
  32     , isAscii, isLatin1
  33     , isAsciiUpper, isAsciiLower
  34
  35     -- ** Unicode general categories
  36     , GeneralCategory(..), generalCategory
  37
  38     -- * Case conversion
  39     , toUpper, toLower, toTitle  -- :: Char -> Char
  40
  41     -- * Single digit characters
  42     , digitToInt        -- :: Char -> Int
  43     , intToDigit        -- :: Int  -> Char
  44
  45     -- * Numeric representations
  46     , ord               -- :: Char -> Int
  47     , chr               -- :: Int  -> Char
  48
  49     -- * String representations
  50     , showLitChar       -- :: Char -> ShowS
  51     , lexLitChar        -- :: ReadS String
  52     , readLitChar       -- :: ReadS Char
  53
  54      -- Implementation checked wrt. Haskell 98 lib report, 1/99.
  55     ) where
  56
  57 #ifdef __GLASGOW_HASKELL__
  58 import GHC.Base
  59 import GHC.Arr (Ix)
  60 import GHC.Real (fromIntegral)
  61 import GHC.Show
  62 import GHC.Read (Read, readLitChar, lexLitChar)
  63 import GHC.Unicode
  64 import GHC.Num
  65 import GHC.Enum
  66 #endif
  67
  68 #ifdef __HUGS__
  69 import Hugs.Prelude (Ix)
  70 import Hugs.Char
  71 #endif
  72
  73 #ifdef __NHC__
  74 import Prelude
  75 import Prelude(Char,String)
  76 import Char
  77 import Ix
  78 import NHC.FFI (CInt)
  79 foreign import ccall unsafe "WCsubst.h u_gencat" wgencat :: CInt -> CInt
  80 #endif
  81
  82 -- | Convert a single digit 'Char' to the corresponding 'Int'.
  83 -- This function fails unless its argument satisfies 'isHexDigit',
  84 -- but recognises both upper and lower-case hexadecimal digits
  85 -- (i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@).
  86 digitToInt :: Char -> Int
  87 digitToInt c
  88  | isDigit c            =  ord c - ord '0'
  89  | c >= 'a' && c <= 'f' =  ord c - ord 'a' + 10
  90  | c >= 'A' && c <= 'F' =  ord c - ord 'A' + 10
  91  | otherwise            =  error ("Char.digitToInt: not a digit " ++ show c) -- sigh
  92
  93 #ifndef __GLASGOW_HASKELL__
  94 isAsciiUpper, isAsciiLower :: Char -> Bool
  95 isAsciiLower c          =  c >= 'a' && c <= 'z'
  96 isAsciiUpper c          =  c >= 'A' && c <= 'Z'
  97 #endif
  98
  99 -- | Unicode General Categories (column 2 of the UnicodeData table)
 100 -- in the order they are listed in the Unicode standard.
 101
 102 data GeneralCategory
 103         = UppercaseLetter       -- ^ Lu: Letter, Uppercase
 104         | LowercaseLetter       -- ^ Ll: Letter, Lowercase
 105         | TitlecaseLetter       -- ^ Lt: Letter, Titlecase
 106         | ModifierLetter        -- ^ Lm: Letter, Modifier
 107         | OtherLetter           -- ^ Lo: Letter, Other
 108         | NonSpacingMark        -- ^ Mn: Mark, Non-Spacing
 109         | SpacingCombiningMark  -- ^ Mc: Mark, Spacing Combining
 110         | EnclosingMark         -- ^ Me: Mark, Enclosing
 111         | DecimalNumber         -- ^ Nd: Number, Decimal
 112         | LetterNumber          -- ^ Nl: Number, Letter
 113         | OtherNumber           -- ^ No: Number, Other
 114         | ConnectorPunctuation  -- ^ Pc: Punctuation, Connector
 115         | DashPunctuation       -- ^ Pd: Punctuation, Dash
 116         | OpenPunctuation       -- ^ Ps: Punctuation, Open
 117         | ClosePunctuation      -- ^ Pe: Punctuation, Close
 118         | InitialQuote          -- ^ Pi: Punctuation, Initial quote
 119         | FinalQuote            -- ^ Pf: Punctuation, Final quote
 120         | OtherPunctuation      -- ^ Po: Punctuation, Other
 121         | MathSymbol            -- ^ Sm: Symbol, Math
 122         | CurrencySymbol        -- ^ Sc: Symbol, Currency
 123         | ModifierSymbol        -- ^ Sk: Symbol, Modifier
 124         | OtherSymbol           -- ^ So: Symbol, Other
 125         | Space                 -- ^ Zs: Separator, Space
 126         | LineSeparator         -- ^ Zl: Separator, Line
 127         | ParagraphSeparator    -- ^ Zp: Separator, Paragraph
 128         | Control               -- ^ Cc: Other, Control
 129         | Format                -- ^ Cf: Other, Format
 130         | Surrogate             -- ^ Cs: Other, Surrogate
 131         | PrivateUse            -- ^ Co: Other, Private Use
 132         | NotAssigned           -- ^ Cn: Other, Not Assigned
 133         deriving (Eq, Ord, Enum, Read, Show, Bounded, Ix)
 134
 135 -- | The Unicode general category of the character.
 136 generalCategory :: Char -> GeneralCategory
 137 #if defined(__GLASGOW_HASKELL__) || defined(__NHC__)
 138 generalCategory c = toEnum $ fromIntegral $ wgencat $ fromIntegral $ ord c
 139 #endif
 140 #ifdef __HUGS__
 141 generalCategory c = toEnum (primUniGenCat c)
 142 #endif
 143
 144 -- derived character classifiers
 145
 146 -- | Selects alphabetic Unicode characters (lower-case, upper-case and
 147 -- title-case letters, plus letters of caseless scripts and modifiers letters).
 148 -- This function is equivalent to 'Data.Char.isAlpha'.
 149 isLetter :: Char -> Bool
 150 isLetter c = case generalCategory c of
 151         UppercaseLetter         -> True
 152         LowercaseLetter         -> True
 153         TitlecaseLetter         -> True
 154         ModifierLetter          -> True
 155         OtherLetter             -> True
 156         _                       -> False
 157
 158 -- | Selects Unicode mark characters, e.g. accents and the like, which
 159 -- combine with preceding letters.
 160 isMark :: Char -> Bool
 161 isMark c = case generalCategory c of
 162         NonSpacingMark          -> True
 163         SpacingCombiningMark    -> True
 164         EnclosingMark           -> True
 165         _                       -> False
 166
 167 -- | Selects Unicode numeric characters, including digits from various
 168 -- scripts, Roman numerals, etc.
 169 isNumber :: Char -> Bool
 170 isNumber c = case generalCategory c of
 171         DecimalNumber           -> True
 172         LetterNumber            -> True
 173         OtherNumber             -> True
 174         _                       -> False
 175
 176 -- | Selects Unicode punctuation characters, including various kinds
 177 -- of connectors, brackets and quotes.
 178 isPunctuation :: Char -> Bool
 179 isPunctuation c = case generalCategory c of
 180         ConnectorPunctuation    -> True
 181         DashPunctuation         -> True
 182         OpenPunctuation         -> True
 183         ClosePunctuation        -> True
 184         InitialQuote            -> True
 185         FinalQuote              -> True
 186         OtherPunctuation        -> True
 187         _                       -> False
 188
 189 -- | Selects Unicode symbol characters, including mathematical and
 190 -- currency symbols.
 191 isSymbol :: Char -> Bool
 192 isSymbol c = case generalCategory c of
 193         MathSymbol              -> True
 194         CurrencySymbol          -> True
 195         ModifierSymbol          -> True
 196         OtherSymbol             -> True
 197         _                       -> False
 198
 199 -- | Selects Unicode space and separator characters.
 200 isSeparator :: Char -> Bool
 201 isSeparator c = case generalCategory c of
 202         Space                   -> True
 203         LineSeparator           -> True
 204         ParagraphSeparator      -> True
 205         _                       -> False
 206
 207 #ifdef __NHC__
 208 -- dummy implementation
 209 toTitle :: Char -> Char
 210 toTitle = toUpper
 211 #endif