Data/Char.hs

   1 {-# LANGUAGE CPP, NoImplicitPrelude #-}
   2
   3 -----------------------------------------------------------------------------
   4 -- |
   5 -- Module      :  Data.Char
   6 -- Copyright   :  (c) The University of Glasgow 2001
   7 -- License     :  BSD-style (see the file libraries/base/LICENSE)
   8 --
   9 -- Maintainer  :  libraries@haskell.org
  10 -- Stability   :  stable
  11 -- Portability :  portable
  12 --
  13 -- The Char type and associated operations.
  14 --
  15 -----------------------------------------------------------------------------
  16
  17 module Data.Char
  18     (
  19       Char
  20
  21     -- * Character classification
  22     -- | Unicode characters are divided into letters, numbers, marks,
  23     -- punctuation, symbols, separators (including spaces) and others
  24     -- (including control characters).
  25     , isControl, isSpace
  26     , isLower, isUpper, isAlpha, isAlphaNum, isPrint
  27     , isDigit, isOctDigit, isHexDigit
  28     , isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator
  29
  30     -- ** Subranges
  31     , isAscii, isLatin1
  32     , isAsciiUpper, isAsciiLower
  33
  34     -- ** Unicode general categories
  35     , GeneralCategory(..), generalCategory
  36
  37     -- * Case conversion
  38     , toUpper, toLower, toTitle  -- :: Char -> Char
  39
  40     -- * Single digit characters
  41     , digitToInt        -- :: Char -> Int
  42     , intToDigit        -- :: Int  -> Char
  43
  44     -- * Numeric representations
  45     , ord               -- :: Char -> Int
  46     , chr               -- :: Int  -> Char
  47
  48     -- * String representations
  49     , showLitChar       -- :: Char -> ShowS
  50     , lexLitChar        -- :: ReadS String
  51     , readLitChar       -- :: ReadS Char
  52
  53      -- Implementation checked wrt. Haskell 98 lib report, 1/99.
  54     ) where
  55
  56 #ifdef __GLASGOW_HASKELL__
  57 import GHC.Base
  58 import GHC.Arr (Ix)
  59 import GHC.Real (fromIntegral)
  60 import GHC.Show
  61 import GHC.Read (Read, readLitChar, lexLitChar)
  62 import GHC.Unicode
  63 import GHC.Num
  64 import GHC.Enum
  65 #endif
  66
  67 #ifdef __HUGS__
  68 import Hugs.Prelude (Ix)
  69 import Hugs.Char
  70 #endif
  71
  72 #ifdef __NHC__
  73 import Prelude
  74 import Prelude(Char,String)
  75 import Char
  76 import Ix
  77 import NHC.FFI (CInt)
  78 foreign import ccall unsafe "WCsubst.h u_gencat" wgencat :: CInt -> CInt
  79 #endif
  80
  81 -- | Convert a single digit 'Char' to the corresponding 'Int'.
  82 -- This function fails unless its argument satisfies 'isHexDigit',
  83 -- but recognises both upper and lower-case hexadecimal digits
  84 -- (i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@).
  85 digitToInt :: Char -> Int
  86 digitToInt c
  87  | isDigit c            =  ord c - ord '0'
  88  | c >= 'a' && c <= 'f' =  ord c - ord 'a' + 10
  89  | c >= 'A' && c <= 'F' =  ord c - ord 'A' + 10
  90  | otherwise            =  error ("Char.digitToInt: not a digit " ++ show c) -- sigh
  91
  92 #ifndef __GLASGOW_HASKELL__
  93 isAsciiUpper, isAsciiLower :: Char -> Bool
  94 isAsciiLower c          =  c >= 'a' && c <= 'z'
  95 isAsciiUpper c          =  c >= 'A' && c <= 'Z'
  96 #endif
  97
  98 -- | Unicode General Categories (column 2 of the UnicodeData table)
  99 -- in the order they are listed in the Unicode standard.
 100
 101 data GeneralCategory
 102         = UppercaseLetter       -- ^ Lu: Letter, Uppercase
 103         | LowercaseLetter       -- ^ Ll: Letter, Lowercase
 104         | TitlecaseLetter       -- ^ Lt: Letter, Titlecase
 105         | ModifierLetter        -- ^ Lm: Letter, Modifier
 106         | OtherLetter           -- ^ Lo: Letter, Other
 107         | NonSpacingMark        -- ^ Mn: Mark, Non-Spacing
 108         | SpacingCombiningMark  -- ^ Mc: Mark, Spacing Combining
 109         | EnclosingMark         -- ^ Me: Mark, Enclosing
 110         | DecimalNumber         -- ^ Nd: Number, Decimal
 111         | LetterNumber          -- ^ Nl: Number, Letter
 112         | OtherNumber           -- ^ No: Number, Other
 113         | ConnectorPunctuation  -- ^ Pc: Punctuation, Connector
 114         | DashPunctuation       -- ^ Pd: Punctuation, Dash
 115         | OpenPunctuation       -- ^ Ps: Punctuation, Open
 116         | ClosePunctuation      -- ^ Pe: Punctuation, Close
 117         | InitialQuote          -- ^ Pi: Punctuation, Initial quote
 118         | FinalQuote            -- ^ Pf: Punctuation, Final quote
 119         | OtherPunctuation      -- ^ Po: Punctuation, Other
 120         | MathSymbol            -- ^ Sm: Symbol, Math
 121         | CurrencySymbol        -- ^ Sc: Symbol, Currency
 122         | ModifierSymbol        -- ^ Sk: Symbol, Modifier
 123         | OtherSymbol           -- ^ So: Symbol, Other
 124         | Space                 -- ^ Zs: Separator, Space
 125         | LineSeparator         -- ^ Zl: Separator, Line
 126         | ParagraphSeparator    -- ^ Zp: Separator, Paragraph
 127         | Control               -- ^ Cc: Other, Control
 128         | Format                -- ^ Cf: Other, Format
 129         | Surrogate             -- ^ Cs: Other, Surrogate
 130         | PrivateUse            -- ^ Co: Other, Private Use
 131         | NotAssigned           -- ^ Cn: Other, Not Assigned
 132         deriving (Eq, Ord, Enum, Read, Show, Bounded, Ix)
 133
 134 -- | The Unicode general category of the character.
 135 generalCategory :: Char -> GeneralCategory
 136 #if defined(__GLASGOW_HASKELL__) || defined(__NHC__)
 137 generalCategory c = toEnum $ fromIntegral $ wgencat $ fromIntegral $ ord c
 138 #endif
 139 #ifdef __HUGS__
 140 generalCategory c = toEnum (primUniGenCat c)
 141 #endif
 142
 143 -- derived character classifiers
 144
 145 -- | Selects alphabetic Unicode characters (lower-case, upper-case and
 146 -- title-case letters, plus letters of caseless scripts and modifiers letters).
 147 -- This function is equivalent to 'Data.Char.isAlpha'.
 148 isLetter :: Char -> Bool
 149 isLetter c = case generalCategory c of
 150         UppercaseLetter         -> True
 151         LowercaseLetter         -> True
 152         TitlecaseLetter         -> True
 153         ModifierLetter          -> True
 154         OtherLetter             -> True
 155         _                       -> False
 156
 157 -- | Selects Unicode mark characters, e.g. accents and the like, which
 158 -- combine with preceding letters.
 159 isMark :: Char -> Bool
 160 isMark c = case generalCategory c of
 161         NonSpacingMark          -> True
 162         SpacingCombiningMark    -> True
 163         EnclosingMark           -> True
 164         _                       -> False
 165
 166 -- | Selects Unicode numeric characters, including digits from various
 167 -- scripts, Roman numerals, etc.
 168 isNumber :: Char -> Bool
 169 isNumber c = case generalCategory c of
 170         DecimalNumber           -> True
 171         LetterNumber            -> True
 172         OtherNumber             -> True
 173         _                       -> False
 174
 175 -- | Selects Unicode punctuation characters, including various kinds
 176 -- of connectors, brackets and quotes.
 177 isPunctuation :: Char -> Bool
 178 isPunctuation c = case generalCategory c of
 179         ConnectorPunctuation    -> True
 180         DashPunctuation         -> True
 181         OpenPunctuation         -> True
 182         ClosePunctuation        -> True
 183         InitialQuote            -> True
 184         FinalQuote              -> True
 185         OtherPunctuation        -> True
 186         _                       -> False
 187
 188 -- | Selects Unicode symbol characters, including mathematical and
 189 -- currency symbols.
 190 isSymbol :: Char -> Bool
 191 isSymbol c = case generalCategory c of
 192         MathSymbol              -> True
 193         CurrencySymbol          -> True
 194         ModifierSymbol          -> True
 195         OtherSymbol             -> True
 196         _                       -> False
 197
 198 -- | Selects Unicode space and separator characters.
 199 isSeparator :: Char -> Bool
 200 isSeparator c = case generalCategory c of
 201         Space                   -> True
 202         LineSeparator           -> True
 203         ParagraphSeparator      -> True
 204         _                       -> False
 205
 206 #ifdef __NHC__
 207 -- dummy implementation
 208 toTitle :: Char -> Char
 209 toTitle = toUpper
 210 #endif