Data/Char.hs

   1 {-# OPTIONS_GHC -XNoImplicitPrelude #-}
   2 -----------------------------------------------------------------------------
   3 -- |
   4 -- Module      :  Data.Char
   5 -- Copyright   :  (c) The University of Glasgow 2001
   6 -- License     :  BSD-style (see the file libraries/base/LICENSE)
   7 --
   8 -- Maintainer  :  libraries@haskell.org
   9 -- Stability   :  stable
  10 -- Portability :  portable
  11 --
  12 -- The Char type and associated operations.
  13 --
  14 -----------------------------------------------------------------------------
  15
  16 module Data.Char
  17     (
  18       Char
  19
  20     -- * Character classification
  21     -- | Unicode characters are divided into letters, numbers, marks,
  22     -- punctuation, symbols, separators (including spaces) and others
  23     -- (including control characters).
  24     , isControl, isSpace
  25     , isLower, isUpper, isAlpha, isAlphaNum, isPrint
  26     , isDigit, isOctDigit, isHexDigit
  27     , isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator
  28
  29     -- ** Subranges
  30     , isAscii, isLatin1
  31     , isAsciiUpper, isAsciiLower
  32
  33     -- ** Unicode general categories
  34     , GeneralCategory(..), generalCategory
  35
  36     -- * Case conversion
  37     , toUpper, toLower, toTitle  -- :: Char -> Char
  38
  39     -- * Single digit characters
  40     , digitToInt        -- :: Char -> Int
  41     , intToDigit        -- :: Int  -> Char
  42
  43     -- * Numeric representations
  44     , ord               -- :: Char -> Int
  45     , chr               -- :: Int  -> Char
  46
  47     -- * String representations
  48     , showLitChar       -- :: Char -> ShowS
  49     , lexLitChar        -- :: ReadS String
  50     , readLitChar       -- :: ReadS Char
  51
  52      -- Implementation checked wrt. Haskell 98 lib report, 1/99.
  53     ) where
  54
  55 #ifdef __GLASGOW_HASKELL__
  56 import GHC.Base
  57 import GHC.Arr (Ix)
  58 import GHC.Real (fromIntegral)
  59 import GHC.Show
  60 import GHC.Read (Read, readLitChar, lexLitChar)
  61 import GHC.Unicode
  62 import GHC.Num
  63 import GHC.Enum
  64 #endif
  65
  66 #ifdef __HUGS__
  67 import Hugs.Prelude (Ix)
  68 import Hugs.Char
  69 #endif
  70
  71 #ifdef __NHC__
  72 import Prelude
  73 import Prelude(Char,String)
  74 import Char
  75 import Ix
  76 import NHC.FFI (CInt)
  77 foreign import ccall unsafe "WCsubst.h u_gencat" wgencat :: CInt -> CInt
  78 #endif
  79
  80 -- | Convert a single digit 'Char' to the corresponding 'Int'.
  81 -- This function fails unless its argument satisfies 'isHexDigit',
  82 -- but recognises both upper and lower-case hexadecimal digits
  83 -- (i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@).
  84 digitToInt :: Char -> Int
  85 digitToInt c
  86  | isDigit c            =  ord c - ord '0'
  87  | c >= 'a' && c <= 'f' =  ord c - ord 'a' + 10
  88  | c >= 'A' && c <= 'F' =  ord c - ord 'A' + 10
  89  | otherwise            =  error ("Char.digitToInt: not a digit " ++ show c) -- sigh
  90
  91 #ifndef __GLASGOW_HASKELL__
  92 isAsciiUpper, isAsciiLower :: Char -> Bool
  93 isAsciiLower c          =  c >= 'a' && c <= 'z'
  94 isAsciiUpper c          =  c >= 'A' && c <= 'Z'
  95 #endif
  96
  97 -- | Unicode General Categories (column 2 of the UnicodeData table)
  98 -- in the order they are listed in the Unicode standard.
  99
 100 data GeneralCategory
 101         = UppercaseLetter       -- ^ Lu: Letter, Uppercase
 102         | LowercaseLetter       -- ^ Ll: Letter, Lowercase
 103         | TitlecaseLetter       -- ^ Lt: Letter, Titlecase
 104         | ModifierLetter        -- ^ Lm: Letter, Modifier
 105         | OtherLetter           -- ^ Lo: Letter, Other
 106         | NonSpacingMark        -- ^ Mn: Mark, Non-Spacing
 107         | SpacingCombiningMark  -- ^ Mc: Mark, Spacing Combining
 108         | EnclosingMark         -- ^ Me: Mark, Enclosing
 109         | DecimalNumber         -- ^ Nd: Number, Decimal
 110         | LetterNumber          -- ^ Nl: Number, Letter
 111         | OtherNumber           -- ^ No: Number, Other
 112         | ConnectorPunctuation  -- ^ Pc: Punctuation, Connector
 113         | DashPunctuation       -- ^ Pd: Punctuation, Dash
 114         | OpenPunctuation       -- ^ Ps: Punctuation, Open
 115         | ClosePunctuation      -- ^ Pe: Punctuation, Close
 116         | InitialQuote          -- ^ Pi: Punctuation, Initial quote
 117         | FinalQuote            -- ^ Pf: Punctuation, Final quote
 118         | OtherPunctuation      -- ^ Po: Punctuation, Other
 119         | MathSymbol            -- ^ Sm: Symbol, Math
 120         | CurrencySymbol        -- ^ Sc: Symbol, Currency
 121         | ModifierSymbol        -- ^ Sk: Symbol, Modifier
 122         | OtherSymbol           -- ^ So: Symbol, Other
 123         | Space                 -- ^ Zs: Separator, Space
 124         | LineSeparator         -- ^ Zl: Separator, Line
 125         | ParagraphSeparator    -- ^ Zp: Separator, Paragraph
 126         | Control               -- ^ Cc: Other, Control
 127         | Format                -- ^ Cf: Other, Format
 128         | Surrogate             -- ^ Cs: Other, Surrogate
 129         | PrivateUse            -- ^ Co: Other, Private Use
 130         | NotAssigned           -- ^ Cn: Other, Not Assigned
 131         deriving (Eq, Ord, Enum, Read, Show, Bounded, Ix)
 132
 133 -- | The Unicode general category of the character.
 134 generalCategory :: Char -> GeneralCategory
 135 #if defined(__GLASGOW_HASKELL__) || defined(__NHC__)
 136 generalCategory c = toEnum $ fromIntegral $ wgencat $ fromIntegral $ ord c
 137 #endif
 138 #ifdef __HUGS__
 139 generalCategory c = toEnum (primUniGenCat c)
 140 #endif
 141
 142 -- derived character classifiers
 143
 144 -- | Selects alphabetic Unicode characters (lower-case, upper-case and
 145 -- title-case letters, plus letters of caseless scripts and modifiers letters).
 146 -- This function is equivalent to 'Data.Char.isAlpha'.
 147 isLetter :: Char -> Bool
 148 isLetter c = case generalCategory c of
 149         UppercaseLetter         -> True
 150         LowercaseLetter         -> True
 151         TitlecaseLetter         -> True
 152         ModifierLetter          -> True
 153         OtherLetter             -> True
 154         _                       -> False
 155
 156 -- | Selects Unicode mark characters, e.g. accents and the like, which
 157 -- combine with preceding letters.
 158 isMark :: Char -> Bool
 159 isMark c = case generalCategory c of
 160         NonSpacingMark          -> True
 161         SpacingCombiningMark    -> True
 162         EnclosingMark           -> True
 163         _                       -> False
 164
 165 -- | Selects Unicode numeric characters, including digits from various
 166 -- scripts, Roman numerals, etc.
 167 isNumber :: Char -> Bool
 168 isNumber c = case generalCategory c of
 169         DecimalNumber           -> True
 170         LetterNumber            -> True
 171         OtherNumber             -> True
 172         _                       -> False
 173
 174 -- | Selects Unicode punctuation characters, including various kinds
 175 -- of connectors, brackets and quotes.
 176 isPunctuation :: Char -> Bool
 177 isPunctuation c = case generalCategory c of
 178         ConnectorPunctuation    -> True
 179         DashPunctuation         -> True
 180         OpenPunctuation         -> True
 181         ClosePunctuation        -> True
 182         InitialQuote            -> True
 183         FinalQuote              -> True
 184         OtherPunctuation        -> True
 185         _                       -> False
 186
 187 -- | Selects Unicode symbol characters, including mathematical and
 188 -- currency symbols.
 189 isSymbol :: Char -> Bool
 190 isSymbol c = case generalCategory c of
 191         MathSymbol              -> True
 192         CurrencySymbol          -> True
 193         ModifierSymbol          -> True
 194         OtherSymbol             -> True
 195         _                       -> False
 196
 197 -- | Selects Unicode space and separator characters.
 198 isSeparator :: Char -> Bool
 199 isSeparator c = case generalCategory c of
 200         Space                   -> True
 201         LineSeparator           -> True
 202         ParagraphSeparator      -> True
 203         _                       -> False
 204
 205 #ifdef __NHC__
 206 -- dummy implementation
 207 toTitle :: Char -> Char
 208 toTitle = toUpper
 209 #endif