Data/Char.hs

   1 {-# OPTIONS_GHC -fno-implicit-prelude #-}
   2 -----------------------------------------------------------------------------
   3 -- |
   4 -- Module      :  Data.Char
   5 -- Copyright   :  (c) The University of Glasgow 2001
   6 -- License     :  BSD-style (see the file libraries/base/LICENSE)
   7 --
   8 -- Maintainer  :  libraries@haskell.org
   9 -- Stability   :  stable
  10 -- Portability :  portable
  11 --
  12 -- The Char type and associated operations.
  13 --
  14 -----------------------------------------------------------------------------
  15
  16 module Data.Char
  17     (
  18       Char
  19
  20     , String
  21
  22     -- * Character classification
  23     -- | Unicode characters are divided into letters, numbers, marks,
  24     -- punctuation, symbols, separators (including spaces) and others
  25     -- (including control characters).
  26     , isAscii, isLatin1, isControl, isSpace
  27     , isLower, isUpper,  isAlpha,   isAlphaNum, isPrint
  28     , isDigit, isOctDigit, isHexDigit
  29     , isAsciiUpper, isAsciiLower
  30 #ifndef __NHC__
  31     , isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator
  32
  33     , GeneralCategory(..), generalCategory
  34 #endif
  35
  36     -- * Case conversion
  37     , toUpper, toLower, toTitle  -- :: Char -> Char
  38
  39     -- * Single digit characters
  40     , digitToInt        -- :: Char -> Int
  41     , intToDigit        -- :: Int  -> Char
  42
  43     -- * Numeric representations
  44     , ord               -- :: Char -> Int
  45     , chr               -- :: Int  -> Char
  46
  47     -- * String representations
  48     , showLitChar       -- :: Char -> ShowS
  49     , lexLitChar        -- :: ReadS String
  50     , readLitChar       -- :: ReadS Char
  51
  52      -- Implementation checked wrt. Haskell 98 lib report, 1/99.
  53     ) where
  54
  55 #ifdef __GLASGOW_HASKELL__
  56 import GHC.Base
  57 import GHC.Real (fromIntegral)
  58 import GHC.Show
  59 import GHC.Read (Read, readLitChar, lexLitChar)
  60 import GHC.Unicode
  61 import GHC.Num
  62 import GHC.Enum
  63 #endif
  64
  65 #ifdef __HUGS__
  66 import Hugs.Char
  67 #endif
  68
  69 #ifdef __NHC__
  70 import Prelude
  71 import Prelude(Char,String)
  72 import Char
  73 import NHC.FFI (CInt)
  74 foreign import ccall unsafe "WCsubst.h u_gencat" wgencat :: CInt -> Int
  75 #endif
  76
  77 -- | Convert a single digit 'Char' to the corresponding 'Int'.
  78 -- This function fails unless its argument satisfies 'isHexDigit',
  79 -- but recognises both upper and lower-case hexadecimal digits
  80 -- (i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@).
  81 digitToInt :: Char -> Int
  82 digitToInt c
  83  | isDigit c            =  ord c - ord '0'
  84  | c >= 'a' && c <= 'f' =  ord c - ord 'a' + 10
  85  | c >= 'A' && c <= 'F' =  ord c - ord 'A' + 10
  86  | otherwise            =  error ("Char.digitToInt: not a digit " ++ show c) -- sigh
  87
  88 #ifndef __GLASGOW_HASKELL__
  89 isAsciiUpper, isAsciiLower :: Char -> Bool
  90 isAsciiLower c          =  c >= 'a' && c <= 'z'
  91 isAsciiUpper c          =  c >= 'A' && c <= 'Z'
  92 #endif
  93
  94 -- | Unicode General Categories (column 2 of the UnicodeData table)
  95 -- in the order they are listed in the Unicode standard.
  96
  97 data GeneralCategory
  98         = UppercaseLetter       -- ^ Lu: Letter, Uppercase
  99         | LowercaseLetter       -- ^ Ll: Letter, Lowercase
 100         | TitlecaseLetter       -- ^ Lt: Letter, Titlecase
 101         | ModifierLetter        -- ^ Lm: Letter, Modifier
 102         | OtherLetter           -- ^ Lo: Letter, Other
 103         | NonSpacingMark        -- ^ Mn: Mark, Non-Spacing
 104         | SpacingCombiningMark  -- ^ Mc: Mark, Spacing Combining
 105         | EnclosingMark         -- ^ Me: Mark, Enclosing
 106         | DecimalNumber         -- ^ Nd: Number, Decimal
 107         | LetterNumber          -- ^ Nl: Number, Letter
 108         | OtherNumber           -- ^ No: Number, Other
 109         | ConnectorPunctuation  -- ^ Pc: Punctuation, Connector
 110         | DashPunctuation       -- ^ Pd: Punctuation, Dash
 111         | OpenPunctuation       -- ^ Ps: Punctuation, Open
 112         | ClosePunctuation      -- ^ Pe: Punctuation, Close
 113         | InitialQuote          -- ^ Pi: Punctuation, Initial quote
 114         | FinalQuote            -- ^ Pf: Punctuation, Final quote
 115         | OtherPunctuation      -- ^ Po: Punctuation, Other
 116         | MathSymbol            -- ^ Sm: Symbol, Math
 117         | CurrencySymbol        -- ^ Sc: Symbol, Currency
 118         | ModifierSymbol        -- ^ Sk: Symbol, Modifier
 119         | OtherSymbol           -- ^ So: Symbol, Other
 120         | Space                 -- ^ Zs: Separator, Space
 121         | LineSeparator         -- ^ Zl: Separator, Line
 122         | ParagraphSeparator    -- ^ Zp: Separator, Paragraph
 123         | Control               -- ^ Cc: Other, Control
 124         | Format                -- ^ Cf: Other, Format
 125         | Surrogate             -- ^ Cs: Other, Surrogate
 126         | PrivateUse            -- ^ Co: Other, Private Use
 127         | NotAssigned           -- ^ Cn: Other, Not Assigned
 128         deriving (Eq, Ord, Enum, Read, Show, Bounded)
 129
 130 -- | Retrieves the general Unicode category of the character.
 131 generalCategory :: Char -> GeneralCategory
 132 #if defined(__GLASGOW_HASKELL__) || defined(__NHC__)
 133 generalCategory c = toEnum (wgencat (fromIntegral (ord c)))
 134 #endif
 135 #ifdef __HUGS__
 136 generalCategory c = toEnum (primUniGenCat c)
 137 #endif
 138
 139 -- derived character classifiers
 140
 141 isLetter :: Char -> Bool
 142 isLetter c = case generalCategory c of
 143         UppercaseLetter         -> True
 144         LowercaseLetter         -> True
 145         TitlecaseLetter         -> True
 146         ModifierLetter          -> True
 147         OtherLetter             -> True
 148         _                       -> False
 149
 150 isMark :: Char -> Bool
 151 isMark c = case generalCategory c of
 152         NonSpacingMark          -> True
 153         SpacingCombiningMark    -> True
 154         EnclosingMark           -> True
 155         _                       -> False
 156
 157 isNumber :: Char -> Bool
 158 isNumber c = case generalCategory c of
 159         DecimalNumber           -> True
 160         LetterNumber            -> True
 161         OtherNumber             -> True
 162         _                       -> False
 163
 164 isPunctuation :: Char -> Bool
 165 isPunctuation c = case generalCategory c of
 166         ConnectorPunctuation    -> True
 167         DashPunctuation         -> True
 168         OpenPunctuation         -> True
 169         ClosePunctuation        -> True
 170         InitialQuote            -> True
 171         FinalQuote              -> True
 172         OtherPunctuation        -> True
 173         _                       -> False
 174
 175 isSymbol :: Char -> Bool
 176 isSymbol c = case generalCategory c of
 177         MathSymbol              -> True
 178         CurrencySymbol          -> True
 179         ModifierSymbol          -> True
 180         OtherSymbol             -> True
 181         _                       -> False
 182
 183 isSeparator :: Char -> Bool
 184 isSeparator c = case generalCategory c of
 185         Space                   -> True
 186         LineSeparator           -> True
 187         ParagraphSeparator      -> True
 188         _                       -> False
 189
 190 #ifdef __NHC__
 191 -- dummy implementation
 192 toTitle :: Char -> Char
 193 toTitle = toUpper
 194 #endif