ghc/interpreter/library/Char.hs

   1 #ifdef HEAD
   2 module Char (
   3     isAscii, isLatin1, isControl, isPrint, isSpace, isUpper, isLower,
   4     isAlpha, isDigit, isOctDigit, isHexDigit, isAlphaNum,
   5     digitToInt, intToDigit,
   6     toUpper, toLower,
   7     ord, chr,
   8     readLitChar, showLitChar, lexLitChar
   9     ) where
  10
  11 import Array  -- used for character name table.
  12
  13 import UnicodePrims  -- source of primitive Unicode functions.
  14 import PreludeBuiltin
  15 #endif /* HEAD */
  16 #ifdef BODY
  17
  18 -- Character-testing operations
  19 isAscii, isControl, isPrint, isSpace, isUpper, isLower,
  20  isAlpha, isDigit, isOctDigit, isHexDigit, isAlphaNum :: Char -> Bool
  21
  22 isAscii c                =  c < '\x80'
  23
  24 isLatin1 c               =  c <= '\xff'
  25
  26 -- Only ASCII Chars can be controls
  27
  28 isControl c              =  c < ' ' || c >= '\DEL' && c <= '\x9f'
  29
  30 -- This function does not
  31
  32 isPrint                  =  primUnicodeIsPrint
  33
  34 -- Only Latin-1 spaces recognized
  35
  36 isSpace c                =  c `elem` " \t\n\r\f\v\xA0"
  37
  38 isUpper                  =  primUnicodeIsUpper
  39
  40 isLower                  =  primUnicodeIsLower
  41
  42 isAlpha c                =  isUpper c || isLower c
  43
  44 isDigit c                =  c >= '0' && c <= '9'
  45
  46 isOctDigit c             =  c >= '0' && c <= '7'
  47
  48 isHexDigit c             =  isDigit c || c >= 'A' && c <= 'F' ||
  49                                          c >= 'a' && c <= 'f'
  50
  51 isAlphaNum               =  primUnicodeIsAlphaNum
  52
  53
  54 -- Digit conversion operations
  55 digitToInt :: Char -> Int
  56 digitToInt c
  57   | isDigit c            =  fromEnum c - fromEnum '0'
  58   | c >= 'a' && c <= 'f' =  fromEnum c - fromEnum 'a' + 10
  59   | c >= 'A' && c <= 'F' =  fromEnum c - fromEnum 'A' + 10
  60   | otherwise            =  error "Char.digitToInt: not a digit"
  61
  62 intToDigit :: Int -> Char
  63 intToDigit i
  64   | i >= 0  && i <=  9   =  toEnum (fromEnum '0' + i)
  65   | i >= 10 && i <= 15   =  toEnum (fromEnum 'a' + i - 10)
  66   | otherwise            =  error "Char.intToDigit: not a digit"
  67
  68
  69 -- Case-changing operations
  70 toUpper                  :: Char -> Char
  71 toUpper                  =  primUnicodeToUpper
  72
  73 toLower                  :: Char -> Char
  74 toLower                  =  primUnicodeToLower
  75
  76 -- Character code functions
  77 ord                     :: Char -> Int
  78 ord                     =  fromEnum
  79
  80 chr                     :: Int  -> Char
  81 chr                     =  toEnum
  82
  83 -- Text functions
  84 readLitChar             :: ReadS Char
  85 readLitChar ('\\':s)    =  readEsc s
  86         where
  87         readEsc ('a':s)  = [('\a',s)]
  88         readEsc ('b':s)  = [('\b',s)]
  89         readEsc ('f':s)  = [('\f',s)]
  90         readEsc ('n':s)  = [('\n',s)]
  91         readEsc ('r':s)  = [('\r',s)]
  92         readEsc ('t':s)  = [('\t',s)]
  93         readEsc ('v':s)  = [('\v',s)]
  94         readEsc ('\\':s) = [('\\',s)]
  95         readEsc ('"':s)  = [('"',s)]
  96         readEsc ('\'':s) = [('\'',s)]
  97         readEsc ('^':c:s) | c >= '@' && c <= '_'
  98                          = [(chr (ord c - ord '@'), s)]
  99         readEsc s@(d:_) | isDigit d
 100                          = [(chr n, t) | (n,t) <- readDec s]
 101         readEsc ('o':s)  = [(chr n, t) | (n,t) <- readOct s]
 102         readEsc ('x':s)  = [(chr n, t) | (n,t) <- readHex s]
 103         readEsc s@(c:_) | isUpper c
 104                          = let table = ('\DEL', "DEL") : assocs asciiTab
 105                            in case [(c,s') | (c, mne) <- table,
 106                                              ([],s') <- [match mne s]]
 107                               of (pr:_) -> [pr]
 108                                  []     -> []
 109         readEsc _        = []
 110 readLitChar (c:s)       =  [(c,s)]
 111
 112 showLitChar                :: Char -> ShowS
 113 showLitChar c | c > '\DEL' =  showChar '\\' .
 114                               protectEsc isDigit (shows (ord c))
 115 showLitChar '\DEL'         =  showString "\\DEL"
 116 showLitChar '\\'           =  showString "\\\\"
 117 showLitChar c | c >= ' '   =  showChar c
 118 showLitChar '\a'           =  showString "\\a"
 119 showLitChar '\b'           =  showString "\\b"
 120 showLitChar '\f'           =  showString "\\f"
 121 showLitChar '\n'           =  showString "\\n"
 122 showLitChar '\r'           =  showString "\\r"
 123 showLitChar '\t'           =  showString "\\t"
 124 showLitChar '\v'           =  showString "\\v"
 125 showLitChar '\SO'          =  protectEsc (== 'H') (showString "\\SO")
 126 showLitChar c              =  showString ('\\' : asciiTab!c)
 127
 128 protectEsc p f             = f . cont
 129                              where cont s@(c:_) | p c = "\\&" ++ s
 130                                    cont s             = s
 131
 132 match                         :: (Eq a) => [a] -> [a] -> ([a],[a])
 133 match (x:xs) (y:ys) | x == y  =  match xs ys
 134 match xs     ys               =  (xs,ys)
 135
 136 asciiTab = listArray ('\NUL', ' ')
 137            ["NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
 138             "BS",  "HT",  "LF",  "VT",  "FF",  "CR",  "SO",  "SI",
 139             "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
 140             "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
 141             "SP"]
 142
 143 lexLitChar          :: ReadS String
 144 lexLitChar ('\\':s) =  [('\\':esc, t) | (esc,t) <- lexEsc s]
 145         where
 146           lexEsc (c:s)     | c `elem` "abfnrtv\\\"'" = [([c],s)]
 147           lexEsc s@(d:_)   | isDigit d               = lexDigits s
 148           lexEsc ('^':c:s) | c >= '@' && c <= '_'    = [(['^',c],s)]
 149           -- Very crude approximation to \XYZ.  Let readers work this out.
 150           lexEsc s@(c:_)   | isUpper c               = [span isCharName s]
 151           lexEsc _                                   = []
 152           isCharName c = isUpper c || isDigit c
 153
 154 lexLitChar (c:s)    =  [([c],s)]
 155 lexLitChar ""       =  []
 156
 157 #endif /* BODY */