Character classification
\begin{code}
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
module Ctype
( is_ident -- Char# -> Bool
, is_symbol -- Char# -> Bool
, is_digit -- Char# -> Bool
, is_alphanum -- Char# -> Bool
- , is_hexdigit, is_octdigit
+ , is_decdigit, is_hexdigit, is_octdigit
, hexDigit, octDecDigit
) where
import Data.Int ( Int32 )
import Data.Bits ( Bits((.&.)) )
import Data.Char ( ord, chr )
+import Panic
\end{code}
Bit masks
+-- | True when the classification word @charType c@ shares at least one
+-- set bit with @mask@.  Both values are converted with 'fromIntegral' and
+-- the result of the bitwise AND is compared against zero at type 'Int32'.
is_ctype :: Int -> Char -> Bool
is_ctype mask c = (fromIntegral (charType c) .&. fromIntegral mask) /= (0::Int32)
-is_ident, is_symbol, is_any, is_space, is_lower, is_upper, is_digit :: Char -> Bool
+-- Mask-based classification predicates: each definition visible below is
+-- 'is_ctype' partially applied to one mask constant (cIdent, cSymbol, cAny).
+-- The signature also covers is_space, is_lower, is_upper, is_digit and
+-- is_alphanum, whose definitions are not shown in this hunk.
+is_ident, is_symbol, is_any, is_space, is_lower, is_upper, is_digit,
+ is_alphanum :: Char -> Bool
is_ident = is_ctype cIdent
is_symbol = is_ctype cSymbol
is_any = is_ctype cAny
\begin{code}
+-- | Numeric value of a hexadecimal digit character.  Decimal digits map to
+-- their offset from '0'; any other character is lower-cased with 'to_lower'
+-- and mapped to its offset from 'a' plus 10.  The input is not validated
+-- here, so a non-hex character still produces a number.
hexDigit :: Char -> Int
-hexDigit c | is_digit c = ord c - ord '0'
- | otherwise = ord (to_lower c) - ord 'a' + 10
+hexDigit c | is_decdigit c = ord c - ord '0'
+ | otherwise = ord (to_lower c) - ord 'a' + 10
+-- | Offset of the character's code point from that of '0'.  No range check
+-- is performed here.
octDecDigit :: Char -> Int
octDecDigit c = ord c - ord '0'
+-- | True exactly for the characters '0' through '9'.
+is_decdigit :: Char -> Bool
+is_decdigit c
+ = c >= '0' && c <= '9'
+
+-- | True for '0'..'9' (via 'is_decdigit'), 'a'..'f' and 'A'..'F'.
+is_hexdigit :: Char -> Bool
is_hexdigit c
- = is_digit c
+ = is_decdigit c
|| (c >= 'a' && c <= 'f')
|| (c >= 'A' && c <= 'F')
+-- | True for the octal digits '0' through '7'.
+is_octdigit :: Char -> Bool
is_octdigit c = c >= '0' && c <= '7'
-to_lower c
+-- | Map 'A'..'Z' to 'a'..'z' by shifting the code point by
+-- @ord 'a' - ord 'A'@; every other character is returned unchanged.
+to_lower :: Char -> Char
+to_lower c
| c >= 'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a'))
| otherwise = c
\end{code}
'\125' -> cAny -- }
'\126' -> cAny + cSymbol -- ~
'\127' -> 0 -- \177
- '\128' -> 0 -- \200
- '\129' -> 0 -- \201
- '\130' -> 0 -- \202
- '\131' -> 0 -- \203
- '\132' -> 0 -- \204
- '\133' -> 0 -- \205
- '\134' -> 0 -- \206
- '\135' -> 0 -- \207
- '\136' -> 0 -- \210
- '\137' -> 0 -- \211
- '\138' -> 0 -- \212
- '\139' -> 0 -- \213
- '\140' -> 0 -- \214
- '\141' -> 0 -- \215
- '\142' -> 0 -- \216
- '\143' -> 0 -- \217
- '\144' -> 0 -- \220
- '\145' -> 0 -- \221
- '\146' -> 0 -- \222
- '\147' -> 0 -- \223
- '\148' -> 0 -- \224
- '\149' -> 0 -- \225
- '\150' -> 0 -- \226
- '\151' -> 0 -- \227
- '\152' -> 0 -- \230
- '\153' -> 0 -- \231
- '\154' -> 0 -- \232
- '\155' -> 0 -- \233
- '\156' -> 0 -- \234
- '\157' -> 0 -- \235
- '\158' -> 0 -- \236
- '\159' -> 0 -- \237
- '\160' -> cSpace --
- '\161' -> cAny + cSymbol -- ¡
- '\162' -> cAny + cSymbol -- ¢
- '\163' -> cAny + cSymbol -- £
- '\164' -> cAny + cSymbol -- ¤
- '\165' -> cAny + cSymbol -- ¥
- '\166' -> cAny + cSymbol -- ¦
- '\167' -> cAny + cSymbol -- §
- '\168' -> cAny + cSymbol -- ¨
- '\169' -> cAny + cSymbol -- ©
- '\170' -> cAny + cSymbol -- ª
- '\171' -> cAny + cSymbol -- «
- '\172' -> cAny + cSymbol -- ¬
- '\173' -> cAny + cSymbol --
- '\174' -> cAny + cSymbol -- ®
- '\175' -> cAny + cSymbol -- ¯
- '\176' -> cAny + cSymbol -- °
- '\177' -> cAny + cSymbol -- ±
- '\178' -> cAny + cSymbol -- ²
- '\179' -> cAny + cSymbol -- ³
- '\180' -> cAny + cSymbol -- ´
- '\181' -> cAny + cSymbol -- µ
- '\182' -> cAny + cSymbol -- ¶
- '\183' -> cAny + cSymbol -- ·
- '\184' -> cAny + cSymbol -- ¸
- '\185' -> cAny + cSymbol -- ¹
- '\186' -> cAny + cSymbol -- º
- '\187' -> cAny + cSymbol -- »
- '\188' -> cAny + cSymbol -- ¼
- '\189' -> cAny + cSymbol -- ½
- '\190' -> cAny + cSymbol -- ¾
- '\191' -> cAny + cSymbol -- ¿
- '\192' -> cAny + cIdent + cUpper -- À
- '\193' -> cAny + cIdent + cUpper -- Á
- '\194' -> cAny + cIdent + cUpper -- Â
- '\195' -> cAny + cIdent + cUpper -- Ã
- '\196' -> cAny + cIdent + cUpper -- Ä
- '\197' -> cAny + cIdent + cUpper -- Å
- '\198' -> cAny + cIdent + cUpper -- Æ
- '\199' -> cAny + cIdent + cUpper -- Ç
- '\200' -> cAny + cIdent + cUpper -- È
- '\201' -> cAny + cIdent + cUpper -- É
- '\202' -> cAny + cIdent + cUpper -- Ê
- '\203' -> cAny + cIdent + cUpper -- Ë
- '\204' -> cAny + cIdent + cUpper -- Ì
- '\205' -> cAny + cIdent + cUpper -- Í
- '\206' -> cAny + cIdent + cUpper -- Î
- '\207' -> cAny + cIdent + cUpper -- Ï
- '\208' -> cAny + cIdent + cUpper -- Ð
- '\209' -> cAny + cIdent + cUpper -- Ñ
- '\210' -> cAny + cIdent + cUpper -- Ò
- '\211' -> cAny + cIdent + cUpper -- Ó
- '\212' -> cAny + cIdent + cUpper -- Ô
- '\213' -> cAny + cIdent + cUpper -- Õ
- '\214' -> cAny + cIdent + cUpper -- Ö
- '\215' -> cAny + cSymbol + cLower -- ×
- '\216' -> cAny + cIdent + cUpper -- Ø
- '\217' -> cAny + cIdent + cUpper -- Ù
- '\218' -> cAny + cIdent + cUpper -- Ú
- '\219' -> cAny + cIdent + cUpper -- Û
- '\220' -> cAny + cIdent + cUpper -- Ü
- '\221' -> cAny + cIdent + cUpper -- Ý
- '\222' -> cAny + cIdent + cUpper -- Þ
- '\223' -> cAny + cIdent -- ß
- '\224' -> cAny + cIdent + cLower -- à
- '\225' -> cAny + cIdent + cLower -- á
- '\226' -> cAny + cIdent + cLower -- â
- '\227' -> cAny + cIdent + cLower -- ã
- '\228' -> cAny + cIdent + cLower -- ä
- '\229' -> cAny + cIdent + cLower -- å
- '\230' -> cAny + cIdent + cLower -- æ
- '\231' -> cAny + cIdent + cLower -- ç
- '\232' -> cAny + cIdent + cLower -- è
- '\233' -> cAny + cIdent + cLower -- é
- '\234' -> cAny + cIdent + cLower -- ê
- '\235' -> cAny + cIdent + cLower -- ë
- '\236' -> cAny + cIdent + cLower -- ì
- '\237' -> cAny + cIdent + cLower -- í
- '\238' -> cAny + cIdent + cLower -- î
- '\239' -> cAny + cIdent + cLower -- ï
- '\240' -> cAny + cIdent + cLower -- ð
- '\241' -> cAny + cIdent + cLower -- ñ
- '\242' -> cAny + cIdent + cLower -- ò
- '\243' -> cAny + cIdent + cLower -- ó
- '\244' -> cAny + cIdent + cLower -- ô
- '\245' -> cAny + cIdent + cLower -- õ
- '\246' -> cAny + cIdent + cLower -- ö
- '\247' -> cAny + cSymbol -- ÷
- '\248' -> cAny + cIdent -- ø
- '\249' -> cAny + cIdent + cLower -- ù
- '\250' -> cAny + cIdent + cLower -- ú
- '\251' -> cAny + cIdent + cLower -- û
- '\252' -> cAny + cIdent + cLower -- ü
- '\253' -> cAny + cIdent + cLower -- ý
- '\254' -> cAny + cIdent + cLower -- þ
- '\255' -> cAny + cIdent + cLower -- ÿ
+ _ -> panic ("charType: " ++ show c)
\end{code}