, is_lower -- Char# -> Bool
, is_upper -- Char# -> Bool
, is_digit -- Char# -> Bool
+ , is_alphanum -- Char -> Bool
+
+ , is_hexdigit, is_octdigit
+ , hexDigit, octDecDigit
) where
-\end{code}
-\begin{code}
-import Bits ( Bits((.&.)) )
-import Int ( Int32 )
-import PrelBase ( Char#, Char(..) )
+#include "HsVersions.h"
+
+import DATA_INT ( Int32 )
+import DATA_BITS ( Bits((.&.)) )
+import Char ( ord, chr )
\end{code}
Bit masks
\begin{code}
{-# INLINE is_ctype #-}
-is_ctype :: Int -> Char# -> Bool
-is_ctype mask c = (fromIntegral (charType (C# c)) .&. fromIntegral mask) /= (0::Int32)
+-- Core class test: looks up the class word of @c@ with charType, ANDs it
+-- with @mask@ (both converted via fromIntegral and compared as Int32) and
+-- reports whether any bit survives.
+is_ctype :: Int -> Char -> Bool
+is_ctype mask c = (fromIntegral (charType c) .&. fromIntegral mask) /= (0::Int32)
-is_ident, is_symbol, is_any, is_space, is_lower, is_upper, is_digit :: Char# -> Bool
+-- Character-class predicates.  Each one is is_ctype partially applied to
+-- the mask of a single class, so it holds when charType assigns that class
+-- to the character.
+is_ident, is_symbol, is_any, is_space, is_lower, is_upper, is_digit :: Char -> Bool
is_ident = is_ctype cIdent
is_symbol = is_ctype cSymbol
is_any = is_ctype cAny
is_lower = is_ctype cLower
is_upper = is_ctype cUpper
is_digit = is_ctype cDigit
+-- Holds when the character carries any of the lower, upper or digit flags.
+-- NOTE(review): combining the masks with + presumes they are disjoint
+-- bits -- confirm against the mask definitions.
+is_alphanum = is_ctype (cLower+cUpper+cDigit)
+\end{code}
+
+Utils
+
+\begin{code}
+-- Numeric value of a hexadecimal digit character.  A character accepted by
+-- is_digit maps to its offset from '0'; any other character is lower-cased
+-- with to_lower and maps to 10 plus its offset from 'a'.  No validity check
+-- is made here, so a non-hex character just yields that arithmetic result.
+hexDigit :: Char -> Int
+hexDigit c = if is_digit c then offsetFrom '0' c else 10 + offsetFrom 'a' (to_lower c)
+  where
+    -- distance in code points from base up to ch
+    offsetFrom base ch = ord ch - ord base
+
+-- Numeric value of an octal or decimal digit character, computed as its
+-- code-point offset from '0'.  The argument is not range-checked.
+octDecDigit :: Char -> Int
+octDecDigit c = ord c - zero
+  where
+    zero = ord '0'
+
+-- Hexadecimal digit test: holds for anything is_digit accepts, and for the
+-- letters 'a'..'f' and 'A'..'F'.
+is_hexdigit :: Char -> Bool
+is_hexdigit c = is_digit c || between 'a' 'f' || between 'A' 'F'
+  where
+    -- inclusive range check on the argument character
+    between lo hi = lo <= c && c <= hi
+
+-- Octal digit test: holds exactly for the characters '0' through '7'.
+is_octdigit :: Char -> Bool
+is_octdigit c = '0' <= c && c <= '7'
+
+-- ASCII-only lower-casing: 'A'..'Z' are shifted onto 'a'..'z' by adding the
+-- code-point distance between 'a' and 'A'; every other character is
+-- returned unchanged.
+to_lower :: Char -> Char
+to_lower c
+  | 'A' <= c && c <= 'Z' = chr (ord c + (ord 'a' - ord 'A'))
+  | otherwise = c
\end{code}
We really mean .|. instead of + below, but GHC currently doesn't do
'\7' -> 0 -- \007
'\8' -> 0 -- \010
'\9' -> cAny + cSpace -- \t
- '\10' -> cAny + cSpace -- \n
+ '\10' -> cSpace -- \n (not allowed in strings, so !cAny)
'\11' -> cAny + cSpace -- \v
'\12' -> cAny + cSpace -- \f
- '\13' -> cAny + cSpace -- ^M
+ '\13' -> cAny + cSpace -- ^M
'\14' -> 0 -- \016
'\15' -> 0 -- \017
'\16' -> 0 -- \020
'\32' -> cAny + cSpace --
'\33' -> cAny + cSymbol -- !
'\34' -> cAny -- "
- '\35' -> cAny + cSymbol -- #
- '\36' -> cAny + cSymbol -- $
+ '\35' -> cAny + cSymbol -- #
+ '\36' -> cAny + cSymbol -- $
'\37' -> cAny + cSymbol -- %
'\38' -> cAny + cSymbol -- &
'\39' -> cAny + cIdent -- '
'\40' -> cAny -- (
'\41' -> cAny -- )
- '\42' -> cAny + cSymbol -- *
+ '\42' -> cAny + cSymbol -- *
'\43' -> cAny + cSymbol -- +
'\44' -> cAny -- ,
'\45' -> cAny + cSymbol -- -
'\46' -> cAny + cSymbol -- .
- '\47' -> cAny + cSymbol -- /
+ '\47' -> cAny + cSymbol -- /
'\48' -> cAny + cIdent + cDigit -- 0
'\49' -> cAny + cIdent + cDigit -- 1
'\50' -> cAny + cIdent + cDigit -- 2
'\91' -> cAny -- [
'\92' -> cAny + cSymbol -- backslash
'\93' -> cAny -- ]
- '\94' -> cAny + cSymbol -- ^
+ '\94' -> cAny + cSymbol -- ^
'\95' -> cAny + cIdent + cLower -- _
'\96' -> cAny -- `
'\97' -> cAny + cIdent + cLower -- a
'\121' -> cAny + cIdent + cLower -- y
'\122' -> cAny + cIdent + cLower -- z
'\123' -> cAny -- {
- '\124' -> cAny + cSymbol -- |
+ '\124' -> cAny + cSymbol -- |
'\125' -> cAny -- }
'\126' -> cAny + cSymbol -- ~
'\127' -> 0 -- \177
'\158' -> 0 -- \236
'\159' -> 0 -- \237
'\160' -> cSpace --
- '\161' -> cAny + cSymbol -- ¡
- '\162' -> cAny + cSymbol -- ¢
- '\163' -> cAny + cSymbol -- £
- '\164' -> cAny + cSymbol -- ¤
- '\165' -> cAny + cSymbol -- ¥
- '\166' -> cAny + cSymbol -- ¦
- '\167' -> cAny + cSymbol -- §
- '\168' -> cAny + cSymbol -- ¨
- '\169' -> cAny + cSymbol -- ©
- '\170' -> cAny + cSymbol -- ª
- '\171' -> cAny + cSymbol -- «
- '\172' -> cAny + cSymbol -- ¬
- '\173' -> cAny + cSymbol --
- '\174' -> cAny + cSymbol -- ®
- '\175' -> cAny + cSymbol -- ¯
- '\176' -> cAny + cSymbol -- °
- '\177' -> cAny + cSymbol -- ±
- '\178' -> cAny + cSymbol -- ²
- '\179' -> cAny + cSymbol -- ³
- '\180' -> cAny + cSymbol -- ´
- '\181' -> cAny + cSymbol -- µ
- '\182' -> cAny + cSymbol -- ¶
- '\183' -> cAny + cSymbol -- ·
- '\184' -> cAny + cSymbol -- ¸
- '\185' -> cAny + cSymbol -- ¹
- '\186' -> cAny + cSymbol -- º
- '\187' -> cAny + cSymbol -- »
- '\188' -> cAny + cSymbol -- ¼
- '\189' -> cAny + cSymbol -- ½
- '\190' -> cAny + cSymbol -- ¾
- '\191' -> cAny + cSymbol -- ¿
- '\192' -> cAny + cIdent + cUpper -- À
- '\193' -> cAny + cIdent + cUpper -- Á
- '\194' -> cAny + cIdent + cUpper -- Â
- '\195' -> cAny + cIdent + cUpper -- Ã
- '\196' -> cAny + cIdent + cUpper -- Ä
- '\197' -> cAny + cIdent + cUpper -- Å
- '\198' -> cAny + cIdent + cUpper -- Æ
- '\199' -> cAny + cIdent + cUpper -- Ç
- '\200' -> cAny + cIdent + cUpper -- È
- '\201' -> cAny + cIdent + cUpper -- É
- '\202' -> cAny + cIdent + cUpper -- Ê
- '\203' -> cAny + cIdent + cUpper -- Ë
- '\204' -> cAny + cIdent + cUpper -- Ì
- '\205' -> cAny + cIdent + cUpper -- Í
- '\206' -> cAny + cIdent + cUpper -- Î
- '\207' -> cAny + cIdent + cUpper -- Ï
- '\208' -> cAny + cIdent + cUpper -- Ð
- '\209' -> cAny + cIdent + cUpper -- Ñ
- '\210' -> cAny + cIdent + cUpper -- Ò
- '\211' -> cAny + cIdent + cUpper -- Ó
- '\212' -> cAny + cIdent + cUpper -- Ô
- '\213' -> cAny + cIdent + cUpper -- Õ
- '\214' -> cAny + cIdent + cUpper -- Ö
- '\215' -> cAny + cSymbol + cLower -- ×
- '\216' -> cAny + cIdent + cUpper -- Ø
- '\217' -> cAny + cIdent + cUpper -- Ù
- '\218' -> cAny + cIdent + cUpper -- Ú
- '\219' -> cAny + cIdent + cUpper -- Û
- '\220' -> cAny + cIdent + cUpper -- Ü
- '\221' -> cAny + cIdent + cUpper -- Ý
- '\222' -> cAny + cIdent + cUpper -- Þ
- '\223' -> cAny + cIdent -- ß
- '\224' -> cAny + cIdent + cLower -- à
- '\225' -> cAny + cIdent + cLower -- á
- '\226' -> cAny + cIdent + cLower -- â
- '\227' -> cAny + cIdent + cLower -- ã
- '\228' -> cAny + cIdent + cLower -- ä
- '\229' -> cAny + cIdent + cLower -- å
- '\230' -> cAny + cIdent + cLower -- æ
- '\231' -> cAny + cIdent + cLower -- ç
- '\232' -> cAny + cIdent + cLower -- è
- '\233' -> cAny + cIdent + cLower -- é
- '\234' -> cAny + cIdent + cLower -- ê
- '\235' -> cAny + cIdent + cLower -- ë
- '\236' -> cAny + cIdent + cLower -- ì
- '\237' -> cAny + cIdent + cLower -- í
- '\238' -> cAny + cIdent + cLower -- î
- '\239' -> cAny + cIdent + cLower -- ï
- '\240' -> cAny + cIdent + cLower -- ð
- '\241' -> cAny + cIdent + cLower -- ñ
- '\242' -> cAny + cIdent + cLower -- ò
- '\243' -> cAny + cIdent + cLower -- ó
- '\244' -> cAny + cIdent + cLower -- ô
- '\245' -> cAny + cIdent + cLower -- õ
- '\246' -> cAny + cIdent + cLower -- ö
- '\247' -> cAny + cSymbol -- ÷
- '\248' -> cAny + cIdent -- ø
- '\249' -> cAny + cIdent + cLower -- ù
- '\250' -> cAny + cIdent + cLower -- ú
- '\251' -> cAny + cIdent + cLower -- û
- '\252' -> cAny + cIdent + cLower -- ü
- '\253' -> cAny + cIdent + cLower -- ý
- '\254' -> cAny + cIdent + cLower -- þ
- '\255' -> cAny + cIdent + cLower -- ÿ
+ '\161' -> cAny + cSymbol -- ¡
+ '\162' -> cAny + cSymbol -- ¢
+ '\163' -> cAny + cSymbol -- £
+ '\164' -> cAny + cSymbol -- ¤
+ '\165' -> cAny + cSymbol -- ¥
+ '\166' -> cAny + cSymbol -- ¦
+ '\167' -> cAny + cSymbol -- §
+ '\168' -> cAny + cSymbol -- ¨
+ '\169' -> cAny + cSymbol -- ©
+ '\170' -> cAny + cSymbol -- ª
+ '\171' -> cAny + cSymbol -- «
+ '\172' -> cAny + cSymbol -- ¬
+ '\173' -> cAny + cSymbol --
+ '\174' -> cAny + cSymbol -- ®
+ '\175' -> cAny + cSymbol -- ¯
+ '\176' -> cAny + cSymbol -- °
+ '\177' -> cAny + cSymbol -- ±
+ '\178' -> cAny + cSymbol -- ²
+ '\179' -> cAny + cSymbol -- ³
+ '\180' -> cAny + cSymbol -- ´
+ '\181' -> cAny + cSymbol -- µ
+ '\182' -> cAny + cSymbol -- ¶
+ '\183' -> cAny + cSymbol -- ·
+ '\184' -> cAny + cSymbol -- ¸
+ '\185' -> cAny + cSymbol -- ¹
+ '\186' -> cAny + cSymbol -- º
+ '\187' -> cAny + cSymbol -- »
+ '\188' -> cAny + cSymbol -- ¼
+ '\189' -> cAny + cSymbol -- ½
+ '\190' -> cAny + cSymbol -- ¾
+ '\191' -> cAny + cSymbol -- ¿
+ '\192' -> cAny + cIdent + cUpper -- À
+ '\193' -> cAny + cIdent + cUpper -- Á
+ '\194' -> cAny + cIdent + cUpper -- Â
+ '\195' -> cAny + cIdent + cUpper -- Ã
+ '\196' -> cAny + cIdent + cUpper -- Ä
+ '\197' -> cAny + cIdent + cUpper -- Å
+ '\198' -> cAny + cIdent + cUpper -- Æ
+ '\199' -> cAny + cIdent + cUpper -- Ç
+ '\200' -> cAny + cIdent + cUpper -- È
+ '\201' -> cAny + cIdent + cUpper -- É
+ '\202' -> cAny + cIdent + cUpper -- Ê
+ '\203' -> cAny + cIdent + cUpper -- Ë
+ '\204' -> cAny + cIdent + cUpper -- Ì
+ '\205' -> cAny + cIdent + cUpper -- Í
+ '\206' -> cAny + cIdent + cUpper -- Î
+ '\207' -> cAny + cIdent + cUpper -- Ï
+ '\208' -> cAny + cIdent + cUpper -- Ð
+ '\209' -> cAny + cIdent + cUpper -- Ñ
+ '\210' -> cAny + cIdent + cUpper -- Ò
+ '\211' -> cAny + cIdent + cUpper -- Ó
+ '\212' -> cAny + cIdent + cUpper -- Ô
+ '\213' -> cAny + cIdent + cUpper -- Õ
+ '\214' -> cAny + cIdent + cUpper -- Ö
+ '\215' -> cAny + cSymbol + cLower -- ×
+ '\216' -> cAny + cIdent + cUpper -- Ø
+ '\217' -> cAny + cIdent + cUpper -- Ù
+ '\218' -> cAny + cIdent + cUpper -- Ú
+ '\219' -> cAny + cIdent + cUpper -- Û
+ '\220' -> cAny + cIdent + cUpper -- Ü
+ '\221' -> cAny + cIdent + cUpper -- Ý
+ '\222' -> cAny + cIdent + cUpper -- Þ
+ '\223' -> cAny + cIdent -- ß
+ '\224' -> cAny + cIdent + cLower -- à
+ '\225' -> cAny + cIdent + cLower -- á
+ '\226' -> cAny + cIdent + cLower -- â
+ '\227' -> cAny + cIdent + cLower -- ã
+ '\228' -> cAny + cIdent + cLower -- ä
+ '\229' -> cAny + cIdent + cLower -- å
+ '\230' -> cAny + cIdent + cLower -- æ
+ '\231' -> cAny + cIdent + cLower -- ç
+ '\232' -> cAny + cIdent + cLower -- è
+ '\233' -> cAny + cIdent + cLower -- é
+ '\234' -> cAny + cIdent + cLower -- ê
+ '\235' -> cAny + cIdent + cLower -- ë
+ '\236' -> cAny + cIdent + cLower -- ì
+ '\237' -> cAny + cIdent + cLower -- í
+ '\238' -> cAny + cIdent + cLower -- î
+ '\239' -> cAny + cIdent + cLower -- ï
+ '\240' -> cAny + cIdent + cLower -- ð
+ '\241' -> cAny + cIdent + cLower -- ñ
+ '\242' -> cAny + cIdent + cLower -- ò
+ '\243' -> cAny + cIdent + cLower -- ó
+ '\244' -> cAny + cIdent + cLower -- ô
+ '\245' -> cAny + cIdent + cLower -- õ
+ '\246' -> cAny + cIdent + cLower -- ö
+ '\247' -> cAny + cSymbol -- ÷
+ '\248' -> cAny + cIdent -- ø
+ '\249' -> cAny + cIdent + cLower -- ù
+ '\250' -> cAny + cIdent + cLower -- ú
+ '\251' -> cAny + cIdent + cLower -- û
+ '\252' -> cAny + cIdent + cLower -- ü
+ '\253' -> cAny + cIdent + cLower -- ý
+ '\254' -> cAny + cIdent + cLower -- þ
+ '\255' -> cAny + cIdent + cLower -- ÿ
\end{code}