, is_lower -- Char# -> Bool
, is_upper -- Char# -> Bool
, is_digit -- Char# -> Bool
+
+ , is_hexdigit, is_octdigit
+ , hexDigit, octDecDigit
) where
-\end{code}
-\begin{code}
-import Bits ( Bits((.&.)) )
-import Int ( Int32 )
-import GlaExts ( Char#, Char(..) )
+#include "HsVersions.h"
+
+import DATA_INT ( Int32 )
+import DATA_BITS ( Bits((.&.)) )
+import Char ( ord, chr )
\end{code}
Bit masks
\begin{code}
{-# INLINE is_ctype #-}
-is_ctype :: Int -> Char# -> Bool
-is_ctype mask c = (fromIntegral (charType (C# c)) .&. fromIntegral mask) /= (0::Int32)
+is_ctype :: Int -> Char -> Bool
+is_ctype mask c = (fromIntegral (charType c) .&. fromIntegral mask) /= (0::Int32)
-is_ident, is_symbol, is_any, is_space, is_lower, is_upper, is_digit :: Char# -> Bool
+is_ident, is_symbol, is_any, is_space, is_lower, is_upper, is_digit :: Char -> Bool
is_ident = is_ctype cIdent
is_symbol = is_ctype cSymbol
is_any = is_ctype cAny
is_digit = is_ctype cDigit
\end{code}
+Utils
+
+\begin{code}
+hexDigit :: Char -> Int
+hexDigit c | is_digit c = ord c - ord '0'
+ | otherwise = ord (to_lower c) - ord 'a' + 10
+
+octDecDigit :: Char -> Int
+octDecDigit c = ord c - ord '0'
+
+is_hexdigit c
+ = is_digit c
+ || (c >= 'a' && c <= 'f')
+ || (c >= 'A' && c <= 'F')
+
+is_octdigit c = c >= '0' && c <= '7'
+
+to_lower c
+ | c >= 'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a'))
+ | otherwise = c
+\end{code}
+
We really mean .|. instead of + below, but GHC currently doesn't do
any constant folding with bitops. *sigh*
'\7' -> 0 -- \007
'\8' -> 0 -- \010
'\9' -> cAny + cSpace -- \t
- '\10' -> cAny + cSpace -- \n
+ '\10' -> cSpace -- \n (not allowed in strings, so !cAny)
'\11' -> cAny + cSpace -- \v
'\12' -> cAny + cSpace -- \f
- '\13' -> cAny + cSpace -- ^M
+ '\13' -> cAny + cSpace -- ^M
'\14' -> 0 -- \016
'\15' -> 0 -- \017
'\16' -> 0 -- \020
'\32' -> cAny + cSpace --
'\33' -> cAny + cSymbol -- !
'\34' -> cAny -- "
- '\35' -> cAny + cSymbol -- #
- '\36' -> cAny + cSymbol -- $
+ '\35' -> cAny + cSymbol -- #
+ '\36' -> cAny + cSymbol -- $
'\37' -> cAny + cSymbol -- %
'\38' -> cAny + cSymbol -- &
'\39' -> cAny + cIdent -- '
'\40' -> cAny -- (
'\41' -> cAny -- )
- '\42' -> cAny + cSymbol -- *
+ '\42' -> cAny + cSymbol -- *
'\43' -> cAny + cSymbol -- +
'\44' -> cAny -- ,
'\45' -> cAny + cSymbol -- -
'\46' -> cAny + cSymbol -- .
- '\47' -> cAny + cSymbol -- /
+ '\47' -> cAny + cSymbol -- /
'\48' -> cAny + cIdent + cDigit -- 0
'\49' -> cAny + cIdent + cDigit -- 1
'\50' -> cAny + cIdent + cDigit -- 2
'\91' -> cAny -- [
'\92' -> cAny + cSymbol -- backslash
'\93' -> cAny -- ]
- '\94' -> cAny + cSymbol -- ^
+ '\94' -> cAny + cSymbol -- ^
'\95' -> cAny + cIdent + cLower -- _
'\96' -> cAny -- `
'\97' -> cAny + cIdent + cLower -- a
'\121' -> cAny + cIdent + cLower -- y
'\122' -> cAny + cIdent + cLower -- z
'\123' -> cAny -- {
- '\124' -> cAny + cSymbol -- |
+ '\124' -> cAny + cSymbol -- |
'\125' -> cAny -- }
'\126' -> cAny + cSymbol -- ~
'\127' -> 0 -- \177