X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Fparser%2FCtype.lhs;h=dbe4e9f1b07f27d6358bb4fe171f02b338f2de8a;hb=9d7da331989abcd1844e9d03b8d1e4163796fa85;hp=645f31ea611973ad0ae18fc5abcd07ed531a8123;hpb=8c845163cf72456b2865e08b4f5aa4a0f480f503;p=ghc-hetmet.git diff --git a/ghc/compiler/parser/Ctype.lhs b/ghc/compiler/parser/Ctype.lhs index 645f31e..dbe4e9f 100644 --- a/ghc/compiler/parser/Ctype.lhs +++ b/ghc/compiler/parser/Ctype.lhs @@ -9,13 +9,17 @@ module Ctype , is_lower -- Char# -> Bool , is_upper -- Char# -> Bool , is_digit -- Char# -> Bool + , is_alphanum -- Char# -> Bool + + , is_hexdigit, is_octdigit + , hexDigit, octDecDigit ) where -\end{code} -\begin{code} -import Bits ( Bits((.&.)) ) -import Int ( Int32 ) -import GlaExts ( Char#, Char(..) ) +#include "HsVersions.h" + +import DATA_INT ( Int32 ) +import DATA_BITS ( Bits((.&.)) ) +import Char ( ord, chr ) \end{code} Bit masks @@ -36,10 +40,10 @@ at the big case below. \begin{code} {-# INLINE is_ctype #-} -is_ctype :: Int -> Char# -> Bool -is_ctype mask c = (fromIntegral (charType (C# c)) .&. fromIntegral mask) /= (0::Int32) +is_ctype :: Int -> Char -> Bool +is_ctype mask c = (fromIntegral (charType c) .&. fromIntegral mask) /= (0::Int32) -is_ident, is_symbol, is_any, is_space, is_lower, is_upper, is_digit :: Char# -> Bool +is_ident, is_symbol, is_any, is_space, is_lower, is_upper, is_digit :: Char -> Bool is_ident = is_ctype cIdent is_symbol = is_ctype cSymbol is_any = is_ctype cAny @@ -47,6 +51,29 @@ is_space = is_ctype cSpace is_lower = is_ctype cLower is_upper = is_ctype cUpper is_digit = is_ctype cDigit +is_alphanum = is_ctype (cLower+cUpper+cDigit) +\end{code} + +Utils + +\begin{code} +hexDigit :: Char -> Int +hexDigit c | is_digit c = ord c - ord '0' + | otherwise = ord (to_lower c) - ord 'a' + 10 + +octDecDigit :: Char -> Int +octDecDigit c = ord c - ord '0' + +is_hexdigit c + = is_digit c + || (c >= 'a' && c <= 'f') + || (c >= 'A' && c <= 'F') + +is_octdigit c = c >= '0' && c <= '7' + +to_lower c + | c >= 'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a')) + | otherwise = c \end{code} We really mean .|. instead of + below, but GHC currently doesn't do @@ -65,10 +92,10 @@ charType c = case c of '\7' -> 0 -- \007 '\8' -> 0 -- \010 '\9' -> cAny + cSpace -- \t - '\10' -> cAny + cSpace -- \n + '\10' -> cSpace -- \n (not allowed in strings, so !cAny) '\11' -> cAny + cSpace -- \v '\12' -> cAny + cSpace -- \f - '\13' -> cAny + cSpace -- ^M + '\13' -> cAny + cSpace -- ^M '\14' -> 0 -- \016 '\15' -> 0 -- \017 '\16' -> 0 -- \020 @@ -90,19 +117,19 @@ charType c = case c of '\32' -> cAny + cSpace -- '\33' -> cAny + cSymbol -- ! '\34' -> cAny -- " - '\35' -> cAny + cSymbol -- # - '\36' -> cAny + cSymbol -- $ + '\35' -> cAny + cSymbol -- # + '\36' -> cAny + cSymbol -- $ '\37' -> cAny + cSymbol -- % '\38' -> cAny + cSymbol -- & '\39' -> cAny + cIdent -- ' '\40' -> cAny -- ( '\41' -> cAny -- ) - '\42' -> cAny + cSymbol -- * + '\42' -> cAny + cSymbol -- * '\43' -> cAny + cSymbol -- + '\44' -> cAny -- , '\45' -> cAny + cSymbol -- - '\46' -> cAny + cSymbol -- . - '\47' -> cAny + cSymbol -- / + '\47' -> cAny + cSymbol -- / '\48' -> cAny + cIdent + cDigit -- 0 '\49' -> cAny + cIdent + cDigit -- 1 '\50' -> cAny + cIdent + cDigit -- 2 @@ -149,7 +176,7 @@ charType c = case c of '\91' -> cAny -- [ '\92' -> cAny + cSymbol -- backslash '\93' -> cAny -- ] - '\94' -> cAny + cSymbol -- ^ + '\94' -> cAny + cSymbol -- ^ '\95' -> cAny + cIdent + cLower -- _ '\96' -> cAny -- ` '\97' -> cAny + cIdent + cLower -- a @@ -179,7 +206,7 @@ charType c = case c of '\121' -> cAny + cIdent + cLower -- y '\122' -> cAny + cIdent + cLower -- z '\123' -> cAny -- { - '\124' -> cAny + cSymbol -- | + '\124' -> cAny + cSymbol -- | '\125' -> cAny -- } '\126' -> cAny + cSymbol -- ~ '\127' -> 0 -- \177 @@ -216,99 +243,99 @@ charType c = case c of '\158' -> 0 -- \236 '\159' -> 0 -- \237 '\160' -> cSpace -- - '\161' -> cAny + cSymbol -- ¡ - '\162' -> cAny + cSymbol -- ¢ - '\163' -> cAny + cSymbol -- £ - '\164' -> cAny + cSymbol -- ¤ - '\165' -> cAny + cSymbol -- ¥ - '\166' -> cAny + cSymbol -- ¦ - '\167' -> cAny + cSymbol -- § - '\168' -> cAny + cSymbol -- ¨ - '\169' -> cAny + cSymbol -- © - '\170' -> cAny + cSymbol -- ª - '\171' -> cAny + cSymbol -- « - '\172' -> cAny + cSymbol -- ¬ - '\173' -> cAny + cSymbol -- ­ - '\174' -> cAny + cSymbol -- ® - '\175' -> cAny + cSymbol -- ¯ - '\176' -> cAny + cSymbol -- ° - '\177' -> cAny + cSymbol -- ± - '\178' -> cAny + cSymbol -- ² - '\179' -> cAny + cSymbol -- ³ - '\180' -> cAny + cSymbol -- ´ - '\181' -> cAny + cSymbol -- µ - '\182' -> cAny + cSymbol -- ¶ - '\183' -> cAny + cSymbol -- · - '\184' -> cAny + cSymbol -- ¸ - '\185' -> cAny + cSymbol -- ¹ - '\186' -> cAny + cSymbol -- º - '\187' -> cAny + cSymbol -- » - '\188' -> cAny + cSymbol -- ¼ - '\189' -> cAny + cSymbol -- ½ - '\190' -> cAny + cSymbol -- ¾ - '\191' -> cAny + cSymbol -- ¿ - '\192' -> cAny + cIdent + cUpper -- À - '\193' -> cAny + cIdent + cUpper -- Á - '\194' -> cAny + cIdent + cUpper --  - '\195' -> cAny + cIdent + cUpper -- à - '\196' -> cAny + cIdent + cUpper -- Ä - '\197' -> cAny + cIdent + cUpper -- Å - '\198' -> cAny + cIdent + cUpper -- Æ - '\199' -> cAny + cIdent + cUpper -- Ç - '\200' -> cAny + cIdent + cUpper -- È - '\201' -> cAny + cIdent + cUpper -- É - '\202' -> cAny + cIdent + cUpper -- Ê - '\203' -> cAny + cIdent + cUpper -- Ë - '\204' -> cAny + cIdent + cUpper -- Ì - '\205' -> cAny + cIdent + cUpper -- Í - '\206' -> cAny + cIdent + cUpper -- Î - '\207' -> cAny + cIdent + cUpper -- Ï - '\208' -> cAny + cIdent + cUpper -- Ð - '\209' -> cAny + cIdent + cUpper -- Ñ - '\210' -> cAny + cIdent + cUpper -- Ò - '\211' -> cAny + cIdent + cUpper -- Ó - '\212' -> cAny + cIdent + cUpper -- Ô - '\213' -> cAny + cIdent + cUpper -- Õ - '\214' -> cAny + cIdent + cUpper -- Ö - '\215' -> cAny + cSymbol + cLower -- × - '\216' -> cAny + cIdent + cUpper -- Ø - '\217' -> cAny + cIdent + cUpper -- Ù - '\218' -> cAny + cIdent + cUpper -- Ú - '\219' -> cAny + cIdent + cUpper -- Û - '\220' -> cAny + cIdent + cUpper -- Ü - '\221' -> cAny + cIdent + cUpper -- Ý - '\222' -> cAny + cIdent + cUpper -- Þ - '\223' -> cAny + cIdent -- ß - '\224' -> cAny + cIdent + cLower -- à - '\225' -> cAny + cIdent + cLower -- á - '\226' -> cAny + cIdent + cLower -- â - '\227' -> cAny + cIdent + cLower -- ã - '\228' -> cAny + cIdent + cLower -- ä - '\229' -> cAny + cIdent + cLower -- å - '\230' -> cAny + cIdent + cLower -- æ - '\231' -> cAny + cIdent + cLower -- ç - '\232' -> cAny + cIdent + cLower -- è - '\233' -> cAny + cIdent + cLower -- é - '\234' -> cAny + cIdent + cLower -- ê - '\235' -> cAny + cIdent + cLower -- ë - '\236' -> cAny + cIdent + cLower -- ì - '\237' -> cAny + cIdent + cLower -- í - '\238' -> cAny + cIdent + cLower -- î - '\239' -> cAny + cIdent + cLower -- ï - '\240' -> cAny + cIdent + cLower -- ð - '\241' -> cAny + cIdent + cLower -- ñ - '\242' -> cAny + cIdent + cLower -- ò - '\243' -> cAny + cIdent + cLower -- ó - '\244' -> cAny + cIdent + cLower -- ô - '\245' -> cAny + cIdent + cLower -- õ - '\246' -> cAny + cIdent + cLower -- ö - '\247' -> cAny + cSymbol -- ÷ - '\248' -> cAny + cIdent -- ø - '\249' -> cAny + cIdent + cLower -- ù - '\250' -> cAny + cIdent + cLower -- ú - '\251' -> cAny + cIdent + cLower -- û - '\252' -> cAny + cIdent + cLower -- ü - '\253' -> cAny + cIdent + cLower -- ý - '\254' -> cAny + cIdent + cLower -- þ - '\255' -> cAny + cIdent + cLower -- ÿ + '\161' -> cAny + cSymbol -- ¡ + '\162' -> cAny + cSymbol -- ¢ + '\163' -> cAny + cSymbol -- £ + '\164' -> cAny + cSymbol -- ¤ + '\165' -> cAny + cSymbol -- Â¥ + '\166' -> cAny + cSymbol -- ¦ + '\167' -> cAny + cSymbol -- § + '\168' -> cAny + cSymbol -- ¨ + '\169' -> cAny + cSymbol -- © + '\170' -> cAny + cSymbol -- ª + '\171' -> cAny + cSymbol -- « + '\172' -> cAny + cSymbol -- ¬ + '\173' -> cAny + cSymbol -- ­ + '\174' -> cAny + cSymbol -- ® + '\175' -> cAny + cSymbol -- ¯ + '\176' -> cAny + cSymbol -- ° + '\177' -> cAny + cSymbol -- ± + '\178' -> cAny + cSymbol -- ² + '\179' -> cAny + cSymbol -- ³ + '\180' -> cAny + cSymbol -- ´ + '\181' -> cAny + cSymbol -- µ + '\182' -> cAny + cSymbol -- ¶ + '\183' -> cAny + cSymbol -- · + '\184' -> cAny + cSymbol -- ¸ + '\185' -> cAny + cSymbol -- ¹ + '\186' -> cAny + cSymbol -- º + '\187' -> cAny + cSymbol -- » + '\188' -> cAny + cSymbol -- ¼ + '\189' -> cAny + cSymbol -- ½ + '\190' -> cAny + cSymbol -- ¾ + '\191' -> cAny + cSymbol -- ¿ + '\192' -> cAny + cIdent + cUpper -- À + '\193' -> cAny + cIdent + cUpper -- Á + '\194' -> cAny + cIdent + cUpper --  + '\195' -> cAny + cIdent + cUpper -- à + '\196' -> cAny + cIdent + cUpper -- Ä + '\197' -> cAny + cIdent + cUpper -- Å + '\198' -> cAny + cIdent + cUpper -- Æ + '\199' -> cAny + cIdent + cUpper -- Ç + '\200' -> cAny + cIdent + cUpper -- È + '\201' -> cAny + cIdent + cUpper -- É + '\202' -> cAny + cIdent + cUpper -- Ê + '\203' -> cAny + cIdent + cUpper -- Ë + '\204' -> cAny + cIdent + cUpper -- Ì + '\205' -> cAny + cIdent + cUpper -- Í + '\206' -> cAny + cIdent + cUpper -- Î + '\207' -> cAny + cIdent + cUpper -- Ï + '\208' -> cAny + cIdent + cUpper -- Ð + '\209' -> cAny + cIdent + cUpper -- Ñ + '\210' -> cAny + cIdent + cUpper -- Ò + '\211' -> cAny + cIdent + cUpper -- Ó + '\212' -> cAny + cIdent + cUpper -- Ô + '\213' -> cAny + cIdent + cUpper -- Õ + '\214' -> cAny + cIdent + cUpper -- Ö + '\215' -> cAny + cSymbol + cLower -- × + '\216' -> cAny + cIdent + cUpper -- Ø + '\217' -> cAny + cIdent + cUpper -- Ù + '\218' -> cAny + cIdent + cUpper -- Ú + '\219' -> cAny + cIdent + cUpper -- Û + '\220' -> cAny + cIdent + cUpper -- Ü + '\221' -> cAny + cIdent + cUpper -- Ý + '\222' -> cAny + cIdent + cUpper -- Þ + '\223' -> cAny + cIdent -- ß + '\224' -> cAny + cIdent + cLower -- à + '\225' -> cAny + cIdent + cLower -- á + '\226' -> cAny + cIdent + cLower -- â + '\227' -> cAny + cIdent + cLower -- ã + '\228' -> cAny + cIdent + cLower -- ä + '\229' -> cAny + cIdent + cLower -- Ã¥ + '\230' -> cAny + cIdent + cLower -- æ + '\231' -> cAny + cIdent + cLower -- ç + '\232' -> cAny + cIdent + cLower -- è + '\233' -> cAny + cIdent + cLower -- é + '\234' -> cAny + cIdent + cLower -- ê + '\235' -> cAny + cIdent + cLower -- ë + '\236' -> cAny + cIdent + cLower -- ì + '\237' -> cAny + cIdent + cLower -- í + '\238' -> cAny + cIdent + cLower -- î + '\239' -> cAny + cIdent + cLower -- ï + '\240' -> cAny + cIdent + cLower -- ð + '\241' -> cAny + cIdent + cLower -- ñ + '\242' -> cAny + cIdent + cLower -- ò + '\243' -> cAny + cIdent + cLower -- ó + '\244' -> cAny + cIdent + cLower -- ô + '\245' -> cAny + cIdent + cLower -- õ + '\246' -> cAny + cIdent + cLower -- ö + '\247' -> cAny + cSymbol -- ÷ + '\248' -> cAny + cIdent -- ø + '\249' -> cAny + cIdent + cLower -- ù + '\250' -> cAny + cIdent + cLower -- ú + '\251' -> cAny + cIdent + cLower -- û + '\252' -> cAny + cIdent + cLower -- ü + '\253' -> cAny + cIdent + cLower -- ý + '\254' -> cAny + cIdent + cLower -- þ + '\255' -> cAny + cIdent + cLower -- ÿ \end{code}