X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fparser%2FLexCore.hs;h=68b341916395ff7e2f657545baecab0d2c5424ea;hp=32ea6a83953c1cb20ba2d6fefea3b5dc0f5d334b;hb=2d4d636af091b8da27466b5cf90011395a9c2f66;hpb=a7515ed727fc7e50ad9a59864f20cb4ddc93fb20 diff --git a/compiler/parser/LexCore.hs b/compiler/parser/LexCore.hs index 32ea6a8..68b3419 100644 --- a/compiler/parser/LexCore.hs +++ b/compiler/parser/LexCore.hs @@ -1,12 +1,16 @@ + module LexCore where import ParserCoreUtils -import Ratio -import Char -import qualified Numeric( readFloat, readDec ) +import Panic +import Data.Char +import Numeric +isNameChar :: Char -> Bool isNameChar c = isAlpha c || isDigit c || (c == '_') || (c == '\'') - || (c == ':') || (c == '$') + || (c == '$') || (c == '-') || (c == '.') + +isKeywordChar :: Char -> Bool isKeywordChar c = isAlpha c || (c == '_') lexer :: (Token -> P a) -> P a @@ -29,6 +33,7 @@ lexer cont (')':cs) = cont TKcparen cs lexer cont ('{':cs) = cont TKobrace cs lexer cont ('}':cs) = cont TKcbrace cs lexer cont ('=':cs) = cont TKeq cs +lexer cont (':':'=':':':cs) = cont TKcoloneqcolon cs lexer cont (':':':':cs) = cont TKcoloncolon cs lexer cont ('*':cs) = cont TKstar cs lexer cont ('.':cs) = cont TKdot cs @@ -37,33 +42,40 @@ lexer cont ('@':cs) = cont TKat cs lexer cont ('?':cs) = cont TKquestion cs lexer cont (';':cs) = cont TKsemicolon cs -- 20060420 GHC spits out constructors with colon in them nowadays. jds -lexer cont (':':cs) = lexName cont TKcname (':':cs) +-- 20061103 but it's easier to parse if we split on the colon, and treat them +-- as several tokens +lexer cont (':':cs) = cont TKcolon cs -- 20060420 Likewise does it create identifiers starting with dollar. jds lexer cont ('$':cs) = lexName cont TKname ('$':cs) -lexer cont (c:cs) = failP "invalid character" [c] - - +lexer _ (c:_) = failP "invalid character" [c] +lexChar :: (Token -> String -> Int -> ParseResult a) -> String -> Int + -> ParseResult a lexChar cont ('\\':'x':h1:h0:'\'':cs) - | isHexEscape [h1,h0] = cont (TKchar (hexToChar h1 h0)) cs -lexChar cont ('\\':cs) = failP "invalid char character" ('\\':(take 10 cs)) -lexChar cont ('\'':cs) = failP "invalid char character" ['\''] -lexChar cont ('\"':cs) = failP "invalid char character" ['\"'] -lexChar cont (c:'\'':cs) = cont (TKchar c) cs - - + | isHexEscape [h1,h0] = cont (TKchar (hexToChar h1 h0)) cs +lexChar _ ('\\':cs) = failP "invalid char character" ('\\':(take 10 cs)) +lexChar _ ('\'':_) = failP "invalid char character" ['\''] +lexChar _ ('\"':_) = failP "invalid char character" ['\"'] +lexChar cont (c:'\'':cs) = cont (TKchar c) cs +lexChar _ cs = panic ("lexChar: " ++ show cs) + +lexString :: String -> (Token -> [Char] -> Int -> ParseResult a) + -> String -> Int -> ParseResult a lexString s cont ('\\':'x':h1:h0:cs) - | isHexEscape [h1,h0] = lexString (s++[hexToChar h1 h0]) cont cs -lexString s cont ('\\':cs) = failP "invalid string character" ['\\'] -lexString s cont ('\'':cs) = failP "invalid string character" ['\''] + | isHexEscape [h1,h0] = lexString (s++[hexToChar h1 h0]) cont cs +lexString _ _ ('\\':_) = failP "invalid string character" ['\\'] +lexString _ _ ('\'':_) = failP "invalid string character" ['\''] lexString s cont ('\"':cs) = cont (TKstring s) cs -lexString s cont (c:cs) = lexString (s++[c]) cont cs +lexString s cont (c:cs) = lexString (s++[c]) cont cs +lexString _ _ [] = panic "lexString []" +isHexEscape :: String -> Bool isHexEscape = all (\c -> isHexDigit c && (isDigit c || isLower c)) +hexToChar :: Char -> Char -> Char hexToChar h1 h0 = chr (digitToInt h1 * 16 + digitToInt h0) - +lexNum :: (Token -> String -> a) -> String -> a lexNum cont cs = case cs of ('-':cs) -> f (-1) cs @@ -74,12 +86,16 @@ lexNum cont cs = | isDigit c -> cont (TKrational (fromInteger sgn * r)) rest' where ((r,rest'):_) = readFloat (digits ++ ('.':c:rest)) -- When reading a floating-point number, which is - -- a bit complicated, use the Haskell 98 library function + -- a bit complicated, use the standard library function + -- "readFloat" (digits,rest) -> cont (TKinteger (sgn * (read digits))) rest +lexName :: (a -> String -> b) -> (String -> a) -> String -> b lexName cont cstr cs = cont (cstr name) rest where (name,rest) = span isNameChar cs +lexKeyword :: (Token -> [Char] -> Int -> ParseResult a) -> String -> Int + -> ParseResult a lexKeyword cont cs = case span isKeywordChar cs of ("module",rest) -> cont TKmodule rest @@ -91,45 +107,10 @@ lexKeyword cont cs = ("in",rest) -> cont TKin rest ("case",rest) -> cont TKcase rest ("of",rest) -> cont TKof rest - ("coerce",rest) -> cont TKcoerce rest + ("cast",rest) -> cont TKcast rest ("note",rest) -> cont TKnote rest ("external",rest) -> cont TKexternal rest + ("local",rest) -> cont TKlocal rest ("_",rest) -> cont TKwild rest _ -> failP "invalid keyword" ('%':cs) - -#if __GLASGOW_HASKELL__ >= 504 --- The readFloat in the Numeric library will do the job - -readFloat :: (RealFrac a) => ReadS a -readFloat = Numeric.readFloat - -#else --- Haskell 98's Numeric.readFloat used to have a bogusly restricted signature --- so it was incapable of reading a rational. --- So for GHCs that have that old bogus library, here is the code, written out longhand. - -readFloat r = [(fromRational ((n%1)*10^^(k-d)),t) | (n,d,s) <- readFix r, - (k,t) <- readExp s] ++ - [ (0/0, t) | ("NaN",t) <- lex r] ++ - [ (1/0, t) | ("Infinity",t) <- lex r] - where - readFix r = [(read (ds++ds'), length ds', t) - | (ds,d) <- lexDigits r, - (ds',t) <- lexFrac d ] - - lexFrac ('.':ds) = lexDigits ds - lexFrac s = [("",s)] - - readExp (e:s) | e `elem` "eE" = readExp' s - readExp s = [(0,s)] - - readExp' ('-':s) = [(-k,t) | (k,t) <- Numeric.readDec s] - readExp' ('+':s) = Numeric.readDec s - readExp' s = Numeric.readDec s - -lexDigits :: ReadS String -lexDigits s = case span isDigit s of - (cs,s') | not (null cs) -> [(cs,s')] - otherwise -> [] -#endif