X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2Fparser%2FLexer.x;h=fdbaeef3feaa773673b9bdec87c2e9d6220df075;hb=7ab880e6cbce4e095d8316d4289066aa2d50419b;hp=4c1b48efc0845f3009dc19f4d8e5e26eadb36103;hpb=0065d5ab628975892cea1ec7303f968c3338cbe1;p=ghc-hetmet.git diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x index 4c1b48e..fdbaeef 100644 --- a/compiler/parser/Lexer.x +++ b/compiler/parser/Lexer.x @@ -65,7 +65,7 @@ $decdigit = $ascdigit -- for now, should really be $digit (ToDo) $digit = [$ascdigit $unidigit] $special = [\(\)\,\;\[\]\`\{\}] -$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~] +$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~ \xa1-\xbf \xd7 \xf7] $unisymbol = \x04 $symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\'] @@ -216,6 +216,11 @@ $white_no_nl+ ; "{-#" $whitechar* (INCLUDE|include) { lex_string_prag ITinclude_prag } } +<0,option_prags,glaexts> { + -- This is to catch things like {-# OPTIONS OPTIONS_HUGS ... + "{-#" $whitechar* $idchar+ { nested_comment } +} + -- '0' state: ordinary lexemes -- 'glaexts' state: glasgow extensions (postfix '#', etc.) @@ -371,6 +376,8 @@ data Token | ITccallconv | ITdotnet | ITmdo + | ITiso + | ITfamily -- Pragmas | ITinline_prag Bool -- True <=> INLINE, False <=> NOINLINE @@ -494,6 +501,8 @@ isSpecial ITunsafe = True isSpecial ITccallconv = True isSpecial ITstdcallconv = True isSpecial ITmdo = True +isSpecial ITiso = True +isSpecial ITfamily = True isSpecial _ = False -- the bitmap provided as the third component indicates whether the @@ -534,6 +543,8 @@ reservedWordsFM = listToUFM $ ( "forall", ITforall, bit tvBit), ( "mdo", ITmdo, bit glaExtsBit), + ( "iso", ITiso, bit glaExtsBit), + ( "family", ITfamily, bit glaExtsBit), ( "foreign", ITforeign, bit ffiBit), ( "export", ITexport, bit ffiBit), @@ -583,6 +594,9 @@ reservedSymsFM = listToUFM $ ,("→", ITrarrow, bit glaExtsBit) ,("←", ITlarrow, bit glaExtsBit) ,("⋯", ITdotdot, bit glaExtsBit) + -- ToDo: ideally, → and ∷ should be "specials", so that they cannot + -- form part of a large operator. This would let us have a better + -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe). #endif ] @@ -1210,6 +1224,7 @@ alexGetChar :: AlexInput -> Maybe (Char,AlexInput) alexGetChar (AI loc ofs s) | atEnd s = Nothing | otherwise = adj_c `seq` loc' `seq` ofs' `seq` s' `seq` + --trace (show (ord c)) $ Just (adj_c, (AI loc' ofs' s')) where (c,s') = nextChar s loc' = advanceSrcLoc loc c @@ -1259,6 +1274,7 @@ alexGetChar' :: AlexInput -> Maybe (Char,AlexInput) alexGetChar' (AI loc ofs s) | atEnd s = Nothing | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq` + --trace (show (ord c)) $ Just (c, (AI loc' ofs' s')) where (c,s') = nextChar s loc' = advanceSrcLoc loc c @@ -1443,15 +1459,13 @@ lexToken = do span `seq` setLastToken span bytes t span buf bytes --- ToDo: Alex reports the buffer at the start of the erroneous lexeme, --- but it would be more informative to report the location where the --- error was actually discovered, especially if this is a decoding --- error. -reportLexError loc1 loc2 buf str = +reportLexError loc1 loc2 buf str + | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input") + | otherwise = let c = fst (nextChar buf) in if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar# - then failLocMsgP loc2 loc2 "UTF-8 decoding error" + then failLocMsgP loc2 loc2 (str ++ " (UTF-8 decoding error)") else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c) }