X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fparser%2FLexer.x;h=45da0d0ca23e8c26b2de0c18608579e7a1e14a45;hp=5015ca7059d40e00118eb1175a0dcca1cd14c422;hb=bf40e268d916947786c56ec38db86190854a2d2c;hpb=543a890b3de87c2535072c894035996661bf106c diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x index 5015ca7..45da0d0 100644 --- a/compiler/parser/Lexer.x +++ b/compiler/parser/Lexer.x @@ -216,6 +216,11 @@ $white_no_nl+ ; "{-#" $whitechar* (INCLUDE|include) { lex_string_prag ITinclude_prag } } +<0,option_prags,glaexts> { + -- This is to catch things like {-# OPTIONS OPTIONS_HUGS ... + "{-#" $whitechar* $idchar+ { nested_comment } +} + -- '0' state: ordinary lexemes -- 'glaexts' state: glasgow extensions (postfix '#', etc.) @@ -245,7 +250,6 @@ $white_no_nl+ ; <0,glaexts> { \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid } - \% @varid / { ifExtension ipEnabled } { skip_one_varid ITsplitipvarid } } { @@ -371,6 +375,8 @@ data Token | ITccallconv | ITdotnet | ITmdo + | ITiso + | ITfamily -- Pragmas | ITinline_prag Bool -- True <=> INLINE, False <=> NOINLINE @@ -435,7 +441,6 @@ data Token | ITqconsym (FastString,FastString) | ITdupipvarid FastString -- GHC extension: implicit param: ?x - | ITsplitipvarid FastString -- GHC extension: implicit param: %x | ITpragma StringBuffer @@ -494,6 +499,8 @@ isSpecial ITunsafe = True isSpecial ITccallconv = True isSpecial ITstdcallconv = True isSpecial ITmdo = True +isSpecial ITiso = True +isSpecial ITfamily = True isSpecial _ = False -- the bitmap provided as the third component indicates whether the @@ -534,6 +541,7 @@ reservedWordsFM = listToUFM $ ( "forall", ITforall, bit tvBit), ( "mdo", ITmdo, bit glaExtsBit), + ( "family", ITfamily, bit idxTysBit), ( "foreign", ITforeign, bit ffiBit), ( "export", ITexport, bit ffiBit), @@ -567,8 +575,9 @@ reservedSymsFM = listToUFM $ ,("-", ITminus, 0) ,("!", ITbang, 0) - ,("*", ITstar, bit glaExtsBit) -- For data T (a::*) = MkT - ,(".", ITdot, bit tvBit) -- For 'forall a . t' + ,("*", ITstar, bit glaExtsBit .|. + bit idxTysBit) -- For data T (a::*) = MkT + ,(".", ITdot, bit tvBit) -- For 'forall a . t' ,("-<", ITlarrowtail, bit arrowsBit) ,(">-", ITrarrowtail, bit arrowsBit) @@ -583,6 +592,9 @@ reservedSymsFM = listToUFM $ ,("→", ITrarrow, bit glaExtsBit) ,("←", ITlarrow, bit glaExtsBit) ,("⋯", ITdotdot, bit glaExtsBit) + -- ToDo: ideally, → and ∷ should be "specials", so that they cannot + -- form part of a large operator. This would let us have a better + -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe). #endif ] @@ -1300,6 +1312,7 @@ ipBit = 6 tvBit = 7 -- Scoped type variables enables 'forall' keyword bangPatBit = 8 -- Tells the parser to understand bang-patterns -- (doesn't affect the lexer) +idxTysBit = 9 -- indexed type families: 'family' keyword and kind sigs glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool glaExtsEnabled flags = testBit flags glaExtsBit @@ -1310,6 +1323,7 @@ thEnabled flags = testBit flags thBit ipEnabled flags = testBit flags ipBit tvEnabled flags = testBit flags tvBit bangPatEnabled flags = testBit flags bangPatBit +idxTysEnabled flags = testBit flags idxTysBit -- PState for parsing options pragmas -- @@ -1351,6 +1365,7 @@ mkPState buf loc flags = .|. ipBit `setBitIf` dopt Opt_ImplicitParams flags .|. tvBit `setBitIf` dopt Opt_ScopedTypeVariables flags .|. bangPatBit `setBitIf` dopt Opt_BangPatterns flags + .|. idxTysBit `setBitIf` dopt Opt_IndexedTypes flags -- setBitIf :: Int -> Bool -> Int b `setBitIf` cond | cond = bit b @@ -1445,15 +1460,13 @@ lexToken = do span `seq` setLastToken span bytes t span buf bytes --- ToDo: Alex reports the buffer at the start of the erroneous lexeme, --- but it would be more informative to report the location where the --- error was actually discovered, especially if this is a decoding --- error. -reportLexError loc1 loc2 buf str = +reportLexError loc1 loc2 buf str + | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input") + | otherwise = let c = fst (nextChar buf) in if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar# - then failLocMsgP loc2 loc2 "UTF-8 decoding error" + then failLocMsgP loc2 loc2 (str ++ " (UTF-8 decoding error)") else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c) }