X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fparser%2FLexer.x;h=2493b2117952b0a1cadecbd4f1722f9dfc01422c;hp=aaeaca73873c79f2a70d52800f6987f1796a005b;hb=661c97c65e5fa47177502e592bb763f752b487ac;hpb=f4b727487a65e6b611bbaafbd2207bd63a8df706 diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x index aaeaca7..2493b21 100644 --- a/compiler/parser/Lexer.x +++ b/compiler/parser/Lexer.x @@ -307,7 +307,7 @@ $tab+ { warn Opt_WarnTabs (text "Tab character") } \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape } "$(" / { ifExtension thEnabled } { token ITparenEscape } - "[$" @varid "|" / { ifExtension qqEnabled } + "[" @varid "|" / { ifExtension qqEnabled } { lex_quasiquote_tok } } @@ -701,7 +701,6 @@ reservedSymsFM = listToUFM $ explicitForallEnabled i) ,("→", ITrarrow, unicodeSyntaxEnabled) ,("←", ITlarrow, unicodeSyntaxEnabled) - ,("⋯", ITdotdot, unicodeSyntaxEnabled) ,("⤙", ITlarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i) ,("⤚", ITrarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i) @@ -1180,7 +1179,7 @@ lex_string :: String -> P Token lex_string s = do i <- getInput case alexGetChar' i of - Nothing -> lit_error + Nothing -> lit_error i Just ('"',i) -> do setInput i @@ -1205,21 +1204,25 @@ lex_string s = do Just ('\\',i) | Just ('&',i) <- next -> do setInput i; lex_string s - | Just (c,i) <- next, is_space c -> do + | Just (c,i) <- next, c <= '\x7f' && is_space c -> do + -- is_space only works for <= '\x7f' (#3751) setInput i; lex_stringgap s where next = alexGetChar' i - Just (c, i) -> do - c' <- lex_char c i - lex_string (c':s) + Just (c, i1) -> do + case c of + '\\' -> do setInput i1; c' <- lex_escape; lex_string (c':s) + c | isAny c -> do setInput i1; lex_string (c:s) + _other -> lit_error i lex_stringgap :: String -> P Token lex_stringgap s = do - c <- getCharOrFail + i <- getInput + c <- getCharOrFail i case c of '\\' -> lex_string s c | is_space c -> lex_stringgap s - _other -> lit_error + _other -> lit_error i lex_char_tok :: Action @@ -1233,24 +1236,25 @@ lex_char_tok span _buf _len = do -- We've seen ' i1 <- getInput -- Look ahead to first character let loc = srcSpanStart span case alexGetChar' i1 of - Nothing -> lit_error + Nothing -> lit_error i1 Just ('\'', i2@(AI end2 _)) -> do -- We've seen '' th_exts <- extension thEnabled if th_exts then do setInput i2 return (L (mkSrcSpan loc end2) ITtyQuote) - else lit_error + else lit_error i1 Just ('\\', i2@(AI _end2 _)) -> do -- We've seen 'backslash setInput i2 lit_ch <- lex_escape - mc <- getCharOrFail -- Trailing quote + i3 <- getInput + mc <- getCharOrFail i3 -- Trailing quote if mc == '\'' then finish_char_tok loc lit_ch - else do setInput i2; lit_error + else lit_error i3 Just (c, i2@(AI _end2 _)) - | not (isAny c) -> lit_error + | not (isAny c) -> lit_error i1 | otherwise -> -- We've seen 'x, where x is a valid character @@ -1265,7 +1269,7 @@ lex_char_tok span _buf _len = do -- We've seen ' th_exts <- extension thEnabled let (AI end _) = i1 if th_exts then return (L (mkSrcSpan loc end) ITvarQuote) - else do setInput i2; lit_error + else lit_error i2 finish_char_tok :: SrcLoc -> Char -> P (Located Token) finish_char_tok loc ch -- We've already seen the closing quote @@ -1282,20 +1286,14 @@ finish_char_tok loc ch -- We've already seen the closing quote else do return (L (mkSrcSpan loc end) (ITchar ch)) -lex_char :: Char -> AlexInput -> P Char -lex_char c inp = do - case c of - '\\' -> do setInput inp; lex_escape - c | isAny c -> do setInput inp; return c - _other -> lit_error - isAny :: Char -> Bool isAny c | c > '\x7f' = isPrint c | otherwise = is_any c lex_escape :: P Char lex_escape = do - c <- getCharOrFail + i0 <- getInput + c <- getCharOrFail i0 case c of 'a' -> return '\a' 'b' -> return '\b' @@ -1307,10 +1305,11 @@ lex_escape = do '\\' -> return '\\' '"' -> return '\"' '\'' -> return '\'' - '^' -> do c <- getCharOrFail + '^' -> do i1 <- getInput + c <- getCharOrFail i1 if c >= '@' && c <= '_' then return (chr (ord c - ord '@')) - else lit_error + else lit_error i1 'x' -> readNum is_hexdigit 16 hexDigit 'o' -> readNum is_octdigit 8 octDecDigit @@ -1319,10 +1318,10 @@ lex_escape = do c1 -> do i <- getInput case alexGetChar' i of - Nothing -> lit_error + Nothing -> lit_error i0 Just (c2,i2) -> case alexGetChar' i2 of - Nothing -> do setInput i2; lit_error + Nothing -> do lit_error i0 Just (c3,i3) -> let str = [c1,c2,c3] in case [ (c,rest) | (p,c) <- silly_escape_chars, @@ -1333,15 +1332,15 @@ lex_escape = do (escape_char,_:_):_ -> do setInput i2 return escape_char - [] -> lit_error + [] -> lit_error i0 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char readNum is_digit base conv = do i <- getInput - c <- getCharOrFail + c <- getCharOrFail i if is_digit c then readNum2 is_digit base conv (conv c) - else do setInput i; lit_error + else lit_error i readNum2 :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> P Char readNum2 is_digit base conv i = do @@ -1354,7 +1353,7 @@ readNum2 is_digit base conv i = do _other -> do if i >= 0 && i <= 0x10FFFF then do setInput input; return (chr i) - else lit_error + else lit_error input silly_escape_chars :: [(String, Char)] silly_escape_chars = [ @@ -1398,12 +1397,11 @@ silly_escape_chars = [ -- the position of the error in the buffer. This is so that we can report -- a correct location to the user, but also so we can detect UTF-8 decoding -- errors if they occur. -lit_error :: P a -lit_error = lexError "lexical error in string/character literal" +lit_error :: AlexInput -> P a +lit_error i = do setInput i; lexError "lexical error in string/character literal" -getCharOrFail :: P Char -getCharOrFail = do - i <- getInput +getCharOrFail :: AlexInput -> P Char +getCharOrFail i = do case alexGetChar' i of Nothing -> lexError "unexpected end-of-file in string/character literal" Just (c,i) -> do setInput i; return c @@ -1413,8 +1411,9 @@ getCharOrFail = do lex_quasiquote_tok :: Action lex_quasiquote_tok span buf len = do - let quoter = reverse $ takeWhile (/= '$') - $ reverse $ lexemeToString buf (len - 1) + let quoter = tail (lexemeToString buf (len - 1)) + -- 'tail' drops the initial '[', + -- while the -1 drops the trailing '|' quoteStart <- getSrcLoc quote <- lex_quasiquote "" end <- getSrcLoc @@ -1427,7 +1426,7 @@ lex_quasiquote :: String -> P String lex_quasiquote s = do i <- getInput case alexGetChar' i of - Nothing -> lit_error + Nothing -> lit_error i Just ('\\',i) | Just ('|',i) <- next -> do @@ -1497,7 +1496,10 @@ data PState = PState { alr_context :: [ALRContext], -- Are we expecting a '{'? If it's Just, then the ALRLayout tells -- us what sort of layout the '{' will open: - alr_expecting_ocurly :: Maybe ALRLayout + alr_expecting_ocurly :: Maybe ALRLayout, + -- Have we just had the '}' for a let block? If so, than an 'in' + -- token doesn't need to close anything: + alr_justClosedExplicitLetBlock :: Bool } -- last_loc and last_len are used when generating error messages, -- and in pushCurrentContext only. Sigh, if only Happy passed the @@ -1506,6 +1508,7 @@ data PState = PState { -- implement pushCurrentContext (which is only called from one place). data ALRContext = ALRNoLayout Bool{- does it contain commas? -} + Bool{- is it a 'let' block? -} | ALRLayout ALRLayout Int data ALRLayout = ALRLayoutLet | ALRLayoutWhere @@ -1670,6 +1673,14 @@ getALRContext = P $ \s@(PState {alr_context = cs}) -> POk s cs setALRContext :: [ALRContext] -> P () setALRContext cs = P $ \s -> POk (s {alr_context = cs}) () +getJustClosedExplicitLetBlock :: P Bool +getJustClosedExplicitLetBlock + = P $ \s@(PState {alr_justClosedExplicitLetBlock = b}) -> POk s b + +setJustClosedExplicitLetBlock :: Bool -> P () +setJustClosedExplicitLetBlock b + = P $ \s -> POk (s {alr_justClosedExplicitLetBlock = b}) () + setNextToken :: Located Token -> P () setNextToken t = P $ \s -> POk (s {alr_next_token = Just t}) () @@ -1802,7 +1813,8 @@ pragState dynflags buf loc = alr_next_token = Nothing, alr_last_loc = noSrcSpan, alr_context = [], - alr_expecting_ocurly = Nothing + alr_expecting_ocurly = Nothing, + alr_justClosedExplicitLetBlock = False } @@ -1825,7 +1837,8 @@ mkPState buf loc flags = alr_next_token = Nothing, alr_last_loc = noSrcSpan, alr_context = [], - alr_expecting_ocurly = Nothing + alr_expecting_ocurly = Nothing, + alr_justClosedExplicitLetBlock = False } where bitmap = genericsBit `setBitIf` dopt Opt_Generics flags @@ -1955,6 +1968,8 @@ lexTokenAlr = do mPending <- popPendingImplicitToken ITlet -> setAlrExpectingOCurly (Just ALRLayoutLet) ITof -> setAlrExpectingOCurly (Just ALRLayoutOf) ITdo -> setAlrExpectingOCurly (Just ALRLayoutDo) + ITmdo -> setAlrExpectingOCurly (Just ALRLayoutDo) + ITrec -> setAlrExpectingOCurly (Just ALRLayoutDo) _ -> return () return t @@ -1963,16 +1978,23 @@ alternativeLayoutRuleToken t = do context <- getALRContext lastLoc <- getAlrLastLoc mExpectingOCurly <- getAlrExpectingOCurly - let thisLoc = getLoc t + justClosedExplicitLetBlock <- getJustClosedExplicitLetBlock + setJustClosedExplicitLetBlock False + dflags <- getDynFlags + let transitional = dopt Opt_AlternativeLayoutRuleTransitional dflags + thisLoc = getLoc t thisCol = srcSpanStartCol thisLoc newLine = (lastLoc == noSrcSpan) || (srcSpanStartLine thisLoc > srcSpanEndLine lastLoc) case (unLoc t, context, mExpectingOCurly) of -- This case handles a GHC extension to the original H98 -- layout rule... - (ITocurly, _, Just _) -> + (ITocurly, _, Just alrLayout) -> do setAlrExpectingOCurly Nothing - setALRContext (ALRNoLayout (containsCommas ITocurly) : context) + let isLet = case alrLayout of + ALRLayoutLet -> True + _ -> False + setALRContext (ALRNoLayout (containsCommas ITocurly) isLet : context) return t -- ...and makes this case unnecessary {- @@ -2011,11 +2033,38 @@ alternativeLayoutRuleToken t (ITeof, _, _) -> return t -- the other ITeof case omitted; general case below covers it + (ITin, _, _) + | justClosedExplicitLetBlock -> + return t (ITin, ALRLayout ALRLayoutLet _ : ls, _) | newLine -> do setPendingImplicitTokens [t] setALRContext ls return (L thisLoc ITccurly) + -- This next case is to handle a transitional issue: + (ITwhere, ALRLayout _ col : ls, _) + | newLine && thisCol == col && transitional -> + do addWarning Opt_WarnAlternativeLayoutRuleTransitional + thisLoc + (transitionalAlternativeLayoutWarning + "`where' clause at the same depth as implicit layout block") + setALRContext ls + setNextToken t + -- Note that we use lastLoc, as we may need to close + -- more layouts, or give a semicolon + return (L lastLoc ITccurly) + -- This next case is to handle a transitional issue: + (ITvbar, ALRLayout _ col : ls, _) + | newLine && thisCol == col && transitional -> + do addWarning Opt_WarnAlternativeLayoutRuleTransitional + thisLoc + (transitionalAlternativeLayoutWarning + "`|' at the same depth as implicit layout block") + setALRContext ls + setNextToken t + -- Note that we use lastLoc, as we may need to close + -- more layouts, or give a semicolon + return (L lastLoc ITccurly) (_, ALRLayout _ col : ls, _) | newLine && thisCol == col -> do setNextToken t @@ -2026,10 +2075,8 @@ alternativeLayoutRuleToken t -- Note that we use lastLoc, as we may need to close -- more layouts, or give a semicolon return (L lastLoc ITccurly) - (u, _, _) - | isALRopen u -> - do setALRContext (ALRNoLayout (containsCommas u) : context) - return t + -- We need to handle close before open, as 'then' is both + -- an open and a close (u, _, _) | isALRclose u -> case context of @@ -2037,13 +2084,25 @@ alternativeLayoutRuleToken t do setALRContext ls setNextToken t return (L thisLoc ITccurly) - ALRNoLayout _ : ls -> - do setALRContext ls + ALRNoLayout _ isLet : ls -> + do let ls' = if isALRopen u + then ALRNoLayout (containsCommas u) False : ls + else ls + setALRContext ls' + when isLet $ setJustClosedExplicitLetBlock True return t [] -> - -- XXX This is an error in John's code, but - -- it looks reachable to me at first glance - return t + do let ls = if isALRopen u + then [ALRNoLayout (containsCommas u) False] + else ls + setALRContext ls + -- XXX This is an error in John's code, but + -- it looks reachable to me at first glance + return t + (u, _, _) + | isALRopen u -> + do setALRContext (ALRNoLayout (containsCommas u) False : context) + return t (ITin, ALRLayout ALRLayoutLet _ : ls, _) -> do setALRContext ls setPendingImplicitTokens [t] @@ -2065,19 +2124,27 @@ alternativeLayoutRuleToken t -- the other ITwhere case omitted; general case below covers it (_, _, _) -> return t +transitionalAlternativeLayoutWarning :: String -> SDoc +transitionalAlternativeLayoutWarning msg + = text "transitional layout will not be accepted in the future:" + $$ text msg + isALRopen :: Token -> Bool -isALRopen ITcase = True -isALRopen ITif = True -isALRopen IToparen = True -isALRopen ITobrack = True -isALRopen ITocurly = True +isALRopen ITcase = True +isALRopen ITif = True +isALRopen ITthen = True +isALRopen IToparen = True +isALRopen ITobrack = True +isALRopen ITocurly = True -- GHC Extensions: -isALRopen IToubxparen = True -isALRopen _ = False +isALRopen IToubxparen = True +isALRopen ITparenEscape = True +isALRopen _ = False isALRclose :: Token -> Bool isALRclose ITof = True isALRclose ITthen = True +isALRclose ITelse = True isALRclose ITcparen = True isALRclose ITcbrack = True isALRclose ITccurly = True @@ -2103,7 +2170,7 @@ containsCommas _ = False topNoLayoutContainsCommas :: [ALRContext] -> Bool topNoLayoutContainsCommas [] = False topNoLayoutContainsCommas (ALRLayout _ _ : ls) = topNoLayoutContainsCommas ls -topNoLayoutContainsCommas (ALRNoLayout b : _) = b +topNoLayoutContainsCommas (ALRNoLayout b _ : _) = b lexToken :: P (Located Token) lexToken = do