X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fparser%2FLexer.x;h=6651333dc4b709821e6a2163afcfd534ba0c295d;hp=bbdd2a1cce2cd10e679d446e3aa789aa6dbd7dc1;hb=26f164e5759e9eca73deb0531ddec422d36a6924;hpb=a02e7f40afc1aab7fe466f949f505c1d7250713d diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x index bbdd2a1..6651333 100644 --- a/compiler/parser/Lexer.x +++ b/compiler/parser/Lexer.x @@ -31,15 +31,15 @@ -- qualified varids. { -{-# OPTIONS -Wwarn -w #-} --- The above -Wwarn supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details --- --- Note that Alex itself generates code with with some unused bindings and --- without type signatures, so removing the flag might not be possible. +-- XXX The above flags turn off warnings in the generated code: +{-# OPTIONS_GHC -fno-warn-unused-matches #-} +{-# OPTIONS_GHC -fno-warn-unused-binds #-} +{-# OPTIONS_GHC -fno-warn-unused-imports #-} +{-# OPTIONS_GHC -fno-warn-missing-signatures #-} +-- But alex still generates some code that causes the "lazy unlifted bindings" +-- warning, and old compilers don't know about it so we can't easily turn +-- it off, so for now we use the sledge hammer: +{-# OPTIONS_GHC -w #-} {-# OPTIONS_GHC -funbox-strict-fields #-} @@ -307,7 +307,7 @@ $tab+ { warn Opt_WarnTabs (text "Tab character") } \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape } "$(" / { ifExtension thEnabled } { token ITparenEscape } - "[$" @varid "|" / { ifExtension qqEnabled } + "[" @varid "|" / { ifExtension qqEnabled } { lex_quasiquote_tok } } @@ -702,6 +702,14 @@ reservedSymsFM = listToUFM $ ,("→", ITrarrow, unicodeSyntaxEnabled) ,("←", ITlarrow, unicodeSyntaxEnabled) ,("⋯", ITdotdot, unicodeSyntaxEnabled) + + ,("⤙", ITlarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i) + ,("⤚", ITrarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i) + ,("⤛", ITLarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i) + ,("⤜", ITRarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i) + + ,("★", ITstar, unicodeSyntaxEnabled) + -- ToDo: ideally, → and ∷ should be "specials", so that they cannot -- form part of a large operator. This would let us have a better -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe). @@ -739,21 +747,23 @@ begin :: Int -> Action begin code _span _str _len = do pushLexState code; lexToken pop :: Action -pop _span _buf _len = do popLexState; lexToken +pop _span _buf _len = do _ <- popLexState + lexToken pop_and :: Action -> Action -pop_and act span buf len = do popLexState; act span buf len +pop_and act span buf len = do _ <- popLexState + act span buf len {-# INLINE nextCharIs #-} nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool nextCharIs buf p = not (atEnd buf) && p (currentChar buf) notFollowedBy :: Char -> AlexAccPred Int -notFollowedBy char _ _ _ (AI _ _ buf) +notFollowedBy char _ _ _ (AI _ buf) = nextCharIs buf (/=char) notFollowedBySymbol :: AlexAccPred Int -notFollowedBySymbol _ _ _ (AI _ _ buf) +notFollowedBySymbol _ _ _ (AI _ buf) = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~") -- We must reject doc comments as being ordinary comments everywhere. @@ -762,7 +772,7 @@ notFollowedBySymbol _ _ _ (AI _ _ buf) -- valid in all states, but the doc-comment rules are only valid in -- the non-layout states. isNormalComment :: AlexAccPred Int -isNormalComment bits _ _ (AI _ _ buf) +isNormalComment bits _ _ (AI _ buf) | haddockEnabled bits = notFollowedByDocOrPragma | otherwise = nextCharIs buf (/='#') where @@ -773,12 +783,12 @@ spaceAndP :: StringBuffer -> (StringBuffer -> Bool) -> Bool spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf)) {- -haddockDisabledAnd p bits _ _ (AI _ _ buf) +haddockDisabledAnd p bits _ _ (AI _ buf) = if haddockEnabled bits then False else (p buf) -} atEOL :: AlexAccPred Int -atEOL _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf == '\n' +atEOL _ _ _ (AI _ buf) = atEnd buf || currentChar buf == '\n' ifExtension :: (Int -> Bool) -> AlexAccPred Int ifExtension pred bits _ _ _ = pred bits @@ -864,7 +874,7 @@ nested_doc_comment span buf _len = withLexedDocType (go "") withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (Located Token)) -> P (Located Token) withLexedDocType lexDocComment = do - input@(AI _ _ buf) <- getInput + input@(AI _ buf) <- getInput case prevChar buf ' ' of '|' -> lexDocComment input ITdocCommentNext False '^' -> lexDocComment input ITdocCommentPrev False @@ -898,32 +908,20 @@ endPrag span _buf _len = do -- it writes the wrong token length to the parser state. This function is -- called afterwards, so it can just update the state. --- This is complicated by the fact that Haddock tokens can span multiple lines, --- which is something that the original lexer didn't account for. --- I have added last_line_len in the parser state which represents the length --- of the part of the token that is on the last line. It is now used for layout --- calculation in pushCurrentContext instead of last_len. last_len is, like it --- was before, the full length of the token, and it is now only used for error --- messages. /Waern - docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer -> SrcSpan -> P (Located Token) docCommentEnd input commentAcc docType buf span = do setInput input - let (AI loc last_offs nextBuf) = input + let (AI loc nextBuf) = input comment = reverse commentAcc span' = mkSrcSpan (srcSpanStart span) loc last_len = byteDiff buf nextBuf - last_line_len = if (last_offs - last_len < 0) - then last_offs - else last_len - - span `seq` setLastToken span' last_len last_line_len + span `seq` setLastToken span' last_len return (L span' (docType comment)) errBrace :: AlexInput -> SrcSpan -> P a -errBrace (AI end _ _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'" +errBrace (AI end _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'" open_brace, close_brace :: Action open_brace span _str _len = do @@ -1061,22 +1059,31 @@ do_bol span _str _len = do return (L span ITvccurly) EQ -> do --trace "layout: inserting ';'" $ do - popLexState + _ <- popLexState return (L span ITsemi) GT -> do - popLexState + _ <- popLexState lexToken -- certain keywords put us in the "layout" state, where we might -- add an opening curly brace. maybe_layout :: Token -> P () -maybe_layout ITdo = pushLexState layout_do -maybe_layout ITmdo = pushLexState layout_do -maybe_layout ITof = pushLexState layout -maybe_layout ITlet = pushLexState layout -maybe_layout ITwhere = pushLexState layout -maybe_layout ITrec = pushLexState layout -maybe_layout _ = return () +maybe_layout t = do -- If the alternative layout rule is enabled then + -- we never create an implicit layout context here. + -- Layout is handled XXX instead. + -- The code for closing implicit contexts, or + -- inserting implicit semi-colons, is therefore + -- irrelevant as it only applies in an implicit + -- context. + alr <- extension alternativeLayoutRule + unless alr $ f t + where f ITdo = pushLexState layout_do + f ITmdo = pushLexState layout_do + f ITof = pushLexState layout + f ITlet = pushLexState layout + f ITwhere = pushLexState layout + f ITrec = pushLexState layout + f _ = return () -- Pushing a new implicit layout context. If the indentation of the -- next token is not greater than the previous layout context, then @@ -1089,8 +1096,9 @@ maybe_layout _ = return () -- new_layout_context :: Bool -> Action new_layout_context strict span _buf _len = do - popLexState - (AI _ offset _) <- getInput + _ <- popLexState + (AI l _) <- getInput + let offset = srcLocCol l ctx <- getContext case ctx of Layout prev_off : _ | @@ -1106,7 +1114,7 @@ new_layout_context strict span _buf _len = do do_layout_left :: Action do_layout_left span _buf _len = do - popLexState + _ <- popLexState pushLexState bol -- we must be at the start of a line return (L span ITvccurly) @@ -1116,17 +1124,18 @@ do_layout_left span _buf _len = do setLine :: Int -> Action setLine code span buf len = do let line = parseUnsignedInteger buf len 10 octDecDigit - setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0) + setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1) -- subtract one: the line number refers to the *following* line - popLexState + _ <- popLexState pushLexState code lexToken setFile :: Int -> Action setFile code span buf len = do let file = lexemeToFastString (stepOn buf) (len-2) + setAlrLastLoc noSrcSpan setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span)) - popLexState + _ <- popLexState pushLexState code lexToken @@ -1153,7 +1162,7 @@ lex_string_prag mkTok span _buf _len = case alexGetChar i of Just (c,i') | c == x -> isString i' xs _other -> False - err (AI end _ _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma" + err (AI end _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma" -- ----------------------------------------------------------------------------- @@ -1171,7 +1180,7 @@ lex_string :: String -> P Token lex_string s = do i <- getInput case alexGetChar' i of - Nothing -> lit_error + Nothing -> lit_error i Just ('"',i) -> do setInput i @@ -1196,21 +1205,25 @@ lex_string s = do Just ('\\',i) | Just ('&',i) <- next -> do setInput i; lex_string s - | Just (c,i) <- next, is_space c -> do + | Just (c,i) <- next, c <= '\x7f' && is_space c -> do + -- is_space only works for <= '\x7f' (#3751) setInput i; lex_stringgap s where next = alexGetChar' i - Just (c, i) -> do - c' <- lex_char c i - lex_string (c':s) + Just (c, i1) -> do + case c of + '\\' -> do setInput i1; c' <- lex_escape; lex_string (c':s) + c | isAny c -> do setInput i1; lex_string (c:s) + _other -> lit_error i lex_stringgap :: String -> P Token lex_stringgap s = do - c <- getCharOrFail + i <- getInput + c <- getCharOrFail i case c of '\\' -> lex_string s c | is_space c -> lex_stringgap s - _other -> lit_error + _other -> lit_error i lex_char_tok :: Action @@ -1224,24 +1237,25 @@ lex_char_tok span _buf _len = do -- We've seen ' i1 <- getInput -- Look ahead to first character let loc = srcSpanStart span case alexGetChar' i1 of - Nothing -> lit_error + Nothing -> lit_error i1 - Just ('\'', i2@(AI end2 _ _)) -> do -- We've seen '' + Just ('\'', i2@(AI end2 _)) -> do -- We've seen '' th_exts <- extension thEnabled if th_exts then do setInput i2 return (L (mkSrcSpan loc end2) ITtyQuote) - else lit_error + else lit_error i1 - Just ('\\', i2@(AI _end2 _ _)) -> do -- We've seen 'backslash + Just ('\\', i2@(AI _end2 _)) -> do -- We've seen 'backslash setInput i2 lit_ch <- lex_escape - mc <- getCharOrFail -- Trailing quote + i3 <- getInput + mc <- getCharOrFail i3 -- Trailing quote if mc == '\'' then finish_char_tok loc lit_ch - else do setInput i2; lit_error + else lit_error i3 - Just (c, i2@(AI _end2 _ _)) - | not (isAny c) -> lit_error + Just (c, i2@(AI _end2 _)) + | not (isAny c) -> lit_error i1 | otherwise -> -- We've seen 'x, where x is a valid character @@ -1254,39 +1268,33 @@ lex_char_tok span _buf _len = do -- We've seen ' -- (including the possibility of EOF) -- If TH is on, just parse the quote only th_exts <- extension thEnabled - let (AI end _ _) = i1 + let (AI end _) = i1 if th_exts then return (L (mkSrcSpan loc end) ITvarQuote) - else do setInput i2; lit_error + else lit_error i2 finish_char_tok :: SrcLoc -> Char -> P (Located Token) finish_char_tok loc ch -- We've already seen the closing quote -- Just need to check for trailing # = do magicHash <- extension magicHashEnabled - i@(AI end _ _) <- getInput + i@(AI end _) <- getInput if magicHash then do case alexGetChar' i of - Just ('#',i@(AI end _ _)) -> do + Just ('#',i@(AI end _)) -> do setInput i return (L (mkSrcSpan loc end) (ITprimchar ch)) _other -> return (L (mkSrcSpan loc end) (ITchar ch)) - else do + else do return (L (mkSrcSpan loc end) (ITchar ch)) -lex_char :: Char -> AlexInput -> P Char -lex_char c inp = do - case c of - '\\' -> do setInput inp; lex_escape - c | isAny c -> do setInput inp; return c - _other -> lit_error - isAny :: Char -> Bool isAny c | c > '\x7f' = isPrint c | otherwise = is_any c lex_escape :: P Char lex_escape = do - c <- getCharOrFail + i0 <- getInput + c <- getCharOrFail i0 case c of 'a' -> return '\a' 'b' -> return '\b' @@ -1298,10 +1306,11 @@ lex_escape = do '\\' -> return '\\' '"' -> return '\"' '\'' -> return '\'' - '^' -> do c <- getCharOrFail + '^' -> do i1 <- getInput + c <- getCharOrFail i1 if c >= '@' && c <= '_' then return (chr (ord c - ord '@')) - else lit_error + else lit_error i1 'x' -> readNum is_hexdigit 16 hexDigit 'o' -> readNum is_octdigit 8 octDecDigit @@ -1310,10 +1319,10 @@ lex_escape = do c1 -> do i <- getInput case alexGetChar' i of - Nothing -> lit_error + Nothing -> lit_error i0 Just (c2,i2) -> case alexGetChar' i2 of - Nothing -> do setInput i2; lit_error + Nothing -> do lit_error i0 Just (c3,i3) -> let str = [c1,c2,c3] in case [ (c,rest) | (p,c) <- silly_escape_chars, @@ -1324,15 +1333,15 @@ lex_escape = do (escape_char,_:_):_ -> do setInput i2 return escape_char - [] -> lit_error + [] -> lit_error i0 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char readNum is_digit base conv = do i <- getInput - c <- getCharOrFail + c <- getCharOrFail i if is_digit c then readNum2 is_digit base conv (conv c) - else do setInput i; lit_error + else lit_error i readNum2 :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> P Char readNum2 is_digit base conv i = do @@ -1345,7 +1354,7 @@ readNum2 is_digit base conv i = do _other -> do if i >= 0 && i <= 0x10FFFF then do setInput input; return (chr i) - else lit_error + else lit_error input silly_escape_chars :: [(String, Char)] silly_escape_chars = [ @@ -1389,12 +1398,11 @@ silly_escape_chars = [ -- the position of the error in the buffer. This is so that we can report -- a correct location to the user, but also so we can detect UTF-8 decoding -- errors if they occur. -lit_error :: P a -lit_error = lexError "lexical error in string/character literal" +lit_error :: AlexInput -> P a +lit_error i = do setInput i; lexError "lexical error in string/character literal" -getCharOrFail :: P Char -getCharOrFail = do - i <- getInput +getCharOrFail :: AlexInput -> P Char +getCharOrFail i = do case alexGetChar' i of Nothing -> lexError "unexpected end-of-file in string/character literal" Just (c,i) -> do setInput i; return c @@ -1404,8 +1412,9 @@ getCharOrFail = do lex_quasiquote_tok :: Action lex_quasiquote_tok span buf len = do - let quoter = reverse $ takeWhile (/= '$') - $ reverse $ lexemeToString buf (len - 1) + let quoter = tail (lexemeToString buf (len - 1)) + -- 'tail' drops the initial '[', + -- while the -1 drops the trailing '|' quoteStart <- getSrcLoc quote <- lex_quasiquote "" end <- getSrcLoc @@ -1418,7 +1427,7 @@ lex_quasiquote :: String -> P String lex_quasiquote s = do i <- getInput case alexGetChar' i of - Nothing -> lit_error + Nothing -> lit_error i Just ('\\',i) | Just ('|',i) <- next -> do @@ -1469,15 +1478,26 @@ data PState = PState { dflags :: DynFlags, messages :: Messages, last_loc :: SrcSpan, -- pos of previous token - last_offs :: !Int, -- offset of the previous token from the - -- beginning of the current line. - -- \t is equal to 8 spaces. last_len :: !Int, -- len of previous token - last_line_len :: !Int, loc :: SrcLoc, -- current loc (end of prev token + 1) extsBitmap :: !Int, -- bitmap that determines permitted extensions context :: [LayoutContext], - lex_state :: [Int] + lex_state :: [Int], + -- Used in the alternative layout rule: + -- These tokens are the next ones to be sent out. They are + -- just blindly emitted, without the rule looking at them again: + alr_pending_implicit_tokens :: [Located Token], + -- This is the next token to be considered or, if it is Nothing, + -- we need to get the next token from the input stream: + alr_next_token :: Maybe (Located Token), + -- This is what we consider to be the locatino of the last token + -- emitted: + alr_last_loc :: SrcSpan, + -- The stack of layout contexts: + alr_context :: [ALRContext], + -- Are we expecting a '{'? If it's Just, then the ALRLayout tells + -- us what sort of layout the '{' will open: + alr_expecting_ocurly :: Maybe ALRLayout } -- last_loc and last_len are used when generating error messages, -- and in pushCurrentContext only. Sigh, if only Happy passed the @@ -1485,6 +1505,13 @@ data PState = PState { -- Getting rid of last_loc would require finding another way to -- implement pushCurrentContext (which is only called from one place). +data ALRContext = ALRNoLayout Bool{- does it contain commas? -} + | ALRLayout ALRLayout Int +data ALRLayout = ALRLayoutLet + | ALRLayoutWhere + | ALRLayoutOf + | ALRLayoutDo + newtype P a = P { unP :: PState -> ParseResult a } instance Monad P where @@ -1539,27 +1566,25 @@ setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} () getSrcLoc :: P SrcLoc getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc -setLastToken :: SrcSpan -> Int -> Int -> P () -setLastToken loc len line_len = P $ \s -> POk s { +setLastToken :: SrcSpan -> Int -> P () +setLastToken loc len = P $ \s -> POk s { last_loc=loc, - last_len=len, - last_line_len=line_len -} () + last_len=len + } () -data AlexInput = AI SrcLoc {-#UNPACK#-}!Int StringBuffer +data AlexInput = AI SrcLoc StringBuffer alexInputPrevChar :: AlexInput -> Char -alexInputPrevChar (AI _ _ buf) = prevChar buf '\n' +alexInputPrevChar (AI _ buf) = prevChar buf '\n' alexGetChar :: AlexInput -> Maybe (Char,AlexInput) -alexGetChar (AI loc ofs s) +alexGetChar (AI loc s) | atEnd s = Nothing - | otherwise = adj_c `seq` loc' `seq` ofs' `seq` s' `seq` + | otherwise = adj_c `seq` loc' `seq` s' `seq` --trace (show (ord c)) $ - Just (adj_c, (AI loc' ofs' s')) + Just (adj_c, (AI loc' s')) where (c,s') = nextChar s loc' = advanceSrcLoc loc c - ofs' = advanceOffs c ofs non_graphic = '\x0' upper = '\x1' @@ -1605,25 +1630,19 @@ alexGetChar (AI loc ofs s) -- This version does not squash unicode characters, it is used when -- lexing strings. alexGetChar' :: AlexInput -> Maybe (Char,AlexInput) -alexGetChar' (AI loc ofs s) +alexGetChar' (AI loc s) | atEnd s = Nothing - | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq` + | otherwise = c `seq` loc' `seq` s' `seq` --trace (show (ord c)) $ - Just (c, (AI loc' ofs' s')) + Just (c, (AI loc' s')) where (c,s') = nextChar s loc' = advanceSrcLoc loc c - ofs' = advanceOffs c ofs - -advanceOffs :: Char -> Int -> Int -advanceOffs '\n' _ = 0 -advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8 -advanceOffs _ offs = offs + 1 getInput :: P AlexInput -getInput = P $ \s@PState{ loc=l, last_offs=o, buffer=b } -> POk s (AI l o b) +getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (AI l b) setInput :: AlexInput -> P () -setInput (AI l o b) = P $ \s -> POk s{ loc=l, last_offs=o, buffer=b } () +setInput (AI l b) = P $ \s -> POk s{ loc=l, buffer=b } () pushLexState :: Int -> P () pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} () @@ -1634,6 +1653,42 @@ popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls getLexState :: P Int getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls +popNextToken :: P (Maybe (Located Token)) +popNextToken + = P $ \s@PState{ alr_next_token = m } -> + POk (s {alr_next_token = Nothing}) m + +setAlrLastLoc :: SrcSpan -> P () +setAlrLastLoc l = P $ \s -> POk (s {alr_last_loc = l}) () + +getAlrLastLoc :: P SrcSpan +getAlrLastLoc = P $ \s@(PState {alr_last_loc = l}) -> POk s l + +getALRContext :: P [ALRContext] +getALRContext = P $ \s@(PState {alr_context = cs}) -> POk s cs + +setALRContext :: [ALRContext] -> P () +setALRContext cs = P $ \s -> POk (s {alr_context = cs}) () + +setNextToken :: Located Token -> P () +setNextToken t = P $ \s -> POk (s {alr_next_token = Just t}) () + +popPendingImplicitToken :: P (Maybe (Located Token)) +popPendingImplicitToken + = P $ \s@PState{ alr_pending_implicit_tokens = ts } -> + case ts of + [] -> POk s Nothing + (t : ts') -> POk (s {alr_pending_implicit_tokens = ts'}) (Just t) + +setPendingImplicitTokens :: [Located Token] -> P () +setPendingImplicitTokens ts = P $ \s -> POk (s {alr_pending_implicit_tokens = ts}) () + +getAlrExpectingOCurly :: P (Maybe ALRLayout) +getAlrExpectingOCurly = P $ \s@(PState {alr_expecting_ocurly = b}) -> POk s b + +setAlrExpectingOCurly :: Maybe ALRLayout -> P () +setAlrExpectingOCurly b = P $ \s -> POk (s {alr_expecting_ocurly = b}) () + -- for reasons of efficiency, flags indicating language extensions (eg, -- -fglasgow-exts or -XParr) are represented by a bitmap stored in an unboxed -- integer @@ -1683,6 +1738,8 @@ newQualOpsBit :: Int newQualOpsBit = 21 -- Haskell' qualified operator syntax, e.g. Prelude.(+) recBit :: Int recBit = 22 -- rec +alternativeLayoutRuleBit :: Int +alternativeLayoutRuleBit = 23 always :: Int -> Bool always _ = True @@ -1724,6 +1781,8 @@ newQualOps :: Int -> Bool newQualOps flags = testBit flags newQualOpsBit oldQualOps :: Int -> Bool oldQualOps flags = not (newQualOps flags) +alternativeLayoutRule :: Int -> Bool +alternativeLayoutRule flags = testBit flags alternativeLayoutRuleBit -- PState for parsing options pragmas -- @@ -1734,13 +1793,16 @@ pragState dynflags buf loc = messages = emptyMessages, dflags = dynflags, last_loc = mkSrcSpan loc loc, - last_offs = 0, last_len = 0, - last_line_len = 0, loc = loc, extsBitmap = 0, context = [], - lex_state = [bol, option_prags, 0] + lex_state = [bol, option_prags, 0], + alr_pending_implicit_tokens = [], + alr_next_token = Nothing, + alr_last_loc = noSrcSpan, + alr_context = [], + alr_expecting_ocurly = Nothing } @@ -1753,14 +1815,17 @@ mkPState buf loc flags = dflags = flags, messages = emptyMessages, last_loc = mkSrcSpan loc loc, - last_offs = 0, last_len = 0, - last_line_len = 0, loc = loc, extsBitmap = fromIntegral bitmap, context = [], - lex_state = [bol, 0] + lex_state = [bol, 0], -- we begin in the layout state if toplev_layout is set + alr_pending_implicit_tokens = [], + alr_next_token = Nothing, + alr_last_loc = noSrcSpan, + alr_context = [], + alr_expecting_ocurly = Nothing } where bitmap = genericsBit `setBitIf` dopt Opt_Generics flags @@ -1785,6 +1850,7 @@ mkPState buf loc flags = .|. transformComprehensionsBit `setBitIf` dopt Opt_TransformListComp flags .|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags .|. newQualOpsBit `setBitIf` dopt Opt_NewQualifiedOperators flags + .|. alternativeLayoutRuleBit `setBitIf` dopt Opt_AlternativeLayoutRule flags -- setBitIf :: Int -> Bool -> Int b `setBitIf` cond | cond = bit b @@ -1817,14 +1883,15 @@ popContext = P $ \ s@(PState{ buffer = buf, context = ctx, -- This is only used at the outer level of a module when the 'module' -- keyword is missing. pushCurrentContext :: P () -pushCurrentContext = P $ \ s@PState{ last_offs=offs, last_line_len=len, context=ctx } -> - POk s{context = Layout (offs-len) : ctx} () ---trace ("off: " ++ show offs ++ ", len: " ++ show len) $ POk s{context = Layout (offs-len) : ctx} () +pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } -> + POk s{context = Layout (srcSpanStartCol loc) : ctx} () getOffside :: P Ordering -getOffside = P $ \s@PState{last_offs=offs, context=stk} -> +getOffside = P $ \s@PState{last_loc=loc, context=stk} -> + let offs = srcSpanStartCol loc in let ord = case stk of - (Layout n:_) -> compare offs n + (Layout n:_) -> --trace ("layout: " ++ show n ++ ", offs: " ++ show offs) $ + compare offs n _ -> GT in POk s ord @@ -1856,7 +1923,7 @@ srcParseFail = P $ \PState{ buffer = buf, last_len = len, lexError :: String -> P a lexError str = do loc <- getSrcLoc - (AI end _ buf) <- getInput + (AI end buf) <- getInput reportLexError loc end buf str -- ----------------------------------------------------------------------------- @@ -1865,30 +1932,202 @@ lexError str = do lexer :: (Located Token -> P a) -> P a lexer cont = do - tok@(L _span _tok__) <- lexToken --- trace ("token: " ++ show tok__) $ do + alr <- extension alternativeLayoutRule + let lexTokenFun = if alr then lexTokenAlr else lexToken + tok@(L _span _tok__) <- lexTokenFun + --trace ("token: " ++ show _tok__) $ do cont tok +lexTokenAlr :: P (Located Token) +lexTokenAlr = do mPending <- popPendingImplicitToken + t <- case mPending of + Nothing -> + do mNext <- popNextToken + t <- case mNext of + Nothing -> lexToken + Just next -> return next + alternativeLayoutRuleToken t + Just t -> + return t + setAlrLastLoc (getLoc t) + case unLoc t of + ITwhere -> setAlrExpectingOCurly (Just ALRLayoutWhere) + ITlet -> setAlrExpectingOCurly (Just ALRLayoutLet) + ITof -> setAlrExpectingOCurly (Just ALRLayoutOf) + ITdo -> setAlrExpectingOCurly (Just ALRLayoutDo) + ITmdo -> setAlrExpectingOCurly (Just ALRLayoutDo) + ITrec -> setAlrExpectingOCurly (Just ALRLayoutDo) + _ -> return () + return t + +alternativeLayoutRuleToken :: Located Token -> P (Located Token) +alternativeLayoutRuleToken t + = do context <- getALRContext + lastLoc <- getAlrLastLoc + mExpectingOCurly <- getAlrExpectingOCurly + let thisLoc = getLoc t + thisCol = srcSpanStartCol thisLoc + newLine = (lastLoc == noSrcSpan) + || (srcSpanStartLine thisLoc > srcSpanEndLine lastLoc) + case (unLoc t, context, mExpectingOCurly) of + -- This case handles a GHC extension to the original H98 + -- layout rule... + (ITocurly, _, Just _) -> + do setAlrExpectingOCurly Nothing + setALRContext (ALRNoLayout (containsCommas ITocurly) : context) + return t + -- ...and makes this case unnecessary + {- + -- I think our implicit open-curly handling is slightly + -- different to John's, in how it interacts with newlines + -- and "in" + (ITocurly, _, Just _) -> + do setAlrExpectingOCurly Nothing + setNextToken t + lexTokenAlr + -} + (_, ALRLayout _ col : ls, Just expectingOCurly) + | (thisCol > col) || + (thisCol == col && + isNonDecreasingIntentation expectingOCurly) -> + do setAlrExpectingOCurly Nothing + setALRContext (ALRLayout expectingOCurly thisCol : context) + setNextToken t + return (L thisLoc ITocurly) + | otherwise -> + do setAlrExpectingOCurly Nothing + setPendingImplicitTokens [L lastLoc ITccurly] + setNextToken t + return (L lastLoc ITocurly) + (_, _, Just expectingOCurly) -> + do setAlrExpectingOCurly Nothing + setALRContext (ALRLayout expectingOCurly thisCol : context) + setNextToken t + return (L thisLoc ITocurly) + -- We do the [] cases earlier than in the spec, as we + -- have an actual EOF token + (ITeof, ALRLayout _ _ : ls, _) -> + do setALRContext ls + setNextToken t + return (L thisLoc ITccurly) + (ITeof, _, _) -> + return t + -- the other ITeof case omitted; general case below covers it + (ITin, ALRLayout ALRLayoutLet _ : ls, _) + | newLine -> + do setPendingImplicitTokens [t] + setALRContext ls + return (L thisLoc ITccurly) + (_, ALRLayout _ col : ls, _) + | newLine && thisCol == col -> + do setNextToken t + return (L thisLoc ITsemi) + | newLine && thisCol < col -> + do setALRContext ls + setNextToken t + -- Note that we use lastLoc, as we may need to close + -- more layouts, or give a semicolon + return (L lastLoc ITccurly) + (u, _, _) + | isALRopen u -> + do setALRContext (ALRNoLayout (containsCommas u) : context) + return t + (u, _, _) + | isALRclose u -> + case context of + ALRLayout _ _ : ls -> + do setALRContext ls + setNextToken t + return (L thisLoc ITccurly) + ALRNoLayout _ : ls -> + do setALRContext ls + return t + [] -> + -- XXX This is an error in John's code, but + -- it looks reachable to me at first glance + return t + (ITin, ALRLayout ALRLayoutLet _ : ls, _) -> + do setALRContext ls + setPendingImplicitTokens [t] + return (L thisLoc ITccurly) + (ITin, ALRLayout _ _ : ls, _) -> + do setALRContext ls + setNextToken t + return (L thisLoc ITccurly) + -- the other ITin case omitted; general case below covers it + (ITcomma, ALRLayout _ _ : ls, _) + | topNoLayoutContainsCommas ls -> + do setALRContext ls + setNextToken t + return (L thisLoc ITccurly) + (ITwhere, ALRLayout ALRLayoutDo _ : ls, _) -> + do setALRContext ls + setPendingImplicitTokens [t] + return (L thisLoc ITccurly) + -- the other ITwhere case omitted; general case below covers it + (_, _, _) -> return t + +isALRopen :: Token -> Bool +isALRopen ITcase = True +isALRopen ITif = True +isALRopen IToparen = True +isALRopen ITobrack = True +isALRopen ITocurly = True +-- GHC Extensions: +isALRopen IToubxparen = True +isALRopen ITparenEscape = True +isALRopen _ = False + +isALRclose :: Token -> Bool +isALRclose ITof = True +isALRclose ITthen = True +isALRclose ITcparen = True +isALRclose ITcbrack = True +isALRclose ITccurly = True +-- GHC Extensions: +isALRclose ITcubxparen = True +isALRclose _ = False + +isNonDecreasingIntentation :: ALRLayout -> Bool +isNonDecreasingIntentation ALRLayoutDo = True +isNonDecreasingIntentation _ = False + +containsCommas :: Token -> Bool +containsCommas IToparen = True +containsCommas ITobrack = True +-- John doesn't have {} as containing commas, but records contain them, +-- which caused a problem parsing Cabal's Distribution.Simple.InstallDirs +-- (defaultInstallDirs). +containsCommas ITocurly = True +-- GHC Extensions: +containsCommas IToubxparen = True +containsCommas _ = False + +topNoLayoutContainsCommas :: [ALRContext] -> Bool +topNoLayoutContainsCommas [] = False +topNoLayoutContainsCommas (ALRLayout _ _ : ls) = topNoLayoutContainsCommas ls +topNoLayoutContainsCommas (ALRNoLayout b : _) = b + lexToken :: P (Located Token) lexToken = do - inp@(AI loc1 _ buf) <- getInput + inp@(AI loc1 buf) <- getInput sc <- getLexState exts <- getExts case alexScanUser exts inp sc of AlexEOF -> do let span = mkSrcSpan loc1 loc1 - setLastToken span 0 0 + setLastToken span 0 return (L span ITeof) - AlexError (AI loc2 _ buf) -> + AlexError (AI loc2 buf) -> reportLexError loc1 loc2 buf "lexical error" AlexSkip inp2 _ -> do setInput inp2 lexToken - AlexToken inp2@(AI end _ buf2) _ t -> do + AlexToken inp2@(AI end buf2) _ t -> do setInput inp2 let span = mkSrcSpan loc1 end let bytes = byteDiff buf buf2 - span `seq` setLastToken span bytes bytes + span `seq` setLastToken span bytes t span buf bytes reportLexError :: SrcLoc -> SrcLoc -> StringBuffer -> [Char] -> P a @@ -1951,7 +2190,7 @@ dispatch_pragmas prags span buf len = case Map.lookup (clean_pragma (lexemeToStr Nothing -> lexError "unknown pragma" known_pragma :: Map String Action -> AlexAccPred Int -known_pragma prags _ _ len (AI _ _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags) +known_pragma prags _ _ len (AI _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags) && (nextCharIs buf (\c -> not (isAlphaNum c || c == '_'))) clean_pragma :: String -> String @@ -1963,6 +2202,6 @@ clean_pragma prag = canon_ws (map toLower (unprefix prag)) "noinline" -> "notinline" "specialise" -> "specialize" "constructorlike" -> "conlike" - otherwise -> prag' + _ -> prag' canon_ws s = unwords (map canonical (words s)) }