X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fparser%2FLexer.x;h=00926bd13d87b6a7c0f6db94e008a3c42223bd58;hp=fe5c69326641e1bb8c06fe77979cf834d840c634;hb=609e7ddfb10bc04762b820e70e0487ad6c514c2e;hpb=e17a800817a370fd198831f12cff35122376fa8d diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x index fe5c693..00926bd 100644 --- a/compiler/parser/Lexer.x +++ b/compiler/parser/Lexer.x @@ -31,24 +31,24 @@ -- qualified varids. { -{-# OPTIONS -Wwarn -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details --- --- Note that Alex itself generates code with with some unused bindings and --- without type signatures, so removing the flag might not be possible. +-- XXX The above flags turn off warnings in the generated code: +{-# OPTIONS_GHC -fno-warn-unused-matches #-} +{-# OPTIONS_GHC -fno-warn-unused-binds #-} +{-# OPTIONS_GHC -fno-warn-unused-imports #-} +{-# OPTIONS_GHC -fno-warn-missing-signatures #-} +-- But alex still generates some code that causes the "lazy unlifted bindings" +-- warning, and old compilers don't know about it so we can't easily turn +-- it off, so for now we use the sledge hammer: +{-# OPTIONS_GHC -w #-} {-# OPTIONS_GHC -funbox-strict-fields #-} module Lexer ( Token(..), lexer, pragState, mkPState, PState(..), P(..), ParseResult(..), getSrcLoc, - getPState, + getPState, getDynFlags, withThisPackage, failLocMsgP, failSpanMsgP, srcParseFail, - getMessages, + getMessages, popContext, pushCurrentContext, setLastToken, setSrcLoc, getLexState, popLexState, pushLexState, extension, standaloneDerivingEnabled, bangPatEnabled, @@ -64,6 +64,7 @@ import FastString import SrcLoc import UniqFM import DynFlags +import Module import Ctype import Util ( readRational ) @@ -662,7 +663,7 @@ reservedWordsFM = listToUFM $ ( "ccall", ITccallconv, bit ffiBit), ( "prim", ITprimcallconv, bit ffiBit), - ( "rec", ITrec, bit arrowsBit), + ( "rec", ITrec, bit recBit), ( "proc", ITproc, bit arrowsBit) ] @@ -738,10 +739,12 @@ begin :: Int -> Action begin code _span _str _len = do pushLexState code; lexToken pop :: Action -pop _span _buf _len = do popLexState; lexToken +pop _span _buf _len = do _ <- popLexState + lexToken pop_and :: Action -> Action -pop_and act span buf len = do popLexState; act span buf len +pop_and act span buf len = do _ <- popLexState + act span buf len {-# INLINE nextCharIs #-} nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool @@ -880,12 +883,12 @@ withLexedDocType lexDocComment = do -- RULES pragmas turn on the forall and '.' keywords, and we turn them -- off again at the end of the pragma. rulePrag :: Action -rulePrag span _ _ = do +rulePrag span _buf _len = do setExts (.|. bit inRulePragBit) return (L span ITrules_prag) endPrag :: Action -endPrag span _ _ = do +endPrag span _buf _len = do setExts (.&. complement (bit inRulePragBit)) return (L span ITclose_prag) @@ -1060,22 +1063,31 @@ do_bol span _str _len = do return (L span ITvccurly) EQ -> do --trace "layout: inserting ';'" $ do - popLexState + _ <- popLexState return (L span ITsemi) GT -> do - popLexState + _ <- popLexState lexToken -- certain keywords put us in the "layout" state, where we might -- add an opening curly brace. maybe_layout :: Token -> P () -maybe_layout ITdo = pushLexState layout_do -maybe_layout ITmdo = pushLexState layout_do -maybe_layout ITof = pushLexState layout -maybe_layout ITlet = pushLexState layout -maybe_layout ITwhere = pushLexState layout -maybe_layout ITrec = pushLexState layout -maybe_layout _ = return () +maybe_layout t = do -- If the alternative layout rule is enabled then + -- we never create an implicit layout context here. + -- Layout is handled XXX instead. + -- The code for closing implicit contexts, or + -- inserting implicit semi-colons, is therefore + -- irrelevant as it only applies in an implicit + -- context. + alr <- extension alternativeLayoutRule + unless alr $ f t + where f ITdo = pushLexState layout_do + f ITmdo = pushLexState layout_do + f ITof = pushLexState layout + f ITlet = pushLexState layout + f ITwhere = pushLexState layout + f ITrec = pushLexState layout + f _ = return () -- Pushing a new implicit layout context. If the indentation of the -- next token is not greater than the previous layout context, then @@ -1088,7 +1100,7 @@ maybe_layout _ = return () -- new_layout_context :: Bool -> Action new_layout_context strict span _buf _len = do - popLexState + _ <- popLexState (AI _ offset _) <- getInput ctx <- getContext case ctx of @@ -1105,7 +1117,7 @@ new_layout_context strict span _buf _len = do do_layout_left :: Action do_layout_left span _buf _len = do - popLexState + _ <- popLexState pushLexState bol -- we must be at the start of a line return (L span ITvccurly) @@ -1117,7 +1129,7 @@ setLine code span buf len = do let line = parseUnsignedInteger buf len 10 octDecDigit setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0) -- subtract one: the line number refers to the *following* line - popLexState + _ <- popLexState pushLexState code lexToken @@ -1125,7 +1137,7 @@ setFile :: Int -> Action setFile code span buf len = do let file = lexemeToFastString (stepOn buf) (len-2) setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span)) - popLexState + _ <- popLexState pushLexState code lexToken @@ -1476,7 +1488,13 @@ data PState = PState { loc :: SrcLoc, -- current loc (end of prev token + 1) extsBitmap :: !Int, -- bitmap that determines permitted extensions context :: [LayoutContext], - lex_state :: [Int] + lex_state :: [Int], + -- Used in the alternative layout rule: + alr_pending_implicit_tokens :: [Located Token], + alr_next_token :: Maybe (Located Token), + alr_last_loc :: SrcSpan, + alr_context :: [ALRContext], + alr_expecting_ocurly :: Maybe ALRLayout } -- last_loc and last_len are used when generating error messages, -- and in pushCurrentContext only. Sigh, if only Happy passed the @@ -1484,6 +1502,13 @@ data PState = PState { -- Getting rid of last_loc would require finding another way to -- implement pushCurrentContext (which is only called from one place). +data ALRContext = ALRNoLayout Bool{- does it contain commas? -} + | ALRLayout ALRLayout Int +data ALRLayout = ALRLayoutLet + | ALRLayoutWhere + | ALRLayoutOf + | ALRLayoutDo + newtype P a = P { unP :: PState -> ParseResult a } instance Monad P where @@ -1515,6 +1540,14 @@ failSpanMsgP span msg = P $ \_ -> PFailed span msg getPState :: P PState getPState = P $ \s -> POk s s +getDynFlags :: P DynFlags +getDynFlags = P $ \s -> POk s (dflags s) + +withThisPackage :: (PackageId -> a) -> P a +withThisPackage f + = do pkg <- liftM thisPackage getDynFlags + return $ f pkg + extension :: (Int -> Bool) -> P Bool extension p = P $ \s -> POk s (p $! extsBitmap s) @@ -1625,6 +1658,42 @@ popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls getLexState :: P Int getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls +popNextToken :: P (Maybe (Located Token)) +popNextToken + = P $ \s@PState{ alr_next_token = m } -> + POk (s {alr_next_token = Nothing}) m + +setAlrLastLoc :: SrcSpan -> P () +setAlrLastLoc l = P $ \s -> POk (s {alr_last_loc = l}) () + +getAlrLastLoc :: P SrcSpan +getAlrLastLoc = P $ \s@(PState {alr_last_loc = l}) -> POk s l + +getALRContext :: P [ALRContext] +getALRContext = P $ \s@(PState {alr_context = cs}) -> POk s cs + +setALRContext :: [ALRContext] -> P () +setALRContext cs = P $ \s -> POk (s {alr_context = cs}) () + +setNextToken :: Located Token -> P () +setNextToken t = P $ \s -> POk (s {alr_next_token = Just t}) () + +popPendingImplicitToken :: P (Maybe (Located Token)) +popPendingImplicitToken + = P $ \s@PState{ alr_pending_implicit_tokens = ts } -> + case ts of + [] -> POk s Nothing + (t : ts') -> POk (s {alr_pending_implicit_tokens = ts'}) (Just t) + +setPendingImplicitTokens :: [Located Token] -> P () +setPendingImplicitTokens ts = P $ \s -> POk (s {alr_pending_implicit_tokens = ts}) () + +getAlrExpectingOCurly :: P (Maybe ALRLayout) +getAlrExpectingOCurly = P $ \s@(PState {alr_expecting_ocurly = b}) -> POk s b + +setAlrExpectingOCurly :: Maybe ALRLayout -> P () +setAlrExpectingOCurly b = P $ \s -> POk (s {alr_expecting_ocurly = b}) () + -- for reasons of efficiency, flags indicating language extensions (eg, -- -fglasgow-exts or -XParr) are represented by a bitmap stored in an unboxed -- integer @@ -1672,6 +1741,10 @@ rawTokenStreamBit :: Int rawTokenStreamBit = 20 -- producing a token stream with all comments included newQualOpsBit :: Int newQualOpsBit = 21 -- Haskell' qualified operator syntax, e.g. Prelude.(+) +recBit :: Int +recBit = 22 -- rec +alternativeLayoutRuleBit :: Int +alternativeLayoutRuleBit = 23 always :: Int -> Bool always _ = True @@ -1713,6 +1786,8 @@ newQualOps :: Int -> Bool newQualOps flags = testBit flags newQualOpsBit oldQualOps :: Int -> Bool oldQualOps flags = not (newQualOps flags) +alternativeLayoutRule :: Int -> Bool +alternativeLayoutRule flags = testBit flags alternativeLayoutRuleBit -- PState for parsing options pragmas -- @@ -1729,7 +1804,12 @@ pragState dynflags buf loc = loc = loc, extsBitmap = 0, context = [], - lex_state = [bol, option_prags, 0] + lex_state = [bol, option_prags, 0], + alr_pending_implicit_tokens = [], + alr_next_token = Nothing, + alr_last_loc = noSrcSpan, + alr_context = [], + alr_expecting_ocurly = Nothing } @@ -1748,8 +1828,13 @@ mkPState buf loc flags = loc = loc, extsBitmap = fromIntegral bitmap, context = [], - lex_state = [bol, 0] + lex_state = [bol, 0], -- we begin in the layout state if toplev_layout is set + alr_pending_implicit_tokens = [], + alr_next_token = Nothing, + alr_last_loc = noSrcSpan, + alr_context = [], + alr_expecting_ocurly = Nothing } where bitmap = genericsBit `setBitIf` dopt Opt_Generics flags @@ -1766,12 +1851,15 @@ mkPState buf loc flags = .|. magicHashBit `setBitIf` dopt Opt_MagicHash flags .|. kindSigsBit `setBitIf` dopt Opt_KindSignatures flags .|. recursiveDoBit `setBitIf` dopt Opt_RecursiveDo flags + .|. recBit `setBitIf` dopt Opt_DoRec flags + .|. recBit `setBitIf` dopt Opt_Arrows flags .|. unicodeSyntaxBit `setBitIf` dopt Opt_UnicodeSyntax flags .|. unboxedTuplesBit `setBitIf` dopt Opt_UnboxedTuples flags .|. standaloneDerivingBit `setBitIf` dopt Opt_StandaloneDeriving flags .|. transformComprehensionsBit `setBitIf` dopt Opt_TransformListComp flags .|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags .|. newQualOpsBit `setBitIf` dopt Opt_NewQualifiedOperators flags + .|. alternativeLayoutRuleBit `setBitIf` dopt Opt_AlternativeLayoutRule flags -- setBitIf :: Int -> Bool -> Int b `setBitIf` cond | cond = bit b @@ -1852,10 +1940,165 @@ lexError str = do lexer :: (Located Token -> P a) -> P a lexer cont = do - tok@(L _span _tok__) <- lexToken + alr <- extension alternativeLayoutRule + let lexTokenFun = if alr then lexTokenAlr else lexToken + tok@(L _span _tok__) <- lexTokenFun -- trace ("token: " ++ show tok__) $ do cont tok +lexTokenAlr :: P (Located Token) +lexTokenAlr = do mPending <- popPendingImplicitToken + t <- case mPending of + Nothing -> + do mNext <- popNextToken + t <- case mNext of + Nothing -> lexToken + Just next -> return next + alternativeLayoutRuleToken t + Just t -> + return t + setAlrLastLoc (getLoc t) + case unLoc t of + ITwhere -> setAlrExpectingOCurly (Just ALRLayoutWhere) + ITlet -> setAlrExpectingOCurly (Just ALRLayoutLet) + ITof -> setAlrExpectingOCurly (Just ALRLayoutOf) + ITdo -> setAlrExpectingOCurly (Just ALRLayoutDo) + _ -> return () + return t + +alternativeLayoutRuleToken :: Located Token -> P (Located Token) +alternativeLayoutRuleToken t + = do context <- getALRContext + lastLoc <- getAlrLastLoc + mExpectingOCurly <- getAlrExpectingOCurly + let thisLoc = getLoc t + thisCol = srcSpanStartCol thisLoc + newLine = srcSpanStartLine thisLoc > srcSpanEndLine lastLoc + case (unLoc t, context, mExpectingOCurly) of + -- I think our implicit open-curly handling is slightly + -- different to John's, in how it interacts with newlines + -- and "in" + (ITocurly, _, Just _) -> + do setAlrExpectingOCurly Nothing + setNextToken t + lexTokenAlr + (_, ALRLayout _ col : ls, Just expectingOCurly) + | thisCol > col -> + do setAlrExpectingOCurly Nothing + setALRContext (ALRLayout expectingOCurly thisCol : context) + setNextToken t + return (L thisLoc ITocurly) + | otherwise -> + do setAlrExpectingOCurly Nothing + setPendingImplicitTokens [L thisLoc ITccurly] + setNextToken t + return (L thisLoc ITocurly) + (_, _, Just expectingOCurly) -> + do setAlrExpectingOCurly Nothing + setALRContext (ALRLayout expectingOCurly thisCol : context) + setNextToken t + return (L thisLoc ITocurly) + -- We do the [] cases earlier than in the spec, as we + -- have an actual EOF token + (ITeof, ALRLayout _ _ : ls, _) -> + do setALRContext ls + setNextToken t + return (L thisLoc ITccurly) + (ITeof, _, _) -> + return t + -- the other ITeof case omitted; general case below covers it + (ITin, ALRLayout ALRLayoutLet _ : ls, _) + | newLine -> + do setPendingImplicitTokens [t] + setALRContext ls + return (L thisLoc ITccurly) + (_, ls@(ALRLayout _ col : _), _) + | newLine && thisCol <= col -> + do let f ls'@(ALRLayout _ col' : ls'') + | thisCol < col' = case f ls'' of + (ts, ls''') -> + (L thisLoc ITccurly : ts, ls''') + | thisCol == col' = ([L thisLoc ITsemi], ls') + | otherwise = ([], ls') + f ls' = ([], ls') + case f ls of + (t' : ts, ls') -> + do setALRContext ls' + setPendingImplicitTokens ts + setNextToken t + return t' + _ -> panic "Layout rule: [] when considering newline" + (u, _, _) + | isALRopen u -> + do setALRContext (ALRNoLayout (containsCommas u) : context) + return t + (u, _, _) + | isALRclose u -> + case context of + ALRLayout _ _ : ls -> + do setALRContext ls + setNextToken t + return (L thisLoc ITccurly) + ALRNoLayout _ : ls -> + do setALRContext ls + return t + [] -> + -- XXX This is an error in John's code, but + -- it looks reachable to me at first glance + return t + (ITin, ALRLayout ALRLayoutLet _ : ls, _) -> + do setALRContext ls + setPendingImplicitTokens [t] + return (L thisLoc ITccurly) + (ITin, _ : ls, _) -> + do setALRContext ls + setNextToken t + return (L thisLoc ITccurly) + -- the other ITin case omitted; general case below covers it + (ITcomma, ALRLayout _ _ : ls, _) + | topNoLayoutContainsCommas ls -> + do setALRContext ls + setNextToken t + return (L thisLoc ITccurly) + (ITwhere, ALRLayout ALRLayoutDo _ : ls, _) -> + do setALRContext ls + setPendingImplicitTokens [t] + return (L thisLoc ITccurly) + -- the other ITwhere case omitted; general case below covers it + (_, _, _) -> return t + +isALRopen :: Token -> Bool +isALRopen ITcase = True +isALRopen ITif = True +isALRopen IToparen = True +isALRopen ITobrack = True +isALRopen ITocurly = True +-- GHC Extensions: +isALRopen IToubxparen = True +isALRopen _ = False + +isALRclose :: Token -> Bool +isALRclose ITof = True +isALRclose ITthen = True +isALRclose ITcparen = True +isALRclose ITcbrack = True +isALRclose ITccurly = True +-- GHC Extensions: +isALRclose ITcubxparen = True +isALRclose _ = False + +containsCommas :: Token -> Bool +containsCommas IToparen = True +containsCommas ITobrack = True +-- GHC Extensions: +containsCommas IToubxparen = True +containsCommas _ = False + +topNoLayoutContainsCommas :: [ALRContext] -> Bool +topNoLayoutContainsCommas [] = False +topNoLayoutContainsCommas (ALRLayout _ _ : ls) = topNoLayoutContainsCommas ls +topNoLayoutContainsCommas (ALRNoLayout b : _) = b + lexToken :: P (Located Token) lexToken = do inp@(AI loc1 _ buf) <- getInput @@ -1950,6 +2193,6 @@ clean_pragma prag = canon_ws (map toLower (unprefix prag)) "noinline" -> "notinline" "specialise" -> "specialize" "constructorlike" -> "conlike" - otherwise -> prag' + _ -> prag' canon_ws s = unwords (map canonical (words s)) }