X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fparser%2FLexer.x;h=cae53499fedf05324887c9b17af7f4856367a02b;hp=675b4d6fc03e8a3e35ee40b72ce5931bbbf41043;hb=60762c678c83e370609001addc8f52efcd657139;hpb=1e436f2bb208a6c990743afaf17b7c2a93c31742 diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x index 675b4d6..cae5349 100644 --- a/compiler/parser/Lexer.x +++ b/compiler/parser/Lexer.x @@ -31,24 +31,24 @@ -- qualified varids. { -{-# OPTIONS -Wwarn -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details --- --- Note that Alex itself generates code with with some unused bindings and --- without type signatures, so removing the flag might not be possible. +-- XXX The above flags turn off warnings in the generated code: +{-# OPTIONS_GHC -fno-warn-unused-matches #-} +{-# OPTIONS_GHC -fno-warn-unused-binds #-} +{-# OPTIONS_GHC -fno-warn-unused-imports #-} +{-# OPTIONS_GHC -fno-warn-missing-signatures #-} +-- But alex still generates some code that causes the "lazy unlifted bindings" +-- warning, and old compilers don't know about it so we can't easily turn +-- it off, so for now we use the sledge hammer: +{-# OPTIONS_GHC -w #-} {-# OPTIONS_GHC -funbox-strict-fields #-} module Lexer ( Token(..), lexer, pragState, mkPState, PState(..), P(..), ParseResult(..), getSrcLoc, - getPState, + getPState, getDynFlags, withThisPackage, failLocMsgP, failSpanMsgP, srcParseFail, - getMessages, + getMessages, popContext, pushCurrentContext, setLastToken, setSrcLoc, getLexState, popLexState, pushLexState, extension, standaloneDerivingEnabled, bangPatEnabled, @@ -64,6 +64,7 @@ import FastString import SrcLoc import UniqFM import DynFlags +import Module import Ctype import Util ( readRational ) @@ -662,7 +663,7 @@ reservedWordsFM = listToUFM $ ( "ccall", ITccallconv, bit ffiBit), ( "prim", ITprimcallconv, bit ffiBit), - ( "rec", ITrec, bit arrowsBit), + ( "rec", ITrec, bit recBit), ( "proc", ITproc, bit arrowsBit) ] @@ -701,6 +702,14 @@ reservedSymsFM = listToUFM $ ,("→", ITrarrow, unicodeSyntaxEnabled) ,("←", ITlarrow, unicodeSyntaxEnabled) ,("⋯", ITdotdot, unicodeSyntaxEnabled) + + ,("⤙", ITlarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i) + ,("⤚", ITrarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i) + ,("⤛", ITLarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i) + ,("⤜", ITRarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i) + + ,("★", ITstar, unicodeSyntaxEnabled) + -- ToDo: ideally, → and ∷ should be "specials", so that they cannot -- form part of a large operator. This would let us have a better -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe). @@ -738,10 +747,12 @@ begin :: Int -> Action begin code _span _str _len = do pushLexState code; lexToken pop :: Action -pop _span _buf _len = do popLexState; lexToken +pop _span _buf _len = do _ <- popLexState + lexToken pop_and :: Action -> Action -pop_and act span buf len = do popLexState; act span buf len +pop_and act span buf len = do _ <- popLexState + act span buf len {-# INLINE nextCharIs #-} nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool @@ -880,12 +891,12 @@ withLexedDocType lexDocComment = do -- RULES pragmas turn on the forall and '.' keywords, and we turn them -- off again at the end of the pragma. rulePrag :: Action -rulePrag span _ _ = do +rulePrag span _buf _len = do setExts (.|. bit inRulePragBit) return (L span ITrules_prag) endPrag :: Action -endPrag span _ _ = do +endPrag span _buf _len = do setExts (.&. complement (bit inRulePragBit)) return (L span ITclose_prag) @@ -1060,22 +1071,31 @@ do_bol span _str _len = do return (L span ITvccurly) EQ -> do --trace "layout: inserting ';'" $ do - popLexState + _ <- popLexState return (L span ITsemi) GT -> do - popLexState + _ <- popLexState lexToken -- certain keywords put us in the "layout" state, where we might -- add an opening curly brace. maybe_layout :: Token -> P () -maybe_layout ITdo = pushLexState layout_do -maybe_layout ITmdo = pushLexState layout_do -maybe_layout ITof = pushLexState layout -maybe_layout ITlet = pushLexState layout -maybe_layout ITwhere = pushLexState layout -maybe_layout ITrec = pushLexState layout -maybe_layout _ = return () +maybe_layout t = do -- If the alternative layout rule is enabled then + -- we never create an implicit layout context here. + -- Layout is handled XXX instead. + -- The code for closing implicit contexts, or + -- inserting implicit semi-colons, is therefore + -- irrelevant as it only applies in an implicit + -- context. + alr <- extension alternativeLayoutRule + unless alr $ f t + where f ITdo = pushLexState layout_do + f ITmdo = pushLexState layout_do + f ITof = pushLexState layout + f ITlet = pushLexState layout + f ITwhere = pushLexState layout + f ITrec = pushLexState layout + f _ = return () -- Pushing a new implicit layout context. If the indentation of the -- next token is not greater than the previous layout context, then @@ -1088,7 +1108,7 @@ maybe_layout _ = return () -- new_layout_context :: Bool -> Action new_layout_context strict span _buf _len = do - popLexState + _ <- popLexState (AI _ offset _) <- getInput ctx <- getContext case ctx of @@ -1105,7 +1125,7 @@ new_layout_context strict span _buf _len = do do_layout_left :: Action do_layout_left span _buf _len = do - popLexState + _ <- popLexState pushLexState bol -- we must be at the start of a line return (L span ITvccurly) @@ -1115,17 +1135,18 @@ do_layout_left span _buf _len = do setLine :: Int -> Action setLine code span buf len = do let line = parseUnsignedInteger buf len 10 octDecDigit - setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0) + setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1) -- subtract one: the line number refers to the *following* line - popLexState + _ <- popLexState pushLexState code lexToken setFile :: Int -> Action setFile code span buf len = do let file = lexemeToFastString (stepOn buf) (len-2) + setAlrLastLoc noSrcSpan setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span)) - popLexState + _ <- popLexState pushLexState code lexToken @@ -1269,7 +1290,7 @@ finish_char_tok loc ch -- We've already seen the closing quote return (L (mkSrcSpan loc end) (ITprimchar ch)) _other -> return (L (mkSrcSpan loc end) (ITchar ch)) - else do + else do return (L (mkSrcSpan loc end) (ITchar ch)) lex_char :: Char -> AlexInput -> P Char @@ -1476,7 +1497,22 @@ data PState = PState { loc :: SrcLoc, -- current loc (end of prev token + 1) extsBitmap :: !Int, -- bitmap that determines permitted extensions context :: [LayoutContext], - lex_state :: [Int] + lex_state :: [Int], + -- Used in the alternative layout rule: + -- These tokens are the next ones to be sent out. They are + -- just blindly emitted, without the rule looking at them again: + alr_pending_implicit_tokens :: [Located Token], + -- This is the next token to be considered or, if it is Nothing, + -- we need to get the next token from the input stream: + alr_next_token :: Maybe (Located Token), + -- This is what we consider to be the locatino of the last token + -- emitted: + alr_last_loc :: SrcSpan, + -- The stack of layout contexts: + alr_context :: [ALRContext], + -- Are we expecting a '{'? If it's Just, then the ALRLayout tells + -- us what sort of layout the '{' will open: + alr_expecting_ocurly :: Maybe ALRLayout } -- last_loc and last_len are used when generating error messages, -- and in pushCurrentContext only. Sigh, if only Happy passed the @@ -1484,6 +1520,13 @@ data PState = PState { -- Getting rid of last_loc would require finding another way to -- implement pushCurrentContext (which is only called from one place). +data ALRContext = ALRNoLayout Bool{- does it contain commas? -} + | ALRLayout ALRLayout Int +data ALRLayout = ALRLayoutLet + | ALRLayoutWhere + | ALRLayoutOf + | ALRLayoutDo + newtype P a = P { unP :: PState -> ParseResult a } instance Monad P where @@ -1515,6 +1558,14 @@ failSpanMsgP span msg = P $ \_ -> PFailed span msg getPState :: P PState getPState = P $ \s -> POk s s +getDynFlags :: P DynFlags +getDynFlags = P $ \s -> POk s (dflags s) + +withThisPackage :: (PackageId -> a) -> P a +withThisPackage f + = do pkg <- liftM thisPackage getDynFlags + return $ f pkg + extension :: (Int -> Bool) -> P Bool extension p = P $ \s -> POk s (p $! extsBitmap s) @@ -1625,6 +1676,42 @@ popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls getLexState :: P Int getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls +popNextToken :: P (Maybe (Located Token)) +popNextToken + = P $ \s@PState{ alr_next_token = m } -> + POk (s {alr_next_token = Nothing}) m + +setAlrLastLoc :: SrcSpan -> P () +setAlrLastLoc l = P $ \s -> POk (s {alr_last_loc = l}) () + +getAlrLastLoc :: P SrcSpan +getAlrLastLoc = P $ \s@(PState {alr_last_loc = l}) -> POk s l + +getALRContext :: P [ALRContext] +getALRContext = P $ \s@(PState {alr_context = cs}) -> POk s cs + +setALRContext :: [ALRContext] -> P () +setALRContext cs = P $ \s -> POk (s {alr_context = cs}) () + +setNextToken :: Located Token -> P () +setNextToken t = P $ \s -> POk (s {alr_next_token = Just t}) () + +popPendingImplicitToken :: P (Maybe (Located Token)) +popPendingImplicitToken + = P $ \s@PState{ alr_pending_implicit_tokens = ts } -> + case ts of + [] -> POk s Nothing + (t : ts') -> POk (s {alr_pending_implicit_tokens = ts'}) (Just t) + +setPendingImplicitTokens :: [Located Token] -> P () +setPendingImplicitTokens ts = P $ \s -> POk (s {alr_pending_implicit_tokens = ts}) () + +getAlrExpectingOCurly :: P (Maybe ALRLayout) +getAlrExpectingOCurly = P $ \s@(PState {alr_expecting_ocurly = b}) -> POk s b + +setAlrExpectingOCurly :: Maybe ALRLayout -> P () +setAlrExpectingOCurly b = P $ \s -> POk (s {alr_expecting_ocurly = b}) () + -- for reasons of efficiency, flags indicating language extensions (eg, -- -fglasgow-exts or -XParr) are represented by a bitmap stored in an unboxed -- integer @@ -1672,6 +1759,10 @@ rawTokenStreamBit :: Int rawTokenStreamBit = 20 -- producing a token stream with all comments included newQualOpsBit :: Int newQualOpsBit = 21 -- Haskell' qualified operator syntax, e.g. Prelude.(+) +recBit :: Int +recBit = 22 -- rec +alternativeLayoutRuleBit :: Int +alternativeLayoutRuleBit = 23 always :: Int -> Bool always _ = True @@ -1713,6 +1804,8 @@ newQualOps :: Int -> Bool newQualOps flags = testBit flags newQualOpsBit oldQualOps :: Int -> Bool oldQualOps flags = not (newQualOps flags) +alternativeLayoutRule :: Int -> Bool +alternativeLayoutRule flags = testBit flags alternativeLayoutRuleBit -- PState for parsing options pragmas -- @@ -1729,7 +1822,12 @@ pragState dynflags buf loc = loc = loc, extsBitmap = 0, context = [], - lex_state = [bol, option_prags, 0] + lex_state = [bol, option_prags, 0], + alr_pending_implicit_tokens = [], + alr_next_token = Nothing, + alr_last_loc = noSrcSpan, + alr_context = [], + alr_expecting_ocurly = Nothing } @@ -1748,35 +1846,38 @@ mkPState buf loc flags = loc = loc, extsBitmap = fromIntegral bitmap, context = [], - lex_state = [bol, 0] + lex_state = [bol, 0], -- we begin in the layout state if toplev_layout is set + alr_pending_implicit_tokens = [], + alr_next_token = Nothing, + alr_last_loc = noSrcSpan, + alr_context = [], + alr_expecting_ocurly = Nothing } where bitmap = genericsBit `setBitIf` dopt Opt_Generics flags - .|. ffiBit `setBitIf` dopt Opt_ForeignFunctionInterface flags - .|. parrBit `setBitIf` dopt Opt_PArr flags - .|. arrowsBit `setBitIf` dopt Opt_Arrows flags - .|. thBit `setBitIf` dopt Opt_TemplateHaskell flags - .|. qqBit `setBitIf` dopt Opt_QuasiQuotes flags - .|. ipBit `setBitIf` dopt Opt_ImplicitParams flags - .|. explicitForallBit `setBitIf` dopt Opt_ScopedTypeVariables flags - .|. explicitForallBit `setBitIf` dopt Opt_LiberalTypeSynonyms flags - .|. explicitForallBit `setBitIf` dopt Opt_PolymorphicComponents flags - .|. explicitForallBit `setBitIf` dopt Opt_ExistentialQuantification flags - .|. explicitForallBit `setBitIf` dopt Opt_Rank2Types flags - .|. explicitForallBit `setBitIf` dopt Opt_RankNTypes flags - .|. bangPatBit `setBitIf` dopt Opt_BangPatterns flags - .|. tyFamBit `setBitIf` dopt Opt_TypeFamilies flags - .|. haddockBit `setBitIf` dopt Opt_Haddock flags - .|. magicHashBit `setBitIf` dopt Opt_MagicHash flags - .|. kindSigsBit `setBitIf` dopt Opt_KindSignatures flags - .|. recursiveDoBit `setBitIf` dopt Opt_RecursiveDo flags - .|. unicodeSyntaxBit `setBitIf` dopt Opt_UnicodeSyntax flags - .|. unboxedTuplesBit `setBitIf` dopt Opt_UnboxedTuples flags + .|. ffiBit `setBitIf` dopt Opt_ForeignFunctionInterface flags + .|. parrBit `setBitIf` dopt Opt_PArr flags + .|. arrowsBit `setBitIf` dopt Opt_Arrows flags + .|. thBit `setBitIf` dopt Opt_TemplateHaskell flags + .|. qqBit `setBitIf` dopt Opt_QuasiQuotes flags + .|. ipBit `setBitIf` dopt Opt_ImplicitParams flags + .|. explicitForallBit `setBitIf` dopt Opt_ExplicitForAll flags + .|. bangPatBit `setBitIf` dopt Opt_BangPatterns flags + .|. tyFamBit `setBitIf` dopt Opt_TypeFamilies flags + .|. haddockBit `setBitIf` dopt Opt_Haddock flags + .|. magicHashBit `setBitIf` dopt Opt_MagicHash flags + .|. kindSigsBit `setBitIf` dopt Opt_KindSignatures flags + .|. recursiveDoBit `setBitIf` dopt Opt_RecursiveDo flags + .|. recBit `setBitIf` dopt Opt_DoRec flags + .|. recBit `setBitIf` dopt Opt_Arrows flags + .|. unicodeSyntaxBit `setBitIf` dopt Opt_UnicodeSyntax flags + .|. unboxedTuplesBit `setBitIf` dopt Opt_UnboxedTuples flags .|. standaloneDerivingBit `setBitIf` dopt Opt_StandaloneDeriving flags .|. transformComprehensionsBit `setBitIf` dopt Opt_TransformListComp flags .|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags .|. newQualOpsBit `setBitIf` dopt Opt_NewQualifiedOperators flags + .|. alternativeLayoutRuleBit `setBitIf` dopt Opt_AlternativeLayoutRule flags -- setBitIf :: Int -> Bool -> Int b `setBitIf` cond | cond = bit b @@ -1857,10 +1958,179 @@ lexError str = do lexer :: (Located Token -> P a) -> P a lexer cont = do - tok@(L _span _tok__) <- lexToken + alr <- extension alternativeLayoutRule + let lexTokenFun = if alr then lexTokenAlr else lexToken + tok@(L _span _tok__) <- lexTokenFun -- trace ("token: " ++ show tok__) $ do cont tok +lexTokenAlr :: P (Located Token) +lexTokenAlr = do mPending <- popPendingImplicitToken + t <- case mPending of + Nothing -> + do mNext <- popNextToken + t <- case mNext of + Nothing -> lexToken + Just next -> return next + alternativeLayoutRuleToken t + Just t -> + return t + setAlrLastLoc (getLoc t) + case unLoc t of + ITwhere -> setAlrExpectingOCurly (Just ALRLayoutWhere) + ITlet -> setAlrExpectingOCurly (Just ALRLayoutLet) + ITof -> setAlrExpectingOCurly (Just ALRLayoutOf) + ITdo -> setAlrExpectingOCurly (Just ALRLayoutDo) + _ -> return () + return t + +alternativeLayoutRuleToken :: Located Token -> P (Located Token) +alternativeLayoutRuleToken t + = do context <- getALRContext + lastLoc <- getAlrLastLoc + mExpectingOCurly <- getAlrExpectingOCurly + let thisLoc = getLoc t + thisCol = srcSpanStartCol thisLoc + newLine = (lastLoc == noSrcSpan) + || (srcSpanStartLine thisLoc > srcSpanEndLine lastLoc) + case (unLoc t, context, mExpectingOCurly) of + -- This case handles a GHC extension to the original H98 + -- layout rule... + (ITocurly, _, Just _) -> + do setAlrExpectingOCurly Nothing + setALRContext (ALRNoLayout (containsCommas ITocurly) : context) + return t + -- ...and makes this case unnecessary + {- + -- I think our implicit open-curly handling is slightly + -- different to John's, in how it interacts with newlines + -- and "in" + (ITocurly, _, Just _) -> + do setAlrExpectingOCurly Nothing + setNextToken t + lexTokenAlr + -} + (_, ALRLayout _ col : ls, Just expectingOCurly) + | (thisCol > col) || + (thisCol == col && + isNonDecreasingIntentation expectingOCurly) -> + do setAlrExpectingOCurly Nothing + setALRContext (ALRLayout expectingOCurly thisCol : context) + setNextToken t + return (L thisLoc ITocurly) + | otherwise -> + do setAlrExpectingOCurly Nothing + setPendingImplicitTokens [L lastLoc ITccurly] + setNextToken t + return (L lastLoc ITocurly) + (_, _, Just expectingOCurly) -> + do setAlrExpectingOCurly Nothing + setALRContext (ALRLayout expectingOCurly thisCol : context) + setNextToken t + return (L thisLoc ITocurly) + -- We do the [] cases earlier than in the spec, as we + -- have an actual EOF token + (ITeof, ALRLayout _ _ : ls, _) -> + do setALRContext ls + setNextToken t + return (L thisLoc ITccurly) + (ITeof, _, _) -> + return t + -- the other ITeof case omitted; general case below covers it + (ITin, ALRLayout ALRLayoutLet _ : ls, _) + | newLine -> + do setPendingImplicitTokens [t] + setALRContext ls + return (L thisLoc ITccurly) + (_, ALRLayout _ col : ls, _) + | newLine && thisCol == col -> + do setNextToken t + return (L thisLoc ITsemi) + | newLine && thisCol < col -> + do setALRContext ls + setNextToken t + -- Note that we use lastLoc, as we may need to close + -- more layouts, or give a semicolon + return (L lastLoc ITccurly) + (u, _, _) + | isALRopen u -> + do setALRContext (ALRNoLayout (containsCommas u) : context) + return t + (u, _, _) + | isALRclose u -> + case context of + ALRLayout _ _ : ls -> + do setALRContext ls + setNextToken t + return (L thisLoc ITccurly) + ALRNoLayout _ : ls -> + do setALRContext ls + return t + [] -> + -- XXX This is an error in John's code, but + -- it looks reachable to me at first glance + return t + (ITin, ALRLayout ALRLayoutLet _ : ls, _) -> + do setALRContext ls + setPendingImplicitTokens [t] + return (L thisLoc ITccurly) + (ITin, ALRLayout _ _ : ls, _) -> + do setALRContext ls + setNextToken t + return (L thisLoc ITccurly) + -- the other ITin case omitted; general case below covers it + (ITcomma, ALRLayout _ _ : ls, _) + | topNoLayoutContainsCommas ls -> + do setALRContext ls + setNextToken t + return (L thisLoc ITccurly) + (ITwhere, ALRLayout ALRLayoutDo _ : ls, _) -> + do setALRContext ls + setPendingImplicitTokens [t] + return (L thisLoc ITccurly) + -- the other ITwhere case omitted; general case below covers it + (_, _, _) -> return t + +isALRopen :: Token -> Bool +isALRopen ITcase = True +isALRopen ITif = True +isALRopen IToparen = True +isALRopen ITobrack = True +isALRopen ITocurly = True +-- GHC Extensions: +isALRopen IToubxparen = True +isALRopen _ = False + +isALRclose :: Token -> Bool +isALRclose ITof = True +isALRclose ITthen = True +isALRclose ITcparen = True +isALRclose ITcbrack = True +isALRclose ITccurly = True +-- GHC Extensions: +isALRclose ITcubxparen = True +isALRclose _ = False + +isNonDecreasingIntentation :: ALRLayout -> Bool +isNonDecreasingIntentation ALRLayoutDo = True +isNonDecreasingIntentation _ = False + +containsCommas :: Token -> Bool +containsCommas IToparen = True +containsCommas ITobrack = True +-- John doesn't have {} as containing commas, but records contain them, +-- which caused a problem parsing Cabal's Distribution.Simple.InstallDirs +-- (defaultInstallDirs). +containsCommas ITocurly = True +-- GHC Extensions: +containsCommas IToubxparen = True +containsCommas _ = False + +topNoLayoutContainsCommas :: [ALRContext] -> Bool +topNoLayoutContainsCommas [] = False +topNoLayoutContainsCommas (ALRLayout _ _ : ls) = topNoLayoutContainsCommas ls +topNoLayoutContainsCommas (ALRNoLayout b : _) = b + lexToken :: P (Located Token) lexToken = do inp@(AI loc1 _ buf) <- getInput @@ -1955,6 +2225,6 @@ clean_pragma prag = canon_ws (map toLower (unprefix prag)) "noinline" -> "notinline" "specialise" -> "specialize" "constructorlike" -> "conlike" - otherwise -> prag' + _ -> prag' canon_ws s = unwords (map canonical (words s)) }