X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2Fparser%2FLexer.x;h=6362bf3e20ea3973122c8102feba090174bb6630;hb=0f5e104c36b1dc3d8deeec5fef3d65e7b3a1b5ad;hp=2f3ef4b41b5a238425d3e3d934d8f27fbf75e24f;hpb=67cb409159fa9136dff942b8baaec25909416022;p=ghc-hetmet.git diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x index 2f3ef4b..6362bf3 100644 --- a/compiler/parser/Lexer.x +++ b/compiler/parser/Lexer.x @@ -27,6 +27,9 @@ -- any warnings in the module. See -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings -- for details +-- +-- Note that Alex itself generates code with with some unused bindings and +-- without type signatures, so removing the flag might not be possible. module Lexer ( Token(..), lexer, pragState, mkPState, PState(..), @@ -39,8 +42,6 @@ module Lexer ( addWarning ) where -#include "HsVersions.h" - import Bag import ErrUtils import Outputable @@ -55,7 +56,7 @@ import Util ( maybePrefixMatch, readRational ) import Control.Monad import Data.Bits -import Data.Char ( chr, isSpace ) +import Data.Char ( chr, ord, isSpace ) import Data.Ratio import Debug.Trace @@ -154,7 +155,7 @@ $tab+ { warn Opt_WarnTabs (text "Tab character") } -- Next, match Haddock comments if no -haddock flag -"-- " $docsym .* / { ifExtension (not . haddockEnabled) } ; +"-- " [$docsym \#] .* / { ifExtension (not . haddockEnabled) } ; -- Now, when we've matched comments that begin with 2 dashes and continue -- with a different character, we need to match comments that begin with three @@ -308,6 +309,9 @@ $tab+ { warn Opt_WarnTabs (text "Tab character") } "|]" / { ifExtension thEnabled } { token ITcloseQuote } \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape } "$(" / { ifExtension thEnabled } { token ITparenEscape } + + "[$" @varid "|" / { ifExtension qqEnabled } + { lex_quasiquote_tok } } <0> { @@ -372,25 +376,29 @@ $tab+ { warn Opt_WarnTabs (text "Tab character") } -- when trying to be close to Haskell98 <0> { -- Normal integral literals (:: Num a => a, from Integer) - @decimal { tok_num positive 0 0 decimal } - 0[oO] @octal { tok_num positive 2 2 octal } - 0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal } + @decimal { tok_num positive 0 0 decimal } + 0[oO] @octal { tok_num positive 2 2 octal } + 0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal } -- Normal rational literals (:: Fractional a => a, from Rational) - @floating_point { strtoken tok_float } + @floating_point { strtoken tok_float } } <0> { - -- Unboxed ints (:: Int#) + -- Unboxed ints (:: Int#) and words (:: Word#) -- It's simpler (and faster?) to give separate cases to the negatives, -- especially considering octal/hexadecimal prefixes. - @decimal \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal } - 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal } - 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal } - @negative @decimal \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal } - @negative 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal } + @decimal \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal } + 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal } + 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal } + @negative @decimal \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal } + @negative 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal } @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal } + @decimal \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal } + 0[oO] @octal \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal } + 0[xX] @hexadecimal \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal } + -- Unboxed floats and doubles (:: Float#, :: Double#) -- prim_{float,double} work with signed literals @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat } @@ -496,8 +504,8 @@ data Token | ITvocurly | ITvccurly | ITobrack - | ITopabrack -- [:, for parallel arrays with -fparr - | ITcpabrack -- :], for parallel arrays with -fparr + | ITopabrack -- [:, for parallel arrays with -XParr + | ITcpabrack -- :], for parallel arrays with -XParr | ITcbrack | IToparen | ITcparen @@ -529,6 +537,7 @@ data Token | ITprimchar Char | ITprimstring FastString | ITprimint Integer + | ITprimword Integer | ITprimfloat Rational | ITprimdouble Rational @@ -542,6 +551,7 @@ data Token | ITparenEscape -- $( | ITvarQuote -- ' | ITtyQuote -- '' + | ITquasiQuote (FastString,FastString,SrcSpan) -- [:...|...|] -- Arrow notation extension | ITproc @@ -568,6 +578,7 @@ data Token deriving Show -- debugging #endif +{- isSpecial :: Token -> Bool -- If we see M.x, where x is a keyword, but -- is special, we treat is as just plain M.x, @@ -590,6 +601,7 @@ isSpecial ITgroup = True isSpecial ITby = True isSpecial ITusing = True isSpecial _ = False +-} -- the bitmap provided as the third component indicates whether the -- corresponding extension keyword is valid under the extension options @@ -697,11 +709,11 @@ reservedSymsFM = listToUFM $ type Action = SrcSpan -> StringBuffer -> Int -> P (Located Token) special :: Token -> Action -special tok span _buf len = return (L span tok) +special tok span _buf _len = return (L span tok) token, layout_token :: Token -> Action -token t span buf len = return (L span t) -layout_token t span buf len = pushLexState layout >> return (L span t) +token t span _buf _len = return (L span t) +layout_token t span _buf _len = pushLexState layout >> return (L span t) idtoken :: (StringBuffer -> Int -> Token) -> Action idtoken f span buf len = return (L span $! (f buf len)) @@ -751,8 +763,10 @@ isNormalComment bits _ _ (AI _ _ buf) spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf)) +{- haddockDisabledAnd p bits _ _ (AI _ _ buf) = if haddockEnabled bits then False else (p buf) +-} atEOL _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf == '\n' @@ -801,12 +815,12 @@ nested_comment cont span _str _len = do Just ('-',input) -> case alexGetChar input of Nothing -> errBrace input span Just ('\125',input) -> go (n-1) input - Just (c,_) -> go n input + Just (_,_) -> go n input Just ('\123',input) -> case alexGetChar input of Nothing -> errBrace input span Just ('-',input) -> go (n+1) input - Just (c,_) -> go n input - Just (c,input) -> go n input + Just (_,_) -> go n input + Just (_,input) -> go n input nested_doc_comment :: Action nested_doc_comment span buf _len = withLexedDocType (go "") @@ -815,16 +829,16 @@ nested_doc_comment span buf _len = withLexedDocType (go "") Nothing -> errBrace input span Just ('-',input) -> case alexGetChar input of Nothing -> errBrace input span - Just ('\125',input@(AI end _ buf2)) -> + Just ('\125',input) -> docCommentEnd input commentAcc docType buf span - Just (c,_) -> go ('-':commentAcc) input docType False + Just (_,_) -> go ('-':commentAcc) input docType False Just ('\123', input) -> case alexGetChar input of Nothing -> errBrace input span Just ('-',input) -> do setInput input let cont = do input <- getInput; go commentAcc input docType False nested_comment cont span buf _len - Just (c,_) -> go ('\123':commentAcc) input docType False + Just (_,_) -> go ('\123':commentAcc) input docType False Just (c,input) -> go (c:commentAcc) input docType False withLexedDocType lexDocComment = do @@ -838,7 +852,7 @@ withLexedDocType lexDocComment = do where lexDocSection n input = case alexGetChar input of Just ('*', input) -> lexDocSection (n+1) input - Just (c, _) -> lexDocComment input (ITdocSection n) True + Just (_, _) -> lexDocComment input (ITdocSection n) True Nothing -> do setInput input; lexToken -- eof reached, lex it normally -- docCommentEnd @@ -916,6 +930,7 @@ splitQualName orig_buf len = split orig_buf orig_buf qual_size = orig_buf `byteDiff` dot_buf varid span buf len = + fs `seq` case lookupUFM reservedWordsFM fs of Just (keyword,0) -> do maybe_layout keyword @@ -961,6 +976,7 @@ tok_integral itint transint transbuf translen (radix,char_to_int) span buf len = -- some conveniences for use with tok_integral tok_num = tok_integral ITinteger tok_primint = tok_integral ITprimint +tok_primword = tok_integral ITprimword positive positive = id negative = negate decimal = (10,octDecDigit) @@ -1024,7 +1040,7 @@ new_layout_context strict span _buf _len = do -- we must generate a {} sequence now. pushLexState layout_left return (L span ITvocurly) - other -> do + _ -> do setContext (Layout offset : ctx) return (L span ITvocurly) @@ -1058,7 +1074,7 @@ setFile code span buf len = do -- Options, includes and language pragmas. lex_string_prag :: (String -> Token) -> Action -lex_string_prag mkTok span buf len +lex_string_prag mkTok span _buf _len = do input <- getInput start <- getSrcLoc tok <- go [] input @@ -1071,7 +1087,7 @@ lex_string_prag mkTok span buf len else case alexGetChar input of Just (c,i) -> go (c:acc) i Nothing -> err input - isString i [] = True + isString _ [] = True isString i (x:xs) = case alexGetChar i of Just (c,i') | c == x -> isString i' xs @@ -1085,7 +1101,7 @@ lex_string_prag mkTok span buf len -- This stuff is horrible. I hates it. lex_string_tok :: Action -lex_string_tok span buf len = do +lex_string_tok span _buf _len = do tok <- lex_string "" end <- getSrcLoc return (L (mkSrcSpan (srcSpanStart span) end) tok) @@ -1142,7 +1158,7 @@ lex_char_tok :: Action -- but WIHTOUT CONSUMING the x or T part (the parser does that). -- So we have to do two characters of lookahead: when we see 'x we need to -- see if there's a trailing quote -lex_char_tok span buf len = do -- We've seen ' +lex_char_tok span _buf _len = do -- We've seen ' i1 <- getInput -- Look ahead to first character let loc = srcSpanStart span case alexGetChar' i1 of @@ -1155,14 +1171,14 @@ lex_char_tok span buf len = do -- We've seen ' return (L (mkSrcSpan loc end2) ITtyQuote) else lit_error - Just ('\\', i2@(AI end2 _ _)) -> do -- We've seen 'backslash + Just ('\\', i2@(AI _end2 _ _)) -> do -- We've seen 'backslash setInput i2 lit_ch <- lex_escape mc <- getCharOrFail -- Trailing quote if mc == '\'' then finish_char_tok loc lit_ch else do setInput i2; lit_error - Just (c, i2@(AI end2 _ _)) + Just (c, i2@(AI _end2 _ _)) | not (isAny c) -> lit_error | otherwise -> @@ -1226,7 +1242,7 @@ lex_escape = do 'x' -> readNum is_hexdigit 16 hexDigit 'o' -> readNum is_octdigit 8 octDecDigit - x | is_digit x -> readNum2 is_digit 10 octDecDigit (octDecDigit x) + x | is_decdigit x -> readNum2 is_decdigit 10 octDecDigit (octDecDigit x) c1 -> do i <- getInput @@ -1318,6 +1334,42 @@ getCharOrFail = do Just (c,i) -> do setInput i; return c -- ----------------------------------------------------------------------------- +-- QuasiQuote + +lex_quasiquote_tok :: Action +lex_quasiquote_tok span buf len = do + let quoter = reverse $ takeWhile (/= '$') + $ reverse $ lexemeToString buf (len - 1) + quoteStart <- getSrcLoc + quote <- lex_quasiquote "" + end <- getSrcLoc + return (L (mkSrcSpan (srcSpanStart span) end) + (ITquasiQuote (mkFastString quoter, + mkFastString (reverse quote), + mkSrcSpan quoteStart end))) + +lex_quasiquote :: String -> P String +lex_quasiquote s = do + i <- getInput + case alexGetChar' i of + Nothing -> lit_error + + Just ('\\',i) + | Just ('|',i) <- next -> do + setInput i; lex_quasiquote ('|' : s) + | Just (']',i) <- next -> do + setInput i; lex_quasiquote (']' : s) + where next = alexGetChar' i + + Just ('|',i) + | Just (']',i) <- next -> do + setInput i; return s + where next = alexGetChar' i + + Just (c, i) -> do + setInput i; lex_quasiquote (c : s) + +-- ----------------------------------------------------------------------------- -- Warnings warn :: DynFlag -> SDoc -> Action @@ -1370,7 +1422,7 @@ instance Monad P where fail = failP returnP :: a -> P a -returnP a = P $ \s -> POk s a +returnP a = a `seq` (P $ \s -> POk s a) thenP :: P a -> (a -> P b) -> P b (P m) `thenP` k = P $ \ s -> @@ -1385,10 +1437,10 @@ failMsgP :: String -> P a failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg) failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a -failLocMsgP loc1 loc2 str = P $ \s -> PFailed (mkSrcSpan loc1 loc2) (text str) +failLocMsgP loc1 loc2 str = P $ \_ -> PFailed (mkSrcSpan loc1 loc2) (text str) -failSpanMsgP :: SrcSpan -> String -> P a -failSpanMsgP span msg = P $ \s -> PFailed span (text msg) +failSpanMsgP :: SrcSpan -> SDoc -> P a +failSpanMsgP span msg = P $ \_ -> PFailed span msg extension :: (Int -> Bool) -> P Bool extension p = P $ \s -> POk s (p $! extsBitmap s) @@ -1478,7 +1530,7 @@ alexGetChar' (AI loc ofs s) ofs' = advanceOffs c ofs advanceOffs :: Char -> Int -> Int -advanceOffs '\n' offs = 0 +advanceOffs '\n' _ = 0 advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8 advanceOffs _ offs = offs + 1 @@ -1495,10 +1547,10 @@ popLexState :: P Int popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls getLexState :: P Int -getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls +getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls -- for reasons of efficiency, flags indicating language extensions (eg, --- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed +-- -fglasgow-exts or -XParr) are represented by a bitmap stored in an unboxed -- integer genericsBit, ffiBit, parrBit :: Int @@ -1520,6 +1572,7 @@ unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc unboxedTuplesBit = 15 -- (# and #) standaloneDerivingBit = 16 -- standalone instance deriving declarations transformComprehensionsBit = 17 +qqBit = 18 -- enable quasiquoting genericsEnabled, ffiEnabled, parrEnabled :: Int -> Bool always _ = True @@ -1540,17 +1593,16 @@ unicodeSyntaxEnabled flags = testBit flags unicodeSyntaxBit unboxedTuplesEnabled flags = testBit flags unboxedTuplesBit standaloneDerivingEnabled flags = testBit flags standaloneDerivingBit transformComprehensionsEnabled flags = testBit flags transformComprehensionsBit +qqEnabled flags = testBit flags qqBit -- PState for parsing options pragmas -- -pragState :: StringBuffer -> SrcLoc -> PState -pragState buf loc = +pragState :: DynFlags -> StringBuffer -> SrcLoc -> PState +pragState dynflags buf loc = PState { - buffer = buf, + buffer = buf, messages = emptyMessages, - -- XXX defaultDynFlags is not right, but we don't have a real - -- dflags handy - dflags = defaultDynFlags, + dflags = dynflags, last_loc = mkSrcSpan loc loc, last_offs = 0, last_len = 0, @@ -1586,6 +1638,7 @@ mkPState buf loc flags = .|. parrBit `setBitIf` dopt Opt_PArr flags .|. arrowsBit `setBitIf` dopt Opt_Arrows flags .|. thBit `setBitIf` dopt Opt_TemplateHaskell flags + .|. qqBit `setBitIf` dopt Opt_QuasiQuotes flags .|. ipBit `setBitIf` dopt Opt_ImplicitParams flags .|. explicitForallBit `setBitIf` dopt Opt_ScopedTypeVariables flags .|. explicitForallBit `setBitIf` dopt Opt_PolymorphicComponents flags @@ -1625,7 +1678,7 @@ setContext ctx = P $ \s -> POk s{context=ctx} () popContext :: P () popContext = P $ \ s@(PState{ buffer = buf, context = ctx, - loc = loc, last_len = len, last_loc = last_loc }) -> + last_len = len, last_loc = last_loc }) -> case ctx of (_:tl) -> POk s{ context = tl } () [] -> PFailed last_loc (srcParseErr buf len) @@ -1654,8 +1707,8 @@ srcParseErr -> Message srcParseErr buf len = hcat [ if null token - then ptext SLIT("parse error (possibly incorrect indentation)") - else hcat [ptext SLIT("parse error on input "), + then ptext (sLit "parse error (possibly incorrect indentation)") + else hcat [ptext (sLit "parse error on input "), char '`', text token, char '\''] ] where token = lexemeToString (offsetBytes (-len) buf) len @@ -1673,7 +1726,7 @@ srcParseFail = P $ \PState{ buffer = buf, last_len = len, lexError :: String -> P a lexError str = do loc <- getSrcLoc - i@(AI end _ buf) <- getInput + (AI end _ buf) <- getInput reportLexError loc end buf str -- ----------------------------------------------------------------------------- @@ -1682,7 +1735,7 @@ lexError str = do lexer :: (Located Token -> P a) -> P a lexer cont = do - tok@(L span tok__) <- lexToken + tok@(L _span _tok__) <- lexToken -- trace ("token: " ++ show tok__) $ do cont tok @@ -1692,20 +1745,21 @@ lexToken = do sc <- getLexState exts <- getExts case alexScanUser exts inp sc of - AlexEOF -> do let span = mkSrcSpan loc1 loc1 - setLastToken span 0 0 - return (L span ITeof) - AlexError (AI loc2 _ buf) -> do - reportLexError loc1 loc2 buf "lexical error" + AlexEOF -> do + let span = mkSrcSpan loc1 loc1 + setLastToken span 0 0 + return (L span ITeof) + AlexError (AI loc2 _ buf) -> + reportLexError loc1 loc2 buf "lexical error" AlexSkip inp2 _ -> do - setInput inp2 - lexToken - AlexToken inp2@(AI end _ buf2) len t -> do - setInput inp2 - let span = mkSrcSpan loc1 end - let bytes = byteDiff buf buf2 - span `seq` setLastToken span bytes bytes - t span buf bytes + setInput inp2 + lexToken + AlexToken inp2@(AI end _ buf2) _ t -> do + setInput inp2 + let span = mkSrcSpan loc1 end + let bytes = byteDiff buf buf2 + span `seq` setLastToken span bytes bytes + t span buf bytes reportLexError loc1 loc2 buf str | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input")