X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Fparser%2FLexer.x;h=ccfc3e850592f91c387414d9ab54beeb1e9b8454;hb=98744cef7b82e7eefbb1c6f1d8b9e28c415939c4;hp=855c27e89aa2a17ac88130f02457f8e6e713429f;hpb=2bde7a3ed00cf16175b19d104895b5155332ed1d;p=ghc-hetmet.git diff --git a/ghc/compiler/parser/Lexer.x b/ghc/compiler/parser/Lexer.x index 855c27e..ccfc3e8 100644 --- a/ghc/compiler/parser/Lexer.x +++ b/ghc/compiler/parser/Lexer.x @@ -22,9 +22,9 @@ { module Lexer ( - Token(..), Token__(..), lexer, ExtFlags(..), mkPState, showPFailed, + Token(..), Token__(..), lexer, mkPState, showPFailed, P(..), ParseResult(..), setSrcLocFor, getSrcLoc, - failMsgP, failLocMsgP, srcParseFail, + failLocMsgP, srcParseFail, popContext, pushCurrentContext, ) where @@ -38,6 +38,7 @@ import FastString import FastTypes import SrcLoc import UniqFM +import CmdLineOpts import Ctype import Util ( maybePrefixMatch ) @@ -47,7 +48,7 @@ import Ratio import TRACE } -$whitechar = [\ \t\n\r\f\v] +$whitechar = [\ \t\n\r\f\v\xa0] $white_no_nl = $whitechar # \n $ascdigit = 0-9 @@ -122,6 +123,7 @@ $white_no_nl+ ; { \n ; ^\# (line)? { begin line_prag1 } + ^\# pragma .* \n ; -- GCC 3.3 CPP generated, apparently () { do_bol } } @@ -185,27 +187,41 @@ $white_no_nl+ ; -- "special" symbols +<0,glaexts> { + "[:" / { ifExtension parrEnabled } { token ITopabrack } + ":]" / { ifExtension parrEnabled } { token ITcpabrack } +} + +<0,glaexts> { + "[|" / { ifExtension thEnabled } { token ITopenExpQuote } + "[e|" / { ifExtension thEnabled } { token ITopenExpQuote } + "[p|" / { ifExtension thEnabled } { token ITopenPatQuote } + "[d|" / { ifExtension thEnabled } { layout_token ITopenDecQuote } + "[t|" / { ifExtension thEnabled } { token ITopenTypQuote } + "|]" / { ifExtension thEnabled } { token ITcloseQuote } + \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape } + "$(" / { ifExtension thEnabled } { token ITparenEscape } +} + +<0,glaexts> { + "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol } + { special IToparenbar } + "|)" / { ifExtension arrowsEnabled } { special ITcparenbar } +} + +<0,glaexts> { + \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid } + \% @varid / { ifExtension ipEnabled } { skip_one_varid ITsplitipvarid } +} + { - "(#" { token IToubxparen } + "(#" / { notFollowedBySymbol } { token IToubxparen } "#)" { token ITcubxparen } - - "[:" { token ITopabrack } - ":]" { token ITcpabrack } - "{|" { token ITocurlybar } "|}" { token ITccurlybar } - - "[|" { token ITopenExpQuote } - "[e|" { token ITopenExpQuote } - "[p|" { token ITopenPatQuote } - "[d|" { layout_token ITopenDecQuote } - "[t|" { token ITopenTypQuote } - "|]" { token ITcloseQuote } } <0,glaexts> { - "(|" / { \b _ _ _ -> arrowsEnabled b} { special IToparenbar } - "|)" / { \b _ _ _ -> arrowsEnabled b} { special ITcparenbar } \( { special IToparen } \) { special ITcparen } \[ { special ITobrack } @@ -218,13 +234,6 @@ $white_no_nl+ ; \} { close_brace } } - { - \? @varid { skip_one_varid ITdupipvarid } - \% @varid { skip_one_varid ITsplitipvarid } - \$ @varid { skip_one_varid ITidEscape } - "$(" { token ITparenEscape } -} - <0,glaexts> { @qual @varid { check_qvarid } @qual @conid { idtoken qconid } @@ -241,7 +250,7 @@ $white_no_nl+ ; { @qual @varid "#"+ { idtoken qvarid } - @qual (@conid "#"+ | "()") { idtoken qconid } + @qual @conid "#"+ { idtoken qconid } @varid "#"+ { varid } @conid "#"+ { idtoken conid } } @@ -280,9 +289,6 @@ $white_no_nl+ ; \" { lex_string_tok } } - "``" (([$graphic $whitechar] # \') | \' ([$graphic $whitechar] # \'))* - "''" { clitlit } - { -- work around bug in Alex 2.0 #if __GLASGOW_HASKELL__ < 503 @@ -329,11 +335,9 @@ data Token__ | ITsafe | ITthreadsafe | ITunsafe - | ITwith | ITstdcallconv | ITccallconv | ITdotnet - | ITccall (Bool,Bool,Safety) -- (is_dyn, is_casm, may_gc) | ITmdo | ITspecialise_prag -- Pragmas @@ -408,7 +412,6 @@ data Token__ | ITprimint Integer | ITprimfloat Rational | ITprimdouble Rational - | ITlitlit FastString -- MetaHaskell extension tokens | ITopenExpQuote -- [| or [e| @@ -418,9 +421,8 @@ data Token__ | ITcloseQuote -- |] | ITidEscape FastString -- $x | ITparenEscape -- $( - | ITreifyType - | ITreifyDecl - | ITreifyFixity + | ITvarQuote -- ' + | ITtyQuote -- '' -- Arrow notation extension | ITproc @@ -452,7 +454,6 @@ isSpecial ITdynamic = True isSpecial ITsafe = True isSpecial ITthreadsafe = True isSpecial ITunsafe = True -isSpecial ITwith = True isSpecial ITccallconv = True isSpecial ITstdcallconv = True isSpecial ITmdo = True @@ -496,9 +497,6 @@ reservedWordsFM = listToUFM $ ( "forall", ITforall, bit glaExtsBit), ( "mdo", ITmdo, bit glaExtsBit), - ( "reifyDecl", ITreifyDecl, bit glaExtsBit), - ( "reifyType", ITreifyType, bit glaExtsBit), - ( "reifyFixity",ITreifyFixity, bit glaExtsBit), ( "foreign", ITforeign, bit ffiBit), ( "export", ITexport, bit ffiBit), @@ -511,20 +509,8 @@ reservedWordsFM = listToUFM $ ( "ccall", ITccallconv, bit ffiBit), ( "dotnet", ITdotnet, bit ffiBit), - ( "with", ITwith, bit withBit), - ( "rec", ITrec, bit arrowsBit), - ( "proc", ITproc, bit arrowsBit), - - -- On death row - ("_ccall_", ITccall (False, False, PlayRisky), - bit glaExtsBit), - ("_ccall_GC_", ITccall (False, False, PlaySafe False), - bit glaExtsBit), - ("_casm_", ITccall (False, True, PlayRisky), - bit glaExtsBit), - ("_casm_GC_", ITccall (False, True, PlaySafe False), - bit glaExtsBit) + ( "proc", ITproc, bit arrowsBit) ] reservedSymsFM = listToUFM $ @@ -592,6 +578,11 @@ pop_and act loc end buf len = do popLexState; act loc end buf len notFollowedBy char _ _ _ (_,buf) = atEnd buf || currentChar buf /= char +notFollowedBySymbol _ _ _ (_,buf) + = atEnd buf || currentChar buf `notElem` "!#$%&*+./<=>?@\\^|-~" + +ifExtension pred bits _ _ _ = pred bits + {- nested comments require traversing by hand, they can't be parsed using regular expressions. @@ -739,10 +730,6 @@ parseInteger buf len radix to_int where go i x | i == len = x | otherwise = go (i+1) (x * radix + toInteger (to_int (lookAhead buf i))) -clitlit :: Action -clitlit loc end buf len = - return (T loc end (ITlitlit $! lexemeToFastString (stepOnBy 2 buf) (len-4))) - -- ----------------------------------------------------------------------------- -- Layout processing @@ -871,6 +858,13 @@ lex_string s = do c <- lex_char lex_string (c:s) +lex_char :: P Char +lex_char = do + mc <- getCharOrFail + case mc of + '\\' -> lex_escape + c | is_any c -> return c + _other -> lit_error lex_stringgap s = do c <- getCharOrFail @@ -881,34 +875,61 @@ lex_stringgap s = do lex_char_tok :: Action -lex_char_tok loc _end buf len = do - c <- lex_char - mc <- getCharOrFail - case mc of - '\'' -> do - glaexts <- extension glaExtsEnabled - if glaexts - then do - i@(end,_) <- getInput - case alexGetChar i of +-- Here we are basically parsing character literals, such as 'x' or '\n' +-- but, when Template Haskell is on, we additionally spot +-- 'x and ''T, returning ITvarQuote and ITtyQuote respectively, +-- but WIHTOUT CONSUMING the x or T part (the parser does that). +-- So we have to do two characters of lookahead: when we see 'x we need to +-- see if there's a trailing quote +lex_char_tok loc _end buf len = do -- We've seen ' + i1 <- getInput -- Look ahead to first character + case alexGetChar i1 of + Nothing -> lit_error + + Just ('\'', i2@(end2,_)) -> do -- We've seen '' + th_exts <- extension thEnabled + if th_exts then do + setInput i2 + return (T loc end2 ITtyQuote) + else lit_error + + Just ('\\', i2@(end2,_)) -> do -- We've seen 'backslash + setInput i2 + lit_ch <- lex_escape + mc <- getCharOrFail -- Trailing quote + if mc == '\'' then finish_char_tok loc lit_ch + else lit_error + + Just (c, i2@(end2,_)) | not (is_any c) -> lit_error + | otherwise -> + + -- We've seen 'x, where x is a valid character + -- (i.e. not newline etc) but not a quote or backslash + case alexGetChar i2 of -- Look ahead one more character + Nothing -> lit_error + Just ('\'', i3) -> do -- We've seen 'x' + setInput i3 + finish_char_tok loc c + _other -> do -- We've seen 'x not followed by quote + -- If TH is on, just parse the quote only + th_exts <- extension thEnabled + if th_exts then return (T loc (fst i1) ITvarQuote) + else lit_error + +finish_char_tok :: SrcLoc -> Char -> P Token +finish_char_tok loc ch -- We've already seen the closing quote + -- Just need to check for trailing # + = do glaexts <- extension glaExtsEnabled + if glaexts then do + i@(end,_) <- getInput + case alexGetChar i of Just ('#',i@(end,_)) -> do setInput i - return (T loc end (ITprimchar c)) + return (T loc end (ITprimchar ch)) _other -> - return (T loc end (ITchar c)) - else do - end <- getSrcLoc - return (T loc end (ITchar c)) - - _other -> lit_error - -lex_char :: P Char -lex_char = do - mc <- getCharOrFail - case mc of - '\\' -> lex_escape - c | is_any c -> return c - _other -> lit_error + return (T loc end (ITchar ch)) + else do end <- getSrcLoc + return (T loc end (ITchar ch)) lex_escape :: P Char lex_escape = do @@ -1097,8 +1118,7 @@ data ParseResult a -- show this span, e.g. by highlighting it. Message -- The error message -showPFailed loc1 loc2 err - = showSDoc (hcat [ppr loc1, text ": ", err]) +showPFailed loc1 loc2 err = hcat [ppr loc1, text ": ", err] data PState = PState { buffer :: StringBuffer, @@ -1196,32 +1216,22 @@ glaExtsBit, ffiBit, parrBit :: Int glaExtsBit = 0 ffiBit = 1 parrBit = 2 -withBit = 3 arrowsBit = 4 +thBit = 5 +ipBit = 6 glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool glaExtsEnabled flags = testBit flags glaExtsBit ffiEnabled flags = testBit flags ffiBit -withEnabled flags = testBit flags withBit parrEnabled flags = testBit flags parrBit arrowsEnabled flags = testBit flags arrowsBit - --- convenient record-based bitmap for the interface to the rest of the world --- --- NB: `glasgowExtsEF' implies `ffiEF' (see `mkPState' below) --- -data ExtFlags = ExtFlags { - glasgowExtsEF :: Bool, - ffiEF :: Bool, - withEF :: Bool, - parrEF :: Bool, - arrowsEF :: Bool - } +thEnabled flags = testBit flags thBit +ipEnabled flags = testBit flags ipBit -- create a parse state -- -mkPState :: StringBuffer -> SrcLoc -> ExtFlags -> PState -mkPState buf loc exts = +mkPState :: StringBuffer -> SrcLoc -> DynFlags -> PState +mkPState buf loc flags = PState { buffer = buf, last_loc = loc, @@ -1233,12 +1243,12 @@ mkPState buf loc exts = -- we begin in the layout state if toplev_layout is set } where - bitmap = glaExtsBit `setBitIf` glasgowExtsEF exts - .|. ffiBit `setBitIf` (ffiEF exts - || glasgowExtsEF exts) - .|. withBit `setBitIf` withEF exts - .|. parrBit `setBitIf` parrEF exts - .|. arrowsBit `setBitIf` arrowsEF exts + bitmap = glaExtsBit `setBitIf` dopt Opt_GlasgowExts flags + .|. ffiBit `setBitIf` dopt Opt_FFI flags + .|. parrBit `setBitIf` dopt Opt_PArr flags + .|. arrowsBit `setBitIf` dopt Opt_Arrows flags + .|. thBit `setBitIf` dopt Opt_TH flags + .|. ipBit `setBitIf` dopt Opt_ImplicitParams flags -- setBitIf :: Int -> Bool -> Int b `setBitIf` cond | cond = bit b