X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fparser%2FLexer.x;h=3a93ba1c200dd8cdcbf618e08164e93ae3f54ec2;hp=5fa2768162cf44cad800722763c75cd1f99a17d8;hb=f04dead93a15af1cb818172f207b8a81d2c81298;hpb=6686da2c412d2d7ca6e4b6dc2a7b3ee7bf1752c0 diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x index 5fa2768..3a93ba1 100644 --- a/compiler/parser/Lexer.x +++ b/compiler/parser/Lexer.x @@ -12,7 +12,6 @@ ----------------------------------------------------------------------------- -- ToDo / known bugs: --- - Unicode -- - parsing integers is a bit slow -- - readRational is a bit slow -- @@ -32,7 +31,7 @@ -- qualified varids. { -{-# OPTIONS -Wwarn #-} +{-# OPTIONS -Wwarn -w #-} -- The above warning supression flag is a temporary kludge. -- While working on this module you are encouraged to remove it and fix -- any warnings in the module. See @@ -47,6 +46,7 @@ module Lexer ( Token(..), lexer, pragState, mkPState, PState(..), P(..), ParseResult(..), getSrcLoc, + getPState, failLocMsgP, failSpanMsgP, srcParseFail, getMessages, popContext, pushCurrentContext, setLastToken, setSrcLoc, @@ -65,11 +65,15 @@ import SrcLoc import UniqFM import DynFlags import Ctype -import Util ( maybePrefixMatch, readRational ) +import Util ( readRational ) import Control.Monad import Data.Bits import Data.Char +import Data.List +import Data.Maybe +import Data.Map (Map) +import qualified Data.Map as Map import Data.Ratio } @@ -105,6 +109,8 @@ $symchar = [$symbol \:] $nl = [\n\r] $idchar = [$small $large $digit \'] +$pragmachar = [$small $large $digit] + $docsym = [\| \^ \* \$] @varid = $small $idchar* @@ -219,8 +225,8 @@ $tab+ { warn Opt_WarnTabs (text "Tab character") } <0,option_prags> \n { begin bol } -"{-#" $whitechar* (line|LINE) / { notFollowedByPragmaChar } - { begin line_prag2 } +"{-#" $whitechar* $pragmachar+ / { known_pragma linePrags } + { dispatch_pragmas linePrags } -- single-line line pragmas, of the form -- # "" \n @@ -237,69 +243,31 @@ $tab+ { warn Opt_WarnTabs (text "Tab character") } -- with older versions of GHC which generated these. <0,option_prags> { - "{-#" $whitechar* (RULES|rules) / { notFollowedByPragmaChar } { rulePrag } - "{-#" $whitechar* (INLINE|inline) / { notFollowedByPragmaChar } - { token (ITinline_prag True) } - "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar } - { token (ITinline_prag False) } - "{-#" $whitechar* (INLINE|inline) - $whitechar+ (CONLIKE|conlike) / { notFollowedByPragmaChar } - { token (ITinline_conlike_prag True) } - "{-#" $whitechar* (NO(T)?INLINE|no(t?)inline) - $whitechar+ (CONLIKE|constructorlike) / { notFollowedByPragmaChar } - { token (ITinline_conlike_prag False) } - "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) / { notFollowedByPragmaChar } - { token ITspec_prag } - "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) - $whitechar+ (INLINE|inline) / { notFollowedByPragmaChar } - { token (ITspec_inline_prag True) } - "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) - $whitechar+ (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar } - { token (ITspec_inline_prag False) } - "{-#" $whitechar* (SOURCE|source) / { notFollowedByPragmaChar } - { token ITsource_prag } - "{-#" $whitechar* (WARNING|warning) / { notFollowedByPragmaChar } - { token ITwarning_prag } - "{-#" $whitechar* (DEPRECATED|deprecated) / { notFollowedByPragmaChar } - { token ITdeprecated_prag } - "{-#" $whitechar* (SCC|scc) / { notFollowedByPragmaChar } - { token ITscc_prag } - "{-#" $whitechar* (GENERATED|generated) / { notFollowedByPragmaChar } - { token ITgenerated_prag } - "{-#" $whitechar* (CORE|core) / { notFollowedByPragmaChar } - { token ITcore_prag } - "{-#" $whitechar* (UNPACK|unpack) / { notFollowedByPragmaChar } - { token ITunpack_prag } - "{-#" $whitechar* (ANN|ann) / { notFollowedByPragmaChar } - { token ITann_prag } + "{-#" $whitechar* $pragmachar+ + $whitechar+ $pragmachar+ / { known_pragma twoWordPrags } + { dispatch_pragmas twoWordPrags } + + "{-#" $whitechar* $pragmachar+ / { known_pragma oneWordPrags } + { dispatch_pragmas oneWordPrags } -- We ignore all these pragmas, but don't generate a warning for them - -- CFILES is a hugs-only thing. - "{-#" $whitechar* (OPTIONS_(HUGS|hugs|NHC98|nhc98|JHC|jhc|YHC|yhc|CATCH|catch|DERIVE|derive)|CFILES|cfiles|CONTRACT|contract) / { notFollowedByPragmaChar } - { nested_comment lexToken } + "{-#" $whitechar* $pragmachar+ / { known_pragma ignoredPrags } + { dispatch_pragmas ignoredPrags } -- ToDo: should only be valid inside a pragma: "#-}" { endPrag } } { - "{-#" $whitechar* (OPTIONS|options) / { notFollowedByPragmaChar } - { lex_string_prag IToptions_prag } - "{-#" $whitechar* (OPTIONS_GHC|options_ghc) / { notFollowedByPragmaChar } - { lex_string_prag IToptions_prag } - "{-#" $whitechar* (OPTIONS_HADDOCK|options_haddock) - / { notFollowedByPragmaChar } - { lex_string_prag ITdocOptions } + "{-#" $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags } + { dispatch_pragmas fileHeaderPrags } + "-- #" { multiline_doc_comment } - "{-#" $whitechar* (LANGUAGE|language) / { notFollowedByPragmaChar } - { token ITlanguage_prag } - "{-#" $whitechar* (INCLUDE|include) / { notFollowedByPragmaChar } - { lex_string_prag ITinclude_prag } } <0> { -- In the "0" mode we ignore these pragmas - "{-#" $whitechar* (OPTIONS|options|OPTIONS_GHC|options_ghc|OPTIONS_HADDOCK|options_haddock|LANGUAGE|language|INCLUDE|include) / { notFollowedByPragmaChar } + "{-#" $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags } { nested_comment lexToken } } @@ -485,7 +453,7 @@ data Token | ITunsafe | ITstdcallconv | ITccallconv - | ITdotnet + | ITprimcallconv | ITmdo | ITfamily | ITgroup @@ -562,8 +530,6 @@ data Token | ITdupipvarid FastString -- GHC extension: implicit param: ?x - | ITpragma StringBuffer - | ITchar Char | ITstring FastString | ITinteger Integer @@ -576,7 +542,7 @@ data Token | ITprimfloat Rational | ITprimdouble Rational - -- MetaHaskell extension tokens + -- Template Haskell extension tokens | ITopenExpQuote -- [| or [e| | ITopenPatQuote -- [p| | ITopenDecQuote -- [d| @@ -632,6 +598,7 @@ isSpecial ITthreadsafe = True isSpecial ITunsafe = True isSpecial ITccallconv = True isSpecial ITstdcallconv = True +isSpecial ITprimcallconv = True isSpecial ITmdo = True isSpecial ITfamily = True isSpecial ITgroup = True @@ -689,13 +656,13 @@ reservedWordsFM = listToUFM $ ( "label", ITlabel, bit ffiBit), ( "dynamic", ITdynamic, bit ffiBit), ( "safe", ITsafe, bit ffiBit), - ( "threadsafe", ITthreadsafe, bit ffiBit), + ( "threadsafe", ITthreadsafe, bit ffiBit), -- ToDo: remove ( "unsafe", ITunsafe, bit ffiBit), ( "stdcall", ITstdcallconv, bit ffiBit), ( "ccall", ITccallconv, bit ffiBit), - ( "dotnet", ITdotnet, bit ffiBit), + ( "prim", ITprimcallconv, bit ffiBit), - ( "rec", ITrec, bit arrowsBit), + ( "rec", ITrec, bit recBit), ( "proc", ITproc, bit arrowsBit) ] @@ -788,10 +755,6 @@ notFollowedBySymbol :: AlexAccPred Int notFollowedBySymbol _ _ _ (AI _ _ buf) = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~") -notFollowedByPragmaChar :: AlexAccPred Int -notFollowedByPragmaChar _ _ _ (AI _ _ buf) - = nextCharIs buf (\c -> not (isAlphaNum c || c == '_')) - -- We must reject doc comments as being ordinary comments everywhere. -- In some cases the doc comment will be selected as the lexeme due to -- maximal munch, but not always, because the nested comment rule is @@ -1253,7 +1216,7 @@ lex_char_tok :: Action -- Here we are basically parsing character literals, such as 'x' or '\n' -- but, when Template Haskell is on, we additionally spot -- 'x and ''T, returning ITvarQuote and ITtyQuote respectively, --- but WIHTOUT CONSUMING the x or T part (the parser does that). +-- but WITHOUT CONSUMING the x or T part (the parser does that). -- So we have to do two characters of lookahead: when we see 'x we need to -- see if there's a trailing quote lex_char_tok span _buf _len = do -- We've seen ' @@ -1353,7 +1316,7 @@ lex_escape = do Just (c3,i3) -> let str = [c1,c2,c3] in case [ (c,rest) | (p,c) <- silly_escape_chars, - Just rest <- [maybePrefixMatch p str] ] of + Just rest <- [stripPrefix p str] ] of (escape_char,[]):_ -> do setInput i3 return escape_char @@ -1502,14 +1465,14 @@ data ParseResult a data PState = PState { buffer :: StringBuffer, - dflags :: DynFlags, - messages :: Messages, + dflags :: DynFlags, + messages :: Messages, last_loc :: SrcSpan, -- pos of previous token last_offs :: !Int, -- offset of the previous token from the -- beginning of the current line. -- \t is equal to 8 spaces. last_len :: !Int, -- len of previous token - last_line_len :: !Int, + last_line_len :: !Int, loc :: SrcLoc, -- current loc (end of prev token + 1) extsBitmap :: !Int, -- bitmap that determines permitted extensions context :: [LayoutContext], @@ -1549,6 +1512,9 @@ failLocMsgP loc1 loc2 str = P $ \_ -> PFailed (mkSrcSpan loc1 loc2) (text str) failSpanMsgP :: SrcSpan -> SDoc -> P a failSpanMsgP span msg = P $ \_ -> PFailed span msg +getPState :: P PState +getPState = P $ \s -> POk s s + extension :: (Int -> Bool) -> P Bool extension p = P $ \s -> POk s (p $! extsBitmap s) @@ -1598,7 +1564,7 @@ alexGetChar (AI loc ofs s) | c <= '\x06' = non_graphic | c <= '\x7f' = c -- Alex doesn't handle Unicode, so when Unicode - -- character is encoutered we output these values + -- character is encountered we output these values -- with the actual character value hidden in the state. | otherwise = case generalCategory c of @@ -1706,6 +1672,8 @@ rawTokenStreamBit :: Int rawTokenStreamBit = 20 -- producing a token stream with all comments included newQualOpsBit :: Int newQualOpsBit = 21 -- Haskell' qualified operator syntax, e.g. Prelude.(+) +recBit :: Int +recBit = 22 -- rec always :: Int -> Bool always _ = True @@ -1787,26 +1755,23 @@ mkPState buf loc flags = } where bitmap = genericsBit `setBitIf` dopt Opt_Generics flags - .|. ffiBit `setBitIf` dopt Opt_ForeignFunctionInterface flags - .|. parrBit `setBitIf` dopt Opt_PArr flags - .|. arrowsBit `setBitIf` dopt Opt_Arrows flags - .|. thBit `setBitIf` dopt Opt_TemplateHaskell flags - .|. qqBit `setBitIf` dopt Opt_QuasiQuotes flags - .|. ipBit `setBitIf` dopt Opt_ImplicitParams flags - .|. explicitForallBit `setBitIf` dopt Opt_ScopedTypeVariables flags - .|. explicitForallBit `setBitIf` dopt Opt_LiberalTypeSynonyms flags - .|. explicitForallBit `setBitIf` dopt Opt_PolymorphicComponents flags - .|. explicitForallBit `setBitIf` dopt Opt_ExistentialQuantification flags - .|. explicitForallBit `setBitIf` dopt Opt_Rank2Types flags - .|. explicitForallBit `setBitIf` dopt Opt_RankNTypes flags - .|. bangPatBit `setBitIf` dopt Opt_BangPatterns flags - .|. tyFamBit `setBitIf` dopt Opt_TypeFamilies flags - .|. haddockBit `setBitIf` dopt Opt_Haddock flags - .|. magicHashBit `setBitIf` dopt Opt_MagicHash flags - .|. kindSigsBit `setBitIf` dopt Opt_KindSignatures flags - .|. recursiveDoBit `setBitIf` dopt Opt_RecursiveDo flags - .|. unicodeSyntaxBit `setBitIf` dopt Opt_UnicodeSyntax flags - .|. unboxedTuplesBit `setBitIf` dopt Opt_UnboxedTuples flags + .|. ffiBit `setBitIf` dopt Opt_ForeignFunctionInterface flags + .|. parrBit `setBitIf` dopt Opt_PArr flags + .|. arrowsBit `setBitIf` dopt Opt_Arrows flags + .|. thBit `setBitIf` dopt Opt_TemplateHaskell flags + .|. qqBit `setBitIf` dopt Opt_QuasiQuotes flags + .|. ipBit `setBitIf` dopt Opt_ImplicitParams flags + .|. explicitForallBit `setBitIf` dopt Opt_ExplicitForAll flags + .|. bangPatBit `setBitIf` dopt Opt_BangPatterns flags + .|. tyFamBit `setBitIf` dopt Opt_TypeFamilies flags + .|. haddockBit `setBitIf` dopt Opt_Haddock flags + .|. magicHashBit `setBitIf` dopt Opt_MagicHash flags + .|. kindSigsBit `setBitIf` dopt Opt_KindSignatures flags + .|. recursiveDoBit `setBitIf` dopt Opt_RecursiveDo flags + .|. recBit `setBitIf` dopt Opt_DoRec flags + .|. recBit `setBitIf` dopt Opt_Arrows flags + .|. unicodeSyntaxBit `setBitIf` dopt Opt_UnicodeSyntax flags + .|. unboxedTuplesBit `setBitIf` dopt Opt_UnboxedTuples flags .|. standaloneDerivingBit `setBitIf` dopt Opt_StandaloneDeriving flags .|. transformComprehensionsBit `setBitIf` dopt Opt_TransformListComp flags .|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags @@ -1936,4 +1901,59 @@ lexTokenStream buf loc dflags = unP go initState case ltok of L _ ITeof -> return [] _ -> liftM (ltok:) go + +linePrags = Map.singleton "line" (begin line_prag2) + +fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag), + ("options_ghc", lex_string_prag IToptions_prag), + ("options_haddock", lex_string_prag ITdocOptions), + ("language", token ITlanguage_prag), + ("include", lex_string_prag ITinclude_prag)]) + +ignoredPrags = Map.fromList (map ignored pragmas) + where ignored opt = (opt, nested_comment lexToken) + impls = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"] + options_pragmas = map ("options_" ++) impls + -- CFILES is a hugs-only thing. + pragmas = options_pragmas ++ ["cfiles", "contract"] + +oneWordPrags = Map.fromList([("rules", rulePrag), + ("inline", token (ITinline_prag True)), + ("notinline", token (ITinline_prag False)), + ("specialize", token ITspec_prag), + ("source", token ITsource_prag), + ("warning", token ITwarning_prag), + ("deprecated", token ITdeprecated_prag), + ("scc", token ITscc_prag), + ("generated", token ITgenerated_prag), + ("core", token ITcore_prag), + ("unpack", token ITunpack_prag), + ("ann", token ITann_prag)]) + +twoWordPrags = Map.fromList([("inline conlike", token (ITinline_conlike_prag True)), + ("notinline conlike", token (ITinline_conlike_prag False)), + ("specialize inline", token (ITspec_inline_prag True)), + ("specialize notinline", token (ITspec_inline_prag False))]) + + +dispatch_pragmas :: Map String Action -> Action +dispatch_pragmas prags span buf len = case Map.lookup (clean_pragma (lexemeToString buf len)) prags of + Just found -> found span buf len + Nothing -> lexError "unknown pragma" + +known_pragma :: Map String Action -> AlexAccPred Int +known_pragma prags _ _ len (AI _ _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags) + && (nextCharIs buf (\c -> not (isAlphaNum c || c == '_'))) + +clean_pragma :: String -> String +clean_pragma prag = canon_ws (map toLower (unprefix prag)) + where unprefix prag' = case stripPrefix "{-#" prag' of + Just rest -> rest + Nothing -> prag' + canonical prag' = case prag' of + "noinline" -> "notinline" + "specialise" -> "specialize" + "constructorlike" -> "conlike" + otherwise -> prag' + canon_ws s = unwords (map canonical (words s)) }