X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fparser%2FLexer.x;h=7f5c3a435dd7f554e2bed4b003ec3f64ef1edd18;hp=b4d40cea37d485bea3d4148232197153efc32099;hb=1e50fd4185479a62e02d987bdfcb1c62712859ca;hpb=05c09e4312062aa66d64183a55f8ae5c34b58620 diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x index b4d40ce..7f5c3a4 100644 --- a/compiler/parser/Lexer.x +++ b/compiler/parser/Lexer.x @@ -64,11 +64,15 @@ import SrcLoc import UniqFM import DynFlags import Ctype -import Util ( maybePrefixMatch, readRational ) +import Util ( readRational ) import Control.Monad import Data.Bits import Data.Char +import Data.List +import Data.Maybe +import Data.Map (Map) +import qualified Data.Map as Map import Data.Ratio } @@ -104,6 +108,8 @@ $symchar = [$symbol \:] $nl = [\n\r] $idchar = [$small $large $digit \'] +$pragmachar = [$small $large $digit] + $docsym = [\| \^ \* \$] @varid = $small $idchar* @@ -218,8 +224,8 @@ $tab+ { warn Opt_WarnTabs (text "Tab character") } <0,option_prags> \n { begin bol } -"{-#" $whitechar* (line|LINE) / { notFollowedByPragmaChar } - { begin line_prag2 } +"{-#" $whitechar* $pragmachar+ / { known_pragma linePrags } + { dispatch_pragmas linePrags } -- single-line line pragmas, of the form -- # "" \n @@ -236,69 +242,31 @@ $tab+ { warn Opt_WarnTabs (text "Tab character") } -- with older versions of GHC which generated these. <0,option_prags> { - "{-#" $whitechar* (RULES|rules) / { notFollowedByPragmaChar } { rulePrag } - "{-#" $whitechar* (INLINE|inline) / { notFollowedByPragmaChar } - { token (ITinline_prag True) } - "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar } - { token (ITinline_prag False) } - "{-#" $whitechar* (INLINE|inline) - $whitechar+ (CONLIKE|conlike) / { notFollowedByPragmaChar } - { token (ITinline_conlike_prag True) } - "{-#" $whitechar* (NO(T)?INLINE|no(t?)inline) - $whitechar+ (CONLIKE|constructorlike) / { notFollowedByPragmaChar } - { token (ITinline_conlike_prag False) } - "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) / { notFollowedByPragmaChar } - { token ITspec_prag } - "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) - $whitechar+ (INLINE|inline) / { notFollowedByPragmaChar } - { token (ITspec_inline_prag True) } - "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) - $whitechar+ (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar } - { token (ITspec_inline_prag False) } - "{-#" $whitechar* (SOURCE|source) / { notFollowedByPragmaChar } - { token ITsource_prag } - "{-#" $whitechar* (WARNING|warning) / { notFollowedByPragmaChar } - { token ITwarning_prag } - "{-#" $whitechar* (DEPRECATED|deprecated) / { notFollowedByPragmaChar } - { token ITdeprecated_prag } - "{-#" $whitechar* (SCC|scc) / { notFollowedByPragmaChar } - { token ITscc_prag } - "{-#" $whitechar* (GENERATED|generated) / { notFollowedByPragmaChar } - { token ITgenerated_prag } - "{-#" $whitechar* (CORE|core) / { notFollowedByPragmaChar } - { token ITcore_prag } - "{-#" $whitechar* (UNPACK|unpack) / { notFollowedByPragmaChar } - { token ITunpack_prag } - "{-#" $whitechar* (ANN|ann) / { notFollowedByPragmaChar } - { token ITann_prag } + "{-#" $whitechar* $pragmachar+ + $whitechar+ $pragmachar+ / { known_pragma twoWordPrags } + { dispatch_pragmas twoWordPrags } + + "{-#" $whitechar* $pragmachar+ / { known_pragma oneWordPrags } + { dispatch_pragmas oneWordPrags } -- We ignore all these pragmas, but don't generate a warning for them - -- CFILES is a hugs-only thing. - "{-#" $whitechar* (OPTIONS_(HUGS|hugs|NHC98|nhc98|JHC|jhc|YHC|yhc|CATCH|catch|DERIVE|derive)|CFILES|cfiles|CONTRACT|contract) / { notFollowedByPragmaChar } - { nested_comment lexToken } + "{-#" $whitechar* $pragmachar+ / { known_pragma ignoredPrags } + { dispatch_pragmas ignoredPrags } -- ToDo: should only be valid inside a pragma: "#-}" { endPrag } } { - "{-#" $whitechar* (OPTIONS|options) / { notFollowedByPragmaChar } - { lex_string_prag IToptions_prag } - "{-#" $whitechar* (OPTIONS_GHC|options_ghc) / { notFollowedByPragmaChar } - { lex_string_prag IToptions_prag } - "{-#" $whitechar* (OPTIONS_HADDOCK|options_haddock) - / { notFollowedByPragmaChar } - { lex_string_prag ITdocOptions } + "{-#" $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags } + { dispatch_pragmas fileHeaderPrags } + "-- #" { multiline_doc_comment } - "{-#" $whitechar* (LANGUAGE|language) / { notFollowedByPragmaChar } - { token ITlanguage_prag } - "{-#" $whitechar* (INCLUDE|include) / { notFollowedByPragmaChar } - { lex_string_prag ITinclude_prag } } <0> { -- In the "0" mode we ignore these pragmas - "{-#" $whitechar* (OPTIONS|options|OPTIONS_GHC|options_ghc|OPTIONS_HADDOCK|options_haddock|LANGUAGE|language|INCLUDE|include) / { notFollowedByPragmaChar } + "{-#" $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags } { nested_comment lexToken } } @@ -484,6 +452,7 @@ data Token | ITunsafe | ITstdcallconv | ITccallconv + | ITprimcallconv | ITdotnet | ITmdo | ITfamily @@ -631,6 +600,7 @@ isSpecial ITthreadsafe = True isSpecial ITunsafe = True isSpecial ITccallconv = True isSpecial ITstdcallconv = True +isSpecial ITprimcallconv = True isSpecial ITmdo = True isSpecial ITfamily = True isSpecial ITgroup = True @@ -688,10 +658,11 @@ reservedWordsFM = listToUFM $ ( "label", ITlabel, bit ffiBit), ( "dynamic", ITdynamic, bit ffiBit), ( "safe", ITsafe, bit ffiBit), - ( "threadsafe", ITthreadsafe, bit ffiBit), + ( "threadsafe", ITthreadsafe, bit ffiBit), -- ToDo: remove ( "unsafe", ITunsafe, bit ffiBit), ( "stdcall", ITstdcallconv, bit ffiBit), ( "ccall", ITccallconv, bit ffiBit), + ( "prim", ITprimcallconv, bit ffiBit), ( "dotnet", ITdotnet, bit ffiBit), ( "rec", ITrec, bit arrowsBit), @@ -787,10 +758,6 @@ notFollowedBySymbol :: AlexAccPred Int notFollowedBySymbol _ _ _ (AI _ _ buf) = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~") -notFollowedByPragmaChar :: AlexAccPred Int -notFollowedByPragmaChar _ _ _ (AI _ _ buf) - = nextCharIs buf (\c -> not (isAlphaNum c || c == '_')) - -- We must reject doc comments as being ordinary comments everywhere. -- In some cases the doc comment will be selected as the lexeme due to -- maximal munch, but not always, because the nested comment rule is @@ -1252,7 +1219,7 @@ lex_char_tok :: Action -- Here we are basically parsing character literals, such as 'x' or '\n' -- but, when Template Haskell is on, we additionally spot -- 'x and ''T, returning ITvarQuote and ITtyQuote respectively, --- but WIHTOUT CONSUMING the x or T part (the parser does that). +-- but WITHOUT CONSUMING the x or T part (the parser does that). -- So we have to do two characters of lookahead: when we see 'x we need to -- see if there's a trailing quote lex_char_tok span _buf _len = do -- We've seen ' @@ -1352,7 +1319,7 @@ lex_escape = do Just (c3,i3) -> let str = [c1,c2,c3] in case [ (c,rest) | (p,c) <- silly_escape_chars, - Just rest <- [maybePrefixMatch p str] ] of + Just rest <- [stripPrefix p str] ] of (escape_char,[]):_ -> do setInput i3 return escape_char @@ -1501,14 +1468,14 @@ data ParseResult a data PState = PState { buffer :: StringBuffer, - dflags :: DynFlags, - messages :: Messages, + dflags :: DynFlags, + messages :: Messages, last_loc :: SrcSpan, -- pos of previous token last_offs :: !Int, -- offset of the previous token from the -- beginning of the current line. -- \t is equal to 8 spaces. last_len :: !Int, -- len of previous token - last_line_len :: !Int, + last_line_len :: !Int, loc :: SrcLoc, -- current loc (end of prev token + 1) extsBitmap :: !Int, -- bitmap that determines permitted extensions context :: [LayoutContext], @@ -1597,7 +1564,7 @@ alexGetChar (AI loc ofs s) | c <= '\x06' = non_graphic | c <= '\x7f' = c -- Alex doesn't handle Unicode, so when Unicode - -- character is encoutered we output these values + -- character is encountered we output these values -- with the actual character value hidden in the state. | otherwise = case generalCategory c of @@ -1935,4 +1902,59 @@ lexTokenStream buf loc dflags = unP go initState case ltok of L _ ITeof -> return [] _ -> liftM (ltok:) go + +linePrags = Map.singleton "line" (begin line_prag2) + +fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag), + ("options_ghc", lex_string_prag IToptions_prag), + ("options_haddock", lex_string_prag ITdocOptions), + ("language", token ITlanguage_prag), + ("include", lex_string_prag ITinclude_prag)]) + +ignoredPrags = Map.fromList (map ignored pragmas) + where ignored opt = (opt, nested_comment lexToken) + impls = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"] + options_pragmas = map ("options_" ++) impls + -- CFILES is a hugs-only thing. + pragmas = options_pragmas ++ ["cfiles", "contract"] + +oneWordPrags = Map.fromList([("rules", rulePrag), + ("inline", token (ITinline_prag True)), + ("notinline", token (ITinline_prag False)), + ("specialize", token ITspec_prag), + ("source", token ITsource_prag), + ("warning", token ITwarning_prag), + ("deprecated", token ITdeprecated_prag), + ("scc", token ITscc_prag), + ("generated", token ITgenerated_prag), + ("core", token ITcore_prag), + ("unpack", token ITunpack_prag), + ("ann", token ITann_prag)]) + +twoWordPrags = Map.fromList([("inline conlike", token (ITinline_conlike_prag True)), + ("notinline conlike", token (ITinline_conlike_prag False)), + ("specialize inline", token (ITspec_inline_prag True)), + ("specialize notinline", token (ITspec_inline_prag False))]) + + +dispatch_pragmas :: Map String Action -> Action +dispatch_pragmas prags span buf len = case Map.lookup (clean_pragma (lexemeToString buf len)) prags of + Just found -> found span buf len + Nothing -> lexError "unknown pragma" + +known_pragma :: Map String Action -> AlexAccPred Int +known_pragma prags _ _ len (AI _ _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags) + && (nextCharIs buf (\c -> not (isAlphaNum c || c == '_'))) + +clean_pragma :: String -> String +clean_pragma prag = canon_ws (map toLower (unprefix prag)) + where unprefix prag' = case stripPrefix "{-#" prag' of + Just rest -> rest + Nothing -> prag' + canonical prag' = case prag' of + "noinline" -> "notinline" + "specialise" -> "specialize" + "constructorlike" -> "conlike" + otherwise -> prag' + canon_ws s = unwords (map canonical (words s)) }