module Lexer (
Token(..), lexer, pragState, mkPState, PState(..),
P(..), ParseResult(..), getSrcLoc,
+ getPState,
failLocMsgP, failSpanMsgP, srcParseFail,
getMessages,
popContext, pushCurrentContext, setLastToken, setSrcLoc,
import UniqFM
import DynFlags
import Ctype
-import Util ( maybePrefixMatch, readRational )
+import Util ( readRational )
import Control.Monad
import Data.Bits
import Data.Char
+import Data.List
+import Data.Maybe
+import Data.Map (Map)
+import qualified Data.Map as Map
import Data.Ratio
}
$nl = [\n\r]
$idchar = [$small $large $digit \']
+$pragmachar = [$small $large $digit]
+
$docsym = [\| \^ \* \$]
@varid = $small $idchar*
<0,option_prags> \n { begin bol }
-"{-#" $whitechar* (line|LINE) / { notFollowedByPragmaChar }
- { begin line_prag2 }
+"{-#" $whitechar* $pragmachar+ / { known_pragma linePrags }
+ { dispatch_pragmas linePrags }
-- single-line line pragmas, of the form
-- # <line> "<file>" <extra-stuff> \n
-- with older versions of GHC which generated these.
<0,option_prags> {
- "{-#" $whitechar* (RULES|rules) / { notFollowedByPragmaChar } { rulePrag }
- "{-#" $whitechar* (INLINE|inline) / { notFollowedByPragmaChar }
- { token (ITinline_prag True) }
- "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar }
- { token (ITinline_prag False) }
- "{-#" $whitechar* (INLINE|inline)
- $whitechar+ (CONLIKE|conlike) / { notFollowedByPragmaChar }
- { token (ITinline_conlike_prag True) }
- "{-#" $whitechar* (NO(T)?INLINE|no(t?)inline)
- $whitechar+ (CONLIKE|constructorlike) / { notFollowedByPragmaChar }
- { token (ITinline_conlike_prag False) }
- "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) / { notFollowedByPragmaChar }
- { token ITspec_prag }
- "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
- $whitechar+ (INLINE|inline) / { notFollowedByPragmaChar }
- { token (ITspec_inline_prag True) }
- "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
- $whitechar+ (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar }
- { token (ITspec_inline_prag False) }
- "{-#" $whitechar* (SOURCE|source) / { notFollowedByPragmaChar }
- { token ITsource_prag }
- "{-#" $whitechar* (WARNING|warning) / { notFollowedByPragmaChar }
- { token ITwarning_prag }
- "{-#" $whitechar* (DEPRECATED|deprecated) / { notFollowedByPragmaChar }
- { token ITdeprecated_prag }
- "{-#" $whitechar* (SCC|scc) / { notFollowedByPragmaChar }
- { token ITscc_prag }
- "{-#" $whitechar* (GENERATED|generated) / { notFollowedByPragmaChar }
- { token ITgenerated_prag }
- "{-#" $whitechar* (CORE|core) / { notFollowedByPragmaChar }
- { token ITcore_prag }
- "{-#" $whitechar* (UNPACK|unpack) / { notFollowedByPragmaChar }
- { token ITunpack_prag }
- "{-#" $whitechar* (ANN|ann) / { notFollowedByPragmaChar }
- { token ITann_prag }
+ "{-#" $whitechar* $pragmachar+
+ $whitechar+ $pragmachar+ / { known_pragma twoWordPrags }
+ { dispatch_pragmas twoWordPrags }
+
+ "{-#" $whitechar* $pragmachar+ / { known_pragma oneWordPrags }
+ { dispatch_pragmas oneWordPrags }
-- We ignore all these pragmas, but don't generate a warning for them
- -- CFILES is a hugs-only thing.
- "{-#" $whitechar* (OPTIONS_(HUGS|hugs|NHC98|nhc98|JHC|jhc|YHC|yhc|CATCH|catch|DERIVE|derive)|CFILES|cfiles|CONTRACT|contract) / { notFollowedByPragmaChar }
- { nested_comment lexToken }
+ "{-#" $whitechar* $pragmachar+ / { known_pragma ignoredPrags }
+ { dispatch_pragmas ignoredPrags }
-- ToDo: should only be valid inside a pragma:
"#-}" { endPrag }
}
<option_prags> {
- "{-#" $whitechar* (OPTIONS|options) / { notFollowedByPragmaChar }
- { lex_string_prag IToptions_prag }
- "{-#" $whitechar* (OPTIONS_GHC|options_ghc) / { notFollowedByPragmaChar }
- { lex_string_prag IToptions_prag }
- "{-#" $whitechar* (OPTIONS_HADDOCK|options_haddock)
- / { notFollowedByPragmaChar }
- { lex_string_prag ITdocOptions }
+ "{-#" $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
+ { dispatch_pragmas fileHeaderPrags }
+
"-- #" { multiline_doc_comment }
- "{-#" $whitechar* (LANGUAGE|language) / { notFollowedByPragmaChar }
- { token ITlanguage_prag }
- "{-#" $whitechar* (INCLUDE|include) / { notFollowedByPragmaChar }
- { lex_string_prag ITinclude_prag }
}
<0> {
-- In the "0" mode we ignore these pragmas
- "{-#" $whitechar* (OPTIONS|options|OPTIONS_GHC|options_ghc|OPTIONS_HADDOCK|options_haddock|LANGUAGE|language|INCLUDE|include) / { notFollowedByPragmaChar }
+ "{-#" $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
{ nested_comment lexToken }
}
| ITstdcallconv
| ITccallconv
| ITprimcallconv
- | ITdotnet
| ITmdo
| ITfamily
| ITgroup
( "stdcall", ITstdcallconv, bit ffiBit),
( "ccall", ITccallconv, bit ffiBit),
( "prim", ITprimcallconv, bit ffiBit),
- ( "dotnet", ITdotnet, bit ffiBit),
( "rec", ITrec, bit arrowsBit),
( "proc", ITproc, bit arrowsBit)
notFollowedBySymbol _ _ _ (AI _ _ buf)
= nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
-notFollowedByPragmaChar :: AlexAccPred Int
-notFollowedByPragmaChar _ _ _ (AI _ _ buf)
- = nextCharIs buf (\c -> not (isAlphaNum c || c == '_'))
-
-- We must reject doc comments as being ordinary comments everywhere.
-- In some cases the doc comment will be selected as the lexeme due to
-- maximal munch, but not always, because the nested comment rule is
-- Here we are basically parsing character literals, such as 'x' or '\n'
-- but, when Template Haskell is on, we additionally spot
-- 'x and ''T, returning ITvarQuote and ITtyQuote respectively,
--- but WIHTOUT CONSUMING the x or T part (the parser does that).
+-- but WITHOUT CONSUMING the x or T part (the parser does that).
-- So we have to do two characters of lookahead: when we see 'x we need to
-- see if there's a trailing quote
lex_char_tok span _buf _len = do -- We've seen '
Just (c3,i3) ->
let str = [c1,c2,c3] in
case [ (c,rest) | (p,c) <- silly_escape_chars,
- Just rest <- [maybePrefixMatch p str] ] of
+ Just rest <- [stripPrefix p str] ] of
(escape_char,[]):_ -> do
setInput i3
return escape_char
data PState = PState {
buffer :: StringBuffer,
- dflags :: DynFlags,
- messages :: Messages,
+ dflags :: DynFlags,
+ messages :: Messages,
last_loc :: SrcSpan, -- pos of previous token
last_offs :: !Int, -- offset of the previous token from the
-- beginning of the current line.
-- \t is equal to 8 spaces.
last_len :: !Int, -- len of previous token
- last_line_len :: !Int,
+ last_line_len :: !Int,
loc :: SrcLoc, -- current loc (end of prev token + 1)
extsBitmap :: !Int, -- bitmap that determines permitted extensions
context :: [LayoutContext],
failSpanMsgP :: SrcSpan -> SDoc -> P a
failSpanMsgP span msg = P $ \_ -> PFailed span msg
+getPState :: P PState
+getPState = P $ \s -> POk s s
+
extension :: (Int -> Bool) -> P Bool
extension p = P $ \s -> POk s (p $! extsBitmap s)
| c <= '\x06' = non_graphic
| c <= '\x7f' = c
-- Alex doesn't handle Unicode, so when Unicode
- -- character is encoutered we output these values
+ -- character is encountered we output these values
-- with the actual character value hidden in the state.
| otherwise =
case generalCategory c of
case ltok of
L _ ITeof -> return []
_ -> liftM (ltok:) go
+
+linePrags = Map.singleton "line" (begin line_prag2)
+
+fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag),
+ ("options_ghc", lex_string_prag IToptions_prag),
+ ("options_haddock", lex_string_prag ITdocOptions),
+ ("language", token ITlanguage_prag),
+ ("include", lex_string_prag ITinclude_prag)])
+
+ignoredPrags = Map.fromList (map ignored pragmas)
+ where ignored opt = (opt, nested_comment lexToken)
+ impls = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"]
+ options_pragmas = map ("options_" ++) impls
+ -- CFILES is a hugs-only thing.
+ pragmas = options_pragmas ++ ["cfiles", "contract"]
+
+oneWordPrags = Map.fromList([("rules", rulePrag),
+ ("inline", token (ITinline_prag True)),
+ ("notinline", token (ITinline_prag False)),
+ ("specialize", token ITspec_prag),
+ ("source", token ITsource_prag),
+ ("warning", token ITwarning_prag),
+ ("deprecated", token ITdeprecated_prag),
+ ("scc", token ITscc_prag),
+ ("generated", token ITgenerated_prag),
+ ("core", token ITcore_prag),
+ ("unpack", token ITunpack_prag),
+ ("ann", token ITann_prag)])
+
+twoWordPrags = Map.fromList([("inline conlike", token (ITinline_conlike_prag True)),
+ ("notinline conlike", token (ITinline_conlike_prag False)),
+ ("specialize inline", token (ITspec_inline_prag True)),
+ ("specialize notinline", token (ITspec_inline_prag False))])
+
+
+dispatch_pragmas :: Map String Action -> Action
+dispatch_pragmas prags span buf len = case Map.lookup (clean_pragma (lexemeToString buf len)) prags of
+ Just found -> found span buf len
+ Nothing -> lexError "unknown pragma"
+
+known_pragma :: Map String Action -> AlexAccPred Int
+known_pragma prags _ _ len (AI _ _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags)
+ && (nextCharIs buf (\c -> not (isAlphaNum c || c == '_')))
+
+clean_pragma :: String -> String
+clean_pragma prag = canon_ws (map toLower (unprefix prag))
+ where unprefix prag' = case stripPrefix "{-#" prag' of
+ Just rest -> rest
+ Nothing -> prag'
+ canonical prag' = case prag' of
+ "noinline" -> "notinline"
+ "specialise" -> "specialize"
+ "constructorlike" -> "conlike"
+ otherwise -> prag'
+ canon_ws s = unwords (map canonical (words s))
}