-- qualified varids.
{
-{-# OPTIONS -Wwarn -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
---
--- Note that Alex itself generates code with with some unused bindings and
--- without type signatures, so removing the flag might not be possible.
+-- XXX The above flags turn off warnings in the generated code:
+{-# LANGUAGE BangPatterns #-}
+{-# OPTIONS_GHC -fno-warn-unused-matches #-}
+{-# OPTIONS_GHC -fno-warn-unused-binds #-}
+{-# OPTIONS_GHC -fno-warn-unused-imports #-}
+{-# OPTIONS_GHC -fno-warn-missing-signatures #-}
+-- But alex still generates some code that causes the "lazy unlifted bindings"
+-- warning, and old compilers don't know about it so we can't easily turn
+-- it off, so for now we use the sledge hammer:
+{-# OPTIONS_GHC -w #-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
module Lexer (
Token(..), lexer, pragState, mkPState, PState(..),
P(..), ParseResult(..), getSrcLoc,
+ getPState, getDynFlags, withThisPackage,
failLocMsgP, failSpanMsgP, srcParseFail,
- getMessages,
+ getMessages,
popContext, pushCurrentContext, setLastToken, setSrcLoc,
getLexState, popLexState, pushLexState,
- extension, standaloneDerivingEnabled, bangPatEnabled,
+ extension, bangPatEnabled, datatypeContextsEnabled,
addWarning,
lexTokenStream
) where
import SrcLoc
import UniqFM
import DynFlags
+import Module
import Ctype
-import Util ( maybePrefixMatch, readRational )
+import BasicTypes ( InlineSpec(..), RuleMatchInfo(..) )
+import Util ( readRational )
import Control.Monad
import Data.Bits
import Data.Char
+import Data.List
+import Data.Maybe
+import Data.Map (Map)
+import qualified Data.Map as Map
import Data.Ratio
}
$nl = [\n\r]
$idchar = [$small $large $digit \']
+$pragmachar = [$small $large $digit]
+
$docsym = [\| \^ \* \$]
@varid = $small $idchar*
-- everywhere: skip whitespace and comments
$white_no_nl+ ;
-$tab+ { warn Opt_WarnTabs (text "Tab character") }
+$tab+ { warn Opt_WarnTabs (text "Warning: Tab character") }
-- Everywhere: deal with nested comments. We explicitly rule out
-- pragmas, "{-#", so that we don't accidentally treat them as comments.
-- context if the curly brace is missing.
-- Careful! This stuff is quite delicate.
<layout, layout_do> {
- \{ / { notFollowedBy '-' } { pop_and open_brace }
+ \{ / { notFollowedBy '-' } { hopefully_open_brace }
-- we might encounter {-# here, but {- has been handled already
\n ;
^\# (line)? { begin line_prag1 }
<0,option_prags> \n { begin bol }
-"{-#" $whitechar* (line|LINE) / { notFollowedByPragmaChar }
- { begin line_prag2 }
+"{-#" $whitechar* $pragmachar+ / { known_pragma linePrags }
+ { dispatch_pragmas linePrags }
-- single-line line pragmas, of the form
-- # <line> "<file>" <extra-stuff> \n
-- with older versions of GHC which generated these.
<0,option_prags> {
- "{-#" $whitechar* (RULES|rules) / { notFollowedByPragmaChar } { rulePrag }
- "{-#" $whitechar* (INLINE|inline) / { notFollowedByPragmaChar }
- { token (ITinline_prag True) }
- "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar }
- { token (ITinline_prag False) }
- "{-#" $whitechar* (INLINE|inline)
- $whitechar+ (CONLIKE|conlike) / { notFollowedByPragmaChar }
- { token (ITinline_conlike_prag True) }
- "{-#" $whitechar* (NO(T)?INLINE|no(t?)inline)
- $whitechar+ (CONLIKE|constructorlike) / { notFollowedByPragmaChar }
- { token (ITinline_conlike_prag False) }
- "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) / { notFollowedByPragmaChar }
- { token ITspec_prag }
- "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
- $whitechar+ (INLINE|inline) / { notFollowedByPragmaChar }
- { token (ITspec_inline_prag True) }
- "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
- $whitechar+ (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar }
- { token (ITspec_inline_prag False) }
- "{-#" $whitechar* (SOURCE|source) / { notFollowedByPragmaChar }
- { token ITsource_prag }
- "{-#" $whitechar* (WARNING|warning) / { notFollowedByPragmaChar }
- { token ITwarning_prag }
- "{-#" $whitechar* (DEPRECATED|deprecated) / { notFollowedByPragmaChar }
- { token ITdeprecated_prag }
- "{-#" $whitechar* (SCC|scc) / { notFollowedByPragmaChar }
- { token ITscc_prag }
- "{-#" $whitechar* (GENERATED|generated) / { notFollowedByPragmaChar }
- { token ITgenerated_prag }
- "{-#" $whitechar* (CORE|core) / { notFollowedByPragmaChar }
- { token ITcore_prag }
- "{-#" $whitechar* (UNPACK|unpack) / { notFollowedByPragmaChar }
- { token ITunpack_prag }
- "{-#" $whitechar* (ANN|ann) / { notFollowedByPragmaChar }
- { token ITann_prag }
+ "{-#" $whitechar* $pragmachar+
+ $whitechar+ $pragmachar+ / { known_pragma twoWordPrags }
+ { dispatch_pragmas twoWordPrags }
+
+ "{-#" $whitechar* $pragmachar+ / { known_pragma oneWordPrags }
+ { dispatch_pragmas oneWordPrags }
-- We ignore all these pragmas, but don't generate a warning for them
- -- CFILES is a hugs-only thing.
- "{-#" $whitechar* (OPTIONS_(HUGS|hugs|NHC98|nhc98|JHC|jhc|YHC|yhc|CATCH|catch|DERIVE|derive)|CFILES|cfiles|CONTRACT|contract) / { notFollowedByPragmaChar }
- { nested_comment lexToken }
+ "{-#" $whitechar* $pragmachar+ / { known_pragma ignoredPrags }
+ { dispatch_pragmas ignoredPrags }
-- ToDo: should only be valid inside a pragma:
"#-}" { endPrag }
}
<option_prags> {
- "{-#" $whitechar* (OPTIONS|options) / { notFollowedByPragmaChar }
- { lex_string_prag IToptions_prag }
- "{-#" $whitechar* (OPTIONS_GHC|options_ghc) / { notFollowedByPragmaChar }
- { lex_string_prag IToptions_prag }
- "{-#" $whitechar* (OPTIONS_HADDOCK|options_haddock)
- / { notFollowedByPragmaChar }
- { lex_string_prag ITdocOptions }
+ "{-#" $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
+ { dispatch_pragmas fileHeaderPrags }
+
"-- #" { multiline_doc_comment }
- "{-#" $whitechar* (LANGUAGE|language) / { notFollowedByPragmaChar }
- { token ITlanguage_prag }
- "{-#" $whitechar* (INCLUDE|include) / { notFollowedByPragmaChar }
- { lex_string_prag ITinclude_prag }
}
<0> {
-- In the "0" mode we ignore these pragmas
- "{-#" $whitechar* (OPTIONS|options|OPTIONS_GHC|options_ghc|OPTIONS_HADDOCK|options_haddock|LANGUAGE|language|INCLUDE|include) / { notFollowedByPragmaChar }
+ "{-#" $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
{ nested_comment lexToken }
}
-- Haddock comments
-<0> {
+<0,option_prags> {
"-- " $docsym / { ifExtension haddockEnabled } { multiline_doc_comment }
"{-" \ ? $docsym / { ifExtension haddockEnabled } { nested_doc_comment }
}
\$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
"$(" / { ifExtension thEnabled } { token ITparenEscape }
+-- For backward compatibility, accept the old dollar syntax
"[$" @varid "|" / { ifExtension qqEnabled }
{ lex_quasiquote_tok }
+
+ "[" @varid "|" / { ifExtension qqEnabled }
+ { lex_quasiquote_tok }
}
<0> {
-- ToDo: - move `var` and (sym) into lexical syntax?
-- - remove backquote from $special?
<0> {
- @qual @varsym / { ifExtension oldQualOps } { idtoken qvarsym }
- @qual @consym / { ifExtension oldQualOps } { idtoken qconsym }
- @qual \( @varsym \) / { ifExtension newQualOps } { idtoken prefixqvarsym }
- @qual \( @consym \) / { ifExtension newQualOps } { idtoken prefixqconsym }
+ @qual @varsym { idtoken qvarsym }
+ @qual @consym { idtoken qconsym }
@varsym { varsym }
@consym { consym }
}
| ITdynamic
| ITsafe
| ITthreadsafe
+ | ITinterruptible
| ITunsafe
| ITstdcallconv
| ITccallconv
- | ITdotnet
+ | ITprimcallconv
| ITmdo
| ITfamily
| ITgroup
| ITusing
-- Pragmas
- | ITinline_prag Bool -- True <=> INLINE, False <=> NOINLINE
- | ITinline_conlike_prag Bool -- same
+ | ITinline_prag InlineSpec RuleMatchInfo
| ITspec_prag -- SPECIALISE
| ITspec_inline_prag Bool -- SPECIALISE INLINE (or NOINLINE)
| ITsource_prag
| ITvocurly
| ITvccurly
| ITobrack
- | ITopabrack -- [:, for parallel arrays with -XParr
- | ITcpabrack -- :], for parallel arrays with -XParr
+ | ITopabrack -- [:, for parallel arrays with -XParallelArrays
+ | ITcpabrack -- :], for parallel arrays with -XParallelArrays
| ITcbrack
| IToparen
| ITcparen
| ITdupipvarid FastString -- GHC extension: implicit param: ?x
- | ITpragma StringBuffer
-
| ITchar Char
| ITstring FastString
| ITinteger Integer
| ITprimfloat Rational
| ITprimdouble Rational
- -- MetaHaskell extension tokens
+ -- Template Haskell extension tokens
| ITopenExpQuote -- [| or [e|
| ITopenPatQuote -- [p|
| ITopenDecQuote -- [d|
isSpecial ITdynamic = True
isSpecial ITsafe = True
isSpecial ITthreadsafe = True
+isSpecial ITinterruptible = True
isSpecial ITunsafe = True
isSpecial ITccallconv = True
isSpecial ITstdcallconv = True
+isSpecial ITprimcallconv = True
isSpecial ITmdo = True
isSpecial ITfamily = True
isSpecial ITgroup = True
( "dynamic", ITdynamic, bit ffiBit),
( "safe", ITsafe, bit ffiBit),
( "threadsafe", ITthreadsafe, bit ffiBit), -- ToDo: remove
+ ( "interruptible", ITinterruptible, bit ffiBit),
( "unsafe", ITunsafe, bit ffiBit),
( "stdcall", ITstdcallconv, bit ffiBit),
( "ccall", ITccallconv, bit ffiBit),
- ( "dotnet", ITdotnet, bit ffiBit),
+ ( "prim", ITprimcallconv, bit ffiBit),
- ( "rec", ITrec, bit arrowsBit),
+ ( "rec", ITrec, bit recBit),
( "proc", ITproc, bit arrowsBit)
]
explicitForallEnabled i)
,("→", ITrarrow, unicodeSyntaxEnabled)
,("←", ITlarrow, unicodeSyntaxEnabled)
- ,("⋯", ITdotdot, unicodeSyntaxEnabled)
+
+ ,("⤙", ITlarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+ ,("⤚", ITrarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+ ,("⤛", ITLarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+ ,("⤜", ITRarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+
+ ,("★", ITstar, unicodeSyntaxEnabled)
+
-- ToDo: ideally, → and ∷ should be "specials", so that they cannot
-- form part of a large operator. This would let us have a better
-- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe).
begin code _span _str _len = do pushLexState code; lexToken
pop :: Action
-pop _span _buf _len = do popLexState; lexToken
+pop _span _buf _len = do _ <- popLexState
+ lexToken
+
+hopefully_open_brace :: Action
+hopefully_open_brace span buf len
+ = do relaxed <- extension relaxedLayout
+ ctx <- getContext
+ (AI l _) <- getInput
+ let offset = srcLocCol l
+ isOK = relaxed ||
+ case ctx of
+ Layout prev_off : _ -> prev_off < offset
+ _ -> True
+ if isOK then pop_and open_brace span buf len
+ else failSpanMsgP span (text "Missing block")
pop_and :: Action -> Action
-pop_and act span buf len = do popLexState; act span buf len
+pop_and act span buf len = do _ <- popLexState
+ act span buf len
{-# INLINE nextCharIs #-}
nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool
nextCharIs buf p = not (atEnd buf) && p (currentChar buf)
notFollowedBy :: Char -> AlexAccPred Int
-notFollowedBy char _ _ _ (AI _ _ buf)
+notFollowedBy char _ _ _ (AI _ buf)
= nextCharIs buf (/=char)
notFollowedBySymbol :: AlexAccPred Int
-notFollowedBySymbol _ _ _ (AI _ _ buf)
+notFollowedBySymbol _ _ _ (AI _ buf)
= nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
-notFollowedByPragmaChar :: AlexAccPred Int
-notFollowedByPragmaChar _ _ _ (AI _ _ buf)
- = nextCharIs buf (\c -> not (isAlphaNum c || c == '_'))
-
-- We must reject doc comments as being ordinary comments everywhere.
-- In some cases the doc comment will be selected as the lexeme due to
-- maximal munch, but not always, because the nested comment rule is
-- valid in all states, but the doc-comment rules are only valid in
-- the non-layout states.
isNormalComment :: AlexAccPred Int
-isNormalComment bits _ _ (AI _ _ buf)
+isNormalComment bits _ _ (AI _ buf)
| haddockEnabled bits = notFollowedByDocOrPragma
| otherwise = nextCharIs buf (/='#')
where
spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf))
{-
-haddockDisabledAnd p bits _ _ (AI _ _ buf)
+haddockDisabledAnd p bits _ _ (AI _ buf)
= if haddockEnabled bits then False else (p buf)
-}
atEOL :: AlexAccPred Int
-atEOL _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf == '\n'
+atEOL _ _ _ (AI _ buf) = atEnd buf || currentChar buf == '\n'
ifExtension :: (Int -> Bool) -> AlexAccPred Int
ifExtension pred bits _ _ _ = pred bits
withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (Located Token))
-> P (Located Token)
withLexedDocType lexDocComment = do
- input@(AI _ _ buf) <- getInput
+ input@(AI _ buf) <- getInput
case prevChar buf ' ' of
'|' -> lexDocComment input ITdocCommentNext False
'^' -> lexDocComment input ITdocCommentPrev False
-- RULES pragmas turn on the forall and '.' keywords, and we turn them
-- off again at the end of the pragma.
rulePrag :: Action
-rulePrag span _ _ = do
+rulePrag span _buf _len = do
setExts (.|. bit inRulePragBit)
return (L span ITrules_prag)
endPrag :: Action
-endPrag span _ _ = do
+endPrag span _buf _len = do
setExts (.&. complement (bit inRulePragBit))
return (L span ITclose_prag)
-- it writes the wrong token length to the parser state. This function is
-- called afterwards, so it can just update the state.
--- This is complicated by the fact that Haddock tokens can span multiple lines,
--- which is something that the original lexer didn't account for.
--- I have added last_line_len in the parser state which represents the length
--- of the part of the token that is on the last line. It is now used for layout
--- calculation in pushCurrentContext instead of last_len. last_len is, like it
--- was before, the full length of the token, and it is now only used for error
--- messages. /Waern
-
docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer ->
SrcSpan -> P (Located Token)
docCommentEnd input commentAcc docType buf span = do
setInput input
- let (AI loc last_offs nextBuf) = input
+ let (AI loc nextBuf) = input
comment = reverse commentAcc
span' = mkSrcSpan (srcSpanStart span) loc
last_len = byteDiff buf nextBuf
- last_line_len = if (last_offs - last_len < 0)
- then last_offs
- else last_len
-
- span `seq` setLastToken span' last_len last_line_len
+ span `seq` setLastToken span' last_len
return (L span' (docType comment))
errBrace :: AlexInput -> SrcSpan -> P a
-errBrace (AI end _ _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'"
+errBrace (AI end _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'"
open_brace, close_brace :: Action
open_brace span _str _len = do
return (L span ITvccurly)
EQ -> do
--trace "layout: inserting ';'" $ do
- popLexState
+ _ <- popLexState
return (L span ITsemi)
GT -> do
- popLexState
+ _ <- popLexState
lexToken
-- certain keywords put us in the "layout" state, where we might
-- add an opening curly brace.
maybe_layout :: Token -> P ()
-maybe_layout ITdo = pushLexState layout_do
-maybe_layout ITmdo = pushLexState layout_do
-maybe_layout ITof = pushLexState layout
-maybe_layout ITlet = pushLexState layout
-maybe_layout ITwhere = pushLexState layout
-maybe_layout ITrec = pushLexState layout
-maybe_layout _ = return ()
+maybe_layout t = do -- If the alternative layout rule is enabled then
+ -- we never create an implicit layout context here.
+ -- Layout is handled XXX instead.
+ -- The code for closing implicit contexts, or
+ -- inserting implicit semi-colons, is therefore
+ -- irrelevant as it only applies in an implicit
+ -- context.
+ alr <- extension alternativeLayoutRule
+ unless alr $ f t
+ where f ITdo = pushLexState layout_do
+ f ITmdo = pushLexState layout_do
+ f ITof = pushLexState layout
+ f ITlet = pushLexState layout
+ f ITwhere = pushLexState layout
+ f ITrec = pushLexState layout
+ f _ = return ()
-- Pushing a new implicit layout context. If the indentation of the
-- next token is not greater than the previous layout context, then
--
new_layout_context :: Bool -> Action
new_layout_context strict span _buf _len = do
- popLexState
- (AI _ offset _) <- getInput
+ _ <- popLexState
+ (AI l _) <- getInput
+ let offset = srcLocCol l
ctx <- getContext
+ nondecreasing <- extension nondecreasingIndentation
+ let strict' = strict || not nondecreasing
case ctx of
Layout prev_off : _ |
- (strict && prev_off >= offset ||
- not strict && prev_off > offset) -> do
+ (strict' && prev_off >= offset ||
+ not strict' && prev_off > offset) -> do
-- token is indented to the left of the previous context.
-- we must generate a {} sequence now.
pushLexState layout_left
do_layout_left :: Action
do_layout_left span _buf _len = do
- popLexState
+ _ <- popLexState
pushLexState bol -- we must be at the start of a line
return (L span ITvccurly)
setLine :: Int -> Action
setLine code span buf len = do
let line = parseUnsignedInteger buf len 10 octDecDigit
- setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0)
+ setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
-- subtract one: the line number refers to the *following* line
- popLexState
+ _ <- popLexState
pushLexState code
lexToken
setFile :: Int -> Action
setFile code span buf len = do
let file = lexemeToFastString (stepOn buf) (len-2)
+ setAlrLastLoc noSrcSpan
setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
- popLexState
+ _ <- popLexState
pushLexState code
lexToken
= case alexGetChar i of
Just (c,i') | c == x -> isString i' xs
_other -> False
- err (AI end _ _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma"
+ err (AI end _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma"
-- -----------------------------------------------------------------------------
lex_string s = do
i <- getInput
case alexGetChar' i of
- Nothing -> lit_error
+ Nothing -> lit_error i
Just ('"',i) -> do
setInput i
Just ('\\',i)
| Just ('&',i) <- next -> do
setInput i; lex_string s
- | Just (c,i) <- next, is_space c -> do
+ | Just (c,i) <- next, c <= '\x7f' && is_space c -> do
+ -- is_space only works for <= '\x7f' (#3751)
setInput i; lex_stringgap s
where next = alexGetChar' i
- Just (c, i) -> do
- c' <- lex_char c i
- lex_string (c':s)
+ Just (c, i1) -> do
+ case c of
+ '\\' -> do setInput i1; c' <- lex_escape; lex_string (c':s)
+ c | isAny c -> do setInput i1; lex_string (c:s)
+ _other -> lit_error i
lex_stringgap :: String -> P Token
lex_stringgap s = do
- c <- getCharOrFail
+ i <- getInput
+ c <- getCharOrFail i
case c of
'\\' -> lex_string s
c | is_space c -> lex_stringgap s
- _other -> lit_error
+ _other -> lit_error i
lex_char_tok :: Action
-- Here we are basically parsing character literals, such as 'x' or '\n'
-- but, when Template Haskell is on, we additionally spot
-- 'x and ''T, returning ITvarQuote and ITtyQuote respectively,
--- but WIHTOUT CONSUMING the x or T part (the parser does that).
+-- but WITHOUT CONSUMING the x or T part (the parser does that).
-- So we have to do two characters of lookahead: when we see 'x we need to
-- see if there's a trailing quote
lex_char_tok span _buf _len = do -- We've seen '
i1 <- getInput -- Look ahead to first character
let loc = srcSpanStart span
case alexGetChar' i1 of
- Nothing -> lit_error
+ Nothing -> lit_error i1
- Just ('\'', i2@(AI end2 _ _)) -> do -- We've seen ''
+ Just ('\'', i2@(AI end2 _)) -> do -- We've seen ''
th_exts <- extension thEnabled
if th_exts then do
setInput i2
return (L (mkSrcSpan loc end2) ITtyQuote)
- else lit_error
+ else lit_error i1
- Just ('\\', i2@(AI _end2 _ _)) -> do -- We've seen 'backslash
+ Just ('\\', i2@(AI _end2 _)) -> do -- We've seen 'backslash
setInput i2
lit_ch <- lex_escape
- mc <- getCharOrFail -- Trailing quote
+ i3 <- getInput
+ mc <- getCharOrFail i3 -- Trailing quote
if mc == '\'' then finish_char_tok loc lit_ch
- else do setInput i2; lit_error
+ else lit_error i3
- Just (c, i2@(AI _end2 _ _))
- | not (isAny c) -> lit_error
+ Just (c, i2@(AI _end2 _))
+ | not (isAny c) -> lit_error i1
| otherwise ->
-- We've seen 'x, where x is a valid character
-- (including the possibility of EOF)
-- If TH is on, just parse the quote only
th_exts <- extension thEnabled
- let (AI end _ _) = i1
+ let (AI end _) = i1
if th_exts then return (L (mkSrcSpan loc end) ITvarQuote)
- else do setInput i2; lit_error
+ else lit_error i2
finish_char_tok :: SrcLoc -> Char -> P (Located Token)
finish_char_tok loc ch -- We've already seen the closing quote
-- Just need to check for trailing #
= do magicHash <- extension magicHashEnabled
- i@(AI end _ _) <- getInput
+ i@(AI end _) <- getInput
if magicHash then do
case alexGetChar' i of
- Just ('#',i@(AI end _ _)) -> do
+ Just ('#',i@(AI end _)) -> do
setInput i
return (L (mkSrcSpan loc end) (ITprimchar ch))
_other ->
return (L (mkSrcSpan loc end) (ITchar ch))
- else do
+ else do
return (L (mkSrcSpan loc end) (ITchar ch))
-lex_char :: Char -> AlexInput -> P Char
-lex_char c inp = do
- case c of
- '\\' -> do setInput inp; lex_escape
- c | isAny c -> do setInput inp; return c
- _other -> lit_error
-
isAny :: Char -> Bool
isAny c | c > '\x7f' = isPrint c
| otherwise = is_any c
lex_escape :: P Char
lex_escape = do
- c <- getCharOrFail
+ i0 <- getInput
+ c <- getCharOrFail i0
case c of
'a' -> return '\a'
'b' -> return '\b'
'\\' -> return '\\'
'"' -> return '\"'
'\'' -> return '\''
- '^' -> do c <- getCharOrFail
+ '^' -> do i1 <- getInput
+ c <- getCharOrFail i1
if c >= '@' && c <= '_'
then return (chr (ord c - ord '@'))
- else lit_error
+ else lit_error i1
'x' -> readNum is_hexdigit 16 hexDigit
'o' -> readNum is_octdigit 8 octDecDigit
c1 -> do
i <- getInput
case alexGetChar' i of
- Nothing -> lit_error
+ Nothing -> lit_error i0
Just (c2,i2) ->
case alexGetChar' i2 of
- Nothing -> do setInput i2; lit_error
+ Nothing -> do lit_error i0
Just (c3,i3) ->
let str = [c1,c2,c3] in
case [ (c,rest) | (p,c) <- silly_escape_chars,
- Just rest <- [maybePrefixMatch p str] ] of
+ Just rest <- [stripPrefix p str] ] of
(escape_char,[]):_ -> do
setInput i3
return escape_char
(escape_char,_:_):_ -> do
setInput i2
return escape_char
- [] -> lit_error
+ [] -> lit_error i0
readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
readNum is_digit base conv = do
i <- getInput
- c <- getCharOrFail
+ c <- getCharOrFail i
if is_digit c
then readNum2 is_digit base conv (conv c)
- else do setInput i; lit_error
+ else lit_error i
readNum2 :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> P Char
readNum2 is_digit base conv i = do
where read i input = do
case alexGetChar' input of
Just (c,input') | is_digit c -> do
- read (i*base + conv c) input'
+ let i' = i*base + conv c
+ if i' > 0x10ffff
+ then setInput input >> lexError "numeric escape sequence out of range"
+ else read i' input'
_other -> do
- if i >= 0 && i <= 0x10FFFF
- then do setInput input; return (chr i)
- else lit_error
+ setInput input; return (chr i)
+
silly_escape_chars :: [(String, Char)]
silly_escape_chars = [
-- the position of the error in the buffer. This is so that we can report
-- a correct location to the user, but also so we can detect UTF-8 decoding
-- errors if they occur.
-lit_error :: P a
-lit_error = lexError "lexical error in string/character literal"
+lit_error :: AlexInput -> P a
+lit_error i = do setInput i; lexError "lexical error in string/character literal"
-getCharOrFail :: P Char
-getCharOrFail = do
- i <- getInput
+getCharOrFail :: AlexInput -> P Char
+getCharOrFail i = do
case alexGetChar' i of
Nothing -> lexError "unexpected end-of-file in string/character literal"
Just (c,i) -> do setInput i; return c
lex_quasiquote_tok :: Action
lex_quasiquote_tok span buf len = do
- let quoter = reverse $ takeWhile (/= '$')
- $ reverse $ lexemeToString buf (len - 1)
+ let quoter = tail (lexemeToString buf (len - 1))
+ -- 'tail' drops the initial '[',
+ -- while the -1 drops the trailing '|'
quoteStart <- getSrcLoc
quote <- lex_quasiquote ""
end <- getSrcLoc
lex_quasiquote s = do
i <- getInput
case alexGetChar' i of
- Nothing -> lit_error
+ Nothing -> lit_error i
Just ('\\',i)
| Just ('|',i) <- next -> do
data PState = PState {
buffer :: StringBuffer,
- dflags :: DynFlags,
- messages :: Messages,
+ dflags :: DynFlags,
+ messages :: Messages,
last_loc :: SrcSpan, -- pos of previous token
- last_offs :: !Int, -- offset of the previous token from the
- -- beginning of the current line.
- -- \t is equal to 8 spaces.
last_len :: !Int, -- len of previous token
- last_line_len :: !Int,
loc :: SrcLoc, -- current loc (end of prev token + 1)
extsBitmap :: !Int, -- bitmap that determines permitted extensions
context :: [LayoutContext],
- lex_state :: [Int]
+ lex_state :: [Int],
+ -- Used in the alternative layout rule:
+ -- These tokens are the next ones to be sent out. They are
+ -- just blindly emitted, without the rule looking at them again:
+ alr_pending_implicit_tokens :: [Located Token],
+ -- This is the next token to be considered or, if it is Nothing,
+ -- we need to get the next token from the input stream:
+ alr_next_token :: Maybe (Located Token),
+ -- This is what we consider to be the location of the last token
+ -- emitted:
+ alr_last_loc :: SrcSpan,
+ -- The stack of layout contexts:
+ alr_context :: [ALRContext],
+ -- Are we expecting a '{'? If it's Just, then the ALRLayout tells
+ -- us what sort of layout the '{' will open:
+ alr_expecting_ocurly :: Maybe ALRLayout,
+ -- Have we just had the '}' for a let block? If so, then an 'in'
+ -- token doesn't need to close anything:
+ alr_justClosedExplicitLetBlock :: Bool
}
-- last_loc and last_len are used when generating error messages,
-- and in pushCurrentContext only. Sigh, if only Happy passed the
-- Getting rid of last_loc would require finding another way to
-- implement pushCurrentContext (which is only called from one place).
+data ALRContext = ALRNoLayout Bool{- does it contain commas? -}
+ Bool{- is it a 'let' block? -}
+ | ALRLayout ALRLayout Int
+data ALRLayout = ALRLayoutLet
+ | ALRLayoutWhere
+ | ALRLayoutOf
+ | ALRLayoutDo
+
newtype P a = P { unP :: PState -> ParseResult a }
instance Monad P where
failSpanMsgP :: SrcSpan -> SDoc -> P a
failSpanMsgP span msg = P $ \_ -> PFailed span msg
+getPState :: P PState
+getPState = P $ \s -> POk s s
+
+getDynFlags :: P DynFlags
+getDynFlags = P $ \s -> POk s (dflags s)
+
+withThisPackage :: (PackageId -> a) -> P a
+withThisPackage f
+ = do pkg <- liftM thisPackage getDynFlags
+ return $ f pkg
+
extension :: (Int -> Bool) -> P Bool
extension p = P $ \s -> POk s (p $! extsBitmap s)
getSrcLoc :: P SrcLoc
getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
-setLastToken :: SrcSpan -> Int -> Int -> P ()
-setLastToken loc len line_len = P $ \s -> POk s {
+setLastToken :: SrcSpan -> Int -> P ()
+setLastToken loc len = P $ \s -> POk s {
last_loc=loc,
- last_len=len,
- last_line_len=line_len
-} ()
+ last_len=len
+ } ()
-data AlexInput = AI SrcLoc {-#UNPACK#-}!Int StringBuffer
+data AlexInput = AI SrcLoc StringBuffer
alexInputPrevChar :: AlexInput -> Char
-alexInputPrevChar (AI _ _ buf) = prevChar buf '\n'
+alexInputPrevChar (AI _ buf) = prevChar buf '\n'
alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
-alexGetChar (AI loc ofs s)
+alexGetChar (AI loc s)
| atEnd s = Nothing
- | otherwise = adj_c `seq` loc' `seq` ofs' `seq` s' `seq`
+ | otherwise = adj_c `seq` loc' `seq` s' `seq`
--trace (show (ord c)) $
- Just (adj_c, (AI loc' ofs' s'))
+ Just (adj_c, (AI loc' s'))
where (c,s') = nextChar s
loc' = advanceSrcLoc loc c
- ofs' = advanceOffs c ofs
non_graphic = '\x0'
upper = '\x1'
| c <= '\x06' = non_graphic
| c <= '\x7f' = c
-- Alex doesn't handle Unicode, so when Unicode
- -- character is encoutered we output these values
+ -- character is encountered we output these values
-- with the actual character value hidden in the state.
| otherwise =
case generalCategory c of
EnclosingMark -> other_graphic
DecimalNumber -> digit
LetterNumber -> other_graphic
- OtherNumber -> other_graphic
+ OtherNumber -> digit -- see #4373
ConnectorPunctuation -> symbol
DashPunctuation -> symbol
OpenPunctuation -> other_graphic
-- This version does not squash unicode characters, it is used when
-- lexing strings.
alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
-alexGetChar' (AI loc ofs s)
+alexGetChar' (AI loc s)
| atEnd s = Nothing
- | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq`
+ | otherwise = c `seq` loc' `seq` s' `seq`
--trace (show (ord c)) $
- Just (c, (AI loc' ofs' s'))
+ Just (c, (AI loc' s'))
where (c,s') = nextChar s
loc' = advanceSrcLoc loc c
- ofs' = advanceOffs c ofs
-
-advanceOffs :: Char -> Int -> Int
-advanceOffs '\n' _ = 0
-advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8
-advanceOffs _ offs = offs + 1
getInput :: P AlexInput
-getInput = P $ \s@PState{ loc=l, last_offs=o, buffer=b } -> POk s (AI l o b)
+getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (AI l b)
setInput :: AlexInput -> P ()
-setInput (AI l o b) = P $ \s -> POk s{ loc=l, last_offs=o, buffer=b } ()
+setInput (AI l b) = P $ \s -> POk s{ loc=l, buffer=b } ()
pushLexState :: Int -> P ()
pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
getLexState :: P Int
getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls
+popNextToken :: P (Maybe (Located Token))
+popNextToken
+ = P $ \s@PState{ alr_next_token = m } ->
+ POk (s {alr_next_token = Nothing}) m
+
+setAlrLastLoc :: SrcSpan -> P ()
+setAlrLastLoc l = P $ \s -> POk (s {alr_last_loc = l}) ()
+
+getAlrLastLoc :: P SrcSpan
+getAlrLastLoc = P $ \s@(PState {alr_last_loc = l}) -> POk s l
+
+getALRContext :: P [ALRContext]
+getALRContext = P $ \s@(PState {alr_context = cs}) -> POk s cs
+
+setALRContext :: [ALRContext] -> P ()
+setALRContext cs = P $ \s -> POk (s {alr_context = cs}) ()
+
+getJustClosedExplicitLetBlock :: P Bool
+getJustClosedExplicitLetBlock
+ = P $ \s@(PState {alr_justClosedExplicitLetBlock = b}) -> POk s b
+
+setJustClosedExplicitLetBlock :: Bool -> P ()
+setJustClosedExplicitLetBlock b
+ = P $ \s -> POk (s {alr_justClosedExplicitLetBlock = b}) ()
+
+setNextToken :: Located Token -> P ()
+setNextToken t = P $ \s -> POk (s {alr_next_token = Just t}) ()
+
+popPendingImplicitToken :: P (Maybe (Located Token))
+popPendingImplicitToken
+ = P $ \s@PState{ alr_pending_implicit_tokens = ts } ->
+ case ts of
+ [] -> POk s Nothing
+ (t : ts') -> POk (s {alr_pending_implicit_tokens = ts'}) (Just t)
+
+setPendingImplicitTokens :: [Located Token] -> P ()
+setPendingImplicitTokens ts = P $ \s -> POk (s {alr_pending_implicit_tokens = ts}) ()
+
+getAlrExpectingOCurly :: P (Maybe ALRLayout)
+getAlrExpectingOCurly = P $ \s@(PState {alr_expecting_ocurly = b}) -> POk s b
+
+setAlrExpectingOCurly :: Maybe ALRLayout -> P ()
+setAlrExpectingOCurly b = P $ \s -> POk (s {alr_expecting_ocurly = b}) ()
+
-- for reasons of efficiency, flags indicating language extensions (eg,
--- -fglasgow-exts or -XParr) are represented by a bitmap stored in an unboxed
+-- -fglasgow-exts or -XParallelArrays) are represented by a bitmap stored in an unboxed
-- integer
genericsBit :: Int
unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
unboxedTuplesBit :: Int
unboxedTuplesBit = 15 -- (# and #)
-standaloneDerivingBit :: Int
-standaloneDerivingBit = 16 -- standalone instance deriving declarations
+datatypeContextsBit :: Int
+datatypeContextsBit = 16
transformComprehensionsBit :: Int
transformComprehensionsBit = 17
qqBit :: Int
inRulePragBit = 19
rawTokenStreamBit :: Int
rawTokenStreamBit = 20 -- producing a token stream with all comments included
-newQualOpsBit :: Int
-newQualOpsBit = 21 -- Haskell' qualified operator syntax, e.g. Prelude.(+)
+recBit :: Int
+recBit = 22 -- rec
+alternativeLayoutRuleBit :: Int
+alternativeLayoutRuleBit = 23
+relaxedLayoutBit :: Int
+relaxedLayoutBit = 24
+nondecreasingIndentationBit :: Int
+nondecreasingIndentationBit = 25
always :: Int -> Bool
always _ = True
unicodeSyntaxEnabled flags = testBit flags unicodeSyntaxBit
unboxedTuplesEnabled :: Int -> Bool
unboxedTuplesEnabled flags = testBit flags unboxedTuplesBit
-standaloneDerivingEnabled :: Int -> Bool
-standaloneDerivingEnabled flags = testBit flags standaloneDerivingBit
+datatypeContextsEnabled :: Int -> Bool
+datatypeContextsEnabled flags = testBit flags datatypeContextsBit
qqEnabled :: Int -> Bool
qqEnabled flags = testBit flags qqBit
-- inRulePrag :: Int -> Bool
-- inRulePrag flags = testBit flags inRulePragBit
rawTokenStreamEnabled :: Int -> Bool
rawTokenStreamEnabled flags = testBit flags rawTokenStreamBit
-newQualOps :: Int -> Bool
-newQualOps flags = testBit flags newQualOpsBit
-oldQualOps :: Int -> Bool
-oldQualOps flags = not (newQualOps flags)
+alternativeLayoutRule :: Int -> Bool
+alternativeLayoutRule flags = testBit flags alternativeLayoutRuleBit
+relaxedLayout :: Int -> Bool
+relaxedLayout flags = testBit flags relaxedLayoutBit
+nondecreasingIndentation :: Int -> Bool
+nondecreasingIndentation flags = testBit flags nondecreasingIndentationBit
-- PState for parsing options pragmas
--
pragState :: DynFlags -> StringBuffer -> SrcLoc -> PState
-pragState dynflags buf loc =
- PState {
- buffer = buf,
- messages = emptyMessages,
- dflags = dynflags,
- last_loc = mkSrcSpan loc loc,
- last_offs = 0,
- last_len = 0,
- last_line_len = 0,
- loc = loc,
- extsBitmap = 0,
- context = [],
- lex_state = [bol, option_prags, 0]
- }
-
+pragState dynflags buf loc = (mkPState dynflags buf loc) {
+ lex_state = [bol, option_prags, 0]
+ }
-- create a parse state
--
-mkPState :: StringBuffer -> SrcLoc -> DynFlags -> PState
-mkPState buf loc flags =
+mkPState :: DynFlags -> StringBuffer -> SrcLoc -> PState
+mkPState flags buf loc =
PState {
buffer = buf,
dflags = flags,
messages = emptyMessages,
last_loc = mkSrcSpan loc loc,
- last_offs = 0,
last_len = 0,
- last_line_len = 0,
loc = loc,
extsBitmap = fromIntegral bitmap,
context = [],
- lex_state = [bol, 0]
- -- we begin in the layout state if toplev_layout is set
+ lex_state = [bol, 0],
+ alr_pending_implicit_tokens = [],
+ alr_next_token = Nothing,
+ alr_last_loc = noSrcSpan,
+ alr_context = [],
+ alr_expecting_ocurly = Nothing,
+ alr_justClosedExplicitLetBlock = False
}
where
- bitmap = genericsBit `setBitIf` dopt Opt_Generics flags
- .|. ffiBit `setBitIf` dopt Opt_ForeignFunctionInterface flags
- .|. parrBit `setBitIf` dopt Opt_PArr flags
- .|. arrowsBit `setBitIf` dopt Opt_Arrows flags
- .|. thBit `setBitIf` dopt Opt_TemplateHaskell flags
- .|. qqBit `setBitIf` dopt Opt_QuasiQuotes flags
- .|. ipBit `setBitIf` dopt Opt_ImplicitParams flags
- .|. explicitForallBit `setBitIf` dopt Opt_ScopedTypeVariables flags
- .|. explicitForallBit `setBitIf` dopt Opt_LiberalTypeSynonyms flags
- .|. explicitForallBit `setBitIf` dopt Opt_PolymorphicComponents flags
- .|. explicitForallBit `setBitIf` dopt Opt_ExistentialQuantification flags
- .|. explicitForallBit `setBitIf` dopt Opt_Rank2Types flags
- .|. explicitForallBit `setBitIf` dopt Opt_RankNTypes flags
- .|. bangPatBit `setBitIf` dopt Opt_BangPatterns flags
- .|. tyFamBit `setBitIf` dopt Opt_TypeFamilies flags
- .|. haddockBit `setBitIf` dopt Opt_Haddock flags
- .|. magicHashBit `setBitIf` dopt Opt_MagicHash flags
- .|. kindSigsBit `setBitIf` dopt Opt_KindSignatures flags
- .|. recursiveDoBit `setBitIf` dopt Opt_RecursiveDo flags
- .|. unicodeSyntaxBit `setBitIf` dopt Opt_UnicodeSyntax flags
- .|. unboxedTuplesBit `setBitIf` dopt Opt_UnboxedTuples flags
- .|. standaloneDerivingBit `setBitIf` dopt Opt_StandaloneDeriving flags
- .|. transformComprehensionsBit `setBitIf` dopt Opt_TransformListComp flags
+ bitmap = genericsBit `setBitIf` xopt Opt_Generics flags
+ .|. ffiBit `setBitIf` xopt Opt_ForeignFunctionInterface flags
+ .|. parrBit `setBitIf` xopt Opt_ParallelArrays flags
+ .|. arrowsBit `setBitIf` xopt Opt_Arrows flags
+ .|. thBit `setBitIf` xopt Opt_TemplateHaskell flags
+ .|. qqBit `setBitIf` xopt Opt_QuasiQuotes flags
+ .|. ipBit `setBitIf` xopt Opt_ImplicitParams flags
+ .|. explicitForallBit `setBitIf` xopt Opt_ExplicitForAll flags
+ .|. bangPatBit `setBitIf` xopt Opt_BangPatterns flags
+ .|. tyFamBit `setBitIf` xopt Opt_TypeFamilies flags
+ .|. haddockBit `setBitIf` dopt Opt_Haddock flags
+ .|. magicHashBit `setBitIf` xopt Opt_MagicHash flags
+ .|. kindSigsBit `setBitIf` xopt Opt_KindSignatures flags
+ .|. recursiveDoBit `setBitIf` xopt Opt_RecursiveDo flags
+ .|. recBit `setBitIf` xopt Opt_DoRec flags
+ .|. recBit `setBitIf` xopt Opt_Arrows flags
+ .|. unicodeSyntaxBit `setBitIf` xopt Opt_UnicodeSyntax flags
+ .|. unboxedTuplesBit `setBitIf` xopt Opt_UnboxedTuples flags
+ .|. datatypeContextsBit `setBitIf` xopt Opt_DatatypeContexts flags
+ .|. transformComprehensionsBit `setBitIf` xopt Opt_TransformListComp flags
.|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags
- .|. newQualOpsBit `setBitIf` dopt Opt_NewQualifiedOperators flags
+ .|. alternativeLayoutRuleBit `setBitIf` xopt Opt_AlternativeLayoutRule flags
+ .|. relaxedLayoutBit `setBitIf` xopt Opt_RelaxedLayout flags
+ .|. nondecreasingIndentationBit `setBitIf` xopt Opt_NondecreasingIndentation flags
--
setBitIf :: Int -> Bool -> Int
b `setBitIf` cond | cond = bit b
-- This is only used at the outer level of a module when the 'module'
-- keyword is missing.
pushCurrentContext :: P ()
-pushCurrentContext = P $ \ s@PState{ last_offs=offs, last_line_len=len, context=ctx } ->
- POk s{context = Layout (offs-len) : ctx} ()
---trace ("off: " ++ show offs ++ ", len: " ++ show len) $ POk s{context = Layout (offs-len) : ctx} ()
+pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
+ POk s{context = Layout (srcSpanStartCol loc) : ctx} ()
getOffside :: P Ordering
-getOffside = P $ \s@PState{last_offs=offs, context=stk} ->
+getOffside = P $ \s@PState{last_loc=loc, context=stk} ->
+ let offs = srcSpanStartCol loc in
let ord = case stk of
- (Layout n:_) -> compare offs n
+ (Layout n:_) -> --trace ("layout: " ++ show n ++ ", offs: " ++ show offs) $
+ compare offs n
_ -> GT
in POk s ord
lexError :: String -> P a
lexError str = do
loc <- getSrcLoc
- (AI end _ buf) <- getInput
+ (AI end buf) <- getInput
reportLexError loc end buf str
-- -----------------------------------------------------------------------------
lexer :: (Located Token -> P a) -> P a
lexer cont = do
- tok@(L _span _tok__) <- lexToken
--- trace ("token: " ++ show tok__) $ do
+ alr <- extension alternativeLayoutRule
+ let lexTokenFun = if alr then lexTokenAlr else lexToken
+ tok@(L _span _tok__) <- lexTokenFun
+ --trace ("token: " ++ show _tok__) $ do
cont tok
+lexTokenAlr :: P (Located Token)
+lexTokenAlr = do mPending <- popPendingImplicitToken
+ t <- case mPending of
+ Nothing ->
+ do mNext <- popNextToken
+ t <- case mNext of
+ Nothing -> lexToken
+ Just next -> return next
+ alternativeLayoutRuleToken t
+ Just t ->
+ return t
+ setAlrLastLoc (getLoc t)
+ case unLoc t of
+ ITwhere -> setAlrExpectingOCurly (Just ALRLayoutWhere)
+ ITlet -> setAlrExpectingOCurly (Just ALRLayoutLet)
+ ITof -> setAlrExpectingOCurly (Just ALRLayoutOf)
+ ITdo -> setAlrExpectingOCurly (Just ALRLayoutDo)
+ ITmdo -> setAlrExpectingOCurly (Just ALRLayoutDo)
+ ITrec -> setAlrExpectingOCurly (Just ALRLayoutDo)
+ _ -> return ()
+ return t
+
+alternativeLayoutRuleToken :: Located Token -> P (Located Token)
+alternativeLayoutRuleToken t
+ = do context <- getALRContext
+ lastLoc <- getAlrLastLoc
+ mExpectingOCurly <- getAlrExpectingOCurly
+ justClosedExplicitLetBlock <- getJustClosedExplicitLetBlock
+ setJustClosedExplicitLetBlock False
+ dflags <- getDynFlags
+ let transitional = xopt Opt_AlternativeLayoutRuleTransitional dflags
+ thisLoc = getLoc t
+ thisCol = srcSpanStartCol thisLoc
+ newLine = (lastLoc == noSrcSpan)
+ || (srcSpanStartLine thisLoc > srcSpanEndLine lastLoc)
+ case (unLoc t, context, mExpectingOCurly) of
+ -- This case handles a GHC extension to the original H98
+ -- layout rule...
+ (ITocurly, _, Just alrLayout) ->
+ do setAlrExpectingOCurly Nothing
+ let isLet = case alrLayout of
+ ALRLayoutLet -> True
+ _ -> False
+ setALRContext (ALRNoLayout (containsCommas ITocurly) isLet : context)
+ return t
+ -- ...and makes this case unnecessary
+ {-
+ -- I think our implicit open-curly handling is slightly
+ -- different to John's, in how it interacts with newlines
+ -- and "in"
+ (ITocurly, _, Just _) ->
+ do setAlrExpectingOCurly Nothing
+ setNextToken t
+ lexTokenAlr
+ -}
+ (_, ALRLayout _ col : ls, Just expectingOCurly)
+ | (thisCol > col) ||
+ (thisCol == col &&
+ isNonDecreasingIntentation expectingOCurly) ->
+ do setAlrExpectingOCurly Nothing
+ setALRContext (ALRLayout expectingOCurly thisCol : context)
+ setNextToken t
+ return (L thisLoc ITocurly)
+ | otherwise ->
+ do setAlrExpectingOCurly Nothing
+ setPendingImplicitTokens [L lastLoc ITccurly]
+ setNextToken t
+ return (L lastLoc ITocurly)
+ (_, _, Just expectingOCurly) ->
+ do setAlrExpectingOCurly Nothing
+ setALRContext (ALRLayout expectingOCurly thisCol : context)
+ setNextToken t
+ return (L thisLoc ITocurly)
+ -- We do the [] cases earlier than in the spec, as we
+ -- have an actual EOF token
+ (ITeof, ALRLayout _ _ : ls, _) ->
+ do setALRContext ls
+ setNextToken t
+ return (L thisLoc ITccurly)
+ (ITeof, _, _) ->
+ return t
+ -- the other ITeof case omitted; general case below covers it
+ (ITin, _, _)
+ | justClosedExplicitLetBlock ->
+ return t
+ (ITin, ALRLayout ALRLayoutLet _ : ls, _)
+ | newLine ->
+ do setPendingImplicitTokens [t]
+ setALRContext ls
+ return (L thisLoc ITccurly)
+ -- This next case is to handle a transitional issue:
+ (ITwhere, ALRLayout _ col : ls, _)
+ | newLine && thisCol == col && transitional ->
+ do addWarning Opt_WarnAlternativeLayoutRuleTransitional
+ thisLoc
+ (transitionalAlternativeLayoutWarning
+ "`where' clause at the same depth as implicit layout block")
+ setALRContext ls
+ setNextToken t
+ -- Note that we use lastLoc, as we may need to close
+ -- more layouts, or give a semicolon
+ return (L lastLoc ITccurly)
+ -- This next case is to handle a transitional issue:
+ (ITvbar, ALRLayout _ col : ls, _)
+ | newLine && thisCol == col && transitional ->
+ do addWarning Opt_WarnAlternativeLayoutRuleTransitional
+ thisLoc
+ (transitionalAlternativeLayoutWarning
+ "`|' at the same depth as implicit layout block")
+ setALRContext ls
+ setNextToken t
+ -- Note that we use lastLoc, as we may need to close
+ -- more layouts, or give a semicolon
+ return (L lastLoc ITccurly)
+ (_, ALRLayout _ col : ls, _)
+ | newLine && thisCol == col ->
+ do setNextToken t
+ return (L thisLoc ITsemi)
+ | newLine && thisCol < col ->
+ do setALRContext ls
+ setNextToken t
+ -- Note that we use lastLoc, as we may need to close
+ -- more layouts, or give a semicolon
+ return (L lastLoc ITccurly)
+ -- We need to handle close before open, as 'then' is both
+ -- an open and a close
+ (u, _, _)
+ | isALRclose u ->
+ case context of
+ ALRLayout _ _ : ls ->
+ do setALRContext ls
+ setNextToken t
+ return (L thisLoc ITccurly)
+ ALRNoLayout _ isLet : ls ->
+ do let ls' = if isALRopen u
+ then ALRNoLayout (containsCommas u) False : ls
+ else ls
+ setALRContext ls'
+ when isLet $ setJustClosedExplicitLetBlock True
+ return t
+ [] ->
+ do let ls = if isALRopen u
+ then [ALRNoLayout (containsCommas u) False]
+                                        else []
+ setALRContext ls
+ -- XXX This is an error in John's code, but
+ -- it looks reachable to me at first glance
+ return t
+ (u, _, _)
+ | isALRopen u ->
+ do setALRContext (ALRNoLayout (containsCommas u) False : context)
+ return t
+ (ITin, ALRLayout ALRLayoutLet _ : ls, _) ->
+ do setALRContext ls
+ setPendingImplicitTokens [t]
+ return (L thisLoc ITccurly)
+ (ITin, ALRLayout _ _ : ls, _) ->
+ do setALRContext ls
+ setNextToken t
+ return (L thisLoc ITccurly)
+ -- the other ITin case omitted; general case below covers it
+ (ITcomma, ALRLayout _ _ : ls, _)
+ | topNoLayoutContainsCommas ls ->
+ do setALRContext ls
+ setNextToken t
+ return (L thisLoc ITccurly)
+ (ITwhere, ALRLayout ALRLayoutDo _ : ls, _) ->
+ do setALRContext ls
+ setPendingImplicitTokens [t]
+ return (L thisLoc ITccurly)
+ -- the other ITwhere case omitted; general case below covers it
+ (_, _, _) -> return t
+
+transitionalAlternativeLayoutWarning :: String -> SDoc
+transitionalAlternativeLayoutWarning msg
+ = text "transitional layout will not be accepted in the future:"
+ $$ text msg
+
+isALRopen :: Token -> Bool
+isALRopen ITcase = True
+isALRopen ITif = True
+isALRopen ITthen = True
+isALRopen IToparen = True
+isALRopen ITobrack = True
+isALRopen ITocurly = True
+-- GHC Extensions:
+isALRopen IToubxparen = True
+isALRopen ITparenEscape = True
+isALRopen _ = False
+
+isALRclose :: Token -> Bool
+isALRclose ITof = True
+isALRclose ITthen = True
+isALRclose ITelse = True
+isALRclose ITcparen = True
+isALRclose ITcbrack = True
+isALRclose ITccurly = True
+-- GHC Extensions:
+isALRclose ITcubxparen = True
+isALRclose _ = False
+
+isNonDecreasingIntentation :: ALRLayout -> Bool
+isNonDecreasingIntentation ALRLayoutDo = True
+isNonDecreasingIntentation _ = False
+
+containsCommas :: Token -> Bool
+containsCommas IToparen = True
+containsCommas ITobrack = True
+-- John doesn't have {} as containing commas, but records contain them,
+-- which caused a problem parsing Cabal's Distribution.Simple.InstallDirs
+-- (defaultInstallDirs).
+containsCommas ITocurly = True
+-- GHC Extensions:
+containsCommas IToubxparen = True
+containsCommas _ = False
+
+topNoLayoutContainsCommas :: [ALRContext] -> Bool
+topNoLayoutContainsCommas [] = False
+topNoLayoutContainsCommas (ALRLayout _ _ : ls) = topNoLayoutContainsCommas ls
+topNoLayoutContainsCommas (ALRNoLayout b _ : _) = b
+
lexToken :: P (Located Token)
lexToken = do
- inp@(AI loc1 _ buf) <- getInput
+ inp@(AI loc1 buf) <- getInput
sc <- getLexState
exts <- getExts
case alexScanUser exts inp sc of
AlexEOF -> do
let span = mkSrcSpan loc1 loc1
- setLastToken span 0 0
+ setLastToken span 0
return (L span ITeof)
- AlexError (AI loc2 _ buf) ->
+ AlexError (AI loc2 buf) ->
reportLexError loc1 loc2 buf "lexical error"
AlexSkip inp2 _ -> do
setInput inp2
lexToken
- AlexToken inp2@(AI end _ buf2) _ t -> do
+ AlexToken inp2@(AI end buf2) _ t -> do
setInput inp2
let span = mkSrcSpan loc1 end
let bytes = byteDiff buf buf2
- span `seq` setLastToken span bytes bytes
+ span `seq` setLastToken span bytes
t span buf bytes
reportLexError :: SrcLoc -> SrcLoc -> StringBuffer -> [Char] -> P a
lexTokenStream :: StringBuffer -> SrcLoc -> DynFlags -> ParseResult [Located Token]
lexTokenStream buf loc dflags = unP go initState
- where initState = mkPState buf loc (dopt_set (dopt_unset dflags Opt_Haddock) Opt_KeepRawTokenStream)
+ where dflags' = dopt_set (dopt_unset dflags Opt_Haddock) Opt_KeepRawTokenStream
+ initState = mkPState dflags' buf loc
go = do
ltok <- lexer return
case ltok of
L _ ITeof -> return []
_ -> liftM (ltok:) go
+
+linePrags = Map.singleton "line" (begin line_prag2)
+
+fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag),
+ ("options_ghc", lex_string_prag IToptions_prag),
+ ("options_haddock", lex_string_prag ITdocOptions),
+ ("language", token ITlanguage_prag),
+ ("include", lex_string_prag ITinclude_prag)])
+
+ignoredPrags = Map.fromList (map ignored pragmas)
+ where ignored opt = (opt, nested_comment lexToken)
+ impls = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"]
+ options_pragmas = map ("options_" ++) impls
+ -- CFILES is a hugs-only thing.
+ pragmas = options_pragmas ++ ["cfiles", "contract"]
+
+oneWordPrags = Map.fromList([("rules", rulePrag),
+ ("inline", token (ITinline_prag Inline FunLike)),
+ ("inlinable", token (ITinline_prag Inlinable FunLike)),
+ ("inlineable", token (ITinline_prag Inlinable FunLike)),
+ -- Spelling variant
+ ("notinline", token (ITinline_prag NoInline FunLike)),
+ ("specialize", token ITspec_prag),
+ ("source", token ITsource_prag),
+ ("warning", token ITwarning_prag),
+ ("deprecated", token ITdeprecated_prag),
+ ("scc", token ITscc_prag),
+ ("generated", token ITgenerated_prag),
+ ("core", token ITcore_prag),
+ ("unpack", token ITunpack_prag),
+ ("ann", token ITann_prag)])
+
+twoWordPrags = Map.fromList([("inline conlike", token (ITinline_prag Inline ConLike)),
+ ("notinline conlike", token (ITinline_prag NoInline ConLike)),
+ ("specialize inline", token (ITspec_inline_prag True)),
+ ("specialize notinline", token (ITspec_inline_prag False))])
+
+
+dispatch_pragmas :: Map String Action -> Action
+dispatch_pragmas prags span buf len = case Map.lookup (clean_pragma (lexemeToString buf len)) prags of
+ Just found -> found span buf len
+ Nothing -> lexError "unknown pragma"
+
+known_pragma :: Map String Action -> AlexAccPred Int
+known_pragma prags _ _ len (AI _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags)
+ && (nextCharIs buf (\c -> not (isAlphaNum c || c == '_')))
+
+clean_pragma :: String -> String
+clean_pragma prag = canon_ws (map toLower (unprefix prag))
+ where unprefix prag' = case stripPrefix "{-#" prag' of
+ Just rest -> rest
+ Nothing -> prag'
+ canonical prag' = case prag' of
+ "noinline" -> "notinline"
+ "specialise" -> "specialize"
+ "constructorlike" -> "conlike"
+ _ -> prag'
+ canon_ws s = unwords (map canonical (words s))
}