,("→", ITrarrow, unicodeSyntaxEnabled)
,("←", ITlarrow, unicodeSyntaxEnabled)
,("⋯", ITdotdot, unicodeSyntaxEnabled)
+
+ ,("⤙", ITlarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+ ,("⤚", ITrarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+ ,("⤛", ITLarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+ ,("⤜", ITRarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+
+ ,("★", ITstar, unicodeSyntaxEnabled)
+
-- ToDo: ideally, → and ∷ should be "specials", so that they cannot
-- form part of a large operator. This would let us have a better
-- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe).
nextCharIs buf p = not (atEnd buf) && p (currentChar buf)
notFollowedBy :: Char -> AlexAccPred Int
-notFollowedBy char _ _ _ (AI _ _ buf)
+notFollowedBy char _ _ _ (AI _ buf)
= nextCharIs buf (/=char)
notFollowedBySymbol :: AlexAccPred Int
-notFollowedBySymbol _ _ _ (AI _ _ buf)
+notFollowedBySymbol _ _ _ (AI _ buf)
= nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
-- We must reject doc comments as being ordinary comments everywhere.
-- valid in all states, but the doc-comment rules are only valid in
-- the non-layout states.
isNormalComment :: AlexAccPred Int
-isNormalComment bits _ _ (AI _ _ buf)
+isNormalComment bits _ _ (AI _ buf)
| haddockEnabled bits = notFollowedByDocOrPragma
| otherwise = nextCharIs buf (/='#')
where
spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf))
{-
-haddockDisabledAnd p bits _ _ (AI _ _ buf)
+haddockDisabledAnd p bits _ _ (AI _ buf)
= if haddockEnabled bits then False else (p buf)
-}
atEOL :: AlexAccPred Int
-atEOL _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf == '\n'
+atEOL _ _ _ (AI _ buf) = atEnd buf || currentChar buf == '\n'
ifExtension :: (Int -> Bool) -> AlexAccPred Int
ifExtension pred bits _ _ _ = pred bits
withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (Located Token))
-> P (Located Token)
withLexedDocType lexDocComment = do
- input@(AI _ _ buf) <- getInput
+ input@(AI _ buf) <- getInput
case prevChar buf ' ' of
'|' -> lexDocComment input ITdocCommentNext False
'^' -> lexDocComment input ITdocCommentPrev False
-- it writes the wrong token length to the parser state. This function is
-- called afterwards, so it can just update the state.
--- This is complicated by the fact that Haddock tokens can span multiple lines,
--- which is something that the original lexer didn't account for.
--- I have added last_line_len in the parser state which represents the length
--- of the part of the token that is on the last line. It is now used for layout
--- calculation in pushCurrentContext instead of last_len. last_len is, like it
--- was before, the full length of the token, and it is now only used for error
--- messages. /Waern
-
docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer ->
SrcSpan -> P (Located Token)
docCommentEnd input commentAcc docType buf span = do
setInput input
- let (AI loc last_offs nextBuf) = input
+ let (AI loc nextBuf) = input
comment = reverse commentAcc
span' = mkSrcSpan (srcSpanStart span) loc
last_len = byteDiff buf nextBuf
- last_line_len = if (last_offs - last_len < 0)
- then last_offs
- else last_len
-
- span `seq` setLastToken span' last_len last_line_len
+ span `seq` setLastToken span' last_len
return (L span' (docType comment))
errBrace :: AlexInput -> SrcSpan -> P a
-errBrace (AI end _ _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'"
+errBrace (AI end _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'"
open_brace, close_brace :: Action
open_brace span _str _len = do
new_layout_context :: Bool -> Action
new_layout_context strict span _buf _len = do
_ <- popLexState
- (AI _ offset _) <- getInput
+ (AI l _) <- getInput
+ let offset = srcLocCol l
ctx <- getContext
case ctx of
Layout prev_off : _ |
setLine :: Int -> Action
setLine code span buf len = do
let line = parseUnsignedInteger buf len 10 octDecDigit
- setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0)
+ setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
-- subtract one: the line number refers to the *following* line
_ <- popLexState
pushLexState code
setFile :: Int -> Action
setFile code span buf len = do
let file = lexemeToFastString (stepOn buf) (len-2)
+ setAlrLastLoc noSrcSpan
setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
_ <- popLexState
pushLexState code
= case alexGetChar i of
Just (c,i') | c == x -> isString i' xs
_other -> False
- err (AI end _ _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma"
+ err (AI end _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma"
-- -----------------------------------------------------------------------------
lex_string s = do
i <- getInput
case alexGetChar' i of
- Nothing -> lit_error
+ Nothing -> lit_error i
Just ('"',i) -> do
setInput i
Just ('\\',i)
| Just ('&',i) <- next -> do
setInput i; lex_string s
- | Just (c,i) <- next, is_space c -> do
+ | Just (c,i) <- next, c <= '\x7f' && is_space c -> do
+ -- is_space only works for <= '\x7f' (#3751)
setInput i; lex_stringgap s
where next = alexGetChar' i
- Just (c, i) -> do
- c' <- lex_char c i
- lex_string (c':s)
+ Just (c, i1) -> do
+ case c of
+ '\\' -> do setInput i1; c' <- lex_escape; lex_string (c':s)
+ c | isAny c -> do setInput i1; lex_string (c:s)
+ _other -> lit_error i
lex_stringgap :: String -> P Token
lex_stringgap s = do
- c <- getCharOrFail
+ i <- getInput
+ c <- getCharOrFail i
case c of
'\\' -> lex_string s
c | is_space c -> lex_stringgap s
- _other -> lit_error
+ _other -> lit_error i
lex_char_tok :: Action
i1 <- getInput -- Look ahead to first character
let loc = srcSpanStart span
case alexGetChar' i1 of
- Nothing -> lit_error
+ Nothing -> lit_error i1
- Just ('\'', i2@(AI end2 _ _)) -> do -- We've seen ''
+ Just ('\'', i2@(AI end2 _)) -> do -- We've seen ''
th_exts <- extension thEnabled
if th_exts then do
setInput i2
return (L (mkSrcSpan loc end2) ITtyQuote)
- else lit_error
+ else lit_error i1
- Just ('\\', i2@(AI _end2 _ _)) -> do -- We've seen 'backslash
+ Just ('\\', i2@(AI _end2 _)) -> do -- We've seen 'backslash
setInput i2
lit_ch <- lex_escape
- mc <- getCharOrFail -- Trailing quote
+ i3 <- getInput
+ mc <- getCharOrFail i3 -- Trailing quote
if mc == '\'' then finish_char_tok loc lit_ch
- else do setInput i2; lit_error
+ else lit_error i3
- Just (c, i2@(AI _end2 _ _))
- | not (isAny c) -> lit_error
+ Just (c, i2@(AI _end2 _))
+ | not (isAny c) -> lit_error i1
| otherwise ->
-- We've seen 'x, where x is a valid character
-- (including the possibility of EOF)
-- If TH is on, just parse the quote only
th_exts <- extension thEnabled
- let (AI end _ _) = i1
+ let (AI end _) = i1
if th_exts then return (L (mkSrcSpan loc end) ITvarQuote)
- else do setInput i2; lit_error
+ else lit_error i2
finish_char_tok :: SrcLoc -> Char -> P (Located Token)
finish_char_tok loc ch -- We've already seen the closing quote
-- Just need to check for trailing #
= do magicHash <- extension magicHashEnabled
- i@(AI end _ _) <- getInput
+ i@(AI end _) <- getInput
if magicHash then do
case alexGetChar' i of
- Just ('#',i@(AI end _ _)) -> do
+ Just ('#',i@(AI end _)) -> do
setInput i
return (L (mkSrcSpan loc end) (ITprimchar ch))
_other ->
return (L (mkSrcSpan loc end) (ITchar ch))
- else do
+ else do
return (L (mkSrcSpan loc end) (ITchar ch))
-lex_char :: Char -> AlexInput -> P Char
-lex_char c inp = do
- case c of
- '\\' -> do setInput inp; lex_escape
- c | isAny c -> do setInput inp; return c
- _other -> lit_error
-
isAny :: Char -> Bool
isAny c | c > '\x7f' = isPrint c
| otherwise = is_any c
lex_escape :: P Char
lex_escape = do
- c <- getCharOrFail
+ i0 <- getInput
+ c <- getCharOrFail i0
case c of
'a' -> return '\a'
'b' -> return '\b'
'\\' -> return '\\'
'"' -> return '\"'
'\'' -> return '\''
- '^' -> do c <- getCharOrFail
+ '^' -> do i1 <- getInput
+ c <- getCharOrFail i1
if c >= '@' && c <= '_'
then return (chr (ord c - ord '@'))
- else lit_error
+ else lit_error i1
'x' -> readNum is_hexdigit 16 hexDigit
'o' -> readNum is_octdigit 8 octDecDigit
c1 -> do
i <- getInput
case alexGetChar' i of
- Nothing -> lit_error
+ Nothing -> lit_error i0
Just (c2,i2) ->
case alexGetChar' i2 of
- Nothing -> do setInput i2; lit_error
+ Nothing -> do lit_error i0
Just (c3,i3) ->
let str = [c1,c2,c3] in
case [ (c,rest) | (p,c) <- silly_escape_chars,
(escape_char,_:_):_ -> do
setInput i2
return escape_char
- [] -> lit_error
+ [] -> lit_error i0
readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
readNum is_digit base conv = do
i <- getInput
- c <- getCharOrFail
+ c <- getCharOrFail i
if is_digit c
then readNum2 is_digit base conv (conv c)
- else do setInput i; lit_error
+ else lit_error i
readNum2 :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> P Char
readNum2 is_digit base conv i = do
_other -> do
if i >= 0 && i <= 0x10FFFF
then do setInput input; return (chr i)
- else lit_error
+ else lit_error input
silly_escape_chars :: [(String, Char)]
silly_escape_chars = [
-- the position of the error in the buffer. This is so that we can report
-- a correct location to the user, but also so we can detect UTF-8 decoding
-- errors if they occur.
-lit_error :: P a
-lit_error = lexError "lexical error in string/character literal"
+lit_error :: AlexInput -> P a
+lit_error i = do setInput i; lexError "lexical error in string/character literal"
-getCharOrFail :: P Char
-getCharOrFail = do
- i <- getInput
+getCharOrFail :: AlexInput -> P Char
+getCharOrFail i = do
case alexGetChar' i of
Nothing -> lexError "unexpected end-of-file in string/character literal"
Just (c,i) -> do setInput i; return c
lex_quasiquote s = do
i <- getInput
case alexGetChar' i of
- Nothing -> lit_error
+ Nothing -> lit_error i
Just ('\\',i)
| Just ('|',i) <- next -> do
dflags :: DynFlags,
messages :: Messages,
last_loc :: SrcSpan, -- pos of previous token
- last_offs :: !Int, -- offset of the previous token from the
- -- beginning of the current line.
- -- \t is equal to 8 spaces.
last_len :: !Int, -- len of previous token
- last_line_len :: !Int,
loc :: SrcLoc, -- current loc (end of prev token + 1)
extsBitmap :: !Int, -- bitmap that determines permitted extensions
context :: [LayoutContext],
lex_state :: [Int],
-- Used in the alternative layout rule:
+ -- These tokens are the next ones to be sent out. They are
+ -- just blindly emitted, without the rule looking at them again:
alr_pending_implicit_tokens :: [Located Token],
+ -- This is the next token to be considered or, if it is Nothing,
+ -- we need to get the next token from the input stream:
alr_next_token :: Maybe (Located Token),
+ -- This is what we consider to be the location of the last token
+ -- emitted:
alr_last_loc :: SrcSpan,
+ -- The stack of layout contexts:
alr_context :: [ALRContext],
+ -- Are we expecting a '{'? If it's Just, then the ALRLayout tells
+ -- us what sort of layout the '{' will open:
alr_expecting_ocurly :: Maybe ALRLayout
}
-- last_loc and last_len are used when generating error messages,
getSrcLoc :: P SrcLoc
getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
-setLastToken :: SrcSpan -> Int -> Int -> P ()
-setLastToken loc len line_len = P $ \s -> POk s {
+setLastToken :: SrcSpan -> Int -> P ()
+setLastToken loc len = P $ \s -> POk s {
last_loc=loc,
- last_len=len,
- last_line_len=line_len
-} ()
+ last_len=len
+ } ()
-data AlexInput = AI SrcLoc {-#UNPACK#-}!Int StringBuffer
+data AlexInput = AI SrcLoc StringBuffer
alexInputPrevChar :: AlexInput -> Char
-alexInputPrevChar (AI _ _ buf) = prevChar buf '\n'
+alexInputPrevChar (AI _ buf) = prevChar buf '\n'
alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
-alexGetChar (AI loc ofs s)
+alexGetChar (AI loc s)
| atEnd s = Nothing
- | otherwise = adj_c `seq` loc' `seq` ofs' `seq` s' `seq`
+ | otherwise = adj_c `seq` loc' `seq` s' `seq`
--trace (show (ord c)) $
- Just (adj_c, (AI loc' ofs' s'))
+ Just (adj_c, (AI loc' s'))
where (c,s') = nextChar s
loc' = advanceSrcLoc loc c
- ofs' = advanceOffs c ofs
non_graphic = '\x0'
upper = '\x1'
-- This version does not squash unicode characters, it is used when
-- lexing strings.
alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
-alexGetChar' (AI loc ofs s)
+alexGetChar' (AI loc s)
| atEnd s = Nothing
- | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq`
+ | otherwise = c `seq` loc' `seq` s' `seq`
--trace (show (ord c)) $
- Just (c, (AI loc' ofs' s'))
+ Just (c, (AI loc' s'))
where (c,s') = nextChar s
loc' = advanceSrcLoc loc c
- ofs' = advanceOffs c ofs
-
-advanceOffs :: Char -> Int -> Int
-advanceOffs '\n' _ = 0
-advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8
-advanceOffs _ offs = offs + 1
getInput :: P AlexInput
-getInput = P $ \s@PState{ loc=l, last_offs=o, buffer=b } -> POk s (AI l o b)
+getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (AI l b)
setInput :: AlexInput -> P ()
-setInput (AI l o b) = P $ \s -> POk s{ loc=l, last_offs=o, buffer=b } ()
+setInput (AI l b) = P $ \s -> POk s{ loc=l, buffer=b } ()
pushLexState :: Int -> P ()
pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
messages = emptyMessages,
dflags = dynflags,
last_loc = mkSrcSpan loc loc,
- last_offs = 0,
last_len = 0,
- last_line_len = 0,
loc = loc,
extsBitmap = 0,
context = [],
dflags = flags,
messages = emptyMessages,
last_loc = mkSrcSpan loc loc,
- last_offs = 0,
last_len = 0,
- last_line_len = 0,
loc = loc,
extsBitmap = fromIntegral bitmap,
context = [],
-- This is only used at the outer level of a module when the 'module'
-- keyword is missing.
pushCurrentContext :: P ()
-pushCurrentContext = P $ \ s@PState{ last_offs=offs, last_line_len=len, context=ctx } ->
- POk s{context = Layout (offs-len) : ctx} ()
---trace ("off: " ++ show offs ++ ", len: " ++ show len) $ POk s{context = Layout (offs-len) : ctx} ()
+pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
+ POk s{context = Layout (srcSpanStartCol loc) : ctx} ()
getOffside :: P Ordering
-getOffside = P $ \s@PState{last_offs=offs, context=stk} ->
+getOffside = P $ \s@PState{last_loc=loc, context=stk} ->
+ let offs = srcSpanStartCol loc in
let ord = case stk of
- (Layout n:_) -> compare offs n
+ (Layout n:_) -> --trace ("layout: " ++ show n ++ ", offs: " ++ show offs) $
+ compare offs n
_ -> GT
in POk s ord
lexError :: String -> P a
lexError str = do
loc <- getSrcLoc
- (AI end _ buf) <- getInput
+ (AI end buf) <- getInput
reportLexError loc end buf str
-- -----------------------------------------------------------------------------
alr <- extension alternativeLayoutRule
let lexTokenFun = if alr then lexTokenAlr else lexToken
tok@(L _span _tok__) <- lexTokenFun
--- trace ("token: " ++ show tok__) $ do
+ --trace ("token: " ++ show _tok__) $ do
cont tok
lexTokenAlr :: P (Located Token)
mExpectingOCurly <- getAlrExpectingOCurly
let thisLoc = getLoc t
thisCol = srcSpanStartCol thisLoc
- newLine = srcSpanStartLine thisLoc > srcSpanEndLine lastLoc
+ newLine = (lastLoc == noSrcSpan)
+ || (srcSpanStartLine thisLoc > srcSpanEndLine lastLoc)
case (unLoc t, context, mExpectingOCurly) of
+ -- This case handles a GHC extension to the original H98
+ -- layout rule...
+ (ITocurly, _, Just _) ->
+ do setAlrExpectingOCurly Nothing
+ setALRContext (ALRNoLayout (containsCommas ITocurly) : context)
+ return t
+ -- ...and makes this case unnecessary
+ {-
-- I think our implicit open-curly handling is slightly
-- different to John's, in how it interacts with newlines
-- and "in"
do setAlrExpectingOCurly Nothing
setNextToken t
lexTokenAlr
+ -}
(_, ALRLayout _ col : ls, Just expectingOCurly)
- | thisCol > col ->
+ | (thisCol > col) ||
+ (thisCol == col &&
+ isNonDecreasingIntentation expectingOCurly) ->
do setAlrExpectingOCurly Nothing
setALRContext (ALRLayout expectingOCurly thisCol : context)
setNextToken t
return (L thisLoc ITocurly)
| otherwise ->
do setAlrExpectingOCurly Nothing
- setPendingImplicitTokens [L thisLoc ITccurly]
+ setPendingImplicitTokens [L lastLoc ITccurly]
setNextToken t
- return (L thisLoc ITocurly)
+ return (L lastLoc ITocurly)
(_, _, Just expectingOCurly) ->
do setAlrExpectingOCurly Nothing
setALRContext (ALRLayout expectingOCurly thisCol : context)
isALRclose ITcubxparen = True
isALRclose _ = False
+isNonDecreasingIntentation :: ALRLayout -> Bool
+isNonDecreasingIntentation ALRLayoutDo = True
+isNonDecreasingIntentation _ = False
+
containsCommas :: Token -> Bool
containsCommas IToparen = True
containsCommas ITobrack = True
+-- John doesn't have {} as containing commas, but records contain them,
+-- which caused a problem parsing Cabal's Distribution.Simple.InstallDirs
+-- (defaultInstallDirs).
+containsCommas ITocurly = True
-- GHC Extensions:
containsCommas IToubxparen = True
containsCommas _ = False
lexToken :: P (Located Token)
lexToken = do
- inp@(AI loc1 _ buf) <- getInput
+ inp@(AI loc1 buf) <- getInput
sc <- getLexState
exts <- getExts
case alexScanUser exts inp sc of
AlexEOF -> do
let span = mkSrcSpan loc1 loc1
- setLastToken span 0 0
+ setLastToken span 0
return (L span ITeof)
- AlexError (AI loc2 _ buf) ->
+ AlexError (AI loc2 buf) ->
reportLexError loc1 loc2 buf "lexical error"
AlexSkip inp2 _ -> do
setInput inp2
lexToken
- AlexToken inp2@(AI end _ buf2) _ t -> do
+ AlexToken inp2@(AI end buf2) _ t -> do
setInput inp2
let span = mkSrcSpan loc1 end
let bytes = byteDiff buf buf2
- span `seq` setLastToken span bytes bytes
+ span `seq` setLastToken span bytes
t span buf bytes
reportLexError :: SrcLoc -> SrcLoc -> StringBuffer -> [Char] -> P a
Nothing -> lexError "unknown pragma"
known_pragma :: Map String Action -> AlexAccPred Int
-known_pragma prags _ _ len (AI _ _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags)
+known_pragma prags _ _ len (AI _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags)
&& (nextCharIs buf (\c -> not (isAlphaNum c || c == '_')))
clean_pragma :: String -> String