X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Fparser%2FLex.lhs;h=d559150c85db71f9c50b776e1ed9d62a367b8375;hb=cd0f89a0bf35c36575ea89d7c7599473a3600683;hp=ba2ed1fd63156c4e5eb0e6165ab460593bdce595;hpb=7e26eb05a84f5245ff3e5b1a6805d1fcc5cce127;p=ghc-hetmet.git diff --git a/ghc/compiler/parser/Lex.lhs b/ghc/compiler/parser/Lex.lhs index ba2ed1f..d559150 100644 --- a/ghc/compiler/parser/Lex.lhs +++ b/ghc/compiler/parser/Lex.lhs @@ -16,54 +16,42 @@ An example that provokes the error is -------------------------------------------------------- \begin{code} -{-# OPTIONS -#include "ctypes.h" #-} - module Lex ( - ifaceParseErr, + srcParseErr, -- Monad for parser Token(..), lexer, ParseResult(..), PState(..), - checkVersion, + ExtFlags(..), mkPState, StringBuffer, P, thenP, thenP_, returnP, mapP, failP, failMsgP, - getSrcLocP, getSrcFile, + getSrcLocP, setSrcLocP, getSrcFile, layoutOn, layoutOff, pushContext, popContext ) where #include "HsVersions.h" -import Char ( ord, isSpace, toUpper ) -import List ( isSuffixOf ) +import Char ( toUpper, isDigit, chr, ord ) +import Ratio ( (%) ) -import IdInfo ( InlinePragInfo(..), CprInfo(..) ) -import Name ( isLowerISO, isUpperISO ) -import PrelMods ( mkTupNameStr, mkUbxTupNameStr ) -import CmdLineOpts ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck ) -import Demand ( Demand(..) {- instance Read -} ) -import UniqFM ( UniqFM, listToUFM, lookupUFM) -import BasicTypes ( NewOrData(..) ) +import PrelNames ( mkTupNameStr ) +import ForeignCall ( Safety(..) ) +import UniqFM ( listToUFM, lookupUFM ) +import BasicTypes ( Boxity(..) ) import SrcLoc ( SrcLoc, incSrcLine, srcLocFile, srcLocLine, replaceSrcLine, mkSrcLoc ) -import Maybes ( MaybeErr(..) ) import ErrUtils ( Message ) import Outputable import FastString import StringBuffer -import GlaExts -import ST ( runST ) - -#if __GLASGOW_HASKELL__ >= 303 -import Bits -import Word -#endif +import Ctype -import Char ( chr ) -import Addr -import PrelRead ( readRational__ ) -- Glasgow non-std +import GLAEXTS +import DATA_BITS ( Bits(..) ) +import DATA_INT ( Int32 ) \end{code} %************************************************************************ @@ -120,56 +108,36 @@ data Token | ITthen | ITtype | ITwhere - | ITscc + | ITscc -- ToDo: remove (we use {-# SCC "..." #-} now) | ITforall -- GHC extension keywords | ITforeign | ITexport | ITlabel | ITdynamic + | ITsafe + | ITthreadsafe | ITunsafe + | ITwith | ITstdcallconv | ITccallconv - - | ITinterface -- interface keywords - | IT__export - | ITdepends - | IT__forall - | ITletrec - | ITcoerce - | ITinlineMe - | ITinlineCall - | ITccall (Bool,Bool,Bool) -- (is_dyn, is_casm, may_gc) - | ITdefaultbranch - | ITbottom - | ITinteger_lit - | ITfloat_lit - | ITrational_lit - | ITaddr_lit - | ITlit_lit - | ITstring_lit - | ITtypeapp - | ITusage - | ITfuall - | ITarity - | ITspecialise - | ITnocaf - | ITunfold InlinePragInfo - | ITstrict ([Demand], Bool) - | ITrules - | ITcprinfo (CprInfo) - | IT__scc - | ITsccAllCafs + | ITdotnet + | ITccall (Bool,Bool,Safety) -- (is_dyn, is_casm, may_gc) + | ITmdo | ITspecialise_prag -- Pragmas | ITsource_prag | ITinline_prag | ITnoinline_prag | ITrules_prag + | ITdeprecated_prag | ITline_prag + | ITscc_prag + | ITcore_prag -- hdaume: core annotations | ITclose_prag | ITdotdot -- reserved symbols + | ITcolon | ITdcolon | ITequal | ITlam @@ -181,14 +149,19 @@ data Token | ITdarrow | ITminus | ITbang + | ITstar | ITdot | ITbiglam -- GHC-extension symbols | ITocurly -- special symbols | ITccurly + | ITocurlybar -- {|, for type applications + | ITccurlybar -- |}, for type applications | ITvccurly | ITobrack + | ITopabrack -- [:, for parallel arrays with -fparr + | ITcpabrack -- :], for parallel arrays with -fparr | ITcbrack | IToparen | ITcparen @@ -199,32 +172,57 @@ data Token | ITunderscore | ITbackquote - | ITvarid FAST_STRING -- identifiers - | ITconid FAST_STRING - | ITvarsym FAST_STRING - | ITconsym FAST_STRING - | ITqvarid (FAST_STRING,FAST_STRING) - | ITqconid (FAST_STRING,FAST_STRING) - | ITqvarsym (FAST_STRING,FAST_STRING) - | ITqconsym (FAST_STRING,FAST_STRING) + | ITvarid FastString -- identifiers + | ITconid FastString + | ITvarsym FastString + | ITconsym FastString + | ITqvarid (FastString,FastString) + | ITqconid (FastString,FastString) + | ITqvarsym (FastString,FastString) + | ITqconsym (FastString,FastString) + + | ITdupipvarid FastString -- GHC extension: implicit param: ?x + | ITsplitipvarid FastString -- GHC extension: implicit param: %x | ITpragma StringBuffer - | ITchar Char - | ITstring FAST_STRING - | ITinteger Integer + | ITchar Int + | ITstring FastString + | ITinteger Integer | ITrational Rational - | ITprimchar Char - | ITprimstring FAST_STRING + | ITprimchar Int + | ITprimstring FastString | ITprimint Integer | ITprimfloat Rational | ITprimdouble Rational - | ITlitlit FAST_STRING + | ITlitlit FastString + + -- MetaHaskell extension tokens + | ITopenExpQuote -- [| or [e| + | ITopenPatQuote -- [p| + | ITopenDecQuote -- [d| + | ITopenTypQuote -- [t| + | ITcloseQuote -- |] + | ITidEscape FastString -- $x + | ITparenEscape -- $( + | ITreifyType + | ITreifyDecl + | ITreifyFixity + + -- Arrow notation extension + | ITproc + | ITrec + | IToparenbar -- (| + | ITcparenbar -- |) + | ITlarrowtail -- -< + | ITrarrowtail -- >- + | ITLarrowtail -- -<< + | ITRarrowtail -- >>- | ITunknown String -- Used when the lexer can't make sense of it | ITeof -- end of file token - deriving Text -- debugging + deriving Show -- debugging \end{code} ----------------------------------------------------------------------------- @@ -232,7 +230,7 @@ Keyword Lists \begin{code} pragmaKeywordsFM = listToUFM $ - map (\ (x,y) -> (_PK_ x,y)) + map (\ (x,y) -> (mkFastString x,y)) [( "SPECIALISE", ITspecialise_prag ), ( "SPECIALIZE", ITspecialise_prag ), ( "SOURCE", ITsource_prag ), @@ -241,11 +239,14 @@ pragmaKeywordsFM = listToUFM $ ( "NOTINLINE", ITnoinline_prag ), ( "LINE", ITline_prag ), ( "RULES", ITrules_prag ), - ( "RULEZ", ITrules_prag ) -- american spelling :-) + ( "RULEZ", ITrules_prag ), -- american spelling :-) + ( "SCC", ITscc_prag ), + ( "CORE", ITcore_prag ), -- hdaume: core annotation + ( "DEPRECATED", ITdeprecated_prag ) ] haskellKeywordsFM = listToUFM $ - map (\ (x,y) -> (_PK_ x,y)) + map (\ (x,y) -> (mkFastString x,y)) [( "_", ITunderscore ), ( "as", ITas ), ( "case", ITcase ), @@ -271,80 +272,97 @@ haskellKeywordsFM = listToUFM $ ( "then", ITthen ), ( "type", ITtype ), ( "where", ITwhere ), - ( "_scc_", ITscc ) + ( "_scc_", ITscc ) -- ToDo: remove ] +isSpecial :: Token -> Bool +-- If we see M.x, where x is a keyword, but +-- is special, we treat is as just plain M.x, +-- not as a keyword. +isSpecial ITas = True +isSpecial IThiding = True +isSpecial ITqualified = True +isSpecial ITforall = True +isSpecial ITexport = True +isSpecial ITlabel = True +isSpecial ITdynamic = True +isSpecial ITsafe = True +isSpecial ITthreadsafe = True +isSpecial ITunsafe = True +isSpecial ITwith = True +isSpecial ITccallconv = True +isSpecial ITstdcallconv = True +isSpecial ITmdo = True +isSpecial _ = False + +-- the bitmap provided as the third component indicates whether the +-- corresponding extension keyword is valid under the extension options +-- provided to the compiler; if the extension corresponding to *any* of the +-- bits set in the bitmap is enabled, the keyword is valid (this setup +-- facilitates using a keyword in two different extensions that can be +-- activated independently) +-- ghcExtensionKeywordsFM = listToUFM $ - map (\ (x,y) -> (_PK_ x,y)) - [ ( "forall", ITforall ), - ( "foreign", ITforeign ), - ( "export", ITexport ), - ( "label", ITlabel ), - ( "dynamic", ITdynamic ), - ( "unsafe", ITunsafe ), - ( "stdcall", ITstdcallconv), - ( "ccall", ITccallconv), - ("_ccall_", ITccall (False, False, False)), - ("_ccall_GC_", ITccall (False, False, True)), - ("_casm_", ITccall (False, True, False)), - ("_casm_GC_", ITccall (False, True, True)), - - -- interface keywords - ("__interface", ITinterface), - ("__export", IT__export), - ("__depends", ITdepends), - ("__forall", IT__forall), - ("__letrec", ITletrec), - ("__coerce", ITcoerce), - ("__inline_me", ITinlineMe), - ("__inline_call", ITinlineCall), - ("__depends", ITdepends), - ("__DEFAULT", ITdefaultbranch), - ("__bot", ITbottom), - ("__integer", ITinteger_lit), - ("__float", ITfloat_lit), - ("__rational", ITrational_lit), - ("__addr", ITaddr_lit), - ("__litlit", ITlit_lit), - ("__string", ITstring_lit), - ("__a", ITtypeapp), - ("__u", ITusage), - ("__fuall", ITfuall), - ("__A", ITarity), - ("__P", ITspecialise), - ("__C", ITnocaf), - ("__R", ITrules), - ("__U", ITunfold NoInlinePragInfo), - - ("__ccall", ITccall (False, False, False)), - ("__ccall_GC", ITccall (False, False, True)), - ("__dyn_ccall", ITccall (True, False, False)), - ("__dyn_ccall_GC", ITccall (True, False, True)), - ("__casm", ITccall (False, True, False)), - ("__dyn_casm", ITccall (True, True, False)), - ("__casm_GC", ITccall (False, True, True)), - ("__dyn_casm_GC", ITccall (True, True, True)), - - ("/\\", ITbiglam) + map (\(x, y, z) -> (mkFastString x, (y, z))) + [ ( "forall", ITforall, bit glaExtsBit), + ( "mdo", ITmdo, bit glaExtsBit), + ( "reifyDecl", ITreifyDecl, bit glaExtsBit), + ( "reifyType", ITreifyType, bit glaExtsBit), + ( "reifyFixity",ITreifyFixity, bit glaExtsBit), + + ( "foreign", ITforeign, bit ffiBit), + ( "export", ITexport, bit ffiBit), + ( "label", ITlabel, bit ffiBit), + ( "dynamic", ITdynamic, bit ffiBit), + ( "safe", ITsafe, bit ffiBit), + ( "threadsafe", ITthreadsafe, bit ffiBit), + ( "unsafe", ITunsafe, bit ffiBit), + ( "stdcall", ITstdcallconv, bit ffiBit), + ( "ccall", ITccallconv, bit ffiBit), + ( "dotnet", ITdotnet, bit ffiBit), + + ( "with", ITwith, bit withBit), + + ( "rec", ITrec, bit arrowsBit), + ( "proc", ITproc, bit arrowsBit), + + -- On death row + ("_ccall_", ITccall (False, False, PlayRisky), + bit glaExtsBit), + ("_ccall_GC_", ITccall (False, False, PlaySafe False), + bit glaExtsBit), + ("_casm_", ITccall (False, True, PlayRisky), + bit glaExtsBit), + ("_casm_GC_", ITccall (False, True, PlaySafe False), + bit glaExtsBit) ] - haskellKeySymsFM = listToUFM $ - map (\ (x,y) -> (_PK_ x,y)) - [ ("..", ITdotdot) - ,("::", ITdcolon) - ,("=", ITequal) - ,("\\", ITlam) - ,("|", ITvbar) - ,("<-", ITlarrow) - ,("->", ITrarrow) - ,("@", ITat) - ,("~", ITtilde) - ,("=>", ITdarrow) - ,("-", ITminus) - ,("!", ITbang) - ,(".", ITdot) -- sadly, for 'forall a . t' + map (\ (x,y,z) -> (mkFastString x,(y,z))) + [ ("..", ITdotdot, Nothing) + ,(":", ITcolon, Nothing) -- (:) is a reserved op, + -- meaning only list cons + ,("::", ITdcolon, Nothing) + ,("=", ITequal, Nothing) + ,("\\", ITlam, Nothing) + ,("|", ITvbar, Nothing) + ,("<-", ITlarrow, Nothing) + ,("->", ITrarrow, Nothing) + ,("@", ITat, Nothing) + ,("~", ITtilde, Nothing) + ,("=>", ITdarrow, Nothing) + ,("-", ITminus, Nothing) + ,("!", ITbang, Nothing) + + ,("*", ITstar, Just (bit glaExtsBit)) -- For data T (a::*) = MkT + ,(".", ITdot, Just (bit glaExtsBit)) -- For 'forall a . t' + + ,("-<", ITlarrowtail, Just (bit arrowsBit)) + ,(">-", ITrarrowtail, Just (bit arrowsBit)) + ,("-<<", ITLarrowtail, Just (bit arrowsBit)) + ,(">>-", ITRarrowtail, Just (bit arrowsBit)) ] + \end{code} ----------------------------------------------------------------------------- @@ -352,7 +370,8 @@ The lexical analyser Lexer state: - - (glaexts) lexing an interface file or -fglasgow-exts + - (exts) lexing a source with extensions, eg, an interface file or + with -fglasgow-exts - (bol) pointer to beginning of line (for column calculations) - (buf) pointer to beginning of token - (buf) pointer to current char @@ -362,14 +381,15 @@ Lexer state: lexer :: (Token -> P a) -> P a lexer cont buf s@(PState{ loc = loc, - glasgow_exts = glaexts, + extsBitmap = exts, bol = bol, atbol = atbol, context = ctx }) -- first, start a new lexeme and lose all the whitespace - = tab line bol atbol (stepOverLexeme buf) + = _scc_ "Lexer" + tab line bol atbol (stepOverLexeme buf) where line = srcLocLine loc @@ -391,32 +411,49 @@ lexer cont buf s@(PState{ if next `eqChar#` '-'# then trundle (n +# 1#) else if is_symbol next || n <# 2# then is_a_token - else case untilChar# (stepOnBy# buf n) '\n'# of - { buf' -> tab y bol atbol (stepOverLexeme buf') - } + else tab y bol atbol + (stepOnUntilChar# (stepOnBy# buf n) '\n'#) in trundle 1# -- comments and pragmas. We deal with LINE pragmas here, -- and throw out any unrecognised pragmas as comments. Any -- pragmas we know about are dealt with later (after any layout -- processing if necessary). - - '{'# | lookAhead# buf 1# `eqChar#` '-'# -> + '{'# | lookAhead# buf 1# `eqChar#` '-'# -> if lookAhead# buf 2# `eqChar#` '#'# then - if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else - case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1-> + case expandWhile# is_space (addToCurrentPos buf 3#) of { buf1-> case expandWhile# is_ident (stepOverLexeme buf1) of { buf2-> let lexeme = mkFastString -- ToDo: too slow (map toUpper (lexemeToString buf2)) in case lookupUFM pragmaKeywordsFM lexeme of - Just ITline_prag -> line_prag (lexer cont) buf2 s' + -- ignore RULES pragmas when -fglasgow-exts is off + Just ITrules_prag | not (glaExtsEnabled exts) -> + skip_to_end (stepOnBy# buf 2#) s' + Just ITline_prag -> + line_prag skip_to_end buf2 s' Just other -> is_a_token - Nothing -> skip_to_end (stepOnBy# buf 2#) + Nothing -> skip_to_end (stepOnBy# buf 2#) s' }} - - else skip_to_end (stepOnBy# buf 2#) + + else skip_to_end (stepOnBy# buf 2#) s' + where + skip_to_end = skipNestedComment (lexer cont) + + -- special GHC extension: we grok cpp-style #line pragmas + '#'# | lexemeIndex buf ==# bol -> -- the '#' must be in column 0 + let buf1 | lookAhead# buf 1# `eqChar#` 'l'# && + lookAhead# buf 2# `eqChar#` 'i'# && + lookAhead# buf 3# `eqChar#` 'n'# && + lookAhead# buf 4# `eqChar#` 'e'# = stepOnBy# buf 5# + | otherwise = stepOn buf + in + case expandWhile# is_space buf1 of { buf2 -> + if is_digit (currentChar# buf2) + then line_prag next_line buf2 s' + else is_a_token + } where - skip_to_end buf = nested_comment (lexer cont) buf s' + next_line buf = lexer cont (stepOnUntilChar# buf '\n'#) -- tabs have been expanded beforehand c | is_space c -> tab y bol atbol (stepOn buf) @@ -427,48 +464,59 @@ lexer cont buf s@(PState{ atbol = atbol} is_a_token | atbol /=# 0# = lexBOL cont buf s' - | otherwise = lexToken cont glaexts buf s' + | otherwise = lexToken cont exts buf s' -- {-# LINE .. #-} pragmas. yeuch. -line_prag cont buf = +line_prag cont buf s@PState{loc=loc} = case expandWhile# is_space buf of { buf1 -> case scanNumLit 0 (stepOverLexeme buf1) of { (line,buf2) -> -- subtract one: the line number refers to the *following* line. let real_line = line - 1 in case fromInteger real_line of { i@(I# l) -> + -- ToDo, if no filename then we skip the newline.... d'oh case expandWhile# is_space buf2 of { buf3 -> case currentChar# buf3 of '\"'#{-"-} -> case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 -> - let file = lexemeToFastString buf4 in - \s@PState{loc=loc} -> skipToEnd buf4 s{loc = mkSrcLoc file i} + let + file = lexemeToFastString buf4 + new_buf = stepOn (stepOverLexeme buf4) + in + if nullFastString file + then cont new_buf s{loc = replaceSrcLine loc l} + else cont new_buf s{loc = mkSrcLoc file i} } - other -> \s@PState{loc=loc} -> skipToEnd buf3 s{loc = replaceSrcLine loc l} + _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l} }}}} - where - skipToEnd buf = nested_comment cont buf -nested_comment :: P a -> P a -nested_comment cont buf = loop buf +skipNestedComment :: P a -> P a +skipNestedComment cont buf state = skipNestedComment' (loc state) cont buf state + +skipNestedComment' :: SrcLoc -> P a -> P a +skipNestedComment' orig_loc cont buf = loop buf where loop buf = case currentChar# buf of - '\NUL'# | bufferExhausted (stepOn buf) -> - lexError "unterminated `{-'" buf - - '-'# | lookAhead# buf 1# `eqChar#` '}'# -> - cont (stepOnBy# buf 2#) + '-'# | lookAhead# buf 1# `eqChar#` '}'# -> cont (stepOnBy# buf 2#) '{'# | lookAhead# buf 1# `eqChar#` '-'# -> - nested_comment (nested_comment cont) (stepOnBy# buf 2#) + skipNestedComment + (skipNestedComment' orig_loc cont) + (stepOnBy# buf 2#) '\n'# -> \ s@PState{loc=loc} -> let buf' = stepOn buf in - nested_comment cont buf' - s{loc = incSrcLine loc, bol = currentIndex# buf', - atbol = 1#} + loop buf' s{loc = incSrcLine loc, + bol = currentIndex# buf', + atbol = 1#} - _ -> nested_comment cont (stepOn buf) + -- pass the original SrcLoc to lexError so that the error is + -- reported at the line it was originally on, not the line at + -- the end of the file. + '\NUL'# | bufferExhausted (stepOn buf) -> + \s -> lexError "unterminated `{-'" buf s{loc=orig_loc} -- -} + + _ -> loop (stepOn buf) -- When we are lexing the first token of a line, check whether we need to -- insert virtual semicolons or close braces due to layout. @@ -476,7 +524,7 @@ nested_comment cont buf = loop buf lexBOL :: (Token -> P a) -> P a lexBOL cont buf s@(PState{ loc = loc, - glasgow_exts = glaexts, + extsBitmap = exts, bol = bol, atbol = atbol, context = ctx @@ -488,7 +536,7 @@ lexBOL cont buf s@(PState{ --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $ cont ITsemi buf s{atbol = 0#} else - lexToken cont glaexts buf s{atbol = 0#} + lexToken cont exts buf s{atbol = 0#} where col = currentIndex# buf -# bol @@ -507,81 +555,100 @@ lexBOL cont buf s@(PState{ lexToken :: (Token -> P a) -> Int# -> P a -lexToken cont glaexts buf = - --trace "lexToken" $ - _scc_ "Lexer" +lexToken cont exts buf = +-- trace "lexToken" $ case currentChar# buf of -- special symbols ---------------------------------------------------- - '('# | flag glaexts && lookAhead# buf 1# `eqChar#` '#'# - -> cont IToubxparen (setCurrentPos# buf 2#) + '('# | glaExtsEnabled exts && lookAhead# buf 1# `eqChar#` '#'# && + -- Unboxed tules: '(#' but not '(##' + not (lookAhead# buf 2# `eqChar#` '#'#) + -> cont IToubxparen (addToCurrentPos buf 2#) + -- Arrow notation extension: '(|' but not '(||' + | arrowsEnabled exts && lookAhead# buf 1# `eqChar#` '|'# && + not (lookAhead# buf 2# `eqChar#` '|'#) + -> cont IToparenbar (addToCurrentPos buf 2#) | otherwise - -> cont IToparen (incLexeme buf) - - ')'# -> cont ITcparen (incLexeme buf) - '['# -> cont ITobrack (incLexeme buf) - ']'# -> cont ITcbrack (incLexeme buf) - ','# -> cont ITcomma (incLexeme buf) - ';'# -> cont ITsemi (incLexeme buf) - + -> cont IToparen (incCurrentPos buf) + + ')'# -> cont ITcparen (incCurrentPos buf) + '['# | parrEnabled exts && lookAhead# buf 1# `eqChar#` ':'# -> + cont ITopabrack (addToCurrentPos buf 2#) + ------- MetaHaskell Extensions, looking for [| [e| [t| [p| and [d| + | glaExtsEnabled exts && + ((lookAhead# buf 1# ) `eqChar#` '|'# ) -> + cont ITopenExpQuote (addToCurrentPos buf 2# ) + | glaExtsEnabled exts && + (let c = (lookAhead# buf 1# ) + in eqChar# c 'e'# || eqChar# c 't'# || eqChar# c 'd'# || eqChar# c 'p'#) && + ((lookAhead# buf 2#) `eqChar#` '|'#) -> + let quote 'e'# = ITopenExpQuote + quote 'p'# = ITopenPatQuote + quote 'd'# = ITopenDecQuote + quote 't'# = ITopenTypQuote + in cont (quote (lookAhead# buf 1#)) (addToCurrentPos buf 3# ) + | otherwise -> + cont ITobrack (incCurrentPos buf) + + ']'# -> cont ITcbrack (incCurrentPos buf) + ','# -> cont ITcomma (incCurrentPos buf) + ';'# -> cont ITsemi (incCurrentPos buf) '}'# -> \ s@PState{context = ctx} -> case ctx of - (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'} + (_:ctx') -> cont ITccurly (incCurrentPos buf) s{context=ctx'} _ -> lexError "too many '}'s" buf s - + '|'# -> case lookAhead# buf 1# of + '}'# | glaExtsEnabled exts -> cont ITccurlybar + (addToCurrentPos buf 2#) + -- MetaHaskell extension + ']'# | glaExtsEnabled exts -> cont ITcloseQuote (addToCurrentPos buf 2#) + -- arrow notation extension + ')'# | arrowsEnabled exts -> cont ITcparenbar + (addToCurrentPos buf 2#) + other -> lex_sym cont exts (incCurrentPos buf) + ':'# -> case lookAhead# buf 1# of + ']'# | parrEnabled exts -> cont ITcpabrack + (addToCurrentPos buf 2#) + _ -> lex_sym cont exts (incCurrentPos buf) + + '#'# -> case lookAhead# buf 1# of - ')'# | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#) + ')'# | glaExtsEnabled exts + -> cont ITcubxparen (addToCurrentPos buf 2#) '-'# -> case lookAhead# buf 2# of - '}'# -> cont ITclose_prag (setCurrentPos# buf 3#) - _ -> lex_sym cont (incLexeme buf) - _ -> lex_sym cont (incLexeme buf) + '}'# -> cont ITclose_prag (addToCurrentPos buf 3#) + _ -> lex_sym cont exts (incCurrentPos buf) + _ -> lex_sym cont exts (incCurrentPos buf) - '`'# | flag glaexts && lookAhead# buf 1# `eqChar#` '`'# - -> lex_cstring cont (setCurrentPos# buf 2#) + '`'# | glaExtsEnabled exts && lookAhead# buf 1# `eqChar#` '`'# + -> lex_cstring cont (addToCurrentPos buf 2#) | otherwise - -> cont ITbackquote (incLexeme buf) + -> cont ITbackquote (incCurrentPos buf) - '{'# -> -- look for "{-##" special iface pragma - case lookAhead# buf 1# of + '{'# -> -- for Emacs: -} + case lookAhead# buf 1# of + '|'# | glaExtsEnabled exts + -> cont ITocurlybar (addToCurrentPos buf 2#) '-'# -> case lookAhead# buf 2# of - '#'# -> case lookAhead# buf 3# of - '#'# -> - let (lexeme, buf') - = doDiscard False (stepOnBy# (stepOverLexeme buf) 4#) in - cont (ITpragma lexeme) buf' - _ -> lex_prag cont (setCurrentPos# buf 3#) - _ -> cont ITocurly (incLexeme buf) - _ -> (layoutOff `thenP_` cont ITocurly) (incLexeme buf) + '#'# -> lex_prag cont (addToCurrentPos buf 3#) + _ -> cont ITocurly (incCurrentPos buf) + _ -> (layoutOff `thenP_` cont ITocurly) (incCurrentPos buf) + + + -- strings/characters ------------------------------------------------- - '\"'#{-"-} -> lex_string cont glaexts "" (incLexeme buf) - '\''# -> lex_char (char_end cont) glaexts (incLexeme buf) - - -- strictness and cpr pragmas and __scc treated specially. - '_'# | flag glaexts -> - case lookAhead# buf 1# of - '_'# -> case lookAhead# buf 2# of - 'S'# -> - lex_demand cont (stepOnUntil (not . isSpace) - (stepOnBy# buf 3#)) -- past __S - 'M'# -> - lex_cpr cont (stepOnUntil (not . isSpace) - (stepOnBy# buf 3#)) -- past __M - 's'# -> - case prefixMatch (stepOnBy# buf 3#) "cc" of - Just buf' -> lex_scc cont (stepOverLexeme buf') - Nothing -> lex_id cont glaexts buf - _ -> lex_id cont glaexts buf - _ -> lex_id cont glaexts buf + '\"'#{-"-} -> lex_string cont exts [] (incCurrentPos buf) + '\''# -> lex_char (char_end cont) exts (incCurrentPos buf) -- Hexadecimal and octal constants '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2 - -> readNum (after_lexnum cont glaexts) buf' is_hexdigit 16 hex + -> readNum (after_lexnum cont exts) buf' is_hexdigit 16 hex | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2 - -> readNum (after_lexnum cont glaexts) buf' is_octdigit 8 oct_or_dec + -> readNum (after_lexnum cont exts) buf' is_octdigit 8 oct_or_dec where ch = lookAhead# buf 1# ch2 = lookAhead# buf 2# - buf' = setCurrentPos# buf 2# + buf' = addToCurrentPos buf 2# '\NUL'# -> if bufferExhausted (stepOn buf) then @@ -590,10 +657,21 @@ lexToken cont glaexts buf = trace "lexIface: misplaced NUL?" $ cont (ITunknown "\NUL") (stepOn buf) - c | is_digit c -> lex_num cont glaexts 0 buf - | is_symbol c -> lex_sym cont buf - | is_upper c -> lex_con cont glaexts buf - | is_ident c -> lex_id cont glaexts buf + '?'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) -> -- ?x implicit parameter + specialPrefixId ITdupipvarid cont exts (incCurrentPos buf) + '%'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) -> + specialPrefixId ITsplitipvarid cont exts (incCurrentPos buf) + + ---------------- MetaHaskell Extensions for quotation escape + '$'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) -> -- $x variable escape + specialPrefixId ITidEscape cont exts (addToCurrentPos buf 1#) + '$'# | glaExtsEnabled exts && -- $( f x ) expression escape + ((lookAhead# buf 1#) `eqChar#` '('#) -> cont ITparenEscape (addToCurrentPos buf 2#) + + c | is_digit c -> lex_num cont exts 0 buf + | is_symbol c -> lex_sym cont exts buf + | is_upper c -> lex_con cont exts buf + | is_lower c -> lex_id cont exts buf | otherwise -> lexError "illegal character" buf -- Int# is unlifted, and therefore faster than Bool for flags. @@ -617,68 +695,75 @@ lex_prag cont buf ------------------------------------------------------------------------------- -- Strings & Chars -lex_string cont glaexts s buf +lex_string cont exts s buf = case currentChar# buf of '"'#{-"-} -> - let buf' = incLexeme buf; s' = mkFastString (reverse s) in - case currentChar# buf' of - '#'# | flag glaexts -> cont (ITprimstring s') (incLexeme buf') - _ -> cont (ITstring s') buf' + let buf' = incCurrentPos buf + in case currentChar# buf' of + '#'# | glaExtsEnabled exts -> + if any (> 0xFF) s + then lexError "primitive string literal must contain only characters <= \'\\xFF\'" buf' + else let s' = mkFastStringNarrow (map chr (reverse s)) in + -- always a narrow string/byte array + cont (ITprimstring s') (incCurrentPos buf') + + _other -> let s' = mkFastString (map chr (reverse s)) + in cont (ITstring s') buf' -- ignore \& in a string, deal with string gaps '\\'# | next_ch `eqChar#` '&'# - -> lex_string cont glaexts s buf' + -> lex_string cont exts s buf' | is_space next_ch - -> lex_stringgap cont glaexts s (incLexeme buf) + -> lex_stringgap cont exts s (incCurrentPos buf) where next_ch = lookAhead# buf 1# - buf' = setCurrentPos# buf 2# + buf' = addToCurrentPos buf 2# - _ -> lex_char (lex_next_string cont s) glaexts buf + _ -> lex_char (lex_next_string cont s) exts buf -lex_stringgap cont glaexts s buf - = let buf' = incLexeme buf in +lex_stringgap cont exts s buf + = let buf' = incCurrentPos buf in case currentChar# buf of - '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont glaexts s buf' + '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont exts s buf' st{loc = incSrcLine loc} - '\\'# -> lex_string cont glaexts s buf' - c | is_space c -> lex_stringgap cont glaexts s buf' + '\\'# -> lex_string cont exts s buf' + c | is_space c -> lex_stringgap cont exts s buf' other -> charError buf' -lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf +lex_next_string cont s exts c buf = lex_string cont exts (c:s) buf -lex_char :: (Int# -> Char -> P a) -> Int# -> P a -lex_char cont glaexts buf +lex_char :: (Int# -> Int -> P a) -> Int# -> P a +lex_char cont exts buf = case currentChar# buf of - '\\'# -> lex_escape (cont glaexts) (incLexeme buf) - c | is_any c -> cont glaexts (C# c) (incLexeme buf) + '\\'# -> lex_escape (cont exts) (incCurrentPos buf) + c | is_any c -> cont exts (I# (ord# c)) (incCurrentPos buf) other -> charError buf -char_end cont glaexts c buf +char_end cont exts c buf = case currentChar# buf of - '\''# -> let buf' = incLexeme buf in + '\''# -> let buf' = incCurrentPos buf in case currentChar# buf' of - '#'# | flag glaexts - -> cont (ITprimchar c) (incLexeme buf') + '#'# | glaExtsEnabled exts + -> cont (ITprimchar c) (incCurrentPos buf') _ -> cont (ITchar c) buf' _ -> charError buf lex_escape cont buf - = let buf' = incLexeme buf in + = let buf' = incCurrentPos buf in case currentChar# buf of - 'a'# -> cont '\a' buf' - 'b'# -> cont '\b' buf' - 'f'# -> cont '\f' buf' - 'n'# -> cont '\n' buf' - 'r'# -> cont '\r' buf' - 't'# -> cont '\t' buf' - 'v'# -> cont '\v' buf' - '\\'# -> cont '\\' buf' - '"'# -> cont '\"' buf' - '\''# -> cont '\'' buf' + 'a'# -> cont (ord '\a') buf' + 'b'# -> cont (ord '\b') buf' + 'f'# -> cont (ord '\f') buf' + 'n'# -> cont (ord '\n') buf' + 'r'# -> cont (ord '\r') buf' + 't'# -> cont (ord '\t') buf' + 'v'# -> cont (ord '\v') buf' + '\\'# -> cont (ord '\\') buf' + '"'# -> cont (ord '\"') buf' + '\''# -> cont (ord '\'') buf' '^'# -> let c = currentChar# buf' in if c `geChar#` '@'# && c `leChar#` '_'# - then cont (C# (chr# (ord# c -# ord# '@'#))) (incLexeme buf') + then cont (I# (ord# c -# ord# '@'#)) (incCurrentPos buf') else charError buf' 'x'# -> readNum (after_charnum cont) buf' is_hexdigit 16 hex @@ -688,20 +773,19 @@ lex_escape cont buf _ -> case [ (c,buf2) | (p,c) <- silly_escape_chars, Just buf2 <- [prefixMatch buf p] ] of - (c,buf2):_ -> cont c buf2 + (c,buf2):_ -> cont (ord c) buf2 [] -> charError buf' -after_charnum cont i buf - = let int = fromInteger i in - if i >= 0 && i <= 255 - then cont (chr int) buf +after_charnum cont i buf + = if i >= 0 && i <= 0x10FFFF + then cont (fromInteger i) buf else charError buf readNum cont buf is_digit base conv = read buf 0 where read buf i = case currentChar# buf of { c -> if is_digit c - then read (incLexeme buf) (i*base + (toInteger (I# (conv c)))) + then read (incCurrentPos buf) (i*base + (toInteger (I# (conv c)))) else cont i buf } @@ -760,63 +844,11 @@ silly_escape_chars = [ ("DEL", '\DEL') ] -------------------------------------------------------------------------------- - -lex_demand cont buf = - case read_em [] buf of { (ls,buf') -> - case currentChar# buf' of - 'B'# -> cont (ITstrict (ls, True )) (incLexeme buf') - _ -> cont (ITstrict (ls, False)) buf' - } - where - -- code snatched from Demand.lhs - read_em acc buf = - case currentChar# buf of - 'L'# -> read_em (WwLazy False : acc) (stepOn buf) - 'A'# -> read_em (WwLazy True : acc) (stepOn buf) - 'S'# -> read_em (WwStrict : acc) (stepOn buf) - 'P'# -> read_em (WwPrim : acc) (stepOn buf) - 'E'# -> read_em (WwEnum : acc) (stepOn buf) - ')'# -> (reverse acc, stepOn buf) - 'U'# -> do_unpack DataType True acc (stepOnBy# buf 2#) - 'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#) - 'N'# -> do_unpack NewType True acc (stepOnBy# buf 2#) - 'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#) - _ -> (reverse acc, buf) - - do_unpack new_or_data wrapper_unpacks acc buf - = case read_em [] buf of - (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest - -lex_cpr cont buf = - case read_em [] buf of { (cpr_inf,buf') -> - ASSERT ( null (tail cpr_inf) ) - cont (ITcprinfo $ head cpr_inf) buf' - } - where - -- code snatched from lex_demand above - read_em acc buf = - case currentChar# buf of - '-'# -> read_em (NoCPRInfo : acc) (stepOn buf) - '('# -> do_unpack acc (stepOn buf) - ')'# -> (reverse acc, stepOn buf) - _ -> (reverse acc, buf) - - do_unpack acc buf - = case read_em [] buf of - (stuff, rest) -> read_em ((CPRInfo stuff) : acc) rest - ------------------- -lex_scc cont buf = - case currentChar# buf of - 'C'# -> cont ITsccAllCafs (incLexeme buf) - other -> cont ITscc buf - ----------------------------------------------------------------------------- -- Numbers lex_num :: (Token -> P a) -> Int# -> Integer -> P a -lex_num cont glaexts acc buf = +lex_num cont exts acc buf = case scanNumLit acc buf of (acc',buf') -> case currentChar# buf' of @@ -824,37 +856,90 @@ lex_num cont glaexts acc buf = -- this case is not optimised at all, as the -- presence of floating point numbers in interface -- files is not that common. (ToDo) - case expandWhile# is_digit (incLexeme buf') of + case expandWhile# is_digit (incCurrentPos buf') of buf2 -> -- points to first non digit char - - let l = case currentChar# buf2 of - 'E'# -> do_exponent - 'e'# -> do_exponent - _ -> buf2 - - do_exponent - = let buf3 = incLexeme buf2 in - case currentChar# buf3 of - '-'# -> expandWhile# is_digit (incLexeme buf3) - '+'# -> expandWhile# is_digit (incLexeme buf3) - x | is_digit x -> expandWhile# is_digit buf3 - _ -> buf2 - - v = readRational__ (lexemeToString l) - - in case currentChar# l of -- glasgow exts only - '#'# | flag glaexts -> let l' = incLexeme l in - case currentChar# l' of - '#'# -> cont (ITprimdouble v) (incLexeme l') - _ -> cont (ITprimfloat v) l' - _ -> cont (ITrational v) l - - _ -> after_lexnum cont glaexts acc' buf' + case currentChar# buf2 of + 'E'# -> float_exponent cont exts buf2 + 'e'# -> float_exponent cont exts buf2 + _ -> float_done cont exts buf2 + + -- numbers like '9e4' are floats + 'E'# -> float_exponent cont exts buf' + 'e'# -> float_exponent cont exts buf' + _ -> after_lexnum cont exts acc' buf' -- it's an integer -after_lexnum cont glaexts i buf +float_exponent cont exts buf2 = + let buf3 = incCurrentPos buf2 + buf4 = case currentChar# buf3 of + '-'# | is_digit (lookAhead# buf3 1#) + -> expandWhile# is_digit (incCurrentPos buf3) + '+'# | is_digit (lookAhead# buf3 1#) + -> expandWhile# is_digit (incCurrentPos buf3) + x | is_digit x -> expandWhile# is_digit buf3 + _ -> buf2 + in + float_done cont exts buf4 + +float_done cont exts buf = + case currentChar# buf of -- glasgow exts only + '#'# | glaExtsEnabled exts -> + let buf' = incCurrentPos buf in + case currentChar# buf' of + '#'# -> cont (ITprimdouble v) (incCurrentPos buf') + _ -> cont (ITprimfloat v) buf' + _ -> cont (ITrational v) buf + where + v = readRational__ (lexemeToString buf) + +after_lexnum cont exts i buf = case currentChar# buf of - '#'# | flag glaexts -> cont (ITprimint i) (incLexeme buf) - _ -> cont (ITinteger i) buf + '#'# | glaExtsEnabled exts -> cont (ITprimint i) (incCurrentPos buf) + _ -> cont (ITinteger i) buf + +readRational :: ReadS Rational -- NB: doesn't handle leading "-" +readRational r = do + (n,d,s) <- readFix r + (k,t) <- readExp s + return ((n%1)*10^^(k-d), t) + where + readFix r = do + (ds,s) <- lexDecDigits r + (ds',t) <- lexDotDigits s + return (read (ds++ds'), length ds', t) + + readExp (e:s) | e `elem` "eE" = readExp' s + readExp s = return (0,s) + + readExp' ('+':s) = readDec s + readExp' ('-':s) = do + (k,t) <- readDec s + return (-k,t) + readExp' s = readDec s + + readDec s = do + (ds,r) <- nonnull isDigit s + return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ], + r) + + lexDecDigits = nonnull isDigit + + lexDotDigits ('.':s) = return (span isDigit s) + lexDotDigits s = return ("",s) + + nonnull p s = do (cs@(_:_),t) <- return (span p s) + return (cs,t) + +readRational__ :: String -> Rational -- NB: *does* handle a leading "-" +readRational__ top_s + = case top_s of + '-' : xs -> - (read_me xs) + xs -> read_me xs + where + read_me s + = case (do { (x,"") <- readRational s ; return x }) of + [x] -> x + [] -> error ("readRational__: no parse:" ++ top_s) + _ -> error ("readRational__: ambiguous parse:" ++ top_s) ----------------------------------------------------------------------------- -- C "literal literal"s (i.e. things like ``NULL'', ``stdout'' etc.) @@ -864,119 +949,130 @@ after_lexnum cont glaexts i buf lex_cstring cont buf = case expandUntilMatch (stepOverLexeme buf) "\'\'" of - buf' -> cont (ITlitlit (lexemeToFastString - (setCurrentPos# buf' (negateInt# 2#)))) - (mergeLexemes buf buf') - ------------------------------------------------------------------------------- --- Character Classes - -is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool - -{-# INLINE is_ctype #-} -#if __GLASGOW_HASKELL__ >= 303 -is_ctype :: Word8 -> Char# -> Bool -is_ctype mask = \c -> - (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0 -#else -is_ctype :: Int -> Char# -> Bool -is_ctype (I# mask) = \c -> - let (A# ctype) = ``char_types'' :: Addr - flag_word = int2Word# (ord# (indexCharOffAddr# ctype (ord# c))) - in - (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#) -#endif - -is_ident = is_ctype 1 -is_symbol = is_ctype 2 -is_any = is_ctype 4 -is_space = is_ctype 8 -is_upper = is_ctype 16 -is_digit = is_ctype 32 + Just buf' -> cont (ITlitlit (lexemeToFastString + (addToCurrentPos buf' (negateInt# 2#)))) + (mergeLexemes buf buf') + Nothing -> lexError "unterminated ``" buf ----------------------------------------------------------------------------- -- identifiers, symbols etc. -lex_id cont glaexts buf = - case expandWhile# is_ident buf of { buf1 -> +-- used for identifiers with special prefixes like +-- ?x (implicit parameters), $x (MetaHaskell escapes) and #x +-- we've already seen the prefix char, so look for an id, and wrap +-- the new "ip_constr" around the lexeme returned + +specialPrefixId ip_constr cont exts buf = lex_id newcont exts buf + where newcont (ITvarid lexeme) buf2 = cont (ip_constr (tailFS lexeme)) buf2 + newcont token buf2 = cont token buf2 +{- + case expandWhile# is_ident buf of + buf' -> cont (ip_constr (tailFS lexeme)) buf' + where lexeme = lexemeToFastString buf' +-} + +lex_id cont exts buf = + let buf1 = expandWhile# is_ident buf in + seq buf1 $ - case (if flag glaexts + case (if glaExtsEnabled exts then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes else buf1) of { buf' -> + seq buf' $ let lexeme = lexemeToFastString buf' in - case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of { - Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $ + case _scc_ "haskellKeyword" lookupUFM haskellKeywordsFM lexeme of { + Just kwd_token -> --trace ("hkeywd: "++unpackFS(lexeme)) $ cont kwd_token buf'; Nothing -> - let var_token = cont (mk_var_token lexeme) buf' in - - if not (flag glaexts) - then var_token - else + let var_token = cont (ITvarid lexeme) buf' in case lookupUFM ghcExtensionKeywordsFM lexeme of { - Just kwd_token -> cont kwd_token buf'; - Nothing -> var_token + Just (kwd_token, validExts) + | validExts .&. (toInt32 exts) /= 0 -> cont kwd_token buf'; + _ -> var_token - }}}} + }}} -lex_sym cont buf = +lex_sym cont exts buf = + -- trace "lex_sym" $ case expandWhile# is_symbol buf of buf' -> case lookupUFM haskellKeySymsFM lexeme of { - Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $ - cont kwd_token buf' ; - Nothing -> --trace ("sym: "++unpackFS lexeme) $ - cont (mk_var_token lexeme) buf' + Just (kwd_token, Nothing) + -> cont kwd_token buf' ; + Just (kwd_token, Just validExts) + | validExts .&. toInt32 exts /= 0 + -> cont kwd_token buf' ; + other -> cont (mk_var_token lexeme) buf' } where lexeme = lexemeToFastString buf' -lex_con cont glaexts buf = - case expandWhile# is_ident buf of { buf1 -> - case slurp_trailing_hashes buf1 glaexts of { buf' -> +-- lex_con recursively collects components of a qualified identifer. +-- The argument buf is the StringBuffer representing the lexeme +-- identified so far, where the next character is upper-case. + +lex_con cont exts buf = + -- trace ("con: "{-++unpackFS lexeme-}) $ + let empty_buf = stepOverLexeme buf in + case expandWhile# is_ident empty_buf of { buf1 -> + case slurp_trailing_hashes buf1 exts of { con_buf -> + + let all_buf = mergeLexemes buf con_buf + + con_lexeme = lexemeToFastString con_buf + mod_lexeme = lexemeToFastString (decCurrentPos buf) + all_lexeme = lexemeToFastString all_buf - case currentChar# buf' of - '.'# -> munch + just_a_conid + | emptyLexeme buf = cont (ITconid con_lexeme) all_buf + | otherwise = cont (ITqconid (mod_lexeme,con_lexeme)) all_buf + in + + case currentChar# all_buf of + '.'# -> maybe_qualified cont exts all_lexeme + (incCurrentPos all_buf) just_a_conid _ -> just_a_conid - - where - just_a_conid = --trace ("con: "++unpackFS lexeme) $ - cont (ITconid lexeme) buf' - lexeme = lexemeToFastString buf' - munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid - }} - -lex_qid cont glaexts mod buf just_a_conid = + }} + + +maybe_qualified cont exts mod buf just_a_conid = + -- trace ("qid: "{-++unpackFS lexeme-}) $ case currentChar# buf of '['# -> -- Special case for [] case lookAhead# buf 1# of - ']'# -> cont (ITqconid (mod,SLIT("[]"))) (setCurrentPos# buf 2#) + ']'# -> cont (ITqconid (mod,FSLIT("[]"))) (addToCurrentPos buf 2#) _ -> just_a_conid '('# -> -- Special case for (,,,) -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)" case lookAhead# buf 1# of - '#'# | flag glaexts -> case lookAhead# buf 2# of - ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#) + '#'# | glaExtsEnabled exts -> case lookAhead# buf 2# of + ','# -> lex_ubx_tuple cont mod (addToCurrentPos buf 3#) just_a_conid _ -> just_a_conid - ')'# -> cont (ITqconid (mod,SLIT("()"))) (setCurrentPos# buf 2#) - ','# -> lex_tuple cont mod (setCurrentPos# buf 2#) just_a_conid + ')'# -> cont (ITqconid (mod,FSLIT("()"))) (addToCurrentPos buf 2#) + ','# -> lex_tuple cont mod (addToCurrentPos buf 2#) just_a_conid _ -> just_a_conid '-'# -> case lookAhead# buf 1# of - '>'# -> cont (ITqconid (mod,SLIT("->"))) (setCurrentPos# buf 2#) - _ -> lex_id3 cont glaexts mod buf just_a_conid - _ -> lex_id3 cont glaexts mod buf just_a_conid + '>'# -> cont (ITqconid (mod,FSLIT("(->)"))) (addToCurrentPos buf 2#) + _ -> lex_id3 cont exts mod buf just_a_conid + + _ -> lex_id3 cont exts mod buf just_a_conid + + +lex_id3 cont exts mod buf just_a_conid + | is_upper (currentChar# buf) = + lex_con cont exts buf -lex_id3 cont glaexts mod buf just_a_conid | is_symbol (currentChar# buf) = let start_new_lexeme = stepOverLexeme buf in + -- trace ("lex_id31 "{-++unpackFS lexeme-}) $ case expandWhile# is_symbol start_new_lexeme of { buf' -> let lexeme = lexemeToFastString buf' @@ -991,44 +1087,44 @@ lex_id3 cont glaexts mod buf just_a_conid let start_new_lexeme = stepOverLexeme buf in + -- trace ("lex_id32 "{-++unpackFS lexeme-}) $ case expandWhile# is_ident start_new_lexeme of { buf1 -> if emptyLexeme buf1 then just_a_conid else - case slurp_trailing_hashes buf1 glaexts of { buf' -> + case slurp_trailing_hashes buf1 exts of { buf' -> let - lexeme = lexemeToFastString buf' - new_buf = mergeLexemes buf buf' + lexeme = lexemeToFastString buf' + new_buf = mergeLexemes buf buf' is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf in - case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of { - Just kwd_token -> just_a_conid; -- avoid M.where etc. - Nothing -> is_a_qvarid - -- TODO: special ids (as, qualified, hiding) shouldn't be - -- recognised as keywords here. ie. M.as is a qualified varid. - }}} + case _scc_ "haskellKeyword" lookupUFM haskellKeywordsFM lexeme of { + Nothing -> is_a_qvarid ; + Just kwd_token | isSpecial kwd_token -- special ids (as, qualified, hiding) shouldn't be + -> is_a_qvarid -- recognised as keywords here. + | otherwise + -> just_a_conid -- avoid M.where etc. + }}} -slurp_trailing_hashes buf glaexts - | flag glaexts = expandWhile# (`eqChar#` '#'#) buf - | otherwise = buf +slurp_trailing_hashes buf exts + | glaExtsEnabled exts = expandWhile# (`eqChar#` '#'#) buf + | otherwise = buf mk_var_token pk_str | is_upper f = ITconid pk_str - -- _[A-Z] is treated as a constructor in interface files. - | f `eqChar#` '_'# && not (_NULL_ tl) - && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str | is_ident f = ITvarid pk_str | f `eqChar#` ':'# = ITconsym pk_str | otherwise = ITvarsym pk_str where - (C# f) = _HEAD_ pk_str - tl = _TAIL_ pk_str + (C# f) = headFS pk_str + -- tl = _TAIL_ pk_str mk_qvar_token m token = +-- trace ("mk_qvar ") $ case mk_var_token token of ITconid n -> ITqconid (m,n) ITvarid n -> ITqvarid (m,n) @@ -1047,7 +1143,7 @@ lex_tuple cont mod buf back_off = go n buf = case currentChar# buf of ','# -> go (n+1) (stepOn buf) - ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n))) (stepOn buf) + ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf) _ -> back_off lex_ubx_tuple cont mod buf back_off = @@ -1057,50 +1153,13 @@ lex_ubx_tuple cont mod buf back_off = case currentChar# buf of ','# -> go (n+1) (stepOn buf) '#'# -> case lookAhead# buf 1# of - ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n))) + ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n))) (stepOnBy# buf 2#) _ -> back_off _ -> back_off \end{code} ----------------------------------------------------------------------------- -doDiscard rips along really fast, looking for a '#-}', -indicating the end of the pragma we're skipping - -\begin{code} -doDiscard inStr buf = - case currentChar# buf of - '#'# | not inStr -> - case lookAhead# buf 1# of { '#'# -> - case lookAhead# buf 2# of { '-'# -> - case lookAhead# buf 3# of { '}'# -> - (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#)); - _ -> doDiscard inStr (incLexeme buf) }; - _ -> doDiscard inStr (incLexeme buf) }; - _ -> doDiscard inStr (incLexeme buf) } - '"'# -> - let - odd_slashes buf flg i# = - case lookAhead# buf i# of - '\\'# -> odd_slashes buf (not flg) (i# -# 1#) - _ -> flg - in - case lookAhead# buf (negateInt# 1#) of --backwards, actually - '\\'# -> -- escaping something.. - if odd_slashes buf True (negateInt# 2#) then - -- odd number of slashes, " is escaped. - doDiscard inStr (incLexeme buf) - else - -- even number of slashes, \ is escaped. - doDiscard (not inStr) (incLexeme buf) - _ -> case inStr of -- forced to avoid build-up - True -> doDiscard False (incLexeme buf) - False -> doDiscard True (incLexeme buf) - _ -> doDiscard inStr (incLexeme buf) - -\end{code} - ------------------------------------------------------------------------------ \begin{code} data LayoutContext @@ -1112,11 +1171,11 @@ data ParseResult a | PFailed Message data PState = PState { - loc :: SrcLoc, - glasgow_exts :: Int#, - bol :: Int#, - atbol :: Int#, - context :: [LayoutContext] + loc :: SrcLoc, + extsBitmap :: Int#, -- bitmap that determines permitted extensions + bol :: Int#, + atbol :: Int#, + context :: [LayoutContext] } type P a = StringBuffer -- Input string @@ -1155,12 +1214,16 @@ lexError str buf s@PState{ loc = loc } getSrcLocP :: P SrcLoc getSrcLocP buf s@(PState{ loc = loc }) = POk s loc -getSrcFile :: P FAST_STRING +-- use a temporary SrcLoc for the duration of the argument +setSrcLocP :: SrcLoc -> P a -> P a +setSrcLocP new_loc p buf s = + case p buf s{ loc=new_loc } of + POk _ a -> POk s a + PFailed e -> PFailed e + +getSrcFile :: P FastString getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc) -getContext :: P [LayoutContext] -getContext buf s@(PState{ context = ctx }) = POk s ctx - pushContext :: LayoutContext -> P () pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} () @@ -1232,49 +1295,78 @@ layoutOff buf s@(PState{ context = ctx }) = POk s{ context = NoLayout:ctx } () popContext :: P () -popContext = \ buf s@(PState{ context = ctx }) -> +popContext = \ buf s@(PState{ context = ctx, loc = loc }) -> case ctx of (_:tl) -> POk s{ context = tl } () - [] -> panic "Lex.popContext: empty context" - -{- - Note that if the name of the file we're processing ends - with `hi-boot', we accept it on faith as having the right - version. This is done so that .hi-boot files that comes - with hsc don't have to be updated before every release, - *and* it allows us to share .hi-boot files with versions - of hsc that don't have .hi version checking (e.g., ghc-2.10's) - - If the version number is 0, the checking is also turned off. - (needed to deal with GHC.hi only!) - - Once we can assume we're compiling with a version of ghc that - supports interface file checking, we can drop the special - pleading --} -checkVersion :: Maybe Integer -> P () -checkVersion mb@(Just v) buf s@(PState{loc = loc}) - | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = POk s () - | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-}) -checkVersion mb@Nothing buf s@(PState{loc = loc}) - | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s () - | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-}) - ------------------------------------------------------------------ - -ifaceParseErr :: StringBuffer -> SrcLoc -> Message -ifaceParseErr s l - = hsep [ppr l, ptext SLIT("Interface file parse error; on input `"), - text (lexemeToString s), char '\''] - -ifaceVersionErr hi_vers l toks - = hsep [ppr l, ptext SLIT("Interface file version error;"), - ptext SLIT("Expected"), int opt_HiVersion, - ptext SLIT("found "), pp_version] + [] -> PFailed (srcParseErr buf loc) + +-- for reasons of efficiency, flags indicating language extensions (eg, +-- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed +-- integer + +glaExtsBit, ffiBit, parrBit :: Int +glaExtsBit = 0 +ffiBit = 1 +parrBit = 2 +withBit = 3 +arrowsBit = 4 + +glaExtsEnabled, ffiEnabled, parrEnabled :: Int# -> Bool +glaExtsEnabled flags = testBit (toInt32 flags) glaExtsBit +ffiEnabled flags = testBit (toInt32 flags) ffiBit +withEnabled flags = testBit (toInt32 flags) withBit +parrEnabled flags = testBit (toInt32 flags) parrBit +arrowsEnabled flags = testBit (toInt32 flags) arrowsBit + +toInt32 :: Int# -> Int32 +toInt32 x# = fromIntegral (I# x#) + +-- convenient record-based bitmap for the interface to the rest of the world +-- +-- NB: `glasgowExtsEF' implies `ffiEF' (see `mkPState' below) +-- +data ExtFlags = ExtFlags { + glasgowExtsEF :: Bool, + ffiEF :: Bool, + withEF :: Bool, + parrEF :: Bool, + arrowsEF :: Bool + } + +-- create a parse state +-- +mkPState :: SrcLoc -> ExtFlags -> PState +mkPState loc exts = + PState { + loc = loc, + extsBitmap = case (fromIntegral bitmap) of {I# bits -> bits}, + bol = 0#, + atbol = 1#, + context = [] + } where - pp_version = - case hi_vers of - Nothing -> ptext SLIT("pre ghc-3.02 version") - Just v -> ptext SLIT("version") <+> integer v + bitmap = glaExtsBit `setBitIf` glasgowExtsEF exts + .|. ffiBit `setBitIf` (ffiEF exts + || glasgowExtsEF exts) + .|. withBit `setBitIf` withEF exts + .|. parrBit `setBitIf` parrEF exts + .|. arrowsBit `setBitIf` arrowsEF exts + -- + setBitIf :: Int -> Bool -> Int32 + b `setBitIf` cond | cond = bit b + | otherwise = 0 + +----------------------------------------------------------------------------- + +srcParseErr :: StringBuffer -> SrcLoc -> Message +srcParseErr s l + = hcat [ppr l, + if null token + then ptext SLIT(": parse error (possibly incorrect indentation)") + else hcat [ptext SLIT(": parse error on input "), + char '`', text token, char '\''] + ] + where + token = lexemeToString s \end{code}