X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Fparser%2FLex.lhs;h=01fcc3bd48395ff182fd6981b7134f8b3ec1bdb4;hb=408439c03f074ed86b0bbe534c7210efb271b543;hp=8fdd6adadad09a8e65db6d285256b91ff5d43516;hpb=3b7176c6e285089187f6772e502872cb57537870;p=ghc-hetmet.git diff --git a/ghc/compiler/parser/Lex.lhs b/ghc/compiler/parser/Lex.lhs index 8fdd6ad..01fcc3b 100644 --- a/ghc/compiler/parser/Lex.lhs +++ b/ghc/compiler/parser/Lex.lhs @@ -23,11 +23,11 @@ module Lex ( -- Monad for parser Token(..), lexer, ParseResult(..), PState(..), - checkVersion, + checkVersion, ExtFlags(..), mkPState, StringBuffer, P, thenP, thenP_, returnP, mapP, failP, failMsgP, - getSrcLocP, getSrcFile, + getSrcLocP, setSrcLocP, getSrcFile, layoutOn, layoutOff, pushContext, popContext ) where @@ -36,12 +36,13 @@ module Lex ( import Char ( isSpace, toUpper ) import List ( isSuffixOf ) -import IdInfo ( InlinePragInfo(..) ) import PrelNames ( mkTupNameStr ) import CmdLineOpts ( opt_HiVersion, opt_NoHiCheck ) -import Demand ( Demand(..) {- instance Read -} ) +import ForeignCall ( Safety(..) ) +import NewDemand ( StrictSig(..), Demand(..), Demands(..), + DmdResult(..), mkTopDmdType, evalDmd, lazyDmd ) import UniqFM ( listToUFM, lookupUFM ) -import BasicTypes ( NewOrData(..), Boxity(..) ) +import BasicTypes ( Boxity(..) ) import SrcLoc ( SrcLoc, incSrcLine, srcLocFile, srcLocLine, replaceSrcLine, mkSrcLoc ) @@ -52,8 +53,9 @@ import FastString import StringBuffer import GlaExts import Ctype -import Char ( ord ) +import Char ( chr, ord ) import PrelRead ( readRational__ ) -- Glasgow non-std +import PrelBits ( Bits(..) ) -- non-std \end{code} %************************************************************************ @@ -117,10 +119,12 @@ data Token | ITexport | ITlabel | ITdynamic + | ITsafe | ITunsafe | ITwith | ITstdcallconv | ITccallconv + | ITdotnet | ITinterface -- interface keywords | IT__export @@ -130,7 +134,7 @@ data Token | ITcoerce | ITinlineMe | ITinlineCall - | ITccall (Bool,Bool,Bool) -- (is_dyn, is_casm, may_gc) + | ITccall (Bool,Bool,Safety) -- (is_dyn, is_casm, may_gc) | ITdefaultbranch | ITbottom | ITinteger_lit @@ -149,8 +153,8 @@ data Token | ITarity | ITspecialise | ITnocaf - | ITunfold InlinePragInfo - | ITstrict ([Demand], Bool) + | ITunfold + | ITstrict StrictSig | ITrules | ITcprinfo | ITdeprecated @@ -179,6 +183,7 @@ data Token | ITdarrow | ITminus | ITbang + | ITstar | ITdot | ITbiglam -- GHC-extension symbols @@ -189,6 +194,8 @@ data Token | ITccurlybar -- |}, for type applications | ITvccurly | ITobrack + | ITopabrack -- [:, for parallel arrays with -fparr + | ITcpabrack -- :], for parallel arrays with -fparr | ITcbrack | IToparen | ITcparen @@ -208,7 +215,8 @@ data Token | ITqvarsym (FAST_STRING,FAST_STRING) | ITqconsym (FAST_STRING,FAST_STRING) - | ITipvarid FAST_STRING -- GHC extension: implicit param: ?x + | ITdupipvarid FAST_STRING -- GHC extension: implicit param: ?x + | ITsplitipvarid FAST_STRING -- GHC extension: implicit param: %x | ITpragma StringBuffer @@ -289,6 +297,7 @@ isSpecial ITforall = True isSpecial ITexport = True isSpecial ITlabel = True isSpecial ITdynamic = True +isSpecial ITsafe = True isSpecial ITunsafe = True isSpecial ITwith = True isSpecial ITccallconv = True @@ -303,14 +312,16 @@ ghcExtensionKeywordsFM = listToUFM $ ( "export", ITexport ), ( "label", ITlabel ), ( "dynamic", ITdynamic ), + ( "safe", ITunsafe ), ( "unsafe", ITunsafe ), ( "with", ITwith ), ( "stdcall", ITstdcallconv), ( "ccall", ITccallconv), - ("_ccall_", ITccall (False, False, False)), - ("_ccall_GC_", ITccall (False, False, True)), - ("_casm_", ITccall (False, True, False)), - ("_casm_GC_", ITccall (False, True, True)), + ( "dotnet", ITdotnet), + ("_ccall_", ITccall (False, False, PlayRisky)), + ("_ccall_GC_", ITccall (False, False, PlaySafe)), + ("_casm_", ITccall (False, True, PlayRisky)), + ("_casm_GC_", ITccall (False, True, PlaySafe)), -- interface keywords ("__interface", ITinterface), @@ -342,16 +353,16 @@ ghcExtensionKeywordsFM = listToUFM $ ("__C", ITnocaf), ("__R", ITrules), ("__D", ITdeprecated), - ("__U", ITunfold NoInlinePragInfo), + ("__U", ITunfold), - ("__ccall", ITccall (False, False, False)), - ("__ccall_GC", ITccall (False, False, True)), - ("__dyn_ccall", ITccall (True, False, False)), - ("__dyn_ccall_GC", ITccall (True, False, True)), - ("__casm", ITccall (False, True, False)), - ("__dyn_casm", ITccall (True, True, False)), - ("__casm_GC", ITccall (False, True, True)), - ("__dyn_casm_GC", ITccall (True, True, True)), + ("__ccall", ITccall (False, False, PlayRisky)), + ("__ccall_GC", ITccall (False, False, PlaySafe)), + ("__dyn_ccall", ITccall (True, False, PlayRisky)), + ("__dyn_ccall_GC", ITccall (True, False, PlaySafe)), + ("__casm", ITccall (False, True, PlayRisky)), + ("__dyn_casm", ITccall (True, True, PlayRisky)), + ("__casm_GC", ITccall (False, True, PlaySafe)), + ("__dyn_casm_GC", ITccall (True, True, PlaySafe)), ("/\\", ITbiglam) ] @@ -371,6 +382,7 @@ haskellKeySymsFM = listToUFM $ ,("=>", ITdarrow) ,("-", ITminus) ,("!", ITbang) + ,("*", ITstar) ,(".", ITdot) -- sadly, for 'forall a . t' ] \end{code} @@ -380,7 +392,8 @@ The lexical analyser Lexer state: - - (glaexts) lexing an interface file or -fglasgow-exts + - (exts) lexing a source with extensions, eg, an interface file or + with -fglasgow-exts - (bol) pointer to beginning of line (for column calculations) - (buf) pointer to beginning of token - (buf) pointer to current char @@ -390,7 +403,7 @@ Lexer state: lexer :: (Token -> P a) -> P a lexer cont buf s@(PState{ loc = loc, - glasgow_exts = glaexts, + extsBitmap = exts, bol = bol, atbol = atbol, context = ctx @@ -402,7 +415,7 @@ lexer cont buf s@(PState{ where line = srcLocLine loc - tab y bol atbol buf = -- trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $ + tab y bol atbol buf = --trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $ case currentChar# buf of '\NUL'# -> @@ -436,6 +449,9 @@ lexer cont buf s@(PState{ let lexeme = mkFastString -- ToDo: too slow (map toUpper (lexemeToString buf2)) in case lookupUFM pragmaKeywordsFM lexeme of + -- ignore RULES pragmas when -fglasgow-exts is off + Just ITrules_prag | not (glaExtsEnabled exts) -> + skip_to_end (stepOnBy# buf 2#) s' Just ITline_prag -> line_prag skip_to_end buf2 s' Just other -> is_a_token @@ -444,13 +460,19 @@ lexer cont buf s@(PState{ else skip_to_end (stepOnBy# buf 2#) s' where - skip_to_end = nested_comment (lexer cont) + skip_to_end = skipNestedComment (lexer cont) -- special GHC extension: we grok cpp-style #line pragmas '#'# | lexemeIndex buf ==# bol -> -- the '#' must be in column 0 - case expandWhile# is_space (stepOn buf) of { buf1 -> - if is_digit (currentChar# buf1) - then line_prag next_line buf1 s' + let buf1 | lookAhead# buf 1# `eqChar#` 'l'# && + lookAhead# buf 2# `eqChar#` 'i'# && + lookAhead# buf 3# `eqChar#` 'n'# && + lookAhead# buf 4# `eqChar#` 'e'# = stepOnBy# buf 5# + | otherwise = stepOn buf + in + case expandWhile# is_space buf1 of { buf2 -> + if is_digit (currentChar# buf2) + then line_prag next_line buf2 s' else is_a_token } where @@ -465,7 +487,7 @@ lexer cont buf s@(PState{ atbol = atbol} is_a_token | atbol /=# 0# = lexBOL cont buf s' - | otherwise = lexToken cont glaexts buf s' + | otherwise = lexToken cont exts buf s' -- {-# LINE .. #-} pragmas. yeuch. line_prag cont buf s@PState{loc=loc} = @@ -490,26 +512,34 @@ line_prag cont buf s@PState{loc=loc} = _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l} }}}} -nested_comment :: P a -> P a -nested_comment cont buf = loop buf +skipNestedComment :: P a -> P a +skipNestedComment cont buf state = skipNestedComment' (loc state) cont buf state + +skipNestedComment' :: SrcLoc -> P a -> P a +skipNestedComment' orig_loc cont buf = loop buf where loop buf = case currentChar# buf of - '\NUL'# | bufferExhausted (stepOn buf) -> - lexError "unterminated `{-'" buf -- -} - '-'# | lookAhead# buf 1# `eqChar#` '}'# -> - cont (stepOnBy# buf 2#) + '-'# | lookAhead# buf 1# `eqChar#` '}'# -> cont (stepOnBy# buf 2#) '{'# | lookAhead# buf 1# `eqChar#` '-'# -> - nested_comment (nested_comment cont) (stepOnBy# buf 2#) + skipNestedComment + (skipNestedComment' orig_loc cont) + (stepOnBy# buf 2#) '\n'# -> \ s@PState{loc=loc} -> let buf' = stepOn buf in - nested_comment cont buf' - s{loc = incSrcLine loc, bol = currentIndex# buf', - atbol = 1#} + loop buf' s{loc = incSrcLine loc, + bol = currentIndex# buf', + atbol = 1#} + + -- pass the original SrcLoc to lexError so that the error is + -- reported at the line it was originally on, not the line at + -- the end of the file. + '\NUL'# | bufferExhausted (stepOn buf) -> + \s -> lexError "unterminated `{-'" buf s{loc=orig_loc} -- -} - _ -> nested_comment cont (stepOn buf) + _ -> loop (stepOn buf) -- When we are lexing the first token of a line, check whether we need to -- insert virtual semicolons or close braces due to layout. @@ -517,7 +547,7 @@ nested_comment cont buf = loop buf lexBOL :: (Token -> P a) -> P a lexBOL cont buf s@(PState{ loc = loc, - glasgow_exts = glaexts, + extsBitmap = exts, bol = bol, atbol = atbol, context = ctx @@ -529,7 +559,7 @@ lexBOL cont buf s@(PState{ --trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $ cont ITsemi buf s{atbol = 0#} else - lexToken cont glaexts buf s{atbol = 0#} + lexToken cont exts buf s{atbol = 0#} where col = currentIndex# buf -# bol @@ -548,18 +578,21 @@ lexBOL cont buf s@(PState{ lexToken :: (Token -> P a) -> Int# -> P a -lexToken cont glaexts buf = - -- trace "lexToken" $ +lexToken cont exts buf = +-- trace "lexToken" $ case currentChar# buf of -- special symbols ---------------------------------------------------- - '('# | flag glaexts && lookAhead# buf 1# `eqChar#` '#'# + '('# | glaExtsEnabled exts && lookAhead# buf 1# `eqChar#` '#'# -> cont IToubxparen (setCurrentPos# buf 2#) | otherwise -> cont IToparen (incLexeme buf) ')'# -> cont ITcparen (incLexeme buf) - '['# -> cont ITobrack (incLexeme buf) + '['# | parrEnabled exts && lookAhead# buf 1# `eqChar#` ':'# -> + cont ITopabrack (setCurrentPos# buf 2#) + | otherwise -> + cont ITobrack (incLexeme buf) ']'# -> cont ITcbrack (incLexeme buf) ','# -> cont ITcomma (incLexeme buf) ';'# -> cont ITsemi (incLexeme buf) @@ -568,43 +601,50 @@ lexToken cont glaexts buf = (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'} _ -> lexError "too many '}'s" buf s '|'# -> case lookAhead# buf 1# of - '}'# | flag glaexts -> cont ITccurlybar - (setCurrentPos# buf 2#) - _ -> lex_sym cont (incLexeme buf) + '}'# | glaExtsEnabled exts -> cont ITccurlybar + (setCurrentPos# buf 2#) + _ -> lex_sym cont (incLexeme buf) + ':'# -> case lookAhead# buf 1# of + ']'# | parrEnabled exts -> cont ITcpabrack + (setCurrentPos# buf 2#) + _ -> lex_sym cont (incLexeme buf) '#'# -> case lookAhead# buf 1# of - ')'# | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#) + ')'# | glaExtsEnabled exts + -> cont ITcubxparen (setCurrentPos# buf 2#) '-'# -> case lookAhead# buf 2# of '}'# -> cont ITclose_prag (setCurrentPos# buf 3#) _ -> lex_sym cont (incLexeme buf) _ -> lex_sym cont (incLexeme buf) - '`'# | flag glaexts && lookAhead# buf 1# `eqChar#` '`'# + '`'# | glaExtsEnabled exts && lookAhead# buf 1# `eqChar#` '`'# -> lex_cstring cont (setCurrentPos# buf 2#) | otherwise -> cont ITbackquote (incLexeme buf) - '{'# -> -- look for "{-##" special iface pragma + '{'# -> -- look for "{-##" special iface pragma -- for Emacs: -} case lookAhead# buf 1# of - '|'# | flag glaexts + '|'# | glaExtsEnabled exts -> cont ITocurlybar (setCurrentPos# buf 2#) '-'# -> case lookAhead# buf 2# of '#'# -> case lookAhead# buf 3# of '#'# -> - let (lexeme, buf') - = doDiscard 0# (stepOnBy# (stepOverLexeme buf) 4#) in - cont (ITpragma lexeme) buf' + lexPragma + cont + (\ cont lexeme buf' -> cont (ITpragma lexeme) buf') + 0# + (stepOnBy# (stepOverLexeme buf) 4#) _ -> lex_prag cont (setCurrentPos# buf 3#) _ -> cont ITocurly (incLexeme buf) _ -> (layoutOff `thenP_` cont ITocurly) (incLexeme buf) -- strings/characters ------------------------------------------------- - '\"'#{-"-} -> lex_string cont glaexts [] (incLexeme buf) - '\''# -> lex_char (char_end cont) glaexts (incLexeme buf) + '\"'#{-"-} -> lex_string cont exts [] (incLexeme buf) + '\''# -> lex_char (char_end cont) exts (incLexeme buf) -- strictness and cpr pragmas and __scc treated specially. - '_'# | flag glaexts -> + '_'# | glaExtsEnabled exts -> case lookAhead# buf 1# of '_'# -> case lookAhead# buf 2# of 'S'# -> @@ -616,15 +656,15 @@ lexToken cont glaexts buf = 's'# -> case prefixMatch (stepOnBy# buf 3#) "cc" of Just buf' -> lex_scc cont (stepOverLexeme buf') - Nothing -> lex_id cont glaexts buf - _ -> lex_id cont glaexts buf - _ -> lex_id cont glaexts buf + Nothing -> lex_id cont exts buf + _ -> lex_id cont exts buf + _ -> lex_id cont exts buf -- Hexadecimal and octal constants '0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2 - -> readNum (after_lexnum cont glaexts) buf' is_hexdigit 16 hex + -> readNum (after_lexnum cont exts) buf' is_hexdigit 16 hex | (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2 - -> readNum (after_lexnum cont glaexts) buf' is_octdigit 8 oct_or_dec + -> readNum (after_lexnum cont exts) buf' is_octdigit 8 oct_or_dec where ch = lookAhead# buf 1# ch2 = lookAhead# buf 2# buf' = setCurrentPos# buf 2# @@ -636,12 +676,14 @@ lexToken cont glaexts buf = trace "lexIface: misplaced NUL?" $ cont (ITunknown "\NUL") (stepOn buf) - '?'# | flag glaexts && is_lower (lookAhead# buf 1#) -> - lex_ip cont (incLexeme buf) - c | is_digit c -> lex_num cont glaexts 0 buf + '?'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) -> + lex_ip ITdupipvarid cont (incLexeme buf) + '%'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) -> + lex_ip ITsplitipvarid cont (incLexeme buf) + c | is_digit c -> lex_num cont exts 0 buf | is_symbol c -> lex_sym cont buf - | is_upper c -> lex_con cont glaexts buf - | is_ident c -> lex_id cont glaexts buf + | is_upper c -> lex_con cont exts buf + | is_ident c -> lex_id cont exts buf | otherwise -> lexError "illegal character" buf -- Int# is unlifted, and therefore faster than Bool for flags. @@ -665,50 +707,51 @@ lex_prag cont buf ------------------------------------------------------------------------------- -- Strings & Chars -lex_string cont glaexts s buf +lex_string cont exts s buf = case currentChar# buf of '"'#{-"-} -> - let buf' = incLexeme buf; s' = mkFastStringInt (reverse s) in - case currentChar# buf' of - '#'# | flag glaexts -> if all (<= 0xFF) s + let buf' = incLexeme buf + s' = mkFastStringNarrow (map chr (reverse s)) + in case currentChar# buf' of + '#'# | glaExtsEnabled exts -> if all (<= 0xFF) s then cont (ITprimstring s') (incLexeme buf') - else lexError "primitive string literal must contain only characters <= '\xFF'" buf' + else lexError "primitive string literal must contain only characters <= \'\\xFF\'" buf' _ -> cont (ITstring s') buf' -- ignore \& in a string, deal with string gaps '\\'# | next_ch `eqChar#` '&'# - -> lex_string cont glaexts s buf' + -> lex_string cont exts s buf' | is_space next_ch - -> lex_stringgap cont glaexts s (incLexeme buf) + -> lex_stringgap cont exts s (incLexeme buf) where next_ch = lookAhead# buf 1# buf' = setCurrentPos# buf 2# - _ -> lex_char (lex_next_string cont s) glaexts buf + _ -> lex_char (lex_next_string cont s) exts buf -lex_stringgap cont glaexts s buf +lex_stringgap cont exts s buf = let buf' = incLexeme buf in case currentChar# buf of - '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont glaexts s buf' + '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont exts s buf' st{loc = incSrcLine loc} - '\\'# -> lex_string cont glaexts s buf' - c | is_space c -> lex_stringgap cont glaexts s buf' + '\\'# -> lex_string cont exts s buf' + c | is_space c -> lex_stringgap cont exts s buf' other -> charError buf' -lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf +lex_next_string cont s exts c buf = lex_string cont exts (c:s) buf lex_char :: (Int# -> Int -> P a) -> Int# -> P a -lex_char cont glaexts buf +lex_char cont exts buf = case currentChar# buf of - '\\'# -> lex_escape (cont glaexts) (incLexeme buf) - c | is_any c -> cont glaexts (I# (ord# c)) (incLexeme buf) + '\\'# -> lex_escape (cont exts) (incLexeme buf) + c | is_any c -> cont exts (I# (ord# c)) (incLexeme buf) other -> charError buf -char_end cont glaexts c buf +char_end cont exts c buf = case currentChar# buf of '\''# -> let buf' = incLexeme buf in case currentChar# buf' of - '#'# | flag glaexts + '#'# | glaExtsEnabled exts -> cont (ITprimchar c) (incLexeme buf') _ -> cont (ITchar c) buf' _ -> charError buf @@ -742,7 +785,7 @@ lex_escape cont buf [] -> charError buf' after_charnum cont i buf - = if i >= 0 && i <= 0x7FFFFFFF + = if i >= 0 && i <= 0x10FFFF then cont (fromInteger i) buf else charError buf @@ -814,29 +857,44 @@ silly_escape_chars = [ lex_demand cont buf = case read_em [] buf of { (ls,buf') -> case currentChar# buf' of - 'B'# -> cont (ITstrict (ls, True )) (incLexeme buf') - _ -> cont (ITstrict (ls, False)) buf' + 'b'# -> cont (ITstrict (StrictSig (mkTopDmdType ls BotRes))) (incLexeme buf') + 'm'# -> cont (ITstrict (StrictSig (mkTopDmdType ls RetCPR))) (incLexeme buf') + _ -> cont (ITstrict (StrictSig (mkTopDmdType ls TopRes))) buf' } where - -- code snatched from Demand.lhs read_em acc buf = case currentChar# buf of - 'L'# -> read_em (WwLazy False : acc) (stepOn buf) - 'A'# -> read_em (WwLazy True : acc) (stepOn buf) - 'S'# -> read_em (WwStrict : acc) (stepOn buf) - 'P'# -> read_em (WwPrim : acc) (stepOn buf) - 'E'# -> read_em (WwEnum : acc) (stepOn buf) - ')'# -> (reverse acc, stepOn buf) - 'U'# -> do_unpack DataType True acc (stepOnBy# buf 2#) - 'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#) - 'N'# -> do_unpack NewType True acc (stepOnBy# buf 2#) - 'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#) - _ -> (reverse acc, buf) + 'T'# -> read_em (Top : acc) (stepOn buf) + 'L'# -> read_em (lazyDmd : acc) (stepOn buf) + 'A'# -> read_em (Abs : acc) (stepOn buf) + 'V'# -> read_em (evalDmd : acc) (stepOn buf) -- Temporary, until + -- we've recompiled prelude etc + 'C'# -> do_unary Call acc (stepOnBy# buf 2#) -- Skip 'C(' + + 'U'# -> do_seq1 Eval acc (stepOnBy# buf 1#) + 'D'# -> do_seq1 Defer acc (stepOnBy# buf 1#) + 'S'# -> do_seq1 (Box . Eval) acc (stepOnBy# buf 1#) - do_unpack new_or_data wrapper_unpacks acc buf - = case read_em [] buf of - (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest + _ -> (reverse acc, buf) + do_seq1 fn acc buf + = case currentChar# buf of + '('# -> do_seq2 fn acc (stepOnBy# buf 1#) + _ -> read_em (fn (Poly Abs) : acc) buf + + do_seq2 fn acc buf + = case read_em [] buf of { (dmds, buf) -> + case currentChar# buf of + ')'# -> read_em (fn (Prod dmds) : acc) + (stepOn buf) + '*'# -> ASSERT( length dmds == 1 ) + read_em (fn (Poly (head dmds)) : acc) + (stepOnBy# buf 2#) -- Skip '*)' + } + + do_unary fn acc buf + = case read_em [] buf of + ([dmd], rest) -> read_em (fn dmd : acc) (stepOn rest) -- Skip ')' ------------------ lex_scc cont buf = @@ -848,7 +906,7 @@ lex_scc cont buf = -- Numbers lex_num :: (Token -> P a) -> Int# -> Integer -> P a -lex_num cont glaexts acc buf = +lex_num cont exts acc buf = case scanNumLit acc buf of (acc',buf') -> case currentChar# buf' of @@ -875,18 +933,18 @@ lex_num cont glaexts acc buf = v = readRational__ (lexemeToString l) in case currentChar# l of -- glasgow exts only - '#'# | flag glaexts -> let l' = incLexeme l in + '#'# | glaExtsEnabled exts -> let l' = incLexeme l in case currentChar# l' of '#'# -> cont (ITprimdouble v) (incLexeme l') _ -> cont (ITprimfloat v) l' _ -> cont (ITrational v) l - _ -> after_lexnum cont glaexts acc' buf' + _ -> after_lexnum cont exts acc' buf' -after_lexnum cont glaexts i buf +after_lexnum cont exts i buf = case currentChar# buf of - '#'# | flag glaexts -> cont (ITprimint i) (incLexeme buf) - _ -> cont (ITinteger i) buf + '#'# | glaExtsEnabled exts -> cont (ITprimint i) (incLexeme buf) + _ -> cont (ITinteger i) buf ----------------------------------------------------------------------------- -- C "literal literal"s (i.e. things like ``NULL'', ``stdout'' etc.) @@ -904,16 +962,16 @@ lex_cstring cont buf = ----------------------------------------------------------------------------- -- identifiers, symbols etc. -lex_ip cont buf = +lex_ip ip_constr cont buf = case expandWhile# is_ident buf of - buf' -> cont (ITipvarid lexeme) buf' - where lexeme = lexemeToFastString buf' + buf' -> cont (ip_constr (tailFS lexeme)) buf' + where lexeme = lexemeToFastString buf' -lex_id cont glaexts buf = +lex_id cont exts buf = let buf1 = expandWhile# is_ident buf in seq buf1 $ - case (if flag glaexts + case (if glaExtsEnabled exts then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes else buf1) of { buf' -> @@ -926,7 +984,7 @@ lex_id cont glaexts buf = let var_token = cont (ITvarid lexeme) buf' in - if not (flag glaexts) + if not (glaExtsEnabled exts) then var_token else @@ -948,23 +1006,36 @@ lex_sym cont buf = where lexeme = lexemeToFastString buf' -lex_con cont glaexts buf = - -- trace ("con: "{-++unpackFS lexeme-}) $ - case expandWhile# is_ident buf of { buf1 -> - case slurp_trailing_hashes buf1 glaexts of { buf' -> +-- lex_con recursively collects components of a qualified identifer. +-- The argument buf is the StringBuffer representing the lexeme +-- identified so far, where the next character is upper-case. - case currentChar# buf' of - '.'# -> munch +lex_con cont exts buf = + -- trace ("con: "{-++unpackFS lexeme-}) $ + let empty_buf = stepOverLexeme buf in + case expandWhile# is_ident empty_buf of { buf1 -> + case slurp_trailing_hashes buf1 exts of { con_buf -> + + let all_buf = mergeLexemes buf con_buf + + con_lexeme = lexemeToFastString con_buf + mod_lexeme = lexemeToFastString (decLexeme buf) + all_lexeme = lexemeToFastString all_buf + + just_a_conid + | emptyLexeme buf = cont (ITconid con_lexeme) all_buf + | otherwise = cont (ITqconid (mod_lexeme,con_lexeme)) all_buf + in + + case currentChar# all_buf of + '.'# -> maybe_qualified cont exts all_lexeme + (incLexeme all_buf) just_a_conid _ -> just_a_conid - - where - just_a_conid = cont (ITconid lexeme) buf' - lexeme = lexemeToFastString buf' - munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid - }} - -lex_qid cont glaexts mod buf just_a_conid = - -- trace ("quid: "{-++unpackFS lexeme-}) $ + }} + + +maybe_qualified cont exts mod buf just_a_conid = + -- trace ("qid: "{-++unpackFS lexeme-}) $ case currentChar# buf of '['# -> -- Special case for [] case lookAhead# buf 1# of @@ -974,7 +1045,7 @@ lex_qid cont glaexts mod buf just_a_conid = '('# -> -- Special case for (,,,) -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)" case lookAhead# buf 1# of - '#'# | flag glaexts -> case lookAhead# buf 2# of + '#'# | glaExtsEnabled exts -> case lookAhead# buf 2# of ','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#) just_a_conid _ -> just_a_conid @@ -984,10 +1055,15 @@ lex_qid cont glaexts mod buf just_a_conid = '-'# -> case lookAhead# buf 1# of '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#) - _ -> lex_id3 cont glaexts mod buf just_a_conid - _ -> lex_id3 cont glaexts mod buf just_a_conid + _ -> lex_id3 cont exts mod buf just_a_conid + + _ -> lex_id3 cont exts mod buf just_a_conid + + +lex_id3 cont exts mod buf just_a_conid + | is_upper (currentChar# buf) = + lex_con cont exts buf -lex_id3 cont glaexts mod buf just_a_conid | is_symbol (currentChar# buf) = let start_new_lexeme = stepOverLexeme buf @@ -1013,7 +1089,7 @@ lex_id3 cont glaexts mod buf just_a_conid then just_a_conid else - case slurp_trailing_hashes buf1 glaexts of { buf' -> + case slurp_trailing_hashes buf1 exts of { buf' -> let lexeme = lexemeToFastString buf' @@ -1029,9 +1105,9 @@ lex_id3 cont glaexts mod buf just_a_conid -> just_a_conid -- avoid M.where etc. }}} -slurp_trailing_hashes buf glaexts - | flag glaexts = expandWhile# (`eqChar#` '#'#) buf - | otherwise = buf +slurp_trailing_hashes buf exts + | glaExtsEnabled exts = expandWhile# (`eqChar#` '#'#) buf + | otherwise = buf mk_var_token pk_str @@ -1080,20 +1156,21 @@ lex_ubx_tuple cont mod buf back_off = \end{code} ----------------------------------------------------------------------------- -doDiscard rips along really fast, looking for a '##-}', +'lexPragma' rips along really fast, looking for a '##-}', indicating the end of the pragma we're skipping \begin{code} -doDiscard inStr buf = +lexPragma cont contf inStr buf = case currentChar# buf of '#'# | inStr ==# 0# -> case lookAhead# buf 1# of { '#'# -> case lookAhead# buf 2# of { '-'# -> case lookAhead# buf 3# of { '}'# -> - (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#)); - _ -> doDiscard inStr (incLexeme buf) }; - _ -> doDiscard inStr (incLexeme buf) }; - _ -> doDiscard inStr (incLexeme buf) } + contf cont (lexemeToBuffer buf) + (stepOverLexeme (setCurrentPos# buf 4#)); + _ -> lexPragma cont contf inStr (incLexeme buf) }; + _ -> lexPragma cont contf inStr (incLexeme buf) }; + _ -> lexPragma cont contf inStr (incLexeme buf) } '"'# -> let @@ -1108,19 +1185,24 @@ doDiscard inStr buf = '\\'# -> -- escaping something.. if odd_slashes buf True (negateInt# 2#) then -- odd number of slashes, " is escaped. - doDiscard inStr (incLexeme buf) + lexPragma cont contf inStr (incLexeme buf) else -- even number of slashes, \ is escaped. - doDiscard not_inStr (incLexeme buf) - _ -> doDiscard not_inStr (incLexeme buf) + lexPragma cont contf not_inStr (incLexeme buf) + _ -> lexPragma cont contf not_inStr (incLexeme buf) '\''# | inStr ==# 0# -> case lookAhead# buf 1# of { '"'# -> case lookAhead# buf 2# of { '\''# -> - doDiscard inStr (setCurrentPos# buf 3#); - _ -> doDiscard inStr (incLexeme buf) }; - _ -> doDiscard inStr (incLexeme buf) } + lexPragma cont contf inStr (setCurrentPos# buf 3#); + _ -> lexPragma cont contf inStr (incLexeme buf) }; + _ -> lexPragma cont contf inStr (incLexeme buf) } + + -- a sign that the input is ill-formed, since pragmas are + -- assumed to always be properly closed (in .hi files). + '\NUL'# -> trace "lexPragma: unexpected end-of-file" $ + cont (ITunknown "\NUL") buf - _ -> doDiscard inStr (incLexeme buf) + _ -> lexPragma cont contf inStr (incLexeme buf) \end{code} @@ -1136,11 +1218,11 @@ data ParseResult a | PFailed Message data PState = PState { - loc :: SrcLoc, - glasgow_exts :: Int#, - bol :: Int#, - atbol :: Int#, - context :: [LayoutContext] + loc :: SrcLoc, + extsBitmap :: Int#, -- bitmap that determines permitted extensions + bol :: Int#, + atbol :: Int#, + context :: [LayoutContext] } type P a = StringBuffer -- Input string @@ -1179,12 +1261,16 @@ lexError str buf s@PState{ loc = loc } getSrcLocP :: P SrcLoc getSrcLocP buf s@(PState{ loc = loc }) = POk s loc +-- use a temporary SrcLoc for the duration of the argument +setSrcLocP :: SrcLoc -> P a -> P a +setSrcLocP new_loc p buf s = + case p buf s{ loc=new_loc } of + POk _ a -> POk s a + PFailed e -> PFailed e + getSrcFile :: P FAST_STRING getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc) -getContext :: P [LayoutContext] -getContext buf s@(PState{ context = ctx }) = POk s ctx - pushContext :: LayoutContext -> P () pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} () @@ -1284,6 +1370,48 @@ checkVersion mb@Nothing buf s@(PState{loc = loc}) | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s () | otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-}) + +-- for reasons of efficiency, flags indicating language extensions (eg, +-- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed +-- integer + +glaExtsBit, ffiBit, parrBit :: Int +glaExtsBit = 0 +ffiBit = 1 -- FIXME: not used yet; still part of `glaExtsBit' +parrBit = 2 + +glaExtsEnabled, ffiEnabled, parrEnabled :: Int# -> Bool +glaExtsEnabled flags = testBit (I# flags) glaExtsBit +ffiEnabled flags = testBit (I# flags) ffiBit +parrEnabled flags = testBit (I# flags) parrBit + +-- convenient record-based bitmap for the interface to the rest of the world +-- +data ExtFlags = ExtFlags { + glasgowExtsEF :: Bool, +-- ffiEF :: Bool, -- commented out to avoid warnings + parrEF :: Bool -- while not used yet + } + +-- create a parse state +-- +mkPState :: SrcLoc -> ExtFlags -> PState +mkPState loc exts = PState { + loc = loc, + extsBitmap = case bitmap of {I# bits -> bits}, + bol = 0#, + atbol = 1#, + context = [] + } + where + bitmap = glaExtsBit `setBitIf` glasgowExtsEF exts +-- .|. ffiBit `setBitIf` ffiEF exts + .|. parrBit `setBitIf` parrEF exts + -- + b `setBitIf` cond | cond = bit b + | otherwise = 0 + + ----------------------------------------------------------------- ifaceParseErr :: StringBuffer -> SrcLoc -> Message