X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Fparser%2FLex.lhs;h=dfc3945b6e8ce67227fb23ccf717c8b02f3d4969;hb=0299e1a135c5805e09ed8e2271b3b17fc8a04869;hp=b2f04b04ff14a9e80ddf573f555df325938932d2;hpb=f0a01a1fc19bfa76aa36fa113942e1c57f3733f4;p=ghc-hetmet.git diff --git a/ghc/compiler/parser/Lex.lhs b/ghc/compiler/parser/Lex.lhs index b2f04b0..dfc3945 100644 --- a/ghc/compiler/parser/Lex.lhs +++ b/ghc/compiler/parser/Lex.lhs @@ -16,11 +16,10 @@ An example that provokes the error is -------------------------------------------------------- \begin{code} -{-# OPTIONS -#include "ctypes.h" #-} module Lex ( - ifaceParseErr, + ifaceParseErr, srcParseErr, -- Monad for parser Token(..), lexer, ParseResult(..), PState(..), @@ -28,41 +27,33 @@ module Lex ( StringBuffer, P, thenP, thenP_, returnP, mapP, failP, failMsgP, - getSrcLocP, getSrcFile, + getSrcLocP, setSrcLocP, getSrcFile, layoutOn, layoutOff, pushContext, popContext ) where #include "HsVersions.h" -import Char ( ord, isSpace, toUpper ) +import Char ( isSpace, toUpper ) import List ( isSuffixOf ) -import IdInfo ( InlinePragInfo(..), CprInfo(..) ) -import Name ( isLowerISO, isUpperISO ) -import PrelMods ( mkTupNameStr, mkUbxTupNameStr ) -import CmdLineOpts ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck ) -import Demand ( Demand(..) {- instance Read -} ) -import UniqFM ( UniqFM, listToUFM, lookupUFM) -import BasicTypes ( NewOrData(..) ) +import PrelNames ( mkTupNameStr ) +import CmdLineOpts ( opt_HiVersion, opt_NoHiCheck ) +import ForeignCall ( Safety(..) ) +import NewDemand ( StrictSig(..), Demand(..), Demands(..), + DmdResult(..), mkTopDmdType, evalDmd, lazyDmd ) +import UniqFM ( listToUFM, lookupUFM ) +import BasicTypes ( Boxity(..) ) import SrcLoc ( SrcLoc, incSrcLine, srcLocFile, srcLocLine, replaceSrcLine, mkSrcLoc ) -import Maybes ( MaybeErr(..) ) import ErrUtils ( Message ) import Outputable import FastString import StringBuffer import GlaExts -import ST ( runST ) - -#if __GLASGOW_HASKELL__ >= 303 -import Bits -import Word -#endif - -import Char ( chr ) -import Addr +import Ctype +import Char ( chr, ord ) import PrelRead ( readRational__ ) -- Glasgow non-std \end{code} @@ -120,17 +111,19 @@ data Token | ITthen | ITtype | ITwhere - | ITscc + | ITscc -- ToDo: remove (we use {-# SCC "..." 
#-} now) | ITforall -- GHC extension keywords | ITforeign | ITexport | ITlabel | ITdynamic + | ITsafe | ITunsafe | ITwith | ITstdcallconv | ITccallconv + | ITdotnet | ITinterface -- interface keywords | IT__export @@ -140,13 +133,17 @@ data Token | ITcoerce | ITinlineMe | ITinlineCall - | ITccall (Bool,Bool,Bool) -- (is_dyn, is_casm, may_gc) + | ITccall (Bool,Bool,Safety) -- (is_dyn, is_casm, may_gc) | ITdefaultbranch | ITbottom | ITinteger_lit | ITfloat_lit + | ITword_lit + | ITword64_lit + | ITint64_lit | ITrational_lit | ITaddr_lit + | ITlabel_lit | ITlit_lit | ITstring_lit | ITtypeapp @@ -155,11 +152,11 @@ data Token | ITarity | ITspecialise | ITnocaf - | ITunfold InlinePragInfo - | ITstrict ([Demand], Bool) + | ITunfold + | ITstrict StrictSig | ITrules + | ITcprinfo | ITdeprecated - | ITcprinfo (CprInfo) | IT__scc | ITsccAllCafs @@ -170,6 +167,7 @@ data Token | ITrules_prag | ITdeprecated_prag | ITline_prag + | ITscc_prag | ITclose_prag | ITdotdot -- reserved symbols @@ -190,6 +188,8 @@ data Token | ITocurly -- special symbols | ITccurly + | ITocurlybar -- {|, for type applications + | ITccurlybar -- |}, for type applications | ITvccurly | ITobrack | ITcbrack @@ -211,16 +211,17 @@ data Token | ITqvarsym (FAST_STRING,FAST_STRING) | ITqconsym (FAST_STRING,FAST_STRING) - | ITipvarid FAST_STRING -- GHC extension: implicit param: ?x + | ITdupipvarid FAST_STRING -- GHC extension: implicit param: ?x + | ITsplitipvarid FAST_STRING -- GHC extension: implicit param: %x | ITpragma StringBuffer - | ITchar Char + | ITchar Int | ITstring FAST_STRING - | ITinteger Integer + | ITinteger Integer | ITrational Rational - | ITprimchar Char + | ITprimchar Int | ITprimstring FAST_STRING | ITprimint Integer | ITprimfloat Rational @@ -229,7 +230,7 @@ data Token | ITunknown String -- Used when the lexer can't make sense of it | ITeof -- end of file token - deriving Text -- debugging + deriving Show -- debugging \end{code} ----------------------------------------------------------------------------- @@ -247,6 +248,7 @@ pragmaKeywordsFM = listToUFM $ ( "LINE", ITline_prag ), ( "RULES", ITrules_prag ), ( "RULEZ", ITrules_prag ), -- american spelling :-) + ( "SCC", ITscc_prag ), ( "DEPRECATED", ITdeprecated_prag ) ] @@ -277,9 +279,27 @@ haskellKeywordsFM = listToUFM $ ( "then", ITthen ), ( "type", ITtype ), ( "where", ITwhere ), - ( "_scc_", ITscc ) + ( "_scc_", ITscc ) -- ToDo: remove ] +isSpecial :: Token -> Bool +-- If we see M.x, where x is a keyword, but +-- is special, we treat is as just plain M.x, +-- not as a keyword. +isSpecial ITas = True +isSpecial IThiding = True +isSpecial ITqualified = True +isSpecial ITforall = True +isSpecial ITexport = True +isSpecial ITlabel = True +isSpecial ITdynamic = True +isSpecial ITsafe = True +isSpecial ITunsafe = True +isSpecial ITwith = True +isSpecial ITccallconv = True +isSpecial ITstdcallconv = True +isSpecial _ = False + -- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! 
(SUP) ghcExtensionKeywordsFM = listToUFM $ map (\ (x,y) -> (_PK_ x,y)) @@ -288,14 +308,16 @@ ghcExtensionKeywordsFM = listToUFM $ ( "export", ITexport ), ( "label", ITlabel ), ( "dynamic", ITdynamic ), + ( "safe", ITunsafe ), ( "unsafe", ITunsafe ), ( "with", ITwith ), ( "stdcall", ITstdcallconv), ( "ccall", ITccallconv), - ("_ccall_", ITccall (False, False, False)), - ("_ccall_GC_", ITccall (False, False, True)), - ("_casm_", ITccall (False, True, False)), - ("_casm_GC_", ITccall (False, True, True)), + ( "dotnet", ITdotnet), + ("_ccall_", ITccall (False, False, PlayRisky)), + ("_ccall_GC_", ITccall (False, False, PlaySafe)), + ("_casm_", ITccall (False, True, PlayRisky)), + ("_casm_GC_", ITccall (False, True, PlaySafe)), -- interface keywords ("__interface", ITinterface), @@ -311,8 +333,12 @@ ghcExtensionKeywordsFM = listToUFM $ ("__bot", ITbottom), ("__integer", ITinteger_lit), ("__float", ITfloat_lit), + ("__int64", ITint64_lit), + ("__word", ITword_lit), + ("__word64", ITword64_lit), ("__rational", ITrational_lit), ("__addr", ITaddr_lit), + ("__label", ITlabel_lit), ("__litlit", ITlit_lit), ("__string", ITstring_lit), ("__a", ITtypeapp), @@ -323,16 +349,16 @@ ghcExtensionKeywordsFM = listToUFM $ ("__C", ITnocaf), ("__R", ITrules), ("__D", ITdeprecated), - ("__U", ITunfold NoInlinePragInfo), + ("__U", ITunfold), - ("__ccall", ITccall (False, False, False)), - ("__ccall_GC", ITccall (False, False, True)), - ("__dyn_ccall", ITccall (True, False, False)), - ("__dyn_ccall_GC", ITccall (True, False, True)), - ("__casm", ITccall (False, True, False)), - ("__dyn_casm", ITccall (True, True, False)), - ("__casm_GC", ITccall (False, True, True)), - ("__dyn_casm_GC", ITccall (True, True, True)), + ("__ccall", ITccall (False, False, PlayRisky)), + ("__ccall_GC", ITccall (False, False, PlaySafe)), + ("__dyn_ccall", ITccall (True, False, PlayRisky)), + ("__dyn_ccall_GC", ITccall (True, False, PlaySafe)), + ("__casm", ITccall (False, True, PlayRisky)), + ("__dyn_casm", ITccall (True, True, PlayRisky)), + ("__casm_GC", ITccall (False, True, PlaySafe)), + ("__dyn_casm_GC", ITccall (True, True, PlaySafe)), ("/\\", ITbiglam) ] @@ -401,17 +427,15 @@ lexer cont buf s@(PState{ if next `eqChar#` '-'# then trundle (n +# 1#) else if is_symbol next || n <# 2# then is_a_token - else case untilChar# (stepOnBy# buf n) '\n'# of - { buf' -> tab y bol atbol (stepOverLexeme buf') - } + else tab y bol atbol + (stepOnUntilChar# (stepOnBy# buf n) '\n'#) in trundle 1# -- comments and pragmas. We deal with LINE pragmas here, -- and throw out any unrecognised pragmas as comments. Any -- pragmas we know about are dealt with later (after any layout -- processing if necessary). 
- - '{'# | lookAhead# buf 1# `eqChar#` '-'# -> + '{'# | lookAhead# buf 1# `eqChar#` '-'# -> if lookAhead# buf 2# `eqChar#` '#'# then if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1-> @@ -419,14 +443,34 @@ lexer cont buf s@(PState{ let lexeme = mkFastString -- ToDo: too slow (map toUpper (lexemeToString buf2)) in case lookupUFM pragmaKeywordsFM lexeme of - Just ITline_prag -> line_prag (lexer cont) buf2 s' + -- ignore RULES pragmas when -fglasgow-exts is off + Just ITrules_prag | not (flag glaexts) -> + skip_to_end (stepOnBy# buf 2#) s' + Just ITline_prag -> + line_prag skip_to_end buf2 s' Just other -> is_a_token - Nothing -> skip_to_end (stepOnBy# buf 2#) + Nothing -> skip_to_end (stepOnBy# buf 2#) s' }} - - else skip_to_end (stepOnBy# buf 2#) + + else skip_to_end (stepOnBy# buf 2#) s' + where + skip_to_end = skipNestedComment (lexer cont) + + -- special GHC extension: we grok cpp-style #line pragmas + '#'# | lexemeIndex buf ==# bol -> -- the '#' must be in column 0 + let buf1 | lookAhead# buf 1# `eqChar#` 'l'# && + lookAhead# buf 2# `eqChar#` 'i'# && + lookAhead# buf 3# `eqChar#` 'n'# && + lookAhead# buf 4# `eqChar#` 'e'# = stepOnBy# buf 5# + | otherwise = stepOn buf + in + case expandWhile# is_space buf1 of { buf2 -> + if is_digit (currentChar# buf2) + then line_prag next_line buf2 s' + else is_a_token + } where - skip_to_end buf = nested_comment (lexer cont) buf s' + next_line buf = lexer cont (stepOnUntilChar# buf '\n'#) -- tabs have been expanded beforehand c | is_space c -> tab y bol atbol (stepOn buf) @@ -440,45 +484,56 @@ lexer cont buf s@(PState{ | otherwise = lexToken cont glaexts buf s' -- {-# LINE .. #-} pragmas. yeuch. -line_prag cont buf = +line_prag cont buf s@PState{loc=loc} = case expandWhile# is_space buf of { buf1 -> case scanNumLit 0 (stepOverLexeme buf1) of { (line,buf2) -> -- subtract one: the line number refers to the *following* line. let real_line = line - 1 in case fromInteger real_line of { i@(I# l) -> + -- ToDo, if no filename then we skip the newline.... 
d'oh case expandWhile# is_space buf2 of { buf3 -> case currentChar# buf3 of '\"'#{-"-} -> case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 -> - let file = lexemeToFastString buf4 in - \s@PState{loc=loc} -> skipToEnd buf4 s{loc = mkSrcLoc file i} + let + file = lexemeToFastString buf4 + new_buf = stepOn (stepOverLexeme buf4) + in + if nullFastString file + then cont new_buf s{loc = replaceSrcLine loc l} + else cont new_buf s{loc = mkSrcLoc file i} } - other -> \s@PState{loc=loc} -> skipToEnd buf3 s{loc = replaceSrcLine loc l} + _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l} }}}} - where - skipToEnd buf = nested_comment cont buf -nested_comment :: P a -> P a -nested_comment cont buf = loop buf +skipNestedComment :: P a -> P a +skipNestedComment cont buf state = skipNestedComment' (loc state) cont buf state + +skipNestedComment' :: SrcLoc -> P a -> P a +skipNestedComment' orig_loc cont buf = loop buf where loop buf = case currentChar# buf of - '\NUL'# | bufferExhausted (stepOn buf) -> - lexError "unterminated `{-'" buf - - '-'# | lookAhead# buf 1# `eqChar#` '}'# -> - cont (stepOnBy# buf 2#) + '-'# | lookAhead# buf 1# `eqChar#` '}'# -> cont (stepOnBy# buf 2#) '{'# | lookAhead# buf 1# `eqChar#` '-'# -> - nested_comment (nested_comment cont) (stepOnBy# buf 2#) + skipNestedComment + (skipNestedComment' orig_loc cont) + (stepOnBy# buf 2#) '\n'# -> \ s@PState{loc=loc} -> let buf' = stepOn buf in - nested_comment cont buf' - s{loc = incSrcLine loc, bol = currentIndex# buf', - atbol = 1#} + loop buf' s{loc = incSrcLine loc, + bol = currentIndex# buf', + atbol = 1#} + + -- pass the original SrcLoc to lexError so that the error is + -- reported at the line it was originally on, not the line at + -- the end of the file. + '\NUL'# | bufferExhausted (stepOn buf) -> + \s -> lexError "unterminated `{-'" buf s{loc=orig_loc} -- -} - _ -> nested_comment cont (stepOn buf) + _ -> loop (stepOn buf) -- When we are lexing the first token of a line, check whether we need to -- insert virtual semicolons or close braces due to layout. 
@@ -518,7 +573,7 @@ lexBOL cont buf s@(PState{ lexToken :: (Token -> P a) -> Int# -> P a lexToken cont glaexts buf = - --trace "lexToken" $ +-- trace "lexToken" $ case currentChar# buf of -- special symbols ---------------------------------------------------- @@ -532,12 +587,16 @@ lexToken cont glaexts buf = ']'# -> cont ITcbrack (incLexeme buf) ','# -> cont ITcomma (incLexeme buf) ';'# -> cont ITsemi (incLexeme buf) - '}'# -> \ s@PState{context = ctx} -> case ctx of (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'} _ -> lexError "too many '}'s" buf s + '|'# -> case lookAhead# buf 1# of + '}'# | flag glaexts -> cont ITccurlybar + (setCurrentPos# buf 2#) + _ -> lex_sym cont (incLexeme buf) + '#'# -> case lookAhead# buf 1# of ')'# | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#) '-'# -> case lookAhead# buf 2# of @@ -551,19 +610,23 @@ lexToken cont glaexts buf = -> cont ITbackquote (incLexeme buf) '{'# -> -- look for "{-##" special iface pragma - case lookAhead# buf 1# of + case lookAhead# buf 1# of + '|'# | flag glaexts + -> cont ITocurlybar (setCurrentPos# buf 2#) '-'# -> case lookAhead# buf 2# of '#'# -> case lookAhead# buf 3# of - '#'# -> - let (lexeme, buf') - = doDiscard False (stepOnBy# (stepOverLexeme buf) 4#) in - cont (ITpragma lexeme) buf' + '#'# -> + lexPragma + cont + (\ cont lexeme buf' -> cont (ITpragma lexeme) buf') + 0# + (stepOnBy# (stepOverLexeme buf) 4#) _ -> lex_prag cont (setCurrentPos# buf 3#) - _ -> cont ITocurly (incLexeme buf) - _ -> (layoutOff `thenP_` cont ITocurly) (incLexeme buf) + _ -> cont ITocurly (incLexeme buf) + _ -> (layoutOff `thenP_` cont ITocurly) (incLexeme buf) -- strings/characters ------------------------------------------------- - '\"'#{-"-} -> lex_string cont glaexts "" (incLexeme buf) + '\"'#{-"-} -> lex_string cont glaexts [] (incLexeme buf) '\''# -> lex_char (char_end cont) glaexts (incLexeme buf) -- strictness and cpr pragmas and __scc treated specially. @@ -574,8 +637,8 @@ lexToken cont glaexts buf = lex_demand cont (stepOnUntil (not . isSpace) (stepOnBy# buf 3#)) -- past __S 'M'# -> - lex_cpr cont (stepOnUntil (not . 
isSpace) - (stepOnBy# buf 3#)) -- past __M + cont ITcprinfo (stepOnBy# buf 3#) -- past __M + 's'# -> case prefixMatch (stepOnBy# buf 3#) "cc" of Just buf' -> lex_scc cont (stepOverLexeme buf') @@ -600,7 +663,9 @@ lexToken cont glaexts buf = cont (ITunknown "\NUL") (stepOn buf) '?'# | flag glaexts && is_lower (lookAhead# buf 1#) -> - lex_ip cont (incLexeme buf) + lex_ip ITdupipvarid cont (incLexeme buf) + '%'# | flag glaexts && is_lower (lookAhead# buf 1#) -> + lex_ip ITsplitipvarid cont (incLexeme buf) c | is_digit c -> lex_num cont glaexts 0 buf | is_symbol c -> lex_sym cont buf | is_upper c -> lex_con cont glaexts buf @@ -631,9 +696,12 @@ lex_prag cont buf lex_string cont glaexts s buf = case currentChar# buf of '"'#{-"-} -> - let buf' = incLexeme buf; s' = mkFastString (reverse s) in - case currentChar# buf' of - '#'# | flag glaexts -> cont (ITprimstring s') (incLexeme buf') + let buf' = incLexeme buf + s' = mkFastStringNarrow (map chr (reverse s)) + in case currentChar# buf' of + '#'# | flag glaexts -> if all (<= 0xFF) s + then cont (ITprimstring s') (incLexeme buf') + else lexError "primitive string literal must contain only characters <= \'\\xFF\'" buf' _ -> cont (ITstring s') buf' -- ignore \& in a string, deal with string gaps @@ -658,11 +726,11 @@ lex_stringgap cont glaexts s buf lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf -lex_char :: (Int# -> Char -> P a) -> Int# -> P a +lex_char :: (Int# -> Int -> P a) -> Int# -> P a lex_char cont glaexts buf = case currentChar# buf of '\\'# -> lex_escape (cont glaexts) (incLexeme buf) - c | is_any c -> cont glaexts (C# c) (incLexeme buf) + c | is_any c -> cont glaexts (I# (ord# c)) (incLexeme buf) other -> charError buf char_end cont glaexts c buf @@ -677,19 +745,19 @@ char_end cont glaexts c buf lex_escape cont buf = let buf' = incLexeme buf in case currentChar# buf of - 'a'# -> cont '\a' buf' - 'b'# -> cont '\b' buf' - 'f'# -> cont '\f' buf' - 'n'# -> cont '\n' buf' - 'r'# -> cont '\r' buf' - 't'# -> cont '\t' buf' - 'v'# -> cont '\v' buf' - '\\'# -> cont '\\' buf' - '"'# -> cont '\"' buf' - '\''# -> cont '\'' buf' + 'a'# -> cont (ord '\a') buf' + 'b'# -> cont (ord '\b') buf' + 'f'# -> cont (ord '\f') buf' + 'n'# -> cont (ord '\n') buf' + 'r'# -> cont (ord '\r') buf' + 't'# -> cont (ord '\t') buf' + 'v'# -> cont (ord '\v') buf' + '\\'# -> cont (ord '\\') buf' + '"'# -> cont (ord '\"') buf' + '\''# -> cont (ord '\'') buf' '^'# -> let c = currentChar# buf' in if c `geChar#` '@'# && c `leChar#` '_'# - then cont (C# (chr# (ord# c -# ord# '@'#))) (incLexeme buf') + then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf') else charError buf' 'x'# -> readNum (after_charnum cont) buf' is_hexdigit 16 hex @@ -699,13 +767,12 @@ lex_escape cont buf _ -> case [ (c,buf2) | (p,c) <- silly_escape_chars, Just buf2 <- [prefixMatch buf p] ] of - (c,buf2):_ -> cont c buf2 + (c,buf2):_ -> cont (ord c) buf2 [] -> charError buf' -after_charnum cont i buf - = let int = fromInteger i in - if i >= 0 && i <= 255 - then cont (chr int) buf +after_charnum cont i buf + = if i >= 0 && i <= 0x10FFFF + then cont (fromInteger i) buf else charError buf readNum cont buf is_digit base conv = read buf 0 @@ -776,46 +843,44 @@ silly_escape_chars = [ lex_demand cont buf = case read_em [] buf of { (ls,buf') -> case currentChar# buf' of - 'B'# -> cont (ITstrict (ls, True )) (incLexeme buf') - _ -> cont (ITstrict (ls, False)) buf' + 'b'# -> cont (ITstrict (StrictSig (mkTopDmdType ls BotRes))) (incLexeme buf') + 'm'# -> cont (ITstrict (StrictSig (mkTopDmdType 
ls RetCPR))) (incLexeme buf') + _ -> cont (ITstrict (StrictSig (mkTopDmdType ls TopRes))) buf' } where - -- code snatched from Demand.lhs read_em acc buf = case currentChar# buf of - 'L'# -> read_em (WwLazy False : acc) (stepOn buf) - 'A'# -> read_em (WwLazy True : acc) (stepOn buf) - 'S'# -> read_em (WwStrict : acc) (stepOn buf) - 'P'# -> read_em (WwPrim : acc) (stepOn buf) - 'E'# -> read_em (WwEnum : acc) (stepOn buf) - ')'# -> (reverse acc, stepOn buf) - 'U'# -> do_unpack DataType True acc (stepOnBy# buf 2#) - 'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#) - 'N'# -> do_unpack NewType True acc (stepOnBy# buf 2#) - 'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#) - _ -> (reverse acc, buf) + 'T'# -> read_em (Top : acc) (stepOn buf) + 'L'# -> read_em (lazyDmd : acc) (stepOn buf) + 'A'# -> read_em (Abs : acc) (stepOn buf) + 'V'# -> read_em (evalDmd : acc) (stepOn buf) -- Temporary, until + -- we've recompiled prelude etc + 'C'# -> do_unary Call acc (stepOnBy# buf 2#) -- Skip 'C(' - do_unpack new_or_data wrapper_unpacks acc buf - = case read_em [] buf of - (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest + 'U'# -> do_seq1 Eval acc (stepOnBy# buf 1#) + 'D'# -> do_seq1 Defer acc (stepOnBy# buf 1#) + 'S'# -> do_seq1 (Box . Eval) acc (stepOnBy# buf 1#) -lex_cpr cont buf = - case read_em [] buf of { (cpr_inf,buf') -> - ASSERT ( null (tail cpr_inf) ) - cont (ITcprinfo $ head cpr_inf) buf' - } - where - -- code snatched from lex_demand above - read_em acc buf = - case currentChar# buf of - '-'# -> read_em (NoCPRInfo : acc) (stepOn buf) - '('# -> do_unpack acc (stepOn buf) - ')'# -> (reverse acc, stepOn buf) _ -> (reverse acc, buf) - do_unpack acc buf - = case read_em [] buf of - (stuff, rest) -> read_em ((CPRInfo stuff) : acc) rest + do_seq1 fn acc buf + = case currentChar# buf of + '('# -> do_seq2 fn acc (stepOnBy# buf 1#) + _ -> read_em (fn (Poly Abs) : acc) buf + + do_seq2 fn acc buf + = case read_em [] buf of { (dmds, buf) -> + case currentChar# buf of + ')'# -> read_em (fn (Prod dmds) : acc) + (stepOn buf) + '*'# -> ASSERT( length dmds == 1 ) + read_em (fn (Poly (head dmds)) : acc) + (stepOnBy# buf 2#) -- Skip '*)' + } + + do_unary fn acc buf + = case read_em [] buf of + ([dmd], rest) -> read_em (fn dmd : acc) (stepOn rest) -- Skip ')' ------------------ lex_scc cont buf = @@ -880,43 +945,17 @@ lex_cstring cont buf = (mergeLexemes buf buf') Nothing -> lexError "unterminated ``" buf ------------------------------------------------------------------------------- --- Character Classes - -is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool - -{-# INLINE is_ctype #-} -#if __GLASGOW_HASKELL__ >= 303 -is_ctype :: Word8 -> Char# -> Bool -is_ctype mask = \c -> - (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0 -#else -is_ctype :: Int -> Char# -> Bool -is_ctype (I# mask) = \c -> - let (A# ctype) = ``char_types'' :: Addr - flag_word = int2Word# (ord# (indexCharOffAddr# ctype (ord# c))) - in - (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#) -#endif - -is_ident = is_ctype 1 -is_symbol = is_ctype 2 -is_any = is_ctype 4 -is_space = is_ctype 8 -is_lower = is_ctype 16 -is_upper = is_ctype 32 -is_digit = is_ctype 64 - ----------------------------------------------------------------------------- -- identifiers, symbols etc. 
-lex_ip cont buf = +lex_ip ip_constr cont buf = case expandWhile# is_ident buf of - buf' -> cont (ITipvarid lexeme) buf' - where lexeme = lexemeToFastString buf' + buf' -> cont (ip_constr (tailFS lexeme)) buf' + where lexeme = lexemeToFastString buf' lex_id cont glaexts buf = - case expandWhile# is_ident buf of { buf1 -> + let buf1 = expandWhile# is_ident buf in + seq buf1 $ case (if flag glaexts then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes @@ -929,7 +968,7 @@ lex_id cont glaexts buf = cont kwd_token buf'; Nothing -> - let var_token = cont (mk_var_token lexeme) buf' in + let var_token = cont (ITvarid lexeme) buf' in if not (flag glaexts) then var_token @@ -939,9 +978,10 @@ lex_id cont glaexts buf = Just kwd_token -> cont kwd_token buf'; Nothing -> var_token - }}}} + }}} lex_sym cont buf = + -- trace "lex_sym" $ case expandWhile# is_symbol buf of buf' -> case lookupUFM haskellKeySymsFM lexeme of { Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $ @@ -952,22 +992,36 @@ lex_sym cont buf = where lexeme = lexemeToFastString buf' -lex_con cont glaexts buf = - case expandWhile# is_ident buf of { buf1 -> - case slurp_trailing_hashes buf1 glaexts of { buf' -> +-- lex_con recursively collects components of a qualified identifer. +-- The argument buf is the StringBuffer representing the lexeme +-- identified so far, where the next character is upper-case. - case currentChar# buf' of - '.'# -> munch +lex_con cont glaexts buf = + -- trace ("con: "{-++unpackFS lexeme-}) $ + let empty_buf = stepOverLexeme buf in + case expandWhile# is_ident empty_buf of { buf1 -> + case slurp_trailing_hashes buf1 glaexts of { con_buf -> + + let all_buf = mergeLexemes buf con_buf + + con_lexeme = lexemeToFastString con_buf + mod_lexeme = lexemeToFastString (decLexeme buf) + all_lexeme = lexemeToFastString all_buf + + just_a_conid + | emptyLexeme buf = cont (ITconid con_lexeme) all_buf + | otherwise = cont (ITqconid (mod_lexeme,con_lexeme)) all_buf + in + + case currentChar# all_buf of + '.'# -> maybe_qualified cont glaexts all_lexeme + (incLexeme all_buf) just_a_conid _ -> just_a_conid - - where - just_a_conid = --trace ("con: "++unpackFS lexeme) $ - cont (ITconid lexeme) buf' - lexeme = lexemeToFastString buf' - munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid - }} - -lex_qid cont glaexts mod buf just_a_conid = + }} + + +maybe_qualified cont glaexts mod buf just_a_conid = + -- trace ("qid: "{-++unpackFS lexeme-}) $ case currentChar# buf of '['# -> -- Special case for [] case lookAhead# buf 1# of @@ -986,15 +1040,21 @@ lex_qid cont glaexts mod buf just_a_conid = _ -> just_a_conid '-'# -> case lookAhead# buf 1# of - '>'# -> cont (ITqconid (mod,SLIT("->"))) (setCurrentPos# buf 2#) + '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#) _ -> lex_id3 cont glaexts mod buf just_a_conid + _ -> lex_id3 cont glaexts mod buf just_a_conid + lex_id3 cont glaexts mod buf just_a_conid + | is_upper (currentChar# buf) = + lex_con cont glaexts buf + | is_symbol (currentChar# buf) = let start_new_lexeme = stepOverLexeme buf in + -- trace ("lex_id31 "{-++unpackFS lexeme-}) $ case expandWhile# is_symbol start_new_lexeme of { buf' -> let lexeme = lexemeToFastString buf' @@ -1009,6 +1069,7 @@ lex_id3 cont glaexts mod buf just_a_conid let start_new_lexeme = stepOverLexeme buf in + -- trace ("lex_id32 "{-++unpackFS lexeme-}) $ case expandWhile# is_ident start_new_lexeme of { buf1 -> if emptyLexeme buf1 then just_a_conid @@ -1017,17 +1078,18 @@ lex_id3 cont glaexts mod buf just_a_conid case 
slurp_trailing_hashes buf1 glaexts of { buf' -> let - lexeme = lexemeToFastString buf' - new_buf = mergeLexemes buf buf' + lexeme = lexemeToFastString buf' + new_buf = mergeLexemes buf buf' is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf in case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of { - Just kwd_token -> just_a_conid; -- avoid M.where etc. - Nothing -> is_a_qvarid - -- TODO: special ids (as, qualified, hiding) shouldn't be - -- recognised as keywords here. ie. M.as is a qualified varid. - }}} + Nothing -> is_a_qvarid ; + Just kwd_token | isSpecial kwd_token -- special ids (as, qualified, hiding) shouldn't be + -> is_a_qvarid -- recognised as keywords here. + | otherwise + -> just_a_conid -- avoid M.where etc. + }}} slurp_trailing_hashes buf glaexts | flag glaexts = expandWhile# (`eqChar#` '#'#) buf @@ -1036,17 +1098,15 @@ slurp_trailing_hashes buf glaexts mk_var_token pk_str | is_upper f = ITconid pk_str - -- _[A-Z] is treated as a constructor in interface files. - | f `eqChar#` '_'# && not (_NULL_ tl) - && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str | is_ident f = ITvarid pk_str | f `eqChar#` ':'# = ITconsym pk_str | otherwise = ITvarsym pk_str where (C# f) = _HEAD_ pk_str - tl = _TAIL_ pk_str + -- tl = _TAIL_ pk_str mk_qvar_token m token = +-- trace ("mk_qvar ") $ case mk_var_token token of ITconid n -> ITqconid (m,n) ITvarid n -> ITqvarid (m,n) @@ -1065,7 +1125,7 @@ lex_tuple cont mod buf back_off = go n buf = case currentChar# buf of ','# -> go (n+1) (stepOn buf) - ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n))) (stepOn buf) + ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf) _ -> back_off lex_ubx_tuple cont mod buf back_off = @@ -1075,46 +1135,60 @@ lex_ubx_tuple cont mod buf back_off = case currentChar# buf of ','# -> go (n+1) (stepOn buf) '#'# -> case lookAhead# buf 1# of - ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n))) + ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n))) (stepOnBy# buf 2#) _ -> back_off _ -> back_off \end{code} ----------------------------------------------------------------------------- -doDiscard rips along really fast, looking for a '#-}', +'lexPragma' rips along really fast, looking for a '##-}', indicating the end of the pragma we're skipping \begin{code} -doDiscard inStr buf = +lexPragma cont contf inStr buf = case currentChar# buf of - '#'# | not inStr -> + '#'# | inStr ==# 0# -> case lookAhead# buf 1# of { '#'# -> case lookAhead# buf 2# of { '-'# -> case lookAhead# buf 3# of { '}'# -> - (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#)); - _ -> doDiscard inStr (incLexeme buf) }; - _ -> doDiscard inStr (incLexeme buf) }; - _ -> doDiscard inStr (incLexeme buf) } + contf cont (lexemeToBuffer buf) + (stepOverLexeme (setCurrentPos# buf 4#)); + _ -> lexPragma cont contf inStr (incLexeme buf) }; + _ -> lexPragma cont contf inStr (incLexeme buf) }; + _ -> lexPragma cont contf inStr (incLexeme buf) } + '"'# -> let odd_slashes buf flg i# = case lookAhead# buf i# of '\\'# -> odd_slashes buf (not flg) (i# -# 1#) _ -> flg + + not_inStr = if inStr ==# 0# then 1# else 0# in case lookAhead# buf (negateInt# 1#) of --backwards, actually '\\'# -> -- escaping something.. - if odd_slashes buf True (negateInt# 2#) then - -- odd number of slashes, " is escaped. - doDiscard inStr (incLexeme buf) - else - -- even number of slashes, \ is escaped. 
- doDiscard (not inStr) (incLexeme buf) - _ -> case inStr of -- forced to avoid build-up - True -> doDiscard False (incLexeme buf) - False -> doDiscard True (incLexeme buf) - _ -> doDiscard inStr (incLexeme buf) + if odd_slashes buf True (negateInt# 2#) + then -- odd number of slashes, " is escaped. + lexPragma cont contf inStr (incLexeme buf) + else -- even number of slashes, \ is escaped. + lexPragma cont contf not_inStr (incLexeme buf) + _ -> lexPragma cont contf not_inStr (incLexeme buf) + + '\''# | inStr ==# 0# -> + case lookAhead# buf 1# of { '"'# -> + case lookAhead# buf 2# of { '\''# -> + lexPragma cont contf inStr (setCurrentPos# buf 3#); + _ -> lexPragma cont contf inStr (incLexeme buf) }; + _ -> lexPragma cont contf inStr (incLexeme buf) } + + -- a sign that the input is ill-formed, since pragmas are + -- assumed to always be properly closed (in .hi files). + '\NUL'# -> trace "lexPragma: unexpected end-of-file" $ + cont (ITunknown "\NUL") buf + + _ -> lexPragma cont contf inStr (incLexeme buf) \end{code} @@ -1173,12 +1247,16 @@ lexError str buf s@PState{ loc = loc } getSrcLocP :: P SrcLoc getSrcLocP buf s@(PState{ loc = loc }) = POk s loc +-- use a temporary SrcLoc for the duration of the argument +setSrcLocP :: SrcLoc -> P a -> P a +setSrcLocP new_loc p buf s = + case p buf s{ loc=new_loc } of + POk _ a -> POk s a + PFailed e -> PFailed e + getSrcFile :: P FAST_STRING getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc) -getContext :: P [LayoutContext] -getContext buf s@(PState{ context = ctx }) = POk s ctx - pushContext :: LayoutContext -> P () pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} () @@ -1250,10 +1328,10 @@ layoutOff buf s@(PState{ context = ctx }) = POk s{ context = NoLayout:ctx } () popContext :: P () -popContext = \ buf s@(PState{ context = ctx }) -> +popContext = \ buf s@(PState{ context = ctx, loc = loc }) -> case ctx of (_:tl) -> POk s{ context = tl } () - [] -> panic "Lex.popContext: empty context" + [] -> PFailed (srcParseErr buf loc) {- Note that if the name of the file we're processing ends @@ -1295,4 +1373,17 @@ ifaceVersionErr hi_vers l toks Nothing -> ptext SLIT("pre ghc-3.02 version") Just v -> ptext SLIT("version") <+> integer v +----------------------------------------------------------------------------- + +srcParseErr :: StringBuffer -> SrcLoc -> Message +srcParseErr s l + = hcat [ppr l, + if null token + then ptext SLIT(": parse error (possibly incorrect indentation)") + else hcat [ptext SLIT(": parse error on input "), + char '`', text token, char '\''] + ] + where + token = lexemeToString s + \end{code}
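
-----------------------------------------------------------------------------
Editor's note: the hunks above only show fragments of the parser-monad
plumbing (P, ParseResult, PState, thenP, setSrcLocP, popContext).  The
sketch below reconstructs the shape of that machinery from what is
visible in the diff, purely for orientation.  It is a simplified,
self-contained approximation: MyBuffer, MyLoc and MyState are
illustrative stand-ins for StringBuffer, SrcLoc and PState, the layout
context is reduced to a list of Ints, and errors are plain Strings;
none of these names exist in GHC itself.

\begin{code}
module ParserMonadSketch where

-- Simplified stand-ins for StringBuffer, SrcLoc and PState.
type MyBuffer = String
data MyLoc    = MyLoc FilePath Int deriving Show
data MyState  = MyState { loc :: MyLoc, context :: [Int] }

data ParseResult a = POk MyState a | PFailed String

-- A parser is a function over the input buffer and the lexer state,
-- the same calling convention the diff's combinators follow.
type P a = MyBuffer -> MyState -> ParseResult a

returnP :: a -> P a
returnP a _buf s = POk s a

thenP :: P a -> (a -> P b) -> P b
thenP m k buf s = case m buf s of
                    POk s' a  -> k a buf s'
                    PFailed e -> PFailed e

failMsgP :: String -> P a
failMsgP msg _buf _s = PFailed msg

getSrcLocP :: P MyLoc
getSrcLocP _buf s = POk s (loc s)

-- Run the argument with a temporary source location and restore the
-- original state afterwards -- the same shape as the new setSrcLocP.
setSrcLocP :: MyLoc -> P a -> P a
setSrcLocP new_loc p buf s =
  case p buf s{ loc = new_loc } of
    POk _ a   -> POk s a
    PFailed e -> PFailed e

pushContext :: Int -> P ()
pushContext ctxt _buf s = POk s{ context = ctxt : context s } ()

-- The diff turns the empty-context case from a panic into an ordinary
-- parse error (PFailed (srcParseErr ...)); mirrored here with a String.
popContext :: P ()
popContext _buf s = case context s of
                      (_:tl) -> POk s{ context = tl } ()
                      []     -> PFailed "parse error: too many close braces"
\end{code}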
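
A second small sketch, again only an approximation of what the diff
shows: skipNestedComment' threads the SrcLoc of the opening `{-' so
that an unterminated comment is reported at the line where it started,
not at the end of the file.  Here the location is just an Int line
number and the buffer a String; the real code works on StringBuffer
and PState.

\begin{code}
module NestedCommentSketch where

-- Skip a (possibly nested) comment body, assuming the opening "{-"
-- has already been consumed.  On success, return the line number and
-- remaining input after the matching "-}"; if the input runs out,
-- report the line on which this comment was opened.
skipNestedComment :: Int -> Int -> String -> Either Int (Int, String)
skipNestedComment origLine = go
  where
    go line ('-':'}':rest) = Right (line, rest)        -- close this level
    go line ('{':'-':rest) =                           -- nested open: recurse;
      case skipNestedComment line line rest of         -- its errors point at its own "{-"
        Right (line', rest') -> go line' rest'
        Left l               -> Left l
    go line ('\n':rest)    = go (line + 1) rest        -- keep the line count honest
    go _    []             = Left origLine             -- unterminated `{-'
    go line (_:rest)       = go line rest
\end{code}

For example, skipNestedComment 1 1 " outer {- inner -} rest -} after"
yields Right (1, " after"), while skipNestedComment 1 1 " never closed"
yields Left 1.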