X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Fparser%2FLex.lhs;h=c15f46fa06b46c4450ae156c9b9b7f96c51bcd64;hb=0588967a4bc5f2e1b5e3bf433334fad59aa07221;hp=25aa14c5f5a650ee8bcfdb9d2d90833309142947;hpb=a33ecb97353d7e411c40edd662a4afcfc603fe28;p=ghc-hetmet.git diff --git a/ghc/compiler/parser/Lex.lhs b/ghc/compiler/parser/Lex.lhs index 25aa14c..c15f46f 100644 --- a/ghc/compiler/parser/Lex.lhs +++ b/ghc/compiler/parser/Lex.lhs @@ -16,11 +16,10 @@ An example that provokes the error is -------------------------------------------------------- \begin{code} -{-# OPTIONS -#include "ctypes.h" #-} module Lex ( - ifaceParseErr, + ifaceParseErr, srcParseErr, -- Monad for parser Token(..), lexer, ParseResult(..), PState(..), @@ -55,12 +54,7 @@ import FastString import StringBuffer import GlaExts import ST ( runST ) - -#if __GLASGOW_HASKELL__ >= 303 -import Bits -import Word -#endif - +import Ctypes import Char ( chr ) import Addr import PrelRead ( readRational__ ) -- Glasgow non-std @@ -128,6 +122,9 @@ data Token | ITlabel | ITdynamic | ITunsafe + | ITwith + | ITstdcallconv + | ITccallconv | ITinterface -- interface keywords | IT__export @@ -142,20 +139,24 @@ data Token | ITbottom | ITinteger_lit | ITfloat_lit + | ITword_lit + | ITword64_lit + | ITint64_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit | ITtypeapp - | ITonce - | ITmany + | ITusage + | ITfuall | ITarity | ITspecialise | ITnocaf | ITunfold InlinePragInfo | ITstrict ([Demand], Bool) | ITrules - | ITcprinfo (CprInfo) + | ITcprinfo + | ITdeprecated | IT__scc | ITsccAllCafs @@ -164,6 +165,7 @@ data Token | ITinline_prag | ITnoinline_prag | ITrules_prag + | ITdeprecated_prag | ITline_prag | ITclose_prag @@ -206,6 +208,8 @@ data Token | ITqvarsym (FAST_STRING,FAST_STRING) | ITqconsym (FAST_STRING,FAST_STRING) + | ITipvarid FAST_STRING -- GHC extension: implicit param: ?x + | ITpragma StringBuffer | ITchar Char @@ -222,7 +226,7 @@ data Token | ITunknown String -- Used when the lexer can't make sense of it | ITeof -- end of file token - deriving Text -- debugging + deriving Show -- debugging \end{code} ----------------------------------------------------------------------------- @@ -239,7 +243,8 @@ pragmaKeywordsFM = listToUFM $ ( "NOTINLINE", ITnoinline_prag ), ( "LINE", ITline_prag ), ( "RULES", ITrules_prag ), - ( "RULEZ", ITrules_prag ) -- american spelling :-) + ( "RULEZ", ITrules_prag ), -- american spelling :-) + ( "DEPRECATED", ITdeprecated_prag ) ] haskellKeywordsFM = listToUFM $ @@ -272,6 +277,7 @@ haskellKeywordsFM = listToUFM $ ( "_scc_", ITscc ) ] +-- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP) ghcExtensionKeywordsFM = listToUFM $ map (\ (x,y) -> (_PK_ x,y)) [ ( "forall", ITforall ), @@ -280,6 +286,9 @@ ghcExtensionKeywordsFM = listToUFM $ ( "label", ITlabel ), ( "dynamic", ITdynamic ), ( "unsafe", ITunsafe ), + ( "with", ITwith ), + ( "stdcall", ITstdcallconv), + ( "ccall", ITccallconv), ("_ccall_", ITccall (False, False, False)), ("_ccall_GC_", ITccall (False, False, True)), ("_casm_", ITccall (False, True, False)), @@ -299,18 +308,22 @@ ghcExtensionKeywordsFM = listToUFM $ ("__bot", ITbottom), ("__integer", ITinteger_lit), ("__float", ITfloat_lit), + ("__int64", ITint64_lit), + ("__word", ITword_lit), + ("__word64", ITword64_lit), ("__rational", ITrational_lit), ("__addr", ITaddr_lit), ("__litlit", ITlit_lit), ("__string", ITstring_lit), ("__a", ITtypeapp), - ("__o", ITonce), - ("__m", ITmany), + ("__u", ITusage), + ("__fuall", ITfuall), ("__A", ITarity), ("__P", ITspecialise), ("__C", ITnocaf), ("__R", ITrules), - ("__u", ITunfold NoInlinePragInfo), + ("__D", ITdeprecated), + ("__U", ITunfold NoInlinePragInfo), ("__ccall", ITccall (False, False, False)), ("__ccall_GC", ITccall (False, False, True)), @@ -365,7 +378,8 @@ lexer cont buf s@(PState{ }) -- first, start a new lexeme and lose all the whitespace - = tab line bol atbol (stepOverLexeme buf) + = _scc_ "Lexer" + tab line bol atbol (stepOverLexeme buf) where line = srcLocLine loc @@ -505,7 +519,6 @@ lexBOL cont buf s@(PState{ lexToken :: (Token -> P a) -> Int# -> P a lexToken cont glaexts buf = --trace "lexToken" $ - _scc_ "Lexer" case currentChar# buf of -- special symbols ---------------------------------------------------- @@ -523,7 +536,7 @@ lexToken cont glaexts buf = '}'# -> \ s@PState{context = ctx} -> case ctx of (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'} - _ -> lexError "too many '}'s" buf s + _ -> lexError "too many '}'s" buf s '#'# -> case lookAhead# buf 1# of ')'# | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#) @@ -561,8 +574,8 @@ lexToken cont glaexts buf = lex_demand cont (stepOnUntil (not . isSpace) (stepOnBy# buf 3#)) -- past __S 'M'# -> - lex_cpr cont (stepOnUntil (not . isSpace) - (stepOnBy# buf 3#)) -- past __M + cont ITcprinfo (stepOnBy# buf 3#) -- past __M + 's'# -> case prefixMatch (stepOnBy# buf 3#) "cc" of Just buf' -> lex_scc cont (stepOverLexeme buf') @@ -586,6 +599,8 @@ lexToken cont glaexts buf = trace "lexIface: misplaced NUL?" $ cont (ITunknown "\NUL") (stepOn buf) + '?'# | flag glaexts && is_lower (lookAhead# buf 1#) -> + lex_ip cont (incLexeme buf) c | is_digit c -> lex_num cont glaexts 0 buf | is_symbol c -> lex_sym cont buf | is_upper c -> lex_con cont glaexts buf @@ -623,9 +638,9 @@ lex_string cont glaexts s buf -- ignore \& in a string, deal with string gaps '\\'# | next_ch `eqChar#` '&'# - -> lex_string cont glaexts s (setCurrentPos# buf 2#) + -> lex_string cont glaexts s buf' | is_space next_ch - -> lex_stringgap cont glaexts s buf' + -> lex_stringgap cont glaexts s (incLexeme buf) where next_ch = lookAhead# buf 1# buf' = setCurrentPos# buf 2# @@ -703,8 +718,8 @@ readNum cont buf is_digit base conv = read buf 0 is_hexdigit c = is_digit c - || (c `geChar#` 'a'# && c `leChar#` 'h'#) - || (c `geChar#` 'A'# && c `leChar#` 'H'#) + || (c `geChar#` 'a'# && c `leChar#` 'f'#) + || (c `geChar#` 'A'# && c `leChar#` 'F'#) hex c | is_digit c = ord# c -# ord# '0'# | otherwise = ord# (to_lower c) -# ord# 'a'# +# 10# @@ -784,23 +799,6 @@ lex_demand cont buf = = case read_em [] buf of (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest -lex_cpr cont buf = - case read_em [] buf of { (cpr_inf,buf') -> - ASSERT ( null (tail cpr_inf) ) - cont (ITcprinfo $ head cpr_inf) buf' - } - where - -- code snatched from lex_demand above - read_em acc buf = - case currentChar# buf of - '-'# -> read_em (NoCPRInfo : acc) (stepOn buf) - '('# -> do_unpack acc (stepOn buf) - ')'# -> (reverse acc, stepOn buf) - _ -> (reverse acc, buf) - - do_unpack acc buf - = case read_em [] buf of - (stuff, rest) -> read_em ((CPRInfo stuff) : acc) rest ------------------ lex_scc cont buf = @@ -860,39 +858,19 @@ after_lexnum cont glaexts i buf lex_cstring cont buf = case expandUntilMatch (stepOverLexeme buf) "\'\'" of - buf' -> cont (ITlitlit (lexemeToFastString + Just buf' -> cont (ITlitlit (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#)))) - (mergeLexemes buf buf') - ------------------------------------------------------------------------------- --- Character Classes - -is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool - -{-# INLINE is_ctype #-} -#if __GLASGOW_HASKELL__ >= 303 -is_ctype :: Word8 -> Char# -> Bool -is_ctype mask = \c -> - (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0 -#else -is_ctype :: Int -> Char# -> Bool -is_ctype (I# mask) = \c -> - let (A# ctype) = ``char_types'' :: Addr - flag_word = int2Word# (ord# (indexCharOffAddr# ctype (ord# c))) - in - (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#) -#endif - -is_ident = is_ctype 1 -is_symbol = is_ctype 2 -is_any = is_ctype 4 -is_space = is_ctype 8 -is_upper = is_ctype 16 -is_digit = is_ctype 32 + (mergeLexemes buf buf') + Nothing -> lexError "unterminated ``" buf ----------------------------------------------------------------------------- -- identifiers, symbols etc. +lex_ip cont buf = + case expandWhile# is_ident buf of + buf' -> cont (ITipvarid lexeme) buf' + where lexeme = lexemeToFastString buf' + lex_id cont glaexts buf = case expandWhile# is_ident buf of { buf1 -> @@ -979,10 +957,9 @@ lex_id3 cont glaexts mod buf just_a_conid -- real lexeme is M. new_buf = mergeLexemes buf buf' in - case lookupUFM haskellKeySymsFM lexeme of { - Just kwd_token -> just_a_conid; -- avoid M.:: etc. - Nothing -> cont (mk_qvar_token mod lexeme) new_buf - }} + cont (mk_qvar_token mod lexeme) new_buf + -- wrong, but arguably morally right: M... is now a qvarsym + } | otherwise = let @@ -1015,9 +992,6 @@ slurp_trailing_hashes buf glaexts mk_var_token pk_str | is_upper f = ITconid pk_str - -- _[A-Z] is treated as a constructor in interface files. - | f `eqChar#` '_'# && not (_NULL_ tl) - && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str | is_ident f = ITvarid pk_str | f `eqChar#` ':'# = ITconsym pk_str | otherwise = ITvarsym pk_str @@ -1199,13 +1173,25 @@ h = h - we still need to insert another '}' followed by a ';', hence the atbol trick. +There's also a special hack in here to deal with + + do + .... + e $ do + blah + +i.e. the inner context is at the same indentation level as the outer +context. This is strictly illegal according to Haskell 98, but +there's a lot of existing code using this style and it doesn't make +any sense to disallow it, since empty 'do' lists don't make sense. -} -layoutOn :: P () -layoutOn buf s@(PState{ bol = bol, context = ctx }) = +layoutOn :: Bool -> P () +layoutOn strict buf s@(PState{ bol = bol, context = ctx }) = let offset = lexemeIndex buf -# bol in case ctx of - Layout prev_off : _ | prev_off >=# offset -> + Layout prev_off : _ + | if strict then prev_off >=# offset else prev_off ># offset -> --trace ("layout on, column: " ++ show (I# offset)) $ POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } () other -> @@ -1217,10 +1203,10 @@ layoutOff buf s@(PState{ context = ctx }) = POk s{ context = NoLayout:ctx } () popContext :: P () -popContext = \ buf s@(PState{ context = ctx }) -> +popContext = \ buf s@(PState{ context = ctx, loc = loc }) -> case ctx of (_:tl) -> POk s{ context = tl } () - [] -> panic "Lex.popContext: empty context" + [] -> PFailed (srcParseErr buf loc) {- Note that if the name of the file we're processing ends @@ -1262,4 +1248,17 @@ ifaceVersionErr hi_vers l toks Nothing -> ptext SLIT("pre ghc-3.02 version") Just v -> ptext SLIT("version") <+> integer v +----------------------------------------------------------------------------- + +srcParseErr :: StringBuffer -> SrcLoc -> Message +srcParseErr s l + = hcat [ppr l, + if null token + then ptext SLIT(": parse error (possibly incorrect indentation)") + else hcat [ptext SLIT(": parse error on input "), + char '`', text token, char '\''] + ] + where + token = lexemeToString s + \end{code}