X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Fparser%2FLex.lhs;h=62d1e5e1033328e5881ad39d3f00ac786fbc75b9;hb=17d77514239158bb9c88ccd6589ad49f3d87068e;hp=efcda1b6d46c30e43c1b9355b52b5b1b65330578;hpb=cc02bb851921a07a83006988c5bc19b72a3f9049;p=ghc-hetmet.git diff --git a/ghc/compiler/parser/Lex.lhs b/ghc/compiler/parser/Lex.lhs index efcda1b..62d1e5e 100644 --- a/ghc/compiler/parser/Lex.lhs +++ b/ghc/compiler/parser/Lex.lhs @@ -16,11 +16,10 @@ An example that provokes the error is -------------------------------------------------------- \begin{code} -{-# OPTIONS -#include "ctypes.h" #-} - +{-# OPTIONS -#include "hs_ctype.h" #-} module Lex ( - ifaceParseErr, + ifaceParseErr, srcParseErr, -- Monad for parser Token(..), lexer, ParseResult(..), PState(..), @@ -34,35 +33,26 @@ module Lex ( #include "HsVersions.h" -import Char ( ord, isSpace, toUpper ) +import Char ( isSpace, toUpper ) import List ( isSuffixOf ) -import IdInfo ( InlinePragInfo(..), CprInfo(..) ) -import Name ( isLowerISO, isUpperISO ) -import PrelMods ( mkTupNameStr, mkUbxTupNameStr ) -import CmdLineOpts ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck ) +import IdInfo ( InlinePragInfo(..) ) +import PrelNames ( mkTupNameStr ) +import CmdLineOpts ( opt_HiVersion, opt_NoHiCheck ) import Demand ( Demand(..) {- instance Read -} ) -import UniqFM ( UniqFM, listToUFM, lookupUFM) -import BasicTypes ( NewOrData(..) ) +import UniqFM ( listToUFM, lookupUFM ) +import BasicTypes ( NewOrData(..), Boxity(..) ) import SrcLoc ( SrcLoc, incSrcLine, srcLocFile, srcLocLine, replaceSrcLine, mkSrcLoc ) -import Maybes ( MaybeErr(..) ) import ErrUtils ( Message ) import Outputable import FastString import StringBuffer import GlaExts -import ST ( runST ) - -#if __GLASGOW_HASKELL__ >= 303 -import Bits -import Word -#endif - -import Char ( chr ) -import Addr +import Ctype +import Char ( ord ) import PrelRead ( readRational__ ) -- Glasgow non-std \end{code} @@ -120,7 +110,7 @@ data Token | ITthen | ITtype | ITwhere - | ITscc + | ITscc -- ToDo: remove (we use {-# SCC "..." #-} now) | ITforall -- GHC extension keywords | ITforeign @@ -128,6 +118,9 @@ data Token | ITlabel | ITdynamic | ITunsafe + | ITwith + | ITstdcallconv + | ITccallconv | ITinterface -- interface keywords | IT__export @@ -142,20 +135,25 @@ data Token | ITbottom | ITinteger_lit | ITfloat_lit + | ITword_lit + | ITword64_lit + | ITint64_lit | ITrational_lit | ITaddr_lit + | ITlabel_lit | ITlit_lit | ITstring_lit | ITtypeapp - | ITonce - | ITmany + | ITusage + | ITfuall | ITarity | ITspecialise | ITnocaf | ITunfold InlinePragInfo | ITstrict ([Demand], Bool) | ITrules - | ITcprinfo (CprInfo) + | ITcprinfo + | ITdeprecated | IT__scc | ITsccAllCafs @@ -164,7 +162,9 @@ data Token | ITinline_prag | ITnoinline_prag | ITrules_prag + | ITdeprecated_prag | ITline_prag + | ITscc_prag | ITclose_prag | ITdotdot -- reserved symbols @@ -185,6 +185,8 @@ data Token | ITocurly -- special symbols | ITccurly + | ITocurlybar -- {|, for type applications + | ITccurlybar -- |}, for type applications | ITvccurly | ITobrack | ITcbrack @@ -206,14 +208,16 @@ data Token | ITqvarsym (FAST_STRING,FAST_STRING) | ITqconsym (FAST_STRING,FAST_STRING) + | ITipvarid FAST_STRING -- GHC extension: implicit param: ?x + | ITpragma StringBuffer - | ITchar Char + | ITchar Int | ITstring FAST_STRING - | ITinteger Integer + | ITinteger Integer | ITrational Rational - | ITprimchar Char + | ITprimchar Int | ITprimstring FAST_STRING | ITprimint Integer | ITprimfloat Rational @@ -222,7 +226,7 @@ data Token | ITunknown String -- Used when the lexer can't make sense of it | ITeof -- end of file token - deriving Text -- debugging + deriving Show -- debugging \end{code} ----------------------------------------------------------------------------- @@ -239,7 +243,9 @@ pragmaKeywordsFM = listToUFM $ ( "NOTINLINE", ITnoinline_prag ), ( "LINE", ITline_prag ), ( "RULES", ITrules_prag ), - ( "RULEZ", ITrules_prag ) -- american spelling :-) + ( "RULEZ", ITrules_prag ), -- american spelling :-) + ( "SCC", ITscc_prag ), + ( "DEPRECATED", ITdeprecated_prag ) ] haskellKeywordsFM = listToUFM $ @@ -269,9 +275,27 @@ haskellKeywordsFM = listToUFM $ ( "then", ITthen ), ( "type", ITtype ), ( "where", ITwhere ), - ( "_scc_", ITscc ) + ( "_scc_", ITscc ) -- ToDo: remove ] +isSpecial :: Token -> Bool +-- If we see M.x, where x is a keyword, but +-- is special, we treat is as just plain M.x, +-- not as a keyword. +isSpecial ITas = True +isSpecial IThiding = True +isSpecial ITqualified = True +isSpecial ITforall = True +isSpecial ITexport = True +isSpecial ITlabel = True +isSpecial ITdynamic = True +isSpecial ITunsafe = True +isSpecial ITwith = True +isSpecial ITccallconv = True +isSpecial ITstdcallconv = True +isSpecial _ = False + +-- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP) ghcExtensionKeywordsFM = listToUFM $ map (\ (x,y) -> (_PK_ x,y)) [ ( "forall", ITforall ), @@ -280,6 +304,9 @@ ghcExtensionKeywordsFM = listToUFM $ ( "label", ITlabel ), ( "dynamic", ITdynamic ), ( "unsafe", ITunsafe ), + ( "with", ITwith ), + ( "stdcall", ITstdcallconv), + ( "ccall", ITccallconv), ("_ccall_", ITccall (False, False, False)), ("_ccall_GC_", ITccall (False, False, True)), ("_casm_", ITccall (False, True, False)), @@ -299,18 +326,23 @@ ghcExtensionKeywordsFM = listToUFM $ ("__bot", ITbottom), ("__integer", ITinteger_lit), ("__float", ITfloat_lit), + ("__int64", ITint64_lit), + ("__word", ITword_lit), + ("__word64", ITword64_lit), ("__rational", ITrational_lit), ("__addr", ITaddr_lit), + ("__label", ITlabel_lit), ("__litlit", ITlit_lit), ("__string", ITstring_lit), ("__a", ITtypeapp), - ("__o", ITonce), - ("__m", ITmany), + ("__u", ITusage), + ("__fuall", ITfuall), ("__A", ITarity), ("__P", ITspecialise), ("__C", ITnocaf), ("__R", ITrules), - ("__u", ITunfold NoInlinePragInfo), + ("__D", ITdeprecated), + ("__U", ITunfold NoInlinePragInfo), ("__ccall", ITccall (False, False, False)), ("__ccall_GC", ITccall (False, False, True)), @@ -365,11 +397,12 @@ lexer cont buf s@(PState{ }) -- first, start a new lexeme and lose all the whitespace - = tab line bol atbol (stepOverLexeme buf) + = _scc_ "Lexer" + tab line bol atbol (stepOverLexeme buf) where line = srcLocLine loc - tab y bol atbol buf = --trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $ + tab y bol atbol buf = -- trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $ case currentChar# buf of '\NUL'# -> @@ -387,17 +420,15 @@ lexer cont buf s@(PState{ if next `eqChar#` '-'# then trundle (n +# 1#) else if is_symbol next || n <# 2# then is_a_token - else case untilChar# (stepOnBy# buf n) '\n'# of - { buf' -> tab y bol atbol (stepOverLexeme buf') - } + else tab y bol atbol + (stepOnUntilChar# (stepOnBy# buf n) '\n'#) in trundle 1# -- comments and pragmas. We deal with LINE pragmas here, -- and throw out any unrecognised pragmas as comments. Any -- pragmas we know about are dealt with later (after any layout -- processing if necessary). - - '{'# | lookAhead# buf 1# `eqChar#` '-'# -> + '{'# | lookAhead# buf 1# `eqChar#` '-'# -> if lookAhead# buf 2# `eqChar#` '#'# then if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1-> @@ -405,14 +436,25 @@ lexer cont buf s@(PState{ let lexeme = mkFastString -- ToDo: too slow (map toUpper (lexemeToString buf2)) in case lookupUFM pragmaKeywordsFM lexeme of - Just ITline_prag -> line_prag (lexer cont) buf2 s' + Just ITline_prag -> + line_prag skip_to_end buf2 s' Just other -> is_a_token - Nothing -> skip_to_end (stepOnBy# buf 2#) + Nothing -> skip_to_end (stepOnBy# buf 2#) s' }} - - else skip_to_end (stepOnBy# buf 2#) + + else skip_to_end (stepOnBy# buf 2#) s' + where + skip_to_end = nested_comment (lexer cont) + + -- special GHC extension: we grok cpp-style #line pragmas + '#'# | lexemeIndex buf ==# bol -> -- the '#' must be in column 0 + case expandWhile# is_space (stepOn buf) of { buf1 -> + if is_digit (currentChar# buf1) + then line_prag next_line buf1 s' + else is_a_token + } where - skip_to_end buf = nested_comment (lexer cont) buf s' + next_line buf = lexer cont (stepOnUntilChar# buf '\n'#) -- tabs have been expanded beforehand c | is_space c -> tab y bol atbol (stepOn buf) @@ -426,23 +468,27 @@ lexer cont buf s@(PState{ | otherwise = lexToken cont glaexts buf s' -- {-# LINE .. #-} pragmas. yeuch. -line_prag cont buf = +line_prag cont buf s@PState{loc=loc} = case expandWhile# is_space buf of { buf1 -> case scanNumLit 0 (stepOverLexeme buf1) of { (line,buf2) -> -- subtract one: the line number refers to the *following* line. let real_line = line - 1 in case fromInteger real_line of { i@(I# l) -> + -- ToDo, if no filename then we skip the newline.... d'oh case expandWhile# is_space buf2 of { buf3 -> case currentChar# buf3 of '\"'#{-"-} -> case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 -> - let file = lexemeToFastString buf4 in - \s@PState{loc=loc} -> skipToEnd buf4 s{loc = mkSrcLoc file i} + let + file = lexemeToFastString buf4 + new_buf = stepOn (stepOverLexeme buf4) + in + if nullFastString file + then cont new_buf s{loc = replaceSrcLine loc l} + else cont new_buf s{loc = mkSrcLoc file i} } - other -> \s@PState{loc=loc} -> skipToEnd buf3 s{loc = replaceSrcLine loc l} + _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l} }}}} - where - skipToEnd buf = nested_comment cont buf nested_comment :: P a -> P a nested_comment cont buf = loop buf @@ -450,8 +496,7 @@ nested_comment cont buf = loop buf loop buf = case currentChar# buf of '\NUL'# | bufferExhausted (stepOn buf) -> - lexError "unterminated `{-'" buf - + lexError "unterminated `{-'" buf -- -} '-'# | lookAhead# buf 1# `eqChar#` '}'# -> cont (stepOnBy# buf 2#) @@ -504,8 +549,7 @@ lexBOL cont buf s@(PState{ lexToken :: (Token -> P a) -> Int# -> P a lexToken cont glaexts buf = - --trace "lexToken" $ - _scc_ "Lexer" + -- trace "lexToken" $ case currentChar# buf of -- special symbols ---------------------------------------------------- @@ -519,12 +563,16 @@ lexToken cont glaexts buf = ']'# -> cont ITcbrack (incLexeme buf) ','# -> cont ITcomma (incLexeme buf) ';'# -> cont ITsemi (incLexeme buf) - '}'# -> \ s@PState{context = ctx} -> case ctx of (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'} - _ -> lexError "too many '}'s" buf s + _ -> lexError "too many '}'s" buf s + '|'# -> case lookAhead# buf 1# of + '}'# | flag glaexts -> cont ITccurlybar + (setCurrentPos# buf 2#) + _ -> lex_sym cont (incLexeme buf) + '#'# -> case lookAhead# buf 1# of ')'# | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#) '-'# -> case lookAhead# buf 2# of @@ -538,19 +586,21 @@ lexToken cont glaexts buf = -> cont ITbackquote (incLexeme buf) '{'# -> -- look for "{-##" special iface pragma - case lookAhead# buf 1# of + case lookAhead# buf 1# of + '|'# | flag glaexts + -> cont ITocurlybar (setCurrentPos# buf 2#) '-'# -> case lookAhead# buf 2# of '#'# -> case lookAhead# buf 3# of - '#'# -> + '#'# -> let (lexeme, buf') - = doDiscard False (stepOnBy# (stepOverLexeme buf) 4#) in - cont (ITpragma lexeme) buf' + = doDiscard 0# (stepOnBy# (stepOverLexeme buf) 4#) in + cont (ITpragma lexeme) buf' _ -> lex_prag cont (setCurrentPos# buf 3#) - _ -> cont ITocurly (incLexeme buf) - _ -> (layoutOff `thenP_` cont ITocurly) (incLexeme buf) + _ -> cont ITocurly (incLexeme buf) + _ -> (layoutOff `thenP_` cont ITocurly) (incLexeme buf) -- strings/characters ------------------------------------------------- - '\"'#{-"-} -> lex_string cont glaexts "" (incLexeme buf) + '\"'#{-"-} -> lex_string cont glaexts [] (incLexeme buf) '\''# -> lex_char (char_end cont) glaexts (incLexeme buf) -- strictness and cpr pragmas and __scc treated specially. @@ -561,8 +611,8 @@ lexToken cont glaexts buf = lex_demand cont (stepOnUntil (not . isSpace) (stepOnBy# buf 3#)) -- past __S 'M'# -> - lex_cpr cont (stepOnUntil (not . isSpace) - (stepOnBy# buf 3#)) -- past __M + cont ITcprinfo (stepOnBy# buf 3#) -- past __M + 's'# -> case prefixMatch (stepOnBy# buf 3#) "cc" of Just buf' -> lex_scc cont (stepOverLexeme buf') @@ -586,6 +636,8 @@ lexToken cont glaexts buf = trace "lexIface: misplaced NUL?" $ cont (ITunknown "\NUL") (stepOn buf) + '?'# | flag glaexts && is_lower (lookAhead# buf 1#) -> + lex_ip cont (incLexeme buf) c | is_digit c -> lex_num cont glaexts 0 buf | is_symbol c -> lex_sym cont buf | is_upper c -> lex_con cont glaexts buf @@ -616,16 +668,18 @@ lex_prag cont buf lex_string cont glaexts s buf = case currentChar# buf of '"'#{-"-} -> - let buf' = incLexeme buf; s' = mkFastString (reverse s) in + let buf' = incLexeme buf; s' = mkFastStringInt (reverse s) in case currentChar# buf' of - '#'# | flag glaexts -> cont (ITprimstring s') (incLexeme buf') + '#'# | flag glaexts -> if all (<= 0xFF) s + then cont (ITprimstring s') (incLexeme buf') + else lexError "primitive string literal must contain only characters <= '\xFF'" buf' _ -> cont (ITstring s') buf' -- ignore \& in a string, deal with string gaps '\\'# | next_ch `eqChar#` '&'# - -> lex_string cont glaexts s (setCurrentPos# buf 2#) + -> lex_string cont glaexts s buf' | is_space next_ch - -> lex_stringgap cont glaexts s buf' + -> lex_stringgap cont glaexts s (incLexeme buf) where next_ch = lookAhead# buf 1# buf' = setCurrentPos# buf 2# @@ -643,11 +697,11 @@ lex_stringgap cont glaexts s buf lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf -lex_char :: (Int# -> Char -> P a) -> Int# -> P a +lex_char :: (Int# -> Int -> P a) -> Int# -> P a lex_char cont glaexts buf = case currentChar# buf of '\\'# -> lex_escape (cont glaexts) (incLexeme buf) - c | is_any c -> cont glaexts (C# c) (incLexeme buf) + c | is_any c -> cont glaexts (I# (ord# c)) (incLexeme buf) other -> charError buf char_end cont glaexts c buf @@ -662,19 +716,19 @@ char_end cont glaexts c buf lex_escape cont buf = let buf' = incLexeme buf in case currentChar# buf of - 'a'# -> cont '\a' buf' - 'b'# -> cont '\b' buf' - 'f'# -> cont '\f' buf' - 'n'# -> cont '\n' buf' - 'r'# -> cont '\r' buf' - 't'# -> cont '\t' buf' - 'v'# -> cont '\v' buf' - '\\'# -> cont '\\' buf' - '"'# -> cont '\"' buf' - '\''# -> cont '\'' buf' + 'a'# -> cont (ord '\a') buf' + 'b'# -> cont (ord '\b') buf' + 'f'# -> cont (ord '\f') buf' + 'n'# -> cont (ord '\n') buf' + 'r'# -> cont (ord '\r') buf' + 't'# -> cont (ord '\t') buf' + 'v'# -> cont (ord '\v') buf' + '\\'# -> cont (ord '\\') buf' + '"'# -> cont (ord '\"') buf' + '\''# -> cont (ord '\'') buf' '^'# -> let c = currentChar# buf' in if c `geChar#` '@'# && c `leChar#` '_'# - then cont (C# (chr# (ord# c -# ord# '@'#))) (incLexeme buf') + then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf') else charError buf' 'x'# -> readNum (after_charnum cont) buf' is_hexdigit 16 hex @@ -684,13 +738,12 @@ lex_escape cont buf _ -> case [ (c,buf2) | (p,c) <- silly_escape_chars, Just buf2 <- [prefixMatch buf p] ] of - (c,buf2):_ -> cont c buf2 + (c,buf2):_ -> cont (ord c) buf2 [] -> charError buf' -after_charnum cont i buf - = let int = fromInteger i in - if i >= 0 && i <= 255 - then cont (chr int) buf +after_charnum cont i buf + = if i >= 0 && i <= 0x7FFFFFFF + then cont (fromInteger i) buf else charError buf readNum cont buf is_digit base conv = read buf 0 @@ -703,8 +756,8 @@ readNum cont buf is_digit base conv = read buf 0 is_hexdigit c = is_digit c - || (c `geChar#` 'a'# && c `leChar#` 'h'#) - || (c `geChar#` 'A'# && c `leChar#` 'H'#) + || (c `geChar#` 'a'# && c `leChar#` 'f'#) + || (c `geChar#` 'A'# && c `leChar#` 'F'#) hex c | is_digit c = ord# c -# ord# '0'# | otherwise = ord# (to_lower c) -# ord# 'a'# +# 10# @@ -784,23 +837,6 @@ lex_demand cont buf = = case read_em [] buf of (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest -lex_cpr cont buf = - case read_em [] buf of { (cpr_inf,buf') -> - ASSERT ( null (tail cpr_inf) ) - cont (ITcprinfo $ head cpr_inf) buf' - } - where - -- code snatched from lex_demand above - read_em acc buf = - case currentChar# buf of - '-'# -> read_em (NoCPRInfo : acc) (stepOn buf) - '('# -> do_unpack acc (stepOn buf) - ')'# -> (reverse acc, stepOn buf) - _ -> (reverse acc, buf) - - do_unpack acc buf - = case read_em [] buf of - (stuff, rest) -> read_em ((CPRInfo stuff) : acc) rest ------------------ lex_scc cont buf = @@ -860,41 +896,22 @@ after_lexnum cont glaexts i buf lex_cstring cont buf = case expandUntilMatch (stepOverLexeme buf) "\'\'" of - buf' -> cont (ITlitlit (lexemeToFastString + Just buf' -> cont (ITlitlit (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#)))) - (mergeLexemes buf buf') - ------------------------------------------------------------------------------- --- Character Classes - -is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool - -{-# INLINE is_ctype #-} -#if __GLASGOW_HASKELL__ >= 303 -is_ctype :: Word8 -> Char# -> Bool -is_ctype mask = \c -> - (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0 -#else -is_ctype :: Int -> Char# -> Bool -is_ctype (I# mask) = \c -> - let (A# ctype) = ``char_types'' :: Addr - flag_word = int2Word# (ord# (indexCharOffAddr# ctype (ord# c))) - in - (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#) -#endif - -is_ident = is_ctype 1 -is_symbol = is_ctype 2 -is_any = is_ctype 4 -is_space = is_ctype 8 -is_upper = is_ctype 16 -is_digit = is_ctype 32 + (mergeLexemes buf buf') + Nothing -> lexError "unterminated ``" buf ----------------------------------------------------------------------------- -- identifiers, symbols etc. +lex_ip cont buf = + case expandWhile# is_ident buf of + buf' -> cont (ITipvarid lexeme) buf' + where lexeme = lexemeToFastString buf' + lex_id cont glaexts buf = - case expandWhile# is_ident buf of { buf1 -> + let buf1 = expandWhile# is_ident buf in + seq buf1 $ case (if flag glaexts then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes @@ -907,7 +924,7 @@ lex_id cont glaexts buf = cont kwd_token buf'; Nothing -> - let var_token = cont (mk_var_token lexeme) buf' in + let var_token = cont (ITvarid lexeme) buf' in if not (flag glaexts) then var_token @@ -917,9 +934,10 @@ lex_id cont glaexts buf = Just kwd_token -> cont kwd_token buf'; Nothing -> var_token - }}}} + }}} lex_sym cont buf = + -- trace "lex_sym" $ case expandWhile# is_symbol buf of buf' -> case lookupUFM haskellKeySymsFM lexeme of { Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $ @@ -931,6 +949,7 @@ lex_sym cont buf = lex_con cont glaexts buf = + -- trace ("con: "{-++unpackFS lexeme-}) $ case expandWhile# is_ident buf of { buf1 -> case slurp_trailing_hashes buf1 glaexts of { buf' -> @@ -939,13 +958,13 @@ lex_con cont glaexts buf = _ -> just_a_conid where - just_a_conid = --trace ("con: "++unpackFS lexeme) $ - cont (ITconid lexeme) buf' + just_a_conid = cont (ITconid lexeme) buf' lexeme = lexemeToFastString buf' munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid }} lex_qid cont glaexts mod buf just_a_conid = + -- trace ("quid: "{-++unpackFS lexeme-}) $ case currentChar# buf of '['# -> -- Special case for [] case lookAhead# buf 1# of @@ -964,7 +983,7 @@ lex_qid cont glaexts mod buf just_a_conid = _ -> just_a_conid '-'# -> case lookAhead# buf 1# of - '>'# -> cont (ITqconid (mod,SLIT("->"))) (setCurrentPos# buf 2#) + '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#) _ -> lex_id3 cont glaexts mod buf just_a_conid _ -> lex_id3 cont glaexts mod buf just_a_conid @@ -973,6 +992,7 @@ lex_id3 cont glaexts mod buf just_a_conid let start_new_lexeme = stepOverLexeme buf in + -- trace ("lex_id31 "{-++unpackFS lexeme-}) $ case expandWhile# is_symbol start_new_lexeme of { buf' -> let lexeme = lexemeToFastString buf' @@ -987,6 +1007,7 @@ lex_id3 cont glaexts mod buf just_a_conid let start_new_lexeme = stepOverLexeme buf in + -- trace ("lex_id32 "{-++unpackFS lexeme-}) $ case expandWhile# is_ident start_new_lexeme of { buf1 -> if emptyLexeme buf1 then just_a_conid @@ -995,17 +1016,18 @@ lex_id3 cont glaexts mod buf just_a_conid case slurp_trailing_hashes buf1 glaexts of { buf' -> let - lexeme = lexemeToFastString buf' - new_buf = mergeLexemes buf buf' + lexeme = lexemeToFastString buf' + new_buf = mergeLexemes buf buf' is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf in case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of { - Just kwd_token -> just_a_conid; -- avoid M.where etc. - Nothing -> is_a_qvarid - -- TODO: special ids (as, qualified, hiding) shouldn't be - -- recognised as keywords here. ie. M.as is a qualified varid. - }}} + Nothing -> is_a_qvarid ; + Just kwd_token | isSpecial kwd_token -- special ids (as, qualified, hiding) shouldn't be + -> is_a_qvarid -- recognised as keywords here. + | otherwise + -> just_a_conid -- avoid M.where etc. + }}} slurp_trailing_hashes buf glaexts | flag glaexts = expandWhile# (`eqChar#` '#'#) buf @@ -1014,17 +1036,15 @@ slurp_trailing_hashes buf glaexts mk_var_token pk_str | is_upper f = ITconid pk_str - -- _[A-Z] is treated as a constructor in interface files. - | f `eqChar#` '_'# && not (_NULL_ tl) - && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str | is_ident f = ITvarid pk_str | f `eqChar#` ':'# = ITconsym pk_str | otherwise = ITvarsym pk_str where (C# f) = _HEAD_ pk_str - tl = _TAIL_ pk_str + -- tl = _TAIL_ pk_str mk_qvar_token m token = +-- trace ("mk_qvar ") $ case mk_var_token token of ITconid n -> ITqconid (m,n) ITvarid n -> ITqvarid (m,n) @@ -1043,7 +1063,7 @@ lex_tuple cont mod buf back_off = go n buf = case currentChar# buf of ','# -> go (n+1) (stepOn buf) - ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n))) (stepOn buf) + ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf) _ -> back_off lex_ubx_tuple cont mod buf back_off = @@ -1053,20 +1073,20 @@ lex_ubx_tuple cont mod buf back_off = case currentChar# buf of ','# -> go (n+1) (stepOn buf) '#'# -> case lookAhead# buf 1# of - ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n))) + ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n))) (stepOnBy# buf 2#) _ -> back_off _ -> back_off \end{code} ----------------------------------------------------------------------------- -doDiscard rips along really fast, looking for a '#-}', +doDiscard rips along really fast, looking for a '##-}', indicating the end of the pragma we're skipping \begin{code} doDiscard inStr buf = case currentChar# buf of - '#'# | not inStr -> + '#'# | inStr ==# 0# -> case lookAhead# buf 1# of { '#'# -> case lookAhead# buf 2# of { '-'# -> case lookAhead# buf 3# of { '}'# -> @@ -1074,24 +1094,32 @@ doDiscard inStr buf = _ -> doDiscard inStr (incLexeme buf) }; _ -> doDiscard inStr (incLexeme buf) }; _ -> doDiscard inStr (incLexeme buf) } + '"'# -> let odd_slashes buf flg i# = case lookAhead# buf i# of '\\'# -> odd_slashes buf (not flg) (i# -# 1#) _ -> flg + + not_inStr = if inStr ==# 0# then 1# else 0# in case lookAhead# buf (negateInt# 1#) of --backwards, actually '\\'# -> -- escaping something.. - if odd_slashes buf True (negateInt# 2#) then - -- odd number of slashes, " is escaped. - doDiscard inStr (incLexeme buf) - else - -- even number of slashes, \ is escaped. - doDiscard (not inStr) (incLexeme buf) - _ -> case inStr of -- forced to avoid build-up - True -> doDiscard False (incLexeme buf) - False -> doDiscard True (incLexeme buf) + if odd_slashes buf True (negateInt# 2#) + then -- odd number of slashes, " is escaped. + doDiscard inStr (incLexeme buf) + else -- even number of slashes, \ is escaped. + doDiscard not_inStr (incLexeme buf) + _ -> doDiscard not_inStr (incLexeme buf) + + '\''# | inStr ==# 0# -> + case lookAhead# buf 1# of { '"'# -> + case lookAhead# buf 2# of { '\''# -> + doDiscard inStr (setCurrentPos# buf 3#); + _ -> doDiscard inStr (incLexeme buf) }; + _ -> doDiscard inStr (incLexeme buf) } + _ -> doDiscard inStr (incLexeme buf) \end{code} @@ -1228,10 +1256,10 @@ layoutOff buf s@(PState{ context = ctx }) = POk s{ context = NoLayout:ctx } () popContext :: P () -popContext = \ buf s@(PState{ context = ctx }) -> +popContext = \ buf s@(PState{ context = ctx, loc = loc }) -> case ctx of (_:tl) -> POk s{ context = tl } () - [] -> panic "Lex.popContext: empty context" + [] -> PFailed (srcParseErr buf loc) {- Note that if the name of the file we're processing ends @@ -1273,4 +1301,17 @@ ifaceVersionErr hi_vers l toks Nothing -> ptext SLIT("pre ghc-3.02 version") Just v -> ptext SLIT("version") <+> integer v +----------------------------------------------------------------------------- + +srcParseErr :: StringBuffer -> SrcLoc -> Message +srcParseErr s l + = hcat [ppr l, + if null token + then ptext SLIT(": parse error (possibly incorrect indentation)") + else hcat [ptext SLIT(": parse error on input "), + char '`', text token, char '\''] + ] + where + token = lexemeToString s + \end{code}