X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Freader%2FLex.lhs;h=116f6bdcfbb24583b0b0a627ff4bb9d170cd0318;hb=7e602b0a11e567fcb035d1afd34015aebcf9a577;hp=a353f79eca73e5aa1be20ebb67e23871d7d1569a;hpb=bb521c6bba76f19474f12195b990b29eda66a4e8;p=ghc-hetmet.git diff --git a/ghc/compiler/reader/Lex.lhs b/ghc/compiler/reader/Lex.lhs index a353f79..116f6bd 100644 --- a/ghc/compiler/reader/Lex.lhs +++ b/ghc/compiler/reader/Lex.lhs @@ -1,174 +1,197 @@ % -% (c) The GRASP/AQUA Project, Glasgow University, 1992-1996 +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 % \section[Lexical analysis]{Lexical analysis} -\begin{code} -#include "HsVersions.h" - -module Lex ( - - isLexCon, isLexVar, isLexId, isLexSym, - isLexConId, isLexConSym, isLexVarId, isLexVarSym, - mkTupNameStr, +-------------------------------------------------------- +[Jan 98] +There's a known bug in here: - -- Monad for parser - IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError + If an interface file ends prematurely, Lex tries to + do headFS of an empty FastString. - ) where +An example that provokes the error is + f _:_ _forall_ [a] <<>> +-------------------------------------------------------- -IMPORT_1_3(Char(isDigit, isAlpha, isAlphanum, isUpper)) +\begin{code} +{-# OPTIONS -#include "ctypes.h" #-} -import Demand ( Demand {- instance Read -} ) -import FiniteMap ( FiniteMap, listToFM, lookupFM ) -import Maybes ( Maybe(..), MaybeErr(..) ) -import Pretty -import CharSeq ( CSeq ) -import ErrUtils ( Error(..) ) -import Outputable ( Outputable(..) ) -import PprStyle ( PprStyle(..) ) -import Util ( nOfThem, panic ) +module Lex ( -\end{code} + ifaceParseErr, -%************************************************************************ -%* * -\subsection{Lexical categories} -%* * -%************************************************************************ + -- Monad for parser + IfaceToken(..), lexIface, IfM, thenIf, returnIf, getSrcLocIf, + checkVersion, + happyError, + StringBuffer -These functions test strings to see if they fit the lexical categories -defined in the Haskell report. Normally applied as in e.g. @isCon -(getLocalName foo)@. + ) where -\begin{code} -isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym, - isLexVarId, isLexVarSym :: FAST_STRING -> Bool +#include "HsVersions.h" -isLexCon cs = isLexConId cs || isLexConSym cs -isLexVar cs = isLexVarId cs || isLexVarSym cs +import Char ( ord, isSpace ) +import List ( isSuffixOf ) -isLexId cs = isLexConId cs || isLexVarId cs -isLexSym cs = isLexConSym cs || isLexVarSym cs +import CostCentre -- Pretty much all of it +import IdInfo ( InlinePragInfo(..) ) +import Name ( isLowerISO, isUpperISO, mkModule ) -------------- +import PrelMods ( mkTupNameStr, mkUbxTupNameStr ) +import CmdLineOpts ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck ) +import Demand ( Demand(..) {- instance Read -} ) +import UniqFM ( UniqFM, listToUFM, lookupUFM) +import BasicTypes ( NewOrData(..), IfaceFlavour(..) ) +import SrcLoc ( SrcLoc, incSrcLine, srcLocFile ) -isLexConId cs - | _NULL_ cs = False - | cs == SLIT("[]") = True - | c == '(' = True -- (), (,), (,,), ... - | otherwise = isUpper c || isUpperISO c - where - c = _HEAD_ cs +import Maybes ( MaybeErr(..) ) +import ErrUtils ( ErrMsg ) +import Outputable -isLexVarId cs - | _NULL_ cs = False - | otherwise = isLower c || isLowerISO c - where - c = _HEAD_ cs +import FastString +import StringBuffer +import GlaExts +import ST ( runST ) -isLexConSym cs - | _NULL_ cs = False - | otherwise = c == ':' - || cs == SLIT("->") - where - c = _HEAD_ cs +#if __GLASGOW_HASKELL__ >= 303 +import Bits +import Word +#endif -isLexVarSym cs - | _NULL_ cs = False - | otherwise = isSymbolASCII c - || isSymbolISO c - where - c = _HEAD_ cs +import Addr -------------- -isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-" -isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf]) -isUpperISO c = 0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c -isLowerISO c = 0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c +import PrelRead ( readRational__ ) -- Glasgow non-std \end{code} - %************************************************************************ %* * -\subsection{Tuple strings -- ugh!} +\subsection{Data types} %* * %************************************************************************ -\begin{code} -mkTupNameStr 0 = SLIT("()") -mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???" -mkTupNameStr 2 = _PK_ "(,)" -- not strictly necessary -mkTupNameStr 3 = _PK_ "(,,)" -- ditto -mkTupNameStr 4 = _PK_ "(,,,)" -- ditto -mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")") -\end{code} +The token data type, fairly un-interesting except from one +constructor, @ITidinfo@, which is used to lazily lex id info (arity, +strictness, unfolding etc). +The Idea/Observation here is that the renamer needs to scan through +all of an interface file before it can continue. But only a fraction +of the information contained in the file turns out to be useful, so +delaying as much as possible of the scanning and parsing of an +interface file Makes Sense (Heap profiles of the compiler +show a reduction in heap usage by at least a factor of two, +post-renamer). +Hence, the interface file lexer spots when value declarations are +being scanned and return the @ITidinfo@ and @ITtype@ constructors +for the type and any other id info for that binding (unfolding, strictness +etc). These constructors are applied to the result of lexing these sub-chunks. -%************************************************************************ -%* * -\subsection{Data types} -%* * -%************************************************************************ +The lexing of the type and id info is all done lazily, of course, so +the scanning (and subsequent parsing) will be done *only* on the ids the +renamer finds out that it is interested in. The rest will just be junked. +Laziness, you know it makes sense :-) \begin{code} data IfaceToken - = ITinterface -- keywords - | ITusages - | ITversions - | ITexports - | ITinstance_modules - | ITinstances - | ITfixities - | ITdeclarations - | ITpragmas + = ITcase -- Haskell keywords + | ITclass | ITdata - | ITtype - | ITnewtype + | ITdefault | ITderiving - | ITclass - | ITwhere - | ITinstance + | ITdo + | ITelse + | ITif + | ITimport + | ITin + | ITinfix | ITinfixl | ITinfixr - | ITinfix + | ITinstance + | ITlet + | ITmodule + | ITnewtype + | ITof + | ITthen + | ITtype + | ITwhere + | ITas + | ITqualified + | IThiding + + | ITinterface -- GHC-extension keywords + | ITexport + | ITinstimport | ITforall - | ITbang -- magic symbols - | ITvbar + | ITletrec + | ITcoerce + | ITinline + | ITccall (Bool,Bool) -- (is_casm, may_gc) + | ITdefaultbranch + | ITbottom + | ITinteger_lit + | ITfloat_lit + | ITrational_lit + | ITaddr_lit + | ITlit_lit + | ITstring_lit + | ITtypeapp + | ITarity + | ITspecialise + | ITnocaf + | ITunfold InlinePragInfo + | ITstrict ([Demand], Bool) + | ITscc CostCentre + + | ITdotdot -- reserved symbols | ITdcolon - | ITcomma - | ITdarrow - | ITdotdot | ITequal - | ITocurly - | ITdccurly - | ITdocurly - | ITobrack - | IToparen + | ITlam + | ITvbar + | ITlarrow | ITrarrow + | ITat + | ITtilde + | ITdarrow + | ITminus + | ITbang + + | ITbiglam -- GHC-extension symbols + + | ITocurly -- special symbols | ITccurly + | ITobrack | ITcbrack + | IToparen | ITcparen + | IToubxparen + | ITcubxparen | ITsemi - | ITinteger Integer -- numbers and names - | ITvarid FAST_STRING + | ITcomma + + | ITvarid FAST_STRING -- identifiers | ITconid FAST_STRING | ITvarsym FAST_STRING | ITconsym FAST_STRING - | ITqvarid (FAST_STRING,FAST_STRING) - | ITqconid (FAST_STRING,FAST_STRING) - | ITqvarsym (FAST_STRING,FAST_STRING) - | ITqconsym (FAST_STRING,FAST_STRING) - - -- Stuff for reading unfoldings - | ITarity | ITstrict | ITunfold - | ITdemand [Demand] | ITbottom - | ITlam | ITbiglam | ITcase | ITlet | ITletrec | ITin | ITof - | ITcoerce_in | ITcoerce_out - | ITchar Char | ITstring FAST_STRING + | ITqvarid (FAST_STRING,FAST_STRING,IfaceFlavour) + | ITqconid (FAST_STRING,FAST_STRING,IfaceFlavour) + | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour) + | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour) + + | ITpragma StringBuffer + + | ITchar Char + | ITstring FAST_STRING + | ITinteger Integer + | ITrational Rational + + | ITunknown String -- Used when the lexer can't make sense of it + | ITeof -- end of file token deriving Text -- debugging + +instance Text CostCentre -- cheat! + \end{code} %************************************************************************ @@ -178,195 +201,606 @@ data IfaceToken %************************************************************************ \begin{code} -lexIface :: String -> [IfaceToken] - -lexIface input - = _scc_ "Lexer" - case input of - [] -> [] - - -- whitespace and comments - ' ' : cs -> lexIface cs - '\t' : cs -> lexIface cs - '\n' : cs -> lexIface cs - '-' : '-' : cs -> lex_comment cs - --- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake? --- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs - - '(' : '.' : '.' : ')' : cs -> ITdotdot : lexIface cs - '{' : cs -> ITocurly : lexIface cs - '}' : cs -> ITccurly : lexIface cs - '(' : ',' : cs -> lex_tuple Nothing cs - '(' : ')' : cs -> ITconid SLIT("()") : lexIface cs - '(' : cs -> IToparen : lexIface cs - ')' : cs -> ITcparen : lexIface cs - '[' : ']' : cs -> ITconid SLIT("[]") : lexIface cs - '[' : cs -> ITobrack : lexIface cs - ']' : cs -> ITcbrack : lexIface cs - ',' : cs -> ITcomma : lexIface cs - ':' : ':' : cs -> ITdcolon : lexIface cs - ';' : cs -> ITsemi : lexIface cs - '\"' : cs -> case read input of - ((str, rest) : _) -> ITstring (_PK_ (str::String)) : lexIface rest - '\'' : cs -> case read input of - ((ch, rest) : _) -> ITchar ch : lexIface rest - - '_' : 'S' : '_' : cs -> ITstrict : lex_demand cs - '_' : cs -> lex_keyword cs - - c : cs | isDigit c -> lex_num input - | otherwise -> lex_id input - - other -> error ("lexing:"++other) +lexIface :: (IfaceToken -> IfM a) -> IfM a +lexIface cont buf = + _scc_ "Lexer" +-- if bufferExhausted buf then +-- [] +-- else +-- trace ("Lexer: '"++[C# (currentChar# buf)]++"'") $ + case currentChar# buf of + -- whitespace and comments, ignore. + ' '# -> lexIface cont (stepOn buf) + '\t'# -> lexIface cont (stepOn buf) + '\n'# -> \ loc -> lexIface cont (stepOn buf) (incSrcLine loc) + +-- Numbers and comments + '-'# -> + case lookAhead# buf 1# of + '-'# -> lex_comment cont (stepOnBy# buf 2#) + c -> + if is_digit c + then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf)) + else lex_sym cont buf + + '{'# -> -- look for "{-##" special iface pragma + case lookAhead# buf 1# of + '-'# -> case lookAhead# buf 2# of + '#'# -> case lookAhead# buf 3# of + '#'# -> + let (lexeme, buf') + = doDiscard False (stepOnBy# buf 4#) in + cont (ITpragma lexeme) buf' + _ -> lex_nested_comment (lexIface cont) buf + _ -> cont ITocurly (stepOn buf) + -- lex_nested_comment (lexIface cont) buf + _ -> cont ITocurly (stepOn buf) + + -- special symbols ---------------------------------------------------- + '('# -> + case prefixMatch (stepOn buf) "..)" of + Just buf' -> cont ITdotdot (stepOverLexeme buf') + Nothing -> + case lookAhead# buf 1# of + '#'# -> cont IToubxparen (stepOnBy# buf 2#) + _ -> cont IToparen (stepOn buf) + ')'# -> cont ITcparen (stepOn buf) + '}'# -> cont ITccurly (stepOn buf) + '#'# -> case lookAhead# buf 1# of + ')'# -> cont ITcubxparen (stepOnBy# buf 2#) + _ -> lex_sym cont (incLexeme buf) + '['# -> cont ITobrack (stepOn buf) + ']'# -> cont ITcbrack (stepOn buf) + ','# -> cont ITcomma (stepOn buf) + ';'# -> cont ITsemi (stepOn buf) + + -- strings/characters ------------------------------------------------- + '\"'#{-"-} -> case untilEndOfString# (stepOn buf) of + buf' -> + -- the string literal does *not* include the dquotes + case lexemeToFastString buf' of + v -> cont (ITstring v) (stepOn (stepOverLexeme buf')) + + '\''# -> -- + -- untilEndOfChar# extends the current lexeme until + -- it hits a non-escaped single quote. The lexeme of the + -- StringBuffer returned does *not* include the closing quote, + -- hence we augment the lexeme and make sure to add the + -- starting quote, before `read'ing the string. + -- + case untilEndOfChar# (stepOn buf) of + buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of + [ (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf')) + + -- strictness pragma and __scc treated specially. + '_'# -> + case lookAhead# buf 1# of + '_'# -> case lookAhead# buf 2# of + 'S'# -> + lex_demand cont (stepOnUntil (not . isSpace) + (stepOnBy# buf 3#)) -- past __S + 's'# -> + case prefixMatch (stepOnBy# buf 3#) "cc" of + Just buf' -> lex_scc cont + (stepOnUntil (not . isSpace) + (stepOverLexeme buf')) + Nothing -> lex_id cont buf + _ -> lex_id cont buf + _ -> lex_id cont buf + +-- ``thingy'' form for casm + '`'# -> + case lookAhead# buf 1# of + '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `s and go. + _ -> lex_sym cont (incLexeme buf) -- add ` to lexeme and assume + -- scanning an id of some sort. + + '\NUL'# -> + if bufferExhausted (stepOn buf) then + cont ITeof buf + else + trace "lexIface: misplaced NUL?" $ + cont (ITunknown "\NUL") (stepOn buf) + + c | is_digit c -> lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf) + | is_symbol c -> lex_sym cont buf + | is_upper c -> lex_con cont buf + | is_ident c -> lex_id cont buf + +-- where +lex_comment cont buf = +-- _trace ("comment: "++[C# (currentChar# buf)]) $ + case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')} + +------------------------------------------------------------------------------- + +lex_nested_comment cont buf = + case currentChar# buf of + '-'# -> case lookAhead# buf 1# of + '}'# -> cont (stepOnBy# buf 2#) + _ -> lex_nested_comment cont (stepOn buf) + + '{'# -> case lookAhead# buf 1# of + '-'# -> lex_nested_comment + (lex_nested_comment cont) + (stepOnBy# buf 2#) + _ -> lex_nested_comment cont (stepOn buf) + + _ -> lex_nested_comment cont (stepOn buf) + +------------------------------------------------------------------------------- + +lex_demand cont buf = + case read_em [] buf of { (ls,buf') -> + case currentChar# buf' of + 'B'# -> cont (ITstrict (ls, True )) (stepOverLexeme (stepOn buf')) + _ -> cont (ITstrict (ls, False)) (stepOverLexeme buf') + } + where + -- code snatched from Demand.lhs + read_em acc buf = + case currentChar# buf of + 'L'# -> read_em (WwLazy False : acc) (stepOn buf) + 'A'# -> read_em (WwLazy True : acc) (stepOn buf) + 'S'# -> read_em (WwStrict : acc) (stepOn buf) + 'P'# -> read_em (WwPrim : acc) (stepOn buf) + 'E'# -> read_em (WwEnum : acc) (stepOn buf) + ')'# -> (reverse acc, stepOn buf) + 'U'# -> do_unpack DataType True acc (stepOnBy# buf 2#) + 'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#) + 'N'# -> do_unpack NewType True acc (stepOnBy# buf 2#) + 'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#) + _ -> (reverse acc, buf) + + do_unpack new_or_data wrapper_unpacks acc buf + = case read_em [] buf of + (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest + +------------------ +lex_scc cont buf = + case currentChar# buf of + '"'# -> + case prefixMatch (stepOn buf) "CAFs." of + Just buf' -> + case untilChar# (stepOverLexeme buf') '\"'# of + buf'' -> cont (ITscc (mkAllCafsCC (mkModule (lexemeToString buf'')) _NIL_)) + (stepOn (stepOverLexeme buf'')) + Nothing -> + case prefixMatch (stepOn buf) "DICTs." of + Just buf' -> + case untilChar# (stepOverLexeme buf') '\"'# of + buf'' -> cont (ITscc (mkAllDictsCC (mkModule (lexemeToString buf'')) _NIL_ True)) + (stepOn (stepOverLexeme buf'')) + Nothing -> + let + match_user_cc buf = + case untilChar# buf '/'# of + buf' -> + let mod_name = mkModule (lexemeToString buf') in +-- case untilChar# (stepOn (stepOverLexeme buf')) '/'# of +-- buf'' -> +-- let grp_name = lexemeToFastString buf'' in + case untilChar# (stepOn (stepOverLexeme buf')) '\"'# of + buf'' -> + -- The label may contain arbitrary characters, so it + -- may have been escaped etc., hence we `read' it in to get + -- rid of these meta-chars in the string and then pack it (again.) + -- ToDo: do the same for module name (single quotes allowed in m-names). + -- BTW, the code in this module is totally gruesome.. + let upk_label = _UNPK_ (lexemeToFastString buf'') in + case reads ('"':upk_label++"\"") of + ((cc_label,_):_) -> + let cc_name = _PK_ cc_label in + (mkUserCC cc_name mod_name _NIL_{-grp_name-}, + stepOn (stepOverLexeme buf'')) + _ -> + trace ("trouble lexing scc label: " ++ upk_label ++ " , ignoring") + (mkUserCC _NIL_ mod_name _NIL_{-grp_name-}, + stepOn (stepOverLexeme buf'')) + in + case prefixMatch (stepOn buf) "CAF:" of + Just buf' -> + case match_user_cc (stepOverLexeme buf') of + (cc, buf'') -> cont (ITscc (cafifyCC cc)) buf'' + Nothing -> + case match_user_cc (stepOn buf) of + (cc, buf'') -> cont (ITscc cc) buf'' + c -> cont (ITunknown [C# c]) (stepOn buf) + + +----------- +lex_num :: (IfaceToken -> IfM a) -> (Int -> Int) -> Int# -> IfM a +lex_num cont minus acc# buf = + --trace ("lex_num: "++[C# (currentChar# buf)]) $ + case scanNumLit (I# acc#) buf of + (acc',buf') -> + case currentChar# buf' of + '.'# -> + -- this case is not optimised at all, as the + -- presence of floating point numbers in interface + -- files is not that common. (ToDo) + case expandWhile# is_digit (incLexeme buf') of + buf2 -> -- points to first non digit char + let l = case currentChar# buf2 of + 'e'# -> let buf3 = incLexeme buf2 in + case currentChar# buf3 of + '-'# -> expandWhile# is_digit (incLexeme buf3) + _ -> expandWhile# is_digit buf3 + _ -> buf2 + in let v = readRational__ (lexemeToString l) in + cont (ITrational v) (stepOverLexeme l) + + _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf') + +----------- +lex_cstring cont buf = + case expandUntilMatch buf "\'\'" of + buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#)))) + (stepOverLexeme buf') + +------------------------------------------------------------------------------ +-- Character Classes + +is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool + +{-# INLINE is_ctype #-} +#if __GLASGOW_HASKELL__ >= 303 +is_ctype :: Word8 -> Char# -> Bool +is_ctype mask = \c -> + (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0 +#else +is_ctype :: Int -> Char# -> Bool +is_ctype (I# mask) = \c -> + let (A# ctype) = ``char_types'' :: Addr + flag_word = int2Word# (ord# (indexCharOffAddr# ctype (ord# c))) + in + (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#) +#endif + +is_ident = is_ctype 1 +is_symbol = is_ctype 2 +is_any = is_ctype 4 +is_space = is_ctype 8 +is_upper = is_ctype 16 +is_digit = is_ctype 32 + +----------------------------------------------------------------------------- +-- identifiers, symbols etc. + +lex_id cont buf = + case expandWhile# is_ident buf of { buf1 -> + case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on + let new_buf = stepOverLexeme buf' + lexeme = lexemeToFastString buf' + in + case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of { + Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $ + cont kwd_token new_buf; + Nothing -> + case lookupUFM ifaceKeywordsFM lexeme of { + Just kwd_token -> --trace ("ifacekeywd: "++_UNPK_(lexeme)) $ + cont kwd_token new_buf; + Nothing -> --trace ("id: "++_UNPK_(lexeme)) $ + cont (mk_var_token lexeme) new_buf + }}}} + +lex_sym cont buf = + case expandWhile# is_symbol buf of + buf' -> case lookupUFM haskellKeySymsFM lexeme of { + Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $ + cont kwd_token new_buf ; + Nothing -> --trace ("sym: "++unpackFS lexeme) $ + cont (mk_var_token lexeme) new_buf + } + where lexeme = lexemeToFastString buf' + new_buf = stepOverLexeme buf' + +lex_con cont buf = + case expandWhile# is_ident buf of { buf1 -> + case expandWhile# (eqChar# '#'#) buf1 of { buf' -> + case currentChar# buf' of + '.'# -> munch HiFile + '!'# -> munch HiBootFile + _ -> just_a_conid + + where + just_a_conid = --trace ("con: "++unpackFS lexeme) $ + cont (ITconid lexeme) new_buf + lexeme = lexemeToFastString buf' + new_buf = stepOverLexeme buf' + munch hif = lex_qid cont lexeme hif (stepOn new_buf) just_a_conid + }} + +lex_qid cont mod hif buf just_a_conid = + case currentChar# buf of + '['# -> -- Special case for [] + case lookAhead# buf 1# of + ']'# -> cont (ITqconid (mod,SLIT("[]"),hif)) (stepOnBy# buf 2#) + _ -> just_a_conid + + '('# -> -- Special case for (,,,) + -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)" + case lookAhead# buf 1# of + '#'# -> case lookAhead# buf 2# of + ','# -> lex_ubx_tuple cont mod hif (stepOnBy# buf 3#) + just_a_conid + _ -> just_a_conid + ')'# -> cont (ITqconid (mod,SLIT("()"),hif)) (stepOnBy# buf 2#) + ','# -> lex_tuple cont mod hif (stepOnBy# buf 2#) just_a_conid + _ -> just_a_conid + + '-'# -> case lookAhead# buf 1# of + '>'# -> cont (ITqconid (mod,SLIT("->"),hif)) (stepOnBy# buf 2#) + _ -> lex_id3 cont mod hif buf just_a_conid + _ -> lex_id3 cont mod hif buf just_a_conid + +lex_id3 cont mod hif buf just_a_conid + | is_symbol c = + case expandWhile# is_symbol buf of { buf' -> + let + lexeme = lexemeToFastString buf' + new_buf = stepOverLexeme buf' + in + case lookupUFM haskellKeySymsFM lexeme of { + Just kwd_token -> just_a_conid; -- avoid M.:: etc. + Nothing -> cont (mk_qvar_token mod hif lexeme) new_buf + }} + + | otherwise = + case expandWhile# is_ident buf of { buf1 -> + if emptyLexeme buf1 + then just_a_conid + else + case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on + let + lexeme = lexemeToFastString buf' + new_buf = stepOverLexeme buf' + in + case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of { + Just kwd_token -> just_a_conid; -- avoid M.where etc. + Nothing -> + case lookupUFM ifaceKeywordsFM lexeme of { -- only for iface files + Just kwd_token -> just_a_conid; + Nothing -> cont (mk_qvar_token mod hif lexeme) new_buf + }}}} + where c = currentChar# buf + +mk_var_token pk_str + | is_upper f = ITconid pk_str + -- _[A-Z] is treated as a constructor in interface files. + | f `eqChar#` '_'# && not (_NULL_ tl) + && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str + | is_ident f = ITvarid pk_str + | f `eqChar#` ':'# = ITconsym pk_str + | otherwise = ITvarsym pk_str where - lex_comment str - = case (span ((/=) '\n') str) of { (junk, rest) -> - lexIface rest } - - ------------------ - lex_demand (c:cs) | isSpace c = lex_demand cs - | otherwise = case readList (c:cs) of - ((demand,rest) : _) -> ITdemand demand : lexIface rest - ----------- - lex_num str - = case (span isDigit str) of { (num, rest) -> - ITinteger (read num) : lexIface rest } - - ------------ - lex_keyword str - = case (span is_kwd_mod_char str) of { (kw, rest) -> - case (lookupFM ifaceKeywordsFM kw) of - Nothing -> panic ("lex_keyword:"++str) - Just xx -> xx : lexIface rest - } - - is_kwd_mod_char '_' = True - is_kwd_mod_char c = isAlphanum c - - ----------- - lex_tuple module_dot orig_cs = go 2 orig_cs - where - go n (',':cs) = go (n+1) cs - go n (')':cs) = end_lex_id module_dot (ITconid (mkTupNameStr n)) cs - go n other = panic ("lex_tuple" ++ orig_cs) - - -- NB: ':' isn't valid inside an identifier, only at the start. - -- otherwise we get confused by a::t! - is_id_char c = isAlphanum c || c `elem` "_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic - - lex_id cs = go [] cs - where - go xs (f :cs) | is_kwd_mod_char f = go (f : xs) cs - go xs ('.':cs) | not (null xs) = lex_id2 (Just (_PK_ (reverse xs))) [] cs - go xs cs = lex_id2 Nothing xs cs - - -- Dealt with the Module.part - lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs - lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs - lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs - lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs - lex_id2 module_dot xs cs = lex_id3 module_dot xs cs - - -- Dealt with [], (), : special cases - lex_id3 module_dot xs (f:cs) | is_id_char f = lex_id3 module_dot (f : xs) cs - - lex_id3 Nothing xs rest = case lookupFM haskellKeywordsFM rxs of - Just kwd_token -> kwd_token : lexIface rest - other -> (mk_var_token rxs) : lexIface rest - where - rxs = reverse xs - - lex_id3 (Just m) xs rest = end_lex_id (Just m) (mk_var_token (reverse xs)) rest - - mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n - | f == ':' = ITconsym n - | isAlpha f = ITvarid n - | otherwise = ITvarsym n - where - n = _PK_ xs - - end_lex_id (Just m) (ITconid n) cs = ITqconid (m,n) : lexIface cs - end_lex_id (Just m) (ITvarid n) cs = ITqvarid (m,n) : lexIface cs - end_lex_id (Just m) (ITconsym n) cs = ITqconsym (m,n): lexIface cs - end_lex_id (Just m) (ITvarsym n) cs = ITqvarsym (m,n): lexIface cs - end_lex_id (Just m) ITbang cs = ITqvarsym (m,SLIT("!")) : lexIface cs - end_lex_id (Just m) token cs = panic ("end_lex_id:" ++ show token) - end_lex_id Nothing token cs = token : lexIface cs - - ------------ - ifaceKeywordsFM :: FiniteMap String IfaceToken - ifaceKeywordsFM = listToFM [ - ("interface_", ITinterface) - ,("usages_", ITusages) - ,("versions_", ITversions) - ,("exports_", ITexports) - ,("instance_modules_", ITinstance_modules) - ,("instances_", ITinstances) - ,("fixities_", ITfixities) - ,("declarations_", ITdeclarations) - ,("pragmas_", ITpragmas) - ,("forall_", ITforall) - ,("U_", ITunfold) - ,("A_", ITarity) - ,("coerce_in_", ITcoerce_in) - ,("coerce_out_", ITcoerce_out) - ,("A_", ITarity) - ,("A_", ITarity) - ,("!_", ITbottom) + (C# f) = _HEAD_ pk_str + tl = _TAIL_ pk_str + +mk_qvar_token m hif token = + case mk_var_token token of + ITconid n -> ITqconid (m,n,hif) + ITvarid n -> ITqvarid (m,n,hif) + ITconsym n -> ITqconsym (m,n,hif) + ITvarsym n -> ITqvarsym (m,n,hif) + _ -> ITunknown (show token) +\end{code} - ] +---------------------------------------------------------------------------- +Horrible stuff for dealing with M.(,,,) - haskellKeywordsFM = listToFM [ - ("data", ITdata) - ,("type", ITtype) - ,("newtype", ITnewtype) - ,("class", ITclass) - ,("where", ITwhere) - ,("instance", ITinstance) - ,("infixl", ITinfixl) - ,("infixr", ITinfixr) - ,("infix", ITinfix) - ,("case", ITcase) - ,("of", ITof) - ,("in", ITin) - ,("let", ITlet) - ,("letrec", ITletrec) - ,("deriving", ITderiving) +\begin{code} +lex_tuple cont mod hif buf back_off = + go 2 buf + where + go n buf = + case currentChar# buf of + ','# -> go (n+1) (stepOn buf) + ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n),hif)) (stepOn buf) + _ -> back_off + +lex_ubx_tuple cont mod hif buf back_off = + go 2 buf + where + go n buf = + case currentChar# buf of + ','# -> go (n+1) (stepOn buf) + '#'# -> case lookAhead# buf 1# of + ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n), hif)) + (stepOnBy# buf 2#) + _ -> back_off + _ -> back_off +\end{code} - ,("->", ITrarrow) +----------------------------------------------------------------------------- +Keyword Lists + +\begin{code} +ifaceKeywordsFM :: UniqFM IfaceToken +ifaceKeywordsFM = listToUFM $ + map (\ (x,y) -> (_PK_ x,y)) + [ ("__interface", ITinterface), + ("__export", ITexport), + ("__instimport", ITinstimport), + ("__forall", ITforall), + ("__letrec", ITletrec), + ("__coerce", ITcoerce), + ("__inline", ITinline), + ("__DEFAULT", ITdefaultbranch), + ("__bot", ITbottom), + ("__integer", ITinteger_lit), + ("__float", ITfloat_lit), + ("__rational", ITrational_lit), + ("__addr", ITaddr_lit), + ("__litlit", ITlit_lit), + ("__string", ITstring_lit), + ("__a", ITtypeapp), + ("__A", ITarity), + ("__P", ITspecialise), + ("__C", ITnocaf), + ("__u", ITunfold NoInlinePragInfo), + ("__U", ITunfold IWantToBeINLINEd), + ("__UU", ITunfold IMustBeINLINEd), + ("__Unot", ITunfold IMustNotBeINLINEd), + ("__Ux", ITunfold IAmALoopBreaker), + + ("__ccall", ITccall (False, False)), + ("__ccall_GC", ITccall (False, True)), + ("__casm", ITccall (True, False)), + ("__casm_GC", ITccall (True, True)), + + ("/\\", ITbiglam) + ] + +haskellKeywordsFM = listToUFM $ + map (\ (x,y) -> (_PK_ x,y)) + [( "case", ITcase ), + ( "class", ITclass ), + ( "data", ITdata ), + ( "default", ITdefault ), + ( "deriving", ITderiving ), + ( "do", ITdo ), + ( "else", ITelse ), + ( "if", ITif ), + ( "import", ITimport ), + ( "in", ITin ), + ( "infix", ITinfix ), + ( "infixl", ITinfixl ), + ( "infixr", ITinfixr ), + ( "instance", ITinstance ), + ( "let", ITlet ), + ( "module", ITmodule ), + ( "newtype", ITnewtype ), + ( "of", ITof ), + ( "then", ITthen ), + ( "type", ITtype ), + ( "where", ITwhere ) + +-- These three aren't Haskell keywords at all +-- and 'as' is often used as a variable name +-- ( "as", ITas ), +-- ( "qualified", ITqualified ), +-- ( "hiding", IThiding ) + + ] + +haskellKeySymsFM = listToUFM $ + map (\ (x,y) -> (_PK_ x,y)) + [ ("..", ITdotdot) + ,("::", ITdcolon) + ,("=", ITequal) ,("\\", ITlam) - ,("/\\", ITbiglam) ,("|", ITvbar) - ,("!", ITbang) + ,("<-", ITlarrow) + ,("->", ITrarrow) + ,("@", ITat) + ,("~", ITtilde) ,("=>", ITdarrow) - ,("=", ITequal) + ,("-", ITminus) + ,("!", ITbang) ] \end{code} - -%************************************************************************ -%* * -\subsection{Other utility functions -%* * -%************************************************************************ +----------------------------------------------------------------------------- +doDiscard rips along really fast, looking for a '#-}', +indicating the end of the pragma we're skipping \begin{code} -type IfM a = MaybeErr a Error +doDiscard inStr buf = + case currentChar# buf of + '#'# | not inStr -> + case lookAhead# buf 1# of { '#'# -> + case lookAhead# buf 2# of { '-'# -> + case lookAhead# buf 3# of { '}'# -> + (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#)); + _ -> doDiscard inStr (incLexeme buf) }; + _ -> doDiscard inStr (incLexeme buf) }; + _ -> doDiscard inStr (incLexeme buf) } + '"'# -> + let + odd_slashes buf flg i# = + case lookAhead# buf i# of + '\\'# -> odd_slashes buf (not flg) (i# -# 1#) + _ -> flg + in + case lookAhead# buf (negateInt# 1#) of --backwards, actually + '\\'# -> -- escaping something.. + if odd_slashes buf True (negateInt# 2#) then + -- odd number of slashes, " is escaped. + doDiscard inStr (incLexeme buf) + else + -- even number of slashes, \ is escaped. + doDiscard (not inStr) (incLexeme buf) + _ -> case inStr of -- forced to avoid build-up + True -> doDiscard False (incLexeme buf) + False -> doDiscard True (incLexeme buf) + _ -> doDiscard inStr (incLexeme buf) -returnIf :: a -> IfM a -thenIf :: IfM a -> (a -> IfM b) -> IfM b -happyError :: Int -> [IfaceToken] -> IfM a +\end{code} -returnIf a = Succeeded a +----------------------------------------------------------------------------- -thenIf (Succeeded a) k = k a -thenIf (Failed err) _ = Failed err +\begin{code} +type IfM a = StringBuffer -- Input string + -> SrcLoc + -> MaybeErr a ErrMsg -happyError ln toks = Failed (ifaceParseErr ln toks) +returnIf :: a -> IfM a +returnIf a s l = Succeeded a + +thenIf :: IfM a -> (a -> IfM b) -> IfM b +m `thenIf` k = \s l -> + case m s l of + Succeeded a -> k a s l + Failed err -> Failed err + +getSrcLocIf :: IfM SrcLoc +getSrcLocIf s l = Succeeded l + +happyError :: IfM a +happyError s l = Failed (ifaceParseErr s l) + + +{- + Note that if the name of the file we're processing ends + with `hi-boot', we accept it on faith as having the right + version. This is done so that .hi-boot files that comes + with hsc don't have to be updated before every release, + *and* it allows us to share .hi-boot files with versions + of hsc that don't have .hi version checking (e.g., ghc-2.10's) + + If the version number is 0, the checking is also turned off. + (needed to deal with GHC.hi only!) + + Once we can assume we're compiling with a version of ghc that + supports interface file checking, we can drop the special + pleading +-} +checkVersion :: Maybe Integer -> IfM () +checkVersion mb@(Just v) s l + | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = Succeeded () + | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-}) +checkVersion mb@Nothing s l + | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile l)) = Succeeded () + | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-}) ----------------------------------------------------------------- -ifaceParseErr ln toks sty - = ppCat [ppPStr SLIT("Interface-file parse error: line"), ppInt ln, ppStr "toks=", ppStr (show (take 10 toks))] +ifaceParseErr :: StringBuffer -> SrcLoc -> ErrMsg +ifaceParseErr s l + = hsep [ppr l, ptext SLIT("Interface-file parse error;"), + ptext SLIT("current input ="), text first_bit] + where + first_bit = lexemeToString (stepOnBy# s 100#) + +ifaceVersionErr hi_vers l toks + = hsep [ppr l, ptext SLIT("Interface file version error;"), + ptext SLIT("Expected"), int opt_HiVersion, + ptext SLIT("found "), pp_version] + where + pp_version = + case hi_vers of + Nothing -> ptext SLIT("pre ghc-3.02 version") + Just v -> ptext SLIT("version") <+> integer v + \end{code}