X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=ghc%2Fcompiler%2Freader%2FLex.lhs;h=32f20e9e1f5a207347d1ee52941f5eee6feebba2;hp=ec761e4659f25f0c3a2222b661dae876eecd371c;hb=1fb1ab5d53a09607e7f6d2450806760688396387;hpb=fa6fb09e2e4e6918eebc79ed187f32c88817c9db diff --git a/ghc/compiler/reader/Lex.lhs b/ghc/compiler/reader/Lex.lhs index ec761e4..32f20e9 100644 --- a/ghc/compiler/reader/Lex.lhs +++ b/ghc/compiler/reader/Lex.lhs @@ -10,27 +10,39 @@ module Lex ( isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym, isLexVarId, isLexVarSym, - mkTupNameStr, + mkTupNameStr, ifaceParseErr, -- Monad for parser - IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError + IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError, + StringBuffer ) where IMPORT_1_3(Char(isDigit, isAlpha, isAlphanum, isUpper)) +IMPORT_DELOOPER(Ubiq) +IMPORT_DELOOPER(IdLoop) -- get the CostCentre type&constructors from here import CmdLineOpts ( opt_IgnoreIfacePragmas ) -import Demand ( Demand {- instance Read -} ) -import FiniteMap ( FiniteMap, listToFM, lookupFM ) +import Demand ( Demand(..) {- instance Read -} ) +import UniqFM ( UniqFM, listToUFM, lookupUFM) +--import FiniteMap ( FiniteMap, listToFM, lookupFM ) import Maybes ( Maybe(..), MaybeErr(..) ) import Pretty import CharSeq ( CSeq ) + + + import ErrUtils ( Error(..) ) import Outputable ( Outputable(..) ) import PprStyle ( PprStyle(..) ) import Util ( nOfThem, panic ) +import FastString +import StringBuffer + +import PreludeGlaST + \end{code} %************************************************************************ @@ -86,8 +98,10 @@ isLexVarSym cs ------------- isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-" isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf]) -isUpperISO c = 0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c -isLowerISO c = 0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c +isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'# +--0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c +isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'# +--0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c \end{code} @@ -114,6 +128,28 @@ mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")") %* * %************************************************************************ +The token data type, fairly un-interesting except from two constructors, +@ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity, +strictness, unfolding etc) and types for id decls. + +The Idea/Observation here is that the renamer needs to scan through +all of an interface file before it can continue. But only a fraction +of the information contained in the file turns out to be useful, so +delaying as much as possible of the scanning and parsing of an +interface file Makes Sense (Heap profiles of the compiler +show at a reduction in heap usage by at least a factor of two, +post-renamer). + +Hence, the interface file lexer spots when value declarations are +being scanned and return the @ITidinfo@ and @ITtype@ constructors +for the type and any other id info for that binding (unfolding, strictness +etc). These constructors are applied to the result of lexing these sub-chunks. + +The lexing of the type and id info is all done lazily, of course, so +the scanning (and subsequent parsing) will be done *only* on the ids the +renamer finds out that it is interested in. The rest will just be junked. 
+Laziness, you know it makes sense :-) + \begin{code} data IfaceToken = ITinterface -- keywords @@ -144,8 +180,6 @@ data IfaceToken | ITdotdot | ITequal | ITocurly - | ITdccurly - | ITdocurly | ITobrack | IToparen | ITrarrow @@ -162,17 +196,25 @@ data IfaceToken | ITqvarsym (FAST_STRING,FAST_STRING) | ITqconsym (FAST_STRING,FAST_STRING) + | ITidinfo [IfaceToken] -- lazily return the stream of tokens for + -- the info attached to an id. + | ITtysig [IfaceToken] -- lazily return the stream of tokens for + -- the info attached to an id. -- Stuff for reading unfoldings | ITarity | ITstrict | ITunfold | ITdemand [Demand] | ITbottom | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof | ITcoerce_in | ITcoerce_out | ITatsign | ITccall (Bool,Bool) -- (is_casm, may_gc) - + | ITscc CostCentre | ITchar Char | ITstring FAST_STRING | ITinteger Integer | ITdouble Double | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit + | ITunknown String -- Used when the lexer can't make sense of it deriving Text -- debugging + +instance Text CostCentre -- cheat! + \end{code} %************************************************************************ @@ -182,144 +224,487 @@ data IfaceToken %************************************************************************ \begin{code} -lexIface :: String -> [IfaceToken] - -lexIface input - = _scc_ "Lexer" - case input of - [] -> [] - - -- whitespace and comments - ' ' : cs -> lexIface cs - '\t' : cs -> lexIface cs - '\n' : cs -> lexIface cs - '-' : '-' : cs -> lex_comment cs +lexIface :: StringBuffer -> [IfaceToken] +lexIface buf = + _scc_ "Lexer" +-- if bufferExhausted buf then +-- [] +-- else +-- _trace ("Lexer: "++[C# (currentChar# buf)]) $ + case currentChar# buf of + -- whitespace and comments, ignore. + ' '# -> lexIface (stepOn buf) + '\t'# -> lexIface (stepOn buf) + '\n'# -> lexIface (stepOn buf) + +-- Numbers and comments + '-'# -> + case lookAhead# buf 1# of + '-'# -> lex_comment (stepOnBy# buf 2#) + c -> + if isDigit (C# c) + then lex_num (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf)) + else lex_id buf -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake? -- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs - '(' : '.' : '.' 
: ')' : cs -> ITdotdot : lexIface cs - '{' : cs -> ITocurly : lexIface cs - '}' : cs -> ITccurly : lexIface cs - '(' : ',' : cs -> lex_tuple Nothing cs - '(' : ')' : cs -> ITconid SLIT("()") : lexIface cs - '(' : cs -> IToparen : lexIface cs - ')' : cs -> ITcparen : lexIface cs - '[' : ']' : cs -> ITconid SLIT("[]") : lexIface cs - '[' : cs -> ITobrack : lexIface cs - ']' : cs -> ITcbrack : lexIface cs - ',' : cs -> ITcomma : lexIface cs - ':' : ':' : cs -> ITdcolon : lexIface cs - ';' : cs -> ITsemi : lexIface cs - '\"' : cs -> case reads input of - [(str, rest)] -> ITstring (_PK_ (str::String)) : lexIface rest - '\'' : cs -> case reads input of - [(ch, rest)] -> ITchar ch : lexIface rest + '('# -> + case prefixMatch (stepOn buf) "..)" of + Just buf' -> ITdotdot : lexIface (stepOverLexeme buf') + Nothing -> + case lookAhead# buf 1# of + ','# -> lex_tuple Nothing (stepOnBy# buf 2#) + ')'# -> ITconid SLIT("()") : lexIface (stepOnBy# buf 2#) + _ -> IToparen : lexIface (stepOn buf) + + '{'# -> ITocurly : lexIface (stepOn buf) + '}'# -> ITccurly : lexIface (stepOn buf) + ')'# -> ITcparen : lexIface (stepOn buf) + '['# -> + case lookAhead# buf 1# of + ']'# -> ITconid SLIT("[]") : lexIface (stepOnBy# buf 2#) + _ -> ITobrack : lexIface (stepOn buf) + ']'# -> ITcbrack : lexIface (stepOn buf) + ','# -> ITcomma : lexIface (stepOn buf) + ':'# -> case lookAhead# buf 1# of + ':'# -> ITdcolon : lexIface (stepOnBy# buf 2#) + _ -> lex_id (incLexeme buf) + ';'# -> ITsemi : lexIface (stepOn buf) + '\"'# -> case untilEndOfString# (stepOn buf) of + buf' -> + -- the string literal does *not* include the dquotes + case lexemeToFastString buf' of + v -> ITstring v : lexIface (stepOn (stepOverLexeme buf')) + + '\''# -> -- + -- untilEndOfChar# extends the current lexeme until + -- it hits a non-escaped single quote. The lexeme of the + -- StringBuffer returned does *not* include the closing quote, + -- hence we augment the lexeme and make sure to add the + -- starting quote, before `read'ing the string. + -- + case untilEndOfChar# (stepOn buf) of + buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of + [ (ch, rest)] -> ITchar ch : lexIface (stepOverLexeme (incLexeme buf')) -- ``thingy'' form for casm - '`' : '`' : cs -> lex_cstring "" cs - + '`'# -> + case lookAhead# buf 1# of + '`'# -> lex_cstring (stepOnBy# buf 2#) -- remove the `` and go. + _ -> lex_id (incLexeme buf) -- add ` to lexeme and assume + -- scanning an id of some sort. -- Keywords - '_' : 'S' : '_' : cs -> ITstrict : lex_demand cs - '_' : cs -> lex_keyword cs - --- Numbers - '-' : c : cs | isDigit c -> lex_num "-" (c:cs) - c : cs | isDigit c -> lex_num "" (c:cs) - - other -> lex_id input - where - lex_comment str - = case (span ((/=) '\n') str) of { (junk, rest) -> - lexIface rest } - - ------------------ - lex_demand (c:cs) | isSpace c = lex_demand cs - | otherwise = case readList (c:cs) of - ((demand,rest) : _) -> ITdemand demand : lexIface rest - - ----------- - lex_num minus str - = case (span isDigit str) of { (num, rest) -> - case rest of - '.' 
: str2 -> case (span isDigit str2) of { (num2,rest2) -> - ITdouble (read (minus ++ num ++ ('.':num2))) : lexIface rest2 - } - - other -> ITinteger (read (minus ++ num)) : lexIface rest - } - - ------------ - lex_keyword str - = case (span is_kwd_mod_char str) of { (kw, rest) -> - case (lookupFM ifaceKeywordsFM kw) of - Nothing -> panic ("lex_keyword:"++str) - - Just xx | startDiscard xx && - opt_IgnoreIfacePragmas -> lexIface (doDiscard rest) - | otherwise -> xx : lexIface rest - } - - is_kwd_mod_char c = isAlphanum c || c `elem` "_@/\\" - - ----------- - lex_cstring so_far ('\'' : '\'' : cs) = ITstring (_PK_ (reverse (so_far::String))) : lexIface cs - lex_cstring so_far (c : cs) = lex_cstring (c:so_far) cs + '_'# -> + case lookAhead# buf 1# of + 'S'# -> case lookAhead# buf 2# of + '_'# -> ITstrict : + lex_demand (stepOnUntil (not . isSpace) + (stepOnBy# buf 3#)) -- past _S_ + 's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of + Just buf' -> lex_scc (stepOnUntil (not . isSpace) + (stepOverLexeme buf')) + Nothing -> lex_keyword (stepOnBy# buf 1#) -- drop the '_' and assume + -- it is a keyword. + _ -> lex_keyword (stepOn buf) + + '\NUL'# -> + if bufferExhausted (stepOn buf) then + [] + else + lex_id buf + c -> + if isDigit (C# c) then + lex_num (id) (ord# c -# ord# '0'#) (incLexeme buf) + else + lex_id buf +-- where +lex_comment buf = +-- _trace ("comment: "++[C# (currentChar# buf)]) $ + case untilChar# buf '\n'# of {buf' -> lexIface (stepOverLexeme buf')} + +------------------ +lex_demand buf = +-- _trace ("demand: "++[C# (currentChar# buf)]) $ + case read_em [] buf of { (ls,buf') -> ITdemand ls : lexIface (stepOverLexeme buf')} + where + -- code snatched from Demand.lhs + read_em acc buf = +-- _trace ("read_em: "++[C# (currentChar# buf)]) $ + case currentChar# buf of + 'L'# -> read_em (WwLazy False : acc) (stepOn buf) + 'A'# -> read_em (WwLazy True : acc) (stepOn buf) + 'S'# -> read_em (WwStrict : acc) (stepOn buf) + 'P'# -> read_em (WwPrim : acc) (stepOn buf) + 'E'# -> read_em (WwEnum : acc) (stepOn buf) + ')'# -> (reverse acc, stepOn buf) + 'U'# -> do_unpack True acc (stepOnBy# buf 2#) + 'u'# -> do_unpack False acc (stepOnBy# buf 2#) + _ -> (reverse acc, buf) + + do_unpack wrapper_unpacks acc buf + = case read_em [] buf of + (stuff, rest) -> read_em (WwUnpack wrapper_unpacks stuff : acc) rest + +------------------ +lex_scc buf = +-- _trace ("scc: "++[C# (currentChar# buf)]) $ + case currentChar# buf of + '"'# -> + -- YUCK^2 + case prefixMatch (stepOn buf) "NO_CC\"" of + Just buf' -> ITscc noCostCentre : lexIface (stepOverLexeme buf') + Nothing -> + case prefixMatch (stepOn buf) "CURRENT_CC\"" of + Just buf' -> ITscc useCurrentCostCentre : lexIface (stepOverLexeme buf') + Nothing -> + case prefixMatch (stepOn buf) "OVERHEAD\"" of + Just buf' -> ITscc overheadCostCentre : lexIface (stepOverLexeme buf') + Nothing -> + case prefixMatch (stepOn buf) "DONT_CARE\"" of + Just buf' -> ITscc dontCareCostCentre : lexIface (stepOverLexeme buf') + Nothing -> + case prefixMatch (stepOn buf) "SUBSUMED\"" of + Just buf' -> ITscc subsumedCosts : lexIface (stepOverLexeme buf') + Nothing -> + case prefixMatch (stepOn buf) "CAFs_in_...\"" of + Just buf' -> ITscc preludeCafsCostCentre : lexIface (stepOverLexeme buf') + Nothing -> + case prefixMatch (stepOn buf) "CC_CAFs_in_..." 
of + Just buf' -> + case untilChar# (stepOverLexeme buf') '\"'# of + buf'' -> ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_): + lexIface (stepOverLexeme buf'') + Nothing -> + case prefixMatch (stepOn buf) "DICTs_in_...\"" of + Just buf' -> ITscc (preludeDictsCostCentre True) : lexIface (stepOverLexeme buf') + Nothing -> + case prefixMatch (stepOn buf) "CC_DICTs_in_..." of + Just buf' -> + case untilChar# (stepOverLexeme buf') '\"'# of + buf'' -> ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True): + lexIface (stepOverLexeme buf'') + Nothing -> + case prefixMatch (stepOn buf) "CAF:" of + Just buf' -> + case untilChar# (stepOverLexeme buf') '\"'# of + buf'' -> ITscc (cafifyCC (mkUserCC (lexemeToFastString buf'') _NIL_ _NIL_)): + lexIface (stepOverLexeme buf'') + Nothing -> + case untilChar# (stepOn buf) '\"'# of + buf' -> ITscc (mkUserCC (lexemeToFastString buf') _NIL_ _NIL_): + lexIface (stepOverLexeme buf') + c -> ITunknown [C# c] : lexIface (stepOn buf) + + +----------- +lex_num :: (Int -> Int) -> Int# -> StringBuffer -> [IfaceToken] +lex_num minus acc# buf = +-- _trace ("lex_num: "++[C# (currentChar# buf)]) $ + case scanNumLit (I# acc#) buf of + (acc',buf') -> + case currentChar# buf' of + '.'# -> + -- this case is not optimised at all, as the + -- presence of floating point numbers in interface + -- files is not that common. (ToDo) + case expandWhile (isDigit) (incLexeme buf') of + buf'' -> -- points to first non digit char + case reads (lexemeToString buf'') of + [(v,_)] -> ITdouble v : lexIface (stepOverLexeme buf'') + _ -> ITinteger (fromInt (minus acc')) : lexIface (stepOverLexeme buf') + +-- case reads (lexemeToString buf') of +-- [(i,_)] -> ITinteger i : lexIface (stepOverLexeme buf') + +------------ +lex_keyword buf = +-- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $ + case currentChar# buf of + ':'# -> case lookAhead# buf 1# of + '_'# -> -- a binding, type (and other id-info) follows, + -- to make the parser ever so slightly, we push + -- + lex_decl (stepOnBy# buf 2#) + v# -> ITunknown (['_',':',C# v#]) : lexIface (stepOnBy# buf 2#) + _ -> + case expandWhile (is_kwd_char) buf of + buf' -> + let kw = lexemeToFastString buf' in +-- _trace ("kw: "++lexemeToString buf') $ + case lookupUFM ifaceKeywordsFM kw of + Nothing -> ITunknown (_UNPK_ kw) : -- (minor) sigh + lexIface (stepOverLexeme buf') + Just xx -> xx : lexIface (stepOverLexeme buf') + +lex_decl buf = + case expandUntilMatch buf ";;" of + buf' -> +-- _trace (show (lexemeToString buf')) $ + case currentChar# buf' of + '\n'# -> -- newline, no id info. + ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) : + lexIface (stepOverLexeme buf') + '\r'# -> -- just to be sure for those Win* boxes.. + ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) : + lexIface (stepOverLexeme buf') + '\NUL'# -> + ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) : + lexIface (stepOverLexeme buf') + c -> -- run all over the id info + case expandUntilMatch (stepOverLexeme buf') ";;" of + buf'' -> + --_trace ((C# c):show (lexemeToString (decLexeme buf'))) $ + --_trace (show (lexemeToString (decLexeme buf''))) $ + ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))): + let ls = lexIface (stepOverLexeme buf'') in + if opt_IgnoreIfacePragmas then + ls + else + let is = lexIface (lexemeToBuffer (decLexeme buf'')) in + --_trace (show is) $ + ITidinfo is : ls + +-- ToDo: hammer! 
+is_kwd_char c@(C# c#) = + isAlphanum c || -- OLD: c `elem` "_@/\\" + (case c# of + '_'# -> True + '@'# -> True + '/'# -> True + '\\'# -> True + _ -> False) + + + +----------- +lex_cstring buf = +-- _trace ("lex_cstring: "++[C# (currentChar# buf)]) $ + case expandUntilMatch buf "\'\'" of + buf' -> ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))) : + lexIface (stepOverLexeme buf') - - ----------- - lex_tuple module_dot orig_cs = go 2 orig_cs - where - go n (',':cs) = go (n+1) cs - go n (')':cs) = end_lex_id module_dot (ITconid (mkTupNameStr n)) cs - go n other = panic ("lex_tuple" ++ orig_cs) - - -- Similarly ' itself is ok inside an identifier, but not at the start - is_id_char c = isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic - - lex_id cs = go [] cs - where - go xs (f :cs) | is_kwd_mod_char f = go (f : xs) cs - go xs ('.':cs) | not (null xs) = lex_id2 (Just (_PK_ (reverse xs))) [] cs - go xs cs = lex_id2 Nothing xs cs - - -- Dealt with the Module.part - lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs - lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs - lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs - lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs - lex_id2 module_dot xs cs = lex_id3 module_dot xs cs - - -- Dealt with [], (), : special cases - lex_id3 module_dot xs (f:cs) | is_id_char f = lex_id3 module_dot (f : xs) cs - - lex_id3 Nothing xs rest = case lookupFM haskellKeywordsFM rxs of - Just kwd_token -> kwd_token : lexIface rest - other -> (mk_var_token rxs) : lexIface rest - where - rxs = reverse xs - - lex_id3 (Just m) xs rest = end_lex_id (Just m) (mk_var_token (reverse xs)) rest - +----------- +lex_tuple module_dot buf = +-- _trace ("lex_tuple: "++[C# (currentChar# buf)]) $ + go 2 buf + where + go n buf = + case currentChar# buf of + ','# -> go (n+1) (stepOn buf) + ')'# -> end_lex_id module_dot (ITconid (mkTupNameStr n)) (stepOn buf) + _ -> ITunknown ("tuple " ++ show n) : lexIface buf + +-- Similarly ' itself is ok inside an identifier, but not at the start + +id_arr :: _ByteArray Int +id_arr = + unsafePerformPrimIO ( + newCharArray (0,255) `thenPrimIO` \ barr -> + let + loop 256# = returnPrimIO () + loop i# = + if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then + writeCharArray barr (I# i#) '\1' `seqPrimIO` + loop (i# +# 1#) + else + writeCharArray barr (I# i#) '\0' `seqPrimIO` + loop (i# +# 1#) + in + loop 0# `seqPrimIO` + unsafeFreezeByteArray barr) + +is_id_char (C# c#) = + let + _ByteArray _ arr# = id_arr + in + case ord# (indexCharArray# arr# (ord# c#)) of + 0# -> False + 1# -> True + +--is_id_char c@(C# c#) = isAlphanum c || is_sym c# + +is_sym c#= + case c# of { + ':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True; + '#'# -> True; '$'# -> True; ':'# -> True; '%'# -> True; + '&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True; + '/'# -> True; '<'# -> True; '='# -> True; '>'# -> True; + '?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True; + '-'# -> True; '~'# -> True; _ -> False } + +--isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic + + +mod_arr :: _ByteArray Int +mod_arr = + unsafePerformPrimIO ( + newCharArray (0,255) `thenPrimIO` \ barr -> + let + loop 256# = returnPrimIO () + loop i# = + if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then + writeCharArray barr (I# i#) '\1' `seqPrimIO` + loop (i# +# 1#) + else + writeCharArray barr (I# i#) '\0' 
`seqPrimIO` + loop (i# +# 1#) + in + loop 0# `seqPrimIO` + unsafeFreezeByteArray barr) + + +is_mod_char (C# c#) = + let + _ByteArray _ arr# = mod_arr + in + case ord# (indexCharArray# arr# (ord# c#)) of + 0# -> False + 1# -> True + +--isAlphanum c || c == '_' || c== '\'' --`elem` "_'" + +{- +lex_id cs = + case _scc_ "lex_id.span" my_span' (is_mod_char) cs of + (xs, len, cs') -> + case cs' of + [] -> case xs of + [] -> lex_id2 Nothing cs + _ -> lex_id3 Nothing len xs cs + + '.':cs'' -> + case xs of + [] -> lex_id2 Nothing cs + _ -> + let + pk_str = _PK_ (xs::String) + len = lengthPS pk_str + in + if len==len+1 then + error "Well, I never!" + else + lex_id2 (Just pk_str) cs'' + _ -> case xs of + [] -> lex_id2 Nothing cs + _ -> lex_id3 Nothing len xs cs' + +-} + +lex_id buf = +-- _trace ("lex_id: "++[C# (currentChar# buf)]) $ + case expandWhile (is_mod_char) buf of + buf' -> + case currentChar# buf' of + '.'# -> + if not (emptyLexeme buf') then +-- _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $ + case lexemeToFastString buf' of + l@(FastString u# l# ba#) -> lex_id2 (Just (FastString u# l# ba#)) + (stepOn (stepOverLexeme buf')) + else + lex_id2 Nothing buf' + _ -> lex_id2 Nothing buf' + +-- Dealt with the Module.part +lex_id2 module_dot buf = +-- _trace ("lex_id2: "++[C# (currentChar# buf)]) $ + case currentChar# buf of + '['# -> + case lookAhead# buf 1# of + ']'# -> end_lex_id module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#) + _ -> lex_id3 module_dot buf + '('# -> + case lookAhead# buf 1# of + ')'# -> end_lex_id module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#) + ','# -> lex_tuple module_dot (stepOnBy# buf 2#) + _ -> lex_id3 module_dot buf + ':'# -> lex_id3 module_dot (incLexeme buf) + _ -> lex_id3 module_dot buf + + + +-- Dealt with [], (), : special cases + +lex_id3 module_dot buf = +-- _trace ("lex_id3: "++[C# (currentChar# buf)]) $ + case expandWhile (is_id_char) buf of + buf' -> + case module_dot of + Just _ -> + end_lex_id module_dot (mk_var_token lexeme) (stepOverLexeme buf') + Nothing -> + case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of + Just kwd_token -> kwd_token : lexIface new_buf + Nothing -> mk_var_token lexeme : lexIface new_buf + where + lexeme = lexemeToFastString buf' + new_buf = stepOverLexeme buf' + + +{- OLD: +lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs +lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs +lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs +lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs +lex_id2 module_dot xs cs = lex_id3 module_dot xs cs +-} + + +-- Dealt with [], (), : special cases + +{- +lex_id3 module_dot len_xs xs cs = + case my_span' (is_id_char) cs of + (xs1,len_xs1,rest) -> + case module_dot of + Just m -> end_lex_id (Just m) (mk_var_token rxs) rest --OLD:_PK_ (reverse xs))) rest + Nothing -> + case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM rxs of + Just kwd_token -> kwd_token : lexIface rest + other -> token : lexIface cs end_lex_id Nothing (mk_var_token rxs) rest + where + rxs = packNChars (len_xs+len_xs1) (xs++xs1) -- OLD: _PK_ (reverse xs) +-} +mk_var_token pk_str = + let + f = _HEAD_ pk_str + in + -- + -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower, + -- remove the second half of disjunction when using a 1.3 prelude. 
+ -- + if isUpper f then ITconid pk_str + else if isLower f then ITvarid pk_str + else if f == ':' then ITconsym pk_str + else if isLowerISO f then ITvarid pk_str + else if isUpperISO f then ITconid pk_str + else ITvarsym pk_str + +{- mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n | f == ':' = ITconsym n | isAlpha f = ITvarid n | otherwise = ITvarsym n where n = _PK_ xs +-} - end_lex_id (Just m) (ITconid n) cs = ITqconid (m,n) : lexIface cs - end_lex_id (Just m) (ITvarid n) cs = ITqvarid (m,n) : lexIface cs - end_lex_id (Just m) (ITconsym n) cs = ITqconsym (m,n): lexIface cs - end_lex_id (Just m) (ITvarsym n) cs = ITqvarsym (m,n): lexIface cs - end_lex_id (Just m) ITbang cs = ITqvarsym (m,SLIT("!")) : lexIface cs - end_lex_id (Just m) token cs = panic ("end_lex_id:" ++ show token) - end_lex_id Nothing token cs = token : lexIface cs - - ------------ - ifaceKeywordsFM :: FiniteMap String IfaceToken - ifaceKeywordsFM = listToFM [ - ("/\\_", ITbiglam) +end_lex_id Nothing token buf = token : lexIface buf +end_lex_id (Just m) token buf = + case token of + ITconid n -> ITqconid (m,n) : lexIface buf + ITvarid n -> ITqvarid (m,n) : lexIface buf + ITconsym n -> ITqconsym (m,n) : lexIface buf + ITvarsym n -> ITqvarsym (m,n) : lexIface buf + ITbang -> ITqvarsym (m,SLIT("!")) : lexIface buf + _ -> ITunknown (show token) : lexIface buf + +------------ +ifaceKeywordsFM :: UniqFM IfaceToken +ifaceKeywordsFM = listToUFM $ + map (\ (x,y) -> (_PK_ x,y)) + [("/\\_", ITbiglam) ,("@_", ITatsign) ,("interface_", ITinterface) ,("usages_", ITusages) @@ -348,8 +733,9 @@ lexIface input ,("casm_GC_", ITccall (True, True)) ] - haskellKeywordsFM = listToFM [ - ("data", ITdata) +haskellKeywordsFM = listToUFM $ + map (\ (x,y) -> (_PK_ x,y)) + [ ("data", ITdata) ,("type", ITtype) ,("newtype", ITnewtype) ,("class", ITclass) @@ -374,18 +760,33 @@ lexIface input ,("=", ITequal) ] -startDiscard ITarity = True -startDiscard ITunfold = True -startDiscard ITstrict = True -startDiscard other = False -- doDiscard rips along really fast looking for a double semicolon, -- indicating the end of the pragma we're skipping -doDiscard rest@(';' : ';' : _) = rest -doDiscard ( _ : rest) = doDiscard rest -doDiscard [] = [] +doDiscard buf = + case currentChar# buf of + ';'# -> + case lookAhead# buf 1# of + ';'# -> stepOnBy# buf 2# + _ -> doDiscard (stepOn buf) + _ -> doDiscard (stepOn buf) + \end{code} +begin{code} +my_span :: (a -> Bool) -> [a] -> ([a],[a]) +my_span p xs = go [] xs + where + go so_far (x:xs') | p x = go (x:so_far) xs' + go so_far xs = (reverse so_far, xs) + +my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a]) +my_span' p xs = go [] 0 xs + where + go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs' + go so_far n xs = (reverse so_far,n, xs) +end{code} + %************************************************************************ %* * @@ -410,5 +811,5 @@ happyError ln toks = Failed (ifaceParseErr ln toks) ----------------------------------------------------------------- ifaceParseErr ln toks sty - = ppCat [ppPStr SLIT("Interface-file parse error: line"), ppInt ln, ppStr "toks=", ppStr (show (take 10 toks))] + = ppCat [ppPStr SLIT("Interface-file parse error: line"), ppInt ln, ppPStr SLIT("toks="), ppStr (show (take 10 toks))] \end{code}
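
A minimal standalone sketch of the lazy declaration-lexing idea described in the comment added by this patch (the @ITtysig@/@ITidinfo@ constructors). It is not taken from the patch: the names @Token@, @TTySig@, @TIdInfo@, @lexAll@ and @splitAtTerminator@ are hypothetical, plain @String@ stands in for @StringBuffer@, and the @_:_ ... ;; ... ;;@ shape only loosely mirrors what @lex_decl@ scans for with @expandUntilMatch buf ";;"@.

\begin{code}
-- Hypothetical, simplified model: String instead of StringBuffer.
module LazyLexSketch where

import Data.Char (isSpace)
import Data.List (isPrefixOf)

data Token
  = TTySig  [Token]   -- lazily lexed type-signature chunk
  | TIdInfo [Token]   -- lazily lexed id-info (pragma) chunk
  | TWord   String    -- everything else, grossly simplified
  deriving Show

-- Take input up to (and consuming) the first ";;" terminator,
-- this sketch's stand-in for expandUntilMatch buf ";;".
splitAtTerminator :: String -> (String, String)
splitAtTerminator = go []
  where
    go acc (';':';':rest) = (reverse acc, rest)
    go acc (c:cs)         = go (c:acc) cs
    go acc []             = (reverse acc, [])

-- A declaration of the form "_:_ <type sig> ;; <id info> ;;" becomes a
-- TTySig token and a TIdInfo token whose payloads are thunks: the two
-- chunks are only lexed if somebody looks inside them.
lexAll :: String -> [Token]
lexAll [] = []
lexAll s@(c:cs)
  | isSpace c            = lexAll cs
  | "_:_" `isPrefixOf` s =
      let (sig,  rest1) = splitAtTerminator (drop 3 s)
          (info, rest2) = splitAtTerminator rest1
      in  TTySig (lexAll sig) : TIdInfo (lexAll info) : lexAll rest2
  | otherwise            =
      let (w, rest) = break isSpace s
      in  TWord w : lexAll rest
\end{code}

Forcing only the spine of the result (say, @length (lexAll input)@) never lexes the payload of a @TTySig@ or @TIdInfo@ token; that work happens only when a consumer pattern-matches on the payload, which is how the uninteresting id info gets junked cheaply.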
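
The demand-string reader in @lex_demand@ can be modelled in isolation the same way. A sketch, assuming the character codes shown in @read_em@ above ('L', 'A', 'S', 'P', 'E', a closing ')', and 'U'(...)/'u'(...) for unpacks); the @Demand@ constructors follow the patch, but @readDemands@ over plain @String@ is a hypothetical stand-in for the @StringBuffer@ version:

\begin{code}
-- Hypothetical stand-in for the StringBuffer-based reader; Demand here
-- is a cut-down copy of the constructors this patch reads.
module DemandSketch where

data Demand
  = WwLazy Bool            -- 'L' (lazy) / 'A' (absent)
  | WwStrict               -- 'S'
  | WwPrim                 -- 'P'
  | WwEnum                 -- 'E'
  | WwUnpack Bool [Demand] -- 'U'(..) wrapper unpacks / 'u'(..) it does not
  deriving Show

-- Read demand codes until a ')' or an unrecognised character,
-- returning the demands read and the remaining input.
readDemands :: String -> ([Demand], String)
readDemands = go []
  where
    go acc ('L':cs)     = go (WwLazy False : acc) cs
    go acc ('A':cs)     = go (WwLazy True  : acc) cs
    go acc ('S':cs)     = go (WwStrict     : acc) cs
    go acc ('P':cs)     = go (WwPrim       : acc) cs
    go acc ('E':cs)     = go (WwEnum       : acc) cs
    go acc (')':cs)     = (reverse acc, cs)
    go acc ('U':'(':cs) = unpack True  acc cs
    go acc ('u':'(':cs) = unpack False acc cs
    go acc cs           = (reverse acc, cs)

    -- A 'U'/'u' group recursively reads demands up to its closing ')'.
    unpack wrapper acc cs =
      case readDemands cs of
        (inner, rest) -> go (WwUnpack wrapper inner : acc) rest
\end{code}

For example, @readDemands "SLU(SS)E"@ yields @([WwStrict, WwLazy False, WwUnpack True [WwStrict,WwStrict], WwEnum], "")@.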