1 --------------------------------------------------------
3 There's a known bug in here:
5 If an interface file ends prematurely, Lex tries to
6 do headFS of an empty FastString.
8 An example that provokes the error is
10 f _:_ _forall_ [a] <<<END OF FILE>>>
11 --------------------------------------------------------
15 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
17 \section[Lexical analysis]{Lexical analysis}
22 isLexCon, isLexVar, isLexId, isLexSym,
23 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
24 mkTupNameStr, ifaceParseErr,
27 IfaceToken(..), lexIface, IfM, thenIf, returnIf, getSrcLocIf,
34 #include "HsVersions.h"
36 import Char (isDigit, isAlphanum, isUpper,isLower, isSpace, ord )
37 import List ( isSuffixOf )
39 import {-# SOURCE #-} CostCentre
41 import CmdLineOpts ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
42 import Demand ( Demand(..) {- instance Read -} )
43 import UniqFM ( UniqFM, listToUFM, lookupUFM)
44 import BasicTypes ( NewOrData(..), IfaceFlavour(..) )
45 import SrcLoc ( SrcLoc, incSrcLine, srcLocFile )
47 import Maybes ( MaybeErr(..) )
48 import ErrUtils ( ErrMsg )
50 import Util ( nOfThem, panic )
57 import PrelRead ( readRational__ ) -- Glasgow non-std
60 %************************************************************************
62 \subsection{Lexical categories}
64 %************************************************************************
66 These functions test strings to see if they fit the lexical categories
67 defined in the Haskell report. Normally applied as in e.g. @isCon
71 isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
72 isLexVarId, isLexVarSym :: FAST_STRING -> Bool
74 isLexCon cs = isLexConId cs || isLexConSym cs
75 isLexVar cs = isLexVarId cs || isLexVarSym cs
77 isLexId cs = isLexConId cs || isLexVarId cs
78 isLexSym cs = isLexConSym cs || isLexVarSym cs
84 | cs == SLIT("[]") = True
85 | c == '(' = True -- (), (,), (,,), ...
86 | otherwise = isUpper c || isUpperISO c
92 | otherwise = isLower c || isLowerISO c
98 | otherwise = c == ':'
105 | otherwise = isSymbolASCII c
111 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
112 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
113 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
114 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
115 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
116 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
120 %************************************************************************
122 \subsection{Tuple strings -- ugh!}
124 %************************************************************************
127 mkTupNameStr 0 = SLIT("()")
128 mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
129 mkTupNameStr 2 = _PK_ "(,)" -- not strictly necessary
130 mkTupNameStr 3 = _PK_ "(,,)" -- ditto
131 mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
132 mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
137 %************************************************************************
139 \subsection{Data types}
141 %************************************************************************
143 The token data type, fairly un-interesting except from two constructors,
144 @ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity,
145 strictness, unfolding etc) and types for id decls.
147 The Idea/Observation here is that the renamer needs to scan through
148 all of an interface file before it can continue. But only a fraction
149 of the information contained in the file turns out to be useful, so
150 delaying as much as possible of the scanning and parsing of an
151 interface file Makes Sense (Heap profiles of the compiler
152 show a reduction in heap usage by at least a factor of two,
155 Hence, the interface file lexer spots when value declarations are
156 being scanned and return the @ITidinfo@ and @ITtype@ constructors
157 for the type and any other id info for that binding (unfolding, strictness
158 etc). These constructors are applied to the result of lexing these sub-chunks.
160 The lexing of the type and id info is all done lazily, of course, so
161 the scanning (and subsequent parsing) will be done *only* on the ids the
162 renamer finds out that it is interested in. The rest will just be junked.
163 Laziness, you know it makes sense :-)
167 = ITinterface -- keywords
186 | ITbang -- magic symbols
201 | ITvarid FAST_STRING
202 | ITconid FAST_STRING
203 | ITvarsym FAST_STRING
204 | ITconsym FAST_STRING
205 | ITqvarid (FAST_STRING,FAST_STRING,IfaceFlavour)
206 | ITqconid (FAST_STRING,FAST_STRING,IfaceFlavour)
207 | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour)
208 | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour)
210 | ITtysig StringBuffer (Maybe StringBuffer)
211 -- lazily return the stream of tokens for
212 -- the info attached to an id.
213 -- Stuff for reading unfoldings
215 | ITunfold Bool -- True <=> there's an INLINE pragma on this Id
216 | ITstrict [Demand] | ITbottom
218 | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
219 | ITcoerce | ITinline | ITatsign
220 | ITccall (Bool,Bool) -- (is_casm, may_gc)
222 | ITchar Char | ITstring FAST_STRING
223 | ITinteger Integer | ITrational Rational
224 | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
225 | ITunknown String -- Used when the lexer can't make sense of it
226 | ITeof -- end of file token
227 deriving Text -- debugging
229 instance Text CostCentre -- cheat!
233 %************************************************************************
235 \subsection{The lexical analyser}
237 %************************************************************************
240 lexIface :: (IfaceToken -> IfM a) -> IfM a
243 -- if bufferExhausted buf then
246 -- _trace ("Lexer: "++[C# (currentChar# buf)]) $
247 case currentChar# buf of
248 -- whitespace and comments, ignore.
249 ' '# -> lexIface cont (stepOn buf)
250 '\t'# -> lexIface cont (stepOn buf)
251 '\n'# -> \ loc -> lexIface cont (stepOn buf) (incSrcLine loc)
253 -- Numbers and comments
255 case lookAhead# buf 1# of
256 '-'# -> lex_comment cont (stepOnBy# buf 2#)
259 then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
262 -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
263 -- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
266 case prefixMatch (stepOn buf) "..)" of
267 Just buf' -> cont ITdotdot (stepOverLexeme buf')
269 case lookAhead# buf 1# of
270 ','# -> lex_tuple cont Nothing (stepOnBy# buf 2#)
271 ')'# -> cont (ITconid SLIT("()")) (stepOnBy# buf 2#)
272 _ -> cont IToparen (stepOn buf)
274 '{'# -> cont ITocurly (stepOn buf)
275 '}'# -> cont ITccurly (stepOn buf)
276 ')'# -> cont ITcparen (stepOn buf)
278 case lookAhead# buf 1# of
279 ']'# -> cont (ITconid SLIT("[]")) (stepOnBy# buf 2#)
280 _ -> cont ITobrack (stepOn buf)
281 ']'# -> cont ITcbrack (stepOn buf)
282 ','# -> cont ITcomma (stepOn buf)
283 ';'# -> cont ITsemi (stepOn buf)
284 '\"'# -> case untilEndOfString# (stepOn buf) of
286 -- the string literal does *not* include the dquotes
287 case lexemeToFastString buf' of
288 v -> cont (ITstring v) (stepOn (stepOverLexeme buf'))
291 -- untilEndOfChar# extends the current lexeme until
292 -- it hits a non-escaped single quote. The lexeme of the
293 -- StringBuffer returned does *not* include the closing quote,
294 -- hence we augment the lexeme and make sure to add the
295 -- starting quote, before `read'ing the string.
297 case untilEndOfChar# (stepOn buf) of
298 buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
299 [ (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
301 -- ``thingy'' form for casm
303 case lookAhead# buf 1# of
304 '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `` and go.
305 _ -> lex_id cont (incLexeme buf) -- add ` to lexeme and assume
306 -- scanning an id of some sort.
309 case lookAhead# buf 1# of
310 'S'# -> case lookAhead# buf 2# of
312 lex_demand cont (stepOnUntil (not . isSpace)
313 (stepOnBy# buf 3#)) -- past _S_
314 's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of
315 Just buf' -> lex_scc cont (stepOnUntil (not . isSpace) (stepOverLexeme buf'))
316 Nothing -> lex_keyword cont (stepOnBy# buf 1#) -- drop the '_' and assume
318 _ -> lex_keyword cont (stepOn buf)
321 if bufferExhausted (stepOn buf) then
326 if isDigit (C# c) then
327 lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
331 lex_comment cont buf =
332 -- _trace ("comment: "++[C# (currentChar# buf)]) $
333 case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
336 lex_demand cont buf =
337 case read_em [] buf of { (ls,buf') -> cont (ITstrict ls) (stepOverLexeme buf')}
339 -- code snatched from Demand.lhs
341 case currentChar# buf of
342 'L'# -> read_em (WwLazy False : acc) (stepOn buf)
343 'A'# -> read_em (WwLazy True : acc) (stepOn buf)
344 'S'# -> read_em (WwStrict : acc) (stepOn buf)
345 'P'# -> read_em (WwPrim : acc) (stepOn buf)
346 'E'# -> read_em (WwEnum : acc) (stepOn buf)
347 ')'# -> (reverse acc, stepOn buf)
348 'U'# -> do_unpack DataType True acc (stepOnBy# buf 2#)
349 'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
350 'N'# -> do_unpack NewType True acc (stepOnBy# buf 2#)
351 'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
352 _ -> (reverse acc, buf)
354 do_unpack new_or_data wrapper_unpacks acc buf
355 = case read_em [] buf of
356 (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
360 case currentChar# buf of
363 case prefixMatch (stepOn buf) "NO_CC\"" of
364 Just buf' -> cont (ITscc noCostCentre) (stepOverLexeme buf')
366 case prefixMatch (stepOn buf) "CURRENT_CC\"" of
367 Just buf' -> cont (ITscc useCurrentCostCentre) (stepOverLexeme buf')
369 case prefixMatch (stepOn buf) "OVERHEAD\"" of
370 Just buf' -> cont (ITscc overheadCostCentre) (stepOverLexeme buf')
372 case prefixMatch (stepOn buf) "DONT_CARE\"" of
373 Just buf' -> cont (ITscc dontCareCostCentre) (stepOverLexeme buf')
375 case prefixMatch (stepOn buf) "SUBSUMED\"" of
376 Just buf' -> cont (ITscc subsumedCosts) (stepOverLexeme buf')
378 case prefixMatch (stepOn buf) "CAFs_in_...\"" of
379 Just buf' -> cont (ITscc preludeCafsCostCentre) (stepOverLexeme buf')
381 case prefixMatch (stepOn buf) "CC_CAFs_in_..." of
383 case untilChar# (stepOverLexeme buf') '\"'# of
384 buf'' -> cont (ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_)) (stepOn (stepOverLexeme buf''))
386 case prefixMatch (stepOn buf) "DICTs_in_...\"" of
387 Just buf' -> cont (ITscc (preludeDictsCostCentre True)) (stepOverLexeme buf')
389 case prefixMatch (stepOn buf) "CC_DICTs_in_..." of
391 case untilChar# (stepOverLexeme buf') '\"'# of
392 buf'' -> cont (ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True))
393 (stepOn (stepOverLexeme buf''))
397 case untilChar# buf '/'# of
399 let mod_name = lexemeToFastString buf' in
400 -- case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
402 -- let grp_name = lexemeToFastString buf'' in
403 case untilChar# (stepOn (stepOverLexeme buf')) '\"'# of
405 -- The label may contain arbitrary characters, so it
406 -- may have been escaped etc., hence we `read' it in to get
407 -- rid of these meta-chars in the string and then pack it (again.)
408 -- ToDo: do the same for module name (single quotes allowed in m-names).
409 -- BTW, the code in this module is totally gruesome..
410 let upk_label = _UNPK_ (lexemeToFastString buf'') in
411 case reads ('"':upk_label++"\"") of
413 let cc_name = _PK_ cc_label in
414 (mkUserCC cc_name mod_name _NIL_{-grp_name-},
415 stepOn (stepOverLexeme buf''))
417 trace ("trouble lexing scc label: " ++ upk_label ++ " , ignoring")
418 (mkUserCC _NIL_ mod_name _NIL_{-grp_name-},
419 stepOn (stepOverLexeme buf''))
421 case prefixMatch (stepOn buf) "CAF:" of
423 case match_user_cc (stepOverLexeme buf') of
424 (cc, buf'') -> cont (ITscc (cafifyCC cc)) buf''
426 case match_user_cc (stepOn buf) of
427 (cc, buf'') -> cont (ITscc cc) buf''
428 c -> cont (ITunknown [C# c]) (stepOn buf)
432 lex_num :: (IfaceToken -> IfM a) ->
433 (Int -> Int) -> Int# -> IfM a
434 lex_num cont minus acc# buf =
435 -- _trace ("lex_num: "++[C# (currentChar# buf)]) $
436 case scanNumLit (I# acc#) buf of
438 case currentChar# buf' of
440 -- this case is not optimised at all, as the
441 -- presence of floating point numbers in interface
442 -- files is not that common. (ToDo)
443 case expandWhile (isDigit) (incLexeme buf') of
444 buf2 -> -- points to first non digit char
445 let l = case currentChar# buf2 of
446 'e'# -> let buf3 = incLexeme buf2 in
447 case currentChar# buf3 of
448 '-'# -> expandWhile (isDigit) (incLexeme buf3)
449 _ -> expandWhile (isDigit) buf3
451 in let v = readRational__ (lexemeToString l) in
452 cont (ITrational v) (stepOverLexeme l)
454 _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
459 lex_keyword cont buf =
460 -- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $
461 case currentChar# buf of
462 ':'# -> case lookAhead# buf 1# of
463 '_'# -> -- a binding, type (and other id-info) follows,
464 -- to make the parser ever so slightly, we push
466 lex_decl cont (stepOnBy# buf 2#)
467 v# -> cont (ITunknown (['_',':',C# v#])) (stepOnBy# buf 2#)
469 case expandWhile (is_kwd_char) buf of
471 let kw = lexemeToFastString buf' in
472 -- _trace ("kw: "++lexemeToString buf') $
473 case lookupUFM ifaceKeywordsFM kw of
474 Nothing -> cont (ITunknown (_UNPK_ kw)) -- (minor) sigh
475 (stepOverLexeme buf')
476 Just xx -> cont xx (stepOverLexeme buf')
479 case doDiscard False buf of -- spin until ;; is found
481 {- _trace (show (lexemeToString buf')) $ -}
482 case currentChar# buf' of
483 '\n'# -> -- newline, no id info.
484 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
485 (stepOverLexeme buf')
486 '\r'# -> -- just to be sure for those Win* boxes..
487 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
488 (stepOverLexeme buf')
490 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
491 (stepOverLexeme buf')
492 c -> -- run all over the id info
493 case doDiscard False (stepOverLexeme buf') of -- spin until ;; is found (outside a string!)
495 --_trace ((C# c):show (lexemeToString (decLexeme buf'))) $
496 --_trace (show (lexemeToString (decLexeme buf''))) $
498 if opt_IgnoreIfacePragmas then
501 Just (lexemeToBuffer (decLexeme buf''))
504 cont (ITtysig (lexemeToBuffer (decLexeme buf')) idinfo)
505 (stepOverLexeme buf'')
508 is_kwd_char c@(C# c#) =
509 isAlphanum c || -- OLD: c `elem` "_@/\\"
520 lex_cstring cont buf =
521 case expandUntilMatch buf "\'\'" of
522 buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
523 (stepOverLexeme buf')
526 lex_tuple cont module_dot buf =
530 case currentChar# buf of
531 ','# -> go (n+1) (stepOn buf)
532 ')'# -> end_lex_id cont module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
533 _ -> cont (ITunknown ("tuple " ++ show n)) buf
535 -- Similarly ' itself is ok inside an identifier, but not at the start
537 -- id_arr is an array of bytes, indexed by characters,
538 -- containing 0 if the character isn't a valid character from an identifier
539 -- and 1 if it is. It's just a memo table for is_id_char.
540 id_arr :: ByteArray Int
543 newCharArray (0,255) >>= \ barr ->
545 loop 256# = return ()
547 if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
548 writeCharArray barr (I# i#) '\1' >>
551 writeCharArray barr (I# i#) '\0' >>
555 unsafeFreezeByteArray barr)
559 ByteArray _ arr# = id_arr
561 case ord# (indexCharArray# arr# (ord# c#)) of
565 --OLD: is_id_char c@(C# c#) = isAlphanum c || is_sym c#
569 ':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True;
570 '#'# -> True; '$'# -> True; '%'# -> True;
571 '&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True;
572 '/'# -> True; '<'# -> True; '='# -> True; '>'# -> True;
573 '?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True;
574 '-'# -> True; '~'# -> True; '@'# -> True; _ -> False }
576 --isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
579 -- mod_arr is an array of bytes, indexed by characters,
580 -- containing 0 if the character isn't a valid character from a module name,
582 mod_arr :: ByteArray Int
585 newCharArray (0,255) >>= \ barr ->
587 loop 256# = return ()
589 if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
590 writeCharArray barr (I# i#) '\1' >>
593 writeCharArray barr (I# i#) '\0' >>
597 unsafeFreezeByteArray barr)
600 is_mod_char (C# c#) =
602 ByteArray _ arr# = mod_arr
604 case ord# (indexCharArray# arr# (ord# c#)) of
608 --isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
611 -- _trace ("lex_id: "++[C# (currentChar# buf)]) $
612 case expandWhile (is_mod_char) buf of
614 case currentChar# buf' of
615 '.'# -> munch buf' HiFile
616 '!'# -> munch buf' HiBootFile
617 _ -> lex_id2 cont Nothing buf'
620 if not (emptyLexeme buf') then
621 -- _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $
622 case lexemeToFastString buf' of
623 l@(FastString u# l# ba#) -> lex_id2 cont (Just (FastString u# l# ba#, hif))
624 (stepOn (stepOverLexeme buf'))
626 lex_id2 cont Nothing buf'
629 -- Dealt with the Module.part
630 lex_id2 cont module_dot buf =
631 -- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
632 case currentChar# buf of
634 '['# -> -- Special case for []
635 case lookAhead# buf 1# of
636 ']'# -> end_lex_id cont module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
637 _ -> lex_id3 cont module_dot buf
639 '('# -> -- Special case for (,,,)
640 case lookAhead# buf 1# of
641 ')'# -> end_lex_id cont module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
642 ','# -> lex_tuple cont module_dot (stepOnBy# buf 2#)
643 _ -> lex_id3 cont module_dot buf
644 ':'# -> lex_id3 cont module_dot (incLexeme buf)
647 Nothing -> lex_id3 cont module_dot buf
648 Just ghc -> -- this should be "GHC" (current home of (->))
649 case lookAhead# buf 1# of
650 '>'# -> end_lex_id cont module_dot (ITconid SLIT("->"))
652 _ -> lex_id3 cont module_dot buf
653 _ -> lex_id3 cont module_dot buf
657 -- Dealt with [], (), : special cases
659 lex_id3 cont module_dot buf =
660 case expandWhile (is_id_char) buf of
664 end_lex_id cont module_dot (mk_var_token lexeme) new_buf
666 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
667 Just kwd_token -> cont kwd_token new_buf
668 Nothing -> cont (mk_var_token lexeme) new_buf
670 lexeme = lexemeToFastString buf'
671 new_buf = stepOverLexeme buf'
674 -- Dealt with [], (), : special cases
675 mk_var_token pk_str =
680 -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower,
681 -- remove the second half of disjunction when using a 1.3 prelude.
683 if isUpper f then ITconid pk_str
684 else if isLower f then ITvarid pk_str
685 else if f == ':' then ITconsym pk_str
686 else if isLowerISO f then ITvarid pk_str
687 else if isUpperISO f then ITconid pk_str
690 end_lex_id cont Nothing token buf = cont token buf
691 end_lex_id cont (Just (m,hif)) token buf =
693 ITconid n -> cont (ITqconid (m,n,hif)) buf
694 ITvarid n -> cont (ITqvarid (m,n,hif)) buf
695 ITconsym n -> cont (ITqconsym (m,n,hif)) buf
697 -- Special case for ->
698 -- "->" by itself is a special token (ITrarrow),
699 -- but M.-> is a ITqconid
700 ITvarsym n | n == SLIT("->")
701 -> cont (ITqconsym (m,n,hif)) buf
703 ITvarsym n -> cont (ITqvarsym (m,n,hif)) buf
705 -- ITbang can't happen here I think
706 -- ITbang -> cont (ITqvarsym (m,SLIT("!"),hif)) buf
708 _ -> cont (ITunknown (show token)) buf
711 ifaceKeywordsFM :: UniqFM IfaceToken
712 ifaceKeywordsFM = listToUFM $
713 map (\ (x,y) -> (_PK_ x,y))
716 ,("letrec_", ITletrec)
717 ,("interface_", ITinterface)
718 ,("usages_", ITusages)
719 ,("versions_", ITversions)
720 ,("exports_", ITexports)
721 ,("instance_modules_", ITinstance_modules)
722 ,("instances_", ITinstances)
723 ,("fixities_", ITfixities)
724 ,("declarations_", ITdeclarations)
725 ,("pragmas_", ITpragmas)
726 ,("forall_", ITforall)
727 ,("u_", ITunfold False)
728 ,("U_", ITunfold True)
730 ,("P_", ITspecialise)
731 ,("coerce_", ITcoerce)
732 ,("inline_", ITinline)
734 ,("integer_", ITinteger_lit)
735 ,("rational_", ITrational_lit)
736 ,("addr_", ITaddr_lit)
737 ,("float_", ITfloat_lit)
738 ,("string_", ITstring_lit)
739 ,("litlit_", ITlit_lit)
740 ,("ccall_", ITccall (False, False))
741 ,("ccall_GC_", ITccall (False, True))
742 ,("casm_", ITccall (True, False))
743 ,("casm_GC_", ITccall (True, True))
746 haskellKeywordsFM = listToUFM $
747 map (\ (x,y) -> (_PK_ x,y))
750 ,("newtype", ITnewtype)
753 ,("instance", ITinstance)
754 ,("infixl", ITinfixl)
755 ,("infixr", ITinfixr)
758 ,("case#", ITprim_case)
773 -- doDiscard rips along really fast, looking for a double semicolon,
774 -- indicating the end of the pragma we're skipping
775 doDiscard inStr buf =
776 -- _trace (show (C# (currentChar# buf))) $
777 case currentChar# buf of
780 case lookAhead# buf 1# of
781 ';'# -> incLexeme (incLexeme buf)
782 _ -> doDiscard inStr (incLexeme buf)
784 doDiscard inStr (incLexeme buf)
787 odd_slashes buf flg i# =
788 case lookAhead# buf i# of
789 '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
792 case lookAhead# buf (negateInt# 1#) of --backwards, actually
793 '\\'# -> -- escaping something..
794 if odd_slashes buf True (negateInt# 2#) then
795 -- odd number of slashes, " is escaped.
796 doDiscard inStr (incLexeme buf)
798 -- even number of slashes, \ is escaped.
799 doDiscard (not inStr) (incLexeme buf)
800 _ -> case inStr of -- forced to avoid build-up
801 True -> doDiscard False (incLexeme buf)
802 False -> doDiscard True (incLexeme buf)
803 _ -> doDiscard inStr (incLexeme buf)
808 my_span :: (a -> Bool) -> [a] -> ([a],[a])
809 my_span p xs = go [] xs
811 go so_far (x:xs') | p x = go (x:so_far) xs'
812 go so_far xs = (reverse so_far, xs)
814 my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a])
815 my_span' p xs = go [] 0 xs
817 go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs'
818 go so_far n xs = (reverse so_far,n, xs)
822 %************************************************************************
824 \subsection{Other utility functions
826 %************************************************************************
829 type IfM a = StringBuffer -- Input string
833 returnIf :: a -> IfM a
834 returnIf a s l = Succeeded a
836 thenIf :: IfM a -> (a -> IfM b) -> IfM b
837 m `thenIf` k = \s l ->
839 Succeeded a -> k a s l
840 Failed err -> Failed err
842 getSrcLocIf :: IfM SrcLoc
843 getSrcLocIf s l = Succeeded l
846 happyError s l = Failed (ifaceParseErr l ([]::[IfaceToken]){-Todo-})
850 Note that if the name of the file we're processing ends
851 with `hi-boot', we accept it on faith as having the right
852 version. This is done so that .hi-boot files that comes
853 with hsc don't have to be updated before every release,
854 *and* it allows us to share .hi-boot files with versions
855 of hsc that don't have .hi version checking (e.g., ghc-2.10's)
857 If the version number is 0, the checking is also turned off.
858 (needed to deal with GHC.hi only!)
860 Once we can assume we're compiling with a version of ghc that
861 supports interface file checking, we can drop the special
864 checkVersion :: Maybe Integer -> IfM ()
865 checkVersion mb@(Just v) s l
866 | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = Succeeded ()
867 | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
868 checkVersion mb@Nothing s l
869 | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile l)) = Succeeded ()
870 | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
872 -----------------------------------------------------------------
875 = hsep [ppr l, ptext SLIT("Interface-file parse error;"),
876 ptext SLIT("toks="), text (show (take 10 toks))]
878 ifaceVersionErr hi_vers l toks
879 = hsep [ppr l, ptext SLIT("Interface file version error;"),
880 ptext SLIT("Expected"), int opt_HiVersion,
881 ptext SLIT(" found "), pp_version]
885 Nothing -> ptext SLIT("pre ghc-3.02 version")
886 Just v -> ptext SLIT("version") <+> integer v