1 --------------------------------------------------------
3 There's a known bug in here:
5 If an interface file ends prematurely, Lex tries to
6 do headFS of an empty FastString.
8 An example that provokes the error is
10 f _:_ _forall_ [a] <<<END OF FILE>>>
11 --------------------------------------------------------
15 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
17 \section[Lexical analysis]{Lexical analysis}
22 isLexCon, isLexVar, isLexId, isLexSym,
23 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
24 mkTupNameStr, ifaceParseErr,
27 IfaceToken(..), lexIface, IfM, thenIf, returnIf, getSrcLocIf,
34 #include "HsVersions.h"
36 import Char (isDigit, isAlphanum, isUpper,isLower, isSpace, ord )
37 import List ( isSuffixOf )
39 import {-# SOURCE #-} CostCentre
41 import CmdLineOpts ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
42 import Demand ( Demand(..) {- instance Read -} )
43 import UniqFM ( UniqFM, listToUFM, lookupUFM)
44 import BasicTypes ( NewOrData(..), IfaceFlavour(..) )
45 import SrcLoc ( SrcLoc, incSrcLine, srcLocFile )
47 import Maybes ( MaybeErr(..) )
48 import ErrUtils ( ErrMsg )
50 import Util ( nOfThem, panic )
57 import PrelRead ( readRational__ ) -- Glasgow non-std
60 %************************************************************************
62 \subsection{Lexical categories}
64 %************************************************************************
66 These functions test strings to see if they fit the lexical categories
67 defined in the Haskell report. Normally applied as in e.g. @isCon
71 isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
72 isLexVarId, isLexVarSym :: FAST_STRING -> Bool
74 isLexCon cs = isLexConId cs || isLexConSym cs
75 isLexVar cs = isLexVarId cs || isLexVarSym cs
77 isLexId cs = isLexConId cs || isLexVarId cs
78 isLexSym cs = isLexConSym cs || isLexVarSym cs
84 | cs == SLIT("[]") = True
85 | c == '(' = True -- (), (,), (,,), ...
86 | otherwise = isUpper c || isUpperISO c
92 | otherwise = isLower c || isLowerISO c
98 | otherwise = c == ':'
105 | otherwise = isSymbolASCII c
111 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
112 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
113 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
114 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
115 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
116 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
120 %************************************************************************
122 \subsection{Tuple strings -- ugh!}
124 %************************************************************************
127 mkTupNameStr 0 = SLIT("()")
128 mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
129 mkTupNameStr 2 = _PK_ "(,)" -- not strictly necessary
130 mkTupNameStr 3 = _PK_ "(,,)" -- ditto
131 mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
132 mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
137 %************************************************************************
139 \subsection{Data types}
141 %************************************************************************
143 The token data type, fairly un-interesting except from two constructors,
144 @ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity,
145 strictness, unfolding etc) and types for id decls.
147 The Idea/Observation here is that the renamer needs to scan through
148 all of an interface file before it can continue. But only a fraction
149 of the information contained in the file turns out to be useful, so
150 delaying as much as possible of the scanning and parsing of an
151 interface file Makes Sense (Heap profiles of the compiler
152 show a reduction in heap usage by at least a factor of two,
155 Hence, the interface file lexer spots when value declarations are
156 being scanned and return the @ITidinfo@ and @ITtype@ constructors
157 for the type and any other id info for that binding (unfolding, strictness
158 etc). These constructors are applied to the result of lexing these sub-chunks.
160 The lexing of the type and id info is all done lazily, of course, so
161 the scanning (and subsequent parsing) will be done *only* on the ids the
162 renamer finds out that it is interested in. The rest will just be junked.
163 Laziness, you know it makes sense :-)
167 = ITinterface -- keywords
187 | ITbang -- magic symbols
202 | ITvarid FAST_STRING
203 | ITconid FAST_STRING
204 | ITvarsym FAST_STRING
205 | ITconsym FAST_STRING
206 | ITqvarid (FAST_STRING,FAST_STRING,IfaceFlavour)
207 | ITqconid (FAST_STRING,FAST_STRING,IfaceFlavour)
208 | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour)
209 | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour)
211 | ITtysig StringBuffer (Maybe StringBuffer)
212 -- lazily return the stream of tokens for
213 -- the info attached to an id.
214 -- Stuff for reading unfoldings
216 | ITunfold Bool -- True <=> there's an INLINE pragma on this Id
217 | ITstrict [Demand] | ITbottom
219 | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
220 | ITcoerce | ITinline | ITatsign
221 | ITccall (Bool,Bool) -- (is_casm, may_gc)
223 | ITchar Char | ITstring FAST_STRING
224 | ITinteger Integer | ITrational Rational
225 | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
226 | ITunknown String -- Used when the lexer can't make sense of it
227 | ITeof -- end of file token
228 deriving Text -- debugging
230 instance Text CostCentre -- cheat!
234 %************************************************************************
236 \subsection{The lexical analyser}
238 %************************************************************************
241 lexIface :: (IfaceToken -> IfM a) -> IfM a
244 -- if bufferExhausted buf then
247 -- _trace ("Lexer: "++[C# (currentChar# buf)]) $
248 case currentChar# buf of
249 -- whitespace and comments, ignore.
250 ' '# -> lexIface cont (stepOn buf)
251 '\t'# -> lexIface cont (stepOn buf)
252 '\n'# -> \ loc -> lexIface cont (stepOn buf) (incSrcLine loc)
254 -- Numbers and comments
256 case lookAhead# buf 1# of
257 '-'# -> lex_comment cont (stepOnBy# buf 2#)
260 then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
263 -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
264 -- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
267 case prefixMatch (stepOn buf) "..)" of
268 Just buf' -> cont ITdotdot (stepOverLexeme buf')
270 case lookAhead# buf 1# of
271 ','# -> lex_tuple cont Nothing (stepOnBy# buf 2#)
272 ')'# -> cont (ITconid SLIT("()")) (stepOnBy# buf 2#)
273 _ -> cont IToparen (stepOn buf)
275 '{'# -> cont ITocurly (stepOn buf)
276 '}'# -> cont ITccurly (stepOn buf)
277 ')'# -> cont ITcparen (stepOn buf)
279 case lookAhead# buf 1# of
280 ']'# -> cont (ITconid SLIT("[]")) (stepOnBy# buf 2#)
281 _ -> cont ITobrack (stepOn buf)
282 ']'# -> cont ITcbrack (stepOn buf)
283 ','# -> cont ITcomma (stepOn buf)
284 ';'# -> cont ITsemi (stepOn buf)
285 '\"'# -> case untilEndOfString# (stepOn buf) of
287 -- the string literal does *not* include the dquotes
288 case lexemeToFastString buf' of
289 v -> cont (ITstring v) (stepOn (stepOverLexeme buf'))
292 -- untilEndOfChar# extends the current lexeme until
293 -- it hits a non-escaped single quote. The lexeme of the
294 -- StringBuffer returned does *not* include the closing quote,
295 -- hence we augment the lexeme and make sure to add the
296 -- starting quote, before `read'ing the string.
298 case untilEndOfChar# (stepOn buf) of
299 buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
300 [ (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
302 -- ``thingy'' form for casm
304 case lookAhead# buf 1# of
305 '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `` and go.
306 _ -> lex_id cont (incLexeme buf) -- add ` to lexeme and assume
307 -- scanning an id of some sort.
310 case lookAhead# buf 1# of
311 'S'# -> case lookAhead# buf 2# of
313 lex_demand cont (stepOnUntil (not . isSpace)
314 (stepOnBy# buf 3#)) -- past _S_
315 's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of
316 Just buf' -> lex_scc cont (stepOnUntil (not . isSpace) (stepOverLexeme buf'))
317 Nothing -> lex_keyword cont (stepOnBy# buf 1#) -- drop the '_' and assume
319 _ -> lex_keyword cont (stepOn buf)
322 if bufferExhausted (stepOn buf) then
327 if isDigit (C# c) then
328 lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
332 lex_comment cont buf =
333 -- _trace ("comment: "++[C# (currentChar# buf)]) $
334 case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
337 lex_demand cont buf =
338 case read_em [] buf of { (ls,buf') -> cont (ITstrict ls) (stepOverLexeme buf')}
340 -- code snatched from Demand.lhs
342 case currentChar# buf of
343 'L'# -> read_em (WwLazy False : acc) (stepOn buf)
344 'A'# -> read_em (WwLazy True : acc) (stepOn buf)
345 'S'# -> read_em (WwStrict : acc) (stepOn buf)
346 'P'# -> read_em (WwPrim : acc) (stepOn buf)
347 'E'# -> read_em (WwEnum : acc) (stepOn buf)
348 ')'# -> (reverse acc, stepOn buf)
349 'U'# -> do_unpack DataType True acc (stepOnBy# buf 2#)
350 'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
351 'N'# -> do_unpack NewType True acc (stepOnBy# buf 2#)
352 'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
353 _ -> (reverse acc, buf)
355 do_unpack new_or_data wrapper_unpacks acc buf
356 = case read_em [] buf of
357 (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
361 case currentChar# buf of
364 case prefixMatch (stepOn buf) "NO_CC\"" of
365 Just buf' -> cont (ITscc noCostCentre) (stepOverLexeme buf')
367 case prefixMatch (stepOn buf) "CURRENT_CC\"" of
368 Just buf' -> cont (ITscc useCurrentCostCentre) (stepOverLexeme buf')
370 case prefixMatch (stepOn buf) "OVERHEAD\"" of
371 Just buf' -> cont (ITscc overheadCostCentre) (stepOverLexeme buf')
373 case prefixMatch (stepOn buf) "DONT_CARE\"" of
374 Just buf' -> cont (ITscc dontCareCostCentre) (stepOverLexeme buf')
376 case prefixMatch (stepOn buf) "SUBSUMED\"" of
377 Just buf' -> cont (ITscc subsumedCosts) (stepOverLexeme buf')
379 case prefixMatch (stepOn buf) "CAFs_in_...\"" of
380 Just buf' -> cont (ITscc preludeCafsCostCentre) (stepOverLexeme buf')
382 case prefixMatch (stepOn buf) "CC_CAFs_in_..." of
384 case untilChar# (stepOverLexeme buf') '\"'# of
385 buf'' -> cont (ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_)) (stepOn (stepOverLexeme buf''))
387 case prefixMatch (stepOn buf) "DICTs_in_...\"" of
388 Just buf' -> cont (ITscc (preludeDictsCostCentre True)) (stepOverLexeme buf')
390 case prefixMatch (stepOn buf) "CC_DICTs_in_..." of
392 case untilChar# (stepOverLexeme buf') '\"'# of
393 buf'' -> cont (ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True))
394 (stepOn (stepOverLexeme buf''))
398 case untilChar# buf '/'# of
400 let mod_name = lexemeToFastString buf' in
401 -- case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
403 -- let grp_name = lexemeToFastString buf'' in
404 case untilChar# (stepOn (stepOverLexeme buf')) '\"'# of
406 -- The label may contain arbitrary characters, so it
407 -- may have been escaped etc., hence we `read' it in to get
408 -- rid of these meta-chars in the string and then pack it (again.)
409 -- ToDo: do the same for module name (single quotes allowed in m-names).
410 -- BTW, the code in this module is totally gruesome..
411 let upk_label = _UNPK_ (lexemeToFastString buf'') in
412 case reads ('"':upk_label++"\"") of
414 let cc_name = _PK_ cc_label in
415 (mkUserCC cc_name mod_name _NIL_{-grp_name-},
416 stepOn (stepOverLexeme buf''))
418 trace ("trouble lexing scc label: " ++ upk_label ++ " , ignoring")
419 (mkUserCC _NIL_ mod_name _NIL_{-grp_name-},
420 stepOn (stepOverLexeme buf''))
422 case prefixMatch (stepOn buf) "CAF:" of
424 case match_user_cc (stepOverLexeme buf') of
425 (cc, buf'') -> cont (ITscc (cafifyCC cc)) buf''
427 case match_user_cc (stepOn buf) of
428 (cc, buf'') -> cont (ITscc cc) buf''
429 c -> cont (ITunknown [C# c]) (stepOn buf)
433 lex_num :: (IfaceToken -> IfM a) ->
434 (Int -> Int) -> Int# -> IfM a
435 lex_num cont minus acc# buf =
436 -- _trace ("lex_num: "++[C# (currentChar# buf)]) $
437 case scanNumLit (I# acc#) buf of
439 case currentChar# buf' of
441 -- this case is not optimised at all, as the
442 -- presence of floating point numbers in interface
443 -- files is not that common. (ToDo)
444 case expandWhile (isDigit) (incLexeme buf') of
445 buf2 -> -- points to first non digit char
446 let l = case currentChar# buf2 of
447 'e'# -> let buf3 = incLexeme buf2 in
448 case currentChar# buf3 of
449 '-'# -> expandWhile (isDigit) (incLexeme buf3)
450 _ -> expandWhile (isDigit) buf3
452 in let v = readRational__ (lexemeToString l) in
453 cont (ITrational v) (stepOverLexeme l)
455 _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
460 lex_keyword cont buf =
461 -- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $
462 case currentChar# buf of
463 ':'# -> case lookAhead# buf 1# of
464 '_'# -> -- a binding, type (and other id-info) follows,
465 -- to make the parser ever so slightly, we push
467 lex_decl cont (stepOnBy# buf 2#)
468 v# -> cont (ITunknown (['_',':',C# v#])) (stepOnBy# buf 2#)
470 case expandWhile (is_kwd_char) buf of
472 let kw = lexemeToFastString buf' in
473 -- _trace ("kw: "++lexemeToString buf') $
474 case lookupUFM ifaceKeywordsFM kw of
475 Nothing -> cont (ITunknown (_UNPK_ kw)) -- (minor) sigh
476 (stepOverLexeme buf')
477 Just xx -> cont xx (stepOverLexeme buf')
480 case doDiscard False buf of -- spin until ;; is found
482 {- _trace (show (lexemeToString buf')) $ -}
483 case currentChar# buf' of
484 '\n'# -> -- newline, no id info.
485 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
486 (stepOverLexeme buf')
487 '\r'# -> -- just to be sure for those Win* boxes..
488 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
489 (stepOverLexeme buf')
491 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
492 (stepOverLexeme buf')
493 c -> -- run all over the id info
494 case doDiscard False (stepOverLexeme buf') of -- spin until ;; is found (outside a string!)
496 --_trace ((C# c):show (lexemeToString (decLexeme buf'))) $
497 --_trace (show (lexemeToString (decLexeme buf''))) $
499 if opt_IgnoreIfacePragmas then
502 Just (lexemeToBuffer (decLexeme buf''))
505 cont (ITtysig (lexemeToBuffer (decLexeme buf')) idinfo)
506 (stepOverLexeme buf'')
509 is_kwd_char c@(C# c#) =
510 isAlphanum c || -- OLD: c `elem` "_@/\\"
521 lex_cstring cont buf =
522 case expandUntilMatch buf "\'\'" of
523 buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
524 (stepOverLexeme buf')
527 lex_tuple cont module_dot buf =
531 case currentChar# buf of
532 ','# -> go (n+1) (stepOn buf)
533 ')'# -> end_lex_id cont module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
534 _ -> cont (ITunknown ("tuple " ++ show n)) buf
536 -- Similarly ' itself is ok inside an identifier, but not at the start
538 -- id_arr is an array of bytes, indexed by characters,
539 -- containing 0 if the character isn't a valid character from an identifier
540 -- and 1 if it is. It's just a memo table for is_id_char.
541 id_arr :: ByteArray Int
544 newCharArray (0,255) >>= \ barr ->
546 loop 256# = return ()
548 if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
549 writeCharArray barr (I# i#) '\1' >>
552 writeCharArray barr (I# i#) '\0' >>
556 unsafeFreezeByteArray barr)
560 ByteArray _ arr# = id_arr
562 case ord# (indexCharArray# arr# (ord# c#)) of
566 --OLD: is_id_char c@(C# c#) = isAlphanum c || is_sym c#
570 ':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True;
571 '#'# -> True; '$'# -> True; '%'# -> True;
572 '&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True;
573 '/'# -> True; '<'# -> True; '='# -> True; '>'# -> True;
574 '?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True;
575 '-'# -> True; '~'# -> True; '@'# -> True; _ -> False }
577 --isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
580 -- mod_arr is an array of bytes, indexed by characters,
581 -- containing 0 if the character isn't a valid character from a module name,
583 mod_arr :: ByteArray Int
586 newCharArray (0,255) >>= \ barr ->
588 loop 256# = return ()
590 if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
591 writeCharArray barr (I# i#) '\1' >>
594 writeCharArray barr (I# i#) '\0' >>
598 unsafeFreezeByteArray barr)
601 is_mod_char (C# c#) =
603 ByteArray _ arr# = mod_arr
605 case ord# (indexCharArray# arr# (ord# c#)) of
609 --isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
612 -- _trace ("lex_id: "++[C# (currentChar# buf)]) $
613 case expandWhile (is_mod_char) buf of
615 case currentChar# buf' of
616 '.'# -> munch buf' HiFile
617 '!'# -> munch buf' HiBootFile
618 _ -> lex_id2 cont Nothing buf'
621 if not (emptyLexeme buf') then
622 -- _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $
623 case lexemeToFastString buf' of
624 l@(FastString u# l# ba#) -> lex_id2 cont (Just (FastString u# l# ba#, hif))
625 (stepOn (stepOverLexeme buf'))
627 lex_id2 cont Nothing buf'
630 -- Dealt with the Module.part
631 lex_id2 cont module_dot buf =
632 -- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
633 case currentChar# buf of
635 '['# -> -- Special case for []
636 case lookAhead# buf 1# of
637 ']'# -> end_lex_id cont module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
638 _ -> lex_id3 cont module_dot buf
640 '('# -> -- Special case for (,,,)
641 case lookAhead# buf 1# of
642 ')'# -> end_lex_id cont module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
643 ','# -> lex_tuple cont module_dot (stepOnBy# buf 2#)
644 _ -> lex_id3 cont module_dot buf
645 ':'# -> lex_id3 cont module_dot (incLexeme buf)
648 Nothing -> lex_id3 cont module_dot buf
649 Just ghc -> -- this should be "GHC" (current home of (->))
650 case lookAhead# buf 1# of
651 '>'# -> end_lex_id cont module_dot (ITconid SLIT("->"))
653 _ -> lex_id3 cont module_dot buf
654 _ -> lex_id3 cont module_dot buf
658 -- Dealt with [], (), : special cases
660 lex_id3 cont module_dot buf =
661 case expandWhile (is_id_char) buf of
665 end_lex_id cont module_dot (mk_var_token lexeme) new_buf
667 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
668 Just kwd_token -> cont kwd_token new_buf
669 Nothing -> cont (mk_var_token lexeme) new_buf
671 lexeme = lexemeToFastString buf'
672 new_buf = stepOverLexeme buf'
675 -- Dealt with [], (), : special cases
676 mk_var_token pk_str =
681 -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower,
682 -- remove the second half of disjunction when using a 1.3 prelude.
684 if isUpper f then ITconid pk_str
685 else if isLower f then ITvarid pk_str
686 else if f == ':' then ITconsym pk_str
687 else if isLowerISO f then ITvarid pk_str
688 else if isUpperISO f then ITconid pk_str
691 end_lex_id cont Nothing token buf = cont token buf
692 end_lex_id cont (Just (m,hif)) token buf =
694 ITconid n -> cont (ITqconid (m,n,hif)) buf
695 ITvarid n -> cont (ITqvarid (m,n,hif)) buf
696 ITconsym n -> cont (ITqconsym (m,n,hif)) buf
698 -- Special case for ->
699 -- "->" by itself is a special token (ITrarrow),
700 -- but M.-> is a ITqconid
701 ITvarsym n | n == SLIT("->")
702 -> cont (ITqconsym (m,n,hif)) buf
704 ITvarsym n -> cont (ITqvarsym (m,n,hif)) buf
706 -- ITbang can't happen here I think
707 -- ITbang -> cont (ITqvarsym (m,SLIT("!"),hif)) buf
709 _ -> cont (ITunknown (show token)) buf
712 ifaceKeywordsFM :: UniqFM IfaceToken
713 ifaceKeywordsFM = listToUFM $
714 map (\ (x,y) -> (_PK_ x,y))
717 ,("letrec_", ITletrec)
718 ,("interface_", ITinterface)
719 ,("usages_", ITusages)
720 ,("versions_", ITversions)
721 ,("exports_", ITexports)
722 ,("instance_modules_", ITinstance_modules)
723 ,("instances_", ITinstances)
724 ,("fixities_", ITfixities)
725 ,("declarations_", ITdeclarations)
726 ,("pragmas_", ITpragmas)
727 ,("forall_", ITforall)
728 ,("u_", ITunfold False)
729 ,("U_", ITunfold True)
731 ,("P_", ITspecialise)
732 ,("coerce_", ITcoerce)
733 ,("inline_", ITinline)
735 ,("integer_", ITinteger_lit)
736 ,("rational_", ITrational_lit)
737 ,("addr_", ITaddr_lit)
738 ,("float_", ITfloat_lit)
739 ,("string_", ITstring_lit)
740 ,("litlit_", ITlit_lit)
741 ,("ccall_", ITccall (False, False))
742 ,("ccall_GC_", ITccall (False, True))
743 ,("casm_", ITccall (True, False))
744 ,("casm_GC_", ITccall (True, True))
747 haskellKeywordsFM = listToUFM $
748 map (\ (x,y) -> (_PK_ x,y))
751 ,("newtype", ITnewtype)
754 ,("instance", ITinstance)
755 ,("infixl", ITinfixl)
756 ,("infixr", ITinfixr)
759 ,("case#", ITprim_case)
763 ,("deriving", ITderiving)
775 -- doDiscard rips along really fast, looking for a double semicolon,
776 -- indicating the end of the pragma we're skipping
777 doDiscard inStr buf =
778 -- _trace (show (C# (currentChar# buf))) $
779 case currentChar# buf of
782 case lookAhead# buf 1# of
783 ';'# -> incLexeme (incLexeme buf)
784 _ -> doDiscard inStr (incLexeme buf)
786 doDiscard inStr (incLexeme buf)
789 odd_slashes buf flg i# =
790 case lookAhead# buf i# of
791 '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
794 case lookAhead# buf (negateInt# 1#) of --backwards, actually
795 '\\'# -> -- escaping something..
796 if odd_slashes buf True (negateInt# 2#) then
797 -- odd number of slashes, " is escaped.
798 doDiscard inStr (incLexeme buf)
800 -- even number of slashes, \ is escaped.
801 doDiscard (not inStr) (incLexeme buf)
802 _ -> case inStr of -- forced to avoid build-up
803 True -> doDiscard False (incLexeme buf)
804 False -> doDiscard True (incLexeme buf)
805 _ -> doDiscard inStr (incLexeme buf)
810 my_span :: (a -> Bool) -> [a] -> ([a],[a])
811 my_span p xs = go [] xs
813 go so_far (x:xs') | p x = go (x:so_far) xs'
814 go so_far xs = (reverse so_far, xs)
816 my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a])
817 my_span' p xs = go [] 0 xs
819 go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs'
820 go so_far n xs = (reverse so_far,n, xs)
824 %************************************************************************
826 \subsection{Other utility functions
828 %************************************************************************
831 type IfM a = StringBuffer -- Input string
835 returnIf :: a -> IfM a
836 returnIf a s l = Succeeded a
838 thenIf :: IfM a -> (a -> IfM b) -> IfM b
839 m `thenIf` k = \s l ->
841 Succeeded a -> k a s l
842 Failed err -> Failed err
844 getSrcLocIf :: IfM SrcLoc
845 getSrcLocIf s l = Succeeded l
848 happyError s l = Failed (ifaceParseErr l ([]::[IfaceToken]){-Todo-})
852 Note that if the name of the file we're processing ends
853 with `hi-boot', we accept it on faith as having the right
854 version. This is done so that .hi-boot files that comes
855 with hsc don't have to be updated before every release,
856 *and* it allows us to share .hi-boot files with versions
857 of hsc that don't have .hi version checking (e.g., ghc-2.10's)
859 If the version number is 0, the checking is also turned off.
860 (needed to deal with GHC.hi only!)
862 Once we can assume we're compiling with a version of ghc that
863 supports interface file checking, we can drop the special
866 checkVersion :: Maybe Integer -> IfM ()
867 checkVersion mb@(Just v) s l
868 | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = Succeeded ()
869 | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
870 checkVersion mb@Nothing s l
871 | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile l)) = Succeeded ()
872 | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
874 -----------------------------------------------------------------
877 = hsep [ppr l, ptext SLIT("Interface-file parse error;"),
878 ptext SLIT("toks="), text (show (take 10 toks))]
880 ifaceVersionErr hi_vers l toks
881 = hsep [ppr l, ptext SLIT("Interface file version error;"),
882 ptext SLIT("Expected"), int opt_HiVersion,
883 ptext SLIT(" found "), pp_version]
887 Nothing -> ptext SLIT("pre ghc-3.02 version")
888 Just v -> ptext SLIT("version") <+> integer v