1 --------------------------------------------------------
3 There's a known bug in here:
5 If an interface file ends prematurely, Lex tries to
6 do headFS of an empty FastString.
8 An example that provokes the error is
10 f _:_ _forall_ [a] <<<END OF FILE>>>
11 --------------------------------------------------------
15 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
17 \section[Lexical analysis]{Lexical analysis}
22 isLexCon, isLexVar, isLexId, isLexSym,
23 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
24 mkTupNameStr, ifaceParseErr,
27 IfaceToken(..), lexIface, IfM, thenIf, returnIf, getSrcLocIf,
34 #include "HsVersions.h"
36 import Char (isDigit, isAlphanum, isUpper,isLower, isSpace, ord )
37 import List ( isSuffixOf )
39 import {-# SOURCE #-} CostCentre
41 import CmdLineOpts ( opt_IgnoreIfacePragmas )
42 import Demand ( Demand(..) {- instance Read -} )
43 import UniqFM ( UniqFM, listToUFM, lookupUFM)
44 import BasicTypes ( NewOrData(..), IfaceFlavour(..) )
45 import SrcLoc ( SrcLoc, incSrcLine, srcLocFile )
47 import Maybes ( MaybeErr(..) )
48 import ErrUtils ( ErrMsg(..) )
50 import Util ( nOfThem, panic )
58 %************************************************************************
60 \subsection{Lexical categories}
62 %************************************************************************
64 These functions test strings to see if they fit the lexical categories
65 defined in the Haskell report. Normally applied as in e.g. @isCon
69 isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
70 isLexVarId, isLexVarSym :: FAST_STRING -> Bool
72 isLexCon cs = isLexConId cs || isLexConSym cs
73 isLexVar cs = isLexVarId cs || isLexVarSym cs
75 isLexId cs = isLexConId cs || isLexVarId cs
76 isLexSym cs = isLexConSym cs || isLexVarSym cs
82 | cs == SLIT("[]") = True
83 | c == '(' = True -- (), (,), (,,), ...
84 | otherwise = isUpper c || isUpperISO c
90 | otherwise = isLower c || isLowerISO c
96 | otherwise = c == ':'
103 | otherwise = isSymbolASCII c
109 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
110 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
111 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
112 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
113 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
114 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
118 %************************************************************************
120 \subsection{Tuple strings -- ugh!}
122 %************************************************************************
125 mkTupNameStr 0 = SLIT("()")
126 mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
127 mkTupNameStr 2 = _PK_ "(,)" -- not strictly necessary
128 mkTupNameStr 3 = _PK_ "(,,)" -- ditto
129 mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
130 mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
135 %************************************************************************
137 \subsection{Data types}
139 %************************************************************************
141 The token data type, fairly un-interesting except from two constructors,
142 @ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity,
143 strictness, unfolding etc) and types for id decls.
145 The Idea/Observation here is that the renamer needs to scan through
146 all of an interface file before it can continue. But only a fraction
147 of the information contained in the file turns out to be useful, so
148 delaying as much as possible of the scanning and parsing of an
149 interface file Makes Sense (Heap profiles of the compiler
150 show at a reduction in heap usage by at least a factor of two,
153 Hence, the interface file lexer spots when value declarations are
154 being scanned and return the @ITidinfo@ and @ITtype@ constructors
155 for the type and any other id info for that binding (unfolding, strictness
156 etc). These constructors are applied to the result of lexing these sub-chunks.
158 The lexing of the type and id info is all done lazily, of course, so
159 the scanning (and subsequent parsing) will be done *only* on the ids the
160 renamer finds out that it is interested in. The rest will just be junked.
161 Laziness, you know it makes sense :-)
165 = ITinterface -- keywords
185 | ITbang -- magic symbols
200 | ITvarid FAST_STRING
201 | ITconid FAST_STRING
202 | ITvarsym FAST_STRING
203 | ITconsym FAST_STRING
204 | ITqvarid (FAST_STRING,FAST_STRING,IfaceFlavour)
205 | ITqconid (FAST_STRING,FAST_STRING,IfaceFlavour)
206 | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour)
207 | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour)
209 | ITtysig StringBuffer (Maybe StringBuffer)
210 -- lazily return the stream of tokens for
211 -- the info attached to an id.
212 -- Stuff for reading unfoldings
214 | ITunfold Bool -- True <=> there's an INLINE pragma on this Id
215 | ITstrict [Demand] | ITbottom
217 | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
218 | ITcoerce | ITinline | ITatsign
219 | ITccall (Bool,Bool) -- (is_casm, may_gc)
221 | ITchar Char | ITstring FAST_STRING
222 | ITinteger Integer | ITdouble Double
223 | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
224 | ITunknown String -- Used when the lexer can't make sense of it
225 | ITeof -- end of file token
226 deriving Text -- debugging
228 instance Text CostCentre -- cheat!
232 %************************************************************************
234 \subsection{The lexical analyser}
236 %************************************************************************
239 lexIface :: (IfaceToken -> IfM a) -> IfM a
242 -- if bufferExhausted buf then
245 -- _trace ("Lexer: "++[C# (currentChar# buf)]) $
246 case currentChar# buf of
247 -- whitespace and comments, ignore.
248 ' '# -> lexIface cont (stepOn buf)
249 '\t'# -> lexIface cont (stepOn buf)
250 '\n'# -> \ loc -> lexIface cont (stepOn buf) (incSrcLine loc)
252 -- Numbers and comments
254 case lookAhead# buf 1# of
255 '-'# -> lex_comment cont (stepOnBy# buf 2#)
258 then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
261 -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
262 -- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
265 case prefixMatch (stepOn buf) "..)" of
266 Just buf' -> cont ITdotdot (stepOverLexeme buf')
268 case lookAhead# buf 1# of
269 ','# -> lex_tuple cont Nothing (stepOnBy# buf 2#)
270 ')'# -> cont (ITconid SLIT("()")) (stepOnBy# buf 2#)
271 _ -> cont IToparen (stepOn buf)
273 '{'# -> cont ITocurly (stepOn buf)
274 '}'# -> cont ITccurly (stepOn buf)
275 ')'# -> cont ITcparen (stepOn buf)
277 case lookAhead# buf 1# of
278 ']'# -> cont (ITconid SLIT("[]")) (stepOnBy# buf 2#)
279 _ -> cont ITobrack (stepOn buf)
280 ']'# -> cont ITcbrack (stepOn buf)
281 ','# -> cont ITcomma (stepOn buf)
282 ';'# -> cont ITsemi (stepOn buf)
283 '\"'# -> case untilEndOfString# (stepOn buf) of
285 -- the string literal does *not* include the dquotes
286 case lexemeToFastString buf' of
287 v -> cont (ITstring v) (stepOn (stepOverLexeme buf'))
290 -- untilEndOfChar# extends the current lexeme until
291 -- it hits a non-escaped single quote. The lexeme of the
292 -- StringBuffer returned does *not* include the closing quote,
293 -- hence we augment the lexeme and make sure to add the
294 -- starting quote, before `read'ing the string.
296 case untilEndOfChar# (stepOn buf) of
297 buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
298 [ (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
300 -- ``thingy'' form for casm
302 case lookAhead# buf 1# of
303 '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `` and go.
304 _ -> lex_id cont (incLexeme buf) -- add ` to lexeme and assume
305 -- scanning an id of some sort.
308 case lookAhead# buf 1# of
309 'S'# -> case lookAhead# buf 2# of
311 lex_demand cont (stepOnUntil (not . isSpace)
312 (stepOnBy# buf 3#)) -- past _S_
313 's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of
314 Just buf' -> lex_scc cont (stepOnUntil (not . isSpace) (stepOverLexeme buf'))
315 Nothing -> lex_keyword cont (stepOnBy# buf 1#) -- drop the '_' and assume
317 _ -> lex_keyword cont (stepOn buf)
320 if bufferExhausted (stepOn buf) then
325 if isDigit (C# c) then
326 lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
330 lex_comment cont buf =
331 -- _trace ("comment: "++[C# (currentChar# buf)]) $
332 case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
335 lex_demand cont buf =
336 -- _trace ("demand: "++[C# (currentChar# buf)]) $
337 case read_em [] buf of { (ls,buf') -> cont (ITstrict ls) (stepOverLexeme buf')}
339 -- code snatched from Demand.lhs
341 -- _trace ("read_em: "++[C# (currentChar# buf)]) $
342 case currentChar# buf of
343 'L'# -> read_em (WwLazy False : acc) (stepOn buf)
344 'A'# -> read_em (WwLazy True : acc) (stepOn buf)
345 'S'# -> read_em (WwStrict : acc) (stepOn buf)
346 'P'# -> read_em (WwPrim : acc) (stepOn buf)
347 'E'# -> read_em (WwEnum : acc) (stepOn buf)
348 ')'# -> (reverse acc, stepOn buf)
349 'U'# -> do_unpack DataType True acc (stepOnBy# buf 2#)
350 'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
351 'N'# -> do_unpack NewType True acc (stepOnBy# buf 2#)
352 'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
353 _ -> (reverse acc, buf)
355 do_unpack new_or_data wrapper_unpacks acc buf
356 = case read_em [] buf of
357 (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
361 -- _trace ("scc: "++[C# (currentChar# buf)]) $
362 case currentChar# buf of
365 case prefixMatch (stepOn buf) "NO_CC\"" of
366 Just buf' -> cont (ITscc noCostCentre) (stepOverLexeme buf')
368 case prefixMatch (stepOn buf) "CURRENT_CC\"" of
369 Just buf' -> cont (ITscc useCurrentCostCentre) (stepOverLexeme buf')
371 case prefixMatch (stepOn buf) "OVERHEAD\"" of
372 Just buf' -> cont (ITscc overheadCostCentre) (stepOverLexeme buf')
374 case prefixMatch (stepOn buf) "DONT_CARE\"" of
375 Just buf' -> cont (ITscc dontCareCostCentre) (stepOverLexeme buf')
377 case prefixMatch (stepOn buf) "SUBSUMED\"" of
378 Just buf' -> cont (ITscc subsumedCosts) (stepOverLexeme buf')
380 case prefixMatch (stepOn buf) "CAFs_in_...\"" of
381 Just buf' -> cont (ITscc preludeCafsCostCentre) (stepOverLexeme buf')
383 case prefixMatch (stepOn buf) "CC_CAFs_in_..." of
385 case untilChar# (stepOverLexeme buf') '\"'# of
386 buf'' -> cont (ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_)) (stepOn (stepOverLexeme buf''))
388 case prefixMatch (stepOn buf) "DICTs_in_...\"" of
389 Just buf' -> cont (ITscc (preludeDictsCostCentre True)) (stepOverLexeme buf')
391 case prefixMatch (stepOn buf) "CC_DICTs_in_..." of
393 case untilChar# (stepOverLexeme buf') '\"'# of
394 buf'' -> cont (ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True))
395 (stepOn (stepOverLexeme buf''))
399 case untilChar# buf '/'# of
401 let mod_name = lexemeToFastString buf' in
402 -- case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
404 -- let grp_name = lexemeToFastString buf'' in
405 case untilChar# (stepOn (stepOverLexeme buf')) '\"'# of
407 -- The label may contain arbitrary characters, so it
408 -- may have been escaped etc., hence we `read' it in to get
409 -- rid of these meta-chars in the string and then pack it (again.)
410 -- ToDo: do the same for module name (single quotes allowed in m-names).
411 -- BTW, the code in this module is totally gruesome..
412 let upk_label = _UNPK_ (lexemeToFastString buf'') in
413 case reads ('"':upk_label++"\"") of
415 let cc_name = _PK_ cc_label in
416 (mkUserCC cc_name mod_name _NIL_{-grp_name-},
417 stepOn (stepOverLexeme buf''))
419 trace ("trouble lexing scc label: " ++ upk_label ++ " , ignoring")
420 (mkUserCC _NIL_ mod_name _NIL_{-grp_name-},
421 stepOn (stepOverLexeme buf''))
423 case prefixMatch (stepOn buf) "CAF:" of
425 case match_user_cc (stepOverLexeme buf') of
426 (cc, buf'') -> cont (ITscc (cafifyCC cc)) buf''
428 case match_user_cc (stepOn buf) of
429 (cc, buf'') -> cont (ITscc cc) buf''
430 c -> cont (ITunknown [C# c]) (stepOn buf)
434 lex_num :: (IfaceToken -> IfM a) ->
435 (Int -> Int) -> Int# -> IfM a
436 lex_num cont minus acc# buf =
437 -- _trace ("lex_num: "++[C# (currentChar# buf)]) $
438 case scanNumLit (I# acc#) buf of
440 case currentChar# buf' of
442 -- this case is not optimised at all, as the
443 -- presence of floating point numbers in interface
444 -- files is not that common. (ToDo)
445 case expandWhile (isDigit) (incLexeme buf') of
446 buf'' -> -- points to first non digit char
447 case reads (lexemeToString buf'') of
448 [(v,_)] -> cont (ITdouble v) (stepOverLexeme buf'')
449 _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
451 -- case reads (lexemeToString buf') of
452 -- [(i,_)] -> cont (ITinteger i) (stepOverLexeme buf')
455 lex_keyword cont buf =
456 -- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $
457 case currentChar# buf of
458 ':'# -> case lookAhead# buf 1# of
459 '_'# -> -- a binding, type (and other id-info) follows,
460 -- to make the parser ever so slightly, we push
462 lex_decl cont (stepOnBy# buf 2#)
463 v# -> cont (ITunknown (['_',':',C# v#])) (stepOnBy# buf 2#)
465 case expandWhile (is_kwd_char) buf of
467 let kw = lexemeToFastString buf' in
468 -- _trace ("kw: "++lexemeToString buf') $
469 case lookupUFM ifaceKeywordsFM kw of
470 Nothing -> cont (ITunknown (_UNPK_ kw)) -- (minor) sigh
471 (stepOverLexeme buf')
472 Just xx -> cont xx (stepOverLexeme buf')
475 case doDiscard False buf of -- spin until ;; is found
477 {- _trace (show (lexemeToString buf')) $ -}
478 case currentChar# buf' of
479 '\n'# -> -- newline, no id info.
480 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
481 (stepOverLexeme buf')
482 '\r'# -> -- just to be sure for those Win* boxes..
483 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
484 (stepOverLexeme buf')
486 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
487 (stepOverLexeme buf')
488 c -> -- run all over the id info
489 case doDiscard False (stepOverLexeme buf') of -- spin until ;; is found (outside a string!)
491 --_trace ((C# c):show (lexemeToString (decLexeme buf'))) $
492 --_trace (show (lexemeToString (decLexeme buf''))) $
494 if opt_IgnoreIfacePragmas then
497 Just (lexemeToBuffer (decLexeme buf''))
500 cont (ITtysig (lexemeToBuffer (decLexeme buf')) idinfo)
501 (stepOverLexeme buf'')
504 is_kwd_char c@(C# c#) =
505 isAlphanum c || -- OLD: c `elem` "_@/\\"
516 lex_cstring cont buf =
517 -- _trace ("lex_cstring: "++[C# (currentChar# buf)]) $
518 case expandUntilMatch buf "\'\'" of
519 buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
520 (stepOverLexeme buf')
523 lex_tuple cont module_dot buf =
524 -- _trace ("lex_tuple: "++[C# (currentChar# buf)]) $
528 case currentChar# buf of
529 ','# -> go (n+1) (stepOn buf)
530 ')'# -> end_lex_id cont module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
531 _ -> cont (ITunknown ("tuple " ++ show n)) buf
533 -- Similarly ' itself is ok inside an identifier, but not at the start
535 -- id_arr is an array of bytes, indexed by characters,
536 -- containing 0 if the character isn't a valid character from an identifier
537 -- and 1 if it is. It's just a memo table for is_id_char.
538 id_arr :: ByteArray Int
541 newCharArray (0,255) >>= \ barr ->
543 loop 256# = return ()
545 if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
546 writeCharArray barr (I# i#) '\1' >>
549 writeCharArray barr (I# i#) '\0' >>
553 unsafeFreezeByteArray barr)
557 ByteArray _ arr# = id_arr
559 case ord# (indexCharArray# arr# (ord# c#)) of
563 --OLD: is_id_char c@(C# c#) = isAlphanum c || is_sym c#
567 ':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True;
568 '#'# -> True; '$'# -> True; '%'# -> True;
569 '&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True;
570 '/'# -> True; '<'# -> True; '='# -> True; '>'# -> True;
571 '?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True;
572 '-'# -> True; '~'# -> True; '@'# -> True; _ -> False }
574 --isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
577 -- mod_arr is an array of bytes, indexed by characters,
578 -- containing 0 if the character isn't a valid character from a module name,
580 mod_arr :: ByteArray Int
583 newCharArray (0,255) >>= \ barr ->
585 loop 256# = return ()
587 if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
588 writeCharArray barr (I# i#) '\1' >>
591 writeCharArray barr (I# i#) '\0' >>
595 unsafeFreezeByteArray barr)
598 is_mod_char (C# c#) =
600 ByteArray _ arr# = mod_arr
602 case ord# (indexCharArray# arr# (ord# c#)) of
606 --isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
609 -- _trace ("lex_id: "++[C# (currentChar# buf)]) $
610 case expandWhile (is_mod_char) buf of
612 case currentChar# buf' of
613 '.'# -> munch buf' HiFile
614 '!'# -> munch buf' HiBootFile
615 _ -> lex_id2 cont Nothing buf'
618 if not (emptyLexeme buf') then
619 -- _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $
620 case lexemeToFastString buf' of
621 l@(FastString u# l# ba#) -> lex_id2 cont (Just (FastString u# l# ba#, hif))
622 (stepOn (stepOverLexeme buf'))
624 lex_id2 cont Nothing buf'
627 -- Dealt with the Module.part
628 lex_id2 cont module_dot buf =
629 -- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
630 case currentChar# buf of
632 '['# -> -- Special case for []
633 case lookAhead# buf 1# of
634 ']'# -> end_lex_id cont module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
635 _ -> lex_id3 cont module_dot buf
637 '('# -> -- Special case for (,,,)
638 case lookAhead# buf 1# of
639 ')'# -> end_lex_id cont module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
640 ','# -> lex_tuple cont module_dot (stepOnBy# buf 2#)
641 _ -> lex_id3 cont module_dot buf
642 ':'# -> lex_id3 cont module_dot (incLexeme buf)
645 Nothing -> lex_id3 cont module_dot buf
646 Just ghc -> -- this should be "GHC" (current home of (->))
647 case lookAhead# buf 1# of
648 '>'# -> end_lex_id cont module_dot (ITconid SLIT("->"))
650 _ -> lex_id3 cont module_dot buf
651 _ -> lex_id3 cont module_dot buf
655 -- Dealt with [], (), : special cases
657 lex_id3 cont module_dot buf =
658 -- _trace ("lex_id3: "++[C# (currentChar# buf)]) $
659 case expandWhile (is_id_char) buf of
663 end_lex_id cont module_dot (mk_var_token lexeme) new_buf
665 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
666 Just kwd_token -> cont kwd_token new_buf
667 Nothing -> cont (mk_var_token lexeme) new_buf
669 lexeme = lexemeToFastString buf'
670 new_buf = stepOverLexeme buf'
674 lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs
675 lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs
676 lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs
677 lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs
678 lex_id2 module_dot xs cs = lex_id3 module_dot xs cs
681 -- Dealt with [], (), : special cases
684 lex_id3 module_dot len_xs xs cs =
685 case my_span' (is_id_char) cs of
686 (xs1,len_xs1,rest) ->
688 Just m -> end_lex_id (Just m) (mk_var_token rxs) rest --OLD:_PK_ (reverse xs))) rest
690 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM rxs of
691 Just kwd_token -> kwd_token : lexIface rest
692 other -> token : lexIface cs end_lex_id Nothing (mk_var_token rxs) rest
694 rxs = packNChars (len_xs+len_xs1) (xs++xs1) -- OLD: _PK_ (reverse xs)
696 mk_var_token pk_str =
701 -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower,
702 -- remove the second half of disjunction when using a 1.3 prelude.
704 if isUpper f then ITconid pk_str
705 else if isLower f then ITvarid pk_str
706 else if f == ':' then ITconsym pk_str
707 else if isLowerISO f then ITvarid pk_str
708 else if isUpperISO f then ITconid pk_str
712 mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n
713 | f == ':' = ITconsym n
714 | isAlpha f = ITvarid n
715 | otherwise = ITvarsym n
720 end_lex_id cont Nothing token buf = cont token buf
721 end_lex_id cont (Just (m,hif)) token buf =
723 ITconid n -> cont (ITqconid (m,n,hif)) buf
724 ITvarid n -> cont (ITqvarid (m,n,hif)) buf
725 ITconsym n -> cont (ITqconsym (m,n,hif)) buf
727 -- Special case for ->
728 -- "->" by itself is a special token (ITrarrow),
729 -- but M.-> is a ITqconid
730 ITvarsym n | n == SLIT("->")
731 -> cont (ITqconsym (m,n,hif)) buf
733 ITvarsym n -> cont (ITqvarsym (m,n,hif)) buf
735 -- ITbang can't happen here I think
736 -- ITbang -> cont (ITqvarsym (m,SLIT("!"),hif)) buf
738 _ -> cont (ITunknown (show token)) buf
741 ifaceKeywordsFM :: UniqFM IfaceToken
742 ifaceKeywordsFM = listToUFM $
743 map (\ (x,y) -> (_PK_ x,y))
746 ,("letrec_", ITletrec)
747 ,("interface_", ITinterface)
748 ,("usages_", ITusages)
749 ,("versions_", ITversions)
750 ,("exports_", ITexports)
751 ,("instance_modules_", ITinstance_modules)
752 ,("instances_", ITinstances)
753 ,("fixities_", ITfixities)
754 ,("declarations_", ITdeclarations)
755 ,("pragmas_", ITpragmas)
756 ,("forall_", ITforall)
757 ,("u_", ITunfold False)
758 ,("U_", ITunfold True)
760 ,("P_", ITspecialise)
761 ,("coerce_", ITcoerce)
762 ,("inline_", ITinline)
764 ,("integer_", ITinteger_lit)
765 ,("rational_", ITrational_lit)
766 ,("addr_", ITaddr_lit)
767 ,("float_", ITfloat_lit)
768 ,("string_", ITstring_lit)
769 ,("litlit_", ITlit_lit)
770 ,("ccall_", ITccall (False, False))
771 ,("ccall_GC_", ITccall (False, True))
772 ,("casm_", ITccall (True, False))
773 ,("casm_GC_", ITccall (True, True))
776 haskellKeywordsFM = listToUFM $
777 map (\ (x,y) -> (_PK_ x,y))
780 ,("newtype", ITnewtype)
783 ,("instance", ITinstance)
784 ,("infixl", ITinfixl)
785 ,("infixr", ITinfixr)
788 ,("case#", ITprim_case)
792 ,("deriving", ITderiving)
804 -- doDiscard rips along really fast, looking for a double semicolon,
805 -- indicating the end of the pragma we're skipping
806 doDiscard inStr buf =
807 -- _trace (show (C# (currentChar# buf))) $
808 case currentChar# buf of
811 case lookAhead# buf 1# of
812 ';'# -> incLexeme (incLexeme buf)
813 _ -> doDiscard inStr (incLexeme buf)
815 doDiscard inStr (incLexeme buf)
818 odd_slashes buf flg i# =
819 case lookAhead# buf i# of
820 '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
823 case lookAhead# buf (negateInt# 1#) of --backwards, actually
824 '\\'# -> -- escaping something..
825 if odd_slashes buf True (negateInt# 2#) then
826 -- odd number of slashes, " is escaped.
827 doDiscard inStr (incLexeme buf)
829 -- even number of slashes, \ is escaped.
830 doDiscard (not inStr) (incLexeme buf)
831 _ -> case inStr of -- forced to avoid build-up
832 True -> doDiscard False (incLexeme buf)
833 False -> doDiscard True (incLexeme buf)
834 _ -> doDiscard inStr (incLexeme buf)
839 my_span :: (a -> Bool) -> [a] -> ([a],[a])
840 my_span p xs = go [] xs
842 go so_far (x:xs') | p x = go (x:so_far) xs'
843 go so_far xs = (reverse so_far, xs)
845 my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a])
846 my_span' p xs = go [] 0 xs
848 go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs'
849 go so_far n xs = (reverse so_far,n, xs)
853 %************************************************************************
855 \subsection{Other utility functions
857 %************************************************************************
860 type IfM a = StringBuffer -- Input string
864 returnIf :: a -> IfM a
865 returnIf a s l = Succeeded a
867 thenIf :: IfM a -> (a -> IfM b) -> IfM b
868 m `thenIf` k = \s l ->
870 Succeeded a -> k a s l
871 Failed err -> Failed err
873 getSrcLocIf :: IfM SrcLoc
874 getSrcLocIf s l = Succeeded l
877 happyError s l = Failed (ifaceParseErr l ([]::[IfaceToken]){-Todo-})
881 Note that if the file we're processing ends with `hi-boot',
882 we accept it on faith as having the right version.
883 This is done so that .hi-boot files that comes with hsc
884 don't have to be updated before every release, and it
885 allows us to share .hi-boot files with versions of hsc
886 that don't have .hi version checking (e.g., ghc-2.10's)
888 If the version number is 0, the checking is also turned off.
890 checkVersion :: Maybe Integer -> IfM ()
891 checkVersion mb@(Just v) s l
892 | (v==0) || (v == PROJECTVERSION) = Succeeded ()
893 | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
894 checkVersion mb@Nothing s l
895 | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile l)) = Succeeded ()
896 | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
898 -----------------------------------------------------------------
901 = hsep [ppr l, ptext SLIT("Interface-file parse error;"),
902 ptext SLIT("toks="), text (show (take 10 toks))]
904 ifaceVersionErr hi_vers l toks
905 = hsep [ppr l, ptext SLIT("Interface file version error;"),
906 ptext SLIT("Expected"), int PROJECTVERSION,
907 ptext SLIT(" found "), pp_version]
911 Nothing -> ptext SLIT("pre ghc-3.02 version")
912 Just v -> ptext SLIT("version") <+> integer v