1 --------------------------------------------------------
3 There's a known bug in here:
5 If an interface file ends prematurely, Lex tries to
6 do headFS of an empty FastString.
8 An example that provokes the error is
10 f _:_ _forall_ [a] <<<END OF FILE>>>
11 --------------------------------------------------------
15 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
17 \section[Lexical analysis]{Lexical analysis}
22 isLexCon, isLexVar, isLexId, isLexSym,
23 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
24 mkTupNameStr, ifaceParseErr,
27 IfaceToken(..), lexIface, IfM, thenIf, returnIf, getSrcLocIf,
34 #include "HsVersions.h"
36 import Char (isDigit, isAlphanum, isUpper,isLower, isSpace, ord )
37 import List ( isSuffixOf )
39 import {-# SOURCE #-} CostCentre
41 import CmdLineOpts ( opt_IgnoreIfacePragmas )
42 import Demand ( Demand(..) {- instance Read -} )
43 import UniqFM ( UniqFM, listToUFM, lookupUFM)
44 import BasicTypes ( NewOrData(..), IfaceFlavour(..) )
45 import SrcLoc ( SrcLoc, incSrcLine, srcLocFile )
47 import Maybes ( MaybeErr(..) )
48 import ErrUtils ( ErrMsg(..) )
50 import Util ( nOfThem, panic )
57 import PrelRead ( readRational__ ) -- Glasgow non-std
60 %************************************************************************
62 \subsection{Lexical categories}
64 %************************************************************************
66 These functions test strings to see if they fit the lexical categories
67 defined in the Haskell report. Normally applied as in e.g. @isCon
71 isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
72 isLexVarId, isLexVarSym :: FAST_STRING -> Bool
74 isLexCon cs = isLexConId cs || isLexConSym cs
75 isLexVar cs = isLexVarId cs || isLexVarSym cs
77 isLexId cs = isLexConId cs || isLexVarId cs
78 isLexSym cs = isLexConSym cs || isLexVarSym cs
84 | cs == SLIT("[]") = True
85 | c == '(' = True -- (), (,), (,,), ...
86 | otherwise = isUpper c || isUpperISO c
92 | otherwise = isLower c || isLowerISO c
98 | otherwise = c == ':'
105 | otherwise = isSymbolASCII c
111 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
112 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
113 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
114 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
115 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
116 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
120 %************************************************************************
122 \subsection{Tuple strings -- ugh!}
124 %************************************************************************
127 mkTupNameStr 0 = SLIT("()")
128 mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
129 mkTupNameStr 2 = _PK_ "(,)" -- not strictly necessary
130 mkTupNameStr 3 = _PK_ "(,,)" -- ditto
131 mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
132 mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
137 %************************************************************************
139 \subsection{Data types}
141 %************************************************************************
143 The token data type, fairly un-interesting except from two constructors,
144 @ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity,
145 strictness, unfolding etc) and types for id decls.
147 The Idea/Observation here is that the renamer needs to scan through
148 all of an interface file before it can continue. But only a fraction
149 of the information contained in the file turns out to be useful, so
150 delaying as much as possible of the scanning and parsing of an
151 interface file Makes Sense (Heap profiles of the compiler
152 show at a reduction in heap usage by at least a factor of two,
155 Hence, the interface file lexer spots when value declarations are
156 being scanned and return the @ITidinfo@ and @ITtype@ constructors
157 for the type and any other id info for that binding (unfolding, strictness
158 etc). These constructors are applied to the result of lexing these sub-chunks.
160 The lexing of the type and id info is all done lazily, of course, so
161 the scanning (and subsequent parsing) will be done *only* on the ids the
162 renamer finds out that it is interested in. The rest will just be junked.
163 Laziness, you know it makes sense :-)
167 = ITinterface -- keywords
187 | ITbang -- magic symbols
202 | ITvarid FAST_STRING
203 | ITconid FAST_STRING
204 | ITvarsym FAST_STRING
205 | ITconsym FAST_STRING
206 | ITqvarid (FAST_STRING,FAST_STRING,IfaceFlavour)
207 | ITqconid (FAST_STRING,FAST_STRING,IfaceFlavour)
208 | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour)
209 | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour)
211 | ITtysig StringBuffer (Maybe StringBuffer)
212 -- lazily return the stream of tokens for
213 -- the info attached to an id.
214 -- Stuff for reading unfoldings
216 | ITunfold Bool -- True <=> there's an INLINE pragma on this Id
217 | ITstrict [Demand] | ITbottom
219 | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
220 | ITcoerce | ITinline | ITatsign
221 | ITccall (Bool,Bool) -- (is_casm, may_gc)
223 | ITchar Char | ITstring FAST_STRING
224 | ITinteger Integer | ITrational Rational
225 | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
226 | ITunknown String -- Used when the lexer can't make sense of it
227 | ITeof -- end of file token
228 deriving Text -- debugging
230 instance Text CostCentre -- cheat!
234 %************************************************************************
236 \subsection{The lexical analyser}
238 %************************************************************************
241 lexIface :: (IfaceToken -> IfM a) -> IfM a
244 -- if bufferExhausted buf then
247 -- _trace ("Lexer: "++[C# (currentChar# buf)]) $
248 case currentChar# buf of
249 -- whitespace and comments, ignore.
250 ' '# -> lexIface cont (stepOn buf)
251 '\t'# -> lexIface cont (stepOn buf)
252 '\n'# -> \ loc -> lexIface cont (stepOn buf) (incSrcLine loc)
254 -- Numbers and comments
256 case lookAhead# buf 1# of
257 '-'# -> lex_comment cont (stepOnBy# buf 2#)
260 then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
263 -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
264 -- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
267 case prefixMatch (stepOn buf) "..)" of
268 Just buf' -> cont ITdotdot (stepOverLexeme buf')
270 case lookAhead# buf 1# of
271 ','# -> lex_tuple cont Nothing (stepOnBy# buf 2#)
272 ')'# -> cont (ITconid SLIT("()")) (stepOnBy# buf 2#)
273 _ -> cont IToparen (stepOn buf)
275 '{'# -> cont ITocurly (stepOn buf)
276 '}'# -> cont ITccurly (stepOn buf)
277 ')'# -> cont ITcparen (stepOn buf)
279 case lookAhead# buf 1# of
280 ']'# -> cont (ITconid SLIT("[]")) (stepOnBy# buf 2#)
281 _ -> cont ITobrack (stepOn buf)
282 ']'# -> cont ITcbrack (stepOn buf)
283 ','# -> cont ITcomma (stepOn buf)
284 ';'# -> cont ITsemi (stepOn buf)
285 '\"'# -> case untilEndOfString# (stepOn buf) of
287 -- the string literal does *not* include the dquotes
288 case lexemeToFastString buf' of
289 v -> cont (ITstring v) (stepOn (stepOverLexeme buf'))
292 -- untilEndOfChar# extends the current lexeme until
293 -- it hits a non-escaped single quote. The lexeme of the
294 -- StringBuffer returned does *not* include the closing quote,
295 -- hence we augment the lexeme and make sure to add the
296 -- starting quote, before `read'ing the string.
298 case untilEndOfChar# (stepOn buf) of
299 buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
300 [ (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
302 -- ``thingy'' form for casm
304 case lookAhead# buf 1# of
305 '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `` and go.
306 _ -> lex_id cont (incLexeme buf) -- add ` to lexeme and assume
307 -- scanning an id of some sort.
310 case lookAhead# buf 1# of
311 'S'# -> case lookAhead# buf 2# of
313 lex_demand cont (stepOnUntil (not . isSpace)
314 (stepOnBy# buf 3#)) -- past _S_
315 's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of
316 Just buf' -> lex_scc cont (stepOnUntil (not . isSpace) (stepOverLexeme buf'))
317 Nothing -> lex_keyword cont (stepOnBy# buf 1#) -- drop the '_' and assume
319 _ -> lex_keyword cont (stepOn buf)
322 if bufferExhausted (stepOn buf) then
327 if isDigit (C# c) then
328 lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
332 lex_comment cont buf =
333 -- _trace ("comment: "++[C# (currentChar# buf)]) $
334 case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
337 lex_demand cont buf =
338 -- _trace ("demand: "++[C# (currentChar# buf)]) $
339 case read_em [] buf of { (ls,buf') -> cont (ITstrict ls) (stepOverLexeme buf')}
341 -- code snatched from Demand.lhs
343 -- _trace ("read_em: "++[C# (currentChar# buf)]) $
344 case currentChar# buf of
345 'L'# -> read_em (WwLazy False : acc) (stepOn buf)
346 'A'# -> read_em (WwLazy True : acc) (stepOn buf)
347 'S'# -> read_em (WwStrict : acc) (stepOn buf)
348 'P'# -> read_em (WwPrim : acc) (stepOn buf)
349 'E'# -> read_em (WwEnum : acc) (stepOn buf)
350 ')'# -> (reverse acc, stepOn buf)
351 'U'# -> do_unpack DataType True acc (stepOnBy# buf 2#)
352 'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
353 'N'# -> do_unpack NewType True acc (stepOnBy# buf 2#)
354 'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
355 _ -> (reverse acc, buf)
357 do_unpack new_or_data wrapper_unpacks acc buf
358 = case read_em [] buf of
359 (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
363 -- _trace ("scc: "++[C# (currentChar# buf)]) $
364 case currentChar# buf of
367 case prefixMatch (stepOn buf) "NO_CC\"" of
368 Just buf' -> cont (ITscc noCostCentre) (stepOverLexeme buf')
370 case prefixMatch (stepOn buf) "CURRENT_CC\"" of
371 Just buf' -> cont (ITscc useCurrentCostCentre) (stepOverLexeme buf')
373 case prefixMatch (stepOn buf) "OVERHEAD\"" of
374 Just buf' -> cont (ITscc overheadCostCentre) (stepOverLexeme buf')
376 case prefixMatch (stepOn buf) "DONT_CARE\"" of
377 Just buf' -> cont (ITscc dontCareCostCentre) (stepOverLexeme buf')
379 case prefixMatch (stepOn buf) "SUBSUMED\"" of
380 Just buf' -> cont (ITscc subsumedCosts) (stepOverLexeme buf')
382 case prefixMatch (stepOn buf) "CAFs_in_...\"" of
383 Just buf' -> cont (ITscc preludeCafsCostCentre) (stepOverLexeme buf')
385 case prefixMatch (stepOn buf) "CC_CAFs_in_..." of
387 case untilChar# (stepOverLexeme buf') '\"'# of
388 buf'' -> cont (ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_)) (stepOn (stepOverLexeme buf''))
390 case prefixMatch (stepOn buf) "DICTs_in_...\"" of
391 Just buf' -> cont (ITscc (preludeDictsCostCentre True)) (stepOverLexeme buf')
393 case prefixMatch (stepOn buf) "CC_DICTs_in_..." of
395 case untilChar# (stepOverLexeme buf') '\"'# of
396 buf'' -> cont (ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True))
397 (stepOn (stepOverLexeme buf''))
401 case untilChar# buf '/'# of
403 let mod_name = lexemeToFastString buf' in
404 -- case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
406 -- let grp_name = lexemeToFastString buf'' in
407 case untilChar# (stepOn (stepOverLexeme buf')) '\"'# of
409 -- The label may contain arbitrary characters, so it
410 -- may have been escaped etc., hence we `read' it in to get
411 -- rid of these meta-chars in the string and then pack it (again.)
412 -- ToDo: do the same for module name (single quotes allowed in m-names).
413 -- BTW, the code in this module is totally gruesome..
414 let upk_label = _UNPK_ (lexemeToFastString buf'') in
415 case reads ('"':upk_label++"\"") of
417 let cc_name = _PK_ cc_label in
418 (mkUserCC cc_name mod_name _NIL_{-grp_name-},
419 stepOn (stepOverLexeme buf''))
421 trace ("trouble lexing scc label: " ++ upk_label ++ " , ignoring")
422 (mkUserCC _NIL_ mod_name _NIL_{-grp_name-},
423 stepOn (stepOverLexeme buf''))
425 case prefixMatch (stepOn buf) "CAF:" of
427 case match_user_cc (stepOverLexeme buf') of
428 (cc, buf'') -> cont (ITscc (cafifyCC cc)) buf''
430 case match_user_cc (stepOn buf) of
431 (cc, buf'') -> cont (ITscc cc) buf''
432 c -> cont (ITunknown [C# c]) (stepOn buf)
436 lex_num :: (IfaceToken -> IfM a) ->
437 (Int -> Int) -> Int# -> IfM a
438 lex_num cont minus acc# buf =
439 -- _trace ("lex_num: "++[C# (currentChar# buf)]) $
440 case scanNumLit (I# acc#) buf of
442 case currentChar# buf' of
444 -- this case is not optimised at all, as the
445 -- presence of floating point numbers in interface
446 -- files is not that common. (ToDo)
447 case expandWhile (isDigit) (incLexeme buf') of
448 buf2 -> -- points to first non digit char
449 let l = case currentChar# buf2 of
450 'e'# -> let buf3 = incLexeme buf2 in
451 case currentChar# buf3 of
452 '-'# -> expandWhile (isDigit) (incLexeme buf3)
453 _ -> expandWhile (isDigit) buf3
455 in let v = readRational__ (lexemeToString l) in
456 cont (ITrational v) (stepOverLexeme l)
458 _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
463 lex_keyword cont buf =
464 -- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $
465 case currentChar# buf of
466 ':'# -> case lookAhead# buf 1# of
467 '_'# -> -- a binding, type (and other id-info) follows,
468 -- to make the parser ever so slightly, we push
470 lex_decl cont (stepOnBy# buf 2#)
471 v# -> cont (ITunknown (['_',':',C# v#])) (stepOnBy# buf 2#)
473 case expandWhile (is_kwd_char) buf of
475 let kw = lexemeToFastString buf' in
476 -- _trace ("kw: "++lexemeToString buf') $
477 case lookupUFM ifaceKeywordsFM kw of
478 Nothing -> cont (ITunknown (_UNPK_ kw)) -- (minor) sigh
479 (stepOverLexeme buf')
480 Just xx -> cont xx (stepOverLexeme buf')
483 case doDiscard False buf of -- spin until ;; is found
485 {- _trace (show (lexemeToString buf')) $ -}
486 case currentChar# buf' of
487 '\n'# -> -- newline, no id info.
488 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
489 (stepOverLexeme buf')
490 '\r'# -> -- just to be sure for those Win* boxes..
491 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
492 (stepOverLexeme buf')
494 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
495 (stepOverLexeme buf')
496 c -> -- run all over the id info
497 case doDiscard False (stepOverLexeme buf') of -- spin until ;; is found (outside a string!)
499 --_trace ((C# c):show (lexemeToString (decLexeme buf'))) $
500 --_trace (show (lexemeToString (decLexeme buf''))) $
502 if opt_IgnoreIfacePragmas then
505 Just (lexemeToBuffer (decLexeme buf''))
508 cont (ITtysig (lexemeToBuffer (decLexeme buf')) idinfo)
509 (stepOverLexeme buf'')
512 is_kwd_char c@(C# c#) =
513 isAlphanum c || -- OLD: c `elem` "_@/\\"
524 lex_cstring cont buf =
525 -- _trace ("lex_cstring: "++[C# (currentChar# buf)]) $
526 case expandUntilMatch buf "\'\'" of
527 buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
528 (stepOverLexeme buf')
531 lex_tuple cont module_dot buf =
532 -- _trace ("lex_tuple: "++[C# (currentChar# buf)]) $
536 case currentChar# buf of
537 ','# -> go (n+1) (stepOn buf)
538 ')'# -> end_lex_id cont module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
539 _ -> cont (ITunknown ("tuple " ++ show n)) buf
541 -- Similarly ' itself is ok inside an identifier, but not at the start
543 -- id_arr is an array of bytes, indexed by characters,
544 -- containing 0 if the character isn't a valid character from an identifier
545 -- and 1 if it is. It's just a memo table for is_id_char.
546 id_arr :: ByteArray Int
549 newCharArray (0,255) >>= \ barr ->
551 loop 256# = return ()
553 if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
554 writeCharArray barr (I# i#) '\1' >>
557 writeCharArray barr (I# i#) '\0' >>
561 unsafeFreezeByteArray barr)
565 ByteArray _ arr# = id_arr
567 case ord# (indexCharArray# arr# (ord# c#)) of
571 --OLD: is_id_char c@(C# c#) = isAlphanum c || is_sym c#
575 ':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True;
576 '#'# -> True; '$'# -> True; '%'# -> True;
577 '&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True;
578 '/'# -> True; '<'# -> True; '='# -> True; '>'# -> True;
579 '?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True;
580 '-'# -> True; '~'# -> True; '@'# -> True; _ -> False }
582 --isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
585 -- mod_arr is an array of bytes, indexed by characters,
586 -- containing 0 if the character isn't a valid character from a module name,
588 mod_arr :: ByteArray Int
591 newCharArray (0,255) >>= \ barr ->
593 loop 256# = return ()
595 if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
596 writeCharArray barr (I# i#) '\1' >>
599 writeCharArray barr (I# i#) '\0' >>
603 unsafeFreezeByteArray barr)
606 is_mod_char (C# c#) =
608 ByteArray _ arr# = mod_arr
610 case ord# (indexCharArray# arr# (ord# c#)) of
614 --isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
617 -- _trace ("lex_id: "++[C# (currentChar# buf)]) $
618 case expandWhile (is_mod_char) buf of
620 case currentChar# buf' of
621 '.'# -> munch buf' HiFile
622 '!'# -> munch buf' HiBootFile
623 _ -> lex_id2 cont Nothing buf'
626 if not (emptyLexeme buf') then
627 -- _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $
628 case lexemeToFastString buf' of
629 l@(FastString u# l# ba#) -> lex_id2 cont (Just (FastString u# l# ba#, hif))
630 (stepOn (stepOverLexeme buf'))
632 lex_id2 cont Nothing buf'
635 -- Dealt with the Module.part
636 lex_id2 cont module_dot buf =
637 -- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
638 case currentChar# buf of
640 '['# -> -- Special case for []
641 case lookAhead# buf 1# of
642 ']'# -> end_lex_id cont module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
643 _ -> lex_id3 cont module_dot buf
645 '('# -> -- Special case for (,,,)
646 case lookAhead# buf 1# of
647 ')'# -> end_lex_id cont module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
648 ','# -> lex_tuple cont module_dot (stepOnBy# buf 2#)
649 _ -> lex_id3 cont module_dot buf
650 ':'# -> lex_id3 cont module_dot (incLexeme buf)
653 Nothing -> lex_id3 cont module_dot buf
654 Just ghc -> -- this should be "GHC" (current home of (->))
655 case lookAhead# buf 1# of
656 '>'# -> end_lex_id cont module_dot (ITconid SLIT("->"))
658 _ -> lex_id3 cont module_dot buf
659 _ -> lex_id3 cont module_dot buf
663 -- Dealt with [], (), : special cases
665 lex_id3 cont module_dot buf =
666 -- _trace ("lex_id3: "++[C# (currentChar# buf)]) $
667 case expandWhile (is_id_char) buf of
671 end_lex_id cont module_dot (mk_var_token lexeme) new_buf
673 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
674 Just kwd_token -> cont kwd_token new_buf
675 Nothing -> cont (mk_var_token lexeme) new_buf
677 lexeme = lexemeToFastString buf'
678 new_buf = stepOverLexeme buf'
682 lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs
683 lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs
684 lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs
685 lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs
686 lex_id2 module_dot xs cs = lex_id3 module_dot xs cs
689 -- Dealt with [], (), : special cases
692 lex_id3 module_dot len_xs xs cs =
693 case my_span' (is_id_char) cs of
694 (xs1,len_xs1,rest) ->
696 Just m -> end_lex_id (Just m) (mk_var_token rxs) rest --OLD:_PK_ (reverse xs))) rest
698 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM rxs of
699 Just kwd_token -> kwd_token : lexIface rest
700 other -> token : lexIface cs end_lex_id Nothing (mk_var_token rxs) rest
702 rxs = packNChars (len_xs+len_xs1) (xs++xs1) -- OLD: _PK_ (reverse xs)
704 mk_var_token pk_str =
709 -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower,
710 -- remove the second half of disjunction when using a 1.3 prelude.
712 if isUpper f then ITconid pk_str
713 else if isLower f then ITvarid pk_str
714 else if f == ':' then ITconsym pk_str
715 else if isLowerISO f then ITvarid pk_str
716 else if isUpperISO f then ITconid pk_str
720 mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n
721 | f == ':' = ITconsym n
722 | isAlpha f = ITvarid n
723 | otherwise = ITvarsym n
728 end_lex_id cont Nothing token buf = cont token buf
729 end_lex_id cont (Just (m,hif)) token buf =
731 ITconid n -> cont (ITqconid (m,n,hif)) buf
732 ITvarid n -> cont (ITqvarid (m,n,hif)) buf
733 ITconsym n -> cont (ITqconsym (m,n,hif)) buf
735 -- Special case for ->
736 -- "->" by itself is a special token (ITrarrow),
737 -- but M.-> is a ITqconid
738 ITvarsym n | n == SLIT("->")
739 -> cont (ITqconsym (m,n,hif)) buf
741 ITvarsym n -> cont (ITqvarsym (m,n,hif)) buf
743 -- ITbang can't happen here I think
744 -- ITbang -> cont (ITqvarsym (m,SLIT("!"),hif)) buf
746 _ -> cont (ITunknown (show token)) buf
749 ifaceKeywordsFM :: UniqFM IfaceToken
750 ifaceKeywordsFM = listToUFM $
751 map (\ (x,y) -> (_PK_ x,y))
754 ,("letrec_", ITletrec)
755 ,("interface_", ITinterface)
756 ,("usages_", ITusages)
757 ,("versions_", ITversions)
758 ,("exports_", ITexports)
759 ,("instance_modules_", ITinstance_modules)
760 ,("instances_", ITinstances)
761 ,("fixities_", ITfixities)
762 ,("declarations_", ITdeclarations)
763 ,("pragmas_", ITpragmas)
764 ,("forall_", ITforall)
765 ,("u_", ITunfold False)
766 ,("U_", ITunfold True)
768 ,("P_", ITspecialise)
769 ,("coerce_", ITcoerce)
770 ,("inline_", ITinline)
772 ,("integer_", ITinteger_lit)
773 ,("rational_", ITrational_lit)
774 ,("addr_", ITaddr_lit)
775 ,("float_", ITfloat_lit)
776 ,("string_", ITstring_lit)
777 ,("litlit_", ITlit_lit)
778 ,("ccall_", ITccall (False, False))
779 ,("ccall_GC_", ITccall (False, True))
780 ,("casm_", ITccall (True, False))
781 ,("casm_GC_", ITccall (True, True))
784 haskellKeywordsFM = listToUFM $
785 map (\ (x,y) -> (_PK_ x,y))
788 ,("newtype", ITnewtype)
791 ,("instance", ITinstance)
792 ,("infixl", ITinfixl)
793 ,("infixr", ITinfixr)
796 ,("case#", ITprim_case)
800 ,("deriving", ITderiving)
812 -- doDiscard rips along really fast, looking for a double semicolon,
813 -- indicating the end of the pragma we're skipping
814 doDiscard inStr buf =
815 -- _trace (show (C# (currentChar# buf))) $
816 case currentChar# buf of
819 case lookAhead# buf 1# of
820 ';'# -> incLexeme (incLexeme buf)
821 _ -> doDiscard inStr (incLexeme buf)
823 doDiscard inStr (incLexeme buf)
826 odd_slashes buf flg i# =
827 case lookAhead# buf i# of
828 '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
831 case lookAhead# buf (negateInt# 1#) of --backwards, actually
832 '\\'# -> -- escaping something..
833 if odd_slashes buf True (negateInt# 2#) then
834 -- odd number of slashes, " is escaped.
835 doDiscard inStr (incLexeme buf)
837 -- even number of slashes, \ is escaped.
838 doDiscard (not inStr) (incLexeme buf)
839 _ -> case inStr of -- forced to avoid build-up
840 True -> doDiscard False (incLexeme buf)
841 False -> doDiscard True (incLexeme buf)
842 _ -> doDiscard inStr (incLexeme buf)
847 my_span :: (a -> Bool) -> [a] -> ([a],[a])
848 my_span p xs = go [] xs
850 go so_far (x:xs') | p x = go (x:so_far) xs'
851 go so_far xs = (reverse so_far, xs)
853 my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a])
854 my_span' p xs = go [] 0 xs
856 go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs'
857 go so_far n xs = (reverse so_far,n, xs)
861 %************************************************************************
863 \subsection{Other utility functions
865 %************************************************************************
868 type IfM a = StringBuffer -- Input string
872 returnIf :: a -> IfM a
873 returnIf a s l = Succeeded a
875 thenIf :: IfM a -> (a -> IfM b) -> IfM b
876 m `thenIf` k = \s l ->
878 Succeeded a -> k a s l
879 Failed err -> Failed err
881 getSrcLocIf :: IfM SrcLoc
882 getSrcLocIf s l = Succeeded l
885 happyError s l = Failed (ifaceParseErr l ([]::[IfaceToken]){-Todo-})
889 Note that if the file we're processing ends with `hi-boot',
890 we accept it on faith as having the right version.
891 This is done so that .hi-boot files that comes with hsc
892 don't have to be updated before every release, and it
893 allows us to share .hi-boot files with versions of hsc
894 that don't have .hi version checking (e.g., ghc-2.10's)
896 If the version number is 0, the checking is also turned off.
898 checkVersion :: Maybe Integer -> IfM ()
899 checkVersion mb@(Just v) s l
900 | (v==0) || (v == PROJECTVERSION) = Succeeded ()
901 | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
902 checkVersion mb@Nothing s l
903 | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile l)) = Succeeded ()
904 | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
906 -----------------------------------------------------------------
909 = hsep [ppr l, ptext SLIT("Interface-file parse error;"),
910 ptext SLIT("toks="), text (show (take 10 toks))]
912 ifaceVersionErr hi_vers l toks
913 = hsep [ppr l, ptext SLIT("Interface file version error;"),
914 ptext SLIT("Expected"), int PROJECTVERSION,
915 ptext SLIT(" found "), pp_version]
919 Nothing -> ptext SLIT("pre ghc-3.02 version")
920 Just v -> ptext SLIT("version") <+> integer v