1 --------------------------------------------------------
3 There's a known bug in here:
5 If an interface file ends prematurely, Lex tries to
6 do headFS of an empty FastString.
8 An example that provokes the error is
10 f _:_ _forall_ [a] <<<END OF FILE>>>
11 --------------------------------------------------------
15 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
17 \section[Lexical analysis]{Lexical analysis}
22 isLexCon, isLexVar, isLexId, isLexSym,
23 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
24 mkTupNameStr, ifaceParseErr,
27 IfaceToken(..), lexIface, IfM, thenIf, returnIf, getSrcLocIf,
34 #include "HsVersions.h"
36 import Char (isDigit, isAlphanum, isUpper,isLower, isSpace, ord )
37 import List ( isSuffixOf )
39 import {-# SOURCE #-} CostCentre
41 import CmdLineOpts ( opt_IgnoreIfacePragmas )
42 import Demand ( Demand(..) {- instance Read -} )
43 import UniqFM ( UniqFM, listToUFM, lookupUFM)
44 import BasicTypes ( NewOrData(..), IfaceFlavour(..) )
45 import SrcLoc ( SrcLoc, incSrcLine, srcLocFile )
47 import Maybes ( MaybeErr(..) )
48 import ErrUtils ( ErrMsg(..) )
50 import Util ( nOfThem, panic )
58 %************************************************************************
60 \subsection{Lexical categories}
62 %************************************************************************
64 These functions test strings to see if they fit the lexical categories
65 defined in the Haskell report. Normally applied as in e.g. @isCon
69 isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
70 isLexVarId, isLexVarSym :: FAST_STRING -> Bool
72 isLexCon cs = isLexConId cs || isLexConSym cs
73 isLexVar cs = isLexVarId cs || isLexVarSym cs
75 isLexId cs = isLexConId cs || isLexVarId cs
76 isLexSym cs = isLexConSym cs || isLexVarSym cs
82 | cs == SLIT("[]") = True
83 | c == '(' = True -- (), (,), (,,), ...
84 | otherwise = isUpper c || isUpperISO c
90 | otherwise = isLower c || isLowerISO c
96 | otherwise = c == ':'
103 | otherwise = isSymbolASCII c
109 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
110 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
111 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
112 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
113 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
114 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
118 %************************************************************************
120 \subsection{Tuple strings -- ugh!}
122 %************************************************************************
125 mkTupNameStr 0 = SLIT("()")
126 mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
127 mkTupNameStr 2 = _PK_ "(,)" -- not strictly necessary
128 mkTupNameStr 3 = _PK_ "(,,)" -- ditto
129 mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
130 mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
135 %************************************************************************
137 \subsection{Data types}
139 %************************************************************************
141 The token data type, fairly un-interesting except from two constructors,
142 @ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity,
143 strictness, unfolding etc) and types for id decls.
145 The Idea/Observation here is that the renamer needs to scan through
146 all of an interface file before it can continue. But only a fraction
147 of the information contained in the file turns out to be useful, so
148 delaying as much as possible of the scanning and parsing of an
149 interface file Makes Sense (Heap profiles of the compiler
150 show at a reduction in heap usage by at least a factor of two,
153 Hence, the interface file lexer spots when value declarations are
154 being scanned and return the @ITidinfo@ and @ITtype@ constructors
155 for the type and any other id info for that binding (unfolding, strictness
156 etc). These constructors are applied to the result of lexing these sub-chunks.
158 The lexing of the type and id info is all done lazily, of course, so
159 the scanning (and subsequent parsing) will be done *only* on the ids the
160 renamer finds out that it is interested in. The rest will just be junked.
161 Laziness, you know it makes sense :-)
165 = ITinterface -- keywords
185 | ITbang -- magic symbols
200 | ITvarid FAST_STRING
201 | ITconid FAST_STRING
202 | ITvarsym FAST_STRING
203 | ITconsym FAST_STRING
204 | ITqvarid (FAST_STRING,FAST_STRING,IfaceFlavour)
205 | ITqconid (FAST_STRING,FAST_STRING,IfaceFlavour)
206 | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour)
207 | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour)
209 | ITtysig StringBuffer (Maybe StringBuffer)
210 -- lazily return the stream of tokens for
211 -- the info attached to an id.
212 -- Stuff for reading unfoldings
214 | ITunfold Bool -- True <=> there's an INLINE pragma on this Id
215 | ITstrict [Demand] | ITbottom
216 | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
217 | ITcoerce_in | ITcoerce_out | ITatsign
218 | ITccall (Bool,Bool) -- (is_casm, may_gc)
220 | ITchar Char | ITstring FAST_STRING
221 | ITinteger Integer | ITdouble Double
222 | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
223 | ITunknown String -- Used when the lexer can't make sense of it
224 | ITeof -- end of file token
225 deriving Text -- debugging
227 instance Text CostCentre -- cheat!
231 %************************************************************************
233 \subsection{The lexical analyser}
235 %************************************************************************
238 lexIface :: (IfaceToken -> IfM a) -> IfM a
241 -- if bufferExhausted buf then
244 -- _trace ("Lexer: "++[C# (currentChar# buf)]) $
245 case currentChar# buf of
246 -- whitespace and comments, ignore.
247 ' '# -> lexIface cont (stepOn buf)
248 '\t'# -> lexIface cont (stepOn buf)
249 '\n'# -> \ loc -> lexIface cont (stepOn buf) (incSrcLine loc)
251 -- Numbers and comments
253 case lookAhead# buf 1# of
254 '-'# -> lex_comment cont (stepOnBy# buf 2#)
257 then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
260 -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
261 -- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
264 case prefixMatch (stepOn buf) "..)" of
265 Just buf' -> cont ITdotdot (stepOverLexeme buf')
267 case lookAhead# buf 1# of
268 ','# -> lex_tuple cont Nothing (stepOnBy# buf 2#)
269 ')'# -> cont (ITconid SLIT("()")) (stepOnBy# buf 2#)
270 _ -> cont IToparen (stepOn buf)
272 '{'# -> cont ITocurly (stepOn buf)
273 '}'# -> cont ITccurly (stepOn buf)
274 ')'# -> cont ITcparen (stepOn buf)
276 case lookAhead# buf 1# of
277 ']'# -> cont (ITconid SLIT("[]")) (stepOnBy# buf 2#)
278 _ -> cont ITobrack (stepOn buf)
279 ']'# -> cont ITcbrack (stepOn buf)
280 ','# -> cont ITcomma (stepOn buf)
281 ';'# -> cont ITsemi (stepOn buf)
282 '\"'# -> case untilEndOfString# (stepOn buf) of
284 -- the string literal does *not* include the dquotes
285 case lexemeToFastString buf' of
286 v -> cont (ITstring v) (stepOn (stepOverLexeme buf'))
289 -- untilEndOfChar# extends the current lexeme until
290 -- it hits a non-escaped single quote. The lexeme of the
291 -- StringBuffer returned does *not* include the closing quote,
292 -- hence we augment the lexeme and make sure to add the
293 -- starting quote, before `read'ing the string.
295 case untilEndOfChar# (stepOn buf) of
296 buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
297 [ (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
299 -- ``thingy'' form for casm
301 case lookAhead# buf 1# of
302 '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `` and go.
303 _ -> lex_id cont (incLexeme buf) -- add ` to lexeme and assume
304 -- scanning an id of some sort.
307 case lookAhead# buf 1# of
308 'S'# -> case lookAhead# buf 2# of
310 lex_demand cont (stepOnUntil (not . isSpace)
311 (stepOnBy# buf 3#)) -- past _S_
312 's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of
313 Just buf' -> lex_scc cont (stepOnUntil (not . isSpace) (stepOverLexeme buf'))
314 Nothing -> lex_keyword cont (stepOnBy# buf 1#) -- drop the '_' and assume
316 _ -> lex_keyword cont (stepOn buf)
319 if bufferExhausted (stepOn buf) then
324 if isDigit (C# c) then
325 lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
329 lex_comment cont buf =
330 -- _trace ("comment: "++[C# (currentChar# buf)]) $
331 case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
334 lex_demand cont buf =
335 -- _trace ("demand: "++[C# (currentChar# buf)]) $
336 case read_em [] buf of { (ls,buf') -> cont (ITstrict ls) (stepOverLexeme buf')}
338 -- code snatched from Demand.lhs
340 -- _trace ("read_em: "++[C# (currentChar# buf)]) $
341 case currentChar# buf of
342 'L'# -> read_em (WwLazy False : acc) (stepOn buf)
343 'A'# -> read_em (WwLazy True : acc) (stepOn buf)
344 'S'# -> read_em (WwStrict : acc) (stepOn buf)
345 'P'# -> read_em (WwPrim : acc) (stepOn buf)
346 'E'# -> read_em (WwEnum : acc) (stepOn buf)
347 ')'# -> (reverse acc, stepOn buf)
348 'U'# -> do_unpack DataType True acc (stepOnBy# buf 2#)
349 'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
350 'N'# -> do_unpack NewType True acc (stepOnBy# buf 2#)
351 'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
352 _ -> (reverse acc, buf)
354 do_unpack new_or_data wrapper_unpacks acc buf
355 = case read_em [] buf of
356 (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
360 -- _trace ("scc: "++[C# (currentChar# buf)]) $
361 case currentChar# buf of
364 case prefixMatch (stepOn buf) "NO_CC\"" of
365 Just buf' -> cont (ITscc noCostCentre) (stepOverLexeme buf')
367 case prefixMatch (stepOn buf) "CURRENT_CC\"" of
368 Just buf' -> cont (ITscc useCurrentCostCentre) (stepOverLexeme buf')
370 case prefixMatch (stepOn buf) "OVERHEAD\"" of
371 Just buf' -> cont (ITscc overheadCostCentre) (stepOverLexeme buf')
373 case prefixMatch (stepOn buf) "DONT_CARE\"" of
374 Just buf' -> cont (ITscc dontCareCostCentre) (stepOverLexeme buf')
376 case prefixMatch (stepOn buf) "SUBSUMED\"" of
377 Just buf' -> cont (ITscc subsumedCosts) (stepOverLexeme buf')
379 case prefixMatch (stepOn buf) "CAFs_in_...\"" of
380 Just buf' -> cont (ITscc preludeCafsCostCentre) (stepOverLexeme buf')
382 case prefixMatch (stepOn buf) "CC_CAFs_in_..." of
384 case untilChar# (stepOverLexeme buf') '\"'# of
385 buf'' -> cont (ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_)) (stepOn (stepOverLexeme buf''))
387 case prefixMatch (stepOn buf) "DICTs_in_...\"" of
388 Just buf' -> cont (ITscc (preludeDictsCostCentre True)) (stepOverLexeme buf')
390 case prefixMatch (stepOn buf) "CC_DICTs_in_..." of
392 case untilChar# (stepOverLexeme buf') '\"'# of
393 buf'' -> cont (ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True))
394 (stepOn (stepOverLexeme buf''))
398 case untilChar# buf '/'# of
400 let mod_name = lexemeToFastString buf' in
401 -- case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
403 -- let grp_name = lexemeToFastString buf'' in
404 case untilChar# (stepOn (stepOverLexeme buf')) '\"'# of
406 -- The label may contain arbitrary characters, so it
407 -- may have been escaped etc., hence we `read' it in to get
408 -- rid of these meta-chars in the string and then pack it (again.)
409 -- ToDo: do the same for module name (single quotes allowed in m-names).
410 -- BTW, the code in this module is totally gruesome..
411 let upk_label = _UNPK_ (lexemeToFastString buf'') in
412 case reads ('"':upk_label++"\"") of
414 let cc_name = _PK_ cc_label in
415 (mkUserCC cc_name mod_name _NIL_{-grp_name-},
416 stepOn (stepOverLexeme buf''))
418 trace ("trouble lexing scc label: " ++ upk_label ++ " , ignoring")
419 (mkUserCC _NIL_ mod_name _NIL_{-grp_name-},
420 stepOn (stepOverLexeme buf''))
422 case prefixMatch (stepOn buf) "CAF:" of
424 case match_user_cc (stepOverLexeme buf') of
425 (cc, buf'') -> cont (ITscc (cafifyCC cc)) buf''
427 case match_user_cc (stepOn buf) of
428 (cc, buf'') -> cont (ITscc cc) buf''
429 c -> cont (ITunknown [C# c]) (stepOn buf)
433 lex_num :: (IfaceToken -> IfM a) ->
434 (Int -> Int) -> Int# -> IfM a
435 lex_num cont minus acc# buf =
436 -- _trace ("lex_num: "++[C# (currentChar# buf)]) $
437 case scanNumLit (I# acc#) buf of
439 case currentChar# buf' of
441 -- this case is not optimised at all, as the
442 -- presence of floating point numbers in interface
443 -- files is not that common. (ToDo)
444 case expandWhile (isDigit) (incLexeme buf') of
445 buf'' -> -- points to first non digit char
446 case reads (lexemeToString buf'') of
447 [(v,_)] -> cont (ITdouble v) (stepOverLexeme buf'')
448 _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
450 -- case reads (lexemeToString buf') of
451 -- [(i,_)] -> cont (ITinteger i) (stepOverLexeme buf')
454 lex_keyword cont buf =
455 -- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $
456 case currentChar# buf of
457 ':'# -> case lookAhead# buf 1# of
458 '_'# -> -- a binding, type (and other id-info) follows,
459 -- to make the parser ever so slightly, we push
461 lex_decl cont (stepOnBy# buf 2#)
462 v# -> cont (ITunknown (['_',':',C# v#])) (stepOnBy# buf 2#)
464 case expandWhile (is_kwd_char) buf of
466 let kw = lexemeToFastString buf' in
467 -- _trace ("kw: "++lexemeToString buf') $
468 case lookupUFM ifaceKeywordsFM kw of
469 Nothing -> cont (ITunknown (_UNPK_ kw)) -- (minor) sigh
470 (stepOverLexeme buf')
471 Just xx -> cont xx (stepOverLexeme buf')
474 case doDiscard False buf of -- spin until ;; is found
476 {- _trace (show (lexemeToString buf')) $ -}
477 case currentChar# buf' of
478 '\n'# -> -- newline, no id info.
479 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
480 (stepOverLexeme buf')
481 '\r'# -> -- just to be sure for those Win* boxes..
482 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
483 (stepOverLexeme buf')
485 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
486 (stepOverLexeme buf')
487 c -> -- run all over the id info
488 case doDiscard False (stepOverLexeme buf') of -- spin until ;; is found (outside a string!)
490 --_trace ((C# c):show (lexemeToString (decLexeme buf'))) $
491 --_trace (show (lexemeToString (decLexeme buf''))) $
493 if opt_IgnoreIfacePragmas then
496 Just (lexemeToBuffer (decLexeme buf''))
499 cont (ITtysig (lexemeToBuffer (decLexeme buf')) idinfo)
500 (stepOverLexeme buf'')
503 is_kwd_char c@(C# c#) =
504 isAlphanum c || -- OLD: c `elem` "_@/\\"
515 lex_cstring cont buf =
516 -- _trace ("lex_cstring: "++[C# (currentChar# buf)]) $
517 case expandUntilMatch buf "\'\'" of
518 buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
519 (stepOverLexeme buf')
522 lex_tuple cont module_dot buf =
523 -- _trace ("lex_tuple: "++[C# (currentChar# buf)]) $
527 case currentChar# buf of
528 ','# -> go (n+1) (stepOn buf)
529 ')'# -> end_lex_id cont module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
530 _ -> cont (ITunknown ("tuple " ++ show n)) buf
532 -- Similarly ' itself is ok inside an identifier, but not at the start
534 -- id_arr is an array of bytes, indexed by characters,
535 -- containing 0 if the character isn't a valid character from an identifier
536 -- and 1 if it is. It's just a memo table for is_id_char.
537 id_arr :: ByteArray Int
540 newCharArray (0,255) >>= \ barr ->
542 loop 256# = return ()
544 if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
545 writeCharArray barr (I# i#) '\1' >>
548 writeCharArray barr (I# i#) '\0' >>
552 unsafeFreezeByteArray barr)
556 ByteArray _ arr# = id_arr
558 case ord# (indexCharArray# arr# (ord# c#)) of
562 --OLD: is_id_char c@(C# c#) = isAlphanum c || is_sym c#
566 ':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True;
567 '#'# -> True; '$'# -> True; '%'# -> True;
568 '&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True;
569 '/'# -> True; '<'# -> True; '='# -> True; '>'# -> True;
570 '?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True;
571 '-'# -> True; '~'# -> True; '@'# -> True; _ -> False }
573 --isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
576 -- mod_arr is an array of bytes, indexed by characters,
577 -- containing 0 if the character isn't a valid character from a module name,
579 mod_arr :: ByteArray Int
582 newCharArray (0,255) >>= \ barr ->
584 loop 256# = return ()
586 if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
587 writeCharArray barr (I# i#) '\1' >>
590 writeCharArray barr (I# i#) '\0' >>
594 unsafeFreezeByteArray barr)
597 is_mod_char (C# c#) =
599 ByteArray _ arr# = mod_arr
601 case ord# (indexCharArray# arr# (ord# c#)) of
605 --isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
608 -- _trace ("lex_id: "++[C# (currentChar# buf)]) $
609 case expandWhile (is_mod_char) buf of
611 case currentChar# buf' of
612 '.'# -> munch buf' HiFile
613 '!'# -> munch buf' HiBootFile
614 _ -> lex_id2 cont Nothing buf'
617 if not (emptyLexeme buf') then
618 -- _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $
619 case lexemeToFastString buf' of
620 l@(FastString u# l# ba#) -> lex_id2 cont (Just (FastString u# l# ba#, hif))
621 (stepOn (stepOverLexeme buf'))
623 lex_id2 cont Nothing buf'
626 -- Dealt with the Module.part
627 lex_id2 cont module_dot buf =
628 -- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
629 case currentChar# buf of
631 '['# -> -- Special case for []
632 case lookAhead# buf 1# of
633 ']'# -> end_lex_id cont module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
634 _ -> lex_id3 cont module_dot buf
636 '('# -> -- Special case for (,,,)
637 case lookAhead# buf 1# of
638 ')'# -> end_lex_id cont module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
639 ','# -> lex_tuple cont module_dot (stepOnBy# buf 2#)
640 _ -> lex_id3 cont module_dot buf
641 ':'# -> lex_id3 cont module_dot (incLexeme buf)
644 Nothing -> lex_id3 cont module_dot buf
645 Just ghc -> -- this should be "GHC" (current home of (->))
646 case lookAhead# buf 1# of
647 '>'# -> end_lex_id cont module_dot (ITconid SLIT("->"))
649 _ -> lex_id3 cont module_dot buf
650 _ -> lex_id3 cont module_dot buf
654 -- Dealt with [], (), : special cases
656 lex_id3 cont module_dot buf =
657 -- _trace ("lex_id3: "++[C# (currentChar# buf)]) $
658 case expandWhile (is_id_char) buf of
662 end_lex_id cont module_dot (mk_var_token lexeme) new_buf
664 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
665 Just kwd_token -> cont kwd_token new_buf
666 Nothing -> cont (mk_var_token lexeme) new_buf
668 lexeme = lexemeToFastString buf'
669 new_buf = stepOverLexeme buf'
673 lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs
674 lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs
675 lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs
676 lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs
677 lex_id2 module_dot xs cs = lex_id3 module_dot xs cs
680 -- Dealt with [], (), : special cases
683 lex_id3 module_dot len_xs xs cs =
684 case my_span' (is_id_char) cs of
685 (xs1,len_xs1,rest) ->
687 Just m -> end_lex_id (Just m) (mk_var_token rxs) rest --OLD:_PK_ (reverse xs))) rest
689 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM rxs of
690 Just kwd_token -> kwd_token : lexIface rest
691 other -> token : lexIface cs end_lex_id Nothing (mk_var_token rxs) rest
693 rxs = packNChars (len_xs+len_xs1) (xs++xs1) -- OLD: _PK_ (reverse xs)
695 mk_var_token pk_str =
700 -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower,
701 -- remove the second half of disjunction when using a 1.3 prelude.
703 if isUpper f then ITconid pk_str
704 else if isLower f then ITvarid pk_str
705 else if f == ':' then ITconsym pk_str
706 else if isLowerISO f then ITvarid pk_str
707 else if isUpperISO f then ITconid pk_str
711 mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n
712 | f == ':' = ITconsym n
713 | isAlpha f = ITvarid n
714 | otherwise = ITvarsym n
719 end_lex_id cont Nothing token buf = cont token buf
720 end_lex_id cont (Just (m,hif)) token buf =
722 ITconid n -> cont (ITqconid (m,n,hif)) buf
723 ITvarid n -> cont (ITqvarid (m,n,hif)) buf
724 ITconsym n -> cont (ITqconsym (m,n,hif)) buf
726 -- Special case for ->
727 -- "->" by itself is a special token (ITrarrow),
728 -- but M.-> is a ITqconid
729 ITvarsym n | n == SLIT("->")
730 -> cont (ITqconsym (m,n,hif)) buf
732 ITvarsym n -> cont (ITqvarsym (m,n,hif)) buf
734 -- ITbang can't happen here I think
735 -- ITbang -> cont (ITqvarsym (m,SLIT("!"),hif)) buf
737 _ -> cont (ITunknown (show token)) buf
740 ifaceKeywordsFM :: UniqFM IfaceToken
741 ifaceKeywordsFM = listToUFM $
742 map (\ (x,y) -> (_PK_ x,y))
745 ,("letrec_", ITletrec)
746 ,("interface_", ITinterface)
747 ,("usages_", ITusages)
748 ,("versions_", ITversions)
749 ,("exports_", ITexports)
750 ,("instance_modules_", ITinstance_modules)
751 ,("instances_", ITinstances)
752 ,("fixities_", ITfixities)
753 ,("declarations_", ITdeclarations)
754 ,("pragmas_", ITpragmas)
755 ,("forall_", ITforall)
756 ,("U_", ITunfold False)
757 ,("U!_", ITunfold True)
759 ,("coerce_in_", ITcoerce_in)
760 ,("coerce_out_", ITcoerce_out)
762 ,("integer_", ITinteger_lit)
763 ,("rational_", ITrational_lit)
764 ,("addr_", ITaddr_lit)
765 ,("float_", ITfloat_lit)
766 ,("string_", ITstring_lit)
767 ,("litlit_", ITlit_lit)
768 ,("ccall_", ITccall (False, False))
769 ,("ccall_GC_", ITccall (False, True))
770 ,("casm_", ITccall (True, False))
771 ,("casm_GC_", ITccall (True, True))
774 haskellKeywordsFM = listToUFM $
775 map (\ (x,y) -> (_PK_ x,y))
778 ,("newtype", ITnewtype)
781 ,("instance", ITinstance)
782 ,("infixl", ITinfixl)
783 ,("infixr", ITinfixr)
786 ,("case#", ITprim_case)
790 ,("deriving", ITderiving)
802 -- doDiscard rips along really fast, looking for a double semicolon,
803 -- indicating the end of the pragma we're skipping
804 doDiscard inStr buf =
805 -- _trace (show (C# (currentChar# buf))) $
806 case currentChar# buf of
809 case lookAhead# buf 1# of
810 ';'# -> incLexeme (incLexeme buf)
811 _ -> doDiscard inStr (incLexeme buf)
813 doDiscard inStr (incLexeme buf)
816 odd_slashes buf flg i# =
817 case lookAhead# buf i# of
818 '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
821 case lookAhead# buf (negateInt# 1#) of --backwards, actually
822 '\\'# -> -- escaping something..
823 if odd_slashes buf True (negateInt# 2#) then
824 -- odd number of slashes, " is escaped.
825 doDiscard inStr (incLexeme buf)
827 -- even number of slashes, \ is escaped.
828 doDiscard (not inStr) (incLexeme buf)
829 _ -> case inStr of -- forced to avoid build-up
830 True -> doDiscard False (incLexeme buf)
831 False -> doDiscard True (incLexeme buf)
832 _ -> doDiscard inStr (incLexeme buf)
837 my_span :: (a -> Bool) -> [a] -> ([a],[a])
838 my_span p xs = go [] xs
840 go so_far (x:xs') | p x = go (x:so_far) xs'
841 go so_far xs = (reverse so_far, xs)
843 my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a])
844 my_span' p xs = go [] 0 xs
846 go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs'
847 go so_far n xs = (reverse so_far,n, xs)
851 %************************************************************************
853 \subsection{Other utility functions
855 %************************************************************************
858 type IfM a = StringBuffer -- Input string
862 returnIf :: a -> IfM a
863 returnIf a s l = Succeeded a
865 thenIf :: IfM a -> (a -> IfM b) -> IfM b
866 m `thenIf` k = \s l ->
868 Succeeded a -> k a s l
869 Failed err -> Failed err
871 getSrcLocIf :: IfM SrcLoc
872 getSrcLocIf s l = Succeeded l
875 happyError s l = Failed (ifaceParseErr l ([]::[IfaceToken]){-Todo-})
879 Note that if the file we're processing ends with `hi-boot',
880 we accept it on faith as having the right version.
881 This is done so that .hi-boot files that comes with hsc
882 don't have to be updated before every release, and it
883 allows us to share .hi-boot files with versions of hsc
884 that don't have .hi version checking (e.g., ghc-2.10's)
886 If the version number is 0, the checking is also turned off.
888 checkVersion :: Maybe Integer -> IfM ()
889 checkVersion mb@(Just v) s l
890 | (v==0) || (v == PROJECTVERSION) = Succeeded ()
891 | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
892 checkVersion mb@Nothing s l
893 | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile l)) = Succeeded ()
894 | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
896 -----------------------------------------------------------------
899 = hsep [ppr l, ptext SLIT("Interface-file parse error;"),
900 ptext SLIT("toks="), text (show (take 10 toks))]
902 ifaceVersionErr hi_vers l toks
903 = hsep [ppr l, ptext SLIT("Interface file version error;"),
904 ptext SLIT("Expected"), int PROJECTVERSION,
905 ptext SLIT(" found "), pp_version]
909 Nothing -> ptext SLIT("pre ghc-3.02 version")
910 Just v -> ptext SLIT("version") <+> integer v