1 --------------------------------------------------------
3 There's a known bug in here:
5 If an interface file ends prematurely, Lex tries to
6 do headFS of an empty FastString.
8 An example that provokes the error is
10 f _:_ _forall_ [a] <<<END OF FILE>>>
11 --------------------------------------------------------
15 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
17 \section[Lexical analysis]{Lexical analysis}
22 isLexCon, isLexVar, isLexId, isLexSym,
23 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
24 mkTupNameStr, ifaceParseErr,
27 IfaceToken(..), lexIface, IfM, thenIf, returnIf, getSrcLocIf,
33 #include "HsVersions.h"
35 import Char (isDigit, isAlpha, isAlphanum, isUpper,isLower, isSpace, ord )
37 import {-# SOURCE #-} CostCentre
39 import CmdLineOpts ( opt_IgnoreIfacePragmas )
40 import Demand ( Demand(..) {- instance Read -} )
41 import UniqFM ( UniqFM, listToUFM, lookupUFM)
42 import BasicTypes ( NewOrData(..), IfaceFlavour(..) )
43 import SrcLoc ( SrcLoc, incSrcLine )
45 import Maybes ( MaybeErr(..) )
46 import ErrUtils ( ErrMsg(..) )
48 import Util ( nOfThem, panic )
56 %************************************************************************
58 \subsection{Lexical categories}
60 %************************************************************************
62 These functions test strings to see if they fit the lexical categories
63 defined in the Haskell report. Normally applied as in e.g. @isCon
67 isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
68 isLexVarId, isLexVarSym :: FAST_STRING -> Bool
70 isLexCon cs = isLexConId cs || isLexConSym cs
71 isLexVar cs = isLexVarId cs || isLexVarSym cs
73 isLexId cs = isLexConId cs || isLexVarId cs
74 isLexSym cs = isLexConSym cs || isLexVarSym cs
80 | cs == SLIT("[]") = True
81 | c == '(' = True -- (), (,), (,,), ...
82 | otherwise = isUpper c || isUpperISO c
88 | otherwise = isLower c || isLowerISO c
94 | otherwise = c == ':'
101 | otherwise = isSymbolASCII c
107 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
108 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
109 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
110 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
111 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
112 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
116 %************************************************************************
118 \subsection{Tuple strings -- ugh!}
120 %************************************************************************
123 mkTupNameStr 0 = SLIT("()")
124 mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
125 mkTupNameStr 2 = _PK_ "(,)" -- not strictly necessary
126 mkTupNameStr 3 = _PK_ "(,,)" -- ditto
127 mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
128 mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
133 %************************************************************************
135 \subsection{Data types}
137 %************************************************************************
139 The token data type, fairly un-interesting except from two constructors,
140 @ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity,
141 strictness, unfolding etc) and types for id decls.
143 The Idea/Observation here is that the renamer needs to scan through
144 all of an interface file before it can continue. But only a fraction
145 of the information contained in the file turns out to be useful, so
146 delaying as much as possible of the scanning and parsing of an
147 interface file Makes Sense (Heap profiles of the compiler
148 show at a reduction in heap usage by at least a factor of two,
151 Hence, the interface file lexer spots when value declarations are
152 being scanned and return the @ITidinfo@ and @ITtype@ constructors
153 for the type and any other id info for that binding (unfolding, strictness
154 etc). These constructors are applied to the result of lexing these sub-chunks.
156 The lexing of the type and id info is all done lazily, of course, so
157 the scanning (and subsequent parsing) will be done *only* on the ids the
158 renamer finds out that it is interested in. The rest will just be junked.
159 Laziness, you know it makes sense :-)
163 = ITinterface -- keywords
183 | ITbang -- magic symbols
198 | ITvarid FAST_STRING
199 | ITconid FAST_STRING
200 | ITvarsym FAST_STRING
201 | ITconsym FAST_STRING
202 | ITqvarid (FAST_STRING,FAST_STRING,IfaceFlavour)
203 | ITqconid (FAST_STRING,FAST_STRING,IfaceFlavour)
204 | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour)
205 | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour)
207 | ITtysig StringBuffer (Maybe StringBuffer)
208 -- lazily return the stream of tokens for
209 -- the info attached to an id.
210 -- Stuff for reading unfoldings
212 | ITunfold Bool -- True <=> there's an INLINE pragma on this Id
213 | ITstrict [Demand] | ITbottom
214 | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
215 | ITcoerce_in | ITcoerce_out | ITatsign
216 | ITccall (Bool,Bool) -- (is_casm, may_gc)
218 | ITchar Char | ITstring FAST_STRING
219 | ITinteger Integer | ITdouble Double
220 | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
221 | ITunknown String -- Used when the lexer can't make sense of it
222 | ITeof -- end of file token
223 deriving Text -- debugging
225 instance Text CostCentre -- cheat!
229 %************************************************************************
231 \subsection{The lexical analyser}
233 %************************************************************************
236 lexIface :: (IfaceToken -> IfM a) -> IfM a
239 -- if bufferExhausted buf then
242 -- _trace ("Lexer: "++[C# (currentChar# buf)]) $
243 case currentChar# buf of
244 -- whitespace and comments, ignore.
245 ' '# -> lexIface cont (stepOn buf)
246 '\t'# -> lexIface cont (stepOn buf)
247 '\n'# -> \ loc -> lexIface cont (stepOn buf) (incSrcLine loc)
249 -- Numbers and comments
251 case lookAhead# buf 1# of
252 '-'# -> lex_comment cont (stepOnBy# buf 2#)
255 then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
258 -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
259 -- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
262 case prefixMatch (stepOn buf) "..)" of
263 Just buf' -> cont ITdotdot (stepOverLexeme buf')
265 case lookAhead# buf 1# of
266 ','# -> lex_tuple cont Nothing (stepOnBy# buf 2#)
267 ')'# -> cont (ITconid SLIT("()")) (stepOnBy# buf 2#)
268 _ -> cont IToparen (stepOn buf)
270 '{'# -> cont ITocurly (stepOn buf)
271 '}'# -> cont ITccurly (stepOn buf)
272 ')'# -> cont ITcparen (stepOn buf)
274 case lookAhead# buf 1# of
275 ']'# -> cont (ITconid SLIT("[]")) (stepOnBy# buf 2#)
276 _ -> cont ITobrack (stepOn buf)
277 ']'# -> cont ITcbrack (stepOn buf)
278 ','# -> cont ITcomma (stepOn buf)
279 ';'# -> cont ITsemi (stepOn buf)
280 '\"'# -> case untilEndOfString# (stepOn buf) of
282 -- the string literal does *not* include the dquotes
283 case lexemeToFastString buf' of
284 v -> cont (ITstring v) (stepOn (stepOverLexeme buf'))
287 -- untilEndOfChar# extends the current lexeme until
288 -- it hits a non-escaped single quote. The lexeme of the
289 -- StringBuffer returned does *not* include the closing quote,
290 -- hence we augment the lexeme and make sure to add the
291 -- starting quote, before `read'ing the string.
293 case untilEndOfChar# (stepOn buf) of
294 buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
295 [ (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
297 -- ``thingy'' form for casm
299 case lookAhead# buf 1# of
300 '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `` and go.
301 _ -> lex_id cont (incLexeme buf) -- add ` to lexeme and assume
302 -- scanning an id of some sort.
305 case lookAhead# buf 1# of
306 'S'# -> case lookAhead# buf 2# of
308 lex_demand cont (stepOnUntil (not . isSpace)
309 (stepOnBy# buf 3#)) -- past _S_
310 's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of
311 Just buf' -> lex_scc cont (stepOnUntil (not . isSpace) (stepOverLexeme buf'))
312 Nothing -> lex_keyword cont (stepOnBy# buf 1#) -- drop the '_' and assume
314 _ -> lex_keyword cont (stepOn buf)
317 if bufferExhausted (stepOn buf) then
322 if isDigit (C# c) then
323 lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
327 lex_comment cont buf =
328 -- _trace ("comment: "++[C# (currentChar# buf)]) $
329 case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
332 lex_demand cont buf =
333 -- _trace ("demand: "++[C# (currentChar# buf)]) $
334 case read_em [] buf of { (ls,buf') -> cont (ITstrict ls) (stepOverLexeme buf')}
336 -- code snatched from Demand.lhs
338 -- _trace ("read_em: "++[C# (currentChar# buf)]) $
339 case currentChar# buf of
340 'L'# -> read_em (WwLazy False : acc) (stepOn buf)
341 'A'# -> read_em (WwLazy True : acc) (stepOn buf)
342 'S'# -> read_em (WwStrict : acc) (stepOn buf)
343 'P'# -> read_em (WwPrim : acc) (stepOn buf)
344 'E'# -> read_em (WwEnum : acc) (stepOn buf)
345 ')'# -> (reverse acc, stepOn buf)
346 'U'# -> do_unpack DataType True acc (stepOnBy# buf 2#)
347 'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
348 'N'# -> do_unpack NewType True acc (stepOnBy# buf 2#)
349 'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
350 _ -> (reverse acc, buf)
352 do_unpack new_or_data wrapper_unpacks acc buf
353 = case read_em [] buf of
354 (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
358 -- _trace ("scc: "++[C# (currentChar# buf)]) $
359 case currentChar# buf of
362 case prefixMatch (stepOn buf) "NO_CC\"" of
363 Just buf' -> cont (ITscc noCostCentre) (stepOverLexeme buf')
365 case prefixMatch (stepOn buf) "CURRENT_CC\"" of
366 Just buf' -> cont (ITscc useCurrentCostCentre) (stepOverLexeme buf')
368 case prefixMatch (stepOn buf) "OVERHEAD\"" of
369 Just buf' -> cont (ITscc overheadCostCentre) (stepOverLexeme buf')
371 case prefixMatch (stepOn buf) "DONT_CARE\"" of
372 Just buf' -> cont (ITscc dontCareCostCentre) (stepOverLexeme buf')
374 case prefixMatch (stepOn buf) "SUBSUMED\"" of
375 Just buf' -> cont (ITscc subsumedCosts) (stepOverLexeme buf')
377 case prefixMatch (stepOn buf) "CAFs_in_...\"" of
378 Just buf' -> cont (ITscc preludeCafsCostCentre) (stepOverLexeme buf')
380 case prefixMatch (stepOn buf) "CC_CAFs_in_..." of
382 case untilChar# (stepOverLexeme buf') '\"'# of
383 buf'' -> cont (ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_)) (stepOn (stepOverLexeme buf''))
385 case prefixMatch (stepOn buf) "DICTs_in_...\"" of
386 Just buf' -> cont (ITscc (preludeDictsCostCentre True)) (stepOverLexeme buf')
388 case prefixMatch (stepOn buf) "CC_DICTs_in_..." of
390 case untilChar# (stepOverLexeme buf') '\"'# of
391 buf'' -> cont (ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True))
392 (stepOn (stepOverLexeme buf''))
396 case untilChar# buf '/'# of
398 let mod_name = lexemeToFastString buf' in
399 -- case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
401 -- let grp_name = lexemeToFastString buf'' in
402 case untilChar# (stepOn (stepOverLexeme buf')) '\"'# of
404 -- The label may contain arbitrary characters, so it
405 -- may have been escaped etc., hence we `read' it in to get
406 -- rid of these meta-chars in the string and then pack it (again.)
407 -- ToDo: do the same for module name (single quotes allowed in m-names).
408 -- BTW, the code in this module is totally gruesome..
409 let upk_label = _UNPK_ (lexemeToFastString buf'') in
410 case reads ('"':upk_label++"\"") of
412 let cc_name = _PK_ cc_label in
413 (mkUserCC cc_name mod_name _NIL_{-grp_name-},
414 stepOn (stepOverLexeme buf''))
416 trace ("trouble lexing scc label: " ++ upk_label ++ " , ignoring")
417 (mkUserCC _NIL_ mod_name _NIL_{-grp_name-},
418 stepOn (stepOverLexeme buf''))
420 case prefixMatch (stepOn buf) "CAF:" of
422 case match_user_cc (stepOverLexeme buf') of
423 (cc, buf'') -> cont (ITscc (cafifyCC cc)) buf''
425 case match_user_cc (stepOn buf) of
426 (cc, buf'') -> cont (ITscc cc) buf''
427 c -> cont (ITunknown [C# c]) (stepOn buf)
431 lex_num :: (IfaceToken -> IfM a) ->
432 (Int -> Int) -> Int# -> IfM a
433 lex_num cont minus acc# buf =
434 -- _trace ("lex_num: "++[C# (currentChar# buf)]) $
435 case scanNumLit (I# acc#) buf of
437 case currentChar# buf' of
439 -- this case is not optimised at all, as the
440 -- presence of floating point numbers in interface
441 -- files is not that common. (ToDo)
442 case expandWhile (isDigit) (incLexeme buf') of
443 buf'' -> -- points to first non digit char
444 case reads (lexemeToString buf'') of
445 [(v,_)] -> cont (ITdouble v) (stepOverLexeme buf'')
446 _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
448 -- case reads (lexemeToString buf') of
449 -- [(i,_)] -> cont (ITinteger i) (stepOverLexeme buf')
452 lex_keyword cont buf =
453 -- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $
454 case currentChar# buf of
455 ':'# -> case lookAhead# buf 1# of
456 '_'# -> -- a binding, type (and other id-info) follows,
457 -- to make the parser ever so slightly, we push
459 lex_decl cont (stepOnBy# buf 2#)
460 v# -> cont (ITunknown (['_',':',C# v#])) (stepOnBy# buf 2#)
462 case expandWhile (is_kwd_char) buf of
464 let kw = lexemeToFastString buf' in
465 -- _trace ("kw: "++lexemeToString buf') $
466 case lookupUFM ifaceKeywordsFM kw of
467 Nothing -> cont (ITunknown (_UNPK_ kw)) -- (minor) sigh
468 (stepOverLexeme buf')
469 Just xx -> cont xx (stepOverLexeme buf')
472 case doDiscard False buf of -- spin until ;; is found
474 {- _trace (show (lexemeToString buf')) $ -}
475 case currentChar# buf' of
476 '\n'# -> -- newline, no id info.
477 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
478 (stepOverLexeme buf')
479 '\r'# -> -- just to be sure for those Win* boxes..
480 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
481 (stepOverLexeme buf')
483 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
484 (stepOverLexeme buf')
485 c -> -- run all over the id info
486 case doDiscard False (stepOverLexeme buf') of -- spin until ;; is found (outside a string!)
488 --_trace ((C# c):show (lexemeToString (decLexeme buf'))) $
489 --_trace (show (lexemeToString (decLexeme buf''))) $
491 if opt_IgnoreIfacePragmas then
494 Just (lexemeToBuffer (decLexeme buf''))
497 cont (ITtysig (lexemeToBuffer (decLexeme buf')) idinfo)
498 (stepOverLexeme buf'')
501 is_kwd_char c@(C# c#) =
502 isAlphanum c || -- OLD: c `elem` "_@/\\"
513 lex_cstring cont buf =
514 -- _trace ("lex_cstring: "++[C# (currentChar# buf)]) $
515 case expandUntilMatch buf "\'\'" of
516 buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
517 (stepOverLexeme buf')
520 lex_tuple cont module_dot buf =
521 -- _trace ("lex_tuple: "++[C# (currentChar# buf)]) $
525 case currentChar# buf of
526 ','# -> go (n+1) (stepOn buf)
527 ')'# -> end_lex_id cont module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
528 _ -> cont (ITunknown ("tuple " ++ show n)) buf
530 -- Similarly ' itself is ok inside an identifier, but not at the start
532 -- id_arr is an array of bytes, indexed by characters,
533 -- containing 0 if the character isn't a valid character from an identifier
534 -- and 1 if it is. It's just a memo table for is_id_char.
535 id_arr :: ByteArray Int
538 newCharArray (0,255) >>= \ barr ->
540 loop 256# = return ()
542 if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
543 writeCharArray barr (I# i#) '\1' >>
546 writeCharArray barr (I# i#) '\0' >>
550 unsafeFreezeByteArray barr)
554 ByteArray _ arr# = id_arr
556 case ord# (indexCharArray# arr# (ord# c#)) of
560 --OLD: is_id_char c@(C# c#) = isAlphanum c || is_sym c#
564 ':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True;
565 '#'# -> True; '$'# -> True; '%'# -> True;
566 '&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True;
567 '/'# -> True; '<'# -> True; '='# -> True; '>'# -> True;
568 '?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True;
569 '-'# -> True; '~'# -> True; '@'# -> True; _ -> False }
571 --isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
574 -- mod_arr is an array of bytes, indexed by characters,
575 -- containing 0 if the character isn't a valid character from a module name,
577 mod_arr :: ByteArray Int
580 newCharArray (0,255) >>= \ barr ->
582 loop 256# = return ()
584 if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
585 writeCharArray barr (I# i#) '\1' >>
588 writeCharArray barr (I# i#) '\0' >>
592 unsafeFreezeByteArray barr)
595 is_mod_char (C# c#) =
597 ByteArray _ arr# = mod_arr
599 case ord# (indexCharArray# arr# (ord# c#)) of
603 --isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
606 -- _trace ("lex_id: "++[C# (currentChar# buf)]) $
607 case expandWhile (is_mod_char) buf of
609 case currentChar# buf' of
610 '.'# -> munch buf' HiFile
611 '!'# -> munch buf' HiBootFile
612 _ -> lex_id2 cont Nothing buf'
615 if not (emptyLexeme buf') then
616 -- _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $
617 case lexemeToFastString buf' of
618 l@(FastString u# l# ba#) -> lex_id2 cont (Just (FastString u# l# ba#, hif))
619 (stepOn (stepOverLexeme buf'))
621 lex_id2 cont Nothing buf'
624 -- Dealt with the Module.part
625 lex_id2 cont module_dot buf =
626 -- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
627 case currentChar# buf of
629 '['# -> -- Special case for []
630 case lookAhead# buf 1# of
631 ']'# -> end_lex_id cont module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
632 _ -> lex_id3 cont module_dot buf
634 '('# -> -- Special case for (,,,)
635 case lookAhead# buf 1# of
636 ')'# -> end_lex_id cont module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
637 ','# -> lex_tuple cont module_dot (stepOnBy# buf 2#)
638 _ -> lex_id3 cont module_dot buf
639 ':'# -> lex_id3 cont module_dot (incLexeme buf)
642 Nothing -> lex_id3 cont module_dot buf
643 Just ghc -> -- this should be "GHC" (current home of (->))
644 case lookAhead# buf 1# of
645 '>'# -> end_lex_id cont module_dot (ITconid SLIT("->"))
647 _ -> lex_id3 cont module_dot buf
648 _ -> lex_id3 cont module_dot buf
652 -- Dealt with [], (), : special cases
654 lex_id3 cont module_dot buf =
655 -- _trace ("lex_id3: "++[C# (currentChar# buf)]) $
656 case expandWhile (is_id_char) buf of
660 end_lex_id cont module_dot (mk_var_token lexeme) new_buf
662 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
663 Just kwd_token -> cont kwd_token new_buf
664 Nothing -> cont (mk_var_token lexeme) new_buf
666 lexeme = lexemeToFastString buf'
667 new_buf = stepOverLexeme buf'
671 lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs
672 lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs
673 lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs
674 lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs
675 lex_id2 module_dot xs cs = lex_id3 module_dot xs cs
678 -- Dealt with [], (), : special cases
681 lex_id3 module_dot len_xs xs cs =
682 case my_span' (is_id_char) cs of
683 (xs1,len_xs1,rest) ->
685 Just m -> end_lex_id (Just m) (mk_var_token rxs) rest --OLD:_PK_ (reverse xs))) rest
687 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM rxs of
688 Just kwd_token -> kwd_token : lexIface rest
689 other -> token : lexIface cs end_lex_id Nothing (mk_var_token rxs) rest
691 rxs = packNChars (len_xs+len_xs1) (xs++xs1) -- OLD: _PK_ (reverse xs)
693 mk_var_token pk_str =
698 -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower,
699 -- remove the second half of disjunction when using a 1.3 prelude.
701 if isUpper f then ITconid pk_str
702 else if isLower f then ITvarid pk_str
703 else if f == ':' then ITconsym pk_str
704 else if isLowerISO f then ITvarid pk_str
705 else if isUpperISO f then ITconid pk_str
709 mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n
710 | f == ':' = ITconsym n
711 | isAlpha f = ITvarid n
712 | otherwise = ITvarsym n
717 end_lex_id cont Nothing token buf = cont token buf
718 end_lex_id cont (Just (m,hif)) token buf =
720 ITconid n -> cont (ITqconid (m,n,hif)) buf
721 ITvarid n -> cont (ITqvarid (m,n,hif)) buf
722 ITconsym n -> cont (ITqconsym (m,n,hif)) buf
724 -- Special case for ->
725 -- "->" by itself is a special token (ITrarrow),
726 -- but M.-> is a ITqconid
727 ITvarsym n | n == SLIT("->")
728 -> cont (ITqconsym (m,n,hif)) buf
730 ITvarsym n -> cont (ITqvarsym (m,n,hif)) buf
732 -- ITbang can't happen here I think
733 -- ITbang -> cont (ITqvarsym (m,SLIT("!"),hif)) buf
735 _ -> cont (ITunknown (show token)) buf
738 ifaceKeywordsFM :: UniqFM IfaceToken
739 ifaceKeywordsFM = listToUFM $
740 map (\ (x,y) -> (_PK_ x,y))
743 ,("letrec_", ITletrec)
744 ,("interface_", ITinterface)
745 ,("usages_", ITusages)
746 ,("versions_", ITversions)
747 ,("exports_", ITexports)
748 ,("instance_modules_", ITinstance_modules)
749 ,("instances_", ITinstances)
750 ,("fixities_", ITfixities)
751 ,("declarations_", ITdeclarations)
752 ,("pragmas_", ITpragmas)
753 ,("forall_", ITforall)
754 ,("U_", ITunfold False)
755 ,("U!_", ITunfold True)
757 ,("coerce_in_", ITcoerce_in)
758 ,("coerce_out_", ITcoerce_out)
760 ,("integer_", ITinteger_lit)
761 ,("rational_", ITrational_lit)
762 ,("addr_", ITaddr_lit)
763 ,("float_", ITfloat_lit)
764 ,("string_", ITstring_lit)
765 ,("litlit_", ITlit_lit)
766 ,("ccall_", ITccall (False, False))
767 ,("ccall_GC_", ITccall (False, True))
768 ,("casm_", ITccall (True, False))
769 ,("casm_GC_", ITccall (True, True))
772 haskellKeywordsFM = listToUFM $
773 map (\ (x,y) -> (_PK_ x,y))
776 ,("newtype", ITnewtype)
779 ,("instance", ITinstance)
780 ,("infixl", ITinfixl)
781 ,("infixr", ITinfixr)
784 ,("case#", ITprim_case)
788 ,("deriving", ITderiving)
800 -- doDiscard rips along really fast, looking for a double semicolon,
801 -- indicating the end of the pragma we're skipping
802 doDiscard inStr buf =
803 -- _trace (show (C# (currentChar# buf))) $
804 case currentChar# buf of
807 case lookAhead# buf 1# of
808 ';'# -> incLexeme (incLexeme buf)
809 _ -> doDiscard inStr (incLexeme buf)
811 doDiscard inStr (incLexeme buf)
814 odd_slashes buf flg i# =
815 case lookAhead# buf i# of
816 '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
819 case lookAhead# buf (negateInt# 1#) of --backwards, actually
820 '\\'# -> -- escaping something..
821 if odd_slashes buf True (negateInt# 2#) then
822 -- odd number of slashes, " is escaped.
823 doDiscard inStr (incLexeme buf)
825 -- even number of slashes, \ is escaped.
826 doDiscard (not inStr) (incLexeme buf)
827 _ -> case inStr of -- forced to avoid build-up
828 True -> doDiscard False (incLexeme buf)
829 False -> doDiscard True (incLexeme buf)
830 _ -> doDiscard inStr (incLexeme buf)
835 my_span :: (a -> Bool) -> [a] -> ([a],[a])
836 my_span p xs = go [] xs
838 go so_far (x:xs') | p x = go (x:so_far) xs'
839 go so_far xs = (reverse so_far, xs)
841 my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a])
842 my_span' p xs = go [] 0 xs
844 go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs'
845 go so_far n xs = (reverse so_far,n, xs)
849 %************************************************************************
851 \subsection{Other utility functions
853 %************************************************************************
856 type IfM a = StringBuffer -- Input string
860 returnIf :: a -> IfM a
861 returnIf a s l = Succeeded a
863 thenIf :: IfM a -> (a -> IfM b) -> IfM b
864 m `thenIf` k = \s l ->
866 Succeeded a -> k a s l
867 Failed err -> Failed err
869 getSrcLocIf :: IfM SrcLoc
870 getSrcLocIf s l = Succeeded l
873 happyError s l = Failed (ifaceParseErr l ([]::[IfaceToken]){-Todo-})
875 -----------------------------------------------------------------
878 = hsep [ppr l, ptext SLIT("Interface-file parse error;"),
879 ptext SLIT("toks="), text (show (take 10 toks))]