2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
4 \section[Lexical analysis]{Lexical analysis}
9 isLexCon, isLexVar, isLexId, isLexSym,
10 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
11 mkTupNameStr, ifaceParseErr,
14 IfaceToken(..), lexIface, IfM, thenIf, returnIf, getSrcLocIf,
20 #include "HsVersions.h"
22 import Char (isDigit, isAlpha, isAlphanum, isUpper,isLower, isSpace, ord )
24 import {-# SOURCE #-} CostCentre
26 import CmdLineOpts ( opt_IgnoreIfacePragmas )
27 import Demand ( Demand(..) {- instance Read -} )
28 import UniqFM ( UniqFM, listToUFM, lookupUFM)
29 import BasicTypes ( NewOrData(..), IfaceFlavour(..) )
30 import SrcLoc ( SrcLoc, incSrcLine )
32 import Maybes ( MaybeErr(..) )
33 import ErrUtils ( ErrMsg(..) )
35 import Util ( nOfThem, panic )
43 %************************************************************************
45 \subsection{Lexical categories}
47 %************************************************************************
49 These functions test strings to see if they fit the lexical categories
50 defined in the Haskell report. Normally applied as in e.g. @isCon
54 isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
55 isLexVarId, isLexVarSym :: FAST_STRING -> Bool
57 isLexCon cs = isLexConId cs || isLexConSym cs
58 isLexVar cs = isLexVarId cs || isLexVarSym cs
60 isLexId cs = isLexConId cs || isLexVarId cs
61 isLexSym cs = isLexConSym cs || isLexVarSym cs
67 | cs == SLIT("[]") = True
68 | c == '(' = True -- (), (,), (,,), ...
69 | otherwise = isUpper c || isUpperISO c
75 | otherwise = isLower c || isLowerISO c
81 | otherwise = c == ':'
88 | otherwise = isSymbolASCII c
94 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
95 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
96 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
97 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
98 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
99 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
103 %************************************************************************
105 \subsection{Tuple strings -- ugh!}
107 %************************************************************************
110 mkTupNameStr 0 = SLIT("()")
111 mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
112 mkTupNameStr 2 = _PK_ "(,)" -- not strictly necessary
113 mkTupNameStr 3 = _PK_ "(,,)" -- ditto
114 mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
115 mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
120 %************************************************************************
122 \subsection{Data types}
124 %************************************************************************
126 The token data type, fairly un-interesting except from two constructors,
127 @ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity,
128 strictness, unfolding etc) and types for id decls.
130 The Idea/Observation here is that the renamer needs to scan through
131 all of an interface file before it can continue. But only a fraction
132 of the information contained in the file turns out to be useful, so
133 delaying as much as possible of the scanning and parsing of an
134 interface file Makes Sense (Heap profiles of the compiler
135 show at a reduction in heap usage by at least a factor of two,
138 Hence, the interface file lexer spots when value declarations are
139 being scanned and return the @ITidinfo@ and @ITtype@ constructors
140 for the type and any other id info for that binding (unfolding, strictness
141 etc). These constructors are applied to the result of lexing these sub-chunks.
143 The lexing of the type and id info is all done lazily, of course, so
144 the scanning (and subsequent parsing) will be done *only* on the ids the
145 renamer finds out that it is interested in. The rest will just be junked.
146 Laziness, you know it makes sense :-)
150 = ITinterface -- keywords
170 | ITbang -- magic symbols
185 | ITvarid FAST_STRING
186 | ITconid FAST_STRING
187 | ITvarsym FAST_STRING
188 | ITconsym FAST_STRING
189 | ITqvarid (FAST_STRING,FAST_STRING,IfaceFlavour)
190 | ITqconid (FAST_STRING,FAST_STRING,IfaceFlavour)
191 | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour)
192 | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour)
194 | ITtysig StringBuffer (Maybe StringBuffer)
195 -- lazily return the stream of tokens for
196 -- the info attached to an id.
197 -- Stuff for reading unfoldings
199 | ITunfold Bool -- True <=> there's an INLINE pragma on this Id
200 | ITstrict [Demand] | ITbottom
201 | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
202 | ITcoerce_in | ITcoerce_out | ITatsign
203 | ITccall (Bool,Bool) -- (is_casm, may_gc)
205 | ITchar Char | ITstring FAST_STRING
206 | ITinteger Integer | ITdouble Double
207 | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
208 | ITunknown String -- Used when the lexer can't make sense of it
209 | ITeof -- end of file token
210 deriving Text -- debugging
212 instance Text CostCentre -- cheat!
216 %************************************************************************
218 \subsection{The lexical analyser}
220 %************************************************************************
223 lexIface :: (IfaceToken -> IfM a) -> IfM a
226 -- if bufferExhausted buf then
229 -- _trace ("Lexer: "++[C# (currentChar# buf)]) $
230 case currentChar# buf of
231 -- whitespace and comments, ignore.
232 ' '# -> lexIface cont (stepOn buf)
233 '\t'# -> lexIface cont (stepOn buf)
234 '\n'# -> \ loc -> lexIface cont (stepOn buf) (incSrcLine loc)
236 -- Numbers and comments
238 case lookAhead# buf 1# of
239 '-'# -> lex_comment cont (stepOnBy# buf 2#)
242 then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
245 -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
246 -- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
249 case prefixMatch (stepOn buf) "..)" of
250 Just buf' -> cont ITdotdot (stepOverLexeme buf')
252 case lookAhead# buf 1# of
253 ','# -> lex_tuple cont Nothing (stepOnBy# buf 2#)
254 ')'# -> cont (ITconid SLIT("()")) (stepOnBy# buf 2#)
255 _ -> cont IToparen (stepOn buf)
257 '{'# -> cont ITocurly (stepOn buf)
258 '}'# -> cont ITccurly (stepOn buf)
259 ')'# -> cont ITcparen (stepOn buf)
261 case lookAhead# buf 1# of
262 ']'# -> cont (ITconid SLIT("[]")) (stepOnBy# buf 2#)
263 _ -> cont ITobrack (stepOn buf)
264 ']'# -> cont ITcbrack (stepOn buf)
265 ','# -> cont ITcomma (stepOn buf)
266 ';'# -> cont ITsemi (stepOn buf)
267 '\"'# -> case untilEndOfString# (stepOn buf) of
269 -- the string literal does *not* include the dquotes
270 case lexemeToFastString buf' of
271 v -> cont (ITstring v) (stepOn (stepOverLexeme buf'))
274 -- untilEndOfChar# extends the current lexeme until
275 -- it hits a non-escaped single quote. The lexeme of the
276 -- StringBuffer returned does *not* include the closing quote,
277 -- hence we augment the lexeme and make sure to add the
278 -- starting quote, before `read'ing the string.
280 case untilEndOfChar# (stepOn buf) of
281 buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
282 [ (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
284 -- ``thingy'' form for casm
286 case lookAhead# buf 1# of
287 '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `` and go.
288 _ -> lex_id cont (incLexeme buf) -- add ` to lexeme and assume
289 -- scanning an id of some sort.
292 case lookAhead# buf 1# of
293 'S'# -> case lookAhead# buf 2# of
295 lex_demand cont (stepOnUntil (not . isSpace)
296 (stepOnBy# buf 3#)) -- past _S_
297 's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of
298 Just buf' -> lex_scc cont (stepOnUntil (not . isSpace) (stepOverLexeme buf'))
299 Nothing -> lex_keyword cont (stepOnBy# buf 1#) -- drop the '_' and assume
301 _ -> lex_keyword cont (stepOn buf)
304 if bufferExhausted (stepOn buf) then
309 if isDigit (C# c) then
310 lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
314 lex_comment cont buf =
315 -- _trace ("comment: "++[C# (currentChar# buf)]) $
316 case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
319 lex_demand cont buf =
320 -- _trace ("demand: "++[C# (currentChar# buf)]) $
321 case read_em [] buf of { (ls,buf') -> cont (ITstrict ls) (stepOverLexeme buf')}
323 -- code snatched from Demand.lhs
325 -- _trace ("read_em: "++[C# (currentChar# buf)]) $
326 case currentChar# buf of
327 'L'# -> read_em (WwLazy False : acc) (stepOn buf)
328 'A'# -> read_em (WwLazy True : acc) (stepOn buf)
329 'S'# -> read_em (WwStrict : acc) (stepOn buf)
330 'P'# -> read_em (WwPrim : acc) (stepOn buf)
331 'E'# -> read_em (WwEnum : acc) (stepOn buf)
332 ')'# -> (reverse acc, stepOn buf)
333 'U'# -> do_unpack DataType True acc (stepOnBy# buf 2#)
334 'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
335 'N'# -> do_unpack NewType True acc (stepOnBy# buf 2#)
336 'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
337 _ -> (reverse acc, buf)
339 do_unpack new_or_data wrapper_unpacks acc buf
340 = case read_em [] buf of
341 (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
345 -- _trace ("scc: "++[C# (currentChar# buf)]) $
346 case currentChar# buf of
349 case prefixMatch (stepOn buf) "NO_CC\"" of
350 Just buf' -> cont (ITscc noCostCentre) (stepOverLexeme buf')
352 case prefixMatch (stepOn buf) "CURRENT_CC\"" of
353 Just buf' -> cont (ITscc useCurrentCostCentre) (stepOverLexeme buf')
355 case prefixMatch (stepOn buf) "OVERHEAD\"" of
356 Just buf' -> cont (ITscc overheadCostCentre) (stepOverLexeme buf')
358 case prefixMatch (stepOn buf) "DONT_CARE\"" of
359 Just buf' -> cont (ITscc dontCareCostCentre) (stepOverLexeme buf')
361 case prefixMatch (stepOn buf) "SUBSUMED\"" of
362 Just buf' -> cont (ITscc subsumedCosts) (stepOverLexeme buf')
364 case prefixMatch (stepOn buf) "CAFs_in_...\"" of
365 Just buf' -> cont (ITscc preludeCafsCostCentre) (stepOverLexeme buf')
367 case prefixMatch (stepOn buf) "CC_CAFs_in_..." of
369 case untilChar# (stepOverLexeme buf') '\"'# of
370 buf'' -> cont (ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_)) (stepOn (stepOverLexeme buf''))
372 case prefixMatch (stepOn buf) "DICTs_in_...\"" of
373 Just buf' -> cont (ITscc (preludeDictsCostCentre True)) (stepOverLexeme buf')
375 case prefixMatch (stepOn buf) "CC_DICTs_in_..." of
377 case untilChar# (stepOverLexeme buf') '\"'# of
378 buf'' -> cont (ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True))
379 (stepOn (stepOverLexeme buf''))
383 case untilChar# buf '/'# of
385 let mod_name = lexemeToFastString buf' in
386 -- case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
388 -- let grp_name = lexemeToFastString buf'' in
389 case untilChar# (stepOn (stepOverLexeme buf')) '\"'# of
391 -- The label may contain arbitrary characters, so it
392 -- may have been escaped etc., hence we `read' it in to get
393 -- rid of these meta-chars in the string and then pack it (again.)
394 -- ToDo: do the same for module name (single quotes allowed in m-names).
395 -- BTW, the code in this module is totally gruesome..
396 let upk_label = _UNPK_ (lexemeToFastString buf'') in
397 case reads ('"':upk_label++"\"") of
399 let cc_name = _PK_ cc_label in
400 (mkUserCC cc_name mod_name _NIL_{-grp_name-},
401 stepOn (stepOverLexeme buf''))
403 trace ("trouble lexing scc label: " ++ upk_label ++ " , ignoring")
404 (mkUserCC _NIL_ mod_name _NIL_{-grp_name-},
405 stepOn (stepOverLexeme buf''))
407 case prefixMatch (stepOn buf) "CAF:" of
409 case match_user_cc (stepOverLexeme buf') of
410 (cc, buf'') -> cont (ITscc (cafifyCC cc)) buf''
412 case match_user_cc (stepOn buf) of
413 (cc, buf'') -> cont (ITscc cc) buf''
414 c -> cont (ITunknown [C# c]) (stepOn buf)
418 lex_num :: (IfaceToken -> IfM a) ->
419 (Int -> Int) -> Int# -> IfM a
420 lex_num cont minus acc# buf =
421 -- _trace ("lex_num: "++[C# (currentChar# buf)]) $
422 case scanNumLit (I# acc#) buf of
424 case currentChar# buf' of
426 -- this case is not optimised at all, as the
427 -- presence of floating point numbers in interface
428 -- files is not that common. (ToDo)
429 case expandWhile (isDigit) (incLexeme buf') of
430 buf'' -> -- points to first non digit char
431 case reads (lexemeToString buf'') of
432 [(v,_)] -> cont (ITdouble v) (stepOverLexeme buf'')
433 _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
435 -- case reads (lexemeToString buf') of
436 -- [(i,_)] -> cont (ITinteger i) (stepOverLexeme buf')
439 lex_keyword cont buf =
440 -- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $
441 case currentChar# buf of
442 ':'# -> case lookAhead# buf 1# of
443 '_'# -> -- a binding, type (and other id-info) follows,
444 -- to make the parser ever so slightly, we push
446 lex_decl cont (stepOnBy# buf 2#)
447 v# -> cont (ITunknown (['_',':',C# v#])) (stepOnBy# buf 2#)
449 case expandWhile (is_kwd_char) buf of
451 let kw = lexemeToFastString buf' in
452 -- _trace ("kw: "++lexemeToString buf') $
453 case lookupUFM ifaceKeywordsFM kw of
454 Nothing -> cont (ITunknown (_UNPK_ kw)) -- (minor) sigh
455 (stepOverLexeme buf')
456 Just xx -> cont xx (stepOverLexeme buf')
459 case doDiscard False buf of -- spin until ;; is found
461 {- _trace (show (lexemeToString buf')) $ -}
462 case currentChar# buf' of
463 '\n'# -> -- newline, no id info.
464 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
465 (stepOverLexeme buf')
466 '\r'# -> -- just to be sure for those Win* boxes..
467 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
468 (stepOverLexeme buf')
470 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
471 (stepOverLexeme buf')
472 c -> -- run all over the id info
473 case doDiscard False (stepOverLexeme buf') of -- spin until ;; is found (outside a string!)
475 --_trace ((C# c):show (lexemeToString (decLexeme buf'))) $
476 --_trace (show (lexemeToString (decLexeme buf''))) $
478 if opt_IgnoreIfacePragmas then
481 Just (lexemeToBuffer (decLexeme buf''))
484 cont (ITtysig (lexemeToBuffer (decLexeme buf')) idinfo)
485 (stepOverLexeme buf'')
488 is_kwd_char c@(C# c#) =
489 isAlphanum c || -- OLD: c `elem` "_@/\\"
500 lex_cstring cont buf =
501 -- _trace ("lex_cstring: "++[C# (currentChar# buf)]) $
502 case expandUntilMatch buf "\'\'" of
503 buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
504 (stepOverLexeme buf')
507 lex_tuple cont module_dot buf =
508 -- _trace ("lex_tuple: "++[C# (currentChar# buf)]) $
512 case currentChar# buf of
513 ','# -> go (n+1) (stepOn buf)
514 ')'# -> end_lex_id cont module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
515 _ -> cont (ITunknown ("tuple " ++ show n)) buf
517 -- Similarly ' itself is ok inside an identifier, but not at the start
519 -- id_arr is an array of bytes, indexed by characters,
520 -- containing 0 if the character isn't a valid character from an identifier
521 -- and 1 if it is. It's just a memo table for is_id_char.
522 id_arr :: ByteArray Int
525 newCharArray (0,255) >>= \ barr ->
527 loop 256# = return ()
529 if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
530 writeCharArray barr (I# i#) '\1' >>
533 writeCharArray barr (I# i#) '\0' >>
537 unsafeFreezeByteArray barr)
541 ByteArray _ arr# = id_arr
543 case ord# (indexCharArray# arr# (ord# c#)) of
547 --OLD: is_id_char c@(C# c#) = isAlphanum c || is_sym c#
551 ':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True;
552 '#'# -> True; '$'# -> True; '%'# -> True;
553 '&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True;
554 '/'# -> True; '<'# -> True; '='# -> True; '>'# -> True;
555 '?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True;
556 '-'# -> True; '~'# -> True; '@'# -> True; _ -> False }
558 --isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
561 -- mod_arr is an array of bytes, indexed by characters,
562 -- containing 0 if the character isn't a valid character from a module name,
564 mod_arr :: ByteArray Int
567 newCharArray (0,255) >>= \ barr ->
569 loop 256# = return ()
571 if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
572 writeCharArray barr (I# i#) '\1' >>
575 writeCharArray barr (I# i#) '\0' >>
579 unsafeFreezeByteArray barr)
582 is_mod_char (C# c#) =
584 ByteArray _ arr# = mod_arr
586 case ord# (indexCharArray# arr# (ord# c#)) of
590 --isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
593 -- _trace ("lex_id: "++[C# (currentChar# buf)]) $
594 case expandWhile (is_mod_char) buf of
596 case currentChar# buf' of
597 '.'# -> munch buf' HiFile
598 '!'# -> munch buf' HiBootFile
599 _ -> lex_id2 cont Nothing buf'
602 if not (emptyLexeme buf') then
603 -- _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $
604 case lexemeToFastString buf' of
605 l@(FastString u# l# ba#) -> lex_id2 cont (Just (FastString u# l# ba#, hif))
606 (stepOn (stepOverLexeme buf'))
608 lex_id2 cont Nothing buf'
611 -- Dealt with the Module.part
612 lex_id2 cont module_dot buf =
613 -- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
614 case currentChar# buf of
616 '['# -> -- Special case for []
617 case lookAhead# buf 1# of
618 ']'# -> end_lex_id cont module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
619 _ -> lex_id3 cont module_dot buf
621 '('# -> -- Special case for (,,,)
622 case lookAhead# buf 1# of
623 ')'# -> end_lex_id cont module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
624 ','# -> lex_tuple cont module_dot (stepOnBy# buf 2#)
625 _ -> lex_id3 cont module_dot buf
626 ':'# -> lex_id3 cont module_dot (incLexeme buf)
629 Nothing -> lex_id3 cont module_dot buf
630 Just ghc -> -- this should be "GHC" (current home of (->))
631 case lookAhead# buf 1# of
632 '>'# -> end_lex_id cont module_dot (ITconid SLIT("->"))
634 _ -> lex_id3 cont module_dot buf
635 _ -> lex_id3 cont module_dot buf
639 -- Dealt with [], (), : special cases
641 lex_id3 cont module_dot buf =
642 -- _trace ("lex_id3: "++[C# (currentChar# buf)]) $
643 case expandWhile (is_id_char) buf of
647 end_lex_id cont module_dot (mk_var_token lexeme) new_buf
649 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
650 Just kwd_token -> cont kwd_token new_buf
651 Nothing -> cont (mk_var_token lexeme) new_buf
653 lexeme = lexemeToFastString buf'
654 new_buf = stepOverLexeme buf'
658 lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs
659 lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs
660 lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs
661 lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs
662 lex_id2 module_dot xs cs = lex_id3 module_dot xs cs
665 -- Dealt with [], (), : special cases
668 lex_id3 module_dot len_xs xs cs =
669 case my_span' (is_id_char) cs of
670 (xs1,len_xs1,rest) ->
672 Just m -> end_lex_id (Just m) (mk_var_token rxs) rest --OLD:_PK_ (reverse xs))) rest
674 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM rxs of
675 Just kwd_token -> kwd_token : lexIface rest
676 other -> token : lexIface cs end_lex_id Nothing (mk_var_token rxs) rest
678 rxs = packNChars (len_xs+len_xs1) (xs++xs1) -- OLD: _PK_ (reverse xs)
680 mk_var_token pk_str =
685 -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower,
686 -- remove the second half of disjunction when using a 1.3 prelude.
688 if isUpper f then ITconid pk_str
689 else if isLower f then ITvarid pk_str
690 else if f == ':' then ITconsym pk_str
691 else if isLowerISO f then ITvarid pk_str
692 else if isUpperISO f then ITconid pk_str
696 mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n
697 | f == ':' = ITconsym n
698 | isAlpha f = ITvarid n
699 | otherwise = ITvarsym n
704 end_lex_id cont Nothing token buf = cont token buf
705 end_lex_id cont (Just (m,hif)) token buf =
707 ITconid n -> cont (ITqconid (m,n,hif)) buf
708 ITvarid n -> cont (ITqvarid (m,n,hif)) buf
709 ITconsym n -> cont (ITqconsym (m,n,hif)) buf
711 -- Special case for ->
712 -- "->" by itself is a special token (ITrarrow),
713 -- but M.-> is a ITqconid
714 ITvarsym n | n == SLIT("->")
715 -> cont (ITqconsym (m,n,hif)) buf
717 ITvarsym n -> cont (ITqvarsym (m,n,hif)) buf
719 -- ITbang can't happen here I think
720 -- ITbang -> cont (ITqvarsym (m,SLIT("!"),hif)) buf
722 _ -> cont (ITunknown (show token)) buf
725 ifaceKeywordsFM :: UniqFM IfaceToken
726 ifaceKeywordsFM = listToUFM $
727 map (\ (x,y) -> (_PK_ x,y))
730 ,("letrec_", ITletrec)
731 ,("interface_", ITinterface)
732 ,("usages_", ITusages)
733 ,("versions_", ITversions)
734 ,("exports_", ITexports)
735 ,("instance_modules_", ITinstance_modules)
736 ,("instances_", ITinstances)
737 ,("fixities_", ITfixities)
738 ,("declarations_", ITdeclarations)
739 ,("pragmas_", ITpragmas)
740 ,("forall_", ITforall)
741 ,("U_", ITunfold False)
742 ,("U!_", ITunfold True)
744 ,("coerce_in_", ITcoerce_in)
745 ,("coerce_out_", ITcoerce_out)
747 ,("integer_", ITinteger_lit)
748 ,("rational_", ITrational_lit)
749 ,("addr_", ITaddr_lit)
750 ,("float_", ITfloat_lit)
751 ,("string_", ITstring_lit)
752 ,("litlit_", ITlit_lit)
753 ,("ccall_", ITccall (False, False))
754 ,("ccall_GC_", ITccall (False, True))
755 ,("casm_", ITccall (True, False))
756 ,("casm_GC_", ITccall (True, True))
759 haskellKeywordsFM = listToUFM $
760 map (\ (x,y) -> (_PK_ x,y))
763 ,("newtype", ITnewtype)
766 ,("instance", ITinstance)
767 ,("infixl", ITinfixl)
768 ,("infixr", ITinfixr)
771 ,("case#", ITprim_case)
775 ,("deriving", ITderiving)
787 -- doDiscard rips along really fast, looking for a double semicolon,
788 -- indicating the end of the pragma we're skipping
789 doDiscard inStr buf =
790 -- _trace (show (C# (currentChar# buf))) $
791 case currentChar# buf of
794 case lookAhead# buf 1# of
795 ';'# -> incLexeme (incLexeme buf)
796 _ -> doDiscard inStr (incLexeme buf)
798 doDiscard inStr (incLexeme buf)
801 odd_slashes buf flg i# =
802 case lookAhead# buf i# of
803 '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
806 case lookAhead# buf (negateInt# 1#) of --backwards, actually
807 '\\'# -> -- escaping something..
808 if odd_slashes buf True (negateInt# 2#) then
809 -- odd number of slashes, " is escaped.
810 doDiscard inStr (incLexeme buf)
812 -- even number of slashes, \ is escaped.
813 doDiscard (not inStr) (incLexeme buf)
814 _ -> case inStr of -- forced to avoid build-up
815 True -> doDiscard False (incLexeme buf)
816 False -> doDiscard True (incLexeme buf)
817 _ -> doDiscard inStr (incLexeme buf)
822 my_span :: (a -> Bool) -> [a] -> ([a],[a])
823 my_span p xs = go [] xs
825 go so_far (x:xs') | p x = go (x:so_far) xs'
826 go so_far xs = (reverse so_far, xs)
828 my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a])
829 my_span' p xs = go [] 0 xs
831 go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs'
832 go so_far n xs = (reverse so_far,n, xs)
836 %************************************************************************
838 \subsection{Other utility functions
840 %************************************************************************
843 type IfM a = StringBuffer -- Input string
847 returnIf :: a -> IfM a
848 returnIf a s l = Succeeded a
850 thenIf :: IfM a -> (a -> IfM b) -> IfM b
851 m `thenIf` k = \s l ->
853 Succeeded a -> k a s l
854 Failed err -> Failed err
856 getSrcLocIf :: IfM SrcLoc
857 getSrcLocIf s l = Succeeded l
860 happyError s l = Failed (ifaceParseErr l ([]::[IfaceToken]){-Todo-})
862 -----------------------------------------------------------------
865 = hsep [ppr l, ptext SLIT("Interface-file parse error;"),
866 ptext SLIT("toks="), text (show (take 10 toks))]