2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
4 \section[Lexical analysis]{Lexical analysis}
7 #include "HsVersions.h"
11 isLexCon, isLexVar, isLexId, isLexSym,
12 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
13 mkTupNameStr, ifaceParseErr,
16 IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError,
22 IMPORT_1_3(Char(isDigit, isAlpha, isAlphanum, isUpper,isLower, isSpace, ord))
25 #if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ <= 201
26 IMPORT_DELOOPER(IdLoop) -- get the CostCentre type&constructors from here
28 import {-# SOURCE #-} CostCentre
31 import CmdLineOpts ( opt_IgnoreIfacePragmas )
32 import Demand ( Demand(..) {- instance Read -} )
33 import UniqFM ( UniqFM, listToUFM, lookupUFM)
34 import BasicTypes ( NewOrData(..) )
36 #if __GLASGOW_HASKELL__ >= 202
37 import Maybes ( MaybeErr(..) )
39 import Maybes ( Maybe(..), MaybeErr(..) )
45 import ErrUtils ( Error(..) )
46 import Outputable ( Outputable(..), PprStyle(..) )
47 import Util ( nOfThem, panic )
52 #if __GLASGOW_HASKELL__ <= 201
59 %************************************************************************
61 \subsection{Lexical categories}
63 %************************************************************************
65 These functions test strings to see if they fit the lexical categories
66 defined in the Haskell report. Normally applied as in e.g. @isCon
70 isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
71 isLexVarId, isLexVarSym :: FAST_STRING -> Bool
73 isLexCon cs = isLexConId cs || isLexConSym cs
74 isLexVar cs = isLexVarId cs || isLexVarSym cs
76 isLexId cs = isLexConId cs || isLexVarId cs
77 isLexSym cs = isLexConSym cs || isLexVarSym cs
83 | cs == SLIT("[]") = True
84 | c == '(' = True -- (), (,), (,,), ...
85 | otherwise = isUpper c || isUpperISO c
91 | otherwise = isLower c || isLowerISO c
97 | otherwise = c == ':'
104 | otherwise = isSymbolASCII c
110 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
111 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
112 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
113 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
114 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
115 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
119 %************************************************************************
121 \subsection{Tuple strings -- ugh!}
123 %************************************************************************
126 mkTupNameStr 0 = SLIT("()")
127 mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
128 mkTupNameStr 2 = _PK_ "(,)" -- not strictly necessary
129 mkTupNameStr 3 = _PK_ "(,,)" -- ditto
130 mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
131 mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
136 %************************************************************************
138 \subsection{Data types}
140 %************************************************************************
142 The token data type, fairly un-interesting except from two constructors,
143 @ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity,
144 strictness, unfolding etc) and types for id decls.
146 The Idea/Observation here is that the renamer needs to scan through
147 all of an interface file before it can continue. But only a fraction
148 of the information contained in the file turns out to be useful, so
149 delaying as much as possible of the scanning and parsing of an
150 interface file Makes Sense (Heap profiles of the compiler
151 show at a reduction in heap usage by at least a factor of two,
154 Hence, the interface file lexer spots when value declarations are
155 being scanned and return the @ITidinfo@ and @ITtype@ constructors
156 for the type and any other id info for that binding (unfolding, strictness
157 etc). These constructors are applied to the result of lexing these sub-chunks.
159 The lexing of the type and id info is all done lazily, of course, so
160 the scanning (and subsequent parsing) will be done *only* on the ids the
161 renamer finds out that it is interested in. The rest will just be junked.
162 Laziness, you know it makes sense :-)
166 = ITinterface -- keywords
186 | ITbang -- magic symbols
201 | ITvarid FAST_STRING
202 | ITconid FAST_STRING
203 | ITvarsym FAST_STRING
204 | ITconsym FAST_STRING
205 | ITqvarid (FAST_STRING,FAST_STRING)
206 | ITqconid (FAST_STRING,FAST_STRING)
207 | ITqvarsym (FAST_STRING,FAST_STRING)
208 | ITqconsym (FAST_STRING,FAST_STRING)
210 | ITidinfo [IfaceToken] -- lazily return the stream of tokens for
211 -- the info attached to an id.
212 | ITtysig [IfaceToken] -- lazily return the stream of tokens for
213 -- the info attached to an id.
214 -- Stuff for reading unfoldings
216 | ITunfold Bool -- True <=> there's an INLINE pragma on this Id
217 | ITdemand [Demand] | ITbottom
218 | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
219 | ITcoerce_in | ITcoerce_out | ITatsign
220 | ITccall (Bool,Bool) -- (is_casm, may_gc)
222 | ITchar Char | ITstring FAST_STRING
223 | ITinteger Integer | ITdouble Double
224 | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
225 | ITunknown String -- Used when the lexer can't make sense of it
226 deriving Text -- debugging
228 instance Text CostCentre -- cheat!
232 %************************************************************************
234 \subsection{The lexical analyser}
236 %************************************************************************
239 lexIface :: StringBuffer -> [IfaceToken]
242 -- if bufferExhausted buf then
245 -- _trace ("Lexer: "++[C# (currentChar# buf)]) $
246 case currentChar# buf of
247 -- whitespace and comments, ignore.
248 ' '# -> lexIface (stepOn buf)
249 '\t'# -> lexIface (stepOn buf)
250 '\n'# -> lexIface (stepOn buf)
252 -- Numbers and comments
254 case lookAhead# buf 1# of
255 '-'# -> lex_comment (stepOnBy# buf 2#)
258 then lex_num (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
261 -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
262 -- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
265 case prefixMatch (stepOn buf) "..)" of
266 Just buf' -> ITdotdot : lexIface (stepOverLexeme buf')
268 case lookAhead# buf 1# of
269 ','# -> lex_tuple Nothing (stepOnBy# buf 2#)
270 ')'# -> ITconid SLIT("()") : lexIface (stepOnBy# buf 2#)
271 _ -> IToparen : lexIface (stepOn buf)
273 '{'# -> ITocurly : lexIface (stepOn buf)
274 '}'# -> ITccurly : lexIface (stepOn buf)
275 ')'# -> ITcparen : lexIface (stepOn buf)
277 case lookAhead# buf 1# of
278 ']'# -> ITconid SLIT("[]") : lexIface (stepOnBy# buf 2#)
279 _ -> ITobrack : lexIface (stepOn buf)
280 ']'# -> ITcbrack : lexIface (stepOn buf)
281 ','# -> ITcomma : lexIface (stepOn buf)
282 ':'# -> case lookAhead# buf 1# of
283 ':'# -> ITdcolon : lexIface (stepOnBy# buf 2#)
284 _ -> lex_id (incLexeme buf)
285 ';'# -> ITsemi : lexIface (stepOn buf)
286 '\"'# -> case untilEndOfString# (stepOn buf) of
288 -- the string literal does *not* include the dquotes
289 case lexemeToFastString buf' of
290 v -> ITstring v : lexIface (stepOn (stepOverLexeme buf'))
293 -- untilEndOfChar# extends the current lexeme until
294 -- it hits a non-escaped single quote. The lexeme of the
295 -- StringBuffer returned does *not* include the closing quote,
296 -- hence we augment the lexeme and make sure to add the
297 -- starting quote, before `read'ing the string.
299 case untilEndOfChar# (stepOn buf) of
300 buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
301 [ (ch, rest)] -> ITchar ch : lexIface (stepOverLexeme (incLexeme buf'))
303 -- ``thingy'' form for casm
305 case lookAhead# buf 1# of
306 '`'# -> lex_cstring (stepOnBy# buf 2#) -- remove the `` and go.
307 _ -> lex_id (incLexeme buf) -- add ` to lexeme and assume
308 -- scanning an id of some sort.
311 case lookAhead# buf 1# of
312 'S'# -> case lookAhead# buf 2# of
314 lex_demand (stepOnUntil (not . isSpace)
315 (stepOnBy# buf 3#)) -- past _S_
316 's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of
317 Just buf' -> lex_scc (stepOnUntil (not . isSpace) (stepOverLexeme buf'))
318 Nothing -> lex_keyword (stepOnBy# buf 1#) -- drop the '_' and assume
320 _ -> lex_keyword (stepOn buf)
323 if bufferExhausted (stepOn buf) then
328 if isDigit (C# c) then
329 lex_num (id) (ord# c -# ord# '0'#) (incLexeme buf)
334 -- _trace ("comment: "++[C# (currentChar# buf)]) $
335 case untilChar# buf '\n'# of {buf' -> lexIface (stepOverLexeme buf')}
339 -- _trace ("demand: "++[C# (currentChar# buf)]) $
340 case read_em [] buf of { (ls,buf') -> ITdemand ls : lexIface (stepOverLexeme buf')}
342 -- code snatched from Demand.lhs
344 -- _trace ("read_em: "++[C# (currentChar# buf)]) $
345 case currentChar# buf of
346 'L'# -> read_em (WwLazy False : acc) (stepOn buf)
347 'A'# -> read_em (WwLazy True : acc) (stepOn buf)
348 'S'# -> read_em (WwStrict : acc) (stepOn buf)
349 'P'# -> read_em (WwPrim : acc) (stepOn buf)
350 'E'# -> read_em (WwEnum : acc) (stepOn buf)
351 ')'# -> (reverse acc, stepOn buf)
352 'U'# -> do_unpack DataType True acc (stepOnBy# buf 2#)
353 'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
354 'N'# -> do_unpack NewType True acc (stepOnBy# buf 2#)
355 'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
356 _ -> (reverse acc, buf)
358 do_unpack new_or_data wrapper_unpacks acc buf
359 = case read_em [] buf of
360 (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
364 -- _trace ("scc: "++[C# (currentChar# buf)]) $
365 case currentChar# buf of
368 case prefixMatch (stepOn buf) "NO_CC\"" of
369 Just buf' -> ITscc noCostCentre : lexIface (stepOverLexeme buf')
371 case prefixMatch (stepOn buf) "CURRENT_CC\"" of
372 Just buf' -> ITscc useCurrentCostCentre : lexIface (stepOverLexeme buf')
374 case prefixMatch (stepOn buf) "OVERHEAD\"" of
375 Just buf' -> ITscc overheadCostCentre : lexIface (stepOverLexeme buf')
377 case prefixMatch (stepOn buf) "DONT_CARE\"" of
378 Just buf' -> ITscc dontCareCostCentre : lexIface (stepOverLexeme buf')
380 case prefixMatch (stepOn buf) "SUBSUMED\"" of
381 Just buf' -> ITscc subsumedCosts : lexIface (stepOverLexeme buf')
383 case prefixMatch (stepOn buf) "CAFs_in_...\"" of
384 Just buf' -> ITscc preludeCafsCostCentre : lexIface (stepOverLexeme buf')
386 case prefixMatch (stepOn buf) "CC_CAFs_in_..." of
388 case untilChar# (stepOverLexeme buf') '\"'# of
389 buf'' -> ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_):
390 lexIface (stepOn (stepOverLexeme buf''))
392 case prefixMatch (stepOn buf) "DICTs_in_...\"" of
393 Just buf' -> ITscc (preludeDictsCostCentre True) : lexIface (stepOverLexeme buf')
395 case prefixMatch (stepOn buf) "CC_DICTs_in_..." of
397 case untilChar# (stepOverLexeme buf') '\"'# of
398 buf'' -> ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True):
399 lexIface (stepOn (stepOverLexeme buf''))
403 case untilChar# buf '/'# of
405 let mod_name = lexemeToFastString buf' in
406 -- case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
408 -- let grp_name = lexemeToFastString buf'' in
409 case untilChar# (stepOn (stepOverLexeme buf')) '\"'# of
411 let cc_name = lexemeToFastString buf'' in
412 (mkUserCC cc_name mod_name _NIL_{-grp_name-},
413 stepOn (stepOverLexeme buf''))
415 case prefixMatch (stepOn buf) "CAF:" of
417 case match_user_cc (stepOverLexeme buf') of
418 (cc, buf'') -> ITscc (cafifyCC cc) : lexIface buf''
420 case match_user_cc (stepOn buf) of
421 (cc, buf'') -> ITscc cc : lexIface buf''
422 c -> ITunknown [C# c] : lexIface (stepOn buf)
426 lex_num :: (Int -> Int) -> Int# -> StringBuffer -> [IfaceToken]
427 lex_num minus acc# buf =
428 -- _trace ("lex_num: "++[C# (currentChar# buf)]) $
429 case scanNumLit (I# acc#) buf of
431 case currentChar# buf' of
433 -- this case is not optimised at all, as the
434 -- presence of floating point numbers in interface
435 -- files is not that common. (ToDo)
436 case expandWhile (isDigit) (incLexeme buf') of
437 buf'' -> -- points to first non digit char
438 case reads (lexemeToString buf'') of
439 [(v,_)] -> ITdouble v : lexIface (stepOverLexeme buf'')
440 _ -> ITinteger (fromInt (minus acc')) : lexIface (stepOverLexeme buf')
442 -- case reads (lexemeToString buf') of
443 -- [(i,_)] -> ITinteger i : lexIface (stepOverLexeme buf')
447 -- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $
448 case currentChar# buf of
449 ':'# -> case lookAhead# buf 1# of
450 '_'# -> -- a binding, type (and other id-info) follows,
451 -- to make the parser ever so slightly, we push
453 lex_decl (stepOnBy# buf 2#)
454 v# -> ITunknown (['_',':',C# v#]) : lexIface (stepOnBy# buf 2#)
456 case expandWhile (is_kwd_char) buf of
458 let kw = lexemeToFastString buf' in
459 -- _trace ("kw: "++lexemeToString buf') $
460 case lookupUFM ifaceKeywordsFM kw of
461 Nothing -> ITunknown (_UNPK_ kw) : -- (minor) sigh
462 lexIface (stepOverLexeme buf')
463 Just xx -> xx : lexIface (stepOverLexeme buf')
466 case doDiscard False buf of -- spin until ;; is found
468 {- _trace (show (lexemeToString buf')) $ -}
469 case currentChar# buf' of
470 '\n'# -> -- newline, no id info.
471 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
472 lexIface (stepOverLexeme buf')
473 '\r'# -> -- just to be sure for those Win* boxes..
474 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
475 lexIface (stepOverLexeme buf')
477 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
478 lexIface (stepOverLexeme buf')
479 c -> -- run all over the id info
480 case doDiscard False (stepOverLexeme buf') of -- spin until ;; is found (outside a string!)
482 --_trace ((C# c):show (lexemeToString (decLexeme buf'))) $
483 --_trace (show (lexemeToString (decLexeme buf''))) $
484 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))):
485 let ls = lexIface (stepOverLexeme buf'') in
486 if opt_IgnoreIfacePragmas then
489 let is = lexIface (lexemeToBuffer (decLexeme buf'')) in
494 is_kwd_char c@(C# c#) =
495 isAlphanum c || -- OLD: c `elem` "_@/\\"
507 -- _trace ("lex_cstring: "++[C# (currentChar# buf)]) $
508 case expandUntilMatch buf "\'\'" of
509 buf' -> ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))) :
510 lexIface (stepOverLexeme buf')
513 lex_tuple module_dot buf =
514 -- _trace ("lex_tuple: "++[C# (currentChar# buf)]) $
518 case currentChar# buf of
519 ','# -> go (n+1) (stepOn buf)
520 ')'# -> end_lex_id module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
521 _ -> ITunknown ("tuple " ++ show n) : lexIface buf
523 -- Similarly ' itself is ok inside an identifier, but not at the start
525 id_arr :: _ByteArray Int
527 unsafePerformPrimIO (
528 newCharArray (0,255) `thenPrimIO` \ barr ->
530 loop 256# = returnPrimIO ()
532 if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
533 writeCharArray barr (I# i#) '\1' `seqPrimIO`
536 writeCharArray barr (I# i#) '\0' `seqPrimIO`
540 unsafeFreezeByteArray barr)
544 _ByteArray _ arr# = id_arr
546 case ord# (indexCharArray# arr# (ord# c#)) of
550 --is_id_char c@(C# c#) = isAlphanum c || is_sym c#
554 ':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True;
555 '#'# -> True; '$'# -> True; ':'# -> True; '%'# -> True;
556 '&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True;
557 '/'# -> True; '<'# -> True; '='# -> True; '>'# -> True;
558 '?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True;
559 '-'# -> True; '~'# -> True; '@'# -> True; _ -> False }
561 --isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
564 mod_arr :: _ByteArray Int
566 unsafePerformPrimIO (
567 newCharArray (0,255) `thenPrimIO` \ barr ->
569 loop 256# = returnPrimIO ()
571 if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
572 writeCharArray barr (I# i#) '\1' `seqPrimIO`
575 writeCharArray barr (I# i#) '\0' `seqPrimIO`
579 unsafeFreezeByteArray barr)
582 is_mod_char (C# c#) =
584 _ByteArray _ arr# = mod_arr
586 case ord# (indexCharArray# arr# (ord# c#)) of
590 --isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
594 case _scc_ "lex_id.span" my_span' (is_mod_char) cs of
598 [] -> lex_id2 Nothing cs
599 _ -> lex_id3 Nothing len xs cs
603 [] -> lex_id2 Nothing cs
606 pk_str = _PK_ (xs::String)
607 len = lengthPS pk_str
610 error "Well, I never!"
612 lex_id2 (Just pk_str) cs''
614 [] -> lex_id2 Nothing cs
615 _ -> lex_id3 Nothing len xs cs'
620 -- _trace ("lex_id: "++[C# (currentChar# buf)]) $
621 case expandWhile (is_mod_char) buf of
623 case currentChar# buf' of
625 if not (emptyLexeme buf') then
626 -- _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $
627 case lexemeToFastString buf' of
628 l@(FastString u# l# ba#) -> lex_id2 (Just (FastString u# l# ba#))
629 (stepOn (stepOverLexeme buf'))
632 _ -> lex_id2 Nothing buf'
634 -- Dealt with the Module.part
635 lex_id2 module_dot buf =
636 -- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
637 case currentChar# buf of
639 case lookAhead# buf 1# of
640 ']'# -> end_lex_id module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
641 _ -> lex_id3 module_dot buf
643 case lookAhead# buf 1# of
644 ')'# -> end_lex_id module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
645 ','# -> lex_tuple module_dot (stepOnBy# buf 2#)
646 _ -> lex_id3 module_dot buf
647 ':'# -> lex_id3 module_dot (incLexeme buf)
648 _ -> lex_id3 module_dot buf
652 -- Dealt with [], (), : special cases
654 lex_id3 module_dot buf =
655 -- _trace ("lex_id3: "++[C# (currentChar# buf)]) $
656 case expandWhile (is_id_char) buf of
660 end_lex_id module_dot (mk_var_token lexeme) (stepOverLexeme buf')
662 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
663 Just kwd_token -> kwd_token : lexIface new_buf
664 Nothing -> mk_var_token lexeme : lexIface new_buf
666 lexeme = lexemeToFastString buf'
667 new_buf = stepOverLexeme buf'
671 lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs
672 lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs
673 lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs
674 lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs
675 lex_id2 module_dot xs cs = lex_id3 module_dot xs cs
679 -- Dealt with [], (), : special cases
682 lex_id3 module_dot len_xs xs cs =
683 case my_span' (is_id_char) cs of
684 (xs1,len_xs1,rest) ->
686 Just m -> end_lex_id (Just m) (mk_var_token rxs) rest --OLD:_PK_ (reverse xs))) rest
688 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM rxs of
689 Just kwd_token -> kwd_token : lexIface rest
690 other -> token : lexIface cs end_lex_id Nothing (mk_var_token rxs) rest
692 rxs = packNChars (len_xs+len_xs1) (xs++xs1) -- OLD: _PK_ (reverse xs)
694 mk_var_token pk_str =
699 -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower,
700 -- remove the second half of disjunction when using a 1.3 prelude.
702 if isUpper f then ITconid pk_str
703 else if isLower f then ITvarid pk_str
704 else if f == ':' then ITconsym pk_str
705 else if isLowerISO f then ITvarid pk_str
706 else if isUpperISO f then ITconid pk_str
710 mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n
711 | f == ':' = ITconsym n
712 | isAlpha f = ITvarid n
713 | otherwise = ITvarsym n
718 end_lex_id Nothing token buf = token : lexIface buf
719 end_lex_id (Just m) token buf =
721 ITconid n -> ITqconid (m,n) : lexIface buf
722 ITvarid n -> ITqvarid (m,n) : lexIface buf
723 ITconsym n -> ITqconsym (m,n) : lexIface buf
724 ITvarsym n -> ITqvarsym (m,n) : lexIface buf
725 ITbang -> ITqvarsym (m,SLIT("!")) : lexIface buf
726 _ -> ITunknown (show token) : lexIface buf
729 ifaceKeywordsFM :: UniqFM IfaceToken
730 ifaceKeywordsFM = listToUFM $
731 map (\ (x,y) -> (_PK_ x,y))
734 ,("letrec_", ITletrec)
735 ,("interface_", ITinterface)
736 ,("usages_", ITusages)
737 ,("versions_", ITversions)
738 ,("exports_", ITexports)
739 ,("instance_modules_", ITinstance_modules)
740 ,("instances_", ITinstances)
741 ,("fixities_", ITfixities)
742 ,("declarations_", ITdeclarations)
743 ,("pragmas_", ITpragmas)
744 ,("forall_", ITforall)
745 ,("U_", ITunfold False)
746 ,("U!_", ITunfold True)
748 ,("coerce_in_", ITcoerce_in)
749 ,("coerce_out_", ITcoerce_out)
751 ,("integer_", ITinteger_lit)
752 ,("rational_", ITrational_lit)
753 ,("addr_", ITaddr_lit)
754 ,("float_", ITfloat_lit)
755 ,("string_", ITstring_lit)
756 ,("litlit_", ITlit_lit)
757 ,("ccall_", ITccall (False, False))
758 ,("ccall_GC_", ITccall (False, True))
759 ,("casm_", ITccall (True, False))
760 ,("casm_GC_", ITccall (True, True))
763 haskellKeywordsFM = listToUFM $
764 map (\ (x,y) -> (_PK_ x,y))
767 ,("newtype", ITnewtype)
770 ,("instance", ITinstance)
771 ,("infixl", ITinfixl)
772 ,("infixr", ITinfixr)
775 ,("case#", ITprim_case)
779 ,("deriving", ITderiving)
790 -- doDiscard rips along really fast looking for a double semicolon,
791 -- indicating the end of the pragma we're skipping
792 doDiscard inStr buf =
793 -- _trace (show (C# (currentChar# buf))) $
794 case currentChar# buf of
797 case lookAhead# buf 1# of
798 ';'# -> incLexeme (incLexeme buf)
799 _ -> doDiscard inStr (incLexeme buf)
801 doDiscard inStr (incLexeme buf)
804 odd_slashes buf flg i# =
805 case lookAhead# buf i# of
806 '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
809 case lookAhead# buf (negateInt# 1#) of --backwards, actually
810 '\\'# -> -- escaping something..
811 if odd_slashes buf True (negateInt# 2#) then
812 -- odd number of slashes, " is escaped.
813 doDiscard inStr (incLexeme buf)
815 -- even number of slashes, \ is escaped.
816 doDiscard (not inStr) (incLexeme buf)
817 _ -> case inStr of -- forced to avoid build-up
818 True -> doDiscard False (incLexeme buf)
819 False -> doDiscard True (incLexeme buf)
820 _ -> doDiscard inStr (incLexeme buf)
825 my_span :: (a -> Bool) -> [a] -> ([a],[a])
826 my_span p xs = go [] xs
828 go so_far (x:xs') | p x = go (x:so_far) xs'
829 go so_far xs = (reverse so_far, xs)
831 my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a])
832 my_span' p xs = go [] 0 xs
834 go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs'
835 go so_far n xs = (reverse so_far,n, xs)
839 %************************************************************************
841 \subsection{Other utility functions
843 %************************************************************************
846 type IfM a = MaybeErr a Error
848 returnIf :: a -> IfM a
849 thenIf :: IfM a -> (a -> IfM b) -> IfM b
850 happyError :: Int -> [IfaceToken] -> IfM a
852 returnIf a = Succeeded a
854 thenIf (Succeeded a) k = k a
855 thenIf (Failed err) _ = Failed err
857 happyError ln toks = Failed (ifaceParseErr ln toks)
859 -----------------------------------------------------------------
861 ifaceParseErr ln toks sty
862 = hsep [ptext SLIT("Interface-file parse error: line"), int ln, ptext SLIT("toks="), text (show (take 10 toks))]