2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
4 \section[Lexical analysis]{Lexical analysis}
7 #include "HsVersions.h"
11 isLexCon, isLexVar, isLexId, isLexSym,
12 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
13 mkTupNameStr, ifaceParseErr,
16 IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError,
22 IMPORT_1_3(Char(isDigit, isAlpha, isAlphanum, isUpper,isLower, isSpace, ord))
24 IMPORT_DELOOPER(IdLoop) -- get the CostCentre type&constructors from here
26 import CmdLineOpts ( opt_IgnoreIfacePragmas )
27 import Demand ( Demand(..) {- instance Read -} )
28 import UniqFM ( UniqFM, listToUFM, lookupUFM)
29 --import FiniteMap ( FiniteMap, listToFM, lookupFM )
30 #if __GLASGOW_HASKELL__ >= 202
31 import Maybes ( MaybeErr(..) )
33 import Maybes ( Maybe(..), MaybeErr(..) )
39 import ErrUtils ( Error(..) )
40 import Outputable ( Outputable(..) )
41 import PprStyle ( PprStyle(..) )
42 import Util ( nOfThem, panic )
47 #if __GLASGOW_HASKELL__ <= 201
54 %************************************************************************
56 \subsection{Lexical categories}
58 %************************************************************************
60 These functions test strings to see if they fit the lexical categories
61 defined in the Haskell report. Normally applied as in e.g. @isCon
65 isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
66 isLexVarId, isLexVarSym :: FAST_STRING -> Bool
68 isLexCon cs = isLexConId cs || isLexConSym cs
69 isLexVar cs = isLexVarId cs || isLexVarSym cs
71 isLexId cs = isLexConId cs || isLexVarId cs
72 isLexSym cs = isLexConSym cs || isLexVarSym cs
78 | cs == SLIT("[]") = True
79 | c == '(' = True -- (), (,), (,,), ...
80 | otherwise = isUpper c || isUpperISO c
86 | otherwise = isLower c || isLowerISO c
92 | otherwise = c == ':'
99 | otherwise = isSymbolASCII c
105 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
106 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
107 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
108 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
109 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
110 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
114 %************************************************************************
116 \subsection{Tuple strings -- ugh!}
118 %************************************************************************
121 mkTupNameStr 0 = SLIT("()")
122 mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
123 mkTupNameStr 2 = _PK_ "(,)" -- not strictly necessary
124 mkTupNameStr 3 = _PK_ "(,,)" -- ditto
125 mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
126 mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
131 %************************************************************************
133 \subsection{Data types}
135 %************************************************************************
137 The token data type, fairly un-interesting except from two constructors,
138 @ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity,
139 strictness, unfolding etc) and types for id decls.
141 The Idea/Observation here is that the renamer needs to scan through
142 all of an interface file before it can continue. But only a fraction
143 of the information contained in the file turns out to be useful, so
144 delaying as much as possible of the scanning and parsing of an
145 interface file Makes Sense (Heap profiles of the compiler
146 show at a reduction in heap usage by at least a factor of two,
149 Hence, the interface file lexer spots when value declarations are
150 being scanned and return the @ITidinfo@ and @ITtype@ constructors
151 for the type and any other id info for that binding (unfolding, strictness
152 etc). These constructors are applied to the result of lexing these sub-chunks.
154 The lexing of the type and id info is all done lazily, of course, so
155 the scanning (and subsequent parsing) will be done *only* on the ids the
156 renamer finds out that it is interested in. The rest will just be junked.
157 Laziness, you know it makes sense :-)
161 = ITinterface -- keywords
181 | ITbang -- magic symbols
196 | ITvarid FAST_STRING
197 | ITconid FAST_STRING
198 | ITvarsym FAST_STRING
199 | ITconsym FAST_STRING
200 | ITqvarid (FAST_STRING,FAST_STRING)
201 | ITqconid (FAST_STRING,FAST_STRING)
202 | ITqvarsym (FAST_STRING,FAST_STRING)
203 | ITqconsym (FAST_STRING,FAST_STRING)
205 | ITidinfo [IfaceToken] -- lazily return the stream of tokens for
206 -- the info attached to an id.
207 | ITtysig [IfaceToken] -- lazily return the stream of tokens for
208 -- the info attached to an id.
209 -- Stuff for reading unfoldings
210 | ITarity | ITstrict | ITunfold
211 | ITdemand [Demand] | ITbottom
212 | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
213 | ITcoerce_in | ITcoerce_out | ITatsign
214 | ITccall (Bool,Bool) -- (is_casm, may_gc)
216 | ITchar Char | ITstring FAST_STRING
217 | ITinteger Integer | ITdouble Double
218 | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
219 | ITunknown String -- Used when the lexer can't make sense of it
220 deriving Text -- debugging
222 instance Text CostCentre -- cheat!
226 %************************************************************************
228 \subsection{The lexical analyser}
230 %************************************************************************
233 lexIface :: StringBuffer -> [IfaceToken]
236 -- if bufferExhausted buf then
239 -- _trace ("Lexer: "++[C# (currentChar# buf)]) $
240 case currentChar# buf of
241 -- whitespace and comments, ignore.
242 ' '# -> lexIface (stepOn buf)
243 '\t'# -> lexIface (stepOn buf)
244 '\n'# -> lexIface (stepOn buf)
246 -- Numbers and comments
248 case lookAhead# buf 1# of
249 '-'# -> lex_comment (stepOnBy# buf 2#)
252 then lex_num (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
255 -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
256 -- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
259 case prefixMatch (stepOn buf) "..)" of
260 Just buf' -> ITdotdot : lexIface (stepOverLexeme buf')
262 case lookAhead# buf 1# of
263 ','# -> lex_tuple Nothing (stepOnBy# buf 2#)
264 ')'# -> ITconid SLIT("()") : lexIface (stepOnBy# buf 2#)
265 _ -> IToparen : lexIface (stepOn buf)
267 '{'# -> ITocurly : lexIface (stepOn buf)
268 '}'# -> ITccurly : lexIface (stepOn buf)
269 ')'# -> ITcparen : lexIface (stepOn buf)
271 case lookAhead# buf 1# of
272 ']'# -> ITconid SLIT("[]") : lexIface (stepOnBy# buf 2#)
273 _ -> ITobrack : lexIface (stepOn buf)
274 ']'# -> ITcbrack : lexIface (stepOn buf)
275 ','# -> ITcomma : lexIface (stepOn buf)
276 ':'# -> case lookAhead# buf 1# of
277 ':'# -> ITdcolon : lexIface (stepOnBy# buf 2#)
278 _ -> lex_id (incLexeme buf)
279 ';'# -> ITsemi : lexIface (stepOn buf)
280 '\"'# -> case untilEndOfString# (stepOn buf) of
282 -- the string literal does *not* include the dquotes
283 case lexemeToFastString buf' of
284 v -> ITstring v : lexIface (stepOn (stepOverLexeme buf'))
287 -- untilEndOfChar# extends the current lexeme until
288 -- it hits a non-escaped single quote. The lexeme of the
289 -- StringBuffer returned does *not* include the closing quote,
290 -- hence we augment the lexeme and make sure to add the
291 -- starting quote, before `read'ing the string.
293 case untilEndOfChar# (stepOn buf) of
294 buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
295 [ (ch, rest)] -> ITchar ch : lexIface (stepOverLexeme (incLexeme buf'))
297 -- ``thingy'' form for casm
299 case lookAhead# buf 1# of
300 '`'# -> lex_cstring (stepOnBy# buf 2#) -- remove the `` and go.
301 _ -> lex_id (incLexeme buf) -- add ` to lexeme and assume
302 -- scanning an id of some sort.
305 case lookAhead# buf 1# of
306 'S'# -> case lookAhead# buf 2# of
308 lex_demand (stepOnUntil (not . isSpace)
309 (stepOnBy# buf 3#)) -- past _S_
310 's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of
311 Just buf' -> lex_scc (stepOnUntil (not . isSpace) (stepOverLexeme buf'))
312 Nothing -> lex_keyword (stepOnBy# buf 1#) -- drop the '_' and assume
314 _ -> lex_keyword (stepOn buf)
317 if bufferExhausted (stepOn buf) then
322 if isDigit (C# c) then
323 lex_num (id) (ord# c -# ord# '0'#) (incLexeme buf)
328 -- _trace ("comment: "++[C# (currentChar# buf)]) $
329 case untilChar# buf '\n'# of {buf' -> lexIface (stepOverLexeme buf')}
333 -- _trace ("demand: "++[C# (currentChar# buf)]) $
334 case read_em [] buf of { (ls,buf') -> ITdemand ls : lexIface (stepOverLexeme buf')}
336 -- code snatched from Demand.lhs
338 -- _trace ("read_em: "++[C# (currentChar# buf)]) $
339 case currentChar# buf of
340 'L'# -> read_em (WwLazy False : acc) (stepOn buf)
341 'A'# -> read_em (WwLazy True : acc) (stepOn buf)
342 'S'# -> read_em (WwStrict : acc) (stepOn buf)
343 'P'# -> read_em (WwPrim : acc) (stepOn buf)
344 'E'# -> read_em (WwEnum : acc) (stepOn buf)
345 ')'# -> (reverse acc, stepOn buf)
346 'U'# -> do_unpack True acc (stepOnBy# buf 2#)
347 'u'# -> do_unpack False acc (stepOnBy# buf 2#)
348 _ -> (reverse acc, buf)
350 do_unpack wrapper_unpacks acc buf
351 = case read_em [] buf of
352 (stuff, rest) -> read_em (WwUnpack wrapper_unpacks stuff : acc) rest
356 -- _trace ("scc: "++[C# (currentChar# buf)]) $
357 case currentChar# buf of
360 case prefixMatch (stepOn buf) "NO_CC\"" of
361 Just buf' -> ITscc noCostCentre : lexIface (stepOverLexeme buf')
363 case prefixMatch (stepOn buf) "CURRENT_CC\"" of
364 Just buf' -> ITscc useCurrentCostCentre : lexIface (stepOverLexeme buf')
366 case prefixMatch (stepOn buf) "OVERHEAD\"" of
367 Just buf' -> ITscc overheadCostCentre : lexIface (stepOverLexeme buf')
369 case prefixMatch (stepOn buf) "DONT_CARE\"" of
370 Just buf' -> ITscc dontCareCostCentre : lexIface (stepOverLexeme buf')
372 case prefixMatch (stepOn buf) "SUBSUMED\"" of
373 Just buf' -> ITscc subsumedCosts : lexIface (stepOverLexeme buf')
375 case prefixMatch (stepOn buf) "CAFs_in_...\"" of
376 Just buf' -> ITscc preludeCafsCostCentre : lexIface (stepOverLexeme buf')
378 case prefixMatch (stepOn buf) "CC_CAFs_in_..." of
380 case untilChar# (stepOverLexeme buf') '\"'# of
381 buf'' -> ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_):
382 lexIface (stepOn (stepOverLexeme buf''))
384 case prefixMatch (stepOn buf) "DICTs_in_...\"" of
385 Just buf' -> ITscc (preludeDictsCostCentre True) : lexIface (stepOverLexeme buf')
387 case prefixMatch (stepOn buf) "CC_DICTs_in_..." of
389 case untilChar# (stepOverLexeme buf') '\"'# of
390 buf'' -> ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True):
391 lexIface (stepOn (stepOverLexeme buf''))
393 case prefixMatch (stepOn buf) "CAF:" of
395 case untilChar# (stepOverLexeme buf') '\"'# of
396 buf'' -> ITscc (cafifyCC (mkUserCC (lexemeToFastString buf'') _NIL_ _NIL_)):
397 lexIface (stepOn (stepOverLexeme buf''))
399 case untilChar# (stepOn buf) '\"'# of
400 buf' -> ITscc (mkUserCC (lexemeToFastString buf') _NIL_ _NIL_):
401 lexIface (stepOn (stepOverLexeme buf'))
402 c -> ITunknown [C# c] : lexIface (stepOn buf)
406 lex_num :: (Int -> Int) -> Int# -> StringBuffer -> [IfaceToken]
407 lex_num minus acc# buf =
408 -- _trace ("lex_num: "++[C# (currentChar# buf)]) $
409 case scanNumLit (I# acc#) buf of
411 case currentChar# buf' of
413 -- this case is not optimised at all, as the
414 -- presence of floating point numbers in interface
415 -- files is not that common. (ToDo)
416 case expandWhile (isDigit) (incLexeme buf') of
417 buf'' -> -- points to first non digit char
418 case reads (lexemeToString buf'') of
419 [(v,_)] -> ITdouble v : lexIface (stepOverLexeme buf'')
420 _ -> ITinteger (fromInt (minus acc')) : lexIface (stepOverLexeme buf')
422 -- case reads (lexemeToString buf') of
423 -- [(i,_)] -> ITinteger i : lexIface (stepOverLexeme buf')
427 -- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $
428 case currentChar# buf of
429 ':'# -> case lookAhead# buf 1# of
430 '_'# -> -- a binding, type (and other id-info) follows,
431 -- to make the parser ever so slightly, we push
433 lex_decl (stepOnBy# buf 2#)
434 v# -> ITunknown (['_',':',C# v#]) : lexIface (stepOnBy# buf 2#)
436 case expandWhile (is_kwd_char) buf of
438 let kw = lexemeToFastString buf' in
439 -- _trace ("kw: "++lexemeToString buf') $
440 case lookupUFM ifaceKeywordsFM kw of
441 Nothing -> ITunknown (_UNPK_ kw) : -- (minor) sigh
442 lexIface (stepOverLexeme buf')
443 Just xx -> xx : lexIface (stepOverLexeme buf')
446 case doDiscard False buf of -- spin until ;; is found
448 {- _trace (show (lexemeToString buf')) $ -}
449 case currentChar# buf' of
450 '\n'# -> -- newline, no id info.
451 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
452 lexIface (stepOverLexeme buf')
453 '\r'# -> -- just to be sure for those Win* boxes..
454 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
455 lexIface (stepOverLexeme buf')
457 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
458 lexIface (stepOverLexeme buf')
459 c -> -- run all over the id info
460 case doDiscard False (stepOverLexeme buf') of -- spin until ;; is found (outside a string!)
462 --_trace ((C# c):show (lexemeToString (decLexeme buf'))) $
463 --_trace (show (lexemeToString (decLexeme buf''))) $
464 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))):
465 let ls = lexIface (stepOverLexeme buf'') in
466 if opt_IgnoreIfacePragmas then
469 let is = lexIface (lexemeToBuffer (decLexeme buf'')) in
474 is_kwd_char c@(C# c#) =
475 isAlphanum c || -- OLD: c `elem` "_@/\\"
487 -- _trace ("lex_cstring: "++[C# (currentChar# buf)]) $
488 case expandUntilMatch buf "\'\'" of
489 buf' -> ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))) :
490 lexIface (stepOverLexeme buf')
493 lex_tuple module_dot buf =
494 -- _trace ("lex_tuple: "++[C# (currentChar# buf)]) $
498 case currentChar# buf of
499 ','# -> go (n+1) (stepOn buf)
500 ')'# -> end_lex_id module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
501 _ -> ITunknown ("tuple " ++ show n) : lexIface buf
503 -- Similarly ' itself is ok inside an identifier, but not at the start
505 id_arr :: _ByteArray Int
507 unsafePerformPrimIO (
508 newCharArray (0,255) `thenPrimIO` \ barr ->
510 loop 256# = returnPrimIO ()
512 if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
513 writeCharArray barr (I# i#) '\1' `seqPrimIO`
516 writeCharArray barr (I# i#) '\0' `seqPrimIO`
520 unsafeFreezeByteArray barr)
524 _ByteArray _ arr# = id_arr
526 case ord# (indexCharArray# arr# (ord# c#)) of
530 --is_id_char c@(C# c#) = isAlphanum c || is_sym c#
534 ':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True;
535 '#'# -> True; '$'# -> True; ':'# -> True; '%'# -> True;
536 '&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True;
537 '/'# -> True; '<'# -> True; '='# -> True; '>'# -> True;
538 '?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True;
539 '-'# -> True; '~'# -> True; '@'# -> True; _ -> False }
541 --isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
544 mod_arr :: _ByteArray Int
546 unsafePerformPrimIO (
547 newCharArray (0,255) `thenPrimIO` \ barr ->
549 loop 256# = returnPrimIO ()
551 if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
552 writeCharArray barr (I# i#) '\1' `seqPrimIO`
555 writeCharArray barr (I# i#) '\0' `seqPrimIO`
559 unsafeFreezeByteArray barr)
562 is_mod_char (C# c#) =
564 _ByteArray _ arr# = mod_arr
566 case ord# (indexCharArray# arr# (ord# c#)) of
570 --isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
574 case _scc_ "lex_id.span" my_span' (is_mod_char) cs of
578 [] -> lex_id2 Nothing cs
579 _ -> lex_id3 Nothing len xs cs
583 [] -> lex_id2 Nothing cs
586 pk_str = _PK_ (xs::String)
587 len = lengthPS pk_str
590 error "Well, I never!"
592 lex_id2 (Just pk_str) cs''
594 [] -> lex_id2 Nothing cs
595 _ -> lex_id3 Nothing len xs cs'
600 -- _trace ("lex_id: "++[C# (currentChar# buf)]) $
601 case expandWhile (is_mod_char) buf of
603 case currentChar# buf' of
605 if not (emptyLexeme buf') then
606 -- _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $
607 case lexemeToFastString buf' of
608 l@(FastString u# l# ba#) -> lex_id2 (Just (FastString u# l# ba#))
609 (stepOn (stepOverLexeme buf'))
612 _ -> lex_id2 Nothing buf'
614 -- Dealt with the Module.part
615 lex_id2 module_dot buf =
616 -- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
617 case currentChar# buf of
619 case lookAhead# buf 1# of
620 ']'# -> end_lex_id module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
621 _ -> lex_id3 module_dot buf
623 case lookAhead# buf 1# of
624 ')'# -> end_lex_id module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
625 ','# -> lex_tuple module_dot (stepOnBy# buf 2#)
626 _ -> lex_id3 module_dot buf
627 ':'# -> lex_id3 module_dot (incLexeme buf)
628 _ -> lex_id3 module_dot buf
632 -- Dealt with [], (), : special cases
634 lex_id3 module_dot buf =
635 -- _trace ("lex_id3: "++[C# (currentChar# buf)]) $
636 case expandWhile (is_id_char) buf of
640 end_lex_id module_dot (mk_var_token lexeme) (stepOverLexeme buf')
642 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
643 Just kwd_token -> kwd_token : lexIface new_buf
644 Nothing -> mk_var_token lexeme : lexIface new_buf
646 lexeme = lexemeToFastString buf'
647 new_buf = stepOverLexeme buf'
651 lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs
652 lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs
653 lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs
654 lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs
655 lex_id2 module_dot xs cs = lex_id3 module_dot xs cs
659 -- Dealt with [], (), : special cases
662 lex_id3 module_dot len_xs xs cs =
663 case my_span' (is_id_char) cs of
664 (xs1,len_xs1,rest) ->
666 Just m -> end_lex_id (Just m) (mk_var_token rxs) rest --OLD:_PK_ (reverse xs))) rest
668 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM rxs of
669 Just kwd_token -> kwd_token : lexIface rest
670 other -> token : lexIface cs end_lex_id Nothing (mk_var_token rxs) rest
672 rxs = packNChars (len_xs+len_xs1) (xs++xs1) -- OLD: _PK_ (reverse xs)
674 mk_var_token pk_str =
679 -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower,
680 -- remove the second half of disjunction when using a 1.3 prelude.
682 if isUpper f then ITconid pk_str
683 else if isLower f then ITvarid pk_str
684 else if f == ':' then ITconsym pk_str
685 else if isLowerISO f then ITvarid pk_str
686 else if isUpperISO f then ITconid pk_str
690 mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n
691 | f == ':' = ITconsym n
692 | isAlpha f = ITvarid n
693 | otherwise = ITvarsym n
698 end_lex_id Nothing token buf = token : lexIface buf
699 end_lex_id (Just m) token buf =
701 ITconid n -> ITqconid (m,n) : lexIface buf
702 ITvarid n -> ITqvarid (m,n) : lexIface buf
703 ITconsym n -> ITqconsym (m,n) : lexIface buf
704 ITvarsym n -> ITqvarsym (m,n) : lexIface buf
705 ITbang -> ITqvarsym (m,SLIT("!")) : lexIface buf
706 _ -> ITunknown (show token) : lexIface buf
709 ifaceKeywordsFM :: UniqFM IfaceToken
710 ifaceKeywordsFM = listToUFM $
711 map (\ (x,y) -> (_PK_ x,y))
714 ,("letrec_", ITletrec)
715 ,("interface_", ITinterface)
716 ,("usages_", ITusages)
717 ,("versions_", ITversions)
718 ,("exports_", ITexports)
719 ,("instance_modules_", ITinstance_modules)
720 ,("instances_", ITinstances)
721 ,("fixities_", ITfixities)
722 ,("declarations_", ITdeclarations)
723 ,("pragmas_", ITpragmas)
724 ,("forall_", ITforall)
727 ,("coerce_in_", ITcoerce_in)
728 ,("coerce_out_", ITcoerce_out)
730 ,("integer_", ITinteger_lit)
731 ,("rational_", ITrational_lit)
732 ,("addr_", ITaddr_lit)
733 ,("float_", ITfloat_lit)
734 ,("string_", ITstring_lit)
735 ,("litlit_", ITlit_lit)
736 ,("ccall_", ITccall (False, False))
737 ,("ccall_GC_", ITccall (False, True))
738 ,("casm_", ITccall (True, False))
739 ,("casm_GC_", ITccall (True, True))
742 haskellKeywordsFM = listToUFM $
743 map (\ (x,y) -> (_PK_ x,y))
746 ,("newtype", ITnewtype)
749 ,("instance", ITinstance)
750 ,("infixl", ITinfixl)
751 ,("infixr", ITinfixr)
754 ,("case#", ITprim_case)
758 ,("deriving", ITderiving)
769 -- doDiscard rips along really fast looking for a double semicolon,
770 -- indicating the end of the pragma we're skipping
771 doDiscard inStr buf =
772 -- _trace (show (C# (currentChar# buf))) $
773 case currentChar# buf of
776 case lookAhead# buf 1# of
777 ';'# -> incLexeme (incLexeme buf)
778 _ -> doDiscard inStr (incLexeme buf)
780 doDiscard inStr (incLexeme buf)
783 odd_slashes buf flg i# =
784 case lookAhead# buf i# of
785 '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
788 case lookAhead# buf (negateInt# 1#) of --backwards, actually
789 '\\'# -> -- escaping something..
790 if odd_slashes buf True (negateInt# 2#) then
791 -- odd number of slashes, " is escaped.
792 doDiscard inStr (incLexeme buf)
794 -- even number of slashes, \ is escaped.
795 doDiscard (not inStr) (incLexeme buf)
796 _ -> case inStr of -- forced to avoid build-up
797 True -> doDiscard False (incLexeme buf)
798 False -> doDiscard True (incLexeme buf)
799 _ -> doDiscard inStr (incLexeme buf)
804 my_span :: (a -> Bool) -> [a] -> ([a],[a])
805 my_span p xs = go [] xs
807 go so_far (x:xs') | p x = go (x:so_far) xs'
808 go so_far xs = (reverse so_far, xs)
810 my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a])
811 my_span' p xs = go [] 0 xs
813 go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs'
814 go so_far n xs = (reverse so_far,n, xs)
818 %************************************************************************
820 \subsection{Other utility functions
822 %************************************************************************
825 type IfM a = MaybeErr a Error
827 returnIf :: a -> IfM a
828 thenIf :: IfM a -> (a -> IfM b) -> IfM b
829 happyError :: Int -> [IfaceToken] -> IfM a
831 returnIf a = Succeeded a
833 thenIf (Succeeded a) k = k a
834 thenIf (Failed err) _ = Failed err
836 happyError ln toks = Failed (ifaceParseErr ln toks)
838 -----------------------------------------------------------------
840 ifaceParseErr ln toks sty
841 = hsep [ptext SLIT("Interface-file parse error: line"), int ln, ptext SLIT("toks="), text (show (take 10 toks))]