2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
4 \section[Lexical analysis]{Lexical analysis}
7 #include "HsVersions.h"
11 isLexCon, isLexVar, isLexId, isLexSym,
12 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
13 mkTupNameStr, ifaceParseErr,
16 IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError,
22 IMPORT_1_3(Char(isDigit, isAlpha, isAlphanum, isUpper,isLower, isSpace, ord))
24 IMPORT_DELOOPER(IdLoop) -- get the CostCentre type&constructors from here
26 import CmdLineOpts ( opt_IgnoreIfacePragmas )
27 import Demand ( Demand(..) {- instance Read -} )
28 import UniqFM ( UniqFM, listToUFM, lookupUFM)
29 import BasicTypes ( NewOrData(..) )
31 #if __GLASGOW_HASKELL__ >= 202
32 import Maybes ( MaybeErr(..) )
34 import Maybes ( Maybe(..), MaybeErr(..) )
40 import ErrUtils ( Error(..) )
41 import Outputable ( Outputable(..), PprStyle(..) )
42 import Util ( nOfThem, panic )
47 #if __GLASGOW_HASKELL__ <= 201
54 %************************************************************************
56 \subsection{Lexical categories}
58 %************************************************************************
60 These functions test strings to see if they fit the lexical categories
61 defined in the Haskell report. Normally applied as in e.g. @isCon
65 isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
66 isLexVarId, isLexVarSym :: FAST_STRING -> Bool
68 isLexCon cs = isLexConId cs || isLexConSym cs
69 isLexVar cs = isLexVarId cs || isLexVarSym cs
71 isLexId cs = isLexConId cs || isLexVarId cs
72 isLexSym cs = isLexConSym cs || isLexVarSym cs
78 | cs == SLIT("[]") = True
79 | c == '(' = True -- (), (,), (,,), ...
80 | otherwise = isUpper c || isUpperISO c
86 | otherwise = isLower c || isLowerISO c
92 | otherwise = c == ':'
99 | otherwise = isSymbolASCII c
105 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
106 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
107 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
108 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
109 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
110 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
114 %************************************************************************
116 \subsection{Tuple strings -- ugh!}
118 %************************************************************************
121 mkTupNameStr 0 = SLIT("()")
122 mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
123 mkTupNameStr 2 = _PK_ "(,)" -- not strictly necessary
124 mkTupNameStr 3 = _PK_ "(,,)" -- ditto
125 mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
126 mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
131 %************************************************************************
133 \subsection{Data types}
135 %************************************************************************
137 The token data type, fairly un-interesting except from two constructors,
138 @ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity,
139 strictness, unfolding etc) and types for id decls.
141 The Idea/Observation here is that the renamer needs to scan through
142 all of an interface file before it can continue. But only a fraction
143 of the information contained in the file turns out to be useful, so
144 delaying as much as possible of the scanning and parsing of an
145 interface file Makes Sense (Heap profiles of the compiler
146 show at a reduction in heap usage by at least a factor of two,
149 Hence, the interface file lexer spots when value declarations are
150 being scanned and return the @ITidinfo@ and @ITtype@ constructors
151 for the type and any other id info for that binding (unfolding, strictness
152 etc). These constructors are applied to the result of lexing these sub-chunks.
154 The lexing of the type and id info is all done lazily, of course, so
155 the scanning (and subsequent parsing) will be done *only* on the ids the
156 renamer finds out that it is interested in. The rest will just be junked.
157 Laziness, you know it makes sense :-)
161 = ITinterface -- keywords
181 | ITbang -- magic symbols
196 | ITvarid FAST_STRING
197 | ITconid FAST_STRING
198 | ITvarsym FAST_STRING
199 | ITconsym FAST_STRING
200 | ITqvarid (FAST_STRING,FAST_STRING)
201 | ITqconid (FAST_STRING,FAST_STRING)
202 | ITqvarsym (FAST_STRING,FAST_STRING)
203 | ITqconsym (FAST_STRING,FAST_STRING)
205 | ITidinfo [IfaceToken] -- lazily return the stream of tokens for
206 -- the info attached to an id.
207 | ITtysig [IfaceToken] -- lazily return the stream of tokens for
208 -- the info attached to an id.
209 -- Stuff for reading unfoldings
211 | ITunfold Bool -- True <=> there's an INLINE pragma on this Id
212 | ITdemand [Demand] | ITbottom
213 | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
214 | ITcoerce_in | ITcoerce_out | ITatsign
215 | ITccall (Bool,Bool) -- (is_casm, may_gc)
217 | ITchar Char | ITstring FAST_STRING
218 | ITinteger Integer | ITdouble Double
219 | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
220 | ITunknown String -- Used when the lexer can't make sense of it
221 deriving Text -- debugging
223 instance Text CostCentre -- cheat!
227 %************************************************************************
229 \subsection{The lexical analyser}
231 %************************************************************************
234 lexIface :: StringBuffer -> [IfaceToken]
237 -- if bufferExhausted buf then
240 -- _trace ("Lexer: "++[C# (currentChar# buf)]) $
241 case currentChar# buf of
242 -- whitespace and comments, ignore.
243 ' '# -> lexIface (stepOn buf)
244 '\t'# -> lexIface (stepOn buf)
245 '\n'# -> lexIface (stepOn buf)
247 -- Numbers and comments
249 case lookAhead# buf 1# of
250 '-'# -> lex_comment (stepOnBy# buf 2#)
253 then lex_num (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
256 -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
257 -- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
260 case prefixMatch (stepOn buf) "..)" of
261 Just buf' -> ITdotdot : lexIface (stepOverLexeme buf')
263 case lookAhead# buf 1# of
264 ','# -> lex_tuple Nothing (stepOnBy# buf 2#)
265 ')'# -> ITconid SLIT("()") : lexIface (stepOnBy# buf 2#)
266 _ -> IToparen : lexIface (stepOn buf)
268 '{'# -> ITocurly : lexIface (stepOn buf)
269 '}'# -> ITccurly : lexIface (stepOn buf)
270 ')'# -> ITcparen : lexIface (stepOn buf)
272 case lookAhead# buf 1# of
273 ']'# -> ITconid SLIT("[]") : lexIface (stepOnBy# buf 2#)
274 _ -> ITobrack : lexIface (stepOn buf)
275 ']'# -> ITcbrack : lexIface (stepOn buf)
276 ','# -> ITcomma : lexIface (stepOn buf)
277 ':'# -> case lookAhead# buf 1# of
278 ':'# -> ITdcolon : lexIface (stepOnBy# buf 2#)
279 _ -> lex_id (incLexeme buf)
280 ';'# -> ITsemi : lexIface (stepOn buf)
281 '\"'# -> case untilEndOfString# (stepOn buf) of
283 -- the string literal does *not* include the dquotes
284 case lexemeToFastString buf' of
285 v -> ITstring v : lexIface (stepOn (stepOverLexeme buf'))
288 -- untilEndOfChar# extends the current lexeme until
289 -- it hits a non-escaped single quote. The lexeme of the
290 -- StringBuffer returned does *not* include the closing quote,
291 -- hence we augment the lexeme and make sure to add the
292 -- starting quote, before `read'ing the string.
294 case untilEndOfChar# (stepOn buf) of
295 buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
296 [ (ch, rest)] -> ITchar ch : lexIface (stepOverLexeme (incLexeme buf'))
298 -- ``thingy'' form for casm
300 case lookAhead# buf 1# of
301 '`'# -> lex_cstring (stepOnBy# buf 2#) -- remove the `` and go.
302 _ -> lex_id (incLexeme buf) -- add ` to lexeme and assume
303 -- scanning an id of some sort.
306 case lookAhead# buf 1# of
307 'S'# -> case lookAhead# buf 2# of
309 lex_demand (stepOnUntil (not . isSpace)
310 (stepOnBy# buf 3#)) -- past _S_
311 's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of
312 Just buf' -> lex_scc (stepOnUntil (not . isSpace) (stepOverLexeme buf'))
313 Nothing -> lex_keyword (stepOnBy# buf 1#) -- drop the '_' and assume
315 _ -> lex_keyword (stepOn buf)
318 if bufferExhausted (stepOn buf) then
323 if isDigit (C# c) then
324 lex_num (id) (ord# c -# ord# '0'#) (incLexeme buf)
329 -- _trace ("comment: "++[C# (currentChar# buf)]) $
330 case untilChar# buf '\n'# of {buf' -> lexIface (stepOverLexeme buf')}
334 -- _trace ("demand: "++[C# (currentChar# buf)]) $
335 case read_em [] buf of { (ls,buf') -> ITdemand ls : lexIface (stepOverLexeme buf')}
337 -- code snatched from Demand.lhs
339 -- _trace ("read_em: "++[C# (currentChar# buf)]) $
340 case currentChar# buf of
341 'L'# -> read_em (WwLazy False : acc) (stepOn buf)
342 'A'# -> read_em (WwLazy True : acc) (stepOn buf)
343 'S'# -> read_em (WwStrict : acc) (stepOn buf)
344 'P'# -> read_em (WwPrim : acc) (stepOn buf)
345 'E'# -> read_em (WwEnum : acc) (stepOn buf)
346 ')'# -> (reverse acc, stepOn buf)
347 'U'# -> do_unpack DataType True acc (stepOnBy# buf 2#)
348 'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
349 'N'# -> do_unpack NewType True acc (stepOnBy# buf 2#)
350 'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
351 _ -> (reverse acc, buf)
353 do_unpack new_or_data wrapper_unpacks acc buf
354 = case read_em [] buf of
355 (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
359 -- _trace ("scc: "++[C# (currentChar# buf)]) $
360 case currentChar# buf of
363 case prefixMatch (stepOn buf) "NO_CC\"" of
364 Just buf' -> ITscc noCostCentre : lexIface (stepOverLexeme buf')
366 case prefixMatch (stepOn buf) "CURRENT_CC\"" of
367 Just buf' -> ITscc useCurrentCostCentre : lexIface (stepOverLexeme buf')
369 case prefixMatch (stepOn buf) "OVERHEAD\"" of
370 Just buf' -> ITscc overheadCostCentre : lexIface (stepOverLexeme buf')
372 case prefixMatch (stepOn buf) "DONT_CARE\"" of
373 Just buf' -> ITscc dontCareCostCentre : lexIface (stepOverLexeme buf')
375 case prefixMatch (stepOn buf) "SUBSUMED\"" of
376 Just buf' -> ITscc subsumedCosts : lexIface (stepOverLexeme buf')
378 case prefixMatch (stepOn buf) "CAFs_in_...\"" of
379 Just buf' -> ITscc preludeCafsCostCentre : lexIface (stepOverLexeme buf')
381 case prefixMatch (stepOn buf) "CC_CAFs_in_..." of
383 case untilChar# (stepOverLexeme buf') '\"'# of
384 buf'' -> ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_):
385 lexIface (stepOn (stepOverLexeme buf''))
387 case prefixMatch (stepOn buf) "DICTs_in_...\"" of
388 Just buf' -> ITscc (preludeDictsCostCentre True) : lexIface (stepOverLexeme buf')
390 case prefixMatch (stepOn buf) "CC_DICTs_in_..." of
392 case untilChar# (stepOverLexeme buf') '\"'# of
393 buf'' -> ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True):
394 lexIface (stepOn (stepOverLexeme buf''))
398 case untilChar# buf '/'# of
400 let mod_name = lexemeToFastString buf' in
401 case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
403 let grp_name = lexemeToFastString buf'' in
404 case untilChar# (stepOn (stepOverLexeme buf'')) '\"'# of
406 let cc_name = lexemeToFastString buf''' in
407 (mkUserCC cc_name mod_name grp_name,
408 stepOn (stepOverLexeme buf'''))
410 case prefixMatch (stepOn buf) "CAF:" of
412 case match_user_cc (stepOverLexeme buf') of
413 (cc, buf'') -> ITscc (cafifyCC cc) : lexIface buf''
415 case match_user_cc (stepOn buf) of
416 (cc, buf'') -> ITscc cc : lexIface buf''
417 c -> ITunknown [C# c] : lexIface (stepOn buf)
421 lex_num :: (Int -> Int) -> Int# -> StringBuffer -> [IfaceToken]
422 lex_num minus acc# buf =
423 -- _trace ("lex_num: "++[C# (currentChar# buf)]) $
424 case scanNumLit (I# acc#) buf of
426 case currentChar# buf' of
428 -- this case is not optimised at all, as the
429 -- presence of floating point numbers in interface
430 -- files is not that common. (ToDo)
431 case expandWhile (isDigit) (incLexeme buf') of
432 buf'' -> -- points to first non digit char
433 case reads (lexemeToString buf'') of
434 [(v,_)] -> ITdouble v : lexIface (stepOverLexeme buf'')
435 _ -> ITinteger (fromInt (minus acc')) : lexIface (stepOverLexeme buf')
437 -- case reads (lexemeToString buf') of
438 -- [(i,_)] -> ITinteger i : lexIface (stepOverLexeme buf')
442 -- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $
443 case currentChar# buf of
444 ':'# -> case lookAhead# buf 1# of
445 '_'# -> -- a binding, type (and other id-info) follows,
446 -- to make the parser ever so slightly, we push
448 lex_decl (stepOnBy# buf 2#)
449 v# -> ITunknown (['_',':',C# v#]) : lexIface (stepOnBy# buf 2#)
451 case expandWhile (is_kwd_char) buf of
453 let kw = lexemeToFastString buf' in
454 -- _trace ("kw: "++lexemeToString buf') $
455 case lookupUFM ifaceKeywordsFM kw of
456 Nothing -> ITunknown (_UNPK_ kw) : -- (minor) sigh
457 lexIface (stepOverLexeme buf')
458 Just xx -> xx : lexIface (stepOverLexeme buf')
461 case doDiscard False buf of -- spin until ;; is found
463 {- _trace (show (lexemeToString buf')) $ -}
464 case currentChar# buf' of
465 '\n'# -> -- newline, no id info.
466 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
467 lexIface (stepOverLexeme buf')
468 '\r'# -> -- just to be sure for those Win* boxes..
469 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
470 lexIface (stepOverLexeme buf')
472 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
473 lexIface (stepOverLexeme buf')
474 c -> -- run all over the id info
475 case doDiscard False (stepOverLexeme buf') of -- spin until ;; is found (outside a string!)
477 --_trace ((C# c):show (lexemeToString (decLexeme buf'))) $
478 --_trace (show (lexemeToString (decLexeme buf''))) $
479 ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))):
480 let ls = lexIface (stepOverLexeme buf'') in
481 if opt_IgnoreIfacePragmas then
484 let is = lexIface (lexemeToBuffer (decLexeme buf'')) in
489 is_kwd_char c@(C# c#) =
490 isAlphanum c || -- OLD: c `elem` "_@/\\"
502 -- _trace ("lex_cstring: "++[C# (currentChar# buf)]) $
503 case expandUntilMatch buf "\'\'" of
504 buf' -> ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))) :
505 lexIface (stepOverLexeme buf')
508 lex_tuple module_dot buf =
509 -- _trace ("lex_tuple: "++[C# (currentChar# buf)]) $
513 case currentChar# buf of
514 ','# -> go (n+1) (stepOn buf)
515 ')'# -> end_lex_id module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
516 _ -> ITunknown ("tuple " ++ show n) : lexIface buf
518 -- Similarly ' itself is ok inside an identifier, but not at the start
520 id_arr :: _ByteArray Int
522 unsafePerformPrimIO (
523 newCharArray (0,255) `thenPrimIO` \ barr ->
525 loop 256# = returnPrimIO ()
527 if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
528 writeCharArray barr (I# i#) '\1' `seqPrimIO`
531 writeCharArray barr (I# i#) '\0' `seqPrimIO`
535 unsafeFreezeByteArray barr)
539 _ByteArray _ arr# = id_arr
541 case ord# (indexCharArray# arr# (ord# c#)) of
545 --is_id_char c@(C# c#) = isAlphanum c || is_sym c#
549 ':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True;
550 '#'# -> True; '$'# -> True; ':'# -> True; '%'# -> True;
551 '&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True;
552 '/'# -> True; '<'# -> True; '='# -> True; '>'# -> True;
553 '?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True;
554 '-'# -> True; '~'# -> True; '@'# -> True; _ -> False }
556 --isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
559 mod_arr :: _ByteArray Int
561 unsafePerformPrimIO (
562 newCharArray (0,255) `thenPrimIO` \ barr ->
564 loop 256# = returnPrimIO ()
566 if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
567 writeCharArray barr (I# i#) '\1' `seqPrimIO`
570 writeCharArray barr (I# i#) '\0' `seqPrimIO`
574 unsafeFreezeByteArray barr)
577 is_mod_char (C# c#) =
579 _ByteArray _ arr# = mod_arr
581 case ord# (indexCharArray# arr# (ord# c#)) of
585 --isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
589 case _scc_ "lex_id.span" my_span' (is_mod_char) cs of
593 [] -> lex_id2 Nothing cs
594 _ -> lex_id3 Nothing len xs cs
598 [] -> lex_id2 Nothing cs
601 pk_str = _PK_ (xs::String)
602 len = lengthPS pk_str
605 error "Well, I never!"
607 lex_id2 (Just pk_str) cs''
609 [] -> lex_id2 Nothing cs
610 _ -> lex_id3 Nothing len xs cs'
615 -- _trace ("lex_id: "++[C# (currentChar# buf)]) $
616 case expandWhile (is_mod_char) buf of
618 case currentChar# buf' of
620 if not (emptyLexeme buf') then
621 -- _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $
622 case lexemeToFastString buf' of
623 l@(FastString u# l# ba#) -> lex_id2 (Just (FastString u# l# ba#))
624 (stepOn (stepOverLexeme buf'))
627 _ -> lex_id2 Nothing buf'
629 -- Dealt with the Module.part
630 lex_id2 module_dot buf =
631 -- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
632 case currentChar# buf of
634 case lookAhead# buf 1# of
635 ']'# -> end_lex_id module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
636 _ -> lex_id3 module_dot buf
638 case lookAhead# buf 1# of
639 ')'# -> end_lex_id module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
640 ','# -> lex_tuple module_dot (stepOnBy# buf 2#)
641 _ -> lex_id3 module_dot buf
642 ':'# -> lex_id3 module_dot (incLexeme buf)
643 _ -> lex_id3 module_dot buf
647 -- Dealt with [], (), : special cases
649 lex_id3 module_dot buf =
650 -- _trace ("lex_id3: "++[C# (currentChar# buf)]) $
651 case expandWhile (is_id_char) buf of
655 end_lex_id module_dot (mk_var_token lexeme) (stepOverLexeme buf')
657 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
658 Just kwd_token -> kwd_token : lexIface new_buf
659 Nothing -> mk_var_token lexeme : lexIface new_buf
661 lexeme = lexemeToFastString buf'
662 new_buf = stepOverLexeme buf'
666 lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs
667 lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs
668 lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs
669 lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs
670 lex_id2 module_dot xs cs = lex_id3 module_dot xs cs
674 -- Dealt with [], (), : special cases
677 lex_id3 module_dot len_xs xs cs =
678 case my_span' (is_id_char) cs of
679 (xs1,len_xs1,rest) ->
681 Just m -> end_lex_id (Just m) (mk_var_token rxs) rest --OLD:_PK_ (reverse xs))) rest
683 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM rxs of
684 Just kwd_token -> kwd_token : lexIface rest
685 other -> token : lexIface cs end_lex_id Nothing (mk_var_token rxs) rest
687 rxs = packNChars (len_xs+len_xs1) (xs++xs1) -- OLD: _PK_ (reverse xs)
689 mk_var_token pk_str =
694 -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower,
695 -- remove the second half of disjunction when using a 1.3 prelude.
697 if isUpper f then ITconid pk_str
698 else if isLower f then ITvarid pk_str
699 else if f == ':' then ITconsym pk_str
700 else if isLowerISO f then ITvarid pk_str
701 else if isUpperISO f then ITconid pk_str
705 mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n
706 | f == ':' = ITconsym n
707 | isAlpha f = ITvarid n
708 | otherwise = ITvarsym n
713 end_lex_id Nothing token buf = token : lexIface buf
714 end_lex_id (Just m) token buf =
716 ITconid n -> ITqconid (m,n) : lexIface buf
717 ITvarid n -> ITqvarid (m,n) : lexIface buf
718 ITconsym n -> ITqconsym (m,n) : lexIface buf
719 ITvarsym n -> ITqvarsym (m,n) : lexIface buf
720 ITbang -> ITqvarsym (m,SLIT("!")) : lexIface buf
721 _ -> ITunknown (show token) : lexIface buf
724 ifaceKeywordsFM :: UniqFM IfaceToken
725 ifaceKeywordsFM = listToUFM $
726 map (\ (x,y) -> (_PK_ x,y))
729 ,("letrec_", ITletrec)
730 ,("interface_", ITinterface)
731 ,("usages_", ITusages)
732 ,("versions_", ITversions)
733 ,("exports_", ITexports)
734 ,("instance_modules_", ITinstance_modules)
735 ,("instances_", ITinstances)
736 ,("fixities_", ITfixities)
737 ,("declarations_", ITdeclarations)
738 ,("pragmas_", ITpragmas)
739 ,("forall_", ITforall)
740 ,("U_", ITunfold False)
741 ,("U!_", ITunfold True)
743 ,("coerce_in_", ITcoerce_in)
744 ,("coerce_out_", ITcoerce_out)
746 ,("integer_", ITinteger_lit)
747 ,("rational_", ITrational_lit)
748 ,("addr_", ITaddr_lit)
749 ,("float_", ITfloat_lit)
750 ,("string_", ITstring_lit)
751 ,("litlit_", ITlit_lit)
752 ,("ccall_", ITccall (False, False))
753 ,("ccall_GC_", ITccall (False, True))
754 ,("casm_", ITccall (True, False))
755 ,("casm_GC_", ITccall (True, True))
758 haskellKeywordsFM = listToUFM $
759 map (\ (x,y) -> (_PK_ x,y))
762 ,("newtype", ITnewtype)
765 ,("instance", ITinstance)
766 ,("infixl", ITinfixl)
767 ,("infixr", ITinfixr)
770 ,("case#", ITprim_case)
774 ,("deriving", ITderiving)
785 -- doDiscard rips along really fast looking for a double semicolon,
786 -- indicating the end of the pragma we're skipping
787 doDiscard inStr buf =
788 -- _trace (show (C# (currentChar# buf))) $
789 case currentChar# buf of
792 case lookAhead# buf 1# of
793 ';'# -> incLexeme (incLexeme buf)
794 _ -> doDiscard inStr (incLexeme buf)
796 doDiscard inStr (incLexeme buf)
799 odd_slashes buf flg i# =
800 case lookAhead# buf i# of
801 '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
804 case lookAhead# buf (negateInt# 1#) of --backwards, actually
805 '\\'# -> -- escaping something..
806 if odd_slashes buf True (negateInt# 2#) then
807 -- odd number of slashes, " is escaped.
808 doDiscard inStr (incLexeme buf)
810 -- even number of slashes, \ is escaped.
811 doDiscard (not inStr) (incLexeme buf)
812 _ -> case inStr of -- forced to avoid build-up
813 True -> doDiscard False (incLexeme buf)
814 False -> doDiscard True (incLexeme buf)
815 _ -> doDiscard inStr (incLexeme buf)
820 my_span :: (a -> Bool) -> [a] -> ([a],[a])
821 my_span p xs = go [] xs
823 go so_far (x:xs') | p x = go (x:so_far) xs'
824 go so_far xs = (reverse so_far, xs)
826 my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a])
827 my_span' p xs = go [] 0 xs
829 go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs'
830 go so_far n xs = (reverse so_far,n, xs)
834 %************************************************************************
836 \subsection{Other utility functions
838 %************************************************************************
841 type IfM a = MaybeErr a Error
843 returnIf :: a -> IfM a
844 thenIf :: IfM a -> (a -> IfM b) -> IfM b
845 happyError :: Int -> [IfaceToken] -> IfM a
847 returnIf a = Succeeded a
849 thenIf (Succeeded a) k = k a
850 thenIf (Failed err) _ = Failed err
852 happyError ln toks = Failed (ifaceParseErr ln toks)
854 -----------------------------------------------------------------
856 ifaceParseErr ln toks sty
857 = hsep [ptext SLIT("Interface-file parse error: line"), int ln, ptext SLIT("toks="), text (show (take 10 toks))]