2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
4 \section[Lexical analysis]{Lexical analysis}
7 #include "HsVersions.h"
11 isLexCon, isLexVar, isLexId, isLexSym,
12 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
13 mkTupNameStr, ifaceParseErr,
16 IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError,
22 IMPORT_1_3(Char(isDigit, isAlpha, isAlphanum, isUpper,isLower, isSpace, ord))
24 #if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ <= 201
26 IMPORT_DELOOPER(IdLoop) -- get the CostCentre type&constructors from here
28 import {-# SOURCE #-} CostCentre
29 # if __GLASGOW_HASKELL__ == 202
30 import PrelBase ( Char(..) )
32 # if __GLASGOW_HASKELL__ >= 209
33 import Addr ( Addr(..) )
38 import CmdLineOpts ( opt_IgnoreIfacePragmas )
39 import Demand ( Demand(..) {- instance Read -} )
40 import UniqFM ( UniqFM, listToUFM, lookupUFM)
41 import BasicTypes ( NewOrData(..), IfaceFlavour(..) )
43 #if __GLASGOW_HASKELL__ >= 202
44 import Maybes ( MaybeErr(..) )
46 import Maybes ( Maybe(..), MaybeErr(..) )
52 import ErrUtils ( Error(..) )
53 import Outputable ( Outputable(..), PprStyle(..) )
54 import Util ( nOfThem, panic )
59 #if __GLASGOW_HASKELL__ <= 201
66 %************************************************************************
68 \subsection{Lexical categories}
70 %************************************************************************
72 These functions test strings to see if they fit the lexical categories
73 defined in the Haskell report. Normally applied as in e.g. @isCon
77 isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
78 isLexVarId, isLexVarSym :: FAST_STRING -> Bool
80 isLexCon cs = isLexConId cs || isLexConSym cs
81 isLexVar cs = isLexVarId cs || isLexVarSym cs
83 isLexId cs = isLexConId cs || isLexVarId cs
84 isLexSym cs = isLexConSym cs || isLexVarSym cs
90 | cs == SLIT("[]") = True
91 | c == '(' = True -- (), (,), (,,), ...
92 | otherwise = isUpper c || isUpperISO c
98 | otherwise = isLower c || isLowerISO c
104 | otherwise = c == ':'
111 | otherwise = isSymbolASCII c
117 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
118 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
119 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
120 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
121 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
122 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
126 %************************************************************************
128 \subsection{Tuple strings -- ugh!}
130 %************************************************************************
133 mkTupNameStr 0 = SLIT("()")
134 mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
135 mkTupNameStr 2 = _PK_ "(,)" -- not strictly necessary
136 mkTupNameStr 3 = _PK_ "(,,)" -- ditto
137 mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
138 mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
143 %************************************************************************
145 \subsection{Data types}
147 %************************************************************************
149 The token data type, fairly un-interesting except from two constructors,
150 @ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity,
151 strictness, unfolding etc) and types for id decls.
153 The Idea/Observation here is that the renamer needs to scan through
154 all of an interface file before it can continue. But only a fraction
155 of the information contained in the file turns out to be useful, so
156 delaying as much as possible of the scanning and parsing of an
157 interface file Makes Sense (Heap profiles of the compiler
158 show at a reduction in heap usage by at least a factor of two,
161 Hence, the interface file lexer spots when value declarations are
162 being scanned and return the @ITidinfo@ and @ITtype@ constructors
163 for the type and any other id info for that binding (unfolding, strictness
164 etc). These constructors are applied to the result of lexing these sub-chunks.
166 The lexing of the type and id info is all done lazily, of course, so
167 the scanning (and subsequent parsing) will be done *only* on the ids the
168 renamer finds out that it is interested in. The rest will just be junked.
169 Laziness, you know it makes sense :-)
173 = ITinterface -- keywords
193 | ITbang -- magic symbols
208 | ITvarid FAST_STRING
209 | ITconid FAST_STRING
210 | ITvarsym FAST_STRING
211 | ITconsym FAST_STRING
212 | ITqvarid (FAST_STRING,FAST_STRING,IfaceFlavour)
213 | ITqconid (FAST_STRING,FAST_STRING,IfaceFlavour)
214 | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour)
215 | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour)
217 | ITtysig StringBuffer (Maybe StringBuffer)
218 -- lazily return the stream of tokens for
219 -- the info attached to an id.
220 -- Stuff for reading unfoldings
222 | ITunfold Bool -- True <=> there's an INLINE pragma on this Id
223 | ITstrict [Demand] | ITbottom
224 | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
225 | ITcoerce_in | ITcoerce_out | ITatsign
226 | ITccall (Bool,Bool) -- (is_casm, may_gc)
228 | ITchar Char | ITstring FAST_STRING
229 | ITinteger Integer | ITdouble Double
230 | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
231 | ITunknown String -- Used when the lexer can't make sense of it
232 | ITeof -- end of file token
233 deriving Text -- debugging
235 instance Text CostCentre -- cheat!
239 %************************************************************************
241 \subsection{The lexical analyser}
243 %************************************************************************
246 lexIface :: (IfaceToken -> IfM a) -> IfM a
249 -- if bufferExhausted buf then
252 -- _trace ("Lexer: "++[C# (currentChar# buf)]) $
253 case currentChar# buf of
254 -- whitespace and comments, ignore.
255 ' '# -> lexIface cont (stepOn buf)
256 '\t'# -> lexIface cont (stepOn buf)
257 '\n'# -> \line -> lexIface cont (stepOn buf) (line+1)
259 -- Numbers and comments
261 case lookAhead# buf 1# of
262 '-'# -> lex_comment cont (stepOnBy# buf 2#)
265 then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
268 -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
269 -- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
272 case prefixMatch (stepOn buf) "..)" of
273 Just buf' -> cont ITdotdot (stepOverLexeme buf')
275 case lookAhead# buf 1# of
276 ','# -> lex_tuple cont Nothing (stepOnBy# buf 2#)
277 ')'# -> cont (ITconid SLIT("()")) (stepOnBy# buf 2#)
278 _ -> cont IToparen (stepOn buf)
280 '{'# -> cont ITocurly (stepOn buf)
281 '}'# -> cont ITccurly (stepOn buf)
282 ')'# -> cont ITcparen (stepOn buf)
284 case lookAhead# buf 1# of
285 ']'# -> cont (ITconid SLIT("[]")) (stepOnBy# buf 2#)
286 _ -> cont ITobrack (stepOn buf)
287 ']'# -> cont ITcbrack (stepOn buf)
288 ','# -> cont ITcomma (stepOn buf)
289 ':'# -> case lookAhead# buf 1# of
290 ':'# -> cont ITdcolon (stepOnBy# buf 2#)
291 _ -> lex_id cont (incLexeme buf)
292 ';'# -> cont ITsemi (stepOn buf)
293 '\"'# -> case untilEndOfString# (stepOn buf) of
295 -- the string literal does *not* include the dquotes
296 case lexemeToFastString buf' of
297 v -> cont (ITstring v) (stepOn (stepOverLexeme buf'))
300 -- untilEndOfChar# extends the current lexeme until
301 -- it hits a non-escaped single quote. The lexeme of the
302 -- StringBuffer returned does *not* include the closing quote,
303 -- hence we augment the lexeme and make sure to add the
304 -- starting quote, before `read'ing the string.
306 case untilEndOfChar# (stepOn buf) of
307 buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
308 [ (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
310 -- ``thingy'' form for casm
312 case lookAhead# buf 1# of
313 '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `` and go.
314 _ -> lex_id cont (incLexeme buf) -- add ` to lexeme and assume
315 -- scanning an id of some sort.
318 case lookAhead# buf 1# of
319 'S'# -> case lookAhead# buf 2# of
321 lex_demand cont (stepOnUntil (not . isSpace)
322 (stepOnBy# buf 3#)) -- past _S_
323 's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of
324 Just buf' -> lex_scc cont (stepOnUntil (not . isSpace) (stepOverLexeme buf'))
325 Nothing -> lex_keyword cont (stepOnBy# buf 1#) -- drop the '_' and assume
327 _ -> lex_keyword cont (stepOn buf)
330 if bufferExhausted (stepOn buf) then
335 if isDigit (C# c) then
336 lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
340 lex_comment cont buf =
341 -- _trace ("comment: "++[C# (currentChar# buf)]) $
342 case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
345 lex_demand cont buf =
346 -- _trace ("demand: "++[C# (currentChar# buf)]) $
347 case read_em [] buf of { (ls,buf') -> cont (ITstrict ls) (stepOverLexeme buf')}
349 -- code snatched from Demand.lhs
351 -- _trace ("read_em: "++[C# (currentChar# buf)]) $
352 case currentChar# buf of
353 'L'# -> read_em (WwLazy False : acc) (stepOn buf)
354 'A'# -> read_em (WwLazy True : acc) (stepOn buf)
355 'S'# -> read_em (WwStrict : acc) (stepOn buf)
356 'P'# -> read_em (WwPrim : acc) (stepOn buf)
357 'E'# -> read_em (WwEnum : acc) (stepOn buf)
358 ')'# -> (reverse acc, stepOn buf)
359 'U'# -> do_unpack DataType True acc (stepOnBy# buf 2#)
360 'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
361 'N'# -> do_unpack NewType True acc (stepOnBy# buf 2#)
362 'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
363 _ -> (reverse acc, buf)
365 do_unpack new_or_data wrapper_unpacks acc buf
366 = case read_em [] buf of
367 (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
371 -- _trace ("scc: "++[C# (currentChar# buf)]) $
372 case currentChar# buf of
375 case prefixMatch (stepOn buf) "NO_CC\"" of
376 Just buf' -> cont (ITscc noCostCentre) (stepOverLexeme buf')
378 case prefixMatch (stepOn buf) "CURRENT_CC\"" of
379 Just buf' -> cont (ITscc useCurrentCostCentre) (stepOverLexeme buf')
381 case prefixMatch (stepOn buf) "OVERHEAD\"" of
382 Just buf' -> cont (ITscc overheadCostCentre) (stepOverLexeme buf')
384 case prefixMatch (stepOn buf) "DONT_CARE\"" of
385 Just buf' -> cont (ITscc dontCareCostCentre) (stepOverLexeme buf')
387 case prefixMatch (stepOn buf) "SUBSUMED\"" of
388 Just buf' -> cont (ITscc subsumedCosts) (stepOverLexeme buf')
390 case prefixMatch (stepOn buf) "CAFs_in_...\"" of
391 Just buf' -> cont (ITscc preludeCafsCostCentre) (stepOverLexeme buf')
393 case prefixMatch (stepOn buf) "CC_CAFs_in_..." of
395 case untilChar# (stepOverLexeme buf') '\"'# of
396 buf'' -> cont (ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_)) (stepOn (stepOverLexeme buf''))
398 case prefixMatch (stepOn buf) "DICTs_in_...\"" of
399 Just buf' -> cont (ITscc (preludeDictsCostCentre True)) (stepOverLexeme buf')
401 case prefixMatch (stepOn buf) "CC_DICTs_in_..." of
403 case untilChar# (stepOverLexeme buf') '\"'# of
404 buf'' -> cont (ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True))
405 (stepOn (stepOverLexeme buf''))
409 case untilChar# buf '/'# of
411 let mod_name = lexemeToFastString buf' in
412 -- case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
414 -- let grp_name = lexemeToFastString buf'' in
415 case untilChar# (stepOn (stepOverLexeme buf')) '\"'# of
417 -- The label may contain arbitrary characters, so it
418 -- may have been escaped etc., hence we `read' it in to get
419 -- rid of these meta-chars in the string and then pack it (again.)
420 -- ToDo: do the same for module name (single quotes allowed in m-names).
421 -- BTW, the code in this module is totally gruesome..
422 let upk_label = _UNPK_ (lexemeToFastString buf'') in
423 case reads ('"':upk_label++"\"") of
425 let cc_name = _PK_ cc_label in
426 (mkUserCC cc_name mod_name _NIL_{-grp_name-},
427 stepOn (stepOverLexeme buf''))
429 trace ("trouble lexing scc label: " ++ upk_label ++ " , ignoring")
430 (mkUserCC _NIL_ mod_name _NIL_{-grp_name-},
431 stepOn (stepOverLexeme buf''))
433 case prefixMatch (stepOn buf) "CAF:" of
435 case match_user_cc (stepOverLexeme buf') of
436 (cc, buf'') -> cont (ITscc (cafifyCC cc)) buf''
438 case match_user_cc (stepOn buf) of
439 (cc, buf'') -> cont (ITscc cc) buf''
440 c -> cont (ITunknown [C# c]) (stepOn buf)
444 lex_num :: (IfaceToken -> IfM a) ->
445 (Int -> Int) -> Int# -> IfM a
446 lex_num cont minus acc# buf =
447 -- _trace ("lex_num: "++[C# (currentChar# buf)]) $
448 case scanNumLit (I# acc#) buf of
450 case currentChar# buf' of
452 -- this case is not optimised at all, as the
453 -- presence of floating point numbers in interface
454 -- files is not that common. (ToDo)
455 case expandWhile (isDigit) (incLexeme buf') of
456 buf'' -> -- points to first non digit char
457 case reads (lexemeToString buf'') of
458 [(v,_)] -> cont (ITdouble v) (stepOverLexeme buf'')
459 _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
461 -- case reads (lexemeToString buf') of
462 -- [(i,_)] -> cont (ITinteger i) (stepOverLexeme buf')
465 lex_keyword cont buf =
466 -- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $
467 case currentChar# buf of
468 ':'# -> case lookAhead# buf 1# of
469 '_'# -> -- a binding, type (and other id-info) follows,
470 -- to make the parser ever so slightly, we push
472 lex_decl cont (stepOnBy# buf 2#)
473 v# -> cont (ITunknown (['_',':',C# v#])) (stepOnBy# buf 2#)
475 case expandWhile (is_kwd_char) buf of
477 let kw = lexemeToFastString buf' in
478 -- _trace ("kw: "++lexemeToString buf') $
479 case lookupUFM ifaceKeywordsFM kw of
480 Nothing -> cont (ITunknown (_UNPK_ kw)) -- (minor) sigh
481 (stepOverLexeme buf')
482 Just xx -> cont xx (stepOverLexeme buf')
485 case doDiscard False buf of -- spin until ;; is found
487 {- _trace (show (lexemeToString buf')) $ -}
488 case currentChar# buf' of
489 '\n'# -> -- newline, no id info.
490 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
491 (stepOverLexeme buf')
492 '\r'# -> -- just to be sure for those Win* boxes..
493 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
494 (stepOverLexeme buf')
496 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
497 (stepOverLexeme buf')
498 c -> -- run all over the id info
499 case doDiscard False (stepOverLexeme buf') of -- spin until ;; is found (outside a string!)
501 --_trace ((C# c):show (lexemeToString (decLexeme buf'))) $
502 --_trace (show (lexemeToString (decLexeme buf''))) $
504 if opt_IgnoreIfacePragmas then
507 Just (lexemeToBuffer (decLexeme buf''))
510 cont (ITtysig (lexemeToBuffer (decLexeme buf')) idinfo)
511 (stepOverLexeme buf'')
514 is_kwd_char c@(C# c#) =
515 isAlphanum c || -- OLD: c `elem` "_@/\\"
526 lex_cstring cont buf =
527 -- _trace ("lex_cstring: "++[C# (currentChar# buf)]) $
528 case expandUntilMatch buf "\'\'" of
529 buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
530 (stepOverLexeme buf')
533 lex_tuple cont module_dot buf =
534 -- _trace ("lex_tuple: "++[C# (currentChar# buf)]) $
538 case currentChar# buf of
539 ','# -> go (n+1) (stepOn buf)
540 ')'# -> end_lex_id cont module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
541 _ -> cont (ITunknown ("tuple " ++ show n)) buf
543 -- Similarly ' itself is ok inside an identifier, but not at the start
545 id_arr :: _ByteArray Int
548 newCharArray (0,255) `thenStrictlyST` \ barr ->
550 loop 256# = returnStrictlyST ()
552 if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
553 writeCharArray barr (I# i#) '\1' `seqStrictlyST`
556 writeCharArray barr (I# i#) '\0' `seqStrictlyST`
559 loop 0# `seqStrictlyST`
560 unsafeFreezeByteArray barr)
564 _ByteArray _ arr# = id_arr
566 case ord# (indexCharArray# arr# (ord# c#)) of
570 --OLD: is_id_char c@(C# c#) = isAlphanum c || is_sym c#
574 ':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True;
575 '#'# -> True; '$'# -> True; ':'# -> True; '%'# -> True;
576 '&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True;
577 '/'# -> True; '<'# -> True; '='# -> True; '>'# -> True;
578 '?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True;
579 '-'# -> True; '~'# -> True; '@'# -> True; _ -> False }
581 --isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
584 mod_arr :: _ByteArray Int
587 newCharArray (0,255) `thenStrictlyST` \ barr ->
589 loop 256# = returnStrictlyST ()
591 if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
592 writeCharArray barr (I# i#) '\1' `seqStrictlyST`
595 writeCharArray barr (I# i#) '\0' `seqStrictlyST`
598 loop 0# `seqStrictlyST`
599 unsafeFreezeByteArray barr)
602 is_mod_char (C# c#) =
604 _ByteArray _ arr# = mod_arr
606 case ord# (indexCharArray# arr# (ord# c#)) of
610 --isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
614 case _scc_ "lex_id.span" my_span' (is_mod_char) cs of
618 [] -> lex_id2 Nothing cs
619 _ -> lex_id3 Nothing len xs cs
623 [] -> lex_id2 Nothing cs
626 pk_str = _PK_ (xs::String)
627 len = lengthPS pk_str
630 error "Well, I never!"
632 lex_id2 (Just pk_str) cs''
634 [] -> lex_id2 Nothing cs
635 _ -> lex_id3 Nothing len xs cs'
640 -- _trace ("lex_id: "++[C# (currentChar# buf)]) $
641 case expandWhile (is_mod_char) buf of
643 case currentChar# buf' of
644 '.'# -> munch buf' HiFile
645 '!'# -> munch buf' HiBootFile
646 _ -> lex_id2 cont Nothing buf'
649 if not (emptyLexeme buf') then
650 -- _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $
651 case lexemeToFastString buf' of
652 l@(FastString u# l# ba#) -> lex_id2 cont (Just (FastString u# l# ba#, hif))
653 (stepOn (stepOverLexeme buf'))
655 lex_id2 cont Nothing buf'
658 -- Dealt with the Module.part
659 lex_id2 cont module_dot buf =
660 -- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
661 case currentChar# buf of
663 '['# -> -- Special case for []
664 case lookAhead# buf 1# of
665 ']'# -> end_lex_id cont module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
666 _ -> lex_id3 cont module_dot buf
668 '('# -> -- Special case for (,,,)
669 case lookAhead# buf 1# of
670 ')'# -> end_lex_id cont module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
671 ','# -> lex_tuple cont module_dot (stepOnBy# buf 2#)
672 _ -> lex_id3 cont module_dot buf
673 ':'# -> lex_id3 cont module_dot (incLexeme buf)
676 Nothing -> lex_id3 cont module_dot buf
677 Just ghc -> -- this should be "GHC" (current home of (->))
678 case lookAhead# buf 1# of
679 '>'# -> end_lex_id cont module_dot (ITconid SLIT("->"))
681 _ -> lex_id3 cont module_dot buf
682 _ -> lex_id3 cont module_dot buf
686 -- Dealt with [], (), : special cases
688 lex_id3 cont module_dot buf =
689 -- _trace ("lex_id3: "++[C# (currentChar# buf)]) $
690 case expandWhile (is_id_char) buf of
694 end_lex_id cont module_dot (mk_var_token lexeme) new_buf
696 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
697 Just kwd_token -> cont kwd_token new_buf
698 Nothing -> cont (mk_var_token lexeme) new_buf
700 lexeme = lexemeToFastString buf'
701 new_buf = stepOverLexeme buf'
705 lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs
706 lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs
707 lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs
708 lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs
709 lex_id2 module_dot xs cs = lex_id3 module_dot xs cs
712 -- Dealt with [], (), : special cases
715 lex_id3 module_dot len_xs xs cs =
716 case my_span' (is_id_char) cs of
717 (xs1,len_xs1,rest) ->
719 Just m -> end_lex_id (Just m) (mk_var_token rxs) rest --OLD:_PK_ (reverse xs))) rest
721 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM rxs of
722 Just kwd_token -> kwd_token : lexIface rest
723 other -> token : lexIface cs end_lex_id Nothing (mk_var_token rxs) rest
725 rxs = packNChars (len_xs+len_xs1) (xs++xs1) -- OLD: _PK_ (reverse xs)
727 mk_var_token pk_str =
732 -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower,
733 -- remove the second half of disjunction when using a 1.3 prelude.
735 if isUpper f then ITconid pk_str
736 else if isLower f then ITvarid pk_str
737 else if f == ':' then ITconsym pk_str
738 else if isLowerISO f then ITvarid pk_str
739 else if isUpperISO f then ITconid pk_str
743 mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n
744 | f == ':' = ITconsym n
745 | isAlpha f = ITvarid n
746 | otherwise = ITvarsym n
751 end_lex_id cont Nothing token buf = cont token buf
752 end_lex_id cont (Just (m,hif)) token buf =
754 ITconid n -> cont (ITqconid (m,n,hif)) buf
755 ITvarid n -> cont (ITqvarid (m,n,hif)) buf
756 ITconsym n -> cont (ITqconsym (m,n,hif)) buf
758 -- Special case for ->
759 -- "->" by itself is a special token (ITrarrow),
760 -- but M.-> is a ITqconid
761 ITvarsym n | n == SLIT("->")
762 -> cont (ITqconsym (m,n,hif)) buf
764 ITvarsym n -> cont (ITqvarsym (m,n,hif)) buf
766 -- ITbang can't happen here I think
767 -- ITbang -> cont (ITqvarsym (m,SLIT("!"),hif)) buf
769 _ -> cont (ITunknown (show token)) buf
772 ifaceKeywordsFM :: UniqFM IfaceToken
773 ifaceKeywordsFM = listToUFM $
774 map (\ (x,y) -> (_PK_ x,y))
777 ,("letrec_", ITletrec)
778 ,("interface_", ITinterface)
779 ,("usages_", ITusages)
780 ,("versions_", ITversions)
781 ,("exports_", ITexports)
782 ,("instance_modules_", ITinstance_modules)
783 ,("instances_", ITinstances)
784 ,("fixities_", ITfixities)
785 ,("declarations_", ITdeclarations)
786 ,("pragmas_", ITpragmas)
787 ,("forall_", ITforall)
788 ,("U_", ITunfold False)
789 ,("U!_", ITunfold True)
791 ,("coerce_in_", ITcoerce_in)
792 ,("coerce_out_", ITcoerce_out)
794 ,("integer_", ITinteger_lit)
795 ,("rational_", ITrational_lit)
796 ,("addr_", ITaddr_lit)
797 ,("float_", ITfloat_lit)
798 ,("string_", ITstring_lit)
799 ,("litlit_", ITlit_lit)
800 ,("ccall_", ITccall (False, False))
801 ,("ccall_GC_", ITccall (False, True))
802 ,("casm_", ITccall (True, False))
803 ,("casm_GC_", ITccall (True, True))
806 haskellKeywordsFM = listToUFM $
807 map (\ (x,y) -> (_PK_ x,y))
810 ,("newtype", ITnewtype)
813 ,("instance", ITinstance)
814 ,("infixl", ITinfixl)
815 ,("infixr", ITinfixr)
818 ,("case#", ITprim_case)
822 ,("deriving", ITderiving)
833 -- doDiscard rips along really fast, looking for a double semicolon,
834 -- indicating the end of the pragma we're skipping
835 doDiscard inStr buf =
836 -- _trace (show (C# (currentChar# buf))) $
837 case currentChar# buf of
840 case lookAhead# buf 1# of
841 ';'# -> incLexeme (incLexeme buf)
842 _ -> doDiscard inStr (incLexeme buf)
844 doDiscard inStr (incLexeme buf)
847 odd_slashes buf flg i# =
848 case lookAhead# buf i# of
849 '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
852 case lookAhead# buf (negateInt# 1#) of --backwards, actually
853 '\\'# -> -- escaping something..
854 if odd_slashes buf True (negateInt# 2#) then
855 -- odd number of slashes, " is escaped.
856 doDiscard inStr (incLexeme buf)
858 -- even number of slashes, \ is escaped.
859 doDiscard (not inStr) (incLexeme buf)
860 _ -> case inStr of -- forced to avoid build-up
861 True -> doDiscard False (incLexeme buf)
862 False -> doDiscard True (incLexeme buf)
863 _ -> doDiscard inStr (incLexeme buf)
868 my_span :: (a -> Bool) -> [a] -> ([a],[a])
869 my_span p xs = go [] xs
871 go so_far (x:xs') | p x = go (x:so_far) xs'
872 go so_far xs = (reverse so_far, xs)
874 my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a])
875 my_span' p xs = go [] 0 xs
877 go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs'
878 go so_far n xs = (reverse so_far,n, xs)
882 %************************************************************************
884 \subsection{Other utility functions
886 %************************************************************************
889 type IfM a = StringBuffer -> Int -> MaybeErr a Error
891 returnIf :: a -> IfM a
892 returnIf a s l = Succeeded a
894 thenIf :: IfM a -> (a -> IfM b) -> IfM b
895 m `thenIf` k = \s l ->
897 Succeeded a -> k a s l
898 Failed err -> Failed err
901 happyError s l = Failed (ifaceParseErr l ([]::[IfaceToken]){-Todo-})
903 -----------------------------------------------------------------
905 ifaceParseErr l toks sty
906 = hsep [ptext SLIT("Interface-file parse error: line"), int l, ptext SLIT("toks="), text (show (take 10 toks))]