2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
4 \section[Lexical analysis]{Lexical analysis}
7 #include "HsVersions.h"
11 isLexCon, isLexVar, isLexId, isLexSym,
12 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
13 mkTupNameStr, ifaceParseErr,
16 IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError,
22 IMPORT_1_3(Char(isDigit, isAlpha, isAlphanum, isUpper,isLower, isSpace, ord))
24 #if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ <= 201
26 IMPORT_DELOOPER(IdLoop) -- get the CostCentre type&constructors from here
28 import {-# SOURCE #-} CostCentre
29 # if __GLASGOW_HASKELL__ == 202
30 import PrelBase ( Char(..) )
34 import CmdLineOpts ( opt_IgnoreIfacePragmas )
35 import Demand ( Demand(..) {- instance Read -} )
36 import UniqFM ( UniqFM, listToUFM, lookupUFM)
37 import BasicTypes ( NewOrData(..), IfaceFlavour(..) )
39 #if __GLASGOW_HASKELL__ >= 202
40 import Maybes ( MaybeErr(..) )
42 import Maybes ( Maybe(..), MaybeErr(..) )
48 import ErrUtils ( Error(..) )
49 import Outputable ( Outputable(..), PprStyle(..) )
50 import Util ( nOfThem, panic )
55 #if __GLASGOW_HASKELL__ <= 201
62 %************************************************************************
64 \subsection{Lexical categories}
66 %************************************************************************
68 These functions test strings to see if they fit the lexical categories
69 defined in the Haskell report. Normally applied as in e.g. @isCon
73 isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
74 isLexVarId, isLexVarSym :: FAST_STRING -> Bool
76 isLexCon cs = isLexConId cs || isLexConSym cs
77 isLexVar cs = isLexVarId cs || isLexVarSym cs
79 isLexId cs = isLexConId cs || isLexVarId cs
80 isLexSym cs = isLexConSym cs || isLexVarSym cs
86 | cs == SLIT("[]") = True
87 | c == '(' = True -- (), (,), (,,), ...
88 | otherwise = isUpper c || isUpperISO c
94 | otherwise = isLower c || isLowerISO c
100 | otherwise = c == ':'
107 | otherwise = isSymbolASCII c
113 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
114 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
115 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
116 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
117 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
118 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
122 %************************************************************************
124 \subsection{Tuple strings -- ugh!}
126 %************************************************************************
129 mkTupNameStr 0 = SLIT("()")
130 mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
131 mkTupNameStr 2 = _PK_ "(,)" -- not strictly necessary
132 mkTupNameStr 3 = _PK_ "(,,)" -- ditto
133 mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
134 mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
139 %************************************************************************
141 \subsection{Data types}
143 %************************************************************************
145 The token data type, fairly un-interesting except from two constructors,
146 @ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity,
147 strictness, unfolding etc) and types for id decls.
149 The Idea/Observation here is that the renamer needs to scan through
150 all of an interface file before it can continue. But only a fraction
151 of the information contained in the file turns out to be useful, so
152 delaying as much as possible of the scanning and parsing of an
153 interface file Makes Sense (Heap profiles of the compiler
154 show at a reduction in heap usage by at least a factor of two,
157 Hence, the interface file lexer spots when value declarations are
158 being scanned and return the @ITidinfo@ and @ITtype@ constructors
159 for the type and any other id info for that binding (unfolding, strictness
160 etc). These constructors are applied to the result of lexing these sub-chunks.
162 The lexing of the type and id info is all done lazily, of course, so
163 the scanning (and subsequent parsing) will be done *only* on the ids the
164 renamer finds out that it is interested in. The rest will just be junked.
165 Laziness, you know it makes sense :-)
169 = ITinterface -- keywords
189 | ITbang -- magic symbols
204 | ITvarid FAST_STRING
205 | ITconid FAST_STRING
206 | ITvarsym FAST_STRING
207 | ITconsym FAST_STRING
208 | ITqvarid (FAST_STRING,FAST_STRING,IfaceFlavour)
209 | ITqconid (FAST_STRING,FAST_STRING,IfaceFlavour)
210 | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour)
211 | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour)
213 | ITtysig StringBuffer (Maybe StringBuffer)
214 -- lazily return the stream of tokens for
215 -- the info attached to an id.
216 -- Stuff for reading unfoldings
218 | ITunfold Bool -- True <=> there's an INLINE pragma on this Id
219 | ITstrict [Demand] | ITbottom
220 | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
221 | ITcoerce_in | ITcoerce_out | ITatsign
222 | ITccall (Bool,Bool) -- (is_casm, may_gc)
224 | ITchar Char | ITstring FAST_STRING
225 | ITinteger Integer | ITdouble Double
226 | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
227 | ITunknown String -- Used when the lexer can't make sense of it
228 | ITeof -- end of file token
229 deriving Text -- debugging
231 instance Text CostCentre -- cheat!
235 %************************************************************************
237 \subsection{The lexical analyser}
239 %************************************************************************
242 lexIface :: (IfaceToken -> IfM a) -> IfM a
245 -- if bufferExhausted buf then
248 -- _trace ("Lexer: "++[C# (currentChar# buf)]) $
249 case currentChar# buf of
250 -- whitespace and comments, ignore.
251 ' '# -> lexIface cont (stepOn buf)
252 '\t'# -> lexIface cont (stepOn buf)
253 '\n'# -> \line -> lexIface cont (stepOn buf) (line+1)
255 -- Numbers and comments
257 case lookAhead# buf 1# of
258 '-'# -> lex_comment cont (stepOnBy# buf 2#)
261 then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
264 -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
265 -- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
268 case prefixMatch (stepOn buf) "..)" of
269 Just buf' -> cont ITdotdot (stepOverLexeme buf')
271 case lookAhead# buf 1# of
272 ','# -> lex_tuple cont Nothing (stepOnBy# buf 2#)
273 ')'# -> cont (ITconid SLIT("()")) (stepOnBy# buf 2#)
274 _ -> cont IToparen (stepOn buf)
276 '{'# -> cont ITocurly (stepOn buf)
277 '}'# -> cont ITccurly (stepOn buf)
278 ')'# -> cont ITcparen (stepOn buf)
280 case lookAhead# buf 1# of
281 ']'# -> cont (ITconid SLIT("[]")) (stepOnBy# buf 2#)
282 _ -> cont ITobrack (stepOn buf)
283 ']'# -> cont ITcbrack (stepOn buf)
284 ','# -> cont ITcomma (stepOn buf)
285 ':'# -> case lookAhead# buf 1# of
286 ':'# -> cont ITdcolon (stepOnBy# buf 2#)
287 _ -> lex_id cont (incLexeme buf)
288 ';'# -> cont ITsemi (stepOn buf)
289 '\"'# -> case untilEndOfString# (stepOn buf) of
291 -- the string literal does *not* include the dquotes
292 case lexemeToFastString buf' of
293 v -> cont (ITstring v) (stepOn (stepOverLexeme buf'))
296 -- untilEndOfChar# extends the current lexeme until
297 -- it hits a non-escaped single quote. The lexeme of the
298 -- StringBuffer returned does *not* include the closing quote,
299 -- hence we augment the lexeme and make sure to add the
300 -- starting quote, before `read'ing the string.
302 case untilEndOfChar# (stepOn buf) of
303 buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
304 [ (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
306 -- ``thingy'' form for casm
308 case lookAhead# buf 1# of
309 '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `` and go.
310 _ -> lex_id cont (incLexeme buf) -- add ` to lexeme and assume
311 -- scanning an id of some sort.
314 case lookAhead# buf 1# of
315 'S'# -> case lookAhead# buf 2# of
317 lex_demand cont (stepOnUntil (not . isSpace)
318 (stepOnBy# buf 3#)) -- past _S_
319 's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of
320 Just buf' -> lex_scc cont (stepOnUntil (not . isSpace) (stepOverLexeme buf'))
321 Nothing -> lex_keyword cont (stepOnBy# buf 1#) -- drop the '_' and assume
323 _ -> lex_keyword cont (stepOn buf)
326 if bufferExhausted (stepOn buf) then
331 if isDigit (C# c) then
332 lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
336 lex_comment cont buf =
337 -- _trace ("comment: "++[C# (currentChar# buf)]) $
338 case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
341 lex_demand cont buf =
342 -- _trace ("demand: "++[C# (currentChar# buf)]) $
343 case read_em [] buf of { (ls,buf') -> cont (ITstrict ls) (stepOverLexeme buf')}
345 -- code snatched from Demand.lhs
347 -- _trace ("read_em: "++[C# (currentChar# buf)]) $
348 case currentChar# buf of
349 'L'# -> read_em (WwLazy False : acc) (stepOn buf)
350 'A'# -> read_em (WwLazy True : acc) (stepOn buf)
351 'S'# -> read_em (WwStrict : acc) (stepOn buf)
352 'P'# -> read_em (WwPrim : acc) (stepOn buf)
353 'E'# -> read_em (WwEnum : acc) (stepOn buf)
354 ')'# -> (reverse acc, stepOn buf)
355 'U'# -> do_unpack DataType True acc (stepOnBy# buf 2#)
356 'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
357 'N'# -> do_unpack NewType True acc (stepOnBy# buf 2#)
358 'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
359 _ -> (reverse acc, buf)
361 do_unpack new_or_data wrapper_unpacks acc buf
362 = case read_em [] buf of
363 (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
367 -- _trace ("scc: "++[C# (currentChar# buf)]) $
368 case currentChar# buf of
371 case prefixMatch (stepOn buf) "NO_CC\"" of
372 Just buf' -> cont (ITscc noCostCentre) (stepOverLexeme buf')
374 case prefixMatch (stepOn buf) "CURRENT_CC\"" of
375 Just buf' -> cont (ITscc useCurrentCostCentre) (stepOverLexeme buf')
377 case prefixMatch (stepOn buf) "OVERHEAD\"" of
378 Just buf' -> cont (ITscc overheadCostCentre) (stepOverLexeme buf')
380 case prefixMatch (stepOn buf) "DONT_CARE\"" of
381 Just buf' -> cont (ITscc dontCareCostCentre) (stepOverLexeme buf')
383 case prefixMatch (stepOn buf) "SUBSUMED\"" of
384 Just buf' -> cont (ITscc subsumedCosts) (stepOverLexeme buf')
386 case prefixMatch (stepOn buf) "CAFs_in_...\"" of
387 Just buf' -> cont (ITscc preludeCafsCostCentre) (stepOverLexeme buf')
389 case prefixMatch (stepOn buf) "CC_CAFs_in_..." of
391 case untilChar# (stepOverLexeme buf') '\"'# of
392 buf'' -> cont (ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_)) (stepOn (stepOverLexeme buf''))
394 case prefixMatch (stepOn buf) "DICTs_in_...\"" of
395 Just buf' -> cont (ITscc (preludeDictsCostCentre True)) (stepOverLexeme buf')
397 case prefixMatch (stepOn buf) "CC_DICTs_in_..." of
399 case untilChar# (stepOverLexeme buf') '\"'# of
400 buf'' -> cont (ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True))
401 (stepOn (stepOverLexeme buf''))
405 case untilChar# buf '/'# of
407 let mod_name = lexemeToFastString buf' in
408 -- case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
410 -- let grp_name = lexemeToFastString buf'' in
411 case untilChar# (stepOn (stepOverLexeme buf')) '\"'# of
413 -- The label may contain arbitrary characters, so it
414 -- may have been escaped etc., hence we `read' it in to get
415 -- rid of these meta-chars in the string and then pack it (again.)
416 -- ToDo: do the same for module name (single quotes allowed in m-names).
417 -- BTW, the code in this module is totally gruesome..
418 let upk_label = _UNPK_ (lexemeToFastString buf'') in
419 case reads ('"':upk_label++"\"") of
421 let cc_name = _PK_ cc_label in
422 (mkUserCC cc_name mod_name _NIL_{-grp_name-},
423 stepOn (stepOverLexeme buf''))
425 trace ("trouble lexing scc label: " ++ upk_label ++ " , ignoring")
426 (mkUserCC _NIL_ mod_name _NIL_{-grp_name-},
427 stepOn (stepOverLexeme buf''))
429 case prefixMatch (stepOn buf) "CAF:" of
431 case match_user_cc (stepOverLexeme buf') of
432 (cc, buf'') -> cont (ITscc (cafifyCC cc)) buf''
434 case match_user_cc (stepOn buf) of
435 (cc, buf'') -> cont (ITscc cc) buf''
436 c -> cont (ITunknown [C# c]) (stepOn buf)
440 lex_num :: (IfaceToken -> IfM a) ->
441 (Int -> Int) -> Int# -> IfM a
442 lex_num cont minus acc# buf =
443 -- _trace ("lex_num: "++[C# (currentChar# buf)]) $
444 case scanNumLit (I# acc#) buf of
446 case currentChar# buf' of
448 -- this case is not optimised at all, as the
449 -- presence of floating point numbers in interface
450 -- files is not that common. (ToDo)
451 case expandWhile (isDigit) (incLexeme buf') of
452 buf'' -> -- points to first non digit char
453 case reads (lexemeToString buf'') of
454 [(v,_)] -> cont (ITdouble v) (stepOverLexeme buf'')
455 _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
457 -- case reads (lexemeToString buf') of
458 -- [(i,_)] -> cont (ITinteger i) (stepOverLexeme buf')
461 lex_keyword cont buf =
462 -- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $
463 case currentChar# buf of
464 ':'# -> case lookAhead# buf 1# of
465 '_'# -> -- a binding, type (and other id-info) follows,
466 -- to make the parser ever so slightly, we push
468 lex_decl cont (stepOnBy# buf 2#)
469 v# -> cont (ITunknown (['_',':',C# v#])) (stepOnBy# buf 2#)
471 case expandWhile (is_kwd_char) buf of
473 let kw = lexemeToFastString buf' in
474 -- _trace ("kw: "++lexemeToString buf') $
475 case lookupUFM ifaceKeywordsFM kw of
476 Nothing -> cont (ITunknown (_UNPK_ kw)) -- (minor) sigh
477 (stepOverLexeme buf')
478 Just xx -> cont xx (stepOverLexeme buf')
481 case doDiscard False buf of -- spin until ;; is found
483 {- _trace (show (lexemeToString buf')) $ -}
484 case currentChar# buf' of
485 '\n'# -> -- newline, no id info.
486 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
487 (stepOverLexeme buf')
488 '\r'# -> -- just to be sure for those Win* boxes..
489 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
490 (stepOverLexeme buf')
492 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
493 (stepOverLexeme buf')
494 c -> -- run all over the id info
495 case doDiscard False (stepOverLexeme buf') of -- spin until ;; is found (outside a string!)
497 --_trace ((C# c):show (lexemeToString (decLexeme buf'))) $
498 --_trace (show (lexemeToString (decLexeme buf''))) $
500 if opt_IgnoreIfacePragmas then
503 Just (lexemeToBuffer (decLexeme buf''))
506 cont (ITtysig (lexemeToBuffer (decLexeme buf')) idinfo)
507 (stepOverLexeme buf'')
510 is_kwd_char c@(C# c#) =
511 isAlphanum c || -- OLD: c `elem` "_@/\\"
522 lex_cstring cont buf =
523 -- _trace ("lex_cstring: "++[C# (currentChar# buf)]) $
524 case expandUntilMatch buf "\'\'" of
525 buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
526 (stepOverLexeme buf')
529 lex_tuple cont module_dot buf =
530 -- _trace ("lex_tuple: "++[C# (currentChar# buf)]) $
534 case currentChar# buf of
535 ','# -> go (n+1) (stepOn buf)
536 ')'# -> end_lex_id cont module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
537 _ -> cont (ITunknown ("tuple " ++ show n)) buf
539 -- Similarly ' itself is ok inside an identifier, but not at the start
541 id_arr :: _ByteArray Int
543 unsafePerformPrimIO (
544 newCharArray (0,255) `thenPrimIO` \ barr ->
546 loop 256# = returnPrimIO ()
548 if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
549 writeCharArray barr (I# i#) '\1' `seqPrimIO`
552 writeCharArray barr (I# i#) '\0' `seqPrimIO`
556 unsafeFreezeByteArray barr)
560 _ByteArray _ arr# = id_arr
562 case ord# (indexCharArray# arr# (ord# c#)) of
566 --is_id_char c@(C# c#) = isAlphanum c || is_sym c#
570 ':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True;
571 '#'# -> True; '$'# -> True; ':'# -> True; '%'# -> True;
572 '&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True;
573 '/'# -> True; '<'# -> True; '='# -> True; '>'# -> True;
574 '?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True;
575 '-'# -> True; '~'# -> True; '@'# -> True; _ -> False }
577 --isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
580 mod_arr :: _ByteArray Int
582 unsafePerformPrimIO (
583 newCharArray (0,255) `thenPrimIO` \ barr ->
585 loop 256# = returnPrimIO ()
587 if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
588 writeCharArray barr (I# i#) '\1' `seqPrimIO`
591 writeCharArray barr (I# i#) '\0' `seqPrimIO`
595 unsafeFreezeByteArray barr)
598 is_mod_char (C# c#) =
600 _ByteArray _ arr# = mod_arr
602 case ord# (indexCharArray# arr# (ord# c#)) of
606 --isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
610 case _scc_ "lex_id.span" my_span' (is_mod_char) cs of
614 [] -> lex_id2 Nothing cs
615 _ -> lex_id3 Nothing len xs cs
619 [] -> lex_id2 Nothing cs
622 pk_str = _PK_ (xs::String)
623 len = lengthPS pk_str
626 error "Well, I never!"
628 lex_id2 (Just pk_str) cs''
630 [] -> lex_id2 Nothing cs
631 _ -> lex_id3 Nothing len xs cs'
636 -- _trace ("lex_id: "++[C# (currentChar# buf)]) $
637 case expandWhile (is_mod_char) buf of
639 case currentChar# buf' of
640 '.'# -> munch buf' HiFile
641 '!'# -> munch buf' HiBootFile
642 _ -> lex_id2 cont Nothing buf'
645 if not (emptyLexeme buf') then
646 -- _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $
647 case lexemeToFastString buf' of
648 l@(FastString u# l# ba#) -> lex_id2 cont (Just (FastString u# l# ba#, hif))
649 (stepOn (stepOverLexeme buf'))
651 lex_id2 cont Nothing buf'
654 -- Dealt with the Module.part
655 lex_id2 cont module_dot buf =
656 -- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
657 case currentChar# buf of
659 case lookAhead# buf 1# of
660 ']'# -> end_lex_id cont module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
661 _ -> lex_id3 cont module_dot buf
663 case lookAhead# buf 1# of
664 ')'# -> end_lex_id cont module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
665 ','# -> lex_tuple cont module_dot (stepOnBy# buf 2#)
666 _ -> lex_id3 cont module_dot buf
667 ':'# -> lex_id3 cont module_dot (incLexeme buf)
668 _ -> lex_id3 cont module_dot buf
672 -- Dealt with [], (), : special cases
674 lex_id3 cont module_dot buf =
675 -- _trace ("lex_id3: "++[C# (currentChar# buf)]) $
676 case expandWhile (is_id_char) buf of
680 end_lex_id cont module_dot (mk_var_token lexeme) (stepOverLexeme buf')
682 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
683 Just kwd_token -> cont kwd_token new_buf
684 Nothing -> cont (mk_var_token lexeme) new_buf
686 lexeme = lexemeToFastString buf'
687 new_buf = stepOverLexeme buf'
691 lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs
692 lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs
693 lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs
694 lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs
695 lex_id2 module_dot xs cs = lex_id3 module_dot xs cs
698 -- Dealt with [], (), : special cases
701 lex_id3 module_dot len_xs xs cs =
702 case my_span' (is_id_char) cs of
703 (xs1,len_xs1,rest) ->
705 Just m -> end_lex_id (Just m) (mk_var_token rxs) rest --OLD:_PK_ (reverse xs))) rest
707 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM rxs of
708 Just kwd_token -> kwd_token : lexIface rest
709 other -> token : lexIface cs end_lex_id Nothing (mk_var_token rxs) rest
711 rxs = packNChars (len_xs+len_xs1) (xs++xs1) -- OLD: _PK_ (reverse xs)
713 mk_var_token pk_str =
718 -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower,
719 -- remove the second half of disjunction when using a 1.3 prelude.
721 if isUpper f then ITconid pk_str
722 else if isLower f then ITvarid pk_str
723 else if f == ':' then ITconsym pk_str
724 else if isLowerISO f then ITvarid pk_str
725 else if isUpperISO f then ITconid pk_str
729 mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n
730 | f == ':' = ITconsym n
731 | isAlpha f = ITvarid n
732 | otherwise = ITvarsym n
737 end_lex_id cont Nothing token buf = cont token buf
738 end_lex_id cont (Just (m,hif)) token buf =
740 ITconid n -> cont (ITqconid (m,n,hif)) buf
741 ITvarid n -> cont (ITqvarid (m,n,hif)) buf
742 ITconsym n -> cont (ITqconsym (m,n,hif)) buf
743 ITvarsym n -> cont (ITqvarsym (m,n,hif)) buf
744 ITbang -> cont (ITqvarsym (m,SLIT("!"),hif)) buf
745 _ -> cont (ITunknown (show token)) buf
748 ifaceKeywordsFM :: UniqFM IfaceToken
749 ifaceKeywordsFM = listToUFM $
750 map (\ (x,y) -> (_PK_ x,y))
753 ,("letrec_", ITletrec)
754 ,("interface_", ITinterface)
755 ,("usages_", ITusages)
756 ,("versions_", ITversions)
757 ,("exports_", ITexports)
758 ,("instance_modules_", ITinstance_modules)
759 ,("instances_", ITinstances)
760 ,("fixities_", ITfixities)
761 ,("declarations_", ITdeclarations)
762 ,("pragmas_", ITpragmas)
763 ,("forall_", ITforall)
764 ,("U_", ITunfold False)
765 ,("U!_", ITunfold True)
767 ,("coerce_in_", ITcoerce_in)
768 ,("coerce_out_", ITcoerce_out)
770 ,("integer_", ITinteger_lit)
771 ,("rational_", ITrational_lit)
772 ,("addr_", ITaddr_lit)
773 ,("float_", ITfloat_lit)
774 ,("string_", ITstring_lit)
775 ,("litlit_", ITlit_lit)
776 ,("ccall_", ITccall (False, False))
777 ,("ccall_GC_", ITccall (False, True))
778 ,("casm_", ITccall (True, False))
779 ,("casm_GC_", ITccall (True, True))
782 haskellKeywordsFM = listToUFM $
783 map (\ (x,y) -> (_PK_ x,y))
786 ,("newtype", ITnewtype)
789 ,("instance", ITinstance)
790 ,("infixl", ITinfixl)
791 ,("infixr", ITinfixr)
794 ,("case#", ITprim_case)
798 ,("deriving", ITderiving)
809 -- doDiscard rips along really fast looking for a double semicolon,
810 -- indicating the end of the pragma we're skipping
811 doDiscard inStr buf =
812 -- _trace (show (C# (currentChar# buf))) $
813 case currentChar# buf of
816 case lookAhead# buf 1# of
817 ';'# -> incLexeme (incLexeme buf)
818 _ -> doDiscard inStr (incLexeme buf)
820 doDiscard inStr (incLexeme buf)
823 odd_slashes buf flg i# =
824 case lookAhead# buf i# of
825 '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
828 case lookAhead# buf (negateInt# 1#) of --backwards, actually
829 '\\'# -> -- escaping something..
830 if odd_slashes buf True (negateInt# 2#) then
831 -- odd number of slashes, " is escaped.
832 doDiscard inStr (incLexeme buf)
834 -- even number of slashes, \ is escaped.
835 doDiscard (not inStr) (incLexeme buf)
836 _ -> case inStr of -- forced to avoid build-up
837 True -> doDiscard False (incLexeme buf)
838 False -> doDiscard True (incLexeme buf)
839 _ -> doDiscard inStr (incLexeme buf)
844 my_span :: (a -> Bool) -> [a] -> ([a],[a])
845 my_span p xs = go [] xs
847 go so_far (x:xs') | p x = go (x:so_far) xs'
848 go so_far xs = (reverse so_far, xs)
850 my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a])
851 my_span' p xs = go [] 0 xs
853 go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs'
854 go so_far n xs = (reverse so_far,n, xs)
858 %************************************************************************
860 \subsection{Other utility functions
862 %************************************************************************
865 type IfM a = StringBuffer -> Int -> MaybeErr a Error
867 returnIf :: a -> IfM a
868 returnIf a s l = Succeeded a
870 thenIf :: IfM a -> (a -> IfM b) -> IfM b
871 m `thenIf` k = \s l ->
873 Succeeded a -> k a s l
874 Failed err -> Failed err
877 happyError s l = Failed (ifaceParseErr l ([]::[IfaceToken]){-Todo-})
879 -----------------------------------------------------------------
881 ifaceParseErr l toks sty
882 = hsep [ptext SLIT("Interface-file parse error: line"), int l, ptext SLIT("toks="), text (show (take 10 toks))]