2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
4 \section[Lexical analysis]{Lexical analysis}
7 #include "HsVersions.h"
11 isLexCon, isLexVar, isLexId, isLexSym,
12 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
13 mkTupNameStr, ifaceParseErr,
16 IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError,
22 IMPORT_1_3(Char(isDigit, isAlpha, isAlphanum, isUpper,isLower, isSpace, ord))
24 #if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ <= 201
26 IMPORT_DELOOPER(IdLoop) -- get the CostCentre type&constructors from here
28 import {-# SOURCE #-} CostCentre
29 # if __GLASGOW_HASKELL__ == 202
30 import PrelBase ( Char(..) )
32 # if __GLASGOW_HASKELL__ >= 209
33 import Addr ( Addr(..) )
38 import CmdLineOpts ( opt_IgnoreIfacePragmas )
39 import Demand ( Demand(..) {- instance Read -} )
40 import UniqFM ( UniqFM, listToUFM, lookupUFM)
41 import BasicTypes ( NewOrData(..), IfaceFlavour(..) )
43 #if __GLASGOW_HASKELL__ >= 202
44 import Maybes ( MaybeErr(..) )
46 import Maybes ( Maybe(..), MaybeErr(..) )
52 import ErrUtils ( Error(..) )
53 import Outputable ( Outputable(..), PprStyle(..) )
54 import Util ( nOfThem, panic )
59 #if __GLASGOW_HASKELL__ <= 201
63 #if __GLASGOW_HASKELL__ <= 209
64 import ST ( thenST, seqST )
69 %************************************************************************
71 \subsection{Lexical categories}
73 %************************************************************************
75 These functions test strings to see if they fit the lexical categories
76 defined in the Haskell report. Normally applied as in e.g. @isCon
80 isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
81 isLexVarId, isLexVarSym :: FAST_STRING -> Bool
83 isLexCon cs = isLexConId cs || isLexConSym cs
84 isLexVar cs = isLexVarId cs || isLexVarSym cs
86 isLexId cs = isLexConId cs || isLexVarId cs
87 isLexSym cs = isLexConSym cs || isLexVarSym cs
93 | cs == SLIT("[]") = True
94 | c == '(' = True -- (), (,), (,,), ...
95 | otherwise = isUpper c || isUpperISO c
101 | otherwise = isLower c || isLowerISO c
107 | otherwise = c == ':'
114 | otherwise = isSymbolASCII c
120 isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
121 isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
122 isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
123 --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
124 isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
125 --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
129 %************************************************************************
131 \subsection{Tuple strings -- ugh!}
133 %************************************************************************
136 mkTupNameStr 0 = SLIT("()")
137 mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
138 mkTupNameStr 2 = _PK_ "(,)" -- not strictly necessary
139 mkTupNameStr 3 = _PK_ "(,,)" -- ditto
140 mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
141 mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
146 %************************************************************************
148 \subsection{Data types}
150 %************************************************************************
152 The token data type, fairly un-interesting except from two constructors,
153 @ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity,
154 strictness, unfolding etc) and types for id decls.
156 The Idea/Observation here is that the renamer needs to scan through
157 all of an interface file before it can continue. But only a fraction
158 of the information contained in the file turns out to be useful, so
159 delaying as much as possible of the scanning and parsing of an
160 interface file Makes Sense (Heap profiles of the compiler
161 show at a reduction in heap usage by at least a factor of two,
164 Hence, the interface file lexer spots when value declarations are
165 being scanned and return the @ITidinfo@ and @ITtype@ constructors
166 for the type and any other id info for that binding (unfolding, strictness
167 etc). These constructors are applied to the result of lexing these sub-chunks.
169 The lexing of the type and id info is all done lazily, of course, so
170 the scanning (and subsequent parsing) will be done *only* on the ids the
171 renamer finds out that it is interested in. The rest will just be junked.
172 Laziness, you know it makes sense :-)
176 = ITinterface -- keywords
196 | ITbang -- magic symbols
211 | ITvarid FAST_STRING
212 | ITconid FAST_STRING
213 | ITvarsym FAST_STRING
214 | ITconsym FAST_STRING
215 | ITqvarid (FAST_STRING,FAST_STRING,IfaceFlavour)
216 | ITqconid (FAST_STRING,FAST_STRING,IfaceFlavour)
217 | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour)
218 | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour)
220 | ITtysig StringBuffer (Maybe StringBuffer)
221 -- lazily return the stream of tokens for
222 -- the info attached to an id.
223 -- Stuff for reading unfoldings
225 | ITunfold Bool -- True <=> there's an INLINE pragma on this Id
226 | ITstrict [Demand] | ITbottom
227 | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
228 | ITcoerce_in | ITcoerce_out | ITatsign
229 | ITccall (Bool,Bool) -- (is_casm, may_gc)
231 | ITchar Char | ITstring FAST_STRING
232 | ITinteger Integer | ITdouble Double
233 | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
234 | ITunknown String -- Used when the lexer can't make sense of it
235 | ITeof -- end of file token
236 deriving Text -- debugging
238 instance Text CostCentre -- cheat!
242 %************************************************************************
244 \subsection{The lexical analyser}
246 %************************************************************************
249 lexIface :: (IfaceToken -> IfM a) -> IfM a
252 -- if bufferExhausted buf then
255 -- _trace ("Lexer: "++[C# (currentChar# buf)]) $
256 case currentChar# buf of
257 -- whitespace and comments, ignore.
258 ' '# -> lexIface cont (stepOn buf)
259 '\t'# -> lexIface cont (stepOn buf)
260 '\n'# -> \line -> lexIface cont (stepOn buf) (line+1)
262 -- Numbers and comments
264 case lookAhead# buf 1# of
265 '-'# -> lex_comment cont (stepOnBy# buf 2#)
268 then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
271 -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
272 -- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
275 case prefixMatch (stepOn buf) "..)" of
276 Just buf' -> cont ITdotdot (stepOverLexeme buf')
278 case lookAhead# buf 1# of
279 ','# -> lex_tuple cont Nothing (stepOnBy# buf 2#)
280 ')'# -> cont (ITconid SLIT("()")) (stepOnBy# buf 2#)
281 _ -> cont IToparen (stepOn buf)
283 '{'# -> cont ITocurly (stepOn buf)
284 '}'# -> cont ITccurly (stepOn buf)
285 ')'# -> cont ITcparen (stepOn buf)
287 case lookAhead# buf 1# of
288 ']'# -> cont (ITconid SLIT("[]")) (stepOnBy# buf 2#)
289 _ -> cont ITobrack (stepOn buf)
290 ']'# -> cont ITcbrack (stepOn buf)
291 ','# -> cont ITcomma (stepOn buf)
292 ':'# -> case lookAhead# buf 1# of
293 ':'# -> cont ITdcolon (stepOnBy# buf 2#)
294 _ -> lex_id cont (incLexeme buf)
295 ';'# -> cont ITsemi (stepOn buf)
296 '\"'# -> case untilEndOfString# (stepOn buf) of
298 -- the string literal does *not* include the dquotes
299 case lexemeToFastString buf' of
300 v -> cont (ITstring v) (stepOn (stepOverLexeme buf'))
303 -- untilEndOfChar# extends the current lexeme until
304 -- it hits a non-escaped single quote. The lexeme of the
305 -- StringBuffer returned does *not* include the closing quote,
306 -- hence we augment the lexeme and make sure to add the
307 -- starting quote, before `read'ing the string.
309 case untilEndOfChar# (stepOn buf) of
310 buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
311 [ (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
313 -- ``thingy'' form for casm
315 case lookAhead# buf 1# of
316 '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `` and go.
317 _ -> lex_id cont (incLexeme buf) -- add ` to lexeme and assume
318 -- scanning an id of some sort.
321 case lookAhead# buf 1# of
322 'S'# -> case lookAhead# buf 2# of
324 lex_demand cont (stepOnUntil (not . isSpace)
325 (stepOnBy# buf 3#)) -- past _S_
326 's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of
327 Just buf' -> lex_scc cont (stepOnUntil (not . isSpace) (stepOverLexeme buf'))
328 Nothing -> lex_keyword cont (stepOnBy# buf 1#) -- drop the '_' and assume
330 _ -> lex_keyword cont (stepOn buf)
333 if bufferExhausted (stepOn buf) then
338 if isDigit (C# c) then
339 lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
343 lex_comment cont buf =
344 -- _trace ("comment: "++[C# (currentChar# buf)]) $
345 case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
348 lex_demand cont buf =
349 -- _trace ("demand: "++[C# (currentChar# buf)]) $
350 case read_em [] buf of { (ls,buf') -> cont (ITstrict ls) (stepOverLexeme buf')}
352 -- code snatched from Demand.lhs
354 -- _trace ("read_em: "++[C# (currentChar# buf)]) $
355 case currentChar# buf of
356 'L'# -> read_em (WwLazy False : acc) (stepOn buf)
357 'A'# -> read_em (WwLazy True : acc) (stepOn buf)
358 'S'# -> read_em (WwStrict : acc) (stepOn buf)
359 'P'# -> read_em (WwPrim : acc) (stepOn buf)
360 'E'# -> read_em (WwEnum : acc) (stepOn buf)
361 ')'# -> (reverse acc, stepOn buf)
362 'U'# -> do_unpack DataType True acc (stepOnBy# buf 2#)
363 'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
364 'N'# -> do_unpack NewType True acc (stepOnBy# buf 2#)
365 'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
366 _ -> (reverse acc, buf)
368 do_unpack new_or_data wrapper_unpacks acc buf
369 = case read_em [] buf of
370 (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
374 -- _trace ("scc: "++[C# (currentChar# buf)]) $
375 case currentChar# buf of
378 case prefixMatch (stepOn buf) "NO_CC\"" of
379 Just buf' -> cont (ITscc noCostCentre) (stepOverLexeme buf')
381 case prefixMatch (stepOn buf) "CURRENT_CC\"" of
382 Just buf' -> cont (ITscc useCurrentCostCentre) (stepOverLexeme buf')
384 case prefixMatch (stepOn buf) "OVERHEAD\"" of
385 Just buf' -> cont (ITscc overheadCostCentre) (stepOverLexeme buf')
387 case prefixMatch (stepOn buf) "DONT_CARE\"" of
388 Just buf' -> cont (ITscc dontCareCostCentre) (stepOverLexeme buf')
390 case prefixMatch (stepOn buf) "SUBSUMED\"" of
391 Just buf' -> cont (ITscc subsumedCosts) (stepOverLexeme buf')
393 case prefixMatch (stepOn buf) "CAFs_in_...\"" of
394 Just buf' -> cont (ITscc preludeCafsCostCentre) (stepOverLexeme buf')
396 case prefixMatch (stepOn buf) "CC_CAFs_in_..." of
398 case untilChar# (stepOverLexeme buf') '\"'# of
399 buf'' -> cont (ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_)) (stepOn (stepOverLexeme buf''))
401 case prefixMatch (stepOn buf) "DICTs_in_...\"" of
402 Just buf' -> cont (ITscc (preludeDictsCostCentre True)) (stepOverLexeme buf')
404 case prefixMatch (stepOn buf) "CC_DICTs_in_..." of
406 case untilChar# (stepOverLexeme buf') '\"'# of
407 buf'' -> cont (ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True))
408 (stepOn (stepOverLexeme buf''))
412 case untilChar# buf '/'# of
414 let mod_name = lexemeToFastString buf' in
415 -- case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
417 -- let grp_name = lexemeToFastString buf'' in
418 case untilChar# (stepOn (stepOverLexeme buf')) '\"'# of
420 -- The label may contain arbitrary characters, so it
421 -- may have been escaped etc., hence we `read' it in to get
422 -- rid of these meta-chars in the string and then pack it (again.)
423 -- ToDo: do the same for module name (single quotes allowed in m-names).
424 -- BTW, the code in this module is totally gruesome..
425 let upk_label = _UNPK_ (lexemeToFastString buf'') in
426 case reads ('"':upk_label++"\"") of
428 let cc_name = _PK_ cc_label in
429 (mkUserCC cc_name mod_name _NIL_{-grp_name-},
430 stepOn (stepOverLexeme buf''))
432 trace ("trouble lexing scc label: " ++ upk_label ++ " , ignoring")
433 (mkUserCC _NIL_ mod_name _NIL_{-grp_name-},
434 stepOn (stepOverLexeme buf''))
436 case prefixMatch (stepOn buf) "CAF:" of
438 case match_user_cc (stepOverLexeme buf') of
439 (cc, buf'') -> cont (ITscc (cafifyCC cc)) buf''
441 case match_user_cc (stepOn buf) of
442 (cc, buf'') -> cont (ITscc cc) buf''
443 c -> cont (ITunknown [C# c]) (stepOn buf)
447 lex_num :: (IfaceToken -> IfM a) ->
448 (Int -> Int) -> Int# -> IfM a
449 lex_num cont minus acc# buf =
450 -- _trace ("lex_num: "++[C# (currentChar# buf)]) $
451 case scanNumLit (I# acc#) buf of
453 case currentChar# buf' of
455 -- this case is not optimised at all, as the
456 -- presence of floating point numbers in interface
457 -- files is not that common. (ToDo)
458 case expandWhile (isDigit) (incLexeme buf') of
459 buf'' -> -- points to first non digit char
460 case reads (lexemeToString buf'') of
461 [(v,_)] -> cont (ITdouble v) (stepOverLexeme buf'')
462 _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
464 -- case reads (lexemeToString buf') of
465 -- [(i,_)] -> cont (ITinteger i) (stepOverLexeme buf')
468 lex_keyword cont buf =
469 -- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $
470 case currentChar# buf of
471 ':'# -> case lookAhead# buf 1# of
472 '_'# -> -- a binding, type (and other id-info) follows,
473 -- to make the parser ever so slightly, we push
475 lex_decl cont (stepOnBy# buf 2#)
476 v# -> cont (ITunknown (['_',':',C# v#])) (stepOnBy# buf 2#)
478 case expandWhile (is_kwd_char) buf of
480 let kw = lexemeToFastString buf' in
481 -- _trace ("kw: "++lexemeToString buf') $
482 case lookupUFM ifaceKeywordsFM kw of
483 Nothing -> cont (ITunknown (_UNPK_ kw)) -- (minor) sigh
484 (stepOverLexeme buf')
485 Just xx -> cont xx (stepOverLexeme buf')
488 case doDiscard False buf of -- spin until ;; is found
490 {- _trace (show (lexemeToString buf')) $ -}
491 case currentChar# buf' of
492 '\n'# -> -- newline, no id info.
493 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
494 (stepOverLexeme buf')
495 '\r'# -> -- just to be sure for those Win* boxes..
496 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
497 (stepOverLexeme buf')
499 cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
500 (stepOverLexeme buf')
501 c -> -- run all over the id info
502 case doDiscard False (stepOverLexeme buf') of -- spin until ;; is found (outside a string!)
504 --_trace ((C# c):show (lexemeToString (decLexeme buf'))) $
505 --_trace (show (lexemeToString (decLexeme buf''))) $
507 if opt_IgnoreIfacePragmas then
510 Just (lexemeToBuffer (decLexeme buf''))
513 cont (ITtysig (lexemeToBuffer (decLexeme buf')) idinfo)
514 (stepOverLexeme buf'')
517 is_kwd_char c@(C# c#) =
518 isAlphanum c || -- OLD: c `elem` "_@/\\"
529 lex_cstring cont buf =
530 -- _trace ("lex_cstring: "++[C# (currentChar# buf)]) $
531 case expandUntilMatch buf "\'\'" of
532 buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
533 (stepOverLexeme buf')
536 lex_tuple cont module_dot buf =
537 -- _trace ("lex_tuple: "++[C# (currentChar# buf)]) $
541 case currentChar# buf of
542 ','# -> go (n+1) (stepOn buf)
543 ')'# -> end_lex_id cont module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
544 _ -> cont (ITunknown ("tuple " ++ show n)) buf
546 -- Similarly ' itself is ok inside an identifier, but not at the start
548 id_arr :: _ByteArray Int
551 newCharArray (0,255) `thenStrictlyST` \ barr ->
553 loop 256# = returnStrictlyST ()
555 if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
556 writeCharArray barr (I# i#) '\1' `seqStrictlyST`
559 writeCharArray barr (I# i#) '\0' `seqStrictlyST`
562 loop 0# `seqStrictlyST`
563 unsafeFreezeByteArray barr)
567 _ByteArray _ arr# = id_arr
569 case ord# (indexCharArray# arr# (ord# c#)) of
573 --OLD: is_id_char c@(C# c#) = isAlphanum c || is_sym c#
577 ':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True;
578 '#'# -> True; '$'# -> True; ':'# -> True; '%'# -> True;
579 '&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True;
580 '/'# -> True; '<'# -> True; '='# -> True; '>'# -> True;
581 '?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True;
582 '-'# -> True; '~'# -> True; '@'# -> True; _ -> False }
584 --isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
587 mod_arr :: _ByteArray Int
590 newCharArray (0,255) `thenStrictlyST` \ barr ->
592 loop 256# = returnStrictlyST ()
594 if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
595 writeCharArray barr (I# i#) '\1' `seqStrictlyST`
598 writeCharArray barr (I# i#) '\0' `seqStrictlyST`
601 loop 0# `seqStrictlyST`
602 unsafeFreezeByteArray barr)
605 is_mod_char (C# c#) =
607 _ByteArray _ arr# = mod_arr
609 case ord# (indexCharArray# arr# (ord# c#)) of
613 --isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
617 case _scc_ "lex_id.span" my_span' (is_mod_char) cs of
621 [] -> lex_id2 Nothing cs
622 _ -> lex_id3 Nothing len xs cs
626 [] -> lex_id2 Nothing cs
629 pk_str = _PK_ (xs::String)
630 len = lengthPS pk_str
633 error "Well, I never!"
635 lex_id2 (Just pk_str) cs''
637 [] -> lex_id2 Nothing cs
638 _ -> lex_id3 Nothing len xs cs'
643 -- _trace ("lex_id: "++[C# (currentChar# buf)]) $
644 case expandWhile (is_mod_char) buf of
646 case currentChar# buf' of
647 '.'# -> munch buf' HiFile
648 '!'# -> munch buf' HiBootFile
649 _ -> lex_id2 cont Nothing buf'
652 if not (emptyLexeme buf') then
653 -- _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $
654 case lexemeToFastString buf' of
655 l@(FastString u# l# ba#) -> lex_id2 cont (Just (FastString u# l# ba#, hif))
656 (stepOn (stepOverLexeme buf'))
658 lex_id2 cont Nothing buf'
661 -- Dealt with the Module.part
662 lex_id2 cont module_dot buf =
663 -- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
664 case currentChar# buf of
666 '['# -> -- Special case for []
667 case lookAhead# buf 1# of
668 ']'# -> end_lex_id cont module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
669 _ -> lex_id3 cont module_dot buf
671 '('# -> -- Special case for (,,,)
672 case lookAhead# buf 1# of
673 ')'# -> end_lex_id cont module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
674 ','# -> lex_tuple cont module_dot (stepOnBy# buf 2#)
675 _ -> lex_id3 cont module_dot buf
676 ':'# -> lex_id3 cont module_dot (incLexeme buf)
679 Nothing -> lex_id3 cont module_dot buf
680 Just ghc -> -- this should be "GHC" (current home of (->))
681 case lookAhead# buf 1# of
682 '>'# -> end_lex_id cont module_dot (ITconid SLIT("->"))
684 _ -> lex_id3 cont module_dot buf
685 _ -> lex_id3 cont module_dot buf
689 -- Dealt with [], (), : special cases
691 lex_id3 cont module_dot buf =
692 -- _trace ("lex_id3: "++[C# (currentChar# buf)]) $
693 case expandWhile (is_id_char) buf of
697 end_lex_id cont module_dot (mk_var_token lexeme) new_buf
699 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
700 Just kwd_token -> cont kwd_token new_buf
701 Nothing -> cont (mk_var_token lexeme) new_buf
703 lexeme = lexemeToFastString buf'
704 new_buf = stepOverLexeme buf'
708 lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs
709 lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs
710 lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs
711 lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs
712 lex_id2 module_dot xs cs = lex_id3 module_dot xs cs
715 -- Dealt with [], (), : special cases
718 lex_id3 module_dot len_xs xs cs =
719 case my_span' (is_id_char) cs of
720 (xs1,len_xs1,rest) ->
722 Just m -> end_lex_id (Just m) (mk_var_token rxs) rest --OLD:_PK_ (reverse xs))) rest
724 case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM rxs of
725 Just kwd_token -> kwd_token : lexIface rest
726 other -> token : lexIface cs end_lex_id Nothing (mk_var_token rxs) rest
728 rxs = packNChars (len_xs+len_xs1) (xs++xs1) -- OLD: _PK_ (reverse xs)
730 mk_var_token pk_str =
735 -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower,
736 -- remove the second half of disjunction when using a 1.3 prelude.
738 if isUpper f then ITconid pk_str
739 else if isLower f then ITvarid pk_str
740 else if f == ':' then ITconsym pk_str
741 else if isLowerISO f then ITvarid pk_str
742 else if isUpperISO f then ITconid pk_str
746 mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n
747 | f == ':' = ITconsym n
748 | isAlpha f = ITvarid n
749 | otherwise = ITvarsym n
754 end_lex_id cont Nothing token buf = cont token buf
755 end_lex_id cont (Just (m,hif)) token buf =
757 ITconid n -> cont (ITqconid (m,n,hif)) buf
758 ITvarid n -> cont (ITqvarid (m,n,hif)) buf
759 ITconsym n -> cont (ITqconsym (m,n,hif)) buf
761 -- Special case for ->
762 -- "->" by itself is a special token (ITrarrow),
763 -- but M.-> is a ITqconid
764 ITvarsym n | n == SLIT("->")
765 -> cont (ITqconsym (m,n,hif)) buf
767 ITvarsym n -> cont (ITqvarsym (m,n,hif)) buf
769 -- ITbang can't happen here I think
770 -- ITbang -> cont (ITqvarsym (m,SLIT("!"),hif)) buf
772 _ -> cont (ITunknown (show token)) buf
775 ifaceKeywordsFM :: UniqFM IfaceToken
776 ifaceKeywordsFM = listToUFM $
777 map (\ (x,y) -> (_PK_ x,y))
780 ,("letrec_", ITletrec)
781 ,("interface_", ITinterface)
782 ,("usages_", ITusages)
783 ,("versions_", ITversions)
784 ,("exports_", ITexports)
785 ,("instance_modules_", ITinstance_modules)
786 ,("instances_", ITinstances)
787 ,("fixities_", ITfixities)
788 ,("declarations_", ITdeclarations)
789 ,("pragmas_", ITpragmas)
790 ,("forall_", ITforall)
791 ,("U_", ITunfold False)
792 ,("U!_", ITunfold True)
794 ,("coerce_in_", ITcoerce_in)
795 ,("coerce_out_", ITcoerce_out)
797 ,("integer_", ITinteger_lit)
798 ,("rational_", ITrational_lit)
799 ,("addr_", ITaddr_lit)
800 ,("float_", ITfloat_lit)
801 ,("string_", ITstring_lit)
802 ,("litlit_", ITlit_lit)
803 ,("ccall_", ITccall (False, False))
804 ,("ccall_GC_", ITccall (False, True))
805 ,("casm_", ITccall (True, False))
806 ,("casm_GC_", ITccall (True, True))
809 haskellKeywordsFM = listToUFM $
810 map (\ (x,y) -> (_PK_ x,y))
813 ,("newtype", ITnewtype)
816 ,("instance", ITinstance)
817 ,("infixl", ITinfixl)
818 ,("infixr", ITinfixr)
821 ,("case#", ITprim_case)
825 ,("deriving", ITderiving)
836 -- doDiscard rips along really fast, looking for a double semicolon,
837 -- indicating the end of the pragma we're skipping
838 doDiscard inStr buf =
839 -- _trace (show (C# (currentChar# buf))) $
840 case currentChar# buf of
843 case lookAhead# buf 1# of
844 ';'# -> incLexeme (incLexeme buf)
845 _ -> doDiscard inStr (incLexeme buf)
847 doDiscard inStr (incLexeme buf)
850 odd_slashes buf flg i# =
851 case lookAhead# buf i# of
852 '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
855 case lookAhead# buf (negateInt# 1#) of --backwards, actually
856 '\\'# -> -- escaping something..
857 if odd_slashes buf True (negateInt# 2#) then
858 -- odd number of slashes, " is escaped.
859 doDiscard inStr (incLexeme buf)
861 -- even number of slashes, \ is escaped.
862 doDiscard (not inStr) (incLexeme buf)
863 _ -> case inStr of -- forced to avoid build-up
864 True -> doDiscard False (incLexeme buf)
865 False -> doDiscard True (incLexeme buf)
866 _ -> doDiscard inStr (incLexeme buf)
871 my_span :: (a -> Bool) -> [a] -> ([a],[a])
872 my_span p xs = go [] xs
874 go so_far (x:xs') | p x = go (x:so_far) xs'
875 go so_far xs = (reverse so_far, xs)
877 my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a])
878 my_span' p xs = go [] 0 xs
880 go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs'
881 go so_far n xs = (reverse so_far,n, xs)
885 %************************************************************************
887 \subsection{Other utility functions
889 %************************************************************************
892 type IfM a = StringBuffer -> Int -> MaybeErr a Error
894 returnIf :: a -> IfM a
895 returnIf a s l = Succeeded a
897 thenIf :: IfM a -> (a -> IfM b) -> IfM b
898 m `thenIf` k = \s l ->
900 Succeeded a -> k a s l
901 Failed err -> Failed err
904 happyError s l = Failed (ifaceParseErr l ([]::[IfaceToken]){-Todo-})
906 -----------------------------------------------------------------
908 ifaceParseErr l toks sty
909 = hsep [ptext SLIT("Interface-file parse error: line"), int l, ptext SLIT("toks="), text (show (take 10 toks))]