+--------------------------------------------------------
+[Jan 98]
+There's a known bug in here:
+
+ If an interface file ends prematurely, Lex tries to
+ do headFS of an empty FastString.
+
+An example that provokes the error is
+
+ f _:_ _forall_ [a] <<<END OF FILE>>>
+--------------------------------------------------------
+
+
%
% (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
%
\section[Lexical analysis]{Lexical analysis}
\begin{code}
-#include "HsVersions.h"
-
module Lex (
isLexCon, isLexVar, isLexId, isLexSym,
mkTupNameStr, ifaceParseErr,
-- Monad for parser
- IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError,
+ IfaceToken(..), lexIface, IfM, thenIf, returnIf, getSrcLocIf,
+ happyError,
StringBuffer
) where
+#include "HsVersions.h"
-IMPORT_1_3(Char(isDigit, isAlpha, isAlphanum, isUpper,isLower, isSpace, ord))
-IMPORT_DELOOPER(Ubiq)
-IMPORT_DELOOPER(IdLoop) -- get the CostCentre type&constructors from here
+import Char (isDigit, isAlphanum, isUpper,isLower, isSpace, ord )
+
+import {-# SOURCE #-} CostCentre
import CmdLineOpts ( opt_IgnoreIfacePragmas )
import Demand ( Demand(..) {- instance Read -} )
import UniqFM ( UniqFM, listToUFM, lookupUFM)
---import FiniteMap ( FiniteMap, listToFM, lookupFM )
-#if __GLASGOW_HASKELL__ >= 202
-import Maybes ( MaybeErr(..) )
-#else
-import Maybes ( Maybe(..), MaybeErr(..) )
-#endif
-import Pretty
-
-
+import BasicTypes ( NewOrData(..), IfaceFlavour(..) )
+import SrcLoc ( SrcLoc, incSrcLine )
-import ErrUtils ( Error(..) )
-import Outputable ( Outputable(..) )
-import PprStyle ( PprStyle(..) )
+import Maybes ( MaybeErr(..) )
+import ErrUtils ( ErrMsg(..) )
+import Outputable
import Util ( nOfThem, panic )
import FastString
import StringBuffer
-
-#if __GLASGOW_HASKELL__ <= 201
-import PreludeGlaST
-#else
import GlaExts
-#endif
+import ST ( runST )
\end{code}
%************************************************************************
| ITconid FAST_STRING
| ITvarsym FAST_STRING
| ITconsym FAST_STRING
- | ITqvarid (FAST_STRING,FAST_STRING)
- | ITqconid (FAST_STRING,FAST_STRING)
- | ITqvarsym (FAST_STRING,FAST_STRING)
- | ITqconsym (FAST_STRING,FAST_STRING)
+ | ITqvarid (FAST_STRING,FAST_STRING,IfaceFlavour)
+ | ITqconid (FAST_STRING,FAST_STRING,IfaceFlavour)
+ | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour)
+ | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour)
- | ITidinfo [IfaceToken] -- lazily return the stream of tokens for
- -- the info attached to an id.
- | ITtysig [IfaceToken] -- lazily return the stream of tokens for
+ | ITtysig StringBuffer (Maybe StringBuffer)
+ -- lazily return the stream of tokens for
-- the info attached to an id.
-- Stuff for reading unfoldings
- | ITarity | ITstrict | ITunfold
- | ITdemand [Demand] | ITbottom
+ | ITarity
+ | ITunfold Bool -- True <=> there's an INLINE pragma on this Id
+ | ITstrict [Demand] | ITbottom
| ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
| ITcoerce_in | ITcoerce_out | ITatsign
| ITccall (Bool,Bool) -- (is_casm, may_gc)
| ITinteger Integer | ITdouble Double
| ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
| ITunknown String -- Used when the lexer can't make sense of it
+ | ITeof -- end of file token
deriving Text -- debugging
instance Text CostCentre -- cheat!
%************************************************************************
\begin{code}
-lexIface :: StringBuffer -> [IfaceToken]
-lexIface buf =
+lexIface :: (IfaceToken -> IfM a) -> IfM a
+lexIface cont buf =
_scc_ "Lexer"
-- if bufferExhausted buf then
-- []
-- _trace ("Lexer: "++[C# (currentChar# buf)]) $
case currentChar# buf of
-- whitespace and comments, ignore.
- ' '# -> lexIface (stepOn buf)
- '\t'# -> lexIface (stepOn buf)
- '\n'# -> lexIface (stepOn buf)
+ ' '# -> lexIface cont (stepOn buf)
+ '\t'# -> lexIface cont (stepOn buf)
+ '\n'# -> \ loc -> lexIface cont (stepOn buf) (incSrcLine loc)
-- Numbers and comments
'-'# ->
case lookAhead# buf 1# of
- '-'# -> lex_comment (stepOnBy# buf 2#)
+ '-'# -> lex_comment cont (stepOnBy# buf 2#)
c ->
if isDigit (C# c)
- then lex_num (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
- else lex_id buf
+ then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
+ else lex_id cont buf
-- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
-- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
'('# ->
case prefixMatch (stepOn buf) "..)" of
- Just buf' -> ITdotdot : lexIface (stepOverLexeme buf')
+ Just buf' -> cont ITdotdot (stepOverLexeme buf')
Nothing ->
case lookAhead# buf 1# of
- ','# -> lex_tuple Nothing (stepOnBy# buf 2#)
- ')'# -> ITconid SLIT("()") : lexIface (stepOnBy# buf 2#)
- _ -> IToparen : lexIface (stepOn buf)
+ ','# -> lex_tuple cont Nothing (stepOnBy# buf 2#)
+ ')'# -> cont (ITconid SLIT("()")) (stepOnBy# buf 2#)
+ _ -> cont IToparen (stepOn buf)
- '{'# -> ITocurly : lexIface (stepOn buf)
- '}'# -> ITccurly : lexIface (stepOn buf)
- ')'# -> ITcparen : lexIface (stepOn buf)
+ '{'# -> cont ITocurly (stepOn buf)
+ '}'# -> cont ITccurly (stepOn buf)
+ ')'# -> cont ITcparen (stepOn buf)
'['# ->
case lookAhead# buf 1# of
- ']'# -> ITconid SLIT("[]") : lexIface (stepOnBy# buf 2#)
- _ -> ITobrack : lexIface (stepOn buf)
- ']'# -> ITcbrack : lexIface (stepOn buf)
- ','# -> ITcomma : lexIface (stepOn buf)
- ':'# -> case lookAhead# buf 1# of
- ':'# -> ITdcolon : lexIface (stepOnBy# buf 2#)
- _ -> lex_id (incLexeme buf)
- ';'# -> ITsemi : lexIface (stepOn buf)
+ ']'# -> cont (ITconid SLIT("[]")) (stepOnBy# buf 2#)
+ _ -> cont ITobrack (stepOn buf)
+ ']'# -> cont ITcbrack (stepOn buf)
+ ','# -> cont ITcomma (stepOn buf)
+ ';'# -> cont ITsemi (stepOn buf)
'\"'# -> case untilEndOfString# (stepOn buf) of
buf' ->
-- the string literal does *not* include the dquotes
case lexemeToFastString buf' of
- v -> ITstring v : lexIface (stepOn (stepOverLexeme buf'))
+ v -> cont (ITstring v) (stepOn (stepOverLexeme buf'))
'\''# -> --
-- untilEndOfChar# extends the current lexeme until
--
case untilEndOfChar# (stepOn buf) of
buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
- [ (ch, rest)] -> ITchar ch : lexIface (stepOverLexeme (incLexeme buf'))
+ [ (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
-- ``thingy'' form for casm
'`'# ->
case lookAhead# buf 1# of
- '`'# -> lex_cstring (stepOnBy# buf 2#) -- remove the `` and go.
- _ -> lex_id (incLexeme buf) -- add ` to lexeme and assume
+ '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `` and go.
+ _ -> lex_id cont (incLexeme buf) -- add ` to lexeme and assume
-- scanning an id of some sort.
-- Keywords
'_'# ->
case lookAhead# buf 1# of
'S'# -> case lookAhead# buf 2# of
- '_'# -> ITstrict :
- lex_demand (stepOnUntil (not . isSpace)
- (stepOnBy# buf 3#)) -- past _S_
+ '_'# ->
+ lex_demand cont (stepOnUntil (not . isSpace)
+ (stepOnBy# buf 3#)) -- past _S_
's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of
- Just buf' -> lex_scc (stepOnUntil (not . isSpace) (stepOverLexeme buf'))
- Nothing -> lex_keyword (stepOnBy# buf 1#) -- drop the '_' and assume
+ Just buf' -> lex_scc cont (stepOnUntil (not . isSpace) (stepOverLexeme buf'))
+ Nothing -> lex_keyword cont (stepOnBy# buf 1#) -- drop the '_' and assume
-- it is a keyword.
- _ -> lex_keyword (stepOn buf)
+ _ -> lex_keyword cont (stepOn buf)
'\NUL'# ->
if bufferExhausted (stepOn buf) then
- []
+ cont ITeof buf
else
- lex_id buf
+ lex_id cont buf
c ->
if isDigit (C# c) then
- lex_num (id) (ord# c -# ord# '0'#) (incLexeme buf)
+ lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
else
- lex_id buf
+ lex_id cont buf
-- where
-lex_comment buf =
+lex_comment cont buf =
-- _trace ("comment: "++[C# (currentChar# buf)]) $
- case untilChar# buf '\n'# of {buf' -> lexIface (stepOverLexeme buf')}
+ case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
------------------
-lex_demand buf =
+lex_demand cont buf =
-- _trace ("demand: "++[C# (currentChar# buf)]) $
- case read_em [] buf of { (ls,buf') -> ITdemand ls : lexIface (stepOverLexeme buf')}
+ case read_em [] buf of { (ls,buf') -> cont (ITstrict ls) (stepOverLexeme buf')}
where
-- code snatched from Demand.lhs
read_em acc buf =
'P'# -> read_em (WwPrim : acc) (stepOn buf)
'E'# -> read_em (WwEnum : acc) (stepOn buf)
')'# -> (reverse acc, stepOn buf)
- 'U'# -> do_unpack True acc (stepOnBy# buf 2#)
- 'u'# -> do_unpack False acc (stepOnBy# buf 2#)
+ 'U'# -> do_unpack DataType True acc (stepOnBy# buf 2#)
+ 'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
+ 'N'# -> do_unpack NewType True acc (stepOnBy# buf 2#)
+ 'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
_ -> (reverse acc, buf)
- do_unpack wrapper_unpacks acc buf
+ do_unpack new_or_data wrapper_unpacks acc buf
= case read_em [] buf of
- (stuff, rest) -> read_em (WwUnpack wrapper_unpacks stuff : acc) rest
+ (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
------------------
-lex_scc buf =
+lex_scc cont buf =
-- _trace ("scc: "++[C# (currentChar# buf)]) $
case currentChar# buf of
'"'# ->
-- YUCK^2
case prefixMatch (stepOn buf) "NO_CC\"" of
- Just buf' -> ITscc noCostCentre : lexIface (stepOverLexeme buf')
+ Just buf' -> cont (ITscc noCostCentre) (stepOverLexeme buf')
Nothing ->
case prefixMatch (stepOn buf) "CURRENT_CC\"" of
- Just buf' -> ITscc useCurrentCostCentre : lexIface (stepOverLexeme buf')
+ Just buf' -> cont (ITscc useCurrentCostCentre) (stepOverLexeme buf')
Nothing ->
case prefixMatch (stepOn buf) "OVERHEAD\"" of
- Just buf' -> ITscc overheadCostCentre : lexIface (stepOverLexeme buf')
+ Just buf' -> cont (ITscc overheadCostCentre) (stepOverLexeme buf')
Nothing ->
case prefixMatch (stepOn buf) "DONT_CARE\"" of
- Just buf' -> ITscc dontCareCostCentre : lexIface (stepOverLexeme buf')
+ Just buf' -> cont (ITscc dontCareCostCentre) (stepOverLexeme buf')
Nothing ->
case prefixMatch (stepOn buf) "SUBSUMED\"" of
- Just buf' -> ITscc subsumedCosts : lexIface (stepOverLexeme buf')
+ Just buf' -> cont (ITscc subsumedCosts) (stepOverLexeme buf')
Nothing ->
case prefixMatch (stepOn buf) "CAFs_in_...\"" of
- Just buf' -> ITscc preludeCafsCostCentre : lexIface (stepOverLexeme buf')
+ Just buf' -> cont (ITscc preludeCafsCostCentre) (stepOverLexeme buf')
Nothing ->
case prefixMatch (stepOn buf) "CC_CAFs_in_..." of
Just buf' ->
case untilChar# (stepOverLexeme buf') '\"'# of
- buf'' -> ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_):
- lexIface (stepOn (stepOverLexeme buf''))
+ buf'' -> cont (ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_)) (stepOn (stepOverLexeme buf''))
Nothing ->
case prefixMatch (stepOn buf) "DICTs_in_...\"" of
- Just buf' -> ITscc (preludeDictsCostCentre True) : lexIface (stepOverLexeme buf')
+ Just buf' -> cont (ITscc (preludeDictsCostCentre True)) (stepOverLexeme buf')
Nothing ->
case prefixMatch (stepOn buf) "CC_DICTs_in_..." of
Just buf' ->
case untilChar# (stepOverLexeme buf') '\"'# of
- buf'' -> ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True):
- lexIface (stepOn (stepOverLexeme buf''))
+ buf'' -> cont (ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True))
+ (stepOn (stepOverLexeme buf''))
Nothing ->
let
match_user_cc buf =
case untilChar# buf '/'# of
buf' ->
let mod_name = lexemeToFastString buf' in
- case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
- buf'' ->
- let grp_name = lexemeToFastString buf'' in
- case untilChar# (stepOn (stepOverLexeme buf'')) '\"'# of
- buf''' ->
- let cc_name = lexemeToFastString buf''' in
- (mkUserCC cc_name mod_name grp_name,
- stepOn (stepOverLexeme buf'''))
+-- case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
+-- buf'' ->
+-- let grp_name = lexemeToFastString buf'' in
+ case untilChar# (stepOn (stepOverLexeme buf')) '\"'# of
+ buf'' ->
+ -- The label may contain arbitrary characters, so it
+ -- may have been escaped etc., hence we `read' it in to get
+ -- rid of these meta-chars in the string and then pack it (again.)
+ -- ToDo: do the same for module name (single quotes allowed in m-names).
+ -- BTW, the code in this module is totally gruesome..
+ let upk_label = _UNPK_ (lexemeToFastString buf'') in
+ case reads ('"':upk_label++"\"") of
+ ((cc_label,_):_) ->
+ let cc_name = _PK_ cc_label in
+ (mkUserCC cc_name mod_name _NIL_{-grp_name-},
+ stepOn (stepOverLexeme buf''))
+ _ ->
+ trace ("trouble lexing scc label: " ++ upk_label ++ " , ignoring")
+ (mkUserCC _NIL_ mod_name _NIL_{-grp_name-},
+ stepOn (stepOverLexeme buf''))
in
case prefixMatch (stepOn buf) "CAF:" of
Just buf' ->
case match_user_cc (stepOverLexeme buf') of
- (cc, buf'') -> ITscc (cafifyCC cc) : lexIface buf''
+ (cc, buf'') -> cont (ITscc (cafifyCC cc)) buf''
Nothing ->
case match_user_cc (stepOn buf) of
- (cc, buf'') -> ITscc cc : lexIface buf''
- c -> ITunknown [C# c] : lexIface (stepOn buf)
+ (cc, buf'') -> cont (ITscc cc) buf''
+ c -> cont (ITunknown [C# c]) (stepOn buf)
-----------
-lex_num :: (Int -> Int) -> Int# -> StringBuffer -> [IfaceToken]
-lex_num minus acc# buf =
+lex_num :: (IfaceToken -> IfM a) ->
+ (Int -> Int) -> Int# -> IfM a
+lex_num cont minus acc# buf =
-- _trace ("lex_num: "++[C# (currentChar# buf)]) $
case scanNumLit (I# acc#) buf of
(acc',buf') ->
case expandWhile (isDigit) (incLexeme buf') of
buf'' -> -- points to first non digit char
case reads (lexemeToString buf'') of
- [(v,_)] -> ITdouble v : lexIface (stepOverLexeme buf'')
- _ -> ITinteger (fromInt (minus acc')) : lexIface (stepOverLexeme buf')
+ [(v,_)] -> cont (ITdouble v) (stepOverLexeme buf'')
+ _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
-- case reads (lexemeToString buf') of
--- [(i,_)] -> ITinteger i : lexIface (stepOverLexeme buf')
+-- [(i,_)] -> cont (ITinteger i) (stepOverLexeme buf')
------------
-lex_keyword buf =
+lex_keyword cont buf =
-- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $
case currentChar# buf of
':'# -> case lookAhead# buf 1# of
'_'# -> -- a binding, type (and other id-info) follows,
-- to make the parser ever so slightly, we push
--
- lex_decl (stepOnBy# buf 2#)
- v# -> ITunknown (['_',':',C# v#]) : lexIface (stepOnBy# buf 2#)
+ lex_decl cont (stepOnBy# buf 2#)
+ v# -> cont (ITunknown (['_',':',C# v#])) (stepOnBy# buf 2#)
_ ->
case expandWhile (is_kwd_char) buf of
buf' ->
let kw = lexemeToFastString buf' in
-- _trace ("kw: "++lexemeToString buf') $
case lookupUFM ifaceKeywordsFM kw of
- Nothing -> ITunknown (_UNPK_ kw) : -- (minor) sigh
- lexIface (stepOverLexeme buf')
- Just xx -> xx : lexIface (stepOverLexeme buf')
+ Nothing -> cont (ITunknown (_UNPK_ kw)) -- (minor) sigh
+ (stepOverLexeme buf')
+ Just xx -> cont xx (stepOverLexeme buf')
-lex_decl buf =
+lex_decl cont buf =
case doDiscard False buf of -- spin until ;; is found
buf' ->
{- _trace (show (lexemeToString buf')) $ -}
case currentChar# buf' of
'\n'# -> -- newline, no id info.
- ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
- lexIface (stepOverLexeme buf')
+ cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
+ (stepOverLexeme buf')
'\r'# -> -- just to be sure for those Win* boxes..
- ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
- lexIface (stepOverLexeme buf')
+ cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
+ (stepOverLexeme buf')
'\NUL'# ->
- ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) :
- lexIface (stepOverLexeme buf')
+ cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
+ (stepOverLexeme buf')
c -> -- run all over the id info
case doDiscard False (stepOverLexeme buf') of -- spin until ;; is found (outside a string!)
buf'' ->
--_trace ((C# c):show (lexemeToString (decLexeme buf'))) $
--_trace (show (lexemeToString (decLexeme buf''))) $
- ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))):
- let ls = lexIface (stepOverLexeme buf'') in
- if opt_IgnoreIfacePragmas then
- ls
- else
- let is = lexIface (lexemeToBuffer (decLexeme buf'')) in
+ let idinfo =
+ if opt_IgnoreIfacePragmas then
+ Nothing
+ else
+ Just (lexemeToBuffer (decLexeme buf''))
--_trace (show is) $
- ITidinfo is : ls
+ in
+ cont (ITtysig (lexemeToBuffer (decLexeme buf')) idinfo)
+ (stepOverLexeme buf'')
-- ToDo: hammer!
is_kwd_char c@(C# c#) =
-----------
-lex_cstring buf =
+lex_cstring cont buf =
-- _trace ("lex_cstring: "++[C# (currentChar# buf)]) $
case expandUntilMatch buf "\'\'" of
- buf' -> ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))) :
- lexIface (stepOverLexeme buf')
+ buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
+ (stepOverLexeme buf')
-----------
-lex_tuple module_dot buf =
+lex_tuple cont module_dot buf =
-- _trace ("lex_tuple: "++[C# (currentChar# buf)]) $
go 2 buf
where
go n buf =
case currentChar# buf of
','# -> go (n+1) (stepOn buf)
- ')'# -> end_lex_id module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
- _ -> ITunknown ("tuple " ++ show n) : lexIface buf
+ ')'# -> end_lex_id cont module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
+ _ -> cont (ITunknown ("tuple " ++ show n)) buf
-- Similarly ' itself is ok inside an identifier, but not at the start
-id_arr :: _ByteArray Int
+-- id_arr is an array of bytes, indexed by characters,
+-- containing 0 if the character isn't a valid character from an identifier
+-- and 1 if it is. It's just a memo table for is_id_char.
+id_arr :: ByteArray Int
id_arr =
- unsafePerformPrimIO (
- newCharArray (0,255) `thenPrimIO` \ barr ->
+ runST (
+ newCharArray (0,255) >>= \ barr ->
let
- loop 256# = returnPrimIO ()
+ loop 256# = return ()
loop i# =
if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
- writeCharArray barr (I# i#) '\1' `seqPrimIO`
+ writeCharArray barr (I# i#) '\1' >>
loop (i# +# 1#)
else
- writeCharArray barr (I# i#) '\0' `seqPrimIO`
+ writeCharArray barr (I# i#) '\0' >>
loop (i# +# 1#)
in
- loop 0# `seqPrimIO`
+ loop 0# >>
unsafeFreezeByteArray barr)
is_id_char (C# c#) =
let
- _ByteArray _ arr# = id_arr
+ ByteArray _ arr# = id_arr
in
case ord# (indexCharArray# arr# (ord# c#)) of
0# -> False
1# -> True
---is_id_char c@(C# c#) = isAlphanum c || is_sym c#
+--OLD: is_id_char c@(C# c#) = isAlphanum c || is_sym c#
-is_sym c#=
+is_sym c# =
case c# of {
':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True;
- '#'# -> True; '$'# -> True; ':'# -> True; '%'# -> True;
+ '#'# -> True; '$'# -> True; '%'# -> True;
'&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True;
'/'# -> True; '<'# -> True; '='# -> True; '>'# -> True;
'?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True;
--isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
-mod_arr :: _ByteArray Int
+-- mod_arr is an array of bytes, indexed by characters,
+-- containing 0 if the character isn't a valid character from a module name,
+-- and 1 if it is.
+mod_arr :: ByteArray Int
mod_arr =
- unsafePerformPrimIO (
- newCharArray (0,255) `thenPrimIO` \ barr ->
+ runST (
+ newCharArray (0,255) >>= \ barr ->
let
- loop 256# = returnPrimIO ()
+ loop 256# = return ()
loop i# =
if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
- writeCharArray barr (I# i#) '\1' `seqPrimIO`
+ writeCharArray barr (I# i#) '\1' >>
loop (i# +# 1#)
else
- writeCharArray barr (I# i#) '\0' `seqPrimIO`
+ writeCharArray barr (I# i#) '\0' >>
loop (i# +# 1#)
in
- loop 0# `seqPrimIO`
+ loop 0# >>
unsafeFreezeByteArray barr)
is_mod_char (C# c#) =
let
- _ByteArray _ arr# = mod_arr
+ ByteArray _ arr# = mod_arr
in
case ord# (indexCharArray# arr# (ord# c#)) of
0# -> False
--isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
-{-
-lex_id cs =
- case _scc_ "lex_id.span" my_span' (is_mod_char) cs of
- (xs, len, cs') ->
- case cs' of
- [] -> case xs of
- [] -> lex_id2 Nothing cs
- _ -> lex_id3 Nothing len xs cs
-
- '.':cs'' ->
- case xs of
- [] -> lex_id2 Nothing cs
- _ ->
- let
- pk_str = _PK_ (xs::String)
- len = lengthPS pk_str
- in
- if len==len+1 then
- error "Well, I never!"
- else
- lex_id2 (Just pk_str) cs''
- _ -> case xs of
- [] -> lex_id2 Nothing cs
- _ -> lex_id3 Nothing len xs cs'
-
--}
-
-lex_id buf =
+lex_id cont buf =
-- _trace ("lex_id: "++[C# (currentChar# buf)]) $
case expandWhile (is_mod_char) buf of
buf' ->
case currentChar# buf' of
- '.'# ->
+ '.'# -> munch buf' HiFile
+ '!'# -> munch buf' HiBootFile
+ _ -> lex_id2 cont Nothing buf'
+ where
+ munch buf' hif =
if not (emptyLexeme buf') then
-- _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $
case lexemeToFastString buf' of
- l@(FastString u# l# ba#) -> lex_id2 (Just (FastString u# l# ba#))
+ l@(FastString u# l# ba#) -> lex_id2 cont (Just (FastString u# l# ba#, hif))
(stepOn (stepOverLexeme buf'))
else
- lex_id2 Nothing buf'
- _ -> lex_id2 Nothing buf'
+ lex_id2 cont Nothing buf'
+
-- Dealt with the Module.part
-lex_id2 module_dot buf =
+lex_id2 cont module_dot buf =
-- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
case currentChar# buf of
- '['# ->
+
+ '['# -> -- Special case for []
case lookAhead# buf 1# of
- ']'# -> end_lex_id module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
- _ -> lex_id3 module_dot buf
- '('# ->
+ ']'# -> end_lex_id cont module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
+ _ -> lex_id3 cont module_dot buf
+
+ '('# -> -- Special case for (,,,)
case lookAhead# buf 1# of
- ')'# -> end_lex_id module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
- ','# -> lex_tuple module_dot (stepOnBy# buf 2#)
- _ -> lex_id3 module_dot buf
- ':'# -> lex_id3 module_dot (incLexeme buf)
- _ -> lex_id3 module_dot buf
+ ')'# -> end_lex_id cont module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
+ ','# -> lex_tuple cont module_dot (stepOnBy# buf 2#)
+ _ -> lex_id3 cont module_dot buf
+ ':'# -> lex_id3 cont module_dot (incLexeme buf)
+ '-'# ->
+ case module_dot of
+ Nothing -> lex_id3 cont module_dot buf
+ Just ghc -> -- this should be "GHC" (current home of (->))
+ case lookAhead# buf 1# of
+ '>'# -> end_lex_id cont module_dot (ITconid SLIT("->"))
+ (stepOnBy# buf 2#)
+ _ -> lex_id3 cont module_dot buf
+ _ -> lex_id3 cont module_dot buf
-- Dealt with [], (), : special cases
-lex_id3 module_dot buf =
+lex_id3 cont module_dot buf =
-- _trace ("lex_id3: "++[C# (currentChar# buf)]) $
case expandWhile (is_id_char) buf of
buf' ->
case module_dot of
Just _ ->
- end_lex_id module_dot (mk_var_token lexeme) (stepOverLexeme buf')
+ end_lex_id cont module_dot (mk_var_token lexeme) new_buf
Nothing ->
case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
- Just kwd_token -> kwd_token : lexIface new_buf
- Nothing -> mk_var_token lexeme : lexIface new_buf
+ Just kwd_token -> cont kwd_token new_buf
+ Nothing -> cont (mk_var_token lexeme) new_buf
where
lexeme = lexemeToFastString buf'
new_buf = stepOverLexeme buf'
lex_id2 module_dot xs cs = lex_id3 module_dot xs cs
-}
-
-- Dealt with [], (), : special cases
{-
n = _PK_ xs
-}
-end_lex_id Nothing token buf = token : lexIface buf
-end_lex_id (Just m) token buf =
+end_lex_id cont Nothing token buf = cont token buf
+end_lex_id cont (Just (m,hif)) token buf =
case token of
- ITconid n -> ITqconid (m,n) : lexIface buf
- ITvarid n -> ITqvarid (m,n) : lexIface buf
- ITconsym n -> ITqconsym (m,n) : lexIface buf
- ITvarsym n -> ITqvarsym (m,n) : lexIface buf
- ITbang -> ITqvarsym (m,SLIT("!")) : lexIface buf
- _ -> ITunknown (show token) : lexIface buf
+ ITconid n -> cont (ITqconid (m,n,hif)) buf
+ ITvarid n -> cont (ITqvarid (m,n,hif)) buf
+ ITconsym n -> cont (ITqconsym (m,n,hif)) buf
+
+ -- Special case for ->
+ -- "->" by itself is a special token (ITrarrow),
+ -- but M.-> is a ITqconid
+ ITvarsym n | n == SLIT("->")
+ -> cont (ITqconsym (m,n,hif)) buf
+
+ ITvarsym n -> cont (ITqvarsym (m,n,hif)) buf
+
+-- ITbang can't happen here I think
+-- ITbang -> cont (ITqvarsym (m,SLIT("!"),hif)) buf
+
+ _ -> cont (ITunknown (show token)) buf
------------
ifaceKeywordsFM :: UniqFM IfaceToken
,("declarations_", ITdeclarations)
,("pragmas_", ITpragmas)
,("forall_", ITforall)
- ,("U_", ITunfold)
+ ,("U_", ITunfold False)
+ ,("U!_", ITunfold True)
,("A_", ITarity)
,("coerce_in_", ITcoerce_in)
,("coerce_out_", ITcoerce_out)
,("!", ITbang)
,("=>", ITdarrow)
,("=", ITequal)
+ ,("::", ITdcolon)
]
--- doDiscard rips along really fast looking for a double semicolon,
+-- doDiscard rips along really fast, looking for a double semicolon,
-- indicating the end of the pragma we're skipping
doDiscard inStr buf =
-- _trace (show (C# (currentChar# buf))) $
%************************************************************************
\begin{code}
-type IfM a = MaybeErr a Error
+type IfM a = StringBuffer -- Input string
+ -> SrcLoc
+ -> MaybeErr a ErrMsg
returnIf :: a -> IfM a
-thenIf :: IfM a -> (a -> IfM b) -> IfM b
-happyError :: Int -> [IfaceToken] -> IfM a
+returnIf a s l = Succeeded a
-returnIf a = Succeeded a
+thenIf :: IfM a -> (a -> IfM b) -> IfM b
+m `thenIf` k = \s l ->
+ case m s l of
+ Succeeded a -> k a s l
+ Failed err -> Failed err
-thenIf (Succeeded a) k = k a
-thenIf (Failed err) _ = Failed err
+getSrcLocIf :: IfM SrcLoc
+getSrcLocIf s l = Succeeded l
-happyError ln toks = Failed (ifaceParseErr ln toks)
+happyError :: IfM a
+happyError s l = Failed (ifaceParseErr l ([]::[IfaceToken]){-Todo-})
-----------------------------------------------------------------
-ifaceParseErr ln toks sty
- = hsep [ptext SLIT("Interface-file parse error: line"), int ln, ptext SLIT("toks="), text (show (take 10 toks))]
+ifaceParseErr l toks
+ = hsep [ppr l, ptext SLIT("Interface-file parse error;"),
+ ptext SLIT("toks="), text (show (take 10 toks))]
\end{code}