+--------------------------------------------------------
+[Jan 98]
+There's a known bug in here:
+
+ If an interface file ends prematurely, Lex tries to
+ do headFS of an empty FastString.
+
+An example that provokes the error is
+
+ f _:_ _forall_ [a] <<<END OF FILE>>>
+--------------------------------------------------------
+
+
%
% (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
%
\section[Lexical analysis]{Lexical analysis}
\begin{code}
-#include "HsVersions.h"
-
module Lex (
isLexCon, isLexVar, isLexId, isLexSym,
mkTupNameStr, ifaceParseErr,
-- Monad for parser
- IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError,
+ IfaceToken(..), lexIface, IfM, thenIf, returnIf, getSrcLocIf,
+ happyError,
StringBuffer
) where
+#include "HsVersions.h"
-IMPORT_1_3(Char(isDigit, isAlpha, isAlphanum, isUpper,isLower, isSpace, ord))
+import Char (isDigit, isAlpha, isAlphanum, isUpper,isLower, isSpace, ord )
-#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ <= 201
-IMPORT_DELOOPER(Ubiq)
-IMPORT_DELOOPER(IdLoop) -- get the CostCentre type&constructors from here
-#else
import {-# SOURCE #-} CostCentre
-# if __GLASGOW_HASKELL__ == 202
-import PrelBase ( Char(..) )
-# endif
-#endif
import CmdLineOpts ( opt_IgnoreIfacePragmas )
import Demand ( Demand(..) {- instance Read -} )
import UniqFM ( UniqFM, listToUFM, lookupUFM)
import BasicTypes ( NewOrData(..), IfaceFlavour(..) )
+import SrcLoc ( SrcLoc, incSrcLine )
-#if __GLASGOW_HASKELL__ >= 202
import Maybes ( MaybeErr(..) )
-#else
-import Maybes ( Maybe(..), MaybeErr(..) )
-#endif
-import Pretty
-
-
-
-import ErrUtils ( Error(..) )
-import Outputable ( Outputable(..), PprStyle(..) )
+import ErrUtils ( ErrMsg(..) )
+import Outputable
import Util ( nOfThem, panic )
import FastString
import StringBuffer
-
-#if __GLASGOW_HASKELL__ <= 201
-import PreludeGlaST
-#else
import GlaExts
-#endif
+import ST ( runST )
\end{code}
%************************************************************************
-- whitespace and comments, ignore.
' '# -> lexIface cont (stepOn buf)
'\t'# -> lexIface cont (stepOn buf)
- '\n'# -> \line -> lexIface cont (stepOn buf) (line+1)
+ '\n'# -> \ loc -> lexIface cont (stepOn buf) (incSrcLine loc)
-- Numbers and comments
'-'# ->
_ -> cont ITobrack (stepOn buf)
']'# -> cont ITcbrack (stepOn buf)
','# -> cont ITcomma (stepOn buf)
- ':'# -> case lookAhead# buf 1# of
- ':'# -> cont ITdcolon (stepOnBy# buf 2#)
- _ -> lex_id cont (incLexeme buf)
';'# -> cont ITsemi (stepOn buf)
'\"'# -> case untilEndOfString# (stepOn buf) of
buf' ->
-- Similarly ' itself is ok inside an identifier, but not at the start
-id_arr :: _ByteArray Int
+-- id_arr is an array of bytes, indexed by characters,
+-- containing 0 if the character isn't a valid character from an identifier
+-- and 1 if it is. It's just a memo table for is_id_char.
+id_arr :: ByteArray Int
id_arr =
- unsafePerformPrimIO (
- newCharArray (0,255) `thenPrimIO` \ barr ->
+ runST (
+ newCharArray (0,255) >>= \ barr ->
let
- loop 256# = returnPrimIO ()
+ loop 256# = return ()
loop i# =
if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
- writeCharArray barr (I# i#) '\1' `seqPrimIO`
+ writeCharArray barr (I# i#) '\1' >>
loop (i# +# 1#)
else
- writeCharArray barr (I# i#) '\0' `seqPrimIO`
+ writeCharArray barr (I# i#) '\0' >>
loop (i# +# 1#)
in
- loop 0# `seqPrimIO`
+ loop 0# >>
unsafeFreezeByteArray barr)
is_id_char (C# c#) =
let
- _ByteArray _ arr# = id_arr
+ ByteArray _ arr# = id_arr
in
case ord# (indexCharArray# arr# (ord# c#)) of
0# -> False
1# -> True
---is_id_char c@(C# c#) = isAlphanum c || is_sym c#
+--OLD: is_id_char c@(C# c#) = isAlphanum c || is_sym c#
-is_sym c#=
+is_sym c# =
case c# of {
':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True;
- '#'# -> True; '$'# -> True; ':'# -> True; '%'# -> True;
+ '#'# -> True; '$'# -> True; '%'# -> True;
'&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True;
'/'# -> True; '<'# -> True; '='# -> True; '>'# -> True;
'?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True;
--isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
-mod_arr :: _ByteArray Int
+-- mod_arr is an array of bytes, indexed by characters,
+-- containing 0 if the character isn't a valid character from a module name,
+-- and 1 if it is.
+mod_arr :: ByteArray Int
mod_arr =
- unsafePerformPrimIO (
- newCharArray (0,255) `thenPrimIO` \ barr ->
+ runST (
+ newCharArray (0,255) >>= \ barr ->
let
- loop 256# = returnPrimIO ()
+ loop 256# = return ()
loop i# =
if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
- writeCharArray barr (I# i#) '\1' `seqPrimIO`
+ writeCharArray barr (I# i#) '\1' >>
loop (i# +# 1#)
else
- writeCharArray barr (I# i#) '\0' `seqPrimIO`
+ writeCharArray barr (I# i#) '\0' >>
loop (i# +# 1#)
in
- loop 0# `seqPrimIO`
+ loop 0# >>
unsafeFreezeByteArray barr)
is_mod_char (C# c#) =
let
- _ByteArray _ arr# = mod_arr
+ ByteArray _ arr# = mod_arr
in
case ord# (indexCharArray# arr# (ord# c#)) of
0# -> False
--isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
-{-
-lex_id cs =
- case _scc_ "lex_id.span" my_span' (is_mod_char) cs of
- (xs, len, cs') ->
- case cs' of
- [] -> case xs of
- [] -> lex_id2 Nothing cs
- _ -> lex_id3 Nothing len xs cs
-
- '.':cs'' ->
- case xs of
- [] -> lex_id2 Nothing cs
- _ ->
- let
- pk_str = _PK_ (xs::String)
- len = lengthPS pk_str
- in
- if len==len+1 then
- error "Well, I never!"
- else
- lex_id2 (Just pk_str) cs''
- _ -> case xs of
- [] -> lex_id2 Nothing cs
- _ -> lex_id3 Nothing len xs cs'
-
--}
-
lex_id cont buf =
-- _trace ("lex_id: "++[C# (currentChar# buf)]) $
case expandWhile (is_mod_char) buf of
lex_id2 cont module_dot buf =
-- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
case currentChar# buf of
- '['# ->
+
+ '['# -> -- Special case for []
case lookAhead# buf 1# of
']'# -> end_lex_id cont module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
_ -> lex_id3 cont module_dot buf
- '('# ->
+
+ '('# -> -- Special case for (,,,)
case lookAhead# buf 1# of
')'# -> end_lex_id cont module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
','# -> lex_tuple cont module_dot (stepOnBy# buf 2#)
_ -> lex_id3 cont module_dot buf
':'# -> lex_id3 cont module_dot (incLexeme buf)
+ '-'# ->
+ case module_dot of
+ Nothing -> lex_id3 cont module_dot buf
+ Just ghc -> -- this should be "GHC" (current home of (->))
+ case lookAhead# buf 1# of
+ '>'# -> end_lex_id cont module_dot (ITconid SLIT("->"))
+ (stepOnBy# buf 2#)
+ _ -> lex_id3 cont module_dot buf
_ -> lex_id3 cont module_dot buf
buf' ->
case module_dot of
Just _ ->
- end_lex_id cont module_dot (mk_var_token lexeme) (stepOverLexeme buf')
+ end_lex_id cont module_dot (mk_var_token lexeme) new_buf
Nothing ->
case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
Just kwd_token -> cont kwd_token new_buf
ITconid n -> cont (ITqconid (m,n,hif)) buf
ITvarid n -> cont (ITqvarid (m,n,hif)) buf
ITconsym n -> cont (ITqconsym (m,n,hif)) buf
+
+ -- Special case for ->
+ -- "->" by itself is a special token (ITrarrow),
+ -- but M.-> is a ITqconid
+ ITvarsym n | n == SLIT("->")
+ -> cont (ITqconsym (m,n,hif)) buf
+
ITvarsym n -> cont (ITqvarsym (m,n,hif)) buf
- ITbang -> cont (ITqvarsym (m,SLIT("!"),hif)) buf
+
+-- ITbang can't happen here I think
+-- ITbang -> cont (ITqvarsym (m,SLIT("!"),hif)) buf
+
_ -> cont (ITunknown (show token)) buf
------------
,("!", ITbang)
,("=>", ITdarrow)
,("=", ITequal)
+ ,("::", ITdcolon)
]
--- doDiscard rips along really fast looking for a double semicolon,
+-- doDiscard rips along really fast, looking for a double semicolon,
-- indicating the end of the pragma we're skipping
doDiscard inStr buf =
-- _trace (show (C# (currentChar# buf))) $
%************************************************************************
\begin{code}
-type IfM a = StringBuffer -> Int -> MaybeErr a Error
+type IfM a = StringBuffer -- Input string
+ -> SrcLoc
+ -> MaybeErr a ErrMsg
returnIf :: a -> IfM a
returnIf a s l = Succeeded a
Succeeded a -> k a s l
Failed err -> Failed err
+getSrcLocIf :: IfM SrcLoc
+getSrcLocIf s l = Succeeded l
+
happyError :: IfM a
happyError s l = Failed (ifaceParseErr l ([]::[IfaceToken]){-Todo-})
-----------------------------------------------------------------
-ifaceParseErr l toks sty
- = hsep [ptext SLIT("Interface-file parse error: line"), int l, ptext SLIT("toks="), text (show (take 10 toks))]
+ifaceParseErr l toks
+ = hsep [ppr l, ptext SLIT("Interface-file parse error;"),
+ ptext SLIT("toks="), text (show (take 10 toks))]
\end{code}