--------------------------------------------------------
\begin{code}
-{-# OPTIONS -#include "ctypes.h" #-}
module Lex (
- ifaceParseErr,
+ ifaceParseErr, srcParseErr,
-- Monad for parser
Token(..), lexer, ParseResult(..), PState(..),
import StringBuffer
import GlaExts
import ST ( runST )
-
-#if __GLASGOW_HASKELL__ >= 303
-import Bits
-import Word
-#endif
-
+import Ctypes
import Char ( chr )
import Addr
import PrelRead ( readRational__ ) -- Glasgow non-std
| ITlabel
| ITdynamic
| ITunsafe
+ | ITwith
+ | ITstdcallconv
+ | ITccallconv
| ITinterface -- interface keywords
| IT__export
| ITbottom
| ITinteger_lit
| ITfloat_lit
+ | ITword_lit
+ | ITword64_lit
+ | ITint64_lit
| ITrational_lit
| ITaddr_lit
| ITlit_lit
| ITstring_lit
| ITtypeapp
- | ITonce
- | ITmany
+ | ITusage
+ | ITfuall
| ITarity
| ITspecialise
| ITnocaf
| ITunfold InlinePragInfo
| ITstrict ([Demand], Bool)
| ITrules
- | ITcprinfo (CprInfo)
+ | ITcprinfo
+ | ITdeprecated
| IT__scc
| ITsccAllCafs
| ITinline_prag
| ITnoinline_prag
| ITrules_prag
+ | ITdeprecated_prag
| ITline_prag
| ITclose_prag
| ITqvarsym (FAST_STRING,FAST_STRING)
| ITqconsym (FAST_STRING,FAST_STRING)
+ | ITipvarid FAST_STRING -- GHC extension: implicit param: ?x
+
| ITpragma StringBuffer
| ITchar Char
| ITunknown String -- Used when the lexer can't make sense of it
| ITeof -- end of file token
- deriving Text -- debugging
+ deriving Show -- debugging
\end{code}
-----------------------------------------------------------------------------
( "NOTINLINE", ITnoinline_prag ),
( "LINE", ITline_prag ),
( "RULES", ITrules_prag ),
- ( "RULEZ", ITrules_prag ) -- american spelling :-)
+ ( "RULEZ", ITrules_prag ), -- american spelling :-)
+ ( "DEPRECATED", ITdeprecated_prag )
]
haskellKeywordsFM = listToUFM $
( "_scc_", ITscc )
]
-
+-- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP)
ghcExtensionKeywordsFM = listToUFM $
map (\ (x,y) -> (_PK_ x,y))
[ ( "forall", ITforall ),
( "label", ITlabel ),
( "dynamic", ITdynamic ),
( "unsafe", ITunsafe ),
+ ( "with", ITwith ),
+ ( "stdcall", ITstdcallconv),
+ ( "ccall", ITccallconv),
("_ccall_", ITccall (False, False, False)),
("_ccall_GC_", ITccall (False, False, True)),
("_casm_", ITccall (False, True, False)),
("__bot", ITbottom),
("__integer", ITinteger_lit),
("__float", ITfloat_lit),
+ ("__int64", ITint64_lit),
+ ("__word", ITword_lit),
+ ("__word64", ITword64_lit),
("__rational", ITrational_lit),
("__addr", ITaddr_lit),
("__litlit", ITlit_lit),
("__string", ITstring_lit),
("__a", ITtypeapp),
- ("__o", ITonce),
- ("__m", ITmany),
+ ("__u", ITusage),
+ ("__fuall", ITfuall),
("__A", ITarity),
("__P", ITspecialise),
("__C", ITnocaf),
("__R", ITrules),
- ("__u", ITunfold NoInlinePragInfo),
+ ("__D", ITdeprecated),
+ ("__U", ITunfold NoInlinePragInfo),
("__ccall", ITccall (False, False, False)),
("__ccall_GC", ITccall (False, False, True)),
})
-- first, start a new lexeme and lose all the whitespace
- = tab line bol atbol (stepOverLexeme buf)
+ = _scc_ "Lexer"
+ tab line bol atbol (stepOverLexeme buf)
where
line = srcLocLine loc
lexToken :: (Token -> P a) -> Int# -> P a
lexToken cont glaexts buf =
--trace "lexToken" $
- _scc_ "Lexer"
case currentChar# buf of
-- special symbols ----------------------------------------------------
'}'# -> \ s@PState{context = ctx} ->
case ctx of
(_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
- _ -> lexError "too many '}'s" buf s
+ _ -> lexError "too many '}'s" buf s
'#'# -> case lookAhead# buf 1# of
')'# | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
lex_demand cont (stepOnUntil (not . isSpace)
(stepOnBy# buf 3#)) -- past __S
'M'# ->
- lex_cpr cont (stepOnUntil (not . isSpace)
- (stepOnBy# buf 3#)) -- past __M
+ cont ITcprinfo (stepOnBy# buf 3#) -- past __M
+
's'# ->
case prefixMatch (stepOnBy# buf 3#) "cc" of
Just buf' -> lex_scc cont (stepOverLexeme buf')
trace "lexIface: misplaced NUL?" $
cont (ITunknown "\NUL") (stepOn buf)
+ '?'# | flag glaexts && is_lower (lookAhead# buf 1#) ->
+ lex_ip cont (incLexeme buf)
c | is_digit c -> lex_num cont glaexts 0 buf
| is_symbol c -> lex_sym cont buf
| is_upper c -> lex_con cont glaexts buf
-- ignore \& in a string, deal with string gaps
'\\'# | next_ch `eqChar#` '&'#
- -> lex_string cont glaexts s (setCurrentPos# buf 2#)
+ -> lex_string cont glaexts s buf'
| is_space next_ch
- -> lex_stringgap cont glaexts s buf'
+ -> lex_stringgap cont glaexts s (incLexeme buf)
where next_ch = lookAhead# buf 1#
buf' = setCurrentPos# buf 2#
is_hexdigit c
= is_digit c
- || (c `geChar#` 'a'# && c `leChar#` 'h'#)
- || (c `geChar#` 'A'# && c `leChar#` 'H'#)
+ || (c `geChar#` 'a'# && c `leChar#` 'f'#)
+ || (c `geChar#` 'A'# && c `leChar#` 'F'#)
hex c | is_digit c = ord# c -# ord# '0'#
| otherwise = ord# (to_lower c) -# ord# 'a'# +# 10#
= case read_em [] buf of
(stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
-lex_cpr cont buf =
- case read_em [] buf of { (cpr_inf,buf') ->
- ASSERT ( null (tail cpr_inf) )
- cont (ITcprinfo $ head cpr_inf) buf'
- }
- where
- -- code snatched from lex_demand above
- read_em acc buf =
- case currentChar# buf of
- '-'# -> read_em (NoCPRInfo : acc) (stepOn buf)
- '('# -> do_unpack acc (stepOn buf)
- ')'# -> (reverse acc, stepOn buf)
- _ -> (reverse acc, buf)
-
- do_unpack acc buf
- = case read_em [] buf of
- (stuff, rest) -> read_em ((CPRInfo stuff) : acc) rest
------------------
lex_scc cont buf =
lex_cstring cont buf =
case expandUntilMatch (stepOverLexeme buf) "\'\'" of
- buf' -> cont (ITlitlit (lexemeToFastString
+ Just buf' -> cont (ITlitlit (lexemeToFastString
(setCurrentPos# buf' (negateInt# 2#))))
- (mergeLexemes buf buf')
-
-------------------------------------------------------------------------------
--- Character Classes
-
-is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool
-
-{-# INLINE is_ctype #-}
-#if __GLASGOW_HASKELL__ >= 303
-is_ctype :: Word8 -> Char# -> Bool
-is_ctype mask = \c ->
- (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0
-#else
-is_ctype :: Int -> Char# -> Bool
-is_ctype (I# mask) = \c ->
- let (A# ctype) = ``char_types'' :: Addr
- flag_word = int2Word# (ord# (indexCharOffAddr# ctype (ord# c)))
- in
- (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#)
-#endif
-
-is_ident = is_ctype 1
-is_symbol = is_ctype 2
-is_any = is_ctype 4
-is_space = is_ctype 8
-is_upper = is_ctype 16
-is_digit = is_ctype 32
+ (mergeLexemes buf buf')
+ Nothing -> lexError "unterminated ``" buf
-----------------------------------------------------------------------------
-- identifiers, symbols etc.
+lex_ip cont buf =
+ case expandWhile# is_ident buf of
+ buf' -> cont (ITipvarid lexeme) buf'
+ where lexeme = lexemeToFastString buf'
+
lex_id cont glaexts buf =
case expandWhile# is_ident buf of { buf1 ->
-- real lexeme is M.<sym>
new_buf = mergeLexemes buf buf'
in
- case lookupUFM haskellKeySymsFM lexeme of {
- Just kwd_token -> just_a_conid; -- avoid M.:: etc.
- Nothing -> cont (mk_qvar_token mod lexeme) new_buf
- }}
+ cont (mk_qvar_token mod lexeme) new_buf
+ -- wrong, but arguably morally right: M... is now a qvarsym
+ }
| otherwise =
let
in
case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
Just kwd_token -> just_a_conid; -- avoid M.where etc.
- Nothing ->
- if flag glaexts
- then case lookupUFM ghcExtensionKeywordsFM lexeme of {
- Just kwd_token -> just_a_conid;
- Nothing -> is_a_qvarid }
- else is_a_qvarid
+ Nothing -> is_a_qvarid
+ -- TODO: special ids (as, qualified, hiding) shouldn't be
+ -- recognised as keywords here. ie. M.as is a qualified varid.
}}}
mk_var_token pk_str
| is_upper f = ITconid pk_str
- -- _[A-Z] is treated as a constructor in interface files.
- | f `eqChar#` '_'# && not (_NULL_ tl)
- && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str
| is_ident f = ITvarid pk_str
| f `eqChar#` ':'# = ITconsym pk_str
| otherwise = ITvarsym pk_str
- we still need to insert another '}' followed by a ';',
hence the atbol trick.
+There's also a special hack in here to deal with
+
+ do
+ ....
+ e $ do
+ blah
+
+i.e. the inner context is at the same indentation level as the outer
+context. This is strictly illegal according to Haskell 98, but
+there's a lot of existing code using this style and it doesn't make
+any sense to disallow it, since empty 'do' lists don't make sense.
-}
-layoutOn :: P ()
-layoutOn buf s@(PState{ bol = bol, context = ctx }) =
+layoutOn :: Bool -> P ()
+layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
let offset = lexemeIndex buf -# bol in
case ctx of
- Layout prev_off : _ | prev_off >=# offset ->
+ Layout prev_off : _
+ | if strict then prev_off >=# offset else prev_off ># offset ->
--trace ("layout on, column: " ++ show (I# offset)) $
POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
other ->
POk s{ context = NoLayout:ctx } ()
popContext :: P ()
-popContext = \ buf s@(PState{ context = ctx }) ->
+popContext = \ buf s@(PState{ context = ctx, loc = loc }) ->
case ctx of
(_:tl) -> POk s{ context = tl } ()
- [] -> panic "Lex.popContext: empty context"
+ [] -> PFailed (srcParseErr buf loc)
{-
Note that if the name of the file we're processing ends
Nothing -> ptext SLIT("pre ghc-3.02 version")
Just v -> ptext SLIT("version") <+> integer v
+-----------------------------------------------------------------------------
+
+srcParseErr :: StringBuffer -> SrcLoc -> Message
+srcParseErr s l
+ = hcat [ppr l,
+ if null token
+ then ptext SLIT(": parse error (possibly incorrect indentation)")
+ else hcat [ptext SLIT(": parse error on input "),
+ char '`', text token, char '\'']
+ ]
+ where
+ token = lexemeToString s
+
\end{code}