--------------------------------------------------------
\begin{code}
-{-# OPTIONS -#include "ctypes.h" #-}
module Lex (
-- Monad for parser
Token(..), lexer, ParseResult(..), PState(..),
- checkVersion,
+ checkVersion, ExtFlags(..), mkPState,
StringBuffer,
P, thenP, thenP_, returnP, mapP, failP, failMsgP,
- getSrcLocP, getSrcFile,
+ getSrcLocP, setSrcLocP, getSrcFile,
layoutOn, layoutOff, pushContext, popContext
) where
#include "HsVersions.h"
-import Char ( ord, isSpace, toUpper )
+import Char ( isSpace, toUpper )
import List ( isSuffixOf )
-import IdInfo ( InlinePragInfo(..), CprInfo(..) )
-import Name ( isLowerISO, isUpperISO )
-import PrelMods ( mkTupNameStr, mkUbxTupNameStr )
-import CmdLineOpts ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
-import Demand ( Demand(..) {- instance Read -} )
-import UniqFM ( UniqFM, listToUFM, lookupUFM)
-import BasicTypes ( NewOrData(..) )
+import PrelNames ( mkTupNameStr )
+import CmdLineOpts ( opt_HiVersion, opt_NoHiCheck )
+import ForeignCall ( Safety(..) )
+import NewDemand ( StrictSig(..), Demand(..), Demands(..),
+ DmdResult(..), mkTopDmdType, evalDmd, lazyDmd )
+import UniqFM ( listToUFM, lookupUFM )
+import BasicTypes ( Boxity(..) )
import SrcLoc ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
replaceSrcLine, mkSrcLoc )
-import Maybes ( MaybeErr(..) )
import ErrUtils ( Message )
import Outputable
import FastString
import StringBuffer
import GlaExts
-import ST ( runST )
-
-#if __GLASGOW_HASKELL__ >= 303
-import Bits
-import Word
-#endif
-
-import Char ( chr )
-import Addr
+import Ctype
+import Char ( chr, ord )
import PrelRead ( readRational__ ) -- Glasgow non-std
+import PrelBits ( Bits(..) ) -- non-std
\end{code}
%************************************************************************
| ITthen
| ITtype
| ITwhere
- | ITscc
+ | ITscc -- ToDo: remove (we use {-# SCC "..." #-} now)
| ITforall -- GHC extension keywords
| ITforeign
| ITexport
| ITlabel
| ITdynamic
+ | ITsafe
| ITunsafe
| ITwith
| ITstdcallconv
| ITccallconv
+ | ITdotnet
| ITinterface -- interface keywords
| IT__export
| ITcoerce
| ITinlineMe
| ITinlineCall
- | ITccall (Bool,Bool,Bool) -- (is_dyn, is_casm, may_gc)
+ | ITccall (Bool,Bool,Safety) -- (is_dyn, is_casm, may_gc)
| ITdefaultbranch
| ITbottom
| ITinteger_lit
| ITint64_lit
| ITrational_lit
| ITaddr_lit
+ | ITlabel_lit
| ITlit_lit
| ITstring_lit
| ITtypeapp
| ITarity
| ITspecialise
| ITnocaf
- | ITunfold InlinePragInfo
- | ITstrict ([Demand], Bool)
+ | ITunfold
+ | ITstrict StrictSig
| ITrules
| ITcprinfo
| ITdeprecated
| ITrules_prag
| ITdeprecated_prag
| ITline_prag
+ | ITscc_prag
| ITclose_prag
| ITdotdot -- reserved symbols
| ITdarrow
| ITminus
| ITbang
+ | ITstar
| ITdot
| ITbiglam -- GHC-extension symbols
| ITocurly -- special symbols
| ITccurly
+ | ITocurlybar -- {|, for type applications
+ | ITccurlybar -- |}, for type applications
| ITvccurly
| ITobrack
+ | ITopabrack -- [:, for parallel arrays with -fparr
+ | ITcpabrack -- :], for parallel arrays with -fparr
| ITcbrack
| IToparen
| ITcparen
| ITqvarsym (FAST_STRING,FAST_STRING)
| ITqconsym (FAST_STRING,FAST_STRING)
- | ITipvarid FAST_STRING -- GHC extension: implicit param: ?x
+ | ITdupipvarid FAST_STRING -- GHC extension: implicit param: ?x
+ | ITsplitipvarid FAST_STRING -- GHC extension: implicit param: %x
| ITpragma StringBuffer
- | ITchar Char
+ | ITchar Int
| ITstring FAST_STRING
- | ITinteger Integer
+ | ITinteger Integer
| ITrational Rational
- | ITprimchar Char
+ | ITprimchar Int
| ITprimstring FAST_STRING
| ITprimint Integer
| ITprimfloat Rational
( "LINE", ITline_prag ),
( "RULES", ITrules_prag ),
( "RULEZ", ITrules_prag ), -- american spelling :-)
+ ( "SCC", ITscc_prag ),
( "DEPRECATED", ITdeprecated_prag )
]
( "then", ITthen ),
( "type", ITtype ),
( "where", ITwhere ),
- ( "_scc_", ITscc )
+ ( "_scc_", ITscc ) -- ToDo: remove
]
+isSpecial :: Token -> Bool
+-- If we see M.x, where x is a keyword, but
+-- is special, we treat is as just plain M.x,
+-- not as a keyword.
+isSpecial ITas = True
+isSpecial IThiding = True
+isSpecial ITqualified = True
+isSpecial ITforall = True
+isSpecial ITexport = True
+isSpecial ITlabel = True
+isSpecial ITdynamic = True
+isSpecial ITsafe = True
+isSpecial ITunsafe = True
+isSpecial ITwith = True
+isSpecial ITccallconv = True
+isSpecial ITstdcallconv = True
+isSpecial _ = False
+
-- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP)
ghcExtensionKeywordsFM = listToUFM $
map (\ (x,y) -> (_PK_ x,y))
( "export", ITexport ),
( "label", ITlabel ),
( "dynamic", ITdynamic ),
+ ( "safe", ITunsafe ),
( "unsafe", ITunsafe ),
( "with", ITwith ),
( "stdcall", ITstdcallconv),
( "ccall", ITccallconv),
- ("_ccall_", ITccall (False, False, False)),
- ("_ccall_GC_", ITccall (False, False, True)),
- ("_casm_", ITccall (False, True, False)),
- ("_casm_GC_", ITccall (False, True, True)),
+ ( "dotnet", ITdotnet),
+ ("_ccall_", ITccall (False, False, PlayRisky)),
+ ("_ccall_GC_", ITccall (False, False, PlaySafe)),
+ ("_casm_", ITccall (False, True, PlayRisky)),
+ ("_casm_GC_", ITccall (False, True, PlaySafe)),
-- interface keywords
("__interface", ITinterface),
("__word64", ITword64_lit),
("__rational", ITrational_lit),
("__addr", ITaddr_lit),
+ ("__label", ITlabel_lit),
("__litlit", ITlit_lit),
("__string", ITstring_lit),
("__a", ITtypeapp),
("__C", ITnocaf),
("__R", ITrules),
("__D", ITdeprecated),
- ("__U", ITunfold NoInlinePragInfo),
+ ("__U", ITunfold),
- ("__ccall", ITccall (False, False, False)),
- ("__ccall_GC", ITccall (False, False, True)),
- ("__dyn_ccall", ITccall (True, False, False)),
- ("__dyn_ccall_GC", ITccall (True, False, True)),
- ("__casm", ITccall (False, True, False)),
- ("__dyn_casm", ITccall (True, True, False)),
- ("__casm_GC", ITccall (False, True, True)),
- ("__dyn_casm_GC", ITccall (True, True, True)),
+ ("__ccall", ITccall (False, False, PlayRisky)),
+ ("__ccall_GC", ITccall (False, False, PlaySafe)),
+ ("__dyn_ccall", ITccall (True, False, PlayRisky)),
+ ("__dyn_ccall_GC", ITccall (True, False, PlaySafe)),
+ ("__casm", ITccall (False, True, PlayRisky)),
+ ("__dyn_casm", ITccall (True, True, PlayRisky)),
+ ("__casm_GC", ITccall (False, True, PlaySafe)),
+ ("__dyn_casm_GC", ITccall (True, True, PlaySafe)),
("/\\", ITbiglam)
]
,("=>", ITdarrow)
,("-", ITminus)
,("!", ITbang)
+ ,("*", ITstar)
,(".", ITdot) -- sadly, for 'forall a . t'
]
\end{code}
Lexer state:
- - (glaexts) lexing an interface file or -fglasgow-exts
+ - (exts) lexing a source with extensions, eg, an interface file or
+ with -fglasgow-exts
- (bol) pointer to beginning of line (for column calculations)
- (buf) pointer to beginning of token
- (buf) pointer to current char
lexer :: (Token -> P a) -> P a
lexer cont buf s@(PState{
loc = loc,
- glasgow_exts = glaexts,
+ extsBitmap = exts,
bol = bol,
atbol = atbol,
context = ctx
if next `eqChar#` '-'# then trundle (n +# 1#)
else if is_symbol next || n <# 2#
then is_a_token
- else case untilChar# (stepOnBy# buf n) '\n'# of
- { buf' -> tab y bol atbol (stepOverLexeme buf')
- }
+ else tab y bol atbol
+ (stepOnUntilChar# (stepOnBy# buf n) '\n'#)
in trundle 1#
-- comments and pragmas. We deal with LINE pragmas here,
-- and throw out any unrecognised pragmas as comments. Any
-- pragmas we know about are dealt with later (after any layout
-- processing if necessary).
-
- '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
+ '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
if lookAhead# buf 2# `eqChar#` '#'# then
if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
let lexeme = mkFastString -- ToDo: too slow
(map toUpper (lexemeToString buf2)) in
case lookupUFM pragmaKeywordsFM lexeme of
- Just ITline_prag -> line_prag (lexer cont) buf2 s'
+ -- ignore RULES pragmas when -fglasgow-exts is off
+ Just ITrules_prag | not (glaExtsEnabled exts) ->
+ skip_to_end (stepOnBy# buf 2#) s'
+ Just ITline_prag ->
+ line_prag skip_to_end buf2 s'
Just other -> is_a_token
- Nothing -> skip_to_end (stepOnBy# buf 2#)
+ Nothing -> skip_to_end (stepOnBy# buf 2#) s'
}}
-
- else skip_to_end (stepOnBy# buf 2#)
+
+ else skip_to_end (stepOnBy# buf 2#) s'
+ where
+ skip_to_end = skipNestedComment (lexer cont)
+
+ -- special GHC extension: we grok cpp-style #line pragmas
+ '#'# | lexemeIndex buf ==# bol -> -- the '#' must be in column 0
+ let buf1 | lookAhead# buf 1# `eqChar#` 'l'# &&
+ lookAhead# buf 2# `eqChar#` 'i'# &&
+ lookAhead# buf 3# `eqChar#` 'n'# &&
+ lookAhead# buf 4# `eqChar#` 'e'# = stepOnBy# buf 5#
+ | otherwise = stepOn buf
+ in
+ case expandWhile# is_space buf1 of { buf2 ->
+ if is_digit (currentChar# buf2)
+ then line_prag next_line buf2 s'
+ else is_a_token
+ }
where
- skip_to_end buf = nested_comment (lexer cont) buf s'
+ next_line buf = lexer cont (stepOnUntilChar# buf '\n'#)
-- tabs have been expanded beforehand
c | is_space c -> tab y bol atbol (stepOn buf)
atbol = atbol}
is_a_token | atbol /=# 0# = lexBOL cont buf s'
- | otherwise = lexToken cont glaexts buf s'
+ | otherwise = lexToken cont exts buf s'
-- {-# LINE .. #-} pragmas. yeuch.
-line_prag cont buf =
+line_prag cont buf s@PState{loc=loc} =
case expandWhile# is_space buf of { buf1 ->
case scanNumLit 0 (stepOverLexeme buf1) of { (line,buf2) ->
-- subtract one: the line number refers to the *following* line.
let real_line = line - 1 in
case fromInteger real_line of { i@(I# l) ->
+ -- ToDo, if no filename then we skip the newline.... d'oh
case expandWhile# is_space buf2 of { buf3 ->
case currentChar# buf3 of
'\"'#{-"-} ->
case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
- let file = lexemeToFastString buf4 in
- \s@PState{loc=loc} -> skipToEnd buf4 s{loc = mkSrcLoc file i}
+ let
+ file = lexemeToFastString buf4
+ new_buf = stepOn (stepOverLexeme buf4)
+ in
+ if nullFastString file
+ then cont new_buf s{loc = replaceSrcLine loc l}
+ else cont new_buf s{loc = mkSrcLoc file i}
}
- other -> \s@PState{loc=loc} -> skipToEnd buf3 s{loc = replaceSrcLine loc l}
+ _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l}
}}}}
- where
- skipToEnd buf = nested_comment cont buf
-nested_comment :: P a -> P a
-nested_comment cont buf = loop buf
+skipNestedComment :: P a -> P a
+skipNestedComment cont buf state = skipNestedComment' (loc state) cont buf state
+
+skipNestedComment' :: SrcLoc -> P a -> P a
+skipNestedComment' orig_loc cont buf = loop buf
where
loop buf =
case currentChar# buf of
- '\NUL'# | bufferExhausted (stepOn buf) ->
- lexError "unterminated `{-'" buf
-
- '-'# | lookAhead# buf 1# `eqChar#` '}'# ->
- cont (stepOnBy# buf 2#)
+ '-'# | lookAhead# buf 1# `eqChar#` '}'# -> cont (stepOnBy# buf 2#)
'{'# | lookAhead# buf 1# `eqChar#` '-'# ->
- nested_comment (nested_comment cont) (stepOnBy# buf 2#)
+ skipNestedComment
+ (skipNestedComment' orig_loc cont)
+ (stepOnBy# buf 2#)
'\n'# -> \ s@PState{loc=loc} ->
let buf' = stepOn buf in
- nested_comment cont buf'
- s{loc = incSrcLine loc, bol = currentIndex# buf',
- atbol = 1#}
+ loop buf' s{loc = incSrcLine loc,
+ bol = currentIndex# buf',
+ atbol = 1#}
+
+ -- pass the original SrcLoc to lexError so that the error is
+ -- reported at the line it was originally on, not the line at
+ -- the end of the file.
+ '\NUL'# | bufferExhausted (stepOn buf) ->
+ \s -> lexError "unterminated `{-'" buf s{loc=orig_loc} -- -}
- _ -> nested_comment cont (stepOn buf)
+ _ -> loop (stepOn buf)
-- When we are lexing the first token of a line, check whether we need to
-- insert virtual semicolons or close braces due to layout.
lexBOL :: (Token -> P a) -> P a
lexBOL cont buf s@(PState{
loc = loc,
- glasgow_exts = glaexts,
+ extsBitmap = exts,
bol = bol,
atbol = atbol,
context = ctx
--trace ("col = " ++ show (I# col) ++ ", layout: inserting ';'") $
cont ITsemi buf s{atbol = 0#}
else
- lexToken cont glaexts buf s{atbol = 0#}
+ lexToken cont exts buf s{atbol = 0#}
where
col = currentIndex# buf -# bol
lexToken :: (Token -> P a) -> Int# -> P a
-lexToken cont glaexts buf =
- --trace "lexToken" $
+lexToken cont exts buf =
+-- trace "lexToken" $
case currentChar# buf of
-- special symbols ----------------------------------------------------
- '('# | flag glaexts && lookAhead# buf 1# `eqChar#` '#'#
+ '('# | glaExtsEnabled exts && lookAhead# buf 1# `eqChar#` '#'#
-> cont IToubxparen (setCurrentPos# buf 2#)
| otherwise
-> cont IToparen (incLexeme buf)
')'# -> cont ITcparen (incLexeme buf)
- '['# -> cont ITobrack (incLexeme buf)
+ '['# | parrEnabled exts && lookAhead# buf 1# `eqChar#` ':'# ->
+ cont ITopabrack (setCurrentPos# buf 2#)
+ | otherwise ->
+ cont ITobrack (incLexeme buf)
']'# -> cont ITcbrack (incLexeme buf)
','# -> cont ITcomma (incLexeme buf)
';'# -> cont ITsemi (incLexeme buf)
-
'}'# -> \ s@PState{context = ctx} ->
case ctx of
(_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
_ -> lexError "too many '}'s" buf s
-
+ '|'# -> case lookAhead# buf 1# of
+ '}'# | glaExtsEnabled exts -> cont ITccurlybar
+ (setCurrentPos# buf 2#)
+ _ -> lex_sym cont (incLexeme buf)
+ ':'# -> case lookAhead# buf 1# of
+ ']'# | parrEnabled exts -> cont ITcpabrack
+ (setCurrentPos# buf 2#)
+ _ -> lex_sym cont (incLexeme buf)
+
+
'#'# -> case lookAhead# buf 1# of
- ')'# | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
+ ')'# | glaExtsEnabled exts
+ -> cont ITcubxparen (setCurrentPos# buf 2#)
'-'# -> case lookAhead# buf 2# of
'}'# -> cont ITclose_prag (setCurrentPos# buf 3#)
_ -> lex_sym cont (incLexeme buf)
_ -> lex_sym cont (incLexeme buf)
- '`'# | flag glaexts && lookAhead# buf 1# `eqChar#` '`'#
+ '`'# | glaExtsEnabled exts && lookAhead# buf 1# `eqChar#` '`'#
-> lex_cstring cont (setCurrentPos# buf 2#)
| otherwise
-> cont ITbackquote (incLexeme buf)
- '{'# -> -- look for "{-##" special iface pragma
- case lookAhead# buf 1# of
+ '{'# -> -- look for "{-##" special iface pragma -- for Emacs: -}
+ case lookAhead# buf 1# of
+ '|'# | glaExtsEnabled exts
+ -> cont ITocurlybar (setCurrentPos# buf 2#)
'-'# -> case lookAhead# buf 2# of
'#'# -> case lookAhead# buf 3# of
- '#'# ->
- let (lexeme, buf')
- = doDiscard False (stepOnBy# (stepOverLexeme buf) 4#) in
- cont (ITpragma lexeme) buf'
+ '#'# ->
+ lexPragma
+ cont
+ (\ cont lexeme buf' -> cont (ITpragma lexeme) buf')
+ 0#
+ (stepOnBy# (stepOverLexeme buf) 4#)
_ -> lex_prag cont (setCurrentPos# buf 3#)
- _ -> cont ITocurly (incLexeme buf)
- _ -> (layoutOff `thenP_` cont ITocurly) (incLexeme buf)
+ _ -> cont ITocurly (incLexeme buf)
+ _ -> (layoutOff `thenP_` cont ITocurly) (incLexeme buf)
-- strings/characters -------------------------------------------------
- '\"'#{-"-} -> lex_string cont glaexts "" (incLexeme buf)
- '\''# -> lex_char (char_end cont) glaexts (incLexeme buf)
+ '\"'#{-"-} -> lex_string cont exts [] (incLexeme buf)
+ '\''# -> lex_char (char_end cont) exts (incLexeme buf)
-- strictness and cpr pragmas and __scc treated specially.
- '_'# | flag glaexts ->
+ '_'# | glaExtsEnabled exts ->
case lookAhead# buf 1# of
'_'# -> case lookAhead# buf 2# of
'S'# ->
's'# ->
case prefixMatch (stepOnBy# buf 3#) "cc" of
Just buf' -> lex_scc cont (stepOverLexeme buf')
- Nothing -> lex_id cont glaexts buf
- _ -> lex_id cont glaexts buf
- _ -> lex_id cont glaexts buf
+ Nothing -> lex_id cont exts buf
+ _ -> lex_id cont exts buf
+ _ -> lex_id cont exts buf
-- Hexadecimal and octal constants
'0'# | (ch `eqChar#` 'x'# || ch `eqChar#` 'X'#) && is_hexdigit ch2
- -> readNum (after_lexnum cont glaexts) buf' is_hexdigit 16 hex
+ -> readNum (after_lexnum cont exts) buf' is_hexdigit 16 hex
| (ch `eqChar#` 'o'# || ch `eqChar#` 'O'#) && is_octdigit ch2
- -> readNum (after_lexnum cont glaexts) buf' is_octdigit 8 oct_or_dec
+ -> readNum (after_lexnum cont exts) buf' is_octdigit 8 oct_or_dec
where ch = lookAhead# buf 1#
ch2 = lookAhead# buf 2#
buf' = setCurrentPos# buf 2#
trace "lexIface: misplaced NUL?" $
cont (ITunknown "\NUL") (stepOn buf)
- '?'# | flag glaexts && is_lower (lookAhead# buf 1#) ->
- lex_ip cont (incLexeme buf)
- c | is_digit c -> lex_num cont glaexts 0 buf
+ '?'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) ->
+ lex_ip ITdupipvarid cont (incLexeme buf)
+ '%'# | glaExtsEnabled exts && is_lower (lookAhead# buf 1#) ->
+ lex_ip ITsplitipvarid cont (incLexeme buf)
+ c | is_digit c -> lex_num cont exts 0 buf
| is_symbol c -> lex_sym cont buf
- | is_upper c -> lex_con cont glaexts buf
- | is_ident c -> lex_id cont glaexts buf
+ | is_upper c -> lex_con cont exts buf
+ | is_ident c -> lex_id cont exts buf
| otherwise -> lexError "illegal character" buf
-- Int# is unlifted, and therefore faster than Bool for flags.
-------------------------------------------------------------------------------
-- Strings & Chars
-lex_string cont glaexts s buf
+lex_string cont exts s buf
= case currentChar# buf of
'"'#{-"-} ->
- let buf' = incLexeme buf; s' = mkFastString (reverse s) in
- case currentChar# buf' of
- '#'# | flag glaexts -> cont (ITprimstring s') (incLexeme buf')
+ let buf' = incLexeme buf
+ s' = mkFastStringNarrow (map chr (reverse s))
+ in case currentChar# buf' of
+ '#'# | glaExtsEnabled exts -> if all (<= 0xFF) s
+ then cont (ITprimstring s') (incLexeme buf')
+ else lexError "primitive string literal must contain only characters <= \'\\xFF\'" buf'
_ -> cont (ITstring s') buf'
-- ignore \& in a string, deal with string gaps
'\\'# | next_ch `eqChar#` '&'#
- -> lex_string cont glaexts s buf'
+ -> lex_string cont exts s buf'
| is_space next_ch
- -> lex_stringgap cont glaexts s (incLexeme buf)
+ -> lex_stringgap cont exts s (incLexeme buf)
where next_ch = lookAhead# buf 1#
buf' = setCurrentPos# buf 2#
- _ -> lex_char (lex_next_string cont s) glaexts buf
+ _ -> lex_char (lex_next_string cont s) exts buf
-lex_stringgap cont glaexts s buf
+lex_stringgap cont exts s buf
= let buf' = incLexeme buf in
case currentChar# buf of
- '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont glaexts s buf'
+ '\n'# -> \st@PState{loc = loc} -> lex_stringgap cont exts s buf'
st{loc = incSrcLine loc}
- '\\'# -> lex_string cont glaexts s buf'
- c | is_space c -> lex_stringgap cont glaexts s buf'
+ '\\'# -> lex_string cont exts s buf'
+ c | is_space c -> lex_stringgap cont exts s buf'
other -> charError buf'
-lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf
+lex_next_string cont s exts c buf = lex_string cont exts (c:s) buf
-lex_char :: (Int# -> Char -> P a) -> Int# -> P a
-lex_char cont glaexts buf
+lex_char :: (Int# -> Int -> P a) -> Int# -> P a
+lex_char cont exts buf
= case currentChar# buf of
- '\\'# -> lex_escape (cont glaexts) (incLexeme buf)
- c | is_any c -> cont glaexts (C# c) (incLexeme buf)
+ '\\'# -> lex_escape (cont exts) (incLexeme buf)
+ c | is_any c -> cont exts (I# (ord# c)) (incLexeme buf)
other -> charError buf
-char_end cont glaexts c buf
+char_end cont exts c buf
= case currentChar# buf of
'\''# -> let buf' = incLexeme buf in
case currentChar# buf' of
- '#'# | flag glaexts
+ '#'# | glaExtsEnabled exts
-> cont (ITprimchar c) (incLexeme buf')
_ -> cont (ITchar c) buf'
_ -> charError buf
lex_escape cont buf
= let buf' = incLexeme buf in
case currentChar# buf of
- 'a'# -> cont '\a' buf'
- 'b'# -> cont '\b' buf'
- 'f'# -> cont '\f' buf'
- 'n'# -> cont '\n' buf'
- 'r'# -> cont '\r' buf'
- 't'# -> cont '\t' buf'
- 'v'# -> cont '\v' buf'
- '\\'# -> cont '\\' buf'
- '"'# -> cont '\"' buf'
- '\''# -> cont '\'' buf'
+ 'a'# -> cont (ord '\a') buf'
+ 'b'# -> cont (ord '\b') buf'
+ 'f'# -> cont (ord '\f') buf'
+ 'n'# -> cont (ord '\n') buf'
+ 'r'# -> cont (ord '\r') buf'
+ 't'# -> cont (ord '\t') buf'
+ 'v'# -> cont (ord '\v') buf'
+ '\\'# -> cont (ord '\\') buf'
+ '"'# -> cont (ord '\"') buf'
+ '\''# -> cont (ord '\'') buf'
'^'# -> let c = currentChar# buf' in
if c `geChar#` '@'# && c `leChar#` '_'#
- then cont (C# (chr# (ord# c -# ord# '@'#))) (incLexeme buf')
+ then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf')
else charError buf'
'x'# -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
_ -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
Just buf2 <- [prefixMatch buf p] ] of
- (c,buf2):_ -> cont c buf2
+ (c,buf2):_ -> cont (ord c) buf2
[] -> charError buf'
-after_charnum cont i buf
- = let int = fromInteger i in
- if i >= 0 && i <= 255
- then cont (chr int) buf
+after_charnum cont i buf
+ = if i >= 0 && i <= 0x10FFFF
+ then cont (fromInteger i) buf
else charError buf
readNum cont buf is_digit base conv = read buf 0
lex_demand cont buf =
case read_em [] buf of { (ls,buf') ->
case currentChar# buf' of
- 'B'# -> cont (ITstrict (ls, True )) (incLexeme buf')
- _ -> cont (ITstrict (ls, False)) buf'
+ 'b'# -> cont (ITstrict (StrictSig (mkTopDmdType ls BotRes))) (incLexeme buf')
+ 'm'# -> cont (ITstrict (StrictSig (mkTopDmdType ls RetCPR))) (incLexeme buf')
+ _ -> cont (ITstrict (StrictSig (mkTopDmdType ls TopRes))) buf'
}
where
- -- code snatched from Demand.lhs
read_em acc buf =
case currentChar# buf of
- 'L'# -> read_em (WwLazy False : acc) (stepOn buf)
- 'A'# -> read_em (WwLazy True : acc) (stepOn buf)
- 'S'# -> read_em (WwStrict : acc) (stepOn buf)
- 'P'# -> read_em (WwPrim : acc) (stepOn buf)
- 'E'# -> read_em (WwEnum : acc) (stepOn buf)
- ')'# -> (reverse acc, stepOn buf)
- 'U'# -> do_unpack DataType True acc (stepOnBy# buf 2#)
- 'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
- 'N'# -> do_unpack NewType True acc (stepOnBy# buf 2#)
- 'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
- _ -> (reverse acc, buf)
+ 'T'# -> read_em (Top : acc) (stepOn buf)
+ 'L'# -> read_em (lazyDmd : acc) (stepOn buf)
+ 'A'# -> read_em (Abs : acc) (stepOn buf)
+ 'V'# -> read_em (evalDmd : acc) (stepOn buf) -- Temporary, until
+ -- we've recompiled prelude etc
+ 'C'# -> do_unary Call acc (stepOnBy# buf 2#) -- Skip 'C('
+
+ 'U'# -> do_seq1 Eval acc (stepOnBy# buf 1#)
+ 'D'# -> do_seq1 Defer acc (stepOnBy# buf 1#)
+ 'S'# -> do_seq1 (Box . Eval) acc (stepOnBy# buf 1#)
- do_unpack new_or_data wrapper_unpacks acc buf
- = case read_em [] buf of
- (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
+ _ -> (reverse acc, buf)
+ do_seq1 fn acc buf
+ = case currentChar# buf of
+ '('# -> do_seq2 fn acc (stepOnBy# buf 1#)
+ _ -> read_em (fn (Poly Abs) : acc) buf
+
+ do_seq2 fn acc buf
+ = case read_em [] buf of { (dmds, buf) ->
+ case currentChar# buf of
+ ')'# -> read_em (fn (Prod dmds) : acc)
+ (stepOn buf)
+ '*'# -> ASSERT( length dmds == 1 )
+ read_em (fn (Poly (head dmds)) : acc)
+ (stepOnBy# buf 2#) -- Skip '*)'
+ }
+
+ do_unary fn acc buf
+ = case read_em [] buf of
+ ([dmd], rest) -> read_em (fn dmd : acc) (stepOn rest) -- Skip ')'
------------------
lex_scc cont buf =
-- Numbers
lex_num :: (Token -> P a) -> Int# -> Integer -> P a
-lex_num cont glaexts acc buf =
+lex_num cont exts acc buf =
case scanNumLit acc buf of
(acc',buf') ->
case currentChar# buf' of
v = readRational__ (lexemeToString l)
in case currentChar# l of -- glasgow exts only
- '#'# | flag glaexts -> let l' = incLexeme l in
+ '#'# | glaExtsEnabled exts -> let l' = incLexeme l in
case currentChar# l' of
'#'# -> cont (ITprimdouble v) (incLexeme l')
_ -> cont (ITprimfloat v) l'
_ -> cont (ITrational v) l
- _ -> after_lexnum cont glaexts acc' buf'
+ _ -> after_lexnum cont exts acc' buf'
-after_lexnum cont glaexts i buf
+after_lexnum cont exts i buf
= case currentChar# buf of
- '#'# | flag glaexts -> cont (ITprimint i) (incLexeme buf)
- _ -> cont (ITinteger i) buf
+ '#'# | glaExtsEnabled exts -> cont (ITprimint i) (incLexeme buf)
+ _ -> cont (ITinteger i) buf
-----------------------------------------------------------------------------
-- C "literal literal"s (i.e. things like ``NULL'', ``stdout'' etc.)
(mergeLexemes buf buf')
Nothing -> lexError "unterminated ``" buf
-------------------------------------------------------------------------------
--- Character Classes
-
-is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool
-
-{-# INLINE is_ctype #-}
-#if __GLASGOW_HASKELL__ >= 303
-is_ctype :: Word8 -> Char# -> Bool
-is_ctype mask = \c ->
- (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0
-#else
-is_ctype :: Int -> Char# -> Bool
-is_ctype (I# mask) = \c ->
- let (A# ctype) = ``char_types'' :: Addr
- flag_word = int2Word# (ord# (indexCharOffAddr# ctype (ord# c)))
- in
- (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#)
-#endif
-
-is_ident = is_ctype 1
-is_symbol = is_ctype 2
-is_any = is_ctype 4
-is_space = is_ctype 8
-is_lower = is_ctype 16
-is_upper = is_ctype 32
-is_digit = is_ctype 64
-
-----------------------------------------------------------------------------
-- identifiers, symbols etc.
-lex_ip cont buf =
+lex_ip ip_constr cont buf =
case expandWhile# is_ident buf of
- buf' -> cont (ITipvarid lexeme) buf'
- where lexeme = lexemeToFastString buf'
+ buf' -> cont (ip_constr (tailFS lexeme)) buf'
+ where lexeme = lexemeToFastString buf'
-lex_id cont glaexts buf =
- case expandWhile# is_ident buf of { buf1 ->
+lex_id cont exts buf =
+ let buf1 = expandWhile# is_ident buf in
+ seq buf1 $
- case (if flag glaexts
+ case (if glaExtsEnabled exts
then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
else buf1) of { buf' ->
cont kwd_token buf';
Nothing ->
- let var_token = cont (mk_var_token lexeme) buf' in
+ let var_token = cont (ITvarid lexeme) buf' in
- if not (flag glaexts)
+ if not (glaExtsEnabled exts)
then var_token
else
Just kwd_token -> cont kwd_token buf';
Nothing -> var_token
- }}}}
+ }}}
lex_sym cont buf =
+ -- trace "lex_sym" $
case expandWhile# is_symbol buf of
buf' -> case lookupUFM haskellKeySymsFM lexeme of {
Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
where lexeme = lexemeToFastString buf'
-lex_con cont glaexts buf =
- case expandWhile# is_ident buf of { buf1 ->
- case slurp_trailing_hashes buf1 glaexts of { buf' ->
+-- lex_con recursively collects components of a qualified identifer.
+-- The argument buf is the StringBuffer representing the lexeme
+-- identified so far, where the next character is upper-case.
- case currentChar# buf' of
- '.'# -> munch
+lex_con cont exts buf =
+ -- trace ("con: "{-++unpackFS lexeme-}) $
+ let empty_buf = stepOverLexeme buf in
+ case expandWhile# is_ident empty_buf of { buf1 ->
+ case slurp_trailing_hashes buf1 exts of { con_buf ->
+
+ let all_buf = mergeLexemes buf con_buf
+
+ con_lexeme = lexemeToFastString con_buf
+ mod_lexeme = lexemeToFastString (decLexeme buf)
+ all_lexeme = lexemeToFastString all_buf
+
+ just_a_conid
+ | emptyLexeme buf = cont (ITconid con_lexeme) all_buf
+ | otherwise = cont (ITqconid (mod_lexeme,con_lexeme)) all_buf
+ in
+
+ case currentChar# all_buf of
+ '.'# -> maybe_qualified cont exts all_lexeme
+ (incLexeme all_buf) just_a_conid
_ -> just_a_conid
-
- where
- just_a_conid = --trace ("con: "++unpackFS lexeme) $
- cont (ITconid lexeme) buf'
- lexeme = lexemeToFastString buf'
- munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid
- }}
-
-lex_qid cont glaexts mod buf just_a_conid =
+ }}
+
+
+maybe_qualified cont exts mod buf just_a_conid =
+ -- trace ("qid: "{-++unpackFS lexeme-}) $
case currentChar# buf of
'['# -> -- Special case for []
case lookAhead# buf 1# of
'('# -> -- Special case for (,,,)
-- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
case lookAhead# buf 1# of
- '#'# | flag glaexts -> case lookAhead# buf 2# of
+ '#'# | glaExtsEnabled exts -> case lookAhead# buf 2# of
','# -> lex_ubx_tuple cont mod (setCurrentPos# buf 3#)
just_a_conid
_ -> just_a_conid
_ -> just_a_conid
'-'# -> case lookAhead# buf 1# of
- '>'# -> cont (ITqconid (mod,SLIT("->"))) (setCurrentPos# buf 2#)
- _ -> lex_id3 cont glaexts mod buf just_a_conid
- _ -> lex_id3 cont glaexts mod buf just_a_conid
+ '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#)
+ _ -> lex_id3 cont exts mod buf just_a_conid
+
+ _ -> lex_id3 cont exts mod buf just_a_conid
+
+
+lex_id3 cont exts mod buf just_a_conid
+ | is_upper (currentChar# buf) =
+ lex_con cont exts buf
-lex_id3 cont glaexts mod buf just_a_conid
| is_symbol (currentChar# buf) =
let
start_new_lexeme = stepOverLexeme buf
in
+ -- trace ("lex_id31 "{-++unpackFS lexeme-}) $
case expandWhile# is_symbol start_new_lexeme of { buf' ->
let
lexeme = lexemeToFastString buf'
let
start_new_lexeme = stepOverLexeme buf
in
+ -- trace ("lex_id32 "{-++unpackFS lexeme-}) $
case expandWhile# is_ident start_new_lexeme of { buf1 ->
if emptyLexeme buf1
then just_a_conid
else
- case slurp_trailing_hashes buf1 glaexts of { buf' ->
+ case slurp_trailing_hashes buf1 exts of { buf' ->
let
- lexeme = lexemeToFastString buf'
- new_buf = mergeLexemes buf buf'
+ lexeme = lexemeToFastString buf'
+ new_buf = mergeLexemes buf buf'
is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
in
case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
- Just kwd_token -> just_a_conid; -- avoid M.where etc.
- Nothing -> is_a_qvarid
- -- TODO: special ids (as, qualified, hiding) shouldn't be
- -- recognised as keywords here. ie. M.as is a qualified varid.
- }}}
+ Nothing -> is_a_qvarid ;
+ Just kwd_token | isSpecial kwd_token -- special ids (as, qualified, hiding) shouldn't be
+ -> is_a_qvarid -- recognised as keywords here.
+ | otherwise
+ -> just_a_conid -- avoid M.where etc.
+ }}}
-slurp_trailing_hashes buf glaexts
- | flag glaexts = expandWhile# (`eqChar#` '#'#) buf
- | otherwise = buf
+slurp_trailing_hashes buf exts
+ | glaExtsEnabled exts = expandWhile# (`eqChar#` '#'#) buf
+ | otherwise = buf
mk_var_token pk_str
| otherwise = ITvarsym pk_str
where
(C# f) = _HEAD_ pk_str
- tl = _TAIL_ pk_str
+ -- tl = _TAIL_ pk_str
mk_qvar_token m token =
+-- trace ("mk_qvar ") $
case mk_var_token token of
ITconid n -> ITqconid (m,n)
ITvarid n -> ITqvarid (m,n)
go n buf =
case currentChar# buf of
','# -> go (n+1) (stepOn buf)
- ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n))) (stepOn buf)
+ ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf)
_ -> back_off
lex_ubx_tuple cont mod buf back_off =
case currentChar# buf of
','# -> go (n+1) (stepOn buf)
'#'# -> case lookAhead# buf 1# of
- ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n)))
+ ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n)))
(stepOnBy# buf 2#)
_ -> back_off
_ -> back_off
\end{code}
-----------------------------------------------------------------------------
-doDiscard rips along really fast, looking for a '#-}',
+'lexPragma' rips along really fast, looking for a '##-}',
indicating the end of the pragma we're skipping
\begin{code}
-doDiscard inStr buf =
+lexPragma cont contf inStr buf =
case currentChar# buf of
- '#'# | not inStr ->
+ '#'# | inStr ==# 0# ->
case lookAhead# buf 1# of { '#'# ->
case lookAhead# buf 2# of { '-'# ->
case lookAhead# buf 3# of { '}'# ->
- (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
- _ -> doDiscard inStr (incLexeme buf) };
- _ -> doDiscard inStr (incLexeme buf) };
- _ -> doDiscard inStr (incLexeme buf) }
+ contf cont (lexemeToBuffer buf)
+ (stepOverLexeme (setCurrentPos# buf 4#));
+ _ -> lexPragma cont contf inStr (incLexeme buf) };
+ _ -> lexPragma cont contf inStr (incLexeme buf) };
+ _ -> lexPragma cont contf inStr (incLexeme buf) }
+
'"'# ->
let
odd_slashes buf flg i# =
case lookAhead# buf i# of
'\\'# -> odd_slashes buf (not flg) (i# -# 1#)
_ -> flg
+
+ not_inStr = if inStr ==# 0# then 1# else 0#
in
case lookAhead# buf (negateInt# 1#) of --backwards, actually
'\\'# -> -- escaping something..
- if odd_slashes buf True (negateInt# 2#) then
- -- odd number of slashes, " is escaped.
- doDiscard inStr (incLexeme buf)
- else
- -- even number of slashes, \ is escaped.
- doDiscard (not inStr) (incLexeme buf)
- _ -> case inStr of -- forced to avoid build-up
- True -> doDiscard False (incLexeme buf)
- False -> doDiscard True (incLexeme buf)
- _ -> doDiscard inStr (incLexeme buf)
+ if odd_slashes buf True (negateInt# 2#)
+ then -- odd number of slashes, " is escaped.
+ lexPragma cont contf inStr (incLexeme buf)
+ else -- even number of slashes, \ is escaped.
+ lexPragma cont contf not_inStr (incLexeme buf)
+ _ -> lexPragma cont contf not_inStr (incLexeme buf)
+
+ '\''# | inStr ==# 0# ->
+ case lookAhead# buf 1# of { '"'# ->
+ case lookAhead# buf 2# of { '\''# ->
+ lexPragma cont contf inStr (setCurrentPos# buf 3#);
+ _ -> lexPragma cont contf inStr (incLexeme buf) };
+ _ -> lexPragma cont contf inStr (incLexeme buf) }
+
+ -- a sign that the input is ill-formed, since pragmas are
+ -- assumed to always be properly closed (in .hi files).
+ '\NUL'# -> trace "lexPragma: unexpected end-of-file" $
+ cont (ITunknown "\NUL") buf
+
+ _ -> lexPragma cont contf inStr (incLexeme buf)
\end{code}
| PFailed Message
data PState = PState {
- loc :: SrcLoc,
- glasgow_exts :: Int#,
- bol :: Int#,
- atbol :: Int#,
- context :: [LayoutContext]
+ loc :: SrcLoc,
+ extsBitmap :: Int#, -- bitmap that determines permitted extensions
+ bol :: Int#,
+ atbol :: Int#,
+ context :: [LayoutContext]
}
type P a = StringBuffer -- Input string
getSrcLocP :: P SrcLoc
getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
+-- use a temporary SrcLoc for the duration of the argument
+setSrcLocP :: SrcLoc -> P a -> P a
+setSrcLocP new_loc p buf s =
+ case p buf s{ loc=new_loc } of
+ POk _ a -> POk s a
+ PFailed e -> PFailed e
+
getSrcFile :: P FAST_STRING
getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
-getContext :: P [LayoutContext]
-getContext buf s@(PState{ context = ctx }) = POk s ctx
-
pushContext :: LayoutContext -> P ()
pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
| "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile loc)) = POk s ()
| otherwise = PFailed (ifaceVersionErr mb loc ([]::[Token]){-Todo-})
+
+-- for reasons of efficiency, flags indicating language extensions (eg,
+-- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
+-- integer
+
+glaExtsBit, ffiBit, parrBit :: Int
+glaExtsBit = 0
+ffiBit = 1 -- FIXME: not used yet; still part of `glaExtsBit'
+parrBit = 2
+
+glaExtsEnabled, ffiEnabled, parrEnabled :: Int# -> Bool
+glaExtsEnabled flags = testBit (I# flags) glaExtsBit
+ffiEnabled flags = testBit (I# flags) ffiBit
+parrEnabled flags = testBit (I# flags) parrBit
+
+-- convenient record-based bitmap for the interface to the rest of the world
+--
+data ExtFlags = ExtFlags {
+ glasgowExtsEF :: Bool,
+-- ffiEF :: Bool, -- commented out to avoid warnings
+ parrEF :: Bool -- while not used yet
+ }
+
+-- create a parse state
+--
+mkPState :: SrcLoc -> ExtFlags -> PState
+mkPState loc exts = PState {
+ loc = loc,
+ extsBitmap = case bitmap of {I# bits -> bits},
+ bol = 0#,
+ atbol = 1#,
+ context = []
+ }
+ where
+ bitmap = glaExtsBit `setBitIf` glasgowExtsEF exts
+-- .|. ffiBit `setBitIf` ffiEF exts
+ .|. parrBit `setBitIf` parrEF exts
+ --
+ b `setBitIf` cond | cond = bit b
+ | otherwise = 0
+
+
-----------------------------------------------------------------
ifaceParseErr :: StringBuffer -> SrcLoc -> Message