#include "HsVersions.h"
-import Char ( ord, isSpace, toUpper )
+import Char ( isSpace, toUpper )
import List ( isSuffixOf )
-import IdInfo ( InlinePragInfo(..), CprInfo(..) )
-import Name ( isLowerISO, isUpperISO )
-import PrelMods ( mkTupNameStr, mkUbxTupNameStr )
-import CmdLineOpts ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
+import IdInfo ( InlinePragInfo(..) )
+import PrelNames ( mkTupNameStr )
+import CmdLineOpts ( opt_HiVersion, opt_NoHiCheck )
import Demand ( Demand(..) {- instance Read -} )
-import UniqFM ( UniqFM, listToUFM, lookupUFM)
-import BasicTypes ( NewOrData(..) )
+import UniqFM ( listToUFM, lookupUFM )
+import BasicTypes ( NewOrData(..), Boxity(..) )
import SrcLoc ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
replaceSrcLine, mkSrcLoc )
-import Maybes ( MaybeErr(..) )
import ErrUtils ( Message )
import Outputable
import FastString
import StringBuffer
import GlaExts
-import ST ( runST )
import Ctype
-import Char ( chr )
-import Addr
+import Char ( chr, ord )
import PrelRead ( readRational__ ) -- Glasgow non-std
\end{code}
| ITthen
| ITtype
| ITwhere
- | ITscc
+ | ITscc -- ToDo: remove (we use {-# SCC "..." #-} now)
| ITforall -- GHC extension keywords
| ITforeign
| ITint64_lit
| ITrational_lit
| ITaddr_lit
+ | ITlabel_lit
| ITlit_lit
| ITstring_lit
| ITtypeapp
| ITrules_prag
| ITdeprecated_prag
| ITline_prag
+ | ITscc_prag
| ITclose_prag
| ITdotdot -- reserved symbols
| ITocurly -- special symbols
| ITccurly
+ | ITocurlybar -- {|, for type applications
+ | ITccurlybar -- |}, for type applications
| ITvccurly
| ITobrack
| ITcbrack
| ITpragma StringBuffer
- | ITchar Char
+ | ITchar Int
| ITstring FAST_STRING
- | ITinteger Integer
+ | ITinteger Integer
| ITrational Rational
- | ITprimchar Char
+ | ITprimchar Int
| ITprimstring FAST_STRING
| ITprimint Integer
| ITprimfloat Rational
( "LINE", ITline_prag ),
( "RULES", ITrules_prag ),
( "RULEZ", ITrules_prag ), -- american spelling :-)
+ ( "SCC", ITscc_prag ),
( "DEPRECATED", ITdeprecated_prag )
]
( "then", ITthen ),
( "type", ITtype ),
( "where", ITwhere ),
- ( "_scc_", ITscc )
+ ( "_scc_", ITscc ) -- ToDo: remove
]
+isSpecial :: Token -> Bool
+-- If we see M.x, where x is a keyword, but
+-- is special, we treat is as just plain M.x,
+-- not as a keyword.
+isSpecial ITas = True
+isSpecial IThiding = True
+isSpecial ITqualified = True
+isSpecial ITforall = True
+isSpecial ITexport = True
+isSpecial ITlabel = True
+isSpecial ITdynamic = True
+isSpecial ITunsafe = True
+isSpecial ITwith = True
+isSpecial ITccallconv = True
+isSpecial ITstdcallconv = True
+isSpecial _ = False
+
-- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP)
ghcExtensionKeywordsFM = listToUFM $
map (\ (x,y) -> (_PK_ x,y))
("__word64", ITword64_lit),
("__rational", ITrational_lit),
("__addr", ITaddr_lit),
+ ("__label", ITlabel_lit),
("__litlit", ITlit_lit),
("__string", ITstring_lit),
("__a", ITtypeapp),
where
line = srcLocLine loc
- tab y bol atbol buf = --trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
+ tab y bol atbol buf = -- trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $
case currentChar# buf of
'\NUL'# ->
if next `eqChar#` '-'# then trundle (n +# 1#)
else if is_symbol next || n <# 2#
then is_a_token
- else case untilChar# (stepOnBy# buf n) '\n'# of
- { buf' -> tab y bol atbol (stepOverLexeme buf')
- }
+ else tab y bol atbol
+ (stepOnUntilChar# (stepOnBy# buf n) '\n'#)
in trundle 1#
-- comments and pragmas. We deal with LINE pragmas here,
-- and throw out any unrecognised pragmas as comments. Any
-- pragmas we know about are dealt with later (after any layout
-- processing if necessary).
-
- '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
+ '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
if lookAhead# buf 2# `eqChar#` '#'# then
if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
let lexeme = mkFastString -- ToDo: too slow
(map toUpper (lexemeToString buf2)) in
case lookupUFM pragmaKeywordsFM lexeme of
- Just ITline_prag -> line_prag (lexer cont) buf2 s'
+ Just ITline_prag ->
+ line_prag skip_to_end buf2 s'
Just other -> is_a_token
- Nothing -> skip_to_end (stepOnBy# buf 2#)
+ Nothing -> skip_to_end (stepOnBy# buf 2#) s'
}}
-
- else skip_to_end (stepOnBy# buf 2#)
+
+ else skip_to_end (stepOnBy# buf 2#) s'
where
- skip_to_end buf = nested_comment (lexer cont) buf s'
+ skip_to_end = nested_comment (lexer cont)
+
+ -- special GHC extension: we grok cpp-style #line pragmas
+ '#'# | lexemeIndex buf ==# bol -> -- the '#' must be in column 0
+ case expandWhile# is_space (stepOn buf) of { buf1 ->
+ if is_digit (currentChar# buf1)
+ then line_prag next_line buf1 s'
+ else is_a_token
+ }
+ where
+ next_line buf = lexer cont (stepOnUntilChar# buf '\n'#)
-- tabs have been expanded beforehand
c | is_space c -> tab y bol atbol (stepOn buf)
| otherwise = lexToken cont glaexts buf s'
-- {-# LINE .. #-} pragmas. yeuch.
-line_prag cont buf =
+line_prag cont buf s@PState{loc=loc} =
case expandWhile# is_space buf of { buf1 ->
case scanNumLit 0 (stepOverLexeme buf1) of { (line,buf2) ->
-- subtract one: the line number refers to the *following* line.
let real_line = line - 1 in
case fromInteger real_line of { i@(I# l) ->
+ -- ToDo, if no filename then we skip the newline.... d'oh
case expandWhile# is_space buf2 of { buf3 ->
case currentChar# buf3 of
'\"'#{-"-} ->
case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
- let file = lexemeToFastString buf4 in
- \s@PState{loc=loc} -> skipToEnd buf4 s{loc = mkSrcLoc file i}
+ let
+ file = lexemeToFastString buf4
+ new_buf = stepOn (stepOverLexeme buf4)
+ in
+ if nullFastString file
+ then cont new_buf s{loc = replaceSrcLine loc l}
+ else cont new_buf s{loc = mkSrcLoc file i}
}
- other -> \s@PState{loc=loc} -> skipToEnd buf3 s{loc = replaceSrcLine loc l}
+ _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l}
}}}}
- where
- skipToEnd buf = nested_comment cont buf
nested_comment :: P a -> P a
nested_comment cont buf = loop buf
loop buf =
case currentChar# buf of
'\NUL'# | bufferExhausted (stepOn buf) ->
- lexError "unterminated `{-'" buf
-
+ lexError "unterminated `{-'" buf -- -}
'-'# | lookAhead# buf 1# `eqChar#` '}'# ->
cont (stepOnBy# buf 2#)
lexToken :: (Token -> P a) -> Int# -> P a
lexToken cont glaexts buf =
- --trace "lexToken" $
+ -- trace "lexToken" $
case currentChar# buf of
-- special symbols ----------------------------------------------------
']'# -> cont ITcbrack (incLexeme buf)
','# -> cont ITcomma (incLexeme buf)
';'# -> cont ITsemi (incLexeme buf)
-
'}'# -> \ s@PState{context = ctx} ->
case ctx of
(_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
_ -> lexError "too many '}'s" buf s
+ '|'# -> case lookAhead# buf 1# of
+ '}'# | flag glaexts -> cont ITccurlybar
+ (setCurrentPos# buf 2#)
+ _ -> lex_sym cont (incLexeme buf)
+
'#'# -> case lookAhead# buf 1# of
')'# | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
'-'# -> case lookAhead# buf 2# of
-> cont ITbackquote (incLexeme buf)
'{'# -> -- look for "{-##" special iface pragma
- case lookAhead# buf 1# of
+ case lookAhead# buf 1# of
+ '|'# | flag glaexts
+ -> cont ITocurlybar (setCurrentPos# buf 2#)
'-'# -> case lookAhead# buf 2# of
'#'# -> case lookAhead# buf 3# of
- '#'# ->
+ '#'# ->
let (lexeme, buf')
- = doDiscard False (stepOnBy# (stepOverLexeme buf) 4#) in
- cont (ITpragma lexeme) buf'
+ = doDiscard 0# (stepOnBy# (stepOverLexeme buf) 4#) in
+ cont (ITpragma lexeme) buf'
_ -> lex_prag cont (setCurrentPos# buf 3#)
- _ -> cont ITocurly (incLexeme buf)
- _ -> (layoutOff `thenP_` cont ITocurly) (incLexeme buf)
+ _ -> cont ITocurly (incLexeme buf)
+ _ -> (layoutOff `thenP_` cont ITocurly) (incLexeme buf)
-- strings/characters -------------------------------------------------
- '\"'#{-"-} -> lex_string cont glaexts "" (incLexeme buf)
+ '\"'#{-"-} -> lex_string cont glaexts [] (incLexeme buf)
'\''# -> lex_char (char_end cont) glaexts (incLexeme buf)
-- strictness and cpr pragmas and __scc treated specially.
lex_string cont glaexts s buf
= case currentChar# buf of
'"'#{-"-} ->
- let buf' = incLexeme buf; s' = mkFastString (reverse s) in
- case currentChar# buf' of
- '#'# | flag glaexts -> cont (ITprimstring s') (incLexeme buf')
+ let buf' = incLexeme buf
+ s' = mkFastStringNarrow (map chr (reverse s))
+ in case currentChar# buf' of
+ '#'# | flag glaexts -> if all (<= 0xFF) s
+ then cont (ITprimstring s') (incLexeme buf')
+ else lexError "primitive string literal must contain only characters <= \'\\xFF\'" buf'
_ -> cont (ITstring s') buf'
-- ignore \& in a string, deal with string gaps
lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf
-lex_char :: (Int# -> Char -> P a) -> Int# -> P a
+lex_char :: (Int# -> Int -> P a) -> Int# -> P a
lex_char cont glaexts buf
= case currentChar# buf of
'\\'# -> lex_escape (cont glaexts) (incLexeme buf)
- c | is_any c -> cont glaexts (C# c) (incLexeme buf)
+ c | is_any c -> cont glaexts (I# (ord# c)) (incLexeme buf)
other -> charError buf
char_end cont glaexts c buf
lex_escape cont buf
= let buf' = incLexeme buf in
case currentChar# buf of
- 'a'# -> cont '\a' buf'
- 'b'# -> cont '\b' buf'
- 'f'# -> cont '\f' buf'
- 'n'# -> cont '\n' buf'
- 'r'# -> cont '\r' buf'
- 't'# -> cont '\t' buf'
- 'v'# -> cont '\v' buf'
- '\\'# -> cont '\\' buf'
- '"'# -> cont '\"' buf'
- '\''# -> cont '\'' buf'
+ 'a'# -> cont (ord '\a') buf'
+ 'b'# -> cont (ord '\b') buf'
+ 'f'# -> cont (ord '\f') buf'
+ 'n'# -> cont (ord '\n') buf'
+ 'r'# -> cont (ord '\r') buf'
+ 't'# -> cont (ord '\t') buf'
+ 'v'# -> cont (ord '\v') buf'
+ '\\'# -> cont (ord '\\') buf'
+ '"'# -> cont (ord '\"') buf'
+ '\''# -> cont (ord '\'') buf'
'^'# -> let c = currentChar# buf' in
if c `geChar#` '@'# && c `leChar#` '_'#
- then cont (C# (chr# (ord# c -# ord# '@'#))) (incLexeme buf')
+ then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf')
else charError buf'
'x'# -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
_ -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
Just buf2 <- [prefixMatch buf p] ] of
- (c,buf2):_ -> cont c buf2
+ (c,buf2):_ -> cont (ord c) buf2
[] -> charError buf'
-after_charnum cont i buf
- = let int = fromInteger i in
- if i >= 0 && i <= 255
- then cont (chr int) buf
+after_charnum cont i buf
+ = if i >= 0 && i <= 0x10FFFF
+ then cont (fromInteger i) buf
else charError buf
readNum cont buf is_digit base conv = read buf 0
where lexeme = lexemeToFastString buf'
lex_id cont glaexts buf =
- case expandWhile# is_ident buf of { buf1 ->
+ let buf1 = expandWhile# is_ident buf in
+ seq buf1 $
case (if flag glaexts
then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
cont kwd_token buf';
Nothing ->
- let var_token = cont (mk_var_token lexeme) buf' in
+ let var_token = cont (ITvarid lexeme) buf' in
if not (flag glaexts)
then var_token
Just kwd_token -> cont kwd_token buf';
Nothing -> var_token
- }}}}
+ }}}
lex_sym cont buf =
+ -- trace "lex_sym" $
case expandWhile# is_symbol buf of
buf' -> case lookupUFM haskellKeySymsFM lexeme of {
Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
lex_con cont glaexts buf =
+ -- trace ("con: "{-++unpackFS lexeme-}) $
case expandWhile# is_ident buf of { buf1 ->
case slurp_trailing_hashes buf1 glaexts of { buf' ->
_ -> just_a_conid
where
- just_a_conid = --trace ("con: "++unpackFS lexeme) $
- cont (ITconid lexeme) buf'
+ just_a_conid = cont (ITconid lexeme) buf'
lexeme = lexemeToFastString buf'
munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid
}}
lex_qid cont glaexts mod buf just_a_conid =
+ -- trace ("quid: "{-++unpackFS lexeme-}) $
case currentChar# buf of
'['# -> -- Special case for []
case lookAhead# buf 1# of
_ -> just_a_conid
'-'# -> case lookAhead# buf 1# of
- '>'# -> cont (ITqconid (mod,SLIT("->"))) (setCurrentPos# buf 2#)
+ '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#)
_ -> lex_id3 cont glaexts mod buf just_a_conid
_ -> lex_id3 cont glaexts mod buf just_a_conid
let
start_new_lexeme = stepOverLexeme buf
in
+ -- trace ("lex_id31 "{-++unpackFS lexeme-}) $
case expandWhile# is_symbol start_new_lexeme of { buf' ->
let
lexeme = lexemeToFastString buf'
let
start_new_lexeme = stepOverLexeme buf
in
+ -- trace ("lex_id32 "{-++unpackFS lexeme-}) $
case expandWhile# is_ident start_new_lexeme of { buf1 ->
if emptyLexeme buf1
then just_a_conid
case slurp_trailing_hashes buf1 glaexts of { buf' ->
let
- lexeme = lexemeToFastString buf'
- new_buf = mergeLexemes buf buf'
+ lexeme = lexemeToFastString buf'
+ new_buf = mergeLexemes buf buf'
is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
in
case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
- Just kwd_token -> just_a_conid; -- avoid M.where etc.
- Nothing -> is_a_qvarid
- -- TODO: special ids (as, qualified, hiding) shouldn't be
- -- recognised as keywords here. ie. M.as is a qualified varid.
- }}}
+ Nothing -> is_a_qvarid ;
+ Just kwd_token | isSpecial kwd_token -- special ids (as, qualified, hiding) shouldn't be
+ -> is_a_qvarid -- recognised as keywords here.
+ | otherwise
+ -> just_a_conid -- avoid M.where etc.
+ }}}
slurp_trailing_hashes buf glaexts
| flag glaexts = expandWhile# (`eqChar#` '#'#) buf
| otherwise = ITvarsym pk_str
where
(C# f) = _HEAD_ pk_str
- tl = _TAIL_ pk_str
+ -- tl = _TAIL_ pk_str
mk_qvar_token m token =
+-- trace ("mk_qvar ") $
case mk_var_token token of
ITconid n -> ITqconid (m,n)
ITvarid n -> ITqvarid (m,n)
go n buf =
case currentChar# buf of
','# -> go (n+1) (stepOn buf)
- ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n))) (stepOn buf)
+ ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf)
_ -> back_off
lex_ubx_tuple cont mod buf back_off =
case currentChar# buf of
','# -> go (n+1) (stepOn buf)
'#'# -> case lookAhead# buf 1# of
- ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n)))
+ ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n)))
(stepOnBy# buf 2#)
_ -> back_off
_ -> back_off
\end{code}
-----------------------------------------------------------------------------
-doDiscard rips along really fast, looking for a '#-}',
+doDiscard rips along really fast, looking for a '##-}',
indicating the end of the pragma we're skipping
\begin{code}
doDiscard inStr buf =
case currentChar# buf of
- '#'# | not inStr ->
+ '#'# | inStr ==# 0# ->
case lookAhead# buf 1# of { '#'# ->
case lookAhead# buf 2# of { '-'# ->
case lookAhead# buf 3# of { '}'# ->
_ -> doDiscard inStr (incLexeme buf) };
_ -> doDiscard inStr (incLexeme buf) };
_ -> doDiscard inStr (incLexeme buf) }
+
'"'# ->
let
odd_slashes buf flg i# =
case lookAhead# buf i# of
'\\'# -> odd_slashes buf (not flg) (i# -# 1#)
_ -> flg
+
+ not_inStr = if inStr ==# 0# then 1# else 0#
in
case lookAhead# buf (negateInt# 1#) of --backwards, actually
'\\'# -> -- escaping something..
- if odd_slashes buf True (negateInt# 2#) then
- -- odd number of slashes, " is escaped.
- doDiscard inStr (incLexeme buf)
- else
- -- even number of slashes, \ is escaped.
- doDiscard (not inStr) (incLexeme buf)
- _ -> case inStr of -- forced to avoid build-up
- True -> doDiscard False (incLexeme buf)
- False -> doDiscard True (incLexeme buf)
+ if odd_slashes buf True (negateInt# 2#)
+ then -- odd number of slashes, " is escaped.
+ doDiscard inStr (incLexeme buf)
+ else -- even number of slashes, \ is escaped.
+ doDiscard not_inStr (incLexeme buf)
+ _ -> doDiscard not_inStr (incLexeme buf)
+
+ '\''# | inStr ==# 0# ->
+ case lookAhead# buf 1# of { '"'# ->
+ case lookAhead# buf 2# of { '\''# ->
+ doDiscard inStr (setCurrentPos# buf 3#);
+ _ -> doDiscard inStr (incLexeme buf) };
+ _ -> doDiscard inStr (incLexeme buf) }
+
_ -> doDiscard inStr (incLexeme buf)
\end{code}