X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Fparser%2FLex.lhs;h=faec03fcf3384456ab591d33a6a5edb5f7ea8461;hb=11b6ed82b7e80125bd66c8d99b82270b81daa553;hp=0a247e0d03211c7c9d2dccda16123dd0233c4653;hpb=891e78b3ac7f865055203fd6d14f26809f749ec9;p=ghc-hetmet.git diff --git a/ghc/compiler/parser/Lex.lhs b/ghc/compiler/parser/Lex.lhs index 0a247e0..faec03f 100644 --- a/ghc/compiler/parser/Lex.lhs +++ b/ghc/compiler/parser/Lex.lhs @@ -33,30 +33,26 @@ module Lex ( #include "HsVersions.h" -import Char ( ord, isSpace, toUpper ) +import Char ( isSpace, toUpper ) import List ( isSuffixOf ) -import IdInfo ( InlinePragInfo(..), CprInfo(..) ) -import Name ( isLowerISO, isUpperISO ) -import PrelMods ( mkTupNameStr, mkUbxTupNameStr ) -import CmdLineOpts ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck ) +import IdInfo ( InlinePragInfo(..) ) +import PrelNames ( mkTupNameStr ) +import CmdLineOpts ( opt_HiVersion, opt_NoHiCheck ) import Demand ( Demand(..) {- instance Read -} ) -import UniqFM ( UniqFM, listToUFM, lookupUFM) -import BasicTypes ( NewOrData(..) ) +import UniqFM ( listToUFM, lookupUFM ) +import BasicTypes ( NewOrData(..), Boxity(..) ) import SrcLoc ( SrcLoc, incSrcLine, srcLocFile, srcLocLine, replaceSrcLine, mkSrcLoc ) -import Maybes ( MaybeErr(..) ) import ErrUtils ( Message ) import Outputable import FastString import StringBuffer import GlaExts -import ST ( runST ) import Ctype -import Char ( chr ) -import Addr +import Char ( chr, ord ) import PrelRead ( readRational__ ) -- Glasgow non-std \end{code} @@ -114,7 +110,7 @@ data Token | ITthen | ITtype | ITwhere - | ITscc + | ITscc -- ToDo: remove (we use {-# SCC "..." #-} now) | ITforall -- GHC extension keywords | ITforeign @@ -144,6 +140,7 @@ data Token | ITint64_lit | ITrational_lit | ITaddr_lit + | ITlabel_lit | ITlit_lit | ITstring_lit | ITtypeapp @@ -167,6 +164,7 @@ data Token | ITrules_prag | ITdeprecated_prag | ITline_prag + | ITscc_prag | ITclose_prag | ITdotdot -- reserved symbols @@ -187,6 +185,8 @@ data Token | ITocurly -- special symbols | ITccurly + | ITocurlybar -- {|, for type applications + | ITccurlybar -- |}, for type applications | ITvccurly | ITobrack | ITcbrack @@ -212,12 +212,12 @@ data Token | ITpragma StringBuffer - | ITchar Char + | ITchar Int | ITstring FAST_STRING - | ITinteger Integer + | ITinteger Integer | ITrational Rational - | ITprimchar Char + | ITprimchar Int | ITprimstring FAST_STRING | ITprimint Integer | ITprimfloat Rational @@ -244,6 +244,7 @@ pragmaKeywordsFM = listToUFM $ ( "LINE", ITline_prag ), ( "RULES", ITrules_prag ), ( "RULEZ", ITrules_prag ), -- american spelling :-) + ( "SCC", ITscc_prag ), ( "DEPRECATED", ITdeprecated_prag ) ] @@ -274,9 +275,26 @@ haskellKeywordsFM = listToUFM $ ( "then", ITthen ), ( "type", ITtype ), ( "where", ITwhere ), - ( "_scc_", ITscc ) + ( "_scc_", ITscc ) -- ToDo: remove ] +isSpecial :: Token -> Bool +-- If we see M.x, where x is a keyword, but +-- is special, we treat is as just plain M.x, +-- not as a keyword. +isSpecial ITas = True +isSpecial IThiding = True +isSpecial ITqualified = True +isSpecial ITforall = True +isSpecial ITexport = True +isSpecial ITlabel = True +isSpecial ITdynamic = True +isSpecial ITunsafe = True +isSpecial ITwith = True +isSpecial ITccallconv = True +isSpecial ITstdcallconv = True +isSpecial _ = False + -- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! 
(SUP) ghcExtensionKeywordsFM = listToUFM $ map (\ (x,y) -> (_PK_ x,y)) @@ -313,6 +331,7 @@ ghcExtensionKeywordsFM = listToUFM $ ("__word64", ITword64_lit), ("__rational", ITrational_lit), ("__addr", ITaddr_lit), + ("__label", ITlabel_lit), ("__litlit", ITlit_lit), ("__string", ITstring_lit), ("__a", ITtypeapp), @@ -383,7 +402,7 @@ lexer cont buf s@(PState{ where line = srcLocLine loc - tab y bol atbol buf = --trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $ + tab y bol atbol buf = -- trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $ case currentChar# buf of '\NUL'# -> @@ -401,17 +420,15 @@ lexer cont buf s@(PState{ if next `eqChar#` '-'# then trundle (n +# 1#) else if is_symbol next || n <# 2# then is_a_token - else case untilChar# (stepOnBy# buf n) '\n'# of - { buf' -> tab y bol atbol (stepOverLexeme buf') - } + else tab y bol atbol + (stepOnUntilChar# (stepOnBy# buf n) '\n'#) in trundle 1# -- comments and pragmas. We deal with LINE pragmas here, -- and throw out any unrecognised pragmas as comments. Any -- pragmas we know about are dealt with later (after any layout -- processing if necessary). - - '{'# | lookAhead# buf 1# `eqChar#` '-'# -> + '{'# | lookAhead# buf 1# `eqChar#` '-'# -> if lookAhead# buf 2# `eqChar#` '#'# then if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1-> @@ -419,14 +436,25 @@ lexer cont buf s@(PState{ let lexeme = mkFastString -- ToDo: too slow (map toUpper (lexemeToString buf2)) in case lookupUFM pragmaKeywordsFM lexeme of - Just ITline_prag -> line_prag (lexer cont) buf2 s' + Just ITline_prag -> + line_prag skip_to_end buf2 s' Just other -> is_a_token - Nothing -> skip_to_end (stepOnBy# buf 2#) + Nothing -> skip_to_end (stepOnBy# buf 2#) s' }} - - else skip_to_end (stepOnBy# buf 2#) + + else skip_to_end (stepOnBy# buf 2#) s' where - skip_to_end buf = nested_comment (lexer cont) buf s' + skip_to_end = nested_comment (lexer cont) + + -- special GHC extension: we grok cpp-style #line pragmas + '#'# | lexemeIndex buf ==# bol -> -- the '#' must be in column 0 + case expandWhile# is_space (stepOn buf) of { buf1 -> + if is_digit (currentChar# buf1) + then line_prag next_line buf1 s' + else is_a_token + } + where + next_line buf = lexer cont (stepOnUntilChar# buf '\n'#) -- tabs have been expanded beforehand c | is_space c -> tab y bol atbol (stepOn buf) @@ -440,23 +468,27 @@ lexer cont buf s@(PState{ | otherwise = lexToken cont glaexts buf s' -- {-# LINE .. #-} pragmas. yeuch. -line_prag cont buf = +line_prag cont buf s@PState{loc=loc} = case expandWhile# is_space buf of { buf1 -> case scanNumLit 0 (stepOverLexeme buf1) of { (line,buf2) -> -- subtract one: the line number refers to the *following* line. let real_line = line - 1 in case fromInteger real_line of { i@(I# l) -> + -- ToDo, if no filename then we skip the newline.... 
d'oh case expandWhile# is_space buf2 of { buf3 -> case currentChar# buf3 of '\"'#{-"-} -> case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 -> - let file = lexemeToFastString buf4 in - \s@PState{loc=loc} -> skipToEnd buf4 s{loc = mkSrcLoc file i} + let + file = lexemeToFastString buf4 + new_buf = stepOn (stepOverLexeme buf4) + in + if nullFastString file + then cont new_buf s{loc = replaceSrcLine loc l} + else cont new_buf s{loc = mkSrcLoc file i} } - other -> \s@PState{loc=loc} -> skipToEnd buf3 s{loc = replaceSrcLine loc l} + _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l} }}}} - where - skipToEnd buf = nested_comment cont buf nested_comment :: P a -> P a nested_comment cont buf = loop buf @@ -464,8 +496,7 @@ nested_comment cont buf = loop buf loop buf = case currentChar# buf of '\NUL'# | bufferExhausted (stepOn buf) -> - lexError "unterminated `{-'" buf - + lexError "unterminated `{-'" buf -- -} '-'# | lookAhead# buf 1# `eqChar#` '}'# -> cont (stepOnBy# buf 2#) @@ -518,7 +549,7 @@ lexBOL cont buf s@(PState{ lexToken :: (Token -> P a) -> Int# -> P a lexToken cont glaexts buf = - --trace "lexToken" $ + -- trace "lexToken" $ case currentChar# buf of -- special symbols ---------------------------------------------------- @@ -532,12 +563,16 @@ lexToken cont glaexts buf = ']'# -> cont ITcbrack (incLexeme buf) ','# -> cont ITcomma (incLexeme buf) ';'# -> cont ITsemi (incLexeme buf) - '}'# -> \ s@PState{context = ctx} -> case ctx of (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'} _ -> lexError "too many '}'s" buf s + '|'# -> case lookAhead# buf 1# of + '}'# | flag glaexts -> cont ITccurlybar + (setCurrentPos# buf 2#) + _ -> lex_sym cont (incLexeme buf) + '#'# -> case lookAhead# buf 1# of ')'# | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#) '-'# -> case lookAhead# buf 2# of @@ -551,19 +586,21 @@ lexToken cont glaexts buf = -> cont ITbackquote (incLexeme buf) '{'# -> -- look for "{-##" special iface pragma - case lookAhead# buf 1# of + case lookAhead# buf 1# of + '|'# | flag glaexts + -> cont ITocurlybar (setCurrentPos# buf 2#) '-'# -> case lookAhead# buf 2# of '#'# -> case lookAhead# buf 3# of - '#'# -> + '#'# -> let (lexeme, buf') - = doDiscard False (stepOnBy# (stepOverLexeme buf) 4#) in - cont (ITpragma lexeme) buf' + = doDiscard 0# (stepOnBy# (stepOverLexeme buf) 4#) in + cont (ITpragma lexeme) buf' _ -> lex_prag cont (setCurrentPos# buf 3#) - _ -> cont ITocurly (incLexeme buf) - _ -> (layoutOff `thenP_` cont ITocurly) (incLexeme buf) + _ -> cont ITocurly (incLexeme buf) + _ -> (layoutOff `thenP_` cont ITocurly) (incLexeme buf) -- strings/characters ------------------------------------------------- - '\"'#{-"-} -> lex_string cont glaexts "" (incLexeme buf) + '\"'#{-"-} -> lex_string cont glaexts [] (incLexeme buf) '\''# -> lex_char (char_end cont) glaexts (incLexeme buf) -- strictness and cpr pragmas and __scc treated specially. 
@@ -631,9 +668,12 @@ lex_prag cont buf lex_string cont glaexts s buf = case currentChar# buf of '"'#{-"-} -> - let buf' = incLexeme buf; s' = mkFastString (reverse s) in - case currentChar# buf' of - '#'# | flag glaexts -> cont (ITprimstring s') (incLexeme buf') + let buf' = incLexeme buf + s' = mkFastStringNarrow (map chr (reverse s)) + in case currentChar# buf' of + '#'# | flag glaexts -> if all (<= 0xFF) s + then cont (ITprimstring s') (incLexeme buf') + else lexError "primitive string literal must contain only characters <= \'\\xFF\'" buf' _ -> cont (ITstring s') buf' -- ignore \& in a string, deal with string gaps @@ -658,11 +698,11 @@ lex_stringgap cont glaexts s buf lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf -lex_char :: (Int# -> Char -> P a) -> Int# -> P a +lex_char :: (Int# -> Int -> P a) -> Int# -> P a lex_char cont glaexts buf = case currentChar# buf of '\\'# -> lex_escape (cont glaexts) (incLexeme buf) - c | is_any c -> cont glaexts (C# c) (incLexeme buf) + c | is_any c -> cont glaexts (I# (ord# c)) (incLexeme buf) other -> charError buf char_end cont glaexts c buf @@ -677,19 +717,19 @@ char_end cont glaexts c buf lex_escape cont buf = let buf' = incLexeme buf in case currentChar# buf of - 'a'# -> cont '\a' buf' - 'b'# -> cont '\b' buf' - 'f'# -> cont '\f' buf' - 'n'# -> cont '\n' buf' - 'r'# -> cont '\r' buf' - 't'# -> cont '\t' buf' - 'v'# -> cont '\v' buf' - '\\'# -> cont '\\' buf' - '"'# -> cont '\"' buf' - '\''# -> cont '\'' buf' + 'a'# -> cont (ord '\a') buf' + 'b'# -> cont (ord '\b') buf' + 'f'# -> cont (ord '\f') buf' + 'n'# -> cont (ord '\n') buf' + 'r'# -> cont (ord '\r') buf' + 't'# -> cont (ord '\t') buf' + 'v'# -> cont (ord '\v') buf' + '\\'# -> cont (ord '\\') buf' + '"'# -> cont (ord '\"') buf' + '\''# -> cont (ord '\'') buf' '^'# -> let c = currentChar# buf' in if c `geChar#` '@'# && c `leChar#` '_'# - then cont (C# (chr# (ord# c -# ord# '@'#))) (incLexeme buf') + then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf') else charError buf' 'x'# -> readNum (after_charnum cont) buf' is_hexdigit 16 hex @@ -699,13 +739,12 @@ lex_escape cont buf _ -> case [ (c,buf2) | (p,c) <- silly_escape_chars, Just buf2 <- [prefixMatch buf p] ] of - (c,buf2):_ -> cont c buf2 + (c,buf2):_ -> cont (ord c) buf2 [] -> charError buf' -after_charnum cont i buf - = let int = fromInteger i in - if i >= 0 && i <= 255 - then cont (chr int) buf +after_charnum cont i buf + = if i >= 0 && i <= 0x10FFFF + then cont (fromInteger i) buf else charError buf readNum cont buf is_digit base conv = read buf 0 @@ -872,7 +911,8 @@ lex_ip cont buf = where lexeme = lexemeToFastString buf' lex_id cont glaexts buf = - case expandWhile# is_ident buf of { buf1 -> + let buf1 = expandWhile# is_ident buf in + seq buf1 $ case (if flag glaexts then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes @@ -885,7 +925,7 @@ lex_id cont glaexts buf = cont kwd_token buf'; Nothing -> - let var_token = cont (mk_var_token lexeme) buf' in + let var_token = cont (ITvarid lexeme) buf' in if not (flag glaexts) then var_token @@ -895,9 +935,10 @@ lex_id cont glaexts buf = Just kwd_token -> cont kwd_token buf'; Nothing -> var_token - }}}} + }}} lex_sym cont buf = + -- trace "lex_sym" $ case expandWhile# is_symbol buf of buf' -> case lookupUFM haskellKeySymsFM lexeme of { Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $ @@ -909,6 +950,7 @@ lex_sym cont buf = lex_con cont glaexts buf = + -- trace ("con: "{-++unpackFS lexeme-}) $ case expandWhile# is_ident buf of { buf1 -> case 
slurp_trailing_hashes buf1 glaexts of { buf' -> @@ -917,13 +959,13 @@ lex_con cont glaexts buf = _ -> just_a_conid where - just_a_conid = --trace ("con: "++unpackFS lexeme) $ - cont (ITconid lexeme) buf' + just_a_conid = cont (ITconid lexeme) buf' lexeme = lexemeToFastString buf' munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid }} lex_qid cont glaexts mod buf just_a_conid = + -- trace ("quid: "{-++unpackFS lexeme-}) $ case currentChar# buf of '['# -> -- Special case for [] case lookAhead# buf 1# of @@ -942,7 +984,7 @@ lex_qid cont glaexts mod buf just_a_conid = _ -> just_a_conid '-'# -> case lookAhead# buf 1# of - '>'# -> cont (ITqconid (mod,SLIT("->"))) (setCurrentPos# buf 2#) + '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#) _ -> lex_id3 cont glaexts mod buf just_a_conid _ -> lex_id3 cont glaexts mod buf just_a_conid @@ -951,6 +993,7 @@ lex_id3 cont glaexts mod buf just_a_conid let start_new_lexeme = stepOverLexeme buf in + -- trace ("lex_id31 "{-++unpackFS lexeme-}) $ case expandWhile# is_symbol start_new_lexeme of { buf' -> let lexeme = lexemeToFastString buf' @@ -965,6 +1008,7 @@ lex_id3 cont glaexts mod buf just_a_conid let start_new_lexeme = stepOverLexeme buf in + -- trace ("lex_id32 "{-++unpackFS lexeme-}) $ case expandWhile# is_ident start_new_lexeme of { buf1 -> if emptyLexeme buf1 then just_a_conid @@ -973,17 +1017,18 @@ lex_id3 cont glaexts mod buf just_a_conid case slurp_trailing_hashes buf1 glaexts of { buf' -> let - lexeme = lexemeToFastString buf' - new_buf = mergeLexemes buf buf' + lexeme = lexemeToFastString buf' + new_buf = mergeLexemes buf buf' is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf in case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of { - Just kwd_token -> just_a_conid; -- avoid M.where etc. - Nothing -> is_a_qvarid - -- TODO: special ids (as, qualified, hiding) shouldn't be - -- recognised as keywords here. ie. M.as is a qualified varid. - }}} + Nothing -> is_a_qvarid ; + Just kwd_token | isSpecial kwd_token -- special ids (as, qualified, hiding) shouldn't be + -> is_a_qvarid -- recognised as keywords here. + | otherwise + -> just_a_conid -- avoid M.where etc. 
+ }}} slurp_trailing_hashes buf glaexts | flag glaexts = expandWhile# (`eqChar#` '#'#) buf @@ -997,9 +1042,10 @@ mk_var_token pk_str | otherwise = ITvarsym pk_str where (C# f) = _HEAD_ pk_str - tl = _TAIL_ pk_str + -- tl = _TAIL_ pk_str mk_qvar_token m token = +-- trace ("mk_qvar ") $ case mk_var_token token of ITconid n -> ITqconid (m,n) ITvarid n -> ITqvarid (m,n) @@ -1018,7 +1064,7 @@ lex_tuple cont mod buf back_off = go n buf = case currentChar# buf of ','# -> go (n+1) (stepOn buf) - ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n))) (stepOn buf) + ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf) _ -> back_off lex_ubx_tuple cont mod buf back_off = @@ -1028,20 +1074,20 @@ lex_ubx_tuple cont mod buf back_off = case currentChar# buf of ','# -> go (n+1) (stepOn buf) '#'# -> case lookAhead# buf 1# of - ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n))) + ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n))) (stepOnBy# buf 2#) _ -> back_off _ -> back_off \end{code} ----------------------------------------------------------------------------- -doDiscard rips along really fast, looking for a '#-}', +doDiscard rips along really fast, looking for a '##-}', indicating the end of the pragma we're skipping \begin{code} doDiscard inStr buf = case currentChar# buf of - '#'# | not inStr -> + '#'# | inStr ==# 0# -> case lookAhead# buf 1# of { '#'# -> case lookAhead# buf 2# of { '-'# -> case lookAhead# buf 3# of { '}'# -> @@ -1049,24 +1095,32 @@ doDiscard inStr buf = _ -> doDiscard inStr (incLexeme buf) }; _ -> doDiscard inStr (incLexeme buf) }; _ -> doDiscard inStr (incLexeme buf) } + '"'# -> let odd_slashes buf flg i# = case lookAhead# buf i# of '\\'# -> odd_slashes buf (not flg) (i# -# 1#) _ -> flg + + not_inStr = if inStr ==# 0# then 1# else 0# in case lookAhead# buf (negateInt# 1#) of --backwards, actually '\\'# -> -- escaping something.. - if odd_slashes buf True (negateInt# 2#) then - -- odd number of slashes, " is escaped. - doDiscard inStr (incLexeme buf) - else - -- even number of slashes, \ is escaped. - doDiscard (not inStr) (incLexeme buf) - _ -> case inStr of -- forced to avoid build-up - True -> doDiscard False (incLexeme buf) - False -> doDiscard True (incLexeme buf) + if odd_slashes buf True (negateInt# 2#) + then -- odd number of slashes, " is escaped. + doDiscard inStr (incLexeme buf) + else -- even number of slashes, \ is escaped. + doDiscard not_inStr (incLexeme buf) + _ -> doDiscard not_inStr (incLexeme buf) + + '\''# | inStr ==# 0# -> + case lookAhead# buf 1# of { '"'# -> + case lookAhead# buf 2# of { '\''# -> + doDiscard inStr (setCurrentPos# buf 3#); + _ -> doDiscard inStr (incLexeme buf) }; + _ -> doDiscard inStr (incLexeme buf) } + _ -> doDiscard inStr (incLexeme buf) \end{code}
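
The isSpecial / lex_id3 change in this patch is easiest to see in isolation: when the lexer meets `M.x` and `x` happens to be a keyword, only the "special" identifiers (as, hiding, qualified, forall, label, dynamic, unsafe, with, the calling-convention names) may follow the qualifier, in which case `M.x` is an ordinary qualified variable; any other keyword (e.g. `where`) makes the lexer keep just the conid `M`. Below is a minimal sketch of that rule, assuming simplified token constructors and keyword lists rather than GHC's own (the names ITqvarid/ITconid are borrowed only for flavour, and the real lexer works over a StringBuffer in continuation style, which the sketch collapses into a pure decision function).

\begin{code}
-- Sketch only: simplified tokens and keyword sets, not GHC's data types.
module QualifiedKeywordSketch where

data Tok
  = ITqvarid (String, String)  -- qualified variable, e.g. M.as
  | ITconid  String            -- bare constructor / module name
  deriving Show

keywords, specialIds :: [String]
keywords   = ["as", "case", "hiding", "import", "let", "qualified", "where"]
specialIds = ["as", "hiding", "qualified", "forall", "label", "dynamic",
              "unsafe", "with", "export", "ccall", "stdcall"]

-- How should "m.x" lex when x is also a keyword?
lexQualified :: String -> String -> Tok
lexQualified m x
  | x `elem` keywords && x `notElem` specialIds
      = ITconid m            -- e.g. M.where: stop before the keyword
  | otherwise
      = ITqvarid (m, x)      -- e.g. M.as or M.foo: a qualified varid

-- In GHCi:
--   lexQualified "M" "as"    ==> ITqvarid ("M","as")
--   lexQualified "M" "where" ==> ITconid "M"
\end{code}

In the patch itself this decision is what replaces the old "avoid M.where etc." fall-through: lex_id3 now consults isSpecial and only rejects the qualified reading for non-special keywords, instead of rejecting every keyword after the dot.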