X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Fparser%2FLex.lhs;h=6c69738d9f1de6d008ed3227838481c69ac5a0be;hb=9622688719ae4fbcba307d91162ac537d100659f;hp=6d56a6de735601cc623528d4c65493557c077a7b;hpb=656b963de456a5888b840bc788e036ad575a3930;p=ghc-hetmet.git diff --git a/ghc/compiler/parser/Lex.lhs b/ghc/compiler/parser/Lex.lhs index 6d56a6d..6c69738 100644 --- a/ghc/compiler/parser/Lex.lhs +++ b/ghc/compiler/parser/Lex.lhs @@ -16,7 +16,6 @@ An example that provokes the error is -------------------------------------------------------- \begin{code} -{-# OPTIONS -#include "ctypes.h" #-} module Lex ( @@ -34,35 +33,26 @@ module Lex ( #include "HsVersions.h" -import Char ( ord, isSpace, toUpper ) +import Char ( isSpace, toUpper ) import List ( isSuffixOf ) -import IdInfo ( InlinePragInfo(..), CprInfo(..) ) -import Name ( isLowerISO, isUpperISO ) -import PrelMods ( mkTupNameStr, mkUbxTupNameStr ) -import CmdLineOpts ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck ) +import IdInfo ( InlinePragInfo(..) ) +import PrelNames ( mkTupNameStr ) +import CmdLineOpts ( opt_HiVersion, opt_NoHiCheck ) import Demand ( Demand(..) {- instance Read -} ) -import UniqFM ( UniqFM, listToUFM, lookupUFM) -import BasicTypes ( NewOrData(..) ) +import UniqFM ( listToUFM, lookupUFM ) +import BasicTypes ( NewOrData(..), Boxity(..) ) import SrcLoc ( SrcLoc, incSrcLine, srcLocFile, srcLocLine, replaceSrcLine, mkSrcLoc ) -import Maybes ( MaybeErr(..) ) import ErrUtils ( Message ) import Outputable import FastString import StringBuffer import GlaExts -import ST ( runST ) - -#if __GLASGOW_HASKELL__ >= 303 -import Bits -import Word -#endif - -import Char ( chr ) -import Addr +import Ctype +import Char ( ord ) import PrelRead ( readRational__ ) -- Glasgow non-std \end{code} @@ -145,8 +135,12 @@ data Token | ITbottom | ITinteger_lit | ITfloat_lit + | ITword_lit + | ITword64_lit + | ITint64_lit | ITrational_lit | ITaddr_lit + | ITlabel_lit | ITlit_lit | ITstring_lit | ITtypeapp @@ -158,8 +152,8 @@ data Token | ITunfold InlinePragInfo | ITstrict ([Demand], Bool) | ITrules + | ITcprinfo | ITdeprecated - | ITcprinfo (CprInfo) | IT__scc | ITsccAllCafs @@ -190,6 +184,8 @@ data Token | ITocurly -- special symbols | ITccurly + | ITocurlybar -- {|, for type applications + | ITccurlybar -- |}, for type applications | ITvccurly | ITobrack | ITcbrack @@ -215,12 +211,12 @@ data Token | ITpragma StringBuffer - | ITchar Char + | ITchar Int | ITstring FAST_STRING - | ITinteger Integer + | ITinteger Integer | ITrational Rational - | ITprimchar Char + | ITprimchar Int | ITprimstring FAST_STRING | ITprimint Integer | ITprimfloat Rational @@ -229,7 +225,7 @@ data Token | ITunknown String -- Used when the lexer can't make sense of it | ITeof -- end of file token - deriving Text -- debugging + deriving Show -- debugging \end{code} ----------------------------------------------------------------------------- @@ -311,8 +307,12 @@ ghcExtensionKeywordsFM = listToUFM $ ("__bot", ITbottom), ("__integer", ITinteger_lit), ("__float", ITfloat_lit), + ("__int64", ITint64_lit), + ("__word", ITword_lit), + ("__word64", ITword64_lit), ("__rational", ITrational_lit), ("__addr", ITaddr_lit), + ("__label", ITlabel_lit), ("__litlit", ITlit_lit), ("__string", ITstring_lit), ("__a", ITtypeapp), @@ -383,7 +383,7 @@ lexer cont buf s@(PState{ where line = srcLocLine loc - tab y bol atbol buf = --trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $ + tab y bol atbol buf = -- trace ("tab: " ++ show (I# y) ++ " : " ++ show (currentChar buf)) $ case currentChar# buf of '\NUL'# -> @@ -401,17 +401,15 @@ lexer cont buf s@(PState{ if next `eqChar#` '-'# then trundle (n +# 1#) else if is_symbol next || n <# 2# then is_a_token - else case untilChar# (stepOnBy# buf n) '\n'# of - { buf' -> tab y bol atbol (stepOverLexeme buf') - } + else tab y bol atbol + (stepOnUntilChar# (stepOnBy# buf n) '\n'#) in trundle 1# -- comments and pragmas. We deal with LINE pragmas here, -- and throw out any unrecognised pragmas as comments. Any -- pragmas we know about are dealt with later (after any layout -- processing if necessary). - - '{'# | lookAhead# buf 1# `eqChar#` '-'# -> + '{'# | lookAhead# buf 1# `eqChar#` '-'# -> if lookAhead# buf 2# `eqChar#` '#'# then if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1-> @@ -419,14 +417,21 @@ lexer cont buf s@(PState{ let lexeme = mkFastString -- ToDo: too slow (map toUpper (lexemeToString buf2)) in case lookupUFM pragmaKeywordsFM lexeme of - Just ITline_prag -> line_prag (lexer cont) buf2 s' + Just ITline_prag -> + line_prag skip_to_end buf2 s' Just other -> is_a_token - Nothing -> skip_to_end (stepOnBy# buf 2#) + Nothing -> skip_to_end (stepOnBy# buf 2#) s' }} - - else skip_to_end (stepOnBy# buf 2#) + + else skip_to_end (stepOnBy# buf 2#) s' where - skip_to_end buf = nested_comment (lexer cont) buf s' + skip_to_end = nested_comment (lexer cont) + + -- special GHC extension: we grok cpp-style #line pragmas + '#'# | lexemeIndex buf ==# bol -> -- the '#' must be in column 0 + line_prag next_line (stepOn buf) s' + where + next_line buf = lexer cont (stepOnUntilChar# buf '\n'#) -- tabs have been expanded beforehand c | is_space c -> tab y bol atbol (stepOn buf) @@ -440,23 +445,27 @@ lexer cont buf s@(PState{ | otherwise = lexToken cont glaexts buf s' -- {-# LINE .. #-} pragmas. yeuch. -line_prag cont buf = +line_prag cont buf s@PState{loc=loc} = case expandWhile# is_space buf of { buf1 -> case scanNumLit 0 (stepOverLexeme buf1) of { (line,buf2) -> -- subtract one: the line number refers to the *following* line. let real_line = line - 1 in case fromInteger real_line of { i@(I# l) -> + -- ToDo, if no filename then we skip the newline.... d'oh case expandWhile# is_space buf2 of { buf3 -> case currentChar# buf3 of '\"'#{-"-} -> case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 -> - let file = lexemeToFastString buf4 in - \s@PState{loc=loc} -> skipToEnd buf4 s{loc = mkSrcLoc file i} + let + file = lexemeToFastString buf4 + new_buf = stepOn (stepOverLexeme buf4) + in + if nullFastString file + then cont new_buf s{loc = replaceSrcLine loc l} + else cont new_buf s{loc = mkSrcLoc file i} } - other -> \s@PState{loc=loc} -> skipToEnd buf3 s{loc = replaceSrcLine loc l} + _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l} }}}} - where - skipToEnd buf = nested_comment cont buf nested_comment :: P a -> P a nested_comment cont buf = loop buf @@ -464,8 +473,7 @@ nested_comment cont buf = loop buf loop buf = case currentChar# buf of '\NUL'# | bufferExhausted (stepOn buf) -> - lexError "unterminated `{-'" buf - + lexError "unterminated `{-'" buf -- -} '-'# | lookAhead# buf 1# `eqChar#` '}'# -> cont (stepOnBy# buf 2#) @@ -518,7 +526,7 @@ lexBOL cont buf s@(PState{ lexToken :: (Token -> P a) -> Int# -> P a lexToken cont glaexts buf = - --trace "lexToken" $ + -- trace "lexToken" $ case currentChar# buf of -- special symbols ---------------------------------------------------- @@ -532,12 +540,16 @@ lexToken cont glaexts buf = ']'# -> cont ITcbrack (incLexeme buf) ','# -> cont ITcomma (incLexeme buf) ';'# -> cont ITsemi (incLexeme buf) - '}'# -> \ s@PState{context = ctx} -> case ctx of (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'} _ -> lexError "too many '}'s" buf s + '|'# -> case lookAhead# buf 1# of + '}'# | flag glaexts -> cont ITccurlybar + (setCurrentPos# buf 2#) + _ -> lex_sym cont (incLexeme buf) + '#'# -> case lookAhead# buf 1# of ')'# | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#) '-'# -> case lookAhead# buf 2# of @@ -551,19 +563,21 @@ lexToken cont glaexts buf = -> cont ITbackquote (incLexeme buf) '{'# -> -- look for "{-##" special iface pragma - case lookAhead# buf 1# of + case lookAhead# buf 1# of + '|'# | flag glaexts + -> cont ITocurlybar (setCurrentPos# buf 2#) '-'# -> case lookAhead# buf 2# of '#'# -> case lookAhead# buf 3# of - '#'# -> + '#'# -> let (lexeme, buf') = doDiscard False (stepOnBy# (stepOverLexeme buf) 4#) in - cont (ITpragma lexeme) buf' + cont (ITpragma lexeme) buf' _ -> lex_prag cont (setCurrentPos# buf 3#) - _ -> cont ITocurly (incLexeme buf) - _ -> (layoutOff `thenP_` cont ITocurly) (incLexeme buf) + _ -> cont ITocurly (incLexeme buf) + _ -> (layoutOff `thenP_` cont ITocurly) (incLexeme buf) -- strings/characters ------------------------------------------------- - '\"'#{-"-} -> lex_string cont glaexts "" (incLexeme buf) + '\"'#{-"-} -> lex_string cont glaexts [] (incLexeme buf) '\''# -> lex_char (char_end cont) glaexts (incLexeme buf) -- strictness and cpr pragmas and __scc treated specially. @@ -574,8 +588,8 @@ lexToken cont glaexts buf = lex_demand cont (stepOnUntil (not . isSpace) (stepOnBy# buf 3#)) -- past __S 'M'# -> - lex_cpr cont (stepOnUntil (not . isSpace) - (stepOnBy# buf 3#)) -- past __M + cont ITcprinfo (stepOnBy# buf 3#) -- past __M + 's'# -> case prefixMatch (stepOnBy# buf 3#) "cc" of Just buf' -> lex_scc cont (stepOverLexeme buf') @@ -631,9 +645,11 @@ lex_prag cont buf lex_string cont glaexts s buf = case currentChar# buf of '"'#{-"-} -> - let buf' = incLexeme buf; s' = mkFastString (reverse s) in + let buf' = incLexeme buf; s' = mkFastStringInt (reverse s) in case currentChar# buf' of - '#'# | flag glaexts -> cont (ITprimstring s') (incLexeme buf') + '#'# | flag glaexts -> if all (<= 0xFF) s + then cont (ITprimstring s') (incLexeme buf') + else lexError "primitive string literal must contain only characters <= '\xFF'" buf' _ -> cont (ITstring s') buf' -- ignore \& in a string, deal with string gaps @@ -658,11 +674,11 @@ lex_stringgap cont glaexts s buf lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf -lex_char :: (Int# -> Char -> P a) -> Int# -> P a +lex_char :: (Int# -> Int -> P a) -> Int# -> P a lex_char cont glaexts buf = case currentChar# buf of '\\'# -> lex_escape (cont glaexts) (incLexeme buf) - c | is_any c -> cont glaexts (C# c) (incLexeme buf) + c | is_any c -> cont glaexts (I# (ord# c)) (incLexeme buf) other -> charError buf char_end cont glaexts c buf @@ -677,19 +693,19 @@ char_end cont glaexts c buf lex_escape cont buf = let buf' = incLexeme buf in case currentChar# buf of - 'a'# -> cont '\a' buf' - 'b'# -> cont '\b' buf' - 'f'# -> cont '\f' buf' - 'n'# -> cont '\n' buf' - 'r'# -> cont '\r' buf' - 't'# -> cont '\t' buf' - 'v'# -> cont '\v' buf' - '\\'# -> cont '\\' buf' - '"'# -> cont '\"' buf' - '\''# -> cont '\'' buf' + 'a'# -> cont (ord '\a') buf' + 'b'# -> cont (ord '\b') buf' + 'f'# -> cont (ord '\f') buf' + 'n'# -> cont (ord '\n') buf' + 'r'# -> cont (ord '\r') buf' + 't'# -> cont (ord '\t') buf' + 'v'# -> cont (ord '\v') buf' + '\\'# -> cont (ord '\\') buf' + '"'# -> cont (ord '\"') buf' + '\''# -> cont (ord '\'') buf' '^'# -> let c = currentChar# buf' in if c `geChar#` '@'# && c `leChar#` '_'# - then cont (C# (chr# (ord# c -# ord# '@'#))) (incLexeme buf') + then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf') else charError buf' 'x'# -> readNum (after_charnum cont) buf' is_hexdigit 16 hex @@ -699,13 +715,12 @@ lex_escape cont buf _ -> case [ (c,buf2) | (p,c) <- silly_escape_chars, Just buf2 <- [prefixMatch buf p] ] of - (c,buf2):_ -> cont c buf2 + (c,buf2):_ -> cont (ord c) buf2 [] -> charError buf' -after_charnum cont i buf - = let int = fromInteger i in - if i >= 0 && i <= 255 - then cont (chr int) buf +after_charnum cont i buf + = if i >= 0 && i <= 0x7FFFFFFF + then cont (fromInteger i) buf else charError buf readNum cont buf is_digit base conv = read buf 0 @@ -799,23 +814,6 @@ lex_demand cont buf = = case read_em [] buf of (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest -lex_cpr cont buf = - case read_em [] buf of { (cpr_inf,buf') -> - ASSERT ( null (tail cpr_inf) ) - cont (ITcprinfo $ head cpr_inf) buf' - } - where - -- code snatched from lex_demand above - read_em acc buf = - case currentChar# buf of - '-'# -> read_em (NoCPRInfo : acc) (stepOn buf) - '('# -> do_unpack acc (stepOn buf) - ')'# -> (reverse acc, stepOn buf) - _ -> (reverse acc, buf) - - do_unpack acc buf - = case read_em [] buf of - (stuff, rest) -> read_em ((CPRInfo stuff) : acc) rest ------------------ lex_scc cont buf = @@ -880,33 +878,6 @@ lex_cstring cont buf = (mergeLexemes buf buf') Nothing -> lexError "unterminated ``" buf ------------------------------------------------------------------------------- --- Character Classes - -is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool - -{-# INLINE is_ctype #-} -#if __GLASGOW_HASKELL__ >= 303 -is_ctype :: Word8 -> Char# -> Bool -is_ctype mask = \c -> - (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0 -#else -is_ctype :: Int -> Char# -> Bool -is_ctype (I# mask) = \c -> - let (A# ctype) = ``char_types'' :: Addr - flag_word = int2Word# (ord# (indexCharOffAddr# ctype (ord# c))) - in - (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#) -#endif - -is_ident = is_ctype 1 -is_symbol = is_ctype 2 -is_any = is_ctype 4 -is_space = is_ctype 8 -is_lower = is_ctype 16 -is_upper = is_ctype 32 -is_digit = is_ctype 64 - ----------------------------------------------------------------------------- -- identifiers, symbols etc. @@ -916,7 +887,8 @@ lex_ip cont buf = where lexeme = lexemeToFastString buf' lex_id cont glaexts buf = - case expandWhile# is_ident buf of { buf1 -> + let buf1 = expandWhile# is_ident buf in + seq buf1 $ case (if flag glaexts then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes @@ -929,7 +901,7 @@ lex_id cont glaexts buf = cont kwd_token buf'; Nothing -> - let var_token = cont (mk_var_token lexeme) buf' in + let var_token = cont (ITvarid lexeme) buf' in if not (flag glaexts) then var_token @@ -939,9 +911,10 @@ lex_id cont glaexts buf = Just kwd_token -> cont kwd_token buf'; Nothing -> var_token - }}}} + }}} lex_sym cont buf = + -- trace "lex_sym" $ case expandWhile# is_symbol buf of buf' -> case lookupUFM haskellKeySymsFM lexeme of { Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $ @@ -953,6 +926,7 @@ lex_sym cont buf = lex_con cont glaexts buf = + -- trace ("con: "{-++unpackFS lexeme-}) $ case expandWhile# is_ident buf of { buf1 -> case slurp_trailing_hashes buf1 glaexts of { buf' -> @@ -961,13 +935,13 @@ lex_con cont glaexts buf = _ -> just_a_conid where - just_a_conid = --trace ("con: "++unpackFS lexeme) $ - cont (ITconid lexeme) buf' + just_a_conid = cont (ITconid lexeme) buf' lexeme = lexemeToFastString buf' munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid }} lex_qid cont glaexts mod buf just_a_conid = + -- trace ("quid: "{-++unpackFS lexeme-}) $ case currentChar# buf of '['# -> -- Special case for [] case lookAhead# buf 1# of @@ -986,7 +960,7 @@ lex_qid cont glaexts mod buf just_a_conid = _ -> just_a_conid '-'# -> case lookAhead# buf 1# of - '>'# -> cont (ITqconid (mod,SLIT("->"))) (setCurrentPos# buf 2#) + '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#) _ -> lex_id3 cont glaexts mod buf just_a_conid _ -> lex_id3 cont glaexts mod buf just_a_conid @@ -995,6 +969,7 @@ lex_id3 cont glaexts mod buf just_a_conid let start_new_lexeme = stepOverLexeme buf in + -- trace ("lex_id31 "{-++unpackFS lexeme-}) $ case expandWhile# is_symbol start_new_lexeme of { buf' -> let lexeme = lexemeToFastString buf' @@ -1009,6 +984,7 @@ lex_id3 cont glaexts mod buf just_a_conid let start_new_lexeme = stepOverLexeme buf in + -- trace ("lex_id32 "{-++unpackFS lexeme-}) $ case expandWhile# is_ident start_new_lexeme of { buf1 -> if emptyLexeme buf1 then just_a_conid @@ -1036,17 +1012,15 @@ slurp_trailing_hashes buf glaexts mk_var_token pk_str | is_upper f = ITconid pk_str - -- _[A-Z] is treated as a constructor in interface files. - | f `eqChar#` '_'# && not (_NULL_ tl) - && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str | is_ident f = ITvarid pk_str | f `eqChar#` ':'# = ITconsym pk_str | otherwise = ITvarsym pk_str where (C# f) = _HEAD_ pk_str - tl = _TAIL_ pk_str + -- tl = _TAIL_ pk_str mk_qvar_token m token = +-- trace ("mk_qvar ") $ case mk_var_token token of ITconid n -> ITqconid (m,n) ITvarid n -> ITqvarid (m,n) @@ -1065,7 +1039,7 @@ lex_tuple cont mod buf back_off = go n buf = case currentChar# buf of ','# -> go (n+1) (stepOn buf) - ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n))) (stepOn buf) + ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf) _ -> back_off lex_ubx_tuple cont mod buf back_off = @@ -1075,7 +1049,7 @@ lex_ubx_tuple cont mod buf back_off = case currentChar# buf of ','# -> go (n+1) (stepOn buf) '#'# -> case lookAhead# buf 1# of - ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n))) + ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n))) (stepOnBy# buf 2#) _ -> back_off _ -> back_off