X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Fparser%2FLexer.x;h=4c1b48efc0845f3009dc19f4d8e5e26eadb36103;hb=274a7b1ab7cf30fd1ceaddd02f991eaf694cc37c;hp=3846b5af2c491df6adecaf21d549016bff7da268;hpb=704ca16d2447b081eebd5f5e66635b19cba4a655;p=ghc-hetmet.git diff --git a/ghc/compiler/parser/Lexer.x b/ghc/compiler/parser/Lexer.x index 3846b5a..4c1b48e 100644 --- a/ghc/compiler/parser/Lexer.x +++ b/ghc/compiler/parser/Lexer.x @@ -22,11 +22,12 @@ { module Lexer ( - Token(..), lexer, mkPState, PState(..), + Token(..), lexer, pragState, mkPState, PState(..), P(..), ParseResult(..), getSrcLoc, failLocMsgP, failSpanMsgP, srcParseFail, popContext, pushCurrentContext, setLastToken, setSrcLoc, - getLexState, popLexState, pushLexState + getLexState, popLexState, pushLexState, + extension, bangPatEnabled ) where #include "HsVersions.h" @@ -43,9 +44,15 @@ import Ctype import Util ( maybePrefixMatch, readRational ) import DATA_BITS -import Data.Char +import Data.Char ( chr ) import Ratio --import TRACE + +#if __GLASGOW_HASKELL__ >= 605 +import Data.Char ( GeneralCategory(..), generalCategory, isPrint, isUpper ) +#else +import Compat.Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper ) +#endif } $unispace = \x05 @@ -151,7 +158,7 @@ $white_no_nl+ ; -- generate a matching '}' token. () { do_layout_left } -<0,glaexts> \n { begin bol } +<0,option_prags,glaexts> \n { begin bol } "{-#" $whitechar* (line|LINE) { begin line_prag2 } @@ -177,7 +184,7 @@ $white_no_nl+ ; "{-#" $whitechar* (RULES|rules) { token ITrules_prag } -<0,glaexts> { +<0,option_prags,glaexts> { "{-#" $whitechar* (INLINE|inline) { token (ITinline_prag True) } "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline) { token (ITinline_prag False) } @@ -194,13 +201,20 @@ $white_no_nl+ ; "{-#" $whitechar* (SCC|scc) { token ITscc_prag } "{-#" $whitechar* (CORE|core) { token ITcore_prag } "{-#" $whitechar* (UNPACK|unpack) { token ITunpack_prag } - + "{-#" { nested_comment } -- ToDo: should only be valid inside a pragma: "#-}" { token ITclose_prag} } + { + "{-#" $whitechar* (OPTIONS|options) { lex_string_prag IToptions_prag } + "{-#" $whitechar* (OPTIONS_GHC|options_ghc) + { lex_string_prag IToptions_prag } + "{-#" $whitechar* (LANGUAGE|language) { token ITlanguage_prag } + "{-#" $whitechar* (INCLUDE|include) { lex_string_prag ITinclude_prag } +} -- '0' state: ordinary lexemes -- 'glaexts' state: glasgow extensions (postfix '#', etc.) @@ -241,7 +255,7 @@ $white_no_nl+ ; "|}" { token ITccurlybar } } -<0,glaexts> { +<0,option_prags,glaexts> { \( { special IToparen } \) { special ITcparen } \[ { special ITobrack } @@ -254,7 +268,7 @@ $white_no_nl+ ; \} { close_brace } } -<0,glaexts> { +<0,option_prags,glaexts> { @qual @varid { check_qvarid } @qual @conid { idtoken qconid } @varid { varid } @@ -370,6 +384,9 @@ data Token | ITcore_prag -- hdaume: core annotations | ITunpack_prag | ITclose_prag + | IToptions_prag String + | ITinclude_prag String + | ITlanguage_prag | ITdotdot -- reserved symbols | ITcolon @@ -560,6 +577,8 @@ reservedSymsFM = listToUFM $ #if __GLASGOW_HASKELL__ >= 605 ,("λ", ITlam, bit glaExtsBit) + ,("∷", ITdcolon, bit glaExtsBit) + ,("⇒", ITdarrow, bit glaExtsBit) ,("∀", ITforall, bit glaExtsBit) ,("→", ITrarrow, bit glaExtsBit) ,("←", ITlarrow, bit glaExtsBit) @@ -842,6 +861,32 @@ setFile code span buf len = do pushLexState code lexToken + +-- ----------------------------------------------------------------------------- +-- Options, includes and language pragmas. + +lex_string_prag :: (String -> Token) -> Action +lex_string_prag mkTok span buf len + = do input <- getInput + start <- getSrcLoc + tok <- go [] input + end <- getSrcLoc + return (L (mkSrcSpan start end) tok) + where go acc input + = if isString input "#-}" + then do setInput input + return (mkTok (reverse acc)) + else case alexGetChar input of + Just (c,i) -> go (c:acc) i + Nothing -> err input + isString i [] = True + isString i (x:xs) + = case alexGetChar i of + Just (c,i') | c == x -> isString i' xs + _other -> False + err (AI end _ _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma" + + -- ----------------------------------------------------------------------------- -- Strings & Chars @@ -1179,9 +1224,6 @@ alexGetChar (AI loc ofs s) other_graphic = '\x6' adj_c -#if __GLASGOW_HASKELL__ < 605 - = c -- no Unicode support -#else | c <= '\x06' = non_graphic | c <= '\xff' = c | otherwise = @@ -1210,7 +1252,6 @@ alexGetChar (AI loc ofs s) OtherSymbol -> symbol Space -> space _other -> non_graphic -#endif -- This version does not squash unicode characters, it is used when -- lexing strings. @@ -1255,6 +1296,8 @@ arrowsBit = 4 thBit = 5 ipBit = 6 tvBit = 7 -- Scoped type variables enables 'forall' keyword +bangPatBit = 8 -- Tells the parser to understand bang-patterns + -- (doesn't affect the lexer) glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool glaExtsEnabled flags = testBit flags glaExtsBit @@ -1264,6 +1307,23 @@ arrowsEnabled flags = testBit flags arrowsBit thEnabled flags = testBit flags thBit ipEnabled flags = testBit flags ipBit tvEnabled flags = testBit flags tvBit +bangPatEnabled flags = testBit flags bangPatBit + +-- PState for parsing options pragmas +-- +pragState :: StringBuffer -> SrcLoc -> PState +pragState buf loc = + PState { + buffer = buf, + last_loc = mkSrcSpan loc loc, + last_offs = 0, + last_len = 0, + loc = loc, + extsBitmap = 0, + context = [], + lex_state = [bol, option_prags, 0] + } + -- create a parse state -- @@ -1288,6 +1348,7 @@ mkPState buf loc flags = .|. thBit `setBitIf` dopt Opt_TH flags .|. ipBit `setBitIf` dopt Opt_ImplicitParams flags .|. tvBit `setBitIf` dopt Opt_ScopedTypeVariables flags + .|. bangPatBit `setBitIf` dopt Opt_BangPatterns flags -- setBitIf :: Int -> Bool -> Int b `setBitIf` cond | cond = bit b