X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2Fparser%2FLexer.x;h=6362bf3e20ea3973122c8102feba090174bb6630;hb=0f5e104c36b1dc3d8deeec5fef3d65e7b3a1b5ad;hp=2f3ef4b41b5a238425d3e3d934d8f27fbf75e24f;hpb=67cb409159fa9136dff942b8baaec25909416022;p=ghc-hetmet.git

diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index 2f3ef4b..6362bf3 100644
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -27,6 +27,9 @@
 -- any warnings in the module. See
 --     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
 -- for details
+--
+-- Note that Alex itself generates code with with some unused bindings and
+-- without type signatures, so removing the flag might not be possible.
 
 module Lexer (
    Token(..), lexer, pragState, mkPState, PState(..),
@@ -39,8 +42,6 @@ module Lexer (
    addWarning
   ) where
 
-#include "HsVersions.h"
-
 import Bag
 import ErrUtils
 import Outputable
@@ -55,7 +56,7 @@ import Util		( maybePrefixMatch, readRational )
 
 import Control.Monad
 import Data.Bits
-import Data.Char 	( chr, isSpace )
+import Data.Char 	( chr, ord, isSpace )
 import Data.Ratio
 import Debug.Trace
 
@@ -154,7 +155,7 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
 
 -- Next, match Haddock comments if no -haddock flag
 
-"-- " $docsym .* / { ifExtension (not . haddockEnabled) } ;
+"-- " [$docsym \#] .* / { ifExtension (not . haddockEnabled) } ;
 
 -- Now, when we've matched comments that begin with 2 dashes and continue
 -- with a different character, we need to match comments that begin with three
@@ -308,6 +309,9 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
   "|]"	    / { ifExtension thEnabled }	{ token ITcloseQuote }
   \$ @varid / { ifExtension thEnabled }	{ skip_one_varid ITidEscape }
   "$("	    / { ifExtension thEnabled }	{ token ITparenEscape }
+
+  "[$" @varid "|"  / { ifExtension qqEnabled }
+                     { lex_quasiquote_tok }
 }
 
 <0> {
@@ -372,25 +376,29 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
 -- when trying to be close to Haskell98
 <0> {
   -- Normal integral literals (:: Num a => a, from Integer)
-  @decimal			{ tok_num positive 0 0 decimal }
-  0[oO] @octal			{ tok_num positive 2 2 octal }
-  0[xX] @hexadecimal		{ tok_num positive 2 2 hexadecimal }
+  @decimal           { tok_num positive 0 0 decimal }
+  0[oO] @octal       { tok_num positive 2 2 octal }
+  0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }
 
   -- Normal rational literals (:: Fractional a => a, from Rational)
-  @floating_point		{ strtoken tok_float }
+  @floating_point    { strtoken tok_float }
 }
 
 <0> {
-  -- Unboxed ints (:: Int#)
+  -- Unboxed ints (:: Int#) and words (:: Word#)
   -- It's simpler (and faster?) to give separate cases to the negatives,
   -- especially considering octal/hexadecimal prefixes.
-  @decimal \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
-  0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
-  0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
-  @negative @decimal \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
-  @negative 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
+  @decimal                     \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
+  0[oO] @octal                 \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
+  0[xX] @hexadecimal           \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
+  @negative @decimal           \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
+  @negative 0[oO] @octal       \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
   @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
 
+  @decimal                     \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal }
+  0[oO] @octal                 \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
+  0[xX] @hexadecimal           \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
+
   -- Unboxed floats and doubles (:: Float#, :: Double#)
   -- prim_{float,double} work with signed literals
   @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
@@ -496,8 +504,8 @@ data Token
   | ITvocurly
   | ITvccurly
   | ITobrack
-  | ITopabrack			-- [:, for parallel arrays with -fparr
-  | ITcpabrack			-- :], for parallel arrays with -fparr
+  | ITopabrack			-- [:, for parallel arrays with -XParr
+  | ITcpabrack			-- :], for parallel arrays with -XParr
   | ITcbrack
   | IToparen
   | ITcparen
@@ -529,6 +537,7 @@ data Token
   | ITprimchar   Char
   | ITprimstring FastString
   | ITprimint    Integer
+  | ITprimword   Integer
   | ITprimfloat  Rational
   | ITprimdouble Rational
 
@@ -542,6 +551,7 @@ data Token
   | ITparenEscape		--  $( 
   | ITvarQuote			--  '
   | ITtyQuote			--  ''
+  | ITquasiQuote (FastString,FastString,SrcSpan) --  [:...|...|]
 
   -- Arrow notation extension
   | ITproc
@@ -568,6 +578,7 @@ data Token
   deriving Show -- debugging
 #endif
 
+{-
 isSpecial :: Token -> Bool
 -- If we see M.x, where x is a keyword, but
 -- is special, we treat is as just plain M.x, 
@@ -590,6 +601,7 @@ isSpecial ITgroup   = True
 isSpecial ITby      = True
 isSpecial ITusing   = True
 isSpecial _             = False
+-}
 
 -- the bitmap provided as the third component indicates whether the
 -- corresponding extension keyword is valid under the extension options
@@ -697,11 +709,11 @@ reservedSymsFM = listToUFM $
 type Action = SrcSpan -> StringBuffer -> Int -> P (Located Token)
 
 special :: Token -> Action
-special tok span _buf len = return (L span tok)
+special tok span _buf _len = return (L span tok)
 
 token, layout_token :: Token -> Action
-token t span buf len = return (L span t)
-layout_token t span buf len = pushLexState layout >> return (L span t)
+token t span _buf _len = return (L span t)
+layout_token t span _buf _len = pushLexState layout >> return (L span t)
 
 idtoken :: (StringBuffer -> Int -> Token) -> Action
 idtoken f span buf len = return (L span $! (f buf len))
@@ -751,8 +763,10 @@ isNormalComment bits _ _ (AI _ _ buf)
 
 spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf))
 
+{-
 haddockDisabledAnd p bits _ _ (AI _ _ buf)
   = if haddockEnabled bits then False else (p buf)
+-}
 
 atEOL _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf == '\n'
 
@@ -801,12 +815,12 @@ nested_comment cont span _str _len = do
       Just ('-',input) -> case alexGetChar input of
         Nothing  -> errBrace input span
         Just ('\125',input) -> go (n-1) input
-        Just (c,_)          -> go n input
+        Just (_,_)          -> go n input
       Just ('\123',input) -> case alexGetChar input of
         Nothing  -> errBrace input span
         Just ('-',input) -> go (n+1) input
-        Just (c,_)       -> go n input
-      Just (c,input) -> go n input
+        Just (_,_)       -> go n input
+      Just (_,input) -> go n input
 
 nested_doc_comment :: Action
 nested_doc_comment span buf _len = withLexedDocType (go "")
@@ -815,16 +829,16 @@ nested_doc_comment span buf _len = withLexedDocType (go "")
       Nothing -> errBrace input span
       Just ('-',input) -> case alexGetChar input of
         Nothing -> errBrace input span
-        Just ('\125',input@(AI end _ buf2)) ->
+        Just ('\125',input) ->
           docCommentEnd input commentAcc docType buf span
-        Just (c,_) -> go ('-':commentAcc) input docType False
+        Just (_,_) -> go ('-':commentAcc) input docType False
       Just ('\123', input) -> case alexGetChar input of
         Nothing  -> errBrace input span
         Just ('-',input) -> do
           setInput input
           let cont = do input <- getInput; go commentAcc input docType False
           nested_comment cont span buf _len
-        Just (c,_) -> go ('\123':commentAcc) input docType False
+        Just (_,_) -> go ('\123':commentAcc) input docType False
       Just (c,input) -> go (c:commentAcc) input docType False
 
 withLexedDocType lexDocComment = do
@@ -838,7 +852,7 @@ withLexedDocType lexDocComment = do
  where 
     lexDocSection n input = case alexGetChar input of 
       Just ('*', input) -> lexDocSection (n+1) input
-      Just (c, _) -> lexDocComment input (ITdocSection n) True
+      Just (_,   _)     -> lexDocComment input (ITdocSection n) True
       Nothing -> do setInput input; lexToken -- eof reached, lex it normally
 
 -- docCommentEnd
@@ -916,6 +930,7 @@ splitQualName orig_buf len = split orig_buf orig_buf
 	qual_size = orig_buf `byteDiff` dot_buf
 
 varid span buf len = 
+  fs `seq`
   case lookupUFM reservedWordsFM fs of
 	Just (keyword,0)    -> do
 		maybe_layout keyword
@@ -961,6 +976,7 @@ tok_integral itint transint transbuf translen (radix,char_to_int) span buf len =
 -- some conveniences for use with tok_integral
 tok_num = tok_integral ITinteger
 tok_primint = tok_integral ITprimint
+tok_primword = tok_integral ITprimword positive
 positive = id
 negative = negate
 decimal = (10,octDecDigit)
@@ -1024,7 +1040,7 @@ new_layout_context strict span _buf _len = do
 		-- we must generate a {} sequence now.
 		pushLexState layout_left
 		return (L span ITvocurly)
-	other -> do
+	_ -> do
 		setContext (Layout offset : ctx)
 		return (L span ITvocurly)
 
@@ -1058,7 +1074,7 @@ setFile code span buf len = do
 -- Options, includes and language pragmas.
 
 lex_string_prag :: (String -> Token) -> Action
-lex_string_prag mkTok span buf len
+lex_string_prag mkTok span _buf _len
     = do input <- getInput
          start <- getSrcLoc
          tok <- go [] input
@@ -1071,7 +1087,7 @@ lex_string_prag mkTok span buf len
                    else case alexGetChar input of
                           Just (c,i) -> go (c:acc) i
                           Nothing -> err input
-          isString i [] = True
+          isString _ [] = True
           isString i (x:xs)
               = case alexGetChar i of
                   Just (c,i') | c == x    -> isString i' xs
@@ -1085,7 +1101,7 @@ lex_string_prag mkTok span buf len
 -- This stuff is horrible.  I hates it.
 
 lex_string_tok :: Action
-lex_string_tok span buf len = do
+lex_string_tok span _buf _len = do
   tok <- lex_string ""
   end <- getSrcLoc 
   return (L (mkSrcSpan (srcSpanStart span) end) tok)
@@ -1142,7 +1158,7 @@ lex_char_tok :: Action
 -- but WIHTOUT CONSUMING the x or T part  (the parser does that).
 -- So we have to do two characters of lookahead: when we see 'x we need to
 -- see if there's a trailing quote
-lex_char_tok span buf len = do	-- We've seen '
+lex_char_tok span _buf _len = do	-- We've seen '
    i1 <- getInput	-- Look ahead to first character
    let loc = srcSpanStart span
    case alexGetChar' i1 of
@@ -1155,14 +1171,14 @@ lex_char_tok span buf len = do	-- We've seen '
 			return (L (mkSrcSpan loc end2)  ITtyQuote)
 		   else lit_error
 
-	Just ('\\', i2@(AI end2 _ _)) -> do 	-- We've seen 'backslash 
+	Just ('\\', i2@(AI _end2 _ _)) -> do 	-- We've seen 'backslash
 		  setInput i2
 		  lit_ch <- lex_escape
 		  mc <- getCharOrFail	-- Trailing quote
 		  if mc == '\'' then finish_char_tok loc lit_ch
 			        else do setInput i2; lit_error 
 
-        Just (c, i2@(AI end2 _ _)) 
+        Just (c, i2@(AI _end2 _ _))
 		| not (isAny c) -> lit_error
 		| otherwise ->
 
@@ -1226,7 +1242,7 @@ lex_escape = do
 
 	'x'   -> readNum is_hexdigit 16 hexDigit
 	'o'   -> readNum is_octdigit  8 octDecDigit
-	x | is_digit x -> readNum2 is_digit 10 octDecDigit (octDecDigit x)
+	x | is_decdigit x -> readNum2 is_decdigit 10 octDecDigit (octDecDigit x)
 
 	c1 ->  do
 	   i <- getInput
@@ -1318,6 +1334,42 @@ getCharOrFail =  do
 	Just (c,i)  -> do setInput i; return c
 
 -- -----------------------------------------------------------------------------
+-- QuasiQuote
+
+lex_quasiquote_tok :: Action
+lex_quasiquote_tok span buf len = do
+  let quoter = reverse $ takeWhile (/= '$')
+               $ reverse $ lexemeToString buf (len - 1)
+  quoteStart <- getSrcLoc              
+  quote <- lex_quasiquote ""
+  end <- getSrcLoc 
+  return (L (mkSrcSpan (srcSpanStart span) end)
+           (ITquasiQuote (mkFastString quoter,
+                          mkFastString (reverse quote),
+                          mkSrcSpan quoteStart end)))
+
+lex_quasiquote :: String -> P String
+lex_quasiquote s = do
+  i <- getInput
+  case alexGetChar' i of
+    Nothing -> lit_error
+
+    Just ('\\',i)
+	| Just ('|',i) <- next -> do 
+		setInput i; lex_quasiquote ('|' : s)
+	| Just (']',i) <- next -> do 
+		setInput i; lex_quasiquote (']' : s)
+	where next = alexGetChar' i
+
+    Just ('|',i)
+	| Just (']',i) <- next -> do 
+		setInput i; return s
+	where next = alexGetChar' i
+
+    Just (c, i) -> do
+	 setInput i; lex_quasiquote (c : s)
+
+-- -----------------------------------------------------------------------------
 -- Warnings
 
 warn :: DynFlag -> SDoc -> Action
@@ -1370,7 +1422,7 @@ instance Monad P where
   fail = failP
 
 returnP :: a -> P a
-returnP a = P $ \s -> POk s a
+returnP a = a `seq` (P $ \s -> POk s a)
 
 thenP :: P a -> (a -> P b) -> P b
 (P m) `thenP` k = P $ \ s ->
@@ -1385,10 +1437,10 @@ failMsgP :: String -> P a
 failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg)
 
 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
-failLocMsgP loc1 loc2 str = P $ \s -> PFailed (mkSrcSpan loc1 loc2) (text str)
+failLocMsgP loc1 loc2 str = P $ \_ -> PFailed (mkSrcSpan loc1 loc2) (text str)
 
-failSpanMsgP :: SrcSpan -> String -> P a
-failSpanMsgP span msg = P $ \s -> PFailed span (text msg)
+failSpanMsgP :: SrcSpan -> SDoc -> P a
+failSpanMsgP span msg = P $ \_ -> PFailed span msg
 
 extension :: (Int -> Bool) -> P Bool
 extension p = P $ \s -> POk s (p $! extsBitmap s)
@@ -1478,7 +1530,7 @@ alexGetChar' (AI loc ofs s)
         ofs'   = advanceOffs c ofs
 
 advanceOffs :: Char -> Int -> Int
-advanceOffs '\n' offs = 0
+advanceOffs '\n' _    = 0
 advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8
 advanceOffs _    offs = offs + 1
 
@@ -1495,10 +1547,10 @@ popLexState :: P Int
 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
 
 getLexState :: P Int
-getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls
+getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls
 
 -- for reasons of efficiency, flags indicating language extensions (eg,
--- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
+-- -fglasgow-exts or -XParr) are represented by a bitmap stored in an unboxed
 -- integer
 
 genericsBit, ffiBit, parrBit :: Int
@@ -1520,6 +1572,7 @@ unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
 unboxedTuplesBit = 15 -- (# and #)
 standaloneDerivingBit = 16 -- standalone instance deriving declarations
 transformComprehensionsBit = 17
+qqBit	   = 18 -- enable quasiquoting
 
 genericsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
 always           _     = True
@@ -1540,17 +1593,16 @@ unicodeSyntaxEnabled flags = testBit flags unicodeSyntaxBit
 unboxedTuplesEnabled flags = testBit flags unboxedTuplesBit
 standaloneDerivingEnabled flags = testBit flags standaloneDerivingBit
 transformComprehensionsEnabled flags = testBit flags transformComprehensionsBit
+qqEnabled        flags = testBit flags qqBit
 
 -- PState for parsing options pragmas
 --
-pragState :: StringBuffer -> SrcLoc -> PState
-pragState buf loc  = 
+pragState :: DynFlags -> StringBuffer -> SrcLoc -> PState
+pragState dynflags buf loc =
   PState {
-      buffer	      = buf,
+      buffer        = buf,
       messages      = emptyMessages,
-      -- XXX defaultDynFlags is not right, but we don't have a real
-      -- dflags handy
-      dflags        = defaultDynFlags,
+      dflags        = dynflags,
       last_loc      = mkSrcSpan loc loc,
       last_offs     = 0,
       last_len      = 0,
@@ -1586,6 +1638,7 @@ mkPState buf loc flags  =
 	       .|. parrBit      `setBitIf` dopt Opt_PArr         flags
 	       .|. arrowsBit    `setBitIf` dopt Opt_Arrows       flags
 	       .|. thBit        `setBitIf` dopt Opt_TemplateHaskell flags
+	       .|. qqBit        `setBitIf` dopt Opt_QuasiQuotes flags
 	       .|. ipBit        `setBitIf` dopt Opt_ImplicitParams flags
 	       .|. explicitForallBit `setBitIf` dopt Opt_ScopedTypeVariables flags
 	       .|. explicitForallBit `setBitIf` dopt Opt_PolymorphicComponents flags
@@ -1625,7 +1678,7 @@ setContext ctx = P $ \s -> POk s{context=ctx} ()
 
 popContext :: P ()
 popContext = P $ \ s@(PState{ buffer = buf, context = ctx, 
-			   loc = loc, last_len = len, last_loc = last_loc }) ->
+                              last_len = len, last_loc = last_loc }) ->
   case ctx of
 	(_:tl) -> POk s{ context = tl } ()
 	[]     -> PFailed last_loc (srcParseErr buf len)
@@ -1654,8 +1707,8 @@ srcParseErr
   -> Message
 srcParseErr buf len
   = hcat [ if null token 
-	     then ptext SLIT("parse error (possibly incorrect indentation)")
-	     else hcat [ptext SLIT("parse error on input "),
+	     then ptext (sLit "parse error (possibly incorrect indentation)")
+	     else hcat [ptext (sLit "parse error on input "),
           	  	char '`', text token, char '\'']
     ]
   where token = lexemeToString (offsetBytes (-len) buf) len
@@ -1673,7 +1726,7 @@ srcParseFail = P $ \PState{ buffer = buf, last_len = len,
 lexError :: String -> P a
 lexError str = do
   loc <- getSrcLoc
-  i@(AI end _ buf) <- getInput
+  (AI end _ buf) <- getInput
   reportLexError loc end buf str
 
 -- -----------------------------------------------------------------------------
@@ -1682,7 +1735,7 @@ lexError str = do
 
 lexer :: (Located Token -> P a) -> P a
 lexer cont = do
-  tok@(L span tok__) <- lexToken
+  tok@(L _span _tok__) <- lexToken
 --  trace ("token: " ++ show tok__) $ do
   cont tok
 
@@ -1692,20 +1745,21 @@ lexToken = do
   sc <- getLexState
   exts <- getExts
   case alexScanUser exts inp sc of
-    AlexEOF -> do let span = mkSrcSpan loc1 loc1
-		  setLastToken span 0 0
-		  return (L span ITeof)
-    AlexError (AI loc2 _ buf) -> do 
-	reportLexError loc1 loc2 buf "lexical error"
+    AlexEOF -> do
+        let span = mkSrcSpan loc1 loc1
+        setLastToken span 0 0
+        return (L span ITeof)
+    AlexError (AI loc2 _ buf) ->
+        reportLexError loc1 loc2 buf "lexical error"
     AlexSkip inp2 _ -> do
-	setInput inp2
-	lexToken
-    AlexToken inp2@(AI end _ buf2) len t -> do
-    setInput inp2
-    let span = mkSrcSpan loc1 end
-    let bytes = byteDiff buf buf2
-    span `seq` setLastToken span bytes bytes
-    t span buf bytes
+        setInput inp2
+        lexToken
+    AlexToken inp2@(AI end _ buf2) _ t -> do
+        setInput inp2
+        let span = mkSrcSpan loc1 end
+        let bytes = byteDiff buf buf2
+        span `seq` setLastToken span bytes bytes
+        t span buf bytes
 
 reportLexError loc1 loc2 buf str
   | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input")