X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fparser%2FLexer.x;h=0dd36ff050724dee075359a579a3132404122357;hp=0a8c4104a76460aac29c54883cd53462a29445fc;hb=9bcd95bad83ee937c178970e8b729732e680fe1e;hpb=6517f499c5b7a7b9dd217e5394a159ba9df5dd8c

diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index 0a8c410..0dd36ff 100644
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -20,7 +20,30 @@
 --    - M... should be 3 tokens, not 1.
 --    - pragma-end should be only valid in a pragma
 
+--   qualified operator NOTES.
+--   
+--   - If M.(+) is a single lexeme, then..
+--     - Probably (+) should be a single lexeme too, for consistency.
+--       Otherwise ( + ) would be a prefix operator, but M.( + ) would not be.
+--     - But we have to rule out reserved operators, otherwise (..) becomes
+--       a different lexeme.
+--     - Should we therefore also rule out reserved operators in the qualified
+--       form?  This is quite difficult to achieve.  We don't do it for
+--       qualified varids.
+
 {
+{-# OPTIONS -w #-}
+-- The above warning suppression flag is a temporary kludge.
+-- While working on this module you are encouraged to remove it and fix
+-- any warnings in the module. See
+--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
+-- for details
+--
+-- Note that Alex itself generates code with some unused bindings and
+-- without type signatures, so removing the flag might not be possible.
+
+{-# OPTIONS_GHC -funbox-strict-fields #-}
+
 module Lexer (
    Token(..), lexer, pragState, mkPState, PState(..),
    P(..), ParseResult(..), getSrcLoc, 
@@ -28,11 +51,11 @@ module Lexer (
    getMessages,
    popContext, pushCurrentContext, setLastToken, setSrcLoc,
    getLexState, popLexState, pushLexState,
-   extension, glaExtsEnabled, bangPatEnabled
+   extension, standaloneDerivingEnabled, bangPatEnabled,
+   addWarning,
+   lexTokenStream
   ) where
 
-#include "HsVersions.h"
-
 import Bag
 import ErrUtils
 import Outputable
@@ -47,19 +70,15 @@ import Util		( maybePrefixMatch, readRational )
 
 import Control.Monad
 import Data.Bits
-import Data.Char 	( chr, isSpace )
+import Data.Char 	( chr, ord, isSpace )
 import Data.Ratio
 import Debug.Trace
 
-#if __GLASGOW_HASKELL__ >= 605
-import Data.Char 	( GeneralCategory(..), generalCategory, isPrint, isUpper )
-#else
-import Compat.Unicode	( GeneralCategory(..), generalCategory, isPrint, isUpper )
-#endif
+import Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper )
 }
 
 $unispace    = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
-$whitechar   = [\ \n\r\f\v\xa0 $unispace]
+$whitechar   = [\ \n\r\f\v $unispace]
 $white_no_nl = $whitechar # \n
 $tab         = \t
 
@@ -69,16 +88,16 @@ $decdigit  = $ascdigit -- for now, should really be $digit (ToDo)
 $digit     = [$ascdigit $unidigit]
 
 $special   = [\(\)\,\;\[\]\`\{\}]
-$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~ \xa1-\xbf \xd7 \xf7]
+$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
 $unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
 
 $unilarge  = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
-$asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
+$asclarge  = [A-Z]
 $large     = [$asclarge $unilarge]
 
 $unismall  = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
-$ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
+$ascsmall  = [a-z]
 $small     = [$ascsmall $unismall \_]
 
 $unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
@@ -141,12 +160,12 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
 -- space followed by a Haddock comment symbol (docsym) (in which case we'd
 -- have a Haddock comment). The rules then munch the rest of the line.
 
-"-- " ~$docsym .* ;
-"--" [^$symbol : \ ] .* ;
+"-- " ~[$docsym \#] .* { lineCommentToken }
+"--" [^$symbol : \ ] .* { lineCommentToken }
 
 -- Next, match Haddock comments if no -haddock flag
 
-"-- " $docsym .* / { ifExtension (not . haddockEnabled) } ;
+"-- " [$docsym \#] .* / { ifExtension (not . haddockEnabled) } { lineCommentToken }
 
 -- Now, when we've matched comments that begin with 2 dashes and continue
 -- with a different character, we need to match comments that begin with three
@@ -154,17 +173,17 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
 -- make sure that the first non-dash character isn't a symbol, and munch the
 -- rest of the line.
 
-"---"\-* [^$symbol :] .* ;
+"---"\-* [^$symbol :] .* { lineCommentToken }
 
 -- Since the previous rules all match dashes followed by at least one
 -- character, we also need to match a whole line filled with just dashes.
 
-"--"\-* / { atEOL } ;
+"--"\-* / { atEOL } { lineCommentToken }
 
 -- We need this rule since none of the other single line comment rules
 -- actually match this case.
 
-"-- " / { atEOL } ;
+"-- " / { atEOL } { lineCommentToken }
 
 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 -- blank lines) until we find a non-whitespace character, then do layout
@@ -202,7 +221,7 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
 -- generate a matching '}' token.
 <layout_left>  ()			{ do_layout_left }
 
-<0,option_prags,glaexts> \n				{ begin bol }
+<0,option_prags> \n				{ begin bol }
 
 "{-#" $whitechar* (line|LINE) 		{ begin line_prag2 }
 
@@ -220,16 +239,8 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
    -- NOTE: accept -} at the end of a LINE pragma, for compatibility
    -- with older versions of GHC which generated these.
 
--- We only want RULES pragmas to be picked up when explicit forall
--- syntax is enabled is on, because the contents of the pragma always
--- uses it. If it's not on then we're sure to get a parse error.
--- (ToDo: we should really emit a warning when ignoring pragmas)
--- XXX Now that we can enable this without the -fglasgow-exts hammer,
--- is it better just to let the parse error happen?
-<0,glaexts>
-  "{-#" $whitechar* (RULES|rules) / { ifExtension explicitForallEnabled } { token ITrules_prag }
-
-<0,option_prags,glaexts> {
+<0,option_prags> {
+  "{-#" $whitechar* (RULES|rules)       { rulePrag }
   "{-#" $whitechar* (INLINE|inline)	{ token (ITinline_prag True) }
   "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
   					{ token (ITinline_prag False) }
@@ -241,6 +252,8 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
 	$whitechar* (NO(T?)INLINE|no(t?)inline)
 					{ token (ITspec_inline_prag False) }
   "{-#" $whitechar* (SOURCE|source)	{ token ITsource_prag }
+  "{-#" $whitechar* (WARNING|warning)
+  					{ token ITwarning_prag }
   "{-#" $whitechar* (DEPRECATED|deprecated)
   					{ token ITdeprecated_prag }
   "{-#" $whitechar* (SCC|scc)		{ token ITscc_prag }
@@ -248,47 +261,60 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
   					{ token ITgenerated_prag }
   "{-#" $whitechar* (CORE|core)		{ token ITcore_prag }
   "{-#" $whitechar* (UNPACK|unpack)	{ token ITunpack_prag }
+  "{-#" $whitechar* (ANN|ann)	        { token ITann_prag }
 
-  "{-#" $whitechar* (DOCOPTIONS|docoptions)
-  / { ifExtension haddockEnabled }     { lex_string_prag ITdocOptions }
-
- "{-#"                                 { nested_comment lexToken }
+  -- We ignore all these pragmas, but don't generate a warning for them
+  -- CFILES is a hugs-only thing.
+  "{-#" $whitechar* (OPTIONS_HUGS|options_hugs|OPTIONS_NHC98|options_nhc98|OPTIONS_JHC|options_jhc|CFILES|cfiles)
+                    { nested_comment lexToken }
 
   -- ToDo: should only be valid inside a pragma:
-  "#-}" 				{ token ITclose_prag}
+  "#-}" 				{ endPrag }
 }
 
 <option_prags> {
-  "{-#" $whitechar* (OPTIONS|options)   { lex_string_prag IToptions_prag }
-  "{-#" $whitechar* (OPTIONS_GHC|options_ghc)
+  "{-#"  $whitechar* (OPTIONS|options)   { lex_string_prag IToptions_prag }
+  "{-#"  $whitechar* (OPTIONS_GHC|options_ghc)
                                         { lex_string_prag IToptions_prag }
-  "{-#" $whitechar* (LANGUAGE|language) { token ITlanguage_prag }
-  "{-#" $whitechar* (INCLUDE|include)   { lex_string_prag ITinclude_prag }
+  "{-#"  $whitechar* (OPTIONS_HADDOCK|options_haddock)
+                                         { lex_string_prag ITdocOptions }
+  "-- #"                                 { multiline_doc_comment }
+  "{-#"  $whitechar* (LANGUAGE|language) { token ITlanguage_prag }
+  "{-#"  $whitechar* (INCLUDE|include)   { lex_string_prag ITinclude_prag }
+}
+
+<0> {
+  -- In the "0" mode we ignore these pragmas
+  "{-#"  $whitechar* (OPTIONS|options|OPTIONS_GHC|options_ghc|OPTIONS_HADDOCK|options_haddock|LANGUAGE|language|INCLUDE|include)
+                     { nested_comment lexToken }
+}
+
+<0> {
+  "-- #" .* { lineCommentToken }
 }
 
-<0,option_prags,glaexts> {
-	-- This is to catch things like {-# OPTIONS OPTIONS_HUGS ... 
-  "{-#" $whitechar* $idchar+		{ nested_comment lexToken }
+<0,option_prags> {
+  "{-#"  { warnThen Opt_WarnUnrecognisedPragmas (text "Unrecognised pragma")
+                    (nested_comment lexToken) }
 }
 
 -- '0' state: ordinary lexemes
--- 'glaexts' state: glasgow extensions (postfix '#', etc.)
 
 -- Haddock comments
 
-<0,glaexts> {
-  "-- " $docsym    / { ifExtension haddockEnabled } { multiline_doc_comment }
-  "{-" \ ? $docsym / { ifExtension haddockEnabled } { nested_doc_comment }
+<0> {
+  "-- " $docsym      / { ifExtension haddockEnabled } { multiline_doc_comment }
+  "{-" \ ? $docsym   / { ifExtension haddockEnabled } { nested_doc_comment }
 }
 
 -- "special" symbols
 
-<0,glaexts> {
+<0> {
   "[:" / { ifExtension parrEnabled }	{ token ITopabrack }
   ":]" / { ifExtension parrEnabled }	{ token ITcpabrack }
 }
   
-<0,glaexts> {
+<0> {
   "[|"	    / { ifExtension thEnabled }	{ token ITopenExpQuote }
   "[e|"	    / { ifExtension thEnabled }	{ token ITopenExpQuote }
   "[p|"	    / { ifExtension thEnabled }	{ token ITopenPatQuote }
@@ -297,31 +323,34 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
   "|]"	    / { ifExtension thEnabled }	{ token ITcloseQuote }
   \$ @varid / { ifExtension thEnabled }	{ skip_one_varid ITidEscape }
   "$("	    / { ifExtension thEnabled }	{ token ITparenEscape }
+
+  "[$" @varid "|"  / { ifExtension qqEnabled }
+                     { lex_quasiquote_tok }
 }
 
-<0,glaexts> {
+<0> {
   "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol }
 					{ special IToparenbar }
   "|)" / { ifExtension arrowsEnabled }  { special ITcparenbar }
 }
 
-<0,glaexts> {
+<0> {
   \? @varid / { ifExtension ipEnabled }	{ skip_one_varid ITdupipvarid }
 }
 
-<0,glaexts> {
+<0> {
   "(#" / { ifExtension unboxedTuplesEnabled `alexAndPred` notFollowedBySymbol }
          { token IToubxparen }
   "#)" / { ifExtension unboxedTuplesEnabled }
          { token ITcubxparen }
 }
 
-<glaexts> {
-  "{|"					{ token ITocurlybar }
-  "|}"					{ token ITccurlybar }
+<0> {
+  "{|" / { ifExtension genericsEnabled } { token ITocurlybar }
+  "|}" / { ifExtension genericsEnabled } { token ITccurlybar }
 }
 
-<0,option_prags,glaexts> {
+<0,option_prags> {
   \(					{ special IToparen }
   \)					{ special ITcparen }
   \[					{ special ITobrack }
@@ -334,70 +363,69 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
   \}					{ close_brace }
 }
 
-<0,option_prags,glaexts> {
-  @qual @varid			{ check_qvarid }
+<0,option_prags> {
+  @qual @varid			{ idtoken qvarid }
   @qual @conid			{ idtoken qconid }
   @varid			{ varid }
   @conid			{ idtoken conid }
 }
 
--- after an illegal qvarid, such as 'M.let', 
--- we back up and try again in the bad_qvarid state:
-<bad_qvarid> {
-  @conid			{ pop_and (idtoken conid) }
-  @qual @conid			{ pop_and (idtoken qconid) }
-}
-
-<0,glaexts> {
+<0> {
   @qual @varid "#"+ / { ifExtension magicHashEnabled } { idtoken qvarid }
   @qual @conid "#"+ / { ifExtension magicHashEnabled } { idtoken qconid }
   @varid "#"+       / { ifExtension magicHashEnabled } { varid }
   @conid "#"+       / { ifExtension magicHashEnabled } { idtoken conid }
 }
 
--- ToDo: M.(,,,)
-
-<0,glaexts> {
-  @qual @varsym			{ idtoken qvarsym }
-  @qual @consym			{ idtoken qconsym }
-  @varsym			{ varsym }
-  @consym			{ consym }
+-- ToDo: - move `var` and (sym) into lexical syntax?
+--       - remove backquote from $special?
+<0> {
+  @qual @varsym       / { ifExtension oldQualOps } { idtoken qvarsym }
+  @qual @consym       / { ifExtension oldQualOps } { idtoken qconsym }
+  @qual \( @varsym \) / { ifExtension newQualOps } { idtoken prefixqvarsym }
+  @qual \( @consym \) / { ifExtension newQualOps } { idtoken prefixqconsym }
+  @varsym                                          { varsym }
+  @consym                                          { consym }
 }
 
 -- For the normal boxed literals we need to be careful
 -- when trying to be close to Haskell98
-<0,glaexts> {
+<0> {
   -- Normal integral literals (:: Num a => a, from Integer)
-  @decimal			{ tok_num positive 0 0 decimal }
-  0[oO] @octal			{ tok_num positive 2 2 octal }
-  0[xX] @hexadecimal		{ tok_num positive 2 2 hexadecimal }
+  @decimal           { tok_num positive 0 0 decimal }
+  0[oO] @octal       { tok_num positive 2 2 octal }
+  0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }
 
   -- Normal rational literals (:: Fractional a => a, from Rational)
-  @floating_point		{ strtoken tok_float }
+  @floating_point    { strtoken tok_float }
 }
 
-<glaexts> {
-  -- Unboxed ints (:: Int#)
+<0> {
+  -- Unboxed ints (:: Int#) and words (:: Word#)
   -- It's simpler (and faster?) to give separate cases to the negatives,
   -- especially considering octal/hexadecimal prefixes.
-  @decimal \#			{ tok_primint positive 0 1 decimal }
-  0[oO] @octal \#		{ tok_primint positive 2 3 octal }
-  0[xX] @hexadecimal \#		{ tok_primint positive 2 3 hexadecimal }
-  @negative @decimal \#			{ tok_primint negative 1 2 decimal }
-  @negative 0[oO] @octal \#		{ tok_primint negative 3 4 octal }
-  @negative 0[xX] @hexadecimal \#	{ tok_primint negative 3 4 hexadecimal }
+  @decimal                     \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
+  0[oO] @octal                 \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
+  0[xX] @hexadecimal           \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
+  @negative @decimal           \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
+  @negative 0[oO] @octal       \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
+  @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
+
+  @decimal                     \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal }
+  0[oO] @octal                 \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
+  0[xX] @hexadecimal           \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
 
   -- Unboxed floats and doubles (:: Float#, :: Double#)
   -- prim_{float,double} work with signed literals
-  @signed @floating_point \#		{ init_strtoken 1 tok_primfloat }
-  @signed @floating_point \# \#		{ init_strtoken 2 tok_primdouble }
+  @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
+  @signed @floating_point \# \# / { ifExtension magicHashEnabled } { init_strtoken 2 tok_primdouble }
 }
 
 -- Strings and chars are lexed by hand-written code.  The reason is
 -- that even if we recognise the string or char here in the regex
 -- lexer, we would still have to parse the string afterward in order
 -- to convert it to a String.
-<0,glaexts> {
+<0> {
   \'				{ lex_char_tok }
   \" 				{ lex_string_tok }
 }
@@ -413,7 +441,6 @@ data Token
   | ITdata
   | ITdefault
   | ITderiving
-  | ITderive
   | ITdo
   | ITelse
   | IThiding
@@ -447,6 +474,9 @@ data Token
   | ITdotnet
   | ITmdo
   | ITfamily
+  | ITgroup
+  | ITby
+  | ITusing
 
 	-- Pragmas
   | ITinline_prag Bool		-- True <=> INLINE, False <=> NOINLINE
@@ -454,12 +484,14 @@ data Token
   | ITspec_inline_prag Bool	-- SPECIALISE INLINE (or NOINLINE)
   | ITsource_prag
   | ITrules_prag
+  | ITwarning_prag
   | ITdeprecated_prag
   | ITline_prag
   | ITscc_prag
   | ITgenerated_prag
   | ITcore_prag                 -- hdaume: core annotations
   | ITunpack_prag
+  | ITann_prag
   | ITclose_prag
   | IToptions_prag String
   | ITinclude_prag String
@@ -490,8 +522,8 @@ data Token
   | ITvocurly
   | ITvccurly
   | ITobrack
-  | ITopabrack			-- [:, for parallel arrays with -fparr
-  | ITcpabrack			-- :], for parallel arrays with -fparr
+  | ITopabrack			-- [:, for parallel arrays with -XParr
+  | ITcpabrack			-- :], for parallel arrays with -XParr
   | ITcbrack
   | IToparen
   | ITcparen
@@ -510,6 +542,8 @@ data Token
   | ITqconid  (FastString,FastString)
   | ITqvarsym (FastString,FastString)
   | ITqconsym (FastString,FastString)
+  | ITprefixqvarsym (FastString,FastString)
+  | ITprefixqconsym (FastString,FastString)
 
   | ITdupipvarid   FastString	-- GHC extension: implicit param: ?x
 
@@ -523,6 +557,7 @@ data Token
   | ITprimchar   Char
   | ITprimstring FastString
   | ITprimint    Integer
+  | ITprimword   Integer
   | ITprimfloat  Rational
   | ITprimdouble Rational
 
@@ -536,6 +571,7 @@ data Token
   | ITparenEscape		--  $( 
   | ITvarQuote			--  '
   | ITtyQuote			--  ''
+  | ITquasiQuote (FastString,FastString,SrcSpan) --  [$...|...|]
 
   -- Arrow notation extension
   | ITproc
@@ -556,18 +592,21 @@ data Token
   | ITdocCommentNamed String     -- something beginning '-- $'
   | ITdocSection      Int String -- a section heading
   | ITdocOptions      String     -- doc options (prune, ignore-exports, etc)
+  | ITdocOptionsOld   String     -- doc options declared "-- # ..."-style
+  | ITlineComment     String     -- comment starting with "--"
+  | ITblockComment    String     -- comment in {- -}
 
 #ifdef DEBUG
   deriving Show -- debugging
 #endif
 
+{-
 isSpecial :: Token -> Bool
 -- If we see M.x, where x is a keyword, but
 -- is special, we treat is as just plain M.x, 
 -- not as a keyword.
 isSpecial ITas        	= True
 isSpecial IThiding    	= True
-isSpecial ITderive    	= True
 isSpecial ITqualified 	= True
 isSpecial ITforall    	= True
 isSpecial ITexport    	= True
@@ -580,7 +619,11 @@ isSpecial ITccallconv   = True
 isSpecial ITstdcallconv = True
 isSpecial ITmdo		= True
 isSpecial ITfamily	= True
+isSpecial ITgroup   = True
+isSpecial ITby      = True
+isSpecial ITusing   = True
 isSpecial _             = False
+-}
 
 -- the bitmap provided as the third component indicates whether the
 -- corresponding extension keyword is valid under the extension options
@@ -598,7 +641,6 @@ reservedWordsFM = listToUFM $
 	( "data",	ITdata, 	0 ),     
 	( "default",	ITdefault, 	0 ),  
 	( "deriving",	ITderiving, 	0 ), 
-	( "derive",	ITderive, 	0 ), 
 	( "do",		ITdo, 		0 ),       
 	( "else",	ITelse, 	0 ),     
 	( "hiding",	IThiding, 	0 ),
@@ -619,9 +661,12 @@ reservedWordsFM = listToUFM $
 	( "where",	ITwhere, 	0 ),
 	( "_scc_",	ITscc, 		0 ),		-- ToDo: remove
 
-      	( "forall",	ITforall,	 bit explicitForallBit),
+    ( "forall",	ITforall,	 bit explicitForallBit .|. bit inRulePragBit),
 	( "mdo",	ITmdo,		 bit recursiveDoBit),
 	( "family",	ITfamily,	 bit tyFamBit),
+    ( "group",  ITgroup,     bit transformComprehensionsBit),
+    ( "by",     ITby,        bit transformComprehensionsBit),
+    ( "using",  ITusing,     bit transformComprehensionsBit),
 
 	( "foreign",	ITforeign,	 bit ffiBit),
 	( "export",	ITexport,	 bit ffiBit),
@@ -657,11 +702,9 @@ reservedSymsFM = listToUFM $
        ,("!",   ITbang,     always)
 
         -- For data T (a::*) = MkT
-       ,("*", ITstar, \i -> glaExtsEnabled i ||
-                            kindSigsEnabled i ||
-                            tyFamEnabled i)
+       ,("*", ITstar, \i -> kindSigsEnabled i || tyFamEnabled i)
         -- For 'forall a . t'
-       ,(".", ITdot, explicitForallEnabled)
+       ,(".", ITdot, \i -> explicitForallEnabled i || inRulePrag i)
 
        ,("-<",  ITlarrowtail, arrowsEnabled)
        ,(">-",  ITrarrowtail, arrowsEnabled)
@@ -688,11 +731,11 @@ reservedSymsFM = listToUFM $
 type Action = SrcSpan -> StringBuffer -> Int -> P (Located Token)
 
 special :: Token -> Action
-special tok span _buf len = return (L span tok)
+special tok span _buf _len = return (L span tok)
 
 token, layout_token :: Token -> Action
-token t span buf len = return (L span t)
-layout_token t span buf len = pushLexState layout >> return (L span t)
+token t span _buf _len = return (L span t)
+layout_token t span _buf _len = pushLexState layout >> return (L span t)
 
 idtoken :: (StringBuffer -> Int -> Token) -> Action
 idtoken f span buf len = return (L span $! (f buf len))
@@ -742,8 +785,10 @@ isNormalComment bits _ _ (AI _ _ buf)
 
 spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf))
 
+{-
 haddockDisabledAnd p bits _ _ (AI _ _ buf)
   = if haddockEnabled bits then False else (p buf)
+-}
 
 atEOL _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf == '\n'
 
@@ -777,6 +822,11 @@ multiline_doc_comment span buf _len = withLexedDocType (worker "")
             | otherwise -> input
           Nothing -> input
 
+-- | Action attached to the ordinary "--" line-comment rules.
+-- When the rawTokenStreamEnabled extension bit is on, the comment is
+-- handed to strtoken with the ITlineComment constructor; otherwise the
+-- comment is dropped and lexing simply continues with the next token.
+lineCommentToken :: Action
+lineCommentToken span buf len = do
+  keepComments <- extension rawTokenStreamEnabled
+  if keepComments
+    then strtoken ITlineComment span buf len
+    else lexToken
+
 {-
   nested comments require traversing by hand, they can't be parsed
   using regular expressions.
@@ -784,20 +834,24 @@ multiline_doc_comment span buf _len = withLexedDocType (worker "")
 nested_comment :: P (Located Token) -> Action
 nested_comment cont span _str _len = do
   input <- getInput
-  go 1 input
+  go "" (1::Int) input
   where
-    go 0 input = do setInput input; cont
-    go n input = case alexGetChar input of
+    go commentAcc 0 input = do setInput input
+                               b <- extension rawTokenStreamEnabled
+                               if b
+                                 then docCommentEnd input commentAcc ITblockComment _str span
+                                 else cont
+    go commentAcc n input = case alexGetChar input of
       Nothing -> errBrace input span
       Just ('-',input) -> case alexGetChar input of
         Nothing  -> errBrace input span
-        Just ('\125',input) -> go (n-1) input
-        Just (c,_)          -> go n input
+        Just ('\125',input) -> go commentAcc (n-1) input
+        Just (_,_)          -> go ('-':commentAcc) n input
       Just ('\123',input) -> case alexGetChar input of
         Nothing  -> errBrace input span
-        Just ('-',input) -> go (n+1) input
-        Just (c,_)       -> go n input
-      Just (c,input) -> go n input
+        Just ('-',input) -> go ('-':'\123':commentAcc) (n+1) input
+        Just (_,_)       -> go ('\123':commentAcc) n input
+      Just (c,input) -> go (c:commentAcc) n input
 
 nested_doc_comment :: Action
 nested_doc_comment span buf _len = withLexedDocType (go "")
@@ -806,16 +860,16 @@ nested_doc_comment span buf _len = withLexedDocType (go "")
       Nothing -> errBrace input span
       Just ('-',input) -> case alexGetChar input of
         Nothing -> errBrace input span
-        Just ('\125',input@(AI end _ buf2)) ->
+        Just ('\125',input) ->
           docCommentEnd input commentAcc docType buf span
-        Just (c,_) -> go ('-':commentAcc) input docType False
+        Just (_,_) -> go ('-':commentAcc) input docType False
       Just ('\123', input) -> case alexGetChar input of
         Nothing  -> errBrace input span
         Just ('-',input) -> do
           setInput input
           let cont = do input <- getInput; go commentAcc input docType False
           nested_comment cont span buf _len
-        Just (c,_) -> go ('\123':commentAcc) input docType False
+        Just (_,_) -> go ('\123':commentAcc) input docType False
       Just (c,input) -> go (c:commentAcc) input docType False
 
 withLexedDocType lexDocComment = do
@@ -824,13 +878,26 @@ withLexedDocType lexDocComment = do
     '|' -> lexDocComment input ITdocCommentNext False
     '^' -> lexDocComment input ITdocCommentPrev False
     '$' -> lexDocComment input ITdocCommentNamed False
-    '*' -> lexDocSection 1 input 
+    '*' -> lexDocSection 1 input
+    '#' -> lexDocComment input ITdocOptionsOld False
  where 
     lexDocSection n input = case alexGetChar input of 
       Just ('*', input) -> lexDocSection (n+1) input
-      Just (c, _) -> lexDocComment input (ITdocSection n) True
+      Just (_,   _)     -> lexDocComment input (ITdocSection n) True
       Nothing -> do setInput input; lexToken -- eof reached, lex it normally
 
+-- RULES pragmas turn on the forall and '.' keywords, and we turn them
+-- off again at the end of the pragma.
+--
+-- rulePrag is the action for an opening RULES pragma: it ORs
+-- inRulePragBit into the bits handled by setExts and yields
+-- ITrules_prag.  The buffer and length arguments are not needed.
+rulePrag :: Action
+rulePrag span _buf _len =
+  setExts (.|. bit inRulePragBit) >> return (L span ITrules_prag)
+
+-- Action for "#-}": masks inRulePragBit out of the bits handled by
+-- setExts and yields ITclose_prag.  The bit is cleared on every
+-- closing pragma, whether or not a RULES pragma was actually open.
+endPrag :: Action
+endPrag span _buf _len =
+  setExts (.&. complement (bit inRulePragBit)) >> return (L span ITclose_prag)
+
 -- docCommentEnd
 -------------------------------------------------------------------------------
 -- This function is quite tricky. We can't just return a new token, we also
@@ -874,38 +941,14 @@ close_brace span _str _len = do
   popContext
   return (L span ITccurly)
 
--- We have to be careful not to count M.<varid> as a qualified name
--- when <varid> is a keyword.  We hack around this by catching 
--- the offending tokens afterward, and re-lexing in a different state.
-check_qvarid span buf len = do
-  case lookupUFM reservedWordsFM var of
-	Just (keyword,exts)
-	  | not (isSpecial keyword) ->
-	  if exts == 0 
-	     then try_again
-	     else do
-		b <- extension (\i -> exts .&. i /= 0)
-		if b then try_again
-		     else return token
-	_other -> return token
-  where
-	(mod,var) = splitQualName buf len
-	token     = L span (ITqvarid (mod,var))
-
-	try_again = do
-		(AI _ offs _) <- getInput	
-		setInput (AI (srcSpanStart span) (offs-len) buf)
-		pushLexState bad_qvarid
-		lexToken
-
-qvarid buf len = ITqvarid $! splitQualName buf len
-qconid buf len = ITqconid $! splitQualName buf len
+qvarid buf len = ITqvarid $! splitQualName buf len False
+qconid buf len = ITqconid $! splitQualName buf len False
 
-splitQualName :: StringBuffer -> Int -> (FastString,FastString)
+splitQualName :: StringBuffer -> Int -> Bool -> (FastString,FastString)
 -- takes a StringBuffer and a length, and returns the module name
 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 -- because of hierarchical module names.
-splitQualName orig_buf len = split orig_buf orig_buf
+splitQualName orig_buf len parens = split orig_buf orig_buf
   where
     split buf dot_buf
 	| orig_buf `byteDiff` buf >= len  = done dot_buf
@@ -925,11 +968,14 @@ splitQualName orig_buf len = split orig_buf orig_buf
 
     done dot_buf =
 	(lexemeToFastString orig_buf (qual_size - 1),
-	 lexemeToFastString dot_buf (len - qual_size))
+	 if parens -- Prelude.(+)
+            then lexemeToFastString (stepOn dot_buf) (len - qual_size - 2)
+            else lexemeToFastString dot_buf (len - qual_size))
       where
 	qual_size = orig_buf `byteDiff` dot_buf
 
 varid span buf len = 
+  fs `seq`
   case lookupUFM reservedWordsFM fs of
 	Just (keyword,0)    -> do
 		maybe_layout keyword
@@ -946,8 +992,10 @@ varid span buf len =
 conid buf len = ITconid fs
   where fs = lexemeToFastString buf len
 
-qvarsym buf len = ITqvarsym $! splitQualName buf len
-qconsym buf len = ITqconsym $! splitQualName buf len
+qvarsym buf len = ITqvarsym $! splitQualName buf len False
+qconsym buf len = ITqconsym $! splitQualName buf len False
+prefixqvarsym buf len = ITprefixqvarsym $! splitQualName buf len True
+prefixqconsym buf len = ITprefixqconsym $! splitQualName buf len True
 
 varsym = sym ITvarsym
 consym = sym ITconsym
@@ -975,6 +1023,7 @@ tok_integral itint transint transbuf translen (radix,char_to_int) span buf len =
 -- some conveniences for use with tok_integral
 tok_num = tok_integral ITinteger
 tok_primint = tok_integral ITprimint
+tok_primword = tok_integral ITprimword positive
 positive = id
 negative = negate
 decimal = (10,octDecDigit)
@@ -1038,7 +1087,7 @@ new_layout_context strict span _buf _len = do
 		-- we must generate a {} sequence now.
 		pushLexState layout_left
 		return (L span ITvocurly)
-	other -> do
+	_ -> do
 		setContext (Layout offset : ctx)
 		return (L span ITvocurly)
 
@@ -1072,7 +1121,7 @@ setFile code span buf len = do
 -- Options, includes and language pragmas.
 
 lex_string_prag :: (String -> Token) -> Action
-lex_string_prag mkTok span buf len
+lex_string_prag mkTok span _buf _len
     = do input <- getInput
          start <- getSrcLoc
          tok <- go [] input
@@ -1085,7 +1134,7 @@ lex_string_prag mkTok span buf len
                    else case alexGetChar input of
                           Just (c,i) -> go (c:acc) i
                           Nothing -> err input
-          isString i [] = True
+          isString _ [] = True
           isString i (x:xs)
               = case alexGetChar i of
                   Just (c,i') | c == x    -> isString i' xs
@@ -1099,7 +1148,7 @@ lex_string_prag mkTok span buf len
 -- This stuff is horrible.  I hates it.
 
 lex_string_tok :: Action
-lex_string_tok span buf len = do
+lex_string_tok span _buf _len = do
   tok <- lex_string ""
   end <- getSrcLoc 
   return (L (mkSrcSpan (srcSpanStart span) end) tok)
@@ -1112,8 +1161,8 @@ lex_string s = do
 
     Just ('"',i)  -> do
 	setInput i
-	glaexts <- extension glaExtsEnabled
-	if glaexts
+	magicHash <- extension magicHashEnabled
+	if magicHash
 	  then do
 	    i <- getInput
 	    case alexGetChar' i of
@@ -1156,7 +1205,7 @@ lex_char_tok :: Action
 -- but WIHTOUT CONSUMING the x or T part  (the parser does that).
 -- So we have to do two characters of lookahead: when we see 'x we need to
 -- see if there's a trailing quote
-lex_char_tok span buf len = do	-- We've seen '
+lex_char_tok span _buf _len = do	-- We've seen '
    i1 <- getInput	-- Look ahead to first character
    let loc = srcSpanStart span
    case alexGetChar' i1 of
@@ -1169,14 +1218,14 @@ lex_char_tok span buf len = do	-- We've seen '
 			return (L (mkSrcSpan loc end2)  ITtyQuote)
 		   else lit_error
 
-	Just ('\\', i2@(AI end2 _ _)) -> do 	-- We've seen 'backslash 
+	Just ('\\', i2@(AI _end2 _ _)) -> do 	-- We've seen 'backslash
 		  setInput i2
 		  lit_ch <- lex_escape
 		  mc <- getCharOrFail	-- Trailing quote
 		  if mc == '\'' then finish_char_tok loc lit_ch
 			        else do setInput i2; lit_error 
 
-        Just (c, i2@(AI end2 _ _)) 
+        Just (c, i2@(AI _end2 _ _))
 		| not (isAny c) -> lit_error
 		| otherwise ->
 
@@ -1197,9 +1246,9 @@ lex_char_tok span buf len = do	-- We've seen '
 finish_char_tok :: SrcLoc -> Char -> P (Located Token)
 finish_char_tok loc ch	-- We've already seen the closing quote
 			-- Just need to check for trailing #
-  = do	glaexts <- extension glaExtsEnabled
+  = do	magicHash <- extension magicHashEnabled
 	i@(AI end _ _) <- getInput
-	if glaexts then do
+	if magicHash then do
 		case alexGetChar' i of
 			Just ('#',i@(AI end _ _)) -> do
 				setInput i
@@ -1216,7 +1265,7 @@ lex_char c inp = do
       c | isAny c -> do setInput inp; return c
       _other -> lit_error
 
-isAny c | c > '\xff' = isPrint c
+isAny c | c > '\x7f' = isPrint c
 	| otherwise  = is_any c
 
 lex_escape :: P Char
@@ -1240,7 +1289,7 @@ lex_escape = do
 
 	'x'   -> readNum is_hexdigit 16 hexDigit
 	'o'   -> readNum is_octdigit  8 octDecDigit
-	x | is_digit x -> readNum2 is_digit 10 octDecDigit (octDecDigit x)
+	x | is_decdigit x -> readNum2 is_decdigit 10 octDecDigit (octDecDigit x)
 
 	c1 ->  do
 	   i <- getInput
@@ -1332,13 +1381,54 @@ getCharOrFail =  do
 	Just (c,i)  -> do setInput i; return c
 
 -- -----------------------------------------------------------------------------
+-- QuasiQuote
+
+lex_quasiquote_tok :: Action  -- lexes an entire quasi-quote into a single ITquasiQuote token
+lex_quasiquote_tok span buf len = do
+  let quoter = reverse $ takeWhile (/= '$')  -- quoter name: the text after the last '$' in the lexeme,
+               $ reverse $ lexemeToString buf (len - 1)  -- taken from the lexeme minus its final character
+  quoteStart <- getSrcLoc              -- location at which the quoted body starts
+  quote <- lex_quasiquote ""  -- body is returned in reverse order; un-reversed below
+  end <- getSrcLoc  -- location after lex_quasiquote has consumed the body
+  return (L (mkSrcSpan (srcSpanStart span) end)  -- token span: start of the opening lexeme up to end
+           (ITquasiQuote (mkFastString quoter,
+                          mkFastString (reverse quote),
+                          mkSrcSpan quoteStart end)))  -- separate span covering only quoteStart..end
+
+lex_quasiquote :: String -> P String  -- s accumulates the quote body in reverse (newest char first)
+lex_quasiquote s = do
+  i <- getInput
+  case alexGetChar' i of
+    Nothing -> lit_error  -- no further character available before a closing "|]" was seen
+
+    Just ('\\',i)  -- escapes: \| and \] contribute a bare | or ]; any other backslash falls through to the last case
+	| Just ('|',i) <- next -> do 
+		setInput i; lex_quasiquote ('|' : s)
+	| Just (']',i) <- next -> do 
+		setInput i; lex_quasiquote (']' : s)
+	where next = alexGetChar' i
+
+    Just ('|',i)  -- "|]" terminates the quote and is not added to s; a lone '|' falls through to the last case
+	| Just (']',i) <- next -> do 
+		setInput i; return s
+	where next = alexGetChar' i
+
+    Just (c, i) -> do  -- every other character is consumed and kept verbatim
+	 setInput i; lex_quasiquote (c : s)
+
+-- -----------------------------------------------------------------------------
 -- Warnings
 
 warn :: DynFlag -> SDoc -> Action
-warn option warning span _buf _len = do
-    addWarning option (mkWarnMsg span alwaysQualify warning)
+warn option warning srcspan _buf _len = do
+    addWarning option srcspan warning
     lexToken
 
+warnThen :: DynFlag -> SDoc -> Action -> Action  -- wraps an Action so that a warning is recorded before it runs
+warnThen option warning action srcspan buf len = do
+    addWarning option srcspan warning  -- warning is attached to the span of the current lexeme
+    action srcspan buf len  -- then run the wrapped action on the same span, buffer and length
+
 -- -----------------------------------------------------------------------------
 -- The Parse Monad
 
@@ -1384,7 +1474,7 @@ instance Monad P where
   fail = failP
 
 returnP :: a -> P a
-returnP a = P $ \s -> POk s a
+returnP a = a `seq` (P $ \s -> POk s a)
 
 thenP :: P a -> (a -> P b) -> P b
 (P m) `thenP` k = P $ \ s ->
@@ -1399,10 +1489,10 @@ failMsgP :: String -> P a
 failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg)
 
 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
-failLocMsgP loc1 loc2 str = P $ \s -> PFailed (mkSrcSpan loc1 loc2) (text str)
+failLocMsgP loc1 loc2 str = P $ \_ -> PFailed (mkSrcSpan loc1 loc2) (text str)
 
-failSpanMsgP :: SrcSpan -> String -> P a
-failSpanMsgP span msg = P $ \s -> PFailed span (text msg)
+failSpanMsgP :: SrcSpan -> SDoc -> P a
+failSpanMsgP span msg = P $ \_ -> PFailed span msg
 
 extension :: (Int -> Bool) -> P Bool
 extension p = P $ \s -> POk s (p $! extsBitmap s)
@@ -1410,6 +1500,9 @@ extension p = P $ \s -> POk s (p $! extsBitmap s)
 getExts :: P Int
 getExts = P $ \s -> POk s (extsBitmap s)
 
+setExts :: (Int -> Int) -> P ()  -- updates the extension bitmap held in the parser state
+setExts f = P $ \s -> POk s{ extsBitmap = f (extsBitmap s) } ()  -- new bitmap is f applied to the current one
+
 setSrcLoc :: SrcLoc -> P ()
 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
 
@@ -1448,7 +1541,7 @@ alexGetChar (AI loc ofs s)
 
 	adj_c 
 	  | c <= '\x06' = non_graphic
-	  | c <= '\xff' = c
+	  | c <= '\x7f' = c
           -- Alex doesn't handle Unicode, so when Unicode
           -- character is encoutered we output these values
           -- with the actual character value hidden in the state.
@@ -1458,20 +1551,20 @@ alexGetChar (AI loc ofs s)
 		  LowercaseLetter       -> lower
 		  TitlecaseLetter       -> upper
 		  ModifierLetter        -> other_graphic
-		  OtherLetter           -> other_graphic
+		  OtherLetter           -> lower -- see #1103
 		  NonSpacingMark        -> other_graphic
 		  SpacingCombiningMark  -> other_graphic
 		  EnclosingMark         -> other_graphic
 		  DecimalNumber         -> digit
 		  LetterNumber          -> other_graphic
 		  OtherNumber           -> other_graphic
-		  ConnectorPunctuation  -> other_graphic
-		  DashPunctuation       -> other_graphic
+		  ConnectorPunctuation  -> symbol
+		  DashPunctuation       -> symbol
 		  OpenPunctuation       -> other_graphic
 		  ClosePunctuation      -> other_graphic
 		  InitialQuote          -> other_graphic
 		  FinalQuote            -> other_graphic
-		  OtherPunctuation      -> other_graphic
+		  OtherPunctuation      -> symbol
 		  MathSymbol            -> symbol
 		  CurrencySymbol        -> symbol
 		  ModifierSymbol        -> symbol
@@ -1492,7 +1585,7 @@ alexGetChar' (AI loc ofs s)
         ofs'   = advanceOffs c ofs
 
 advanceOffs :: Char -> Int -> Int
-advanceOffs '\n' offs = 0
+advanceOffs '\n' _    = 0
 advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8
 advanceOffs _    offs = offs + 1
 
@@ -1509,14 +1602,14 @@ popLexState :: P Int
 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
 
 getLexState :: P Int
-getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls
+getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls
 
 -- for reasons of efficiency, flags indicating language extensions (eg,
--- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
+-- -fglasgow-exts or -XPArr) are represented by a bitmap stored in an unboxed
 -- integer
 
-glaExtsBit, ffiBit, parrBit :: Int
-glaExtsBit = 0
+genericsBit, ffiBit, parrBit :: Int
+genericsBit = 0 -- {| and |}
 ffiBit	   = 1
 parrBit	   = 2
 arrowsBit  = 4
@@ -1527,15 +1620,21 @@ bangPatBit = 8	-- Tells the parser to understand bang-patterns
 		-- (doesn't affect the lexer)
 tyFamBit   = 9	-- indexed type families: 'family' keyword and kind sigs
 haddockBit = 10 -- Lex and parse Haddock comments
-magicHashBit = 11 -- # in both functions and operators
+magicHashBit = 11 -- "#" in both functions and operators
 kindSigsBit = 12 -- Kind signatures on type variables
 recursiveDoBit = 13 -- mdo
 unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
 unboxedTuplesBit = 15 -- (# and #)
-
-glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
+standaloneDerivingBit = 16 -- standalone instance deriving declarations
+transformComprehensionsBit = 17
+qqBit	   = 18 -- enable quasiquoting
+inRulePragBit = 19
+rawTokenStreamBit = 20 -- producing a token stream with all comments included
+newQualOpsBit = 21 -- Haskell' qualified operator syntax, e.g. Prelude.(+)
+
+genericsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
 always           _     = True
-glaExtsEnabled   flags = testBit flags glaExtsBit
+genericsEnabled  flags = testBit flags genericsBit
 ffiEnabled       flags = testBit flags ffiBit
 parrEnabled      flags = testBit flags parrBit
 arrowsEnabled    flags = testBit flags arrowsBit
@@ -1550,17 +1649,22 @@ kindSigsEnabled  flags = testBit flags kindSigsBit
 recursiveDoEnabled flags = testBit flags recursiveDoBit
 unicodeSyntaxEnabled flags = testBit flags unicodeSyntaxBit
 unboxedTuplesEnabled flags = testBit flags unboxedTuplesBit
+standaloneDerivingEnabled flags = testBit flags standaloneDerivingBit
+transformComprehensionsEnabled flags = testBit flags transformComprehensionsBit
+qqEnabled        flags = testBit flags qqBit
+inRulePrag       flags = testBit flags inRulePragBit
+rawTokenStreamEnabled flags = testBit flags rawTokenStreamBit
+newQualOps       flags = testBit flags newQualOpsBit
+oldQualOps flags = not (newQualOps flags)
 
 -- PState for parsing options pragmas
 --
-pragState :: StringBuffer -> SrcLoc -> PState
-pragState buf loc  = 
+pragState :: DynFlags -> StringBuffer -> SrcLoc -> PState
+pragState dynflags buf loc =
   PState {
-      buffer	      = buf,
+      buffer        = buf,
       messages      = emptyMessages,
-      -- XXX defaultDynFlags is not right, but we don't have a real
-      -- dflags handy
-      dflags        = defaultDynFlags,
+      dflags        = dynflags,
       last_loc      = mkSrcSpan loc loc,
       last_offs     = 0,
       last_len      = 0,
@@ -1587,17 +1691,19 @@ mkPState buf loc flags  =
       loc           = loc,
       extsBitmap    = fromIntegral bitmap,
       context       = [],
-      lex_state     = [bol, if glaExtsEnabled bitmap then glaexts else 0]
+      lex_state     = [bol, 0]
 	-- we begin in the layout state if toplev_layout is set
     }
     where
-      bitmap =     glaExtsBit `setBitIf` dopt Opt_GlasgowExts  flags
-	       .|. ffiBit       `setBitIf` dopt Opt_FFI          flags
+      bitmap = genericsBit `setBitIf` dopt Opt_Generics flags
+	       .|. ffiBit       `setBitIf` dopt Opt_ForeignFunctionInterface flags
 	       .|. parrBit      `setBitIf` dopt Opt_PArr         flags
 	       .|. arrowsBit    `setBitIf` dopt Opt_Arrows       flags
-	       .|. thBit        `setBitIf` dopt Opt_TH           flags
+	       .|. thBit        `setBitIf` dopt Opt_TemplateHaskell flags
+	       .|. qqBit        `setBitIf` dopt Opt_QuasiQuotes flags
 	       .|. ipBit        `setBitIf` dopt Opt_ImplicitParams flags
 	       .|. explicitForallBit `setBitIf` dopt Opt_ScopedTypeVariables flags
+	       .|. explicitForallBit `setBitIf` dopt Opt_LiberalTypeSynonyms flags
 	       .|. explicitForallBit `setBitIf` dopt Opt_PolymorphicComponents flags
 	       .|. explicitForallBit `setBitIf` dopt Opt_ExistentialQuantification flags
 	       .|. explicitForallBit `setBitIf` dopt Opt_Rank2Types flags
@@ -1610,15 +1716,20 @@ mkPState buf loc flags  =
 	       .|. recursiveDoBit `setBitIf` dopt Opt_RecursiveDo flags
 	       .|. unicodeSyntaxBit `setBitIf` dopt Opt_UnicodeSyntax flags
 	       .|. unboxedTuplesBit `setBitIf` dopt Opt_UnboxedTuples flags
+	       .|. standaloneDerivingBit `setBitIf` dopt Opt_StandaloneDeriving flags
+               .|. transformComprehensionsBit `setBitIf` dopt Opt_TransformListComp flags
+               .|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags
+               .|. newQualOpsBit `setBitIf` dopt Opt_NewQualifiedOperators flags
       --
       setBitIf :: Int -> Bool -> Int
       b `setBitIf` cond | cond      = bit b
 			| otherwise = 0
 
-addWarning :: DynFlag -> WarnMsg -> P ()
-addWarning option w
+addWarning :: DynFlag -> SrcSpan -> SDoc -> P ()
+addWarning option srcspan warning
  = P $ \s@PState{messages=(ws,es), dflags=d} ->
-       let ws' = if dopt option d then ws `snocBag` w else ws
+       let warning' = mkWarnMsg srcspan alwaysQualify warning
+           ws' = if dopt option d then ws `snocBag` warning' else ws
        in POk s{messages=(ws', es)} ()
 
 getMessages :: PState -> Messages
@@ -1632,7 +1743,7 @@ setContext ctx = P $ \s -> POk s{context=ctx} ()
 
 popContext :: P ()
 popContext = P $ \ s@(PState{ buffer = buf, context = ctx, 
-			   loc = loc, last_len = len, last_loc = last_loc }) ->
+                              last_len = len, last_loc = last_loc }) ->
   case ctx of
 	(_:tl) -> POk s{ context = tl } ()
 	[]     -> PFailed last_loc (srcParseErr buf len)
@@ -1661,8 +1772,8 @@ srcParseErr
   -> Message
 srcParseErr buf len
   = hcat [ if null token 
-	     then ptext SLIT("parse error (possibly incorrect indentation)")
-	     else hcat [ptext SLIT("parse error on input "),
+	     then ptext (sLit "parse error (possibly incorrect indentation)")
+	     else hcat [ptext (sLit "parse error on input "),
           	  	char '`', text token, char '\'']
     ]
   where token = lexemeToString (offsetBytes (-len) buf) len
@@ -1680,7 +1791,7 @@ srcParseFail = P $ \PState{ buffer = buf, last_len = len,
 lexError :: String -> P a
 lexError str = do
   loc <- getSrcLoc
-  i@(AI end _ buf) <- getInput
+  (AI end _ buf) <- getInput
   reportLexError loc end buf str
 
 -- -----------------------------------------------------------------------------
@@ -1689,7 +1800,7 @@ lexError str = do
 
 lexer :: (Located Token -> P a) -> P a
 lexer cont = do
-  tok@(L span tok__) <- lexToken
+  tok@(L _span _tok__) <- lexToken
 --  trace ("token: " ++ show tok__) $ do
   cont tok
 
@@ -1699,20 +1810,21 @@ lexToken = do
   sc <- getLexState
   exts <- getExts
   case alexScanUser exts inp sc of
-    AlexEOF -> do let span = mkSrcSpan loc1 loc1
-		  setLastToken span 0 0
-		  return (L span ITeof)
-    AlexError (AI loc2 _ buf) -> do 
-	reportLexError loc1 loc2 buf "lexical error"
+    AlexEOF -> do
+        let span = mkSrcSpan loc1 loc1
+        setLastToken span 0 0
+        return (L span ITeof)
+    AlexError (AI loc2 _ buf) ->
+        reportLexError loc1 loc2 buf "lexical error"
     AlexSkip inp2 _ -> do
-	setInput inp2
-	lexToken
-    AlexToken inp2@(AI end _ buf2) len t -> do
-    setInput inp2
-    let span = mkSrcSpan loc1 end
-    let bytes = byteDiff buf buf2
-    span `seq` setLastToken span bytes bytes
-    t span buf bytes
+        setInput inp2
+        lexToken
+    AlexToken inp2@(AI end _ buf2) _ t -> do
+        setInput inp2
+        let span = mkSrcSpan loc1 end
+        let bytes = byteDiff buf buf2
+        span `seq` setLastToken span bytes bytes
+        t span buf bytes
 
 reportLexError loc1 loc2 buf str
   | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input")
@@ -1723,4 +1835,13 @@ reportLexError loc1 loc2 buf str
   if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar#
     then failLocMsgP loc2 loc2 (str ++ " (UTF-8 decoding error)")
     else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c)
+
+lexTokenStream :: StringBuffer -> SrcLoc -> DynFlags -> ParseResult [Located Token]  -- lexes a buffer into a list of located tokens
+lexTokenStream buf loc dflags = unP go initState  -- run the collecting loop from a fresh parser state
+    where initState = mkPState buf loc (dopt_set (dopt_unset dflags Opt_Haddock) Opt_KeepRawTokenStream)  -- Opt_Haddock cleared, Opt_KeepRawTokenStream set
+          go = do
+            ltok <- lexer return  -- fetch the next token via the continuation-style lexer
+            case ltok of
+              L _ ITeof -> return []  -- stop at end of input; the ITeof token itself is not included
+              _ -> liftM (ltok:) go  -- otherwise keep the token and continue
 }