X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fparser%2FLexer.x;h=84ee57eb1c96293ed65820f140524de3584af3d2;hp=15745d5620f27080eca1620a7ca8a651dc044171;hb=f3399c446c7507d46d6cc550aa2fe7027dbc1b5b;hpb=3ad8f84f6a75f240383e62a14472d14eb372dcd1 diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x index 15745d5..84ee57e 100644 --- a/compiler/parser/Lexer.x +++ b/compiler/parser/Lexer.x @@ -21,18 +21,28 @@ -- - pragma-end should be only valid in a pragma { +{-# OPTIONS -w #-} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + module Lexer ( Token(..), lexer, pragState, mkPState, PState(..), P(..), ParseResult(..), getSrcLoc, failLocMsgP, failSpanMsgP, srcParseFail, + getMessages, popContext, pushCurrentContext, setLastToken, setSrcLoc, getLexState, popLexState, pushLexState, - extension, glaExtsEnabled, bangPatEnabled + extension, standaloneDerivingEnabled, bangPatEnabled, + addWarning ) where #include "HsVersions.h" -import ErrUtils ( Message ) +import Bag +import ErrUtils import Outputable import StringBuffer import FastString @@ -43,10 +53,11 @@ import DynFlags import Ctype import Util ( maybePrefixMatch, readRational ) -import DATA_BITS -import Data.Char ( chr ) -import Ratio ---import TRACE +import Control.Monad +import Data.Bits +import Data.Char ( chr, ord, isSpace ) +import Data.Ratio +import Debug.Trace #if __GLASGOW_HASKELL__ >= 605 import Data.Char ( GeneralCategory(..), generalCategory, isPrint, isUpper ) @@ -55,29 +66,30 @@ import Compat.Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper ) #endif } -$unispace = \x05 -$whitechar = [\ \t\n\r\f\v\xa0 $unispace] +$unispace = \x05 -- Trick Alex into handling Unicode. See alexGetChar. +$whitechar = [\ \n\r\f\v\xa0 $unispace] $white_no_nl = $whitechar # \n +$tab = \t $ascdigit = 0-9 -$unidigit = \x03 +$unidigit = \x03 -- Trick Alex into handling Unicode. See alexGetChar. $decdigit = $ascdigit -- for now, should really be $digit (ToDo) $digit = [$ascdigit $unidigit] $special = [\(\)\,\;\[\]\`\{\}] $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~ \xa1-\xbf \xd7 \xf7] -$unisymbol = \x04 +$unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar. $symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\'] -$unilarge = \x01 +$unilarge = \x01 -- Trick Alex into handling Unicode. See alexGetChar. $asclarge = [A-Z \xc0-\xd6 \xd8-\xde] $large = [$asclarge $unilarge] -$unismall = \x02 +$unismall = \x02 -- Trick Alex into handling Unicode. See alexGetChar. $ascsmall = [a-z \xdf-\xf6 \xf8-\xff] $small = [$ascsmall $unismall \_] -$unigraphic = \x06 +$unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar. $graphic = [$small $large $symbol $digit $special $unigraphic \:\"\'] $octit = 0-7 @@ -86,6 +98,8 @@ $symchar = [$symbol \:] $nl = [\n\r] $idchar = [$small $large $digit \'] +$docsym = [\| \^ \* \$] + @varid = $small $idchar* @conid = $large $idchar* @@ -102,25 +116,63 @@ $idchar = [$small $large $digit \'] @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent +-- normal signed numerical literals can only be explicitly negative, +-- not explicitly positive (contrast @exponent) +@negative = \- +@signed = @negative ? 
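The $uni* sentinel classes above are the whole of the "Trick Alex into handling Unicode" scheme: Alex itself only ever sees 8-bit characters, so alexGetChar (further down in this patch) substitutes one of these sentinel bytes, chosen by Unicode general category, while the real character stays in the StringBuffer and is recovered when the lexeme is turned into a token. A simplified sketch of that classification, abbreviating the adj_c mapping inside the real alexGetChar (the function name and the reduced set of categories are illustrative only):

import Data.Char (GeneralCategory(..), generalCategory)

-- Sentinel bytes corresponding to the $uni* classes above.
uniLarge, uniSmall, uniDigit, uniSymbol, uniSpace, uniGraphic :: Char
uniLarge   = '\x01'   -- $unilarge
uniSmall   = '\x02'   -- $unismall
uniDigit   = '\x03'   -- $unidigit
uniSymbol  = '\x04'   -- $unisymbol
uniSpace   = '\x05'   -- $unispace
uniGraphic = '\x06'   -- $unigraphic

-- What Alex is shown for each character: Latin-1 passes through, anything
-- wider is replaced by a sentinel that lands it in the right character
-- class.  (The real adj_c also remaps raw input bytes <= '\x06' so the
-- input cannot forge a sentinel, and distinguishes more categories.)
classifyForAlex :: Char -> Char
classifyForAlex c
  | c <= '\xff' = c
  | otherwise   = case generalCategory c of
      UppercaseLetter -> uniLarge
      TitlecaseLetter -> uniLarge
      LowercaseLetter -> uniSmall
      DecimalNumber   -> uniDigit
      Space           -> uniSpace
      MathSymbol      -> uniSymbol
      OtherSymbol     -> uniSymbol
      _               -> uniGraphic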
+ haskell :- -- everywhere: skip whitespace and comments $white_no_nl+ ; +$tab+ { warn Opt_WarnTabs (text "Tab character") } -- Everywhere: deal with nested comments. We explicitly rule out -- pragmas, "{-#", so that we don't accidentally treat them as comments. -- (this can happen even though pragmas will normally take precedence due to -- longest-match, because pragmas aren't valid in every state, but comments --- are). -"{-" / { notFollowedBy '#' } { nested_comment } +-- are). We also rule out nested Haddock comments, if the -haddock flag is +-- set. + +"{-" / { isNormalComment } { nested_comment lexToken } -- Single-line comments are a bit tricky. Haskell 98 says that two or -- more dashes followed by a symbol should be parsed as a varsym, so we -- have to exclude those. --- The regex says: "munch all the characters after the dashes, as long as --- the first one is not a symbol". -"--"\-* [^$symbol :] .* ; -"--"\-* / { atEOL } ; + +-- Since Haddock comments aren't valid in every state, we need to rule them +-- out here. + +-- The following two rules match comments that begin with two dashes, but +-- continue with a different character. The rules test that this character +-- is not a symbol (in which case we'd have a varsym), and that it's not a +-- space followed by a Haddock comment symbol (docsym) (in which case we'd +-- have a Haddock comment). The rules then munch the rest of the line. + +"-- " ~[$docsym \#] .* ; +"--" [^$symbol : \ ] .* ; + +-- Next, match Haddock comments if no -haddock flag + +"-- " $docsym .* / { ifExtension (not . haddockEnabled) } ; + +-- Now, when we've matched comments that begin with 2 dashes and continue +-- with a different character, we need to match comments that begin with three +-- or more dashes (which clearly can't be Haddock comments). We only need to +-- make sure that the first non-dash character isn't a symbol, and munch the +-- rest of the line. + +"---"\-* [^$symbol :] .* ; + +-- Since the previous rules all match dashes followed by at least one +-- character, we also need to match a whole line filled with just dashes. + +"--"\-* / { atEOL } ; + +-- We need this rule since none of the other single line comment rules +-- actually match this case. + +"-- " / { atEOL } ; -- 'bol' state: beginning of a line. Slurp up all the whitespace (including -- blank lines) until we find a non-whitespace character, then do layout @@ -158,7 +210,7 @@ $white_no_nl+ ; -- generate a matching '}' token. () { do_layout_left } -<0,option_prags,glaexts> \n { begin bol } +<0,option_prags> \n { begin bol } "{-#" $whitechar* (line|LINE) { begin line_prag2 } @@ -176,15 +228,16 @@ $white_no_nl+ ; -- NOTE: accept -} at the end of a LINE pragma, for compatibility -- with older versions of GHC which generated these. --- We only want RULES pragmas to be picked up when -fglasgow-exts --- is on, because the contents of the pragma is always written using --- glasgow-exts syntax (using forall etc.), so if glasgow exts are not --- enabled, we're sure to get a parse error. +-- We only want RULES pragmas to be picked up when explicit forall +-- syntax is enabled is on, because the contents of the pragma always +-- uses it. If it's not on then we're sure to get a parse error. -- (ToDo: we should really emit a warning when ignoring pragmas) - - "{-#" $whitechar* (RULES|rules) { token ITrules_prag } +-- XXX Now that we can enable this without the -fglasgow-exts hammer, +-- is it better just to let the parse error happen? 
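The / { ... } guards on the rule below, and on most of the rules that follow, are Alex predicates: each receives the user state (here the extension bitmap), the input before the lexeme, the lexeme length, and the input after it, and the rule only fires if the predicate returns True. ifExtension, defined later in this patch, just lifts a bitmap test into that shape. A minimal sketch of the convention, assuming this module's AlexInput type (the ExtsBitmap synonym is illustrative; the module simply uses Int):

type ExtsBitmap = Int

-- The shape Alex expects of a predicate attached as   regex / { p }  :
--   p :: user -> AlexInput -> Int -> AlexInput -> Bool
ifExtension :: (ExtsBitmap -> Bool)
            -> ExtsBitmap -> AlexInput -> Int -> AlexInput -> Bool
ifExtension pred bits _ _ _ = pred bits

-- Predicates compose with Alex's alexAndPred, as in the arrow rule below:
--   "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol }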
+<0> + "{-#" $whitechar* (RULES|rules) / { ifExtension explicitForallEnabled } { token ITrules_prag } -<0,option_prags,glaexts> { +<0,option_prags> { "{-#" $whitechar* (INLINE|inline) { token (ITinline_prag True) } "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline) { token (ITinline_prag False) } @@ -199,39 +252,54 @@ $white_no_nl+ ; "{-#" $whitechar* (DEPRECATED|deprecated) { token ITdeprecated_prag } "{-#" $whitechar* (SCC|scc) { token ITscc_prag } + "{-#" $whitechar* (GENERATED|generated) + { token ITgenerated_prag } "{-#" $whitechar* (CORE|core) { token ITcore_prag } "{-#" $whitechar* (UNPACK|unpack) { token ITunpack_prag } - "{-#" { nested_comment } + "{-#" { nested_comment lexToken } -- ToDo: should only be valid inside a pragma: "#-}" { token ITclose_prag} } { - "{-#" $whitechar* (OPTIONS|options) { lex_string_prag IToptions_prag } - "{-#" $whitechar* (OPTIONS_GHC|options_ghc) + "{-#" $whitechar* (OPTIONS|options) { lex_string_prag IToptions_prag } + "{-#" $whitechar* (OPTIONS_GHC|options_ghc) { lex_string_prag IToptions_prag } - "{-#" $whitechar* (LANGUAGE|language) { token ITlanguage_prag } - "{-#" $whitechar* (INCLUDE|include) { lex_string_prag ITinclude_prag } + "{-#" $whitechar* (OPTIONS_HADDOCK|options_haddock) + { lex_string_prag ITdocOptions } + "-- #" { multiline_doc_comment } + "{-#" $whitechar* (LANGUAGE|language) { token ITlanguage_prag } + "{-#" $whitechar* (INCLUDE|include) { lex_string_prag ITinclude_prag } } -<0,option_prags,glaexts> { +<0> { + "-- #" .* ; +} + +<0,option_prags> { -- This is to catch things like {-# OPTIONS OPTIONS_HUGS ... - "{-#" $whitechar* $idchar+ { nested_comment } + "{-#" $whitechar* $idchar+ { nested_comment lexToken } } -- '0' state: ordinary lexemes --- 'glaexts' state: glasgow extensions (postfix '#', etc.) + +-- Haddock comments + +<0> { + "-- " $docsym / { ifExtension haddockEnabled } { multiline_doc_comment } + "{-" \ ? $docsym / { ifExtension haddockEnabled } { nested_doc_comment } +} -- "special" symbols -<0,glaexts> { +<0> { "[:" / { ifExtension parrEnabled } { token ITopabrack } ":]" / { ifExtension parrEnabled } { token ITcpabrack } } -<0,glaexts> { +<0> { "[|" / { ifExtension thEnabled } { token ITopenExpQuote } "[e|" / { ifExtension thEnabled } { token ITopenExpQuote } "[p|" / { ifExtension thEnabled } { token ITopenPatQuote } @@ -240,26 +308,34 @@ $white_no_nl+ ; "|]" / { ifExtension thEnabled } { token ITcloseQuote } \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape } "$(" / { ifExtension thEnabled } { token ITparenEscape } + + "[$" @varid "|" / { ifExtension qqEnabled } + { lex_quasiquote_tok } } -<0,glaexts> { +<0> { "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol } { special IToparenbar } "|)" / { ifExtension arrowsEnabled } { special ITcparenbar } } -<0,glaexts> { +<0> { \? 
@varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid } } - { - "(#" / { notFollowedBySymbol } { token IToubxparen } - "#)" { token ITcubxparen } - "{|" { token ITocurlybar } - "|}" { token ITccurlybar } +<0> { + "(#" / { ifExtension unboxedTuplesEnabled `alexAndPred` notFollowedBySymbol } + { token IToubxparen } + "#)" / { ifExtension unboxedTuplesEnabled } + { token ITcubxparen } } -<0,option_prags,glaexts> { +<0> { + "{|" / { ifExtension genericsEnabled } { token ITocurlybar } + "|}" / { ifExtension genericsEnabled } { token ITccurlybar } +} + +<0,option_prags> { \( { special IToparen } \) { special ITcparen } \[ { special ITobrack } @@ -272,67 +348,68 @@ $white_no_nl+ ; \} { close_brace } } -<0,option_prags,glaexts> { - @qual @varid { check_qvarid } +<0,option_prags> { + @qual @varid { idtoken qvarid } @qual @conid { idtoken qconid } @varid { varid } @conid { idtoken conid } } --- after an illegal qvarid, such as 'M.let', --- we back up and try again in the bad_qvarid state: - { - @conid { pop_and (idtoken conid) } - @qual @conid { pop_and (idtoken qconid) } -} - - { - @qual @varid "#"+ { idtoken qvarid } - @qual @conid "#"+ { idtoken qconid } - @varid "#"+ { varid } - @conid "#"+ { idtoken conid } +<0> { + @qual @varid "#"+ / { ifExtension magicHashEnabled } { idtoken qvarid } + @qual @conid "#"+ / { ifExtension magicHashEnabled } { idtoken qconid } + @varid "#"+ / { ifExtension magicHashEnabled } { varid } + @conid "#"+ / { ifExtension magicHashEnabled } { idtoken conid } } -- ToDo: M.(,,,) -<0,glaexts> { +<0> { @qual @varsym { idtoken qvarsym } @qual @consym { idtoken qconsym } @varsym { varsym } @consym { consym } } -<0,glaexts> { - @decimal { tok_decimal } - 0[oO] @octal { tok_octal } - 0[xX] @hexadecimal { tok_hexadecimal } -} +-- For the normal boxed literals we need to be careful +-- when trying to be close to Haskell98 +<0> { + -- Normal integral literals (:: Num a => a, from Integer) + @decimal { tok_num positive 0 0 decimal } + 0[oO] @octal { tok_num positive 2 2 octal } + 0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal } - { - @decimal \# { prim_decimal } - 0[oO] @octal \# { prim_octal } - 0[xX] @hexadecimal \# { prim_hexadecimal } + -- Normal rational literals (:: Fractional a => a, from Rational) + @floating_point { strtoken tok_float } } -<0,glaexts> @floating_point { strtoken tok_float } - @floating_point \# { init_strtoken 1 prim_float } - @floating_point \# \# { init_strtoken 2 prim_double } +<0> { + -- Unboxed ints (:: Int#) + -- It's simpler (and faster?) to give separate cases to the negatives, + -- especially considering octal/hexadecimal prefixes. 
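-- The two numeric arguments given to tok_num/tok_primint in the rules
-- above and below are byte offsets into the lexeme.  The first says how
-- many prefix bytes to skip before the digits start (2 for "0o"/"0x",
-- 1 for a leading '-'); the second says how many bytes to discount from
-- the lexeme length in total (the prefix plus any trailing '#').  A small
-- self-contained model of that arithmetic, using String in place of
-- StringBuffer (the function name is illustrative, not part of this module):
--
--   import Data.Char (digitToInt)
--
--   parseWithOffsets :: Int -> Int -> Int -> String -> Integer
--   parseWithOffsets transbuf translen radix lexeme =
--       foldl (\acc d -> acc * fromIntegral radix
--                            + fromIntegral (digitToInt d)) 0 digits
--     where
--       digits = take (length lexeme - translen) (drop transbuf lexeme)
--
--   parseWithOffsets 2 3 8  "0o777#" == 511   -- "0o" prefix and '#' dropped
--   parseWithOffsets 1 2 10 "-42#"   == 42    -- sign re-applied via 'negative'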
+ @decimal \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal } + 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal } + 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal } + @negative @decimal \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal } + @negative 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal } + @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal } + + -- Unboxed floats and doubles (:: Float#, :: Double#) + -- prim_{float,double} work with signed literals + @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat } + @signed @floating_point \# \# / { ifExtension magicHashEnabled } { init_strtoken 2 tok_primdouble } +} -- Strings and chars are lexed by hand-written code. The reason is -- that even if we recognise the string or char here in the regex -- lexer, we would still have to parse the string afterward in order -- to convert it to a String. -<0,glaexts> { +<0> { \' { lex_char_tok } \" { lex_string_tok } } { --- work around bug in Alex 2.0 -#if __GLASGOW_HASKELL__ < 503 -unsafeAt arr i = arr ! i -#endif - -- ----------------------------------------------------------------------------- -- The token type @@ -345,7 +422,6 @@ data Token | ITderiving | ITdo | ITelse - | ITfor | IThiding | ITif | ITimport @@ -376,8 +452,10 @@ data Token | ITccallconv | ITdotnet | ITmdo - | ITiso | ITfamily + | ITgroup + | ITby + | ITusing -- Pragmas | ITinline_prag Bool -- True <=> INLINE, False <=> NOINLINE @@ -388,6 +466,7 @@ data Token | ITdeprecated_prag | ITline_prag | ITscc_prag + | ITgenerated_prag | ITcore_prag -- hdaume: core annotations | ITunpack_prag | ITclose_prag @@ -466,6 +545,7 @@ data Token | ITparenEscape -- $( | ITvarQuote -- ' | ITtyQuote -- '' + | ITquasiQuote (FastString,FastString,SrcSpan) -- [:...|...|] -- Arrow notation extension | ITproc @@ -479,6 +559,15 @@ data Token | ITunknown String -- Used when the lexer can't make sense of it | ITeof -- end of file token + + -- Documentation annotations + | ITdocCommentNext String -- something beginning '-- |' + | ITdocCommentPrev String -- something beginning '-- ^' + | ITdocCommentNamed String -- something beginning '-- $' + | ITdocSection Int String -- a section heading + | ITdocOptions String -- doc options (prune, ignore-exports, etc) + | ITdocOptionsOld String -- doc options declared "-- # ..."-style + #ifdef DEBUG deriving Show -- debugging #endif @@ -489,7 +578,6 @@ isSpecial :: Token -> Bool -- not as a keyword. 
isSpecial ITas = True isSpecial IThiding = True -isSpecial ITfor = True isSpecial ITqualified = True isSpecial ITforall = True isSpecial ITexport = True @@ -501,8 +589,10 @@ isSpecial ITunsafe = True isSpecial ITccallconv = True isSpecial ITstdcallconv = True isSpecial ITmdo = True -isSpecial ITiso = True isSpecial ITfamily = True +isSpecial ITgroup = True +isSpecial ITby = True +isSpecial ITusing = True isSpecial _ = False -- the bitmap provided as the third component indicates whether the @@ -523,7 +613,6 @@ reservedWordsFM = listToUFM $ ( "deriving", ITderiving, 0 ), ( "do", ITdo, 0 ), ( "else", ITelse, 0 ), - ( "for", ITfor, 0 ), ( "hiding", IThiding, 0 ), ( "if", ITif, 0 ), ( "import", ITimport, 0 ), @@ -542,9 +631,12 @@ reservedWordsFM = listToUFM $ ( "where", ITwhere, 0 ), ( "_scc_", ITscc, 0 ), -- ToDo: remove - ( "forall", ITforall, bit tvBit), - ( "mdo", ITmdo, bit glaExtsBit), - ( "family", ITfamily, bit idxTysBit), + ( "forall", ITforall, bit explicitForallBit), + ( "mdo", ITmdo, bit recursiveDoBit), + ( "family", ITfamily, bit tyFamBit), + ( "group", ITgroup, bit transformComprehensionsBit), + ( "by", ITby, bit transformComprehensionsBit), + ( "using", ITusing, bit transformComprehensionsBit), ( "foreign", ITforeign, bit ffiBit), ( "export", ITexport, bit ffiBit), @@ -561,40 +653,42 @@ reservedWordsFM = listToUFM $ ( "proc", ITproc, bit arrowsBit) ] +reservedSymsFM :: UniqFM (Token, Int -> Bool) reservedSymsFM = listToUFM $ - map (\ (x,y,z) -> (mkFastString x,(y,z))) - [ ("..", ITdotdot, 0) - ,(":", ITcolon, 0) -- (:) is a reserved op, - -- meaning only list cons - ,("::", ITdcolon, 0) - ,("=", ITequal, 0) - ,("\\", ITlam, 0) - ,("|", ITvbar, 0) - ,("<-", ITlarrow, 0) - ,("->", ITrarrow, 0) - ,("@", ITat, 0) - ,("~", ITtilde, 0) - ,("=>", ITdarrow, 0) - ,("-", ITminus, 0) - ,("!", ITbang, 0) - - ,("*", ITstar, bit glaExtsBit .|. - bit idxTysBit) -- For data T (a::*) = MkT - ,(".", ITdot, bit tvBit) -- For 'forall a . t' - - ,("-<", ITlarrowtail, bit arrowsBit) - ,(">-", ITrarrowtail, bit arrowsBit) - ,("-<<", ITLarrowtail, bit arrowsBit) - ,(">>-", ITRarrowtail, bit arrowsBit) + map (\ (x,y,z) -> (mkFastString x,(y,z))) + [ ("..", ITdotdot, always) + -- (:) is a reserved op, meaning only list cons + ,(":", ITcolon, always) + ,("::", ITdcolon, always) + ,("=", ITequal, always) + ,("\\", ITlam, always) + ,("|", ITvbar, always) + ,("<-", ITlarrow, always) + ,("->", ITrarrow, always) + ,("@", ITat, always) + ,("~", ITtilde, always) + ,("=>", ITdarrow, always) + ,("-", ITminus, always) + ,("!", ITbang, always) + + -- For data T (a::*) = MkT + ,("*", ITstar, \i -> kindSigsEnabled i || tyFamEnabled i) + -- For 'forall a . t' + ,(".", ITdot, explicitForallEnabled) + + ,("-<", ITlarrowtail, arrowsEnabled) + ,(">-", ITrarrowtail, arrowsEnabled) + ,("-<<", ITLarrowtail, arrowsEnabled) + ,(">>-", ITRarrowtail, arrowsEnabled) #if __GLASGOW_HASKELL__ >= 605 - ,("λ", ITlam, bit glaExtsBit) - ,("∷", ITdcolon, bit glaExtsBit) - ,("⇒", ITdarrow, bit glaExtsBit) - ,("∀", ITforall, bit glaExtsBit) - ,("→", ITrarrow, bit glaExtsBit) - ,("←", ITlarrow, bit glaExtsBit) - ,("⋯", ITdotdot, bit glaExtsBit) + ,("∷", ITdcolon, unicodeSyntaxEnabled) + ,("⇒", ITdarrow, unicodeSyntaxEnabled) + ,("∀", ITforall, \i -> unicodeSyntaxEnabled i && + explicitForallEnabled i) + ,("→", ITrarrow, unicodeSyntaxEnabled) + ,("←", ITlarrow, unicodeSyntaxEnabled) + ,("⋯", ITdotdot, unicodeSyntaxEnabled) -- ToDo: ideally, → and ∷ should be "specials", so that they cannot -- form part of a large operator. 
This would let us have a better -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe). @@ -638,43 +732,153 @@ pop _span _buf _len = do popLexState; lexToken pop_and :: Action -> Action pop_and act span buf len = do popLexState; act span buf len -notFollowedBy char _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf /= char +{-# INLINE nextCharIs #-} +nextCharIs buf p = not (atEnd buf) && p (currentChar buf) + +notFollowedBy char _ _ _ (AI _ _ buf) + = nextCharIs buf (/=char) notFollowedBySymbol _ _ _ (AI _ _ buf) - = atEnd buf || currentChar buf `notElem` "!#$%&*+./<=>?@\\^|-~" + = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~") + +-- We must reject doc comments as being ordinary comments everywhere. +-- In some cases the doc comment will be selected as the lexeme due to +-- maximal munch, but not always, because the nested comment rule is +-- valid in all states, but the doc-comment rules are only valid in +-- the non-layout states. +isNormalComment bits _ _ (AI _ _ buf) + | haddockEnabled bits = notFollowedByDocOrPragma + | otherwise = nextCharIs buf (/='#') + where + notFollowedByDocOrPragma + = not $ spaceAndP buf (`nextCharIs` (`elem` "|^*$#")) + +spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf)) + +haddockDisabledAnd p bits _ _ (AI _ _ buf) + = if haddockEnabled bits then False else (p buf) atEOL _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf == '\n' ifExtension pred bits _ _ _ = pred bits +multiline_doc_comment :: Action +multiline_doc_comment span buf _len = withLexedDocType (worker "") + where + worker commentAcc input docType oneLine = case alexGetChar input of + Just ('\n', input') + | oneLine -> docCommentEnd input commentAcc docType buf span + | otherwise -> case checkIfCommentLine input' of + Just input -> worker ('\n':commentAcc) input docType False + Nothing -> docCommentEnd input commentAcc docType buf span + Just (c, input) -> worker (c:commentAcc) input docType oneLine + Nothing -> docCommentEnd input commentAcc docType buf span + + checkIfCommentLine input = check (dropNonNewlineSpace input) + where + check input = case alexGetChar input of + Just ('-', input) -> case alexGetChar input of + Just ('-', input) -> case alexGetChar input of + Just (c, _) | c /= '-' -> Just input + _ -> Nothing + _ -> Nothing + _ -> Nothing + + dropNonNewlineSpace input = case alexGetChar input of + Just (c, input') + | isSpace c && c /= '\n' -> dropNonNewlineSpace input' + | otherwise -> input + Nothing -> input + {- nested comments require traversing by hand, they can't be parsed using regular expressions. 
-} -nested_comment :: Action -nested_comment span _str _len = do +nested_comment :: P (Located Token) -> Action +nested_comment cont span _str _len = do input <- getInput - go 1 input - where go 0 input = do setInput input; lexToken - go n input = do - case alexGetChar input of - Nothing -> err input - Just (c,input) -> do - case c of - '-' -> do - case alexGetChar input of - Nothing -> err input - Just ('\125',input) -> go (n-1) input - Just (c,_) -> go n input - '\123' -> do - case alexGetChar input of - Nothing -> err input - Just ('-',input') -> go (n+1) input' - Just (c,input) -> go n input - c -> go n input - - err (AI end _ _) = failLocMsgP (srcSpanStart span) end "unterminated `{-'" - + go (1::Int) input + where + go 0 input = do setInput input; cont + go n input = case alexGetChar input of + Nothing -> errBrace input span + Just ('-',input) -> case alexGetChar input of + Nothing -> errBrace input span + Just ('\125',input) -> go (n-1) input + Just (c,_) -> go n input + Just ('\123',input) -> case alexGetChar input of + Nothing -> errBrace input span + Just ('-',input) -> go (n+1) input + Just (c,_) -> go n input + Just (c,input) -> go n input + +nested_doc_comment :: Action +nested_doc_comment span buf _len = withLexedDocType (go "") + where + go commentAcc input docType _ = case alexGetChar input of + Nothing -> errBrace input span + Just ('-',input) -> case alexGetChar input of + Nothing -> errBrace input span + Just ('\125',input@(AI end _ buf2)) -> + docCommentEnd input commentAcc docType buf span + Just (c,_) -> go ('-':commentAcc) input docType False + Just ('\123', input) -> case alexGetChar input of + Nothing -> errBrace input span + Just ('-',input) -> do + setInput input + let cont = do input <- getInput; go commentAcc input docType False + nested_comment cont span buf _len + Just (c,_) -> go ('\123':commentAcc) input docType False + Just (c,input) -> go (c:commentAcc) input docType False + +withLexedDocType lexDocComment = do + input@(AI _ _ buf) <- getInput + case prevChar buf ' ' of + '|' -> lexDocComment input ITdocCommentNext False + '^' -> lexDocComment input ITdocCommentPrev False + '$' -> lexDocComment input ITdocCommentNamed False + '*' -> lexDocSection 1 input + '#' -> lexDocComment input ITdocOptionsOld False + where + lexDocSection n input = case alexGetChar input of + Just ('*', input) -> lexDocSection (n+1) input + Just (c, _) -> lexDocComment input (ITdocSection n) True + Nothing -> do setInput input; lexToken -- eof reached, lex it normally + +-- docCommentEnd +------------------------------------------------------------------------------- +-- This function is quite tricky. We can't just return a new token, we also +-- need to update the state of the parser. Why? Because the token is longer +-- than what was lexed by Alex, and the lexToken function doesn't know this, so +-- it writes the wrong token length to the parser state. This function is +-- called afterwards, so it can just update the state. + +-- This is complicated by the fact that Haddock tokens can span multiple lines, +-- which is something that the original lexer didn't account for. +-- I have added last_line_len in the parser state which represents the length +-- of the part of the token that is on the last line. It is now used for layout +-- calculation in pushCurrentContext instead of last_len. last_len is, like it +-- was before, the full length of the token, and it is now only used for error +-- messages. 
/Waern + +docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer -> + SrcSpan -> P (Located Token) +docCommentEnd input commentAcc docType buf span = do + setInput input + let (AI loc last_offs nextBuf) = input + comment = reverse commentAcc + span' = mkSrcSpan (srcSpanStart span) loc + last_len = byteDiff buf nextBuf + + last_line_len = if (last_offs - last_len < 0) + then last_offs + else last_len + + span `seq` setLastToken span' last_len last_line_len + return (L span' (docType comment)) + +errBrace (AI end _ _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'" + open_brace, close_brace :: Action open_brace span _str _len = do ctx <- getContext @@ -684,30 +888,6 @@ close_brace span _str _len = do popContext return (L span ITccurly) --- We have to be careful not to count M. as a qualified name --- when is a keyword. We hack around this by catching --- the offending tokens afterward, and re-lexing in a different state. -check_qvarid span buf len = do - case lookupUFM reservedWordsFM var of - Just (keyword,exts) - | not (isSpecial keyword) -> - if exts == 0 - then try_again - else do - b <- extension (\i -> exts .&. i /= 0) - if b then try_again - else return token - _other -> return token - where - (mod,var) = splitQualName buf len - token = L span (ITqvarid (mod,var)) - - try_again = do - (AI _ offs _) <- getInput - setInput (AI (srcSpanStart span) (offs-len) buf) - pushLexState bad_qvarid - lexToken - qvarid buf len = ITqvarid $! splitQualName buf len qconid buf len = ITqconid $! splitQualName buf len @@ -764,36 +944,37 @@ consym = sym ITconsym sym con span buf len = case lookupUFM reservedSymsFM fs of - Just (keyword,0) -> return (L span keyword) Just (keyword,exts) -> do - b <- extension (\i -> exts .&. i /= 0) + b <- extension exts if b then return (L span keyword) else return (L span $! con fs) _other -> return (L span $! con fs) where fs = lexemeToFastString buf len -tok_decimal span buf len - = return (L span (ITinteger $! parseInteger buf len 10 octDecDigit)) - -tok_octal span buf len - = return (L span (ITinteger $! parseInteger (offsetBytes 2 buf) (len-2) 8 octDecDigit)) - -tok_hexadecimal span buf len - = return (L span (ITinteger $! parseInteger (offsetBytes 2 buf) (len-2) 16 hexDigit)) - -prim_decimal span buf len - = return (L span (ITprimint $! parseInteger buf (len-1) 10 octDecDigit)) - -prim_octal span buf len - = return (L span (ITprimint $! parseInteger (offsetBytes 2 buf) (len-3) 8 octDecDigit)) - -prim_hexadecimal span buf len - = return (L span (ITprimint $! parseInteger (offsetBytes 2 buf) (len-3) 16 hexDigit)) - +-- Variations on the integral numeric literal. +tok_integral :: (Integer -> Token) + -> (Integer -> Integer) + -- -> (StringBuffer -> StringBuffer) -> (Int -> Int) + -> Int -> Int + -> (Integer, (Char->Int)) -> Action +tok_integral itint transint transbuf translen (radix,char_to_int) span buf len = + return $ L span $ itint $! transint $ parseUnsignedInteger + (offsetBytes transbuf buf) (subtract translen len) radix char_to_int + +-- some conveniences for use with tok_integral +tok_num = tok_integral ITinteger +tok_primint = tok_integral ITprimint +positive = id +negative = negate +decimal = (10,octDecDigit) +octal = (8,octDecDigit) +hexadecimal = (16,hexDigit) + +-- readRational can understand negative rationals, exponents, everything. tok_float str = ITrational $! readRational str -prim_float str = ITprimfloat $! readRational str -prim_double str = ITprimdouble $! readRational str +tok_primfloat str = ITprimfloat $! 
readRational str +tok_primdouble str = ITprimdouble $! readRational str -- ----------------------------------------------------------------------------- -- Layout processing @@ -861,7 +1042,7 @@ do_layout_left span _buf _len = do setLine :: Int -> Action setLine code span buf len = do - let line = parseInteger buf len 10 octDecDigit + let line = parseUnsignedInteger buf len 10 octDecDigit setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0) -- subtract one: the line number refers to the *following* line popLexState @@ -921,8 +1102,8 @@ lex_string s = do Just ('"',i) -> do setInput i - glaexts <- extension glaExtsEnabled - if glaexts + magicHash <- extension magicHashEnabled + if magicHash then do i <- getInput case alexGetChar' i of @@ -1006,9 +1187,9 @@ lex_char_tok span buf len = do -- We've seen ' finish_char_tok :: SrcLoc -> Char -> P (Located Token) finish_char_tok loc ch -- We've already seen the closing quote -- Just need to check for trailing # - = do glaexts <- extension glaExtsEnabled + = do magicHash <- extension magicHashEnabled i@(AI end _ _) <- getInput - if glaexts then do + if magicHash then do case alexGetChar' i of Just ('#',i@(AI end _ _)) -> do setInput i @@ -1141,11 +1322,56 @@ getCharOrFail = do Just (c,i) -> do setInput i; return c -- ----------------------------------------------------------------------------- +-- QuasiQuote + +lex_quasiquote_tok :: Action +lex_quasiquote_tok span buf len = do + let quoter = reverse $ takeWhile (/= '$') + $ reverse $ lexemeToString buf (len - 1) + quoteStart <- getSrcLoc + quote <- lex_quasiquote "" + end <- getSrcLoc + return (L (mkSrcSpan (srcSpanStart span) end) + (ITquasiQuote (mkFastString quoter, + mkFastString (reverse quote), + mkSrcSpan quoteStart end))) + +lex_quasiquote :: String -> P String +lex_quasiquote s = do + i <- getInput + case alexGetChar' i of + Nothing -> lit_error + + Just ('\\',i) + | Just ('|',i) <- next -> do + setInput i; lex_quasiquote ('|' : s) + | Just (']',i) <- next -> do + setInput i; lex_quasiquote (']' : s) + where next = alexGetChar' i + + Just ('|',i) + | Just (']',i) <- next -> do + setInput i; return s + where next = alexGetChar' i + + Just (c, i) -> do + setInput i; lex_quasiquote (c : s) + +-- ----------------------------------------------------------------------------- +-- Warnings + +warn :: DynFlag -> SDoc -> Action +warn option warning srcspan _buf _len = do + addWarning option srcspan warning + lexToken + +-- ----------------------------------------------------------------------------- -- The Parse Monad data LayoutContext = NoLayout | Layout !Int + deriving Show data ParseResult a = POk PState a @@ -1157,11 +1383,14 @@ data ParseResult a data PState = PState { buffer :: StringBuffer, + dflags :: DynFlags, + messages :: Messages, last_loc :: SrcSpan, -- pos of previous token last_offs :: !Int, -- offset of the previous token from the -- beginning of the current line. -- \t is equal to 8 spaces. 
last_len :: !Int, -- len of previous token + last_line_len :: !Int, loc :: SrcLoc, -- current loc (end of prev token + 1) extsBitmap :: !Int, -- bitmap that determines permitted extensions context :: [LayoutContext], @@ -1213,8 +1442,12 @@ setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} () getSrcLoc :: P SrcLoc getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc -setLastToken :: SrcSpan -> Int -> P () -setLastToken loc len = P $ \s -> POk s{ last_loc=loc, last_len=len } () +setLastToken :: SrcSpan -> Int -> Int -> P () +setLastToken loc len line_len = P $ \s -> POk s { + last_loc=loc, + last_len=len, + last_line_len=line_len +} () data AlexInput = AI SrcLoc {-#UNPACK#-}!Int StringBuffer @@ -1242,6 +1475,9 @@ alexGetChar (AI loc ofs s) adj_c | c <= '\x06' = non_graphic | c <= '\xff' = c + -- Alex doesn't handle Unicode, so when Unicode + -- character is encoutered we output these values + -- with the actual character value hidden in the state. | otherwise = case generalCategory c of UppercaseLetter -> upper @@ -1305,42 +1541,66 @@ getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls -- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed -- integer -glaExtsBit, ffiBit, parrBit :: Int -glaExtsBit = 0 +genericsBit, ffiBit, parrBit :: Int +genericsBit = 0 -- {| and |} ffiBit = 1 parrBit = 2 arrowsBit = 4 thBit = 5 ipBit = 6 -tvBit = 7 -- Scoped type variables enables 'forall' keyword +explicitForallBit = 7 -- the 'forall' keyword and '.' symbol bangPatBit = 8 -- Tells the parser to understand bang-patterns -- (doesn't affect the lexer) -idxTysBit = 9 -- indexed type families: 'family' keyword and kind sigs - -glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool -glaExtsEnabled flags = testBit flags glaExtsBit -ffiEnabled flags = testBit flags ffiBit -parrEnabled flags = testBit flags parrBit -arrowsEnabled flags = testBit flags arrowsBit -thEnabled flags = testBit flags thBit -ipEnabled flags = testBit flags ipBit -tvEnabled flags = testBit flags tvBit -bangPatEnabled flags = testBit flags bangPatBit -idxTysEnabled flags = testBit flags idxTysBit +tyFamBit = 9 -- indexed type families: 'family' keyword and kind sigs +haddockBit = 10 -- Lex and parse Haddock comments +magicHashBit = 11 -- # in both functions and operators +kindSigsBit = 12 -- Kind signatures on type variables +recursiveDoBit = 13 -- mdo +unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc +unboxedTuplesBit = 15 -- (# and #) +standaloneDerivingBit = 16 -- standalone instance deriving declarations +transformComprehensionsBit = 17 +qqBit = 18 -- enable quasiquoting + +genericsEnabled, ffiEnabled, parrEnabled :: Int -> Bool +always _ = True +genericsEnabled flags = testBit flags genericsBit +ffiEnabled flags = testBit flags ffiBit +parrEnabled flags = testBit flags parrBit +arrowsEnabled flags = testBit flags arrowsBit +thEnabled flags = testBit flags thBit +ipEnabled flags = testBit flags ipBit +explicitForallEnabled flags = testBit flags explicitForallBit +bangPatEnabled flags = testBit flags bangPatBit +tyFamEnabled flags = testBit flags tyFamBit +haddockEnabled flags = testBit flags haddockBit +magicHashEnabled flags = testBit flags magicHashBit +kindSigsEnabled flags = testBit flags kindSigsBit +recursiveDoEnabled flags = testBit flags recursiveDoBit +unicodeSyntaxEnabled flags = testBit flags unicodeSyntaxBit +unboxedTuplesEnabled flags = testBit flags unboxedTuplesBit +standaloneDerivingEnabled flags = testBit flags standaloneDerivingBit +transformComprehensionsEnabled flags = testBit 
flags transformComprehensionsBit +qqEnabled flags = testBit flags qqBit -- PState for parsing options pragmas -- pragState :: StringBuffer -> SrcLoc -> PState pragState buf loc = PState { - buffer = buf, - last_loc = mkSrcSpan loc loc, - last_offs = 0, - last_len = 0, - loc = loc, - extsBitmap = 0, - context = [], - lex_state = [bol, option_prags, 0] + buffer = buf, + messages = emptyMessages, + -- XXX defaultDynFlags is not right, but we don't have a real + -- dflags handy + dflags = defaultDynFlags, + last_loc = mkSrcSpan loc loc, + last_offs = 0, + last_len = 0, + last_line_len = 0, + loc = loc, + extsBitmap = 0, + context = [], + lex_state = [bol, option_prags, 0] } @@ -1349,31 +1609,57 @@ pragState buf loc = mkPState :: StringBuffer -> SrcLoc -> DynFlags -> PState mkPState buf loc flags = PState { - buffer = buf, - last_loc = mkSrcSpan loc loc, - last_offs = 0, - last_len = 0, - loc = loc, - extsBitmap = fromIntegral bitmap, - context = [], - lex_state = [bol, if glaExtsEnabled bitmap then glaexts else 0] + buffer = buf, + dflags = flags, + messages = emptyMessages, + last_loc = mkSrcSpan loc loc, + last_offs = 0, + last_len = 0, + last_line_len = 0, + loc = loc, + extsBitmap = fromIntegral bitmap, + context = [], + lex_state = [bol, 0] -- we begin in the layout state if toplev_layout is set } where - bitmap = glaExtsBit `setBitIf` dopt Opt_GlasgowExts flags - .|. ffiBit `setBitIf` dopt Opt_FFI flags - .|. parrBit `setBitIf` dopt Opt_PArr flags - .|. arrowsBit `setBitIf` dopt Opt_Arrows flags - .|. thBit `setBitIf` dopt Opt_TH flags - .|. ipBit `setBitIf` dopt Opt_ImplicitParams flags - .|. tvBit `setBitIf` dopt Opt_ScopedTypeVariables flags - .|. bangPatBit `setBitIf` dopt Opt_BangPatterns flags - .|. idxTysBit `setBitIf` dopt Opt_IndexedTypes flags + bitmap = genericsBit `setBitIf` dopt Opt_Generics flags + .|. ffiBit `setBitIf` dopt Opt_ForeignFunctionInterface flags + .|. parrBit `setBitIf` dopt Opt_PArr flags + .|. arrowsBit `setBitIf` dopt Opt_Arrows flags + .|. thBit `setBitIf` dopt Opt_TemplateHaskell flags + .|. qqBit `setBitIf` dopt Opt_QuasiQuotes flags + .|. ipBit `setBitIf` dopt Opt_ImplicitParams flags + .|. explicitForallBit `setBitIf` dopt Opt_ScopedTypeVariables flags + .|. explicitForallBit `setBitIf` dopt Opt_PolymorphicComponents flags + .|. explicitForallBit `setBitIf` dopt Opt_ExistentialQuantification flags + .|. explicitForallBit `setBitIf` dopt Opt_Rank2Types flags + .|. explicitForallBit `setBitIf` dopt Opt_RankNTypes flags + .|. bangPatBit `setBitIf` dopt Opt_BangPatterns flags + .|. tyFamBit `setBitIf` dopt Opt_TypeFamilies flags + .|. haddockBit `setBitIf` dopt Opt_Haddock flags + .|. magicHashBit `setBitIf` dopt Opt_MagicHash flags + .|. kindSigsBit `setBitIf` dopt Opt_KindSignatures flags + .|. recursiveDoBit `setBitIf` dopt Opt_RecursiveDo flags + .|. unicodeSyntaxBit `setBitIf` dopt Opt_UnicodeSyntax flags + .|. unboxedTuplesBit `setBitIf` dopt Opt_UnboxedTuples flags + .|. standaloneDerivingBit `setBitIf` dopt Opt_StandaloneDeriving flags + .|. 
transformComprehensionsBit `setBitIf` dopt Opt_TransformListComp flags -- setBitIf :: Int -> Bool -> Int b `setBitIf` cond | cond = bit b | otherwise = 0 +addWarning :: DynFlag -> SrcSpan -> SDoc -> P () +addWarning option srcspan warning + = P $ \s@PState{messages=(ws,es), dflags=d} -> + let warning' = mkWarnMsg srcspan alwaysQualify warning + ws' = if dopt option d then ws `snocBag` warning' else ws + in POk s{messages=(ws', es)} () + +getMessages :: PState -> Messages +getMessages PState{messages=ms} = ms + getContext :: P [LayoutContext] getContext = P $ \s@PState{context=ctx} -> POk s ctx @@ -1391,8 +1677,9 @@ popContext = P $ \ s@(PState{ buffer = buf, context = ctx, -- This is only used at the outer level of a module when the 'module' -- keyword is missing. pushCurrentContext :: P () -pushCurrentContext = P $ \ s@PState{ last_offs=offs, last_len=len, context=ctx } -> - POk s{context = Layout (offs-len) : ctx} () +pushCurrentContext = P $ \ s@PState{ last_offs=offs, last_line_len=len, context=ctx } -> + POk s{context = Layout (offs-len) : ctx} () +--trace ("off: " ++ show offs ++ ", len: " ++ show len) $ POk s{context = Layout (offs-len) : ctx} () getOffside :: P Ordering getOffside = P $ \s@PState{last_offs=offs, context=stk} -> @@ -1438,8 +1725,8 @@ lexError str = do lexer :: (Located Token -> P a) -> P a lexer cont = do - tok@(L _ tok__) <- lexToken - --trace ("token: " ++ show tok__) $ do + tok@(L span tok__) <- lexToken +-- trace ("token: " ++ show tok__) $ do cont tok lexToken :: P (Located Token) @@ -1449,7 +1736,7 @@ lexToken = do exts <- getExts case alexScanUser exts inp sc of AlexEOF -> do let span = mkSrcSpan loc1 loc1 - setLastToken span 0 + setLastToken span 0 0 return (L span ITeof) AlexError (AI loc2 _ buf) -> do reportLexError loc1 loc2 buf "lexical error" @@ -1457,11 +1744,11 @@ lexToken = do setInput inp2 lexToken AlexToken inp2@(AI end _ buf2) len t -> do - setInput inp2 - let span = mkSrcSpan loc1 end - let bytes = byteDiff buf buf2 - span `seq` setLastToken span bytes - t span buf bytes + setInput inp2 + let span = mkSrcSpan loc1 end + let bytes = byteDiff buf buf2 + span `seq` setLastToken span bytes bytes + t span buf bytes reportLexError loc1 loc2 buf str | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input")
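To see how the entry points touched by this patch fit together: a caller builds a PState with mkPState, runs a P computation by unwrapping it, and on success reads back any warnings queued by addWarning using getMessages. A hedged sketch, assuming the conventional newtype P a = P { unP :: PState -> ParseResult a } used throughout this module and only names visible in this patch (the helper name and the exact starting location are illustrative):

-- Lex a single token from a buffer, returning it together with any
-- warnings (e.g. the tab warning added at the top of this patch).
lexOneToken :: DynFlags -> StringBuffer -> FastString
            -> Maybe (Located Token, Messages)
lexOneToken dflags buf filename =
    case unP lexToken (mkPState buf start_loc dflags) of
      POk final_state tok -> Just (tok, getMessages final_state)
      _                   -> Nothing    -- PFailed: a lexical error
  where
    start_loc = mkSrcLoc filename 1 0   -- line 1, column 0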