X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fparser%2FLexer.x;h=872c7aab6ccf45297197b976221cdc64ba5c759c;hp=8d2a9a5cc87c1533e12a33d78b8206e8e4d87a34;hb=34c8d0312071f7d0f4d221a997d3408c653ef9e5;hpb=0c3e42b73205ec0811ab1899fa60bfdebd4ae77d

diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index 8d2a9a5..872c7aa 100644
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -12,7 +12,6 @@
 -----------------------------------------------------------------------------
 
 --   ToDo / known bugs:
---    - Unicode
 --    - parsing integers is a bit slow
 --    - readRational is a bit slow
 --
@@ -20,54 +19,71 @@
 --    - M... should be 3 tokens, not 1.
 --    - pragma-end should be only valid in a pragma
 
+--   qualified operator NOTES.
+--   
+--   - If M.(+) is a single lexeme, then..
+--     - Probably (+) should be a single lexeme too, for consistency.
+--       Otherwise ( + ) would be a prefix operator, but M.( + ) would not be.
+--     - But we have to rule out reserved operators, otherwise (..) becomes
+--       a different lexeme.
+--     - Should we therefore also rule out reserved operators in the qualified
+--       form?  This is quite difficult to achieve.  We don't do it for
+--       qualified varids.
+
 {
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
---     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
+-- XXX The above flags turn off warnings in the generated code:
+{-# LANGUAGE BangPatterns #-}
+{-# OPTIONS_GHC -fno-warn-unused-matches #-}
+{-# OPTIONS_GHC -fno-warn-unused-binds #-}
+{-# OPTIONS_GHC -fno-warn-unused-imports #-}
+{-# OPTIONS_GHC -fno-warn-missing-signatures #-}
+-- But alex still generates some code that causes the "lazy unlifted bindings"
+-- warning, and old compilers don't know about it so we can't easily turn
+-- it off, so for now we use the sledge hammer:
+{-# OPTIONS_GHC -w #-}
+
+{-# OPTIONS_GHC -funbox-strict-fields #-}
 
 module Lexer (
    Token(..), lexer, pragState, mkPState, PState(..),
    P(..), ParseResult(..), getSrcLoc, 
+   getPState, getDynFlags, withThisPackage,
    failLocMsgP, failSpanMsgP, srcParseFail,
-   getMessages,
+   getMessages, 
    popContext, pushCurrentContext, setLastToken, setSrcLoc,
+   activeContext, nextIsEOF,
    getLexState, popLexState, pushLexState,
-   extension, standaloneDerivingEnabled, bangPatEnabled,
-   addWarning
+   extension, bangPatEnabled, datatypeContextsEnabled,
+   addWarning,
+   incrBracketDepth, decrBracketDepth, getParserBrakDepth,
+   lexTokenStream
   ) where
 
-#include "HsVersions.h"
-
 import Bag
 import ErrUtils
 import Outputable
 import StringBuffer
 import FastString
-import FastTypes
 import SrcLoc
 import UniqFM
 import DynFlags
+import Module
 import Ctype
-import Util		( maybePrefixMatch, readRational )
+import BasicTypes	( InlineSpec(..), RuleMatchInfo(..) )
+import Util		( readRational )
 
 import Control.Monad
 import Data.Bits
-import Data.Char 	( chr, ord, isSpace )
+import Data.Char
+import Data.List
+import Data.Maybe
+import Data.Map (Map)
+import qualified Data.Map as Map
 import Data.Ratio
-import Debug.Trace
-
-#if __GLASGOW_HASKELL__ >= 605
-import Data.Char 	( GeneralCategory(..), generalCategory, isPrint, isUpper )
-#else
-import Compat.Unicode	( GeneralCategory(..), generalCategory, isPrint, isUpper )
-#endif
 }
 
 $unispace    = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
-$whitechar   = [\ \n\r\f\v\xa0 $unispace]
+$whitechar   = [\ \n\r\f\v $unispace]
 $white_no_nl = $whitechar # \n
 $tab         = \t
 
@@ -77,16 +93,16 @@ $decdigit  = $ascdigit -- for now, should really be $digit (ToDo)
 $digit     = [$ascdigit $unidigit]
 
 $special   = [\(\)\,\;\[\]\`\{\}]
-$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~ \xa1-\xbf \xd7 \xf7]
+$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
 $unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
 
 $unilarge  = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
-$asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
+$asclarge  = [A-Z]
 $large     = [$asclarge $unilarge]
 
 $unismall  = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
-$ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
+$ascsmall  = [a-z]
 $small     = [$ascsmall $unismall \_]
 
 $unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
@@ -98,6 +114,8 @@ $symchar   = [$symbol \:]
 $nl        = [\n\r]
 $idchar    = [$small $large $digit \']
 
+$pragmachar = [$small $large $digit]
+
 $docsym    = [\| \^ \* \$]
 
 @varid     = $small $idchar*
@@ -125,7 +143,7 @@ haskell :-
 
 -- everywhere: skip whitespace and comments
 $white_no_nl+ 				;
-$tab+         { warn Opt_WarnTabs (text "Tab character") }
+$tab+         { warn Opt_WarnTabs (text "Warning: Tab character") }
 
 -- Everywhere: deal with nested comments.  We explicitly rule out
 -- pragmas, "{-#", so that we don't accidentally treat them as comments.
@@ -149,12 +167,12 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
 -- space followed by a Haddock comment symbol (docsym) (in which case we'd
 -- have a Haddock comment). The rules then munch the rest of the line.
 
-"-- " ~[$docsym \#] .* ;
-"--" [^$symbol : \ ] .* ;
+"-- " ~[$docsym \#] .* { lineCommentToken }
+"--" [^$symbol : \ ] .* { lineCommentToken }
 
 -- Next, match Haddock comments if no -haddock flag
 
-"-- " [$docsym \#] .* / { ifExtension (not . haddockEnabled) } ;
+"-- " [$docsym \#] .* / { ifExtension (not . haddockEnabled) } { lineCommentToken }
 
 -- Now, when we've matched comments that begin with 2 dashes and continue
 -- with a different character, we need to match comments that begin with three
@@ -162,17 +180,17 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
 -- make sure that the first non-dash character isn't a symbol, and munch the
 -- rest of the line.
 
-"---"\-* [^$symbol :] .* ;
+"---"\-* [^$symbol :] .* { lineCommentToken }
 
 -- Since the previous rules all match dashes followed by at least one
 -- character, we also need to match a whole line filled with just dashes.
 
-"--"\-* / { atEOL } ;
+"--"\-* / { atEOL } { lineCommentToken }
 
 -- We need this rule since none of the other single line comment rules
 -- actually match this case.
 
-"-- " / { atEOL } ;
+"-- " / { atEOL } { lineCommentToken }
 
 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 -- blank lines) until we find a non-whitespace character, then do layout
@@ -195,7 +213,7 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
 -- context if the curly brace is missing.
 -- Careful! This stuff is quite delicate.
 <layout, layout_do> {
-  \{ / { notFollowedBy '-' }		{ pop_and open_brace }
+  \{ / { notFollowedBy '-' }		{ hopefully_open_brace }
 	-- we might encounter {-# here, but {- has been handled already
   \n					;
   ^\# (line)?				{ begin line_prag1 }
@@ -212,7 +230,8 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
 
 <0,option_prags> \n				{ begin bol }
 
-"{-#" $whitechar* (line|LINE) 		{ begin line_prag2 }
+"{-#" $whitechar* $pragmachar+ / { known_pragma linePrags }
+                                { dispatch_pragmas linePrags }
 
 -- single-line line pragmas, of the form
 --    # <line> "<file>" <extra-stuff> \n
@@ -228,66 +247,49 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
    -- NOTE: accept -} at the end of a LINE pragma, for compatibility
    -- with older versions of GHC which generated these.
 
--- We only want RULES pragmas to be picked up when explicit forall
--- syntax is enabled is on, because the contents of the pragma always
--- uses it. If it's not on then we're sure to get a parse error.
--- (ToDo: we should really emit a warning when ignoring pragmas)
--- XXX Now that we can enable this without the -fglasgow-exts hammer,
--- is it better just to let the parse error happen?
-<0>
-  "{-#" $whitechar* (RULES|rules) / { ifExtension explicitForallEnabled } { token ITrules_prag }
-
 <0,option_prags> {
-  "{-#" $whitechar* (INLINE|inline)	{ token (ITinline_prag True) }
-  "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
-  					{ token (ITinline_prag False) }
-  "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
-  					{ token ITspec_prag }
-  "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
-	$whitechar* (INLINE|inline)	{ token (ITspec_inline_prag True) }
-  "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
-	$whitechar* (NO(T?)INLINE|no(t?)inline)
-					{ token (ITspec_inline_prag False) }
-  "{-#" $whitechar* (SOURCE|source)	{ token ITsource_prag }
-  "{-#" $whitechar* (DEPRECATED|deprecated)
-  					{ token ITdeprecated_prag }
-  "{-#" $whitechar* (SCC|scc)		{ token ITscc_prag }
-  "{-#" $whitechar* (GENERATED|generated)
-  					{ token ITgenerated_prag }
-  "{-#" $whitechar* (CORE|core)		{ token ITcore_prag }
-  "{-#" $whitechar* (UNPACK|unpack)	{ token ITunpack_prag }
-
- "{-#"                                 { nested_comment lexToken }
+  "{-#" $whitechar* $pragmachar+ 
+        $whitechar+ $pragmachar+ / { known_pragma twoWordPrags }
+                                 { dispatch_pragmas twoWordPrags }
+
+  "{-#" $whitechar* $pragmachar+ / { known_pragma oneWordPrags }
+                                 { dispatch_pragmas oneWordPrags }
+
+  -- We ignore all these pragmas, but don't generate a warning for them
+  "{-#" $whitechar* $pragmachar+ / { known_pragma ignoredPrags }
+                                 { dispatch_pragmas ignoredPrags }
 
   -- ToDo: should only be valid inside a pragma:
-  "#-}" 				{ token ITclose_prag}
+  "#-}" 				{ endPrag }
 }
 
 <option_prags> {
-  "{-#"  $whitechar* (OPTIONS|options)   { lex_string_prag IToptions_prag }
-  "{-#"  $whitechar* (OPTIONS_GHC|options_ghc)
-                                        { lex_string_prag IToptions_prag }
-  "{-#"  $whitechar* (OPTIONS_HADDOCK|options_haddock)
-                                         { lex_string_prag ITdocOptions }
+  "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
+                                   { dispatch_pragmas fileHeaderPrags }
+
   "-- #"                                 { multiline_doc_comment }
-  "{-#"  $whitechar* (LANGUAGE|language) { token ITlanguage_prag }
-  "{-#"  $whitechar* (INCLUDE|include)   { lex_string_prag ITinclude_prag }
 }
 
 <0> {
-  "-- #" .* ;
+  -- In the "0" mode we ignore these pragmas
+  "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
+                     { nested_comment lexToken }
+}
+
+<0> {
+  "-- #" .* { lineCommentToken }
 }
 
 <0,option_prags> {
-	-- This is to catch things like {-# OPTIONS OPTIONS_HUGS ... 
-  "{-#" $whitechar* $idchar+		{ nested_comment lexToken }
+  "{-#"  { warnThen Opt_WarnUnrecognisedPragmas (text "Unrecognised pragma")
+                    (nested_comment lexToken) }
 }
 
 -- '0' state: ordinary lexemes
 
 -- Haddock comments
 
-<0> {
+<0,option_prags> {
   "-- " $docsym      / { ifExtension haddockEnabled } { multiline_doc_comment }
   "{-" \ ? $docsym   / { ifExtension haddockEnabled } { nested_doc_comment }
 }
@@ -309,8 +311,12 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
   \$ @varid / { ifExtension thEnabled }	{ skip_one_varid ITidEscape }
   "$("	    / { ifExtension thEnabled }	{ token ITparenEscape }
 
+-- For backward compatibility, accept the old dollar syntax
   "[$" @varid "|"  / { ifExtension qqEnabled }
                      { lex_quasiquote_tok }
+
+  "[" @varid "|"  / { ifExtension qqEnabled }
+                     { lex_quasiquote_tok }
 }
 
 <0> {
@@ -320,6 +326,15 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
 }
 
 <0> {
+  "<[" / { ifExtension hetMetEnabled `alexAndPred` notFollowedBySymbol }
+					{ special ITopenBrak }
+  "]>" / { ifExtension hetMetEnabled }  { special ITcloseBrak }
+  "~~" / { ifExtension hetMetEnabled }  { special ITescape }
+  "%%" / { ifExtension hetMetEnabled }  { special ITdoublePercent }
+  "~~$" / { ifExtension hetMetEnabled }  { special ITescapeDollar }
+}
+
+<0> {
   \? @varid / { ifExtension ipEnabled }	{ skip_one_varid ITdupipvarid }
 }
 
@@ -362,38 +377,42 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
   @conid "#"+       / { ifExtension magicHashEnabled } { idtoken conid }
 }
 
--- ToDo: M.(,,,)
-
+-- ToDo: - move `var` and (sym) into lexical syntax?
+--       - remove backquote from $special?
 <0> {
-  @qual @varsym			{ idtoken qvarsym }
-  @qual @consym			{ idtoken qconsym }
-  @varsym			{ varsym }
-  @consym			{ consym }
+  @qual @varsym                                    { idtoken qvarsym }
+  @qual @consym                                    { idtoken qconsym }
+  @varsym                                          { varsym }
+  @consym                                          { consym }
 }
 
 -- For the normal boxed literals we need to be careful
 -- when trying to be close to Haskell98
 <0> {
   -- Normal integral literals (:: Num a => a, from Integer)
-  @decimal			{ tok_num positive 0 0 decimal }
-  0[oO] @octal			{ tok_num positive 2 2 octal }
-  0[xX] @hexadecimal		{ tok_num positive 2 2 hexadecimal }
+  @decimal           { tok_num positive 0 0 decimal }
+  0[oO] @octal       { tok_num positive 2 2 octal }
+  0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }
 
   -- Normal rational literals (:: Fractional a => a, from Rational)
-  @floating_point		{ strtoken tok_float }
+  @floating_point    { strtoken tok_float }
 }
 
 <0> {
-  -- Unboxed ints (:: Int#)
+  -- Unboxed ints (:: Int#) and words (:: Word#)
   -- It's simpler (and faster?) to give separate cases to the negatives,
   -- especially considering octal/hexadecimal prefixes.
-  @decimal \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
-  0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
-  0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
-  @negative @decimal \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
-  @negative 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
+  @decimal                     \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
+  0[oO] @octal                 \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
+  0[xX] @hexadecimal           \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
+  @negative @decimal           \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
+  @negative 0[oO] @octal       \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
   @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
 
+  @decimal                     \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal }
+  0[oO] @octal                 \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
+  0[xX] @hexadecimal           \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
+
   -- Unboxed floats and doubles (:: Float#, :: Double#)
   -- prim_{float,double} work with signed literals
   @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
@@ -447,10 +466,11 @@ data Token
   | ITdynamic
   | ITsafe
   | ITthreadsafe
+  | ITinterruptible
   | ITunsafe
   | ITstdcallconv
   | ITccallconv
-  | ITdotnet
+  | ITprimcallconv
   | ITmdo
   | ITfamily
   | ITgroup
@@ -458,17 +478,19 @@ data Token
   | ITusing
 
 	-- Pragmas
-  | ITinline_prag Bool		-- True <=> INLINE, False <=> NOINLINE
+  | ITinline_prag InlineSpec RuleMatchInfo
   | ITspec_prag			-- SPECIALISE	
   | ITspec_inline_prag Bool	-- SPECIALISE INLINE (or NOINLINE)
   | ITsource_prag
   | ITrules_prag
+  | ITwarning_prag
   | ITdeprecated_prag
   | ITline_prag
   | ITscc_prag
   | ITgenerated_prag
   | ITcore_prag                 -- hdaume: core annotations
   | ITunpack_prag
+  | ITann_prag
   | ITclose_prag
   | IToptions_prag String
   | ITinclude_prag String
@@ -499,8 +521,8 @@ data Token
   | ITvocurly
   | ITvccurly
   | ITobrack
-  | ITopabrack			-- [:, for parallel arrays with -fparr
-  | ITcpabrack			-- :], for parallel arrays with -fparr
+  | ITopabrack			-- [:, for parallel arrays with -XParallelArrays
+  | ITcpabrack			-- :], for parallel arrays with -XParallelArrays
   | ITcbrack
   | IToparen
   | ITcparen
@@ -519,11 +541,11 @@ data Token
   | ITqconid  (FastString,FastString)
   | ITqvarsym (FastString,FastString)
   | ITqconsym (FastString,FastString)
+  | ITprefixqvarsym (FastString,FastString)
+  | ITprefixqconsym (FastString,FastString)
 
   | ITdupipvarid   FastString	-- GHC extension: implicit param: ?x
 
-  | ITpragma StringBuffer
-
   | ITchar       Char
   | ITstring     FastString
   | ITinteger    Integer
@@ -532,10 +554,11 @@ data Token
   | ITprimchar   Char
   | ITprimstring FastString
   | ITprimint    Integer
+  | ITprimword   Integer
   | ITprimfloat  Rational
   | ITprimdouble Rational
 
-  -- MetaHaskell extension tokens
+  -- Template Haskell extension tokens
   | ITopenExpQuote  		--  [| or [e|
   | ITopenPatQuote		--  [p|
   | ITopenDecQuote		--  [d|
@@ -557,6 +580,13 @@ data Token
   | ITLarrowtail		--  -<<
   | ITRarrowtail		--  >>-
 
+  -- Heterogeneous Metaprogramming extension
+  | ITopenBrak			--  <[
+  | ITcloseBrak			--  ]>
+  | ITescape     		--  ~~
+  | ITescapeDollar    		--  ~~$
+  | ITdoublePercent             --  %%
+
   | ITunknown String		-- Used when the lexer can't make sense of it
   | ITeof			-- end of file token
 
@@ -567,11 +597,14 @@ data Token
   | ITdocSection      Int String -- a section heading
   | ITdocOptions      String     -- doc options (prune, ignore-exports, etc)
   | ITdocOptionsOld   String     -- doc options declared "-- # ..."-style
+  | ITlineComment     String     -- comment starting by "--"
+  | ITblockComment    String     -- comment in {- -}
 
 #ifdef DEBUG
   deriving Show -- debugging
 #endif
 
+{-
 isSpecial :: Token -> Bool
 -- If we see M.x, where x is a keyword, but
 -- is special, we treat is as just plain M.x, 
@@ -585,15 +618,18 @@ isSpecial ITlabel     	= True
 isSpecial ITdynamic   	= True
 isSpecial ITsafe    	= True
 isSpecial ITthreadsafe 	= True
+isSpecial ITinterruptible = True
 isSpecial ITunsafe    	= True
 isSpecial ITccallconv   = True
 isSpecial ITstdcallconv = True
+isSpecial ITprimcallconv = True
 isSpecial ITmdo		= True
 isSpecial ITfamily	= True
 isSpecial ITgroup   = True
 isSpecial ITby      = True
 isSpecial ITusing   = True
 isSpecial _             = False
+-}
 
 -- the bitmap provided as the third component indicates whether the
 -- corresponding extension keyword is valid under the extension options
@@ -602,6 +638,7 @@ isSpecial _             = False
 -- facilitates using a keyword in two different extensions that can be
 -- activated independently)
 --
+reservedWordsFM :: UniqFM (Token, Int)
 reservedWordsFM = listToUFM $
 	map (\(x, y, z) -> (mkFastString x, (y, z)))
        [( "_",		ITunderscore, 	0 ),
@@ -631,7 +668,7 @@ reservedWordsFM = listToUFM $
 	( "where",	ITwhere, 	0 ),
 	( "_scc_",	ITscc, 		0 ),		-- ToDo: remove
 
-    ( "forall",	ITforall,	 bit explicitForallBit),
+    ( "forall",	ITforall,	 bit explicitForallBit .|. bit inRulePragBit),
 	( "mdo",	ITmdo,		 bit recursiveDoBit),
 	( "family",	ITfamily,	 bit tyFamBit),
     ( "group",  ITgroup,     bit transformComprehensionsBit),
@@ -643,13 +680,14 @@ reservedWordsFM = listToUFM $
 	( "label",	ITlabel,	 bit ffiBit),
 	( "dynamic",	ITdynamic,	 bit ffiBit),
 	( "safe",	ITsafe,		 bit ffiBit),
-	( "threadsafe",	ITthreadsafe,	 bit ffiBit),
+	( "threadsafe",	ITthreadsafe,	 bit ffiBit),  -- ToDo: remove
+	( "interruptible", ITinterruptible, bit ffiBit),
 	( "unsafe",	ITunsafe,	 bit ffiBit),
 	( "stdcall",    ITstdcallconv,	 bit ffiBit),
 	( "ccall",      ITccallconv,	 bit ffiBit),
-	( "dotnet",     ITdotnet,	 bit ffiBit),
+	( "prim",       ITprimcallconv,	 bit ffiBit),
 
-	( "rec",	ITrec,		 bit arrowsBit),
+	( "rec",	ITrec,		 bit recBit),
 	( "proc",	ITproc,		 bit arrowsBit)
      ]
 
@@ -672,27 +710,32 @@ reservedSymsFM = listToUFM $
        ,("!",   ITbang,     always)
 
         -- For data T (a::*) = MkT
-       ,("*", ITstar, \i -> kindSigsEnabled i || tyFamEnabled i)
+       ,("*", ITstar, always) -- \i -> kindSigsEnabled i || tyFamEnabled i)
         -- For 'forall a . t'
-       ,(".", ITdot, explicitForallEnabled)
+       ,(".", ITdot,  always) -- \i -> explicitForallEnabled i || inRulePrag i)
 
        ,("-<",  ITlarrowtail, arrowsEnabled)
        ,(">-",  ITrarrowtail, arrowsEnabled)
        ,("-<<", ITLarrowtail, arrowsEnabled)
        ,(">>-", ITRarrowtail, arrowsEnabled)
 
-#if __GLASGOW_HASKELL__ >= 605
        ,("â·",   ITdcolon, unicodeSyntaxEnabled)
        ,("â",   ITdarrow, unicodeSyntaxEnabled)
        ,("â",   ITforall, \i -> unicodeSyntaxEnabled i &&
                                 explicitForallEnabled i)
        ,("â",   ITrarrow, unicodeSyntaxEnabled)
        ,("â",   ITlarrow, unicodeSyntaxEnabled)
-       ,("â¯",   ITdotdot, unicodeSyntaxEnabled)
+
+       ,("â¤",   ITlarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+       ,("â¤",   ITrarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+       ,("â¤",   ITLarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+       ,("â¤",   ITRarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+
+       ,("â", ITstar, unicodeSyntaxEnabled)
+
         -- ToDo: ideally, â and â· should be "specials", so that they cannot
         -- form part of a large operator.  This would let us have a better
         -- syntax for kinds: Éâ·*â* would be a legal kind signature. (maybe).
-#endif
        ]
 
 -- -----------------------------------------------------------------------------
@@ -701,11 +744,11 @@ reservedSymsFM = listToUFM $
 type Action = SrcSpan -> StringBuffer -> Int -> P (Located Token)
 
 special :: Token -> Action
-special tok span _buf len = return (L span tok)
+special tok span _buf _len = return (L span tok)
 
 token, layout_token :: Token -> Action
-token t span buf len = return (L span t)
-layout_token t span buf len = pushLexState layout >> return (L span t)
+token t span _buf _len = return (L span t)
+layout_token t span _buf _len = pushLexState layout >> return (L span t)
 
 idtoken :: (StringBuffer -> Int -> Token) -> Action
 idtoken f span buf len = return (L span $! (f buf len))
@@ -727,18 +770,36 @@ begin :: Int -> Action
 begin code _span _str _len = do pushLexState code; lexToken
 
 pop :: Action
-pop _span _buf _len = do popLexState; lexToken
+pop _span _buf _len = do _ <- popLexState
+                         lexToken
+
+hopefully_open_brace :: Action
+hopefully_open_brace span buf len
+ = do relaxed <- extension relaxedLayout
+      ctx <- getContext
+      (AI l _) <- getInput
+      let offset = srcLocCol l
+          isOK = relaxed ||
+                 case ctx of
+                 Layout prev_off : _ -> prev_off < offset
+                 _                   -> True
+      if isOK then pop_and open_brace span buf len
+              else failSpanMsgP span (text "Missing block")
 
 pop_and :: Action -> Action
-pop_and act span buf len = do popLexState; act span buf len
+pop_and act span buf len = do _ <- popLexState
+                              act span buf len
 
 {-# INLINE nextCharIs #-}
+nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool
 nextCharIs buf p = not (atEnd buf) && p (currentChar buf)
 
-notFollowedBy char _ _ _ (AI _ _ buf) 
+notFollowedBy :: Char -> AlexAccPred Int
+notFollowedBy char _ _ _ (AI _ buf) 
   = nextCharIs buf (/=char)
 
-notFollowedBySymbol _ _ _ (AI _ _ buf)
+notFollowedBySymbol :: AlexAccPred Int
+notFollowedBySymbol _ _ _ (AI _ buf)
   = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
 
 -- We must reject doc comments as being ordinary comments everywhere.
@@ -746,20 +807,26 @@ notFollowedBySymbol _ _ _ (AI _ _ buf)
 -- maximal munch, but not always, because the nested comment rule is
 -- valid in all states, but the doc-comment rules are only valid in
 -- the non-layout states.
-isNormalComment bits _ _ (AI _ _ buf)
+isNormalComment :: AlexAccPred Int
+isNormalComment bits _ _ (AI _ buf)
   | haddockEnabled bits = notFollowedByDocOrPragma
   | otherwise           = nextCharIs buf (/='#')
   where
     notFollowedByDocOrPragma
        = not $ spaceAndP buf (`nextCharIs` (`elem` "|^*$#"))
 
+spaceAndP :: StringBuffer -> (StringBuffer -> Bool) -> Bool
 spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf))
 
-haddockDisabledAnd p bits _ _ (AI _ _ buf)
+{-
+haddockDisabledAnd p bits _ _ (AI _ buf)
   = if haddockEnabled bits then False else (p buf)
+-}
 
-atEOL _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf == '\n'
+atEOL :: AlexAccPred Int
+atEOL _ _ _ (AI _ buf) = atEnd buf || currentChar buf == '\n'
 
+ifExtension :: (Int -> Bool) -> AlexAccPred Int
 ifExtension pred bits _ _ _ = pred bits
 
 multiline_doc_comment :: Action
@@ -790,6 +857,11 @@ multiline_doc_comment span buf _len = withLexedDocType (worker "")
             | otherwise -> input
           Nothing -> input
 
+lineCommentToken :: Action
+lineCommentToken span buf len = do
+  b <- extension rawTokenStreamEnabled
+  if b then strtoken ITlineComment span buf len else lexToken
+
 {-
   nested comments require traversing by hand, they can't be parsed
   using regular expressions.
@@ -797,20 +869,24 @@ multiline_doc_comment span buf _len = withLexedDocType (worker "")
 nested_comment :: P (Located Token) -> Action
 nested_comment cont span _str _len = do
   input <- getInput
-  go (1::Int) input
+  go "" (1::Int) input
   where
-    go 0 input = do setInput input; cont
-    go n input = case alexGetChar input of
+    go commentAcc 0 input = do setInput input
+                               b <- extension rawTokenStreamEnabled
+                               if b
+                                 then docCommentEnd input commentAcc ITblockComment _str span
+                                 else cont
+    go commentAcc n input = case alexGetChar input of
       Nothing -> errBrace input span
       Just ('-',input) -> case alexGetChar input of
         Nothing  -> errBrace input span
-        Just ('\125',input) -> go (n-1) input
-        Just (c,_)          -> go n input
+        Just ('\125',input) -> go commentAcc (n-1) input
+        Just (_,_)          -> go ('-':commentAcc) n input
       Just ('\123',input) -> case alexGetChar input of
         Nothing  -> errBrace input span
-        Just ('-',input) -> go (n+1) input
-        Just (c,_)       -> go n input
-      Just (c,input) -> go n input
+        Just ('-',input) -> go ('-':'\123':commentAcc) (n+1) input
+        Just (_,_)       -> go ('\123':commentAcc) n input
+      Just (c,input) -> go (c:commentAcc) n input
 
 nested_doc_comment :: Action
 nested_doc_comment span buf _len = withLexedDocType (go "")
@@ -819,32 +895,47 @@ nested_doc_comment span buf _len = withLexedDocType (go "")
       Nothing -> errBrace input span
       Just ('-',input) -> case alexGetChar input of
         Nothing -> errBrace input span
-        Just ('\125',input@(AI end _ buf2)) ->
+        Just ('\125',input) ->
           docCommentEnd input commentAcc docType buf span
-        Just (c,_) -> go ('-':commentAcc) input docType False
+        Just (_,_) -> go ('-':commentAcc) input docType False
       Just ('\123', input) -> case alexGetChar input of
         Nothing  -> errBrace input span
         Just ('-',input) -> do
           setInput input
           let cont = do input <- getInput; go commentAcc input docType False
           nested_comment cont span buf _len
-        Just (c,_) -> go ('\123':commentAcc) input docType False
+        Just (_,_) -> go ('\123':commentAcc) input docType False
       Just (c,input) -> go (c:commentAcc) input docType False
 
+withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (Located Token))
+                 -> P (Located Token)
 withLexedDocType lexDocComment = do
-  input@(AI _ _ buf) <- getInput
+  input@(AI _ buf) <- getInput
   case prevChar buf ' ' of
     '|' -> lexDocComment input ITdocCommentNext False
     '^' -> lexDocComment input ITdocCommentPrev False
     '$' -> lexDocComment input ITdocCommentNamed False
     '*' -> lexDocSection 1 input
     '#' -> lexDocComment input ITdocOptionsOld False
+    _ -> panic "withLexedDocType: Bad doc type"
  where 
     lexDocSection n input = case alexGetChar input of 
       Just ('*', input) -> lexDocSection (n+1) input
-      Just (c, _) -> lexDocComment input (ITdocSection n) True
+      Just (_,   _)     -> lexDocComment input (ITdocSection n) True
       Nothing -> do setInput input; lexToken -- eof reached, lex it normally
 
+-- RULES pragmas turn on the forall and '.' keywords, and we turn them
+-- off again at the end of the pragma.
+rulePrag :: Action
+rulePrag span _buf _len = do
+  setExts (.|. bit inRulePragBit)
+  return (L span ITrules_prag)
+
+endPrag :: Action
+endPrag span _buf _len = do
+  setExts (.&. complement (bit inRulePragBit))
+  return (L span ITclose_prag)
+
 -- docCommentEnd
 -------------------------------------------------------------------------------
 -- This function is quite tricky. We can't just return a new token, we also
@@ -853,32 +944,21 @@ withLexedDocType lexDocComment = do
 -- it writes the wrong token length to the parser state. This function is
 -- called afterwards, so it can just update the state. 
 
--- This is complicated by the fact that Haddock tokens can span multiple lines, 
--- which is something that the original lexer didn't account for. 
--- I have added last_line_len in the parser state which represents the length 
--- of the part of the token that is on the last line. It is now used for layout 
--- calculation in pushCurrentContext instead of last_len. last_len is, like it 
--- was before, the full length of the token, and it is now only used for error
--- messages. /Waern 
-
 docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer ->
                  SrcSpan -> P (Located Token) 
 docCommentEnd input commentAcc docType buf span = do
   setInput input
-  let (AI loc last_offs nextBuf) = input
+  let (AI loc nextBuf) = input
       comment = reverse commentAcc
       span' = mkSrcSpan (srcSpanStart span) loc
       last_len = byteDiff buf nextBuf
       
-      last_line_len = if (last_offs - last_len < 0) 
-        then last_offs
-        else last_len  
-  
-  span `seq` setLastToken span' last_len last_line_len
+  span `seq` setLastToken span' last_len
   return (L span' (docType comment))
  
-errBrace (AI end _ _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'"
- 
+errBrace :: AlexInput -> SrcSpan -> P a
+errBrace (AI end _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'"
+
 open_brace, close_brace :: Action
 open_brace span _str _len = do 
   ctx <- getContext
@@ -888,14 +968,15 @@ close_brace span _str _len = do
   popContext
   return (L span ITccurly)
 
-qvarid buf len = ITqvarid $! splitQualName buf len
-qconid buf len = ITqconid $! splitQualName buf len
+qvarid, qconid :: StringBuffer -> Int -> Token
+qvarid buf len = ITqvarid $! splitQualName buf len False
+qconid buf len = ITqconid $! splitQualName buf len False
 
-splitQualName :: StringBuffer -> Int -> (FastString,FastString)
+splitQualName :: StringBuffer -> Int -> Bool -> (FastString,FastString)
 -- takes a StringBuffer and a length, and returns the module name
 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 -- because of hierarchical module names.
-splitQualName orig_buf len = split orig_buf orig_buf
+splitQualName orig_buf len parens = split orig_buf orig_buf
   where
     split buf dot_buf
 	| orig_buf `byteDiff` buf >= len  = done dot_buf
@@ -915,11 +996,15 @@ splitQualName orig_buf len = split orig_buf orig_buf
 
     done dot_buf =
 	(lexemeToFastString orig_buf (qual_size - 1),
-	 lexemeToFastString dot_buf (len - qual_size))
+	 if parens -- Prelude.(+)
+            then lexemeToFastString (stepOn dot_buf) (len - qual_size - 2)
+            else lexemeToFastString dot_buf (len - qual_size))
       where
 	qual_size = orig_buf `byteDiff` dot_buf
 
-varid span buf len = 
+varid :: Action
+varid span buf len =
+  fs `seq`
   case lookupUFM reservedWordsFM fs of
 	Just (keyword,0)    -> do
 		maybe_layout keyword
@@ -933,15 +1018,22 @@ varid span buf len =
   where
 	fs = lexemeToFastString buf len
 
+conid :: StringBuffer -> Int -> Token
 conid buf len = ITconid fs
   where fs = lexemeToFastString buf len
 
-qvarsym buf len = ITqvarsym $! splitQualName buf len
-qconsym buf len = ITqconsym $! splitQualName buf len
+qvarsym, qconsym, prefixqvarsym, prefixqconsym :: StringBuffer -> Int -> Token
+qvarsym buf len = ITqvarsym $! splitQualName buf len False
+qconsym buf len = ITqconsym $! splitQualName buf len False
+prefixqvarsym buf len = ITprefixqvarsym $! splitQualName buf len True
+prefixqconsym buf len = ITprefixqconsym $! splitQualName buf len True
 
+varsym, consym :: Action
 varsym = sym ITvarsym
 consym = sym ITconsym
 
+sym :: (FastString -> Token) -> SrcSpan -> StringBuffer -> Int
+    -> P (Located Token)
 sym con span buf len = 
   case lookupUFM reservedSymsFM fs of
 	Just (keyword,exts) -> do
@@ -963,15 +1055,27 @@ tok_integral itint transint transbuf translen (radix,char_to_int) span buf len =
      (offsetBytes transbuf buf) (subtract translen len) radix char_to_int
 
 -- some conveniences for use with tok_integral
+tok_num :: (Integer -> Integer)
+        -> Int -> Int
+        -> (Integer, (Char->Int)) -> Action
 tok_num = tok_integral ITinteger
+tok_primint :: (Integer -> Integer)
+            -> Int -> Int
+            -> (Integer, (Char->Int)) -> Action
 tok_primint = tok_integral ITprimint
+tok_primword :: Int -> Int
+             -> (Integer, (Char->Int)) -> Action
+tok_primword = tok_integral ITprimword positive
+positive, negative :: (Integer -> Integer)
 positive = id
 negative = negate
+decimal, octal, hexadecimal :: (Integer, Char -> Int)
 decimal = (10,octDecDigit)
 octal = (8,octDecDigit)
 hexadecimal = (16,hexDigit)
 
 -- readRational can understand negative rationals, exponents, everything.
+tok_float, tok_primfloat, tok_primdouble :: String -> Token
 tok_float        str = ITrational   $! readRational str
 tok_primfloat    str = ITprimfloat  $! readRational str
 tok_primdouble   str = ITprimdouble $! readRational str
@@ -991,21 +1095,31 @@ do_bol span _str _len = do
 		return (L span ITvccurly)
 	    EQ -> do
                 --trace "layout: inserting ';'" $ do
-		popLexState
+		_ <- popLexState
 		return (L span ITsemi)
 	    GT -> do
-		popLexState
+		_ <- popLexState
 		lexToken
 
 -- certain keywords put us in the "layout" state, where we might
 -- add an opening curly brace.
-maybe_layout ITdo	= pushLexState layout_do
-maybe_layout ITmdo	= pushLexState layout_do
-maybe_layout ITof	= pushLexState layout
-maybe_layout ITlet	= pushLexState layout
-maybe_layout ITwhere	= pushLexState layout
-maybe_layout ITrec	= pushLexState layout
-maybe_layout _	        = return ()
+maybe_layout :: Token -> P ()
+maybe_layout t = do -- If the alternative layout rule is enabled then
+                    -- we never create an implicit layout context here.
+                    -- Layout is handled XXX instead.
+                    -- The code for closing implicit contexts, or
+                    -- inserting implicit semi-colons, is therefore
+                    -- irrelevant as it only applies in an implicit
+                    -- context.
+                    alr <- extension alternativeLayoutRule
+                    unless alr $ f t
+    where f ITdo    = pushLexState layout_do
+          f ITmdo   = pushLexState layout_do
+          f ITof    = pushLexState layout
+          f ITlet   = pushLexState layout
+          f ITwhere = pushLexState layout
+          f ITrec   = pushLexState layout
+          f _       = return ()
 
 -- Pushing a new implicit layout context.  If the indentation of the
 -- next token is not greater than the previous layout context, then
@@ -1016,24 +1130,29 @@ maybe_layout _	        = return ()
 -- by a 'do', then we allow the new context to be at the same indentation as
 -- the previous context.  This is what the 'strict' argument is for.
 --
+new_layout_context :: Bool -> Action
 new_layout_context strict span _buf _len = do
-    popLexState
-    (AI _ offset _) <- getInput
+    _ <- popLexState
+    (AI l _) <- getInput
+    let offset = srcLocCol l
     ctx <- getContext
+    nondecreasing <- extension nondecreasingIndentation
+    let strict' = strict || not nondecreasing
     case ctx of
 	Layout prev_off : _  | 
-	   (strict     && prev_off >= offset  ||
-	    not strict && prev_off > offset) -> do
+	   (strict'     && prev_off >= offset  ||
+	    not strict' && prev_off > offset) -> do
 		-- token is indented to the left of the previous context.
 		-- we must generate a {} sequence now.
 		pushLexState layout_left
 		return (L span ITvocurly)
-	other -> do
+	_ -> do
 		setContext (Layout offset : ctx)
 		return (L span ITvocurly)
 
+do_layout_left :: Action
 do_layout_left span _buf _len = do
-    popLexState
+    _ <- popLexState
     pushLexState bol  -- we must be at the start of a line
     return (L span ITvccurly)
 
@@ -1043,17 +1162,18 @@ do_layout_left span _buf _len = do
 setLine :: Int -> Action
 setLine code span buf len = do
   let line = parseUnsignedInteger buf len 10 octDecDigit
-  setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0)
+  setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
 	-- subtract one: the line number refers to the *following* line
-  popLexState
+  _ <- popLexState
   pushLexState code
   lexToken
 
 setFile :: Int -> Action
 setFile code span buf len = do
   let file = lexemeToFastString (stepOn buf) (len-2)
+  setAlrLastLoc noSrcSpan
   setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
-  popLexState
+  _ <- popLexState
   pushLexState code
   lexToken
 
@@ -1062,7 +1182,7 @@ setFile code span buf len = do
 -- Options, includes and language pragmas.
 
 lex_string_prag :: (String -> Token) -> Action
-lex_string_prag mkTok span buf len
+lex_string_prag mkTok span _buf _len
     = do input <- getInput
          start <- getSrcLoc
          tok <- go [] input
@@ -1075,12 +1195,12 @@ lex_string_prag mkTok span buf len
                    else case alexGetChar input of
                           Just (c,i) -> go (c:acc) i
                           Nothing -> err input
-          isString i [] = True
+          isString _ [] = True
           isString i (x:xs)
               = case alexGetChar i of
                   Just (c,i') | c == x    -> isString i' xs
                   _other -> False
-          err (AI end _ _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma"
+          err (AI end _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma"
 
 
 -- -----------------------------------------------------------------------------
@@ -1089,7 +1209,7 @@ lex_string_prag mkTok span buf len
 -- This stuff is horrible.  I hates it.
 
 lex_string_tok :: Action
-lex_string_tok span buf len = do
+lex_string_tok span _buf _len = do
   tok <- lex_string ""
   end <- getSrcLoc 
   return (L (mkSrcSpan (srcSpanStart span) end) tok)
@@ -1098,7 +1218,7 @@ lex_string :: String -> P Token
 lex_string s = do
   i <- getInput
   case alexGetChar' i of
-    Nothing -> lit_error
+    Nothing -> lit_error i
 
     Just ('"',i)  -> do
 	setInput i
@@ -1123,95 +1243,96 @@ lex_string s = do
     Just ('\\',i)
 	| Just ('&',i) <- next -> do 
 		setInput i; lex_string s
-	| Just (c,i) <- next, is_space c -> do 
+	| Just (c,i) <- next, c <= '\x7f' && is_space c -> do
+                           -- is_space only works for <= '\x7f' (#3751)
 		setInput i; lex_stringgap s
 	where next = alexGetChar' i
 
-    Just (c, i) -> do
-	c' <- lex_char c i
-	lex_string (c':s)
+    Just (c, i1) -> do
+        case c of
+          '\\' -> do setInput i1; c' <- lex_escape; lex_string (c':s)
+          c | isAny c -> do setInput i1; lex_string (c:s)
+          _other -> lit_error i
 
+lex_stringgap :: String -> P Token
 lex_stringgap s = do
-  c <- getCharOrFail
+  i <- getInput
+  c <- getCharOrFail i
   case c of
     '\\' -> lex_string s
     c | is_space c -> lex_stringgap s
-    _other -> lit_error
+    _other -> lit_error i
 
 
 lex_char_tok :: Action
 -- Here we are basically parsing character literals, such as 'x' or '\n'
 -- but, when Template Haskell is on, we additionally spot
 -- 'x and ''T, returning ITvarQuote and ITtyQuote respectively, 
--- but WIHTOUT CONSUMING the x or T part  (the parser does that).
+-- but WITHOUT CONSUMING the x or T part  (the parser does that).
 -- So we have to do two characters of lookahead: when we see 'x we need to
 -- see if there's a trailing quote
-lex_char_tok span buf len = do	-- We've seen '
+lex_char_tok span _buf _len = do	-- We've seen '
    i1 <- getInput	-- Look ahead to first character
    let loc = srcSpanStart span
    case alexGetChar' i1 of
-	Nothing -> lit_error 
+	Nothing -> lit_error  i1
 
-	Just ('\'', i2@(AI end2 _ _)) -> do 	-- We've seen ''
+	Just ('\'', i2@(AI end2 _)) -> do 	-- We've seen ''
 		  th_exts <- extension thEnabled
 		  if th_exts then do
 			setInput i2
 			return (L (mkSrcSpan loc end2)  ITtyQuote)
-		   else lit_error
+		   else lit_error i1
 
-	Just ('\\', i2@(AI end2 _ _)) -> do 	-- We've seen 'backslash 
+	Just ('\\', i2@(AI _end2 _)) -> do 	-- We've seen 'backslash
 		  setInput i2
 		  lit_ch <- lex_escape
-		  mc <- getCharOrFail	-- Trailing quote
+                  i3 <- getInput
+		  mc <- getCharOrFail i3 -- Trailing quote
 		  if mc == '\'' then finish_char_tok loc lit_ch
-			        else do setInput i2; lit_error 
+			        else lit_error i3
 
-        Just (c, i2@(AI end2 _ _)) 
-		| not (isAny c) -> lit_error
+        Just (c, i2@(AI _end2 _))
+		| not (isAny c) -> lit_error i1
 		| otherwise ->
 
 		-- We've seen 'x, where x is a valid character
 		--  (i.e. not newline etc) but not a quote or backslash
 	   case alexGetChar' i2 of	-- Look ahead one more character
-		Nothing -> lit_error
 		Just ('\'', i3) -> do 	-- We've seen 'x'
 			setInput i3 
 			finish_char_tok loc c
 		_other -> do 		-- We've seen 'x not followed by quote
+		       	  		-- (including the possibility of EOF)
 					-- If TH is on, just parse the quote only
 			th_exts <- extension thEnabled	
-			let (AI end _ _) = i1
+			let (AI end _) = i1
 			if th_exts then return (L (mkSrcSpan loc end) ITvarQuote)
-				   else do setInput i2; lit_error
+				   else lit_error i2
 
 finish_char_tok :: SrcLoc -> Char -> P (Located Token)
 finish_char_tok loc ch	-- We've already seen the closing quote
 			-- Just need to check for trailing #
   = do	magicHash <- extension magicHashEnabled
-	i@(AI end _ _) <- getInput
+	i@(AI end _) <- getInput
 	if magicHash then do
 		case alexGetChar' i of
-			Just ('#',i@(AI end _ _)) -> do
+			Just ('#',i@(AI end _)) -> do
 				setInput i
 				return (L (mkSrcSpan loc end) (ITprimchar ch))
 			_other ->
 				return (L (mkSrcSpan loc end) (ITchar ch))
-	        else do
+	    else do
 		   return (L (mkSrcSpan loc end) (ITchar ch))
 
-lex_char :: Char -> AlexInput -> P Char
-lex_char c inp = do
-  case c of
-      '\\' -> do setInput inp; lex_escape
-      c | isAny c -> do setInput inp; return c
-      _other -> lit_error
-
-isAny c | c > '\xff' = isPrint c
+isAny :: Char -> Bool
+isAny c | c > '\x7f' = isPrint c
 	| otherwise  = is_any c
 
 lex_escape :: P Char
 lex_escape = do
-  c <- getCharOrFail
+  i0 <- getInput
+  c <- getCharOrFail i0
   case c of
 	'a'   -> return '\a'
 	'b'   -> return '\b'
@@ -1223,54 +1344,59 @@ lex_escape = do
 	'\\'  -> return '\\'
 	'"'   -> return '\"'
 	'\''  -> return '\''
-	'^'   -> do c <- getCharOrFail
+	'^'   -> do i1 <- getInput
+                    c <- getCharOrFail i1
 		    if c >= '@' && c <= '_'
 			then return (chr (ord c - ord '@'))
-			else lit_error
+			else lit_error i1
 
 	'x'   -> readNum is_hexdigit 16 hexDigit
 	'o'   -> readNum is_octdigit  8 octDecDigit
-	x | is_digit x -> readNum2 is_digit 10 octDecDigit (octDecDigit x)
+	x | is_decdigit x -> readNum2 is_decdigit 10 octDecDigit (octDecDigit x)
 
 	c1 ->  do
 	   i <- getInput
 	   case alexGetChar' i of
-	    Nothing -> lit_error
+	    Nothing -> lit_error i0
 	    Just (c2,i2) -> 
               case alexGetChar' i2 of
-		Nothing	-> do setInput i2; lit_error
+		Nothing	-> do lit_error i0
 		Just (c3,i3) -> 
 		   let str = [c1,c2,c3] in
 		   case [ (c,rest) | (p,c) <- silly_escape_chars,
-			      	     Just rest <- [maybePrefixMatch p str] ] of
+			      	     Just rest <- [stripPrefix p str] ] of
 			  (escape_char,[]):_ -> do
 				setInput i3
 				return escape_char
 			  (escape_char,_:_):_ -> do
 				setInput i2
 				return escape_char
-			  [] -> lit_error
+			  [] -> lit_error i0
 
 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
 readNum is_digit base conv = do
   i <- getInput
-  c <- getCharOrFail
+  c <- getCharOrFail i
   if is_digit c 
 	then readNum2 is_digit base conv (conv c)
-	else do setInput i; lit_error
+	else lit_error i
 
+readNum2 :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> P Char
 readNum2 is_digit base conv i = do
   input <- getInput
   read i input
   where read i input = do
 	  case alexGetChar' input of
 	    Just (c,input') | is_digit c -> do
-		read (i*base + conv c) input'
+               let i' = i*base + conv c
+               if i' > 0x10ffff
+                  then setInput input >> lexError "numeric escape sequence out of range"
+                  else read i' input'
 	    _other -> do
-		if i >= 0 && i <= 0x10FFFF
-		   then do setInput input; return (chr i)
-		   else lit_error
+              setInput input; return (chr i)
+
 
+silly_escape_chars :: [(String, Char)]
 silly_escape_chars = [
 	("NUL", '\NUL'),
 	("SOH", '\SOH'),
@@ -1312,11 +1438,11 @@ silly_escape_chars = [
 -- the position of the error in the buffer.  This is so that we can report
 -- a correct location to the user, but also so we can detect UTF-8 decoding
 -- errors if they occur.
-lit_error = lexError "lexical error in string/character literal"
+lit_error :: AlexInput -> P a
+lit_error i = do setInput i; lexError "lexical error in string/character literal"
 
-getCharOrFail :: P Char
-getCharOrFail =  do
-  i <- getInput
+getCharOrFail :: AlexInput -> P Char
+getCharOrFail i =  do
   case alexGetChar' i of
 	Nothing -> lexError "unexpected end-of-file in string/character literal"
 	Just (c,i)  -> do setInput i; return c
@@ -1326,8 +1452,9 @@ getCharOrFail =  do
 
 lex_quasiquote_tok :: Action
 lex_quasiquote_tok span buf len = do
-  let quoter = reverse $ takeWhile (/= '$')
-               $ reverse $ lexemeToString buf (len - 1)
+  let quoter = tail (lexemeToString buf (len - 1))
+		-- 'tail' drops the initial '[', 
+		-- while the -1 drops the trailing '|'
   quoteStart <- getSrcLoc              
   quote <- lex_quasiquote ""
   end <- getSrcLoc 
@@ -1340,7 +1467,7 @@ lex_quasiquote :: String -> P String
 lex_quasiquote s = do
   i <- getInput
   case alexGetChar' i of
-    Nothing -> lit_error
+    Nothing -> lit_error i
 
     Just ('\\',i)
 	| Just ('|',i) <- next -> do 
@@ -1365,6 +1492,11 @@ warn option warning srcspan _buf _len = do
     addWarning option srcspan warning
     lexToken
 
+warnThen :: DynFlag -> SDoc -> Action -> Action
+warnThen option warning action srcspan buf len = do
+    addWarning option srcspan warning
+    action srcspan buf len
+
 -- -----------------------------------------------------------------------------
 -- The Parse Monad
 
@@ -1383,18 +1515,33 @@ data ParseResult a
 
 data PState = PState { 
 	buffer	   :: StringBuffer,
-    dflags     :: DynFlags,
-    messages   :: Messages,
+        dflags     :: DynFlags,
+        messages   :: Messages,
         last_loc   :: SrcSpan,	-- pos of previous token
-        last_offs  :: !Int, 	-- offset of the previous token from the
-				-- beginning of  the current line.
-				-- \t is equal to 8 spaces.
 	last_len   :: !Int,	-- len of previous token
-  last_line_len :: !Int,
         loc        :: SrcLoc,   -- current loc (end of prev token + 1)
 	extsBitmap :: !Int,	-- bitmap that determines permitted extensions
 	context	   :: [LayoutContext],
-	lex_state  :: [Int]
+	lex_state  :: [Int],
+        -- Used in the alternative layout rule:
+        -- These tokens are the next ones to be sent out. They are
+        -- just blindly emitted, without the rule looking at them again:
+        alr_pending_implicit_tokens :: [Located Token],
+        -- This is the next token to be considered or, if it is Nothing,
+        -- we need to get the next token from the input stream:
+        alr_next_token :: Maybe (Located Token),
+        -- This is what we consider to be the locatino of the last token
+        -- emitted:
+        alr_last_loc :: SrcSpan,
+        -- The stack of layout contexts:
+        alr_context :: [ALRContext],
+        -- Are we expecting a '{'? If it's Just, then the ALRLayout tells
+        -- us what sort of layout the '{' will open:
+        alr_expecting_ocurly :: Maybe ALRLayout,
+        -- Have we just had the '}' for a let block? If so, than an 'in'
+        -- token doesn't need to close anything:
+        alr_justClosedExplicitLetBlock :: Bool,
+        code_type_bracket_depth :: Int
      }
 	-- last_loc and last_len are used when generating error messages,
 	-- and in pushCurrentContext only.  Sigh, if only Happy passed the
@@ -1402,6 +1549,14 @@ data PState = PState {
 	-- Getting rid of last_loc would require finding another way to 
 	-- implement pushCurrentContext (which is only called from one place).
 
+data ALRContext = ALRNoLayout Bool{- does it contain commas? -}
+                              Bool{- is it a 'let' block? -}
+                | ALRLayout ALRLayout Int
+data ALRLayout = ALRLayoutLet
+               | ALRLayoutWhere
+               | ALRLayoutOf
+               | ALRLayoutDo
+
 newtype P a = P { unP :: PState -> ParseResult a }
 
 instance Monad P where
@@ -1425,10 +1580,21 @@ failMsgP :: String -> P a
 failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg)
 
 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
-failLocMsgP loc1 loc2 str = P $ \s -> PFailed (mkSrcSpan loc1 loc2) (text str)
+failLocMsgP loc1 loc2 str = P $ \_ -> PFailed (mkSrcSpan loc1 loc2) (text str)
+
+failSpanMsgP :: SrcSpan -> SDoc -> P a
+failSpanMsgP span msg = P $ \_ -> PFailed span msg
+
+getPState :: P PState
+getPState = P $ \s -> POk s s
+
+getDynFlags :: P DynFlags
+getDynFlags = P $ \s -> POk s (dflags s)
 
-failSpanMsgP :: SrcSpan -> String -> P a
-failSpanMsgP span msg = P $ \s -> PFailed span (text msg)
+withThisPackage :: (PackageId -> a) -> P a
+withThisPackage f
+ = do	pkg	<- liftM thisPackage getDynFlags
+	return	$ f pkg
 
 extension :: (Int -> Bool) -> P Bool
 extension p = P $ \s -> POk s (p $! extsBitmap s)
@@ -1436,33 +1602,41 @@ extension p = P $ \s -> POk s (p $! extsBitmap s)
 getExts :: P Int
 getExts = P $ \s -> POk s (extsBitmap s)
 
+setExts :: (Int -> Int) -> P ()
+setExts f = P $ \s -> POk s{ extsBitmap = f (extsBitmap s) } ()
+
 setSrcLoc :: SrcLoc -> P ()
 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
 
+incrBracketDepth :: P ()
+incrBracketDepth = P $ \s -> POk (s{code_type_bracket_depth = (code_type_bracket_depth s)+1}) ()
+decrBracketDepth :: P ()
+decrBracketDepth = P $ \s -> POk (s{code_type_bracket_depth = (code_type_bracket_depth s)-1}) ()
+getParserBrakDepth :: P Int
+getParserBrakDepth = P $ \s -> POk s (code_type_bracket_depth s)
+
 getSrcLoc :: P SrcLoc
 getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
 
-setLastToken :: SrcSpan -> Int -> Int -> P ()
-setLastToken loc len line_len = P $ \s -> POk s { 
+setLastToken :: SrcSpan -> Int -> P ()
+setLastToken loc len = P $ \s -> POk s { 
   last_loc=loc, 
-  last_len=len,
-  last_line_len=line_len 
-} ()
+  last_len=len
+  } ()
 
-data AlexInput = AI SrcLoc {-#UNPACK#-}!Int StringBuffer
+data AlexInput = AI SrcLoc StringBuffer
 
 alexInputPrevChar :: AlexInput -> Char
-alexInputPrevChar (AI _ _ buf) = prevChar buf '\n'
+alexInputPrevChar (AI _ buf) = prevChar buf '\n'
 
 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
-alexGetChar (AI loc ofs s) 
+alexGetChar (AI loc s) 
   | atEnd s   = Nothing
-  | otherwise = adj_c `seq` loc' `seq` ofs' `seq` s' `seq` 
+  | otherwise = adj_c `seq` loc' `seq` s' `seq` 
 		--trace (show (ord c)) $
-		Just (adj_c, (AI loc' ofs' s'))
+		Just (adj_c, (AI loc' s'))
   where (c,s') = nextChar s
         loc'   = advanceSrcLoc loc c
-        ofs'   = advanceOffs c ofs
 
 	non_graphic     = '\x0'
 	upper 		= '\x1'
@@ -1474,9 +1648,9 @@ alexGetChar (AI loc ofs s)
 
 	adj_c 
 	  | c <= '\x06' = non_graphic
-	  | c <= '\xff' = c
+	  | c <= '\x7f' = c
           -- Alex doesn't handle Unicode, so when Unicode
-          -- character is encoutered we output these values
+          -- character is encountered we output these values
           -- with the actual character value hidden in the state.
 	  | otherwise = 
 		case generalCategory c of
@@ -1484,20 +1658,20 @@ alexGetChar (AI loc ofs s)
 		  LowercaseLetter       -> lower
 		  TitlecaseLetter       -> upper
 		  ModifierLetter        -> other_graphic
-		  OtherLetter           -> other_graphic
+		  OtherLetter           -> lower -- see #1103
 		  NonSpacingMark        -> other_graphic
 		  SpacingCombiningMark  -> other_graphic
 		  EnclosingMark         -> other_graphic
 		  DecimalNumber         -> digit
 		  LetterNumber          -> other_graphic
-		  OtherNumber           -> other_graphic
-		  ConnectorPunctuation  -> other_graphic
-		  DashPunctuation       -> other_graphic
+                  OtherNumber           -> digit -- see #4373
+		  ConnectorPunctuation  -> symbol
+		  DashPunctuation       -> symbol
 		  OpenPunctuation       -> other_graphic
 		  ClosePunctuation      -> other_graphic
 		  InitialQuote          -> other_graphic
 		  FinalQuote            -> other_graphic
-		  OtherPunctuation      -> other_graphic
+		  OtherPunctuation      -> symbol
 		  MathSymbol            -> symbol
 		  CurrencySymbol        -> symbol
 		  ModifierSymbol        -> symbol
@@ -1508,25 +1682,24 @@ alexGetChar (AI loc ofs s)
 -- This version does not squash unicode characters, it is used when
 -- lexing strings.
 alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
-alexGetChar' (AI loc ofs s) 
+alexGetChar' (AI loc s) 
   | atEnd s   = Nothing
-  | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq` 
+  | otherwise = c `seq` loc' `seq` s' `seq` 
 		--trace (show (ord c)) $
-		Just (c, (AI loc' ofs' s'))
+		Just (c, (AI loc' s'))
   where (c,s') = nextChar s
         loc'   = advanceSrcLoc loc c
-        ofs'   = advanceOffs c ofs
-
-advanceOffs :: Char -> Int -> Int
-advanceOffs '\n' offs = 0
-advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8
-advanceOffs _    offs = offs + 1
 
 getInput :: P AlexInput
-getInput = P $ \s@PState{ loc=l, last_offs=o, buffer=b } -> POk s (AI l o b)
+getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (AI l b)
 
 setInput :: AlexInput -> P ()
-setInput (AI l o b) = P $ \s -> POk s{ loc=l, last_offs=o, buffer=b } ()
+setInput (AI l b) = P $ \s -> POk s{ loc=l, buffer=b } ()
+
+nextIsEOF :: P Bool
+nextIsEOF = do
+  AI _ s <- getInput
+  return $ atEnd s
 
 pushLexState :: Int -> P ()
 pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
@@ -1535,116 +1708,224 @@ popLexState :: P Int
 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
 
 getLexState :: P Int
-getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls
+getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls
+
+popNextToken :: P (Maybe (Located Token))
+popNextToken
+    = P $ \s@PState{ alr_next_token = m } ->
+              POk (s {alr_next_token = Nothing}) m
+
+activeContext :: P Bool
+activeContext = do
+  ctxt <- getALRContext
+  expc <- getAlrExpectingOCurly
+  impt <- implicitTokenPending
+  case (ctxt,expc) of
+    ([],Nothing) -> return impt
+    _other       -> return True
+
+setAlrLastLoc :: SrcSpan -> P ()
+setAlrLastLoc l = P $ \s -> POk (s {alr_last_loc = l}) ()
+
+getAlrLastLoc :: P SrcSpan
+getAlrLastLoc = P $ \s@(PState {alr_last_loc = l}) -> POk s l
+
+getALRContext :: P [ALRContext]
+getALRContext = P $ \s@(PState {alr_context = cs}) -> POk s cs
+
+setALRContext :: [ALRContext] -> P ()
+setALRContext cs = P $ \s -> POk (s {alr_context = cs}) ()
+
+getJustClosedExplicitLetBlock :: P Bool
+getJustClosedExplicitLetBlock
+ = P $ \s@(PState {alr_justClosedExplicitLetBlock = b}) -> POk s b
+
+setJustClosedExplicitLetBlock :: Bool -> P ()
+setJustClosedExplicitLetBlock b
+ = P $ \s -> POk (s {alr_justClosedExplicitLetBlock = b}) ()
+
+setNextToken :: Located Token -> P ()
+setNextToken t = P $ \s -> POk (s {alr_next_token = Just t}) ()
+
+implicitTokenPending :: P Bool
+implicitTokenPending
+    = P $ \s@PState{ alr_pending_implicit_tokens = ts } ->
+              case ts of
+              [] -> POk s False
+              _  -> POk s True
+
+popPendingImplicitToken :: P (Maybe (Located Token))
+popPendingImplicitToken
+    = P $ \s@PState{ alr_pending_implicit_tokens = ts } ->
+              case ts of
+              [] -> POk s Nothing
+              (t : ts') -> POk (s {alr_pending_implicit_tokens = ts'}) (Just t)
+
+setPendingImplicitTokens :: [Located Token] -> P ()
+setPendingImplicitTokens ts = P $ \s -> POk (s {alr_pending_implicit_tokens = ts}) ()
+
+getAlrExpectingOCurly :: P (Maybe ALRLayout)
+getAlrExpectingOCurly = P $ \s@(PState {alr_expecting_ocurly = b}) -> POk s b
+
+setAlrExpectingOCurly :: Maybe ALRLayout -> P ()
+setAlrExpectingOCurly b = P $ \s -> POk (s {alr_expecting_ocurly = b}) ()
 
 -- for reasons of efficiency, flags indicating language extensions (eg,
--- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
+-- -fglasgow-exts or -XParallelArrays) are represented by a bitmap stored in an unboxed
 -- integer
 
-genericsBit, ffiBit, parrBit :: Int
+genericsBit :: Int
 genericsBit = 0 -- {| and |}
+ffiBit :: Int
 ffiBit	   = 1
+parrBit :: Int
 parrBit	   = 2
+arrowsBit :: Int
 arrowsBit  = 4
+thBit :: Int
 thBit	   = 5
+ipBit :: Int
 ipBit      = 6
+explicitForallBit :: Int
 explicitForallBit = 7 -- the 'forall' keyword and '.' symbol
+bangPatBit :: Int
 bangPatBit = 8	-- Tells the parser to understand bang-patterns
 		-- (doesn't affect the lexer)
+tyFamBit :: Int
 tyFamBit   = 9	-- indexed type families: 'family' keyword and kind sigs
+haddockBit :: Int
 haddockBit = 10 -- Lex and parse Haddock comments
-magicHashBit = 11 -- # in both functions and operators
+magicHashBit :: Int
+magicHashBit = 11 -- "#" in both functions and operators
+kindSigsBit :: Int
 kindSigsBit = 12 -- Kind signatures on type variables
+recursiveDoBit :: Int
 recursiveDoBit = 13 -- mdo
+unicodeSyntaxBit :: Int
 unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
+unboxedTuplesBit :: Int
 unboxedTuplesBit = 15 -- (# and #)
-standaloneDerivingBit = 16 -- standalone instance deriving declarations
+datatypeContextsBit :: Int
+datatypeContextsBit = 16
+transformComprehensionsBit :: Int
 transformComprehensionsBit = 17
+qqBit :: Int
 qqBit	   = 18 -- enable quasiquoting
-
-genericsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
+inRulePragBit :: Int
+inRulePragBit = 19
+rawTokenStreamBit :: Int
+rawTokenStreamBit = 20 -- producing a token stream with all comments included
+recBit :: Int
+recBit = 22 -- rec
+alternativeLayoutRuleBit :: Int
+alternativeLayoutRuleBit = 23
+relaxedLayoutBit :: Int
+relaxedLayoutBit = 24
+nondecreasingIndentationBit :: Int
+nondecreasingIndentationBit = 25
+hetMetBit :: Int
+hetMetBit = 31
+
+always :: Int -> Bool
 always           _     = True
+genericsEnabled :: Int -> Bool
 genericsEnabled  flags = testBit flags genericsBit
-ffiEnabled       flags = testBit flags ffiBit
+parrEnabled :: Int -> Bool
 parrEnabled      flags = testBit flags parrBit
+arrowsEnabled :: Int -> Bool
 arrowsEnabled    flags = testBit flags arrowsBit
+hetMetEnabled :: Int -> Bool
+hetMetEnabled    flags = testBit flags hetMetBit
+thEnabled :: Int -> Bool
 thEnabled        flags = testBit flags thBit
+ipEnabled :: Int -> Bool
 ipEnabled        flags = testBit flags ipBit
+explicitForallEnabled :: Int -> Bool
 explicitForallEnabled flags = testBit flags explicitForallBit
+bangPatEnabled :: Int -> Bool
 bangPatEnabled   flags = testBit flags bangPatBit
-tyFamEnabled     flags = testBit flags tyFamBit
+-- tyFamEnabled :: Int -> Bool
+-- tyFamEnabled     flags = testBit flags tyFamBit
+haddockEnabled :: Int -> Bool
 haddockEnabled   flags = testBit flags haddockBit
+magicHashEnabled :: Int -> Bool
 magicHashEnabled flags = testBit flags magicHashBit
-kindSigsEnabled  flags = testBit flags kindSigsBit
-recursiveDoEnabled flags = testBit flags recursiveDoBit
+-- kindSigsEnabled :: Int -> Bool
+-- kindSigsEnabled  flags = testBit flags kindSigsBit
+unicodeSyntaxEnabled :: Int -> Bool
 unicodeSyntaxEnabled flags = testBit flags unicodeSyntaxBit
+unboxedTuplesEnabled :: Int -> Bool
 unboxedTuplesEnabled flags = testBit flags unboxedTuplesBit
-standaloneDerivingEnabled flags = testBit flags standaloneDerivingBit
-transformComprehensionsEnabled flags = testBit flags transformComprehensionsBit
+datatypeContextsEnabled :: Int -> Bool
+datatypeContextsEnabled flags = testBit flags datatypeContextsBit
+qqEnabled :: Int -> Bool
 qqEnabled        flags = testBit flags qqBit
+-- inRulePrag :: Int -> Bool
+-- inRulePrag       flags = testBit flags inRulePragBit
+rawTokenStreamEnabled :: Int -> Bool
+rawTokenStreamEnabled flags = testBit flags rawTokenStreamBit
+alternativeLayoutRule :: Int -> Bool
+alternativeLayoutRule flags = testBit flags alternativeLayoutRuleBit
+relaxedLayout :: Int -> Bool
+relaxedLayout flags = testBit flags relaxedLayoutBit
+nondecreasingIndentation :: Int -> Bool
+nondecreasingIndentation flags = testBit flags nondecreasingIndentationBit
 
 -- PState for parsing options pragmas
 --
-pragState :: StringBuffer -> SrcLoc -> PState
-pragState buf loc  = 
-  PState {
-      buffer	      = buf,
-      messages      = emptyMessages,
-      -- XXX defaultDynFlags is not right, but we don't have a real
-      -- dflags handy
-      dflags        = defaultDynFlags,
-      last_loc      = mkSrcSpan loc loc,
-      last_offs     = 0,
-      last_len      = 0,
-      last_line_len = 0,
-      loc           = loc,
-      extsBitmap    = 0,
-      context       = [],
-      lex_state     = [bol, option_prags, 0]
-    }
-
+pragState :: DynFlags -> StringBuffer -> SrcLoc -> PState
+pragState dynflags buf loc = (mkPState dynflags buf loc) {
+                                 lex_state = [bol, option_prags, 0]
+                             }
 
 -- create a parse state
 --
-mkPState :: StringBuffer -> SrcLoc -> DynFlags -> PState
-mkPState buf loc flags  = 
+mkPState :: DynFlags -> StringBuffer -> SrcLoc -> PState
+mkPState flags buf loc =
   PState {
       buffer	      = buf,
       dflags        = flags,
       messages      = emptyMessages,
       last_loc      = mkSrcSpan loc loc,
-      last_offs     = 0,
       last_len      = 0,
-      last_line_len = 0,
       loc           = loc,
       extsBitmap    = fromIntegral bitmap,
       context       = [],
-      lex_state     = [bol, 0]
-	-- we begin in the layout state if toplev_layout is set
+      lex_state     = [bol, 0],
+      alr_pending_implicit_tokens = [],
+      alr_next_token = Nothing,
+      alr_last_loc = noSrcSpan,
+      alr_context = [],
+      alr_expecting_ocurly = Nothing,
+      alr_justClosedExplicitLetBlock = False,
+      code_type_bracket_depth = 0
     }
     where
-      bitmap = genericsBit `setBitIf` dopt Opt_Generics flags
-	       .|. ffiBit       `setBitIf` dopt Opt_ForeignFunctionInterface flags
-	       .|. parrBit      `setBitIf` dopt Opt_PArr         flags
-	       .|. arrowsBit    `setBitIf` dopt Opt_Arrows       flags
-	       .|. thBit        `setBitIf` dopt Opt_TemplateHaskell flags
-	       .|. qqBit        `setBitIf` dopt Opt_QuasiQuotes flags
-	       .|. ipBit        `setBitIf` dopt Opt_ImplicitParams flags
-	       .|. explicitForallBit `setBitIf` dopt Opt_ScopedTypeVariables flags
-	       .|. explicitForallBit `setBitIf` dopt Opt_PolymorphicComponents flags
-	       .|. explicitForallBit `setBitIf` dopt Opt_ExistentialQuantification flags
-	       .|. explicitForallBit `setBitIf` dopt Opt_Rank2Types flags
-	       .|. explicitForallBit `setBitIf` dopt Opt_RankNTypes flags
-	       .|. bangPatBit   `setBitIf` dopt Opt_BangPatterns flags
-	       .|. tyFamBit     `setBitIf` dopt Opt_TypeFamilies flags
-	       .|. haddockBit   `setBitIf` dopt Opt_Haddock      flags
-	       .|. magicHashBit `setBitIf` dopt Opt_MagicHash    flags
-	       .|. kindSigsBit  `setBitIf` dopt Opt_KindSignatures flags
-	       .|. recursiveDoBit `setBitIf` dopt Opt_RecursiveDo flags
-	       .|. unicodeSyntaxBit `setBitIf` dopt Opt_UnicodeSyntax flags
-	       .|. unboxedTuplesBit `setBitIf` dopt Opt_UnboxedTuples flags
-	       .|. standaloneDerivingBit `setBitIf` dopt Opt_StandaloneDeriving flags
-           .|. transformComprehensionsBit `setBitIf` dopt Opt_TransformListComp flags
+      bitmap = genericsBit `setBitIf` xopt Opt_Generics flags
+	       .|. ffiBit            `setBitIf` xopt Opt_ForeignFunctionInterface flags
+	       .|. parrBit           `setBitIf` xopt Opt_ParallelArrays  flags
+	       .|. arrowsBit         `setBitIf` xopt Opt_Arrows          flags
+	       .|. hetMetBit        `setBitIf` xopt Opt_ModalTypes         flags
+	       .|. thBit             `setBitIf` xopt Opt_TemplateHaskell flags
+	       .|. qqBit             `setBitIf` xopt Opt_QuasiQuotes	 flags
+	       .|. ipBit             `setBitIf` xopt Opt_ImplicitParams	 flags
+	       .|. explicitForallBit `setBitIf` xopt Opt_ExplicitForAll  flags
+	       .|. bangPatBit        `setBitIf` xopt Opt_BangPatterns flags
+	       .|. tyFamBit          `setBitIf` xopt Opt_TypeFamilies flags
+	       .|. haddockBit        `setBitIf` dopt Opt_Haddock      flags
+	       .|. magicHashBit      `setBitIf` xopt Opt_MagicHash    flags
+	       .|. kindSigsBit       `setBitIf` xopt Opt_KindSignatures flags
+	       .|. recursiveDoBit    `setBitIf` xopt Opt_RecursiveDo flags
+	       .|. recBit 	     `setBitIf` xopt Opt_DoRec  flags
+	       .|. recBit 	     `setBitIf` xopt Opt_Arrows flags
+	       .|. unicodeSyntaxBit  `setBitIf` xopt Opt_UnicodeSyntax flags
+	       .|. unboxedTuplesBit  `setBitIf` xopt Opt_UnboxedTuples flags
+               .|. datatypeContextsBit `setBitIf` xopt Opt_DatatypeContexts flags
+               .|. transformComprehensionsBit `setBitIf` xopt Opt_TransformListComp flags
+               .|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags
+               .|. alternativeLayoutRuleBit `setBitIf` xopt Opt_AlternativeLayoutRule flags
+               .|. relaxedLayoutBit `setBitIf` xopt Opt_RelaxedLayout flags
+               .|. nondecreasingIndentationBit `setBitIf` xopt Opt_NondecreasingIndentation flags
       --
       setBitIf :: Int -> Bool -> Int
       b `setBitIf` cond | cond      = bit b
@@ -1668,7 +1949,7 @@ setContext ctx = P $ \s -> POk s{context=ctx} ()
 
 popContext :: P ()
 popContext = P $ \ s@(PState{ buffer = buf, context = ctx, 
-			   loc = loc, last_len = len, last_loc = last_loc }) ->
+                              last_len = len, last_loc = last_loc }) ->
   case ctx of
 	(_:tl) -> POk s{ context = tl } ()
 	[]     -> PFailed last_loc (srcParseErr buf len)
@@ -1677,14 +1958,15 @@ popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
 -- This is only used at the outer level of a module when the 'module'
 -- keyword is missing.
 pushCurrentContext :: P ()
-pushCurrentContext = P $ \ s@PState{ last_offs=offs, last_line_len=len, context=ctx } -> 
-    POk s{context = Layout (offs-len) : ctx} ()
---trace ("off: " ++ show offs ++ ", len: " ++ show len) $ POk s{context = Layout (offs-len) : ctx} ()
+pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } -> 
+    POk s{context = Layout (srcSpanStartCol loc) : ctx} ()
 
 getOffside :: P Ordering
-getOffside = P $ \s@PState{last_offs=offs, context=stk} ->
+getOffside = P $ \s@PState{last_loc=loc, context=stk} ->
+                let offs = srcSpanStartCol loc in
 		let ord = case stk of
-			(Layout n:_) -> compare offs n
+			(Layout n:_) -> --trace ("layout: " ++ show n ++ ", offs: " ++ show offs) $ 
+                                        compare offs n
 			_            -> GT
 		in POk s ord
 
@@ -1697,8 +1979,8 @@ srcParseErr
   -> Message
 srcParseErr buf len
   = hcat [ if null token 
-	     then ptext SLIT("parse error (possibly incorrect indentation)")
-	     else hcat [ptext SLIT("parse error on input "),
+	     then ptext (sLit "parse error (possibly incorrect indentation)")
+	     else hcat [ptext (sLit "parse error on input "),
           	  	char '`', text token, char '\'']
     ]
   where token = lexemeToString (offsetBytes (-len) buf) len
@@ -1716,7 +1998,7 @@ srcParseFail = P $ \PState{ buffer = buf, last_len = len,
 lexError :: String -> P a
 lexError str = do
   loc <- getSrcLoc
-  i@(AI end _ buf) <- getInput
+  (AI end buf) <- getInput
   reportLexError loc end buf str
 
 -- -----------------------------------------------------------------------------
@@ -1725,32 +2007,256 @@ lexError str = do
 
 lexer :: (Located Token -> P a) -> P a
 lexer cont = do
-  tok@(L span tok__) <- lexToken
---  trace ("token: " ++ show tok__) $ do
+  alr <- extension alternativeLayoutRule
+  let lexTokenFun = if alr then lexTokenAlr else lexToken
+  tok@(L _span _tok__) <- lexTokenFun
+  --trace ("token: " ++ show _tok__) $ do
   cont tok
 
+lexTokenAlr :: P (Located Token)
+lexTokenAlr = do mPending <- popPendingImplicitToken
+                 t <- case mPending of
+                      Nothing ->
+                          do mNext <- popNextToken
+                             t <- case mNext of
+                                  Nothing -> lexToken
+                                  Just next -> return next
+                             alternativeLayoutRuleToken t
+                      Just t ->
+                          return t
+                 setAlrLastLoc (getLoc t)
+                 case unLoc t of
+                     ITwhere -> setAlrExpectingOCurly (Just ALRLayoutWhere)
+                     ITlet   -> setAlrExpectingOCurly (Just ALRLayoutLet)
+                     ITof    -> setAlrExpectingOCurly (Just ALRLayoutOf)
+                     ITdo    -> setAlrExpectingOCurly (Just ALRLayoutDo)
+                     ITmdo   -> setAlrExpectingOCurly (Just ALRLayoutDo)
+                     ITrec   -> setAlrExpectingOCurly (Just ALRLayoutDo)
+                     _       -> return ()
+                 return t
+
+alternativeLayoutRuleToken :: Located Token -> P (Located Token)
+alternativeLayoutRuleToken t
+    = do context <- getALRContext
+         lastLoc <- getAlrLastLoc
+         mExpectingOCurly <- getAlrExpectingOCurly
+         justClosedExplicitLetBlock <- getJustClosedExplicitLetBlock
+         setJustClosedExplicitLetBlock False
+         dflags <- getDynFlags
+         let transitional = xopt Opt_AlternativeLayoutRuleTransitional dflags
+             thisLoc = getLoc t
+             thisCol = srcSpanStartCol thisLoc
+             newLine = (lastLoc == noSrcSpan)
+                    || (srcSpanStartLine thisLoc > srcSpanEndLine lastLoc)
+         case (unLoc t, context, mExpectingOCurly) of
+             -- This case handles a GHC extension to the original H98
+             -- layout rule...
+             (ITocurly, _, Just alrLayout) ->
+                 do setAlrExpectingOCurly Nothing
+                    let isLet = case alrLayout of
+                                ALRLayoutLet -> True
+                                _ -> False
+                    setALRContext (ALRNoLayout (containsCommas ITocurly) isLet : context)
+                    return t
+             -- ...and makes this case unnecessary
+             {-
+             -- I think our implicit open-curly handling is slightly
+             -- different to John's, in how it interacts with newlines
+             -- and "in"
+             (ITocurly, _, Just _) ->
+                 do setAlrExpectingOCurly Nothing
+                    setNextToken t
+                    lexTokenAlr
+             -}
+             (_, ALRLayout _ col : ls, Just expectingOCurly)
+              | (thisCol > col) ||
+                (thisCol == col &&
+                 isNonDecreasingIntentation expectingOCurly) ->
+                 do setAlrExpectingOCurly Nothing
+                    setALRContext (ALRLayout expectingOCurly thisCol : context)
+                    setNextToken t
+                    return (L thisLoc ITocurly)
+              | otherwise ->
+                 do setAlrExpectingOCurly Nothing
+                    setPendingImplicitTokens [L lastLoc ITccurly]
+                    setNextToken t
+                    return (L lastLoc ITocurly)
+             (_, _, Just expectingOCurly) ->
+                 do setAlrExpectingOCurly Nothing
+                    setALRContext (ALRLayout expectingOCurly thisCol : context)
+                    setNextToken t
+                    return (L thisLoc ITocurly)
+             -- We do the [] cases earlier than in the spec, as we
+             -- have an actual EOF token
+             (ITeof, ALRLayout _ _ : ls, _) ->
+                 do setALRContext ls
+                    setNextToken t
+                    return (L thisLoc ITccurly)
+             (ITeof, _, _) ->
+                 return t
+             -- the other ITeof case omitted; general case below covers it
+             (ITin, _, _)
+              | justClosedExplicitLetBlock ->
+                 return t
+             (ITin, ALRLayout ALRLayoutLet _ : ls, _)
+              | newLine ->
+                 do setPendingImplicitTokens [t]
+                    setALRContext ls
+                    return (L thisLoc ITccurly)
+             -- This next case is to handle a transitional issue:
+             (ITwhere, ALRLayout _ col : ls, _)
+              | newLine && thisCol == col && transitional ->
+                 do addWarning Opt_WarnAlternativeLayoutRuleTransitional
+                               thisLoc
+                               (transitionalAlternativeLayoutWarning
+                                    "`where' clause at the same depth as implicit layout block")
+                    setALRContext ls
+                    setNextToken t
+                    -- Note that we use lastLoc, as we may need to close
+                    -- more layouts, or give a semicolon
+                    return (L lastLoc ITccurly)
+             -- This next case is to handle a transitional issue:
+             (ITvbar, ALRLayout _ col : ls, _)
+              | newLine && thisCol == col && transitional ->
+                 do addWarning Opt_WarnAlternativeLayoutRuleTransitional
+                               thisLoc
+                               (transitionalAlternativeLayoutWarning
+                                    "`|' at the same depth as implicit layout block")
+                    setALRContext ls
+                    setNextToken t
+                    -- Note that we use lastLoc, as we may need to close
+                    -- more layouts, or give a semicolon
+                    return (L lastLoc ITccurly)
+             (_, ALRLayout _ col : ls, _)
+              | newLine && thisCol == col ->
+                 do setNextToken t
+                    return (L thisLoc ITsemi)
+              | newLine && thisCol < col ->
+                 do setALRContext ls
+                    setNextToken t
+                    -- Note that we use lastLoc, as we may need to close
+                    -- more layouts, or give a semicolon
+                    return (L lastLoc ITccurly)
+             -- We need to handle close before open, as 'then' is both
+             -- an open and a close
+             (u, _, _)
+              | isALRclose u ->
+                 case context of
+                 ALRLayout _ _ : ls ->
+                     do setALRContext ls
+                        setNextToken t
+                        return (L thisLoc ITccurly)
+                 ALRNoLayout _ isLet : ls ->
+                     do let ls' = if isALRopen u
+                                     then ALRNoLayout (containsCommas u) False : ls
+                                     else ls
+                        setALRContext ls'
+                        when isLet $ setJustClosedExplicitLetBlock True
+                        return t
+                 [] ->
+                     do let ls = if isALRopen u
+                                    then [ALRNoLayout (containsCommas u) False]
+                                    else ls
+                        setALRContext ls
+                        -- XXX This is an error in John's code, but
+                        -- it looks reachable to me at first glance
+                        return t
+             (u, _, _)
+              | isALRopen u ->
+                 do setALRContext (ALRNoLayout (containsCommas u) False : context)
+                    return t
+             (ITin, ALRLayout ALRLayoutLet _ : ls, _) ->
+                 do setALRContext ls
+                    setPendingImplicitTokens [t]
+                    return (L thisLoc ITccurly)
+             (ITin, ALRLayout _ _ : ls, _) ->
+                 do setALRContext ls
+                    setNextToken t
+                    return (L thisLoc ITccurly)
+             -- the other ITin case omitted; general case below covers it
+             (ITcomma, ALRLayout _ _ : ls, _)
+              | topNoLayoutContainsCommas ls ->
+                 do setALRContext ls
+                    setNextToken t
+                    return (L thisLoc ITccurly)
+             (ITwhere, ALRLayout ALRLayoutDo _ : ls, _) ->
+                 do setALRContext ls
+                    setPendingImplicitTokens [t]
+                    return (L thisLoc ITccurly)
+             -- the other ITwhere case omitted; general case below covers it
+             (_, _, _) -> return t
+
+transitionalAlternativeLayoutWarning :: String -> SDoc
+transitionalAlternativeLayoutWarning msg
+    = text "transitional layout will not be accepted in the future:"
+   $$ text msg
+
+isALRopen :: Token -> Bool
+isALRopen ITcase        = True
+isALRopen ITif          = True
+isALRopen ITthen        = True
+isALRopen IToparen      = True
+isALRopen ITobrack      = True
+isALRopen ITocurly      = True
+-- GHC Extensions:
+isALRopen IToubxparen   = True
+isALRopen ITparenEscape = True
+isALRopen _             = False
+
+isALRclose :: Token -> Bool
+isALRclose ITof     = True
+isALRclose ITthen   = True
+isALRclose ITelse   = True
+isALRclose ITcparen = True
+isALRclose ITcbrack = True
+isALRclose ITccurly = True
+-- GHC Extensions:
+isALRclose ITcubxparen = True
+isALRclose _        = False
+
+isNonDecreasingIntentation :: ALRLayout -> Bool
+isNonDecreasingIntentation ALRLayoutDo = True
+isNonDecreasingIntentation _           = False
+
+containsCommas :: Token -> Bool
+containsCommas IToparen = True
+containsCommas ITobrack = True
+-- John doesn't have {} as containing commas, but records contain them,
+-- which caused a problem parsing Cabal's Distribution.Simple.InstallDirs
+-- (defaultInstallDirs).
+containsCommas ITocurly = True
+-- GHC Extensions:
+containsCommas IToubxparen = True
+containsCommas _        = False
+
+topNoLayoutContainsCommas :: [ALRContext] -> Bool
+topNoLayoutContainsCommas [] = False
+topNoLayoutContainsCommas (ALRLayout _ _ : ls) = topNoLayoutContainsCommas ls
+topNoLayoutContainsCommas (ALRNoLayout b _ : _) = b
+
 lexToken :: P (Located Token)
 lexToken = do
-  inp@(AI loc1 _ buf) <- getInput
+  inp@(AI loc1 buf) <- getInput
   sc <- getLexState
   exts <- getExts
   case alexScanUser exts inp sc of
     AlexEOF -> do
         let span = mkSrcSpan loc1 loc1
-        setLastToken span 0 0
+        setLastToken span 0
         return (L span ITeof)
-    AlexError (AI loc2 _ buf) ->
+    AlexError (AI loc2 buf) ->
         reportLexError loc1 loc2 buf "lexical error"
     AlexSkip inp2 _ -> do
         setInput inp2
         lexToken
-    AlexToken inp2@(AI end _ buf2) len t -> do
+    AlexToken inp2@(AI end buf2) _ t -> do
         setInput inp2
         let span = mkSrcSpan loc1 end
         let bytes = byteDiff buf buf2
-        span `seq` setLastToken span bytes bytes
+        span `seq` setLastToken span bytes
         t span buf bytes
 
+reportLexError :: SrcLoc -> SrcLoc -> StringBuffer -> [Char] -> P a
 reportLexError loc1 loc2 buf str
   | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input")
   | otherwise =
@@ -1760,4 +2266,72 @@ reportLexError loc1 loc2 buf str
   if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar#
     then failLocMsgP loc2 loc2 (str ++ " (UTF-8 decoding error)")
     else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c)
+
+lexTokenStream :: StringBuffer -> SrcLoc -> DynFlags -> ParseResult [Located Token]
+lexTokenStream buf loc dflags = unP go initState
+    where dflags' = dopt_set (dopt_unset dflags Opt_Haddock) Opt_KeepRawTokenStream
+          initState = mkPState dflags' buf loc
+          go = do
+            ltok <- lexer return
+            case ltok of
+              L _ ITeof -> return []
+              _ -> liftM (ltok:) go
+
+linePrags = Map.singleton "line" (begin line_prag2)
+
+fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag),
+                                 ("options_ghc", lex_string_prag IToptions_prag),
+                                 ("options_haddock", lex_string_prag ITdocOptions),
+                                 ("language", token ITlanguage_prag),
+                                 ("include", lex_string_prag ITinclude_prag)])
+
+ignoredPrags = Map.fromList (map ignored pragmas)
+               where ignored opt = (opt, nested_comment lexToken)
+                     impls = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"]
+                     options_pragmas = map ("options_" ++) impls
+                     -- CFILES is a hugs-only thing.
+                     pragmas = options_pragmas ++ ["cfiles", "contract"]
+
+oneWordPrags = Map.fromList([("rules", rulePrag),
+                           ("inline", token (ITinline_prag Inline FunLike)),
+                           ("inlinable", token (ITinline_prag Inlinable FunLike)),
+                           ("inlineable", token (ITinline_prag Inlinable FunLike)),
+			   		  -- Spelling variant
+                           ("notinline", token (ITinline_prag NoInline FunLike)),
+                           ("specialize", token ITspec_prag),
+                           ("source", token ITsource_prag),
+                           ("warning", token ITwarning_prag),
+                           ("deprecated", token ITdeprecated_prag),
+                           ("scc", token ITscc_prag),
+                           ("generated", token ITgenerated_prag),
+                           ("core", token ITcore_prag),
+                           ("unpack", token ITunpack_prag),
+                           ("ann", token ITann_prag)])
+
+twoWordPrags = Map.fromList([("inline conlike", token (ITinline_prag Inline ConLike)),
+                             ("notinline conlike", token (ITinline_prag NoInline ConLike)),
+                             ("specialize inline", token (ITspec_inline_prag True)),
+                             ("specialize notinline", token (ITspec_inline_prag False))])
+
+
+dispatch_pragmas :: Map String Action -> Action
+dispatch_pragmas prags span buf len = case Map.lookup (clean_pragma (lexemeToString buf len)) prags of
+                                       Just found -> found span buf len
+                                       Nothing -> lexError "unknown pragma"
+
+known_pragma :: Map String Action -> AlexAccPred Int
+known_pragma prags _ _ len (AI _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags)
+                                          && (nextCharIs buf (\c -> not (isAlphaNum c || c == '_')))
+
+clean_pragma :: String -> String
+clean_pragma prag = canon_ws (map toLower (unprefix prag))
+                    where unprefix prag' = case stripPrefix "{-#" prag' of
+                                             Just rest -> rest
+                                             Nothing -> prag'
+                          canonical prag' = case prag' of
+                                              "noinline" -> "notinline"
+                                              "specialise" -> "specialize"
+                                              "constructorlike" -> "conlike"
+                                              _ -> prag'
+                          canon_ws s = unwords (map canonical (words s))
 }