Tweak alternative layout rule

[ghc-hetmet.git] / compiler / parser / Lexer.x
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x

index 1823864..7594079 100644 (file)
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -12,7 +12,6 @@
  -----------------------------------------------------------------------------
  
  --   ToDo / known bugs:
---    - Unicode
  --    - parsing integers is a bit slow
  --    - readRational is a bit slow
  --
@@ -20,46 +19,67 @@
  --    - M... should be 3 tokens, not 1.
  --    - pragma-end should be only valid in a pragma
  
+--   qualified operator NOTES.
+--   
+--   - If M.(+) is a single lexeme, then..
+--     - Probably (+) should be a single lexeme too, for consistency.
+--       Otherwise ( + ) would be a prefix operator, but M.( + ) would not be.
+--     - But we have to rule out reserved operators, otherwise (..) becomes
+--       a different lexeme.
+--     - Should we therefore also rule out reserved operators in the qualified
+--       form?  This is quite difficult to achieve.  We don't do it for
+--       qualified varids.
+
  {
+-- XXX The following flags turn off warnings in the generated code:
+{-# OPTIONS_GHC -fno-warn-unused-matches #-}
+{-# OPTIONS_GHC -fno-warn-unused-binds #-}
+{-# OPTIONS_GHC -fno-warn-unused-imports #-}
+{-# OPTIONS_GHC -fno-warn-missing-signatures #-}
+-- But alex still generates some code that causes the "lazy unlifted bindings"
+-- warning, and old compilers don't know about it so we can't easily turn
+-- it off, so for now we use the sledge hammer:
+{-# OPTIONS_GHC -w #-}
+
+{-# OPTIONS_GHC -funbox-strict-fields #-}
+
  module Lexer (
     Token(..), lexer, pragState, mkPState, PState(..),
     P(..), ParseResult(..), getSrcLoc, 
+   getPState, getDynFlags, withThisPackage,
     failLocMsgP, failSpanMsgP, srcParseFail,
-   getMessages,
+   getMessages, 
     popContext, pushCurrentContext, setLastToken, setSrcLoc,
     getLexState, popLexState, pushLexState,
-   extension, standaloneDerivingEnabled, bangPatEnabled
+   extension, standaloneDerivingEnabled, bangPatEnabled,
+   addWarning,
+   lexTokenStream
    ) where
  
-#include "HsVersions.h"
-
  import Bag
  import ErrUtils
  import Outputable
  import StringBuffer
  import FastString
-import FastTypes
  import SrcLoc
  import UniqFM
  import DynFlags
+import Module
  import Ctype
-import Util            ( maybePrefixMatch, readRational )
+import Util            ( readRational )
  
  import Control.Monad
  import Data.Bits
-import Data.Char       ( chr, isSpace )
+import Data.Char
+import Data.List
+import Data.Maybe
+import Data.Map (Map)
+import qualified Data.Map as Map
  import Data.Ratio
-import Debug.Trace
-
-#if __GLASGOW_HASKELL__ >= 605
-import Data.Char       ( GeneralCategory(..), generalCategory, isPrint, isUpper )
-#else
-import Compat.Unicode  ( GeneralCategory(..), generalCategory, isPrint, isUpper )
-#endif
  }
  
  $unispace    = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
-$whitechar   = [\ \n\r\f\v\xa0 $unispace]
+$whitechar   = [\ \n\r\f\v $unispace]
  $white_no_nl = $whitechar # \n
  $tab         = \t
  
@@ -69,16 +89,16 @@ $decdigit  = $ascdigit -- for now, should really be $digit (ToDo)
  $digit     = [$ascdigit $unidigit]
  
  $special   = [\(\)\,\;\[\]\`\{\}]
-$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~ \xa1-\xbf \xd7 \xf7]
+$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  $unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
  $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  
  $unilarge  = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
-$asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
+$asclarge  = [A-Z]
  $large     = [$asclarge $unilarge]
  
  $unismall  = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
-$ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
+$ascsmall  = [a-z]
  $small     = [$ascsmall $unismall \_]
  
  $unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
@@ -90,6 +110,8 @@ $symchar   = [$symbol \:]
  $nl        = [\n\r]
  $idchar    = [$small $large $digit \']
  
+$pragmachar = [$small $large $digit]
+
  $docsym    = [\| \^ \* \$]
  
  @varid     = $small $idchar*
@@ -141,12 +163,12 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
  -- space followed by a Haddock comment symbol (docsym) (in which case we'd
  -- have a Haddock comment). The rules then munch the rest of the line.
  
-"-- " ~$docsym .* ;
-"--" [^$symbol : \ ] .* ;
+"-- " ~[$docsym \#] .* { lineCommentToken }
+"--" [^$symbol : \ ] .* { lineCommentToken }
  
  -- Next, match Haddock comments if no -haddock flag
  
-"-- " $docsym .* / { ifExtension (not . haddockEnabled) } ;
+"-- " [$docsym \#] .* / { ifExtension (not . haddockEnabled) } { lineCommentToken }
  
  -- Now, when we've matched comments that begin with 2 dashes and continue
  -- with a different character, we need to match comments that begin with three
@@ -154,17 +176,17 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
  -- make sure that the first non-dash character isn't a symbol, and munch the
  -- rest of the line.
  
-"---"\-* [^$symbol :] .* ;
+"---"\-* [^$symbol :] .* { lineCommentToken }
  
  -- Since the previous rules all match dashes followed by at least one
  -- character, we also need to match a whole line filled with just dashes.
  
-"--"\-* / { atEOL } ;
+"--"\-* / { atEOL } { lineCommentToken }
  
  -- We need this rule since none of the other single line comment rules
  -- actually match this case.
  
-"-- " / { atEOL } ;
+"-- " / { atEOL } { lineCommentToken }
  
  -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
  -- blank lines) until we find a non-whitespace character, then do layout
@@ -204,7 +226,8 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
  
  <0,option_prags> \n                            { begin bol }
  
-"{-#" $whitechar* (line|LINE)          { begin line_prag2 }
+"{-#" $whitechar* $pragmachar+ / { known_pragma linePrags }
+                                { dispatch_pragmas linePrags }
  
  -- single-line line pragmas, of the form
  --    # <line> "<file>" <extra-stuff> \n
@@ -220,55 +243,42 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
     -- NOTE: accept -} at the end of a LINE pragma, for compatibility
     -- with older versions of GHC which generated these.
  
--- We only want RULES pragmas to be picked up when explicit forall
--- syntax is enabled is on, because the contents of the pragma always
--- uses it. If it's not on then we're sure to get a parse error.
--- (ToDo: we should really emit a warning when ignoring pragmas)
--- XXX Now that we can enable this without the -fglasgow-exts hammer,
--- is it better just to let the parse error happen?
-<0>
-  "{-#" $whitechar* (RULES|rules) / { ifExtension explicitForallEnabled } { token ITrules_prag }
-
  <0,option_prags> {
-  "{-#" $whitechar* (INLINE|inline)    { token (ITinline_prag True) }
-  "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
-                                       { token (ITinline_prag False) }
-  "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
-                                       { token ITspec_prag }
-  "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
-       $whitechar* (INLINE|inline)     { token (ITspec_inline_prag True) }
-  "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
-       $whitechar* (NO(T?)INLINE|no(t?)inline)
-                                       { token (ITspec_inline_prag False) }
-  "{-#" $whitechar* (SOURCE|source)    { token ITsource_prag }
-  "{-#" $whitechar* (DEPRECATED|deprecated)
-                                       { token ITdeprecated_prag }
-  "{-#" $whitechar* (SCC|scc)          { token ITscc_prag }
-  "{-#" $whitechar* (GENERATED|generated)
-                                       { token ITgenerated_prag }
-  "{-#" $whitechar* (CORE|core)                { token ITcore_prag }
-  "{-#" $whitechar* (UNPACK|unpack)    { token ITunpack_prag }
-
-  "{-#" $whitechar* (DOCOPTIONS|docoptions)
-  / { ifExtension haddockEnabled }     { lex_string_prag ITdocOptions }
-
- "{-#"                                 { nested_comment lexToken }
+  "{-#" $whitechar* $pragmachar+ 
+        $whitechar+ $pragmachar+ / { known_pragma twoWordPrags }
+                                 { dispatch_pragmas twoWordPrags }
+
+  "{-#" $whitechar* $pragmachar+ / { known_pragma oneWordPrags }
+                                 { dispatch_pragmas oneWordPrags }
+
+  -- We ignore all these pragmas, but don't generate a warning for them
+  "{-#" $whitechar* $pragmachar+ / { known_pragma ignoredPrags }
+                                 { dispatch_pragmas ignoredPrags }
  
    -- ToDo: should only be valid inside a pragma:
-  "#-}"                                { token ITclose_prag}
+  "#-}"                                { endPrag }
  }
  
  <option_prags> {
-  "{-#" $whitechar* (OPTIONS|options)   { lex_string_prag IToptions_prag }
-  "{-#" $whitechar* (OPTIONS_GHC|options_ghc)
-                                        { lex_string_prag IToptions_prag }
-  "{-#" $whitechar* (LANGUAGE|language) { token ITlanguage_prag }
-  "{-#" $whitechar* (INCLUDE|include)   { lex_string_prag ITinclude_prag }
+  "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
+                                   { dispatch_pragmas fileHeaderPrags }
+
+  "-- #"                                 { multiline_doc_comment }
+}
+
+<0> {
+  -- In the "0" mode we ignore these pragmas
+  "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
+                     { nested_comment lexToken }
+}
+
+<0> {
+  "-- #" .* { lineCommentToken }
  }
  
  <0,option_prags> {
-       -- This is to catch things like {-# OPTIONS OPTIONS_HUGS ... 
-  "{-#" $whitechar* $idchar+           { nested_comment lexToken }
+  "{-#"  { warnThen Opt_WarnUnrecognisedPragmas (text "Unrecognised pragma")
+                    (nested_comment lexToken) }
  }
  
  -- '0' state: ordinary lexemes
@@ -276,8 +286,8 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
  -- Haddock comments
  
  <0> {
-  "-- " $docsym    / { ifExtension haddockEnabled } { multiline_doc_comment }
-  "{-" \ ? $docsym / { ifExtension haddockEnabled } { nested_doc_comment }
+  "-- " $docsym      / { ifExtension haddockEnabled } { multiline_doc_comment }
+  "{-" \ ? $docsym   / { ifExtension haddockEnabled } { nested_doc_comment }
  }
  
  -- "special" symbols
@@ -296,6 +306,9 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
    "|]"     / { ifExtension thEnabled } { token ITcloseQuote }
    \$ @varid / { ifExtension thEnabled }        { skip_one_varid ITidEscape }
    "$("     / { ifExtension thEnabled } { token ITparenEscape }
+
+  "[$" @varid "|"  / { ifExtension qqEnabled }
+                     { lex_quasiquote_tok }
  }
  
  <0> {
@@ -347,38 +360,44 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
    @conid "#"+       / { ifExtension magicHashEnabled } { idtoken conid }
  }
  
--- ToDo: M.(,,,)
-
+-- ToDo: - move `var` and (sym) into lexical syntax?
+--       - remove backquote from $special?
  <0> {
-  @qual @varsym                        { idtoken qvarsym }
-  @qual @consym                        { idtoken qconsym }
-  @varsym                      { varsym }
-  @consym                      { consym }
+  @qual @varsym       / { ifExtension oldQualOps } { idtoken qvarsym }
+  @qual @consym       / { ifExtension oldQualOps } { idtoken qconsym }
+  @qual \( @varsym \) / { ifExtension newQualOps } { idtoken prefixqvarsym }
+  @qual \( @consym \) / { ifExtension newQualOps } { idtoken prefixqconsym }
+  @varsym                                          { varsym }
+  @consym                                          { consym }
  }
  
  -- For the normal boxed literals we need to be careful
  -- when trying to be close to Haskell98
  <0> {
    -- Normal integral literals (:: Num a => a, from Integer)
-  @decimal                     { tok_num positive 0 0 decimal }
-  0[oO] @octal                 { tok_num positive 2 2 octal }
-  0[xX] @hexadecimal           { tok_num positive 2 2 hexadecimal }
+  @decimal           { tok_num positive 0 0 decimal }
+  0[oO] @octal       { tok_num positive 2 2 octal }
+  0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }
  
    -- Normal rational literals (:: Fractional a => a, from Rational)
-  @floating_point              { strtoken tok_float }
+  @floating_point    { strtoken tok_float }
  }
  
  <0> {
-  -- Unboxed ints (:: Int#)
+  -- Unboxed ints (:: Int#) and words (:: Word#)
    -- It's simpler (and faster?) to give separate cases to the negatives,
    -- especially considering octal/hexadecimal prefixes.
-  @decimal \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
-  0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
-  0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
-  @negative @decimal \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
-  @negative 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
+  @decimal                     \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
+  0[oO] @octal                 \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
+  0[xX] @hexadecimal           \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
+  @negative @decimal           \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
+  @negative 0[oO] @octal       \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
    @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
  
+  @decimal                     \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal }
+  0[oO] @octal                 \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
+  0[xX] @hexadecimal           \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
+
    -- Unboxed floats and doubles (:: Float#, :: Double#)
    -- prim_{float,double} work with signed literals
    @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
@@ -405,7 +424,6 @@ data Token
    | ITdata
    | ITdefault
    | ITderiving
-  | ITderive
    | ITdo
    | ITelse
    | IThiding
@@ -436,22 +454,28 @@ data Token
    | ITunsafe
    | ITstdcallconv
    | ITccallconv
-  | ITdotnet
+  | ITprimcallconv
    | ITmdo
    | ITfamily
+  | ITgroup
+  | ITby
+  | ITusing
  
         -- Pragmas
    | ITinline_prag Bool         -- True <=> INLINE, False <=> NOINLINE
+  | ITinline_conlike_prag Bool  -- same
    | ITspec_prag                        -- SPECIALISE   
    | ITspec_inline_prag Bool    -- SPECIALISE INLINE (or NOINLINE)
    | ITsource_prag
    | ITrules_prag
+  | ITwarning_prag
    | ITdeprecated_prag
    | ITline_prag
    | ITscc_prag
    | ITgenerated_prag
    | ITcore_prag                 -- hdaume: core annotations
    | ITunpack_prag
+  | ITann_prag
    | ITclose_prag
    | IToptions_prag String
    | ITinclude_prag String
@@ -482,8 +506,8 @@ data Token
    | ITvocurly
    | ITvccurly
    | ITobrack
-  | ITopabrack                 -- [:, for parallel arrays with -fparr
-  | ITcpabrack                 -- :], for parallel arrays with -fparr
+  | ITopabrack                 -- [:, for parallel arrays with -XParr
+  | ITcpabrack                 -- :], for parallel arrays with -XParr
    | ITcbrack
    | IToparen
    | ITcparen
@@ -502,11 +526,11 @@ data Token
    | ITqconid  (FastString,FastString)
    | ITqvarsym (FastString,FastString)
    | ITqconsym (FastString,FastString)
+  | ITprefixqvarsym (FastString,FastString)
+  | ITprefixqconsym (FastString,FastString)
  
    | ITdupipvarid   FastString  -- GHC extension: implicit param: ?x
  
-  | ITpragma StringBuffer
-
    | ITchar       Char
    | ITstring     FastString
    | ITinteger    Integer
@@ -515,10 +539,11 @@ data Token
    | ITprimchar   Char
    | ITprimstring FastString
    | ITprimint    Integer
+  | ITprimword   Integer
    | ITprimfloat  Rational
    | ITprimdouble Rational
  
-  -- MetaHaskell extension tokens
+  -- Template Haskell extension tokens
    | ITopenExpQuote             --  [| or [e|
    | ITopenPatQuote             --  [p|
    | ITopenDecQuote             --  [d|
@@ -528,6 +553,7 @@ data Token
    | ITparenEscape              --  $( 
    | ITvarQuote                 --  '
    | ITtyQuote                  --  ''
+  | ITquasiQuote (FastString,FastString,SrcSpan) --  [$...|...|]
  
    -- Arrow notation extension
    | ITproc
@@ -548,18 +574,21 @@ data Token
    | ITdocCommentNamed String     -- something beginning '-- $'
    | ITdocSection      Int String -- a section heading
    | ITdocOptions      String     -- doc options (prune, ignore-exports, etc)
+  | ITdocOptionsOld   String     -- doc options declared "-- # ..."-style
+  | ITlineComment     String     -- comment starting with "--"
+  | ITblockComment    String     -- comment in {- -}
  
  #ifdef DEBUG
    deriving Show -- debugging
  #endif
  
+{-
  isSpecial :: Token -> Bool
  -- If we see M.x, where x is a keyword, but
  -- is special, we treat is as just plain M.x, 
  -- not as a keyword.
  isSpecial ITas         = True
  isSpecial IThiding     = True
-isSpecial ITderive     = True
  isSpecial ITqualified  = True
  isSpecial ITforall     = True
  isSpecial ITexport     = True
@@ -570,9 +599,14 @@ isSpecial ITthreadsafe     = True
  isSpecial ITunsafe     = True
  isSpecial ITccallconv   = True
  isSpecial ITstdcallconv = True
+isSpecial ITprimcallconv = True
  isSpecial ITmdo                = True
  isSpecial ITfamily     = True
+isSpecial ITgroup   = True
+isSpecial ITby      = True
+isSpecial ITusing   = True
  isSpecial _             = False
+-}
  
  -- the bitmap provided as the third component indicates whether the
  -- corresponding extension keyword is valid under the extension options
@@ -581,6 +615,7 @@ isSpecial _             = False
  -- facilitates using a keyword in two different extensions that can be
  -- activated independently)
  --
+reservedWordsFM :: UniqFM (Token, Int)
  reservedWordsFM = listToUFM $
         map (\(x, y, z) -> (mkFastString x, (y, z)))
         [( "_",         ITunderscore,   0 ),
@@ -590,7 +625,6 @@ reservedWordsFM = listToUFM $
         ( "data",       ITdata,         0 ),     
         ( "default",    ITdefault,      0 ),  
         ( "deriving",   ITderiving,     0 ), 
-       ( "derive",     ITderive,       0 ), 
         ( "do",         ITdo,           0 ),       
         ( "else",       ITelse,         0 ),     
         ( "hiding",     IThiding,       0 ),
@@ -611,22 +645,25 @@ reservedWordsFM = listToUFM $
         ( "where",      ITwhere,        0 ),
         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
  
-       ( "forall",     ITforall,        bit explicitForallBit),
+    ( "forall",        ITforall,        bit explicitForallBit .|. bit inRulePragBit),
         ( "mdo",        ITmdo,           bit recursiveDoBit),
         ( "family",     ITfamily,        bit tyFamBit),
+    ( "group",  ITgroup,     bit transformComprehensionsBit),
+    ( "by",     ITby,        bit transformComprehensionsBit),
+    ( "using",  ITusing,     bit transformComprehensionsBit),
  
         ( "foreign",    ITforeign,       bit ffiBit),
         ( "export",     ITexport,        bit ffiBit),
         ( "label",      ITlabel,         bit ffiBit),
         ( "dynamic",    ITdynamic,       bit ffiBit),
         ( "safe",       ITsafe,          bit ffiBit),
-       ( "threadsafe", ITthreadsafe,    bit ffiBit),
+       ( "threadsafe", ITthreadsafe,    bit ffiBit),  -- ToDo: remove
         ( "unsafe",     ITunsafe,        bit ffiBit),
         ( "stdcall",    ITstdcallconv,   bit ffiBit),
         ( "ccall",      ITccallconv,     bit ffiBit),
-       ( "dotnet",     ITdotnet,        bit ffiBit),
+       ( "prim",       ITprimcallconv,  bit ffiBit),
  
-       ( "rec",        ITrec,           bit arrowsBit),
+       ( "rec",        ITrec,           bit recBit),
         ( "proc",       ITproc,          bit arrowsBit)
       ]
  
@@ -649,16 +686,15 @@ reservedSymsFM = listToUFM $
         ,("!",   ITbang,     always)
  
          -- For data T (a::*) = MkT
-       ,("*", ITstar, \i -> kindSigsEnabled i || tyFamEnabled i)
+       ,("*", ITstar, always) -- \i -> kindSigsEnabled i || tyFamEnabled i)
          -- For 'forall a . t'
-       ,(".", ITdot, explicitForallEnabled)
+       ,(".", ITdot,  always) -- \i -> explicitForallEnabled i || inRulePrag i)
  
         ,("-<",  ITlarrowtail, arrowsEnabled)
         ,(">-",  ITrarrowtail, arrowsEnabled)
         ,("-<<", ITLarrowtail, arrowsEnabled)
         ,(">>-", ITRarrowtail, arrowsEnabled)
  
-#if __GLASGOW_HASKELL__ >= 605
         ,("∷",   ITdcolon, unicodeSyntaxEnabled)
         ,("⇒",   ITdarrow, unicodeSyntaxEnabled)
         ,("∀",   ITforall, \i -> unicodeSyntaxEnabled i &&
@@ -666,10 +702,17 @@ reservedSymsFM = listToUFM $
         ,("→",   ITrarrow, unicodeSyntaxEnabled)
         ,("←",   ITlarrow, unicodeSyntaxEnabled)
         ,("⋯",   ITdotdot, unicodeSyntaxEnabled)
+
+       ,("⤙",   ITlarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+       ,("⤚",   ITrarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+       ,("⤛",   ITLarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+       ,("⤜",   ITRarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+
+       ,("★", ITstar, unicodeSyntaxEnabled)
+
          -- ToDo: ideally, → and ∷ should be "specials", so that they cannot
          -- form part of a large operator.  This would let us have a better
          -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe).
-#endif
         ]
  
  -- -----------------------------------------------------------------------------
@@ -678,11 +721,11 @@ reservedSymsFM = listToUFM $
  type Action = SrcSpan -> StringBuffer -> Int -> P (Located Token)
  
  special :: Token -> Action
-special tok span _buf len = return (L span tok)
+special tok span _buf _len = return (L span tok)
  
  token, layout_token :: Token -> Action
-token t span buf len = return (L span t)
-layout_token t span buf len = pushLexState layout >> return (L span t)
+token t span _buf _len = return (L span t)
+layout_token t span _buf _len = pushLexState layout >> return (L span t)
  
  idtoken :: (StringBuffer -> Int -> Token) -> Action
  idtoken f span buf len = return (L span $! (f buf len))
@@ -704,17 +747,22 @@ begin :: Int -> Action
  begin code _span _str _len = do pushLexState code; lexToken
  
  pop :: Action
-pop _span _buf _len = do popLexState; lexToken
+pop _span _buf _len = do _ <- popLexState
+                         lexToken
  
  pop_and :: Action -> Action
-pop_and act span buf len = do popLexState; act span buf len
+pop_and act span buf len = do _ <- popLexState
+                              act span buf len
  
  {-# INLINE nextCharIs #-}
+nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool
  nextCharIs buf p = not (atEnd buf) && p (currentChar buf)
  
+notFollowedBy :: Char -> AlexAccPred Int
  notFollowedBy char _ _ _ (AI _ _ buf) 
    = nextCharIs buf (/=char)
  
+notFollowedBySymbol :: AlexAccPred Int
  notFollowedBySymbol _ _ _ (AI _ _ buf)
    = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
  
@@ -723,6 +771,7 @@ notFollowedBySymbol _ _ _ (AI _ _ buf)
  -- maximal munch, but not always, because the nested comment rule is
  -- valid in all states, but the doc-comment rules are only valid in
  -- the non-layout states.
+isNormalComment :: AlexAccPred Int
  isNormalComment bits _ _ (AI _ _ buf)
    | haddockEnabled bits = notFollowedByDocOrPragma
    | otherwise           = nextCharIs buf (/='#')
@@ -730,13 +779,18 @@ isNormalComment bits _ _ (AI _ _ buf)
      notFollowedByDocOrPragma
         = not $ spaceAndP buf (`nextCharIs` (`elem` "|^*$#"))
  
+spaceAndP :: StringBuffer -> (StringBuffer -> Bool) -> Bool
  spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf))
  
+{-
  haddockDisabledAnd p bits _ _ (AI _ _ buf)
    = if haddockEnabled bits then False else (p buf)
+-}
  
+atEOL :: AlexAccPred Int
  atEOL _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf == '\n'
  
+ifExtension :: (Int -> Bool) -> AlexAccPred Int
  ifExtension pred bits _ _ _ = pred bits
  
  multiline_doc_comment :: Action
@@ -767,6 +821,11 @@ multiline_doc_comment span buf _len = withLexedDocType (worker "")
              | otherwise -> input
            Nothing -> input
  
+lineCommentToken :: Action
+lineCommentToken span buf len =  -- keep "--" comments only when rawTokenStreamEnabled
+  extension rawTokenStreamEnabled >>= \keepComments ->
+    if keepComments then strtoken ITlineComment span buf len else lexToken
+
  {-
    nested comments require traversing by hand, they can't be parsed
    using regular expressions.
@@ -774,20 +833,24 @@ multiline_doc_comment span buf _len = withLexedDocType (worker "")
 nested_comment :: P (Located Token) -> Action
 nested_comment cont span _str _len = do
   input <- getInput
-  go 1 input
+  go "" (1::Int) input
   where
-    go 0 input = do setInput input; cont
-    go n input = case alexGetChar input of
+    go commentAcc 0 input = do
+      setInput input
+      b <- extension rawTokenStreamEnabled
+      if b then docCommentEnd input commentAcc ITblockComment _str span else cont
+    go commentAcc n input = case alexGetChar input of
       Nothing -> errBrace input span
       Just ('-',input) -> case alexGetChar input of
         Nothing  -> errBrace input span
-        Just ('\125',input) -> go (n-1) input
-        Just (c,_)          -> go n input
+        -- Inner "-}" is comment text too (kept reversed); only the outermost closer is dropped.
+        Just ('\125',input) -> go (if n > 1 then '\125':'-':commentAcc else commentAcc) (n-1) input
+        Just (_,_)          -> go ('-':commentAcc) n input
       Just ('\123',input) -> case alexGetChar input of
         Nothing  -> errBrace input span
-        Just ('-',input) -> go (n+1) input
-        Just (c,_)       -> go n input
-      Just (c,input) -> go n input
+        Just ('-',input) -> go ('-':'\123':commentAcc) (n+1) input
+        Just (_,_)       -> go ('\123':commentAcc) n input
+      Just (c,input) -> go (c:commentAcc) n input
  
  nested_doc_comment :: Action
  nested_doc_comment span buf _len = withLexedDocType (go "")
@@ -796,31 +859,47 @@ nested_doc_comment span buf _len = withLexedDocType (go "")
        Nothing -> errBrace input span
        Just ('-',input) -> case alexGetChar input of
          Nothing -> errBrace input span
-        Just ('\125',input@(AI end _ buf2)) ->
+        Just ('\125',input) ->
            docCommentEnd input commentAcc docType buf span
-        Just (c,_) -> go ('-':commentAcc) input docType False
+        Just (_,_) -> go ('-':commentAcc) input docType False
        Just ('\123', input) -> case alexGetChar input of
          Nothing  -> errBrace input span
          Just ('-',input) -> do
            setInput input
            let cont = do input <- getInput; go commentAcc input docType False
            nested_comment cont span buf _len
-        Just (c,_) -> go ('\123':commentAcc) input docType False
+        Just (_,_) -> go ('\123':commentAcc) input docType False
        Just (c,input) -> go (c:commentAcc) input docType False
  
+withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (Located Token))
+                 -> P (Located Token)
  withLexedDocType lexDocComment = do
    input@(AI _ _ buf) <- getInput
    case prevChar buf ' ' of
      '|' -> lexDocComment input ITdocCommentNext False
      '^' -> lexDocComment input ITdocCommentPrev False
      '$' -> lexDocComment input ITdocCommentNamed False
-    '*' -> lexDocSection 1 input 
+    '*' -> lexDocSection 1 input
+    '#' -> lexDocComment input ITdocOptionsOld False
+    _ -> panic "withLexedDocType: Bad doc type"
   where 
      lexDocSection n input = case alexGetChar input of 
        Just ('*', input) -> lexDocSection (n+1) input
-      Just (c, _) -> lexDocComment input (ITdocSection n) True
+      Just (_,   _)     -> lexDocComment input (ITdocSection n) True
        Nothing -> do setInput input; lexToken -- eof reached, lex it normally
  
+-- Opening token of a RULES pragma.  Sets inRulePragBit, the bit that lets
+-- 'forall' lex as a keyword inside the pragma; endPrag clears it again.
+rulePrag :: Action
+rulePrag span _buf _len =
+  setExts (\exts -> exts .|. bit inRulePragBit)
+    >> return (L span ITrules_prag)
+
+endPrag :: Action
+endPrag span _buf _len =  -- "#-}": clear the RULES-only bit, emit ITclose_prag
+  setExts (\exts -> exts .&. complement (bit inRulePragBit))
+    >> return (L span ITclose_prag)
+
  -- docCommentEnd
  -------------------------------------------------------------------------------
  -- This function is quite tricky. We can't just return a new token, we also
@@ -853,8 +932,9 @@ docCommentEnd input commentAcc docType buf span = do
    span `seq` setLastToken span' last_len last_line_len
    return (L span' (docType comment))
   
+errBrace :: AlexInput -> SrcSpan -> P a
  errBrace (AI end _ _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'"
- 
+
  open_brace, close_brace :: Action
  open_brace span _str _len = do 
    ctx <- getContext
@@ -864,14 +944,15 @@ close_brace span _str _len = do
    popContext
    return (L span ITccurly)
  
-qvarid buf len = ITqvarid $! splitQualName buf len
-qconid buf len = ITqconid $! splitQualName buf len
+qvarid, qconid :: StringBuffer -> Int -> Token
+qvarid buf len = ITqvarid $! splitQualName buf len False
+qconid buf len = ITqconid $! splitQualName buf len False
  
-splitQualName :: StringBuffer -> Int -> (FastString,FastString)
+splitQualName :: StringBuffer -> Int -> Bool -> (FastString,FastString)
  -- takes a StringBuffer and a length, and returns the module name
  -- and identifier parts of a qualified name.  Splits at the *last* dot,
  -- because of hierarchical module names.
-splitQualName orig_buf len = split orig_buf orig_buf
+splitQualName orig_buf len parens = split orig_buf orig_buf
    where
      split buf dot_buf
         | orig_buf `byteDiff` buf >= len  = done dot_buf
@@ -891,11 +972,15 @@ splitQualName orig_buf len = split orig_buf orig_buf
  
      done dot_buf =
         (lexemeToFastString orig_buf (qual_size - 1),
-        lexemeToFastString dot_buf (len - qual_size))
+        if parens -- Prelude.(+)
+            then lexemeToFastString (stepOn dot_buf) (len - qual_size - 2)
+            else lexemeToFastString dot_buf (len - qual_size))
        where
         qual_size = orig_buf `byteDiff` dot_buf
  
-varid span buf len = 
+varid :: Action
+varid span buf len =
+  fs `seq`
    case lookupUFM reservedWordsFM fs of
         Just (keyword,0)    -> do
                 maybe_layout keyword
@@ -909,15 +994,22 @@ varid span buf len =
    where
         fs = lexemeToFastString buf len
  
+conid :: StringBuffer -> Int -> Token
  conid buf len = ITconid fs
    where fs = lexemeToFastString buf len
  
-qvarsym buf len = ITqvarsym $! splitQualName buf len
-qconsym buf len = ITqconsym $! splitQualName buf len
+qvarsym, qconsym, prefixqvarsym, prefixqconsym :: StringBuffer -> Int -> Token
+qvarsym buf len = ITqvarsym $! splitQualName buf len False
+qconsym buf len = ITqconsym $! splitQualName buf len False
+prefixqvarsym buf len = ITprefixqvarsym $! splitQualName buf len True
+prefixqconsym buf len = ITprefixqconsym $! splitQualName buf len True
  
+varsym, consym :: Action
  varsym = sym ITvarsym
  consym = sym ITconsym
  
+sym :: (FastString -> Token) -> SrcSpan -> StringBuffer -> Int
+    -> P (Located Token)
  sym con span buf len = 
    case lookupUFM reservedSymsFM fs of
         Just (keyword,exts) -> do
@@ -939,15 +1031,27 @@ tok_integral itint transint transbuf translen (radix,char_to_int) span buf len =
       (offsetBytes transbuf buf) (subtract translen len) radix char_to_int
  
  -- some conveniences for use with tok_integral
+tok_num :: (Integer -> Integer)
+        -> Int -> Int
+        -> (Integer, (Char->Int)) -> Action
  tok_num = tok_integral ITinteger
+tok_primint :: (Integer -> Integer)
+            -> Int -> Int
+            -> (Integer, (Char->Int)) -> Action
  tok_primint = tok_integral ITprimint
+tok_primword :: Int -> Int
+             -> (Integer, (Char->Int)) -> Action
+tok_primword = tok_integral ITprimword positive
+positive, negative :: (Integer -> Integer)
  positive = id
  negative = negate
+decimal, octal, hexadecimal :: (Integer, Char -> Int)
  decimal = (10,octDecDigit)
  octal = (8,octDecDigit)
  hexadecimal = (16,hexDigit)
  
  -- readRational can understand negative rationals, exponents, everything.
+tok_float, tok_primfloat, tok_primdouble :: String -> Token
  tok_float        str = ITrational   $! readRational str
  tok_primfloat    str = ITprimfloat  $! readRational str
  tok_primdouble   str = ITprimdouble $! readRational str
@@ -967,21 +1071,31 @@ do_bol span _str _len = do
                 return (L span ITvccurly)
             EQ -> do
                  --trace "layout: inserting ';'" $ do
-               popLexState
+               _ <- popLexState
                 return (L span ITsemi)
             GT -> do
-               popLexState
+               _ <- popLexState
                 lexToken
  
  -- certain keywords put us in the "layout" state, where we might
  -- add an opening curly brace.
-maybe_layout ITdo      = pushLexState layout_do
-maybe_layout ITmdo     = pushLexState layout_do
-maybe_layout ITof      = pushLexState layout
-maybe_layout ITlet     = pushLexState layout
-maybe_layout ITwhere   = pushLexState layout
-maybe_layout ITrec     = pushLexState layout
-maybe_layout _         = return ()
+maybe_layout :: Token -> P ()
+maybe_layout t = do -- If the alternative layout rule is enabled then
+                    -- we never create an implicit layout context here.
+                    -- Layout is handled by lexTokenAlr instead.
+                    -- The code for closing implicit contexts, or
+                    -- inserting implicit semi-colons, is therefore
+                    -- irrelevant as it only applies in an implicit
+                    -- context.
+                    alr <- extension alternativeLayoutRule
+                    unless alr $ f t
+    where f ITdo    = pushLexState layout_do
+          f ITmdo   = pushLexState layout_do
+          f ITof    = pushLexState layout
+          f ITlet   = pushLexState layout
+          f ITwhere = pushLexState layout
+          f ITrec   = pushLexState layout
+          f _       = return ()
  
  -- Pushing a new implicit layout context.  If the indentation of the
  -- next token is not greater than the previous layout context, then
@@ -992,8 +1106,9 @@ maybe_layout _             = return ()
  -- by a 'do', then we allow the new context to be at the same indentation as
  -- the previous context.  This is what the 'strict' argument is for.
  --
+new_layout_context :: Bool -> Action
  new_layout_context strict span _buf _len = do
-    popLexState
+    _ <- popLexState
      (AI _ offset _) <- getInput
      ctx <- getContext
      case ctx of
@@ -1004,12 +1119,13 @@ new_layout_context strict span _buf _len = do
                 -- we must generate a {} sequence now.
                 pushLexState layout_left
                 return (L span ITvocurly)
-       other -> do
+       _ -> do
                 setContext (Layout offset : ctx)
                 return (L span ITvocurly)
  
+do_layout_left :: Action
  do_layout_left span _buf _len = do
-    popLexState
+    _ <- popLexState
      pushLexState bol  -- we must be at the start of a line
      return (L span ITvccurly)
  
@@ -1019,17 +1135,18 @@ do_layout_left span _buf _len = do
  setLine :: Int -> Action
  setLine code span buf len = do
    let line = parseUnsignedInteger buf len 10 octDecDigit
-  setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0)
+  setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
         -- subtract one: the line number refers to the *following* line
-  popLexState
+  _ <- popLexState
    pushLexState code
    lexToken
  
  setFile :: Int -> Action
  setFile code span buf len = do
    let file = lexemeToFastString (stepOn buf) (len-2)
+  setAlrLastLoc noSrcSpan
    setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
-  popLexState
+  _ <- popLexState
    pushLexState code
    lexToken
  
@@ -1038,7 +1155,7 @@ setFile code span buf len = do
  -- Options, includes and language pragmas.
  
  lex_string_prag :: (String -> Token) -> Action
-lex_string_prag mkTok span buf len
+lex_string_prag mkTok span _buf _len
      = do input <- getInput
           start <- getSrcLoc
           tok <- go [] input
@@ -1051,7 +1168,7 @@ lex_string_prag mkTok span buf len
                     else case alexGetChar input of
                            Just (c,i) -> go (c:acc) i
                            Nothing -> err input
-          isString i [] = True
+          isString _ [] = True
            isString i (x:xs)
                = case alexGetChar i of
                    Just (c,i') | c == x    -> isString i' xs
@@ -1065,7 +1182,7 @@ lex_string_prag mkTok span buf len
  -- This stuff is horrible.  I hates it.
  
  lex_string_tok :: Action
-lex_string_tok span buf len = do
+lex_string_tok span _buf _len = do
    tok <- lex_string ""
    end <- getSrcLoc 
    return (L (mkSrcSpan (srcSpanStart span) end) tok)
@@ -1107,6 +1224,7 @@ lex_string s = do
         c' <- lex_char c i
         lex_string (c':s)
  
+lex_stringgap :: String -> P Token
  lex_stringgap s = do
    c <- getCharOrFail
    case c of
@@ -1119,10 +1237,10 @@ lex_char_tok :: Action
  -- Here we are basically parsing character literals, such as 'x' or '\n'
  -- but, when Template Haskell is on, we additionally spot
  -- 'x and ''T, returning ITvarQuote and ITtyQuote respectively, 
--- but WIHTOUT CONSUMING the x or T part  (the parser does that).
+-- but WITHOUT CONSUMING the x or T part  (the parser does that).
  -- So we have to do two characters of lookahead: when we see 'x we need to
  -- see if there's a trailing quote
-lex_char_tok span buf len = do -- We've seen '
+lex_char_tok span _buf _len = do       -- We've seen '
     i1 <- getInput      -- Look ahead to first character
     let loc = srcSpanStart span
     case alexGetChar' i1 of
@@ -1135,25 +1253,25 @@ lex_char_tok span buf len = do  -- We've seen '
                         return (L (mkSrcSpan loc end2)  ITtyQuote)
                    else lit_error
  
-       Just ('\\', i2@(AI end2 _ _)) -> do     -- We've seen 'backslash 
+       Just ('\\', i2@(AI _end2 _ _)) -> do    -- We've seen 'backslash
                   setInput i2
                   lit_ch <- lex_escape
                   mc <- getCharOrFail   -- Trailing quote
                   if mc == '\'' then finish_char_tok loc lit_ch
                                 else do setInput i2; lit_error 
  
-        Just (c, i2@(AI end2 _ _)) 
+        Just (c, i2@(AI _end2 _ _))
                 | not (isAny c) -> lit_error
                 | otherwise ->
  
                 -- We've seen 'x, where x is a valid character
                 --  (i.e. not newline etc) but not a quote or backslash
            case alexGetChar' i2 of      -- Look ahead one more character
-               Nothing -> lit_error
                 Just ('\'', i3) -> do   -- We've seen 'x'
                         setInput i3 
                         finish_char_tok loc c
                 _other -> do            -- We've seen 'x not followed by quote
+                                       -- (including the possibility of EOF)
                                         -- If TH is on, just parse the quote only
                         th_exts <- extension thEnabled  
                         let (AI end _ _) = i1
@@ -1182,7 +1300,8 @@ lex_char c inp = do
        c | isAny c -> do setInput inp; return c
        _other -> lit_error
  
-isAny c | c > '\xff' = isPrint c
+isAny :: Char -> Bool
+isAny c | c > '\x7f' = isPrint c
         | otherwise  = is_any c
  
  lex_escape :: P Char
@@ -1206,7 +1325,7 @@ lex_escape = do
  
         'x'   -> readNum is_hexdigit 16 hexDigit
         'o'   -> readNum is_octdigit  8 octDecDigit
-       x | is_digit x -> readNum2 is_digit 10 octDecDigit (octDecDigit x)
+       x | is_decdigit x -> readNum2 is_decdigit 10 octDecDigit (octDecDigit x)
  
         c1 ->  do
            i <- getInput
@@ -1218,7 +1337,7 @@ lex_escape = do
                 Just (c3,i3) -> 
                    let str = [c1,c2,c3] in
                    case [ (c,rest) | (p,c) <- silly_escape_chars,
-                                    Just rest <- [maybePrefixMatch p str] ] of
+                                    Just rest <- [stripPrefix p str] ] of
                           (escape_char,[]):_ -> do
                                 setInput i3
                                 return escape_char
@@ -1235,6 +1354,7 @@ readNum is_digit base conv = do
         then readNum2 is_digit base conv (conv c)
         else do setInput i; lit_error
  
+readNum2 :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> P Char
  readNum2 is_digit base conv i = do
    input <- getInput
    read i input
@@ -1247,6 +1367,7 @@ readNum2 is_digit base conv i = do
                    then do setInput input; return (chr i)
                    else lit_error
  
+silly_escape_chars :: [(String, Char)]
  silly_escape_chars = [
         ("NUL", '\NUL'),
         ("SOH", '\SOH'),
@@ -1288,6 +1409,7 @@ silly_escape_chars = [
  -- the position of the error in the buffer.  This is so that we can report
  -- a correct location to the user, but also so we can detect UTF-8 decoding
  -- errors if they occur.
+lit_error :: P a
  lit_error = lexError "lexical error in string/character literal"
  
  getCharOrFail :: P Char
@@ -1298,13 +1420,54 @@ getCharOrFail =  do
         Just (c,i)  -> do setInput i; return c
  
  -- -----------------------------------------------------------------------------
+-- QuasiQuote
+
+lex_quasiquote_tok :: Action
+lex_quasiquote_tok span buf len = do
+  let quoter = reverse $ takeWhile (/= '$')
+               $ reverse $ lexemeToString buf (len - 1)
+  quoteStart <- getSrcLoc              
+  quote <- lex_quasiquote ""
+  end <- getSrcLoc 
+  return (L (mkSrcSpan (srcSpanStart span) end)
+           (ITquasiQuote (mkFastString quoter,
+                          mkFastString (reverse quote),
+                          mkSrcSpan quoteStart end)))
+
+lex_quasiquote :: String -> P String
+lex_quasiquote s = do
+  i <- getInput
+  case alexGetChar' i of
+    Nothing -> lit_error
+
+    Just ('\\',i)
+       | Just ('|',i) <- next -> do 
+               setInput i; lex_quasiquote ('|' : s)
+       | Just (']',i) <- next -> do 
+               setInput i; lex_quasiquote (']' : s)
+       where next = alexGetChar' i
+
+    Just ('|',i)
+       | Just (']',i) <- next -> do 
+               setInput i; return s
+       where next = alexGetChar' i
+
+    Just (c, i) -> do
+        setInput i; lex_quasiquote (c : s)
+
+-- -----------------------------------------------------------------------------
  -- Warnings
  
  warn :: DynFlag -> SDoc -> Action
-warn option warning span _buf _len = do
-    addWarning option (mkWarnMsg span alwaysQualify warning)
+warn option warning srcspan _buf _len = do
+    addWarning option srcspan warning
      lexToken
  
+warnThen :: DynFlag -> SDoc -> Action -> Action
+warnThen option warning action srcspan buf len = do
+    addWarning option srcspan warning
+    action srcspan buf len
+
  -- -----------------------------------------------------------------------------
  -- The Parse Monad
  
@@ -1323,18 +1486,24 @@ data ParseResult a
  
  data PState = PState { 
         buffer     :: StringBuffer,
-    dflags     :: DynFlags,
-    messages   :: Messages,
+        dflags     :: DynFlags,
+        messages   :: Messages,
          last_loc   :: SrcSpan, -- pos of previous token
          last_offs  :: !Int,    -- offset of the previous token from the
                                 -- beginning of  the current line.
                                 -- \t is equal to 8 spaces.
         last_len   :: !Int,     -- len of previous token
-  last_line_len :: !Int,
+        last_line_len :: !Int,
          loc        :: SrcLoc,   -- current loc (end of prev token + 1)
         extsBitmap :: !Int,     -- bitmap that determines permitted extensions
         context    :: [LayoutContext],
-       lex_state  :: [Int]
+       lex_state  :: [Int],
+        -- Used in the alternative layout rule:
+        alr_pending_implicit_tokens :: [Located Token],
+        alr_next_token :: Maybe (Located Token),
+        alr_last_loc :: SrcSpan,
+        alr_context :: [ALRContext],
+        alr_expecting_ocurly :: Maybe ALRLayout
       }
         -- last_loc and last_len are used when generating error messages,
         -- and in pushCurrentContext only.  Sigh, if only Happy passed the
@@ -1342,6 +1511,13 @@ data PState = PState {
         -- Getting rid of last_loc would require finding another way to 
         -- implement pushCurrentContext (which is only called from one place).
  
+data ALRContext = ALRNoLayout Bool{- does it contain commas? -}
+                | ALRLayout ALRLayout Int
+data ALRLayout = ALRLayoutLet
+               | ALRLayoutWhere
+               | ALRLayoutOf
+               | ALRLayoutDo
+
  newtype P a = P { unP :: PState -> ParseResult a }
  
  instance Monad P where
@@ -1350,7 +1526,7 @@ instance Monad P where
    fail = failP
  
  returnP :: a -> P a
-returnP a = P $ \s -> POk s a
+returnP a = a `seq` (P $ \s -> POk s a)
  
  thenP :: P a -> (a -> P b) -> P b
  (P m) `thenP` k = P $ \ s ->
@@ -1365,10 +1541,21 @@ failMsgP :: String -> P a
  failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg)
  
  failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
-failLocMsgP loc1 loc2 str = P $ \s -> PFailed (mkSrcSpan loc1 loc2) (text str)
+failLocMsgP loc1 loc2 str = P $ \_ -> PFailed (mkSrcSpan loc1 loc2) (text str)
+
+failSpanMsgP :: SrcSpan -> SDoc -> P a
+failSpanMsgP span msg = P $ \_ -> PFailed span msg
+
+getPState :: P PState
+getPState = P $ \s -> POk s s
  
-failSpanMsgP :: SrcSpan -> String -> P a
-failSpanMsgP span msg = P $ \s -> PFailed span (text msg)
+getDynFlags :: P DynFlags
+getDynFlags = P $ \s -> POk s (dflags s)
+
+withThisPackage :: (PackageId -> a) -> P a
+withThisPackage f
+ = do  pkg     <- liftM thisPackage getDynFlags
+       return  $ f pkg
  
  extension :: (Int -> Bool) -> P Bool
  extension p = P $ \s -> POk s (p $! extsBitmap s)
@@ -1376,6 +1563,9 @@ extension p = P $ \s -> POk s (p $! extsBitmap s)
  getExts :: P Int
  getExts = P $ \s -> POk s (extsBitmap s)
  
+setExts :: (Int -> Int) -> P ()
+setExts f = P $ \s -> POk s{ extsBitmap = f (extsBitmap s) } ()
+
  setSrcLoc :: SrcLoc -> P ()
  setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
  
@@ -1414,9 +1604,9 @@ alexGetChar (AI loc ofs s)
  
         adj_c 
           | c <= '\x06' = non_graphic
-         | c <= '\xff' = c
+         | c <= '\x7f' = c
            -- Alex doesn't handle Unicode, so when Unicode
-          -- character is encoutered we output these values
+          -- character is encountered we output these values
            -- with the actual character value hidden in the state.
           | otherwise = 
                 case generalCategory c of
@@ -1424,20 +1614,20 @@ alexGetChar (AI loc ofs s)
                   LowercaseLetter       -> lower
                   TitlecaseLetter       -> upper
                   ModifierLetter        -> other_graphic
-                 OtherLetter           -> other_graphic
+                 OtherLetter           -> lower -- see #1103
                   NonSpacingMark        -> other_graphic
                   SpacingCombiningMark  -> other_graphic
                   EnclosingMark         -> other_graphic
                   DecimalNumber         -> digit
                   LetterNumber          -> other_graphic
                   OtherNumber           -> other_graphic
-                 ConnectorPunctuation  -> other_graphic
-                 DashPunctuation       -> other_graphic
+                 ConnectorPunctuation  -> symbol
+                 DashPunctuation       -> symbol
                   OpenPunctuation       -> other_graphic
                   ClosePunctuation      -> other_graphic
                   InitialQuote          -> other_graphic
                   FinalQuote            -> other_graphic
-                 OtherPunctuation      -> other_graphic
+                 OtherPunctuation      -> symbol
                   MathSymbol            -> symbol
                   CurrencySymbol        -> symbol
                   ModifierSymbol        -> symbol
@@ -1458,7 +1648,7 @@ alexGetChar' (AI loc ofs s)
          ofs'   = advanceOffs c ofs
  
  advanceOffs :: Char -> Int -> Int
-advanceOffs '\n' offs = 0
+advanceOffs '\n' _    = 0
  advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8
  advanceOffs _    offs = offs + 1
  
@@ -1475,60 +1665,147 @@ popLexState :: P Int
  popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
  
  getLexState :: P Int
-getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls
+getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls
+
+popNextToken :: P (Maybe (Located Token))
+popNextToken
+    = P $ \s@PState{ alr_next_token = m } ->
+              POk (s {alr_next_token = Nothing}) m
+
+setAlrLastLoc :: SrcSpan -> P ()
+setAlrLastLoc l = P $ \s -> POk (s {alr_last_loc = l}) ()
+
+getAlrLastLoc :: P SrcSpan
+getAlrLastLoc = P $ \s@(PState {alr_last_loc = l}) -> POk s l
+
+getALRContext :: P [ALRContext]
+getALRContext = P $ \s@(PState {alr_context = cs}) -> POk s cs
+
+setALRContext :: [ALRContext] -> P ()
+setALRContext cs = P $ \s -> POk (s {alr_context = cs}) ()
+
+setNextToken :: Located Token -> P ()
+setNextToken t = P $ \s -> POk (s {alr_next_token = Just t}) ()
+
+popPendingImplicitToken :: P (Maybe (Located Token))
+popPendingImplicitToken
+    = P $ \s@PState{ alr_pending_implicit_tokens = ts } ->
+              case ts of
+              [] -> POk s Nothing
+              (t : ts') -> POk (s {alr_pending_implicit_tokens = ts'}) (Just t)
+
+setPendingImplicitTokens :: [Located Token] -> P ()
+setPendingImplicitTokens ts = P $ \s -> POk (s {alr_pending_implicit_tokens = ts}) ()
+
+getAlrExpectingOCurly :: P (Maybe ALRLayout)
+getAlrExpectingOCurly = P $ \s@(PState {alr_expecting_ocurly = b}) -> POk s b
+
+setAlrExpectingOCurly :: Maybe ALRLayout -> P ()
+setAlrExpectingOCurly b = P $ \s -> POk (s {alr_expecting_ocurly = b}) ()
  
  -- for reasons of efficiency, flags indicating language extensions (eg,
--- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
+-- -fglasgow-exts or -XPArr) are represented by a bitmap stored in an unboxed
  -- integer
  
-genericsBit, ffiBit, parrBit :: Int
+genericsBit :: Int
  genericsBit = 0 -- {| and |}
+ffiBit :: Int
  ffiBit    = 1
+parrBit :: Int
  parrBit           = 2
+arrowsBit :: Int
  arrowsBit  = 4
+thBit :: Int
  thBit     = 5
+ipBit :: Int
  ipBit      = 6
+explicitForallBit :: Int
  explicitForallBit = 7 -- the 'forall' keyword and '.' symbol
+bangPatBit :: Int
  bangPatBit = 8 -- Tells the parser to understand bang-patterns
                 -- (doesn't affect the lexer)
+tyFamBit :: Int
  tyFamBit   = 9 -- indexed type families: 'family' keyword and kind sigs
+haddockBit :: Int
  haddockBit = 10 -- Lex and parse Haddock comments
-magicHashBit = 11 -- # in both functions and operators
+magicHashBit :: Int
+magicHashBit = 11 -- "#" in both functions and operators
+kindSigsBit :: Int
  kindSigsBit = 12 -- Kind signatures on type variables
+recursiveDoBit :: Int
  recursiveDoBit = 13 -- mdo
+unicodeSyntaxBit :: Int
  unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
+unboxedTuplesBit :: Int
  unboxedTuplesBit = 15 -- (# and #)
+standaloneDerivingBit :: Int
  standaloneDerivingBit = 16 -- standalone instance deriving declarations
-
-genericsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
+transformComprehensionsBit :: Int
+transformComprehensionsBit = 17
+qqBit :: Int
+qqBit     = 18 -- enable quasiquoting
+inRulePragBit :: Int
+inRulePragBit = 19
+rawTokenStreamBit :: Int
+rawTokenStreamBit = 20 -- producing a token stream with all comments included
+newQualOpsBit :: Int
+newQualOpsBit = 21 -- Haskell' qualified operator syntax, e.g. Prelude.(+)
+recBit :: Int
+recBit = 22 -- rec
+alternativeLayoutRuleBit :: Int
+alternativeLayoutRuleBit = 23
+
+always :: Int -> Bool
  always           _     = True
+genericsEnabled :: Int -> Bool
  genericsEnabled  flags = testBit flags genericsBit
-ffiEnabled       flags = testBit flags ffiBit
+parrEnabled :: Int -> Bool
  parrEnabled      flags = testBit flags parrBit
+arrowsEnabled :: Int -> Bool
  arrowsEnabled    flags = testBit flags arrowsBit
+thEnabled :: Int -> Bool
  thEnabled        flags = testBit flags thBit
+ipEnabled :: Int -> Bool
  ipEnabled        flags = testBit flags ipBit
+explicitForallEnabled :: Int -> Bool
  explicitForallEnabled flags = testBit flags explicitForallBit
+bangPatEnabled :: Int -> Bool
  bangPatEnabled   flags = testBit flags bangPatBit
-tyFamEnabled     flags = testBit flags tyFamBit
+-- tyFamEnabled :: Int -> Bool
+-- tyFamEnabled     flags = testBit flags tyFamBit
+haddockEnabled :: Int -> Bool
  haddockEnabled   flags = testBit flags haddockBit
+magicHashEnabled :: Int -> Bool
  magicHashEnabled flags = testBit flags magicHashBit
-kindSigsEnabled  flags = testBit flags kindSigsBit
-recursiveDoEnabled flags = testBit flags recursiveDoBit
+-- kindSigsEnabled :: Int -> Bool
+-- kindSigsEnabled  flags = testBit flags kindSigsBit
+unicodeSyntaxEnabled :: Int -> Bool
  unicodeSyntaxEnabled flags = testBit flags unicodeSyntaxBit
+unboxedTuplesEnabled :: Int -> Bool
  unboxedTuplesEnabled flags = testBit flags unboxedTuplesBit
+standaloneDerivingEnabled :: Int -> Bool
  standaloneDerivingEnabled flags = testBit flags standaloneDerivingBit
+qqEnabled :: Int -> Bool
+qqEnabled        flags = testBit flags qqBit
+-- inRulePrag :: Int -> Bool
+-- inRulePrag       flags = testBit flags inRulePragBit
+rawTokenStreamEnabled :: Int -> Bool
+rawTokenStreamEnabled flags = testBit flags rawTokenStreamBit
+newQualOps :: Int -> Bool
+newQualOps       flags = testBit flags newQualOpsBit
+oldQualOps :: Int -> Bool
+oldQualOps flags = not (newQualOps flags)
+alternativeLayoutRule :: Int -> Bool
+alternativeLayoutRule flags = testBit flags alternativeLayoutRuleBit
  
  -- PState for parsing options pragmas
  --
-pragState :: StringBuffer -> SrcLoc -> PState
-pragState buf loc  = 
+pragState :: DynFlags -> StringBuffer -> SrcLoc -> PState
+pragState dynflags buf loc =
    PState {
-      buffer         = buf,
+      buffer        = buf,
        messages      = emptyMessages,
-      -- XXX defaultDynFlags is not right, but we don't have a real
-      -- dflags handy
-      dflags        = defaultDynFlags,
+      dflags        = dynflags,
        last_loc      = mkSrcSpan loc loc,
        last_offs     = 0,
        last_len      = 0,
@@ -1536,7 +1813,12 @@ pragState buf loc  =
        loc           = loc,
        extsBitmap    = 0,
        context       = [],
-      lex_state     = [bol, option_prags, 0]
+      lex_state     = [bol, option_prags, 0],
+      alr_pending_implicit_tokens = [],
+      alr_next_token = Nothing,
+      alr_last_loc = noSrcSpan,
+      alr_context = [],
+      alr_expecting_ocurly = Nothing
      }
  
  
@@ -1555,39 +1837,48 @@ mkPState buf loc flags  =
        loc           = loc,
        extsBitmap    = fromIntegral bitmap,
        context       = [],
-      lex_state     = [bol, 0]
+      lex_state     = [bol, 0],
         -- we begin in the layout state if toplev_layout is set
+      alr_pending_implicit_tokens = [],
+      alr_next_token = Nothing,
+      alr_last_loc = noSrcSpan,
+      alr_context = [],
+      alr_expecting_ocurly = Nothing
      }
      where
        bitmap = genericsBit `setBitIf` dopt Opt_Generics flags
-              .|. ffiBit       `setBitIf` dopt Opt_ForeignFunctionInterface flags
-              .|. parrBit      `setBitIf` dopt Opt_PArr         flags
-              .|. arrowsBit    `setBitIf` dopt Opt_Arrows       flags
-              .|. thBit        `setBitIf` dopt Opt_TH           flags
-              .|. ipBit        `setBitIf` dopt Opt_ImplicitParams flags
-              .|. explicitForallBit `setBitIf` dopt Opt_ScopedTypeVariables flags
-              .|. explicitForallBit `setBitIf` dopt Opt_PolymorphicComponents flags
-              .|. explicitForallBit `setBitIf` dopt Opt_ExistentialQuantification flags
-              .|. explicitForallBit `setBitIf` dopt Opt_Rank2Types flags
-              .|. explicitForallBit `setBitIf` dopt Opt_RankNTypes flags
-              .|. bangPatBit   `setBitIf` dopt Opt_BangPatterns flags
-              .|. tyFamBit     `setBitIf` dopt Opt_TypeFamilies flags
-              .|. haddockBit   `setBitIf` dopt Opt_Haddock      flags
-              .|. magicHashBit `setBitIf` dopt Opt_MagicHash    flags
-              .|. kindSigsBit  `setBitIf` dopt Opt_KindSignatures flags
-              .|. recursiveDoBit `setBitIf` dopt Opt_RecursiveDo flags
-              .|. unicodeSyntaxBit `setBitIf` dopt Opt_UnicodeSyntax flags
-              .|. unboxedTuplesBit `setBitIf` dopt Opt_UnboxedTuples flags
+              .|. ffiBit            `setBitIf` dopt Opt_ForeignFunctionInterface flags
+              .|. parrBit           `setBitIf` dopt Opt_PArr         flags
+              .|. arrowsBit         `setBitIf` dopt Opt_Arrows       flags
+              .|. thBit             `setBitIf` dopt Opt_TemplateHaskell flags
+              .|. qqBit             `setBitIf` dopt Opt_QuasiQuotes flags
+              .|. ipBit             `setBitIf` dopt Opt_ImplicitParams flags
+              .|. explicitForallBit `setBitIf` dopt Opt_ExplicitForAll flags
+              .|. bangPatBit        `setBitIf` dopt Opt_BangPatterns flags
+              .|. tyFamBit          `setBitIf` dopt Opt_TypeFamilies flags
+              .|. haddockBit        `setBitIf` dopt Opt_Haddock      flags
+              .|. magicHashBit      `setBitIf` dopt Opt_MagicHash    flags
+              .|. kindSigsBit       `setBitIf` dopt Opt_KindSignatures flags
+              .|. recursiveDoBit    `setBitIf` dopt Opt_RecursiveDo flags
+              .|. recBit            `setBitIf` dopt Opt_DoRec  flags
+              .|. recBit            `setBitIf` dopt Opt_Arrows flags
+              .|. unicodeSyntaxBit  `setBitIf` dopt Opt_UnicodeSyntax flags
+              .|. unboxedTuplesBit  `setBitIf` dopt Opt_UnboxedTuples flags
                .|. standaloneDerivingBit `setBitIf` dopt Opt_StandaloneDeriving flags
+               .|. transformComprehensionsBit `setBitIf` dopt Opt_TransformListComp flags
+               .|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags
+               .|. newQualOpsBit `setBitIf` dopt Opt_NewQualifiedOperators flags
+               .|. alternativeLayoutRuleBit `setBitIf` dopt Opt_AlternativeLayoutRule flags
        --
        setBitIf :: Int -> Bool -> Int
        b `setBitIf` cond | cond      = bit b
                         | otherwise = 0
  
-addWarning :: DynFlag -> WarnMsg -> P ()
-addWarning option w
+addWarning :: DynFlag -> SrcSpan -> SDoc -> P ()
+addWarning option srcspan warning
   = P $ \s@PState{messages=(ws,es), dflags=d} ->
-       let ws' = if dopt option d then ws `snocBag` w else ws
+       let warning' = mkWarnMsg srcspan alwaysQualify warning
+           ws' = if dopt option d then ws `snocBag` warning' else ws
         in POk s{messages=(ws', es)} ()
  
  getMessages :: PState -> Messages
@@ -1601,7 +1892,7 @@ setContext ctx = P $ \s -> POk s{context=ctx} ()
  
  popContext :: P ()
  popContext = P $ \ s@(PState{ buffer = buf, context = ctx, 
-                          loc = loc, last_len = len, last_loc = last_loc }) ->
+                              last_len = len, last_loc = last_loc }) ->
    case ctx of
         (_:tl) -> POk s{ context = tl } ()
         []     -> PFailed last_loc (srcParseErr buf len)
@@ -1630,8 +1921,8 @@ srcParseErr
    -> Message
  srcParseErr buf len
    = hcat [ if null token 
-            then ptext SLIT("parse error (possibly incorrect indentation)")
-            else hcat [ptext SLIT("parse error on input "),
+            then ptext (sLit "parse error (possibly incorrect indentation)")
+            else hcat [ptext (sLit "parse error on input "),
                         char '`', text token, char '\'']
      ]
    where token = lexemeToString (offsetBytes (-len) buf) len
@@ -1649,7 +1940,7 @@ srcParseFail = P $ \PState{ buffer = buf, last_len = len,
  lexError :: String -> P a
  lexError str = do
    loc <- getSrcLoc
-  i@(AI end _ buf) <- getInput
+  (AI end _ buf) <- getInput
    reportLexError loc end buf str
  
  -- -----------------------------------------------------------------------------
@@ -1658,31 +1949,189 @@ lexError str = do
  
  -- | Lex one token and hand it to the continuation @cont@.
  --
  -- When the 'alternativeLayoutRule' extension is on, tokens come from
  -- 'lexTokenAlr'; otherwise they come straight from 'lexToken'.
  lexer :: (Located Token -> P a) -> P a
  lexer cont = do
-  tok@(L span tok__) <- lexToken
+  alr <- extension alternativeLayoutRule
+  let lexTokenFun = if alr then lexTokenAlr else lexToken
+  tok@(L _span _tok__) <- lexTokenFun
  --  trace ("token: " ++ show tok__) $ do
    cont tok
  
+-- | Produce the next token under the alternative layout rule.
+--
+-- Token sources, in priority order:
+--
+--   1. a pending implicit token, which is returned as-is (it is NOT run
+--      through 'alternativeLayoutRuleToken' again);
+--   2. a token previously stashed with 'setNextToken';
+--   3. a fresh token from 'lexToken'.
+--
+-- Tokens from (2) and (3) are passed through 'alternativeLayoutRuleToken',
+-- which may return a synthesised token instead.
+--
+-- Afterwards the location of the returned token is remembered with
+-- 'setAlrLastLoc', and if the token is @where@, @let@, @of@ or @do@ the
+-- state is marked as expecting an open curly of the matching kind.
+lexTokenAlr :: P (Located Token)
+lexTokenAlr = do mPending <- popPendingImplicitToken
+                 t <- case mPending of
+                      Nothing ->
+                          do mNext <- popNextToken
+                             t <- case mNext of
+                                  Nothing -> lexToken
+                                  Just next -> return next
+                             alternativeLayoutRuleToken t
+                      Just t ->
+                          return t
+                 setAlrLastLoc (getLoc t)
+                 -- Layout keywords: the following token decides whether
+                 -- an implicit block is opened.
+                 case unLoc t of
+                     ITwhere -> setAlrExpectingOCurly (Just ALRLayoutWhere)
+                     ITlet   -> setAlrExpectingOCurly (Just ALRLayoutLet)
+                     ITof    -> setAlrExpectingOCurly (Just ALRLayoutOf)
+                     ITdo    -> setAlrExpectingOCurly (Just ALRLayoutDo)
+                     _       -> return ()
+                 return t
+
+-- | Apply the alternative layout rule to a single token.
+--
+-- Inspects the token @t@, the current ALR context stack and whether an
+-- implicit open curly is expected, and returns either @t@ itself or a
+-- synthesised 'ITocurly', 'ITccurly' or 'ITsemi'.
+--
+-- When a synthesised token is returned, @t@ is kept for later in one of
+-- two ways:
+--
+--   * 'setNextToken': @t@ will be run through this function again
+--     (see 'lexTokenAlr'), so further braces/semicolons can be produced;
+--   * 'setPendingImplicitTokens': the listed tokens are returned by
+--     'lexTokenAlr' verbatim, without being re-examined here.
+--
+-- The case alternatives are tried top to bottom, so their order matters.
+alternativeLayoutRuleToken :: Located Token -> P (Located Token)
+alternativeLayoutRuleToken t
+    = do context <- getALRContext
+         lastLoc <- getAlrLastLoc
+         mExpectingOCurly <- getAlrExpectingOCurly
+         let thisLoc = getLoc t
+             thisCol = srcSpanStartCol thisLoc
+             -- True when there is no previous token location, or this
+             -- token starts on a later line than the previous one ended.
+             newLine = (lastLoc == noSrcSpan)
+                    || (srcSpanStartLine thisLoc > srcSpanEndLine lastLoc)
+         case (unLoc t, context, mExpectingOCurly) of
+             -- I think our implicit open-curly handling is slightly
+             -- different to John's, in how it interacts with newlines
+             -- and "in"
+             -- An explicit '{' where an implicit one was expected: cancel
+             -- the expectation and process the brace again as an
+             -- ordinary token.
+             (ITocurly, _, Just _) ->
+                 do setAlrExpectingOCurly Nothing
+                    setNextToken t
+                    lexTokenAlr
+             -- An implicit block is expected while already inside a layout
+             -- block at column 'col'.
+             (_, ALRLayout _ col : ls, Just expectingOCurly)
+              -- Indented further (or equally, where
+              -- isNonDecreasingIntentation allows it): open a new layout
+              -- block at this token's column.
+              | (thisCol > col) ||
+                (thisCol == col &&
+                 isNonDecreasingIntentation expectingOCurly) ->
+                 do setAlrExpectingOCurly Nothing
+                    setALRContext (ALRLayout expectingOCurly thisCol : context)
+                    setNextToken t
+                    return (L thisLoc ITocurly)
+              -- Not indented enough: emit an empty block, i.e. '{' now and
+              -- '}' pending, both located at the previous token.
+              | otherwise ->
+                 do setAlrExpectingOCurly Nothing
+                    setPendingImplicitTokens [L lastLoc ITccurly]
+                    setNextToken t
+                    return (L lastLoc ITocurly)
+             -- An implicit block is expected and the innermost context is
+             -- not a layout block (or the stack is empty): always open one.
+             (_, _, Just expectingOCurly) ->
+                 do setAlrExpectingOCurly Nothing
+                    setALRContext (ALRLayout expectingOCurly thisCol : context)
+                    setNextToken t
+                    return (L thisLoc ITocurly)
+             -- We do the [] cases earlier than in the spec, as we
+             -- have an actual EOF token
+             -- At EOF, close one layout block per call; EOF is re-queued
+             -- so that the remaining blocks are closed in turn.
+             (ITeof, ALRLayout _ _ : ls, _) ->
+                 do setALRContext ls
+                    setNextToken t
+                    return (L thisLoc ITccurly)
+             (ITeof, _, _) ->
+                 return t
+             -- the other ITeof case omitted; general case below covers it
+             -- 'in' on a new line directly inside a 'let' block closes that
+             -- block; this is checked before the column-based cases below.
+             (ITin, ALRLayout ALRLayoutLet _ : ls, _)
+              | newLine ->
+                 do setPendingImplicitTokens [t]
+                    setALRContext ls
+                    return (L thisLoc ITccurly)
+             -- First token of a new line inside a layout block: same column
+             -- gives a semicolon, a smaller column closes the block.
+             (_, ALRLayout _ col : ls, _)
+              | newLine && thisCol == col ->
+                 do setNextToken t
+                    return (L thisLoc ITsemi)
+              | newLine && thisCol < col ->
+                 do setALRContext ls
+                    setNextToken t
+                    -- Note that we use lastLoc, as we may need to close
+                    -- more layouts, or give a semicolon
+                    return (L lastLoc ITccurly)
+             -- Opening token: push a non-layout context, recording whether
+             -- commas may appear directly inside it.
+             (u, _, _)
+              | isALRopen u ->
+                 do setALRContext (ALRNoLayout (containsCommas u) : context)
+                    return t
+             -- Closing token: close an enclosing layout block first (the
+             -- token is then re-processed), or pop the non-layout context.
+             (u, _, _)
+              | isALRclose u ->
+                 case context of
+                 ALRLayout _ _ : ls ->
+                     do setALRContext ls
+                        setNextToken t
+                        return (L thisLoc ITccurly)
+                 ALRNoLayout _ : ls ->
+                     do setALRContext ls
+                        return t
+                 [] ->
+                     -- XXX This is an error in John's code, but
+                     -- it looks reachable to me at first glance
+                     return t
+             -- 'in' closes the innermost layout block.  For a 'let' block
+             -- the 'in' is emitted verbatim afterwards; for any other
+             -- block it is re-processed so that outer blocks can close too.
+             (ITin, ALRLayout ALRLayoutLet _ : ls, _) ->
+                 do setALRContext ls
+                    setPendingImplicitTokens [t]
+                    return (L thisLoc ITccurly)
+             (ITin, ALRLayout _ _ : ls, _) ->
+                 do setALRContext ls
+                    setNextToken t
+                    return (L thisLoc ITccurly)
+             -- the other ITin case omitted; general case below covers it
+             -- A comma closes a layout block when the nearest enclosing
+             -- non-layout context is one that contains commas.
+             (ITcomma, ALRLayout _ _ : ls, _)
+              | topNoLayoutContainsCommas ls ->
+                 do setALRContext ls
+                    setNextToken t
+                    return (L thisLoc ITccurly)
+             -- 'where' directly inside a 'do' block closes that block.
+             (ITwhere, ALRLayout ALRLayoutDo _ : ls, _) ->
+                 do setALRContext ls
+                    setPendingImplicitTokens [t]
+                    return (L thisLoc ITccurly)
+             -- the other ITwhere case omitted; general case below covers it
+             (_, _, _) -> return t
+
+-- | Tokens that 'alternativeLayoutRuleToken' treats as opening a
+-- non-layout context: @case@, @if@ and the opening bracket tokens
+-- listed here.  Every other token yields 'False'.
+isALRopen :: Token -> Bool
+isALRopen tok = case tok of
+    ITcase      -> True
+    ITif        -> True
+    IToparen    -> True
+    ITobrack    -> True
+    ITocurly    -> True
+    -- GHC Extensions:
+    IToubxparen -> True
+    _           -> False
+
+-- | Tokens that 'alternativeLayoutRuleToken' treats as closing a
+-- context: @of@, @then@ and the closing bracket tokens listed here.
+-- Every other token yields 'False'.
+isALRclose :: Token -> Bool
+isALRclose tok = case tok of
+    ITof        -> True
+    ITthen      -> True
+    ITcparen    -> True
+    ITcbrack    -> True
+    ITccurly    -> True
+    -- GHC Extensions:
+    ITcubxparen -> True
+    _           -> False
+
+-- | Whether a new implicit block of this kind may start at the same
+-- column as the enclosing layout block.  Only @do@ blocks may.
+isNonDecreasingIntentation :: ALRLayout -> Bool
+isNonDecreasingIntentation layout = case layout of
+    ALRLayoutDo -> True
+    _           -> False
+
+-- | For an opening token, the flag stored in the 'ALRNoLayout' context
+-- entry it creates: 'True' for the paren and bracket openers listed
+-- here, 'False' for anything else.
+containsCommas :: Token -> Bool
+containsCommas tok = case tok of
+    IToparen    -> True
+    ITobrack    -> True
+    -- GHC Extensions:
+    IToubxparen -> True
+    _           -> False
+
+-- | Skip over any leading 'ALRLayout' entries and return the flag of the
+-- first 'ALRNoLayout' entry found; 'False' if there is none.
+topNoLayoutContainsCommas :: [ALRContext] -> Bool
+topNoLayoutContainsCommas = foldr pick False
+  where
+    -- A layout entry defers to whatever lies beneath it; a non-layout
+    -- entry decides the answer and the rest of the stack is ignored.
+    pick (ALRLayout _ _)     deeper = deeper
+    pick (ALRNoLayout commas) _     = commas
+
  -- | Scan one token from the current input with 'alexScanUser', using the
  -- current lex state and extension bits.
  --
  -- Skipped input is consumed and scanning restarts; a scan error is
  -- reported through 'reportLexError'; at end of input an 'ITeof' token
  -- with an empty span is returned.
  lexToken :: P (Located Token)
  lexToken = do
    inp@(AI loc1 _ buf) <- getInput
    sc <- getLexState
    exts <- getExts
    case alexScanUser exts inp sc of
-    AlexEOF -> do let span = mkSrcSpan loc1 loc1
-                 setLastToken span 0 0
-                 return (L span ITeof)
-    AlexError (AI loc2 _ buf) -> do 
-       reportLexError loc1 loc2 buf "lexical error"
+    AlexEOF -> do
+        let span = mkSrcSpan loc1 loc1
+        setLastToken span 0 0
+        return (L span ITeof)
+    AlexError (AI loc2 _ buf) ->
+        reportLexError loc1 loc2 buf "lexical error"
      AlexSkip inp2 _ -> do
-       setInput inp2
-       lexToken
-    AlexToken inp2@(AI end _ buf2) len t -> do
-    setInput inp2
-    let span = mkSrcSpan loc1 end
-    let bytes = byteDiff buf buf2
-    span `seq` setLastToken span bytes bytes
-    t span buf bytes
-
+        setInput inp2
+        lexToken
+    AlexToken inp2@(AI end _ buf2) _ t -> do
+        setInput inp2
+        let span = mkSrcSpan loc1 end
+        -- The token length is measured in bytes between the old and
+        -- new buffer positions, not taken from Alex's own length field.
+        let bytes = byteDiff buf buf2
+        span `seq` setLastToken span bytes bytes
+        -- Run the token's action on the span, the buffer at the start
+        -- of the token, and the byte length.
+        t span buf bytes
+
+reportLexError :: SrcLoc -> SrcLoc -> StringBuffer -> [Char] -> P a
  reportLexError loc1 loc2 buf str
    | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input")
    | otherwise =
@@ -1692,4 +2141,68 @@ reportLexError loc1 loc2 buf str
    if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar#
      then failLocMsgP loc2 loc2 (str ++ " (UTF-8 decoding error)")
      else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c)
+
+-- | Lex a whole buffer into a list of located tokens.
+--
+-- The parser state is built from @dflags@ with 'Opt_Haddock' cleared and
+-- 'Opt_KeepRawTokenStream' set.  Tokens are collected until 'ITeof' is
+-- seen; the EOF token itself is not part of the result.
+lexTokenStream :: StringBuffer -> SrcLoc -> DynFlags -> ParseResult [Located Token]
+lexTokenStream buf loc dflags = unP collect (mkPState buf loc rawFlags)
+    where
+      rawFlags = dopt_unset dflags Opt_Haddock `dopt_set` Opt_KeepRawTokenStream
+      collect = do
+        ltok <- lexer return
+        case unLoc ltok of
+          ITeof -> return []
+          _     -> liftM (ltok :) collect
+
+-- | Pragma table with a single entry: @line@ switches to the
+-- 'line_prag2' start code.
+linePrags = Map.fromList [("line", begin line_prag2)]
+
+-- | Pragma table keyed by lower-case pragma name, mapping each name to
+-- the lexer action that handles it.  Going by the name, these are the
+-- pragmas recognised in a file header -- confirm at the use site.
+fileHeaderPrags = Map.fromList
+    [ ("options",         lex_string_prag IToptions_prag)
+    , ("options_ghc",     lex_string_prag IToptions_prag)
+    , ("options_haddock", lex_string_prag ITdocOptions)
+    , ("language",        token ITlanguage_prag)
+    , ("include",         lex_string_prag ITinclude_prag)
+    ]
+
+-- | Pragma table whose entries are all lexed as nested comments:
+-- @options_<impl>@ for each listed implementation, plus @cfiles@ and
+-- @contract@.
+ignoredPrags = Map.fromList [ (name, nested_comment lexToken) | name <- pragmas ]
+  where
+    impls   = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"]
+    -- CFILES is a hugs-only thing.
+    pragmas = [ "options_" ++ impl | impl <- impls ] ++ ["cfiles", "contract"]
+
+-- | Pragma table for pragmas named by a single word, keyed by the
+-- lower-case name.
+oneWordPrags = Map.fromList
+    [ ("rules",      rulePrag)
+    , ("inline",     token (ITinline_prag True))
+    , ("notinline",  token (ITinline_prag False))
+    , ("specialize", token ITspec_prag)
+    , ("source",     token ITsource_prag)
+    , ("warning",    token ITwarning_prag)
+    , ("deprecated", token ITdeprecated_prag)
+    , ("scc",        token ITscc_prag)
+    , ("generated",  token ITgenerated_prag)
+    , ("core",       token ITcore_prag)
+    , ("unpack",     token ITunpack_prag)
+    , ("ann",        token ITann_prag)
+    ]
+
+-- | Pragma table for pragmas named by two words.  Keys are lower-case
+-- with the words separated by exactly one space.
+twoWordPrags = Map.fromList
+    [ ("inline conlike",       token (ITinline_conlike_prag True))
+    , ("notinline conlike",    token (ITinline_conlike_prag False))
+    , ("specialize inline",    token (ITspec_inline_prag True))
+    , ("specialize notinline", token (ITspec_inline_prag False))
+    ]
+
+
+-- | Look the current lexeme up in a pragma table (after normalising it
+-- with 'clean_pragma') and run the action found there on the same
+-- span, buffer and length.  An unknown name is a lexical error.
+dispatch_pragmas :: Map String Action -> Action
+dispatch_pragmas prags span buf len =
+    maybe (lexError "unknown pragma") runAction (Map.lookup key prags)
+  where
+    key = clean_pragma (lexemeToString buf len)
+    runAction act = act span buf len
+
+-- | Alex predicate: succeeds when the @len@ bytes just before the
+-- current buffer position name a pragma present in the table (after
+-- 'clean_pragma'), and the character that follows is neither
+-- alphanumeric nor an underscore.
+known_pragma :: Map String Action -> AlexAccPred Int
+known_pragma prags _ _ len (AI _ _ buf) = inTable && atWordEnd
+  where
+    lexeme    = lexemeToString (offsetBytes (- len) buf) len
+    inTable   = isJust (Map.lookup (clean_pragma lexeme) prags)
+    atWordEnd = nextCharIs buf (\c -> not (isAlphaNum c) && c /= '_')
+
+-- | Normalise a pragma lexeme into the form used as a pragma-table key.
+--
+-- A leading @{-#@ is removed if present, the text is lower-cased, each
+-- word is replaced by its canonical spelling, and the words are joined
+-- with single spaces.
+clean_pragma :: String -> String
+clean_pragma = unwords . map canonical . words . map toLower . dropOpener
+  where
+    -- Strip the pragma opener when the lexeme still carries it.
+    dropOpener s = maybe s id (stripPrefix "{-#" s)
+    -- Alternative spellings are folded onto the one used in the tables.
+    canonical "noinline"        = "notinline"
+    canonical "specialise"      = "specialize"
+    canonical "constructorlike" = "conlike"
+    canonical w                 = w
  }