-- qualified varids.
{
-{-# OPTIONS -Wwarn -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
---
--- Note that Alex itself generates code with with some unused bindings and
--- without type signatures, so removing the flag might not be possible.
+-- XXX The above flags turn off warnings in the generated code:
+{-# LANGUAGE BangPatterns #-}
+{-# OPTIONS_GHC -fno-warn-unused-matches #-}
+{-# OPTIONS_GHC -fno-warn-unused-binds #-}
+{-# OPTIONS_GHC -fno-warn-unused-imports #-}
+{-# OPTIONS_GHC -fno-warn-missing-signatures #-}
+-- But alex still generates some code that causes the "lazy unlifted bindings"
+-- warning, and old compilers don't know about it so we can't easily turn
+-- it off, so for now we use the sledge hammer:
+{-# OPTIONS_GHC -w #-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
module Lexer (
Token(..), lexer, pragState, mkPState, PState(..),
P(..), ParseResult(..), getSrcLoc,
+ getPState, getDynFlags, withThisPackage,
failLocMsgP, failSpanMsgP, srcParseFail,
- getMessages,
+ getMessages,
popContext, pushCurrentContext, setLastToken, setSrcLoc,
getLexState, popLexState, pushLexState,
- extension, standaloneDerivingEnabled, bangPatEnabled,
+ extension, bangPatEnabled, datatypeContextsEnabled,
addWarning,
lexTokenStream
) where
import SrcLoc
import UniqFM
import DynFlags
+import Module
import Ctype
-import Util ( maybePrefixMatch, readRational )
+import BasicTypes ( InlineSpec(..), RuleMatchInfo(..) )
+import Util ( readRational )
import Control.Monad
import Data.Bits
import Data.Char
+import Data.List
+import Data.Maybe
+import Data.Map (Map)
+import qualified Data.Map as Map
import Data.Ratio
}
$nl = [\n\r]
$idchar = [$small $large $digit \']
+$pragmachar = [$small $large $digit]
+
$docsym = [\| \^ \* \$]
@varid = $small $idchar*
-- everywhere: skip whitespace and comments
$white_no_nl+ ;
-$tab+ { warn Opt_WarnTabs (text "Tab character") }
+$tab+ { warn Opt_WarnTabs (text "Warning: Tab character") }
-- Everywhere: deal with nested comments. We explicitly rule out
-- pragmas, "{-#", so that we don't accidentally treat them as comments.
-- context if the curly brace is missing.
-- Careful! This stuff is quite delicate.
<layout, layout_do> {
- \{ / { notFollowedBy '-' } { pop_and open_brace }
+ \{ / { notFollowedBy '-' } { hopefully_open_brace }
-- we might encounter {-# here, but {- has been handled already
\n ;
^\# (line)? { begin line_prag1 }
<0,option_prags> \n { begin bol }
-"{-#" $whitechar* (line|LINE) / { notFollowedByPragmaChar }
- { begin line_prag2 }
+"{-#" $whitechar* $pragmachar+ / { known_pragma linePrags }
+ { dispatch_pragmas linePrags }
-- single-line line pragmas, of the form
-- # <line> "<file>" <extra-stuff> \n
-- with older versions of GHC which generated these.
<0,option_prags> {
- "{-#" $whitechar* (RULES|rules) / { notFollowedByPragmaChar } { rulePrag }
- "{-#" $whitechar* (INLINE|inline) / { notFollowedByPragmaChar }
- { token (ITinline_prag True) }
- "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar }
- { token (ITinline_prag False) }
- "{-#" $whitechar* (INLINE|inline)
- $whitechar+ (CONLIKE|conlike) / { notFollowedByPragmaChar }
- { token (ITinline_conlike_prag True) }
- "{-#" $whitechar* (NO(T)?INLINE|no(t?)inline)
- $whitechar+ (CONLIKE|constructorlike) / { notFollowedByPragmaChar }
- { token (ITinline_conlike_prag False) }
- "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) / { notFollowedByPragmaChar }
- { token ITspec_prag }
- "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
- $whitechar+ (INLINE|inline) / { notFollowedByPragmaChar }
- { token (ITspec_inline_prag True) }
- "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
- $whitechar+ (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar }
- { token (ITspec_inline_prag False) }
- "{-#" $whitechar* (SOURCE|source) / { notFollowedByPragmaChar }
- { token ITsource_prag }
- "{-#" $whitechar* (WARNING|warning) / { notFollowedByPragmaChar }
- { token ITwarning_prag }
- "{-#" $whitechar* (DEPRECATED|deprecated) / { notFollowedByPragmaChar }
- { token ITdeprecated_prag }
- "{-#" $whitechar* (SCC|scc) / { notFollowedByPragmaChar }
- { token ITscc_prag }
- "{-#" $whitechar* (GENERATED|generated) / { notFollowedByPragmaChar }
- { token ITgenerated_prag }
- "{-#" $whitechar* (CORE|core) / { notFollowedByPragmaChar }
- { token ITcore_prag }
- "{-#" $whitechar* (UNPACK|unpack) / { notFollowedByPragmaChar }
- { token ITunpack_prag }
- "{-#" $whitechar* (ANN|ann) / { notFollowedByPragmaChar }
- { token ITann_prag }
+ "{-#" $whitechar* $pragmachar+
+ $whitechar+ $pragmachar+ / { known_pragma twoWordPrags }
+ { dispatch_pragmas twoWordPrags }
+
+ "{-#" $whitechar* $pragmachar+ / { known_pragma oneWordPrags }
+ { dispatch_pragmas oneWordPrags }
-- We ignore all these pragmas, but don't generate a warning for them
- -- CFILES is a hugs-only thing.
- "{-#" $whitechar* (OPTIONS_(HUGS|hugs|NHC98|nhc98|JHC|jhc|YHC|yhc|CATCH|catch|DERIVE|derive)|CFILES|cfiles|CONTRACT|contract) / { notFollowedByPragmaChar }
- { nested_comment lexToken }
+ "{-#" $whitechar* $pragmachar+ / { known_pragma ignoredPrags }
+ { dispatch_pragmas ignoredPrags }
-- ToDo: should only be valid inside a pragma:
"#-}" { endPrag }
}
<option_prags> {
- "{-#" $whitechar* (OPTIONS|options) / { notFollowedByPragmaChar }
- { lex_string_prag IToptions_prag }
- "{-#" $whitechar* (OPTIONS_GHC|options_ghc) / { notFollowedByPragmaChar }
- { lex_string_prag IToptions_prag }
- "{-#" $whitechar* (OPTIONS_HADDOCK|options_haddock)
- / { notFollowedByPragmaChar }
- { lex_string_prag ITdocOptions }
+ "{-#" $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
+ { dispatch_pragmas fileHeaderPrags }
+
"-- #" { multiline_doc_comment }
- "{-#" $whitechar* (LANGUAGE|language) / { notFollowedByPragmaChar }
- { token ITlanguage_prag }
- "{-#" $whitechar* (INCLUDE|include) / { notFollowedByPragmaChar }
- { lex_string_prag ITinclude_prag }
}
<0> {
-- In the "0" mode we ignore these pragmas
- "{-#" $whitechar* (OPTIONS|options|OPTIONS_GHC|options_ghc|OPTIONS_HADDOCK|options_haddock|LANGUAGE|language|INCLUDE|include) / { notFollowedByPragmaChar }
+ "{-#" $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
{ nested_comment lexToken }
}
-- Haddock comments
-<0> {
+<0,option_prags> {
"-- " $docsym / { ifExtension haddockEnabled } { multiline_doc_comment }
"{-" \ ? $docsym / { ifExtension haddockEnabled } { nested_doc_comment }
}
\$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
"$(" / { ifExtension thEnabled } { token ITparenEscape }
+-- For backward compatibility, accept the old dollar syntax
"[$" @varid "|" / { ifExtension qqEnabled }
{ lex_quasiquote_tok }
+
+ "[" @varid "|" / { ifExtension qqEnabled }
+ { lex_quasiquote_tok }
}
<0> {
-- ToDo: - move `var` and (sym) into lexical syntax?
-- - remove backquote from $special?
<0> {
- @qual @varsym / { ifExtension oldQualOps } { idtoken qvarsym }
- @qual @consym / { ifExtension oldQualOps } { idtoken qconsym }
- @qual \( @varsym \) / { ifExtension newQualOps } { idtoken prefixqvarsym }
- @qual \( @consym \) / { ifExtension newQualOps } { idtoken prefixqconsym }
+ @qual @varsym { idtoken qvarsym }
+ @qual @consym { idtoken qconsym }
@varsym { varsym }
@consym { consym }
}
| ITdynamic
| ITsafe
| ITthreadsafe
+ | ITinterruptible
| ITunsafe
| ITstdcallconv
| ITccallconv
- | ITdotnet
+ | ITprimcallconv
| ITmdo
| ITfamily
| ITgroup
| ITusing
-- Pragmas
- | ITinline_prag Bool -- True <=> INLINE, False <=> NOINLINE
- | ITinline_conlike_prag Bool -- same
+ | ITinline_prag InlineSpec RuleMatchInfo
| ITspec_prag -- SPECIALISE
| ITspec_inline_prag Bool -- SPECIALISE INLINE (or NOINLINE)
| ITsource_prag
| ITvocurly
| ITvccurly
| ITobrack
- | ITopabrack -- [:, for parallel arrays with -XParr
- | ITcpabrack -- :], for parallel arrays with -XParr
+ | ITopabrack -- [:, for parallel arrays with -XParallelArrays
+ | ITcpabrack -- :], for parallel arrays with -XParallelArrays
| ITcbrack
| IToparen
| ITcparen
| ITdupipvarid FastString -- GHC extension: implicit param: ?x
- | ITpragma StringBuffer
-
| ITchar Char
| ITstring FastString
| ITinteger Integer
| ITprimfloat Rational
| ITprimdouble Rational
- -- MetaHaskell extension tokens
+ -- Template Haskell extension tokens
| ITopenExpQuote -- [| or [e|
| ITopenPatQuote -- [p|
| ITopenDecQuote -- [d|
isSpecial ITdynamic = True
isSpecial ITsafe = True
isSpecial ITthreadsafe = True
+isSpecial ITinterruptible = True
isSpecial ITunsafe = True
isSpecial ITccallconv = True
isSpecial ITstdcallconv = True
+isSpecial ITprimcallconv = True
isSpecial ITmdo = True
isSpecial ITfamily = True
isSpecial ITgroup = True
( "dynamic", ITdynamic, bit ffiBit),
( "safe", ITsafe, bit ffiBit),
( "threadsafe", ITthreadsafe, bit ffiBit), -- ToDo: remove
+ ( "interruptible", ITinterruptible, bit ffiBit),
( "unsafe", ITunsafe, bit ffiBit),
( "stdcall", ITstdcallconv, bit ffiBit),
( "ccall", ITccallconv, bit ffiBit),
- ( "dotnet", ITdotnet, bit ffiBit),
+ ( "prim", ITprimcallconv, bit ffiBit),
- ( "rec", ITrec, bit arrowsBit),
+ ( "rec", ITrec, bit recBit),
( "proc", ITproc, bit arrowsBit)
]
explicitForallEnabled i)
,("→", ITrarrow, unicodeSyntaxEnabled)
,("←", ITlarrow, unicodeSyntaxEnabled)
- ,("⋯", ITdotdot, unicodeSyntaxEnabled)
+
+ ,("⤙", ITlarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+ ,("⤚", ITrarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+ ,("⤛", ITLarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+ ,("⤜", ITRarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+
+ ,("★", ITstar, unicodeSyntaxEnabled)
+
-- ToDo: ideally, → and ∷ should be "specials", so that they cannot
-- form part of a large operator. This would let us have a better
-- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe).
begin code _span _str _len = do pushLexState code; lexToken
pop :: Action
-pop _span _buf _len = do popLexState; lexToken
+pop _span _buf _len = do _ <- popLexState
+ lexToken
+
+hopefully_open_brace :: Action
+hopefully_open_brace span buf len
+ = do relaxed <- extension relaxedLayout
+ ctx <- getContext
+ (AI l _) <- getInput
+ let offset = srcLocCol l
+ isOK = relaxed ||
+ case ctx of
+ Layout prev_off : _ -> prev_off < offset
+ _ -> True
+ if isOK then pop_and open_brace span buf len
+ else failSpanMsgP span (text "Missing block")
pop_and :: Action -> Action
-pop_and act span buf len = do popLexState; act span buf len
+pop_and act span buf len = do _ <- popLexState
+ act span buf len
{-# INLINE nextCharIs #-}
nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool
nextCharIs buf p = not (atEnd buf) && p (currentChar buf)
notFollowedBy :: Char -> AlexAccPred Int
-notFollowedBy char _ _ _ (AI _ _ buf)
+notFollowedBy char _ _ _ (AI _ buf)
= nextCharIs buf (/=char)
notFollowedBySymbol :: AlexAccPred Int
-notFollowedBySymbol _ _ _ (AI _ _ buf)
+notFollowedBySymbol _ _ _ (AI _ buf)
= nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
-notFollowedByPragmaChar :: AlexAccPred Int
-notFollowedByPragmaChar _ _ _ (AI _ _ buf)
- = nextCharIs buf (\c -> not (isAlphaNum c || c == '_'))
-
-- We must reject doc comments as being ordinary comments everywhere.
-- In some cases the doc comment will be selected as the lexeme due to
-- maximal munch, but not always, because the nested comment rule is
-- valid in all states, but the doc-comment rules are only valid in
-- the non-layout states.
isNormalComment :: AlexAccPred Int
-isNormalComment bits _ _ (AI _ _ buf)
+isNormalComment bits _ _ (AI _ buf)
| haddockEnabled bits = notFollowedByDocOrPragma
| otherwise = nextCharIs buf (/='#')
where
spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf))
{-
-haddockDisabledAnd p bits _ _ (AI _ _ buf)
+haddockDisabledAnd p bits _ _ (AI _ buf)
= if haddockEnabled bits then False else (p buf)
-}
atEOL :: AlexAccPred Int
-atEOL _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf == '\n'
+atEOL _ _ _ (AI _ buf) = atEnd buf || currentChar buf == '\n'
ifExtension :: (Int -> Bool) -> AlexAccPred Int
ifExtension pred bits _ _ _ = pred bits
withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (Located Token))
-> P (Located Token)
withLexedDocType lexDocComment = do
- input@(AI _ _ buf) <- getInput
+ input@(AI _ buf) <- getInput
case prevChar buf ' ' of
'|' -> lexDocComment input ITdocCommentNext False
'^' -> lexDocComment input ITdocCommentPrev False
-- RULES pragmas turn on the forall and '.' keywords, and we turn them
-- off again at the end of the pragma.
rulePrag :: Action
-rulePrag span _ _ = do
+rulePrag span _buf _len = do
setExts (.|. bit inRulePragBit)
return (L span ITrules_prag)
endPrag :: Action
-endPrag span _ _ = do
+endPrag span _buf _len = do
setExts (.&. complement (bit inRulePragBit))
return (L span ITclose_prag)
-- it writes the wrong token length to the parser state. This function is
-- called afterwards, so it can just update the state.
--- This is complicated by the fact that Haddock tokens can span multiple lines,
--- which is something that the original lexer didn't account for.
--- I have added last_line_len in the parser state which represents the length
--- of the part of the token that is on the last line. It is now used for layout
--- calculation in pushCurrentContext instead of last_len. last_len is, like it
--- was before, the full length of the token, and it is now only used for error
--- messages. /Waern
-
docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer ->
SrcSpan -> P (Located Token)
docCommentEnd input commentAcc docType buf span = do
setInput input
- let (AI loc last_offs nextBuf) = input
+ let (AI loc nextBuf) = input
comment = reverse commentAcc
span' = mkSrcSpan (srcSpanStart span) loc
last_len = byteDiff buf nextBuf
- last_line_len = if (last_offs - last_len < 0)
- then last_offs
- else last_len
-
- span `seq` setLastToken span' last_len last_line_len
+ span `seq` setLastToken span' last_len
return (L span' (docType comment))
errBrace :: AlexInput -> SrcSpan -> P a
-errBrace (AI end _ _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'"
+errBrace (AI end _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'"
open_brace, close_brace :: Action
open_brace span _str _len = do
return (L span ITvccurly)
EQ -> do
--trace "layout: inserting ';'" $ do
- popLexState
+ _ <- popLexState
return (L span ITsemi)
GT -> do
- popLexState
+ _ <- popLexState
lexToken
-- certain keywords put us in the "layout" state, where we might
-- add an opening curly brace.
maybe_layout :: Token -> P ()
-maybe_layout ITdo = pushLexState layout_do
-maybe_layout ITmdo = pushLexState layout_do
-maybe_layout ITof = pushLexState layout
-maybe_layout ITlet = pushLexState layout
-maybe_layout ITwhere = pushLexState layout
-maybe_layout ITrec = pushLexState layout
-maybe_layout _ = return ()
+maybe_layout t = do -- If the alternative layout rule is enabled then
+ -- we never create an implicit layout context here.
+ -- Layout is handled XXX instead.
+ -- The code for closing implicit contexts, or
+ -- inserting implicit semi-colons, is therefore
+ -- irrelevant as it only applies in an implicit
+ -- context.
+ alr <- extension alternativeLayoutRule
+ unless alr $ f t
+ where f ITdo = pushLexState layout_do
+ f ITmdo = pushLexState layout_do
+ f ITof = pushLexState layout
+ f ITlet = pushLexState layout
+ f ITwhere = pushLexState layout
+ f ITrec = pushLexState layout
+ f _ = return ()
-- Pushing a new implicit layout context. If the indentation of the
-- next token is not greater than the previous layout context, then
--
new_layout_context :: Bool -> Action
new_layout_context strict span _buf _len = do
- popLexState
- (AI _ offset _) <- getInput
+ _ <- popLexState
+ (AI l _) <- getInput
+ let offset = srcLocCol l
ctx <- getContext
+ nondecreasing <- extension nondecreasingIndentation
+ let strict' = strict || not nondecreasing
case ctx of
Layout prev_off : _ |
- (strict && prev_off >= offset ||
- not strict && prev_off > offset) -> do
+ (strict' && prev_off >= offset ||
+ not strict' && prev_off > offset) -> do
-- token is indented to the left of the previous context.
-- we must generate a {} sequence now.
pushLexState layout_left
do_layout_left :: Action
do_layout_left span _buf _len = do
- popLexState
+ _ <- popLexState
pushLexState bol -- we must be at the start of a line
return (L span ITvccurly)
setLine :: Int -> Action
setLine code span buf len = do
let line = parseUnsignedInteger buf len 10 octDecDigit
- setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0)
+ setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
-- subtract one: the line number refers to the *following* line
- popLexState
+ _ <- popLexState
pushLexState code
lexToken
setFile :: Int -> Action
setFile code span buf len = do
let file = lexemeToFastString (stepOn buf) (len-2)
+ setAlrLastLoc noSrcSpan
setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
- popLexState
+ _ <- popLexState
pushLexState code
lexToken
= case alexGetChar i of
Just (c,i') | c == x -> isString i' xs
_other -> False
- err (AI end _ _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma"
+ err (AI end _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma"
-- -----------------------------------------------------------------------------
lex_string s = do
i <- getInput
case alexGetChar' i of
- Nothing -> lit_error
+ Nothing -> lit_error i
Just ('"',i) -> do
setInput i
Just ('\\',i)
| Just ('&',i) <- next -> do
setInput i; lex_string s
- | Just (c,i) <- next, is_space c -> do
+ | Just (c,i) <- next, c <= '\x7f' && is_space c -> do
+ -- is_space only works for <= '\x7f' (#3751)
setInput i; lex_stringgap s
where next = alexGetChar' i
- Just (c, i) -> do
- c' <- lex_char c i
- lex_string (c':s)
+ Just (c, i1) -> do
+ case c of
+ '\\' -> do setInput i1; c' <- lex_escape; lex_string (c':s)
+ c | isAny c -> do setInput i1; lex_string (c:s)
+ _other -> lit_error i
lex_stringgap :: String -> P Token
lex_stringgap s = do
- c <- getCharOrFail
+ i <- getInput
+ c <- getCharOrFail i
case c of
'\\' -> lex_string s
c | is_space c -> lex_stringgap s
- _other -> lit_error
+ _other -> lit_error i
lex_char_tok :: Action
-- Here we are basically parsing character literals, such as 'x' or '\n'
-- but, when Template Haskell is on, we additionally spot
-- 'x and ''T, returning ITvarQuote and ITtyQuote respectively,
--- but WIHTOUT CONSUMING the x or T part (the parser does that).
+-- but WITHOUT CONSUMING the x or T part (the parser does that).
-- So we have to do two characters of lookahead: when we see 'x we need to
-- see if there's a trailing quote
lex_char_tok span _buf _len = do -- We've seen '
i1 <- getInput -- Look ahead to first character
let loc = srcSpanStart span
case alexGetChar' i1 of
- Nothing -> lit_error
+ Nothing -> lit_error i1
- Just ('\'', i2@(AI end2 _ _)) -> do -- We've seen ''
+ Just ('\'', i2@(AI end2 _)) -> do -- We've seen ''
th_exts <- extension thEnabled
if th_exts then do
setInput i2
return (L (mkSrcSpan loc end2) ITtyQuote)
- else lit_error
+ else lit_error i1
- Just ('\\', i2@(AI _end2 _ _)) -> do -- We've seen 'backslash
+ Just ('\\', i2@(AI _end2 _)) -> do -- We've seen 'backslash
setInput i2
lit_ch <- lex_escape
- mc <- getCharOrFail -- Trailing quote
+ i3 <- getInput
+ mc <- getCharOrFail i3 -- Trailing quote
if mc == '\'' then finish_char_tok loc lit_ch
- else do setInput i2; lit_error
+ else lit_error i3
- Just (c, i2@(AI _end2 _ _))
- | not (isAny c) -> lit_error
+ Just (c, i2@(AI _end2 _))
+ | not (isAny c) -> lit_error i1
| otherwise ->
-- We've seen 'x, where x is a valid character
-- (including the possibility of EOF)
-- If TH is on, just parse the quote only
th_exts <- extension thEnabled
- let (AI end _ _) = i1
+ let (AI end _) = i1
if th_exts then return (L (mkSrcSpan loc end) ITvarQuote)
- else do setInput i2; lit_error
+ else lit_error i2
finish_char_tok :: SrcLoc -> Char -> P (Located Token)
finish_char_tok loc ch -- We've already seen the closing quote
-- Just need to check for trailing #
= do magicHash <- extension magicHashEnabled
- i@(AI end _ _) <- getInput
+ i@(AI end _) <- getInput
if magicHash then do
case alexGetChar' i of
- Just ('#',i@(AI end _ _)) -> do
+ Just ('#',i@(AI end _)) -> do
setInput i
return (L (mkSrcSpan loc end) (ITprimchar ch))
_other ->
return (L (mkSrcSpan loc end) (ITchar ch))
- else do
+ else do
return (L (mkSrcSpan loc end) (ITchar ch))
-lex_char :: Char -> AlexInput -> P Char
-lex_char c inp = do
- case c of
- '\\' -> do setInput inp; lex_escape
- c | isAny c -> do setInput inp; return c
- _other -> lit_error
-
isAny :: Char -> Bool
isAny c | c > '\x7f' = isPrint c
| otherwise = is_any c
lex_escape :: P Char
lex_escape = do
- c <- getCharOrFail
+ i0 <- getInput
+ c <- getCharOrFail i0
case c of
'a' -> return '\a'
'b' -> return '\b'
'\\' -> return '\\'
'"' -> return '\"'
'\'' -> return '\''
- '^' -> do c <- getCharOrFail
+ '^' -> do i1 <- getInput
+ c <- getCharOrFail i1
if c >= '@' && c <= '_'
then return (chr (ord c - ord '@'))
- else lit_error
+ else lit_error i1
'x' -> readNum is_hexdigit 16 hexDigit
'o' -> readNum is_octdigit 8 octDecDigit
c1 -> do
i <- getInput
case alexGetChar' i of
- Nothing -> lit_error
+ Nothing -> lit_error i0
Just (c2,i2) ->
case alexGetChar' i2 of
- Nothing -> do setInput i2; lit_error
+ Nothing -> do lit_error i0
Just (c3,i3) ->
let str = [c1,c2,c3] in
case [ (c,rest) | (p,c) <- silly_escape_chars,
- Just rest <- [maybePrefixMatch p str] ] of
+ Just rest <- [stripPrefix p str] ] of
(escape_char,[]):_ -> do
setInput i3
return escape_char
(escape_char,_:_):_ -> do
setInput i2
return escape_char
- [] -> lit_error
+ [] -> lit_error i0
readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
readNum is_digit base conv = do
i <- getInput
- c <- getCharOrFail
+ c <- getCharOrFail i
if is_digit c
then readNum2 is_digit base conv (conv c)
- else do setInput i; lit_error
+ else lit_error i
readNum2 :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> P Char
readNum2 is_digit base conv i = do
where read i input = do
case alexGetChar' input of
Just (c,input') | is_digit c -> do
- read (i*base + conv c) input'
+ let i' = i*base + conv c
+ if i' > 0x10ffff
+ then setInput input >> lexError "numeric escape sequence out of range"
+ else read i' input'
_other -> do
- if i >= 0 && i <= 0x10FFFF
- then do setInput input; return (chr i)
- else lit_error
+ setInput input; return (chr i)
+
silly_escape_chars :: [(String, Char)]
silly_escape_chars = [
-- the position of the error in the buffer. This is so that we can report
-- a correct location to the user, but also so we can detect UTF-8 decoding
-- errors if they occur.
-lit_error :: P a
-lit_error = lexError "lexical error in string/character literal"
+lit_error :: AlexInput -> P a
+lit_error i = do setInput i; lexError "lexical error in string/character literal"
-getCharOrFail :: P Char
-getCharOrFail = do
- i <- getInput
+getCharOrFail :: AlexInput -> P Char
+getCharOrFail i = do
case alexGetChar' i of
Nothing -> lexError "unexpected end-of-file in string/character literal"
Just (c,i) -> do setInput i; return c
lex_quasiquote_tok :: Action
lex_quasiquote_tok span buf len = do
- let quoter = reverse $ takeWhile (/= '$')
- $ reverse $ lexemeToString buf (len - 1)
+ let quoter = tail (lexemeToString buf (len - 1))
+ -- 'tail' drops the initial '[',
+ -- while the -1 drops the trailing '|'
quoteStart <- getSrcLoc
quote <- lex_quasiquote ""
end <- getSrcLoc
lex_quasiquote s = do
i <- getInput
case alexGetChar' i of
- Nothing -> lit_error
+ Nothing -> lit_error i
Just ('\\',i)
| Just ('|',i) <- next -> do
data PState = PState {
buffer :: StringBuffer,
- dflags :: DynFlags,
- messages :: Messages,
+ dflags :: DynFlags,
+ messages :: Messages,
last_loc :: SrcSpan, -- pos of previous token
- last_offs :: !Int, -- offset of the previous token from the
- -- beginning of the current line.
- -- \t is equal to 8 spaces.
last_len :: !Int, -- len of previous token
- last_line_len :: !Int,
loc :: SrcLoc, -- current loc (end of prev token + 1)
extsBitmap :: !Int, -- bitmap that determines permitted extensions
context :: [LayoutContext],
- lex_state :: [Int]
+ lex_state :: [Int],
+ -- Used in the alternative layout rule:
+ -- These tokens are the next ones to be sent out. They are
+ -- just blindly emitted, without the rule looking at them again:
+ alr_pending_implicit_tokens :: [Located Token],
+ -- This is the next token to be considered or, if it is Nothing,
+ -- we need to get the next token from the input stream:
+ alr_next_token :: Maybe (Located Token),
+ -- This is what we consider to be the location of the last token
+ -- emitted:
+ alr_last_loc :: SrcSpan,
+ -- The stack of layout contexts:
+ alr_context :: [ALRContext],
+ -- Are we expecting a '{'? If it's Just, then the ALRLayout tells
+ -- us what sort of layout the '{' will open:
+ alr_expecting_ocurly :: Maybe ALRLayout,
+ -- Have we just had the '}' for a let block? If so, then an 'in'
+ -- token doesn't need to close anything:
+ alr_justClosedExplicitLetBlock :: Bool
}
-- last_loc and last_len are used when generating error messages,
-- and in pushCurrentContext only. Sigh, if only Happy passed the
-- Getting rid of last_loc would require finding another way to
-- implement pushCurrentContext (which is only called from one place).
+data ALRContext = ALRNoLayout Bool{- does it contain commas? -}
+ Bool{- is it a 'let' block? -}
+ | ALRLayout ALRLayout Int
+data ALRLayout = ALRLayoutLet
+ | ALRLayoutWhere
+ | ALRLayoutOf
+ | ALRLayoutDo
+
newtype P a = P { unP :: PState -> ParseResult a }
instance Monad P where
failSpanMsgP :: SrcSpan -> SDoc -> P a
failSpanMsgP span msg = P $ \_ -> PFailed span msg
+getPState :: P PState
+getPState = P $ \s -> POk s s
+
+getDynFlags :: P DynFlags
+getDynFlags = P $ \s -> POk s (dflags s)
+
+withThisPackage :: (PackageId -> a) -> P a
+withThisPackage f
+ = do pkg <- liftM thisPackage getDynFlags
+ return $ f pkg
+
extension :: (Int -> Bool) -> P Bool
extension p = P $ \s -> POk s (p $! extsBitmap s)
getSrcLoc :: P SrcLoc
getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
-setLastToken :: SrcSpan -> Int -> Int -> P ()
-setLastToken loc len line_len = P $ \s -> POk s {
+setLastToken :: SrcSpan -> Int -> P ()
+setLastToken loc len = P $ \s -> POk s {
last_loc=loc,
- last_len=len,
- last_line_len=line_len
-} ()
+ last_len=len
+ } ()
-data AlexInput = AI SrcLoc {-#UNPACK#-}!Int StringBuffer
+data AlexInput = AI SrcLoc StringBuffer
alexInputPrevChar :: AlexInput -> Char
-alexInputPrevChar (AI _ _ buf) = prevChar buf '\n'
+alexInputPrevChar (AI _ buf) = prevChar buf '\n'
alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
-alexGetChar (AI loc ofs s)
+alexGetChar (AI loc s)
| atEnd s = Nothing
- | otherwise = adj_c `seq` loc' `seq` ofs' `seq` s' `seq`
+ | otherwise = adj_c `seq` loc' `seq` s' `seq`
--trace (show (ord c)) $
- Just (adj_c, (AI loc' ofs' s'))
+ Just (adj_c, (AI loc' s'))
where (c,s') = nextChar s
loc' = advanceSrcLoc loc c
- ofs' = advanceOffs c ofs
non_graphic = '\x0'
upper = '\x1'
| c <= '\x06' = non_graphic
| c <= '\x7f' = c
-- Alex doesn't handle Unicode, so when Unicode
- -- character is encoutered we output these values
+ -- character is encountered we output these values
-- with the actual character value hidden in the state.
| otherwise =
case generalCategory c of
EnclosingMark -> other_graphic
DecimalNumber -> digit
LetterNumber -> other_graphic
- OtherNumber -> other_graphic
+ OtherNumber -> digit -- see #4373
ConnectorPunctuation -> symbol
DashPunctuation -> symbol
OpenPunctuation -> other_graphic
-- This version does not squash unicode characters, it is used when
-- lexing strings.
alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
-alexGetChar' (AI loc ofs s)
+alexGetChar' (AI loc s)
| atEnd s = Nothing
- | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq`
+ | otherwise = c `seq` loc' `seq` s' `seq`
--trace (show (ord c)) $
- Just (c, (AI loc' ofs' s'))
+ Just (c, (AI loc' s'))
where (c,s') = nextChar s
loc' = advanceSrcLoc loc c
- ofs' = advanceOffs c ofs
-
-advanceOffs :: Char -> Int -> Int
-advanceOffs '\n' _ = 0
-advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8
-advanceOffs _ offs = offs + 1
getInput :: P AlexInput
-getInput = P $ \s@PState{ loc=l, last_offs=o, buffer=b } -> POk s (AI l o b)
+getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (AI l b)
setInput :: AlexInput -> P ()
-setInput (AI l o b) = P $ \s -> POk s{ loc=l, last_offs=o, buffer=b } ()
+setInput (AI l b) = P $ \s -> POk s{ loc=l, buffer=b } ()
pushLexState :: Int -> P ()
pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
getLexState :: P Int
getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls
+popNextToken :: P (Maybe (Located Token))
+popNextToken
+ = P $ \s@PState{ alr_next_token = m } ->
+ POk (s {alr_next_token = Nothing}) m
+
+setAlrLastLoc :: SrcSpan -> P ()
+setAlrLastLoc l = P $ \s -> POk (s {alr_last_loc = l}) ()
+
+getAlrLastLoc :: P SrcSpan
+getAlrLastLoc = P $ \s@(PState {alr_last_loc = l}) -> POk s l
+
+getALRContext :: P [ALRContext]
+getALRContext = P $ \s@(PState {alr_context = cs}) -> POk s cs
+
+setALRContext :: [ALRContext] -> P ()
+setALRContext cs = P $ \s -> POk (s {alr_context = cs}) ()
+
+getJustClosedExplicitLetBlock :: P Bool
+getJustClosedExplicitLetBlock
+ = P $ \s@(PState {alr_justClosedExplicitLetBlock = b}) -> POk s b
+
+setJustClosedExplicitLetBlock :: Bool -> P ()
+setJustClosedExplicitLetBlock b
+ = P $ \s -> POk (s {alr_justClosedExplicitLetBlock = b}) ()
+
+setNextToken :: Located Token -> P ()
+setNextToken t = P $ \s -> POk (s {alr_next_token = Just t}) ()
+
+popPendingImplicitToken :: P (Maybe (Located Token))
+popPendingImplicitToken
+ = P $ \s@PState{ alr_pending_implicit_tokens = ts } ->
+ case ts of
+ [] -> POk s Nothing
+ (t : ts') -> POk (s {alr_pending_implicit_tokens = ts'}) (Just t)
+
+setPendingImplicitTokens :: [Located Token] -> P ()
+setPendingImplicitTokens ts = P $ \s -> POk (s {alr_pending_implicit_tokens = ts}) ()
+
+getAlrExpectingOCurly :: P (Maybe ALRLayout)
+getAlrExpectingOCurly = P $ \s@(PState {alr_expecting_ocurly = b}) -> POk s b
+
+setAlrExpectingOCurly :: Maybe ALRLayout -> P ()
+setAlrExpectingOCurly b = P $ \s -> POk (s {alr_expecting_ocurly = b}) ()
+
-- for reasons of efficiency, flags indicating language extensions (eg,
--- -fglasgow-exts or -XParr) are represented by a bitmap stored in an unboxed
+-- -fglasgow-exts or -XParallelArrays) are represented by a bitmap stored in an unboxed
-- integer
genericsBit :: Int
unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
unboxedTuplesBit :: Int
unboxedTuplesBit = 15 -- (# and #)
-standaloneDerivingBit :: Int
-standaloneDerivingBit = 16 -- standalone instance deriving declarations
+datatypeContextsBit :: Int
+datatypeContextsBit = 16
transformComprehensionsBit :: Int
transformComprehensionsBit = 17
qqBit :: Int
inRulePragBit = 19
rawTokenStreamBit :: Int
rawTokenStreamBit = 20 -- producing a token stream with all comments included
-newQualOpsBit :: Int
-newQualOpsBit = 21 -- Haskell' qualified operator syntax, e.g. Prelude.(+)
+recBit :: Int
+recBit = 22 -- rec
+alternativeLayoutRuleBit :: Int
+alternativeLayoutRuleBit = 23
+relaxedLayoutBit :: Int
+relaxedLayoutBit = 24
+nondecreasingIndentationBit :: Int
+nondecreasingIndentationBit = 25
always :: Int -> Bool
always _ = True
unicodeSyntaxEnabled flags = testBit flags unicodeSyntaxBit
unboxedTuplesEnabled :: Int -> Bool
unboxedTuplesEnabled flags = testBit flags unboxedTuplesBit
-standaloneDerivingEnabled :: Int -> Bool
-standaloneDerivingEnabled flags = testBit flags standaloneDerivingBit
+datatypeContextsEnabled :: Int -> Bool
+datatypeContextsEnabled flags = testBit flags datatypeContextsBit
qqEnabled :: Int -> Bool
qqEnabled flags = testBit flags qqBit
-- inRulePrag :: Int -> Bool
-- inRulePrag flags = testBit flags inRulePragBit
rawTokenStreamEnabled :: Int -> Bool
rawTokenStreamEnabled flags = testBit flags rawTokenStreamBit
-newQualOps :: Int -> Bool
-newQualOps flags = testBit flags newQualOpsBit
-oldQualOps :: Int -> Bool
-oldQualOps flags = not (newQualOps flags)
+alternativeLayoutRule :: Int -> Bool
+alternativeLayoutRule flags = testBit flags alternativeLayoutRuleBit
+relaxedLayout :: Int -> Bool
+relaxedLayout flags = testBit flags relaxedLayoutBit
+nondecreasingIndentation :: Int -> Bool
+nondecreasingIndentation flags = testBit flags nondecreasingIndentationBit
-- PState for parsing options pragmas
--
pragState :: DynFlags -> StringBuffer -> SrcLoc -> PState
-pragState dynflags buf loc =
- PState {
- buffer = buf,
- messages = emptyMessages,
- dflags = dynflags,
- last_loc = mkSrcSpan loc loc,
- last_offs = 0,
- last_len = 0,
- last_line_len = 0,
- loc = loc,
- extsBitmap = 0,
- context = [],
- lex_state = [bol, option_prags, 0]
- }
-
+pragState dynflags buf loc = (mkPState dynflags buf loc) {
+ lex_state = [bol, option_prags, 0]
+ }
-- create a parse state
--
-mkPState :: StringBuffer -> SrcLoc -> DynFlags -> PState
-mkPState buf loc flags =
+mkPState :: DynFlags -> StringBuffer -> SrcLoc -> PState
+mkPState flags buf loc =
PState {
buffer = buf,
dflags = flags,
messages = emptyMessages,
last_loc = mkSrcSpan loc loc,
- last_offs = 0,
last_len = 0,
- last_line_len = 0,
loc = loc,
extsBitmap = fromIntegral bitmap,
context = [],
- lex_state = [bol, 0]
- -- we begin in the layout state if toplev_layout is set
+ lex_state = [bol, 0],
+ alr_pending_implicit_tokens = [],
+ alr_next_token = Nothing,
+ alr_last_loc = noSrcSpan,
+ alr_context = [],
+ alr_expecting_ocurly = Nothing,
+ alr_justClosedExplicitLetBlock = False
}
where
- bitmap = genericsBit `setBitIf` dopt Opt_Generics flags
- .|. ffiBit `setBitIf` dopt Opt_ForeignFunctionInterface flags
- .|. parrBit `setBitIf` dopt Opt_PArr flags
- .|. arrowsBit `setBitIf` dopt Opt_Arrows flags
- .|. thBit `setBitIf` dopt Opt_TemplateHaskell flags
- .|. qqBit `setBitIf` dopt Opt_QuasiQuotes flags
- .|. ipBit `setBitIf` dopt Opt_ImplicitParams flags
- .|. explicitForallBit `setBitIf` dopt Opt_ScopedTypeVariables flags
- .|. explicitForallBit `setBitIf` dopt Opt_LiberalTypeSynonyms flags
- .|. explicitForallBit `setBitIf` dopt Opt_PolymorphicComponents flags
- .|. explicitForallBit `setBitIf` dopt Opt_ExistentialQuantification flags
- .|. explicitForallBit `setBitIf` dopt Opt_Rank2Types flags
- .|. explicitForallBit `setBitIf` dopt Opt_RankNTypes flags
- .|. bangPatBit `setBitIf` dopt Opt_BangPatterns flags
- .|. tyFamBit `setBitIf` dopt Opt_TypeFamilies flags
- .|. haddockBit `setBitIf` dopt Opt_Haddock flags
- .|. magicHashBit `setBitIf` dopt Opt_MagicHash flags
- .|. kindSigsBit `setBitIf` dopt Opt_KindSignatures flags
- .|. recursiveDoBit `setBitIf` dopt Opt_RecursiveDo flags
- .|. unicodeSyntaxBit `setBitIf` dopt Opt_UnicodeSyntax flags
- .|. unboxedTuplesBit `setBitIf` dopt Opt_UnboxedTuples flags
- .|. standaloneDerivingBit `setBitIf` dopt Opt_StandaloneDeriving flags
- .|. transformComprehensionsBit `setBitIf` dopt Opt_TransformListComp flags
+ bitmap = genericsBit `setBitIf` xopt Opt_Generics flags
+ .|. ffiBit `setBitIf` xopt Opt_ForeignFunctionInterface flags
+ .|. parrBit `setBitIf` xopt Opt_ParallelArrays flags
+ .|. arrowsBit `setBitIf` xopt Opt_Arrows flags
+ .|. thBit `setBitIf` xopt Opt_TemplateHaskell flags
+ .|. qqBit `setBitIf` xopt Opt_QuasiQuotes flags
+ .|. ipBit `setBitIf` xopt Opt_ImplicitParams flags
+ .|. explicitForallBit `setBitIf` xopt Opt_ExplicitForAll flags
+ .|. bangPatBit `setBitIf` xopt Opt_BangPatterns flags
+ .|. tyFamBit `setBitIf` xopt Opt_TypeFamilies flags
+ .|. haddockBit `setBitIf` dopt Opt_Haddock flags
+ .|. magicHashBit `setBitIf` xopt Opt_MagicHash flags
+ .|. kindSigsBit `setBitIf` xopt Opt_KindSignatures flags
+ .|. recursiveDoBit `setBitIf` xopt Opt_RecursiveDo flags
+ .|. recBit `setBitIf` xopt Opt_DoRec flags
+ .|. recBit `setBitIf` xopt Opt_Arrows flags
+ .|. unicodeSyntaxBit `setBitIf` xopt Opt_UnicodeSyntax flags
+ .|. unboxedTuplesBit `setBitIf` xopt Opt_UnboxedTuples flags
+ .|. datatypeContextsBit `setBitIf` xopt Opt_DatatypeContexts flags
+ .|. transformComprehensionsBit `setBitIf` xopt Opt_TransformListComp flags
.|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags
- .|. newQualOpsBit `setBitIf` dopt Opt_NewQualifiedOperators flags
+ .|. alternativeLayoutRuleBit `setBitIf` xopt Opt_AlternativeLayoutRule flags
+ .|. relaxedLayoutBit `setBitIf` xopt Opt_RelaxedLayout flags
+ .|. nondecreasingIndentationBit `setBitIf` xopt Opt_NondecreasingIndentation flags
--
setBitIf :: Int -> Bool -> Int
b `setBitIf` cond | cond = bit b
-- This is only used at the outer level of a module when the 'module'
-- keyword is missing.
pushCurrentContext :: P ()
-pushCurrentContext = P $ \ s@PState{ last_offs=offs, last_line_len=len, context=ctx } ->
- POk s{context = Layout (offs-len) : ctx} ()
---trace ("off: " ++ show offs ++ ", len: " ++ show len) $ POk s{context = Layout (offs-len) : ctx} ()
+pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
+ POk s{context = Layout (srcSpanStartCol loc) : ctx} ()
getOffside :: P Ordering
-getOffside = P $ \s@PState{last_offs=offs, context=stk} ->
+getOffside = P $ \s@PState{last_loc=loc, context=stk} ->
+ let offs = srcSpanStartCol loc in
let ord = case stk of
- (Layout n:_) -> compare offs n
+ (Layout n:_) -> --trace ("layout: " ++ show n ++ ", offs: " ++ show offs) $
+ compare offs n
_ -> GT
in POk s ord
lexError :: String -> P a
lexError str = do
loc <- getSrcLoc
- (AI end _ buf) <- getInput
+ (AI end buf) <- getInput
reportLexError loc end buf str
-- -----------------------------------------------------------------------------
lexer :: (Located Token -> P a) -> P a
lexer cont = do
- tok@(L _span _tok__) <- lexToken
--- trace ("token: " ++ show tok__) $ do
+ alr <- extension alternativeLayoutRule
+ let lexTokenFun = if alr then lexTokenAlr else lexToken
+ tok@(L _span _tok__) <- lexTokenFun
+ --trace ("token: " ++ show _tok__) $ do
cont tok
+lexTokenAlr :: P (Located Token)
+lexTokenAlr = do mPending <- popPendingImplicitToken
+ t <- case mPending of
+ Nothing ->
+ do mNext <- popNextToken
+ t <- case mNext of
+ Nothing -> lexToken
+ Just next -> return next
+ alternativeLayoutRuleToken t
+ Just t ->
+ return t
+ setAlrLastLoc (getLoc t)
+ case unLoc t of
+ ITwhere -> setAlrExpectingOCurly (Just ALRLayoutWhere)
+ ITlet -> setAlrExpectingOCurly (Just ALRLayoutLet)
+ ITof -> setAlrExpectingOCurly (Just ALRLayoutOf)
+ ITdo -> setAlrExpectingOCurly (Just ALRLayoutDo)
+ ITmdo -> setAlrExpectingOCurly (Just ALRLayoutDo)
+ ITrec -> setAlrExpectingOCurly (Just ALRLayoutDo)
+ _ -> return ()
+ return t
+
+alternativeLayoutRuleToken :: Located Token -> P (Located Token)
+alternativeLayoutRuleToken t
+ = do context <- getALRContext
+ lastLoc <- getAlrLastLoc
+ mExpectingOCurly <- getAlrExpectingOCurly
+ justClosedExplicitLetBlock <- getJustClosedExplicitLetBlock
+ setJustClosedExplicitLetBlock False
+ dflags <- getDynFlags
+ let transitional = xopt Opt_AlternativeLayoutRuleTransitional dflags
+ thisLoc = getLoc t
+ thisCol = srcSpanStartCol thisLoc
+ newLine = (lastLoc == noSrcSpan)
+ || (srcSpanStartLine thisLoc > srcSpanEndLine lastLoc)
+ case (unLoc t, context, mExpectingOCurly) of
+ -- This case handles a GHC extension to the original H98
+ -- layout rule...
+ (ITocurly, _, Just alrLayout) ->
+ do setAlrExpectingOCurly Nothing
+ let isLet = case alrLayout of
+ ALRLayoutLet -> True
+ _ -> False
+ setALRContext (ALRNoLayout (containsCommas ITocurly) isLet : context)
+ return t
+ -- ...and makes this case unnecessary
+ {-
+ -- I think our implicit open-curly handling is slightly
+ -- different to John's, in how it interacts with newlines
+ -- and "in"
+ (ITocurly, _, Just _) ->
+ do setAlrExpectingOCurly Nothing
+ setNextToken t
+ lexTokenAlr
+ -}
+ (_, ALRLayout _ col : ls, Just expectingOCurly)
+ | (thisCol > col) ||
+ (thisCol == col &&
+ isNonDecreasingIntentation expectingOCurly) ->
+ do setAlrExpectingOCurly Nothing
+ setALRContext (ALRLayout expectingOCurly thisCol : context)
+ setNextToken t
+ return (L thisLoc ITocurly)
+ | otherwise ->
+ do setAlrExpectingOCurly Nothing
+ setPendingImplicitTokens [L lastLoc ITccurly]
+ setNextToken t
+ return (L lastLoc ITocurly)
+ (_, _, Just expectingOCurly) ->
+ do setAlrExpectingOCurly Nothing
+ setALRContext (ALRLayout expectingOCurly thisCol : context)
+ setNextToken t
+ return (L thisLoc ITocurly)
+ -- We do the [] cases earlier than in the spec, as we
+ -- have an actual EOF token
+ (ITeof, ALRLayout _ _ : ls, _) ->
+ do setALRContext ls
+ setNextToken t
+ return (L thisLoc ITccurly)
+ (ITeof, _, _) ->
+ return t
+ -- the other ITeof case omitted; general case below covers it
+ (ITin, _, _)
+ | justClosedExplicitLetBlock ->
+ return t
+ (ITin, ALRLayout ALRLayoutLet _ : ls, _)
+ | newLine ->
+ do setPendingImplicitTokens [t]
+ setALRContext ls
+ return (L thisLoc ITccurly)
+ -- This next case is to handle a transitional issue:
+ (ITwhere, ALRLayout _ col : ls, _)
+ | newLine && thisCol == col && transitional ->
+ do addWarning Opt_WarnAlternativeLayoutRuleTransitional
+ thisLoc
+ (transitionalAlternativeLayoutWarning
+ "`where' clause at the same depth as implicit layout block")
+ setALRContext ls
+ setNextToken t
+ -- Note that we use lastLoc, as we may need to close
+ -- more layouts, or give a semicolon
+ return (L lastLoc ITccurly)
+ -- This next case is to handle a transitional issue:
+ (ITvbar, ALRLayout _ col : ls, _)
+ | newLine && thisCol == col && transitional ->
+ do addWarning Opt_WarnAlternativeLayoutRuleTransitional
+ thisLoc
+ (transitionalAlternativeLayoutWarning
+ "`|' at the same depth as implicit layout block")
+ setALRContext ls
+ setNextToken t
+ -- Note that we use lastLoc, as we may need to close
+ -- more layouts, or give a semicolon
+ return (L lastLoc ITccurly)
+ (_, ALRLayout _ col : ls, _)
+ | newLine && thisCol == col ->
+ do setNextToken t
+ return (L thisLoc ITsemi)
+ | newLine && thisCol < col ->
+ do setALRContext ls
+ setNextToken t
+ -- Note that we use lastLoc, as we may need to close
+ -- more layouts, or give a semicolon
+ return (L lastLoc ITccurly)
+ -- We need to handle close before open, as 'then' is both
+ -- an open and a close
+ (u, _, _)
+ | isALRclose u ->
+ case context of
+ ALRLayout _ _ : ls ->
+ do setALRContext ls
+ setNextToken t
+ return (L thisLoc ITccurly)
+ ALRNoLayout _ isLet : ls ->
+ do let ls' = if isALRopen u
+ then ALRNoLayout (containsCommas u) False : ls
+ else ls
+ setALRContext ls'
+ when isLet $ setJustClosedExplicitLetBlock True
+ return t
+ [] ->
+ do let ls = if isALRopen u
+ then [ALRNoLayout (containsCommas u) False]
+                                        else []
+ setALRContext ls
+ -- XXX This is an error in John's code, but
+ -- it looks reachable to me at first glance
+ return t
+ (u, _, _)
+ | isALRopen u ->
+ do setALRContext (ALRNoLayout (containsCommas u) False : context)
+ return t
+ (ITin, ALRLayout ALRLayoutLet _ : ls, _) ->
+ do setALRContext ls
+ setPendingImplicitTokens [t]
+ return (L thisLoc ITccurly)
+ (ITin, ALRLayout _ _ : ls, _) ->
+ do setALRContext ls
+ setNextToken t
+ return (L thisLoc ITccurly)
+ -- the other ITin case omitted; general case below covers it
+ (ITcomma, ALRLayout _ _ : ls, _)
+ | topNoLayoutContainsCommas ls ->
+ do setALRContext ls
+ setNextToken t
+ return (L thisLoc ITccurly)
+ (ITwhere, ALRLayout ALRLayoutDo _ : ls, _) ->
+ do setALRContext ls
+ setPendingImplicitTokens [t]
+ return (L thisLoc ITccurly)
+ -- the other ITwhere case omitted; general case below covers it
+ (_, _, _) -> return t
+
+transitionalAlternativeLayoutWarning :: String -> SDoc
+transitionalAlternativeLayoutWarning msg
+ = text "transitional layout will not be accepted in the future:"
+ $$ text msg
+
+isALRopen :: Token -> Bool
+isALRopen ITcase = True
+isALRopen ITif = True
+isALRopen ITthen = True
+isALRopen IToparen = True
+isALRopen ITobrack = True
+isALRopen ITocurly = True
+-- GHC Extensions:
+isALRopen IToubxparen = True
+isALRopen ITparenEscape = True
+isALRopen _ = False
+
+isALRclose :: Token -> Bool
+isALRclose ITof = True
+isALRclose ITthen = True
+isALRclose ITelse = True
+isALRclose ITcparen = True
+isALRclose ITcbrack = True
+isALRclose ITccurly = True
+-- GHC Extensions:
+isALRclose ITcubxparen = True
+isALRclose _ = False
+
+isNonDecreasingIntentation :: ALRLayout -> Bool
+isNonDecreasingIntentation ALRLayoutDo = True
+isNonDecreasingIntentation _ = False
+
+containsCommas :: Token -> Bool
+containsCommas IToparen = True
+containsCommas ITobrack = True
+-- John doesn't have {} as containing commas, but records contain them,
+-- which caused a problem parsing Cabal's Distribution.Simple.InstallDirs
+-- (defaultInstallDirs).
+containsCommas ITocurly = True
+-- GHC Extensions:
+containsCommas IToubxparen = True
+containsCommas _ = False
+
+topNoLayoutContainsCommas :: [ALRContext] -> Bool
+topNoLayoutContainsCommas [] = False
+topNoLayoutContainsCommas (ALRLayout _ _ : ls) = topNoLayoutContainsCommas ls
+topNoLayoutContainsCommas (ALRNoLayout b _ : _) = b
+
lexToken :: P (Located Token)
lexToken = do
- inp@(AI loc1 _ buf) <- getInput
+ inp@(AI loc1 buf) <- getInput
sc <- getLexState
exts <- getExts
case alexScanUser exts inp sc of
AlexEOF -> do
let span = mkSrcSpan loc1 loc1
- setLastToken span 0 0
+ setLastToken span 0
return (L span ITeof)
- AlexError (AI loc2 _ buf) ->
+ AlexError (AI loc2 buf) ->
reportLexError loc1 loc2 buf "lexical error"
AlexSkip inp2 _ -> do
setInput inp2
lexToken
- AlexToken inp2@(AI end _ buf2) _ t -> do
+ AlexToken inp2@(AI end buf2) _ t -> do
setInput inp2
let span = mkSrcSpan loc1 end
let bytes = byteDiff buf buf2
- span `seq` setLastToken span bytes bytes
+ span `seq` setLastToken span bytes
t span buf bytes
reportLexError :: SrcLoc -> SrcLoc -> StringBuffer -> [Char] -> P a
lexTokenStream :: StringBuffer -> SrcLoc -> DynFlags -> ParseResult [Located Token]
lexTokenStream buf loc dflags = unP go initState
- where initState = mkPState buf loc (dopt_set (dopt_unset dflags Opt_Haddock) Opt_KeepRawTokenStream)
+ where dflags' = dopt_set (dopt_unset dflags Opt_Haddock) Opt_KeepRawTokenStream
+ initState = mkPState dflags' buf loc
go = do
ltok <- lexer return
case ltok of
L _ ITeof -> return []
_ -> liftM (ltok:) go
+
+linePrags = Map.singleton "line" (begin line_prag2)
+
+fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag),
+ ("options_ghc", lex_string_prag IToptions_prag),
+ ("options_haddock", lex_string_prag ITdocOptions),
+ ("language", token ITlanguage_prag),
+ ("include", lex_string_prag ITinclude_prag)])
+
+ignoredPrags = Map.fromList (map ignored pragmas)
+ where ignored opt = (opt, nested_comment lexToken)
+ impls = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"]
+ options_pragmas = map ("options_" ++) impls
+ -- CFILES is a hugs-only thing.
+ pragmas = options_pragmas ++ ["cfiles", "contract"]
+
+oneWordPrags = Map.fromList([("rules", rulePrag),
+ ("inline", token (ITinline_prag Inline FunLike)),
+ ("inlinable", token (ITinline_prag Inlinable FunLike)),
+ ("inlineable", token (ITinline_prag Inlinable FunLike)),
+ -- Spelling variant
+ ("notinline", token (ITinline_prag NoInline FunLike)),
+ ("specialize", token ITspec_prag),
+ ("source", token ITsource_prag),
+ ("warning", token ITwarning_prag),
+ ("deprecated", token ITdeprecated_prag),
+ ("scc", token ITscc_prag),
+ ("generated", token ITgenerated_prag),
+ ("core", token ITcore_prag),
+ ("unpack", token ITunpack_prag),
+ ("ann", token ITann_prag)])
+
+twoWordPrags = Map.fromList([("inline conlike", token (ITinline_prag Inline ConLike)),
+ ("notinline conlike", token (ITinline_prag NoInline ConLike)),
+ ("specialize inline", token (ITspec_inline_prag True)),
+ ("specialize notinline", token (ITspec_inline_prag False))])
+
+
+dispatch_pragmas :: Map String Action -> Action
+dispatch_pragmas prags span buf len = case Map.lookup (clean_pragma (lexemeToString buf len)) prags of
+ Just found -> found span buf len
+ Nothing -> lexError "unknown pragma"
+
+known_pragma :: Map String Action -> AlexAccPred Int
+known_pragma prags _ _ len (AI _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags)
+ && (nextCharIs buf (\c -> not (isAlphaNum c || c == '_')))
+
+clean_pragma :: String -> String
+clean_pragma prag = canon_ws (map toLower (unprefix prag))
+ where unprefix prag' = case stripPrefix "{-#" prag' of
+ Just rest -> rest
+ Nothing -> prag'
+ canonical prag' = case prag' of
+ "noinline" -> "notinline"
+ "specialise" -> "specialize"
+ "constructorlike" -> "conlike"
+ _ -> prag'
+ canon_ws s = unwords (map canonical (words s))
}