X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fparser%2FLexer.x;h=00926bd13d87b6a7c0f6db94e008a3c42223bd58;hp=aa2703e37d567c217b6031f2ada6efb1ba2469a5;hb=609e7ddfb10bc04762b820e70e0487ad6c514c2e;hpb=6c06fdc7ad20682f0f52b5a78e5e3487a2ed047b diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x index aa2703e..00926bd 100644 --- a/compiler/parser/Lexer.x +++ b/compiler/parser/Lexer.x @@ -12,7 +12,6 @@ ----------------------------------------------------------------------------- -- ToDo / known bugs: --- - Unicode -- - parsing integers is a bit slow -- - readRational is a bit slow -- @@ -32,23 +31,24 @@ -- qualified varids. { -{-# OPTIONS -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details --- --- Note that Alex itself generates code with with some unused bindings and --- without type signatures, so removing the flag might not be possible. +-- XXX The above flags turn off warnings in the generated code: +{-# OPTIONS_GHC -fno-warn-unused-matches #-} +{-# OPTIONS_GHC -fno-warn-unused-binds #-} +{-# OPTIONS_GHC -fno-warn-unused-imports #-} +{-# OPTIONS_GHC -fno-warn-missing-signatures #-} +-- But alex still generates some code that causes the "lazy unlifted bindings" +-- warning, and old compilers don't know about it so we can't easily turn +-- it off, so for now we use the sledge hammer: +{-# OPTIONS_GHC -w #-} {-# OPTIONS_GHC -funbox-strict-fields #-} module Lexer ( Token(..), lexer, pragState, mkPState, PState(..), P(..), ParseResult(..), getSrcLoc, + getPState, getDynFlags, withThisPackage, failLocMsgP, failSpanMsgP, srcParseFail, - getMessages, + getMessages, popContext, pushCurrentContext, setLastToken, setSrcLoc, getLexState, popLexState, pushLexState, extension, standaloneDerivingEnabled, bangPatEnabled, @@ -61,18 +61,21 @@ import ErrUtils import Outputable import StringBuffer import FastString -import FastTypes import SrcLoc import UniqFM import DynFlags +import Module import Ctype -import Util ( maybePrefixMatch, readRational ) +import Util ( readRational ) import Control.Monad import Data.Bits import Data.Char +import Data.List +import Data.Maybe +import Data.Map (Map) +import qualified Data.Map as Map import Data.Ratio -import Debug.Trace } $unispace = \x05 -- Trick Alex into handling Unicode. See alexGetChar. @@ -107,6 +110,8 @@ $symchar = [$symbol \:] $nl = [\n\r] $idchar = [$small $large $digit \'] +$pragmachar = [$small $large $digit] + $docsym = [\| \^ \* \$] @varid = $small $idchar* @@ -221,8 +226,8 @@ $tab+ { warn Opt_WarnTabs (text "Tab character") } <0,option_prags> \n { begin bol } -"{-#" $whitechar* (line|LINE) / { notFollowedByPragmaChar } - { begin line_prag2 } +"{-#" $whitechar* $pragmachar+ / { known_pragma linePrags } + { dispatch_pragmas linePrags } -- single-line line pragmas, of the form -- # "" \n @@ -239,69 +244,31 @@ $tab+ { warn Opt_WarnTabs (text "Tab character") } -- with older versions of GHC which generated these. 
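-- Note (illustration, not part of this patch): pragma recognition is now
-- table-driven.  "known_pragma linePrags" is the Alex predicate that lets the
-- new rule fire only when the text after "{-#" is a key of the table, and
-- "dispatch_pragmas linePrags" is the action that looks that key up and runs
-- the stored Action.  For a LINE pragma the flow is:
--
--     {-# LINE 1 "Foo.hs" #-}
--       the rule matches the prefix    {-# LINE
--       clean_pragma "{-# LINE"        ==> "line"
--       Map.lookup "line" linePrags    ==> Just (begin line_prag2)
--
-- which is exactly what the deleted (line|LINE) rule did; known_pragma also
-- keeps the old notFollowedByPragmaChar check that the character after the
-- name is not alphanumeric or '_'.  linePrags, clean_pragma, known_pragma and
-- dispatch_pragmas are all defined at the bottom of this patch.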
<0,option_prags> { - "{-#" $whitechar* (RULES|rules) / { notFollowedByPragmaChar } { rulePrag } - "{-#" $whitechar* (INLINE|inline) / { notFollowedByPragmaChar } - { token (ITinline_prag True) } - "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar } - { token (ITinline_prag False) } - "{-#" $whitechar* (INLINE|inline) - $whitechar+ (CONLIKE|conlike) / { notFollowedByPragmaChar } - { token (ITinline_conlike_prag True) } - "{-#" $whitechar* (NO(T)?INLINE|no(t?)inline) - $whitechar+ (CONLIKE|constructorlike) / { notFollowedByPragmaChar } - { token (ITinline_conlike_prag False) } - "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) / { notFollowedByPragmaChar } - { token ITspec_prag } - "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) - $whitechar+ (INLINE|inline) / { notFollowedByPragmaChar } - { token (ITspec_inline_prag True) } - "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) - $whitechar+ (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar } - { token (ITspec_inline_prag False) } - "{-#" $whitechar* (SOURCE|source) / { notFollowedByPragmaChar } - { token ITsource_prag } - "{-#" $whitechar* (WARNING|warning) / { notFollowedByPragmaChar } - { token ITwarning_prag } - "{-#" $whitechar* (DEPRECATED|deprecated) / { notFollowedByPragmaChar } - { token ITdeprecated_prag } - "{-#" $whitechar* (SCC|scc) / { notFollowedByPragmaChar } - { token ITscc_prag } - "{-#" $whitechar* (GENERATED|generated) / { notFollowedByPragmaChar } - { token ITgenerated_prag } - "{-#" $whitechar* (CORE|core) / { notFollowedByPragmaChar } - { token ITcore_prag } - "{-#" $whitechar* (UNPACK|unpack) / { notFollowedByPragmaChar } - { token ITunpack_prag } - "{-#" $whitechar* (ANN|ann) / { notFollowedByPragmaChar } - { token ITann_prag } + "{-#" $whitechar* $pragmachar+ + $whitechar+ $pragmachar+ / { known_pragma twoWordPrags } + { dispatch_pragmas twoWordPrags } + + "{-#" $whitechar* $pragmachar+ / { known_pragma oneWordPrags } + { dispatch_pragmas oneWordPrags } -- We ignore all these pragmas, but don't generate a warning for them - -- CFILES is a hugs-only thing. 
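-- Note (illustration, not part of this patch): the INLINE, NOTINLINE,
-- SPECIALISE, SOURCE, ... rules that used to be spelled out individually are
-- now covered by the two generic rules above, keyed on the pragma text as
-- normalised by clean_pragma (defined at the bottom of this patch).  A minimal
-- standalone check of that normalisation, with clean_pragma copied from the
-- end of this patch:

module CleanPragmaCheck where

import Data.Char (toLower)
import Data.List (stripPrefix)

clean_pragma :: String -> String
clean_pragma prag = canon_ws (map toLower (unprefix prag))
    where unprefix prag' = case stripPrefix "{-#" prag' of
                             Just rest -> rest
                             Nothing -> prag'
          canonical prag' = case prag' of
                              "noinline" -> "notinline"
                              "specialise" -> "specialize"
                              "constructorlike" -> "conlike"
                              _ -> prag'
          canon_ws s = unwords (map canonical (words s))

-- Each spelling accepted by the old rules collapses to a single table key:
main :: IO ()
main = mapM_ (putStrLn . clean_pragma)
    [ "{-# INLINE"                    -- "inline"
    , "{-# NoInline"                  -- "notinline"
    , "{-# SPECIALISE"                -- "specialize"
    , "{-# NOTINLINE CONSTRUCTORLIKE" -- "notinline conlike"
    ]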
- "{-#" $whitechar* (OPTIONS_(HUGS|hugs|NHC98|nhc98|JHC|jhc|YHC|yhc|CATCH|catch|DERIVE|derive)|CFILES|cfiles|CONTRACT|contract) / { notFollowedByPragmaChar } - { nested_comment lexToken } + "{-#" $whitechar* $pragmachar+ / { known_pragma ignoredPrags } + { dispatch_pragmas ignoredPrags } -- ToDo: should only be valid inside a pragma: "#-}" { endPrag } } { - "{-#" $whitechar* (OPTIONS|options) / { notFollowedByPragmaChar } - { lex_string_prag IToptions_prag } - "{-#" $whitechar* (OPTIONS_GHC|options_ghc) / { notFollowedByPragmaChar } - { lex_string_prag IToptions_prag } - "{-#" $whitechar* (OPTIONS_HADDOCK|options_haddock) - / { notFollowedByPragmaChar } - { lex_string_prag ITdocOptions } + "{-#" $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags } + { dispatch_pragmas fileHeaderPrags } + "-- #" { multiline_doc_comment } - "{-#" $whitechar* (LANGUAGE|language) / { notFollowedByPragmaChar } - { token ITlanguage_prag } - "{-#" $whitechar* (INCLUDE|include) / { notFollowedByPragmaChar } - { lex_string_prag ITinclude_prag } } <0> { -- In the "0" mode we ignore these pragmas - "{-#" $whitechar* (OPTIONS|options|OPTIONS_GHC|options_ghc|OPTIONS_HADDOCK|options_haddock|LANGUAGE|language|INCLUDE|include) / { notFollowedByPragmaChar } + "{-#" $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags } { nested_comment lexToken } } @@ -487,7 +454,7 @@ data Token | ITunsafe | ITstdcallconv | ITccallconv - | ITdotnet + | ITprimcallconv | ITmdo | ITfamily | ITgroup @@ -564,8 +531,6 @@ data Token | ITdupipvarid FastString -- GHC extension: implicit param: ?x - | ITpragma StringBuffer - | ITchar Char | ITstring FastString | ITinteger Integer @@ -578,7 +543,7 @@ data Token | ITprimfloat Rational | ITprimdouble Rational - -- MetaHaskell extension tokens + -- Template Haskell extension tokens | ITopenExpQuote -- [| or [e| | ITopenPatQuote -- [p| | ITopenDecQuote -- [d| @@ -634,6 +599,7 @@ isSpecial ITthreadsafe = True isSpecial ITunsafe = True isSpecial ITccallconv = True isSpecial ITstdcallconv = True +isSpecial ITprimcallconv = True isSpecial ITmdo = True isSpecial ITfamily = True isSpecial ITgroup = True @@ -649,6 +615,7 @@ isSpecial _ = False -- facilitates using a keyword in two different extensions that can be -- activated independently) -- +reservedWordsFM :: UniqFM (Token, Int) reservedWordsFM = listToUFM $ map (\(x, y, z) -> (mkFastString x, (y, z))) [( "_", ITunderscore, 0 ), @@ -690,13 +657,13 @@ reservedWordsFM = listToUFM $ ( "label", ITlabel, bit ffiBit), ( "dynamic", ITdynamic, bit ffiBit), ( "safe", ITsafe, bit ffiBit), - ( "threadsafe", ITthreadsafe, bit ffiBit), + ( "threadsafe", ITthreadsafe, bit ffiBit), -- ToDo: remove ( "unsafe", ITunsafe, bit ffiBit), ( "stdcall", ITstdcallconv, bit ffiBit), ( "ccall", ITccallconv, bit ffiBit), - ( "dotnet", ITdotnet, bit ffiBit), + ( "prim", ITprimcallconv, bit ffiBit), - ( "rec", ITrec, bit arrowsBit), + ( "rec", ITrec, bit recBit), ( "proc", ITproc, bit arrowsBit) ] @@ -772,28 +739,31 @@ begin :: Int -> Action begin code _span _str _len = do pushLexState code; lexToken pop :: Action -pop _span _buf _len = do popLexState; lexToken +pop _span _buf _len = do _ <- popLexState + lexToken pop_and :: Action -> Action -pop_and act span buf len = do popLexState; act span buf len +pop_and act span buf len = do _ <- popLexState + act span buf len {-# INLINE nextCharIs #-} +nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool nextCharIs buf p = not (atEnd buf) && p (currentChar buf) +notFollowedBy :: Char -> AlexAccPred Int notFollowedBy char _ _ _ (AI _ 
_ buf) = nextCharIs buf (/=char) +notFollowedBySymbol :: AlexAccPred Int notFollowedBySymbol _ _ _ (AI _ _ buf) = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~") -notFollowedByPragmaChar _ _ _ (AI _ _ buf) - = nextCharIs buf (\c -> not (isAlphaNum c || c == '_')) - -- We must reject doc comments as being ordinary comments everywhere. -- In some cases the doc comment will be selected as the lexeme due to -- maximal munch, but not always, because the nested comment rule is -- valid in all states, but the doc-comment rules are only valid in -- the non-layout states. +isNormalComment :: AlexAccPred Int isNormalComment bits _ _ (AI _ _ buf) | haddockEnabled bits = notFollowedByDocOrPragma | otherwise = nextCharIs buf (/='#') @@ -801,6 +771,7 @@ isNormalComment bits _ _ (AI _ _ buf) notFollowedByDocOrPragma = not $ spaceAndP buf (`nextCharIs` (`elem` "|^*$#")) +spaceAndP :: StringBuffer -> (StringBuffer -> Bool) -> Bool spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf)) {- @@ -808,8 +779,10 @@ haddockDisabledAnd p bits _ _ (AI _ _ buf) = if haddockEnabled bits then False else (p buf) -} +atEOL :: AlexAccPred Int atEOL _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf == '\n' +ifExtension :: (Int -> Bool) -> AlexAccPred Int ifExtension pred bits _ _ _ = pred bits multiline_doc_comment :: Action @@ -890,6 +863,8 @@ nested_doc_comment span buf _len = withLexedDocType (go "") Just (_,_) -> go ('\123':commentAcc) input docType False Just (c,input) -> go (c:commentAcc) input docType False +withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (Located Token)) + -> P (Located Token) withLexedDocType lexDocComment = do input@(AI _ _ buf) <- getInput case prevChar buf ' ' of @@ -898,6 +873,7 @@ withLexedDocType lexDocComment = do '$' -> lexDocComment input ITdocCommentNamed False '*' -> lexDocSection 1 input '#' -> lexDocComment input ITdocOptionsOld False + _ -> panic "withLexedDocType: Bad doc type" where lexDocSection n input = case alexGetChar input of Just ('*', input) -> lexDocSection (n+1) input @@ -907,12 +883,12 @@ withLexedDocType lexDocComment = do -- RULES pragmas turn on the forall and '.' keywords, and we turn them -- off again at the end of the pragma. rulePrag :: Action -rulePrag span buf len = do +rulePrag span _buf _len = do setExts (.|. bit inRulePragBit) return (L span ITrules_prag) endPrag :: Action -endPrag span buf len = do +endPrag span _buf _len = do setExts (.&. complement (bit inRulePragBit)) return (L span ITclose_prag) @@ -948,8 +924,9 @@ docCommentEnd input commentAcc docType buf span = do span `seq` setLastToken span' last_len last_line_len return (L span' (docType comment)) +errBrace :: AlexInput -> SrcSpan -> P a errBrace (AI end _ _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'" - + open_brace, close_brace :: Action open_brace span _str _len = do ctx <- getContext @@ -959,6 +936,7 @@ close_brace span _str _len = do popContext return (L span ITccurly) +qvarid, qconid :: StringBuffer -> Int -> Token qvarid buf len = ITqvarid $! splitQualName buf len False qconid buf len = ITqconid $! 
splitQualName buf len False @@ -992,7 +970,8 @@ splitQualName orig_buf len parens = split orig_buf orig_buf where qual_size = orig_buf `byteDiff` dot_buf -varid span buf len = +varid :: Action +varid span buf len = fs `seq` case lookupUFM reservedWordsFM fs of Just (keyword,0) -> do @@ -1007,17 +986,22 @@ varid span buf len = where fs = lexemeToFastString buf len +conid :: StringBuffer -> Int -> Token conid buf len = ITconid fs where fs = lexemeToFastString buf len +qvarsym, qconsym, prefixqvarsym, prefixqconsym :: StringBuffer -> Int -> Token qvarsym buf len = ITqvarsym $! splitQualName buf len False qconsym buf len = ITqconsym $! splitQualName buf len False prefixqvarsym buf len = ITprefixqvarsym $! splitQualName buf len True prefixqconsym buf len = ITprefixqconsym $! splitQualName buf len True +varsym, consym :: Action varsym = sym ITvarsym consym = sym ITconsym +sym :: (FastString -> Token) -> SrcSpan -> StringBuffer -> Int + -> P (Located Token) sym con span buf len = case lookupUFM reservedSymsFM fs of Just (keyword,exts) -> do @@ -1039,16 +1023,27 @@ tok_integral itint transint transbuf translen (radix,char_to_int) span buf len = (offsetBytes transbuf buf) (subtract translen len) radix char_to_int -- some conveniences for use with tok_integral +tok_num :: (Integer -> Integer) + -> Int -> Int + -> (Integer, (Char->Int)) -> Action tok_num = tok_integral ITinteger +tok_primint :: (Integer -> Integer) + -> Int -> Int + -> (Integer, (Char->Int)) -> Action tok_primint = tok_integral ITprimint +tok_primword :: Int -> Int + -> (Integer, (Char->Int)) -> Action tok_primword = tok_integral ITprimword positive +positive, negative :: (Integer -> Integer) positive = id negative = negate +decimal, octal, hexadecimal :: (Integer, Char -> Int) decimal = (10,octDecDigit) octal = (8,octDecDigit) hexadecimal = (16,hexDigit) -- readRational can understand negative rationals, exponents, everything. +tok_float, tok_primfloat, tok_primdouble :: String -> Token tok_float str = ITrational $! readRational str tok_primfloat str = ITprimfloat $! readRational str tok_primdouble str = ITprimdouble $! readRational str @@ -1068,21 +1063,31 @@ do_bol span _str _len = do return (L span ITvccurly) EQ -> do --trace "layout: inserting ';'" $ do - popLexState + _ <- popLexState return (L span ITsemi) GT -> do - popLexState + _ <- popLexState lexToken -- certain keywords put us in the "layout" state, where we might -- add an opening curly brace. -maybe_layout ITdo = pushLexState layout_do -maybe_layout ITmdo = pushLexState layout_do -maybe_layout ITof = pushLexState layout -maybe_layout ITlet = pushLexState layout -maybe_layout ITwhere = pushLexState layout -maybe_layout ITrec = pushLexState layout -maybe_layout _ = return () +maybe_layout :: Token -> P () +maybe_layout t = do -- If the alternative layout rule is enabled then + -- we never create an implicit layout context here. + -- Layout is handled XXX instead. + -- The code for closing implicit contexts, or + -- inserting implicit semi-colons, is therefore + -- irrelevant as it only applies in an implicit + -- context. + alr <- extension alternativeLayoutRule + unless alr $ f t + where f ITdo = pushLexState layout_do + f ITmdo = pushLexState layout_do + f ITof = pushLexState layout + f ITlet = pushLexState layout + f ITwhere = pushLexState layout + f ITrec = pushLexState layout + f _ = return () -- Pushing a new implicit layout context. 
If the indentation of the -- next token is not greater than the previous layout context, then @@ -1093,8 +1098,9 @@ maybe_layout _ = return () -- by a 'do', then we allow the new context to be at the same indentation as -- the previous context. This is what the 'strict' argument is for. -- +new_layout_context :: Bool -> Action new_layout_context strict span _buf _len = do - popLexState + _ <- popLexState (AI _ offset _) <- getInput ctx <- getContext case ctx of @@ -1109,8 +1115,9 @@ new_layout_context strict span _buf _len = do setContext (Layout offset : ctx) return (L span ITvocurly) +do_layout_left :: Action do_layout_left span _buf _len = do - popLexState + _ <- popLexState pushLexState bol -- we must be at the start of a line return (L span ITvccurly) @@ -1122,7 +1129,7 @@ setLine code span buf len = do let line = parseUnsignedInteger buf len 10 octDecDigit setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0) -- subtract one: the line number refers to the *following* line - popLexState + _ <- popLexState pushLexState code lexToken @@ -1130,7 +1137,7 @@ setFile :: Int -> Action setFile code span buf len = do let file = lexemeToFastString (stepOn buf) (len-2) setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span)) - popLexState + _ <- popLexState pushLexState code lexToken @@ -1208,6 +1215,7 @@ lex_string s = do c' <- lex_char c i lex_string (c':s) +lex_stringgap :: String -> P Token lex_stringgap s = do c <- getCharOrFail case c of @@ -1220,7 +1228,7 @@ lex_char_tok :: Action -- Here we are basically parsing character literals, such as 'x' or '\n' -- but, when Template Haskell is on, we additionally spot -- 'x and ''T, returning ITvarQuote and ITtyQuote respectively, --- but WIHTOUT CONSUMING the x or T part (the parser does that). +-- but WITHOUT CONSUMING the x or T part (the parser does that). -- So we have to do two characters of lookahead: when we see 'x we need to -- see if there's a trailing quote lex_char_tok span _buf _len = do -- We've seen ' @@ -1283,6 +1291,7 @@ lex_char c inp = do c | isAny c -> do setInput inp; return c _other -> lit_error +isAny :: Char -> Bool isAny c | c > '\x7f' = isPrint c | otherwise = is_any c @@ -1319,7 +1328,7 @@ lex_escape = do Just (c3,i3) -> let str = [c1,c2,c3] in case [ (c,rest) | (p,c) <- silly_escape_chars, - Just rest <- [maybePrefixMatch p str] ] of + Just rest <- [stripPrefix p str] ] of (escape_char,[]):_ -> do setInput i3 return escape_char @@ -1336,6 +1345,7 @@ readNum is_digit base conv = do then readNum2 is_digit base conv (conv c) else do setInput i; lit_error +readNum2 :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> P Char readNum2 is_digit base conv i = do input <- getInput read i input @@ -1348,6 +1358,7 @@ readNum2 is_digit base conv i = do then do setInput input; return (chr i) else lit_error +silly_escape_chars :: [(String, Char)] silly_escape_chars = [ ("NUL", '\NUL'), ("SOH", '\SOH'), @@ -1389,6 +1400,7 @@ silly_escape_chars = [ -- the position of the error in the buffer. This is so that we can report -- a correct location to the user, but also so we can detect UTF-8 decoding -- errors if they occur. 
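-- Note (illustration, not part of this patch): in lex_escape above,
-- Util.maybePrefixMatch has been replaced by Data.List.stripPrefix, which
-- behaves the same way: it returns the remainder of the string after a
-- matching prefix, or Nothing.  For the three-character lookahead [c1,c2,c3]:
--
--     stripPrefix "SOH" "SOH"  ==  Just ""   -- 3-letter name, all consumed
--     stripPrefix "SO"  "SOH"  ==  Just "H"  -- 2-letter name, 'H' left over
--     stripPrefix "ACK" "SOH"  ==  Nothing   -- no match, candidate dropped
--
-- so the (escape_char,[]):_ alternative handles three-letter names such as
-- \SOH, and the (escape_char,_:_):_ alternative resets the input to just
-- after a two-letter name such as \SO.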
+lit_error :: P a lit_error = lexError "lexical error in string/character literal" getCharOrFail :: P Char @@ -1465,18 +1477,24 @@ data ParseResult a data PState = PState { buffer :: StringBuffer, - dflags :: DynFlags, - messages :: Messages, + dflags :: DynFlags, + messages :: Messages, last_loc :: SrcSpan, -- pos of previous token last_offs :: !Int, -- offset of the previous token from the -- beginning of the current line. -- \t is equal to 8 spaces. last_len :: !Int, -- len of previous token - last_line_len :: !Int, + last_line_len :: !Int, loc :: SrcLoc, -- current loc (end of prev token + 1) extsBitmap :: !Int, -- bitmap that determines permitted extensions context :: [LayoutContext], - lex_state :: [Int] + lex_state :: [Int], + -- Used in the alternative layout rule: + alr_pending_implicit_tokens :: [Located Token], + alr_next_token :: Maybe (Located Token), + alr_last_loc :: SrcSpan, + alr_context :: [ALRContext], + alr_expecting_ocurly :: Maybe ALRLayout } -- last_loc and last_len are used when generating error messages, -- and in pushCurrentContext only. Sigh, if only Happy passed the @@ -1484,6 +1502,13 @@ data PState = PState { -- Getting rid of last_loc would require finding another way to -- implement pushCurrentContext (which is only called from one place). +data ALRContext = ALRNoLayout Bool{- does it contain commas? -} + | ALRLayout ALRLayout Int +data ALRLayout = ALRLayoutLet + | ALRLayoutWhere + | ALRLayoutOf + | ALRLayoutDo + newtype P a = P { unP :: PState -> ParseResult a } instance Monad P where @@ -1512,6 +1537,17 @@ failLocMsgP loc1 loc2 str = P $ \_ -> PFailed (mkSrcSpan loc1 loc2) (text str) failSpanMsgP :: SrcSpan -> SDoc -> P a failSpanMsgP span msg = P $ \_ -> PFailed span msg +getPState :: P PState +getPState = P $ \s -> POk s s + +getDynFlags :: P DynFlags +getDynFlags = P $ \s -> POk s (dflags s) + +withThisPackage :: (PackageId -> a) -> P a +withThisPackage f + = do pkg <- liftM thisPackage getDynFlags + return $ f pkg + extension :: (Int -> Bool) -> P Bool extension p = P $ \s -> POk s (p $! extsBitmap s) @@ -1561,7 +1597,7 @@ alexGetChar (AI loc ofs s) | c <= '\x06' = non_graphic | c <= '\x7f' = c -- Alex doesn't handle Unicode, so when Unicode - -- character is encoutered we output these values + -- character is encountered we output these values -- with the actual character value hidden in the state. 
| otherwise = case generalCategory c of @@ -1622,58 +1658,136 @@ popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls getLexState :: P Int getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls +popNextToken :: P (Maybe (Located Token)) +popNextToken + = P $ \s@PState{ alr_next_token = m } -> + POk (s {alr_next_token = Nothing}) m + +setAlrLastLoc :: SrcSpan -> P () +setAlrLastLoc l = P $ \s -> POk (s {alr_last_loc = l}) () + +getAlrLastLoc :: P SrcSpan +getAlrLastLoc = P $ \s@(PState {alr_last_loc = l}) -> POk s l + +getALRContext :: P [ALRContext] +getALRContext = P $ \s@(PState {alr_context = cs}) -> POk s cs + +setALRContext :: [ALRContext] -> P () +setALRContext cs = P $ \s -> POk (s {alr_context = cs}) () + +setNextToken :: Located Token -> P () +setNextToken t = P $ \s -> POk (s {alr_next_token = Just t}) () + +popPendingImplicitToken :: P (Maybe (Located Token)) +popPendingImplicitToken + = P $ \s@PState{ alr_pending_implicit_tokens = ts } -> + case ts of + [] -> POk s Nothing + (t : ts') -> POk (s {alr_pending_implicit_tokens = ts'}) (Just t) + +setPendingImplicitTokens :: [Located Token] -> P () +setPendingImplicitTokens ts = P $ \s -> POk (s {alr_pending_implicit_tokens = ts}) () + +getAlrExpectingOCurly :: P (Maybe ALRLayout) +getAlrExpectingOCurly = P $ \s@(PState {alr_expecting_ocurly = b}) -> POk s b + +setAlrExpectingOCurly :: Maybe ALRLayout -> P () +setAlrExpectingOCurly b = P $ \s -> POk (s {alr_expecting_ocurly = b}) () + -- for reasons of efficiency, flags indicating language extensions (eg, -- -fglasgow-exts or -XParr) are represented by a bitmap stored in an unboxed -- integer -genericsBit, ffiBit, parrBit :: Int +genericsBit :: Int genericsBit = 0 -- {| and |} +ffiBit :: Int ffiBit = 1 +parrBit :: Int parrBit = 2 +arrowsBit :: Int arrowsBit = 4 +thBit :: Int thBit = 5 +ipBit :: Int ipBit = 6 +explicitForallBit :: Int explicitForallBit = 7 -- the 'forall' keyword and '.' symbol +bangPatBit :: Int bangPatBit = 8 -- Tells the parser to understand bang-patterns -- (doesn't affect the lexer) +tyFamBit :: Int tyFamBit = 9 -- indexed type families: 'family' keyword and kind sigs +haddockBit :: Int haddockBit = 10 -- Lex and parse Haddock comments +magicHashBit :: Int magicHashBit = 11 -- "#" in both functions and operators +kindSigsBit :: Int kindSigsBit = 12 -- Kind signatures on type variables +recursiveDoBit :: Int recursiveDoBit = 13 -- mdo +unicodeSyntaxBit :: Int unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc +unboxedTuplesBit :: Int unboxedTuplesBit = 15 -- (# and #) +standaloneDerivingBit :: Int standaloneDerivingBit = 16 -- standalone instance deriving declarations +transformComprehensionsBit :: Int transformComprehensionsBit = 17 +qqBit :: Int qqBit = 18 -- enable quasiquoting +inRulePragBit :: Int inRulePragBit = 19 +rawTokenStreamBit :: Int rawTokenStreamBit = 20 -- producing a token stream with all comments included +newQualOpsBit :: Int newQualOpsBit = 21 -- Haskell' qualified operator syntax, e.g. 
Prelude.(+) +recBit :: Int +recBit = 22 -- rec +alternativeLayoutRuleBit :: Int +alternativeLayoutRuleBit = 23 -genericsEnabled, ffiEnabled, parrEnabled :: Int -> Bool +always :: Int -> Bool always _ = True +genericsEnabled :: Int -> Bool genericsEnabled flags = testBit flags genericsBit -ffiEnabled flags = testBit flags ffiBit +parrEnabled :: Int -> Bool parrEnabled flags = testBit flags parrBit +arrowsEnabled :: Int -> Bool arrowsEnabled flags = testBit flags arrowsBit +thEnabled :: Int -> Bool thEnabled flags = testBit flags thBit +ipEnabled :: Int -> Bool ipEnabled flags = testBit flags ipBit +explicitForallEnabled :: Int -> Bool explicitForallEnabled flags = testBit flags explicitForallBit +bangPatEnabled :: Int -> Bool bangPatEnabled flags = testBit flags bangPatBit -tyFamEnabled flags = testBit flags tyFamBit +-- tyFamEnabled :: Int -> Bool +-- tyFamEnabled flags = testBit flags tyFamBit +haddockEnabled :: Int -> Bool haddockEnabled flags = testBit flags haddockBit +magicHashEnabled :: Int -> Bool magicHashEnabled flags = testBit flags magicHashBit -kindSigsEnabled flags = testBit flags kindSigsBit -recursiveDoEnabled flags = testBit flags recursiveDoBit +-- kindSigsEnabled :: Int -> Bool +-- kindSigsEnabled flags = testBit flags kindSigsBit +unicodeSyntaxEnabled :: Int -> Bool unicodeSyntaxEnabled flags = testBit flags unicodeSyntaxBit +unboxedTuplesEnabled :: Int -> Bool unboxedTuplesEnabled flags = testBit flags unboxedTuplesBit +standaloneDerivingEnabled :: Int -> Bool standaloneDerivingEnabled flags = testBit flags standaloneDerivingBit -transformComprehensionsEnabled flags = testBit flags transformComprehensionsBit +qqEnabled :: Int -> Bool qqEnabled flags = testBit flags qqBit -inRulePrag flags = testBit flags inRulePragBit +-- inRulePrag :: Int -> Bool +-- inRulePrag flags = testBit flags inRulePragBit +rawTokenStreamEnabled :: Int -> Bool rawTokenStreamEnabled flags = testBit flags rawTokenStreamBit +newQualOps :: Int -> Bool newQualOps flags = testBit flags newQualOpsBit +oldQualOps :: Int -> Bool oldQualOps flags = not (newQualOps flags) +alternativeLayoutRule :: Int -> Bool +alternativeLayoutRule flags = testBit flags alternativeLayoutRuleBit -- PState for parsing options pragmas -- @@ -1690,7 +1804,12 @@ pragState dynflags buf loc = loc = loc, extsBitmap = 0, context = [], - lex_state = [bol, option_prags, 0] + lex_state = [bol, option_prags, 0], + alr_pending_implicit_tokens = [], + alr_next_token = Nothing, + alr_last_loc = noSrcSpan, + alr_context = [], + alr_expecting_ocurly = Nothing } @@ -1709,35 +1828,38 @@ mkPState buf loc flags = loc = loc, extsBitmap = fromIntegral bitmap, context = [], - lex_state = [bol, 0] + lex_state = [bol, 0], -- we begin in the layout state if toplev_layout is set + alr_pending_implicit_tokens = [], + alr_next_token = Nothing, + alr_last_loc = noSrcSpan, + alr_context = [], + alr_expecting_ocurly = Nothing } where bitmap = genericsBit `setBitIf` dopt Opt_Generics flags - .|. ffiBit `setBitIf` dopt Opt_ForeignFunctionInterface flags - .|. parrBit `setBitIf` dopt Opt_PArr flags - .|. arrowsBit `setBitIf` dopt Opt_Arrows flags - .|. thBit `setBitIf` dopt Opt_TemplateHaskell flags - .|. qqBit `setBitIf` dopt Opt_QuasiQuotes flags - .|. ipBit `setBitIf` dopt Opt_ImplicitParams flags - .|. explicitForallBit `setBitIf` dopt Opt_ScopedTypeVariables flags - .|. explicitForallBit `setBitIf` dopt Opt_LiberalTypeSynonyms flags - .|. explicitForallBit `setBitIf` dopt Opt_PolymorphicComponents flags - .|. 
explicitForallBit `setBitIf` dopt Opt_ExistentialQuantification flags - .|. explicitForallBit `setBitIf` dopt Opt_Rank2Types flags - .|. explicitForallBit `setBitIf` dopt Opt_RankNTypes flags - .|. bangPatBit `setBitIf` dopt Opt_BangPatterns flags - .|. tyFamBit `setBitIf` dopt Opt_TypeFamilies flags - .|. haddockBit `setBitIf` dopt Opt_Haddock flags - .|. magicHashBit `setBitIf` dopt Opt_MagicHash flags - .|. kindSigsBit `setBitIf` dopt Opt_KindSignatures flags - .|. recursiveDoBit `setBitIf` dopt Opt_RecursiveDo flags - .|. unicodeSyntaxBit `setBitIf` dopt Opt_UnicodeSyntax flags - .|. unboxedTuplesBit `setBitIf` dopt Opt_UnboxedTuples flags + .|. ffiBit `setBitIf` dopt Opt_ForeignFunctionInterface flags + .|. parrBit `setBitIf` dopt Opt_PArr flags + .|. arrowsBit `setBitIf` dopt Opt_Arrows flags + .|. thBit `setBitIf` dopt Opt_TemplateHaskell flags + .|. qqBit `setBitIf` dopt Opt_QuasiQuotes flags + .|. ipBit `setBitIf` dopt Opt_ImplicitParams flags + .|. explicitForallBit `setBitIf` dopt Opt_ExplicitForAll flags + .|. bangPatBit `setBitIf` dopt Opt_BangPatterns flags + .|. tyFamBit `setBitIf` dopt Opt_TypeFamilies flags + .|. haddockBit `setBitIf` dopt Opt_Haddock flags + .|. magicHashBit `setBitIf` dopt Opt_MagicHash flags + .|. kindSigsBit `setBitIf` dopt Opt_KindSignatures flags + .|. recursiveDoBit `setBitIf` dopt Opt_RecursiveDo flags + .|. recBit `setBitIf` dopt Opt_DoRec flags + .|. recBit `setBitIf` dopt Opt_Arrows flags + .|. unicodeSyntaxBit `setBitIf` dopt Opt_UnicodeSyntax flags + .|. unboxedTuplesBit `setBitIf` dopt Opt_UnboxedTuples flags .|. standaloneDerivingBit `setBitIf` dopt Opt_StandaloneDeriving flags .|. transformComprehensionsBit `setBitIf` dopt Opt_TransformListComp flags .|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags .|. newQualOpsBit `setBitIf` dopt Opt_NewQualifiedOperators flags + .|. 
alternativeLayoutRuleBit `setBitIf` dopt Opt_AlternativeLayoutRule flags -- setBitIf :: Int -> Bool -> Int b `setBitIf` cond | cond = bit b @@ -1818,10 +1940,165 @@ lexError str = do lexer :: (Located Token -> P a) -> P a lexer cont = do - tok@(L _span _tok__) <- lexToken + alr <- extension alternativeLayoutRule + let lexTokenFun = if alr then lexTokenAlr else lexToken + tok@(L _span _tok__) <- lexTokenFun -- trace ("token: " ++ show tok__) $ do cont tok +lexTokenAlr :: P (Located Token) +lexTokenAlr = do mPending <- popPendingImplicitToken + t <- case mPending of + Nothing -> + do mNext <- popNextToken + t <- case mNext of + Nothing -> lexToken + Just next -> return next + alternativeLayoutRuleToken t + Just t -> + return t + setAlrLastLoc (getLoc t) + case unLoc t of + ITwhere -> setAlrExpectingOCurly (Just ALRLayoutWhere) + ITlet -> setAlrExpectingOCurly (Just ALRLayoutLet) + ITof -> setAlrExpectingOCurly (Just ALRLayoutOf) + ITdo -> setAlrExpectingOCurly (Just ALRLayoutDo) + _ -> return () + return t + +alternativeLayoutRuleToken :: Located Token -> P (Located Token) +alternativeLayoutRuleToken t + = do context <- getALRContext + lastLoc <- getAlrLastLoc + mExpectingOCurly <- getAlrExpectingOCurly + let thisLoc = getLoc t + thisCol = srcSpanStartCol thisLoc + newLine = srcSpanStartLine thisLoc > srcSpanEndLine lastLoc + case (unLoc t, context, mExpectingOCurly) of + -- I think our implicit open-curly handling is slightly + -- different to John's, in how it interacts with newlines + -- and "in" + (ITocurly, _, Just _) -> + do setAlrExpectingOCurly Nothing + setNextToken t + lexTokenAlr + (_, ALRLayout _ col : ls, Just expectingOCurly) + | thisCol > col -> + do setAlrExpectingOCurly Nothing + setALRContext (ALRLayout expectingOCurly thisCol : context) + setNextToken t + return (L thisLoc ITocurly) + | otherwise -> + do setAlrExpectingOCurly Nothing + setPendingImplicitTokens [L thisLoc ITccurly] + setNextToken t + return (L thisLoc ITocurly) + (_, _, Just expectingOCurly) -> + do setAlrExpectingOCurly Nothing + setALRContext (ALRLayout expectingOCurly thisCol : context) + setNextToken t + return (L thisLoc ITocurly) + -- We do the [] cases earlier than in the spec, as we + -- have an actual EOF token + (ITeof, ALRLayout _ _ : ls, _) -> + do setALRContext ls + setNextToken t + return (L thisLoc ITccurly) + (ITeof, _, _) -> + return t + -- the other ITeof case omitted; general case below covers it + (ITin, ALRLayout ALRLayoutLet _ : ls, _) + | newLine -> + do setPendingImplicitTokens [t] + setALRContext ls + return (L thisLoc ITccurly) + (_, ls@(ALRLayout _ col : _), _) + | newLine && thisCol <= col -> + do let f ls'@(ALRLayout _ col' : ls'') + | thisCol < col' = case f ls'' of + (ts, ls''') -> + (L thisLoc ITccurly : ts, ls''') + | thisCol == col' = ([L thisLoc ITsemi], ls') + | otherwise = ([], ls') + f ls' = ([], ls') + case f ls of + (t' : ts, ls') -> + do setALRContext ls' + setPendingImplicitTokens ts + setNextToken t + return t' + _ -> panic "Layout rule: [] when considering newline" + (u, _, _) + | isALRopen u -> + do setALRContext (ALRNoLayout (containsCommas u) : context) + return t + (u, _, _) + | isALRclose u -> + case context of + ALRLayout _ _ : ls -> + do setALRContext ls + setNextToken t + return (L thisLoc ITccurly) + ALRNoLayout _ : ls -> + do setALRContext ls + return t + [] -> + -- XXX This is an error in John's code, but + -- it looks reachable to me at first glance + return t + (ITin, ALRLayout ALRLayoutLet _ : ls, _) -> + do setALRContext ls + 
setPendingImplicitTokens [t] + return (L thisLoc ITccurly) + (ITin, _ : ls, _) -> + do setALRContext ls + setNextToken t + return (L thisLoc ITccurly) + -- the other ITin case omitted; general case below covers it + (ITcomma, ALRLayout _ _ : ls, _) + | topNoLayoutContainsCommas ls -> + do setALRContext ls + setNextToken t + return (L thisLoc ITccurly) + (ITwhere, ALRLayout ALRLayoutDo _ : ls, _) -> + do setALRContext ls + setPendingImplicitTokens [t] + return (L thisLoc ITccurly) + -- the other ITwhere case omitted; general case below covers it + (_, _, _) -> return t + +isALRopen :: Token -> Bool +isALRopen ITcase = True +isALRopen ITif = True +isALRopen IToparen = True +isALRopen ITobrack = True +isALRopen ITocurly = True +-- GHC Extensions: +isALRopen IToubxparen = True +isALRopen _ = False + +isALRclose :: Token -> Bool +isALRclose ITof = True +isALRclose ITthen = True +isALRclose ITcparen = True +isALRclose ITcbrack = True +isALRclose ITccurly = True +-- GHC Extensions: +isALRclose ITcubxparen = True +isALRclose _ = False + +containsCommas :: Token -> Bool +containsCommas IToparen = True +containsCommas ITobrack = True +-- GHC Extensions: +containsCommas IToubxparen = True +containsCommas _ = False + +topNoLayoutContainsCommas :: [ALRContext] -> Bool +topNoLayoutContainsCommas [] = False +topNoLayoutContainsCommas (ALRLayout _ _ : ls) = topNoLayoutContainsCommas ls +topNoLayoutContainsCommas (ALRNoLayout b : _) = b + lexToken :: P (Located Token) lexToken = do inp@(AI loc1 _ buf) <- getInput @@ -1844,6 +2121,7 @@ lexToken = do span `seq` setLastToken span bytes bytes t span buf bytes +reportLexError :: SrcLoc -> SrcLoc -> StringBuffer -> [Char] -> P a reportLexError loc1 loc2 buf str | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input") | otherwise = @@ -1862,4 +2140,59 @@ lexTokenStream buf loc dflags = unP go initState case ltok of L _ ITeof -> return [] _ -> liftM (ltok:) go + +linePrags = Map.singleton "line" (begin line_prag2) + +fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag), + ("options_ghc", lex_string_prag IToptions_prag), + ("options_haddock", lex_string_prag ITdocOptions), + ("language", token ITlanguage_prag), + ("include", lex_string_prag ITinclude_prag)]) + +ignoredPrags = Map.fromList (map ignored pragmas) + where ignored opt = (opt, nested_comment lexToken) + impls = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"] + options_pragmas = map ("options_" ++) impls + -- CFILES is a hugs-only thing. 
+ pragmas = options_pragmas ++ ["cfiles", "contract"] + +oneWordPrags = Map.fromList([("rules", rulePrag), + ("inline", token (ITinline_prag True)), + ("notinline", token (ITinline_prag False)), + ("specialize", token ITspec_prag), + ("source", token ITsource_prag), + ("warning", token ITwarning_prag), + ("deprecated", token ITdeprecated_prag), + ("scc", token ITscc_prag), + ("generated", token ITgenerated_prag), + ("core", token ITcore_prag), + ("unpack", token ITunpack_prag), + ("ann", token ITann_prag)]) + +twoWordPrags = Map.fromList([("inline conlike", token (ITinline_conlike_prag True)), + ("notinline conlike", token (ITinline_conlike_prag False)), + ("specialize inline", token (ITspec_inline_prag True)), + ("specialize notinline", token (ITspec_inline_prag False))]) + + +dispatch_pragmas :: Map String Action -> Action +dispatch_pragmas prags span buf len = case Map.lookup (clean_pragma (lexemeToString buf len)) prags of + Just found -> found span buf len + Nothing -> lexError "unknown pragma" + +known_pragma :: Map String Action -> AlexAccPred Int +known_pragma prags _ _ len (AI _ _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags) + && (nextCharIs buf (\c -> not (isAlphaNum c || c == '_'))) + +clean_pragma :: String -> String +clean_pragma prag = canon_ws (map toLower (unprefix prag)) + where unprefix prag' = case stripPrefix "{-#" prag' of + Just rest -> rest + Nothing -> prag' + canonical prag' = case prag' of + "noinline" -> "notinline" + "specialise" -> "specialize" + "constructorlike" -> "conlike" + _ -> prag' + canon_ws s = unwords (map canonical (words s)) }
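-- Note (illustration, not part of this patch): a short worked example of the
-- alternative layout rule added above (switched on by Opt_AlternativeLayoutRule
-- via alternativeLayoutRuleBit).  For the input
--
--     let x = 1 in x
--
-- lexTokenAlr sets alr_expecting_ocurly to ALRLayoutLet when it returns 'let';
-- the next token then hits the (_, _, Just expectingOCurly) case of
-- alternativeLayoutRuleToken, which pushes an ALRLayout context at that
-- token's column and emits an implicit ITocurly, re-queuing the token itself;
-- the later 'in' hits the (ITin, ALRLayout ALRLayoutLet _ : ls, _) case,
-- which pops the context, emits an implicit ITccurly and queues the real 'in'
-- as a pending token.  The delivered stream is therefore equivalent to
--
--     let { x = 1 } in x
--
-- On a new line, the helper f in the newline case emits one ITccurly for
-- every enclosing ALRLayout context indented further than the new token, and
-- an ITsemi when the indentation matches the innermost surviving context.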