Better documentation for -XLiberalTypeSynonyms, and steal forall keyword

[ghc-hetmet.git] / compiler / parser / Lexer.x
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x

index d9c5fc8..66f4fe5 100644 (file)
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -21,6 +21,18 @@
  --    - pragma-end should be only valid in a pragma
  
  {
+{-# OPTIONS -w #-}
+-- The above warning suppression flag is a temporary kludge.
+-- While working on this module you are encouraged to remove it and fix
+-- any warnings in the module. See
+--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
+-- for details
+--
+-- Note that Alex itself generates code with some unused bindings and
+-- without type signatures, so removing the flag might not be possible.
+
+{-# OPTIONS_GHC -funbox-strict-fields #-}
+
  module Lexer (
     Token(..), lexer, pragState, mkPState, PState(..),
     P(..), ParseResult(..), getSrcLoc, 
@@ -28,11 +40,10 @@ module Lexer (
     getMessages,
     popContext, pushCurrentContext, setLastToken, setSrcLoc,
     getLexState, popLexState, pushLexState,
-   extension, glaExtsEnabled, bangPatEnabled
+   extension, standaloneDerivingEnabled, bangPatEnabled,
+   addWarning
    ) where
  
-#include "HsVersions.h"
-
  import Bag
  import ErrUtils
  import Outputable
@@ -47,19 +58,15 @@ import Util         ( maybePrefixMatch, readRational )
  
  import Control.Monad
  import Data.Bits
-import Data.Char       ( chr, isSpace )
+import Data.Char       ( chr, ord, isSpace )
  import Data.Ratio
  import Debug.Trace
  
-#if __GLASGOW_HASKELL__ >= 605
-import Data.Char       ( GeneralCategory(..), generalCategory, isPrint, isUpper )
-#else
-import Compat.Unicode  ( GeneralCategory(..), generalCategory, isPrint, isUpper )
-#endif
+import Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper )
  }
  
  $unispace    = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
-$whitechar   = [\ \n\r\f\v\xa0 $unispace]
+$whitechar   = [\ \n\r\f\v $unispace]
  $white_no_nl = $whitechar # \n
  $tab         = \t
  
@@ -69,16 +76,16 @@ $decdigit  = $ascdigit -- for now, should really be $digit (ToDo)
  $digit     = [$ascdigit $unidigit]
  
  $special   = [\(\)\,\;\[\]\`\{\}]
-$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~ \xa1-\xbf \xd7 \xf7]
+$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  $unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
  $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  
  $unilarge  = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
-$asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
+$asclarge  = [A-Z]
  $large     = [$asclarge $unilarge]
  
  $unismall  = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
-$ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
+$ascsmall  = [a-z]
  $small     = [$ascsmall $unismall \_]
  
  $unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
@@ -141,12 +148,12 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
  -- space followed by a Haddock comment symbol (docsym) (in which case we'd
  -- have a Haddock comment). The rules then munch the rest of the line.
  
-"-- " ~$docsym .* ;
+"-- " ~[$docsym \#] .* ;
  "--" [^$symbol : \ ] .* ;
  
  -- Next, match Haddock comments if no -haddock flag
  
-"-- " $docsym .* / { ifExtension (not . haddockEnabled) } ;
+"-- " [$docsym \#] .* / { ifExtension (not . haddockEnabled) } ;
  
  -- Now, when we've matched comments that begin with 2 dashes and continue
  -- with a different character, we need to match comments that begin with three
@@ -202,7 +209,7 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
  -- generate a matching '}' token.
  <layout_left>  ()                      { do_layout_left }
  
-<0,option_prags,glaexts> \n                            { begin bol }
+<0,option_prags> \n                            { begin bol }
  
  "{-#" $whitechar* (line|LINE)          { begin line_prag2 }
  
@@ -220,15 +227,8 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
     -- NOTE: accept -} at the end of a LINE pragma, for compatibility
     -- with older versions of GHC which generated these.
  
--- We only want RULES pragmas to be picked up when -fglasgow-exts
--- is on, because the contents of the pragma is always written using
--- glasgow-exts syntax (using forall etc.), so if glasgow exts are not
--- enabled, we're sure to get a parse error.
--- (ToDo: we should really emit a warning when ignoring pragmas)
-<glaexts>
-  "{-#" $whitechar* (RULES|rules)      { token ITrules_prag }
-
-<0,option_prags,glaexts> {
+<0,option_prags> {
+  "{-#" $whitechar* (RULES|rules)       { token ITrules_prag }
    "{-#" $whitechar* (INLINE|inline)    { token (ITinline_prag True) }
    "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
                                         { token (ITinline_prag False) }
@@ -240,6 +240,8 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
         $whitechar* (NO(T?)INLINE|no(t?)inline)
                                         { token (ITspec_inline_prag False) }
    "{-#" $whitechar* (SOURCE|source)    { token ITsource_prag }
+  "{-#" $whitechar* (WARNING|warning)
+                                       { token ITwarning_prag }
    "{-#" $whitechar* (DEPRECATED|deprecated)
                                         { token ITdeprecated_prag }
    "{-#" $whitechar* (SCC|scc)          { token ITscc_prag }
@@ -248,46 +250,58 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
    "{-#" $whitechar* (CORE|core)                { token ITcore_prag }
    "{-#" $whitechar* (UNPACK|unpack)    { token ITunpack_prag }
  
-  "{-#" $whitechar* (DOCOPTIONS|docoptions)
-  / { ifExtension haddockEnabled }     { lex_string_prag ITdocOptions }
-
- "{-#"                                 { nested_comment lexToken }
+  -- We ignore all these pragmas, but don't generate a warning for them
+  -- CFILES is a hugs-only thing.
+  "{-#" $whitechar* (OPTIONS_HUGS|options_hugs|OPTIONS_NHC98|options_nhc98|OPTIONS_JHC|options_jhc|CFILES|cfiles)
+                    { nested_comment lexToken }
  
    -- ToDo: should only be valid inside a pragma:
-  "#-}"                                { token ITclose_prag}
+  "#-}"                                { endPrag }
  }
  
  <option_prags> {
-  "{-#" $whitechar* (OPTIONS|options)   { lex_string_prag IToptions_prag }
-  "{-#" $whitechar* (OPTIONS_GHC|options_ghc)
+  "{-#"  $whitechar* (OPTIONS|options)   { lex_string_prag IToptions_prag }
+  "{-#"  $whitechar* (OPTIONS_GHC|options_ghc)
                                          { lex_string_prag IToptions_prag }
-  "{-#" $whitechar* (LANGUAGE|language) { token ITlanguage_prag }
-  "{-#" $whitechar* (INCLUDE|include)   { lex_string_prag ITinclude_prag }
+  "{-#"  $whitechar* (OPTIONS_HADDOCK|options_haddock)
+                                         { lex_string_prag ITdocOptions }
+  "-- #"                                 { multiline_doc_comment }
+  "{-#"  $whitechar* (LANGUAGE|language) { token ITlanguage_prag }
+  "{-#"  $whitechar* (INCLUDE|include)   { lex_string_prag ITinclude_prag }
+}
+
+<0> {
+  -- In the "0" mode we ignore these pragmas
+  "{-#"  $whitechar* (OPTIONS|options|OPTIONS_GHC|options_ghc|OPTIONS_HADDOCK|options_haddock|LANGUAGE|language|INCLUDE|include)
+                     { nested_comment lexToken }
+}
+
+<0> {
+  "-- #" .* ;
  }
  
-<0,option_prags,glaexts> {
-       -- This is to catch things like {-# OPTIONS OPTIONS_HUGS ... 
-  "{-#" $whitechar* $idchar+           { nested_comment lexToken }
+<0,option_prags> {
+  "{-#"  { warnThen Opt_WarnUnrecognisedPragmas (text "Unrecognised pragma")
+                    (nested_comment lexToken) }
  }
  
  -- '0' state: ordinary lexemes
--- 'glaexts' state: glasgow extensions (postfix '#', etc.)
  
  -- Haddock comments
  
-<0,glaexts> {
-  "-- " $docsym    / { ifExtension haddockEnabled } { multiline_doc_comment }
-  "{-" \ ? $docsym / { ifExtension haddockEnabled } { nested_doc_comment }
+<0> {
+  "-- " $docsym      / { ifExtension haddockEnabled } { multiline_doc_comment }
+  "{-" \ ? $docsym   / { ifExtension haddockEnabled } { nested_doc_comment }
  }
  
  -- "special" symbols
  
-<0,glaexts> {
+<0> {
    "[:" / { ifExtension parrEnabled }   { token ITopabrack }
    ":]" / { ifExtension parrEnabled }   { token ITcpabrack }
  }
    
-<0,glaexts> {
+<0> {
    "[|"     / { ifExtension thEnabled } { token ITopenExpQuote }
    "[e|"            / { ifExtension thEnabled } { token ITopenExpQuote }
    "[p|"            / { ifExtension thEnabled } { token ITopenPatQuote }
@@ -296,26 +310,34 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
    "|]"     / { ifExtension thEnabled } { token ITcloseQuote }
    \$ @varid / { ifExtension thEnabled }        { skip_one_varid ITidEscape }
    "$("     / { ifExtension thEnabled } { token ITparenEscape }
+
+  "[$" @varid "|"  / { ifExtension qqEnabled }
+                     { lex_quasiquote_tok }
  }
  
-<0,glaexts> {
+<0> {
    "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol }
                                         { special IToparenbar }
    "|)" / { ifExtension arrowsEnabled }  { special ITcparenbar }
  }
  
-<0,glaexts> {
+<0> {
    \? @varid / { ifExtension ipEnabled }        { skip_one_varid ITdupipvarid }
  }
  
-<glaexts> {
-  "(#" / { notFollowedBySymbol }       { token IToubxparen }
-  "#)"                                 { token ITcubxparen }
-  "{|"                                 { token ITocurlybar }
-  "|}"                                 { token ITccurlybar }
+<0> {
+  "(#" / { ifExtension unboxedTuplesEnabled `alexAndPred` notFollowedBySymbol }
+         { token IToubxparen }
+  "#)" / { ifExtension unboxedTuplesEnabled }
+         { token ITcubxparen }
+}
+
+<0> {
+  "{|" / { ifExtension genericsEnabled } { token ITocurlybar }
+  "|}" / { ifExtension genericsEnabled } { token ITccurlybar }
  }
  
-<0,option_prags,glaexts> {
+<0,option_prags> {
    \(                                   { special IToparen }
    \)                                   { special ITcparen }
    \[                                   { special ITobrack }
@@ -328,21 +350,14 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
    \}                                   { close_brace }
  }
  
-<0,option_prags,glaexts> {
-  @qual @varid                 { check_qvarid }
+<0,option_prags> {
+  @qual @varid                 { idtoken qvarid }
    @qual @conid                 { idtoken qconid }
    @varid                       { varid }
    @conid                       { idtoken conid }
  }
  
--- after an illegal qvarid, such as 'M.let', 
--- we back up and try again in the bad_qvarid state:
-<bad_qvarid> {
-  @conid                       { pop_and (idtoken conid) }
-  @qual @conid                 { pop_and (idtoken qconid) }
-}
-
-<0,glaexts> {
+<0> {
    @qual @varid "#"+ / { ifExtension magicHashEnabled } { idtoken qvarid }
    @qual @conid "#"+ / { ifExtension magicHashEnabled } { idtoken qconid }
    @varid "#"+       / { ifExtension magicHashEnabled } { varid }
@@ -351,7 +366,7 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
  
  -- ToDo: M.(,,,)
  
-<0,glaexts> {
+<0> {
    @qual @varsym                        { idtoken qvarsym }
    @qual @consym                        { idtoken qconsym }
    @varsym                      { varsym }
@@ -360,38 +375,42 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
  
  -- For the normal boxed literals we need to be careful
  -- when trying to be close to Haskell98
-<0,glaexts> {
+<0> {
    -- Normal integral literals (:: Num a => a, from Integer)
-  @decimal                     { tok_num positive 0 0 decimal }
-  0[oO] @octal                 { tok_num positive 2 2 octal }
-  0[xX] @hexadecimal           { tok_num positive 2 2 hexadecimal }
+  @decimal           { tok_num positive 0 0 decimal }
+  0[oO] @octal       { tok_num positive 2 2 octal }
+  0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }
  
    -- Normal rational literals (:: Fractional a => a, from Rational)
-  @floating_point              { strtoken tok_float }
+  @floating_point    { strtoken tok_float }
  }
  
-<glaexts> {
-  -- Unboxed ints (:: Int#)
+<0> {
+  -- Unboxed ints (:: Int#) and words (:: Word#)
    -- It's simpler (and faster?) to give separate cases to the negatives,
    -- especially considering octal/hexadecimal prefixes.
-  @decimal \#                  { tok_primint positive 0 1 decimal }
-  0[oO] @octal \#              { tok_primint positive 2 3 octal }
-  0[xX] @hexadecimal \#                { tok_primint positive 2 3 hexadecimal }
-  @negative @decimal \#                        { tok_primint negative 1 2 decimal }
-  @negative 0[oO] @octal \#            { tok_primint negative 3 4 octal }
-  @negative 0[xX] @hexadecimal \#      { tok_primint negative 3 4 hexadecimal }
+  @decimal                     \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
+  0[oO] @octal                 \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
+  0[xX] @hexadecimal           \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
+  @negative @decimal           \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
+  @negative 0[oO] @octal       \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
+  @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
+
+  @decimal                     \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal }
+  0[oO] @octal                 \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
+  0[xX] @hexadecimal           \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
  
    -- Unboxed floats and doubles (:: Float#, :: Double#)
    -- prim_{float,double} work with signed literals
-  @signed @floating_point \#           { init_strtoken 1 tok_primfloat }
-  @signed @floating_point \# \#                { init_strtoken 2 tok_primdouble }
+  @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
+  @signed @floating_point \# \# / { ifExtension magicHashEnabled } { init_strtoken 2 tok_primdouble }
  }
  
  -- Strings and chars are lexed by hand-written code.  The reason is
  -- that even if we recognise the string or char here in the regex
  -- lexer, we would still have to parse the string afterward in order
  -- to convert it to a String.
-<0,glaexts> {
+<0> {
    \'                           { lex_char_tok }
    \"                           { lex_string_tok }
  }
@@ -407,7 +426,6 @@ data Token
    | ITdata
    | ITdefault
    | ITderiving
-  | ITderive
    | ITdo
    | ITelse
    | IThiding
@@ -441,6 +459,9 @@ data Token
    | ITdotnet
    | ITmdo
    | ITfamily
+  | ITgroup
+  | ITby
+  | ITusing
  
         -- Pragmas
    | ITinline_prag Bool         -- True <=> INLINE, False <=> NOINLINE
@@ -448,6 +469,7 @@ data Token
    | ITspec_inline_prag Bool    -- SPECIALISE INLINE (or NOINLINE)
    | ITsource_prag
    | ITrules_prag
+  | ITwarning_prag
    | ITdeprecated_prag
    | ITline_prag
    | ITscc_prag
@@ -484,8 +506,8 @@ data Token
    | ITvocurly
    | ITvccurly
    | ITobrack
-  | ITopabrack                 -- [:, for parallel arrays with -fparr
-  | ITcpabrack                 -- :], for parallel arrays with -fparr
+  | ITopabrack                 -- [:, for parallel arrays with -XParr
+  | ITcpabrack                 -- :], for parallel arrays with -XParr
    | ITcbrack
    | IToparen
    | ITcparen
@@ -517,6 +539,7 @@ data Token
    | ITprimchar   Char
    | ITprimstring FastString
    | ITprimint    Integer
+  | ITprimword   Integer
    | ITprimfloat  Rational
    | ITprimdouble Rational
  
@@ -530,6 +553,7 @@ data Token
    | ITparenEscape              --  $( 
    | ITvarQuote                 --  '
    | ITtyQuote                  --  ''
+  | ITquasiQuote (FastString,FastString,SrcSpan) --  [$...|...|]
  
    -- Arrow notation extension
    | ITproc
@@ -550,18 +574,19 @@ data Token
    | ITdocCommentNamed String     -- something beginning '-- $'
    | ITdocSection      Int String -- a section heading
    | ITdocOptions      String     -- doc options (prune, ignore-exports, etc)
+  | ITdocOptionsOld   String     -- doc options declared "-- # ..."-style
  
  #ifdef DEBUG
    deriving Show -- debugging
  #endif
  
+{-
  isSpecial :: Token -> Bool
  -- If we see M.x, where x is a keyword, but
  -- is special, we treat is as just plain M.x, 
  -- not as a keyword.
  isSpecial ITas         = True
  isSpecial IThiding     = True
-isSpecial ITderive     = True
  isSpecial ITqualified  = True
  isSpecial ITforall     = True
  isSpecial ITexport     = True
@@ -574,7 +599,11 @@ isSpecial ITccallconv   = True
  isSpecial ITstdcallconv = True
  isSpecial ITmdo                = True
  isSpecial ITfamily     = True
+isSpecial ITgroup   = True
+isSpecial ITby      = True
+isSpecial ITusing   = True
  isSpecial _             = False
+-}
  
  -- the bitmap provided as the third component indicates whether the
  -- corresponding extension keyword is valid under the extension options
@@ -592,7 +621,6 @@ reservedWordsFM = listToUFM $
         ( "data",       ITdata,         0 ),     
         ( "default",    ITdefault,      0 ),  
         ( "deriving",   ITderiving,     0 ), 
-       ( "derive",     ITderive,       0 ), 
         ( "do",         ITdo,           0 ),       
         ( "else",       ITelse,         0 ),     
         ( "hiding",     IThiding,       0 ),
@@ -613,9 +641,12 @@ reservedWordsFM = listToUFM $
         ( "where",      ITwhere,        0 ),
         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
  
-       ( "forall",     ITforall,        bit explicitForallBit),
+    ( "forall",        ITforall,        bit explicitForallBit .|. bit inRulePragBit),
         ( "mdo",        ITmdo,           bit recursiveDoBit),
         ( "family",     ITfamily,        bit tyFamBit),
+    ( "group",  ITgroup,     bit transformComprehensionsBit),
+    ( "by",     ITby,        bit transformComprehensionsBit),
+    ( "using",  ITusing,     bit transformComprehensionsBit),
  
         ( "foreign",    ITforeign,       bit ffiBit),
         ( "export",     ITexport,        bit ffiBit),
@@ -651,11 +682,9 @@ reservedSymsFM = listToUFM $
         ,("!",   ITbang,     always)
  
          -- For data T (a::*) = MkT
-       ,("*", ITstar, \i -> glaExtsEnabled i ||
-                            kindSigsEnabled i ||
-                            tyFamEnabled i)
+       ,("*", ITstar, \i -> kindSigsEnabled i || tyFamEnabled i)
          -- For 'forall a . t'
-       ,(".", ITdot, explicitForallEnabled)
+       ,(".", ITdot, \i -> explicitForallEnabled i || inRulePrag i)
  
         ,("-<",  ITlarrowtail, arrowsEnabled)
         ,(">-",  ITrarrowtail, arrowsEnabled)
@@ -682,11 +711,11 @@ reservedSymsFM = listToUFM $
  type Action = SrcSpan -> StringBuffer -> Int -> P (Located Token)
  
  special :: Token -> Action
-special tok span _buf len = return (L span tok)
+special tok span _buf _len = return (L span tok)
  
  token, layout_token :: Token -> Action
-token t span buf len = return (L span t)
-layout_token t span buf len = pushLexState layout >> return (L span t)
+token t span _buf _len = return (L span t)
+layout_token t span _buf _len = pushLexState layout >> return (L span t)
  
  idtoken :: (StringBuffer -> Int -> Token) -> Action
  idtoken f span buf len = return (L span $! (f buf len))
@@ -736,8 +765,10 @@ isNormalComment bits _ _ (AI _ _ buf)
  
  spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf))
  
+{-
  haddockDisabledAnd p bits _ _ (AI _ _ buf)
    = if haddockEnabled bits then False else (p buf)
+-}
  
  atEOL _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf == '\n'
  
@@ -778,7 +809,7 @@ multiline_doc_comment span buf _len = withLexedDocType (worker "")
  nested_comment :: P (Located Token) -> Action
  nested_comment cont span _str _len = do
    input <- getInput
-  go 1 input
+  go (1::Int) input
    where
      go 0 input = do setInput input; cont
      go n input = case alexGetChar input of
@@ -786,12 +817,12 @@ nested_comment cont span _str _len = do
        Just ('-',input) -> case alexGetChar input of
          Nothing  -> errBrace input span
          Just ('\125',input) -> go (n-1) input
-        Just (c,_)          -> go n input
+        Just (_,_)          -> go n input
        Just ('\123',input) -> case alexGetChar input of
          Nothing  -> errBrace input span
          Just ('-',input) -> go (n+1) input
-        Just (c,_)       -> go n input
-      Just (c,input) -> go n input
+        Just (_,_)       -> go n input
+      Just (_,input) -> go n input
  
  nested_doc_comment :: Action
  nested_doc_comment span buf _len = withLexedDocType (go "")
@@ -800,16 +831,16 @@ nested_doc_comment span buf _len = withLexedDocType (go "")
        Nothing -> errBrace input span
        Just ('-',input) -> case alexGetChar input of
          Nothing -> errBrace input span
-        Just ('\125',input@(AI end _ buf2)) ->
+        Just ('\125',input) ->
            docCommentEnd input commentAcc docType buf span
-        Just (c,_) -> go ('-':commentAcc) input docType False
+        Just (_,_) -> go ('-':commentAcc) input docType False
        Just ('\123', input) -> case alexGetChar input of
          Nothing  -> errBrace input span
          Just ('-',input) -> do
            setInput input
            let cont = do input <- getInput; go commentAcc input docType False
            nested_comment cont span buf _len
-        Just (c,_) -> go ('\123':commentAcc) input docType False
+        Just (_,_) -> go ('\123':commentAcc) input docType False
        Just (c,input) -> go (c:commentAcc) input docType False
  
  withLexedDocType lexDocComment = do
@@ -818,13 +849,26 @@ withLexedDocType lexDocComment = do
      '|' -> lexDocComment input ITdocCommentNext False
      '^' -> lexDocComment input ITdocCommentPrev False
      '$' -> lexDocComment input ITdocCommentNamed False
-    '*' -> lexDocSection 1 input 
+    '*' -> lexDocSection 1 input
+    '#' -> lexDocComment input ITdocOptionsOld False
   where 
      lexDocSection n input = case alexGetChar input of 
        Just ('*', input) -> lexDocSection (n+1) input
-      Just (c, _) -> lexDocComment input (ITdocSection n) True
+      Just (_,   _)     -> lexDocComment input (ITdocSection n) True
        Nothing -> do setInput input; lexToken -- eof reached, lex it normally
  
+-- RULES pragmas turn on the forall and '.' keywords, and we turn them
+-- off again at the end of the pragma.
+--
+-- rulePrag sets the inRulePragBit flag in the extension bitmap and returns
+-- the ITrules_prag token located at the pragma's span.  The flag has to be
+-- or'ed in as a mask (bit inRulePragBit), not as the raw bit index:
+-- reservedWordsFM tests it with 'bit inRulePragBit' and endPrag clears it
+-- with 'complement (bit inRulePragBit)'.  Or'ing in the bare index would
+-- flip whichever unrelated extension bits happen to make up that number.
+--
+-- NOTE(review): the RULES lexer rule visible in this patch still uses
+-- 'token ITrules_prag'; confirm whether it is meant to call rulePrag.
+rulePrag :: Action
+rulePrag span _buf _len = do
+  setExts (.|. bit inRulePragBit)
+  return (L span ITrules_prag)
+
+-- Action for the pragma terminator "#-}": clear the inRulePragBit flag in
+-- the extension bitmap (harmless if it was not set) and return the
+-- ITclose_prag token located at the given span.
+endPrag :: Action
+endPrag span _buf _len =
+  setExts clearRulePrag >> return (L span ITclose_prag)
+  where
+    -- mask out only the rule-pragma flag, leaving every other bit as it was
+    clearRulePrag exts = exts .&. complement (bit inRulePragBit)
+
  -- docCommentEnd
  -------------------------------------------------------------------------------
  -- This function is quite tricky. We can't just return a new token, we also
@@ -868,30 +912,6 @@ close_brace span _str _len = do
    popContext
    return (L span ITccurly)
  
--- We have to be careful not to count M.<varid> as a qualified name
--- when <varid> is a keyword.  We hack around this by catching 
--- the offending tokens afterward, and re-lexing in a different state.
-check_qvarid span buf len = do
-  case lookupUFM reservedWordsFM var of
-       Just (keyword,exts)
-         | not (isSpecial keyword) ->
-         if exts == 0 
-            then try_again
-            else do
-               b <- extension (\i -> exts .&. i /= 0)
-               if b then try_again
-                    else return token
-       _other -> return token
-  where
-       (mod,var) = splitQualName buf len
-       token     = L span (ITqvarid (mod,var))
-
-       try_again = do
-               (AI _ offs _) <- getInput       
-               setInput (AI (srcSpanStart span) (offs-len) buf)
-               pushLexState bad_qvarid
-               lexToken
-
  qvarid buf len = ITqvarid $! splitQualName buf len
  qconid buf len = ITqconid $! splitQualName buf len
  
@@ -924,6 +944,7 @@ splitQualName orig_buf len = split orig_buf orig_buf
         qual_size = orig_buf `byteDiff` dot_buf
  
  varid span buf len = 
+  fs `seq`
    case lookupUFM reservedWordsFM fs of
         Just (keyword,0)    -> do
                 maybe_layout keyword
@@ -969,6 +990,7 @@ tok_integral itint transint transbuf translen (radix,char_to_int) span buf len =
  -- some conveniences for use with tok_integral
  tok_num = tok_integral ITinteger
  tok_primint = tok_integral ITprimint
+tok_primword = tok_integral ITprimword positive
  positive = id
  negative = negate
  decimal = (10,octDecDigit)
@@ -1032,7 +1054,7 @@ new_layout_context strict span _buf _len = do
                 -- we must generate a {} sequence now.
                 pushLexState layout_left
                 return (L span ITvocurly)
-       other -> do
+       _ -> do
                 setContext (Layout offset : ctx)
                 return (L span ITvocurly)
  
@@ -1066,7 +1088,7 @@ setFile code span buf len = do
  -- Options, includes and language pragmas.
  
  lex_string_prag :: (String -> Token) -> Action
-lex_string_prag mkTok span buf len
+lex_string_prag mkTok span _buf _len
      = do input <- getInput
           start <- getSrcLoc
           tok <- go [] input
@@ -1079,7 +1101,7 @@ lex_string_prag mkTok span buf len
                     else case alexGetChar input of
                            Just (c,i) -> go (c:acc) i
                            Nothing -> err input
-          isString i [] = True
+          isString _ [] = True
            isString i (x:xs)
                = case alexGetChar i of
                    Just (c,i') | c == x    -> isString i' xs
@@ -1093,7 +1115,7 @@ lex_string_prag mkTok span buf len
  -- This stuff is horrible.  I hates it.
  
  lex_string_tok :: Action
-lex_string_tok span buf len = do
+lex_string_tok span _buf _len = do
    tok <- lex_string ""
    end <- getSrcLoc 
    return (L (mkSrcSpan (srcSpanStart span) end) tok)
@@ -1106,8 +1128,8 @@ lex_string s = do
  
      Just ('"',i)  -> do
         setInput i
-       glaexts <- extension glaExtsEnabled
-       if glaexts
+       magicHash <- extension magicHashEnabled
+       if magicHash
           then do
             i <- getInput
             case alexGetChar' i of
@@ -1150,7 +1172,7 @@ lex_char_tok :: Action
  -- but WIHTOUT CONSUMING the x or T part  (the parser does that).
  -- So we have to do two characters of lookahead: when we see 'x we need to
  -- see if there's a trailing quote
-lex_char_tok span buf len = do -- We've seen '
+lex_char_tok span _buf _len = do       -- We've seen '
     i1 <- getInput      -- Look ahead to first character
     let loc = srcSpanStart span
     case alexGetChar' i1 of
@@ -1163,14 +1185,14 @@ lex_char_tok span buf len = do  -- We've seen '
                         return (L (mkSrcSpan loc end2)  ITtyQuote)
                    else lit_error
  
-       Just ('\\', i2@(AI end2 _ _)) -> do     -- We've seen 'backslash 
+       Just ('\\', i2@(AI _end2 _ _)) -> do    -- We've seen 'backslash
                   setInput i2
                   lit_ch <- lex_escape
                   mc <- getCharOrFail   -- Trailing quote
                   if mc == '\'' then finish_char_tok loc lit_ch
                                 else do setInput i2; lit_error 
  
-        Just (c, i2@(AI end2 _ _)) 
+        Just (c, i2@(AI _end2 _ _))
                 | not (isAny c) -> lit_error
                 | otherwise ->
  
@@ -1191,9 +1213,9 @@ lex_char_tok span buf len = do    -- We've seen '
  finish_char_tok :: SrcLoc -> Char -> P (Located Token)
  finish_char_tok loc ch -- We've already seen the closing quote
                         -- Just need to check for trailing #
-  = do glaexts <- extension glaExtsEnabled
+  = do magicHash <- extension magicHashEnabled
         i@(AI end _ _) <- getInput
-       if glaexts then do
+       if magicHash then do
                 case alexGetChar' i of
                         Just ('#',i@(AI end _ _)) -> do
                                 setInput i
@@ -1210,7 +1232,7 @@ lex_char c inp = do
        c | isAny c -> do setInput inp; return c
        _other -> lit_error
  
-isAny c | c > '\xff' = isPrint c
+isAny c | c > '\x7f' = isPrint c
         | otherwise  = is_any c
  
  lex_escape :: P Char
@@ -1234,7 +1256,7 @@ lex_escape = do
  
         'x'   -> readNum is_hexdigit 16 hexDigit
         'o'   -> readNum is_octdigit  8 octDecDigit
-       x | is_digit x -> readNum2 is_digit 10 octDecDigit (octDecDigit x)
+       x | is_decdigit x -> readNum2 is_decdigit 10 octDecDigit (octDecDigit x)
  
         c1 ->  do
            i <- getInput
@@ -1326,13 +1348,54 @@ getCharOrFail =  do
         Just (c,i)  -> do setInput i; return c
  
  -- -----------------------------------------------------------------------------
+-- QuasiQuote
+
+-- Action run once the opening "[$quoter|" of a quasi-quotation has been
+-- matched.  It lexes the quotation body by hand (see lex_quasiquote) and
+-- returns a single ITquasiQuote token carrying the quoter name, the body
+-- text, and the span from just after the opening lexeme to the location
+-- reported once the body has been consumed.
+lex_quasiquote_tok :: Action
+lex_quasiquote_tok span buf len = do
+  quoteStart <- getSrcLoc
+  revQuote   <- lex_quasiquote ""
+  end        <- getSrcLoc
+  let -- the matched lexeme without its final character (the '|')
+      opener   = lexemeToString buf (len - 1)
+      -- everything after the '$' is the quoter's name
+      quoter   = reverse (takeWhile (/= '$') (reverse opener))
+      tokSpan  = mkSrcSpan (srcSpanStart span) end
+      bodySpan = mkSrcSpan quoteStart end
+  return (L tokSpan
+            (ITquasiQuote (mkFastString quoter,
+                           -- lex_quasiquote accumulates the body reversed
+                           mkFastString (reverse revQuote),
+                           bodySpan)))
+
+-- Lex the body of a quasi-quotation one character at a time.
+--
+-- The argument is the body read so far, in REVERSE order (each new
+-- character is consed onto the front); the result is likewise reversed, so
+-- the caller must reverse it.  Lexing stops at the first unescaped "|]",
+-- which is consumed but not included in the result.  Reaching end of input
+-- before that is reported via lit_error.
+lex_quasiquote :: String -> P String
+lex_quasiquote s = do
+  i <- getInput
+  case alexGetChar' i of
+    -- end of input before the closing "|]"
+    Nothing -> lit_error
+
+    -- Escapes: "\|" and "\]" contribute a literal '|' / ']' and the
+    -- backslash is dropped.  If neither guard matches, this alternative
+    -- fails and the backslash is handled by the catch-all case below,
+    -- i.e. it is kept as an ordinary character.
+    Just ('\\',i)
+       | Just ('|',i) <- next -> do 
+               setInput i; lex_quasiquote ('|' : s)
+       | Just (']',i) <- next -> do 
+               setInput i; lex_quasiquote (']' : s)
+       where next = alexGetChar' i
+
+    -- "|]" ends the quotation; the input is left just past the ']'.
+    -- A '|' not followed by ']' falls through to the catch-all case.
+    Just ('|',i)
+       | Just (']',i) <- next -> do 
+               setInput i; return s
+       where next = alexGetChar' i
+
+    -- any other character is part of the quotation body
+    Just (c, i) -> do
+        setInput i; lex_quasiquote (c : s)
+
+-- -----------------------------------------------------------------------------
  -- Warnings
  
  warn :: DynFlag -> SDoc -> Action
-warn option warning span _buf _len = do
-    addWarning option (mkWarnMsg span alwaysQualify warning)
+warn option warning srcspan _buf _len = do
+    addWarning option srcspan warning
      lexToken
  
+-- Like warn, but instead of resuming with lexToken it hands control to the
+-- supplied action: first record the warning (under the given flag, at the
+-- lexeme's span), then run the action on the same span, buffer and length.
+warnThen :: DynFlag -> SDoc -> Action -> Action
+warnThen option warning action srcspan buf len =
+    addWarning option srcspan warning >> action srcspan buf len
+
  -- -----------------------------------------------------------------------------
  -- The Parse Monad
  
@@ -1378,7 +1441,7 @@ instance Monad P where
    fail = failP
  
  returnP :: a -> P a
-returnP a = P $ \s -> POk s a
+returnP a = a `seq` (P $ \s -> POk s a)
  
  thenP :: P a -> (a -> P b) -> P b
  (P m) `thenP` k = P $ \ s ->
@@ -1393,10 +1456,10 @@ failMsgP :: String -> P a
  failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg)
  
  failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
-failLocMsgP loc1 loc2 str = P $ \s -> PFailed (mkSrcSpan loc1 loc2) (text str)
+failLocMsgP loc1 loc2 str = P $ \_ -> PFailed (mkSrcSpan loc1 loc2) (text str)
  
-failSpanMsgP :: SrcSpan -> String -> P a
-failSpanMsgP span msg = P $ \s -> PFailed span (text msg)
+failSpanMsgP :: SrcSpan -> SDoc -> P a
+failSpanMsgP span msg = P $ \_ -> PFailed span msg
  
  extension :: (Int -> Bool) -> P Bool
  extension p = P $ \s -> POk s (p $! extsBitmap s)
@@ -1404,6 +1467,9 @@ extension p = P $ \s -> POk s (p $! extsBitmap s)
  getExts :: P Int
  getExts = P $ \s -> POk s (extsBitmap s)
  
+-- Replace the extension bitmap in the parser state with the result of
+-- applying the given function to the current bitmap.
+setExts :: (Int -> Int) -> P ()
+setExts f = P $ \s ->
+  let exts' = f (extsBitmap s)
+  in  POk s{ extsBitmap = exts' } ()
+
  setSrcLoc :: SrcLoc -> P ()
  setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
  
@@ -1442,7 +1508,7 @@ alexGetChar (AI loc ofs s)
  
         adj_c 
           | c <= '\x06' = non_graphic
-         | c <= '\xff' = c
+         | c <= '\x7f' = c
            -- Alex doesn't handle Unicode, so when Unicode
            -- character is encoutered we output these values
            -- with the actual character value hidden in the state.
@@ -1452,7 +1518,7 @@ alexGetChar (AI loc ofs s)
                   LowercaseLetter       -> lower
                   TitlecaseLetter       -> upper
                   ModifierLetter        -> other_graphic
-                 OtherLetter           -> other_graphic
+                 OtherLetter           -> lower -- see #1103
                   NonSpacingMark        -> other_graphic
                   SpacingCombiningMark  -> other_graphic
                   EnclosingMark         -> other_graphic
@@ -1486,7 +1552,7 @@ alexGetChar' (AI loc ofs s)
          ofs'   = advanceOffs c ofs
  
  advanceOffs :: Char -> Int -> Int
-advanceOffs '\n' offs = 0
+advanceOffs '\n' _    = 0
  advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8
  advanceOffs _    offs = offs + 1
  
@@ -1503,14 +1569,14 @@ popLexState :: P Int
  popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
  
  getLexState :: P Int
-getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls
+getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls
  
  -- for reasons of efficiency, flags indicating language extensions (eg,
--- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
+-- -fglasgow-exts or -XParr) are represented by a bitmap stored in an unboxed
  -- integer
  
-glaExtsBit, ffiBit, parrBit :: Int
-glaExtsBit = 0
+genericsBit, ffiBit, parrBit :: Int
+genericsBit = 0 -- {| and |}
  ffiBit    = 1
  parrBit           = 2
  arrowsBit  = 4
@@ -1521,14 +1587,19 @@ bangPatBit = 8  -- Tells the parser to understand bang-patterns
                 -- (doesn't affect the lexer)
  tyFamBit   = 9 -- indexed type families: 'family' keyword and kind sigs
  haddockBit = 10 -- Lex and parse Haddock comments
-magicHashBit = 11 -- # in both functions and operators
+magicHashBit = 11 -- "#" in both functions and operators
  kindSigsBit = 12 -- Kind signatures on type variables
  recursiveDoBit = 13 -- mdo
  unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
+unboxedTuplesBit = 15 -- (# and #)
+standaloneDerivingBit = 16 -- standalone instance deriving declarations
+transformComprehensionsBit = 17 -- set from Opt_TransformListComp in mkPState
+qqBit     = 18 -- enable quasiquoting
+inRulePragBit = 19 -- NOTE(review): not among the bits set in the visible part of mkPState's bitmap; presumably toggled via setExts while lexing a RULES pragma, confirm
  
-glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
+genericsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
  always           _     = True
-glaExtsEnabled   flags = testBit flags glaExtsBit
+genericsEnabled  flags = testBit flags genericsBit
  ffiEnabled       flags = testBit flags ffiBit
  parrEnabled      flags = testBit flags parrBit
  arrowsEnabled    flags = testBit flags arrowsBit
@@ -1542,17 +1613,20 @@ magicHashEnabled flags = testBit flags magicHashBit
  kindSigsEnabled  flags = testBit flags kindSigsBit
  recursiveDoEnabled flags = testBit flags recursiveDoBit
  unicodeSyntaxEnabled flags = testBit flags unicodeSyntaxBit
+unboxedTuplesEnabled flags = testBit flags unboxedTuplesBit
+standaloneDerivingEnabled flags = testBit flags standaloneDerivingBit
+transformComprehensionsEnabled flags = testBit flags transformComprehensionsBit
+qqEnabled        flags = testBit flags qqBit
+inRulePrag       flags = testBit flags inRulePragBit
  
  -- PState for parsing options pragmas
  --
-pragState :: StringBuffer -> SrcLoc -> PState
-pragState buf loc  = 
+pragState :: DynFlags -> StringBuffer -> SrcLoc -> PState
+pragState dynflags buf loc =
    PState {
-      buffer         = buf,
+      buffer        = buf,
        messages      = emptyMessages,
-      -- XXX defaultDynFlags is not right, but we don't have a real
-      -- dflags handy
-      dflags        = defaultDynFlags,
+      dflags        = dynflags,
        last_loc      = mkSrcSpan loc loc,
        last_offs     = 0,
        last_len      = 0,
@@ -1579,17 +1653,23 @@ mkPState buf loc flags  =
        loc           = loc,
        extsBitmap    = fromIntegral bitmap,
        context       = [],
-      lex_state     = [bol, if glaExtsEnabled bitmap then glaexts else 0]
+      lex_state     = [bol, 0]
         -- we begin in the layout state if toplev_layout is set
      }
      where
-      bitmap =     glaExtsBit `setBitIf` dopt Opt_GlasgowExts  flags
-              .|. ffiBit       `setBitIf` dopt Opt_FFI          flags
+      bitmap = genericsBit `setBitIf` dopt Opt_Generics flags
+              .|. ffiBit       `setBitIf` dopt Opt_ForeignFunctionInterface flags
                .|. parrBit      `setBitIf` dopt Opt_PArr         flags
                .|. arrowsBit    `setBitIf` dopt Opt_Arrows       flags
-              .|. thBit        `setBitIf` dopt Opt_TH           flags
+              .|. thBit        `setBitIf` dopt Opt_TemplateHaskell flags
+              .|. qqBit        `setBitIf` dopt Opt_QuasiQuotes flags
                .|. ipBit        `setBitIf` dopt Opt_ImplicitParams flags
                .|. explicitForallBit `setBitIf` dopt Opt_ScopedTypeVariables flags
+              .|. explicitForallBit `setBitIf` dopt Opt_LiberalTypeSynonyms flags
+              .|. explicitForallBit `setBitIf` dopt Opt_PolymorphicComponents flags
+              .|. explicitForallBit `setBitIf` dopt Opt_ExistentialQuantification flags
+              .|. explicitForallBit `setBitIf` dopt Opt_Rank2Types flags
+              .|. explicitForallBit `setBitIf` dopt Opt_RankNTypes flags
                .|. bangPatBit   `setBitIf` dopt Opt_BangPatterns flags
                .|. tyFamBit     `setBitIf` dopt Opt_TypeFamilies flags
                .|. haddockBit   `setBitIf` dopt Opt_Haddock      flags
@@ -1597,15 +1677,19 @@ mkPState buf loc flags  =
                .|. kindSigsBit  `setBitIf` dopt Opt_KindSignatures flags
                .|. recursiveDoBit `setBitIf` dopt Opt_RecursiveDo flags
                .|. unicodeSyntaxBit `setBitIf` dopt Opt_UnicodeSyntax flags
+              .|. unboxedTuplesBit `setBitIf` dopt Opt_UnboxedTuples flags
+              .|. standaloneDerivingBit `setBitIf` dopt Opt_StandaloneDeriving flags
+           .|. transformComprehensionsBit `setBitIf` dopt Opt_TransformListComp flags
        --
        setBitIf :: Int -> Bool -> Int
        b `setBitIf` cond | cond      = bit b
                         | otherwise = 0
  
-addWarning :: DynFlag -> WarnMsg -> P ()
-addWarning option w
+addWarning :: DynFlag -> SrcSpan -> SDoc -> P () -- takes span and message separately; the WarnMsg is built here with mkWarnMsg
+addWarning option srcspan warning
   = P $ \s@PState{messages=(ws,es), dflags=d} ->
-       let ws' = if dopt option d then ws `snocBag` w else ws
+       let warning' = mkWarnMsg srcspan alwaysQualify warning
+           ws' = if dopt option d then ws `snocBag` warning' else ws
         in POk s{messages=(ws', es)} ()
  
  getMessages :: PState -> Messages
@@ -1619,7 +1703,7 @@ setContext ctx = P $ \s -> POk s{context=ctx} ()
  
  popContext :: P ()
  popContext = P $ \ s@(PState{ buffer = buf, context = ctx, 
-                          loc = loc, last_len = len, last_loc = last_loc }) ->
+                              last_len = len, last_loc = last_loc }) ->
    case ctx of
         (_:tl) -> POk s{ context = tl } ()
         []     -> PFailed last_loc (srcParseErr buf len)
@@ -1648,8 +1732,8 @@ srcParseErr
    -> Message
  srcParseErr buf len
    = hcat [ if null token 
-            then ptext SLIT("parse error (possibly incorrect indentation)")
-            else hcat [ptext SLIT("parse error on input "),
+            then ptext (sLit "parse error (possibly incorrect indentation)")
+            else hcat [ptext (sLit "parse error on input "),
                         char '`', text token, char '\'']
      ]
    where token = lexemeToString (offsetBytes (-len) buf) len
@@ -1667,7 +1751,7 @@ srcParseFail = P $ \PState{ buffer = buf, last_len = len,
  lexError :: String -> P a
  lexError str = do
    loc <- getSrcLoc
-  i@(AI end _ buf) <- getInput
+  (AI end _ buf) <- getInput
    reportLexError loc end buf str
  
  -- -----------------------------------------------------------------------------
@@ -1676,7 +1760,7 @@ lexError str = do
  
  lexer :: (Located Token -> P a) -> P a
  lexer cont = do
-  tok@(L span tok__) <- lexToken
+  tok@(L _span _tok__) <- lexToken
  --  trace ("token: " ++ show tok__) $ do
    cont tok
  
@@ -1686,20 +1770,21 @@ lexToken = do
    sc <- getLexState
    exts <- getExts
    case alexScanUser exts inp sc of
-    AlexEOF -> do let span = mkSrcSpan loc1 loc1
-                 setLastToken span 0 0
-                 return (L span ITeof)
-    AlexError (AI loc2 _ buf) -> do 
-       reportLexError loc1 loc2 buf "lexical error"
+    AlexEOF -> do
+        let span = mkSrcSpan loc1 loc1
+        setLastToken span 0 0
+        return (L span ITeof)
+    AlexError (AI loc2 _ buf) ->
+        reportLexError loc1 loc2 buf "lexical error"
      AlexSkip inp2 _ -> do
-       setInput inp2
-       lexToken
-    AlexToken inp2@(AI end _ buf2) len t -> do
-    setInput inp2
-    let span = mkSrcSpan loc1 end
-    let bytes = byteDiff buf buf2
-    span `seq` setLastToken span bytes bytes
-    t span buf bytes
+        setInput inp2
+        lexToken
+    AlexToken inp2@(AI end _ buf2) _ t -> do
+        setInput inp2
+        let span = mkSrcSpan loc1 end
+        let bytes = byteDiff buf buf2
+        span `seq` setLastToken span bytes bytes
+        t span buf bytes
  
  reportLexError loc1 loc2 buf str
    | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input")