Add quasi-quotation, courtesy of Geoffrey Mainland

[ghc-hetmet.git] / compiler / parser / Lexer.x
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x

index 49dabf0..84ee57e 100644 (file)
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -21,18 +21,28 @@
  --    - pragma-end should be only valid in a pragma
  
  {
+{-# OPTIONS -w #-}
+-- The above warning supression flag is a temporary kludge.
+-- While working on this module you are encouraged to remove it and fix
+-- any warnings in the module. See
+--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
+-- for details
+
  module Lexer (
     Token(..), lexer, pragState, mkPState, PState(..),
     P(..), ParseResult(..), getSrcLoc, 
     failLocMsgP, failSpanMsgP, srcParseFail,
+   getMessages,
     popContext, pushCurrentContext, setLastToken, setSrcLoc,
     getLexState, popLexState, pushLexState,
-   extension, glaExtsEnabled, bangPatEnabled
+   extension, standaloneDerivingEnabled, bangPatEnabled,
+   addWarning
    ) where
  
  #include "HsVersions.h"
  
-import ErrUtils                ( Message )
+import Bag
+import ErrUtils
  import Outputable
  import StringBuffer
  import FastString
@@ -43,8 +53,9 @@ import DynFlags
  import Ctype
  import Util            ( maybePrefixMatch, readRational )
  
+import Control.Monad
  import Data.Bits
-import Data.Char       ( chr, isSpace )
+import Data.Char       ( chr, ord, isSpace )
  import Data.Ratio
  import Debug.Trace
  
@@ -55,29 +66,30 @@ import Compat.Unicode       ( GeneralCategory(..), generalCategory, isPrint, isUpper )
  #endif
  }
  
-$unispace    = \x05
-$whitechar   = [\ \t\n\r\f\v\xa0 $unispace]
+$unispace    = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
+$whitechar   = [\ \n\r\f\v\xa0 $unispace]
  $white_no_nl = $whitechar # \n
+$tab         = \t
  
  $ascdigit  = 0-9
-$unidigit  = \x03
+$unidigit  = \x03 -- Trick Alex into handling Unicode. See alexGetChar.
  $decdigit  = $ascdigit -- for now, should really be $digit (ToDo)
  $digit     = [$ascdigit $unidigit]
  
  $special   = [\(\)\,\;\[\]\`\{\}]
  $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~ \xa1-\xbf \xd7 \xf7]
-$unisymbol = \x04
+$unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
  $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  
-$unilarge  = \x01
+$unilarge  = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
  $asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
  $large     = [$asclarge $unilarge]
  
-$unismall  = \x02
+$unismall  = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
  $ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
  $small     = [$ascsmall $unismall \_]
  
-$unigraphic = \x06
+$unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
  $graphic   = [$small $large $symbol $digit $special $unigraphic \:\"\']
  
  $octit    = 0-7
@@ -104,10 +116,16 @@ $docsym    = [\| \^ \* \$]
  
  @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
  
+-- normal signed numerical literals can only be explicitly negative,
+-- not explicitly positive (contrast @exponent)
+@negative = \-
+@signed = @negative ?
+
  haskell :-
  
  -- everywhere: skip whitespace and comments
  $white_no_nl+                          ;
+$tab+         { warn Opt_WarnTabs (text "Tab character") }
  
  -- Everywhere: deal with nested comments.  We explicitly rule out
  -- pragmas, "{-#", so that we don't accidentally treat them as comments.
@@ -131,7 +149,7 @@ $white_no_nl+                               ;
  -- space followed by a Haddock comment symbol (docsym) (in which case we'd
  -- have a Haddock comment). The rules then munch the rest of the line.
  
-"-- " ~$docsym .* ;
+"-- " ~[$docsym \#] .* ;
  "--" [^$symbol : \ ] .* ;
  
  -- Next, match Haddock comments if no -haddock flag
@@ -192,7 +210,7 @@ $white_no_nl+                               ;
  -- generate a matching '}' token.
  <layout_left>  ()                      { do_layout_left }
  
-<0,option_prags,glaexts> \n                            { begin bol }
+<0,option_prags> \n                            { begin bol }
  
  "{-#" $whitechar* (line|LINE)          { begin line_prag2 }
  
@@ -210,15 +228,16 @@ $white_no_nl+                             ;
     -- NOTE: accept -} at the end of a LINE pragma, for compatibility
     -- with older versions of GHC which generated these.
  
--- We only want RULES pragmas to be picked up when -fglasgow-exts
--- is on, because the contents of the pragma is always written using
--- glasgow-exts syntax (using forall etc.), so if glasgow exts are not
--- enabled, we're sure to get a parse error.
+-- We only want RULES pragmas to be picked up when explicit forall
+-- syntax is enabled is on, because the contents of the pragma always
+-- uses it. If it's not on then we're sure to get a parse error.
  -- (ToDo: we should really emit a warning when ignoring pragmas)
-<glaexts>
-  "{-#" $whitechar* (RULES|rules)      { token ITrules_prag }
+-- XXX Now that we can enable this without the -fglasgow-exts hammer,
+-- is it better just to let the parse error happen?
+<0>
+  "{-#" $whitechar* (RULES|rules) / { ifExtension explicitForallEnabled } { token ITrules_prag }
  
-<0,option_prags,glaexts> {
+<0,option_prags> {
    "{-#" $whitechar* (INLINE|inline)    { token (ITinline_prag True) }
    "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
                                         { token (ITinline_prag False) }
@@ -238,9 +257,6 @@ $white_no_nl+                               ;
    "{-#" $whitechar* (CORE|core)                { token ITcore_prag }
    "{-#" $whitechar* (UNPACK|unpack)    { token ITunpack_prag }
  
-  "{-#" $whitechar* (DOCOPTIONS|docoptions)
-  / { ifExtension haddockEnabled }     { lex_string_prag ITdocOptions }
-
   "{-#"                                 { nested_comment lexToken }
  
    -- ToDo: should only be valid inside a pragma:
@@ -248,36 +264,42 @@ $white_no_nl+                             ;
  }
  
  <option_prags> {
-  "{-#" $whitechar* (OPTIONS|options)   { lex_string_prag IToptions_prag }
-  "{-#" $whitechar* (OPTIONS_GHC|options_ghc)
+  "{-#"  $whitechar* (OPTIONS|options)   { lex_string_prag IToptions_prag }
+  "{-#"  $whitechar* (OPTIONS_GHC|options_ghc)
                                          { lex_string_prag IToptions_prag }
-  "{-#" $whitechar* (LANGUAGE|language) { token ITlanguage_prag }
-  "{-#" $whitechar* (INCLUDE|include)   { lex_string_prag ITinclude_prag }
+  "{-#"  $whitechar* (OPTIONS_HADDOCK|options_haddock)
+                                         { lex_string_prag ITdocOptions }
+  "-- #"                                 { multiline_doc_comment }
+  "{-#"  $whitechar* (LANGUAGE|language) { token ITlanguage_prag }
+  "{-#"  $whitechar* (INCLUDE|include)   { lex_string_prag ITinclude_prag }
  }
  
-<0,option_prags,glaexts> {
+<0> {
+  "-- #" .* ;
+}
+
+<0,option_prags> {
         -- This is to catch things like {-# OPTIONS OPTIONS_HUGS ... 
    "{-#" $whitechar* $idchar+           { nested_comment lexToken }
  }
  
  -- '0' state: ordinary lexemes
--- 'glaexts' state: glasgow extensions (postfix '#', etc.)
  
  -- Haddock comments
  
-<0,glaexts> {
-  "-- " / $docsym    { multiline_doc_comment }
-  "{-" \ ? / $docsym { nested_doc_comment }
+<0> {
+  "-- " $docsym      / { ifExtension haddockEnabled } { multiline_doc_comment }
+  "{-" \ ? $docsym   / { ifExtension haddockEnabled } { nested_doc_comment }
  }
  
  -- "special" symbols
  
-<0,glaexts> {
+<0> {
    "[:" / { ifExtension parrEnabled }   { token ITopabrack }
    ":]" / { ifExtension parrEnabled }   { token ITcpabrack }
  }
    
-<0,glaexts> {
+<0> {
    "[|"     / { ifExtension thEnabled } { token ITopenExpQuote }
    "[e|"            / { ifExtension thEnabled } { token ITopenExpQuote }
    "[p|"            / { ifExtension thEnabled } { token ITopenPatQuote }
@@ -286,26 +308,34 @@ $white_no_nl+                             ;
    "|]"     / { ifExtension thEnabled } { token ITcloseQuote }
    \$ @varid / { ifExtension thEnabled }        { skip_one_varid ITidEscape }
    "$("     / { ifExtension thEnabled } { token ITparenEscape }
+
+  "[$" @varid "|"  / { ifExtension qqEnabled }
+                     { lex_quasiquote_tok }
  }
  
-<0,glaexts> {
+<0> {
    "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol }
                                         { special IToparenbar }
    "|)" / { ifExtension arrowsEnabled }  { special ITcparenbar }
  }
  
-<0,glaexts> {
+<0> {
    \? @varid / { ifExtension ipEnabled }        { skip_one_varid ITdupipvarid }
  }
  
-<glaexts> {
-  "(#" / { notFollowedBySymbol }       { token IToubxparen }
-  "#)"                                 { token ITcubxparen }
-  "{|"                                 { token ITocurlybar }
-  "|}"                                 { token ITccurlybar }
+<0> {
+  "(#" / { ifExtension unboxedTuplesEnabled `alexAndPred` notFollowedBySymbol }
+         { token IToubxparen }
+  "#)" / { ifExtension unboxedTuplesEnabled }
+         { token ITcubxparen }
  }
  
-<0,option_prags,glaexts> {
+<0> {
+  "{|" / { ifExtension genericsEnabled } { token ITocurlybar }
+  "|}" / { ifExtension genericsEnabled } { token ITccurlybar }
+}
+
+<0,option_prags> {
    \(                                   { special IToparen }
    \)                                   { special ITcparen }
    \[                                   { special ITobrack }
@@ -318,67 +348,68 @@ $white_no_nl+                             ;
    \}                                   { close_brace }
  }
  
-<0,option_prags,glaexts> {
-  @qual @varid                 { check_qvarid }
+<0,option_prags> {
+  @qual @varid                 { idtoken qvarid }
    @qual @conid                 { idtoken qconid }
    @varid                       { varid }
    @conid                       { idtoken conid }
  }
  
--- after an illegal qvarid, such as 'M.let', 
--- we back up and try again in the bad_qvarid state:
-<bad_qvarid> {
-  @conid                       { pop_and (idtoken conid) }
-  @qual @conid                 { pop_and (idtoken qconid) }
-}
-
-<glaexts> {
-  @qual @varid "#"+            { idtoken qvarid }
-  @qual @conid "#"+            { idtoken qconid }
-  @varid "#"+                  { varid }
-  @conid "#"+                  { idtoken conid }
+<0> {
+  @qual @varid "#"+ / { ifExtension magicHashEnabled } { idtoken qvarid }
+  @qual @conid "#"+ / { ifExtension magicHashEnabled } { idtoken qconid }
+  @varid "#"+       / { ifExtension magicHashEnabled } { varid }
+  @conid "#"+       / { ifExtension magicHashEnabled } { idtoken conid }
  }
  
  -- ToDo: M.(,,,)
  
-<0,glaexts> {
+<0> {
    @qual @varsym                        { idtoken qvarsym }
    @qual @consym                        { idtoken qconsym }
    @varsym                      { varsym }
    @consym                      { consym }
  }
  
-<0,glaexts> {
-  @decimal                     { tok_decimal }
-  0[oO] @octal                 { tok_octal }
-  0[xX] @hexadecimal           { tok_hexadecimal }
-}
+-- For the normal boxed literals we need to be careful
+-- when trying to be close to Haskell98
+<0> {
+  -- Normal integral literals (:: Num a => a, from Integer)
+  @decimal                     { tok_num positive 0 0 decimal }
+  0[oO] @octal                 { tok_num positive 2 2 octal }
+  0[xX] @hexadecimal           { tok_num positive 2 2 hexadecimal }
  
-<glaexts> {
-  @decimal \#                  { prim_decimal }
-  0[oO] @octal \#              { prim_octal }
-  0[xX] @hexadecimal \#                { prim_hexadecimal }
+  -- Normal rational literals (:: Fractional a => a, from Rational)
+  @floating_point              { strtoken tok_float }
  }
  
-<0,glaexts> @floating_point            { strtoken tok_float }
-<glaexts>   @floating_point \#         { init_strtoken 1 prim_float }
-<glaexts>   @floating_point \# \#      { init_strtoken 2 prim_double }
+<0> {
+  -- Unboxed ints (:: Int#)
+  -- It's simpler (and faster?) to give separate cases to the negatives,
+  -- especially considering octal/hexadecimal prefixes.
+  @decimal \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
+  0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
+  0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
+  @negative @decimal \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
+  @negative 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
+  @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
+
+  -- Unboxed floats and doubles (:: Float#, :: Double#)
+  -- prim_{float,double} work with signed literals
+  @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
+  @signed @floating_point \# \# / { ifExtension magicHashEnabled } { init_strtoken 2 tok_primdouble }
+}
  
  -- Strings and chars are lexed by hand-written code.  The reason is
  -- that even if we recognise the string or char here in the regex
  -- lexer, we would still have to parse the string afterward in order
  -- to convert it to a String.
-<0,glaexts> {
+<0> {
    \'                           { lex_char_tok }
    \"                           { lex_string_tok }
  }
  
  {
--- work around bug in Alex 2.0
-#if __GLASGOW_HASKELL__ < 503
-unsafeAt arr i = arr ! i
-#endif
-
  -- -----------------------------------------------------------------------------
  -- The token type
  
@@ -389,7 +420,6 @@ data Token
    | ITdata
    | ITdefault
    | ITderiving
-  | ITderive
    | ITdo
    | ITelse
    | IThiding
@@ -423,6 +453,9 @@ data Token
    | ITdotnet
    | ITmdo
    | ITfamily
+  | ITgroup
+  | ITby
+  | ITusing
  
         -- Pragmas
    | ITinline_prag Bool         -- True <=> INLINE, False <=> NOINLINE
@@ -512,6 +545,7 @@ data Token
    | ITparenEscape              --  $( 
    | ITvarQuote                 --  '
    | ITtyQuote                  --  ''
+  | ITquasiQuote (FastString,FastString,SrcSpan) --  [:...|...|]
  
    -- Arrow notation extension
    | ITproc
@@ -532,6 +566,7 @@ data Token
    | ITdocCommentNamed String     -- something beginning '-- $'
    | ITdocSection      Int String -- a section heading
    | ITdocOptions      String     -- doc options (prune, ignore-exports, etc)
+  | ITdocOptionsOld   String     -- doc options declared "-- # ..."-style
  
  #ifdef DEBUG
    deriving Show -- debugging
@@ -543,7 +578,6 @@ isSpecial :: Token -> Bool
  -- not as a keyword.
  isSpecial ITas         = True
  isSpecial IThiding     = True
-isSpecial ITderive     = True
  isSpecial ITqualified  = True
  isSpecial ITforall     = True
  isSpecial ITexport     = True
@@ -556,6 +590,9 @@ isSpecial ITccallconv   = True
  isSpecial ITstdcallconv = True
  isSpecial ITmdo                = True
  isSpecial ITfamily     = True
+isSpecial ITgroup   = True
+isSpecial ITby      = True
+isSpecial ITusing   = True
  isSpecial _             = False
  
  -- the bitmap provided as the third component indicates whether the
@@ -574,7 +611,6 @@ reservedWordsFM = listToUFM $
         ( "data",       ITdata,         0 ),     
         ( "default",    ITdefault,      0 ),  
         ( "deriving",   ITderiving,     0 ), 
-       ( "derive",     ITderive,       0 ), 
         ( "do",         ITdo,           0 ),       
         ( "else",       ITelse,         0 ),     
         ( "hiding",     IThiding,       0 ),
@@ -595,9 +631,12 @@ reservedWordsFM = listToUFM $
         ( "where",      ITwhere,        0 ),
         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
  
-       ( "forall",     ITforall,        bit tvBit),
-       ( "mdo",        ITmdo,           bit glaExtsBit),
-       ( "family",     ITfamily,        bit idxTysBit),
+    ( "forall",        ITforall,        bit explicitForallBit),
+       ( "mdo",        ITmdo,           bit recursiveDoBit),
+       ( "family",     ITfamily,        bit tyFamBit),
+    ( "group",  ITgroup,     bit transformComprehensionsBit),
+    ( "by",     ITby,        bit transformComprehensionsBit),
+    ( "using",  ITusing,     bit transformComprehensionsBit),
  
         ( "foreign",    ITforeign,       bit ffiBit),
         ( "export",     ITexport,        bit ffiBit),
@@ -614,40 +653,42 @@ reservedWordsFM = listToUFM $
         ( "proc",       ITproc,          bit arrowsBit)
       ]
  
+reservedSymsFM :: UniqFM (Token, Int -> Bool)
  reservedSymsFM = listToUFM $
-       map (\ (x,y,z) -> (mkFastString x,(y,z)))
-      [ ("..", ITdotdot,       0)
-       ,(":",  ITcolon,        0)      -- (:) is a reserved op, 
-                                               -- meaning only list cons
-       ,("::", ITdcolon,       0)
-       ,("=",  ITequal,        0)
-       ,("\\", ITlam,          0)
-       ,("|",  ITvbar,         0)
-       ,("<-", ITlarrow,       0)
-       ,("->", ITrarrow,       0)
-       ,("@",  ITat,           0)
-       ,("~",  ITtilde,        0)
-       ,("=>", ITdarrow,       0)
-       ,("-",  ITminus,        0)
-       ,("!",  ITbang,         0)
-
-       ,("*",  ITstar,         bit glaExtsBit .|. 
-                               bit idxTysBit)      -- For data T (a::*) = MkT
-       ,(".",  ITdot,          bit tvBit)          -- For 'forall a . t'
-
-       ,("-<", ITlarrowtail,   bit arrowsBit)
-       ,(">-", ITrarrowtail,   bit arrowsBit)
-       ,("-<<",        ITLarrowtail,   bit arrowsBit)
-       ,(">>-",        ITRarrowtail,   bit arrowsBit)
+    map (\ (x,y,z) -> (mkFastString x,(y,z)))
+      [ ("..",  ITdotdot,   always)
+        -- (:) is a reserved op, meaning only list cons
+       ,(":",   ITcolon,    always)
+       ,("::",  ITdcolon,   always)
+       ,("=",   ITequal,    always)
+       ,("\\",  ITlam,      always)
+       ,("|",   ITvbar,     always)
+       ,("<-",  ITlarrow,   always)
+       ,("->",  ITrarrow,   always)
+       ,("@",   ITat,       always)
+       ,("~",   ITtilde,    always)
+       ,("=>",  ITdarrow,   always)
+       ,("-",   ITminus,    always)
+       ,("!",   ITbang,     always)
+
+        -- For data T (a::*) = MkT
+       ,("*", ITstar, \i -> kindSigsEnabled i || tyFamEnabled i)
+        -- For 'forall a . t'
+       ,(".", ITdot, explicitForallEnabled)
+
+       ,("-<",  ITlarrowtail, arrowsEnabled)
+       ,(">-",  ITrarrowtail, arrowsEnabled)
+       ,("-<<", ITLarrowtail, arrowsEnabled)
+       ,(">>-", ITRarrowtail, arrowsEnabled)
  
  #if __GLASGOW_HASKELL__ >= 605
-       ,("λ", ITlam,          bit glaExtsBit)
-       ,("∷",   ITdcolon,       bit glaExtsBit)
-       ,("⇒",   ITdarrow,    bit glaExtsBit)
-       ,("∀",        ITforall,       bit glaExtsBit)
-       ,("→",   ITrarrow,    bit glaExtsBit)
-       ,("←",   ITlarrow,    bit glaExtsBit)
-       ,("?",  ITdotdot,       bit glaExtsBit)
+       ,("∷",   ITdcolon, unicodeSyntaxEnabled)
+       ,("⇒",   ITdarrow, unicodeSyntaxEnabled)
+       ,("∀",   ITforall, \i -> unicodeSyntaxEnabled i &&
+                                explicitForallEnabled i)
+       ,("→",   ITrarrow, unicodeSyntaxEnabled)
+       ,("←",   ITlarrow, unicodeSyntaxEnabled)
+       ,("⋯",   ITdotdot, unicodeSyntaxEnabled)
          -- ToDo: ideally, → and ∷ should be "specials", so that they cannot
          -- form part of a large operator.  This would let us have a better
          -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe).
@@ -700,12 +741,17 @@ notFollowedBy char _ _ _ (AI _ _ buf)
  notFollowedBySymbol _ _ _ (AI _ _ buf)
    = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
  
+-- We must reject doc comments as being ordinary comments everywhere.
+-- In some cases the doc comment will be selected as the lexeme due to
+-- maximal munch, but not always, because the nested comment rule is
+-- valid in all states, but the doc-comment rules are only valid in
+-- the non-layout states.
  isNormalComment bits _ _ (AI _ _ buf)
    | haddockEnabled bits = notFollowedByDocOrPragma
    | otherwise           = nextCharIs buf (/='#')
-  where 
-    notFollowedByDocOrPragma 
-       = not $ spaceAndP buf (`nextCharIs` (`elem` "|^*$#"))
+  where
+    notFollowedByDocOrPragma
+       = not $ spaceAndP buf (`nextCharIs` (`elem` "|^*$#"))
  
  spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf))
  
@@ -751,7 +797,7 @@ multiline_doc_comment span buf _len = withLexedDocType (worker "")
  nested_comment :: P (Located Token) -> Action
  nested_comment cont span _str _len = do
    input <- getInput
-  go 1 input
+  go (1::Int) input
    where
      go 0 input = do setInput input; cont
      go n input = case alexGetChar input of
@@ -786,13 +832,13 @@ nested_doc_comment span buf _len = withLexedDocType (go "")
        Just (c,input) -> go (c:commentAcc) input docType False
  
  withLexedDocType lexDocComment = do
-  input <- getInput
-  case alexGetChar input of
-    Nothing -> error "Can't happen"
-    Just ('|', input) -> lexDocComment input ITdocCommentNext False
-    Just ('^', input) -> lexDocComment input ITdocCommentPrev False
-    Just ('$', input) -> lexDocComment input ITdocCommentNamed False
-    Just ('*', input) -> lexDocSection 1 input 
+  input@(AI _ _ buf) <- getInput
+  case prevChar buf ' ' of
+    '|' -> lexDocComment input ITdocCommentNext False
+    '^' -> lexDocComment input ITdocCommentPrev False
+    '$' -> lexDocComment input ITdocCommentNamed False
+    '*' -> lexDocSection 1 input
+    '#' -> lexDocComment input ITdocOptionsOld False
   where 
      lexDocSection n input = case alexGetChar input of 
        Just ('*', input) -> lexDocSection (n+1) input
@@ -842,30 +888,6 @@ close_brace span _str _len = do
    popContext
    return (L span ITccurly)
  
--- We have to be careful not to count M.<varid> as a qualified name
--- when <varid> is a keyword.  We hack around this by catching 
--- the offending tokens afterward, and re-lexing in a different state.
-check_qvarid span buf len = do
-  case lookupUFM reservedWordsFM var of
-       Just (keyword,exts)
-         | not (isSpecial keyword) ->
-         if exts == 0 
-            then try_again
-            else do
-               b <- extension (\i -> exts .&. i /= 0)
-               if b then try_again
-                    else return token
-       _other -> return token
-  where
-       (mod,var) = splitQualName buf len
-       token     = L span (ITqvarid (mod,var))
-
-       try_again = do
-               (AI _ offs _) <- getInput       
-               setInput (AI (srcSpanStart span) (offs-len) buf)
-               pushLexState bad_qvarid
-               lexToken
-
  qvarid buf len = ITqvarid $! splitQualName buf len
  qconid buf len = ITqconid $! splitQualName buf len
  
@@ -922,36 +944,37 @@ consym = sym ITconsym
  
  sym con span buf len = 
    case lookupUFM reservedSymsFM fs of
-       Just (keyword,0)    -> return (L span keyword)
         Just (keyword,exts) -> do
-               b <- extension (\i -> exts .&. i /= 0)
+               b <- extension exts
                 if b then return (L span keyword)
                      else return (L span $! con fs)
         _other -> return (L span $! con fs)
    where
         fs = lexemeToFastString buf len
  
-tok_decimal span buf len 
-  = return (L span (ITinteger  $! parseInteger buf len 10 octDecDigit))
-
-tok_octal span buf len 
-  = return (L span (ITinteger  $! parseInteger (offsetBytes 2 buf) (len-2) 8 octDecDigit))
-
-tok_hexadecimal span buf len 
-  = return (L span (ITinteger  $! parseInteger (offsetBytes 2 buf) (len-2) 16 hexDigit))
-
-prim_decimal span buf len 
-  = return (L span (ITprimint  $! parseInteger buf (len-1) 10 octDecDigit))
-
-prim_octal span buf len 
-  = return (L span (ITprimint  $! parseInteger (offsetBytes 2 buf) (len-3) 8 octDecDigit))
-
-prim_hexadecimal span buf len 
-  = return (L span (ITprimint  $! parseInteger (offsetBytes 2 buf) (len-3) 16 hexDigit))
-
+-- Variations on the integral numeric literal.
+tok_integral :: (Integer -> Token)
+     -> (Integer -> Integer)
+ --    -> (StringBuffer -> StringBuffer) -> (Int -> Int)
+     -> Int -> Int
+     -> (Integer, (Char->Int)) -> Action
+tok_integral itint transint transbuf translen (radix,char_to_int) span buf len =
+  return $ L span $ itint $! transint $ parseUnsignedInteger
+     (offsetBytes transbuf buf) (subtract translen len) radix char_to_int
+
+-- some conveniences for use with tok_integral
+tok_num = tok_integral ITinteger
+tok_primint = tok_integral ITprimint
+positive = id
+negative = negate
+decimal = (10,octDecDigit)
+octal = (8,octDecDigit)
+hexadecimal = (16,hexDigit)
+
+-- readRational can understand negative rationals, exponents, everything.
  tok_float        str = ITrational   $! readRational str
-prim_float       str = ITprimfloat  $! readRational str
-prim_double      str = ITprimdouble $! readRational str
+tok_primfloat    str = ITprimfloat  $! readRational str
+tok_primdouble   str = ITprimdouble $! readRational str
  
  -- -----------------------------------------------------------------------------
  -- Layout processing
@@ -1019,7 +1042,7 @@ do_layout_left span _buf _len = do
  
  setLine :: Int -> Action
  setLine code span buf len = do
-  let line = parseInteger buf len 10 octDecDigit
+  let line = parseUnsignedInteger buf len 10 octDecDigit
    setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0)
         -- subtract one: the line number refers to the *following* line
    popLexState
@@ -1079,8 +1102,8 @@ lex_string s = do
  
      Just ('"',i)  -> do
         setInput i
-       glaexts <- extension glaExtsEnabled
-       if glaexts
+       magicHash <- extension magicHashEnabled
+       if magicHash
           then do
             i <- getInput
             case alexGetChar' i of
@@ -1164,9 +1187,9 @@ lex_char_tok span buf len = do    -- We've seen '
  finish_char_tok :: SrcLoc -> Char -> P (Located Token)
  finish_char_tok loc ch -- We've already seen the closing quote
                         -- Just need to check for trailing #
-  = do glaexts <- extension glaExtsEnabled
+  = do magicHash <- extension magicHashEnabled
         i@(AI end _ _) <- getInput
-       if glaexts then do
+       if magicHash then do
                 case alexGetChar' i of
                         Just ('#',i@(AI end _ _)) -> do
                                 setInput i
@@ -1299,6 +1322,50 @@ getCharOrFail =  do
         Just (c,i)  -> do setInput i; return c
  
  -- -----------------------------------------------------------------------------
+-- QuasiQuote
+
+lex_quasiquote_tok :: Action
+lex_quasiquote_tok span buf len = do
+  let quoter = reverse $ takeWhile (/= '$')
+               $ reverse $ lexemeToString buf (len - 1)
+  quoteStart <- getSrcLoc              
+  quote <- lex_quasiquote ""
+  end <- getSrcLoc 
+  return (L (mkSrcSpan (srcSpanStart span) end)
+           (ITquasiQuote (mkFastString quoter,
+                          mkFastString (reverse quote),
+                          mkSrcSpan quoteStart end)))
+
+lex_quasiquote :: String -> P String
+lex_quasiquote s = do
+  i <- getInput
+  case alexGetChar' i of
+    Nothing -> lit_error
+
+    Just ('\\',i)
+       | Just ('|',i) <- next -> do 
+               setInput i; lex_quasiquote ('|' : s)
+       | Just (']',i) <- next -> do 
+               setInput i; lex_quasiquote (']' : s)
+       where next = alexGetChar' i
+
+    Just ('|',i)
+       | Just (']',i) <- next -> do 
+               setInput i; return s
+       where next = alexGetChar' i
+
+    Just (c, i) -> do
+        setInput i; lex_quasiquote (c : s)
+
+-- -----------------------------------------------------------------------------
+-- Warnings
+
+warn :: DynFlag -> SDoc -> Action
+warn option warning srcspan _buf _len = do
+    addWarning option srcspan warning
+    lexToken
+
+-- -----------------------------------------------------------------------------
  -- The Parse Monad
  
  data LayoutContext
@@ -1316,6 +1383,8 @@ data ParseResult a
  
  data PState = PState { 
         buffer     :: StringBuffer,
+    dflags     :: DynFlags,
+    messages   :: Messages,
          last_loc   :: SrcSpan, -- pos of previous token
          last_offs  :: !Int,    -- offset of the previous token from the
                                 -- beginning of  the current line.
@@ -1406,6 +1475,9 @@ alexGetChar (AI loc ofs s)
         adj_c 
           | c <= '\x06' = non_graphic
           | c <= '\xff' = c
+          -- Alex doesn't handle Unicode, so when Unicode
+          -- character is encoutered we output these values
+          -- with the actual character value hidden in the state.
           | otherwise = 
                 case generalCategory c of
                   UppercaseLetter       -> upper
@@ -1469,30 +1541,47 @@ getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls
  -- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
  -- integer
  
-glaExtsBit, ffiBit, parrBit :: Int
-glaExtsBit = 0
+genericsBit, ffiBit, parrBit :: Int
+genericsBit = 0 -- {| and |}
  ffiBit    = 1
  parrBit           = 2
  arrowsBit  = 4
  thBit     = 5
  ipBit      = 6
-tvBit     = 7  -- Scoped type variables enables 'forall' keyword
+explicitForallBit = 7 -- the 'forall' keyword and '.' symbol
  bangPatBit = 8 -- Tells the parser to understand bang-patterns
                 -- (doesn't affect the lexer)
-idxTysBit  = 9 -- indexed type families: 'family' keyword and kind sigs
+tyFamBit   = 9 -- indexed type families: 'family' keyword and kind sigs
  haddockBit = 10 -- Lex and parse Haddock comments
-
-glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
-glaExtsEnabled flags = testBit flags glaExtsBit
-ffiEnabled     flags = testBit flags ffiBit
-parrEnabled    flags = testBit flags parrBit
-arrowsEnabled  flags = testBit flags arrowsBit
-thEnabled      flags = testBit flags thBit
-ipEnabled      flags = testBit flags ipBit
-tvEnabled      flags = testBit flags tvBit
-bangPatEnabled flags = testBit flags bangPatBit
-idxTysEnabled  flags = testBit flags idxTysBit
-haddockEnabled flags = testBit flags haddockBit
+magicHashBit = 11 -- # in both functions and operators
+kindSigsBit = 12 -- Kind signatures on type variables
+recursiveDoBit = 13 -- mdo
+unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
+unboxedTuplesBit = 15 -- (# and #)
+standaloneDerivingBit = 16 -- standalone instance deriving declarations
+transformComprehensionsBit = 17
+qqBit     = 18 -- enable quasiquoting
+
+genericsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
+always           _     = True
+genericsEnabled  flags = testBit flags genericsBit
+ffiEnabled       flags = testBit flags ffiBit
+parrEnabled      flags = testBit flags parrBit
+arrowsEnabled    flags = testBit flags arrowsBit
+thEnabled        flags = testBit flags thBit
+ipEnabled        flags = testBit flags ipBit
+explicitForallEnabled flags = testBit flags explicitForallBit
+bangPatEnabled   flags = testBit flags bangPatBit
+tyFamEnabled     flags = testBit flags tyFamBit
+haddockEnabled   flags = testBit flags haddockBit
+magicHashEnabled flags = testBit flags magicHashBit
+kindSigsEnabled  flags = testBit flags kindSigsBit
+recursiveDoEnabled flags = testBit flags recursiveDoBit
+unicodeSyntaxEnabled flags = testBit flags unicodeSyntaxBit
+unboxedTuplesEnabled flags = testBit flags unboxedTuplesBit
+standaloneDerivingEnabled flags = testBit flags standaloneDerivingBit
+transformComprehensionsEnabled flags = testBit flags transformComprehensionsBit
+qqEnabled        flags = testBit flags qqBit
  
  -- PState for parsing options pragmas
  --
@@ -1500,6 +1589,10 @@ pragState :: StringBuffer -> SrcLoc -> PState
  pragState buf loc  = 
    PState {
        buffer         = buf,
+      messages      = emptyMessages,
+      -- XXX defaultDynFlags is not right, but we don't have a real
+      -- dflags handy
+      dflags        = defaultDynFlags,
        last_loc      = mkSrcSpan loc loc,
        last_offs     = 0,
        last_len      = 0,
@@ -1517,6 +1610,8 @@ mkPState :: StringBuffer -> SrcLoc -> DynFlags -> PState
  mkPState buf loc flags  = 
    PState {
        buffer         = buf,
+      dflags        = flags,
+      messages      = emptyMessages,
        last_loc      = mkSrcSpan loc loc,
        last_offs     = 0,
        last_len      = 0,
@@ -1524,25 +1619,47 @@ mkPState buf loc flags  =
        loc           = loc,
        extsBitmap    = fromIntegral bitmap,
        context       = [],
-      lex_state     = [bol, if glaExtsEnabled bitmap then glaexts else 0]
+      lex_state     = [bol, 0]
         -- we begin in the layout state if toplev_layout is set
      }
      where
-      bitmap =     glaExtsBit `setBitIf` dopt Opt_GlasgowExts flags
-              .|. ffiBit     `setBitIf` dopt Opt_FFI         flags
-              .|. parrBit    `setBitIf` dopt Opt_PArr        flags
-              .|. arrowsBit  `setBitIf` dopt Opt_Arrows      flags
-              .|. thBit      `setBitIf` dopt Opt_TH          flags
-              .|. ipBit      `setBitIf` dopt Opt_ImplicitParams flags
-              .|. tvBit      `setBitIf` dopt Opt_ScopedTypeVariables flags
-              .|. bangPatBit `setBitIf` dopt Opt_BangPatterns flags
-              .|. idxTysBit  `setBitIf` dopt Opt_IndexedTypes flags
-              .|. haddockBit `setBitIf` dopt Opt_Haddock     flags
+      bitmap = genericsBit `setBitIf` dopt Opt_Generics flags
+              .|. ffiBit       `setBitIf` dopt Opt_ForeignFunctionInterface flags
+              .|. parrBit      `setBitIf` dopt Opt_PArr         flags
+              .|. arrowsBit    `setBitIf` dopt Opt_Arrows       flags
+              .|. thBit        `setBitIf` dopt Opt_TemplateHaskell flags
+              .|. qqBit        `setBitIf` dopt Opt_QuasiQuotes flags
+              .|. ipBit        `setBitIf` dopt Opt_ImplicitParams flags
+              .|. explicitForallBit `setBitIf` dopt Opt_ScopedTypeVariables flags
+              .|. explicitForallBit `setBitIf` dopt Opt_PolymorphicComponents flags
+              .|. explicitForallBit `setBitIf` dopt Opt_ExistentialQuantification flags
+              .|. explicitForallBit `setBitIf` dopt Opt_Rank2Types flags
+              .|. explicitForallBit `setBitIf` dopt Opt_RankNTypes flags
+              .|. bangPatBit   `setBitIf` dopt Opt_BangPatterns flags
+              .|. tyFamBit     `setBitIf` dopt Opt_TypeFamilies flags
+              .|. haddockBit   `setBitIf` dopt Opt_Haddock      flags
+              .|. magicHashBit `setBitIf` dopt Opt_MagicHash    flags
+              .|. kindSigsBit  `setBitIf` dopt Opt_KindSignatures flags
+              .|. recursiveDoBit `setBitIf` dopt Opt_RecursiveDo flags
+              .|. unicodeSyntaxBit `setBitIf` dopt Opt_UnicodeSyntax flags
+              .|. unboxedTuplesBit `setBitIf` dopt Opt_UnboxedTuples flags
+              .|. standaloneDerivingBit `setBitIf` dopt Opt_StandaloneDeriving flags
+           .|. transformComprehensionsBit `setBitIf` dopt Opt_TransformListComp flags
        --
        setBitIf :: Int -> Bool -> Int
        b `setBitIf` cond | cond      = bit b
                         | otherwise = 0
  
+addWarning :: DynFlag -> SrcSpan -> SDoc -> P ()
+addWarning option srcspan warning
+ = P $ \s@PState{messages=(ws,es), dflags=d} ->
+       let warning' = mkWarnMsg srcspan alwaysQualify warning
+           ws' = if dopt option d then ws `snocBag` warning' else ws
+       in POk s{messages=(ws', es)} ()
+
+getMessages :: PState -> Messages
+getMessages PState{messages=ms} = ms
+
  getContext :: P [LayoutContext]
  getContext = P $ \s@PState{context=ctx} -> POk s ctx