Fix stack check amount for user declared continuation functions

[ghc-hetmet.git] / compiler / parser / Lexer.x
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x

index 6a25ae5..db48dbe 100644 (file)
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -25,6 +25,7 @@ module Lexer (
     Token(..), lexer, pragState, mkPState, PState(..),
     P(..), ParseResult(..), getSrcLoc, 
     failLocMsgP, failSpanMsgP, srcParseFail,
+   getMessages,
     popContext, pushCurrentContext, setLastToken, setSrcLoc,
     getLexState, popLexState, pushLexState,
     extension, glaExtsEnabled, bangPatEnabled
@@ -32,7 +33,8 @@ module Lexer (
  
  #include "HsVersions.h"
  
-import ErrUtils                ( Message )
+import Bag
+import ErrUtils
  import Outputable
  import StringBuffer
  import FastString
@@ -43,6 +45,7 @@ import DynFlags
  import Ctype
  import Util            ( maybePrefixMatch, readRational )
  
+import Control.Monad
  import Data.Bits
  import Data.Char       ( chr, isSpace )
  import Data.Ratio
@@ -55,29 +58,30 @@ import Compat.Unicode       ( GeneralCategory(..), generalCategory, isPrint, isUpper )
  #endif
  }
  
-$unispace    = \x05
-$whitechar   = [\ \t\n\r\f\v\xa0 $unispace]
+$unispace    = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
+$whitechar   = [\ \n\r\f\v\xa0 $unispace]
  $white_no_nl = $whitechar # \n
+$tab         = \t
  
  $ascdigit  = 0-9
-$unidigit  = \x03
+$unidigit  = \x03 -- Trick Alex into handling Unicode. See alexGetChar.
  $decdigit  = $ascdigit -- for now, should really be $digit (ToDo)
  $digit     = [$ascdigit $unidigit]
  
  $special   = [\(\)\,\;\[\]\`\{\}]
  $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~ \xa1-\xbf \xd7 \xf7]
-$unisymbol = \x04
+$unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
  $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  
-$unilarge  = \x01
+$unilarge  = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
  $asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
  $large     = [$asclarge $unilarge]
  
-$unismall  = \x02
+$unismall  = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
  $ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
  $small     = [$ascsmall $unismall \_]
  
-$unigraphic = \x06
+$unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
  $graphic   = [$small $large $symbol $digit $special $unigraphic \:\"\']
  
  $octit    = 0-7
@@ -104,10 +108,16 @@ $docsym    = [\| \^ \* \$]
  
  @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
  
+-- normal signed numerical literals can only be explicitly negative,
+-- not explicitly positive (contrast @exponent)
+@negative = \-
+@signed = @negative ?
+
  haskell :-
  
  -- everywhere: skip whitespace and comments
  $white_no_nl+                          ;
+$tab+         { warn Opt_WarnTabs (text "Tab character") }
  
  -- Everywhere: deal with nested comments.  We explicitly rule out
  -- pragmas, "{-#", so that we don't accidentally treat them as comments.
@@ -233,6 +243,8 @@ $white_no_nl+                               ;
    "{-#" $whitechar* (DEPRECATED|deprecated)
                                         { token ITdeprecated_prag }
    "{-#" $whitechar* (SCC|scc)          { token ITscc_prag }
+  "{-#" $whitechar* (GENERATED|generated)
+                                       { token ITgenerated_prag }
    "{-#" $whitechar* (CORE|core)                { token ITcore_prag }
    "{-#" $whitechar* (UNPACK|unpack)    { token ITunpack_prag }
  
@@ -264,8 +276,8 @@ $white_no_nl+                               ;
  -- Haddock comments
  
  <0,glaexts> {
-  "-- " / $docsym    { multiline_doc_comment }
-  "{-" \ ? / $docsym { nested_doc_comment }
+  "-- " $docsym    / { ifExtension haddockEnabled } { multiline_doc_comment }
+  "{-" \ ? $docsym / { ifExtension haddockEnabled } { nested_doc_comment }
  }
  
  -- "special" symbols
@@ -346,22 +358,35 @@ $white_no_nl+                             ;
    @consym                      { consym }
  }
  
+-- For the normal boxed literals we need to be careful
+-- when trying to be close to Haskell98
  <0,glaexts> {
-  @decimal                     { tok_decimal }
-  0[oO] @octal                 { tok_octal }
-  0[xX] @hexadecimal           { tok_hexadecimal }
+  -- Normal integral literals (:: Num a => a, from Integer)
+  @decimal                     { tok_num positive 0 0 decimal }
+  0[oO] @octal                 { tok_num positive 2 2 octal }
+  0[xX] @hexadecimal           { tok_num positive 2 2 hexadecimal }
+
+  -- Normal rational literals (:: Fractional a => a, from Rational)
+  @floating_point              { strtoken tok_float }
  }
  
  <glaexts> {
-  @decimal \#                  { prim_decimal }
-  0[oO] @octal \#              { prim_octal }
-  0[xX] @hexadecimal \#                { prim_hexadecimal }
+  -- Unboxed ints (:: Int#)
+  -- It's simpler (and faster?) to give separate cases to the negatives,
+  -- especially considering octal/hexadecimal prefixes.
+  @decimal \#                  { tok_primint positive 0 1 decimal }
+  0[oO] @octal \#              { tok_primint positive 2 3 octal }
+  0[xX] @hexadecimal \#                { tok_primint positive 2 3 hexadecimal }
+  @negative @decimal \#                        { tok_primint negative 1 2 decimal }
+  @negative 0[oO] @octal \#            { tok_primint negative 3 4 octal }
+  @negative 0[xX] @hexadecimal \#      { tok_primint negative 3 4 hexadecimal }
+
+  -- Unboxed floats and doubles (:: Float#, :: Double#)
+  -- prim_{float,double} work with signed literals
+  @signed @floating_point \#           { init_strtoken 1 tok_primfloat }
+  @signed @floating_point \# \#                { init_strtoken 2 tok_primdouble }
  }
  
-<0,glaexts> @floating_point            { strtoken tok_float }
-<glaexts>   @floating_point \#         { init_strtoken 1 prim_float }
-<glaexts>   @floating_point \# \#      { init_strtoken 2 prim_double }
-
  -- Strings and chars are lexed by hand-written code.  The reason is
  -- that even if we recognise the string or char here in the regex
  -- lexer, we would still have to parse the string afterward in order
@@ -372,11 +397,6 @@ $white_no_nl+                              ;
  }
  
  {
--- work around bug in Alex 2.0
-#if __GLASGOW_HASKELL__ < 503
-unsafeAt arr i = arr ! i
-#endif
-
  -- -----------------------------------------------------------------------------
  -- The token type
  
@@ -387,9 +407,9 @@ data Token
    | ITdata
    | ITdefault
    | ITderiving
+  | ITderive
    | ITdo
    | ITelse
-  | ITfor
    | IThiding
    | ITif
    | ITimport
@@ -420,7 +440,6 @@ data Token
    | ITccallconv
    | ITdotnet
    | ITmdo
-  | ITiso
    | ITfamily
  
         -- Pragmas
@@ -432,6 +451,7 @@ data Token
    | ITdeprecated_prag
    | ITline_prag
    | ITscc_prag
+  | ITgenerated_prag
    | ITcore_prag                 -- hdaume: core annotations
    | ITunpack_prag
    | ITclose_prag
@@ -541,7 +561,7 @@ isSpecial :: Token -> Bool
  -- not as a keyword.
  isSpecial ITas         = True
  isSpecial IThiding     = True
-isSpecial ITfor        = True
+isSpecial ITderive     = True
  isSpecial ITqualified  = True
  isSpecial ITforall     = True
  isSpecial ITexport     = True
@@ -553,7 +573,6 @@ isSpecial ITunsafe          = True
  isSpecial ITccallconv   = True
  isSpecial ITstdcallconv = True
  isSpecial ITmdo                = True
-isSpecial ITiso                = True
  isSpecial ITfamily     = True
  isSpecial _             = False
  
@@ -573,9 +592,9 @@ reservedWordsFM = listToUFM $
         ( "data",       ITdata,         0 ),     
         ( "default",    ITdefault,      0 ),  
         ( "deriving",   ITderiving,     0 ), 
+       ( "derive",     ITderive,       0 ), 
         ( "do",         ITdo,           0 ),       
         ( "else",       ITelse,         0 ),     
-       ( "for",        ITfor,          0 ),
         ( "hiding",     IThiding,       0 ),
         ( "if",         ITif,           0 ),       
         ( "import",     ITimport,       0 ),   
@@ -596,7 +615,7 @@ reservedWordsFM = listToUFM $
  
         ( "forall",     ITforall,        bit tvBit),
         ( "mdo",        ITmdo,           bit glaExtsBit),
-       ( "family",     ITfamily,        bit idxTysBit),
+       ( "family",     ITfamily,        bit tyFamBit),
  
         ( "foreign",    ITforeign,       bit ffiBit),
         ( "export",     ITexport,        bit ffiBit),
@@ -631,7 +650,7 @@ reservedSymsFM = listToUFM $
         ,("!",  ITbang,         0)
  
         ,("*",  ITstar,         bit glaExtsBit .|. 
-                               bit idxTysBit)      -- For data T (a::*) = MkT
+                               bit tyFamBit)       -- For data T (a::*) = MkT
         ,(".",  ITdot,          bit tvBit)          -- For 'forall a . t'
  
         ,("-<", ITlarrowtail,   bit arrowsBit)
@@ -640,7 +659,6 @@ reservedSymsFM = listToUFM $
         ,(">>-",        ITRarrowtail,   bit arrowsBit)
  
  #if __GLASGOW_HASKELL__ >= 605
-       ,("λ", ITlam,          bit glaExtsBit)
         ,("∷",   ITdcolon,       bit glaExtsBit)
         ,("⇒",   ITdarrow,    bit glaExtsBit)
         ,("∀",        ITforall,       bit glaExtsBit)
@@ -690,24 +708,28 @@ pop _span _buf _len = do popLexState; lexToken
  pop_and :: Action -> Action
  pop_and act span buf len = do popLexState; act span buf len
  
-notFollowedBy char _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf /= char
+{-# INLINE nextCharIs #-}
+nextCharIs buf p = not (atEnd buf) && p (currentChar buf)
+
+notFollowedBy char _ _ _ (AI _ _ buf) 
+  = nextCharIs buf (/=char)
  
  notFollowedBySymbol _ _ _ (AI _ _ buf)
-  = atEnd buf || currentChar buf `notElem` "!#$%&*+./<=>?@\\^|-~"
+  = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
  
+-- We must reject doc comments as being ordinary comments everywhere.
+-- In some cases the doc comment will be selected as the lexeme due to
+-- maximal munch, but not always, because the nested comment rule is
+-- valid in all states, but the doc-comment rules are only valid in
+-- the non-layout states.
  isNormalComment bits _ _ (AI _ _ buf)
-  = (if haddockEnabled bits then False else (followedBySpaceDoc buf))
-    || notFollowedByDocOrPragma
-  where 
-    notFollowedByDocOrPragma = not $ spaceAndP buf
-      (\buf' -> currentChar buf' `elem` "|^*$#")
-
-spaceAndP buf p = p buf || currentChar buf == ' ' && p buf'
-  where buf' = snd (nextChar buf)
-
-followedBySpaceDoc buf = spaceAndP buf followedByDoc
+  | haddockEnabled bits = notFollowedByDocOrPragma
+  | otherwise           = nextCharIs buf (/='#')
+  where
+    notFollowedByDocOrPragma
+       = not $ spaceAndP buf (`nextCharIs` (`elem` "|^*$#"))
  
-followedByDoc buf = currentChar buf `elem` "|^*$"
+spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf))
  
  haddockDisabledAnd p bits _ _ (AI _ _ buf)
    = if haddockEnabled bits then False else (p buf)
@@ -786,13 +808,12 @@ nested_doc_comment span buf _len = withLexedDocType (go "")
        Just (c,input) -> go (c:commentAcc) input docType False
  
  withLexedDocType lexDocComment = do
-  input <- getInput
-  case alexGetChar input of
-    Nothing -> error "Can't happen"
-    Just ('|', input) -> lexDocComment input ITdocCommentNext False
-    Just ('^', input) -> lexDocComment input ITdocCommentPrev False
-    Just ('$', input) -> lexDocComment input ITdocCommentNamed False
-    Just ('*', input) -> lexDocSection 1 input 
+  input@(AI _ _ buf) <- getInput
+  case prevChar buf ' ' of
+    '|' -> lexDocComment input ITdocCommentNext False
+    '^' -> lexDocComment input ITdocCommentPrev False
+    '$' -> lexDocComment input ITdocCommentNamed False
+    '*' -> lexDocSection 1 input 
   where 
      lexDocSection n input = case alexGetChar input of 
        Just ('*', input) -> lexDocSection (n+1) input
@@ -931,27 +952,29 @@ sym con span buf len =
    where
         fs = lexemeToFastString buf len
  
-tok_decimal span buf len 
-  = return (L span (ITinteger  $! parseInteger buf len 10 octDecDigit))
-
-tok_octal span buf len 
-  = return (L span (ITinteger  $! parseInteger (offsetBytes 2 buf) (len-2) 8 octDecDigit))
-
-tok_hexadecimal span buf len 
-  = return (L span (ITinteger  $! parseInteger (offsetBytes 2 buf) (len-2) 16 hexDigit))
-
-prim_decimal span buf len 
-  = return (L span (ITprimint  $! parseInteger buf (len-1) 10 octDecDigit))
-
-prim_octal span buf len 
-  = return (L span (ITprimint  $! parseInteger (offsetBytes 2 buf) (len-3) 8 octDecDigit))
-
-prim_hexadecimal span buf len 
-  = return (L span (ITprimint  $! parseInteger (offsetBytes 2 buf) (len-3) 16 hexDigit))
-
+-- Variations on the integral numeric literal.
+tok_integral :: (Integer -> Token)
+     -> (Integer -> Integer)
+ --    -> (StringBuffer -> StringBuffer) -> (Int -> Int)
+     -> Int -> Int
+     -> (Integer, (Char->Int)) -> Action
+tok_integral itint transint transbuf translen (radix,char_to_int) span buf len =
+  return $ L span $ itint $! transint $ parseUnsignedInteger
+     (offsetBytes transbuf buf) (subtract translen len) radix char_to_int
+
+-- some conveniences for use with tok_integral
+tok_num = tok_integral ITinteger
+tok_primint = tok_integral ITprimint
+positive = id
+negative = negate
+decimal = (10,octDecDigit)
+octal = (8,octDecDigit)
+hexadecimal = (16,hexDigit)
+
+-- readRational can understand negative rationals, exponents, everything.
  tok_float        str = ITrational   $! readRational str
-prim_float       str = ITprimfloat  $! readRational str
-prim_double      str = ITprimdouble $! readRational str
+tok_primfloat    str = ITprimfloat  $! readRational str
+tok_primdouble   str = ITprimdouble $! readRational str
  
  -- -----------------------------------------------------------------------------
  -- Layout processing
@@ -1019,7 +1042,7 @@ do_layout_left span _buf _len = do
  
  setLine :: Int -> Action
  setLine code span buf len = do
-  let line = parseInteger buf len 10 octDecDigit
+  let line = parseUnsignedInteger buf len 10 octDecDigit
    setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0)
         -- subtract one: the line number refers to the *following* line
    popLexState
@@ -1299,6 +1322,14 @@ getCharOrFail =  do
         Just (c,i)  -> do setInput i; return c
  
  -- -----------------------------------------------------------------------------
+-- Warnings
+
+warn :: DynFlag -> SDoc -> Action
+warn option warning span _buf _len = do
+    addWarning option (mkWarnMsg span alwaysQualify warning)
+    lexToken
+
+-- -----------------------------------------------------------------------------
  -- The Parse Monad
  
  data LayoutContext
@@ -1316,6 +1347,8 @@ data ParseResult a
  
  data PState = PState { 
         buffer     :: StringBuffer,
+    dflags     :: DynFlags,
+    messages   :: Messages,
          last_loc   :: SrcSpan, -- pos of previous token
          last_offs  :: !Int,    -- offset of the previous token from the
                                 -- beginning of  the current line.
@@ -1406,6 +1439,9 @@ alexGetChar (AI loc ofs s)
         adj_c 
           | c <= '\x06' = non_graphic
           | c <= '\xff' = c
+          -- Alex doesn't handle Unicode, so when Unicode
+          -- character is encoutered we output these values
+          -- with the actual character value hidden in the state.
           | otherwise = 
                 case generalCategory c of
                   UppercaseLetter       -> upper
@@ -1479,7 +1515,7 @@ ipBit      = 6
  tvBit     = 7  -- Scoped type variables enables 'forall' keyword
  bangPatBit = 8 -- Tells the parser to understand bang-patterns
                 -- (doesn't affect the lexer)
-idxTysBit  = 9 -- indexed type families: 'family' keyword and kind sigs
+tyFamBit   = 9 -- indexed type families: 'family' keyword and kind sigs
  haddockBit = 10 -- Lex and parse Haddock comments
  
  glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
@@ -1491,7 +1527,7 @@ thEnabled      flags = testBit flags thBit
  ipEnabled      flags = testBit flags ipBit
  tvEnabled      flags = testBit flags tvBit
  bangPatEnabled flags = testBit flags bangPatBit
-idxTysEnabled  flags = testBit flags idxTysBit
+tyFamEnabled   flags = testBit flags tyFamBit
  haddockEnabled flags = testBit flags haddockBit
  
  -- PState for parsing options pragmas
@@ -1500,6 +1536,10 @@ pragState :: StringBuffer -> SrcLoc -> PState
  pragState buf loc  = 
    PState {
        buffer         = buf,
+      messages      = emptyMessages,
+      -- XXX defaultDynFlags is not right, but we don't have a real
+      -- dflags handy
+      dflags        = defaultDynFlags,
        last_loc      = mkSrcSpan loc loc,
        last_offs     = 0,
        last_len      = 0,
@@ -1517,6 +1557,8 @@ mkPState :: StringBuffer -> SrcLoc -> DynFlags -> PState
  mkPState buf loc flags  = 
    PState {
        buffer         = buf,
+      dflags        = flags,
+      messages      = emptyMessages,
        last_loc      = mkSrcSpan loc loc,
        last_offs     = 0,
        last_len      = 0,
@@ -1528,21 +1570,30 @@ mkPState buf loc flags  =
         -- we begin in the layout state if toplev_layout is set
      }
      where
-      bitmap =     glaExtsBit `setBitIf` dopt Opt_GlasgowExts flags
-              .|. ffiBit     `setBitIf` dopt Opt_FFI         flags
-              .|. parrBit    `setBitIf` dopt Opt_PArr        flags
-              .|. arrowsBit  `setBitIf` dopt Opt_Arrows      flags
-              .|. thBit      `setBitIf` dopt Opt_TH          flags
+      bitmap =     glaExtsBit `setBitIf` dopt Opt_GlasgowExts  flags
+              .|. ffiBit     `setBitIf` dopt Opt_FFI          flags
+              .|. parrBit    `setBitIf` dopt Opt_PArr         flags
+              .|. arrowsBit  `setBitIf` dopt Opt_Arrows       flags
+              .|. thBit      `setBitIf` dopt Opt_TH           flags
                .|. ipBit      `setBitIf` dopt Opt_ImplicitParams flags
                .|. tvBit      `setBitIf` dopt Opt_ScopedTypeVariables flags
                .|. bangPatBit `setBitIf` dopt Opt_BangPatterns flags
-              .|. idxTysBit  `setBitIf` dopt Opt_IndexedTypes flags
-              .|. haddockBit `setBitIf` dopt Opt_Haddock     flags
+              .|. tyFamBit   `setBitIf` dopt Opt_TypeFamilies flags
+              .|. haddockBit `setBitIf` dopt Opt_Haddock      flags
        --
        setBitIf :: Int -> Bool -> Int
        b `setBitIf` cond | cond      = bit b
                         | otherwise = 0
  
+addWarning :: DynFlag -> WarnMsg -> P ()
+addWarning option w
+ = P $ \s@PState{messages=(ws,es), dflags=d} ->
+       let ws' = if dopt option d then ws `snocBag` w else ws
+       in POk s{messages=(ws', es)} ()
+
+getMessages :: PState -> Messages
+getMessages PState{messages=ms} = ms
+
  getContext :: P [LayoutContext]
  getContext = P $ \s@PState{context=ctx} -> POk s ctx