-- Note that Alex itself generates code with with some unused bindings and
-- without type signatures, so removing the flag might not be possible.
+{-# OPTIONS_GHC -funbox-strict-fields #-}
+
module Lexer (
Token(..), lexer, pragState, mkPState, PState(..),
P(..), ParseResult(..), getSrcLoc,
import Data.Ratio
import Debug.Trace
-#if __GLASGOW_HASKELL__ >= 605
-import Data.Char ( GeneralCategory(..), generalCategory, isPrint, isUpper )
-#else
-import Compat.Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper )
-#endif
+import Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper )
}
$unispace = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
-$whitechar = [\ \n\r\f\v\xa0 $unispace]
+$whitechar = [\ \n\r\f\v $unispace]
$white_no_nl = $whitechar # \n
$tab = \t
$digit = [$ascdigit $unidigit]
$special = [\(\)\,\;\[\]\`\{\}]
-$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~ \xa1-\xbf \xd7 \xf7]
+$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
$unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
$symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\']
$unilarge = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
-$asclarge = [A-Z \xc0-\xd6 \xd8-\xde]
+$asclarge = [A-Z]
$large = [$asclarge $unilarge]
$unismall = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
-$ascsmall = [a-z \xdf-\xf6 \xf8-\xff]
+$ascsmall = [a-z]
$small = [$ascsmall $unismall \_]
$unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
-- NOTE: accept -} at the end of a LINE pragma, for compatibility
-- with older versions of GHC which generated these.
--- We only want RULES pragmas to be picked up when explicit forall
--- syntax is enabled is on, because the contents of the pragma always
--- uses it. If it's not on then we're sure to get a parse error.
--- (ToDo: we should really emit a warning when ignoring pragmas)
--- XXX Now that we can enable this without the -fglasgow-exts hammer,
--- is it better just to let the parse error happen?
-<0>
- "{-#" $whitechar* (RULES|rules) / { ifExtension explicitForallEnabled } { token ITrules_prag }
-
<0,option_prags> {
+ "{-#" $whitechar* (RULES|rules) { token ITrules_prag }
"{-#" $whitechar* (INLINE|inline) { token (ITinline_prag True) }
"{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
{ token (ITinline_prag False) }
$whitechar* (NO(T?)INLINE|no(t?)inline)
{ token (ITspec_inline_prag False) }
"{-#" $whitechar* (SOURCE|source) { token ITsource_prag }
+ "{-#" $whitechar* (WARNING|warning)
+ { token ITwarning_prag }
"{-#" $whitechar* (DEPRECATED|deprecated)
{ token ITdeprecated_prag }
"{-#" $whitechar* (SCC|scc) { token ITscc_prag }
"{-#" $whitechar* (CORE|core) { token ITcore_prag }
"{-#" $whitechar* (UNPACK|unpack) { token ITunpack_prag }
- "{-#" { nested_comment lexToken }
+ -- We ignore all these pragmas, but don't generate a warning for them
+ -- CFILES is a hugs-only thing.
+ "{-#" $whitechar* (OPTIONS_HUGS|options_hugs|OPTIONS_NHC98|options_nhc98|OPTIONS_JHC|options_jhc|CFILES|cfiles)
+ { nested_comment lexToken }
-- ToDo: should only be valid inside a pragma:
- "#-}" { token ITclose_prag}
+ "#-}" { endPrag }
}
<option_prags> {
}
<0> {
+ -- In the "0" mode we ignore these pragmas
+ "{-#" $whitechar* (OPTIONS|options|OPTIONS_GHC|options_ghc|OPTIONS_HADDOCK|options_haddock|LANGUAGE|language|INCLUDE|include)
+ { nested_comment lexToken }
+}
+
+<0> {
"-- #" .* ;
}
<0,option_prags> {
- -- This is to catch things like {-# OPTIONS OPTIONS_HUGS ...
- "{-#" $whitechar* $idchar+ { nested_comment lexToken }
+ "{-#" { warnThen Opt_WarnUnrecognisedPragmas (text "Unrecognised pragma")
+ (nested_comment lexToken) }
}
-- '0' state: ordinary lexemes
| ITspec_inline_prag Bool -- SPECIALISE INLINE (or NOINLINE)
| ITsource_prag
| ITrules_prag
+ | ITwarning_prag
| ITdeprecated_prag
| ITline_prag
| ITscc_prag
( "where", ITwhere, 0 ),
( "_scc_", ITscc, 0 ), -- ToDo: remove
- ( "forall", ITforall, bit explicitForallBit),
+ ( "forall", ITforall, bit explicitForallBit .|. bit inRulePragBit),
( "mdo", ITmdo, bit recursiveDoBit),
( "family", ITfamily, bit tyFamBit),
( "group", ITgroup, bit transformComprehensionsBit),
-- For data T (a::*) = MkT
,("*", ITstar, \i -> kindSigsEnabled i || tyFamEnabled i)
-- For 'forall a . t'
- ,(".", ITdot, explicitForallEnabled)
+ ,(".", ITdot, \i -> explicitForallEnabled i || inRulePrag i)
,("-<", ITlarrowtail, arrowsEnabled)
,(">-", ITrarrowtail, arrowsEnabled)
Just (_, _) -> lexDocComment input (ITdocSection n) True
Nothing -> do setInput input; lexToken -- eof reached, lex it normally
+-- RULES pragmas turn on the forall and '.' keywords, and we turn them
+-- off again at the end of the pragma.
+rulePrag :: Action
+rulePrag span buf len = do
+ setExts (.|. inRulePragBit)
+ return (L span ITrules_prag)
+
+endPrag :: Action
+endPrag span buf len = do
+ setExts (.&. complement (bit inRulePragBit))
+ return (L span ITclose_prag)
+
-- docCommentEnd
-------------------------------------------------------------------------------
-- This function is quite tricky. We can't just return a new token, we also
c | isAny c -> do setInput inp; return c
_other -> lit_error
-isAny c | c > '\xff' = isPrint c
+isAny c | c > '\x7f' = isPrint c
| otherwise = is_any c
lex_escape :: P Char
addWarning option srcspan warning
lexToken
+warnThen :: DynFlag -> SDoc -> Action -> Action
+warnThen option warning action srcspan buf len = do
+ addWarning option srcspan warning
+ action srcspan buf len
+
-- -----------------------------------------------------------------------------
-- The Parse Monad
getExts :: P Int
getExts = P $ \s -> POk s (extsBitmap s)
+setExts :: (Int -> Int) -> P ()
+setExts f = P $ \s -> POk s{ extsBitmap = f (extsBitmap s) } ()
+
setSrcLoc :: SrcLoc -> P ()
setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
adj_c
| c <= '\x06' = non_graphic
- | c <= '\xff' = c
+ | c <= '\x7f' = c
-- Alex doesn't handle Unicode, so when Unicode
-- character is encoutered we output these values
-- with the actual character value hidden in the state.
LowercaseLetter -> lower
TitlecaseLetter -> upper
ModifierLetter -> other_graphic
- OtherLetter -> other_graphic
+ OtherLetter -> lower -- see #1103
NonSpacingMark -> other_graphic
SpacingCombiningMark -> other_graphic
EnclosingMark -> other_graphic
-- (doesn't affect the lexer)
tyFamBit = 9 -- indexed type families: 'family' keyword and kind sigs
haddockBit = 10 -- Lex and parse Haddock comments
-magicHashBit = 11 -- # in both functions and operators
+magicHashBit = 11 -- "#" in both functions and operators
kindSigsBit = 12 -- Kind signatures on type variables
recursiveDoBit = 13 -- mdo
unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
standaloneDerivingBit = 16 -- standalone instance deriving declarations
transformComprehensionsBit = 17
qqBit = 18 -- enable quasiquoting
+inRulePragBit = 19
genericsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
always _ = True
standaloneDerivingEnabled flags = testBit flags standaloneDerivingBit
transformComprehensionsEnabled flags = testBit flags transformComprehensionsBit
qqEnabled flags = testBit flags qqBit
+inRulePrag flags = testBit flags inRulePragBit
-- PState for parsing options pragmas
--
.|. qqBit `setBitIf` dopt Opt_QuasiQuotes flags
.|. ipBit `setBitIf` dopt Opt_ImplicitParams flags
.|. explicitForallBit `setBitIf` dopt Opt_ScopedTypeVariables flags
+ .|. explicitForallBit `setBitIf` dopt Opt_LiberalTypeSynonyms flags
.|. explicitForallBit `setBitIf` dopt Opt_PolymorphicComponents flags
.|. explicitForallBit `setBitIf` dopt Opt_ExistentialQuantification flags
.|. explicitForallBit `setBitIf` dopt Opt_Rank2Types flags