-- - M... should be 3 tokens, not 1.
-- - pragma-end should be only valid in a pragma
+-- qualified operator NOTES.
+--
+-- - If M.(+) is a single lexeme, then..
+-- - Probably (+) should be a single lexeme too, for consistency.
+-- Otherwise ( + ) would be a prefix operator, but M.( + ) would not be.
+-- - But we have to rule out reserved operators, otherwise (..) becomes
+-- a different lexeme.
+-- - Should we therefore also rule out reserved operators in the qualified
+-- form? This is quite difficult to achieve. We don't do it for
+-- qualified varids.
+
{
{-# OPTIONS -w #-}
-- The above warning supression flag is a temporary kludge.
import Control.Monad
import Data.Bits
-import Data.Char ( chr, ord, isSpace )
+import Data.Char
import Data.Ratio
import Debug.Trace
-
-import Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper )
}
$unispace = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
<0,option_prags> \n { begin bol }
-"{-#" $whitechar* (line|LINE) { begin line_prag2 }
+"{-#" $whitechar* (line|LINE) / { notFollowedByPragmaChar }
+ { begin line_prag2 }
-- single-line line pragmas, of the form
-- # <line> "<file>" <extra-stuff> \n
-- with older versions of GHC which generated these.
<0,option_prags> {
- "{-#" $whitechar* (RULES|rules) { token ITrules_prag }
- "{-#" $whitechar* (INLINE|inline) { token (ITinline_prag True) }
- "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
+ "{-#" $whitechar* (RULES|rules) / { notFollowedByPragmaChar } { rulePrag }
+ "{-#" $whitechar* (INLINE|inline) / { notFollowedByPragmaChar }
+ { token (ITinline_prag True) }
+ "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar }
{ token (ITinline_prag False) }
- "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
+ "{-#" $whitechar* (INLINE|inline)
+ $whitechar+ (CONLIKE|conlike) / { notFollowedByPragmaChar }
+ { token (ITinline_conlike_prag True) }
+ "{-#" $whitechar* (NO(T)?INLINE|no(t?)inline)
+ $whitechar+ (CONLIKE|constructorlike) / { notFollowedByPragmaChar }
+ { token (ITinline_conlike_prag False) }
+ "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) / { notFollowedByPragmaChar }
{ token ITspec_prag }
"{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
- $whitechar* (INLINE|inline) { token (ITspec_inline_prag True) }
+ $whitechar+ (INLINE|inline) / { notFollowedByPragmaChar }
+ { token (ITspec_inline_prag True) }
"{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
- $whitechar* (NO(T?)INLINE|no(t?)inline)
+ $whitechar+ (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar }
{ token (ITspec_inline_prag False) }
- "{-#" $whitechar* (SOURCE|source) { token ITsource_prag }
- "{-#" $whitechar* (WARNING|warning)
+ "{-#" $whitechar* (SOURCE|source) / { notFollowedByPragmaChar }
+ { token ITsource_prag }
+ "{-#" $whitechar* (WARNING|warning) / { notFollowedByPragmaChar }
{ token ITwarning_prag }
- "{-#" $whitechar* (DEPRECATED|deprecated)
+ "{-#" $whitechar* (DEPRECATED|deprecated) / { notFollowedByPragmaChar }
{ token ITdeprecated_prag }
- "{-#" $whitechar* (SCC|scc) { token ITscc_prag }
- "{-#" $whitechar* (GENERATED|generated)
+ "{-#" $whitechar* (SCC|scc) / { notFollowedByPragmaChar }
+ { token ITscc_prag }
+ "{-#" $whitechar* (GENERATED|generated) / { notFollowedByPragmaChar }
{ token ITgenerated_prag }
- "{-#" $whitechar* (CORE|core) { token ITcore_prag }
- "{-#" $whitechar* (UNPACK|unpack) { token ITunpack_prag }
+ "{-#" $whitechar* (CORE|core) / { notFollowedByPragmaChar }
+ { token ITcore_prag }
+ "{-#" $whitechar* (UNPACK|unpack) / { notFollowedByPragmaChar }
+ { token ITunpack_prag }
+ "{-#" $whitechar* (ANN|ann) / { notFollowedByPragmaChar }
+ { token ITann_prag }
-- We ignore all these pragmas, but don't generate a warning for them
-- CFILES is a hugs-only thing.
- "{-#" $whitechar* (OPTIONS_HUGS|options_hugs|OPTIONS_NHC98|options_nhc98|OPTIONS_JHC|options_jhc|CFILES|cfiles)
+ "{-#" $whitechar* (OPTIONS_(HUGS|hugs|NHC98|nhc98|JHC|jhc|YHC|yhc|CATCH|catch|DERIVE|derive)|CFILES|cfiles|CONTRACT|contract) / { notFollowedByPragmaChar }
{ nested_comment lexToken }
-- ToDo: should only be valid inside a pragma:
}
<option_prags> {
- "{-#" $whitechar* (OPTIONS|options) { lex_string_prag IToptions_prag }
- "{-#" $whitechar* (OPTIONS_GHC|options_ghc)
+ "{-#" $whitechar* (OPTIONS|options) / { notFollowedByPragmaChar }
+ { lex_string_prag IToptions_prag }
+ "{-#" $whitechar* (OPTIONS_GHC|options_ghc) / { notFollowedByPragmaChar }
{ lex_string_prag IToptions_prag }
"{-#" $whitechar* (OPTIONS_HADDOCK|options_haddock)
+ / { notFollowedByPragmaChar }
{ lex_string_prag ITdocOptions }
"-- #" { multiline_doc_comment }
- "{-#" $whitechar* (LANGUAGE|language) { token ITlanguage_prag }
- "{-#" $whitechar* (INCLUDE|include) { lex_string_prag ITinclude_prag }
+ "{-#" $whitechar* (LANGUAGE|language) / { notFollowedByPragmaChar }
+ { token ITlanguage_prag }
+ "{-#" $whitechar* (INCLUDE|include) / { notFollowedByPragmaChar }
+ { lex_string_prag ITinclude_prag }
}
<0> {
-- In the "0" mode we ignore these pragmas
- "{-#" $whitechar* (OPTIONS|options|OPTIONS_GHC|options_ghc|OPTIONS_HADDOCK|options_haddock|LANGUAGE|language|INCLUDE|include)
+ "{-#" $whitechar* (OPTIONS|options|OPTIONS_GHC|options_ghc|OPTIONS_HADDOCK|options_haddock|LANGUAGE|language|INCLUDE|include) / { notFollowedByPragmaChar }
{ nested_comment lexToken }
}
@conid "#"+ / { ifExtension magicHashEnabled } { idtoken conid }
}
--- ToDo: M.(,,,)
-
+-- ToDo: - move `var` and (sym) into lexical syntax?
+-- - remove backquote from $special?
<0> {
- @qual @varsym { idtoken qvarsym }
- @qual @consym { idtoken qconsym }
- @varsym { varsym }
- @consym { consym }
+ @qual @varsym / { ifExtension oldQualOps } { idtoken qvarsym }
+ @qual @consym / { ifExtension oldQualOps } { idtoken qconsym }
+ @qual \( @varsym \) / { ifExtension newQualOps } { idtoken prefixqvarsym }
+ @qual \( @consym \) / { ifExtension newQualOps } { idtoken prefixqconsym }
+ @varsym { varsym }
+ @consym { consym }
}
-- For the normal boxed literals we need to be careful
-- Pragmas
| ITinline_prag Bool -- True <=> INLINE, False <=> NOINLINE
+ | ITinline_conlike_prag Bool -- same
| ITspec_prag -- SPECIALISE
| ITspec_inline_prag Bool -- SPECIALISE INLINE (or NOINLINE)
| ITsource_prag
| ITgenerated_prag
| ITcore_prag -- hdaume: core annotations
| ITunpack_prag
+ | ITann_prag
| ITclose_prag
| IToptions_prag String
| ITinclude_prag String
| ITqconid (FastString,FastString)
| ITqvarsym (FastString,FastString)
| ITqconsym (FastString,FastString)
+ | ITprefixqvarsym (FastString,FastString)
+ | ITprefixqconsym (FastString,FastString)
| ITdupipvarid FastString -- GHC extension: implicit param: ?x
,("-<<", ITLarrowtail, arrowsEnabled)
,(">>-", ITRarrowtail, arrowsEnabled)
-#if __GLASGOW_HASKELL__ >= 605
,("∷", ITdcolon, unicodeSyntaxEnabled)
,("⇒", ITdarrow, unicodeSyntaxEnabled)
,("∀", ITforall, \i -> unicodeSyntaxEnabled i &&
-- ToDo: ideally, → and ∷ should be "specials", so that they cannot
-- form part of a large operator. This would let us have a better
-- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe).
-#endif
]
-- -----------------------------------------------------------------------------
notFollowedBySymbol _ _ _ (AI _ _ buf)
= nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
+notFollowedByPragmaChar _ _ _ (AI _ _ buf)
+ = nextCharIs buf (\c -> not (isAlphaNum c || c == '_'))
+
-- We must reject doc comments as being ordinary comments everywhere.
-- In some cases the doc comment will be selected as the lexeme due to
-- maximal munch, but not always, because the nested comment rule is
-- off again at the end of the pragma.
rulePrag :: Action
rulePrag span buf len = do
- setExts (.|. inRulePragBit)
+ setExts (.|. bit inRulePragBit)
return (L span ITrules_prag)
endPrag :: Action
popContext
return (L span ITccurly)
-qvarid buf len = ITqvarid $! splitQualName buf len
-qconid buf len = ITqconid $! splitQualName buf len
+qvarid buf len = ITqvarid $! splitQualName buf len False
+qconid buf len = ITqconid $! splitQualName buf len False
-splitQualName :: StringBuffer -> Int -> (FastString,FastString)
+splitQualName :: StringBuffer -> Int -> Bool -> (FastString,FastString)
-- takes a StringBuffer and a length, and returns the module name
-- and identifier parts of a qualified name. Splits at the *last* dot,
-- because of hierarchical module names.
-splitQualName orig_buf len = split orig_buf orig_buf
+splitQualName orig_buf len parens = split orig_buf orig_buf
where
split buf dot_buf
| orig_buf `byteDiff` buf >= len = done dot_buf
done dot_buf =
(lexemeToFastString orig_buf (qual_size - 1),
- lexemeToFastString dot_buf (len - qual_size))
+ if parens -- Prelude.(+)
+ then lexemeToFastString (stepOn dot_buf) (len - qual_size - 2)
+ else lexemeToFastString dot_buf (len - qual_size))
where
qual_size = orig_buf `byteDiff` dot_buf
conid buf len = ITconid fs
where fs = lexemeToFastString buf len
-qvarsym buf len = ITqvarsym $! splitQualName buf len
-qconsym buf len = ITqconsym $! splitQualName buf len
+qvarsym buf len = ITqvarsym $! splitQualName buf len False
+qconsym buf len = ITqconsym $! splitQualName buf len False
+prefixqvarsym buf len = ITprefixqvarsym $! splitQualName buf len True
+prefixqconsym buf len = ITprefixqconsym $! splitQualName buf len True
varsym = sym ITvarsym
consym = sym ITconsym
-- We've seen 'x, where x is a valid character
-- (i.e. not newline etc) but not a quote or backslash
case alexGetChar' i2 of -- Look ahead one more character
- Nothing -> lit_error
Just ('\'', i3) -> do -- We've seen 'x'
setInput i3
finish_char_tok loc c
_other -> do -- We've seen 'x not followed by quote
+ -- (including the possibility of EOF)
-- If TH is on, just parse the quote only
th_exts <- extension thEnabled
let (AI end _ _) = i1
DecimalNumber -> digit
LetterNumber -> other_graphic
OtherNumber -> other_graphic
- ConnectorPunctuation -> other_graphic
- DashPunctuation -> other_graphic
+ ConnectorPunctuation -> symbol
+ DashPunctuation -> symbol
OpenPunctuation -> other_graphic
ClosePunctuation -> other_graphic
InitialQuote -> other_graphic
FinalQuote -> other_graphic
- OtherPunctuation -> other_graphic
+ OtherPunctuation -> symbol
MathSymbol -> symbol
CurrencySymbol -> symbol
ModifierSymbol -> symbol
qqBit = 18 -- enable quasiquoting
inRulePragBit = 19
rawTokenStreamBit = 20 -- producing a token stream with all comments included
+newQualOpsBit = 21 -- Haskell' qualified operator syntax, e.g. Prelude.(+)
genericsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
always _ = True
qqEnabled flags = testBit flags qqBit
inRulePrag flags = testBit flags inRulePragBit
rawTokenStreamEnabled flags = testBit flags rawTokenStreamBit
+newQualOps flags = testBit flags newQualOpsBit
+oldQualOps flags = not (newQualOps flags)
-- PState for parsing options pragmas
--
.|. standaloneDerivingBit `setBitIf` dopt Opt_StandaloneDeriving flags
.|. transformComprehensionsBit `setBitIf` dopt Opt_TransformListComp flags
.|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags
+ .|. newQualOpsBit `setBitIf` dopt Opt_NewQualifiedOperators flags
--
setBitIf :: Int -> Bool -> Int
b `setBitIf` cond | cond = bit b