import Control.Monad
import Data.Bits
-import Data.Char ( chr, ord, isSpace )
+import Data.Char
import Data.Ratio
import Debug.Trace
-
-import Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper )
}
$unispace = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
<0,option_prags> \n { begin bol }
-"{-#" $whitechar* (line|LINE) { begin line_prag2 }
+"{-#" $whitechar* (line|LINE) / { notFollowedByPragmaChar }
+ { begin line_prag2 }
-- single-line line pragmas, of the form
-- # <line> "<file>" <extra-stuff> \n
-- with older versions of GHC which generated these.
<0,option_prags> {
- "{-#" $whitechar* (RULES|rules) { rulePrag }
- "{-#" $whitechar* (INLINE|inline) { token (ITinline_prag True) }
- "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
+ "{-#" $whitechar* (RULES|rules) / { notFollowedByPragmaChar } { rulePrag }
+ "{-#" $whitechar* (INLINE|inline) / { notFollowedByPragmaChar }
+ { token (ITinline_prag True) }
+ "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar }
{ token (ITinline_prag False) }
- "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
+ "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) / { notFollowedByPragmaChar }
{ token ITspec_prag }
"{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
- $whitechar* (INLINE|inline) { token (ITspec_inline_prag True) }
+ $whitechar+ (INLINE|inline) / { notFollowedByPragmaChar }
+ { token (ITspec_inline_prag True) }
"{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
- $whitechar* (NO(T?)INLINE|no(t?)inline)
+ $whitechar+ (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar }
{ token (ITspec_inline_prag False) }
- "{-#" $whitechar* (SOURCE|source) { token ITsource_prag }
- "{-#" $whitechar* (WARNING|warning)
+ "{-#" $whitechar* (SOURCE|source) / { notFollowedByPragmaChar }
+ { token ITsource_prag }
+ "{-#" $whitechar* (WARNING|warning) / { notFollowedByPragmaChar }
{ token ITwarning_prag }
- "{-#" $whitechar* (DEPRECATED|deprecated)
+ "{-#" $whitechar* (DEPRECATED|deprecated) / { notFollowedByPragmaChar }
{ token ITdeprecated_prag }
- "{-#" $whitechar* (SCC|scc) { token ITscc_prag }
- "{-#" $whitechar* (GENERATED|generated)
+ "{-#" $whitechar* (SCC|scc) / { notFollowedByPragmaChar }
+ { token ITscc_prag }
+ "{-#" $whitechar* (GENERATED|generated) / { notFollowedByPragmaChar }
{ token ITgenerated_prag }
- "{-#" $whitechar* (CORE|core) { token ITcore_prag }
- "{-#" $whitechar* (UNPACK|unpack) { token ITunpack_prag }
+ "{-#" $whitechar* (CORE|core) / { notFollowedByPragmaChar }
+ { token ITcore_prag }
+ "{-#" $whitechar* (UNPACK|unpack) / { notFollowedByPragmaChar }
+ { token ITunpack_prag }
+ "{-#" $whitechar* (ANN|ann) / { notFollowedByPragmaChar }
+ { token ITann_prag }
-- We ignore all these pragmas, but don't generate a warning for them
-- CFILES is a hugs-only thing.
- "{-#" $whitechar* (OPTIONS_HUGS|options_hugs|OPTIONS_NHC98|options_nhc98|OPTIONS_JHC|options_jhc|CFILES|cfiles)
+ "{-#" $whitechar* (OPTIONS_(HUGS|hugs|NHC98|nhc98|JHC|jhc|YHC|yhc|CATCH|catch|DERIVE|derive)|CFILES|cfiles) / { notFollowedByPragmaChar }
{ nested_comment lexToken }
-- ToDo: should only be valid inside a pragma:
}
<option_prags> {
- "{-#" $whitechar* (OPTIONS|options) { lex_string_prag IToptions_prag }
- "{-#" $whitechar* (OPTIONS_GHC|options_ghc)
+ "{-#" $whitechar* (OPTIONS|options) / { notFollowedByPragmaChar }
+ { lex_string_prag IToptions_prag }
+ "{-#" $whitechar* (OPTIONS_GHC|options_ghc) / { notFollowedByPragmaChar }
{ lex_string_prag IToptions_prag }
"{-#" $whitechar* (OPTIONS_HADDOCK|options_haddock)
+ / { notFollowedByPragmaChar }
{ lex_string_prag ITdocOptions }
"-- #" { multiline_doc_comment }
- "{-#" $whitechar* (LANGUAGE|language) { token ITlanguage_prag }
- "{-#" $whitechar* (INCLUDE|include) { lex_string_prag ITinclude_prag }
+ "{-#" $whitechar* (LANGUAGE|language) / { notFollowedByPragmaChar }
+ { token ITlanguage_prag }
+ "{-#" $whitechar* (INCLUDE|include) / { notFollowedByPragmaChar }
+ { lex_string_prag ITinclude_prag }
}
<0> {
-- In the "0" mode we ignore these pragmas
- "{-#" $whitechar* (OPTIONS|options|OPTIONS_GHC|options_ghc|OPTIONS_HADDOCK|options_haddock|LANGUAGE|language|INCLUDE|include)
+ "{-#" $whitechar* (OPTIONS|options|OPTIONS_GHC|options_ghc|OPTIONS_HADDOCK|options_haddock|LANGUAGE|language|INCLUDE|include) / { notFollowedByPragmaChar }
{ nested_comment lexToken }
}
| ITgenerated_prag
| ITcore_prag -- hdaume: core annotations
| ITunpack_prag
+ | ITann_prag
| ITclose_prag
| IToptions_prag String
| ITinclude_prag String
,("-<<", ITLarrowtail, arrowsEnabled)
,(">>-", ITRarrowtail, arrowsEnabled)
-#if __GLASGOW_HASKELL__ >= 605
,("∷", ITdcolon, unicodeSyntaxEnabled)
,("⇒", ITdarrow, unicodeSyntaxEnabled)
,("∀", ITforall, \i -> unicodeSyntaxEnabled i &&
-- ToDo: ideally, → and ∷ should be "specials", so that they cannot
-- form part of a large operator. This would let us have a better
-- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe).
-#endif
]
-- -----------------------------------------------------------------------------
notFollowedBySymbol _ _ _ (AI _ _ buf)
= nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
+notFollowedByPragmaChar _ _ _ (AI _ _ buf)
+ = nextCharIs buf (\c -> not (isAlphaNum c || c == '_'))
+
-- We must reject doc comments as being ordinary comments everywhere.
-- In some cases the doc comment will be selected as the lexeme due to
-- maximal munch, but not always, because the nested comment rule is
-- We've seen 'x, where x is a valid character
-- (i.e. not newline etc) but not a quote or backslash
case alexGetChar' i2 of -- Look ahead one more character
- Nothing -> lit_error
Just ('\'', i3) -> do -- We've seen 'x'
setInput i3
finish_char_tok loc c
_other -> do -- We've seen 'x not followed by quote
+ -- (including the possibility of EOF)
-- If TH is on, just parse the quote only
th_exts <- extension thEnabled
let (AI end _ _) = i1
DecimalNumber -> digit
LetterNumber -> other_graphic
OtherNumber -> other_graphic
- ConnectorPunctuation -> other_graphic
- DashPunctuation -> other_graphic
+ ConnectorPunctuation -> symbol
+ DashPunctuation -> symbol
OpenPunctuation -> other_graphic
ClosePunctuation -> other_graphic
InitialQuote -> other_graphic
FinalQuote -> other_graphic
- OtherPunctuation -> other_graphic
+ OtherPunctuation -> symbol
MathSymbol -> symbol
CurrencySymbol -> symbol
ModifierSymbol -> symbol