-- Note that Alex itself generates code with with some unused bindings and
-- without type signatures, so removing the flag might not be possible.
-- Note that Alex itself generates code with with some unused bindings and
-- without type signatures, so removing the flag might not be possible.
module Lexer (
Token(..), lexer, pragState, mkPState, PState(..),
P(..), ParseResult(..), getSrcLoc,
module Lexer (
Token(..), lexer, pragState, mkPState, PState(..),
P(..), ParseResult(..), getSrcLoc,
-#if __GLASGOW_HASKELL__ >= 605
-import Data.Char ( GeneralCategory(..), generalCategory, isPrint, isUpper )
-#else
-import Compat.Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper )
-#endif
+import Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper )
$digit = [$ascdigit $unidigit]
$special = [\(\)\,\;\[\]\`\{\}]
$digit = [$ascdigit $unidigit]
$special = [\(\)\,\;\[\]\`\{\}]
$unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
$symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\']
$unilarge = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
$unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
$symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\']
$unilarge = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
$large = [$asclarge $unilarge]
$unismall = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
$large = [$asclarge $unilarge]
$unismall = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
$small = [$ascsmall $unismall \_]
$unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
$small = [$ascsmall $unismall \_]
$unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
$whitechar* (NO(T?)INLINE|no(t?)inline)
{ token (ITspec_inline_prag False) }
"{-#" $whitechar* (SOURCE|source) { token ITsource_prag }
$whitechar* (NO(T?)INLINE|no(t?)inline)
{ token (ITspec_inline_prag False) }
"{-#" $whitechar* (SOURCE|source) { token ITsource_prag }
"{-#" $whitechar* (DEPRECATED|deprecated)
{ token ITdeprecated_prag }
"{-#" $whitechar* (SCC|scc) { token ITscc_prag }
"{-#" $whitechar* (DEPRECATED|deprecated)
{ token ITdeprecated_prag }
"{-#" $whitechar* (SCC|scc) { token ITscc_prag }
-- when trying to be close to Haskell98
<0> {
-- Normal integral literals (:: Num a => a, from Integer)
-- when trying to be close to Haskell98
<0> {
-- Normal integral literals (:: Num a => a, from Integer)
- @decimal { tok_num positive 0 0 decimal }
- 0[oO] @octal { tok_num positive 2 2 octal }
- 0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }
+ @decimal { tok_num positive 0 0 decimal }
+ 0[oO] @octal { tok_num positive 2 2 octal }
+ 0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }
-- It's simpler (and faster?) to give separate cases to the negatives,
-- especially considering octal/hexadecimal prefixes.
-- It's simpler (and faster?) to give separate cases to the negatives,
-- especially considering octal/hexadecimal prefixes.
- @decimal \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
- 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
- 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
- @negative @decimal \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
- @negative 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
+ @decimal \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
+ 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
+ 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
+ @negative @decimal \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
+ @negative 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
-- Unboxed floats and doubles (:: Float#, :: Double#)
-- prim_{float,double} work with signed literals
@signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
-- Unboxed floats and doubles (:: Float#, :: Double#)
-- prim_{float,double} work with signed literals
@signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
- | ITopabrack -- [:, for parallel arrays with -fparr
- | ITcpabrack -- :], for parallel arrays with -fparr
+ | ITopabrack -- [:, for parallel arrays with -XParr
+ | ITcpabrack -- :], for parallel arrays with -XParr
-- some conveniences for use with tok_integral
tok_num = tok_integral ITinteger
tok_primint = tok_integral ITprimint
-- some conveniences for use with tok_integral
tok_num = tok_integral ITinteger
tok_primint = tok_integral ITprimint
failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
failLocMsgP loc1 loc2 str = P $ \_ -> PFailed (mkSrcSpan loc1 loc2) (text str)
failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
failLocMsgP loc1 loc2 str = P $ \_ -> PFailed (mkSrcSpan loc1 loc2) (text str)
-- Alex doesn't handle Unicode, so when Unicode
-- character is encoutered we output these values
-- with the actual character value hidden in the state.
-- Alex doesn't handle Unicode, so when Unicode
-- character is encoutered we output these values
-- with the actual character value hidden in the state.
NonSpacingMark -> other_graphic
SpacingCombiningMark -> other_graphic
EnclosingMark -> other_graphic
NonSpacingMark -> other_graphic
SpacingCombiningMark -> other_graphic
EnclosingMark -> other_graphic
getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls
-- for reasons of efficiency, flags indicating language extensions (eg,
getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls
-- for reasons of efficiency, flags indicating language extensions (eg,
-- (doesn't affect the lexer)
tyFamBit = 9 -- indexed type families: 'family' keyword and kind sigs
haddockBit = 10 -- Lex and parse Haddock comments
-- (doesn't affect the lexer)
tyFamBit = 9 -- indexed type families: 'family' keyword and kind sigs
haddockBit = 10 -- Lex and parse Haddock comments
kindSigsBit = 12 -- Kind signatures on type variables
recursiveDoBit = 13 -- mdo
unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
kindSigsBit = 12 -- Kind signatures on type variables
recursiveDoBit = 13 -- mdo
unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
- then ptext SLIT("parse error (possibly incorrect indentation)")
- else hcat [ptext SLIT("parse error on input "),
+ then ptext (sLit "parse error (possibly incorrect indentation)")
+ else hcat [ptext (sLit "parse error on input "),
char '`', text token, char '\'']
]
where token = lexemeToString (offsetBytes (-len) buf) len
char '`', text token, char '\'']
]
where token = lexemeToString (offsetBytes (-len) buf) len