-- any warnings in the module. See
-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
-- for details
-- any warnings in the module. See
-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
-- for details
module Lexer (
Token(..), lexer, pragState, mkPState, PState(..),
module Lexer (
Token(..), lexer, pragState, mkPState, PState(..),
-#if __GLASGOW_HASKELL__ >= 605
-import Data.Char ( GeneralCategory(..), generalCategory, isPrint, isUpper )
-#else
-import Compat.Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper )
-#endif
+import Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper )
$digit = [$ascdigit $unidigit]
$special = [\(\)\,\;\[\]\`\{\}]
$digit = [$ascdigit $unidigit]
$special = [\(\)\,\;\[\]\`\{\}]
$unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
$symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\']
$unilarge = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
$unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
$symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\']
$unilarge = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
$large = [$asclarge $unilarge]
$unismall = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
$large = [$asclarge $unilarge]
$unismall = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
$small = [$ascsmall $unismall \_]
$unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
$small = [$ascsmall $unismall \_]
$unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
$whitechar* (NO(T?)INLINE|no(t?)inline)
{ token (ITspec_inline_prag False) }
"{-#" $whitechar* (SOURCE|source) { token ITsource_prag }
$whitechar* (NO(T?)INLINE|no(t?)inline)
{ token (ITspec_inline_prag False) }
"{-#" $whitechar* (SOURCE|source) { token ITsource_prag }
"{-#" $whitechar* (DEPRECATED|deprecated)
{ token ITdeprecated_prag }
"{-#" $whitechar* (SCC|scc) { token ITscc_prag }
"{-#" $whitechar* (DEPRECATED|deprecated)
{ token ITdeprecated_prag }
"{-#" $whitechar* (SCC|scc) { token ITscc_prag }
-- when trying to be close to Haskell98
<0> {
-- Normal integral literals (:: Num a => a, from Integer)
-- when trying to be close to Haskell98
<0> {
-- Normal integral literals (:: Num a => a, from Integer)
- @decimal { tok_num positive 0 0 decimal }
- 0[oO] @octal { tok_num positive 2 2 octal }
- 0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }
+ @decimal { tok_num positive 0 0 decimal }
+ 0[oO] @octal { tok_num positive 2 2 octal }
+ 0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }
-- It's simpler (and faster?) to give separate cases to the negatives,
-- especially considering octal/hexadecimal prefixes.
-- It's simpler (and faster?) to give separate cases to the negatives,
-- especially considering octal/hexadecimal prefixes.
- @decimal \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
- 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
- 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
- @negative @decimal \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
- @negative 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
+ @decimal \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
+ 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
+ 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
+ @negative @decimal \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
+ @negative 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
-- Unboxed floats and doubles (:: Float#, :: Double#)
-- prim_{float,double} work with signed literals
@signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
-- Unboxed floats and doubles (:: Float#, :: Double#)
-- prim_{float,double} work with signed literals
@signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
- | ITopabrack -- [:, for parallel arrays with -fparr
- | ITcpabrack -- :], for parallel arrays with -fparr
+ | ITopabrack -- [:, for parallel arrays with -XParr
+ | ITcpabrack -- :], for parallel arrays with -XParr
isSpecial :: Token -> Bool
-- If we see M.x, where x is a keyword, but
-- is special, we treat is as just plain M.x,
isSpecial :: Token -> Bool
-- If we see M.x, where x is a keyword, but
-- is special, we treat is as just plain M.x,
-- the bitmap provided as the third component indicates whether the
-- corresponding extension keyword is valid under the extension options
-- the bitmap provided as the third component indicates whether the
-- corresponding extension keyword is valid under the extension options
-token t span buf len = return (L span t)
-layout_token t span buf len = pushLexState layout >> return (L span t)
+token t span _buf _len = return (L span t)
+layout_token t span _buf _len = pushLexState layout >> return (L span t)
idtoken :: (StringBuffer -> Int -> Token) -> Action
idtoken f span buf len = return (L span $! (f buf len))
idtoken :: (StringBuffer -> Int -> Token) -> Action
idtoken f span buf len = return (L span $! (f buf len))
Just ('-',input) -> case alexGetChar input of
Nothing -> errBrace input span
Just ('\125',input) -> go (n-1) input
Just ('-',input) -> case alexGetChar input of
Nothing -> errBrace input span
Just ('\125',input) -> go (n-1) input
Just ('\123',input) -> case alexGetChar input of
Nothing -> errBrace input span
Just ('-',input) -> go (n+1) input
Just ('\123',input) -> case alexGetChar input of
Nothing -> errBrace input span
Just ('-',input) -> go (n+1) input
- Just (c,_) -> go n input
- Just (c,input) -> go n input
+ Just (_,_) -> go n input
+ Just (_,input) -> go n input
Nothing -> errBrace input span
Just ('-',input) -> case alexGetChar input of
Nothing -> errBrace input span
Nothing -> errBrace input span
Just ('-',input) -> case alexGetChar input of
Nothing -> errBrace input span
Just ('\123', input) -> case alexGetChar input of
Nothing -> errBrace input span
Just ('-',input) -> do
setInput input
let cont = do input <- getInput; go commentAcc input docType False
nested_comment cont span buf _len
Just ('\123', input) -> case alexGetChar input of
Nothing -> errBrace input span
Just ('-',input) -> do
setInput input
let cont = do input <- getInput; go commentAcc input docType False
nested_comment cont span buf _len
Just (c,input) -> go (c:commentAcc) input docType False
withLexedDocType lexDocComment = do
Just (c,input) -> go (c:commentAcc) input docType False
withLexedDocType lexDocComment = do
where
lexDocSection n input = case alexGetChar input of
Just ('*', input) -> lexDocSection (n+1) input
where
lexDocSection n input = case alexGetChar input of
Just ('*', input) -> lexDocSection (n+1) input
-- some conveniences for use with tok_integral
tok_num = tok_integral ITinteger
tok_primint = tok_integral ITprimint
-- some conveniences for use with tok_integral
tok_num = tok_integral ITinteger
tok_primint = tok_integral ITprimint
-- Options, includes and language pragmas.
lex_string_prag :: (String -> Token) -> Action
-- Options, includes and language pragmas.
lex_string_prag :: (String -> Token) -> Action
else case alexGetChar input of
Just (c,i) -> go (c:acc) i
Nothing -> err input
else case alexGetChar input of
Just (c,i) -> go (c:acc) i
Nothing -> err input
-- but WIHTOUT CONSUMING the x or T part (the parser does that).
-- So we have to do two characters of lookahead: when we see 'x we need to
-- see if there's a trailing quote
-- but WIHTOUT CONSUMING the x or T part (the parser does that).
-- So we have to do two characters of lookahead: when we see 'x we need to
-- see if there's a trailing quote
setInput i2
lit_ch <- lex_escape
mc <- getCharOrFail -- Trailing quote
if mc == '\'' then finish_char_tok loc lit_ch
else do setInput i2; lit_error
setInput i2
lit_ch <- lex_escape
mc <- getCharOrFail -- Trailing quote
if mc == '\'' then finish_char_tok loc lit_ch
else do setInput i2; lit_error
failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg)
failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg)
failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
-- Alex doesn't handle Unicode, so when Unicode
-- character is encoutered we output these values
-- with the actual character value hidden in the state.
-- Alex doesn't handle Unicode, so when Unicode
-- character is encoutered we output these values
-- with the actual character value hidden in the state.
NonSpacingMark -> other_graphic
SpacingCombiningMark -> other_graphic
EnclosingMark -> other_graphic
NonSpacingMark -> other_graphic
SpacingCombiningMark -> other_graphic
EnclosingMark -> other_graphic
-- (doesn't affect the lexer)
tyFamBit = 9 -- indexed type families: 'family' keyword and kind sigs
haddockBit = 10 -- Lex and parse Haddock comments
-- (doesn't affect the lexer)
tyFamBit = 9 -- indexed type families: 'family' keyword and kind sigs
haddockBit = 10 -- Lex and parse Haddock comments
kindSigsBit = 12 -- Kind signatures on type variables
recursiveDoBit = 13 -- mdo
unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
kindSigsBit = 12 -- Kind signatures on type variables
recursiveDoBit = 13 -- mdo
unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
- then ptext SLIT("parse error (possibly incorrect indentation)")
- else hcat [ptext SLIT("parse error on input "),
+ then ptext (sLit "parse error (possibly incorrect indentation)")
+ else hcat [ptext (sLit "parse error on input "),
char '`', text token, char '\'']
]
where token = lexemeToString (offsetBytes (-len) buf) len
char '`', text token, char '\'']
]
where token = lexemeToString (offsetBytes (-len) buf) len
reportLexError loc end buf str
-- -----------------------------------------------------------------------------
reportLexError loc end buf str
-- -----------------------------------------------------------------------------