--
-- Maintainer : libraries@haskell.org
-- Stability : provisional
--- Portability : portable
+-- Portability : non-portable (uses Text.ParserCombinators.ReadP)
--
-- The cut-down Haskell lexer, used by Text.Read
--
module Text.Read.Lex
-- lexing types
- ( Lexeme(..) -- :: *; Show, Eq
+ ( Lexeme(..) -- :: *; Show, Eq
+
+ -- lexer
+ , lex -- :: ReadP Lexeme Skips leading spaces
+ , hsLex -- :: ReadP String
+ , lexChar -- :: ReadP Char Reads just one char, with H98 escapes
- -- lexer
- , lex -- :: ReadP Lexeme -- Skips leading spaces
- , hsLex -- :: ReadP String
-
- , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
- , readOctP -- :: Num a => ReadP a
- , readDecP -- :: Num a => ReadP a
- , readHexP -- :: Num a => ReadP a
+ , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
+ , readOctP -- :: Num a => ReadP a
+ , readDecP -- :: Num a => ReadP a
+ , readHexP -- :: Num a => ReadP a
)
where
import Text.ParserCombinators.ReadP
+#ifdef __GLASGOW_HASKELL__
import GHC.Base
import GHC.Num( Num(..), Integer )
-import GHC.Show( Show(.. ), showChar, showString,
- isSpace, isAlpha, isAlphaNum,
- isOctDigit, isHexDigit, toUpper )
-import GHC.Real( Ratio, Integral, Rational, (%), fromIntegral, fromRational,
- toInteger, (^), (^^) )
-import GHC.Float( Float, Double )
+import GHC.Show( Show(.. ), isSpace, isAlpha, isAlphaNum )
+import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral,
+ toInteger, (^), (^^), infinity, notANumber )
import GHC.List
-import GHC.Show( ShowS, shows )
-import GHC.Enum( minBound, maxBound )
+import GHC.Enum( maxBound )
+#else
+import Prelude hiding ( lex )
+import Data.Char( chr, ord, isSpace, isAlpha, isAlphaNum )
+import Data.Ratio( Ratio, (%) )
+#endif
+#ifdef __HUGS__
+import Hugs.Prelude( Ratio(..) )
+#endif
import Data.Maybe
-import Data.Either
import Control.Monad
-- -----------------------------------------------------------------------------
-- Lexing types
-type LexP = ReadP Lexeme
-
data Lexeme
= Char Char -- Quotes removed,
| String String -- escapes interpreted
do c <- satisfy isPuncChar
return (Punc [c])
where
- isPuncChar c = c `elem` ",;()[]{}_`"
+ isPuncChar c = c `elem` ",;()[]{}`"
-- ----------------------------------------------------------------------
-- Symbols
-- identifiers
lexId :: ReadP Lexeme
-lexId =
- do c <- satisfy isAlpha
- s <- munch isIdfChar
- return (Ident (c:s))
- where
- isIdfChar c = isAlphaNum c || c `elem` "_'"
+lexId = lex_nan <++ lex_id
+ where
+ -- NaN and Infinity look like identifiers, so
+ -- we parse them first.
+ lex_nan = (string "NaN" >> return (Rat notANumber)) +++
+ (string "Infinity" >> return (Rat infinity))
+
+ lex_id = do c <- satisfy isIdsChar
+ s <- munch isIdfChar
+ return (Ident (c:s))
+
+ -- Identifiers can start with a '_'
+ isIdsChar c = isAlpha c || c == '_'
+ isIdfChar c = isAlphaNum c || c `elem` "_'"
+
+#ifndef __GLASGOW_HASKELL__
+infinity, notANumber :: Rational
+infinity = 1 :% 0
+notANumber = 0 :% 0
+#endif
-- ---------------------------------------------------------------------------
-- Lexing character literals
lexLitChar :: ReadP Lexeme
lexLitChar =
do char '\''
- (c,esc) <- lexChar
- guard (esc || c /= '\'')
+ (c,esc) <- lexCharE
+ guard (esc || c /= '\'') -- Eliminate '' possibility
char '\''
return (Char c)
-lexChar :: ReadP (Char, Bool) -- "escaped or not"?
-lexChar =
+lexChar :: ReadP Char
+lexChar = do { (c,_) <- lexCharE; return c }
+
+lexCharE :: ReadP (Char, Bool) -- "escaped or not"?
+lexCharE =
do c <- get
if c == '\\'
then do c <- lexEsc; return (c, True)
_ -> pfail
lexNumeric =
- do base <- lexBase
+ do base <- lexBaseChar
n <- lexInteger base
guard (n <= toInteger (ord maxBound))
return (chr (fromInteger n))
- where
- lexBase =
- do s <- look
- case s of
- 'o':_ -> do get; return 8
- 'x':_ -> do get; return 16
- _ -> do return 10
-
+
lexCntrlChar =
do char '^'
c <- get
lexAscii =
do choice
- [ string "NUL" >> return '\NUL'
- , string "SOH" >> return '\SOH'
+ [ (string "SOH" >> return '\SOH') <++
+ (string "SO" >> return '\SO')
+ -- \SO and \SOH need maximal-munch treatment
+ -- See the Haskell report Sect 2.6
+
+ , string "NUL" >> return '\NUL'
, string "STX" >> return '\STX'
, string "ETX" >> return '\ETX'
, string "EOT" >> return '\EOT'
, string "VT" >> return '\VT'
, string "FF" >> return '\FF'
, string "CR" >> return '\CR'
- , string "SO" >> return '\SO'
, string "SI" >> return '\SI'
, string "DLE" >> return '\DLE'
, string "DC1" >> return '\DC1'
else let s = f "" in
return (String s)
- lexStrItem =
- (lexEmpty >> lexStrItem)
- +++ lexChar
+ lexStrItem = (lexEmpty >> lexStrItem)
+ +++ lexCharE
lexEmpty =
do char '\\'
-- ---------------------------------------------------------------------------
-- Lexing numbers
-infinity, notANumber :: Rational
-infinity = 1 % 0
-notANumber = 0 % 0
-
type Base = Int
type Digits = [Int]
-showDigit :: Int -> ShowS
-showDigit n | n <= 9 = shows n
- | otherwise = showChar (chr (n + ord 'A' - 10))
-
lexNumber :: ReadP Lexeme
-lexNumber = do { string "NaN"; return (Rat notANumber) } +++
- do { string "Infinity"; return (Rat infinity) } +++
- do { base <- lexBase ; lexNumberBase base }
- where
- lexBase =
- do s <- look
- case s of
- '0':'o':_ -> do get; get; return 8
- '0':'O':_ -> do get; get; return 8
- '0':'x':_ -> do get; get; return 16
- '0':'X':_ -> do get; get; return 16
- _ -> do return 10
-
-lexNumberBase :: Base -> ReadP Lexeme
-lexNumberBase base =
- do xs <- lexDigits base
- mFrac <- lexFrac base
- mExp <- lexExp base
+lexNumber
+ = lexHexOct <++ -- First try for hex or octal 0x, 0o etc
+ -- If that fails, try for a decimal number
+ lexDecNumber -- Start with ordinary digits
+
+lexHexOct :: ReadP Lexeme
+lexHexOct
+ = do char '0'
+ base <- lexBaseChar
+ digits <- lexDigits base
+ return (Int (val (fromIntegral base) 0 digits))
+
+lexBaseChar :: ReadP Int
+-- Lex a single character indicating the base,
+-- or return 10 if there isn't one
+lexBaseChar = lex_base <++ return 10
+ where
+ lex_base = do { c <- get;
+ case c of
+ 'o' -> return 8
+ 'O' -> return 8
+ 'x' -> return 16
+ 'X' -> return 16
+ _ -> pfail }
+
+lexDecNumber :: ReadP Lexeme
+lexDecNumber =
+ do xs <- lexDigits 10
+ mFrac <- lexFrac <++ return Nothing
+ mExp <- lexExp <++ return Nothing
return (value xs mFrac mExp)
where
- baseInteger :: Integer
- baseInteger = fromIntegral base
-
- value xs mFrac mExp = valueFracExp (val baseInteger 0 xs) mFrac mExp
+ value xs mFrac mExp = valueFracExp (val 10 0 xs) mFrac mExp
valueFracExp :: Integer -> Maybe Digits -> Maybe Integer
-> Lexeme
valueFracExp a Nothing Nothing
= Int a -- 43
valueFracExp a Nothing (Just exp)
- | exp >= 0 = Int (a * (baseInteger ^ exp)) -- 43e7
+ | exp >= 0 = Int (a * (10 ^ exp)) -- 43e7
| otherwise = Rat (valExp (fromInteger a) exp) -- 43e-7
valueFracExp a (Just fs) mExp
= case mExp of
Just exp -> Rat (valExp rat exp) -- 4.3e-4
where
rat :: Rational
- rat = fromInteger a + frac (fromIntegral base) 0 1 fs
+ rat = fromInteger a + frac 10 0 1 fs
valExp :: Rational -> Integer -> Rational
- valExp rat exp = rat * (fromIntegral base ^^ exp)
-
-lexFrac :: Base -> ReadP (Maybe Digits)
-lexFrac base =
- do s <- look
- case s of
- '.' : d : _ | isJust (valDig base d) ->
- -- The lookahead checks for point and at least one
- -- valid following digit. For example 1..n must
- -- lex the "1" off rather than failing.
- do get
- frac <- lexDigits base
- return (Just frac)
-
- _ ->
- do return Nothing
-
-lexExp :: Base -> ReadP (Maybe Integer)
-lexExp base =
- do s <- look
- case s of
- e : _ | e `elem` "eE" && base == 10 ->
- do get
- (signedExp +++ exp)
- where
- signedExp =
- do c <- char '-' +++ char '+'
- n <- lexInteger 10
- return (Just (if c == '-' then -n else n))
-
- exp =
- do n <- lexInteger 10
- return (Just n)
-
- _ ->
- do return Nothing
+ valExp rat exp = rat * (10 ^^ exp)
+
+lexFrac :: ReadP (Maybe Digits)
+-- Read the fractional part; fail if it doesn't
+-- start ".d" where d is a digit
+lexFrac = do char '.'
+ frac <- lexDigits 10
+ return (Just frac)
+
+lexExp :: ReadP (Maybe Integer)
+lexExp = do char 'e' +++ char 'E'
+ exp <- signedExp +++ lexInteger 10
+ return (Just exp)
+ where
+ signedExp
+ = do c <- char '-' +++ char '+'
+ n <- lexInteger 10
+ return (if c == '-' then -n else n)
lexDigits :: Int -> ReadP Digits
-- Lex a non-empty sequence of digits in specified base
| '0' <= c && c <= '7' = Just (ord c - ord '0')
| otherwise = Nothing
-valDig 10 c
- | '0' <= c && c <= '9' = Just (ord c - ord '0')
- | otherwise = Nothing
+valDig 10 c = valDecDig c
valDig 16 c
| '0' <= c && c <= '9' = Just (ord c - ord '0')
| 'A' <= c && c <= 'F' = Just (ord c - ord 'A' + 10)
| otherwise = Nothing
+valDecDig c
+ | '0' <= c && c <= '9' = Just (ord c - ord '0')
+ | otherwise = Nothing
+
-- ----------------------------------------------------------------------
-- other numeric lexing functions