-{-# OPTIONS -fno-implicit-prelude #-}
+{-# LANGUAGE CPP, NoImplicitPrelude #-}
+
-----------------------------------------------------------------------------
-- |
-- Module : Text.Read.Lex
--
-- Maintainer : libraries@haskell.org
-- Stability : provisional
--- Portability : portable
+-- Portability : non-portable (uses Text.ParserCombinators.ReadP)
--
-- The cut-down Haskell lexer, used by Text.Read
--
module Text.Read.Lex
-- lexing types
- ( LexP -- :: *; = ReadP Lexeme
- , Lexeme(..) -- :: *; Show, Eq
-
- -- lexer
- , lex -- :: LexP
- , lexLitChar -- :: LexP
-
- -- numbers
- , Number -- :: *; Show, Eq
-
- , numberToInt -- :: Number -> Maybe Int
- , numberToInteger -- :: Number -> Maybe Integer
- , numberToRational -- :: Number -> Maybe Integer
- , numberToFloat -- :: Number -> Maybe Float
- , numberToDouble -- :: Number -> Maybe Double
-
- , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
- , readOctP -- :: Num a => ReadP a
- , readDecP -- :: Num a => ReadP a
- , readHexP -- :: Num a => ReadP a
+ ( Lexeme(..) -- :: *; Show, Eq
+
+ -- lexer
+ , lex -- :: ReadP Lexeme Skips leading spaces
+ , hsLex -- :: ReadP String
+ , lexChar -- :: ReadP Char Reads just one char, with H98 escapes
+
+ , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
+ , readOctP -- :: Num a => ReadP a
+ , readDecP -- :: Num a => ReadP a
+ , readHexP -- :: Num a => ReadP a
)
where
import Text.ParserCombinators.ReadP
+#ifdef __GLASGOW_HASKELL__
import GHC.Base
import GHC.Num( Num(..), Integer )
-import GHC.Show( Show(.. ), showChar, showString,
- isSpace, isAlpha, isAlphaNum,
- isOctDigit, isHexDigit, toUpper )
-import GHC.Real( Ratio, Integral, Rational, (%), fromIntegral, fromRational,
- toInteger, (^), (^^) )
-import GHC.Float( Float, Double )
+import GHC.Show( Show(..) )
+#ifndef __HADDOCK__
+import {-# SOURCE #-} GHC.Unicode ( isSpace, isAlpha, isAlphaNum )
+#endif
+import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral,
+ toInteger, (^), (^^), infinity, notANumber )
import GHC.List
-import GHC.Show( ShowS, shows )
-import GHC.Enum( minBound, maxBound )
+import GHC.Enum( maxBound )
+#else
+import Prelude hiding ( lex )
+import Data.Char( chr, ord, isSpace, isAlpha, isAlphaNum )
+import Data.Ratio( Ratio, (%) )
+#endif
+#ifdef __HUGS__
+import Hugs.Prelude( Ratio(..) )
+#endif
import Data.Maybe
-import Data.Either
import Control.Monad
-- -----------------------------------------------------------------------------
-- Lexing types
-type LexP = ReadP Lexeme
-
+-- ^ Haskell lexemes.
data Lexeme
- = Char Char
- | String String
- | Single Char
- | Symbol String
- | Ident String
- | Number Number
- deriving (Eq)
-
-instance Show Lexeme where
- showsPrec n (Char c) = showsPrec n c
- showsPrec n (String s) = showsPrec n s
- showsPrec _ (Single c) = showChar c
- showsPrec _ (Ident s) = showString s
- showsPrec _ (Symbol s) = showString s
- showsPrec n (Number x) = showsPrec n x
+ = Char Char -- ^ Character literal
+ | String String -- ^ String literal, with escapes interpreted
+ | Punc String -- ^ Punctuation or reserved symbol, e.g. @(@, @::@
+ | Ident String -- ^ Haskell identifier, e.g. @foo@, @Baz@
+ | Symbol String -- ^ Haskell symbol, e.g. @>>@, @:%@
+ | Int Integer -- ^ Integer literal
+ | Rat Rational -- ^ Floating point literal
+ | EOF
+ deriving (Eq, Show)
-- -----------------------------------------------------------------------------
-- Lexing
-lex :: LexP
-lex =
- do skipSpaces
- (lexLitChar
- +++ lexString
- +++ lexSingle
- +++ lexSymbol
- +++ lexIdf
- +++ lexNumber)
+lex :: ReadP Lexeme
+lex = skipSpaces >> lexToken
+
+hsLex :: ReadP String
+-- ^ Haskell lexer: returns the lexed string, rather than the lexeme
+hsLex = do skipSpaces
+ (s,_) <- gather lexToken
+ return s
+
+lexToken :: ReadP Lexeme
+lexToken = lexEOF +++
+ lexLitChar +++
+ lexString +++
+ lexPunc +++
+ lexSymbol +++
+ lexId +++
+ lexNumber
+
-- ----------------------------------------------------------------------
--- symbols
+-- End of file
+lexEOF :: ReadP Lexeme
+lexEOF = do s <- look
+ guard (null s)
+ return EOF
-lexSymbol :: LexP
+-- ---------------------------------------------------------------------------
+-- Single character lexemes
+
+lexPunc :: ReadP Lexeme
+lexPunc =
+ do c <- satisfy isPuncChar
+ return (Punc [c])
+ where
+ isPuncChar c = c `elem` ",;()[]{}`"
+
+-- ----------------------------------------------------------------------
+-- Symbols
+
+lexSymbol :: ReadP Lexeme
lexSymbol =
do s <- munch1 isSymbolChar
- return (Symbol s)
+ if s `elem` reserved_ops then
+ return (Punc s) -- Reserved-ops count as punctuation
+ else
+ return (Symbol s)
where
isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
+ reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]
-- ----------------------------------------------------------------------
-- identifiers
-lexIdf :: LexP
-lexIdf =
- do c <- satisfy isAlpha
- s <- munch isIdfChar
- return (Ident (c:s))
- where
- isIdfChar c = isAlphaNum c || c `elem` "_'"
+lexId :: ReadP Lexeme
+lexId = lex_nan <++ lex_id
+ where
+ -- NaN and Infinity look like identifiers, so
+ -- we parse them first.
+ lex_nan = (string "NaN" >> return (Rat notANumber)) +++
+ (string "Infinity" >> return (Rat infinity))
+
+ lex_id = do c <- satisfy isIdsChar
+ s <- munch isIdfChar
+ return (Ident (c:s))
+
+ -- Identifiers can start with a '_'
+ isIdsChar c = isAlpha c || c == '_'
+ isIdfChar c = isAlphaNum c || c `elem` "_'"
+
+#ifndef __GLASGOW_HASKELL__
+infinity, notANumber :: Rational
+infinity = 1 :% 0
+notANumber = 0 :% 0
+#endif
-- ---------------------------------------------------------------------------
-- Lexing character literals
-lexLitChar :: LexP
+lexLitChar :: ReadP Lexeme
lexLitChar =
- do char '\''
- (c,esc) <- lexChar
- guard (esc || c /= '\'')
- char '\''
+ do _ <- char '\''
+ (c,esc) <- lexCharE
+ guard (esc || c /= '\'') -- Eliminate '' possibility
+ _ <- char '\''
return (Char c)
-lexChar :: ReadP (Char, Bool) -- "escaped or not"?
-lexChar =
- do c <- get
- if c == '\\'
- then do c <- lexEsc; return (c, True)
- else do return (c, False)
+lexChar :: ReadP Char
+lexChar = do { (c,_) <- lexCharE; return c }
+
+lexCharE :: ReadP (Char, Bool) -- "escaped or not"?
+lexCharE =
+ do c1 <- get
+ if c1 == '\\'
+ then do c2 <- lexEsc; return (c2, True)
+ else do return (c1, False)
where
lexEsc =
lexEscChar
_ -> pfail
lexNumeric =
- do base <- lexBase
+ do base <- lexBaseChar <++ return 10
n <- lexInteger base
guard (n <= toInteger (ord maxBound))
return (chr (fromInteger n))
- where
- lexBase =
- do s <- look
- case s of
- 'o':_ -> do get; return 8
- 'x':_ -> do get; return 16
- _ -> do return 10
-
+
lexCntrlChar =
- do char '^'
+ do _ <- char '^'
c <- get
case c of
'@' -> return '\^@'
lexAscii =
do choice
- [ string "NUL" >> return '\NUL'
- , string "SOH" >> return '\SOH'
+ [ (string "SOH" >> return '\SOH') <++
+ (string "SO" >> return '\SO')
+ -- \SO and \SOH need maximal-munch treatment
+ -- See the Haskell report Sect 2.6
+
+ , string "NUL" >> return '\NUL'
, string "STX" >> return '\STX'
, string "ETX" >> return '\ETX'
, string "EOT" >> return '\EOT'
, string "VT" >> return '\VT'
, string "FF" >> return '\FF'
, string "CR" >> return '\CR'
- , string "SO" >> return '\SO'
, string "SI" >> return '\SI'
, string "DLE" >> return '\DLE'
, string "DC1" >> return '\DC1'
-- ---------------------------------------------------------------------------
-- string literal
-lexString :: LexP
+lexString :: ReadP Lexeme
lexString =
- do char '"'
+ do _ <- char '"'
body id
where
body f =
do (c,esc) <- lexStrItem
if c /= '"' || esc
then body (f.(c:))
- else return (String (f ""))
+ else let s = f "" in
+ return (String s)
- lexStrItem =
- (lexEmpty >> lexStrItem)
- +++ lexChar
+ lexStrItem = (lexEmpty >> lexStrItem)
+ +++ lexCharE
lexEmpty =
- do char '\\'
+ do _ <- char '\\'
c <- get
case c of
'&' -> do return ()
- _ | isSpace c -> do skipSpaces; char '\\'; return ()
+ _ | isSpace c -> do skipSpaces; _ <- char '\\'; return ()
_ -> do pfail
-- ---------------------------------------------------------------------------
--- single character lexemes
-
-lexSingle :: LexP
-lexSingle =
- do c <- satisfy isSingleChar
- return (Single c)
- where
- isSingleChar c = c `elem` ",;()[]{=}_`"
-
--- ---------------------------------------------------------------------------
-- Lexing numbers
-data Number
- = MkNumber
- { value :: Either Integer Rational
- , base :: Base
- , digits :: Digits
- , fraction :: Maybe Digits
- , exponent :: Maybe Integer
- }
- deriving (Eq)
-
type Base = Int
type Digits = [Int]
-instance Show Number where
- showsPrec _ x =
- showsBase (base x)
- . foldr (.) id (map showDigit (digits x))
- . showsFrac (fraction x)
- . showsExp (exponent x)
- where
- showsBase 8 = showString "0o"
- showsBase 10 = id
- showsBase 16 = showString "0x"
-
- showsFrac Nothing = id
- showsFrac (Just ys) =
- showChar '.'
- . foldr (.) id (map showDigit ys)
-
- showsExp Nothing = id
- showsExp (Just exp) =
- showChar 'e'
- . shows exp
-
-showDigit :: Int -> ShowS
-showDigit n | n <= 9 = shows n
- | otherwise = showChar (chr (n + ord 'A' - 10))
-
-lexNumber :: LexP
-lexNumber =
- do base <- lexBase
- lexNumberBase base
- where
- lexBase =
- do s <- look
- case s of
- '0':'o':_ -> do get; get; return 8
- '0':'O':_ -> do get; get; return 8
- '0':'x':_ -> do get; get; return 16
- '0':'X':_ -> do get; get; return 16
- _ -> do return 10
-
-lexNumberBase :: Base -> LexP
-lexNumberBase base =
- do xs <- lexDigits base
- mFrac <- lexFrac base
- mExp <- lexExp base
- return (Number (MkNumber (value xs mFrac mExp) base xs mFrac mExp))
+lexNumber :: ReadP Lexeme
+lexNumber
+ = lexHexOct <++ -- First try for hex or octal 0x, 0o etc
+ -- If that fails, try for a decimal number
+ lexDecNumber -- Start with ordinary digits
+
+lexHexOct :: ReadP Lexeme
+lexHexOct
+ = do _ <- char '0'
+ base <- lexBaseChar
+ digits <- lexDigits base
+ return (Int (val (fromIntegral base) 0 digits))
+
+lexBaseChar :: ReadP Int
+-- Lex a single character indicating the base; fail if not there
+lexBaseChar = do { c <- get;
+ case c of
+ 'o' -> return 8
+ 'O' -> return 8
+ 'x' -> return 16
+ 'X' -> return 16
+ _ -> pfail }
+
+lexDecNumber :: ReadP Lexeme
+lexDecNumber =
+ do xs <- lexDigits 10
+ mFrac <- lexFrac <++ return Nothing
+ mExp <- lexExp <++ return Nothing
+ return (value xs mFrac mExp)
where
- value xs mFrac mExp = valueFracExp (val (fromIntegral base) 0 xs) mFrac mExp
+ value xs mFrac mExp = valueFracExp (val 10 0 xs) mFrac mExp
- valueFracExp a Nothing mExp
- | validIntExp mExp = Left (valueExpInt a mExp)
- | otherwise = Right (valueExp (fromIntegral a) mExp)
- valueFracExp a (Just fs) mExp =
- Right (valueExp (fromInteger a + frac (fromIntegral base) 0 1 fs) mExp)
-
- -- only positive exponents allowed
- validIntExp Nothing = True
- validIntExp (Just e) = e >= 0
-
- valueExpInt a Nothing = a
- valueExpInt a (Just exp) = a * ((fromIntegral base) ^ exp)
-
- valueExp a Nothing = a
- valueExp a (Just exp) = a * ((fromIntegral base) ^^ exp)
-
-lexFrac :: Base -> ReadP (Maybe Digits)
-lexFrac base =
- do s <- look
- case s of
- '.' : _ ->
- do get
- frac <- lexDigits base
- return (Just frac)
-
- _ ->
- do return Nothing
-
-lexExp :: Base -> ReadP (Maybe Integer)
-lexExp base =
- do s <- look
- case s of
- e : _ | e `elem` "eE" && base == 10 ->
- do get
- (signedExp +++ exp)
- where
- signedExp =
- do c <- char '-' +++ char '+'
- n <- lexInteger 10
- return (Just (if c == '-' then -n else n))
-
- exp =
- do n <- lexInteger 10
- return (Just n)
-
- _ ->
- do return Nothing
+ valueFracExp :: Integer -> Maybe Digits -> Maybe Integer
+ -> Lexeme
+ valueFracExp a Nothing Nothing
+ = Int a -- 43
+ valueFracExp a Nothing (Just exp)
+ | exp >= 0 = Int (a * (10 ^ exp)) -- 43e7
+ | otherwise = Rat (valExp (fromInteger a) exp) -- 43e-7
+ valueFracExp a (Just fs) mExp
+ = case mExp of
+ Nothing -> Rat rat -- 4.3
+ Just exp -> Rat (valExp rat exp) -- 4.3e-4
+ where
+ rat :: Rational
+ rat = fromInteger a + frac 10 0 1 fs
+
+ valExp :: Rational -> Integer -> Rational
+ valExp rat exp = rat * (10 ^^ exp)
+
+lexFrac :: ReadP (Maybe Digits)
+-- Read the fractional part; fail if it doesn't
+-- start ".d" where d is a digit
+lexFrac = do _ <- char '.'
+ fraction <- lexDigits 10
+ return (Just fraction)
+
+lexExp :: ReadP (Maybe Integer)
+lexExp = do _ <- char 'e' +++ char 'E'
+ exp <- signedExp +++ lexInteger 10
+ return (Just exp)
+ where
+ signedExp
+ = do c <- char '-' +++ char '+'
+ n <- lexInteger 10
+ return (if c == '-' then -n else n)
lexDigits :: Int -> ReadP Digits
+-- Lex a non-empty sequence of digits in specified base
lexDigits base =
do s <- look
xs <- scan s id
return xs
where
scan (c:cs) f = case valDig base c of
- Just n -> do get; scan cs (f.(n:))
+ Just n -> do _ <- get; scan cs (f.(n:))
Nothing -> do return (f [])
scan [] f = do return (f [])
return (val (fromIntegral base) 0 xs)
val :: Num a => a -> a -> Digits -> a
-val base y [] = y
+-- val base y [d1,..,dn] = y ++ [d1,..,dn], as it were
+val _ y [] = y
val base y (x:xs) = y' `seq` val base y' xs
where
y' = y * base + fromIntegral x
frac :: Integral a => a -> a -> a -> Digits -> Ratio a
-frac base a b [] = a % b
+frac _ a b [] = a % b
frac base a b (x:xs) = a' `seq` b' `seq` frac base a' b' xs
where
a' = a * base + fromIntegral x
| '0' <= c && c <= '7' = Just (ord c - ord '0')
| otherwise = Nothing
-valDig 10 c
- | '0' <= c && c <= '9' = Just (ord c - ord '0')
- | otherwise = Nothing
+valDig 10 c = valDecDig c
valDig 16 c
| '0' <= c && c <= '9' = Just (ord c - ord '0')
| 'A' <= c && c <= 'F' = Just (ord c - ord 'A' + 10)
| otherwise = Nothing
--- ----------------------------------------------------------------------
--- conversion
+valDig _ _ = error "valDig: Bad base"
-numberToInt :: Number -> Maybe Int
-numberToInt x =
- case numberToInteger x of
- Just n | minBound' <= n && n <= maxBound' -> Just (fromInteger n)
- _ -> Nothing
- where
- minBound' = toInteger (minBound :: Int)
- maxBound' = toInteger (maxBound :: Int)
-
-numberToInteger :: Number -> Maybe Integer
-numberToInteger x =
- case value x of
- Left n -> Just n
- _ -> Nothing
-
-numberToRational :: Number -> Maybe Rational
-numberToRational x =
- case value x of
- Left n -> Just (fromInteger n)
- Right r -> Just r
-
-numberToFloat :: Number -> Maybe Float
-numberToFloat x =
- case value x of
- Left n -> Just (fromInteger n)
- Right r -> Just (fromRational r)
-
-numberToDouble :: Number -> Maybe Double
-numberToDouble x =
- case value x of
- Left n -> Just (fromInteger n)
- Right r -> Just (fromRational r)
+valDecDig :: Char -> Maybe Int
+valDecDig c
+ | '0' <= c && c <= '9' = Just (ord c - ord '0')
+ | otherwise = Nothing
-- ----------------------------------------------------------------------
-- other numeric lexing functions