-{-# OPTIONS -fno-implicit-prelude #-}
+{-# LANGUAGE CPP, NoImplicitPrelude #-}
+
-----------------------------------------------------------------------------
-- |
-- Module : Text.Read.Lex
--
-- Maintainer : libraries@haskell.org
-- Stability : provisional
--- Portability : portable
+-- Portability : non-portable (uses Text.ParserCombinators.ReadP)
--
-- The cut-down Haskell lexer, used by Text.Read
--
module Text.Read.Lex
-- lexing types
- ( Lexeme(..) -- :: *; Show, Eq
-
- -- lexer
- , lex -- :: ReadP Lexeme -- Skips leading spaces
- , hsLex -- :: ReadP String
-
- , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
- , readOctP -- :: Num a => ReadP a
- , readDecP -- :: Num a => ReadP a
- , readHexP -- :: Num a => ReadP a
+ ( Lexeme(..) -- :: *; Show, Eq
+
+ -- lexer
+ , lex -- :: ReadP Lexeme Skips leading spaces
+ , hsLex -- :: ReadP String
+ , lexChar -- :: ReadP Char Reads just one char, with H98 escapes
+
+ , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
+ , readOctP -- :: Num a => ReadP a
+ , readDecP -- :: Num a => ReadP a
+ , readHexP -- :: Num a => ReadP a
)
where
import Text.ParserCombinators.ReadP
+#ifdef __GLASGOW_HASKELL__
import GHC.Base
import GHC.Num( Num(..), Integer )
-import GHC.Show( Show(.. ), showChar, showString,
- isSpace, isAlpha, isAlphaNum,
- isOctDigit, isHexDigit, toUpper )
-import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral, fromRational,
- toInteger, (^), (^^) )
-import GHC.Float( Float, Double )
+import GHC.Show( Show(..) )
+#ifndef __HADDOCK__
+import {-# SOURCE #-} GHC.Unicode ( isSpace, isAlpha, isAlphaNum )
+#endif
+import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral,
+ toInteger, (^), (^^), infinity, notANumber )
import GHC.List
-import GHC.Show( ShowS, shows )
-import GHC.Enum( minBound, maxBound )
+import GHC.Enum( maxBound )
+#else
+import Prelude hiding ( lex )
+import Data.Char( chr, ord, isSpace, isAlpha, isAlphaNum )
+import Data.Ratio( Ratio, (%) )
+#endif
+#ifdef __HUGS__
+import Hugs.Prelude( Ratio(..) )
+#endif
import Data.Maybe
-import Data.Either
import Control.Monad
-- -----------------------------------------------------------------------------
-- Lexing types
-type LexP = ReadP Lexeme
-
+-- ^ Haskell lexemes.
data Lexeme
- = Char Char -- Quotes removed,
- | String String -- escapes interpreted
- | Punc String -- Punctuation, eg "(", "::"
- | Ident String -- Haskell identifiers, e.g. foo, baz
- | Symbol String -- Haskell symbols, e.g. >>, %
- | Int Integer
- | Rat Rational
+ = Char Char -- ^ Character literal
+ | String String -- ^ String literal, with escapes interpreted
+ | Punc String -- ^ Punctuation or reserved symbol, e.g. @(@, @::@
+ | Ident String -- ^ Haskell identifier, e.g. @foo@, @Baz@
+ | Symbol String -- ^ Haskell symbol, e.g. @>>@, @:%@
+ | Int Integer -- ^ Integer literal
+ | Rat Rational -- ^ Floating point literal
| EOF
deriving (Eq, Show)
hsLex :: ReadP String
-- ^ Haskell lexer: returns the lexed string, rather than the lexeme
hsLex = do skipSpaces
- (s,_) <- gather lexToken
- return s
+ (s,_) <- gather lexToken
+ return s
lexToken :: ReadP Lexeme
lexToken = lexEOF +++
- lexLitChar +++
- lexString +++
- lexPunc +++
- lexSymbol +++
- lexId +++
- lexNumber
+ lexLitChar +++
+ lexString +++
+ lexPunc +++
+ lexSymbol +++
+ lexId +++
+ lexNumber
-- ----------------------------------------------------------------------
-- End of file
lexEOF :: ReadP Lexeme
lexEOF = do s <- look
- guard (null s)
- return EOF
+ guard (null s)
+ return EOF
-- ---------------------------------------------------------------------------
-- Single character lexemes
do c <- satisfy isPuncChar
return (Punc [c])
where
- isPuncChar c = c `elem` ",;()[]{}_`"
+ isPuncChar c = c `elem` ",;()[]{}`"
-- ----------------------------------------------------------------------
-- Symbols
lexSymbol =
do s <- munch1 isSymbolChar
if s `elem` reserved_ops then
- return (Punc s) -- Reserved-ops count as punctuation
+ return (Punc s) -- Reserved-ops count as punctuation
else
- return (Symbol s)
+ return (Symbol s)
where
isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]
-- identifiers
lexId :: ReadP Lexeme
-lexId =
- do c <- satisfy isAlpha
- s <- munch isIdfChar
- return (Ident (c:s))
- where
- isIdfChar c = isAlphaNum c || c `elem` "_'"
+lexId = lex_nan <++ lex_id
+ where
+ -- NaN and Infinity look like identifiers, so
+ -- we parse them first.
+ lex_nan = (string "NaN" >> return (Rat notANumber)) +++
+ (string "Infinity" >> return (Rat infinity))
+
+ lex_id = do c <- satisfy isIdsChar
+ s <- munch isIdfChar
+ return (Ident (c:s))
+
+ -- Identifiers can start with a '_'
+ isIdsChar c = isAlpha c || c == '_'
+ isIdfChar c = isAlphaNum c || c `elem` "_'"
+
+#ifndef __GLASGOW_HASKELL__
+infinity, notANumber :: Rational
+infinity = 1 :% 0
+notANumber = 0 :% 0
+#endif
-- ---------------------------------------------------------------------------
-- Lexing character literals
lexLitChar :: ReadP Lexeme
lexLitChar =
- do char '\''
- (c,esc) <- lexChar
- guard (esc || c /= '\'')
- char '\''
+ do _ <- char '\''
+ (c,esc) <- lexCharE
+ guard (esc || c /= '\'') -- Eliminate '' possibility
+ _ <- char '\''
return (Char c)
-lexChar :: ReadP (Char, Bool) -- "escaped or not"?
-lexChar =
- do c <- get
- if c == '\\'
- then do c <- lexEsc; return (c, True)
- else do return (c, False)
+lexChar :: ReadP Char
+lexChar = do { (c,_) <- lexCharE; return c }
+
+lexCharE :: ReadP (Char, Bool) -- "escaped or not"?
+lexCharE =
+ do c1 <- get
+ if c1 == '\\'
+ then do c2 <- lexEsc; return (c2, True)
+ else do return (c1, False)
where
lexEsc =
lexEscChar
_ -> pfail
lexNumeric =
- do base <- lexBase
+ do base <- lexBaseChar <++ return 10
n <- lexInteger base
guard (n <= toInteger (ord maxBound))
return (chr (fromInteger n))
- where
- lexBase =
- do s <- look
- case s of
- 'o':_ -> do get; return 8
- 'x':_ -> do get; return 16
- _ -> do return 10
-
+
lexCntrlChar =
- do char '^'
+ do _ <- char '^'
c <- get
case c of
'@' -> return '\^@'
lexAscii =
do choice
- [ string "NUL" >> return '\NUL'
- , string "SOH" >> return '\SOH'
+ [ (string "SOH" >> return '\SOH') <++
+ (string "SO" >> return '\SO')
+ -- \SO and \SOH need maximal-munch treatment
+ -- See the Haskell report Sect 2.6
+
+ , string "NUL" >> return '\NUL'
, string "STX" >> return '\STX'
, string "ETX" >> return '\ETX'
, string "EOT" >> return '\EOT'
, string "VT" >> return '\VT'
, string "FF" >> return '\FF'
, string "CR" >> return '\CR'
- , string "SO" >> return '\SO'
, string "SI" >> return '\SI'
, string "DLE" >> return '\DLE'
, string "DC1" >> return '\DC1'
lexString :: ReadP Lexeme
lexString =
- do char '"'
+ do _ <- char '"'
body id
where
body f =
if c /= '"' || esc
then body (f.(c:))
else let s = f "" in
- return (String s)
+ return (String s)
- lexStrItem =
- (lexEmpty >> lexStrItem)
- +++ lexChar
+ lexStrItem = (lexEmpty >> lexStrItem)
+ +++ lexCharE
lexEmpty =
- do char '\\'
+ do _ <- char '\\'
c <- get
case c of
'&' -> do return ()
- _ | isSpace c -> do skipSpaces; char '\\'; return ()
+ _ | isSpace c -> do skipSpaces; _ <- char '\\'; return ()
_ -> do pfail
-- ---------------------------------------------------------------------------
-- Lexing numbers
-infinity, notANumber :: Rational
-infinity = 1 :% 0
-notANumber = 0 :% 0
-
type Base = Int
type Digits = [Int]
-showDigit :: Int -> ShowS
-showDigit n | n <= 9 = shows n
- | otherwise = showChar (chr (n + ord 'A' - 10))
-
lexNumber :: ReadP Lexeme
-lexNumber = do { string "NaN"; return (Rat notANumber) } +++
- do { string "Infinity"; return (Rat infinity) } +++
- do { base <- lexBase ; lexNumberBase base }
- where
- lexBase =
- do s <- look
- case s of
- '0':'o':_ -> do get; get; return 8
- '0':'O':_ -> do get; get; return 8
- '0':'x':_ -> do get; get; return 16
- '0':'X':_ -> do get; get; return 16
- _ -> do return 10
-
-lexNumberBase :: Base -> ReadP Lexeme
-lexNumberBase base =
- do xs <- lexDigits base
- mFrac <- lexFrac base
- mExp <- lexExp base
+lexNumber
+ = lexHexOct <++ -- First try for hex or octal 0x, 0o etc
+ -- If that fails, try for a decimal number
+ lexDecNumber -- Start with ordinary digits
+
+lexHexOct :: ReadP Lexeme
+lexHexOct
+ = do _ <- char '0'
+ base <- lexBaseChar
+ digits <- lexDigits base
+ return (Int (val (fromIntegral base) 0 digits))
+
+lexBaseChar :: ReadP Int
+-- Lex a single character indicating the base; fail if not there
+lexBaseChar = do { c <- get;
+ case c of
+ 'o' -> return 8
+ 'O' -> return 8
+ 'x' -> return 16
+ 'X' -> return 16
+ _ -> pfail }
+
+lexDecNumber :: ReadP Lexeme
+lexDecNumber =
+ do xs <- lexDigits 10
+ mFrac <- lexFrac <++ return Nothing
+ mExp <- lexExp <++ return Nothing
return (value xs mFrac mExp)
where
- baseInteger :: Integer
- baseInteger = fromIntegral base
-
- value xs mFrac mExp = valueFracExp (val baseInteger 0 xs) mFrac mExp
+ value xs mFrac mExp = valueFracExp (val 10 0 xs) mFrac mExp
valueFracExp :: Integer -> Maybe Digits -> Maybe Integer
- -> Lexeme
- valueFracExp a Nothing Nothing
- = Int a -- 43
+ -> Lexeme
+ valueFracExp a Nothing Nothing
+ = Int a -- 43
valueFracExp a Nothing (Just exp)
- | exp >= 0 = Int (a * (baseInteger ^ exp)) -- 43e7
- | otherwise = Rat (valExp (fromInteger a) exp) -- 43e-7
+ | exp >= 0 = Int (a * (10 ^ exp)) -- 43e7
+ | otherwise = Rat (valExp (fromInteger a) exp) -- 43e-7
valueFracExp a (Just fs) mExp
= case mExp of
- Nothing -> Rat rat -- 4.3
- Just exp -> Rat (valExp rat exp) -- 4.3e-4
+ Nothing -> Rat rat -- 4.3
+ Just exp -> Rat (valExp rat exp) -- 4.3e-4
where
- rat :: Rational
- rat = fromInteger a + frac (fromIntegral base) 0 1 fs
+ rat :: Rational
+ rat = fromInteger a + frac 10 0 1 fs
valExp :: Rational -> Integer -> Rational
- valExp rat exp = rat * (fromIntegral base ^^ exp)
-
-lexFrac :: Base -> ReadP (Maybe Digits)
-lexFrac base =
- do s <- look
- case s of
- '.' : d : _ | isJust (valDig base d) ->
- -- The lookahead checks for point and at least one
- -- valid following digit. For example 1..n must
- -- lex the "1" off rather than failing.
- do get
- frac <- lexDigits base
- return (Just frac)
-
- _ ->
- do return Nothing
-
-lexExp :: Base -> ReadP (Maybe Integer)
-lexExp base =
- do s <- look
- case s of
- e : _ | e `elem` "eE" && base == 10 ->
- do get
- (signedExp +++ exp)
- where
- signedExp =
- do c <- char '-' +++ char '+'
- n <- lexInteger 10
- return (Just (if c == '-' then -n else n))
-
- exp =
- do n <- lexInteger 10
- return (Just n)
-
- _ ->
- do return Nothing
+ valExp rat exp = rat * (10 ^^ exp)
+
+lexFrac :: ReadP (Maybe Digits)
+-- Read the fractional part; fail if it doesn't
+-- start ".d" where d is a digit
+lexFrac = do _ <- char '.'
+ fraction <- lexDigits 10
+ return (Just fraction)
+
+lexExp :: ReadP (Maybe Integer)
+lexExp = do _ <- char 'e' +++ char 'E'
+ exp <- signedExp +++ lexInteger 10
+ return (Just exp)
+ where
+ signedExp
+ = do c <- char '-' +++ char '+'
+ n <- lexInteger 10
+ return (if c == '-' then -n else n)
lexDigits :: Int -> ReadP Digits
-- Lex a non-empty sequence of digits in specified base
return xs
where
scan (c:cs) f = case valDig base c of
- Just n -> do get; scan cs (f.(n:))
+ Just n -> do _ <- get; scan cs (f.(n:))
Nothing -> do return (f [])
scan [] f = do return (f [])
val :: Num a => a -> a -> Digits -> a
-- val base y [d1,..,dn] = y ++ [d1,..,dn], as it were
-val base y [] = y
+val _ y [] = y
val base y (x:xs) = y' `seq` val base y' xs
where
y' = y * base + fromIntegral x
frac :: Integral a => a -> a -> a -> Digits -> Ratio a
-frac base a b [] = a % b
+frac _ a b [] = a % b
frac base a b (x:xs) = a' `seq` b' `seq` frac base a' b' xs
where
a' = a * base + fromIntegral x
| '0' <= c && c <= '7' = Just (ord c - ord '0')
| otherwise = Nothing
-valDig 10 c
- | '0' <= c && c <= '9' = Just (ord c - ord '0')
- | otherwise = Nothing
+valDig 10 c = valDecDig c
valDig 16 c
| '0' <= c && c <= '9' = Just (ord c - ord '0')
| 'A' <= c && c <= 'F' = Just (ord c - ord 'A' + 10)
| otherwise = Nothing
+valDig _ _ = error "valDig: Bad base"
+
+valDecDig :: Char -> Maybe Int
+valDecDig c
+ | '0' <= c && c <= '9' = Just (ord c - ord '0')
+ | otherwise = Nothing
+
-- ----------------------------------------------------------------------
-- other numeric lexing functions