From 4af6a1708d420733fc9110cbac58e8bfacdaf53d Mon Sep 17 00:00:00 2001 From: simonpj Date: Wed, 5 Jun 2002 14:08:25 +0000 Subject: [PATCH] [project @ 2002-06-05 14:08:24 by simonpj] ------------------------------------------------ Fix the (new) lexer, and make the derived read and show code work according to the new H98 report ------------------------------------------------ The new lexer, based on Koen's cunning parser (Text.ParserCombinators.ReadP) wasn't quite right. It's all very cool now. In particular: * The H98 "lex" function should return the exact string parsed, and it now does, aided by the new combinator ReadP.gather. * As a result the Text.Read.Lex Lexeme type is much simpler than before data Lexeme = Char Char -- Quotes removed, | String String -- escapes interpreted | Punc String -- Punctuation, eg "(", "::" | Ident String -- Haskell identifiers, e.g. foo, baz | Symbol String -- Haskell symbols, e.g. >>, % | Int Integer | Rat Rational | EOF deriving (Eq,Show) * Multi-character punctuation, like "::" was getting lexed as a Symbol, but it should be a Punc. * Parsing numbers wasn't quite right. "1..n" got it confused because it got committed to a decimal point and then found a second '.'. * The new H98 spec for Show is there, which ignores associativity. --- GHC/Read.lhs | 100 +++++++--------- Numeric.hs | 9 +- Text/ParserCombinators/ReadP.hs | 43 +++++-- Text/Read/Lex.hs | 240 ++++++++++++++++----------------------- 4 files changed, 174 insertions(+), 218 deletions(-) diff --git a/GHC/Read.lhs b/GHC/Read.lhs index b7b6965..32bc227 100644 --- a/GHC/Read.lhs +++ b/GHC/Read.lhs @@ -55,14 +55,7 @@ import Text.ParserCombinators.ReadP import qualified Text.Read.Lex as L -import Text.Read.Lex - ( Lexeme(..) - , Number(..) - , numberToInt - , numberToInteger - , numberToFloat - , numberToDouble - ) +import Text.Read.Lex ( Lexeme(..) ) import Text.ParserCombinators.ReadPrec @@ -173,16 +166,15 @@ read s = either error id (readEither s) -- H98 compatibility lex :: ReadS String -- As defined by H98 -lex "" = [("","")] -- ugly hack -lex s = readP_to_S (do { lexeme <- L.lex ; - return (show lexeme) }) s +lex s = readP_to_S L.hsLex s lexLitChar :: ReadS String -- As defined by H98 -lexLitChar = readP_to_S (do { lexeme <- L.lexLitChar ; - return (show lexeme) }) +lexLitChar = readP_to_S (do { P.skipSpaces ; + (s, Char _) <- P.gather L.lex ; + return s }) readLitChar :: ReadS Char -- As defined by H98 -readLitChar = readP_to_S (do { Char c <- L.lexLitChar ; +readLitChar = readP_to_S (do { Char c <- L.lex ; return c }) lexDigits :: ReadS String @@ -197,11 +189,10 @@ lexP = lift L.lex paren :: ReadPrec a -> ReadPrec a -- (paren p) parses (P0) -- where p parses P0 in precedence context zero -paren p = - do Single '(' <- lexP - x <- reset p - Single ')' <- lexP - return x +paren p = do Punc "(" <- lexP + x <- reset p + Punc ")" <- lexP + return x parens :: ReadPrec a -> ReadPrec a -- (parens p) parses P, (P0), ((P0)), etc, @@ -215,15 +206,15 @@ parens p = optional list :: ReadPrec a -> ReadPrec [a] list readx = parens - ( do Single '[' <- lexP + ( do Punc "[" <- lexP (listRest False +++ listNext) ) where listRest started = - do Single c <- lexP + do Punc c <- lexP case c of - ']' -> return [] - ',' | started -> listNext + "]" -> return [] + "," | started -> listNext _ -> pfail listNext = @@ -374,7 +365,7 @@ instance Read Lexeme where %********************************************************* \begin{code} -readNumber :: Num a => (Number -> Maybe a) -> ReadPrec a +readNumber :: Num a => (Lexeme -> Maybe a) -> ReadPrec a -- Read a signed number readNumber convert = parens @@ -383,48 +374,37 @@ readNumber convert = Symbol "-" -> do n <- readNumber convert return (negate n) - Number y -> case convert y of - Just n -> return n - Nothing -> pfail - - _ -> pfail + _ -> case convert x of + Just n -> return n + Nothing -> pfail ) -readIEEENumber :: (RealFloat a) => (Number -> Maybe a) -> ReadPrec a --- Read a Float/Double. -readIEEENumber convert = - parens - ( do x <- lexP - case x of - Ident "NaN" -> return (0/0) - Ident "Infinity" -> return (1/0) - Symbol "-" -> do n <- readIEEENumber convert - return (negate n) - - Number y -> case convert y of - Just n -> return n - Nothing -> pfail - - _ -> pfail - ) +convertInt :: Num a => Lexeme -> Maybe a +convertInt (Int i) = Just (fromInteger i) +convertInt _ = Nothing + +convertFrac :: Fractional a => Lexeme -> Maybe a +convertFrac (Int i) = Just (fromInteger i) +convertFrac (Rat r) = Just (fromRational r) +convertFrac _ = Nothing instance Read Int where - readPrec = readNumber numberToInt + readPrec = readNumber convertInt readListPrec = readListPrecDefault readList = readListDefault instance Read Integer where - readPrec = readNumber numberToInteger + readPrec = readNumber convertInt readListPrec = readListPrecDefault readList = readListDefault instance Read Float where - readPrec = readIEEENumber numberToFloat + readPrec = readNumber convertFrac readListPrec = readListPrecDefault readList = readListDefault instance Read Double where - readPrec = readIEEENumber numberToDouble + readPrec = readNumber convertFrac readListPrec = readListPrecDefault readList = readListDefault @@ -467,7 +447,7 @@ instance (Read a, Read b) => Read (a,b) where parens ( paren ( do x <- readPrec - Single ',' <- lexP + Punc "," <- lexP y <- readPrec return (x,y) ) @@ -482,9 +462,9 @@ instance (Read a, Read b, Read c) => Read (a, b, c) where parens ( paren ( do x <- readPrec - Single ',' <- lexP + Punc "," <- lexP y <- readPrec - Single ',' <- lexP + Punc "," <- lexP z <- readPrec return (x,y,z) ) @@ -498,11 +478,11 @@ instance (Read a, Read b, Read c, Read d) => Read (a, b, c, d) where parens ( paren ( do w <- readPrec - Single ',' <- lexP + Punc "," <- lexP x <- readPrec - Single ',' <- lexP + Punc "," <- lexP y <- readPrec - Single ',' <- lexP + Punc "," <- lexP z <- readPrec return (w,x,y,z) ) @@ -516,13 +496,13 @@ instance (Read a, Read b, Read c, Read d, Read e) => Read (a, b, c, d, e) where parens ( paren ( do v <- readPrec - Single ',' <- lexP + Punc "," <- lexP w <- readPrec - Single ',' <- lexP + Punc "," <- lexP x <- readPrec - Single ',' <- lexP + Punc "," <- lexP y <- readPrec - Single ',' <- lexP + Punc "," <- lexP z <- readPrec return (v,w,x,y,z) ) diff --git a/Numeric.hs b/Numeric.hs index 55102a2..9f84fdb 100644 --- a/Numeric.hs +++ b/Numeric.hs @@ -78,10 +78,11 @@ readFloat = readP_to_S readFloatP readFloatP :: RealFrac a => ReadP a readFloatP = - do L.Number x <- L.lex - case L.numberToRational x of - Nothing -> pfail - Just y -> return (fromRational y) + do tok <- L.lex + case tok of + L.Rat y -> return (fromRational y) + L.Int i -> return (fromInteger i) + other -> pfail -- It's turgid to have readSigned work using list comprehensions, -- but it's specified as a ReadS to ReadS transformer diff --git a/Text/ParserCombinators/ReadP.hs b/Text/ParserCombinators/ReadP.hs index 1e01ae9..8fb06af 100644 --- a/Text/ParserCombinators/ReadP.hs +++ b/Text/ParserCombinators/ReadP.hs @@ -9,6 +9,12 @@ -- Stability : provisional -- Portability : portable -- +-- "ReadP" is a library of parser combinators, originally written by Koen Claessen. +-- It parses all alternatives in parallel, so it never keeps hold of +-- the beginning of the input string, a common source of space leaks with +-- other parsers. The '(+++)' choice combinator is genuinely commutative; +-- it makes no difference which branch is "shorter". + ----------------------------------------------------------------------------- module Text.ParserCombinators.ReadP @@ -20,6 +26,7 @@ module Text.ParserCombinators.ReadP get, -- :: ReadP Char look, -- :: ReadP String (+++), -- :: ReadP a -> ReadP a -> ReadP a + gather, -- :: ReadP a -> ReadP (String, a) -- * Other operations pfail, -- :: ReadP a @@ -82,16 +89,32 @@ look = R (\k -> Look k) (+++) :: ReadP a -> ReadP a -> ReadP a R f1 +++ R f2 = R (\k -> f1 k >|< f2 k) - where - Get f1 >|< Get f2 = Get (\c -> f1 c >|< f2 c) - Fail >|< p = p - p >|< Fail = p - Look f >|< Look g = Look (\s -> f s >|< g s) - Result x p >|< q = Result x (p >|< q) - p >|< Result x q = Result x (p >|< q) - Look f >|< p = Look (\s -> f s >|< p) - p >|< Look f = Look (\s -> p >|< f s) - p >|< q = ReadS (\s -> run p s ++ run q s) + +gather :: ReadP a -> ReadP (String, a) +-- ^ Transforms a parser into one that does the same, but +-- in addition returns the exact characters read. +-- IMPORTANT NOTE: 'gather' gives a runtime error if its first argument +-- is built using any occurrences of readS_to_P. +gather (R m) + = R (\k -> gath id (m (\a -> Result (\s -> k (s,a)) Fail))) + where + gath l (Get f) = Get (\c -> gath (l.(c:)) (f c)) + gath l Fail = Fail + gath l (Look f) = Look (\s -> gath l (f s)) + gath l (Result k p) = k (l []) >|< gath l p + gath l (ReadS r) = error "do not use ReadS in gather!" + +(>|<) :: P a -> P a -> P a +-- Not exported! Works over the representation type +Get f1 >|< Get f2 = Get (\c -> f1 c >|< f2 c) +Fail >|< p = p +p >|< Fail = p +Look f >|< Look g = Look (\s -> f s >|< g s) +Result x p >|< q = Result x (p >|< q) +p >|< Result x q = Result x (p >|< q) +Look f >|< p = Look (\s -> f s >|< p) +p >|< Look f = Look (\s -> p >|< f s) +p >|< q = ReadS (\s -> run p s ++ run q s) run :: P a -> ReadS a run (Get f) [] = [] diff --git a/Text/Read/Lex.hs b/Text/Read/Lex.hs index 7fdf024..9dfd361 100644 --- a/Text/Read/Lex.hs +++ b/Text/Read/Lex.hs @@ -15,22 +15,12 @@ module Text.Read.Lex -- lexing types - ( LexP -- :: *; = ReadP Lexeme - , Lexeme(..) -- :: *; Show, Eq + ( Lexeme(..) -- :: *; Show, Eq -- lexer - , lex -- :: LexP - , lexLitChar -- :: LexP + , lex -- :: ReadP Lexeme -- Skips leading spaces + , hsLex -- :: ReadP String - -- numbers - , Number -- :: *; Show, Eq - - , numberToInt -- :: Number -> Maybe Int - , numberToInteger -- :: Number -> Maybe Integer - , numberToRational -- :: Number -> Maybe Integer - , numberToFloat -- :: Number -> Maybe Float - , numberToDouble -- :: Number -> Maybe Double - , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a , readOctP -- :: Num a => ReadP a , readDecP -- :: Num a => ReadP a @@ -61,50 +51,74 @@ import Control.Monad type LexP = ReadP Lexeme data Lexeme - = Char Char - | String String - | Single Char - | Symbol String - | Ident String - | Number Number - deriving (Eq) - -instance Show Lexeme where - showsPrec n (Char c) = showsPrec n c - showsPrec n (String s) = showsPrec n s - showsPrec _ (Single c) = showChar c - showsPrec _ (Ident s) = showString s - showsPrec _ (Symbol s) = showString s - showsPrec n (Number x) = showsPrec n x + = Char Char -- Quotes removed, + | String String -- escapes interpreted + | Punc String -- Punctuation, eg "(", "::" + | Ident String -- Haskell identifiers, e.g. foo, baz + | Symbol String -- Haskell symbols, e.g. >>, % + | Int Integer + | Rat Rational + | EOF + deriving (Eq, Show) -- ----------------------------------------------------------------------------- -- Lexing -lex :: LexP -lex = - do skipSpaces - (lexLitChar - +++ lexString - +++ lexSingle - +++ lexSymbol - +++ lexIdf - +++ lexNumber) +lex :: ReadP Lexeme +lex = skipSpaces >> lexToken + +hsLex :: ReadP String +-- ^ Haskell lexer: returns the lexed string, rather than the lexeme +hsLex = do skipSpaces + (s,_) <- gather lexToken + return s + +lexToken :: ReadP Lexeme +lexToken = lexEOF +++ + lexLitChar +++ + lexString +++ + lexPunc +++ + lexSymbol +++ + lexId +++ + lexNumber + -- ---------------------------------------------------------------------- --- symbols +-- End of file +lexEOF :: ReadP Lexeme +lexEOF = do s <- look + guard (null s) + return EOF + +-- --------------------------------------------------------------------------- +-- Single character lexemes -lexSymbol :: LexP +lexPunc :: ReadP Lexeme +lexPunc = + do c <- satisfy isPuncChar + return (Punc [c]) + where + isPuncChar c = c `elem` ",;()[]{}_`" + +-- ---------------------------------------------------------------------- +-- Symbols + +lexSymbol :: ReadP Lexeme lexSymbol = do s <- munch1 isSymbolChar - return (Symbol s) + if s `elem` reserved_ops then + return (Punc s) -- Reserved-ops count as punctuation + else + return (Symbol s) where isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~" + reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"] -- ---------------------------------------------------------------------- -- identifiers -lexIdf :: LexP -lexIdf = +lexId :: ReadP Lexeme +lexId = do c <- satisfy isAlpha s <- munch isIdfChar return (Ident (c:s)) @@ -114,7 +128,7 @@ lexIdf = -- --------------------------------------------------------------------------- -- Lexing character literals -lexLitChar :: LexP +lexLitChar :: ReadP Lexeme lexLitChar = do char '\'' (c,esc) <- lexChar @@ -243,7 +257,7 @@ lexChar = -- --------------------------------------------------------------------------- -- string literal -lexString :: LexP +lexString :: ReadP Lexeme lexString = do char '"' body id @@ -252,7 +266,8 @@ lexString = do (c,esc) <- lexStrItem if c /= '"' || esc then body (f.(c:)) - else return (String (f "")) + else let s = f "" in + return (String s) lexStrItem = (lexEmpty >> lexStrItem) @@ -267,60 +282,23 @@ lexString = _ -> do pfail -- --------------------------------------------------------------------------- --- single character lexemes - -lexSingle :: LexP -lexSingle = - do c <- satisfy isSingleChar - return (Single c) - where - isSingleChar c = c `elem` ",;()[]{=}_`" - --- --------------------------------------------------------------------------- -- Lexing numbers -data Number - = MkNumber - { value :: Either Integer Rational - , base :: Base - , digits :: Digits - , fraction :: Maybe Digits - , exponent :: Maybe Integer - } - deriving (Eq) +infinity, notANumber :: Rational +infinity = 1 % 0 +notANumber = 0 % 0 type Base = Int type Digits = [Int] -instance Show Number where - showsPrec _ x = - showsBase (base x) - . foldr (.) id (map showDigit (digits x)) - . showsFrac (fraction x) - . showsExp (exponent x) - where - showsBase 8 = showString "0o" - showsBase 10 = id - showsBase 16 = showString "0x" - - showsFrac Nothing = id - showsFrac (Just ys) = - showChar '.' - . foldr (.) id (map showDigit ys) - - showsExp Nothing = id - showsExp (Just exp) = - showChar 'e' - . shows exp - showDigit :: Int -> ShowS showDigit n | n <= 9 = shows n | otherwise = showChar (chr (n + ord 'A' - 10)) -lexNumber :: LexP -lexNumber = - do base <- lexBase - lexNumberBase base +lexNumber :: ReadP Lexeme +lexNumber = do { string "NaN"; return (Rat notANumber) } +++ + do { string "Infinity"; return (Rat infinity) } +++ + do { base <- lexBase ; lexNumberBase base } where lexBase = do s <- look @@ -331,36 +309,44 @@ lexNumber = '0':'X':_ -> do get; get; return 16 _ -> do return 10 -lexNumberBase :: Base -> LexP +lexNumberBase :: Base -> ReadP Lexeme lexNumberBase base = do xs <- lexDigits base mFrac <- lexFrac base mExp <- lexExp base - return (Number (MkNumber (value xs mFrac mExp) base xs mFrac mExp)) + return (value xs mFrac mExp) where - value xs mFrac mExp = valueFracExp (val (fromIntegral base) 0 xs) mFrac mExp - - valueFracExp a Nothing mExp - | validIntExp mExp = Left (valueExpInt a mExp) - | otherwise = Right (valueExp (fromIntegral a) mExp) - valueFracExp a (Just fs) mExp = - Right (valueExp (fromInteger a + frac (fromIntegral base) 0 1 fs) mExp) - - -- only positive exponents allowed - validIntExp Nothing = True - validIntExp (Just e) = e >= 0 + baseInteger :: Integer + baseInteger = fromIntegral base - valueExpInt a Nothing = a - valueExpInt a (Just exp) = a * ((fromIntegral base) ^ exp) - - valueExp a Nothing = a - valueExp a (Just exp) = a * ((fromIntegral base) ^^ exp) + value xs mFrac mExp = valueFracExp (val baseInteger 0 xs) mFrac mExp + + valueFracExp :: Integer -> Maybe Digits -> Maybe Integer + -> Lexeme + valueFracExp a Nothing Nothing + = Int a -- 43 + valueFracExp a Nothing (Just exp) + | exp >= 0 = Int (a * (baseInteger ^ exp)) -- 43e7 + | otherwise = Rat (valExp (fromInteger a) exp) -- 43e-7 + valueFracExp a (Just fs) mExp + = case mExp of + Nothing -> Rat rat -- 4.3 + Just exp -> Rat (valExp rat exp) -- 4.3e-4 + where + rat :: Rational + rat = fromInteger a + frac (fromIntegral base) 0 1 fs + + valExp :: Rational -> Integer -> Rational + valExp rat exp = rat * (fromIntegral base ^^ exp) lexFrac :: Base -> ReadP (Maybe Digits) lexFrac base = do s <- look case s of - '.' : _ -> + '.' : d : _ | isJust (valDig base d) -> + -- The lookahead checks for point and at least one + -- valid following digit. For example 1..n must + -- lex the "1" off rather than failing. do get frac <- lexDigits base return (Just frac) @@ -389,6 +375,7 @@ lexExp base = do return Nothing lexDigits :: Int -> ReadP Digits +-- Lex a non-empty sequence of digits in specified base lexDigits base = do s <- look xs <- scan s id @@ -406,6 +393,7 @@ lexInteger base = return (val (fromIntegral base) 0 xs) val :: Num a => a -> a -> Digits -> a +-- val base y [d1,..,dn] = y ++ [d1,..,dn], as it were val base y [] = y val base y (x:xs) = y' `seq` val base y' xs where @@ -434,42 +422,6 @@ valDig 16 c | otherwise = Nothing -- ---------------------------------------------------------------------- --- conversion - -numberToInt :: Number -> Maybe Int -numberToInt x = - case numberToInteger x of - Just n | minBound' <= n && n <= maxBound' -> Just (fromInteger n) - _ -> Nothing - where - minBound' = toInteger (minBound :: Int) - maxBound' = toInteger (maxBound :: Int) - -numberToInteger :: Number -> Maybe Integer -numberToInteger x = - case value x of - Left n -> Just n - _ -> Nothing - -numberToRational :: Number -> Maybe Rational -numberToRational x = - case value x of - Left n -> Just (fromInteger n) - Right r -> Just r - -numberToFloat :: Number -> Maybe Float -numberToFloat x = - case value x of - Left n -> Just (fromInteger n) - Right r -> Just (fromRational r) - -numberToDouble :: Number -> Maybe Double -numberToDouble x = - case value x of - Left n -> Just (fromInteger n) - Right r -> Just (fromRational r) - --- ---------------------------------------------------------------------- -- other numeric lexing functions readIntP :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a -- 1.7.10.4