From 05e43a9bd25232efced01ce45d00b3b3ba12af51 Mon Sep 17 00:00:00 2001 From: simonpj Date: Thu, 11 Apr 2002 12:03:45 +0000 Subject: [PATCH] [project @ 2002-04-11 12:03:43 by simonpj] ------------------- Mainly derived Read ------------------- This commit is a tangle of several things that somehow got wound up together, I'm afraid. The main course ~~~~~~~~~~~~~~~ Replace the derived-Read machinery with Koen's cunning new parser combinator library. The result should be * much smaller code sizes from derived Read * faster execution of derived Read WARNING: I have not thoroughly tested this stuff; I'd be glad if you did! All the hard work is done, but there may be a few nits. The Read class gets two new methods, not exposed in the H98 inteface of course: class Read a where readsPrec :: Int -> ReadS a readList :: ReadS [a] readPrec :: ReadPrec a -- NEW readListPrec :: ReadPrec [a] -- NEW There are the following new libraries: Text.ParserCombinators.ReadP Koens combinator parser Text.ParserCombinators.ReadPrec Ditto, but with precedences Text.Read.Lex An emasculated lexical analyser that provides the functionality of H98 'lex' TcGenDeriv is changed to generate code that uses the new libraries. The built-in instances of Read (List, Maybe, tuples, etc) use the new libraries. Other stuff ~~~~~~~~~~~ 1. Some fixes the the plumbing of external-core generation. Sigbjorn did most of the work earlier, but this commit completes the renaming and typechecking plumbing. 2. Runtime error-generation functions, such as GHC.Err.recSelErr, GHC.Err.recUpdErr, etc, now take an Addr#, pointing to a UTF8-encoded C string, instead of a Haskell string. This makes the *calls* to these functions easier to generate, and smaller too, which is a good thing. In particular, it means that MkId.mkRecordSelectorId doesn't need to be passed "unpackCStringId", which was GRUESOME; and that in turn means that tcTypeAndClassDecls doesn't need to be passed unf_env, which is a very worthwhile cleanup. Win/win situation. 3. GHC now faithfully translates do-notation using ">>" for statements with no binding, just as the report says. While I was there I tidied up HsDo to take a list of Ids instead of 3 (but now 4) separate Ids. Saves a bit of code here and there. Also introduced Inst.newMethodFromName to package a common idiom. --- Data/Char.hs | 5 +- GHC/Err.lhs | 44 +- GHC/Exts.hs | 4 +- GHC/IOBase.lhs | 8 +- GHC/Read.lhs | 920 ++++++++++++++++------------------- GHC/Show.lhs | 23 +- Makefile | 6 +- Numeric.hs | 53 +- System/Random.hs | 4 +- Text/ParserCombinators/ReadP.lhs | 176 +++++++ Text/ParserCombinators/ReadPrec.lhs | 163 +++++++ Text/Read/Lex.lhs | 504 +++++++++++++++++++ 12 files changed, 1357 insertions(+), 553 deletions(-) create mode 100644 Text/ParserCombinators/ReadP.lhs create mode 100644 Text/ParserCombinators/ReadPrec.lhs create mode 100644 Text/Read/Lex.lhs diff --git a/Data/Char.hs b/Data/Char.hs index e0c9566..9174f15 100644 --- a/Data/Char.hs +++ b/Data/Char.hs @@ -9,7 +9,7 @@ -- Stability : provisional -- Portability : portable -- --- $Id: Char.hs,v 1.1 2001/06/28 14:15:02 simonmar Exp $ +-- $Id: Char.hs,v 1.2 2002/04/11 12:03:43 simonpj Exp $ -- -- The Char type and associated operations. -- @@ -43,9 +43,10 @@ module Data.Char #ifdef __GLASGOW_HASKELL__ import GHC.Base import GHC.Show -import GHC.Read (readLitChar, lexLitChar, digitToInt) +import GHC.Read (readLitChar, lexLitChar) #endif #ifdef __HUGS__ isLatin1 c = True #endif + diff --git a/GHC/Err.lhs b/GHC/Err.lhs index c520f9b..fb34ab5 100644 --- a/GHC/Err.lhs +++ b/GHC/Err.lhs @@ -1,5 +1,5 @@ % ----------------------------------------------------------------------------- -% $Id: Err.lhs,v 1.3 2001/07/31 13:11:40 simonmar Exp $ +% $Id: Err.lhs,v 1.4 2002/04/11 12:03:43 simonpj Exp $ % % (c) The University of Glasgow, 1994-2000 % @@ -23,12 +23,11 @@ module GHC.Err , patError , recSelError , recConError - , recUpdError -- :: String -> a + , runtimeError -- :: Addr# -> a -- Addr# points to UTF8 encoded C string , absentErr, parError -- :: a , seqError -- :: a - , errorCString -- :: Addr# -> a -- Arg is a ptr to C string , error -- :: String -> a , assertError -- :: String -> Bool -> a -> a @@ -51,9 +50,6 @@ import GHC.Exception error :: String -> a error s = throw (ErrorCall s) -errorCString :: Addr# -> a -errorCString s = error (unpackCString# s) - -- It is expected that compilers will recognize this and insert error -- messages which are more appropriate to the context in which undefined -- appears. @@ -76,33 +72,27 @@ absentErr, parError, seqError :: a absentErr = error "Oops! The program has entered an `absent' argument!\n" parError = error "Oops! Entered GHCerr.parError (a GHC bug -- please report it!)\n" -seqError = error "Oops! Entered seqError (a GHC bug -- please report it!)\n" - +seqError = error "Oops! Entered seqError (a GHC bug -- please report it!)\n" \end{code} \begin{code} -irrefutPatError - , noMethodBindingError - , nonExhaustiveGuardsError - , patError - , recSelError - , recConError - , recUpdError :: String -> a - -noMethodBindingError s = throw (NoMethodError (untangle s "No instance nor default method for class operation")) -irrefutPatError s = throw (PatternMatchFail (untangle s "Irrefutable pattern failed for pattern")) +recSelError, recConError, irrefutPatError, runtimeError, + nonExhaustiveGuardsError, patError, noMethodBindingError + :: Addr# -> a -- All take a UTF8-encoded C string + +recSelError s = throw (RecSelError (unpackCStringUtf8# s)) -- No location info unfortunately +runtimeError s = error (unpackCStringUtf8# s) -- No location info unfortunately + nonExhaustiveGuardsError s = throw (PatternMatchFail (untangle s "Non-exhaustive guards in")) +irrefutPatError s = throw (PatternMatchFail (untangle s "Irrefutable pattern failed for pattern")) +recConError s = throw (RecConError (untangle s "Missing field in record construction")) +noMethodBindingError s = throw (NoMethodError (untangle s "No instance nor default method for class operation")) patError s = throw (PatternMatchFail (untangle s "Non-exhaustive patterns in")) -recSelError s = throw (RecSelError (untangle s "Missing field in record selection")) -recConError s = throw (RecConError (untangle s "Missing field in record construction")) -recUpdError s = throw (RecUpdError (untangle s "Record doesn't contain field(s) to be updated")) - -assertError :: String -> Bool -> a -> a +assertError :: Addr# -> Bool -> a -> a assertError str pred v | pred = v | otherwise = throw (AssertionFailed (untangle str "Assertion failed")) - \end{code} @@ -115,7 +105,7 @@ It prints location message details \begin{code} -untangle :: String -> String -> String +untangle :: Addr# -> String -> String untangle coded message = location ++ ": " @@ -123,8 +113,10 @@ untangle coded message ++ details ++ "\n" where + coded_str = unpackCStringUtf8# coded + (location, details) - = case (span not_bar coded) of { (loc, rest) -> + = case (span not_bar coded_str) of { (loc, rest) -> case rest of ('|':det) -> (loc, ' ' : det) _ -> (loc, "") diff --git a/GHC/Exts.hs b/GHC/Exts.hs index c5f0ca6..699edbf 100644 --- a/GHC/Exts.hs +++ b/GHC/Exts.hs @@ -8,7 +8,7 @@ -- Stability : experimental -- Portability : non-portable -- --- $Id: Exts.hs,v 1.3 2002/03/14 16:26:40 simonmar Exp $ +-- $Id: Exts.hs,v 1.4 2002/04/11 12:03:44 simonpj Exp $ -- -- GHC Extensions: this is the Approved Way to get at GHC-specific stuff. -- @@ -35,7 +35,7 @@ module GHC.Exts import Prelude -import {-# SOURCE #-} GHC.Prim +import GHC.Prim import GHC.Base import GHC.Word import GHC.Num diff --git a/GHC/IOBase.lhs b/GHC/IOBase.lhs index 6ef6b06..e1b8ec3 100644 --- a/GHC/IOBase.lhs +++ b/GHC/IOBase.lhs @@ -1,5 +1,5 @@ % ------------------------------------------------------------------------------ -% $Id: IOBase.lhs,v 1.7 2002/03/14 12:09:50 simonmar Exp $ +% $Id: IOBase.lhs,v 1.8 2002/04/11 12:03:44 simonpj Exp $ % % (c) The University of Glasgow, 1994-2001 % @@ -78,6 +78,12 @@ bindIO (IO m) k = IO ( \ s -> (# new_s, a #) -> unIO (k a) new_s ) +thenIO :: IO a -> IO b -> IO b +thenIO (IO m) k = IO ( \ s -> + case m s of + (# new_s, a #) -> unIO k new_s + ) + returnIO :: a -> IO a returnIO x = IO (\ s -> (# s, x #)) diff --git a/GHC/Read.lhs b/GHC/Read.lhs index a01c8e2..949ec59 100644 --- a/GHC/Read.lhs +++ b/GHC/Read.lhs @@ -1,5 +1,5 @@ % ------------------------------------------------------------------------------ -% $Id: Read.lhs,v 1.3 2001/12/21 15:07:25 simonmar Exp $ +% $Id: Read.lhs,v 1.4 2002/04/11 12:03:44 simonpj Exp $ % % (c) The University of Glasgow, 1994-2000 % @@ -11,95 +11,74 @@ Instances of the Read class. \begin{code} {-# OPTIONS -fno-implicit-prelude #-} -module GHC.Read where +module GHC.Read + ( Read(..) -- class + + -- ReadS type + , ReadS -- :: *; = String -> [(a,String)] + + -- utility functions + , reads -- :: Read a => ReadS a + , readp -- :: Read a => ReadP a + , readEither -- :: Read a => String -> Either String a + , read -- :: Read a => String -> a + + -- H98 compatibility + , lex -- :: ReadS String + , lexLitChar -- :: ReadS String + , readLitChar -- :: ReadS Char + , lexDigits -- :: ReadS String + + -- defining readers + , lexP -- :: ReadPrec Lexeme + , paren -- :: ReadPrec a -> ReadPrec a + , parens -- :: ReadPrec a -> ReadPrec a + , list -- :: ReadPrec a -> ReadPrec [a] + , choose -- :: [(String, ReadPrec a)] -> ReadPrec a + + -- Temporary + , readList__ + , readParen + ) + where + +import qualified Text.ParserCombinators.ReadP as P + +import Text.ParserCombinators.ReadP + ( ReadP + , readP_to_S + , readS_to_P + ) + +import qualified Text.Read.Lex as L + +import Text.Read.Lex + ( Lexeme(..) + , Number(..) + , numberToInt + , numberToInteger + , numberToFloat + , numberToDouble + ) + +import Text.ParserCombinators.ReadPrec import Data.Maybe import Data.Either import {-# SOURCE #-} GHC.Err ( error ) -import GHC.Enum ( Enum(..), maxBound ) import GHC.Num import GHC.Real import GHC.Float import GHC.List import GHC.Show -- isAlpha etc import GHC.Base -\end{code} - -%********************************************************* -%* * -\subsection{The @Read@ class} -%* * -%********************************************************* - -Note: if you compile this with -DNEW_READS_REP, you'll get -a (simpler) ReadS representation that only allow one valid -parse of a string of characters, instead of a list of -possible ones. - -[changing the ReadS rep has implications for the deriving -machinery for Read, a change that hasn't been made, so you -probably won't want to compile in this new rep. except -when in an experimental mood.] - -\begin{code} - -#ifndef NEW_READS_REP -type ReadS a = String -> [(a,String)] -#else -type ReadS a = String -> Maybe (a,String) -#endif -class Read a where - readsPrec :: Int -> ReadS a - - readList :: ReadS [a] - readList = readList__ reads -\end{code} - -In this module we treat [(a,String)] as a monad in Control.MonadPlus -But Control.MonadPlus isn't defined yet, so we simply give local -declarations for mzero and guard suitable for this particular -type. It would also be reasonably to move Control.MonadPlus to GHC.Base -along with Control.Monad and Functor, but that seems overkill for one -example - -\begin{code} -mzero :: [a] -mzero = [] - -guard :: Bool -> [()] -guard True = [()] -guard False = [] -\end{code} - -%********************************************************* -%* * -\subsection{Utility functions} -%* * -%********************************************************* - -\begin{code} -reads :: (Read a) => ReadS a -reads = readsPrec 0 - -read :: (Read a) => String -> a -read s = - case read_s s of -#ifndef NEW_READS_REP - [x] -> x - [] -> error "Prelude.read: no parse" - _ -> error "Prelude.read: ambiguous parse" -#else - Just x -> x - Nothing -> error "Prelude.read: no parse" -#endif - where - read_s str = do - (x,str1) <- reads str - ("","") <- lex str1 - return x +ratioPrec = 7 -- Precedence of ':%' constructor +appPrec = 10 -- Precedence of applictaion \end{code} +------------------------------------------------------- + TEMPORARY UNTIL I DO DERIVED READ \begin{code} readParen :: Bool -> ReadS a -> ReadS a @@ -125,485 +104,406 @@ readList__ readx readl2 s = (do { ("]",t) <- lex s ; return ([],t) }) ++ (do { (",",t) <- lex s ; (x,u) <- readx t ; (xs,v) <- readl2 u ; return (x:xs,v) }) - \end{code} %********************************************************* %* * -\subsection{Lexical analysis} +\subsection{The @Read@ class and @ReadS@ type} %* * %********************************************************* -This lexer is not completely faithful to the Haskell lexical syntax. -Current limitations: - Qualified names are not handled properly - A `--' does not terminate a symbol - Octal and hexidecimal numerics are not recognized as a single token - \begin{code} -lex :: ReadS String - -lex "" = return ("","") -lex (c:s) | isSpace c = lex (dropWhile isSpace s) -lex ('\'':s) = do - (ch, '\'':t) <- lexLitChar s - guard (ch /= "'") - return ('\'':ch++"'", t) -lex ('"':s) = do - (str,t) <- lexString s - return ('"':str, t) - - where - lexString ('"':s) = return ("\"",s) - lexString s = do - (ch,t) <- lexStrItem s - (str,u) <- lexString t - return (ch++str, u) - - - lexStrItem ('\\':'&':s) = return ("\\&",s) - lexStrItem ('\\':c:s) | isSpace c = do - ('\\':t) <- return (dropWhile isSpace s) - return ("\\&",t) - lexStrItem s = lexLitChar s - -lex (c:s) | isSingle c = return ([c],s) - | isSym c = do - (sym,t) <- return (span isSym s) - return (c:sym,t) - | isAlpha c = do - (nam,t) <- return (span isIdChar s) - return (c:nam, t) - | isDigit c = do -{- Removed, 13/03/2000 by SDM. - Doesn't work, and not required by Haskell report. - let - (pred, s', isDec) = - case s of - ('o':rs) -> (isOctDigit, rs, False) - ('O':rs) -> (isOctDigit, rs, False) - ('x':rs) -> (isHexDigit, rs, False) - ('X':rs) -> (isHexDigit, rs, False) - _ -> (isDigit, s, True) --} - (ds,s) <- return (span isDigit s) - (fe,t) <- lexFracExp s - return (c:ds++fe,t) - | otherwise = mzero -- bad character - where - isSingle c = c `elem` ",;()[]{}_`" - isSym c = c `elem` "!@#$%&*+./<=>?\\^|:-~" - isIdChar c = isAlphaNum c || c `elem` "_'" - - lexFracExp ('.':c:cs) | isDigit c = do - (ds,t) <- lex0Digits cs - (e,u) <- lexExp t - return ('.':c:ds++e,u) - lexFracExp s = return ("",s) - - lexExp (e:s) | e `elem` "eE" = - (do - (c:t) <- return s - guard (c `elem` "+-") - (ds,u) <- lexDecDigits t - return (e:c:ds,u)) ++ - (do - (ds,t) <- lexDecDigits s - return (e:ds,t)) - - lexExp s = return ("",s) - -lexDigits :: ReadS String -lexDigits = lexDecDigits - -lexDecDigits :: ReadS String -lexDecDigits = nonnull isDigit - -lexOctDigits :: ReadS String -lexOctDigits = nonnull isOctDigit - -lexHexDigits :: ReadS String -lexHexDigits = nonnull isHexDigit - --- 0 or more digits -lex0Digits :: ReadS String -lex0Digits s = return (span isDigit s) - -nonnull :: (Char -> Bool) -> ReadS String -nonnull p s = do - (cs@(_:_),t) <- return (span p s) - return (cs,t) - -lexLitChar :: ReadS String -lexLitChar ('\\':s) = do - (esc,t) <- lexEsc s - return ('\\':esc, t) - where - lexEsc (c:s) | c `elem` escChars = return ([c],s) - lexEsc s@(d:_) | isDigit d = checkSize 10 lexDecDigits s - lexEsc ('o':d:s) | isOctDigit d = checkSize 8 lexOctDigits (d:s) - lexEsc ('O':d:s) | isOctDigit d = checkSize 8 lexOctDigits (d:s) - lexEsc ('x':d:s) | isHexDigit d = checkSize 16 lexHexDigits (d:s) - lexEsc ('X':d:s) | isHexDigit d = checkSize 16 lexHexDigits (d:s) - lexEsc ('^':c:s) | c >= '@' && c <= '_' = [(['^',c],s)] -- cf. cntrl in 2.6 of H. report. - lexEsc s@(c:_) | isUpper c = fromAsciiLab s - lexEsc _ = mzero - - escChars = "abfnrtv\\\"'" - - fromAsciiLab (x:y:z:ls) | isUpper y && (isUpper z || isDigit z) && - [x,y,z] `elem` asciiEscTab = return ([x,y,z], ls) - fromAsciiLab (x:y:ls) | isUpper y && - [x,y] `elem` asciiEscTab = return ([x,y], ls) - fromAsciiLab _ = mzero - - asciiEscTab = "DEL" : asciiTab - - {- - Check that the numerically escaped char literals are - within accepted boundaries. - - Note: this allows char lits with leading zeros, i.e., - \0000000000000000000000000000001. - -} - checkSize base f str = do - (num, res) <- f str - if toAnInteger base num > toInteger (ord maxBound) then - mzero - else - case base of - 8 -> return ('o':num, res) - 16 -> return ('x':num, res) - _ -> return (num, res) - - toAnInteger base = foldl (\ acc n -> acc*base + toInteger (digitToInt n)) 0 - - -lexLitChar (c:s) = return ([c],s) -lexLitChar "" = mzero - -digitToInt :: Char -> Int -digitToInt c - | isDigit c = fromEnum c - fromEnum '0' - | c >= 'a' && c <= 'f' = fromEnum c - fromEnum 'a' + 10 - | c >= 'A' && c <= 'F' = fromEnum c - fromEnum 'A' + 10 - | otherwise = error ("Char.digitToInt: not a digit " ++ show c) -- sigh +------------------------------------------------------------------------ +-- ReadS + +type ReadS a = String -> [(a,String)] + +------------------------------------------------------------------------ +-- class Read + +class Read a where + readsPrec :: Int -> ReadS a + readList :: ReadS [a] + readPrec :: ReadPrec a + readListPrec :: ReadPrec [a] + + -- default definitions + readsPrec = readPrec_to_S readPrec + readList = readPrec_to_S (list readPrec) 0 + readPrec = readS_to_Prec readsPrec + readListPrec = readS_to_Prec (\_ -> readList) + +readListDefault :: Read a => ReadS [a] +readListDefault = readPrec_to_S readListPrec 0 + +readListPrecDefault :: Read a => ReadPrec [a] +readListPrecDefault = list readPrec + +------------------------------------------------------------------------ +-- utility functions + +reads :: Read a => ReadS a +reads = readsPrec minPrec + +readp :: Read a => ReadP a +readp = readPrec_to_P readPrec minPrec + +readEither :: Read a => String -> Either String a +readEither s = + case [ x | (x,"") <- readPrec_to_S read' minPrec s ] of + [x] -> Right x + [] -> Left "Prelude.read: no parse" + _ -> Left "Prelude.read: ambiguous parse" + where + read' = + do x <- readPrec + lift P.skipSpaces + return x + +read :: Read a => String -> a +read s = either error id (readEither s) + +------------------------------------------------------------------------ +-- H98 compatibility + +lex :: ReadS String -- As defined by H98 +lex = readP_to_S (do { lexeme <- L.lex ; + return (show lexeme) }) + +lexLitChar :: ReadS String -- As defined by H98 +lexLitChar = readP_to_S (do { lexeme <- L.lexLitChar ; + return (show lexeme) }) + +readLitChar :: ReadS Char -- As defined by H98 +readLitChar = readP_to_S (do { Char c <- L.lexLitChar ; + return c }) + +lexDigits :: ReadS String +lexDigits = readP_to_S (P.munch1 isDigit) + +------------------------------------------------------------------------ +-- utility parsers + +lexP :: ReadPrec Lexeme +lexP = lift L.lex + +paren :: ReadPrec a -> ReadPrec a +-- (paren p) parses (P0) +-- where p parses P0 in precedence context zero +paren p = + do Single '(' <- lexP + x <- reset p + Single ')' <- lexP + return x + +parens :: ReadPrec a -> ReadPrec a +-- (parens p) parses P, (P0), ((P0)), etc, +-- where p parses P in the current precedence context +-- parses P0 in precedence context zero +parens p = optional + where + optional = p +++ mandatory + mandatory = paren optional + +list :: ReadPrec a -> ReadPrec [a] +list readx = + parens + ( do Single '[' <- lexP + (listRest False +++ listNext) + ) + where + listRest started = + do Single c <- lexP + case c of + ']' -> return [] + ',' | started -> listNext + _ -> pfail + + listNext = + do x <- reset readx + xs <- listRest True + return (x:xs) + +choose :: [(String, ReadPrec a)] -> ReadPrec a +-- Parse the specified lexeme and continue as specified +-- Esp useful for nullary constructors +choose sps = foldr ((+++) . try_one) pfail sps + where + try_one (s,p) = do { Ident s' <- lexP ; + if s == s' then p else pfail } \end{code} + %********************************************************* %* * -\subsection{Instances of @Read@} +\subsection{Simple instances of Read} %* * %********************************************************* \begin{code} -instance Read Char where - readsPrec _ = readParen False - (\r -> do - ('\'':s,t) <- lex r - (c,"\'") <- readLitChar s - return (c,t)) - - readList = readParen False (\r -> do - ('"':s,t) <- lex r - (l,_) <- readl s - return (l,t)) - where readl ('"':s) = return ("",s) - readl ('\\':'&':s) = readl s - readl s = do - (c,t) <- readLitChar s - (cs,u) <- readl t - return (c:cs,u) +instance Read Char where + readPrec = + parens + ( do Char c <- lexP + return c + ) + + readListPrec = + parens + ( do String s <- lexP -- Looks for "foo" + return s + +++ + readListPrecDefault -- Looks for ['f','o','o'] + ) -- (more generous than H98 spec) + + readList = readListDefault instance Read Bool where - readsPrec _ = readParen False - (\r -> - lex r >>= \ lr -> - (do { ("True", rest) <- return lr ; return (True, rest) }) ++ - (do { ("False", rest) <- return lr ; return (False, rest) })) - + readPrec = + parens + ( do Ident s <- lexP + case s of + "True" -> return True + "False" -> return False + _ -> pfail + ) + + readListPrec = readListPrecDefault + readList = readListDefault instance Read Ordering where - readsPrec _ = readParen False - (\r -> - lex r >>= \ lr -> - (do { ("LT", rest) <- return lr ; return (LT, rest) }) ++ - (do { ("EQ", rest) <- return lr ; return (EQ, rest) }) ++ - (do { ("GT", rest) <- return lr ; return (GT, rest) })) + readPrec = + parens + ( do Ident s <- lexP + case s of + "LT" -> return LT + "EQ" -> return EQ + "GT" -> return GT + _ -> pfail + ) + + readListPrec = readListPrecDefault + readList = readListDefault +\end{code} -instance Read a => Read (Maybe a) where - readsPrec _ = readParen False - (\r -> - lex r >>= \ lr -> - (do { ("Nothing", rest) <- return lr ; return (Nothing, rest)}) ++ - (do - ("Just", rest1) <- return lr - (x, rest2) <- reads rest1 - return (Just x, rest2))) -instance (Read a, Read b) => Read (Either a b) where - readsPrec _ = readParen False - (\r -> - lex r >>= \ lr -> - (do - ("Left", rest1) <- return lr - (x, rest2) <- reads rest1 - return (Left x, rest2)) ++ - (do - ("Right", rest1) <- return lr - (x, rest2) <- reads rest1 - return (Right x, rest2))) - -instance Read Int where - readsPrec _ x = readSigned readDec x - -instance Read Integer where - readsPrec _ x = readSigned readDec x - -instance Read Float where - readsPrec _ x = readSigned readFloat x - -instance Read Double where - readsPrec _ x = readSigned readFloat x - -instance (Integral a, Read a) => Read (Ratio a) where - readsPrec p = readParen (p > ratio_prec) - (\r -> do - (x,s) <- reads r - ("%",t) <- lex s - (y,u) <- reads t - return (x % y,u)) - -instance (Read a) => Read [a] where - readsPrec _ = readList +%********************************************************* +%* * +\subsection{Structure instances of Read: Maybe, List etc} +%* * +%********************************************************* -instance Read () where - readsPrec _ = readParen False - (\r -> do - ("(",s) <- lex r - (")",t) <- lex s - return ((),t)) - -instance (Read a, Read b) => Read (a,b) where - readsPrec _ = readParen False - (\r -> do - ("(",s) <- lex r - (x,t) <- readsPrec 0 s - (",",u) <- lex t - (y,v) <- readsPrec 0 u - (")",w) <- lex v - return ((x,y), w)) +For structured instances of Read we start using the precedences. The +idea is then that 'parens (prec k p)' will fail immediately when trying +to parse it in a context with a higher precedence level than k. But if +there is one parenthesis parsed, then the required precedence level +drops to 0 again, and parsing inside p may succeed. -instance (Read a, Read b, Read c) => Read (a, b, c) where - readsPrec _ = readParen False - (\a -> do - ("(",b) <- lex a - (x,c) <- readsPrec 0 b - (",",d) <- lex c - (y,e) <- readsPrec 0 d - (",",f) <- lex e - (z,g) <- readsPrec 0 f - (")",h) <- lex g - return ((x,y,z), h)) +'appPrec' is just the precedence level of function application (maybe +it should be called 'appPrec' instead). So, if we are parsing +function application, we'd better require the precedence level to be +at least 'appPrec'. Otherwise, we have to put parentheses around it. -instance (Read a, Read b, Read c, Read d) => Read (a, b, c, d) where - readsPrec _ = readParen False - (\a -> do - ("(",b) <- lex a - (w,c) <- readsPrec 0 b - (",",d) <- lex c - (x,e) <- readsPrec 0 d - (",",f) <- lex e - (y,g) <- readsPrec 0 f - (",",h) <- lex g - (z,h) <- readsPrec 0 h - (")",i) <- lex h - return ((w,x,y,z), i)) +'step' is used to increase the precedence levels inside a +parser, and can be used to express left- or right- associativity. For +example, % is defined to be left associative, so we only increase +precedence on the right hand side. -instance (Read a, Read b, Read c, Read d, Read e) => Read (a, b, c, d, e) where - readsPrec _ = readParen False - (\a -> do - ("(",b) <- lex a - (v,c) <- readsPrec 0 b - (",",d) <- lex c - (w,e) <- readsPrec 0 d - (",",f) <- lex e - (x,g) <- readsPrec 0 f - (",",h) <- lex g - (y,i) <- readsPrec 0 h - (",",j) <- lex i - (z,k) <- readsPrec 0 j - (")",l) <- lex k - return ((v,w,x,y,z), l)) +Note how step is used in for example the Maybe parser to increase the +precedence beyond appPrec, so that basically only literals and +parenthesis-like objects such as (...) and [...] can be an argument to +'Just'. + +\begin{code} +instance Read a => Read (Maybe a) where + readPrec = + parens + ( prec appPrec + ( do Ident "Nothing" <- lexP + return Nothing + +++ + do Ident "Just" <- lexP + x <- step readPrec + return (Just x) + ) + ) + + readListPrec = readListPrecDefault + readList = readListDefault + +instance (Read a, Read b) => Read (Either a b) where + readPrec = + parens + ( prec appPrec + ( do Ident "Left" <- lexP + x <- step readPrec + return (Left x) + +++ + do Ident "Right" <- lexP + y <- step readPrec + return (Right y) + ) + ) + + readListPrec = readListPrecDefault + readList = readListDefault + +instance Read a => Read [a] where + readPrec = readListPrec + readListPrec = readListPrecDefault + readList = readListDefault + +instance Read Lexeme where + readPrec = lexP + readListPrec = readListPrecDefault + readList = readListDefault \end{code} %********************************************************* %* * -\subsection{Reading characters} +\subsection{Numeric instances of Read} %* * %********************************************************* \begin{code} -readLitChar :: ReadS Char - -readLitChar [] = mzero -readLitChar ('\\':s) = readEsc s - where - readEsc ('a':s) = return ('\a',s) - readEsc ('b':s) = return ('\b',s) - readEsc ('f':s) = return ('\f',s) - readEsc ('n':s) = return ('\n',s) - readEsc ('r':s) = return ('\r',s) - readEsc ('t':s) = return ('\t',s) - readEsc ('v':s) = return ('\v',s) - readEsc ('\\':s) = return ('\\',s) - readEsc ('"':s) = return ('"',s) - readEsc ('\'':s) = return ('\'',s) - readEsc ('^':c:s) | c >= '@' && c <= '_' - = return (chr (ord c - ord '@'), s) - readEsc s@(d:_) | isDigit d - = do - (n,t) <- readDec s - return (chr n,t) - readEsc ('o':s) = do - (n,t) <- readOct s - return (chr n,t) - readEsc ('x':s) = do - (n,t) <- readHex s - return (chr n,t) - - readEsc s@(c:_) | isUpper c - = let table = ('\DEL', "DEL") : zip ['\NUL'..] asciiTab - in case [(c,s') | (c, mne) <- table, - ([],s') <- [match mne s]] - of (pr:_) -> return pr - [] -> mzero - readEsc _ = mzero - -readLitChar (c:s) = return (c,s) - -match :: (Eq a) => [a] -> [a] -> ([a],[a]) -match (x:xs) (y:ys) | x == y = match xs ys -match xs ys = (xs,ys) - +readNumber :: Num a => (Number -> Maybe a) -> ReadPrec a +-- Read a signed number +readNumber convert = + parens + ( do x <- lexP + case x of + Symbol "-" -> do n <- readNumber convert + return (negate n) + + Number y -> case convert y of + Just n -> return n + Nothing -> pfail + + _ -> pfail + ) + +instance Read Int where + readPrec = readNumber numberToInt + readListPrec = readListPrecDefault + readList = readListDefault + +instance Read Integer where + readPrec = readNumber numberToInteger + readListPrec = readListPrecDefault + readList = readListDefault + +instance Read Float where + readPrec = readNumber numberToFloat + readListPrec = readListPrecDefault + readList = readListDefault + +instance Read Double where + readPrec = readNumber numberToDouble + readListPrec = readListPrecDefault + readList = readListDefault + +instance (Integral a, Read a) => Read (Ratio a) where + readPrec = + parens + ( prec ratioPrec + ( do x <- step readPrec + Symbol "%" <- lexP + y <- step readPrec + return (x % y) + ) + ) + + readListPrec = readListPrecDefault + readList = readListDefault \end{code} %********************************************************* %* * -\subsection{Reading numbers} +\subsection{Tuple instances of Read} %* * %********************************************************* -Note: reading numbers at bases different than 10, does not -include lexing common prefixes such as '0x' or '0o' etc. - \begin{code} -{-# SPECIALISE readDec :: - ReadS Int, - ReadS Integer #-} -readDec :: (Integral a) => ReadS a -readDec = readInt 10 isDigit (\d -> ord d - ord '0') - -{-# SPECIALISE readOct :: - ReadS Int, - ReadS Integer #-} -readOct :: (Integral a) => ReadS a -readOct = readInt 8 isOctDigit (\d -> ord d - ord '0') - -{-# SPECIALISE readHex :: - ReadS Int, - ReadS Integer #-} -readHex :: (Integral a) => ReadS a -readHex = readInt 16 isHexDigit hex - where hex d = ord d - (if isDigit d then ord '0' - else ord (if isUpper d then 'A' else 'a') - 10) - -readInt :: (Integral a) => a -> (Char -> Bool) -> (Char -> Int) -> ReadS a -readInt radix isDig digToInt s = do - (ds,r) <- nonnull isDig s - return (foldl1 (\n d -> n * radix + d) - (map (fromInteger . toInteger . digToInt) ds), r) - -{-# SPECIALISE readSigned :: - ReadS Int -> ReadS Int, - ReadS Integer -> ReadS Integer, - ReadS Double -> ReadS Double #-} -readSigned :: (Real a) => ReadS a -> ReadS a -readSigned readPos = readParen False read' - where read' r = read'' r ++ - (do - ("-",s) <- lex r - (x,t) <- read'' s - return (-x,t)) - read'' r = do - (str,s) <- lex r - (n,"") <- readPos str - return (n,s) -\end{code} +instance Read () where + readPrec = + parens + ( paren + ( return () + ) + ) + + readListPrec = readListPrecDefault + readList = readListDefault + +instance (Read a, Read b) => Read (a,b) where + readPrec = + parens + ( paren + ( do x <- readPrec + Single ',' <- lexP + y <- readPrec + return (x,y) + ) + ) + + readListPrec = readListPrecDefault + readList = readListDefault -The functions readFloat below uses rational arithmetic -to ensure correct conversion between the floating-point radix and -decimal. It is often possible to use a higher-precision floating- -point type to obtain the same results. -\begin{code} -{-# SPECIALISE readFloat :: - ReadS Double, - ReadS Float #-} -readFloat :: (RealFloat a) => ReadS a -readFloat r = - (do - (x,t) <- readRational r - return (fromRational x,t) ) ++ - (do - ("NaN",t) <- lex r - return (0/0,t) ) ++ - (do - ("Infinity",t) <- lex r - return (1/0,t) ) - -readRational :: ReadS Rational -- NB: doesn't handle leading "-" -readRational r = do - (n,d,s) <- readFix r - (k,t) <- readExp s - return ((n%1)*10^^(k-d), t) - where - readFix r = do - (ds,s) <- lexDecDigits r - (ds',t) <- lexDotDigits s - return (read (ds++ds'), length ds', t) - - readExp (e:s) | e `elem` "eE" = readExp' s - readExp s = return (0,s) - - readExp' ('+':s) = readDec s - readExp' ('-':s) = do - (k,t) <- readDec s - return (-k,t) - readExp' s = readDec s - - lexDotDigits ('.':s) = lex0Digits s - lexDotDigits s = return ("",s) - -readRational__ :: String -> Rational -- we export this one (non-std) - -- NB: *does* handle a leading "-" -readRational__ top_s - = case top_s of - '-' : xs -> - (read_me xs) - xs -> read_me xs - where - read_me s - = case (do { (x,t) <- readRational s ; ("","") <- lex t ; return x }) of -#ifndef NEW_READS_REP - [x] -> x - [] -> error ("readRational__: no parse:" ++ top_s) - _ -> error ("readRational__: ambiguous parse:" ++ top_s) -#else - Just x -> x - Nothing -> error ("readRational__: no parse:" ++ top_s) -#endif +instance (Read a, Read b, Read c) => Read (a, b, c) where + readPrec = + parens + ( paren + ( do x <- readPrec + Single ',' <- lexP + y <- readPrec + Single ',' <- lexP + z <- readPrec + return (x,y,z) + ) + ) + + readListPrec = readListPrecDefault + readList = readListDefault + +instance (Read a, Read b, Read c, Read d) => Read (a, b, c, d) where + readPrec = + parens + ( paren + ( do w <- readPrec + Single ',' <- lexP + x <- readPrec + Single ',' <- lexP + y <- readPrec + Single ',' <- lexP + z <- readPrec + return (w,x,y,z) + ) + ) + + readListPrec = readListPrecDefault + readList = readListDefault +instance (Read a, Read b, Read c, Read d, Read e) => Read (a, b, c, d, e) where + readPrec = + parens + ( paren + ( do v <- readPrec + Single ',' <- lexP + w <- readPrec + Single ',' <- lexP + x <- readPrec + Single ',' <- lexP + y <- readPrec + Single ',' <- lexP + z <- readPrec + return (v,w,x,y,z) + ) + ) + + readListPrec = readListPrecDefault + readList = readListDefault \end{code} diff --git a/GHC/Show.lhs b/GHC/Show.lhs index 9a14dae..b0265be 100644 --- a/GHC/Show.lhs +++ b/GHC/Show.lhs @@ -1,5 +1,5 @@ % ------------------------------------------------------------------------------ -% $Id: Show.lhs,v 1.4 2001/12/21 15:07:25 simonmar Exp $ +% $Id: Show.lhs,v 1.5 2002/04/11 12:03:44 simonpj Exp $ % % (c) The University of Glasgow, 1992-2000 % @@ -19,7 +19,7 @@ module GHC.Show -- Show support code shows, showChar, showString, showParen, showList__, showSpace, showLitChar, protectEsc, - intToDigit, showSignedInt, + intToDigit, digitToInt, showSignedInt, -- Character operations isAscii, isLatin1, isControl, isPrint, isSpace, isUpper, @@ -34,6 +34,7 @@ module GHC.Show import {-# SOURCE #-} GHC.Err ( error ) import GHC.Base +import GHC.Enum import Data.Maybe import Data.Either import GHC.List ( (!!), break, dropWhile @@ -216,18 +217,26 @@ protectEsc :: (Char -> Bool) -> ShowS -> ShowS protectEsc p f = f . cont where cont s@(c:_) | p c = "\\&" ++ s cont s = s +\end{code} + +Code specific for Ints. +\begin{code} intToDigit :: Int -> Char intToDigit (I# i) | i >=# 0# && i <=# 9# = unsafeChr (ord '0' `plusInt` I# i) - | i >=# 10# && i <=# 15# = unsafeChr (ord 'a' `minusInt` I# 10# `plusInt` I# i) + | i >=# 10# && i <=# 15# = unsafeChr (ord 'a' `minusInt` ten `plusInt` I# i) | otherwise = error ("Char.intToDigit: not a digit " ++ show (I# i)) -\end{code} +digitToInt :: Char -> Int +digitToInt c + | isDigit c = ord c `minusInt` ord '0' + | c >= 'a' && c <= 'f' = ord c `minusInt` ord 'a' `plusInt` ten + | c >= 'A' && c <= 'F' = ord c `minusInt` ord 'A' `plusInt` ten + | otherwise = error ("Char.digitToInt: not a digit " ++ show c) -- sigh -Code specific for Ints. +ten = I# 10# -\begin{code} showSignedInt :: Int -> Int -> ShowS showSignedInt (I# p) (I# n) r | n <# 0# && p ># 6# = '(' : itos n (')' : r) @@ -250,6 +259,7 @@ itos n# cs itos' (n# `quotInt#` 10#) (C# c# : cs) } \end{code} + %********************************************************* %* * \subsection{Character stuff} @@ -309,7 +319,6 @@ toUpper c@(C# c#) = c - toLower c@(C# c#) | isAsciiUpper c = C# (chr# (ord# c# +# 32#)) | isAscii c = c diff --git a/Makefile b/Makefile index 72b6e72..5e62d0c 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# $Id: Makefile,v 1.22 2002/03/25 15:49:26 sof Exp $ +# $Id: Makefile,v 1.23 2002/04/11 12:03:43 simonpj Exp $ TOP=.. include $(TOP)/mk/boilerplate.mk @@ -34,8 +34,10 @@ ALL_DIRS = \ Text \ Text/Html \ Text/PrettyPrint \ + Text/ParserCombinators \ Text/Regex \ - Text/Show + Text/Show \ + Text/Read PACKAGE = base diff --git a/Numeric.hs b/Numeric.hs index 66a4f21..78af5b0 100644 --- a/Numeric.hs +++ b/Numeric.hs @@ -9,7 +9,7 @@ -- Stability : provisional -- Portability : portable -- --- $Id: Numeric.hs,v 1.5 2002/02/12 10:52:18 simonmar Exp $ +-- $Id: Numeric.hs,v 1.6 2002/04/11 12:03:43 simonpj Exp $ -- -- Odds and ends, mostly functions for reading and showing -- RealFloat-like kind of values. @@ -55,12 +55,63 @@ import GHC.Float import GHC.Num import GHC.Show import Data.Maybe +import Text.ParserCombinators.ReadP( ReadP, readP_to_S, pfail ) +import qualified Text.Read.Lex as L #endif #ifdef __HUGS__ import Array #endif + +-- ********************************************************* +-- * * +-- \subsection{Reading} +-- * * +-- ********************************************************* + +readInt :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadS a +readInt base isDigit valDigit = readP_to_S (L.readIntP base isDigit valDigit) + +readOct, readDec, readHex :: Num a => ReadS a +readOct = readP_to_S L.readOctP +readDec = readP_to_S L.readDecP +readHex = readP_to_S L.readHexP + +readFloat :: RealFrac a => ReadS a +readFloat = readP_to_S readFloatP + +readFloatP :: RealFrac a => ReadP a +readFloatP = + do L.Number x <- L.lex + case L.numberToRational x of + Nothing -> pfail + Just y -> return (fromRational y) + +-- It's turgid to have readSigned work using list comprehensions, +-- but it's specified as a ReadS to ReadS transformer +-- With a bit of luck no one will use it. +readSigned :: (Real a) => ReadS a -> ReadS a +readSigned readPos = readParen False read' + where read' r = read'' r ++ + (do + ("-",s) <- lex r + (x,t) <- read'' s + return (-x,t)) + read'' r = do + (str,s) <- lex r + (n,"") <- readPos str + return (n,s) + + +-- ********************************************************* +-- * * +-- \subsection{Showing} +-- * * +-- ********************************************************* + + + #ifdef __GLASGOW_HASKELL__ showInt :: Integral a => a -> ShowS showInt n cs diff --git a/System/Random.hs b/System/Random.hs index c0633aa..0b5286b 100644 --- a/System/Random.hs +++ b/System/Random.hs @@ -8,7 +8,7 @@ -- Stability : provisional -- Portability : portable -- --- $Id: Random.hs,v 1.2 2001/12/21 15:07:26 simonmar Exp $ +-- $Id: Random.hs,v 1.3 2002/04/11 12:03:44 simonpj Exp $ -- -- Random numbers. -- @@ -47,7 +47,7 @@ import Data.IORef #ifdef __GLASGOW_HASKELL__ import GHC.Show ( showSignedInt, showSpace ) -import GHC.Read ( readDec ) +import Numeric ( readDec ) import GHC.IOBase ( unsafePerformIO, stToIO ) import System.Time ( getClockTime, ClockTime(..) ) #endif diff --git a/Text/ParserCombinators/ReadP.lhs b/Text/ParserCombinators/ReadP.lhs new file mode 100644 index 0000000..98565b6 --- /dev/null +++ b/Text/ParserCombinators/ReadP.lhs @@ -0,0 +1,176 @@ +% ------------------------------------------------------------- +% $Id: ReadP.lhs +% +% (c) The University of Glasgow, 1994-2000 +% + +\begin{code} +{-# OPTIONS -fglasgow-exts -fno-implicit-prelude #-} +module Text.ParserCombinators.ReadP + ( ReadP -- :: * -> *; instance Functor, Monad, MonadPlus + + -- primitive operations + , get -- :: ReadP Char + , look -- :: ReadP String + , (+++) -- :: ReadP a -> ReadP a -> ReadP a + + -- other operations + , pfail -- :: ReadP a + , satisfy -- :: (Char -> Bool) -> ReadP Char + , char -- :: Char -> ReadP Char + , string -- :: String -> ReadP String + , munch -- :: (Char -> Bool) -> ReadP String + , munch1 -- :: (Char -> Bool) -> ReadP String + , skipSpaces -- :: ReadP () + , choice -- :: [ReadP a] -> ReadP a + + -- converting + , readP_to_S -- :: ReadP a -> ReadS a + , readS_to_P -- :: ReadS a -> ReadP a + ) + where + +import Control.Monad( MonadPlus(..) ) +import GHC.Show( isSpace ) +import GHC.Base +\end{code} + + +%********************************************************* +%* * +\subsection{The @ReadP@ type} +%* * +%********************************************************* + +\begin{code} +newtype ReadP a = R (forall b . (a -> P b) -> P b) + +data P a + = Get (Char -> P a) + | Look (String -> P a) + | Fail + | Result a (P a) + | ReadS (ReadS a) + +-- We define a local version of ReadS here, +-- because its "real" definition site is in GHC.Read +type ReadS a = String -> [(a,String)] + +-- Functor, Monad, MonadPlus + +instance Functor ReadP where + fmap h (R f) = R (\k -> f (k . h)) + +instance Monad ReadP where + return x = R (\k -> k x) + fail _ = R (\_ -> Fail) + R m >>= f = R (\k -> m (\a -> let R m' = f a in m' k)) + +instance MonadPlus ReadP where + mzero = pfail + mplus = (+++) +\end{code} + + +%********************************************************* +%* * +\subsection{Operations over ReadP} +%* * +%********************************************************* + +\begin{code} +get :: ReadP Char +get = R (\k -> Get k) + +look :: ReadP String +look = R (\k -> Look k) + +(+++) :: ReadP a -> ReadP a -> ReadP a +R f1 +++ R f2 = R (\k -> f1 k >|< f2 k) + where + Get f1 >|< Get f2 = Get (\c -> f1 c >|< f2 c) + Fail >|< p = p + p >|< Fail = p + Look f >|< Look g = Look (\s -> f s >|< g s) + Result x p >|< q = Result x (p >|< q) + p >|< Result x q = Result x (p >|< q) + Look f >|< p = Look (\s -> f s >|< p) + p >|< Look f = Look (\s -> p >|< f s) + p >|< q = ReadS (\s -> run p s ++ run q s) + +run :: P a -> ReadS a +run (Get f) [] = [] +run (Get f) (c:s) = run (f c) s +run (Look f) s = run (f s) s +run (Result x p) s = (x,s) : run p s +run (ReadS r) s = r s +run Fail _ = [] +\end{code} + + +%********************************************************* +%* * +\subsection{Derived operations} +%* * +%********************************************************* + +\begin{code} +pfail :: ReadP a +pfail = fail "" + +satisfy :: (Char -> Bool) -> ReadP Char +satisfy p = do c <- get; if p c then return c else pfail + +char :: Char -> ReadP Char +char c = satisfy (c ==) + +string :: String -> ReadP String +string s = scan s + where + scan [] = do return s + scan (c:cs) = do char c; scan cs + +munch :: (Char -> Bool) -> ReadP String +-- (munch p) parses the first zero or more characters satisfying p +munch p = + do s <- look + scan s + where + scan (c:cs) | p c = do get; s <- scan cs; return (c:s) + scan _ = do return "" + +munch1 :: (Char -> Bool) -> ReadP String +-- (munch p) parses the first one or more characters satisfying p +munch1 p = + do c <- get + if p c then do s <- munch p; return (c:s) else pfail + +choice :: [ReadP a] -> ReadP a +choice ps = foldr (+++) pfail ps + +skipSpaces :: ReadP () +skipSpaces = + do s <- look + skip s + where + skip (c:s) | isSpace c = do get; skip s + skip _ = do return () +\end{code} + + +%********************************************************* +%* * +\subsection{Converting between ReadP and ReadS +%* * +%********************************************************* + +\begin{code} +readP_to_S :: ReadP a -> ReadS a +readP_to_S (R f) = run (f (\x -> Result x Fail)) + +readS_to_P :: ReadS a -> ReadP a +readS_to_P r = R (\k -> ReadS (\s -> [ bs'' + | (a,s') <- r s + , bs'' <- run (k a) s' + ])) +\end{code} \ No newline at end of file diff --git a/Text/ParserCombinators/ReadPrec.lhs b/Text/ParserCombinators/ReadPrec.lhs new file mode 100644 index 0000000..dcfef79 --- /dev/null +++ b/Text/ParserCombinators/ReadPrec.lhs @@ -0,0 +1,163 @@ +% ---------------------------------------------------------------- +% $Id: ReadPrec.lhs +% +% (c) The University of Glasgow, 1994-2000 +% + +\begin{code} +{-# OPTIONS -fno-implicit-prelude #-} + +module Text.ParserCombinators.ReadPrec + ( ReadPrec -- :: * -> *; instance Functor, Monad, MonadPlus + + -- precedences + , Prec -- :: *; = Int + , minPrec -- :: Prec; = 0 + + -- primitive operations + , lift -- :: ReadP a -> ReadPrec a + , prec -- :: Prec -> ReadPrec a -> ReadPrec a + , step -- :: ReadPrec a -> ReadPrec a + , reset -- :: ReadPrec a -> ReadPrec a + + -- other operations + , get -- :: ReadPrec Char + , look -- :: ReadPrec String + , (+++) -- :: ReadPrec a -> ReadPrec a -> ReadPrec a + , pfail -- :: ReadPrec a + , choice -- :: [ReadPrec a] -> ReadPrec a + + -- converters + , readPrec_to_P -- :: ReadPrec a -> (Int -> ReadP a) + , readP_to_Prec -- :: (Int -> ReadP a) -> ReadPrec a + , readPrec_to_S -- :: ReadPrec a -> (Int -> ReadS a) + , readS_to_Prec -- :: (Int -> ReadS a) -> ReadPrec a + ) + where + + +import Text.ParserCombinators.ReadP + ( ReadP + , readP_to_S + , readS_to_P + ) + +import qualified Text.ParserCombinators.ReadP as ReadP + ( get + , look + , (+++) + , pfail + , choice + ) + +import Control.Monad( MonadPlus(..) ) +import GHC.Num( Num(..) ) +import GHC.Base +\end{code} + + +%********************************************************* +%* * +\subsection{The readPrec type} +%* * +%********************************************************* + +\begin{code} +newtype ReadPrec a = P { unP :: Prec -> ReadP a } + +-- Functor, Monad, MonadPlus + +instance Functor ReadPrec where + fmap h (P f) = P (\n -> fmap h (f n)) + +instance Monad ReadPrec where + return x = P (\_ -> return x) + fail s = P (\_ -> fail s) + P f >>= k = P (\n -> do a <- f n; let P f' = k a in f' n) + +instance MonadPlus ReadPrec where + mzero = pfail + mplus = (+++) + +-- precedences + +type Prec = Int + +minPrec :: Prec +minPrec = 0 +\end{code} + + +%********************************************************* +%* * +\subsection{Operations over ReadPrec +%* * +%********************************************************* + +\begin{code} +lift :: ReadP a -> ReadPrec a +lift m = P (\_ -> m) + +step :: ReadPrec a -> ReadPrec a +-- Increases the precedence context by one +step (P f) = P (\n -> f (n+1)) + +reset :: ReadPrec a -> ReadPrec a +-- Resets the precedence context to zero +reset (P f) = P (\n -> f minPrec) + +prec :: Prec -> ReadPrec a -> ReadPrec a +-- (prec n p) checks that the precedence context is +-- less than or equal to n, +-- if not, fails +-- if so, parses p in context n +prec n (P f) = P (\c -> if c <= n then f n else ReadP.pfail) +\end{code} + +%********************************************************* +%* * +\subsection{Derived operations} +%* * +%********************************************************* + +\begin{code} +get :: ReadPrec Char +get = lift ReadP.get + +look :: ReadPrec String +look = lift ReadP.look + +(+++) :: ReadPrec a -> ReadPrec a -> ReadPrec a +P f1 +++ P f2 = P (\n -> f1 n ReadP.+++ f2 n) + +pfail :: ReadPrec a +pfail = lift ReadP.pfail + +choice :: [ReadPrec a] -> ReadPrec a +choice ps = foldr (+++) pfail ps +\end{code} + + +%********************************************************* +%* * +\subsection{Converting between ReadPrec and ReadS +%* * +%********************************************************* + +\begin{code} +-- We define a local version of ReadS here, +-- because its "real" definition site is in GHC.Read +type ReadS a = String -> [(a,String)] + +readPrec_to_P :: ReadPrec a -> (Int -> ReadP a) +readPrec_to_P (P f) = f + +readP_to_Prec :: (Int -> ReadP a) -> ReadPrec a +readP_to_Prec f = P f + +readPrec_to_S :: ReadPrec a -> (Int -> ReadS a) +readPrec_to_S (P f) n = readP_to_S (f n) + +readS_to_Prec :: (Int -> ReadS a) -> ReadPrec a +readS_to_Prec f = P (\n -> readS_to_P (f n)) +\end{code} diff --git a/Text/Read/Lex.lhs b/Text/Read/Lex.lhs new file mode 100644 index 0000000..e09f75b --- /dev/null +++ b/Text/Read/Lex.lhs @@ -0,0 +1,504 @@ +% ---------------------------------------------------------------- +% $Id: Lex.lhs +% +% (c) The University of Glasgow, 1994-2000 +% + +\begin{code} +{-# OPTIONS -fno-implicit-prelude #-} + +module Text.Read.Lex + -- lexing types + ( LexP -- :: *; = ReadP Lexeme + , Lexeme(..) -- :: *; Show, Eq + + -- lexer + , lex -- :: LexP + , lexLitChar -- :: LexP + + -- numbers + , Number -- :: *; Show, Eq + + , numberToInt -- :: Number -> Maybe Int + , numberToInteger -- :: Number -> Maybe Integer + , numberToRational -- :: Number -> Maybe Integer + , numberToFloat -- :: Number -> Maybe Float + , numberToDouble -- :: Number -> Maybe Double + + , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a + , readOctP -- :: Num a => ReadP a + , readDecP -- :: Num a => ReadP a + , readHexP -- :: Num a => ReadP a + ) + where + +import Text.ParserCombinators.ReadP + +import GHC.Base +import GHC.Num( Num(..), Integer ) +import GHC.Show( Show(.. ), showChar, showString, + isSpace, isAlpha, isAlphaNum, + isOctDigit, isHexDigit, toUpper ) +import GHC.Real( Ratio, Integral, Rational, (%), fromIntegral, fromRational, + toInteger, (^), (^^) ) +import GHC.Float( Float, Double ) +import GHC.List +import GHC.Show( ShowS, shows ) +import GHC.Enum( minBound, maxBound ) +import Data.Maybe +import Data.Either +import Control.Monad +\end{code} + +%********************************************************* +%* * +\subsection{Lexing types} +%* * +%********************************************************* + +\begin{code} +type LexP = ReadP Lexeme + +data Lexeme + = Char Char + | String String + | Single Char + | Symbol String + | Ident String + | Number Number + deriving (Eq) + +instance Show Lexeme where + showsPrec n (Char c) = showsPrec n c + showsPrec n (String s) = showsPrec n s + showsPrec _ (Single c) = showChar c + showsPrec _ (Ident s) = showString s + showsPrec _ (Symbol s) = showString s + showsPrec n (Number x) = showsPrec n x +\end{code} + + +%********************************************************* +%* * +\subsection{Lexing} +%* * +%********************************************************* + +\begin{code} +lex :: LexP +lex = + do skipSpaces + (lexLitChar + +++ lexString + +++ lexSingle + +++ lexSymbol + +++ lexIdf + +++ lexNumber) +\end{code} + +\begin{code} +------------------------------------------------------------------------ +-- symbols + +lexSymbol :: LexP +lexSymbol = + do s <- munch1 isSymbolChar + return (Symbol s) + where + isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~" + +------------------------------------------------------------------------ +-- identifiers + +lexIdf :: LexP +lexIdf = + do c <- satisfy isAlpha + s <- munch isIdfChar + return (Ident (c:s)) + where + isIdfChar c = isAlphaNum c || c `elem` "_'" +\end{code} + + +%********************************************************* +%* * +\subsection{Lexing characters and strings} +%* * +%********************************************************* + +\begin{code} +------------------------------------------------------------------------ +-- char literal + +lexLitChar :: LexP +lexLitChar = + do char '\'' + (c,esc) <- lexChar + guard (esc || c /= '\'') + char '\'' + return (Char c) + +lexChar :: ReadP (Char, Bool) -- "escaped or not"? +lexChar = + do c <- get + if c == '\\' + then do c <- lexEsc; return (c, True) + else do return (c, False) + where + lexEsc = + lexEscChar + +++ lexNumeric + +++ lexCntrlChar + +++ lexAscii + + lexEscChar = + do c <- get + case c of + 'a' -> return '\a' + 'b' -> return '\b' + 'f' -> return '\f' + 'n' -> return '\n' + 'r' -> return '\r' + 't' -> return '\t' + 'v' -> return '\v' + '\\' -> return '\\' + '\"' -> return '\"' + '\'' -> return '\'' + _ -> pfail + + lexNumeric = + do base <- lexBase + n <- lexInteger base + guard (n <= toInteger (ord maxBound)) + return (chr (fromInteger n)) + where + lexBase = + do s <- look + case s of + 'o':_ -> do get; return 8 + 'O':_ -> do get; return 8 + 'x':_ -> do get; return 16 + 'X':_ -> do get; return 16 + _ -> do return 10 + + lexCntrlChar = + do char '^' + c <- get + case c of + '@' -> return '\^@' + 'A' -> return '\^A' + 'B' -> return '\^B' + 'C' -> return '\^C' + 'D' -> return '\^D' + 'E' -> return '\^E' + 'F' -> return '\^F' + 'G' -> return '\^G' + 'H' -> return '\^H' + 'I' -> return '\^I' + 'J' -> return '\^J' + 'K' -> return '\^K' + 'L' -> return '\^L' + 'M' -> return '\^M' + 'N' -> return '\^N' + 'O' -> return '\^O' + 'P' -> return '\^P' + 'Q' -> return '\^Q' + 'R' -> return '\^R' + 'S' -> return '\^S' + 'T' -> return '\^T' + 'U' -> return '\^U' + 'V' -> return '\^V' + 'W' -> return '\^W' + 'X' -> return '\^X' + 'Y' -> return '\^Y' + 'Z' -> return '\^Z' + '[' -> return '\^[' + '\\' -> return '\^\' + ']' -> return '\^]' + '^' -> return '\^^' + '_' -> return '\^_' + _ -> pfail + + lexAscii = + do choice + [ string "NUL" >> return '\NUL' + , string "SOH" >> return '\SOH' + , string "STX" >> return '\STX' + , string "ETX" >> return '\ETX' + , string "EOT" >> return '\EOT' + , string "ENQ" >> return '\ENQ' + , string "ACK" >> return '\ACK' + , string "BEL" >> return '\BEL' + , string "BS" >> return '\BS' + , string "HT" >> return '\HT' + , string "LF" >> return '\LF' + , string "VT" >> return '\VT' + , string "FF" >> return '\FF' + , string "CR" >> return '\CR' + , string "SO" >> return '\SO' + , string "SI" >> return '\SI' + , string "DLE" >> return '\DLE' + , string "DC1" >> return '\DC1' + , string "DC2" >> return '\DC2' + , string "DC3" >> return '\DC3' + , string "DC4" >> return '\DC4' + , string "NAK" >> return '\NAK' + , string "SYN" >> return '\SYN' + , string "ETB" >> return '\ETB' + , string "CAN" >> return '\CAN' + , string "EM" >> return '\EM' + , string "SUB" >> return '\SUB' + , string "ESC" >> return '\ESC' + , string "FS" >> return '\FS' + , string "GS" >> return '\GS' + , string "RS" >> return '\RS' + , string "US" >> return '\US' + , string "SP" >> return '\SP' + , string "DEL" >> return '\DEL' + ] + +------------------------------------------------------------------------ +-- string literal + +lexString :: LexP +lexString = + do char '"' + body id + where + body f = + do (c,esc) <- lexStrItem + if c /= '"' || esc + then body (f.(c:)) + else return (String (f "")) + + lexStrItem = + (lexEmpty >> lexStrItem) + +++ lexChar + + lexEmpty = + do char '\\' + c <- get + case c of + '&' -> do return () + _ | isSpace c -> do skipSpaces; char '\\'; return () + _ -> do pfail + +------------------------------------------------------------------------ +-- single character lexemes + +lexSingle :: LexP +lexSingle = + do c <- satisfy isSingleChar + return (Single c) + where + isSingleChar c = c `elem` ",;()[]{=}_`" +\end{code} + + +%********************************************************* +%* * +\subsection{Lexing numbers} +%* * +%********************************************************* + +\begin{code} +data Number + = MkNumber + { value :: Either Integer Rational + , base :: Base + , digits :: Digits + , fraction :: Maybe Digits + , exponent :: Maybe Integer + } + deriving (Eq) + +type Base = Int +type Digits = [Int] + +instance Show Number where + showsPrec _ x = + showsBase (base x) + . foldr (.) id (map showDigit (digits x)) + . showsFrac (fraction x) + . showsExp (exponent x) + where + showsBase 8 = showString "0o" + showsBase 10 = id + showsBase 16 = showString "0x" + + showsFrac Nothing = id + showsFrac (Just ys) = + showChar '.' + . foldr (.) id (map showDigit ys) + + showsExp Nothing = id + showsExp (Just exp) = + showChar 'e' + . shows exp + +showDigit :: Int -> ShowS +showDigit n | n <= 9 = shows n + | otherwise = showChar (chr (n + ord 'A' - 10)) + +lexNumber :: LexP +lexNumber = + do base <- lexBase + lexNumberBase base + where + lexBase = + do s <- look + case s of + '0':'o':_ -> do get; get; return 8 + '0':'O':_ -> do get; get; return 8 + '0':'x':_ -> do get; get; return 16 + '0':'X':_ -> do get; get; return 16 + _ -> do return 10 + +lexNumberBase :: Base -> LexP +lexNumberBase base = + do xs <- lexDigits base + mFrac <- lexFrac base + mExp <- lexExp base + return (Number (MkNumber (value xs mFrac mExp) base xs mFrac mExp)) + where + value xs mFrac mExp = valueFracExp (val (fromIntegral base) 0 xs) mFrac mExp + + valueFracExp a Nothing mExp = Left (valueExp a mExp) + valueFracExp a (Just fs) mExp = + Right (valueExp (fromInteger a + frac (fromIntegral base) 0 1 fs) mExp) + + valueExp a Nothing = a + valueExp a (Just exp) = a * (fromIntegral base ^ exp) + +lexFrac :: Base -> ReadP (Maybe Digits) +lexFrac base = + do s <- look + case s of + '.' : _ -> + do get + frac <- lexDigits base + return (Just frac) + + _ -> + do return Nothing + +lexExp :: Base -> ReadP (Maybe Integer) +lexExp base = + do s <- look + case s of + e : _ | e `elem` "eE" && base == 10 -> + do get + (signedExp +++ exp) + where + signedExp = + do c <- char '-' +++ char '+' + n <- lexInteger 10 + return (Just (if c == '-' then -n else n)) + + exp = + do n <- lexInteger 10 + return (Just n) + + _ -> + do return Nothing + +lexDigits :: Int -> ReadP Digits +lexDigits base = + do s <- look + xs <- scan s id + guard (not (null xs)) + return xs + where + scan (c:cs) f = case valDig base c of + Just n -> do get; scan cs (f.(n:)) + Nothing -> do return (f []) + scan [] f = do return (f []) + +lexInteger :: Base -> ReadP Integer +lexInteger base = + do xs <- lexDigits base + return (val (fromIntegral base) 0 xs) + +val :: Num a => a -> a -> Digits -> a +val base y [] = y +val base y (x:xs) = y' `seq` val base y' xs + where + y' = y * base + fromIntegral x + +frac :: Integral a => a -> a -> a -> Digits -> Ratio a +frac base a b [] = a % b +frac base a b (x:xs) = a' `seq` b' `seq` frac base a' b' xs + where + a' = a * base + fromIntegral x + b' = b * base + +valDig :: Num a => a -> Char -> Maybe Int +valDig 8 c + | '0' <= c && c <= '7' = Just (ord c - ord '0') + | otherwise = Nothing + +valDig 10 c + | '0' <= c && c <= '9' = Just (ord c - ord '0') + | otherwise = Nothing + +valDig 16 c + | '0' <= c && c <= '9' = Just (ord c - ord '0') + | 'a' <= c && c <= 'f' = Just (ord c - ord 'a' + 10) + | 'A' <= c && c <= 'F' = Just (ord c - ord 'A' + 10) + | otherwise = Nothing + +------------------------------------------------------------------------ +-- conversion + +numberToInt :: Number -> Maybe Int +numberToInt x = + case numberToInteger x of + Just n | minBound' <= n && n <= maxBound' -> Just (fromInteger n) + _ -> Nothing + where + minBound' = toInteger (minBound :: Int) + maxBound' = toInteger (maxBound :: Int) + +numberToInteger :: Number -> Maybe Integer +numberToInteger x = + case value x of + Left n -> Just n + _ -> Nothing + +numberToRational :: Number -> Maybe Rational +numberToRational x = + case value x of + Left n -> Just (fromInteger n) + Right r -> Just r + +numberToFloat :: Number -> Maybe Float +numberToFloat x = + case value x of + Left n -> Just (fromInteger n) + Right r -> Just (fromRational r) + +numberToDouble :: Number -> Maybe Double +numberToDouble x = + case value x of + Left n -> Just (fromInteger n) + Right r -> Just (fromRational r) + +------------------------------------------------------------------------ +-- other numeric lexing functions + +readIntP :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a +readIntP base isDigit valDigit = + do s <- munch1 isDigit + return (val base 0 (map valDigit s)) + +readIntP' :: Num a => a -> ReadP a +readIntP' base = readIntP base isDigit valDigit + where + isDigit c = maybe False (const True) (valDig base c) + valDigit c = maybe 0 id (valDig base c) + +readOctP, readDecP, readHexP :: Num a => ReadP a +readOctP = readIntP' 8 +readDecP = readIntP' 10 +readHexP = readIntP' 16 +\end{code} -- 1.7.10.4