1 % ------------------------------------------------------------------------------
2 % $Id: Read.lhs,v 1.3 2001/12/21 15:07:25 simonmar Exp $
4 % (c) The University of Glasgow, 1994-2000
7 \section[GHC.Read]{Module @GHC.Read@}
9 Instances of the Read class.
12 {-# OPTIONS -fno-implicit-prelude #-}
19 import {-# SOURCE #-} GHC.Err ( error )
20 import GHC.Enum ( Enum(..), maxBound )
25 import GHC.Show -- isAlpha etc
29 %*********************************************************
31 \subsection{The @Read@ class}
33 %*********************************************************
35 Note: if you compile this with -DNEW_READS_REP, you'll get
36 a (simpler) ReadS representation that only allow one valid
37 parse of a string of characters, instead of a list of
40 [changing the ReadS rep has implications for the deriving
41 machinery for Read, a change that hasn't been made, so you
42 probably won't want to compile in this new rep. except
43 when in an experimental mood.]
48 type ReadS a = String -> [(a,String)]
50 type ReadS a = String -> Maybe (a,String)
54 readsPrec :: Int -> ReadS a
57 readList = readList__ reads
60 In this module we treat [(a,String)] as a monad in Control.MonadPlus
61 But Control.MonadPlus isn't defined yet, so we simply give local
62 declarations for mzero and guard suitable for this particular
63 type. It would also be reasonably to move Control.MonadPlus to GHC.Base
64 along with Control.Monad and Functor, but that seems overkill for one
76 %*********************************************************
78 \subsection{Utility functions}
80 %*********************************************************
83 reads :: (Read a) => ReadS a
86 read :: (Read a) => String -> a
91 [] -> error "Prelude.read: no parse"
92 _ -> error "Prelude.read: ambiguous parse"
95 Nothing -> error "Prelude.read: no parse"
105 readParen :: Bool -> ReadS a -> ReadS a
106 readParen b g = if b then mandatory else optional
107 where optional r = g r ++ mandatory r
115 readList__ :: ReadS a -> ReadS [a]
118 = readParen False (\r -> do
122 (do { ("]",t) <- lex s ; return ([],t) }) ++
123 (do { (x,t) <- readx s ; (xs,u) <- readl2 t ; return (x:xs,u) })
126 (do { ("]",t) <- lex s ; return ([],t) }) ++
127 (do { (",",t) <- lex s ; (x,u) <- readx t ; (xs,v) <- readl2 u ; return (x:xs,v) })
132 %*********************************************************
134 \subsection{Lexical analysis}
136 %*********************************************************
138 This lexer is not completely faithful to the Haskell lexical syntax.
140 Qualified names are not handled properly
141 A `--' does not terminate a symbol
142 Octal and hexidecimal numerics are not recognized as a single token
147 lex "" = return ("","")
148 lex (c:s) | isSpace c = lex (dropWhile isSpace s)
150 (ch, '\'':t) <- lexLitChar s
152 return ('\'':ch++"'", t)
154 (str,t) <- lexString s
158 lexString ('"':s) = return ("\"",s)
160 (ch,t) <- lexStrItem s
161 (str,u) <- lexString t
165 lexStrItem ('\\':'&':s) = return ("\\&",s)
166 lexStrItem ('\\':c:s) | isSpace c = do
167 ('\\':t) <- return (dropWhile isSpace s)
169 lexStrItem s = lexLitChar s
171 lex (c:s) | isSingle c = return ([c],s)
173 (sym,t) <- return (span isSym s)
176 (nam,t) <- return (span isIdChar s)
179 {- Removed, 13/03/2000 by SDM.
180 Doesn't work, and not required by Haskell report.
184 ('o':rs) -> (isOctDigit, rs, False)
185 ('O':rs) -> (isOctDigit, rs, False)
186 ('x':rs) -> (isHexDigit, rs, False)
187 ('X':rs) -> (isHexDigit, rs, False)
188 _ -> (isDigit, s, True)
190 (ds,s) <- return (span isDigit s)
191 (fe,t) <- lexFracExp s
193 | otherwise = mzero -- bad character
195 isSingle c = c `elem` ",;()[]{}_`"
196 isSym c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
197 isIdChar c = isAlphaNum c || c `elem` "_'"
199 lexFracExp ('.':c:cs) | isDigit c = do
200 (ds,t) <- lex0Digits cs
202 return ('.':c:ds++e,u)
203 lexFracExp s = return ("",s)
205 lexExp (e:s) | e `elem` "eE" =
208 guard (c `elem` "+-")
209 (ds,u) <- lexDecDigits t
210 return (e:c:ds,u)) ++
212 (ds,t) <- lexDecDigits s
215 lexExp s = return ("",s)
217 lexDigits :: ReadS String
218 lexDigits = lexDecDigits
220 lexDecDigits :: ReadS String
221 lexDecDigits = nonnull isDigit
223 lexOctDigits :: ReadS String
224 lexOctDigits = nonnull isOctDigit
226 lexHexDigits :: ReadS String
227 lexHexDigits = nonnull isHexDigit
230 lex0Digits :: ReadS String
231 lex0Digits s = return (span isDigit s)
233 nonnull :: (Char -> Bool) -> ReadS String
235 (cs@(_:_),t) <- return (span p s)
238 lexLitChar :: ReadS String
239 lexLitChar ('\\':s) = do
243 lexEsc (c:s) | c `elem` escChars = return ([c],s)
244 lexEsc s@(d:_) | isDigit d = checkSize 10 lexDecDigits s
245 lexEsc ('o':d:s) | isOctDigit d = checkSize 8 lexOctDigits (d:s)
246 lexEsc ('O':d:s) | isOctDigit d = checkSize 8 lexOctDigits (d:s)
247 lexEsc ('x':d:s) | isHexDigit d = checkSize 16 lexHexDigits (d:s)
248 lexEsc ('X':d:s) | isHexDigit d = checkSize 16 lexHexDigits (d:s)
249 lexEsc ('^':c:s) | c >= '@' && c <= '_' = [(['^',c],s)] -- cf. cntrl in 2.6 of H. report.
250 lexEsc s@(c:_) | isUpper c = fromAsciiLab s
253 escChars = "abfnrtv\\\"'"
255 fromAsciiLab (x:y:z:ls) | isUpper y && (isUpper z || isDigit z) &&
256 [x,y,z] `elem` asciiEscTab = return ([x,y,z], ls)
257 fromAsciiLab (x:y:ls) | isUpper y &&
258 [x,y] `elem` asciiEscTab = return ([x,y], ls)
259 fromAsciiLab _ = mzero
261 asciiEscTab = "DEL" : asciiTab
264 Check that the numerically escaped char literals are
265 within accepted boundaries.
267 Note: this allows char lits with leading zeros, i.e.,
268 \0000000000000000000000000000001.
270 checkSize base f str = do
272 if toAnInteger base num > toInteger (ord maxBound) then
276 8 -> return ('o':num, res)
277 16 -> return ('x':num, res)
278 _ -> return (num, res)
280 toAnInteger base = foldl (\ acc n -> acc*base + toInteger (digitToInt n)) 0
283 lexLitChar (c:s) = return ([c],s)
284 lexLitChar "" = mzero
286 digitToInt :: Char -> Int
288 | isDigit c = fromEnum c - fromEnum '0'
289 | c >= 'a' && c <= 'f' = fromEnum c - fromEnum 'a' + 10
290 | c >= 'A' && c <= 'F' = fromEnum c - fromEnum 'A' + 10
291 | otherwise = error ("Char.digitToInt: not a digit " ++ show c) -- sigh
294 %*********************************************************
296 \subsection{Instances of @Read@}
298 %*********************************************************
301 instance Read Char where
302 readsPrec _ = readParen False
305 (c,"\'") <- readLitChar s
308 readList = readParen False (\r -> do
312 where readl ('"':s) = return ("",s)
313 readl ('\\':'&':s) = readl s
315 (c,t) <- readLitChar s
319 instance Read Bool where
320 readsPrec _ = readParen False
323 (do { ("True", rest) <- return lr ; return (True, rest) }) ++
324 (do { ("False", rest) <- return lr ; return (False, rest) }))
327 instance Read Ordering where
328 readsPrec _ = readParen False
331 (do { ("LT", rest) <- return lr ; return (LT, rest) }) ++
332 (do { ("EQ", rest) <- return lr ; return (EQ, rest) }) ++
333 (do { ("GT", rest) <- return lr ; return (GT, rest) }))
335 instance Read a => Read (Maybe a) where
336 readsPrec _ = readParen False
339 (do { ("Nothing", rest) <- return lr ; return (Nothing, rest)}) ++
341 ("Just", rest1) <- return lr
342 (x, rest2) <- reads rest1
343 return (Just x, rest2)))
345 instance (Read a, Read b) => Read (Either a b) where
346 readsPrec _ = readParen False
350 ("Left", rest1) <- return lr
351 (x, rest2) <- reads rest1
352 return (Left x, rest2)) ++
354 ("Right", rest1) <- return lr
355 (x, rest2) <- reads rest1
356 return (Right x, rest2)))
358 instance Read Int where
359 readsPrec _ x = readSigned readDec x
361 instance Read Integer where
362 readsPrec _ x = readSigned readDec x
364 instance Read Float where
365 readsPrec _ x = readSigned readFloat x
367 instance Read Double where
368 readsPrec _ x = readSigned readFloat x
370 instance (Integral a, Read a) => Read (Ratio a) where
371 readsPrec p = readParen (p > ratio_prec)
378 instance (Read a) => Read [a] where
379 readsPrec _ = readList
381 instance Read () where
382 readsPrec _ = readParen False
388 instance (Read a, Read b) => Read (a,b) where
389 readsPrec _ = readParen False
392 (x,t) <- readsPrec 0 s
394 (y,v) <- readsPrec 0 u
398 instance (Read a, Read b, Read c) => Read (a, b, c) where
399 readsPrec _ = readParen False
402 (x,c) <- readsPrec 0 b
404 (y,e) <- readsPrec 0 d
406 (z,g) <- readsPrec 0 f
410 instance (Read a, Read b, Read c, Read d) => Read (a, b, c, d) where
411 readsPrec _ = readParen False
414 (w,c) <- readsPrec 0 b
416 (x,e) <- readsPrec 0 d
418 (y,g) <- readsPrec 0 f
420 (z,h) <- readsPrec 0 h
422 return ((w,x,y,z), i))
424 instance (Read a, Read b, Read c, Read d, Read e) => Read (a, b, c, d, e) where
425 readsPrec _ = readParen False
428 (v,c) <- readsPrec 0 b
430 (w,e) <- readsPrec 0 d
432 (x,g) <- readsPrec 0 f
434 (y,i) <- readsPrec 0 h
436 (z,k) <- readsPrec 0 j
438 return ((v,w,x,y,z), l))
442 %*********************************************************
444 \subsection{Reading characters}
446 %*********************************************************
449 readLitChar :: ReadS Char
451 readLitChar [] = mzero
452 readLitChar ('\\':s) = readEsc s
454 readEsc ('a':s) = return ('\a',s)
455 readEsc ('b':s) = return ('\b',s)
456 readEsc ('f':s) = return ('\f',s)
457 readEsc ('n':s) = return ('\n',s)
458 readEsc ('r':s) = return ('\r',s)
459 readEsc ('t':s) = return ('\t',s)
460 readEsc ('v':s) = return ('\v',s)
461 readEsc ('\\':s) = return ('\\',s)
462 readEsc ('"':s) = return ('"',s)
463 readEsc ('\'':s) = return ('\'',s)
464 readEsc ('^':c:s) | c >= '@' && c <= '_'
465 = return (chr (ord c - ord '@'), s)
466 readEsc s@(d:_) | isDigit d
477 readEsc s@(c:_) | isUpper c
478 = let table = ('\DEL', "DEL") : zip ['\NUL'..] asciiTab
479 in case [(c,s') | (c, mne) <- table,
480 ([],s') <- [match mne s]]
481 of (pr:_) -> return pr
485 readLitChar (c:s) = return (c,s)
487 match :: (Eq a) => [a] -> [a] -> ([a],[a])
488 match (x:xs) (y:ys) | x == y = match xs ys
489 match xs ys = (xs,ys)
494 %*********************************************************
496 \subsection{Reading numbers}
498 %*********************************************************
500 Note: reading numbers at bases different than 10, does not
501 include lexing common prefixes such as '0x' or '0o' etc.
504 {-# SPECIALISE readDec ::
507 readDec :: (Integral a) => ReadS a
508 readDec = readInt 10 isDigit (\d -> ord d - ord '0')
510 {-# SPECIALISE readOct ::
513 readOct :: (Integral a) => ReadS a
514 readOct = readInt 8 isOctDigit (\d -> ord d - ord '0')
516 {-# SPECIALISE readHex ::
519 readHex :: (Integral a) => ReadS a
520 readHex = readInt 16 isHexDigit hex
521 where hex d = ord d - (if isDigit d then ord '0'
522 else ord (if isUpper d then 'A' else 'a') - 10)
524 readInt :: (Integral a) => a -> (Char -> Bool) -> (Char -> Int) -> ReadS a
525 readInt radix isDig digToInt s = do
526 (ds,r) <- nonnull isDig s
527 return (foldl1 (\n d -> n * radix + d)
528 (map (fromInteger . toInteger . digToInt) ds), r)
530 {-# SPECIALISE readSigned ::
531 ReadS Int -> ReadS Int,
532 ReadS Integer -> ReadS Integer,
533 ReadS Double -> ReadS Double #-}
534 readSigned :: (Real a) => ReadS a -> ReadS a
535 readSigned readPos = readParen False read'
536 where read' r = read'' r ++
543 (n,"") <- readPos str
547 The functions readFloat below uses rational arithmetic
548 to ensure correct conversion between the floating-point radix and
549 decimal. It is often possible to use a higher-precision floating-
550 point type to obtain the same results.
553 {-# SPECIALISE readFloat ::
556 readFloat :: (RealFloat a) => ReadS a
559 (x,t) <- readRational r
560 return (fromRational x,t) ) ++
565 ("Infinity",t) <- lex r
568 readRational :: ReadS Rational -- NB: doesn't handle leading "-"
572 return ((n%1)*10^^(k-d), t)
575 (ds,s) <- lexDecDigits r
576 (ds',t) <- lexDotDigits s
577 return (read (ds++ds'), length ds', t)
579 readExp (e:s) | e `elem` "eE" = readExp' s
580 readExp s = return (0,s)
582 readExp' ('+':s) = readDec s
583 readExp' ('-':s) = do
586 readExp' s = readDec s
588 lexDotDigits ('.':s) = lex0Digits s
589 lexDotDigits s = return ("",s)
591 readRational__ :: String -> Rational -- we export this one (non-std)
592 -- NB: *does* handle a leading "-"
595 '-' : xs -> - (read_me xs)
599 = case (do { (x,t) <- readRational s ; ("","") <- lex t ; return x }) of
600 #ifndef NEW_READS_REP
602 [] -> error ("readRational__: no parse:" ++ top_s)
603 _ -> error ("readRational__: ambiguous parse:" ++ top_s)
606 Nothing -> error ("readRational__: no parse:" ++ top_s)