1 {-# LANGUAGE CPP, NoImplicitPrelude #-}
3 -----------------------------------------------------------------------------
5 -- Module : Text.Read.Lex
6 -- Copyright : (c) The University of Glasgow 2002
7 -- License : BSD-style (see the file libraries/base/LICENSE)
9 -- Maintainer : libraries@haskell.org
10 -- Stability : provisional
11 -- Portability : non-portable (uses Text.ParserCombinators.ReadP)
13 -- The cut-down Haskell lexer, used by Text.Read
15 -----------------------------------------------------------------------------
19 ( Lexeme(..) -- :: *; Show, Eq
22 , lex -- :: ReadP Lexeme Skips leading spaces
23 , hsLex -- :: ReadP String
24 , lexChar -- :: ReadP Char Reads just one char, with H98 escapes
26 , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
27 , readOctP -- :: Num a => ReadP a
28 , readDecP -- :: Num a => ReadP a
29 , readHexP -- :: Num a => ReadP a
33 import Text.ParserCombinators.ReadP
35 #ifdef __GLASGOW_HASKELL__
37 import GHC.Num( Num(..), Integer )
38 import GHC.Show( Show(..) )
40 import {-# SOURCE #-} GHC.Unicode ( isSpace, isAlpha, isAlphaNum )
42 import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral,
43 toInteger, (^), (^^), infinity, notANumber )
45 import GHC.Enum( maxBound )
47 import Prelude hiding ( lex )
48 import Data.Char( chr, ord, isSpace, isAlpha, isAlphaNum )
49 import Data.Ratio( Ratio, (%) )
52 import Hugs.Prelude( Ratio(..) )
57 -- -----------------------------------------------------------------------------
62 = Char Char -- ^ Character literal
63 | String String -- ^ String literal, with escapes interpreted
64 | Punc String -- ^ Punctuation or reserved symbol, e.g. @(@, @::@
65 | Ident String -- ^ Haskell identifier, e.g. @foo@, @Baz@
66 | Symbol String -- ^ Haskell symbol, e.g. @>>@, @:%@
67 | Int Integer -- ^ Integer literal
68 | Rat Rational -- ^ Floating point literal
72 -- -----------------------------------------------------------------------------
76 lex = skipSpaces >> lexToken
79 -- ^ Haskell lexer: returns the lexed string, rather than the lexeme
81 (s,_) <- gather lexToken
84 lexToken :: ReadP Lexeme
94 -- ----------------------------------------------------------------------
96 lexEOF :: ReadP Lexeme
101 -- ---------------------------------------------------------------------------
102 -- Single character lexemes
104 lexPunc :: ReadP Lexeme
106 do c <- satisfy isPuncChar
109 isPuncChar c = c `elem` ",;()[]{}`"
111 -- ----------------------------------------------------------------------
114 lexSymbol :: ReadP Lexeme
116 do s <- munch1 isSymbolChar
117 if s `elem` reserved_ops then
118 return (Punc s) -- Reserved-ops count as punctuation
122 isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
123 reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]
125 -- ----------------------------------------------------------------------
128 lexId :: ReadP Lexeme
129 lexId = lex_nan <++ lex_id
131 -- NaN and Infinity look like identifiers, so
132 -- we parse them first.
133 lex_nan = (string "NaN" >> return (Rat notANumber)) +++
134 (string "Infinity" >> return (Rat infinity))
136 lex_id = do c <- satisfy isIdsChar
140 -- Identifiers can start with a '_'
141 isIdsChar c = isAlpha c || c == '_'
142 isIdfChar c = isAlphaNum c || c `elem` "_'"
144 #ifndef __GLASGOW_HASKELL__
145 infinity, notANumber :: Rational
150 -- ---------------------------------------------------------------------------
151 -- Lexing character literals
153 lexLitChar :: ReadP Lexeme
157 guard (esc || c /= '\'') -- Eliminate '' possibility
161 lexChar :: ReadP Char
162 lexChar = do { (c,_) <- lexCharE; return c }
164 lexCharE :: ReadP (Char, Bool) -- "escaped or not"?
168 then do c2 <- lexEsc; return (c2, True)
169 else do return (c1, False)
193 do base <- lexBaseChar <++ return 10
195 guard (n <= toInteger (ord maxBound))
196 return (chr (fromInteger n))
238 [ (string "SOH" >> return '\SOH') <++
239 (string "SO" >> return '\SO')
240 -- \SO and \SOH need maximal-munch treatment
241 -- See the Haskell report Sect 2.6
243 , string "NUL" >> return '\NUL'
244 , string "STX" >> return '\STX'
245 , string "ETX" >> return '\ETX'
246 , string "EOT" >> return '\EOT'
247 , string "ENQ" >> return '\ENQ'
248 , string "ACK" >> return '\ACK'
249 , string "BEL" >> return '\BEL'
250 , string "BS" >> return '\BS'
251 , string "HT" >> return '\HT'
252 , string "LF" >> return '\LF'
253 , string "VT" >> return '\VT'
254 , string "FF" >> return '\FF'
255 , string "CR" >> return '\CR'
256 , string "SI" >> return '\SI'
257 , string "DLE" >> return '\DLE'
258 , string "DC1" >> return '\DC1'
259 , string "DC2" >> return '\DC2'
260 , string "DC3" >> return '\DC3'
261 , string "DC4" >> return '\DC4'
262 , string "NAK" >> return '\NAK'
263 , string "SYN" >> return '\SYN'
264 , string "ETB" >> return '\ETB'
265 , string "CAN" >> return '\CAN'
266 , string "EM" >> return '\EM'
267 , string "SUB" >> return '\SUB'
268 , string "ESC" >> return '\ESC'
269 , string "FS" >> return '\FS'
270 , string "GS" >> return '\GS'
271 , string "RS" >> return '\RS'
272 , string "US" >> return '\US'
273 , string "SP" >> return '\SP'
274 , string "DEL" >> return '\DEL'
278 -- ---------------------------------------------------------------------------
281 lexString :: ReadP Lexeme
287 do (c,esc) <- lexStrItem
293 lexStrItem = (lexEmpty >> lexStrItem)
301 _ | isSpace c -> do skipSpaces; _ <- char '\\'; return ()
304 -- ---------------------------------------------------------------------------
310 lexNumber :: ReadP Lexeme
312 = lexHexOct <++ -- First try for hex or octal 0x, 0o etc
313 -- If that fails, try for a decimal number
314 lexDecNumber -- Start with ordinary digits
316 lexHexOct :: ReadP Lexeme
320 digits <- lexDigits base
321 return (Int (val (fromIntegral base) 0 digits))
323 lexBaseChar :: ReadP Int
324 -- Lex a single character indicating the base; fail if not there
325 lexBaseChar = do { c <- get;
333 lexDecNumber :: ReadP Lexeme
335 do xs <- lexDigits 10
336 mFrac <- lexFrac <++ return Nothing
337 mExp <- lexExp <++ return Nothing
338 return (value xs mFrac mExp)
340 value xs mFrac mExp = valueFracExp (val 10 0 xs) mFrac mExp
342 valueFracExp :: Integer -> Maybe Digits -> Maybe Integer
344 valueFracExp a Nothing Nothing
346 valueFracExp a Nothing (Just exp)
347 | exp >= 0 = Int (a * (10 ^ exp)) -- 43e7
348 | otherwise = Rat (valExp (fromInteger a) exp) -- 43e-7
349 valueFracExp a (Just fs) mExp
351 Nothing -> Rat rat -- 4.3
352 Just exp -> Rat (valExp rat exp) -- 4.3e-4
355 rat = fromInteger a + frac 10 0 1 fs
357 valExp :: Rational -> Integer -> Rational
358 valExp rat exp = rat * (10 ^^ exp)
360 lexFrac :: ReadP (Maybe Digits)
361 -- Read the fractional part; fail if it doesn't
362 -- start ".d" where d is a digit
363 lexFrac = do _ <- char '.'
364 fraction <- lexDigits 10
365 return (Just fraction)
367 lexExp :: ReadP (Maybe Integer)
368 lexExp = do _ <- char 'e' +++ char 'E'
369 exp <- signedExp +++ lexInteger 10
373 = do c <- char '-' +++ char '+'
375 return (if c == '-' then -n else n)
377 lexDigits :: Int -> ReadP Digits
378 -- Lex a non-empty sequence of digits in specified base
382 guard (not (null xs))
385 scan (c:cs) f = case valDig base c of
386 Just n -> do _ <- get; scan cs (f.(n:))
387 Nothing -> do return (f [])
388 scan [] f = do return (f [])
390 lexInteger :: Base -> ReadP Integer
392 do xs <- lexDigits base
393 return (val (fromIntegral base) 0 xs)
395 val :: Num a => a -> a -> Digits -> a
396 -- val base y [d1,..,dn] = y ++ [d1,..,dn], as it were
398 val base y (x:xs) = y' `seq` val base y' xs
400 y' = y * base + fromIntegral x
402 frac :: Integral a => a -> a -> a -> Digits -> Ratio a
403 frac _ a b [] = a % b
404 frac base a b (x:xs) = a' `seq` b' `seq` frac base a' b' xs
406 a' = a * base + fromIntegral x
409 valDig :: Num a => a -> Char -> Maybe Int
411 | '0' <= c && c <= '7' = Just (ord c - ord '0')
412 | otherwise = Nothing
414 valDig 10 c = valDecDig c
417 | '0' <= c && c <= '9' = Just (ord c - ord '0')
418 | 'a' <= c && c <= 'f' = Just (ord c - ord 'a' + 10)
419 | 'A' <= c && c <= 'F' = Just (ord c - ord 'A' + 10)
420 | otherwise = Nothing
422 valDig _ _ = error "valDig: Bad base"
424 valDecDig :: Char -> Maybe Int
426 | '0' <= c && c <= '9' = Just (ord c - ord '0')
427 | otherwise = Nothing
429 -- ----------------------------------------------------------------------
430 -- other numeric lexing functions
432 readIntP :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
433 readIntP base isDigit valDigit =
434 do s <- munch1 isDigit
435 return (val base 0 (map valDigit s))
437 readIntP' :: Num a => a -> ReadP a
438 readIntP' base = readIntP base isDigit valDigit
440 isDigit c = maybe False (const True) (valDig base c)
441 valDigit c = maybe 0 id (valDig base c)
443 readOctP, readDecP, readHexP :: Num a => ReadP a
444 readOctP = readIntP' 8
445 readDecP = readIntP' 10
446 readHexP = readIntP' 16