1 {-# OPTIONS -fno-implicit-prelude #-}
2 -----------------------------------------------------------------------------
4 -- Module : Text.Read.Lex
5 -- Copyright : (c) The University of Glasgow 2002
6 -- License : BSD-style (see the file libraries/base/LICENSE)
8 -- Maintainer : libraries@haskell.org
9 -- Stability : provisional
10 -- Portability : portable
12 -- The cut-down Haskell lexer, used by Text.Read
14 -----------------------------------------------------------------------------
18 ( Lexeme(..) -- :: *; Show, Eq
21 , lex -- :: ReadP Lexeme Skips leading spaces
22 , hsLex -- :: ReadP String
23 , lexChar -- :: ReadP Char Reads just one char, with H98 escapes
25 , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
26 , readOctP -- :: Num a => ReadP a
27 , readDecP -- :: Num a => ReadP a
28 , readHexP -- :: Num a => ReadP a
32 import Text.ParserCombinators.ReadP
35 import GHC.Num( Num(..), Integer )
36 import GHC.Show( Show(.. ), showChar, showString,
37 isSpace, isAlpha, isAlphaNum,
38 isOctDigit, isHexDigit, toUpper )
39 import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral, fromRational,
40 toInteger, (^), (^^), infinity, notANumber )
41 import GHC.Float( Float, Double )
43 import GHC.Show( ShowS, shows )
44 import GHC.Enum( minBound, maxBound )
49 -- -----------------------------------------------------------------------------
52 type LexP = ReadP Lexeme
55 = Char Char -- Quotes removed,
56 | String String -- escapes interpreted
57 | Punc String -- Punctuation, eg "(", "::"
58 | Ident String -- Haskell identifiers, e.g. foo, baz
59 | Symbol String -- Haskell symbols, e.g. >>, %
65 -- -----------------------------------------------------------------------------
69 lex = skipSpaces >> lexToken
72 -- ^ Haskell lexer: returns the lexed string, rather than the lexeme
74 (s,_) <- gather lexToken
77 lexToken :: ReadP Lexeme
87 -- ----------------------------------------------------------------------
89 lexEOF :: ReadP Lexeme
94 -- ---------------------------------------------------------------------------
95 -- Single character lexemes
97 lexPunc :: ReadP Lexeme
99 do c <- satisfy isPuncChar
102 isPuncChar c = c `elem` ",;()[]{}`"
104 -- ----------------------------------------------------------------------
107 lexSymbol :: ReadP Lexeme
109 do s <- munch1 isSymbolChar
110 if s `elem` reserved_ops then
111 return (Punc s) -- Reserved-ops count as punctuation
115 isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
116 reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]
118 -- ----------------------------------------------------------------------
121 lexId :: ReadP Lexeme
122 lexId = lex_nan <++ lex_id
124 -- NaN and Infinity look like identifiers, so
125 -- we parse them first.
126 lex_nan = (string "NaN" >> return (Rat notANumber)) +++
127 (string "Infinity" >> return (Rat infinity))
129 lex_id = do c <- satisfy isIdsChar
133 -- Identifiers can start with a '_'
134 isIdsChar c = isAlpha c || c == '_'
135 isIdfChar c = isAlphaNum c || c `elem` "_'"
137 -- ---------------------------------------------------------------------------
138 -- Lexing character literals
140 lexLitChar :: ReadP Lexeme
144 guard (esc || c /= '\'') -- Eliminate '' possibility
148 lexChar :: ReadP Char
149 lexChar = do { (c,_) <- lexCharE; return c }
151 lexCharE :: ReadP (Char, Bool) -- "escaped or not"?
155 then do c <- lexEsc; return (c, True)
156 else do return (c, False)
180 do base <- lexBaseChar
182 guard (n <= toInteger (ord maxBound))
183 return (chr (fromInteger n))
225 [ (string "SOH" >> return '\SOH') <++
226 (string "SO" >> return '\SO')
227 -- \SO and \SOH need maximal-munch treatment
228 -- See the Haskell report Sect 2.6
230 , string "NUL" >> return '\NUL'
231 , string "STX" >> return '\STX'
232 , string "ETX" >> return '\ETX'
233 , string "EOT" >> return '\EOT'
234 , string "ENQ" >> return '\ENQ'
235 , string "ACK" >> return '\ACK'
236 , string "BEL" >> return '\BEL'
237 , string "BS" >> return '\BS'
238 , string "HT" >> return '\HT'
239 , string "LF" >> return '\LF'
240 , string "VT" >> return '\VT'
241 , string "FF" >> return '\FF'
242 , string "CR" >> return '\CR'
243 , string "SI" >> return '\SI'
244 , string "DLE" >> return '\DLE'
245 , string "DC1" >> return '\DC1'
246 , string "DC2" >> return '\DC2'
247 , string "DC3" >> return '\DC3'
248 , string "DC4" >> return '\DC4'
249 , string "NAK" >> return '\NAK'
250 , string "SYN" >> return '\SYN'
251 , string "ETB" >> return '\ETB'
252 , string "CAN" >> return '\CAN'
253 , string "EM" >> return '\EM'
254 , string "SUB" >> return '\SUB'
255 , string "ESC" >> return '\ESC'
256 , string "FS" >> return '\FS'
257 , string "GS" >> return '\GS'
258 , string "RS" >> return '\RS'
259 , string "US" >> return '\US'
260 , string "SP" >> return '\SP'
261 , string "DEL" >> return '\DEL'
265 -- ---------------------------------------------------------------------------
268 lexString :: ReadP Lexeme
274 do (c,esc) <- lexStrItem
280 lexStrItem = (lexEmpty >> lexStrItem)
288 _ | isSpace c -> do skipSpaces; char '\\'; return ()
291 -- ---------------------------------------------------------------------------
297 showDigit :: Int -> ShowS
298 showDigit n | n <= 9 = shows n
299 | otherwise = showChar (chr (n + ord 'A' - 10))
301 lexNumber :: ReadP Lexeme
303 = lexHexOct <++ -- First try for hex or octal 0x, 0o etc
304 -- If that fails, try for a decimal number
305 lexDecNumber -- Start with ordinary digits
307 lexHexOct :: ReadP Lexeme
311 digits <- lexDigits base
312 return (Int (val (fromIntegral base) 0 digits))
314 lexBaseChar :: ReadP Int
315 -- Lex a single character indicating the base,
316 -- or return 10 if there isn't one
317 lexBaseChar = lex_base <++ return 10
319 lex_base = do { c <- get;
327 lexDecNumber :: ReadP Lexeme
329 do xs <- lexDigits 10
330 mFrac <- lexFrac <++ return Nothing
331 mExp <- lexExp <++ return Nothing
332 return (value xs mFrac mExp)
334 value xs mFrac mExp = valueFracExp (val 10 0 xs) mFrac mExp
336 valueFracExp :: Integer -> Maybe Digits -> Maybe Integer
338 valueFracExp a Nothing Nothing
340 valueFracExp a Nothing (Just exp)
341 | exp >= 0 = Int (a * (10 ^ exp)) -- 43e7
342 | otherwise = Rat (valExp (fromInteger a) exp) -- 43e-7
343 valueFracExp a (Just fs) mExp
345 Nothing -> Rat rat -- 4.3
346 Just exp -> Rat (valExp rat exp) -- 4.3e-4
349 rat = fromInteger a + frac 10 0 1 fs
351 valExp :: Rational -> Integer -> Rational
352 valExp rat exp = rat * (10 ^^ exp)
354 lexFrac :: ReadP (Maybe Digits)
355 -- Read the fractional part; fail if it doesn't
356 -- start ".d" where d is a digit
357 lexFrac = do char '.'
361 lexExp :: ReadP (Maybe Integer)
362 lexExp = do char 'e' +++ char 'E'
363 exp <- signedExp +++ lexInteger 10
367 = do c <- char '-' +++ char '+'
369 return (if c == '-' then -n else n)
371 lexDigits :: Int -> ReadP Digits
372 -- Lex a non-empty sequence of digits in specified base
376 guard (not (null xs))
379 scan (c:cs) f = case valDig base c of
380 Just n -> do get; scan cs (f.(n:))
381 Nothing -> do return (f [])
382 scan [] f = do return (f [])
384 lexInteger :: Base -> ReadP Integer
386 do xs <- lexDigits base
387 return (val (fromIntegral base) 0 xs)
389 val :: Num a => a -> a -> Digits -> a
390 -- val base y [d1,..,dn] = y ++ [d1,..,dn], as it were
392 val base y (x:xs) = y' `seq` val base y' xs
394 y' = y * base + fromIntegral x
396 frac :: Integral a => a -> a -> a -> Digits -> Ratio a
397 frac base a b [] = a % b
398 frac base a b (x:xs) = a' `seq` b' `seq` frac base a' b' xs
400 a' = a * base + fromIntegral x
403 valDig :: Num a => a -> Char -> Maybe Int
405 | '0' <= c && c <= '7' = Just (ord c - ord '0')
406 | otherwise = Nothing
408 valDig 10 c = valDecDig c
411 | '0' <= c && c <= '9' = Just (ord c - ord '0')
412 | 'a' <= c && c <= 'f' = Just (ord c - ord 'a' + 10)
413 | 'A' <= c && c <= 'F' = Just (ord c - ord 'A' + 10)
414 | otherwise = Nothing
417 | '0' <= c && c <= '9' = Just (ord c - ord '0')
418 | otherwise = Nothing
420 -- ----------------------------------------------------------------------
421 -- other numeric lexing functions
423 readIntP :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
424 readIntP base isDigit valDigit =
425 do s <- munch1 isDigit
426 return (val base 0 (map valDigit s))
428 readIntP' :: Num a => a -> ReadP a
429 readIntP' base = readIntP base isDigit valDigit
431 isDigit c = maybe False (const True) (valDig base c)
432 valDigit c = maybe 0 id (valDig base c)
434 readOctP, readDecP, readHexP :: Num a => ReadP a
435 readOctP = readIntP' 8
436 readDecP = readIntP' 10
437 readHexP = readIntP' 16