1 {-# OPTIONS -fno-implicit-prelude #-}
2 -----------------------------------------------------------------------------
4 -- Module : Text.Read.Lex
5 -- Copyright : (c) The University of Glasgow 2002
6 -- License : BSD-style (see the file libraries/base/LICENSE)
8 -- Maintainer : libraries@haskell.org
9 -- Stability : provisional
10 -- Portability : portable
12 -- The cut-down Haskell lexer, used by Text.Read
14 -----------------------------------------------------------------------------
18 ( Lexeme(..) -- :: *; Show, Eq
21 , lex -- :: ReadP Lexeme Skips leading spaces
22 , hsLex -- :: ReadP String
23 , lexChar -- :: ReadP Char Reads just one char, with H98 escapes
25 , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
26 , readOctP -- :: Num a => ReadP a
27 , readDecP -- :: Num a => ReadP a
28 , readHexP -- :: Num a => ReadP a
32 import Text.ParserCombinators.ReadP
35 import GHC.Num( Num(..), Integer )
36 import GHC.Show( Show(.. ), showChar, showString,
37 isSpace, isAlpha, isAlphaNum,
38 isOctDigit, isHexDigit, toUpper )
39 import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral, fromRational,
40 toInteger, (^), (^^), infinity, notANumber )
41 import GHC.Float( Float, Double )
43 import GHC.Show( ShowS, shows )
44 import GHC.Enum( minBound, maxBound )
49 -- -----------------------------------------------------------------------------
52 type LexP = ReadP Lexeme
55 = Char Char -- Quotes removed,
56 | String String -- escapes interpreted
57 | Punc String -- Punctuation, eg "(", "::"
58 | Ident String -- Haskell identifiers, e.g. foo, baz
59 | Symbol String -- Haskell symbols, e.g. >>, %
65 -- -----------------------------------------------------------------------------
69 lex = skipSpaces >> lexToken
72 -- ^ Haskell lexer: returns the lexed string, rather than the lexeme
74 (s,_) <- gather lexToken
77 lexToken :: ReadP Lexeme
87 -- ----------------------------------------------------------------------
89 lexEOF :: ReadP Lexeme
94 -- ---------------------------------------------------------------------------
95 -- Single character lexemes
97 lexPunc :: ReadP Lexeme
99 do c <- satisfy isPuncChar
102 isPuncChar c = c `elem` ",;()[]{}`"
104 -- ----------------------------------------------------------------------
107 lexSymbol :: ReadP Lexeme
109 do s <- munch1 isSymbolChar
110 if s `elem` reserved_ops then
111 return (Punc s) -- Reserved-ops count as punctuation
115 isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
116 reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]
118 -- ----------------------------------------------------------------------
121 lexId :: ReadP Lexeme
123 do c <- satisfy isIdsChar
127 -- Identifiers can start with a '_'
128 isIdsChar c = isAlpha c || c == '_'
129 isIdfChar c = isAlphaNum c || c `elem` "_'"
131 -- ---------------------------------------------------------------------------
132 -- Lexing character literals
134 lexLitChar :: ReadP Lexeme
138 guard (esc || c /= '\'') -- Eliminate '' possibility
142 lexChar :: ReadP Char
143 lexChar = do { (c,_) <- lexCharE; return c }
145 lexCharE :: ReadP (Char, Bool) -- "escaped or not"?
149 then do c <- lexEsc; return (c, True)
150 else do return (c, False)
176 guard (n <= toInteger (ord maxBound))
177 return (chr (fromInteger n))
182 'o':_ -> do get; return 8
183 'x':_ -> do get; return 16
226 [ do { string "SO" ; s <- look;
228 'H' : _ -> do { get ; return '\SOH' }
229 other -> return '\SO'
231 -- \SO and \SOH need maximal-munch treatment
232 -- See the Haskell report Sect 2.6
233 , string "NUL" >> return '\NUL'
234 , string "STX" >> return '\STX'
235 , string "ETX" >> return '\ETX'
236 , string "EOT" >> return '\EOT'
237 , string "ENQ" >> return '\ENQ'
238 , string "ACK" >> return '\ACK'
239 , string "BEL" >> return '\BEL'
240 , string "BS" >> return '\BS'
241 , string "HT" >> return '\HT'
242 , string "LF" >> return '\LF'
243 , string "VT" >> return '\VT'
244 , string "FF" >> return '\FF'
245 , string "CR" >> return '\CR'
246 , string "SI" >> return '\SI'
247 , string "DLE" >> return '\DLE'
248 , string "DC1" >> return '\DC1'
249 , string "DC2" >> return '\DC2'
250 , string "DC3" >> return '\DC3'
251 , string "DC4" >> return '\DC4'
252 , string "NAK" >> return '\NAK'
253 , string "SYN" >> return '\SYN'
254 , string "ETB" >> return '\ETB'
255 , string "CAN" >> return '\CAN'
256 , string "EM" >> return '\EM'
257 , string "SUB" >> return '\SUB'
258 , string "ESC" >> return '\ESC'
259 , string "FS" >> return '\FS'
260 , string "GS" >> return '\GS'
261 , string "RS" >> return '\RS'
262 , string "US" >> return '\US'
263 , string "SP" >> return '\SP'
264 , string "DEL" >> return '\DEL'
268 -- ---------------------------------------------------------------------------
271 lexString :: ReadP Lexeme
277 do (c,esc) <- lexStrItem
283 lexStrItem = (lexEmpty >> lexStrItem)
291 _ | isSpace c -> do skipSpaces; char '\\'; return ()
294 -- ---------------------------------------------------------------------------
300 showDigit :: Int -> ShowS
301 showDigit n | n <= 9 = shows n
302 | otherwise = showChar (chr (n + ord 'A' - 10))
304 lexNumber :: ReadP Lexeme
305 lexNumber = do { string "NaN"; return (Rat notANumber) } +++
306 do { string "Infinity"; return (Rat infinity) } +++
307 do { base <- lexBase ; lexNumberBase base }
312 '0':'o':_ -> do get; get; return 8
313 '0':'O':_ -> do get; get; return 8
314 '0':'x':_ -> do get; get; return 16
315 '0':'X':_ -> do get; get; return 16
318 lexNumberBase :: Base -> ReadP Lexeme
320 do xs <- lexDigits base
321 mFrac <- lexFrac base
323 return (value xs mFrac mExp)
325 baseInteger :: Integer
326 baseInteger = fromIntegral base
328 value xs mFrac mExp = valueFracExp (val baseInteger 0 xs) mFrac mExp
330 valueFracExp :: Integer -> Maybe Digits -> Maybe Integer
332 valueFracExp a Nothing Nothing
334 valueFracExp a Nothing (Just exp)
335 | exp >= 0 = Int (a * (baseInteger ^ exp)) -- 43e7
336 | otherwise = Rat (valExp (fromInteger a) exp) -- 43e-7
337 valueFracExp a (Just fs) mExp
339 Nothing -> Rat rat -- 4.3
340 Just exp -> Rat (valExp rat exp) -- 4.3e-4
343 rat = fromInteger a + frac (fromIntegral base) 0 1 fs
345 valExp :: Rational -> Integer -> Rational
346 valExp rat exp = rat * (fromIntegral base ^^ exp)
348 lexFrac :: Base -> ReadP (Maybe Digits)
352 '.' : d : _ | isJust (valDig base d) ->
353 -- The lookahead checks for point and at least one
354 -- valid following digit. For example 1..n must
355 -- lex the "1" off rather than failing.
357 frac <- lexDigits base
363 lexExp :: Base -> ReadP (Maybe Integer)
367 e : _ | e `elem` "eE" && base == 10 ->
372 do c <- char '-' +++ char '+'
374 return (Just (if c == '-' then -n else n))
377 do n <- lexInteger 10
383 lexDigits :: Int -> ReadP Digits
384 -- Lex a non-empty sequence of digits in specified base
388 guard (not (null xs))
391 scan (c:cs) f = case valDig base c of
392 Just n -> do get; scan cs (f.(n:))
393 Nothing -> do return (f [])
394 scan [] f = do return (f [])
396 lexInteger :: Base -> ReadP Integer
398 do xs <- lexDigits base
399 return (val (fromIntegral base) 0 xs)
401 val :: Num a => a -> a -> Digits -> a
402 -- val base y [d1,..,dn] = y ++ [d1,..,dn], as it were
404 val base y (x:xs) = y' `seq` val base y' xs
406 y' = y * base + fromIntegral x
408 frac :: Integral a => a -> a -> a -> Digits -> Ratio a
409 frac base a b [] = a % b
410 frac base a b (x:xs) = a' `seq` b' `seq` frac base a' b' xs
412 a' = a * base + fromIntegral x
415 valDig :: Num a => a -> Char -> Maybe Int
417 | '0' <= c && c <= '7' = Just (ord c - ord '0')
418 | otherwise = Nothing
421 | '0' <= c && c <= '9' = Just (ord c - ord '0')
422 | otherwise = Nothing
425 | '0' <= c && c <= '9' = Just (ord c - ord '0')
426 | 'a' <= c && c <= 'f' = Just (ord c - ord 'a' + 10)
427 | 'A' <= c && c <= 'F' = Just (ord c - ord 'A' + 10)
428 | otherwise = Nothing
430 -- ----------------------------------------------------------------------
431 -- other numeric lexing functions
433 readIntP :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
434 readIntP base isDigit valDigit =
435 do s <- munch1 isDigit
436 return (val base 0 (map valDigit s))
438 readIntP' :: Num a => a -> ReadP a
439 readIntP' base = readIntP base isDigit valDigit
441 isDigit c = maybe False (const True) (valDig base c)
442 valDigit c = maybe 0 id (valDig base c)
444 readOctP, readDecP, readHexP :: Num a => ReadP a
445 readOctP = readIntP' 8
446 readDecP = readIntP' 10
447 readHexP = readIntP' 16