1 {-# OPTIONS -fno-implicit-prelude #-}
2 -----------------------------------------------------------------------------
4 -- Module : Text.Read.Lex
5 -- Copyright : (c) The University of Glasgow 2002
6 -- License : BSD-style (see the file libraries/base/LICENSE)
8 -- Maintainer : libraries@haskell.org
9 -- Stability : provisional
10 -- Portability : portable
12 -- The cut-down Haskell lexer, used by Text.Read
14 -----------------------------------------------------------------------------
18 ( Lexeme(..) -- :: *; Show, Eq
21 , lex -- :: ReadP Lexeme -- Skips leading spaces
22 , hsLex -- :: ReadP String
24 , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
25 , readOctP -- :: Num a => ReadP a
26 , readDecP -- :: Num a => ReadP a
27 , readHexP -- :: Num a => ReadP a
31 import Text.ParserCombinators.ReadP
34 import GHC.Num( Num(..), Integer )
35 import GHC.Show( Show(.. ), showChar, showString,
36 isSpace, isAlpha, isAlphaNum,
37 isOctDigit, isHexDigit, toUpper )
38 import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral, fromRational,
39 toInteger, (^), (^^), infinity, notANumber )
40 import GHC.Float( Float, Double )
42 import GHC.Show( ShowS, shows )
43 import GHC.Enum( minBound, maxBound )
48 -- -----------------------------------------------------------------------------
51 type LexP = ReadP Lexeme
54 = Char Char -- Quotes removed,
55 | String String -- escapes interpreted
56 | Punc String -- Punctuation, eg "(", "::"
57 | Ident String -- Haskell identifiers, e.g. foo, baz
58 | Symbol String -- Haskell symbols, e.g. >>, %
64 -- -----------------------------------------------------------------------------
68 lex = skipSpaces >> lexToken
71 -- ^ Haskell lexer: returns the lexed string, rather than the lexeme
73 (s,_) <- gather lexToken
76 lexToken :: ReadP Lexeme
86 -- ----------------------------------------------------------------------
88 lexEOF :: ReadP Lexeme
93 -- ---------------------------------------------------------------------------
94 -- Single character lexemes
96 lexPunc :: ReadP Lexeme
98 do c <- satisfy isPuncChar
101 isPuncChar c = c `elem` ",;()[]{}`"
103 -- ----------------------------------------------------------------------
106 lexSymbol :: ReadP Lexeme
108 do s <- munch1 isSymbolChar
109 if s `elem` reserved_ops then
110 return (Punc s) -- Reserved-ops count as punctuation
114 isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
115 reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]
117 -- ----------------------------------------------------------------------
120 lexId :: ReadP Lexeme
122 do c <- satisfy isIdsChar
126 -- Identifiers can start with a '_'
127 isIdsChar c = isAlpha c || c == '_'
128 isIdfChar c = isAlphaNum c || c `elem` "_'"
130 -- ---------------------------------------------------------------------------
131 -- Lexing character literals
133 lexLitChar :: ReadP Lexeme
137 guard (esc || c /= '\'')
141 lexChar :: ReadP (Char, Bool) -- "escaped or not"?
145 then do c <- lexEsc; return (c, True)
146 else do return (c, False)
172 guard (n <= toInteger (ord maxBound))
173 return (chr (fromInteger n))
178 'o':_ -> do get; return 8
179 'x':_ -> do get; return 16
222 [ do { string "SO" ; s <- look;
224 'H' : _ -> do { get ; return '\SOH' }
225 other -> return '\SO'
227 -- \SO and \SOH need maximal-munch treatment
228 -- See the Haskell report Sect 2.6
229 , string "NUL" >> return '\NUL'
230 , string "STX" >> return '\STX'
231 , string "ETX" >> return '\ETX'
232 , string "EOT" >> return '\EOT'
233 , string "ENQ" >> return '\ENQ'
234 , string "ACK" >> return '\ACK'
235 , string "BEL" >> return '\BEL'
236 , string "BS" >> return '\BS'
237 , string "HT" >> return '\HT'
238 , string "LF" >> return '\LF'
239 , string "VT" >> return '\VT'
240 , string "FF" >> return '\FF'
241 , string "CR" >> return '\CR'
242 , string "SI" >> return '\SI'
243 , string "DLE" >> return '\DLE'
244 , string "DC1" >> return '\DC1'
245 , string "DC2" >> return '\DC2'
246 , string "DC3" >> return '\DC3'
247 , string "DC4" >> return '\DC4'
248 , string "NAK" >> return '\NAK'
249 , string "SYN" >> return '\SYN'
250 , string "ETB" >> return '\ETB'
251 , string "CAN" >> return '\CAN'
252 , string "EM" >> return '\EM'
253 , string "SUB" >> return '\SUB'
254 , string "ESC" >> return '\ESC'
255 , string "FS" >> return '\FS'
256 , string "GS" >> return '\GS'
257 , string "RS" >> return '\RS'
258 , string "US" >> return '\US'
259 , string "SP" >> return '\SP'
260 , string "DEL" >> return '\DEL'
264 -- ---------------------------------------------------------------------------
267 lexString :: ReadP Lexeme
273 do (c,esc) <- lexStrItem
280 (lexEmpty >> lexStrItem)
288 _ | isSpace c -> do skipSpaces; char '\\'; return ()
291 -- ---------------------------------------------------------------------------
297 showDigit :: Int -> ShowS
298 showDigit n | n <= 9 = shows n
299 | otherwise = showChar (chr (n + ord 'A' - 10))
301 lexNumber :: ReadP Lexeme
302 lexNumber = do { string "NaN"; return (Rat notANumber) } +++
303 do { string "Infinity"; return (Rat infinity) } +++
304 do { base <- lexBase ; lexNumberBase base }
309 '0':'o':_ -> do get; get; return 8
310 '0':'O':_ -> do get; get; return 8
311 '0':'x':_ -> do get; get; return 16
312 '0':'X':_ -> do get; get; return 16
315 lexNumberBase :: Base -> ReadP Lexeme
317 do xs <- lexDigits base
318 mFrac <- lexFrac base
320 return (value xs mFrac mExp)
322 baseInteger :: Integer
323 baseInteger = fromIntegral base
325 value xs mFrac mExp = valueFracExp (val baseInteger 0 xs) mFrac mExp
327 valueFracExp :: Integer -> Maybe Digits -> Maybe Integer
329 valueFracExp a Nothing Nothing
331 valueFracExp a Nothing (Just exp)
332 | exp >= 0 = Int (a * (baseInteger ^ exp)) -- 43e7
333 | otherwise = Rat (valExp (fromInteger a) exp) -- 43e-7
334 valueFracExp a (Just fs) mExp
336 Nothing -> Rat rat -- 4.3
337 Just exp -> Rat (valExp rat exp) -- 4.3e-4
340 rat = fromInteger a + frac (fromIntegral base) 0 1 fs
342 valExp :: Rational -> Integer -> Rational
343 valExp rat exp = rat * (fromIntegral base ^^ exp)
345 lexFrac :: Base -> ReadP (Maybe Digits)
349 '.' : d : _ | isJust (valDig base d) ->
350 -- The lookahead checks for point and at least one
351 -- valid following digit. For example 1..n must
352 -- lex the "1" off rather than failing.
354 frac <- lexDigits base
360 lexExp :: Base -> ReadP (Maybe Integer)
364 e : _ | e `elem` "eE" && base == 10 ->
369 do c <- char '-' +++ char '+'
371 return (Just (if c == '-' then -n else n))
374 do n <- lexInteger 10
380 lexDigits :: Int -> ReadP Digits
381 -- Lex a non-empty sequence of digits in specified base
385 guard (not (null xs))
388 scan (c:cs) f = case valDig base c of
389 Just n -> do get; scan cs (f.(n:))
390 Nothing -> do return (f [])
391 scan [] f = do return (f [])
393 lexInteger :: Base -> ReadP Integer
395 do xs <- lexDigits base
396 return (val (fromIntegral base) 0 xs)
398 val :: Num a => a -> a -> Digits -> a
399 -- val base y [d1,..,dn] = y ++ [d1,..,dn], as it were
401 val base y (x:xs) = y' `seq` val base y' xs
403 y' = y * base + fromIntegral x
405 frac :: Integral a => a -> a -> a -> Digits -> Ratio a
406 frac base a b [] = a % b
407 frac base a b (x:xs) = a' `seq` b' `seq` frac base a' b' xs
409 a' = a * base + fromIntegral x
412 valDig :: Num a => a -> Char -> Maybe Int
414 | '0' <= c && c <= '7' = Just (ord c - ord '0')
415 | otherwise = Nothing
418 | '0' <= c && c <= '9' = Just (ord c - ord '0')
419 | otherwise = Nothing
422 | '0' <= c && c <= '9' = Just (ord c - ord '0')
423 | 'a' <= c && c <= 'f' = Just (ord c - ord 'a' + 10)
424 | 'A' <= c && c <= 'F' = Just (ord c - ord 'A' + 10)
425 | otherwise = Nothing
427 -- ----------------------------------------------------------------------
428 -- other numeric lexing functions
430 readIntP :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
431 readIntP base isDigit valDigit =
432 do s <- munch1 isDigit
433 return (val base 0 (map valDigit s))
435 readIntP' :: Num a => a -> ReadP a
436 readIntP' base = readIntP base isDigit valDigit
438 isDigit c = maybe False (const True) (valDig base c)
439 valDigit c = maybe 0 id (valDig base c)
441 readOctP, readDecP, readHexP :: Num a => ReadP a
442 readOctP = readIntP' 8
443 readDecP = readIntP' 10
444 readHexP = readIntP' 16