X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Text%2FRead%2FLex.hs;h=222d6cf06dbbe9c0fae148002fbf3e51651c1353;hb=41e8fba828acbae1751628af50849f5352b27873;hp=14528c134367db95148bfd256c2e206bd05334ca;hpb=0c3ce7a5b3c10ce1e181dab07bfe71aca7e83b33;p=ghc-base.git diff --git a/Text/Read/Lex.hs b/Text/Read/Lex.hs index 14528c1..222d6cf 100644 --- a/Text/Read/Lex.hs +++ b/Text/Read/Lex.hs @@ -1,4 +1,5 @@ -{-# OPTIONS -fno-implicit-prelude #-} +{-# LANGUAGE CPP, NoImplicitPrelude #-} + ----------------------------------------------------------------------------- -- | -- Module : Text.Read.Lex @@ -7,7 +8,7 @@ -- -- Maintainer : libraries@haskell.org -- Stability : provisional --- Portability : portable +-- Portability : non-portable (uses Text.ParserCombinators.ReadP) -- -- The cut-down Haskell lexer, used by Text.Read -- @@ -15,49 +16,56 @@ module Text.Read.Lex -- lexing types - ( Lexeme(..) -- :: *; Show, Eq - - -- lexer - , lex -- :: ReadP Lexeme -- Skips leading spaces - , hsLex -- :: ReadP String - - , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a - , readOctP -- :: Num a => ReadP a - , readDecP -- :: Num a => ReadP a - , readHexP -- :: Num a => ReadP a + ( Lexeme(..) -- :: *; Show, Eq + + -- lexer + , lex -- :: ReadP Lexeme Skips leading spaces + , hsLex -- :: ReadP String + , lexChar -- :: ReadP Char Reads just one char, with H98 escapes + + , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a + , readOctP -- :: Num a => ReadP a + , readDecP -- :: Num a => ReadP a + , readHexP -- :: Num a => ReadP a ) where import Text.ParserCombinators.ReadP +#ifdef __GLASGOW_HASKELL__ import GHC.Base import GHC.Num( Num(..), Integer ) -import GHC.Show( Show(.. ), showChar, showString, - isSpace, isAlpha, isAlphaNum, - isOctDigit, isHexDigit, toUpper ) -import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral, fromRational, - toInteger, (^), (^^), infinity, notANumber ) -import GHC.Float( Float, Double ) +import GHC.Show( Show(..) ) +#ifndef __HADDOCK__ +import {-# SOURCE #-} GHC.Unicode ( isSpace, isAlpha, isAlphaNum ) +#endif +import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral, + toInteger, (^), (^^), infinity, notANumber ) import GHC.List -import GHC.Show( ShowS, shows ) -import GHC.Enum( minBound, maxBound ) +import GHC.Enum( maxBound ) +#else +import Prelude hiding ( lex ) +import Data.Char( chr, ord, isSpace, isAlpha, isAlphaNum ) +import Data.Ratio( Ratio, (%) ) +#endif +#ifdef __HUGS__ +import Hugs.Prelude( Ratio(..) ) +#endif import Data.Maybe -import Data.Either import Control.Monad -- ----------------------------------------------------------------------------- -- Lexing types -type LexP = ReadP Lexeme - +-- ^ Haskell lexemes. data Lexeme - = Char Char -- Quotes removed, - | String String -- escapes interpreted - | Punc String -- Punctuation, eg "(", "::" - | Ident String -- Haskell identifiers, e.g. foo, baz - | Symbol String -- Haskell symbols, e.g. >>, % - | Int Integer - | Rat Rational + = Char Char -- ^ Character literal + | String String -- ^ String literal, with escapes interpreted + | Punc String -- ^ Punctuation or reserved symbol, e.g. @(@, @::@ + | Ident String -- ^ Haskell identifier, e.g. @foo@, @Baz@ + | Symbol String -- ^ Haskell symbol, e.g. @>>@, @:%@ + | Int Integer -- ^ Integer literal + | Rat Rational -- ^ Floating point literal | EOF deriving (Eq, Show) @@ -70,25 +78,25 @@ lex = skipSpaces >> lexToken hsLex :: ReadP String -- ^ Haskell lexer: returns the lexed string, rather than the lexeme hsLex = do skipSpaces - (s,_) <- gather lexToken - return s + (s,_) <- gather lexToken + return s lexToken :: ReadP Lexeme lexToken = lexEOF +++ - lexLitChar +++ - lexString +++ - lexPunc +++ - lexSymbol +++ - lexId +++ - lexNumber + lexLitChar +++ + lexString +++ + lexPunc +++ + lexSymbol +++ + lexId +++ + lexNumber -- ---------------------------------------------------------------------- -- End of file lexEOF :: ReadP Lexeme lexEOF = do s <- look - guard (null s) - return EOF + guard (null s) + return EOF -- --------------------------------------------------------------------------- -- Single character lexemes @@ -107,9 +115,9 @@ lexSymbol :: ReadP Lexeme lexSymbol = do s <- munch1 isSymbolChar if s `elem` reserved_ops then - return (Punc s) -- Reserved-ops count as punctuation + return (Punc s) -- Reserved-ops count as punctuation else - return (Symbol s) + return (Symbol s) where isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~" reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"] @@ -118,32 +126,47 @@ lexSymbol = -- identifiers lexId :: ReadP Lexeme -lexId = - do c <- satisfy isIdsChar - s <- munch isIdfChar - return (Ident (c:s)) - where - -- Identifiers can start with a '_' - isIdsChar c = isAlpha c || c == '_' - isIdfChar c = isAlphaNum c || c `elem` "_'" +lexId = lex_nan <++ lex_id + where + -- NaN and Infinity look like identifiers, so + -- we parse them first. + lex_nan = (string "NaN" >> return (Rat notANumber)) +++ + (string "Infinity" >> return (Rat infinity)) + + lex_id = do c <- satisfy isIdsChar + s <- munch isIdfChar + return (Ident (c:s)) + + -- Identifiers can start with a '_' + isIdsChar c = isAlpha c || c == '_' + isIdfChar c = isAlphaNum c || c `elem` "_'" + +#ifndef __GLASGOW_HASKELL__ +infinity, notANumber :: Rational +infinity = 1 :% 0 +notANumber = 0 :% 0 +#endif -- --------------------------------------------------------------------------- -- Lexing character literals lexLitChar :: ReadP Lexeme lexLitChar = - do char '\'' - (c,esc) <- lexChar - guard (esc || c /= '\'') - char '\'' + do _ <- char '\'' + (c,esc) <- lexCharE + guard (esc || c /= '\'') -- Eliminate '' possibility + _ <- char '\'' return (Char c) -lexChar :: ReadP (Char, Bool) -- "escaped or not"? -lexChar = - do c <- get - if c == '\\' - then do c <- lexEsc; return (c, True) - else do return (c, False) +lexChar :: ReadP Char +lexChar = do { (c,_) <- lexCharE; return c } + +lexCharE :: ReadP (Char, Bool) -- "escaped or not"? +lexCharE = + do c1 <- get + if c1 == '\\' + then do c2 <- lexEsc; return (c2, True) + else do return (c1, False) where lexEsc = lexEscChar @@ -167,20 +190,13 @@ lexChar = _ -> pfail lexNumeric = - do base <- lexBase + do base <- lexBaseChar <++ return 10 n <- lexInteger base guard (n <= toInteger (ord maxBound)) return (chr (fromInteger n)) - where - lexBase = - do s <- look - case s of - 'o':_ -> do get; return 8 - 'x':_ -> do get; return 16 - _ -> do return 10 - + lexCntrlChar = - do char '^' + do _ <- char '^' c <- get case c of '@' -> return '\^@' @@ -219,13 +235,11 @@ lexChar = lexAscii = do choice - [ do { string "SO" ; s <- look; - case s of - 'H' : _ -> do { get ; return '\SOH' } - other -> return '\SO' - } - -- \SO and \SOH need maximal-munch treatment - -- See the Haskell report Sect 2.6 + [ (string "SOH" >> return '\SOH') <++ + (string "SO" >> return '\SO') + -- \SO and \SOH need maximal-munch treatment + -- See the Haskell report Sect 2.6 + , string "NUL" >> return '\NUL' , string "STX" >> return '\STX' , string "ETX" >> return '\ETX' @@ -266,7 +280,7 @@ lexChar = lexString :: ReadP Lexeme lexString = - do char '"' + do _ <- char '"' body id where body f = @@ -274,18 +288,17 @@ lexString = if c /= '"' || esc then body (f.(c:)) else let s = f "" in - return (String s) + return (String s) - lexStrItem = - (lexEmpty >> lexStrItem) - +++ lexChar + lexStrItem = (lexEmpty >> lexStrItem) + +++ lexCharE lexEmpty = - do char '\\' + do _ <- char '\\' c <- get case c of '&' -> do return () - _ | isSpace c -> do skipSpaces; char '\\'; return () + _ | isSpace c -> do skipSpaces; _ <- char '\\'; return () _ -> do pfail -- --------------------------------------------------------------------------- @@ -294,88 +307,72 @@ lexString = type Base = Int type Digits = [Int] -showDigit :: Int -> ShowS -showDigit n | n <= 9 = shows n - | otherwise = showChar (chr (n + ord 'A' - 10)) - lexNumber :: ReadP Lexeme -lexNumber = do { string "NaN"; return (Rat notANumber) } +++ - do { string "Infinity"; return (Rat infinity) } +++ - do { base <- lexBase ; lexNumberBase base } - where - lexBase = - do s <- look - case s of - '0':'o':_ -> do get; get; return 8 - '0':'O':_ -> do get; get; return 8 - '0':'x':_ -> do get; get; return 16 - '0':'X':_ -> do get; get; return 16 - _ -> do return 10 - -lexNumberBase :: Base -> ReadP Lexeme -lexNumberBase base = - do xs <- lexDigits base - mFrac <- lexFrac base - mExp <- lexExp base +lexNumber + = lexHexOct <++ -- First try for hex or octal 0x, 0o etc + -- If that fails, try for a decimal number + lexDecNumber -- Start with ordinary digits + +lexHexOct :: ReadP Lexeme +lexHexOct + = do _ <- char '0' + base <- lexBaseChar + digits <- lexDigits base + return (Int (val (fromIntegral base) 0 digits)) + +lexBaseChar :: ReadP Int +-- Lex a single character indicating the base; fail if not there +lexBaseChar = do { c <- get; + case c of + 'o' -> return 8 + 'O' -> return 8 + 'x' -> return 16 + 'X' -> return 16 + _ -> pfail } + +lexDecNumber :: ReadP Lexeme +lexDecNumber = + do xs <- lexDigits 10 + mFrac <- lexFrac <++ return Nothing + mExp <- lexExp <++ return Nothing return (value xs mFrac mExp) where - baseInteger :: Integer - baseInteger = fromIntegral base - - value xs mFrac mExp = valueFracExp (val baseInteger 0 xs) mFrac mExp + value xs mFrac mExp = valueFracExp (val 10 0 xs) mFrac mExp valueFracExp :: Integer -> Maybe Digits -> Maybe Integer - -> Lexeme - valueFracExp a Nothing Nothing - = Int a -- 43 + -> Lexeme + valueFracExp a Nothing Nothing + = Int a -- 43 valueFracExp a Nothing (Just exp) - | exp >= 0 = Int (a * (baseInteger ^ exp)) -- 43e7 - | otherwise = Rat (valExp (fromInteger a) exp) -- 43e-7 + | exp >= 0 = Int (a * (10 ^ exp)) -- 43e7 + | otherwise = Rat (valExp (fromInteger a) exp) -- 43e-7 valueFracExp a (Just fs) mExp = case mExp of - Nothing -> Rat rat -- 4.3 - Just exp -> Rat (valExp rat exp) -- 4.3e-4 + Nothing -> Rat rat -- 4.3 + Just exp -> Rat (valExp rat exp) -- 4.3e-4 where - rat :: Rational - rat = fromInteger a + frac (fromIntegral base) 0 1 fs + rat :: Rational + rat = fromInteger a + frac 10 0 1 fs valExp :: Rational -> Integer -> Rational - valExp rat exp = rat * (fromIntegral base ^^ exp) - -lexFrac :: Base -> ReadP (Maybe Digits) -lexFrac base = - do s <- look - case s of - '.' : d : _ | isJust (valDig base d) -> - -- The lookahead checks for point and at least one - -- valid following digit. For example 1..n must - -- lex the "1" off rather than failing. - do get - frac <- lexDigits base - return (Just frac) - - _ -> - do return Nothing - -lexExp :: Base -> ReadP (Maybe Integer) -lexExp base = - do s <- look - case s of - e : _ | e `elem` "eE" && base == 10 -> - do get - (signedExp +++ exp) - where - signedExp = - do c <- char '-' +++ char '+' - n <- lexInteger 10 - return (Just (if c == '-' then -n else n)) - - exp = - do n <- lexInteger 10 - return (Just n) - - _ -> - do return Nothing + valExp rat exp = rat * (10 ^^ exp) + +lexFrac :: ReadP (Maybe Digits) +-- Read the fractional part; fail if it doesn't +-- start ".d" where d is a digit +lexFrac = do _ <- char '.' + fraction <- lexDigits 10 + return (Just fraction) + +lexExp :: ReadP (Maybe Integer) +lexExp = do _ <- char 'e' +++ char 'E' + exp <- signedExp +++ lexInteger 10 + return (Just exp) + where + signedExp + = do c <- char '-' +++ char '+' + n <- lexInteger 10 + return (if c == '-' then -n else n) lexDigits :: Int -> ReadP Digits -- Lex a non-empty sequence of digits in specified base @@ -386,7 +383,7 @@ lexDigits base = return xs where scan (c:cs) f = case valDig base c of - Just n -> do get; scan cs (f.(n:)) + Just n -> do _ <- get; scan cs (f.(n:)) Nothing -> do return (f []) scan [] f = do return (f []) @@ -397,13 +394,13 @@ lexInteger base = val :: Num a => a -> a -> Digits -> a -- val base y [d1,..,dn] = y ++ [d1,..,dn], as it were -val base y [] = y +val _ y [] = y val base y (x:xs) = y' `seq` val base y' xs where y' = y * base + fromIntegral x frac :: Integral a => a -> a -> a -> Digits -> Ratio a -frac base a b [] = a % b +frac _ a b [] = a % b frac base a b (x:xs) = a' `seq` b' `seq` frac base a' b' xs where a' = a * base + fromIntegral x @@ -414,9 +411,7 @@ valDig 8 c | '0' <= c && c <= '7' = Just (ord c - ord '0') | otherwise = Nothing -valDig 10 c - | '0' <= c && c <= '9' = Just (ord c - ord '0') - | otherwise = Nothing +valDig 10 c = valDecDig c valDig 16 c | '0' <= c && c <= '9' = Just (ord c - ord '0') @@ -424,6 +419,13 @@ valDig 16 c | 'A' <= c && c <= 'F' = Just (ord c - ord 'A' + 10) | otherwise = Nothing +valDig _ _ = error "valDig: Bad base" + +valDecDig :: Char -> Maybe Int +valDecDig c + | '0' <= c && c <= '9' = Just (ord c - ord '0') + | otherwise = Nothing + -- ---------------------------------------------------------------------- -- other numeric lexing functions