X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Text%2FRead%2FLex.hs;h=222d6cf06dbbe9c0fae148002fbf3e51651c1353;hb=41e8fba828acbae1751628af50849f5352b27873;hp=7fdf024aab60c87a2254f01aba219c063512a8ce;hpb=66aeadbe43dcb4f95e5bc47f45be6509a983540b;p=ghc-base.git diff --git a/Text/Read/Lex.hs b/Text/Read/Lex.hs index 7fdf024..222d6cf 100644 --- a/Text/Read/Lex.hs +++ b/Text/Read/Lex.hs @@ -1,4 +1,5 @@ -{-# OPTIONS -fno-implicit-prelude #-} +{-# LANGUAGE CPP, NoImplicitPrelude #-} + ----------------------------------------------------------------------------- -- | -- Module : Text.Read.Lex @@ -7,7 +8,7 @@ -- -- Maintainer : libraries@haskell.org -- Stability : provisional --- Portability : portable +-- Portability : non-portable (uses Text.ParserCombinators.ReadP) -- -- The cut-down Haskell lexer, used by Text.Read -- @@ -15,119 +16,157 @@ module Text.Read.Lex -- lexing types - ( LexP -- :: *; = ReadP Lexeme - , Lexeme(..) -- :: *; Show, Eq - - -- lexer - , lex -- :: LexP - , lexLitChar -- :: LexP - - -- numbers - , Number -- :: *; Show, Eq - - , numberToInt -- :: Number -> Maybe Int - , numberToInteger -- :: Number -> Maybe Integer - , numberToRational -- :: Number -> Maybe Integer - , numberToFloat -- :: Number -> Maybe Float - , numberToDouble -- :: Number -> Maybe Double - - , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a - , readOctP -- :: Num a => ReadP a - , readDecP -- :: Num a => ReadP a - , readHexP -- :: Num a => ReadP a + ( Lexeme(..) -- :: *; Show, Eq + + -- lexer + , lex -- :: ReadP Lexeme Skips leading spaces + , hsLex -- :: ReadP String + , lexChar -- :: ReadP Char Reads just one char, with H98 escapes + + , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a + , readOctP -- :: Num a => ReadP a + , readDecP -- :: Num a => ReadP a + , readHexP -- :: Num a => ReadP a ) where import Text.ParserCombinators.ReadP +#ifdef __GLASGOW_HASKELL__ import GHC.Base import GHC.Num( Num(..), Integer ) -import GHC.Show( Show(.. ), showChar, showString, - isSpace, isAlpha, isAlphaNum, - isOctDigit, isHexDigit, toUpper ) -import GHC.Real( Ratio, Integral, Rational, (%), fromIntegral, fromRational, - toInteger, (^), (^^) ) -import GHC.Float( Float, Double ) +import GHC.Show( Show(..) ) +#ifndef __HADDOCK__ +import {-# SOURCE #-} GHC.Unicode ( isSpace, isAlpha, isAlphaNum ) +#endif +import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral, + toInteger, (^), (^^), infinity, notANumber ) import GHC.List -import GHC.Show( ShowS, shows ) -import GHC.Enum( minBound, maxBound ) +import GHC.Enum( maxBound ) +#else +import Prelude hiding ( lex ) +import Data.Char( chr, ord, isSpace, isAlpha, isAlphaNum ) +import Data.Ratio( Ratio, (%) ) +#endif +#ifdef __HUGS__ +import Hugs.Prelude( Ratio(..) ) +#endif import Data.Maybe -import Data.Either import Control.Monad -- ----------------------------------------------------------------------------- -- Lexing types -type LexP = ReadP Lexeme - +-- ^ Haskell lexemes. data Lexeme - = Char Char - | String String - | Single Char - | Symbol String - | Ident String - | Number Number - deriving (Eq) - -instance Show Lexeme where - showsPrec n (Char c) = showsPrec n c - showsPrec n (String s) = showsPrec n s - showsPrec _ (Single c) = showChar c - showsPrec _ (Ident s) = showString s - showsPrec _ (Symbol s) = showString s - showsPrec n (Number x) = showsPrec n x + = Char Char -- ^ Character literal + | String String -- ^ String literal, with escapes interpreted + | Punc String -- ^ Punctuation or reserved symbol, e.g. @(@, @::@ + | Ident String -- ^ Haskell identifier, e.g. @foo@, @Baz@ + | Symbol String -- ^ Haskell symbol, e.g. @>>@, @:%@ + | Int Integer -- ^ Integer literal + | Rat Rational -- ^ Floating point literal + | EOF + deriving (Eq, Show) -- ----------------------------------------------------------------------------- -- Lexing -lex :: LexP -lex = - do skipSpaces - (lexLitChar - +++ lexString - +++ lexSingle - +++ lexSymbol - +++ lexIdf - +++ lexNumber) +lex :: ReadP Lexeme +lex = skipSpaces >> lexToken + +hsLex :: ReadP String +-- ^ Haskell lexer: returns the lexed string, rather than the lexeme +hsLex = do skipSpaces + (s,_) <- gather lexToken + return s + +lexToken :: ReadP Lexeme +lexToken = lexEOF +++ + lexLitChar +++ + lexString +++ + lexPunc +++ + lexSymbol +++ + lexId +++ + lexNumber + -- ---------------------------------------------------------------------- --- symbols +-- End of file +lexEOF :: ReadP Lexeme +lexEOF = do s <- look + guard (null s) + return EOF -lexSymbol :: LexP +-- --------------------------------------------------------------------------- +-- Single character lexemes + +lexPunc :: ReadP Lexeme +lexPunc = + do c <- satisfy isPuncChar + return (Punc [c]) + where + isPuncChar c = c `elem` ",;()[]{}`" + +-- ---------------------------------------------------------------------- +-- Symbols + +lexSymbol :: ReadP Lexeme lexSymbol = do s <- munch1 isSymbolChar - return (Symbol s) + if s `elem` reserved_ops then + return (Punc s) -- Reserved-ops count as punctuation + else + return (Symbol s) where isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~" + reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"] -- ---------------------------------------------------------------------- -- identifiers -lexIdf :: LexP -lexIdf = - do c <- satisfy isAlpha - s <- munch isIdfChar - return (Ident (c:s)) - where - isIdfChar c = isAlphaNum c || c `elem` "_'" +lexId :: ReadP Lexeme +lexId = lex_nan <++ lex_id + where + -- NaN and Infinity look like identifiers, so + -- we parse them first. + lex_nan = (string "NaN" >> return (Rat notANumber)) +++ + (string "Infinity" >> return (Rat infinity)) + + lex_id = do c <- satisfy isIdsChar + s <- munch isIdfChar + return (Ident (c:s)) + + -- Identifiers can start with a '_' + isIdsChar c = isAlpha c || c == '_' + isIdfChar c = isAlphaNum c || c `elem` "_'" + +#ifndef __GLASGOW_HASKELL__ +infinity, notANumber :: Rational +infinity = 1 :% 0 +notANumber = 0 :% 0 +#endif -- --------------------------------------------------------------------------- -- Lexing character literals -lexLitChar :: LexP +lexLitChar :: ReadP Lexeme lexLitChar = - do char '\'' - (c,esc) <- lexChar - guard (esc || c /= '\'') - char '\'' + do _ <- char '\'' + (c,esc) <- lexCharE + guard (esc || c /= '\'') -- Eliminate '' possibility + _ <- char '\'' return (Char c) -lexChar :: ReadP (Char, Bool) -- "escaped or not"? -lexChar = - do c <- get - if c == '\\' - then do c <- lexEsc; return (c, True) - else do return (c, False) +lexChar :: ReadP Char +lexChar = do { (c,_) <- lexCharE; return c } + +lexCharE :: ReadP (Char, Bool) -- "escaped or not"? +lexCharE = + do c1 <- get + if c1 == '\\' + then do c2 <- lexEsc; return (c2, True) + else do return (c1, False) where lexEsc = lexEscChar @@ -151,20 +190,13 @@ lexChar = _ -> pfail lexNumeric = - do base <- lexBase + do base <- lexBaseChar <++ return 10 n <- lexInteger base guard (n <= toInteger (ord maxBound)) return (chr (fromInteger n)) - where - lexBase = - do s <- look - case s of - 'o':_ -> do get; return 8 - 'x':_ -> do get; return 16 - _ -> do return 10 - + lexCntrlChar = - do char '^' + do _ <- char '^' c <- get case c of '@' -> return '\^@' @@ -203,8 +235,12 @@ lexChar = lexAscii = do choice - [ string "NUL" >> return '\NUL' - , string "SOH" >> return '\SOH' + [ (string "SOH" >> return '\SOH') <++ + (string "SO" >> return '\SO') + -- \SO and \SOH need maximal-munch treatment + -- See the Haskell report Sect 2.6 + + , string "NUL" >> return '\NUL' , string "STX" >> return '\STX' , string "ETX" >> return '\ETX' , string "EOT" >> return '\EOT' @@ -217,7 +253,6 @@ lexChar = , string "VT" >> return '\VT' , string "FF" >> return '\FF' , string "CR" >> return '\CR' - , string "SO" >> return '\SO' , string "SI" >> return '\SI' , string "DLE" >> return '\DLE' , string "DC1" >> return '\DC1' @@ -243,152 +278,104 @@ lexChar = -- --------------------------------------------------------------------------- -- string literal -lexString :: LexP +lexString :: ReadP Lexeme lexString = - do char '"' + do _ <- char '"' body id where body f = do (c,esc) <- lexStrItem if c /= '"' || esc then body (f.(c:)) - else return (String (f "")) + else let s = f "" in + return (String s) - lexStrItem = - (lexEmpty >> lexStrItem) - +++ lexChar + lexStrItem = (lexEmpty >> lexStrItem) + +++ lexCharE lexEmpty = - do char '\\' + do _ <- char '\\' c <- get case c of '&' -> do return () - _ | isSpace c -> do skipSpaces; char '\\'; return () + _ | isSpace c -> do skipSpaces; _ <- char '\\'; return () _ -> do pfail -- --------------------------------------------------------------------------- --- single character lexemes - -lexSingle :: LexP -lexSingle = - do c <- satisfy isSingleChar - return (Single c) - where - isSingleChar c = c `elem` ",;()[]{=}_`" - --- --------------------------------------------------------------------------- -- Lexing numbers -data Number - = MkNumber - { value :: Either Integer Rational - , base :: Base - , digits :: Digits - , fraction :: Maybe Digits - , exponent :: Maybe Integer - } - deriving (Eq) - type Base = Int type Digits = [Int] -instance Show Number where - showsPrec _ x = - showsBase (base x) - . foldr (.) id (map showDigit (digits x)) - . showsFrac (fraction x) - . showsExp (exponent x) - where - showsBase 8 = showString "0o" - showsBase 10 = id - showsBase 16 = showString "0x" - - showsFrac Nothing = id - showsFrac (Just ys) = - showChar '.' - . foldr (.) id (map showDigit ys) - - showsExp Nothing = id - showsExp (Just exp) = - showChar 'e' - . shows exp - -showDigit :: Int -> ShowS -showDigit n | n <= 9 = shows n - | otherwise = showChar (chr (n + ord 'A' - 10)) - -lexNumber :: LexP -lexNumber = - do base <- lexBase - lexNumberBase base - where - lexBase = - do s <- look - case s of - '0':'o':_ -> do get; get; return 8 - '0':'O':_ -> do get; get; return 8 - '0':'x':_ -> do get; get; return 16 - '0':'X':_ -> do get; get; return 16 - _ -> do return 10 - -lexNumberBase :: Base -> LexP -lexNumberBase base = - do xs <- lexDigits base - mFrac <- lexFrac base - mExp <- lexExp base - return (Number (MkNumber (value xs mFrac mExp) base xs mFrac mExp)) +lexNumber :: ReadP Lexeme +lexNumber + = lexHexOct <++ -- First try for hex or octal 0x, 0o etc + -- If that fails, try for a decimal number + lexDecNumber -- Start with ordinary digits + +lexHexOct :: ReadP Lexeme +lexHexOct + = do _ <- char '0' + base <- lexBaseChar + digits <- lexDigits base + return (Int (val (fromIntegral base) 0 digits)) + +lexBaseChar :: ReadP Int +-- Lex a single character indicating the base; fail if not there +lexBaseChar = do { c <- get; + case c of + 'o' -> return 8 + 'O' -> return 8 + 'x' -> return 16 + 'X' -> return 16 + _ -> pfail } + +lexDecNumber :: ReadP Lexeme +lexDecNumber = + do xs <- lexDigits 10 + mFrac <- lexFrac <++ return Nothing + mExp <- lexExp <++ return Nothing + return (value xs mFrac mExp) where - value xs mFrac mExp = valueFracExp (val (fromIntegral base) 0 xs) mFrac mExp + value xs mFrac mExp = valueFracExp (val 10 0 xs) mFrac mExp - valueFracExp a Nothing mExp - | validIntExp mExp = Left (valueExpInt a mExp) - | otherwise = Right (valueExp (fromIntegral a) mExp) - valueFracExp a (Just fs) mExp = - Right (valueExp (fromInteger a + frac (fromIntegral base) 0 1 fs) mExp) - - -- only positive exponents allowed - validIntExp Nothing = True - validIntExp (Just e) = e >= 0 - - valueExpInt a Nothing = a - valueExpInt a (Just exp) = a * ((fromIntegral base) ^ exp) - - valueExp a Nothing = a - valueExp a (Just exp) = a * ((fromIntegral base) ^^ exp) - -lexFrac :: Base -> ReadP (Maybe Digits) -lexFrac base = - do s <- look - case s of - '.' : _ -> - do get - frac <- lexDigits base - return (Just frac) - - _ -> - do return Nothing - -lexExp :: Base -> ReadP (Maybe Integer) -lexExp base = - do s <- look - case s of - e : _ | e `elem` "eE" && base == 10 -> - do get - (signedExp +++ exp) - where - signedExp = - do c <- char '-' +++ char '+' - n <- lexInteger 10 - return (Just (if c == '-' then -n else n)) - - exp = - do n <- lexInteger 10 - return (Just n) - - _ -> - do return Nothing + valueFracExp :: Integer -> Maybe Digits -> Maybe Integer + -> Lexeme + valueFracExp a Nothing Nothing + = Int a -- 43 + valueFracExp a Nothing (Just exp) + | exp >= 0 = Int (a * (10 ^ exp)) -- 43e7 + | otherwise = Rat (valExp (fromInteger a) exp) -- 43e-7 + valueFracExp a (Just fs) mExp + = case mExp of + Nothing -> Rat rat -- 4.3 + Just exp -> Rat (valExp rat exp) -- 4.3e-4 + where + rat :: Rational + rat = fromInteger a + frac 10 0 1 fs + + valExp :: Rational -> Integer -> Rational + valExp rat exp = rat * (10 ^^ exp) + +lexFrac :: ReadP (Maybe Digits) +-- Read the fractional part; fail if it doesn't +-- start ".d" where d is a digit +lexFrac = do _ <- char '.' + fraction <- lexDigits 10 + return (Just fraction) + +lexExp :: ReadP (Maybe Integer) +lexExp = do _ <- char 'e' +++ char 'E' + exp <- signedExp +++ lexInteger 10 + return (Just exp) + where + signedExp + = do c <- char '-' +++ char '+' + n <- lexInteger 10 + return (if c == '-' then -n else n) lexDigits :: Int -> ReadP Digits +-- Lex a non-empty sequence of digits in specified base lexDigits base = do s <- look xs <- scan s id @@ -396,7 +383,7 @@ lexDigits base = return xs where scan (c:cs) f = case valDig base c of - Just n -> do get; scan cs (f.(n:)) + Just n -> do _ <- get; scan cs (f.(n:)) Nothing -> do return (f []) scan [] f = do return (f []) @@ -406,13 +393,14 @@ lexInteger base = return (val (fromIntegral base) 0 xs) val :: Num a => a -> a -> Digits -> a -val base y [] = y +-- val base y [d1,..,dn] = y ++ [d1,..,dn], as it were +val _ y [] = y val base y (x:xs) = y' `seq` val base y' xs where y' = y * base + fromIntegral x frac :: Integral a => a -> a -> a -> Digits -> Ratio a -frac base a b [] = a % b +frac _ a b [] = a % b frac base a b (x:xs) = a' `seq` b' `seq` frac base a' b' xs where a' = a * base + fromIntegral x @@ -423,9 +411,7 @@ valDig 8 c | '0' <= c && c <= '7' = Just (ord c - ord '0') | otherwise = Nothing -valDig 10 c - | '0' <= c && c <= '9' = Just (ord c - ord '0') - | otherwise = Nothing +valDig 10 c = valDecDig c valDig 16 c | '0' <= c && c <= '9' = Just (ord c - ord '0') @@ -433,41 +419,12 @@ valDig 16 c | 'A' <= c && c <= 'F' = Just (ord c - ord 'A' + 10) | otherwise = Nothing --- ---------------------------------------------------------------------- --- conversion +valDig _ _ = error "valDig: Bad base" -numberToInt :: Number -> Maybe Int -numberToInt x = - case numberToInteger x of - Just n | minBound' <= n && n <= maxBound' -> Just (fromInteger n) - _ -> Nothing - where - minBound' = toInteger (minBound :: Int) - maxBound' = toInteger (maxBound :: Int) - -numberToInteger :: Number -> Maybe Integer -numberToInteger x = - case value x of - Left n -> Just n - _ -> Nothing - -numberToRational :: Number -> Maybe Rational -numberToRational x = - case value x of - Left n -> Just (fromInteger n) - Right r -> Just r - -numberToFloat :: Number -> Maybe Float -numberToFloat x = - case value x of - Left n -> Just (fromInteger n) - Right r -> Just (fromRational r) - -numberToDouble :: Number -> Maybe Double -numberToDouble x = - case value x of - Left n -> Just (fromInteger n) - Right r -> Just (fromRational r) +valDecDig :: Char -> Maybe Int +valDecDig c + | '0' <= c && c <= '9' = Just (ord c - ord '0') + | otherwise = Nothing -- ---------------------------------------------------------------------- -- other numeric lexing functions