X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Text%2FRead%2FLex.hs;h=740e27f3468edbcfe9e4198f012d13d78f1269db;hb=a2a70b9bf60672c72b35654105402cf21238b6f4;hp=14528c134367db95148bfd256c2e206bd05334ca;hpb=0c3ce7a5b3c10ce1e181dab07bfe71aca7e83b33;p=haskell-directory.git diff --git a/Text/Read/Lex.hs b/Text/Read/Lex.hs index 14528c1..740e27f 100644 --- a/Text/Read/Lex.hs +++ b/Text/Read/Lex.hs @@ -1,4 +1,4 @@ -{-# OPTIONS -fno-implicit-prelude #-} +{-# OPTIONS_GHC -fno-implicit-prelude #-} ----------------------------------------------------------------------------- -- | -- Module : Text.Read.Lex @@ -7,7 +7,7 @@ -- -- Maintainer : libraries@haskell.org -- Stability : provisional --- Portability : portable +-- Portability : non-portable (uses Text.ParserCombinators.ReadP) -- -- The cut-down Haskell lexer, used by Text.Read -- @@ -15,49 +15,56 @@ module Text.Read.Lex -- lexing types - ( Lexeme(..) -- :: *; Show, Eq + ( Lexeme(..) -- :: *; Show, Eq + + -- lexer + , lex -- :: ReadP Lexeme Skips leading spaces + , hsLex -- :: ReadP String + , lexChar -- :: ReadP Char Reads just one char, with H98 escapes - -- lexer - , lex -- :: ReadP Lexeme -- Skips leading spaces - , hsLex -- :: ReadP String - - , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a - , readOctP -- :: Num a => ReadP a - , readDecP -- :: Num a => ReadP a - , readHexP -- :: Num a => ReadP a + , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a + , readOctP -- :: Num a => ReadP a + , readDecP -- :: Num a => ReadP a + , readHexP -- :: Num a => ReadP a ) where import Text.ParserCombinators.ReadP +#ifdef __GLASGOW_HASKELL__ import GHC.Base import GHC.Num( Num(..), Integer ) -import GHC.Show( Show(.. ), showChar, showString, - isSpace, isAlpha, isAlphaNum, - isOctDigit, isHexDigit, toUpper ) -import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral, fromRational, +import GHC.Show( Show(..) ) +#ifndef __HADDOCK__ +import {-# SOURCE #-} GHC.Unicode ( isSpace, isAlpha, isAlphaNum ) +#endif +import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral, toInteger, (^), (^^), infinity, notANumber ) -import GHC.Float( Float, Double ) import GHC.List -import GHC.Show( ShowS, shows ) -import GHC.Enum( minBound, maxBound ) +import GHC.Enum( maxBound ) +#else +import Prelude hiding ( lex ) +import Data.Char( chr, ord, isSpace, isAlpha, isAlphaNum ) +import Data.Ratio( Ratio, (%) ) +#endif +#ifdef __HUGS__ +import Hugs.Prelude( Ratio(..) ) +#endif import Data.Maybe -import Data.Either import Control.Monad -- ----------------------------------------------------------------------------- -- Lexing types -type LexP = ReadP Lexeme - +-- ^ Haskell lexemes. data Lexeme - = Char Char -- Quotes removed, - | String String -- escapes interpreted - | Punc String -- Punctuation, eg "(", "::" - | Ident String -- Haskell identifiers, e.g. foo, baz - | Symbol String -- Haskell symbols, e.g. >>, % - | Int Integer - | Rat Rational + = Char Char -- ^ Character literal + | String String -- ^ String literal, with escapes interpreted + | Punc String -- ^ Punctuation or reserved symbol, e.g. @(@, @::@ + | Ident String -- ^ Haskell identifier, e.g. @foo@, @Baz@ + | Symbol String -- ^ Haskell symbol, e.g. @>>@, @:%@ + | Int Integer -- ^ Integer literal + | Rat Rational -- ^ Floating point literal | EOF deriving (Eq, Show) @@ -118,14 +125,26 @@ lexSymbol = -- identifiers lexId :: ReadP Lexeme -lexId = - do c <- satisfy isIdsChar - s <- munch isIdfChar - return (Ident (c:s)) - where - -- Identifiers can start with a '_' - isIdsChar c = isAlpha c || c == '_' - isIdfChar c = isAlphaNum c || c `elem` "_'" +lexId = lex_nan <++ lex_id + where + -- NaN and Infinity look like identifiers, so + -- we parse them first. + lex_nan = (string "NaN" >> return (Rat notANumber)) +++ + (string "Infinity" >> return (Rat infinity)) + + lex_id = do c <- satisfy isIdsChar + s <- munch isIdfChar + return (Ident (c:s)) + + -- Identifiers can start with a '_' + isIdsChar c = isAlpha c || c == '_' + isIdfChar c = isAlphaNum c || c `elem` "_'" + +#ifndef __GLASGOW_HASKELL__ +infinity, notANumber :: Rational +infinity = 1 :% 0 +notANumber = 0 :% 0 +#endif -- --------------------------------------------------------------------------- -- Lexing character literals @@ -133,13 +152,16 @@ lexId = lexLitChar :: ReadP Lexeme lexLitChar = do char '\'' - (c,esc) <- lexChar - guard (esc || c /= '\'') + (c,esc) <- lexCharE + guard (esc || c /= '\'') -- Eliminate '' possibility char '\'' return (Char c) -lexChar :: ReadP (Char, Bool) -- "escaped or not"? -lexChar = +lexChar :: ReadP Char +lexChar = do { (c,_) <- lexCharE; return c } + +lexCharE :: ReadP (Char, Bool) -- "escaped or not"? +lexCharE = do c <- get if c == '\\' then do c <- lexEsc; return (c, True) @@ -167,18 +189,11 @@ lexChar = _ -> pfail lexNumeric = - do base <- lexBase + do base <- lexBaseChar <++ return 10 n <- lexInteger base guard (n <= toInteger (ord maxBound)) return (chr (fromInteger n)) - where - lexBase = - do s <- look - case s of - 'o':_ -> do get; return 8 - 'x':_ -> do get; return 16 - _ -> do return 10 - + lexCntrlChar = do char '^' c <- get @@ -219,13 +234,11 @@ lexChar = lexAscii = do choice - [ do { string "SO" ; s <- look; - case s of - 'H' : _ -> do { get ; return '\SOH' } - other -> return '\SO' - } + [ (string "SOH" >> return '\SOH') <++ + (string "SO" >> return '\SO') -- \SO and \SOH need maximal-munch treatment -- See the Haskell report Sect 2.6 + , string "NUL" >> return '\NUL' , string "STX" >> return '\STX' , string "ETX" >> return '\ETX' @@ -276,9 +289,8 @@ lexString = else let s = f "" in return (String s) - lexStrItem = - (lexEmpty >> lexStrItem) - +++ lexChar + lexStrItem = (lexEmpty >> lexStrItem) + +++ lexCharE lexEmpty = do char '\\' @@ -294,42 +306,44 @@ lexString = type Base = Int type Digits = [Int] -showDigit :: Int -> ShowS -showDigit n | n <= 9 = shows n - | otherwise = showChar (chr (n + ord 'A' - 10)) - lexNumber :: ReadP Lexeme -lexNumber = do { string "NaN"; return (Rat notANumber) } +++ - do { string "Infinity"; return (Rat infinity) } +++ - do { base <- lexBase ; lexNumberBase base } - where - lexBase = - do s <- look - case s of - '0':'o':_ -> do get; get; return 8 - '0':'O':_ -> do get; get; return 8 - '0':'x':_ -> do get; get; return 16 - '0':'X':_ -> do get; get; return 16 - _ -> do return 10 - -lexNumberBase :: Base -> ReadP Lexeme -lexNumberBase base = - do xs <- lexDigits base - mFrac <- lexFrac base - mExp <- lexExp base +lexNumber + = lexHexOct <++ -- First try for hex or octal 0x, 0o etc + -- If that fails, try for a decimal number + lexDecNumber -- Start with ordinary digits + +lexHexOct :: ReadP Lexeme +lexHexOct + = do char '0' + base <- lexBaseChar + digits <- lexDigits base + return (Int (val (fromIntegral base) 0 digits)) + +lexBaseChar :: ReadP Int +-- Lex a single character indicating the base; fail if not there +lexBaseChar = do { c <- get; + case c of + 'o' -> return 8 + 'O' -> return 8 + 'x' -> return 16 + 'X' -> return 16 + _ -> pfail } + +lexDecNumber :: ReadP Lexeme +lexDecNumber = + do xs <- lexDigits 10 + mFrac <- lexFrac <++ return Nothing + mExp <- lexExp <++ return Nothing return (value xs mFrac mExp) where - baseInteger :: Integer - baseInteger = fromIntegral base - - value xs mFrac mExp = valueFracExp (val baseInteger 0 xs) mFrac mExp + value xs mFrac mExp = valueFracExp (val 10 0 xs) mFrac mExp valueFracExp :: Integer -> Maybe Digits -> Maybe Integer -> Lexeme valueFracExp a Nothing Nothing = Int a -- 43 valueFracExp a Nothing (Just exp) - | exp >= 0 = Int (a * (baseInteger ^ exp)) -- 43e7 + | exp >= 0 = Int (a * (10 ^ exp)) -- 43e7 | otherwise = Rat (valExp (fromInteger a) exp) -- 43e-7 valueFracExp a (Just fs) mExp = case mExp of @@ -337,45 +351,27 @@ lexNumberBase base = Just exp -> Rat (valExp rat exp) -- 4.3e-4 where rat :: Rational - rat = fromInteger a + frac (fromIntegral base) 0 1 fs + rat = fromInteger a + frac 10 0 1 fs valExp :: Rational -> Integer -> Rational - valExp rat exp = rat * (fromIntegral base ^^ exp) - -lexFrac :: Base -> ReadP (Maybe Digits) -lexFrac base = - do s <- look - case s of - '.' : d : _ | isJust (valDig base d) -> - -- The lookahead checks for point and at least one - -- valid following digit. For example 1..n must - -- lex the "1" off rather than failing. - do get - frac <- lexDigits base - return (Just frac) - - _ -> - do return Nothing - -lexExp :: Base -> ReadP (Maybe Integer) -lexExp base = - do s <- look - case s of - e : _ | e `elem` "eE" && base == 10 -> - do get - (signedExp +++ exp) - where - signedExp = - do c <- char '-' +++ char '+' - n <- lexInteger 10 - return (Just (if c == '-' then -n else n)) - - exp = - do n <- lexInteger 10 - return (Just n) - - _ -> - do return Nothing + valExp rat exp = rat * (10 ^^ exp) + +lexFrac :: ReadP (Maybe Digits) +-- Read the fractional part; fail if it doesn't +-- start ".d" where d is a digit +lexFrac = do char '.' + frac <- lexDigits 10 + return (Just frac) + +lexExp :: ReadP (Maybe Integer) +lexExp = do char 'e' +++ char 'E' + exp <- signedExp +++ lexInteger 10 + return (Just exp) + where + signedExp + = do c <- char '-' +++ char '+' + n <- lexInteger 10 + return (if c == '-' then -n else n) lexDigits :: Int -> ReadP Digits -- Lex a non-empty sequence of digits in specified base @@ -414,9 +410,7 @@ valDig 8 c | '0' <= c && c <= '7' = Just (ord c - ord '0') | otherwise = Nothing -valDig 10 c - | '0' <= c && c <= '9' = Just (ord c - ord '0') - | otherwise = Nothing +valDig 10 c = valDecDig c valDig 16 c | '0' <= c && c <= '9' = Just (ord c - ord '0') @@ -424,6 +418,10 @@ valDig 16 c | 'A' <= c && c <= 'F' = Just (ord c - ord 'A' + 10) | otherwise = Nothing +valDecDig c + | '0' <= c && c <= '9' = Just (ord c - ord '0') + | otherwise = Nothing + -- ---------------------------------------------------------------------- -- other numeric lexing functions