From: sof Date: Thu, 13 May 1999 10:45:05 +0000 (+0000) Subject: [project @ 1999-05-13 10:45:03 by sof] X-Git-Tag: Approximately_9120_patches~6216 X-Git-Url: http://git.megacz.com/?a=commitdiff_plain;h=9bb7a81fea7ba9e90737cba1007b90055f5f0951;p=ghc-hetmet.git [project @ 1999-05-13 10:45:03 by sof] Fised & improved lexing of escaped numeric character literals + a regression test. --- diff --git a/ghc/lib/std/Char.lhs b/ghc/lib/std/Char.lhs index a471bec..b00345d 100644 --- a/ghc/lib/std/Char.lhs +++ b/ghc/lib/std/Char.lhs @@ -33,20 +33,7 @@ module Char ) where import PrelBase -import PrelRead (readLitChar, lexLitChar) +import PrelRead (readLitChar, lexLitChar, digitToInt) import {-# SOURCE #-} PrelErr ( error ) \end{code} - -\begin{code} --- Digit conversion operations - -digitToInt :: Char -> Int -digitToInt c - | isDigit c = fromEnum c - fromEnum '0' - | c >= 'a' && c <= 'f' = fromEnum c - fromEnum 'a' + 10 - | c >= 'A' && c <= 'F' = fromEnum c - fromEnum 'A' + 10 - | otherwise = error ("Char.digitToInt: not a digit " ++ show c) -- sigh - - -\end{code} diff --git a/ghc/lib/std/PrelRead.lhs b/ghc/lib/std/PrelRead.lhs index 1ef7b43..c298bcb 100644 --- a/ghc/lib/std/PrelRead.lhs +++ b/ghc/lib/std/PrelRead.lhs @@ -235,15 +235,17 @@ lexLitChar ('\\':s) = do (esc,t) <- lexEsc s return ('\\':esc, t) where - lexEsc (c:s) | c `elem` "abfnrtv\\\"'" = return ([c],s) - lexEsc s@(d:_) | isDigit d = lexDecDigits s - lexEsc ('o':d:s) | isDigit d = lexOctDigits (d:s) - lexEsc ('O':d:s) | isDigit d = lexOctDigits (d:s) - lexEsc ('x':d:s) | isDigit d = lexHexDigits (d:s) - lexEsc ('X':d:s) | isDigit d = lexHexDigits (d:s) - lexEsc ('^':c:s) | '@' <= c && c <= '_' = [(['^',c],s)] -- cf. cntrl in 2.6 of H. report. - lexEsc s@(c:_) | isUpper c = fromAsciiLab s - lexEsc _ = mzero + lexEsc (c:s) | c `elem` escChars = return ([c],s) + lexEsc s@(d:_) | isDigit d = checkSize 10 lexDecDigits s + lexEsc ('o':d:s) | isOctDigit d = checkSize 8 lexOctDigits (d:s) + lexEsc ('O':d:s) | isOctDigit d = checkSize 8 lexOctDigits (d:s) + lexEsc ('x':d:s) | isHexDigit d = checkSize 16 lexHexDigits (d:s) + lexEsc ('X':d:s) | isHexDigit d = checkSize 16 lexHexDigits (d:s) + lexEsc ('^':c:s) | c >= '@' && c <= '_' = [(['^',c],s)] -- cf. cntrl in 2.6 of H. report. + lexEsc s@(c:_) | isUpper c = fromAsciiLab s + lexEsc _ = mzero + + escChars = "abfnrtv\\\"'" fromAsciiLab (x:y:z:ls) | isUpper y && (isUpper z || isDigit z) && [x,y,z] `elem` asciiEscTab = return ([x,y,z], ls) @@ -253,9 +255,36 @@ lexLitChar ('\\':s) = do asciiEscTab = "DEL" : asciiTab + {- + Check that the numerically escaped char literals are + within accepted boundaries. + + Note: this allows char lits with leading zeros, i.e., + \0000000000000000000000000000001. + -} + checkSize base f str = do + (num, res) <- f str + -- Note: this is assumes that a Char is 8 bits long. + if (toAnInt base num) > 255 then + mzero + else + case base of + 8 -> return ('o':num', res) + 16 -> return ('x':num', res) + _ -> return (num, res) + + toAnInt base xs = foldl (\ acc n -> acc*base + n) 0 (map digitToInt xs) + lexLitChar (c:s) = return ([c],s) lexLitChar "" = mzero + +digitToInt :: Char -> Int +digitToInt c + | isDigit c = fromEnum c - fromEnum '0' + | c >= 'a' && c <= 'f' = fromEnum c - fromEnum 'a' + 10 + | c >= 'A' && c <= 'F' = fromEnum c - fromEnum 'A' + 10 + | otherwise = error ("Char.digitToInt: not a digit " ++ show c) -- sigh \end{code} %********************************************************* diff --git a/ghc/tests/lib/should_run/char001.hs b/ghc/tests/lib/should_run/char001.hs new file mode 100644 index 0000000..ec4ae9b --- /dev/null +++ b/ghc/tests/lib/should_run/char001.hs @@ -0,0 +1,39 @@ +-- !!! Testing the behaviour of Char.lexLitChar a little.. +module Main where + +import Char + +lex' str = do + putStr ("lex " ++ str ++ " = ") + print (lex str) + +hexes = do + lex' "'\\X00'" + lex' "'\\x0f2'" + lex' "'\\xf2'" + lex' "'\\xf2t'" + lex' "'\\X24'" + lex' "'\\x24b'" + lex' "'\\Xa4b'" + lex' "'\\xa4bg'" + +octs = do + lex' "'\\o00'" + lex' "'\\o05'" + lex' "'\\o50'" + lex' "'\\o72'" + lex' "'\\o82'" + lex' "'\\O24'" + lex' "'\\O000024'" + lex' "'\\024b'" + lex' "'\\o14b'" + lex' "'\\0a4bg'" + +main = do + hexes + octs + + + + + diff --git a/ghc/tests/lib/should_run/char001.stdout b/ghc/tests/lib/should_run/char001.stdout new file mode 100644 index 0000000..c5d261e --- /dev/null +++ b/ghc/tests/lib/should_run/char001.stdout @@ -0,0 +1,18 @@ +lex '\X00' = [("'\\x00'","")] +lex '\x0f2' = [("'\\x0f2'","")] +lex '\xf2' = [("'\\xf2'","")] +lex '\xf2t' = [] +lex '\X24' = [("'\\x24'","")] +lex '\x24b' = [] +lex '\Xa4b' = [] +lex '\xa4bg' = [] +lex '\o00' = [("'\\o00'","")] +lex '\o05' = [("'\\o05'","")] +lex '\o50' = [("'\\o50'","")] +lex '\o72' = [("'\\o72'","")] +lex '\o82' = [] +lex '\O24' = [("'\\o24'","")] +lex '\O000024' = [("'\\o000024'","")] +lex '\024b' = [] +lex '\o14b' = [] +lex '\0a4bg' = []