From: ross Date: Tue, 26 Aug 2003 10:39:57 +0000 (+0000) Subject: [project @ 2003-08-26 10:39:55 by ross] X-Git-Tag: nhc98-1-18-release~528 X-Git-Url: http://git.megacz.com/?a=commitdiff_plain;h=aaedec91d25e9240edda260e6a0aaf9f76844864;p=ghc-base.git [project @ 2003-08-26 10:39:55 by ross] Document Data.Char from the H98 Report. Note that the new implementations of isSpace and isDigit are inconsistent with this documentation -- maybe two versions of them are needed? --- diff --git a/Data/Char.hs b/Data/Char.hs index 001c83f..c8c630e 100644 --- a/Data/Char.hs +++ b/Data/Char.hs @@ -17,23 +17,33 @@ module Data.Char ( Char - , isAscii, isLatin1, isControl - , isPrint, isSpace, isUpper - , isLower, isAlpha, isDigit - , isOctDigit, isHexDigit, isAlphaNum -- :: Char -> Bool + , String + + -- * Character classification + -- | Unicode characters are divided into letters, numbers, marks, + -- punctuation, symbols, separators (including spaces) and others + -- (including control characters). + -- The full set of Unicode character attributes is not accessible + -- in this library. + , isAscii, isLatin1, isControl, isSpace + , isLower, isUpper, isAlpha, isAlphaNum, isPrint + , isDigit, isOctDigit, isHexDigit -- :: Char -> Bool + -- * Case conversion , toUpper, toLower -- :: Char -> Char + -- * Single digit characters , digitToInt -- :: Char -> Int , intToDigit -- :: Int -> Char + -- * Numeric representations , ord -- :: Char -> Int , chr -- :: Int -> Char - , readLitChar -- :: ReadS Char + + -- * String representations , showLitChar -- :: Char -> ShowS , lexLitChar -- :: ReadS String - - , String + , readLitChar -- :: ReadS Char -- Implementation checked wrt. Haskell 98 lib report, 1/99. ) where @@ -56,7 +66,10 @@ import Prelude(Char,String) import Char #endif - +-- | Convert a single digit 'Char' to the corresponding 'Int'. +-- This function fails unless its argument satisfies 'isHexDigit', +-- but recognises both upper and lower-case hexadecimal digits +-- (i.e. 
@\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@). digitToInt :: Char -> Int digitToInt c | isDigit c = ord c - ord '0' diff --git a/GHC/Base.lhs b/GHC/Base.lhs index 76ade71..5f47ebb 100644 --- a/GHC/Base.lhs +++ b/GHC/Base.lhs @@ -402,8 +402,9 @@ mapFB c f x ys = c (f x) ys \begin{code} -- |The 'Bool' type is an enumeration. It is defined with 'False' --- first so that the corresponding 'Enum' instance will give @'fromEnum' --- False@ the value zero, and @'fromEnum' True@ the value 1. +-- first so that the corresponding 'Prelude.Enum' instance will give +-- 'Prelude.fromEnum' 'False' the value zero, and +-- 'Prelude.fromEnum' 'True' the value 1. data Bool = False | True deriving (Eq, Ord) -- Read in GHC.Read, Show in GHC.Show @@ -495,11 +496,15 @@ data Ordering = LT | EQ | GT deriving (Eq, Ord) type String = [Char] {-| The character type 'Char' is an enumeration whose values represent -Unicode characters. A character literal in Haskell has type 'Char'. - -To convert a 'Char' to or from an 'Int', use 'Prelude.toEnum' and -'Prelude.fromEnum' from the 'Enum' class respectively (equivalently -'ord' and 'chr' also do the trick). +Unicode (or equivalently ISO 10646) characters. +This set extends the ISO 8859-1 (Latin-1) character set +(the first 256 characters), which is itself an extension of the ASCII +character set (the first 128 characters). +A character literal in Haskell has type 'Char'. + +To convert a 'Char' to or from the corresponding 'Int' value defined +by Unicode, use 'Prelude.toEnum' and 'Prelude.fromEnum' from the +'Prelude.Enum' class respectively (or equivalently 'ord' and 'chr'). -} data Char = C# Char# @@ -526,6 +531,7 @@ instance Ord Char where "x# `ltChar#` x#" forall x#. x# `ltChar#` x# = False #-} +-- | The 'Prelude.toEnum' method restricted to the type 'Data.Char.Char'. 
chr :: Int -> Char chr (I# i#) | int2Word# i# `leWord#` int2Word# 0x10FFFF# = C# (chr# i#) | otherwise = error "Prelude.chr: bad argument" @@ -533,6 +539,7 @@ chr (I# i#) | int2Word# i# `leWord#` int2Word# 0x10FFFF# = C# (chr# i#) unsafeChr :: Int -> Char unsafeChr (I# i#) = C# (chr# i#) +-- | The 'Prelude.fromEnum' method restricted to the type 'Data.Char.Char'. ord :: Char -> Int ord (C# c#) = I# (ord# c#) \end{code} diff --git a/GHC/Read.lhs b/GHC/Read.lhs index cc3c541..2b9c448 100644 --- a/GHC/Read.lhs +++ b/GHC/Read.lhs @@ -155,12 +155,23 @@ read s = either error id (readEither s) lex :: ReadS String -- As defined by H98 lex s = readP_to_S L.hsLex s +-- | Read a string representation of a character, using Haskell +-- source-language escape conventions. For example: +-- +-- > lexLitChar "\\nHello" = [("\\n", "Hello")] +-- lexLitChar :: ReadS String -- As defined by H98 lexLitChar = readP_to_S (do { (s, _) <- P.gather L.lexChar ; return s }) -- There was a skipSpaces before the P.gather L.lexChar, -- but that seems inconsistent with readLitChar +-- | Read a string representation of a character, using Haskell +-- source-language escape conventions, and convert it to the character +-- that it encodes. For example: +-- +-- > readLitChar "\\nHello" = [('\n', "Hello")] +-- readLitChar :: ReadS Char -- As defined by H98 readLitChar = readP_to_S L.lexChar diff --git a/GHC/Show.lhs b/GHC/Show.lhs index 5c0382a..4df4351 100644 --- a/GHC/Show.lhs +++ b/GHC/Show.lhs @@ -200,6 +200,11 @@ showSpace = {-showChar ' '-} \ xs -> ' ' : xs Code specific for characters \begin{code} +-- | Convert a character to a string using only printable characters, +-- using Haskell source-language escape conventions. 
For example: +-- +-- > showLitChar '\n' s = "\\n" ++ s +-- showLitChar :: Char -> ShowS showLitChar c s | c > '\DEL' = showChar '\\' (protectEsc isDec (shows (ord c)) s) showLitChar '\DEL' s = showString "\\DEL" s @@ -237,6 +242,9 @@ asciiTab = -- Using an array drags in the array module. listArray ('\NUL', ' ') Code specific for Ints. \begin{code} +-- | Convert an 'Int' in the range @0@..@15@ to the corresponding single +-- digit 'Char'. This function fails on other inputs, and generates +-- lower-case hexadecimal digits. intToDigit :: Int -> Char intToDigit (I# i) | i >=# 0# && i <=# 9# = unsafeChr (ord '0' `plusInt` I# i) diff --git a/GHC/Unicode.hsc b/GHC/Unicode.hsc index 2314a5d..d58722a 100644 --- a/GHC/Unicode.hsc +++ b/GHC/Unicode.hsc @@ -32,20 +32,73 @@ import GHC.Num (fromInteger) #include "config.h" -isAscii, isLatin1, isAsciiUpper, isAsciiLower :: Char -> Bool +-- | Selects the first 128 characters of the Unicode character set, +-- corresponding to the ASCII character set. +isAscii :: Char -> Bool isAscii c = c < '\x80' + +-- | Selects the first 256 characters of the Unicode character set, +-- corresponding to the ISO 8859-1 (Latin-1) character set. +isLatin1 :: Char -> Bool isLatin1 c = c <= '\xff' + +isAsciiUpper, isAsciiLower :: Char -> Bool isAsciiLower c = c >= 'a' && c <= 'z' isAsciiUpper c = c >= 'A' && c <= 'Z' -isControl, isPrint, isSpace, isUpper, - isLower, isAlpha, isDigit, isOctDigit, isHexDigit, isAlphaNum - :: Char -> Bool +-- | Selects control characters, which are the non-printing characters of +-- the Latin-1 subset of Unicode. +isControl :: Char -> Bool + +-- | Selects printable Unicode characters +-- (letters, numbers, marks, punctuation, symbols and spaces). +isPrint :: Char -> Bool + +-- | Selects white-space characters in the Latin-1 range. +-- (In Unicode terms, this includes spaces and some control characters.) +isSpace :: Char -> Bool + +-- | Selects alphabetic Unicode characters (letters) that are not lower-case. 
+-- (In Unicode terms, this includes letters in upper and title cases, +-- as well as modifier letters and other letters.) +isUpper :: Char -> Bool + +-- | Selects lower-case alphabetic Unicode characters (letters). +isLower :: Char -> Bool + +-- | Selects alphabetic Unicode characters (letters). +isAlpha :: Char -> Bool +-- | Selects alphabetic or numeric digit Unicode characters. +-- +-- Note that numeric digits outside the ASCII range are selected by this +-- function but not by 'isDigit'. Such digits may be part of identifiers +-- but are not used by the printer and reader to represent numbers. +isAlphaNum :: Char -> Bool + +-- | Selects ASCII digits, i.e. @\'0\'@..@\'9\'@. +isDigit :: Char -> Bool + +-- | Selects ASCII octal digits, i.e. @\'0\'@..@\'7\'@. +isOctDigit :: Char -> Bool isOctDigit c = c >= '0' && c <= '7' + +-- | Selects ASCII hexadecimal digits, +-- i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@. +isHexDigit :: Char -> Bool isHexDigit c = isDigit c || c >= 'A' && c <= 'F' || c >= 'a' && c <= 'f' +-- | Convert a letter to the corresponding upper-case letter, leaving any +-- other character unchanged. Any Unicode letter which has an upper-case +-- equivalent is transformed. +toUpper :: Char -> Char + +-- | Convert a letter to the corresponding lower-case letter, leaving any +-- other character unchanged. Any Unicode letter which has a lower-case +-- equivalent is transformed. +toLower :: Char -> Char + -- ----------------------------------------------------------------------------- -- Win32 implementation