Document Data.Char from the H98 Report.
Note that the new implementations of isSpace and isDigit are inconsistent
with this documentation -- maybe two versions of them are needed?
(
Char
- , isAscii, isLatin1, isControl
- , isPrint, isSpace, isUpper
- , isLower, isAlpha, isDigit
- , isOctDigit, isHexDigit, isAlphaNum -- :: Char -> Bool
+ , String
+
+ -- * Character classification
+ -- | Unicode characters are divided into letters, numbers, marks,
+ -- punctuation, symbols, separators (including spaces) and others
+ -- (including control characters).
+ -- The full set of Unicode character attributes is not accessible
+ -- in this library.
+ , isAscii, isLatin1, isControl, isSpace
+ , isLower, isUpper, isAlpha, isAlphaNum, isPrint
+ , isDigit, isOctDigit, isHexDigit -- :: Char -> Bool
+ -- * Case conversion
, toUpper, toLower -- :: Char -> Char
+ -- * Single digit characters
, digitToInt -- :: Char -> Int
, intToDigit -- :: Int -> Char
+ -- * Numeric representations
, ord -- :: Char -> Int
, chr -- :: Int -> Char
- , readLitChar -- :: ReadS Char
+
+ -- * String representations
, showLitChar -- :: Char -> ShowS
, lexLitChar -- :: ReadS String
-
- , String
+ , readLitChar -- :: ReadS Char
-- Implementation checked wrt. Haskell 98 lib report, 1/99.
) where
import Char
#endif
-
+-- | Convert a single digit 'Char' to the corresponding 'Int'.
+-- This function fails unless its argument satisfies 'isHexDigit',
+-- but recognises both upper and lower-case hexadecimal digits
+-- (i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@).
digitToInt :: Char -> Int
digitToInt c
| isDigit c = ord c - ord '0'
\begin{code}
-- |The 'Bool' type is an enumeration. It is defined with 'False'
--- first so that the corresponding 'Enum' instance will give @'fromEnum'
--- False@ the value zero, and @'fromEnum' True@ the value 1.
+-- first so that the corresponding 'Prelude.Enum' instance will give
+-- 'Prelude.fromEnum' 'False' the value zero, and
+-- 'Prelude.fromEnum' 'True' the value 1.
data Bool = False | True deriving (Eq, Ord)
-- Read in GHC.Read, Show in GHC.Show
type String = [Char]
{-| The character type 'Char' is an enumeration whose values represent
-Unicode characters. A character literal in Haskell has type 'Char'.
-
-To convert a 'Char' to or from an 'Int', use 'Prelude.toEnum' and
-'Prelude.fromEnum' from the 'Enum' class respectively (equivalently
-'ord' and 'chr' also do the trick).
+Unicode (or equivalently ISO 10646) characters.
+This set extends the ISO 8859-1 (Latin-1) character set
+(the first 256 characters), which is itself an extension of the ASCII
+character set (the first 128 characters).
+A character literal in Haskell has type 'Char'.
+
+To convert a 'Char' to or from the corresponding 'Int' value defined
+by Unicode, use 'Prelude.toEnum' and 'Prelude.fromEnum' from the
+'Prelude.Enum' class respectively (or equivalently 'ord' and 'chr').
-}
data Char = C# Char#
"x# `ltChar#` x#" forall x#. x# `ltChar#` x# = False
#-}
+-- | The 'Prelude.toEnum' method restricted to the type 'Data.Char.Char'.
chr :: Int -> Char
-- The argument is compared as an unsigned word against 0x10FFFF, so a
-- single test rejects values above that bound and (presumably, since
-- int2Word# reinterprets the bits) negative values as well.
chr (I# i#) | int2Word# i# `leWord#` int2Word# 0x10FFFF# = C# (chr# i#)
-- Anything failing the range test raises an error rather than
-- producing a 'Char'.
| otherwise = error "Prelude.chr: bad argument"
-- Convert an 'Int' to a 'Char' by applying chr# directly, with no range
-- check on the argument (contrast with the guarded 'chr').
unsafeChr :: Int -> Char
unsafeChr (I# i#) = C# (chr# i#)
+-- | The 'Prelude.fromEnum' method restricted to the type 'Data.Char.Char'.
ord :: Char -> Int
-- Unwrap the C# constructor, apply the ord# primop to the unboxed
-- character, and box the result as an 'Int'.
ord (C# c#) = I# (ord# c#)
\end{code}
-- Run the L.hsLex parser over the input string, converting it to the
-- list-of-successes 'ReadS' form with readP_to_S.
lex :: ReadS String -- As defined by H98
lex s = readP_to_S L.hsLex s
+-- | Read a string representation of a character, using Haskell
+-- source-language escape conventions. For example:
+--
+-- > lexLitChar "\\nHello" = [("\\n", "Hello")]
+--
lexLitChar :: ReadS String -- As defined by H98
-- P.gather is used so that the text matched by L.lexChar is available
-- alongside its result; only the first component of that pair is
-- returned and the second is discarded.
lexLitChar = readP_to_S (do { (s, _) <- P.gather L.lexChar ;
return s })
-- There used to be a skipSpaces before the P.gather L.lexChar,
-- but that seemed inconsistent with readLitChar, which applies
-- L.lexChar directly.
+-- | Read a string representation of a character, using Haskell
+-- source-language escape conventions, and convert it to the character
+-- that it encodes. For example:
+--
+-- > readLitChar "\\nHello" = [('\n', "Hello")]
+--
readLitChar :: ReadS Char -- As defined by H98
-- Runs the L.lexChar parser directly (no preceding skipSpaces) and
-- exposes it as a 'ReadS' via readP_to_S.
readLitChar = readP_to_S L.lexChar
Code specific for characters
\begin{code}
+-- | Convert a character to a string using only printable characters,
+-- using Haskell source-language escape conventions. For example:
+--
+-- > showLitChar '\n' s = "\\n" ++ s
+--
showLitChar :: Char -> ShowS
showLitChar c s | c > '\DEL' = showChar '\\' (protectEsc isDec (shows (ord c)) s)
showLitChar '\DEL' s = showString "\\DEL" s
Code specific for Ints.
\begin{code}
+-- | Convert an 'Int' in the range @0@..@15@ to the corresponding single
+-- digit 'Char'. This function fails on other inputs, and generates
+-- lower-case hexadecimal digits.
intToDigit :: Int -> Char
intToDigit (I# i)
| i >=# 0# && i <=# 9# = unsafeChr (ord '0' `plusInt` I# i)
#include "config.h"
-isAscii, isLatin1, isAsciiUpper, isAsciiLower :: Char -> Bool
+-- | Selects the first 128 characters of the Unicode character set,
+-- corresponding to the ASCII character set.
+isAscii :: Char -> Bool
isAscii c = c < '\x80'
+
+-- | Selects the first 256 characters of the Unicode character set,
+-- corresponding to the ISO 8859-1 (Latin-1) character set.
+isLatin1 :: Char -> Bool
isLatin1 c = c <= '\xff'
+
+isAsciiUpper, isAsciiLower :: Char -> Bool
isAsciiLower c = c >= 'a' && c <= 'z'
isAsciiUpper c = c >= 'A' && c <= 'Z'
-isControl, isPrint, isSpace, isUpper,
- isLower, isAlpha, isDigit, isOctDigit, isHexDigit, isAlphaNum
- :: Char -> Bool
+-- | Selects control characters, which are the non-printing characters of
+-- the Latin-1 subset of Unicode.
+isControl :: Char -> Bool
+
+-- | Selects printable Unicode characters
+-- (letters, numbers, marks, punctuation, symbols and spaces).
+isPrint :: Char -> Bool
+
+-- | Selects white-space characters in the Latin-1 range.
+-- (In Unicode terms, this includes spaces and some control characters.)
+isSpace :: Char -> Bool
+
+-- | Selects alphabetic Unicode characters (letters) that are not lower-case.
+-- (In Unicode terms, this includes letters in upper and title cases,
+-- as well as modifier letters and other letters.)
+isUpper :: Char -> Bool
+
+-- | Selects lower-case alphabetic Unicode characters (letters).
+isLower :: Char -> Bool
+
+-- | Selects alphabetic Unicode characters (letters).
+isAlpha :: Char -> Bool
+-- | Selects alphabetic or numeric digit Unicode characters.
+--
+-- Note that numeric digits outside the ASCII range are selected by this
+-- function but not by 'isDigit'. Such digits may be part of identifiers
+-- but are not used by the printer and reader to represent numbers.
+isAlphaNum :: Char -> Bool
+
+-- | Selects ASCII digits, i.e. @\'0\'@..@\'9\'@.
+isDigit :: Char -> Bool
+
+-- | Selects ASCII octal digits, i.e. @\'0\'@..@\'7\'@.
+isOctDigit :: Char -> Bool
isOctDigit c = c >= '0' && c <= '7'
+
+-- | Selects ASCII hexadecimal digits,
+-- i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@.
+isHexDigit :: Char -> Bool
isHexDigit c = isDigit c || c >= 'A' && c <= 'F' ||
c >= 'a' && c <= 'f'
+-- | Convert a letter to the corresponding upper-case letter, leaving any
+-- other character unchanged. Any Unicode letter which has an upper-case
+-- equivalent is transformed.
+toUpper :: Char -> Char
+
+-- | Convert a letter to the corresponding lower-case letter, leaving any
+-- other character unchanged. Any Unicode letter which has a lower-case
+-- equivalent is transformed.
+toLower :: Char -> Char
+
-- -----------------------------------------------------------------------------
-- Win32 implementation