[project @ 2003-08-26 10:39:55 by ross]

author ross <unknown>

Tue, 26 Aug 2003 10:39:57 +0000 (10:39 +0000)

committer ross <unknown>

Tue, 26 Aug 2003 10:39:57 +0000 (10:39 +0000)
author ross <unknown>
Tue, 26 Aug 2003 10:39:57 +0000 (10:39 +0000)
committer ross <unknown>
Tue, 26 Aug 2003 10:39:57 +0000 (10:39 +0000)
diff --git a/Data/Char.hs b/Data/Char.hs

index 001c83f..c8c630e 100644 (file)
--- a/Data/Char.hs
+++ b/Data/Char.hs
@@ -17,23 +17,33 @@ module Data.Char
      (
        Char
  
-    , isAscii, isLatin1, isControl
-    , isPrint, isSpace,  isUpper
-    , isLower, isAlpha,  isDigit
-    , isOctDigit, isHexDigit, isAlphaNum  -- :: Char -> Bool
+    , String
+
+    -- * Character classification
+    -- | Unicode characters are divided into letters, numbers, marks,
+    -- punctuation, symbols, separators (including spaces) and others
+    -- (including control characters).
+    -- The full set of Unicode character attributes is not accessible
+    -- in this library.
+    , isAscii, isLatin1, isControl, isSpace
+    , isLower, isUpper,  isAlpha,   isAlphaNum, isPrint
+    , isDigit, isOctDigit, isHexDigit  -- :: Char -> Bool
  
+    -- * Case conversion
      , toUpper, toLower  -- :: Char -> Char
  
+    -- * Single digit characters
      , digitToInt        -- :: Char -> Int
      , intToDigit        -- :: Int  -> Char
  
+    -- * Numeric representations
      , ord               -- :: Char -> Int
      , chr               -- :: Int  -> Char
-    , readLitChar       -- :: ReadS Char 
+
+    -- * String representations
      , showLitChar       -- :: Char -> ShowS
      , lexLitChar       -- :: ReadS String
-
-    , String
+    , readLitChar       -- :: ReadS Char 
  
       -- Implementation checked wrt. Haskell 98 lib report, 1/99.
      ) where
@@ -56,7 +66,10 @@ import Prelude(Char,String)
  import Char
  #endif
  
-
+-- | Convert a single digit 'Char' to the corresponding 'Int'.  
+-- This function fails unless its argument satisfies 'isHexDigit',
+-- but recognises both upper and lower-case hexadecimal digits
+-- (i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@).
  digitToInt :: Char -> Int
  digitToInt c
   | isDigit c           =  ord c - ord '0'
diff --git a/GHC/Base.lhs b/GHC/Base.lhs

index 76ade71..5f47ebb 100644 (file)
--- a/GHC/Base.lhs
+++ b/GHC/Base.lhs
@@ -402,8 +402,9 @@ mapFB c f x ys = c (f x) ys
  
  \begin{code}
  -- |The 'Bool' type is an enumeration.  It is defined with 'False'
--- first so that the corresponding 'Enum' instance will give @'fromEnum'
--- False@ the value zero, and @'fromEnum' True@ the value 1.
+-- first so that the corresponding 'Prelude.Enum' instance will give
+-- 'Prelude.fromEnum' 'False' the value zero, and
+-- 'Prelude.fromEnum' 'True' the value 1.
  data  Bool  =  False | True  deriving (Eq, Ord)
         -- Read in GHC.Read, Show in GHC.Show
  
@@ -495,11 +496,15 @@ data Ordering = LT | EQ | GT deriving (Eq, Ord)
  type String = [Char]
  
  {-| The character type 'Char' is an enumeration whose values represent
-Unicode characters.  A character literal in Haskell has type 'Char'.
-
-To convert a 'Char' to or from an 'Int', use 'Prelude.toEnum' and
-'Prelude.fromEnum' from the 'Enum' class respectively (equivalently
-'ord' and 'chr' also do the trick).
+Unicode (or equivalently ISO 10646) characters.
+This set extends the ISO 8859-1 (Latin-1) character set
+(the first 256 characters), which is itself an extension of the ASCII
+character set (the first 128 characters).
+A character literal in Haskell has type 'Char'.
+
+To convert a 'Char' to or from the corresponding 'Int' value defined
+by Unicode, use 'Prelude.toEnum' and 'Prelude.fromEnum' from the
+'Prelude.Enum' class respectively (or equivalently 'ord' and 'chr').
  -}
  data Char = C# Char#
  
@@ -526,6 +531,7 @@ instance Ord Char where
  "x# `ltChar#` x#" forall x#. x# `ltChar#` x# = False
    #-}
  
+-- | The 'Prelude.toEnum' method restricted to the type 'Data.Char.Char'.
  chr :: Int -> Char
  chr (I# i#) | int2Word# i# `leWord#` int2Word# 0x10FFFF# = C# (chr# i#)
              | otherwise                                  = error "Prelude.chr: bad argument"
@@ -533,6 +539,7 @@ chr (I# i#) | int2Word# i# `leWord#` int2Word# 0x10FFFF# = C# (chr# i#)
  unsafeChr :: Int -> Char
  unsafeChr (I# i#) = C# (chr# i#)
  
+-- | The 'Prelude.fromEnum' method restricted to the type 'Data.Char.Char'.
  ord :: Char -> Int
  ord (C# c#) = I# (ord# c#)
  \end{code}
diff --git a/GHC/Read.lhs b/GHC/Read.lhs

index cc3c541..2b9c448 100644 (file)
--- a/GHC/Read.lhs
+++ b/GHC/Read.lhs
@@ -155,12 +155,23 @@ read s = either error id (readEither s)
  lex :: ReadS String            -- As defined by H98
  lex s  = readP_to_S L.hsLex s
  
+-- | Read a string representation of a character, using Haskell
+-- source-language escape conventions.  For example:
+--
+-- > lexLitChar  "\\nHello"  =  [("\\n", "Hello")]
+--
  lexLitChar :: ReadS String     -- As defined by H98
  lexLitChar = readP_to_S (do { (s, _) <- P.gather L.lexChar ;
                               return s })
         -- There was a skipSpaces before the P.gather L.lexChar,
         -- but that seems inconsistent with readLitChar
  
+-- | Read a string representation of a character, using Haskell
+-- source-language escape conventions, and convert it to the character
+-- that it encodes.  For example:
+--
+-- > readLitChar "\\nHello"  =  [('\n', "Hello")]
+--
  readLitChar :: ReadS Char      -- As defined by H98
  readLitChar = readP_to_S L.lexChar
  
diff --git a/GHC/Show.lhs b/GHC/Show.lhs

index 5c0382a..4df4351 100644 (file)
--- a/GHC/Show.lhs
+++ b/GHC/Show.lhs
@@ -200,6 +200,11 @@ showSpace = {-showChar ' '-} \ xs -> ' ' : xs
  Code specific for characters
  
  \begin{code}
+-- | Convert a character to a string using only printable characters,
+-- using Haskell source-language escape conventions.  For example:
+--
+-- > showLitChar '\n' s  =  "\\n" ++ s
+--
  showLitChar               :: Char -> ShowS
  showLitChar c s | c > '\DEL' =  showChar '\\' (protectEsc isDec (shows (ord c)) s)
  showLitChar '\DEL'        s =  showString "\\DEL" s
@@ -237,6 +242,9 @@ asciiTab = -- Using an array drags in the array module.  listArray ('\NUL', ' ')
  Code specific for Ints.
  
  \begin{code}
+-- | Convert an 'Int' in the range @0@..@15@ to the corresponding single
+-- digit 'Char'.  This function fails on other inputs, and generates
+-- lower-case hexadecimal digits.
  intToDigit :: Int -> Char
  intToDigit (I# i)
      | i >=# 0#  && i <=#  9# =  unsafeChr (ord '0' `plusInt` I# i)
diff --git a/GHC/Unicode.hsc b/GHC/Unicode.hsc

index 2314a5d..d58722a 100644 (file)
--- a/GHC/Unicode.hsc
+++ b/GHC/Unicode.hsc
@@ -32,20 +32,73 @@ import GHC.Num       (fromInteger)
  
  #include "config.h"
  
-isAscii, isLatin1, isAsciiUpper, isAsciiLower :: Char -> Bool
+-- | Selects the first 128 characters of the Unicode character set,
+-- corresponding to the ASCII character set.
+isAscii                 :: Char -> Bool
  isAscii c              =  c <  '\x80'
+
+-- | Selects the first 256 characters of the Unicode character set,
+-- corresponding to the ISO 8859-1 (Latin-1) character set.
+isLatin1                :: Char -> Bool
  isLatin1 c              =  c <= '\xff'
+
+isAsciiUpper, isAsciiLower :: Char -> Bool
  isAsciiLower c          =  c >= 'a' && c <= 'z'
  isAsciiUpper c          =  c >= 'A' && c <= 'Z'
  
-isControl, isPrint, isSpace, isUpper,
- isLower, isAlpha, isDigit, isOctDigit, isHexDigit, isAlphaNum
- :: Char -> Bool
+-- | Selects control characters, which are the non-printing characters of
+-- the Latin-1 subset of Unicode.
+isControl               :: Char -> Bool
+
+-- | Selects printable Unicode characters
+-- (letters, numbers, marks, punctuation, symbols and spaces).
+isPrint                 :: Char -> Bool
+
+-- | Selects white-space characters in the Latin-1 range.
+-- (In Unicode terms, this includes spaces and some control characters.)
+isSpace                 :: Char -> Bool
+
+-- | Selects alphabetic Unicode characters (letters) that are not lower-case.
+-- (In Unicode terms, this includes letters in upper and title cases,
+-- as well as modifier letters and other letters.)
+isUpper                 :: Char -> Bool
+
+-- | Selects lower-case alphabetic Unicode characters (letters).
+isLower                 :: Char -> Bool
+
+-- | Selects alphabetic Unicode characters (letters).
+isAlpha                 :: Char -> Bool
  
+-- | Selects alphabetic or numeric digit Unicode characters.
+--
+-- Note that numeric digits outside the ASCII range are selected by this
+-- function but not by 'isDigit'.  Such digits may be part of identifiers
+-- but are not used by the printer and reader to represent numbers.
+isAlphaNum              :: Char -> Bool
+
+-- | Selects ASCII digits, i.e. @\'0\'@..@\'9\'@.
+isDigit                 :: Char -> Bool
+
+-- | Selects ASCII octal digits, i.e. @\'0\'@..@\'7\'@.
+isOctDigit              :: Char -> Bool
  isOctDigit c           =  c >= '0' && c <= '7'
+
+-- | Selects ASCII hexadecimal digits,
+-- i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@.
+isHexDigit              :: Char -> Bool
  isHexDigit c           =  isDigit c || c >= 'A' && c <= 'F' ||
                                          c >= 'a' && c <= 'f'
  
+-- | Convert a letter to the corresponding upper-case letter, leaving any
+-- other character unchanged.  Any Unicode letter which has an upper-case
+-- equivalent is transformed.
+toUpper                 :: Char -> Char
+
+-- | Convert a letter to the corresponding lower-case letter, leaving any
+-- other character unchanged.  Any Unicode letter which has a lower-case
+-- equivalent is transformed.
+toLower                 :: Char -> Char
+
  -- -----------------------------------------------------------------------------
  -- Win32 implementation
author	ross <unknown>
	Tue, 26 Aug 2003 10:39:57 +0000 (10:39 +0000)
committer	ross <unknown>
	Tue, 26 Aug 2003 10:39:57 +0000 (10:39 +0000)
Data/Char.hs		patch \| blob \| history
GHC/Base.lhs		patch \| blob \| history
GHC/Read.lhs		patch \| blob \| history
GHC/Show.lhs		patch \| blob \| history
GHC/Unicode.hsc		patch \| blob \| history