[project @ 2005-03-16 13:27:03 by ross]

author ross <unknown>

Wed, 16 Mar 2005 13:27:04 +0000 (13:27 +0000)

committer ross <unknown>

Wed, 16 Mar 2005 13:27:04 +0000 (13:27 +0000)
author ross <unknown>
Wed, 16 Mar 2005 13:27:04 +0000 (13:27 +0000)
committer ross <unknown>
Wed, 16 Mar 2005 13:27:04 +0000 (13:27 +0000)
diff --git a/Data/Char.hs b/Data/Char.hs

index c59ec00..48de798 100644 (file)
--- a/Data/Char.hs
+++ b/Data/Char.hs
@@ -23,12 +23,16 @@ module Data.Char
      -- | Unicode characters are divided into letters, numbers, marks,
      -- punctuation, symbols, separators (including spaces) and others
      -- (including control characters).
-    , isAscii, isLatin1, isControl, isSpace
-    , isLower, isUpper,  isAlpha,   isAlphaNum, isPrint
+    , isControl, isSpace
+    , isLower, isUpper, isAlpha, isAlphaNum, isPrint
      , isDigit, isOctDigit, isHexDigit
-    , isAsciiUpper, isAsciiLower
      , isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator
  
+    -- ** Subranges
+    , isAscii, isLatin1
+    , isAsciiUpper, isAsciiLower
+
+    -- ** Unicode general categories
      , GeneralCategory(..), generalCategory
  
      -- * Case conversion
@@ -125,7 +129,7 @@ data GeneralCategory
          | NotAssigned           -- ^ Cn: Other, Not Assigned
          deriving (Eq, Ord, Enum, Read, Show, Bounded)
  
--- | Retrieves the general Unicode category of the character.
+-- | The Unicode general category of the character.
  generalCategory :: Char -> GeneralCategory
  #if defined(__GLASGOW_HASKELL__) || defined(__NHC__)
  generalCategory c = toEnum (wgencat (fromIntegral (ord c)))
@@ -136,6 +140,9 @@ generalCategory c = toEnum (primUniGenCat c)
  
  -- derived character classifiers
  
+-- | Selects alphabetic Unicode characters (lower-case, upper-case and
+-- title-case letters, plus letters of caseless scripts and modifier letters).
+-- This function is equivalent to 'Data.Char.isAlpha'.
  isLetter :: Char -> Bool
  isLetter c = case generalCategory c of
          UppercaseLetter         -> True
@@ -145,6 +152,8 @@ isLetter c = case generalCategory c of
          OtherLetter             -> True
          _                       -> False
  
+-- | Selects Unicode mark characters, e.g. accents and the like, which
+-- combine with preceding letters.
  isMark :: Char -> Bool
  isMark c = case generalCategory c of
          NonSpacingMark          -> True
@@ -152,6 +161,8 @@ isMark c = case generalCategory c of
          EnclosingMark           -> True
          _                       -> False
  
+-- | Selects Unicode numeric characters, including digits from various
+-- scripts, Roman numerals, etc.
  isNumber :: Char -> Bool
  isNumber c = case generalCategory c of
          DecimalNumber           -> True
@@ -159,6 +170,8 @@ isNumber c = case generalCategory c of
          OtherNumber             -> True
          _                       -> False
  
+-- | Selects Unicode punctuation characters, including various kinds
+-- of connectors, brackets and quotes.
  isPunctuation :: Char -> Bool
  isPunctuation c = case generalCategory c of
          ConnectorPunctuation    -> True
@@ -170,6 +183,8 @@ isPunctuation c = case generalCategory c of
          OtherPunctuation        -> True
          _                       -> False
  
+-- | Selects Unicode symbol characters, including mathematical and
+-- currency symbols.
  isSymbol :: Char -> Bool
  isSymbol c = case generalCategory c of
          MathSymbol              -> True
@@ -178,6 +193,7 @@ isSymbol c = case generalCategory c of
          OtherSymbol             -> True
          _                       -> False
  
+-- | Selects Unicode space and separator characters.
  isSeparator :: Char -> Bool
  isSeparator c = case generalCategory c of
          Space                   -> True
diff --git a/GHC/Base.lhs b/GHC/Base.lhs

index 5b6a676..32bf498 100644 (file)
--- a/GHC/Base.lhs
+++ b/GHC/Base.lhs
@@ -560,7 +560,8 @@ data Ordering = LT | EQ | GT deriving (Eq, Ord)
  type String = [Char]
  
  {-| The character type 'Char' is an enumeration whose values represent
-Unicode (or equivalently ISO 10646) characters.
+Unicode (or equivalently ISO\/IEC 10646) characters
+(see <http://www.unicode.org/> for details).
  This set extends the ISO 8859-1 (Latin-1) character set
  (the first 256 characters), which is itself an extension of the ASCII
  character set (the first 128 characters).
diff --git a/GHC/Unicode.hs b/GHC/Unicode.hs

index 50fef9a..e22fae5 100644 (file)
--- a/GHC/Unicode.hs
+++ b/GHC/Unicode.hs
@@ -16,6 +16,7 @@
  --
  -----------------------------------------------------------------------------
  
+-- #hide
  module GHC.Unicode (
      isAscii, isLatin1, isControl,
      isAsciiUpper, isAsciiLower,
@@ -44,8 +45,14 @@ isAscii c            =  c <  '\x80'
  isLatin1                :: Char -> Bool
  isLatin1 c              =  c <= '\xff'
  
-isAsciiUpper, isAsciiLower :: Char -> Bool
+-- | Selects ASCII lower-case letters,
+-- i.e. characters satisfying both 'isAscii' and 'isLower'.
+isAsciiLower :: Char -> Bool
  isAsciiLower c          =  c >= 'a' && c <= 'z'
+
+-- | Selects ASCII upper-case letters,
+-- i.e. characters satisfying both 'isAscii' and 'isUpper'.
+isAsciiUpper :: Char -> Bool
  isAsciiUpper c          =  c >= 'A' && c <= 'Z'
  
  -- | Selects control characters, which are the non-printing characters of
@@ -71,15 +78,17 @@ isSpace c           =  c == ' '     ||
                            c == '\xa0'  ||
                            iswspace (fromIntegral (ord c)) /= 0
  
--- | Selects alphabetic Unicode characters (letters) that are not lower-case.
--- (In Unicode terms, this includes letters in upper and title cases,
--- as well as modifier letters and other letters.)
+-- | Selects upper-case or title-case alphabetic Unicode characters (letters).
+-- Title case is used by a small number of letter ligatures like the
+-- single-character form of /Lj/.
  isUpper                 :: Char -> Bool
  
  -- | Selects lower-case alphabetic Unicode characters (letters).
  isLower                 :: Char -> Bool
  
--- | Selects alphabetic Unicode characters (letters).
+-- | Selects alphabetic Unicode characters (lower-case, upper-case and
+-- title-case letters, plus letters of caseless scripts and modifier letters).
+-- This function is equivalent to 'Data.Char.isLetter'.
  isAlpha                 :: Char -> Bool
  
  -- | Selects alphabetic or numeric digit Unicode characters.
@@ -103,19 +112,18 @@ isHexDigit              :: Char -> Bool
  isHexDigit c           =  isDigit c || c >= 'A' && c <= 'F' ||
                                          c >= 'a' && c <= 'f'
  
--- | Convert a letter to the corresponding upper-case letter, leaving any
--- other character unchanged.  Any Unicode letter which has an upper-case
--- equivalent is transformed.
+-- | Convert a letter to the corresponding upper-case letter, if any.
+-- Any other character is returned unchanged.
  toUpper                 :: Char -> Char
  
--- | Convert a letter to the corresponding lower-case letter, leaving any
--- other character unchanged.  Any Unicode letter which has a lower-case
--- equivalent is transformed.
+-- | Convert a letter to the corresponding lower-case letter, if any.
+-- Any other character is returned unchanged.
  toLower                 :: Char -> Char
  
--- | Convert a letter to the corresponding title-case letter, leaving any
--- other character unchanged.  Any Unicode letter which has a lower-case
--- equivalent is transformed.
+-- | Convert a letter to the corresponding title-case or upper-case
+-- letter, if any.  (Title case differs from upper case only for a small
+-- number of ligature letters.)
+-- Any other character is returned unchanged.
  toTitle                 :: Char -> Char
  
  -- -----------------------------------------------------------------------------
author	ross <unknown>
	Wed, 16 Mar 2005 13:27:04 +0000 (13:27 +0000)
committer	ross <unknown>
	Wed, 16 Mar 2005 13:27:04 +0000 (13:27 +0000)
Data/Char.hs		patch \| blob \| history
GHC/Base.lhs		patch \| blob \| history
GHC/Unicode.hs		patch \| blob \| history