[project @ 2003-10-16 16:26:43 by ross]

[haskell-directory.git] / GHC / Unicode.hsc
diff --git a/GHC/Unicode.hsc b/GHC/Unicode.hsc

index 2314a5d..d664a13 100644 (file)
--- a/GHC/Unicode.hsc
+++ b/GHC/Unicode.hsc
@@ -32,24 +32,87 @@ import GHC.Num       (fromInteger)
  
  #include "config.h"
  
-isAscii, isLatin1, isAsciiUpper, isAsciiLower :: Char -> Bool
+-- | Selects the first 128 characters of the Unicode character set,
+-- corresponding to the ASCII character set.
+isAscii                 :: Char -> Bool
  isAscii c              =  c <  '\x80'
+
+-- | Selects the first 256 characters of the Unicode character set,
+-- corresponding to the ISO 8859-1 (Latin-1) character set.
+isLatin1                :: Char -> Bool
  isLatin1 c              =  c <= '\xff'
+
+isAsciiUpper, isAsciiLower :: Char -> Bool
  isAsciiLower c          =  c >= 'a' && c <= 'z'
  isAsciiUpper c          =  c >= 'A' && c <= 'Z'
  
-isControl, isPrint, isSpace, isUpper,
- isLower, isAlpha, isDigit, isOctDigit, isHexDigit, isAlphaNum
- :: Char -> Bool
+-- | Selects control characters, which are the non-printing characters of
+-- the Latin-1 subset of Unicode.
+isControl               :: Char -> Bool
+
+-- | Selects printable Unicode characters
+-- (letters, numbers, marks, punctuation, symbols and spaces).
+isPrint                 :: Char -> Bool
+
+-- | Selects white-space characters in the Latin-1 range.
+-- (In Unicode terms, this includes spaces and some control characters.)
+isSpace                 :: Char -> Bool
+-- isSpace also accepts the non-breaking space ('\xa0').
+-- It is written as explicit equalities both for efficiency and to avoid a
+-- tiresome recursive dependency on elem from GHC.List.
+isSpace c              =  c == ' '     ||
+                          c == '\t'    ||
+                          c == '\n'    ||
+                          c == '\r'    ||
+                          c == '\f'    ||
+                          c == '\v'    ||
+                          c == '\xa0'
+
+-- | Selects alphabetic Unicode characters (letters) that are not lower-case.
+-- (In Unicode terms, this includes letters in upper and title cases,
+-- as well as modifier letters and other letters.)
+isUpper                 :: Char -> Bool
+
+-- | Selects lower-case alphabetic Unicode characters (letters).
+isLower                 :: Char -> Bool
+
+-- | Selects alphabetic Unicode characters (letters).
+isAlpha                 :: Char -> Bool
  
+-- | Selects alphabetic or numeric digit Unicode characters.
+--
+-- Note that numeric digits outside the ASCII range are selected by this
+-- function but not by 'isDigit'.  Such digits may be part of identifiers
+-- but are not used by the printer and reader to represent numbers.
+isAlphaNum              :: Char -> Bool
+
+-- | Selects ASCII digits, i.e. @\'0\'@..@\'9\'@.
+isDigit                 :: Char -> Bool
+
+-- | Selects ASCII octal digits, i.e. @\'0\'@..@\'7\'@.
+isOctDigit              :: Char -> Bool
  isOctDigit c           =  c >= '0' && c <= '7'
+
+-- | Selects ASCII hexadecimal digits,
+-- i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@.
+isHexDigit              :: Char -> Bool
  isHexDigit c           =  isDigit c || c >= 'A' && c <= 'F' ||
                                          c >= 'a' && c <= 'f'
  
+-- | Convert a letter to the corresponding upper-case letter, leaving any
+-- other character unchanged.  Any Unicode letter which has an upper-case
+-- equivalent is transformed.
+toUpper                 :: Char -> Char
+
+-- | Convert a letter to the corresponding lower-case letter, leaving any
+-- other character unchanged.  Any Unicode letter which has a lower-case
+-- equivalent is transformed.
+toLower                 :: Char -> Char
+
  -- -----------------------------------------------------------------------------
  -- Win32 implementation
  
-#if defined(HAVE_WCTYPE_H) || mingw32_TARGET_OS
+#if (defined(HAVE_WCTYPE_H) && HAVE_ISWSPACE) || mingw32_TARGET_OS
  
  -- Use the wide-char classification functions if available.  Glibc
  -- seems to implement these properly, even for chars > 0xffff, as long
@@ -66,7 +129,7 @@ type CInt = (#type int)
  isDigit    c = iswdigit (fromIntegral (ord c)) /= 0
  isAlpha    c = iswalpha (fromIntegral (ord c)) /= 0
  isAlphaNum c = iswalnum (fromIntegral (ord c)) /= 0
-isSpace    c = iswspace (fromIntegral (ord c)) /= 0
+--isSpace    c = iswspace (fromIntegral (ord c)) /= 0  -- disabled; explicit isSpace is used
  isControl  c = iswcntrl (fromIntegral (ord c)) /= 0
  isPrint    c = iswprint (fromIntegral (ord c)) /= 0
  isUpper    c = iswupper (fromIntegral (ord c)) /= 0
@@ -113,17 +176,6 @@ foreign import ccall unsafe "towupper"
  isControl c            =  c < ' ' || c >= '\DEL' && c <= '\x9f'
  isPrint c              =  not (isControl c)
  
--- isSpace includes non-breaking space
--- Done with explicit equalities both for efficiency, and to avoid a tiresome
--- recursion with GHC.List elem
-isSpace c              =  c == ' '     ||
-                          c == '\t'    ||
-                          c == '\n'    ||
-                          c == '\r'    ||
-                          c == '\f'    ||
-                          c == '\v'    ||
-                          c == '\xa0'
-
  -- The upper case ISO characters have the multiplication sign dumped
  -- randomly in the middle of the range.  Go figure.
  isUpper c              =  c >= 'A' && c <= 'Z' || 
@@ -141,7 +193,6 @@ isAlphaNum c                =  isAlpha c || isDigit c
  
  -- Case-changing operations
  
-toUpper, toLower       :: Char -> Char
  toUpper c@(C## c##)
    | isAsciiLower c    = C## (chr## (ord## c## -## 32##))
    | isAscii c         = c