X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Data%2FChar.hs;h=40052a760761b9e263efe5e030e900d251c2bb59;hb=HEAD;hp=fa0e8995db604772d61b35b6676a202027e31759;hpb=62de77e9a5d1f19ab333f1796437e1b16f78d23e;p=ghc-base.git

diff --git a/Data/Char.hs b/Data/Char.hs
index fa0e899..40052a7 100644
--- a/Data/Char.hs
+++ b/Data/Char.hs
@@ -1,4 +1,5 @@
-{-# OPTIONS_GHC -fno-implicit-prelude #-}
+{-# LANGUAGE CPP, NoImplicitPrelude #-}
+
 -----------------------------------------------------------------------------
 -- |
 -- Module      :  Data.Char
@@ -13,25 +14,25 @@
 --
 -----------------------------------------------------------------------------
 
-module Data.Char 
+module Data.Char
     (
       Char
 
-    , String
-
     -- * Character classification
     -- | Unicode characters are divided into letters, numbers, marks,
     -- punctuation, symbols, separators (including spaces) and others
     -- (including control characters).
-    , isAscii, isLatin1, isControl, isSpace
-    , isLower, isUpper,  isAlpha,   isAlphaNum, isPrint
+    , isControl, isSpace
+    , isLower, isUpper, isAlpha, isAlphaNum, isPrint
     , isDigit, isOctDigit, isHexDigit
-    , isAsciiUpper, isAsciiLower
-#ifndef __NHC__
     , isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator
 
+    -- ** Subranges
+    , isAscii, isLatin1
+    , isAsciiUpper, isAsciiLower
+
+    -- ** Unicode general categories
     , GeneralCategory(..), generalCategory
-#endif
 
     -- * Case conversion
     , toUpper, toLower, toTitle  -- :: Char -> Char
@@ -46,7 +47,7 @@ module Data.Char
 
     -- * String representations
     , showLitChar       -- :: Char -> ShowS
-    , lexLitChar	-- :: ReadS String
+    , lexLitChar        -- :: ReadS String
     , readLitChar       -- :: ReadS Char 
 
      -- Implementation checked wrt. Haskell 98 lib report, 1/99.
@@ -54,6 +55,7 @@ module Data.Char
 
 #ifdef __GLASGOW_HASKELL__
 import GHC.Base
+import GHC.Arr (Ix)
 import GHC.Real (fromIntegral)
 import GHC.Show
 import GHC.Read (Read, readLitChar, lexLitChar)
@@ -63,6 +65,7 @@ import GHC.Enum
 #endif
 
 #ifdef __HUGS__
+import Hugs.Prelude (Ix)
 import Hugs.Char
 #endif
 
@@ -70,6 +73,9 @@ import Hugs.Char
 import Prelude
 import Prelude(Char,String)
 import Char
+import Ix
+import NHC.FFI (CInt)
+foreign import ccall unsafe "WCsubst.h u_gencat" wgencat :: CInt -> CInt
 #endif
 
 -- | Convert a single digit 'Char' to the corresponding 'Int'.  
@@ -78,10 +84,10 @@ import Char
 -- (i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@).
 digitToInt :: Char -> Int
 digitToInt c
- | isDigit c		=  ord c - ord '0'
+ | isDigit c            =  ord c - ord '0'
  | c >= 'a' && c <= 'f' =  ord c - ord 'a' + 10
  | c >= 'A' && c <= 'F' =  ord c - ord 'A' + 10
- | otherwise	        =  error ("Char.digitToInt: not a digit " ++ show c) -- sigh
+ | otherwise            =  error ("Char.digitToInt: not a digit " ++ show c) -- sigh
 
 #ifndef __GLASGOW_HASKELL__
 isAsciiUpper, isAsciiLower :: Char -> Bool
@@ -89,7 +95,6 @@ isAsciiLower c          =  c >= 'a' && c <= 'z'
 isAsciiUpper c          =  c >= 'A' && c <= 'Z'
 #endif
 
-#ifndef __NHC__
 -- | Unicode General Categories (column 2 of the UnicodeData table)
 -- in the order they are listed in the Unicode standard.
 
@@ -124,12 +129,12 @@ data GeneralCategory
         | Surrogate             -- ^ Cs: Other, Surrogate
         | PrivateUse            -- ^ Co: Other, Private Use
         | NotAssigned           -- ^ Cn: Other, Not Assigned
-        deriving (Eq, Ord, Enum, Read, Show, Bounded)
+        deriving (Eq, Ord, Enum, Read, Show, Bounded, Ix)
 
--- | Retrieves the general Unicode category of the character.
+-- | The Unicode general category of the character.
 generalCategory :: Char -> GeneralCategory
-#ifdef __GLASGOW_HASKELL__
-generalCategory c = toEnum (wgencat (fromIntegral (ord c)))
+#if defined(__GLASGOW_HASKELL__) || defined(__NHC__)
+generalCategory c = toEnum $ fromIntegral $ wgencat $ fromIntegral $ ord c
 #endif
 #ifdef __HUGS__
 generalCategory c = toEnum (primUniGenCat c)
@@ -137,6 +142,9 @@ generalCategory c = toEnum (primUniGenCat c)
 
 -- derived character classifiers
 
+-- | Selects alphabetic Unicode characters (lower-case, upper-case and
+-- title-case letters, plus letters of caseless scripts and modifier letters).
+-- This function is equivalent to 'Data.Char.isAlpha'.
 isLetter :: Char -> Bool
 isLetter c = case generalCategory c of
         UppercaseLetter         -> True
@@ -146,6 +154,8 @@ isLetter c = case generalCategory c of
         OtherLetter             -> True
         _                       -> False
 
+-- | Selects Unicode mark characters, e.g. accents and the like, which
+-- combine with preceding letters.
 isMark :: Char -> Bool
 isMark c = case generalCategory c of
         NonSpacingMark          -> True
@@ -153,6 +163,8 @@ isMark c = case generalCategory c of
         EnclosingMark           -> True
         _                       -> False
 
+-- | Selects Unicode numeric characters, including digits from various
+-- scripts, Roman numerals, etc.
 isNumber :: Char -> Bool
 isNumber c = case generalCategory c of
         DecimalNumber           -> True
@@ -160,6 +172,8 @@ isNumber c = case generalCategory c of
         OtherNumber             -> True
         _                       -> False
 
+-- | Selects Unicode punctuation characters, including various kinds
+-- of connectors, brackets and quotes.
 isPunctuation :: Char -> Bool
 isPunctuation c = case generalCategory c of
         ConnectorPunctuation    -> True
@@ -171,6 +185,8 @@ isPunctuation c = case generalCategory c of
         OtherPunctuation        -> True
         _                       -> False
 
+-- | Selects Unicode symbol characters, including mathematical and
+-- currency symbols.
 isSymbol :: Char -> Bool
 isSymbol c = case generalCategory c of
         MathSymbol              -> True
@@ -179,13 +195,13 @@ isSymbol c = case generalCategory c of
         OtherSymbol             -> True
         _                       -> False
 
+-- | Selects Unicode space and separator characters.
 isSeparator :: Char -> Bool
 isSeparator c = case generalCategory c of
         Space                   -> True
         LineSeparator           -> True
         ParagraphSeparator      -> True
         _                       -> False
-#endif /* !__NHC__ */
 
 #ifdef __NHC__
 -- dummy implementation