From 14c3d7f368a6ec5e760b2fd57218171eeaf53a29 Mon Sep 17 00:00:00 2001
From: simonmar <unknown>
Date: Tue, 19 Aug 2003 16:39:14 +0000
Subject: [PATCH] [project @ 2003-08-19 16:39:13 by simonmar] Use the
 wide-char classifications from the C library if available.
 This gives us Unicode-aware isLower, isUpper, isAlpha etc.

On Unix, you have to set your locale to something.  This is usually
done by setting the environment variable LANG, eg.

  export LANG=en

This stuff *should* also work on Windows, except that Windows uses a
16-bit wchar_t so will get it wrong for characters > '\xffff'.  However,
I figured it was better to use the system-supplied functionality
rather than trying to implement this stuff ourselves.
---
 Data/Char.hs                    |   10 +++
 Data/List.hs                    |   48 +++++++++++-
 GHC/Read.lhs                    |    5 +-
 GHC/Show.lhs                    |  157 ++++-----------------------------------
 Text/ParserCombinators/ReadP.hs |    2 +-
 Text/Read/Lex.hs                |    3 +-
 include/HsBase.h                |    5 +-
 7 files changed, 81 insertions(+), 149 deletions(-)
diff --git a/Data/Char.hs b/Data/Char.hs
index 2800684..001c83f 100644
--- a/Data/Char.hs
+++ b/Data/Char.hs
@@ -42,6 +42,8 @@ module Data.Char
 import GHC.Base
 import GHC.Show
 import GHC.Read (readLitChar, lexLitChar)
+import GHC.Unicode
+import GHC.Num
 #endif
 
 #ifdef __HUGS__
@@ -53,3 +55,11 @@ import Prelude
 import Prelude(Char,String)
 import Char
 #endif
+
+
+digitToInt :: Char -> Int
+digitToInt c
+ | isDigit c		=  ord c - ord '0'
+ | c >= 'a' && c <= 'f' =  ord c - ord 'a' + 10
+ | c >= 'A' && c <= 'F' =  ord c - ord 'A' + 10
+ | otherwise	        =  error ("Char.digitToInt: not a digit " ++ show c) -- sigh
diff --git a/Data/List.hs b/Data/List.hs
index 3e2bead..0a4bb20 100644
--- a/Data/List.hs
+++ b/Data/List.hs
@@ -141,12 +141,12 @@ import Prelude hiding (Maybe(..))
 #endif
 
 import Data.Maybe
+import Data.Char	( isSpace )
 
 #ifdef __GLASGOW_HASKELL__
 import GHC.Num
 import GHC.Real
 import GHC.List
-import GHC.Show	( lines, words, unlines, unwords )
 import GHC.Base
 #endif
 
@@ -723,3 +723,49 @@ product	l	= prod l 1
     prod (x:xs) a = prod xs (a*x)
 #endif
 #endif  /* __GLASGOW_HASKELL__ */
+
+-- -----------------------------------------------------------------------------
+-- Functions on strings
+
+-- lines breaks a string up into a list of strings at newline characters.
+-- The resulting strings do not contain newlines.  Similary, words
+-- breaks a string up into a list of words, which were delimited by
+-- white space.  unlines and unwords are the inverse operations.
+-- unlines joins lines with terminating newlines, and unwords joins
+-- words with separating spaces.
+
+lines			:: String -> [String]
+lines ""		=  []
+lines s			=  let (l, s') = break (== '\n') s
+			   in  l : case s' of
+					[]     	-> []
+					(_:s'') -> lines s''
+
+unlines			:: [String] -> String
+#ifdef USE_REPORT_PRELUDE
+unlines			=  concatMap (++ "\n")
+#else
+-- HBC version (stolen)
+-- here's a more efficient version
+unlines [] = []
+unlines (l:ls) = l ++ '\n' : unlines ls
+#endif
+
+words			:: String -> [String]
+words s			=  case dropWhile {-partain:Char.-}isSpace s of
+				"" -> []
+				s' -> w : words s''
+				      where (w, s'') = 
+                                             break {-partain:Char.-}isSpace s'
+
+unwords			:: [String] -> String
+#ifdef USE_REPORT_PRELUDE
+unwords []		=  ""
+unwords ws		=  foldr1 (\w s -> w ++ ' ':s) ws
+#else
+-- HBC version (stolen)
+-- here's a more efficient version
+unwords []		=  ""
+unwords [w]		= w
+unwords (w:ws)		= w ++ ' ' : unwords ws
+#endif
diff --git a/GHC/Read.lhs b/GHC/Read.lhs
index b67e83b..aba24ff 100644
--- a/GHC/Read.lhs
+++ b/GHC/Read.lhs
@@ -3,7 +3,7 @@
 -----------------------------------------------------------------------------
 -- |
 -- Module      :  GHC.Read
--- Copyright   :  (c) The FFI Task Force, 1994-2002
+-- Copyright   :  (c) The University of Glasgow, 1994-2002
 -- License     :  see libraries/base/LICENSE
 -- 
 -- Maintainer  :  cvs-ghc@haskell.org
@@ -65,11 +65,12 @@ import Data.Maybe
 import Data.Either
 
 import {-# SOURCE #-} GHC.Err		( error )
+import {-# SOURCE #-} GHC.Unicode	( isDigit )
 import GHC.Num
 import GHC.Real
 import GHC.Float
 import GHC.List
-import GHC.Show		-- isAlpha etc
+import GHC.Show
 import GHC.Base
 import GHC.Arr
 \end{code}
diff --git a/GHC/Show.lhs b/GHC/Show.lhs
index 1401241..5c0382a 100644
--- a/GHC/Show.lhs
+++ b/GHC/Show.lhs
@@ -23,17 +23,11 @@ module GHC.Show
 	-- Show support code
 	shows, showChar, showString, showParen, showList__, showSpace,
 	showLitChar, protectEsc, 
-	intToDigit, digitToInt, showSignedInt,
+	intToDigit, showSignedInt,
 	appPrec, appPrec1,
 
 	-- Character operations
-	isAscii, isLatin1, isControl, isPrint, isSpace, isUpper,
-	isLower, isAlpha, isDigit, isOctDigit, isHexDigit, isAlphaNum,
-	toUpper, toLower,
 	asciiTab,
-
-	-- String operations
-	lines, unlines, words, unwords
   ) 
 	where
 
@@ -42,7 +36,7 @@ import GHC.Base
 import GHC.Enum
 import Data.Maybe
 import Data.Either
-import GHC.List	( (!!), break, dropWhile
+import GHC.List	( (!!),
 #ifdef USE_REPORT_PRELUDE
                 , concatMap, foldr1
 #endif
@@ -207,7 +201,7 @@ Code specific for characters
 
 \begin{code}
 showLitChar 		   :: Char -> ShowS
-showLitChar c s | c > '\DEL' =  showChar '\\' (protectEsc isDigit (shows (ord c)) s)
+showLitChar c s | c > '\DEL' =  showChar '\\' (protectEsc isDec (shows (ord c)) s)
 showLitChar '\DEL'	   s =  showString "\\DEL" s
 showLitChar '\\'	   s =  showString "\\\\" s
 showLitChar c s | c >= ' '   =  showChar c s
@@ -223,10 +217,21 @@ showLitChar c		   s =  showString ('\\' : asciiTab!!ord c) s
 	-- I've done manual eta-expansion here, becuase otherwise it's
 	-- impossible to stop (asciiTab!!ord) getting floated out as an MFE
 
+isDec c = c >= '0' && c <= '9'
+
 protectEsc :: (Char -> Bool) -> ShowS -> ShowS
 protectEsc p f		   = f . cont
 			     where cont s@(c:_) | p c = "\\&" ++ s
 				   cont s	      = s
+
+
+asciiTab :: [String]
+asciiTab = -- Using an array drags in the array module.  listArray ('\NUL', ' ')
+	   ["NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
+	    "BS",  "HT",  "LF",  "VT",  "FF",  "CR",  "SO",  "SI", 
+	    "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
+	    "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US", 
+	    "SP"] 
 \end{code}
 
 Code specific for Ints.
@@ -238,13 +243,6 @@ intToDigit (I# i)
     | i >=# 10# && i <=# 15# =  unsafeChr (ord 'a' `minusInt` ten `plusInt` I# i)
     | otherwise		  =  error ("Char.intToDigit: not a digit " ++ show (I# i))
 
-digitToInt :: Char -> Int
-digitToInt c
- | isDigit c		=  ord c `minusInt` ord '0'
- | c >= 'a' && c <= 'f' =  ord c `minusInt` ord 'a' `plusInt` ten
- | c >= 'A' && c <= 'F' =  ord c `minusInt` ord 'A' `plusInt` ten
- | otherwise	        =  error ("Char.digitToInt: not a digit " ++ show c) -- sigh
-
 ten = I# 10#
 
 showSignedInt :: Int -> Int -> ShowS
@@ -268,130 +266,3 @@ itos n# cs
         | otherwise = case chr# (ord# '0'# +# (n# `remInt#` 10#)) of { c# ->
 		      itos' (n# `quotInt#` 10#) (C# c# : cs) }
 \end{code}
-
-
-%*********************************************************
-%*							*
-\subsection{Character stuff}
-%*							*
-%*********************************************************
-
-\begin{code}
-isAscii, isLatin1, isControl, isPrint, isSpace, isUpper,
- isLower, isAlpha, isDigit, isOctDigit, isHexDigit, isAlphaNum,
- isAsciiUpper, isAsciiLower :: Char -> Bool
-isAscii c	 	=  c <  '\x80'
-isLatin1 c              =  c <= '\xff'
-isControl c		=  c < ' ' || c >= '\DEL' && c <= '\x9f'
-isPrint c		=  not (isControl c)
-
--- isSpace includes non-breaking space
--- Done with explicit equalities both for efficiency, and to avoid a tiresome
--- recursion with GHC.List elem
-isSpace c		=  c == ' '	||
-			   c == '\t'	||
-			   c == '\n'	||
-			   c == '\r'	||
-			   c == '\f'	||
-			   c == '\v'	||
-			   c == '\xa0'
-
--- The upper case ISO characters have the multiplication sign dumped
--- randomly in the middle of the range.  Go figure.
-isUpper c		=  c >= 'A' && c <= 'Z' || 
-                           c >= '\xC0' && c <= '\xD6' ||
-                           c >= '\xD8' && c <= '\xDE'
--- The lower case ISO characters have the division sign dumped
--- randomly in the middle of the range.  Go figure.
-isLower c		=  c >= 'a' && c <= 'z' ||
-                           c >= '\xDF' && c <= '\xF6' ||
-                           c >= '\xF8' && c <= '\xFF'
-isAsciiLower c          =  c >= 'a' && c <= 'z'
-isAsciiUpper c          =  c >= 'A' && c <= 'Z'
-
-isAlpha c		=  isLower c || isUpper c
-isDigit c		=  c >= '0' && c <= '9'
-isOctDigit c		=  c >= '0' && c <= '7'
-isHexDigit c		=  isDigit c || c >= 'A' && c <= 'F' ||
-                                        c >= 'a' && c <= 'f'
-isAlphaNum c		=  isAlpha c || isDigit c
-
--- Case-changing operations
-
-toUpper, toLower	:: Char -> Char
-toUpper c@(C# c#)
-  | isAsciiLower c    = C# (chr# (ord# c# -# 32#))
-  | isAscii c         = c
-    -- fall-through to the slower stuff.
-  | isLower c	&& c /= '\xDF' && c /= '\xFF'
-  = unsafeChr (ord c `minusInt` ord 'a' `plusInt` ord 'A')
-  | otherwise
-  = c
-
-
-toLower c@(C# c#)
-  | isAsciiUpper c = C# (chr# (ord# c# +# 32#))
-  | isAscii c      = c
-  | isUpper c	   = unsafeChr (ord c `minusInt` ord 'A' `plusInt` ord 'a')
-  | otherwise	   =  c
-
-asciiTab :: [String]
-asciiTab = -- Using an array drags in the array module.  listArray ('\NUL', ' ')
-	   ["NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
-	    "BS",  "HT",  "LF",  "VT",  "FF",  "CR",  "SO",  "SI", 
-	    "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
-	    "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US", 
-	    "SP"] 
-\end{code}
-
-%*********************************************************
-%*							*
-\subsection{Functions on strings}
-%*							*
-%*********************************************************
-
-lines breaks a string up into a list of strings at newline characters.
-The resulting strings do not contain newlines.  Similary, words
-breaks a string up into a list of words, which were delimited by
-white space.  unlines and unwords are the inverse operations.
-unlines joins lines with terminating newlines, and unwords joins
-words with separating spaces.
-
-\begin{code}
-lines			:: String -> [String]
-lines ""		=  []
-lines s			=  let (l, s') = break (== '\n') s
-			   in  l : case s' of
-					[]     	-> []
-					(_:s'') -> lines s''
-
-words			:: String -> [String]
-words s			=  case dropWhile {-partain:Char.-}isSpace s of
-				"" -> []
-				s' -> w : words s''
-				      where (w, s'') = 
-                                             break {-partain:Char.-}isSpace s'
-
-unlines			:: [String] -> String
-#ifdef USE_REPORT_PRELUDE
-unlines			=  concatMap (++ "\n")
-#else
--- HBC version (stolen)
--- here's a more efficient version
-unlines [] = []
-unlines (l:ls) = l ++ '\n' : unlines ls
-#endif
-
-unwords			:: [String] -> String
-#ifdef USE_REPORT_PRELUDE
-unwords []		=  ""
-unwords ws		=  foldr1 (\w s -> w ++ ' ':s) ws
-#else
--- HBC version (stolen)
--- here's a more efficient version
-unwords []		=  ""
-unwords [w]		= w
-unwords (w:ws)		= w ++ ' ' : unwords ws
-#endif
-
-\end{code}
diff --git a/Text/ParserCombinators/ReadP.hs b/Text/ParserCombinators/ReadP.hs
index 8a9cc80..ab84419 100644
--- a/Text/ParserCombinators/ReadP.hs
+++ b/Text/ParserCombinators/ReadP.hs
@@ -51,7 +51,7 @@ module Text.ParserCombinators.ReadP
 
 import Control.Monad( MonadPlus(..) )
 #ifdef __GLASGOW_HASKELL__
-import GHC.Show( isSpace  )
+import {-# SOURCE #-} GHC.Unicode ( isSpace  )
 import GHC.Base
 #else
 import Data.Char( isSpace )
diff --git a/Text/Read/Lex.hs b/Text/Read/Lex.hs
index 9be4220..ff63aa1 100644
--- a/Text/Read/Lex.hs
+++ b/Text/Read/Lex.hs
@@ -34,7 +34,8 @@ import Text.ParserCombinators.ReadP
 #ifdef __GLASGOW_HASKELL__
 import GHC.Base
 import GHC.Num( Num(..), Integer )
-import GHC.Show( Show(.. ), isSpace, isAlpha, isAlphaNum )
+import GHC.Show( Show(..) )
+import {-# SOURCE #-} GHC.Unicode ( isSpace, isAlpha, isAlphaNum )
 import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral, 
 		 toInteger, (^), (^^), infinity, notANumber )
 import GHC.List
diff --git a/include/HsBase.h b/include/HsBase.h
index d124107..51f2e60 100644
--- a/include/HsBase.h
+++ b/include/HsBase.h
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: HsBase.h,v 1.22 2003/06/12 16:06:07 simonmar Exp $
+ * $Id: HsBase.h,v 1.23 2003/08/19 16:39:14 simonmar Exp $
  *
  * (c) The University of Glasgow 2001-2002
  *
@@ -78,6 +78,9 @@
 #ifdef HAVE_LIMITS_H
 #include <limits.h>
 #endif
+#ifdef HAVE_WCTYPE_H
+#include <wctype.h>
+#endif
 
 #if !defined(mingw32_TARGET_OS) && !defined(irix_TARGET_OS)
 # if defined(HAVE_SYS_RESOURCE_H)
-- 
1.7.10.4