From 7e85a637700a5b134ac9381e811c0fc1da8cad8f Mon Sep 17 00:00:00 2001 From: simonmar Date: Wed, 27 Aug 2003 11:03:44 +0000 Subject: [PATCH] [project @ 2003-08-27 11:03:44 by simonmar] isSpace should only recognise Latin-1 whitespace characters, according to the report. Perhaps we should have isUniSpace, too? --- GHC/Unicode.hsc | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/GHC/Unicode.hsc b/GHC/Unicode.hsc index fc41060..7b84535 100644 --- a/GHC/Unicode.hsc +++ b/GHC/Unicode.hsc @@ -57,6 +57,16 @@ isPrint :: Char -> Bool -- | Selects white-space characters in the Latin-1 range. -- (In Unicode terms, this includes spaces and some control characters.) isSpace :: Char -> Bool +-- isSpace includes non-breaking space +-- Done with explicit equalities both for efficiency, and to avoid a tiresome +-- recursion with GHC.List elem +isSpace c = c == ' ' || + c == '\t' || + c == '\n' || + c == '\r' || + c == '\f' || + c == '\v' || + c == '\xa0' -- | Selects alphabetic Unicode characters (letters) that are not lower-case. -- (In Unicode terms, this includes letters in upper and title cases, @@ -119,7 +129,7 @@ type CInt = (#type int) isDigit c = iswdigit (fromIntegral (ord c)) /= 0 isAlpha c = iswalpha (fromIntegral (ord c)) /= 0 isAlphaNum c = iswalnum (fromIntegral (ord c)) /= 0 -isSpace c = iswspace (fromIntegral (ord c)) /= 0 +--isSpace c = iswspace (fromIntegral (ord c)) /= 0 isControl c = iswcntrl (fromIntegral (ord c)) /= 0 isPrint c = iswprint (fromIntegral (ord c)) /= 0 isUpper c = iswupper (fromIntegral (ord c)) /= 0 @@ -166,17 +176,6 @@ foreign import ccall unsafe "towupper" isControl c = c < ' ' || c >= '\DEL' && c <= '\x9f' isPrint c = not (isControl c) --- isSpace includes non-breaking space --- Done with explicit equalities both for efficiency, and to avoid a tiresome --- recursion with GHC.List elem -isSpace c = c == ' ' || - c == '\t' || - c == '\n' || - c == '\r' || - c == '\f' || - c == '\v' || - c == '\xa0' - -- The upper case ISO characters have the multiplication sign dumped -- randomly in the middle of the range. Go figure. isUpper c = c >= 'A' && c <= 'Z' || -- 1.7.10.4