From 77448dfad0a5b02e98f036214409b55a54f7eadc Mon Sep 17 00:00:00 2001 From: qrczak Date: Sun, 18 Feb 2001 14:45:15 +0000 Subject: [PATCH] [project @ 2001-02-18 14:45:15 by qrczak] Recent Unicode and future ISO-10646 finally decided that the character code space ends at U+10FFFF. Let ghc follow the rules: maxBound::Char is now '\x10FFFF', etc. --- ghc/compiler/parser/Lex.lhs | 2 +- ghc/compiler/utils/UnicodeUtil.lhs | 15 +--------- ghc/lib/std/PrelBase.lhs | 54 +++++++++++++----------------------- ghc/lib/std/PrelEnum.lhs | 12 ++++---- 4 files changed, 28 insertions(+), 55 deletions(-) diff --git a/ghc/compiler/parser/Lex.lhs b/ghc/compiler/parser/Lex.lhs index 8fdd6ad..130eef8 100644 --- a/ghc/compiler/parser/Lex.lhs +++ b/ghc/compiler/parser/Lex.lhs @@ -742,7 +742,7 @@ lex_escape cont buf [] -> charError buf' after_charnum cont i buf - = if i >= 0 && i <= 0x7FFFFFFF + = if i >= 0 && i <= 0x10FFFF then cont (fromInteger i) buf else charError buf diff --git a/ghc/compiler/utils/UnicodeUtil.lhs b/ghc/compiler/utils/UnicodeUtil.lhs index 64062dd..51ca358 100644 --- a/ghc/compiler/utils/UnicodeUtil.lhs +++ b/ghc/compiler/utils/UnicodeUtil.lhs @@ -24,20 +24,7 @@ stringToUtf8 (c:s) chr (0x80 + c `div` 0x40 `mod` 0x40) : chr (0x80 + c `mod` 0x40) : stringToUtf8 s - | c <= 0x1FFFFF = chr (0xF0 + c `div` 0x40000 ) : - chr (0x80 + c `div` 0x1000 `mod` 0x40) : - chr (0x80 + c `div` 0x40 `mod` 0x40) : - chr (0x80 + c `mod` 0x40) : - stringToUtf8 s - | c <= 0x3FFFFFF = chr (0xF8 + c `div` 0x1000000 ) : - chr (0x80 + c `div` 0x40000 `mod` 0x40) : - chr (0x80 + c `div` 0x1000 `mod` 0x40) : - chr (0x80 + c `div` 0x40 `mod` 0x40) : - chr (0x80 + c `mod` 0x40) : - stringToUtf8 s - | c <= 0x7FFFFFFF = chr (0xFC + c `div` 0x40000000 ) : - chr (0x80 + c `div` 0x1000000 `mod` 0x40) : - chr (0x80 + c `div` 0x40000 `mod` 0x40) : + | c <= 0x10FFFF = chr (0xF0 + c `div` 0x40000 ) : chr (0x80 + c `div` 0x1000 `mod` 0x40) : chr (0x80 + c `div` 0x40 `mod` 0x40) : chr (0x80 + c `mod` 0x40) : 
diff --git a/ghc/lib/std/PrelBase.lhs b/ghc/lib/std/PrelBase.lhs index 4c0bcbe..7a933e0 100644 --- a/ghc/lib/std/PrelBase.lhs +++ b/ghc/lib/std/PrelBase.lhs @@ -1,5 +1,5 @@ % ----------------------------------------------------------------------------- -% $Id: PrelBase.lhs,v 1.39 2000/10/03 08:43:05 simonpj Exp $ +% $Id: PrelBase.lhs,v 1.40 2001/02/18 14:45:15 qrczak Exp $ % % (c) The University of Glasgow, 1992-2000 % @@ -435,12 +435,9 @@ instance Ord Char where (C# c1) < (C# c2) = c1 `ltChar#` c2 chr :: Int -> Char -chr (I# i) | i >=# 0# -#if INT_SIZE_IN_BYTES > 4 - && i <=# 0x7FFFFFFF# -#endif - = C# (chr# i) - | otherwise = error ("Prelude.chr: bad argument") +chr (I# i) | i >=# 0# && i <=# 0x10FFFF# + = C# (chr# i) + | otherwise = error "Prelude.chr: bad argument" unsafeChr :: Int -> Char unsafeChr (I# i) = C# (chr# i) @@ -687,34 +684,23 @@ unpackCStringUtf8# addr = unpack 0# where unpack nh - | ch `eqChar#` '\0'# = [] + | ch `eqChar#` '\0'# = [] | ch `leChar#` '\x7F'# = C# ch : unpack (nh +# 1#) - | ch `leChar#` '\xDF'# = C# (chr# ((ord# ch `iShiftL#` 6#) +# - (ord# (indexCharOffAddr# addr (nh +# 1#))) -# 0x3080#)) - : unpack (nh +# 2#) - | ch `leChar#` '\xEF'# = C# (chr# ((ord# ch `iShiftL#` 12#) +# - (ord# (indexCharOffAddr# addr (nh +# 1#)) `iShiftL#` 6#) +# - (ord# (indexCharOffAddr# addr (nh +# 2#))) -# 0xE2080#)) - : unpack (nh +# 3#) - | ch `leChar#` '\xF7'# = C# (chr# ((ord# ch `iShiftL#` 18#) +# - (ord# (indexCharOffAddr# addr (nh +# 1#)) `iShiftL#` 12#) +# - (ord# (indexCharOffAddr# addr (nh +# 2#)) `iShiftL#` 6#) +# - (ord# (indexCharOffAddr# addr (nh +# 3#))) -# 0x3C82080#)) - : unpack (nh +# 4#) - | ch `leChar#` '\xFB'# = C# (chr# ((ord# ch -# 0xF8# `iShiftL#` 24#) +# - (ord# (indexCharOffAddr# addr (nh +# 1#)) `iShiftL#` 18#) +# - (ord# (indexCharOffAddr# addr (nh +# 2#)) `iShiftL#` 12#) +# - (ord# (indexCharOffAddr# addr (nh +# 3#)) `iShiftL#` 6#) +# - (ord# (indexCharOffAddr# addr (nh +# 4#))) -# 0x2082080#)) - : unpack (nh +# 5#) - | 
otherwise = C# (chr# (((ord# ch -# 0xFC#) `iShiftL#` 30#) +# - ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) - `iShiftL#` 24#) +# - (ord# (indexCharOffAddr# addr (nh +# 2#)) `iShiftL#` 18#) +# - (ord# (indexCharOffAddr# addr (nh +# 3#)) `iShiftL#` 12#) +# - (ord# (indexCharOffAddr# addr (nh +# 4#)) `iShiftL#` 6#) +# - (ord# (indexCharOffAddr# addr (nh +# 5#))) -# 0x2082080#)) - : unpack (nh +# 6#) + | ch `leChar#` '\xDF'# = + C# (chr# (((ord# ch -# 0xC0#) `iShiftL#` 6#) +# + (ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#))) : + unpack (nh +# 2#) + | ch `leChar#` '\xEF'# = + C# (chr# (((ord# ch -# 0xE0#) `iShiftL#` 12#) +# + ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `iShiftL#` 6#) +# + (ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#))) : + unpack (nh +# 3#) + | otherwise = + C# (chr# (((ord# ch -# 0xF0#) `iShiftL#` 18#) +# + ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `iShiftL#` 12#) +# + ((ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#) `iShiftL#` 6#) +# + (ord# (indexCharOffAddr# addr (nh +# 3#)) -# 0x80#))) : + unpack (nh +# 4#) where ch = indexCharOffAddr# addr nh diff --git a/ghc/lib/std/PrelEnum.lhs b/ghc/lib/std/PrelEnum.lhs index 62f3167..0104e46 100644 --- a/ghc/lib/std/PrelEnum.lhs +++ b/ghc/lib/std/PrelEnum.lhs @@ -1,5 +1,5 @@ % ----------------------------------------------------------------------------- -% $Id: PrelEnum.lhs,v 1.12 2000/08/07 23:37:23 qrczak Exp $ +% $Id: PrelEnum.lhs,v 1.13 2001/02/18 14:45:15 qrczak Exp $ % % (c) The University of Glasgow, 1992-2000 % @@ -180,11 +180,11 @@ instance Enum Ordering where \begin{code} instance Bounded Char where minBound = '\0' - maxBound = '\x7FFFFFFF' + maxBound = '\x10FFFF' instance Enum Char where succ (C# c#) - | not (ord# c# ==# 0x7FFFFFFF#) = C# (chr# (ord# c# +# 1#)) + | not (ord# c# ==# 0x10FFFF#) = C# (chr# (ord# c# +# 1#)) | otherwise = error ("Prelude.Enum.Char.succ: bad argument") pred (C# c#) | not (ord# c# ==# 0#) = C# (chr# (ord# c# -# 1#)) @@ -194,7 +194,7 @@ 
instance Enum Char where fromEnum = ord {-# INLINE enumFrom #-} - enumFrom (C# x) = eftChar (ord# x) 0x7FFFFFFF# + enumFrom (C# x) = eftChar (ord# x) 0x10FFFF# -- Blarg: technically I guess enumFrom isn't strict! {-# INLINE enumFromTo #-} @@ -235,13 +235,13 @@ eftCharList x y | x ># y = [] -- For enumFromThenTo we give up on inlining efdCharFB c n x1 x2 - | delta >=# 0# = go_up_char_fb c n x1 delta 0x7FFFFFFF# + | delta >=# 0# = go_up_char_fb c n x1 delta 0x10FFFF# | otherwise = go_dn_char_fb c n x1 delta 0# where delta = x2 -# x1 efdCharList x1 x2 - | delta >=# 0# = go_up_char_list x1 delta 0x7FFFFFFF# + | delta >=# 0# = go_up_char_list x1 delta 0x10FFFF# | otherwise = go_dn_char_list x1 delta 0# where delta = x2 -# x1 -- 1.7.10.4