Ignore .rej files

[ghc-hetmet.git] / compiler / utils / Encoding.hs
diff --git a/compiler/utils/Encoding.hs b/compiler/utils/Encoding.hs

index 3381265..84b4e09 100644 (file)
--- a/compiler/utils/Encoding.hs
+++ b/compiler/utils/Encoding.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE BangPatterns #-}
  {-# OPTIONS_GHC -O #-}
  -- We always optimise this, otherwise performance of a non-optimised
  -- compiler is severely affected
@@ -29,10 +30,8 @@ module Encoding (
  
  #include "HsVersions.h"
  import Foreign
-import Data.Char        ( ord, chr, isDigit, digitToInt, intToDigit,
-                          isHexDigit )
-import Numeric          ( showIntAtBase )
-import Data.Bits
+import Data.Char
+import Numeric
  import GHC.Ptr          ( Ptr(..) )
  import GHC.Base
  
@@ -51,21 +50,21 @@ import GHC.Base
  {-# INLINE utf8DecodeChar# #-}
  utf8DecodeChar# :: Addr# -> (# Char#, Addr# #)
  utf8DecodeChar# a# =
-  let ch0 = word2Int# (indexWord8OffAddr# a# 0#) in
+  let !ch0 = word2Int# (indexWord8OffAddr# a# 0#) in
    case () of
      _ | ch0 <=# 0x7F# -> (# chr# ch0, a# `plusAddr#` 1# #)
  
        | ch0 >=# 0xC0# && ch0 <=# 0xDF# ->
-        let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
+        let !ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
          if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else
          (# chr# (((ch0 -# 0xC0#) `uncheckedIShiftL#` 6#) +#
                    (ch1 -# 0x80#)),
             a# `plusAddr#` 2# #)
  
        | ch0 >=# 0xE0# && ch0 <=# 0xEF# ->
-        let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
+        let !ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
          if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else
-        let ch2 = word2Int# (indexWord8OffAddr# a# 2#) in
+        let !ch2 = word2Int# (indexWord8OffAddr# a# 2#) in
          if ch2 <# 0x80# || ch2 >=# 0xC0# then fail 2# else
          (# chr# (((ch0 -# 0xE0#) `uncheckedIShiftL#` 12#) +#
                   ((ch1 -# 0x80#) `uncheckedIShiftL#` 6#)  +#
@@ -73,11 +72,11 @@ utf8DecodeChar# a# =
             a# `plusAddr#` 3# #)
  
       | ch0 >=# 0xF0# && ch0 <=# 0xF8# ->
-        let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
+        let !ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
          if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else
-        let ch2 = word2Int# (indexWord8OffAddr# a# 2#) in
+        let !ch2 = word2Int# (indexWord8OffAddr# a# 2#) in
          if ch2 <# 0x80# || ch2 >=# 0xC0# then fail 2# else
-        let ch3 = word2Int# (indexWord8OffAddr# a# 3#) in
+        let !ch3 = word2Int# (indexWord8OffAddr# a# 3#) in
          if ch3 <# 0x80# || ch3 >=# 0xC0# then fail 3# else
          (# chr# (((ch0 -# 0xF0#) `uncheckedIShiftL#` 18#) +#
                   ((ch1 -# 0x80#) `uncheckedIShiftL#` 12#) +#
@@ -117,7 +116,7 @@ STRICT2(utf8DecodeString)
  utf8DecodeString (Ptr a#) (I# len#)
    = unpack a#
    where
-    end# = addr2Int# (a# `plusAddr#` len#)
+    !end# = addr2Int# (a# `plusAddr#` len#)
  
      unpack p#
          | addr2Int# p# >=# end# = return []
@@ -239,7 +238,9 @@ zEncodeString cs = case maybe_tuple cs of
                  Nothing -> go cs
            where
                  go []     = []
-                go (c:cs) = encode_ch c ++ go cs
+                go (c:cs) = encode_digit_ch c ++ go' cs
+                go' []     = []
+                go' (c:cs) = encode_ch c ++ go' cs
  
  unencodedChar :: Char -> Bool   -- True for chars that don't need encoding
  unencodedChar 'Z' = False
@@ -248,6 +249,12 @@ unencodedChar c   =  c >= 'a' && c <= 'z'
                    || c >= 'A' && c <= 'Z'
                    || c >= '0' && c <= '9'
  
+-- If a digit is at the start of a symbol then we need to encode it.
+-- Otherwise package names like 9pH-0.1 give linker errors.
+encode_digit_ch :: Char -> EncodedString
+encode_digit_ch c | c >= '0' && c <= '9' = encode_as_unicode_char c
+encode_digit_ch c | otherwise            = encode_ch c
+
  encode_ch :: Char -> EncodedString
  encode_ch c | unencodedChar c = [c]     -- Common case first
  
@@ -279,16 +286,16 @@ encode_ch '/'  = "zs"
  encode_ch '*'  = "zt"
  encode_ch '_'  = "zu"
  encode_ch '%'  = "zv"
-encode_ch c    = 'z' : if isDigit (head hex_str) then hex_str
-                                                 else '0':hex_str
+encode_ch c    = encode_as_unicode_char c
+
+encode_as_unicode_char :: Char -> EncodedString
+encode_as_unicode_char c = 'z' : if isDigit (head hex_str) then hex_str
+                                                           else '0':hex_str
    where hex_str = showHex (ord c) "U"
    -- ToDo: we could improve the encoding here in various ways.
    -- eg. strings of unicode characters come out as 'z1234Uz5678U', we
    -- could remove the 'U' in the middle (the 'z' works as a separator).
  
-        showHex = showIntAtBase 16 intToDigit
-        -- needed because prior to GHC 6.2, Numeric.showHex added a "0x" prefix
-
  zDecodeString :: EncodedString -> UserString
  zDecodeString [] = []
  zDecodeString ('Z' : d : rest)