X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Futils%2FEncoding.hs;h=e14f1e749a4f2e7e657acafb00659e6c197eff07;hp=33812650dd333f983749270b4a22843439982357;hb=831a35dd00faff195cf938659c2dd736192b865f;hpb=80b4eda6de2ea0f524e52b59415d84e04f7b1d5d diff --git a/compiler/utils/Encoding.hs b/compiler/utils/Encoding.hs index 3381265..e14f1e7 100644 --- a/compiler/utils/Encoding.hs +++ b/compiler/utils/Encoding.hs @@ -29,9 +29,8 @@ module Encoding ( #include "HsVersions.h" import Foreign -import Data.Char ( ord, chr, isDigit, digitToInt, intToDigit, - isHexDigit ) -import Numeric ( showIntAtBase ) +import Data.Char +import Numeric import Data.Bits import GHC.Ptr ( Ptr(..) ) import GHC.Base @@ -51,21 +50,21 @@ import GHC.Base {-# INLINE utf8DecodeChar# #-} utf8DecodeChar# :: Addr# -> (# Char#, Addr# #) utf8DecodeChar# a# = - let ch0 = word2Int# (indexWord8OffAddr# a# 0#) in + let !ch0 = word2Int# (indexWord8OffAddr# a# 0#) in case () of _ | ch0 <=# 0x7F# -> (# chr# ch0, a# `plusAddr#` 1# #) | ch0 >=# 0xC0# && ch0 <=# 0xDF# -> - let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in + let !ch1 = word2Int# (indexWord8OffAddr# a# 1#) in if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else (# chr# (((ch0 -# 0xC0#) `uncheckedIShiftL#` 6#) +# (ch1 -# 0x80#)), a# `plusAddr#` 2# #) | ch0 >=# 0xE0# && ch0 <=# 0xEF# -> - let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in + let !ch1 = word2Int# (indexWord8OffAddr# a# 1#) in if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else - let ch2 = word2Int# (indexWord8OffAddr# a# 2#) in + let !ch2 = word2Int# (indexWord8OffAddr# a# 2#) in if ch2 <# 0x80# || ch2 >=# 0xC0# then fail 2# else (# chr# (((ch0 -# 0xE0#) `uncheckedIShiftL#` 12#) +# ((ch1 -# 0x80#) `uncheckedIShiftL#` 6#) +# @@ -73,11 +72,11 @@ utf8DecodeChar# a# = a# `plusAddr#` 3# #) | ch0 >=# 0xF0# && ch0 <=# 0xF8# -> - let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in + let !ch1 = word2Int# (indexWord8OffAddr# a# 1#) in if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else - let ch2 = word2Int# (indexWord8OffAddr# a# 2#) in + let !ch2 = word2Int# (indexWord8OffAddr# a# 2#) in if ch2 <# 0x80# || ch2 >=# 0xC0# then fail 2# else - let ch3 = word2Int# (indexWord8OffAddr# a# 3#) in + let !ch3 = word2Int# (indexWord8OffAddr# a# 3#) in if ch3 <# 0x80# || ch3 >=# 0xC0# then fail 3# else (# chr# (((ch0 -# 0xF0#) `uncheckedIShiftL#` 18#) +# ((ch1 -# 0x80#) `uncheckedIShiftL#` 12#) +# @@ -117,7 +116,7 @@ STRICT2(utf8DecodeString) utf8DecodeString (Ptr a#) (I# len#) = unpack a# where - end# = addr2Int# (a# `plusAddr#` len#) + !end# = addr2Int# (a# `plusAddr#` len#) unpack p# | addr2Int# p# >=# end# = return [] @@ -239,7 +238,9 @@ zEncodeString cs = case maybe_tuple cs of Nothing -> go cs where go [] = [] - go (c:cs) = encode_ch c ++ go cs + go (c:cs) = encode_digit_ch c ++ go' cs + go' [] = [] + go' (c:cs) = encode_ch c ++ go' cs unencodedChar :: Char -> Bool -- True for chars that don't need encoding unencodedChar 'Z' = False @@ -248,6 +249,12 @@ unencodedChar c = c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' +-- If a digit is at the start of a symbol then we need to encode it. +-- Otherwise package names like 9pH-0.1 give linker errors. +encode_digit_ch :: Char -> EncodedString +encode_digit_ch c | c >= '0' && c <= '9' = encode_as_unicode_char c +encode_digit_ch c | otherwise = encode_ch c + encode_ch :: Char -> EncodedString encode_ch c | unencodedChar c = [c] -- Common case first @@ -279,16 +286,16 @@ encode_ch '/' = "zs" encode_ch '*' = "zt" encode_ch '_' = "zu" encode_ch '%' = "zv" -encode_ch c = 'z' : if isDigit (head hex_str) then hex_str - else '0':hex_str +encode_ch c = encode_as_unicode_char c + +encode_as_unicode_char :: Char -> EncodedString +encode_as_unicode_char c = 'z' : if isDigit (head hex_str) then hex_str + else '0':hex_str where hex_str = showHex (ord c) "U" -- ToDo: we could improve the encoding here in various ways. -- eg. strings of unicode characters come out as 'z1234Uz5678U', we -- could remove the 'U' in the middle (the 'z' works as a separator). - showHex = showIntAtBase 16 intToDigit - -- needed because prior to GHC 6.2, Numeric.showHex added a "0x" prefix - zDecodeString :: EncodedString -> UserString zDecodeString [] = [] zDecodeString ('Z' : d : rest)