+{-# LANGUAGE BangPatterns #-}
{-# OPTIONS_GHC -O #-}
-- We always optimise this, otherwise performance of a non-optimised
-- compiler is severely affected
#include "HsVersions.h"
import Foreign
-import Data.Char ( ord, chr, isDigit, digitToInt, intToDigit,
- isHexDigit )
-import Numeric ( showIntAtBase )
-import Data.Bits
+import Data.Char
+import Numeric
import GHC.Ptr ( Ptr(..) )
import GHC.Base
{-# INLINE utf8DecodeChar# #-}
utf8DecodeChar# :: Addr# -> (# Char#, Addr# #)
utf8DecodeChar# a# =
- let ch0 = word2Int# (indexWord8OffAddr# a# 0#) in
+ let !ch0 = word2Int# (indexWord8OffAddr# a# 0#) in
case () of
_ | ch0 <=# 0x7F# -> (# chr# ch0, a# `plusAddr#` 1# #)
| ch0 >=# 0xC0# && ch0 <=# 0xDF# ->
- let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
+ let !ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else
(# chr# (((ch0 -# 0xC0#) `uncheckedIShiftL#` 6#) +#
(ch1 -# 0x80#)),
a# `plusAddr#` 2# #)
| ch0 >=# 0xE0# && ch0 <=# 0xEF# ->
- let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
+ let !ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else
- let ch2 = word2Int# (indexWord8OffAddr# a# 2#) in
+ let !ch2 = word2Int# (indexWord8OffAddr# a# 2#) in
if ch2 <# 0x80# || ch2 >=# 0xC0# then fail 2# else
(# chr# (((ch0 -# 0xE0#) `uncheckedIShiftL#` 12#) +#
((ch1 -# 0x80#) `uncheckedIShiftL#` 6#) +#
a# `plusAddr#` 3# #)
| ch0 >=# 0xF0# && ch0 <=# 0xF8# ->
- let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
+ let !ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else
- let ch2 = word2Int# (indexWord8OffAddr# a# 2#) in
+ let !ch2 = word2Int# (indexWord8OffAddr# a# 2#) in
if ch2 <# 0x80# || ch2 >=# 0xC0# then fail 2# else
- let ch3 = word2Int# (indexWord8OffAddr# a# 3#) in
+ let !ch3 = word2Int# (indexWord8OffAddr# a# 3#) in
if ch3 <# 0x80# || ch3 >=# 0xC0# then fail 3# else
(# chr# (((ch0 -# 0xF0#) `uncheckedIShiftL#` 18#) +#
((ch1 -# 0x80#) `uncheckedIShiftL#` 12#) +#
utf8DecodeString (Ptr a#) (I# len#)
= unpack a#
where
- end# = addr2Int# (a# `plusAddr#` len#)
+ !end# = addr2Int# (a# `plusAddr#` len#)
unpack p#
| addr2Int# p# >=# end# = return []
Nothing -> go cs
where
go [] = []
- go (c:cs) = encode_ch c ++ go cs
+ go (c:cs) = encode_digit_ch c ++ go' cs
+ go' [] = []
+ go' (c:cs) = encode_ch c ++ go' cs
unencodedChar :: Char -> Bool -- True for chars that don't need encoding
unencodedChar 'Z' = False
|| c >= 'A' && c <= 'Z'
|| c >= '0' && c <= '9'
+-- If a digit is at the start of a symbol then we need to encode it.
+-- Otherwise package names like 9pH-0.1 give linker errors.
+encode_digit_ch :: Char -> EncodedString
+encode_digit_ch c | c >= '0' && c <= '9' = encode_as_unicode_char c
+encode_digit_ch c | otherwise = encode_ch c
+
encode_ch :: Char -> EncodedString
encode_ch c | unencodedChar c = [c] -- Common case first
encode_ch '*' = "zt"
encode_ch '_' = "zu"
encode_ch '%' = "zv"
-encode_ch c = 'z' : if isDigit (head hex_str) then hex_str
- else '0':hex_str
+encode_ch c = encode_as_unicode_char c
+
+encode_as_unicode_char :: Char -> EncodedString
+encode_as_unicode_char c = 'z' : if isDigit (head hex_str) then hex_str
+ else '0':hex_str
where hex_str = showHex (ord c) "U"
-- ToDo: we could improve the encoding here in various ways.
-- eg. strings of unicode characters come out as 'z1234Uz5678U', we
-- could remove the 'U' in the middle (the 'z' works as a separator).
- showHex = showIntAtBase 16 intToDigit
- -- needed because prior to GHC 6.2, Numeric.showHex added a "0x" prefix
-
zDecodeString :: EncodedString -> UserString
zDecodeString [] = []
zDecodeString ('Z' : d : rest)