X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Futils%2FUnicodeUtil.lhs;h=56e95a5434a8376b2aa3f928da6445679401c79b;hb=2a2efb720c0fdc06fe749f96f284b00b30f8f3f7;hp=0123e67305f7b99081ffb47804dbdd4c8efea092;hpb=4b17269854ccf10df8b3ca1711410a5ca439ea8a;p=ghc-hetmet.git diff --git a/ghc/compiler/utils/UnicodeUtil.lhs b/ghc/compiler/utils/UnicodeUtil.lhs index 0123e67..56e95a5 100644 --- a/ghc/compiler/utils/UnicodeUtil.lhs +++ b/ghc/compiler/utils/UnicodeUtil.lhs @@ -2,45 +2,35 @@ Various Unicode-related utilities. \begin{code} module UnicodeUtil( - stringToUtf8 + stringToUtf8, intsToUtf8 ) where #include "HsVersions.h" -import Panic (panic) -import Char (chr, ord) +import Panic ( panic ) +import Char ( chr, ord ) \end{code} \begin{code} -stringToUtf8 :: [Int] -> String -stringToUtf8 [] = "" -stringToUtf8 (c:s) - | c >= 1 && c <= 0x7F = chr c : stringToUtf8 s +stringToUtf8 :: String -> String +stringToUtf8 s = intsToUtf8 (map ord s) + +intsToUtf8 :: [Int] -> String +intsToUtf8 [] = "" +intsToUtf8 (c:s) + | c >= 1 && c <= 0x7F = chr c : intsToUtf8 s | c < 0 = panic ("charToUtf8 ("++show c++")") | c <= 0x7FF = chr (0xC0 + c `div` 0x40 ) : chr (0x80 + c `mod` 0x40) : - stringToUtf8 s + intsToUtf8 s | c <= 0xFFFF = chr (0xE0 + c `div` 0x1000 ) : chr (0x80 + c `div` 0x40 `mod` 0x40) : chr (0x80 + c `mod` 0x40) : - stringToUtf8 s - | c <= 0x1FFFFF = chr (0xF0 + c `div` 0x40000 ) : - chr (0x80 + c `div` 0x1000 `mod` 0x40) : - chr (0x80 + c `div` 0x40 `mod` 0x40) : - chr (0x80 + c `mod` 0x40) : - stringToUtf8 s - | c <= 0x3FFFFFF = chr (0xF8 + c `div` 0x1000000 ) : - chr (0x80 + c `div` 0x40000 `mod` 0x40) : - chr (0x80 + c `div` 0x1000 `mod` 0x40) : - chr (0x80 + c `div` 0x40 `mod` 0x40) : - chr (0x80 + c `mod` 0x40) : - stringToUtf8 s - | c <= 0x7FFFFFFF = chr (0xFC + c `div` 0x40000000 ) : - chr (0x80 + c `div` 0x1000000 `mod` 0x40) : - chr (0x80 + c `div` 0x40000 `mod` 0x40) : + intsToUtf8 s + | c <= 0x10FFFF = chr (0xF0 + c `div` 0x40000 ) : chr (0x80 + c `div` 0x1000 `mod` 0x40) : chr (0x80 + c `div` 0x40 `mod` 0x40) : chr (0x80 + c `mod` 0x40) : - stringToUtf8 s + intsToUtf8 s | otherwise = panic ("charToUtf8 "++show c) \end{code}