X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=GHC%2FIO%2FEncoding%2FFailure.hs;h=a27650b00ac9beadbc91b0fb7ebb1ce43fe27fd0;hb=acfd22e03a9d611734df20d673f2804da682e223;hp=9c0e6d9abe84c1bf7566790313b0452502b7d105;hpb=4c889c7daa98daff7aec5c0e4ccf491f25f5d10c;p=ghc-base.git diff --git a/GHC/IO/Encoding/Failure.hs b/GHC/IO/Encoding/Failure.hs index 9c0e6d9..a27650b 100644 --- a/GHC/IO/Encoding/Failure.hs +++ b/GHC/IO/Encoding/Failure.hs @@ -49,7 +49,7 @@ data CodingFailureMode = ErrorOnCodingFailure -- ^ Throw an error when a -- -- Roundtripping is based on the ideas of PEP383. However, unlike PEP383 we do not wish to use lone surrogate codepoints -- to escape undecodable bytes, because that may confuse Unicode processing software written in Haskell. Instead, we use --- the range of private-use characters from 0xF1E00 to 0xF1EFF. +-- the range of private-use characters from 0xEF80 to 0xEFFF designated for "encoding hacks" by the ConScript Unicode Registry. -- -- This introduces a technical problem when it comes to encoding back to bytes using iconv. 
The iconv code will not fail when -- it tries to encode a private-use character (as it would if trying to encode a surrogate), which means that we won't get a @@ -86,7 +86,7 @@ isSurrogate c = (0xD800 <= x && x <= 0xDBFF) || (0xDC00 <= x && x <= 0xDFFF) -- | We use some private-use characters for roundtripping unknown bytes through a String isRoundtripEscapeChar :: Char -> Bool -isRoundtripEscapeChar c = 0xF1E00 <= x && x < 0xF1F00 +isRoundtripEscapeChar c = 0xEF00 <= x && x < 0xF000 where x = ord c -- | We use some surrogate characters for roundtripping unknown bytes through a String @@ -96,12 +96,12 @@ isRoundtripEscapeSurrogateChar c = 0xDC00 <= x && x < 0xDD00 -- Private use characters (in Strings) --> lone surrogates (in Buffer CharBufElem) surrogatifyRoundtripCharacter :: Char -> Char -surrogatifyRoundtripCharacter c | isRoundtripEscapeChar c = chr (ord c - 0xF1E00 + 0xDC00) +surrogatifyRoundtripCharacter c | isRoundtripEscapeChar c = chr (ord c - 0xEF00 + 0xDC00) | otherwise = c -- Lone surrogates (in Buffer CharBufElem) --> private use characters (in Strings) desurrogatifyRoundtripCharacter :: Char -> Char -desurrogatifyRoundtripCharacter c | isRoundtripEscapeSurrogateChar c = chr (ord c - 0xDC00 + 0xF1E00) +desurrogatifyRoundtripCharacter c | isRoundtripEscapeSurrogateChar c = chr (ord c - 0xDC00 + 0xEF00) | otherwise = c -- Bytes (in Buffer Word8) --> lone surrogates (in Buffer CharBufElem)