1 {-# LANGUAGE CPP, BangPatterns, ForeignFunctionInterface, NoImplicitPrelude,
2 NondecreasingIndentation, MagicHash #-}
3 module GHC.IO.Encoding.CodePage(
4 #if !defined(mingw32_HOST_OS)
7 codePageEncoding, mkCodePageEncoding,
8 localeEncoding, mkLocaleEncoding
16 import GHC.IO (unsafePerformIO)
17 import GHC.IO.Encoding.Failure
18 import GHC.IO.Encoding.Types
22 import Data.List (lookup)
24 import GHC.IO.Encoding.CodePage.Table
26 import GHC.IO.Encoding.Latin1 (mkLatin1)
27 import GHC.IO.Encoding.UTF8 (mkUTF8)
28 import GHC.IO.Encoding.UTF16 (mkUTF16le, mkUTF16be)
29 import GHC.IO.Encoding.UTF32 (mkUTF32le, mkUTF32be)
31 -- note CodePage = UInt which might not work on Win64. But the Win32 package
32 -- also has this issue.
33 getCurrentCodePage :: IO Word32
34 getCurrentCodePage = do
40 -- Since the Win32 package depends on base, we have to import these ourselves:
41 foreign import stdcall unsafe "windows.h GetConsoleCP"
42 getConsoleCP :: IO Word32
44 foreign import stdcall unsafe "windows.h GetACP"
47 {-# NOINLINE currentCodePage #-}
48 currentCodePage :: Word32
49 currentCodePage = unsafePerformIO getCurrentCodePage
51 localeEncoding :: TextEncoding
52 localeEncoding = mkLocaleEncoding ErrorOnCodingFailure
54 mkLocaleEncoding :: CodingFailureMode -> TextEncoding
55 mkLocaleEncoding cfm = mkCodePageEncoding cfm currentCodePage
58 codePageEncoding :: Word32 -> TextEncoding
59 codePageEncoding = mkCodePageEncoding ErrorOnCodingFailure
61 mkCodePageEncoding :: CodingFailureMode -> Word32 -> TextEncoding
62 mkCodePageEncoding cfm 65001 = mkUTF8 cfm
63 mkCodePageEncoding cfm 1200 = mkUTF16le cfm
64 mkCodePageEncoding cfm 1201 = mkUTF16be cfm
65 mkCodePageEncoding cfm 12000 = mkUTF32le cfm
66 mkCodePageEncoding cfm 12001 = mkUTF32be cfm
67 mkCodePageEncoding cfm cp = maybe (mkLatin1 cfm) (buildEncoding cfm cp) (lookup cp codePageMap)
69 buildEncoding :: CodingFailureMode -> Word32 -> CodePageArrays -> TextEncoding
70 buildEncoding cfm cp SingleByteCP {decoderArray = dec, encoderArray = enc}
72 textEncodingName = "CP" ++ show cp
73 , mkTextDecoder = return $ simpleCodec (recoverDecode cfm) $ decodeFromSingleByte dec
74 , mkTextEncoder = return $ simpleCodec (recoverEncode cfm) $ encodeToSingleByte enc
77 simpleCodec :: (Buffer from -> Buffer to -> IO (Buffer from, Buffer to))
78 -> (Buffer from -> Buffer to -> IO (CodingProgress, Buffer from, Buffer to))
79 -> BufferCodec from to ()
80 simpleCodec r f = BufferCodec {
88 decodeFromSingleByte :: ConvArray Char -> DecodeBuffer
89 decodeFromSingleByte convArr
90 input@Buffer { bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
91 output@Buffer { bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
93 done why !ir !ow = return (why,
94 if ir==iw then input{ bufL=0, bufR=0}
98 | ow >= os = done OutputUnderflow ir ow
99 | ir >= iw = done InputUnderflow ir ow
101 b <- readWord8Buf iraw ir
102 let c = lookupConv convArr b
103 if c=='\0' && b /= 0 then invalid else do
104 ow' <- writeCharBuf oraw ow c
107 invalid = done InvalidSequence ir ow
110 encodeToSingleByte :: CompactArray Char Word8 -> EncodeBuffer
111 encodeToSingleByte CompactArray { encoderMax = maxChar,
112 encoderIndices = indices,
113 encoderValues = values }
114 input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
115 output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
117 done why !ir !ow = return (why,
118 if ir==iw then input { bufL=0, bufR=0 }
119 else input { bufL=ir },
122 | ow >= os = done OutputUnderflow ir ow
123 | ir >= iw = done InputUnderflow ir ow
125 (c,ir') <- readCharBuf iraw ir
126 case lookupCompact maxChar indices values c of
128 Just 0 | c /= '\0' -> invalid
130 writeWord8Buf oraw ow b
133 invalid = done InvalidSequence ir ow
138 --------------------------------------------
139 -- Array access functions
141 -- {-# INLINE lookupConv #-}
142 lookupConv :: ConvArray Char -> Word8 -> Char
143 lookupConv a = indexChar a . fromEnum
145 {-# INLINE lookupCompact #-}
146 lookupCompact :: Char -> ConvArray Int -> ConvArray Word8 -> Char -> Maybe Word8
147 lookupCompact maxVal indexes values x
148 | x > maxVal = Nothing
149 | otherwise = Just $ indexWord8 values $ j + (i .&. mask)
152 mask = (1 `shiftL` n) - 1
154 j = indexInt indexes k
157 {-# INLINE indexInt #-}
158 indexInt :: ConvArray Int -> Int -> Int
159 indexInt (ConvArray p) (I# i) = I# (indexInt16OffAddr# p i)
161 {-# INLINE indexWord8 #-}
162 indexWord8 :: ConvArray Word8 -> Int -> Word8
163 indexWord8 (ConvArray p) (I# i) = W8# (indexWord8OffAddr# p i)
165 {-# INLINE indexChar #-}
166 indexChar :: ConvArray Char -> Int -> Char
167 indexChar (ConvArray p) (I# i) = C# (chr# (indexInt16OffAddr# p i))