1 {-# LANGUAGE CPP, BangPatterns, ForeignFunctionInterface, NoImplicitPrelude,
2 NondecreasingIndentation, MagicHash #-}
3 module GHC.IO.Encoding.CodePage(
4 #if !defined(mingw32_HOST_OS)
16 import GHC.IO (unsafePerformIO)
17 import GHC.IO.Encoding.Types
19 import GHC.IO.Exception
22 import Data.List (lookup)
24 import GHC.IO.Encoding.CodePage.Table
26 import GHC.IO.Encoding.Latin1 (latin1)
27 import GHC.IO.Encoding.UTF8 (utf8)
28 import GHC.IO.Encoding.UTF16 (utf16le, utf16be)
29 import GHC.IO.Encoding.UTF32 (utf32le, utf32be)
31 -- note CodePage = UInt which might not work on Win64. But the Win32 package
32 -- also has this issue.
33 getCurrentCodePage :: IO Word32
34 getCurrentCodePage = do
40 -- Since the Win32 package depends on base, we have to import these ourselves:
41 foreign import stdcall unsafe "windows.h GetConsoleCP"
42 getConsoleCP :: IO Word32
44 foreign import stdcall unsafe "windows.h GetACP"
47 {-# NOINLINE localeEncoding #-}
48 localeEncoding :: TextEncoding
49 localeEncoding = unsafePerformIO $ fmap codePageEncoding getCurrentCodePage
52 codePageEncoding :: Word32 -> TextEncoding
53 codePageEncoding 65001 = utf8
54 codePageEncoding 1200 = utf16le
55 codePageEncoding 1201 = utf16be
56 codePageEncoding 12000 = utf32le
57 codePageEncoding 12001 = utf32be
58 codePageEncoding cp = maybe latin1 (buildEncoding cp) (lookup cp codePageMap)
60 buildEncoding :: Word32 -> CodePageArrays -> TextEncoding
61 buildEncoding cp SingleByteCP {decoderArray = dec, encoderArray = enc}
63 textEncodingName = "CP" ++ show cp,
64 mkTextDecoder = return $ simpleCodec
65 $ decodeFromSingleByte dec
66 , mkTextEncoder = return $ simpleCodec $ encodeToSingleByte enc
69 simpleCodec :: (Buffer from -> Buffer to -> IO (Buffer from, Buffer to))
70 -> BufferCodec from to ()
71 simpleCodec f = BufferCodec {encode = f, close = return (), getState = return (),
74 decodeFromSingleByte :: ConvArray Char -> DecodeBuffer
75 decodeFromSingleByte convArr
76 input@Buffer { bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
77 output@Buffer { bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
79 done !ir !ow = return (if ir==iw then input{ bufL=0, bufR=0}
83 | ow >= os || ir >= iw = done ir ow
85 b <- readWord8Buf iraw ir
86 let c = lookupConv convArr b
87 if c=='\0' && b /= 0 then invalid else do
88 ow' <- writeCharBuf oraw ow c
91 invalid = if ir > ir0 then done ir ow else ioe_decodingError
94 encodeToSingleByte :: CompactArray Char Word8 -> EncodeBuffer
95 encodeToSingleByte CompactArray { encoderMax = maxChar,
96 encoderIndices = indices,
97 encoderValues = values }
98 input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
99 output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
101 done !ir !ow = return (if ir==iw then input { bufL=0, bufR=0 }
102 else input { bufL=ir },
105 | ow >= os || ir >= iw = done ir ow
107 (c,ir') <- readCharBuf iraw ir
108 case lookupCompact maxChar indices values c of
110 Just 0 | c /= '\0' -> invalid
112 writeWord8Buf oraw ow b
115 invalid = if ir > ir0 then done ir ow else ioe_encodingError
119 ioe_decodingError :: IO a
120 ioe_decodingError = ioException
121 (IOError Nothing InvalidArgument "codePageEncoding"
122 "invalid code page byte sequence" Nothing Nothing)
124 ioe_encodingError :: IO a
125 ioe_encodingError = ioException
126 (IOError Nothing InvalidArgument "codePageEncoding"
127 "character is not in the code page" Nothing Nothing)
130 --------------------------------------------
131 -- Array access functions
133 -- {-# INLINE lookupConv #-}
134 lookupConv :: ConvArray Char -> Word8 -> Char
135 lookupConv a = indexChar a . fromEnum
137 {-# INLINE lookupCompact #-}
138 lookupCompact :: Char -> ConvArray Int -> ConvArray Word8 -> Char -> Maybe Word8
139 lookupCompact maxVal indexes values x
140 | x > maxVal = Nothing
141 | otherwise = Just $ indexWord8 values $ j + (i .&. mask)
144 mask = (1 `shiftL` n) - 1
146 j = indexInt indexes k
149 {-# INLINE indexInt #-}
150 indexInt :: ConvArray Int -> Int -> Int
151 indexInt (ConvArray p) (I# i) = I# (indexInt16OffAddr# p i)
153 {-# INLINE indexWord8 #-}
154 indexWord8 :: ConvArray Word8 -> Int -> Word8
155 indexWord8 (ConvArray p) (I# i) = W8# (indexWord8OffAddr# p i)
157 {-# INLINE indexChar #-}
158 indexChar :: ConvArray Char -> Int -> Char
159 indexChar (ConvArray p) (I# i) = C# (chr# (indexInt16OffAddr# p i))