X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=GHC%2FIO%2FEncoding%2FTypes.hs;h=caffa24815f0e8c255bcc413022420c16fc54a0c;hb=41e8fba828acbae1751628af50849f5352b27873;hp=b857bdf4d7571ee89ca926c1419f190cfbbd8ff7;hpb=d2063b5b0be014545b21819172c87756efcb0b0c;p=ghc-base.git diff --git a/GHC/IO/Encoding/Types.hs b/GHC/IO/Encoding/Types.hs index b857bdf..caffa24 100644 --- a/GHC/IO/Encoding/Types.hs +++ b/GHC/IO/Encoding/Types.hs @@ -1,4 +1,6 @@ -{-# OPTIONS_GHC -fno-implicit-prelude -funbox-strict-fields #-} +{-# LANGUAGE NoImplicitPrelude, ExistentialQuantification #-} +{-# OPTIONS_GHC -funbox-strict-fields #-} + ----------------------------------------------------------------------------- -- | -- Module : GHC.IO.Encoding.Types @@ -22,13 +24,14 @@ module GHC.IO.Encoding.Types ( import GHC.Base import GHC.Word -import GHC.IO +import GHC.Show +-- import GHC.IO import GHC.IO.Buffer -- ----------------------------------------------------------------------------- -- Text encoders/decoders -data BufferCodec from to = BufferCodec { +data BufferCodec from to state = BufferCodec { encode :: Buffer from -> Buffer to -> IO (Buffer from, Buffer to), -- ^ The @encode@ function translates elements of the buffer @from@ -- to the buffer @to@. It should translate as many elements as possible @@ -45,10 +48,27 @@ data BufferCodec from to = BufferCodec { -- library in order to report translation errors at the point they -- actually occur, rather than when the buffer is translated. -- - close :: IO () + close :: IO (), -- ^ Resources associated with the encoding may now be released. -- The @encode@ function may not be called again after calling -- @close@. + + getState :: IO state, + -- ^ Return the current state of the codec. + -- + -- Many codecs are not stateful, and in these cases the state can be + -- represented as '()'. Other codecs maintain a state. 
For + -- example, UTF-16 recognises a BOM (byte-order-mark) character at + -- the beginning of the input, and remembers thereafter whether to + -- use big-endian or little-endian mode. In this case, the state + -- of the codec would include two pieces of information: whether we + -- are at the beginning of the stream (the BOM only occurs at the + -- beginning), and if not, whether to use the big or little-endian + -- encoding. + + setState :: state -> IO() + -- ^ Restore the state of the codec using the state from a previous + -- call to 'getState'. } type DecodeBuffer = Buffer Word8 -> Buffer Char @@ -57,16 +77,23 @@ type DecodeBuffer = Buffer Word8 -> Buffer Char type EncodeBuffer = Buffer Char -> Buffer Word8 -> IO (Buffer Char, Buffer Word8) -type TextDecoder = BufferCodec Word8 CharBufElem -type TextEncoder = BufferCodec CharBufElem Word8 +type TextDecoder state = BufferCodec Word8 CharBufElem state +type TextEncoder state = BufferCodec CharBufElem Word8 state -- | A 'TextEncoding' is a specification of a conversion scheme -- between sequences of bytes and sequences of Unicode characters. -- -- For example, UTF-8 is an encoding of Unicode characters into a sequence --- of bytes. The 'TextEncoding' for UTF-8 is 'utf_8'. +-- of bytes. The 'TextEncoding' for UTF-8 is 'utf8'. data TextEncoding - = TextEncoding { - mkTextDecoder :: IO TextDecoder, - mkTextEncoder :: IO TextEncoder + = forall dstate estate . TextEncoding { + textEncodingName :: String, + -- ^ a string that can be passed to 'mkTextEncoding' to + -- create an equivalent 'TextEncoding'. + mkTextDecoder :: IO (TextDecoder dstate), + mkTextEncoder :: IO (TextEncoder estate) } + +instance Show TextEncoding where + -- | Returns the value of 'textEncodingName' + show te = textEncodingName te