1 {-# OPTIONS_GHC -fno-implicit-prelude -funbox-strict-fields #-}
2 -----------------------------------------------------------------------------
4 -- Module : GHC.IO.Encoding.Types
5 -- Copyright : (c) The University of Glasgow, 2008-2009
6 -- License : see libraries/base/LICENSE
8 -- Maintainer : libraries@haskell.org
9 -- Stability : internal
10 -- Portability : non-portable
12 -- Types for text encoding/decoding
14 -----------------------------------------------------------------------------
16 module GHC.IO.Encoding.Types (
19 TextEncoder, TextDecoder,
20 EncodeBuffer, DecodeBuffer,
28 -- -----------------------------------------------------------------------------
29 -- Text encoders/decoders
31 data BufferCodec from to = BufferCodec {
32 encode :: Buffer from -> Buffer to -> IO (Buffer from, Buffer to),
33 -- ^ The @encode@ function translates elements of the buffer @from@
34 -- to the buffer @to@. It should translate as many elements as possible
35 -- given the sizes of the buffers, including translating zero elements
36 -- if there is either not enough room in @to@, or @from@ does not
37 -- contain a complete multibyte sequence.
39 -- @encode@ should raise an exception if, and only if, @from@
40 -- begins with an illegal sequence, or the first element of @from@
41 -- is not representable in the encoding of @to@. That is, if any
42 -- elements can be successfully translated before an error is
43 -- encountered, then @encode@ should translate as much as it can
44 -- and not throw an exception. This behaviour is used by the IO
45 -- library in order to report translation errors at the point they
46 -- actually occur, rather than when the buffer is translated.
  -- ^ Resources associated with the encoding may now be released.
  -- The @encode@ function may not be called again after that point.
-- | Type of a buffer-to-buffer decoding step: it takes a byte buffer
-- ('Word8' elements) and a character buffer ('Char' elements) and, in
-- 'IO', yields a pair of buffers with those same element types.  This is
-- the shape of the 'encode' field of 'BufferCodec' at @from = Word8@,
-- @to = Char@.
type DecodeBuffer =
  Buffer Word8 -> Buffer Char -> IO (Buffer Word8, Buffer Char)
-- | Type of a buffer-to-buffer encoding step: it takes a character buffer
-- ('Char' elements) and a byte buffer ('Word8' elements) and, in 'IO',
-- yields a pair of buffers with those same element types.  This is the
-- shape of the 'encode' field of 'BufferCodec' at @from = Char@,
-- @to = Word8@.
type EncodeBuffer =
  Buffer Char -> Buffer Word8 -> IO (Buffer Char, Buffer Word8)
-- | A 'BufferCodec' specialised to read 'Word8' elements and write
-- 'CharBufElem' elements, i.e. the bytes-to-characters direction.
type TextDecoder =
  BufferCodec Word8 CharBufElem
-- | A 'BufferCodec' specialised to read 'CharBufElem' elements and write
-- 'Word8' elements, i.e. the characters-to-bytes direction.
type TextEncoder =
  BufferCodec CharBufElem Word8
-- | A 'TextEncoding' is a specification of a conversion scheme
-- between sequences of bytes and sequences of Unicode characters.
--
-- For example, UTF-8 is an encoding of Unicode characters into a sequence
-- of bytes.  The 'TextEncoding' for UTF-8 is 'System.IO.utf8'.
70 mkTextDecoder :: IO TextDecoder,
71 mkTextEncoder :: IO TextEncoder