Big patch to improve Unicode support in GHC. Validated on OS X and Windows, this

[ghc-base.git] / GHC / IO / Encoding / Latin1.hs
diff --git a/GHC/IO/Encoding/Latin1.hs b/GHC/IO/Encoding/Latin1.hs

index 6bf18c8..197222e 100644 (file)
--- a/GHC/IO/Encoding/Latin1.hs
+++ b/GHC/IO/Encoding/Latin1.hs
@@ -23,8 +23,8 @@
  -----------------------------------------------------------------------------
  
  module GHC.IO.Encoding.Latin1 (
-  latin1,
-  latin1_checked,
+  latin1, mkLatin1,
+  latin1_checked, mkLatin1_checked,
    latin1_decode,
    latin1_encode,
    latin1_checked_encode,
@@ -34,46 +34,54 @@ import GHC.Base
  import GHC.Real
  import GHC.Num
  -- import GHC.IO
-import GHC.IO.Exception
  import GHC.IO.Buffer
+import GHC.IO.Encoding.Failure
  import GHC.IO.Encoding.Types
-import Data.Maybe
  
  -- -----------------------------------------------------------------------------
  -- Latin1
  
  latin1 :: TextEncoding
-latin1 = TextEncoding { textEncodingName = "ISO8859-1",
-                        mkTextDecoder = latin1_DF,
-                        mkTextEncoder = latin1_EF }
+latin1 = mkLatin1 ErrorOnCodingFailure
  
-latin1_DF :: IO (TextDecoder ())
-latin1_DF =
+mkLatin1 :: CodingFailureMode -> TextEncoding
+mkLatin1 cfm = TextEncoding { textEncodingName = "ISO8859-1",
+                              mkTextDecoder = latin1_DF cfm,
+                              mkTextEncoder = latin1_EF cfm }
+
+latin1_DF :: CodingFailureMode -> IO (TextDecoder ())
+latin1_DF cfm =
    return (BufferCodec {
               encode   = latin1_decode,
+             recover  = recoverDecode cfm,
               close    = return (),
               getState = return (),
               setState = const $ return ()
            })
  
-latin1_EF :: IO (TextEncoder ())
-latin1_EF =
+latin1_EF :: CodingFailureMode -> IO (TextEncoder ())
+latin1_EF cfm =
    return (BufferCodec {
               encode   = latin1_encode,
+             recover  = recoverEncode cfm,
               close    = return (),
               getState = return (),
               setState = const $ return ()
            })
  
  latin1_checked :: TextEncoding
-latin1_checked = TextEncoding { textEncodingName = "ISO8859-1(checked)",
-                                mkTextDecoder = latin1_DF,
-                                mkTextEncoder = latin1_checked_EF }
+latin1_checked = mkLatin1_checked ErrorOnCodingFailure
+
+mkLatin1_checked :: CodingFailureMode -> TextEncoding
+mkLatin1_checked cfm = TextEncoding { textEncodingName = "ISO8859-1(checked)",
+                                      mkTextDecoder = latin1_DF cfm,
+                                      mkTextEncoder = latin1_checked_EF cfm }
  
-latin1_checked_EF :: IO (TextEncoder ())
-latin1_checked_EF =
+latin1_checked_EF :: CodingFailureMode -> IO (TextEncoder ())
+latin1_checked_EF cfm =
    return (BufferCodec {
               encode   = latin1_checked_encode,
+             recover  = recoverEncode cfm,
               close    = return (),
               getState = return (),
               setState = const $ return ()
@@ -86,16 +94,18 @@ latin1_decode
    output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
   = let 
         loop !ir !ow
-         | ow >= os || ir >= iw =  done ir ow
+         | ow >= os = done OutputUnderflow ir ow
+         | ir >= iw = done InputUnderflow ir ow
           | otherwise = do
                c0 <- readWord8Buf iraw ir
                ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral c0))
                loop (ir+1) ow'
  
         -- lambda-lifted, to avoid thunks being built in the inner-loop:
-       done !ir !ow = return (if ir == iw then input{ bufL=0, bufR=0 }
-                                          else input{ bufL=ir },
-                         output{ bufR=ow })
+       done why !ir !ow = return (why,
+                                  if ir == iw then input{ bufL=0, bufR=0 }
+                                              else input{ bufL=ir },
+                                  output{ bufR=ow })
      in
      loop ir0 ow0
  
@@ -104,11 +114,13 @@ latin1_encode
    input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
    output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
   = let
-      done !ir !ow = return (if ir == iw then input{ bufL=0, bufR=0 }
-                                         else input{ bufL=ir },
-                             output{ bufR=ow })
+      done why !ir !ow = return (why,
+                                 if ir == iw then input{ bufL=0, bufR=0 }
+                                             else input{ bufL=ir },
+                                 output{ bufR=ow })
        loop !ir !ow
-        | ow >= os || ir >= iw =  done ir ow
+        | ow >= os = done OutputUnderflow ir ow
+        | ir >= iw = done InputUnderflow ir ow
          | otherwise = do
             (c,ir') <- readCharBuf iraw ir
             writeWord8Buf oraw ow (fromIntegral (ord c))
@@ -121,22 +133,19 @@ latin1_checked_encode
    input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
    output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
   = let
-      done !ir !ow = return (if ir == iw then input{ bufL=0, bufR=0 }
-                                         else input{ bufL=ir },
-                             output{ bufR=ow })
+      done why !ir !ow = return (why,
+                                 if ir == iw then input{ bufL=0, bufR=0 }
+                                             else input{ bufL=ir },
+                                 output{ bufR=ow })
        loop !ir !ow
-        | ow >= os || ir >= iw =  done ir ow
+        | ow >= os = done OutputUnderflow ir ow
+        | ir >= iw = done InputUnderflow ir ow
          | otherwise = do
             (c,ir') <- readCharBuf iraw ir
             if ord c > 0xff then invalid else do
             writeWord8Buf oraw ow (fromIntegral (ord c))
             loop ir' (ow+1)
          where
-           invalid = if ir > ir0 then done ir ow else ioe_encodingError
+           invalid = done InvalidSequence ir ow
      in
      loop ir0 ow0
-
-ioe_encodingError :: IO a
-ioe_encodingError = ioException
-     (IOError Nothing InvalidArgument "latin1_checked_encode"
-          "character is out of range for this encoding" Nothing Nothing)