Strip any Byte Order Mark (BOM) from the front of decoded streams.

author Ben.Lippmeier@anu.edu.au <unknown>
Wed, 30 Sep 2009 08:42:29 +0000 (08:42 +0000)

committer Ben.Lippmeier@anu.edu.au <unknown>
Wed, 30 Sep 2009 08:42:29 +0000 (08:42 +0000)

diff --git a/GHC/IO/Handle/Internals.hs b/GHC/IO/Handle/Internals.hs

index b8dc82a..cc9e3d3 100644 (file)
--- a/GHC/IO/Handle/Internals.hs
+++ b/GHC/IO/Handle/Internals.hs
@@ -727,7 +727,7 @@ readTextDevice h_@Handle__{..} cbuf = do
  
    debugIO ("readTextDevice after reading: bbuf=" ++ summaryBuffer bbuf1)
  
  
    debugIO ("readTextDevice after reading: bbuf=" ++ summaryBuffer bbuf1)
  
-  (bbuf2,cbuf2) <-
+  (bbuf2,cbuf') <- 
        case haDecoder of
            Nothing      -> do
                 writeIORef haLastDecode (error "codec_state", bbuf1)
        case haDecoder of
            Nothing      -> do
                 writeIORef haLastDecode (error "codec_state", bbuf1)
@@ -737,16 +737,13 @@ readTextDevice h_@Handle__{..} cbuf = do
                 writeIORef haLastDecode (state, bbuf1)
                 (encode decoder) bbuf1 cbuf
  
                 writeIORef haLastDecode (state, bbuf1)
                 (encode decoder) bbuf1 cbuf
  
-  debugIO ("readTextDevice after decoding: cbuf=" ++ summaryBuffer cbuf2 ++ 
+  debugIO ("readTextDevice after decoding: cbuf=" ++ summaryBuffer cbuf' ++ 
          " bbuf=" ++ summaryBuffer bbuf2)
  
          " bbuf=" ++ summaryBuffer bbuf2)
  
-  cbuf3 <- stripByteOrderMark cbuf2
-
    writeIORef haByteBuffer bbuf2
    writeIORef haByteBuffer bbuf2
-  if bufR cbuf3 == bufR cbuf -- no new characters
+  if bufR cbuf' == bufR cbuf -- no new characters
       then readTextDevice' h_ bbuf2 cbuf -- we need more bytes to make a Char
       then readTextDevice' h_ bbuf2 cbuf -- we need more bytes to make a Char
-     else return cbuf3
-
+     else return cbuf'
  
  -- we have an incomplete byte sequence at the end of the buffer: try to
  -- read more bytes.
  
  -- we have an incomplete byte sequence at the end of the buffer: try to
  -- read more bytes.
@@ -795,7 +792,7 @@ readTextDeviceNonBlocking h_@Handle__{..} cbuf = do
                     if isNothing r then ioe_EOF else do  -- raise EOF
                     return bbuf1
  
                     if isNothing r then ioe_EOF else do  -- raise EOF
                     return bbuf1
  
-  (bbuf2,cbuf2) <-
+  (bbuf2,cbuf') <-
        case haDecoder of
            Nothing      -> do
                 writeIORef haLastDecode (error "codec_state", bbuf1)
        case haDecoder of
            Nothing      -> do
                 writeIORef haLastDecode (error "codec_state", bbuf1)
@@ -805,23 +802,5 @@ readTextDeviceNonBlocking h_@Handle__{..} cbuf = do
                 writeIORef haLastDecode (state, bbuf1)
                 (encode decoder) bbuf1 cbuf
  
                 writeIORef haLastDecode (state, bbuf1)
                 (encode decoder) bbuf1 cbuf
  
-  cbuf3        <- stripByteOrderMark cbuf2
-
    writeIORef haByteBuffer bbuf2
    writeIORef haByteBuffer bbuf2
-  return cbuf3
-
-
--- | When converting from UTF-8 to UCS-4, Solaris iconv adds a Byte Order Mark (BOM)
---     of value 0xfeff to the start of the stream. We don't want to return this to
---     the caller, so strip it here. This is a safe operation for other platforms,
---     so always do it.
-stripByteOrderMark :: CharBuffer -> IO CharBuffer
-stripByteOrderMark cbuf
-  | isEmptyBuffer cbuf
-  = return cbuf
-  
-  | otherwise
-  = do firstChar <- peekCharBuf (bufRaw cbuf) 0
-       if firstChar == chr 0xfeff
-        then   return (bufferRemove 1 cbuf)
-        else   return cbuf
+  return cbuf'
author	Ben.Lippmeier@anu.edu.au <unknown>
	Wed, 30 Sep 2009 08:42:29 +0000 (08:42 +0000)
committer	Ben.Lippmeier@anu.edu.au <unknown>
	Wed, 30 Sep 2009 08:42:29 +0000 (08:42 +0000)