X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2Futils%2FStringBuffer.lhs;h=92a937b74f1aed2f4fefabb4088974c4c639cfa8;hb=67cb409159fa9136dff942b8baaec25909416022;hp=903d7ebe5e0d398950af3204b429c8e376794f7b;hpb=17b297d97d327620ed6bfab942f8992b2446f1bf;p=ghc-hetmet.git

diff --git a/compiler/utils/StringBuffer.lhs b/compiler/utils/StringBuffer.lhs
index 903d7eb..92a937b 100644
--- a/compiler/utils/StringBuffer.lhs
+++ b/compiler/utils/StringBuffer.lhs
@@ -6,11 +6,11 @@
 Buffers for scanning string input stored in external arrays.
 
 \begin{code}
-{-# OPTIONS_GHC -w #-}
+{-# OPTIONS -w #-}
 -- The above warning supression flag is a temporary kludge.
 -- While working on this module you are encouraged to remove it and fix
 -- any warnings in the module. See
---     http://hackage.haskell.org/trac/ghc/wiki/WorkingConventions#Warnings
+--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
 -- for details
 
 module StringBuffer
@@ -106,10 +106,7 @@ hGetStringBuffer fname = do
      hClose h
      if (r /= size)
 	then ioError (userError "short read of file")
-	else do
-	  pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
-		 -- sentinels for UTF-8 decoding
-	  return (StringBuffer buf size 0)
+	else newUTF8StringBuffer buf ptr size
 
 hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer
 hGetStringBufferBlock handle wanted
@@ -121,8 +118,21 @@ hGetStringBufferBlock handle wanted
              do r <- if size == 0 then return 0 else hGetBuf handle ptr size
                 if r /= size
                    then ioError (userError $ "short read of file: "++show(r,size,size_i,handle))
-                   else do pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
-                           return (StringBuffer buf size 0)
+                   else newUTF8StringBuffer buf ptr size
+
+-- Wrap a freshly filled buffer of 'size' bytes as a StringBuffer.  If its
+-- first character is a byte-order mark, return the buffer that follows it
+-- instead (see #1744).  Dropping the BOM is better than treating #FEFF as
+-- whitespace, because that would mess up layout: Haskell has no concept of
+-- zero-width whitespace, every whitespace codepoint is one column wide.
+newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
+newUTF8StringBuffer buf ptr size = do
+  -- Three zero bytes directly after the data: sentinels for UTF-8 decoding.
+  pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
+  let whole = StringBuffer buf size 0
+  return $ case nextChar whole of
+    (c, afterBOM) | c == '\xfeff' -> afterBOM
+    _                             -> whole
 
 appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer
 appendStringBuffers sb1 sb2