Buffers for scanning string input stored in external arrays.
\begin{code}
+{-# OPTIONS -w #-}
+-- The above warning suppression flag is a temporary kludge.
+-- While working on this module you are encouraged to remove it and fix
+-- any warnings in the module. See
+-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
+-- for details
+
module StringBuffer
(
StringBuffer(..),
hClose h
if (r /= size)
then ioError (userError "short read of file")
- else do
- pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
- -- sentinels for UTF-8 decoding
- return (StringBuffer buf size 0)
+ else newUTF8StringBuffer buf ptr size
hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer
hGetStringBufferBlock handle wanted
withForeignPtr buf $ \ptr ->
do r <- if size == 0 then return 0 else hGetBuf handle ptr size
if r /= size
- then ioError (userError $ "short read of file: "++show(r,size,fromIntegral size_i,handle))
- else do pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
- return (StringBuffer buf size 0)
+ then ioError (userError $ "short read of file: "++show(r,size,size_i,handle))
+ else newUTF8StringBuffer buf ptr size
+
+-- | Finish building a 'StringBuffer' over an already-filled byte buffer.
+--
+-- Writes three zero bytes starting at @ptr `plusPtr` size@, then wraps
+-- @buf@ as @StringBuffer buf size 0@. If the first character decoded by
+-- 'nextChar' is U+FEFF (a byte-order mark), the second component of
+-- 'nextChar''s result is returned (presumably the buffer advanced past
+-- that character); otherwise the freshly built buffer is returned as is.
+--
+-- NOTE(review): assumes the allocation behind @ptr@ has room for at least
+-- @size + 3@ bytes and that @ptr@ addresses the memory owned by @buf@ --
+-- confirm at the call sites.
+newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
+newUTF8StringBuffer buf ptr size = do
+ pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
+ -- sentinels for UTF-8 decoding
+ let
+ sb0 = StringBuffer buf size 0
+ (first_char, sb1) = nextChar sb0
+ -- These are lazy let-bindings: 'nextChar' is only evaluated when the
+ -- value returned below is demanded, not when this action runs.
+ -- skip the byte-order mark if there is one (see #1744)
+ -- This is better than treating #FEFF as whitespace,
+ -- because that would mess up layout. We don't have a concept
+ -- of zero-width whitespace in Haskell: all whitespace codepoints
+ -- have a width of one column.
+ return (if first_char == '\xfeff' then sb1 else sb0)
appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer
appendStringBuffers sb1 sb2