2 % (c) The University of Glasgow 2006
3 % (c) The University of Glasgow, 1997-2006
6 Buffers for scanning string input stored in external arrays.
12 -- non-abstract for vs\/HaskellService
14 -- * Creation\/destruction
16 hGetStringBufferBlock,
26 -- * Moving and comparison
39 #include "HsVersions.h"
42 import FastString ( FastString,mkFastString,mkFastStringBytes )
45 import System.IO ( hGetBuf, hFileSize,IOMode(ReadMode), hClose
49 import GHC.IOBase ( IO(..) )
50 import GHC.Base ( unsafeChr )
52 #if __GLASGOW_HASKELL__ >= 601
53 import System.IO ( openBinaryFile )
55 import IOExts ( openFileEx, IOModeEx(..) )
58 #if __GLASGOW_HASKELL__ < 601
59 openBinaryFile fp mode = openFileEx fp (BinaryMode mode)
62 -- -----------------------------------------------------------------------------
63 -- The StringBuffer type
65 -- |A StringBuffer is an internal pointer to a sized chunk of bytes.
66 -- The bytes are intended to be *immutable*. There are pure
67 -- operations to read the contents of a StringBuffer.
69 -- A StringBuffer may have a finalizer, depending on how it was
74 buf :: {-# UNPACK #-} !(ForeignPtr Word8),
75 len :: {-# UNPACK #-} !Int, -- length
76 cur :: {-# UNPACK #-} !Int -- current pos
78 -- The buffer is assumed to be UTF-8 encoded, and furthermore
79 -- we add three '\0' bytes to the end as sentinels so that the
80 -- decoder doesn't have to check for overflow at every single byte
81 -- of a multibyte sequence.
83 instance Show StringBuffer where
84 showsPrec _ s = showString "<stringbuffer("
85 . shows (len s) . showString "," . shows (cur s)
88 -- -----------------------------------------------------------------------------
89 -- Creation / Destruction
91 hGetStringBuffer :: FilePath -> IO StringBuffer
92 hGetStringBuffer fname = do
93 h <- openBinaryFile fname ReadMode
95 let size = fromIntegral size_i
96 buf <- mallocForeignPtrArray (size+3)
97 withForeignPtr buf $ \ptr -> do
98 r <- if size == 0 then return 0 else hGetBuf h ptr size
101 then ioError (userError "short read of file")
103 pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
104 -- sentinels for UTF-8 decoding
105 return (StringBuffer buf size 0)
107 hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer
108 hGetStringBufferBlock handle wanted
109 = do size_i <- hFileSize handle
110 offset_i <- hTell handle
111 let size = min wanted (fromIntegral $ size_i-offset_i)
112 buf <- mallocForeignPtrArray (size+3)
113 withForeignPtr buf $ \ptr ->
114 do r <- if size == 0 then return 0 else hGetBuf handle ptr size
116 then ioError (userError $ "short read of file: "++show(r,size,size_i,handle))
117 else do pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
118 return (StringBuffer buf size 0)
-- | Build a new 'StringBuffer' holding the unread part of the first
-- buffer (from its current position to its end) followed by the unread
-- part of the second buffer.  The result is freshly allocated, starts
-- at position 0, and is terminated by the three zero bytes that every
-- buffer created in this module carries.  Neither input buffer is
-- modified.
appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer
appendStringBuffers sb1 sb2
    = do newBuf <- mallocForeignPtrArray (size+3)
         withForeignPtr newBuf $ \ptr ->
          withForeignPtr (buf sb1) $ \sb1Ptr ->
           withForeignPtr (buf sb2) $ \sb2Ptr ->
             do -- copyArray takes the DESTINATION first and the source
                -- second, so the new buffer has to be the first argument.
                copyArray ptr (sb1Ptr `advancePtr` cur sb1) sb1_len
                -- The second part goes directly after the bytes copied
                -- from sb1, i.e. at offset sb1_len (not at cur sb1).
                copyArray (ptr `advancePtr` sb1_len) (sb2Ptr `advancePtr` cur sb2) sb2_len
                pokeArray (ptr `advancePtr` size) [0,0,0]
                return (StringBuffer newBuf size 0)
    where -- number of bytes between the current position and the end
          calcLen sb = len sb - cur sb
          sb1_len = calcLen sb1
          sb2_len = calcLen sb2
          size = sb1_len + sb2_len
-- | Encode a 'String' into a freshly allocated 'StringBuffer'.  The
-- buffer is sized by 'utf8EncodedLength', filled by 'utf8EncodeString',
-- and followed by three zero bytes that serve as sentinels for UTF-8
-- decoding.  The resulting buffer starts at position 0.
stringToStringBuffer :: String -> IO StringBuffer
stringToStringBuffer str =
  mallocForeignPtrArray (nbytes + 3) >>= \fp -> do
    withForeignPtr fp $ \base -> do
      utf8EncodeString base str
      -- sentinels for UTF-8 decoding
      pokeArray (base `plusPtr` nbytes :: Ptr Word8) [0,0,0]
    return (StringBuffer fp nbytes 0)
  where
    -- size of the encoded string in bytes, excluding the sentinels
    nbytes = utf8EncodedLength str
143 -- -----------------------------------------------------------------------------
146 -- Getting our fingers dirty a little here, but this is performance-critical
147 {-# INLINE nextChar #-}
148 nextChar :: StringBuffer -> (Char,StringBuffer)
149 nextChar (StringBuffer buf len (I# cur#)) =
151 withForeignPtr buf $ \(Ptr a#) -> do
152 case utf8DecodeChar# (a# `plusAddr#` cur#) of
154 let cur' = I# (b# `minusAddr#` a#) in
155 return (C# c#, StringBuffer buf len cur')
-- | The character that 'nextChar' yields for this buffer; the advanced
-- buffer that 'nextChar' also returns is discarded.
currentChar :: StringBuffer -> Char
currentChar sb =
  case nextChar sb of
    (c, _) -> c
160 prevChar :: StringBuffer -> Char -> Char
161 prevChar (StringBuffer buf len 0) deflt = deflt
162 prevChar (StringBuffer buf len cur) deflt =
164 withForeignPtr buf $ \p -> do
165 p' <- utf8PrevChar (p `plusPtr` cur)
166 return (fst (utf8DecodeChar p'))
168 -- -----------------------------------------------------------------------------
-- | The buffer that 'nextChar' returns alongside the decoded
-- character; the character itself is discarded.
stepOn :: StringBuffer -> StringBuffer
stepOn sb = advanced
  where
    (_, advanced) = nextChar sb
-- | Move the current position by the given number of bytes.  The
-- underlying storage and the recorded length are left unchanged, and no
-- bounds check is performed.
offsetBytes :: Int -> StringBuffer -> StringBuffer
offsetBytes i (StringBuffer storage size pos) =
  StringBuffer storage size (pos + i)
-- | Difference, in bytes, between the current positions of two
-- buffers: the position of the second minus the position of the first.
byteDiff :: StringBuffer -> StringBuffer -> Int
byteDiff s1 s2 = endPos - startPos
  where
    startPos = cur s1
    endPos   = cur s2
-- | True exactly when the current position equals the buffer length.
atEnd :: StringBuffer -> Bool
atEnd sb = len sb == cur sb
183 -- -----------------------------------------------------------------------------
186 lexemeToString :: StringBuffer -> Int {-bytes-} -> String
187 lexemeToString _ 0 = ""
188 lexemeToString (StringBuffer buf _ cur) bytes =
190 withForeignPtr buf $ \ptr ->
191 utf8DecodeString (ptr `plusPtr` cur) bytes
193 lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString
194 lexemeToFastString _ 0 = mkFastString ""
195 lexemeToFastString (StringBuffer buf _ cur) len =
197 withForeignPtr buf $ \ptr ->
198 return $! mkFastStringBytes (ptr `plusPtr` cur) len
200 -- -----------------------------------------------------------------------------
201 -- Parsing integer strings in various bases
-- | Read the single byte located the given number of bytes past the
-- current position and return it as a 'Char' via 'unsafeChr'.  No
-- UTF-8 decoding and no bounds checking is performed.
byteOff :: StringBuffer -> Int -> Char
byteOff (StringBuffer fp _ pos) i =
  inlinePerformIO (withForeignPtr fp readByte)
  where
    readByte :: Ptr Word8 -> IO Char
    readByte base =
      peek (base `plusPtr` (pos + i)) >>= \byte ->
        return (unsafeChr (fromIntegral (byte :: Word8)))
209 -- | XXX assumes ASCII digits only (by using byteOff)
210 parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
211 parseUnsignedInteger buf len radix char_to_int
214 go i x | i == len = x
215 | otherwise = go (i+1)
216 (x * radix + toInteger (char_to_int (byteOff buf i)))
218 -- -----------------------------------------------------------------------------
-- A variant of unsafePerformIO that is marked INLINE.  It runs the
-- action by applying the function wrapped in the IO constructor to the
-- realWorld# token, then drops the resulting state token and keeps
-- only the result value.
{-# INLINE inlinePerformIO #-}
inlinePerformIO :: IO a -> a
inlinePerformIO (IO action) =
  case action realWorld# of
    (# _, result #) -> result