2 % (c) The University of Glasgow, 1997-2006
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
12 -- non-abstract for vs\/HaskellService
14 -- * Creation\/destruction
16 hGetStringBufferBlock,
26 -- * Moving and comparison
39 #include "HsVersions.h"
42 import FastString ( FastString,mkFastString,mkFastStringBytes )
45 import System.IO ( hGetBuf, hFileSize,IOMode(ReadMode), hClose
48 import GHC.Ptr ( Ptr(..) )
50 import GHC.IOBase ( IO(..) )
51 import GHC.Base ( unsafeChr )
53 #if __GLASGOW_HASKELL__ >= 601
54 import System.IO ( openBinaryFile )
56 import IOExts ( openFileEx, IOModeEx(..) )
59 #if __GLASGOW_HASKELL__ < 601
60 openBinaryFile fp mode = openFileEx fp (BinaryMode mode)
63 -- -----------------------------------------------------------------------------
64 -- The StringBuffer type
66 -- |A StringBuffer is an internal pointer to a sized chunk of bytes.
67 -- The bytes are intended to be *immutable*. There are pure
68 -- operations to read the contents of a StringBuffer.
70 -- A StringBuffer may have a finalizer, depending on how it was
75 buf :: {-# UNPACK #-} !(ForeignPtr Word8),
76 len :: {-# UNPACK #-} !Int, -- length
77 cur :: {-# UNPACK #-} !Int -- current pos
79 -- The buffer is assumed to be UTF-8 encoded, and furthermore
80 -- we add three '\0' bytes to the end as sentinels so that the
81 -- decoder doesn't have to check for overflow at every single byte
82 -- of a multibyte sequence.
84 instance Show StringBuffer where
85 showsPrec _ s = showString "<stringbuffer("
86 . shows (len s) . showString "," . shows (cur s)
89 -- -----------------------------------------------------------------------------
90 -- Creation / Destruction
92 hGetStringBuffer :: FilePath -> IO StringBuffer
93 hGetStringBuffer fname = do
94 h <- openBinaryFile fname ReadMode
96 let size = fromIntegral size_i
97 buf <- mallocForeignPtrArray (size+3)
98 withForeignPtr buf $ \ptr -> do
99 r <- if size == 0 then return 0 else hGetBuf h ptr size
102 then ioError (userError "short read of file")
104 pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
105 -- sentinels for UTF-8 decoding
106 return (StringBuffer buf size 0)
108 hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer
109 hGetStringBufferBlock handle wanted
110 = do size_i <- hFileSize handle
111 offset_i <- hTell handle
112 let size = min wanted (fromIntegral $ size_i-offset_i)
113 buf <- mallocForeignPtrArray (size+3)
114 withForeignPtr buf $ \ptr ->
115 do r <- if size == 0 then return 0 else hGetBuf handle ptr size
117 then ioError (userError $ "short read of file: "++show(r,size,fromIntegral size_i,handle))
118 else do pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
119 return (StringBuffer buf size 0)
121 appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer
122 appendStringBuffers sb1 sb2
123 = do newBuf <- mallocForeignPtrArray (size+3)
124 withForeignPtr newBuf $ \ptr ->
125 withForeignPtr (buf sb1) $ \sb1Ptr ->
126 withForeignPtr (buf sb2) $ \sb2Ptr ->
127 do copyArray (sb1Ptr `advancePtr` cur sb1) ptr (calcLen sb1)
128 copyArray (sb2Ptr `advancePtr` cur sb2) (ptr `advancePtr` cur sb1) (calcLen sb2)
129 pokeArray (ptr `advancePtr` size) [0,0,0]
130 return (StringBuffer newBuf size 0)
131 where calcLen sb = len sb - cur sb
132 size = calcLen sb1 + calcLen sb2
134 stringToStringBuffer :: String -> IO StringBuffer
135 stringToStringBuffer str = do
136 let size = utf8EncodedLength str
137 buf <- mallocForeignPtrArray (size+3)
138 withForeignPtr buf $ \ptr -> do
139 utf8EncodeString ptr str
140 pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
141 -- sentinels for UTF-8 decoding
142 return (StringBuffer buf size 0)
144 -- -----------------------------------------------------------------------------
147 -- Getting our fingers dirty a little here, but this is performance-critical
148 {-# INLINE nextChar #-}
149 nextChar :: StringBuffer -> (Char,StringBuffer)
150 nextChar (StringBuffer buf len (I# cur#)) =
152 withForeignPtr buf $ \(Ptr a#) -> do
153 case utf8DecodeChar# (a# `plusAddr#` cur#) of
155 let cur' = I# (b# `minusAddr#` a#) in
156 return (C# c#, StringBuffer buf len cur')
158 currentChar :: StringBuffer -> Char
159 currentChar = fst . nextChar
161 prevChar :: StringBuffer -> Char -> Char
162 prevChar (StringBuffer buf len 0) deflt = deflt
163 prevChar (StringBuffer buf len cur) deflt =
165 withForeignPtr buf $ \p -> do
166 p' <- utf8PrevChar (p `plusPtr` cur)
167 return (fst (utf8DecodeChar p'))
169 -- -----------------------------------------------------------------------------
172 stepOn :: StringBuffer -> StringBuffer
173 stepOn s = snd (nextChar s)
175 offsetBytes :: Int -> StringBuffer -> StringBuffer
176 offsetBytes i s = s { cur = cur s + i }
178 byteDiff :: StringBuffer -> StringBuffer -> Int
179 byteDiff s1 s2 = cur s2 - cur s1
181 atEnd :: StringBuffer -> Bool
182 atEnd (StringBuffer _ l c) = l == c
184 -- -----------------------------------------------------------------------------
187 lexemeToString :: StringBuffer -> Int {-bytes-} -> String
188 lexemeToString _ 0 = ""
189 lexemeToString (StringBuffer buf _ cur) bytes =
191 withForeignPtr buf $ \ptr ->
192 utf8DecodeString (ptr `plusPtr` cur) bytes
194 lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString
195 lexemeToFastString _ 0 = mkFastString ""
196 lexemeToFastString (StringBuffer buf _ cur) len =
198 withForeignPtr buf $ \ptr ->
199 return $! mkFastStringBytes (ptr `plusPtr` cur) len
201 -- -----------------------------------------------------------------------------
202 -- Parsing integer strings in various bases
204 byteOff :: StringBuffer -> Int -> Char
205 byteOff (StringBuffer buf _ cur) i =
206 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
207 w <- peek (ptr `plusPtr` (cur+i))
208 return (unsafeChr (fromIntegral (w::Word8)))
210 -- | XXX assumes ASCII digits only
211 parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
212 parseInteger buf len radix to_int
214 where go i x | i == len = x
215 | otherwise = go (i+1) (x * radix + toInteger (to_int (byteOff buf i)))
217 -- -----------------------------------------------------------------------------
220 -- Just like unsafePerformIO, but we inline it.
221 {-# INLINE inlinePerformIO #-}
222 inlinePerformIO :: IO a -> a
223 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
225 #if __GLASGOW_HASKELL__ < 600
226 mallocForeignPtrArray :: Storable a => Int -> IO (ForeignPtr a)
227 mallocForeignPtrArray = doMalloc undefined
229 doMalloc :: Storable b => b -> Int -> IO (ForeignPtr b)
230 doMalloc dummy size = mallocForeignPtrBytes (size * sizeOf dummy)
232 mallocForeignPtrBytes :: Int -> IO (ForeignPtr a)
233 mallocForeignPtrBytes n = do
235 newForeignPtr r (finalizerFree r)
237 foreign import ccall unsafe "stdlib.h free"
238 finalizerFree :: Ptr a -> IO ()