2 % (c) The University of Glasgow, 1997-2006
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
12 -- non-abstract for vs\/HaskellService
14 -- * Creation\/destruction
24 -- * Moving and comparison
37 #include "HsVersions.h"
40 import FastString ( FastString,mkFastString,mkFastStringBytes )
43 import System.IO ( hGetBuf, hFileSize,IOMode(ReadMode), hClose )
45 import GHC.Ptr ( Ptr(..) )
47 import GHC.IOBase ( IO(..) )
48 import GHC.Base ( unsafeChr )
50 #if __GLASGOW_HASKELL__ >= 601
51 import System.IO ( openBinaryFile )
53 import IOExts ( openFileEx, IOModeEx(..) )
56 #if __GLASGOW_HASKELL__ < 601
57 openBinaryFile fp mode = openFileEx fp (BinaryMode mode)
60 -- -----------------------------------------------------------------------------
61 -- The StringBuffer type
63 -- |A StringBuffer is an internal pointer to a sized chunk of bytes.
64 -- The bytes are intended to be *immutable*. There are pure
65 -- operations to read the contents of a StringBuffer.
67 -- A StringBuffer may have a finalizer, depending on how it was
72 buf :: {-# UNPACK #-} !(ForeignPtr Word8),
73 len :: {-# UNPACK #-} !Int, -- length
74 cur :: {-# UNPACK #-} !Int -- current pos
76 -- The buffer is assumed to be UTF-8 encoded, and furthermore
77 -- we add three '\0' bytes to the end as sentinels so that the
78 -- decoder doesn't have to check for overflow at every single byte
79 -- of a multibyte sequence.
81 instance Show StringBuffer where
82 showsPrec _ s = showString "<stringbuffer("
83 . shows (len s) . showString "," . shows (cur s)
86 -- -----------------------------------------------------------------------------
87 -- Creation / Destruction
89 hGetStringBuffer :: FilePath -> IO StringBuffer
90 hGetStringBuffer fname = do
91 h <- openBinaryFile fname ReadMode
93 let size = fromIntegral size_i
94 buf <- mallocForeignPtrArray (size+3)
95 withForeignPtr buf $ \ptr -> do
96 r <- if size == 0 then return 0 else hGetBuf h ptr size
99 then ioError (userError "short read of file")
101 pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
102 -- sentinels for UTF-8 decoding
103 return (StringBuffer buf size 0)
105 stringToStringBuffer :: String -> IO StringBuffer
106 stringToStringBuffer str = do
107 let size = utf8EncodedLength str
108 buf <- mallocForeignPtrArray (size+3)
109 withForeignPtr buf $ \ptr -> do
110 utf8EncodeString ptr str
111 pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
112 -- sentinels for UTF-8 decoding
113 return (StringBuffer buf size 0)
115 -- -----------------------------------------------------------------------------
118 -- Getting our fingers dirty a little here, but this is performance-critical
119 {-# INLINE nextChar #-}
120 nextChar :: StringBuffer -> (Char,StringBuffer)
121 nextChar (StringBuffer buf len (I# cur#)) =
123 withForeignPtr buf $ \(Ptr a#) -> do
124 case utf8DecodeChar# (a# `plusAddr#` cur#) of
126 let cur' = I# (b# `minusAddr#` a#) in
127 return (C# c#, StringBuffer buf len cur')
129 currentChar :: StringBuffer -> Char
130 currentChar = fst . nextChar
132 prevChar :: StringBuffer -> Char -> Char
133 prevChar (StringBuffer buf len 0) deflt = deflt
134 prevChar (StringBuffer buf len cur) deflt =
136 withForeignPtr buf $ \p -> do
137 p' <- utf8PrevChar (p `plusPtr` cur)
138 return (fst (utf8DecodeChar p'))
140 -- -----------------------------------------------------------------------------
143 stepOn :: StringBuffer -> StringBuffer
144 stepOn s = snd (nextChar s)
146 offsetBytes :: Int -> StringBuffer -> StringBuffer
147 offsetBytes i s = s { cur = cur s + i }
149 byteDiff :: StringBuffer -> StringBuffer -> Int
150 byteDiff s1 s2 = cur s2 - cur s1
152 atEnd :: StringBuffer -> Bool
153 atEnd (StringBuffer _ l c) = l == c
155 -- -----------------------------------------------------------------------------
158 lexemeToString :: StringBuffer -> Int {-bytes-} -> String
159 lexemeToString _ 0 = ""
160 lexemeToString (StringBuffer buf _ cur) bytes =
162 withForeignPtr buf $ \ptr ->
163 utf8DecodeString (ptr `plusPtr` cur) bytes
165 lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString
166 lexemeToFastString _ 0 = mkFastString ""
167 lexemeToFastString (StringBuffer buf _ cur) len =
169 withForeignPtr buf $ \ptr ->
170 return $! mkFastStringBytes (ptr `plusPtr` cur) len
172 -- -----------------------------------------------------------------------------
173 -- Parsing integer strings in various bases
175 byteOff :: StringBuffer -> Int -> Char
176 byteOff (StringBuffer buf _ cur) i =
177 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
178 w <- peek (ptr `plusPtr` (cur+i))
179 return (unsafeChr (fromIntegral (w::Word8)))
181 -- | XXX assumes ASCII digits only
182 parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
183 parseInteger buf len radix to_int
185 where go i x | i == len = x
186 | otherwise = go (i+1) (x * radix + toInteger (to_int (byteOff buf i)))
188 -- -----------------------------------------------------------------------------
191 -- Just like unsafePerformIO, but we inline it.
192 {-# INLINE inlinePerformIO #-}
193 inlinePerformIO :: IO a -> a
194 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
196 #if __GLASGOW_HASKELL__ < 600
197 mallocForeignPtrArray :: Storable a => Int -> IO (ForeignPtr a)
198 mallocForeignPtrArray = doMalloc undefined
200 doMalloc :: Storable b => b -> Int -> IO (ForeignPtr b)
201 doMalloc dummy size = mallocForeignPtrBytes (size * sizeOf dummy)
203 mallocForeignPtrBytes :: Int -> IO (ForeignPtr a)
204 mallocForeignPtrBytes n = do
206 newForeignPtr r (finalizerFree r)
208 foreign import ccall unsafe "stdlib.h free"
209 finalizerFree :: Ptr a -> IO ()