2 % (c) The University of Glasgow, 1997-2003
4 \section{String buffers}
6 Buffers for scanning string input stored in external arrays.
10 -- always optimise this module, it's critical
15 -- non-abstract for vs\/HaskellService
17 -- * Creation\/destruction
27 -- * Moving and comparison
40 #include "HsVersions.h"
43 import FastString (FastString,mkFastString,mkFastStringBytes)
49 import GHC.IOBase ( IO(..) )
50 import GHC.Base ( unsafeChr )
52 import System.IO ( hGetBuf )
54 import IO ( hFileSize, IOMode(ReadMode),
56 #if __GLASGOW_HASKELL__ >= 601
57 import System.IO ( openBinaryFile )
59 import IOExts ( openFileEx, IOModeEx(..) )
62 #if __GLASGOW_HASKELL__ < 601
63 openBinaryFile fp mode = openFileEx fp (BinaryMode mode)
66 -- -----------------------------------------------------------------------------
67 -- The StringBuffer type
69 -- |A StringBuffer is an internal pointer to a sized chunk of bytes.
70 -- The bytes are intended to be *immutable*. There are pure
71 -- operations to read the contents of a StringBuffer.
73 -- A StringBuffer may have a finalizer, depending on how it was
78 buf :: {-# UNPACK #-} !(ForeignPtr Word8),
79 len :: {-# UNPACK #-} !Int, -- length
80 cur :: {-# UNPACK #-} !Int -- current pos
82 -- The buffer is assumed to be UTF-8 encoded, and furthermore
83 -- we add three '\0' bytes to the end as sentinels so that the
84 -- decoder doesn't have to check for overflow at every single byte
85 -- of a multibyte sequence.
87 instance Show StringBuffer where
88 showsPrec _ s = showString "<stringbuffer("
89 . shows (len s) . showString "," . shows (cur s)
92 -- -----------------------------------------------------------------------------
93 -- Creation / Destruction
95 hGetStringBuffer :: FilePath -> IO StringBuffer
96 hGetStringBuffer fname = do
97 h <- openBinaryFile fname ReadMode
99 let size = fromIntegral size_i
100 buf <- mallocForeignPtrArray (size+3)
101 withForeignPtr buf $ \ptr -> do
102 r <- if size == 0 then return 0 else hGetBuf h ptr size
105 then ioError (userError "short read of file")
107 pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
108 -- sentinels for UTF-8 decoding
109 return (StringBuffer buf size 0)
111 stringToStringBuffer :: String -> IO StringBuffer
112 stringToStringBuffer str = do
113 let size = utf8EncodedLength str
114 buf <- mallocForeignPtrArray (size+3)
115 withForeignPtr buf $ \ptr -> do
116 utf8EncodeString ptr str
117 pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
118 -- sentinels for UTF-8 decoding
119 return (StringBuffer buf size 0)
121 -- -----------------------------------------------------------------------------
124 -- Getting our fingers dirty a little here, but this is performance-critical
125 {-# INLINE nextChar #-}
126 nextChar :: StringBuffer -> (Char,StringBuffer)
127 nextChar (StringBuffer buf len (I# cur#)) =
129 withForeignPtr buf $ \(Ptr a#) -> do
130 case utf8DecodeChar# (a# `plusAddr#` cur#) of
132 let cur' = I# (b# `minusAddr#` a#) in
133 return (C# c#, StringBuffer buf len cur')
135 currentChar :: StringBuffer -> Char
136 currentChar = fst . nextChar
138 prevChar :: StringBuffer -> Char -> Char
139 prevChar (StringBuffer buf len 0) deflt = deflt
140 prevChar (StringBuffer buf len cur) deflt =
142 withForeignPtr buf $ \p -> do
143 p' <- utf8PrevChar (p `plusPtr` cur)
144 return (fst (utf8DecodeChar p'))
146 -- -----------------------------------------------------------------------------
149 stepOn :: StringBuffer -> StringBuffer
150 stepOn s = snd (nextChar s)
152 offsetBytes :: Int -> StringBuffer -> StringBuffer
153 offsetBytes i s = s { cur = cur s + i }
155 byteDiff :: StringBuffer -> StringBuffer -> Int
156 byteDiff s1 s2 = cur s2 - cur s1
158 atEnd :: StringBuffer -> Bool
159 atEnd (StringBuffer _ l c) = l == c
161 -- -----------------------------------------------------------------------------
164 lexemeToString :: StringBuffer -> Int {-bytes-} -> String
165 lexemeToString _ 0 = ""
166 lexemeToString (StringBuffer buf _ cur) bytes =
168 withForeignPtr buf $ \ptr ->
169 utf8DecodeString (ptr `plusPtr` cur) bytes
171 lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString
172 lexemeToFastString _ 0 = mkFastString ""
173 lexemeToFastString (StringBuffer buf _ cur) len =
175 withForeignPtr buf $ \ptr ->
176 return $! mkFastStringBytes (ptr `plusPtr` cur) len
178 -- -----------------------------------------------------------------------------
179 -- Parsing integer strings in various bases
181 byteOff :: StringBuffer -> Int -> Char
182 byteOff (StringBuffer buf _ cur) i =
183 inlinePerformIO $ withForeignPtr buf $ \ptr -> do
184 w <- peek (ptr `plusPtr` (cur+i))
185 return (unsafeChr (fromIntegral (w::Word8)))
187 -- | XXX assumes ASCII digits only
188 parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
189 parseInteger buf len radix to_int
191 where go i x | i == len = x
192 | otherwise = go (i+1) (x * radix + toInteger (to_int (byteOff buf i)))
194 -- -----------------------------------------------------------------------------
197 -- Just like unsafePerformIO, but we inline it.
198 {-# INLINE inlinePerformIO #-}
199 inlinePerformIO :: IO a -> a
200 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r