- showsPrec _ s = showString ""
-\end{code}
-
-\begin{code}
-hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
-hGetStringBuffer expand_tabs fname = do
- (a, read) <- if expand_tabs
- then slurpFileExpandTabs fname
- else slurpFile fname
-
- let (A# a#) = a; (I# read#) = read
-
- -- add sentinel '\NUL'
- _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#))
- return (StringBuffer a# read# 0# 0#)
-
-#if __GLASGOW_HASKELL__ < 303
-slurpFile fname =
- openFile fname ReadMode >>= \ hndl ->
- hFileSize hndl >>= \ len ->
- let len_i = fromInteger len in
- -- Allocate an array for system call to store its bytes into.
- -- ToDo: make it robust
--- trace (show ((len_i::Int)+1)) $
- _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int) >>= \ arr@(A# a#) ->
- if addr2Int# a# ==# 0# then
- fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes"))
- else
- readHandle hndl >>= \ hndl_ ->
- writeHandle hndl hndl_ >>
- let ptr = filePtr hndl_ in
-#if __GLASGOW_HASKELL__ <= 302
- _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj) >>= \ (I# read#) ->
-#else
- _ccall_ fread arr (1::Int) len_i (ptr::Addr) >>= \ (I# read#) ->
-#endif
- hClose hndl >>
- if read# ==# 0# then -- EOF or some other error
- fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname))
- else
- return (arr, I# read#)
-#endif
-
-unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
-unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
- unsafePerformIO (
- _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
- return s
- )
-\end{code}
-
------------------------------------------------------------------------------
-This very disturbing bit of code is used for expanding the tabs in a
-file before we start parsing it. Expanding the tabs early makes the
-lexer a lot simpler: we only have to record the beginning of the line
-in order to be able to calculate the column offset of the current
-token.
-
-We guess the size of the buffer required as 20% extra for
-expanded tabs, and enlarge it if necessary.
-
-\begin{code}
-#if __GLASGOW_HASKELL__ < 303
-mayBlock fo thing = thing
-
-writeCharOffAddr :: Addr -> Int -> Char -> IO ()
-writeCharOffAddr addr off c
- = _casm_ ``*((char *)%0+(int)%1)=(char)%2;'' addr off c
-#endif
-
-getErrType :: IO Int
-#if __GLASGOW_HASKELL__ < 303
-getErrType = _casm_ ``%r = ghc_errtype;''
-#else
-getErrType = _ccall_ getErrType__
-#endif
-
-slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
-slurpFileExpandTabs fname = do
- bracket (openFile fname ReadMode) (hClose)
- (\ handle ->
- do sz <- hFileSize handle
- if sz > toInteger (maxBound::Int)
- then IOERROR (userError "slurpFile: file too big")
- else do
- let sz_i = fromInteger sz
- sz_i' = (sz_i * 12) `div` 10 -- add 20% for tabs
- chunk <- allocMem sz_i'
- trySlurp handle sz_i' chunk
- )
-
-trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
-trySlurp handle sz_i chunk =
-#if __GLASGOW_HASKELL__ == 303
- wantReadableHandle "hGetChar" handle >>= \ handle_ ->
- let fo = haFO__ handle_ in
-#elif __GLASGOW_HASKELL__ > 303
- wantReadableHandle "hGetChar" handle $ \ handle_ ->
- let fo = haFO__ handle_ in
-#else
- readHandle handle >>= \ handle_ ->
- let fo = filePtr handle_ in
-#endif
- let
- (I# chunk_sz) = sz_i
-
- tAB_SIZE = 8#
-
- slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
- slurpFile c off chunk chunk_sz max_off = slurp c off
- where
-
- slurp :: Int# -> Int# -> IO (Addr, Int)
- slurp c off | off >=# max_off = do
- let new_sz = chunk_sz *# 2#
- chunk' <- reAllocMem chunk (I# new_sz)
- slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
- slurp c off = do
- intc <- mayBlock fo (_ccall_ fileGetc fo)
- if intc == ((-1)::Int)
- then do errtype <- getErrType
- if errtype == (ERR_EOF :: Int)
- then return (chunk, I# off)
- else constructErrorAndFail "slurpFile"
- else case chr intc of
- '\t' -> tabIt c off
- ch -> do writeCharOffAddr chunk (I# off) ch
- let c' | ch == '\n' = 0#
- | otherwise = c +# 1#
- slurp c' (off +# 1#)
-
- tabIt :: Int# -> Int# -> IO (Addr, Int)
- -- can't run out of buffer in here, because we reserved an
- -- extra tAB_SIZE bytes at the end earlier.
- tabIt c off = do
- writeCharOffAddr chunk (I# off) ' '
- let c' = c +# 1#
- off' = off +# 1#
- if c' `remInt#` tAB_SIZE ==# 0#
- then slurp c' off'
- else tabIt c' off'
- in do
-
- -- allow space for a full tab at the end of the buffer
- -- (that's what the max_off thing is for),
- -- and add 1 to allow room for the final sentinel \NUL at
- -- the end of the file.
- (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
-#if __GLASGOW_HASKELL__ < 404
- writeHandle handle handle_
-#endif
- if rc < (0::Int)
- then constructErrorAndFail "slurpFile"
- else return (chunk', rc+1 {-room for sentinel-})
-
-
-reAllocMem :: Addr -> Int -> IO Addr
-reAllocMem ptr sz = do
- chunk <- _ccall_ realloc ptr sz
- if chunk == nullAddr
-#if __GLASGOW_HASKELL__ >= 400
- then fail "reAllocMem"
-#else
- then fail (userError "reAllocMem")
-#endif
- else return chunk
-
-allocMem :: Int -> IO Addr
-allocMem sz = do
-#if __GLASGOW_HASKELL__ < 303
- chunk <- _ccall_ malloc sz
- if chunk == nullAddr
- then fail (userError "allocMem")
- else return chunk
-#else
- chunk <- _ccall_ allocMemory__ sz
- if chunk == nullAddr
- then constructErrorAndFail "allocMem"
- else return chunk
-#endif
-\end{code}
-
-Lookup
-
-\begin{code}
-currentChar :: StringBuffer -> Char
-currentChar sb = case currentChar# sb of c -> C# c
-
-lookAhead :: StringBuffer -> Int -> Char
-lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c
-
-indexSBuffer :: StringBuffer -> Int -> Char
-indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c
-
-currentChar# :: StringBuffer -> Char#
-indexSBuffer# :: StringBuffer -> Int# -> Char#
-lookAhead# :: StringBuffer -> Int# -> Char#
-currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
-indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#
-
- -- relative lookup, i.e, currentChar = lookAhead 0
-lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)
+ -- Renders as <stringbuffer(LEN,CUR)>, where LEN and CUR are the buffer's
+ -- 'len' and 'cur' fields.  The precedence argument is ignored.
+ showsPrec _ s = showString "<stringbuffer("
+   . shows (len s) . showString "," . shows (cur s)
+   . showString ")>"
+
+-- -----------------------------------------------------------------------------
+-- Creation / Destruction
+
+-- | Read the whole of a file into a freshly allocated 'StringBuffer'.
+--
+-- The file is opened in binary mode and its bytes are copied into the
+-- buffer unchanged.  Three zero bytes are written directly after the
+-- contents as sentinels for UTF-8 decoding; they are not counted in the
+-- buffer's length.  The returned buffer has its cursor at offset 0.
+--
+-- Raises a user 'IOError' if the file (plus sentinels) is too large to be
+-- addressed with an 'Int', or if fewer bytes than the reported file size
+-- could be read.
+hGetStringBuffer :: FilePath -> IO StringBuffer
+hGetStringBuffer fname = do
+  h <- openBinaryFile fname ReadMode
+  size_i <- hFileSize h
+  -- 'hFileSize' yields an 'Integer'; converting an out-of-range value with
+  -- 'fromIntegral' would wrap silently, so reject it up front (leaving room
+  -- for the 3 sentinel bytes).
+  if size_i > toInteger (maxBound :: Int) - 3
+    then do
+      hClose h
+      ioError (userError "hGetStringBuffer: file too big")
+    else do
+      let size = fromIntegral size_i
+      buf <- mallocForeignPtrArray (size+3)
+      withForeignPtr buf $ \ptr -> do
+        -- an empty file needs no read at all
+        r <- if size == 0 then return 0 else hGetBuf h ptr size
+        hClose h
+        if (r /= size)
+          then ioError (userError "short read of file")
+          else do
+            -- sentinels for UTF-8 decoding
+            pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
+            return (StringBuffer buf size 0)
+
+-- | Build a 'StringBuffer' from a 'String'.
+--
+-- The buffer is sized with 'utf8EncodedLength' and filled by
+-- 'utf8EncodeString'; three zero bytes follow the encoded text as sentinels
+-- for UTF-8 decoding.  The returned buffer has its cursor at offset 0.
+stringToStringBuffer :: String -> IO StringBuffer
+stringToStringBuffer str = do
+  let nbytes = utf8EncodedLength str
+  fp <- mallocForeignPtrArray (nbytes+3)
+  withForeignPtr fp $ \base -> do
+    utf8EncodeString base str
+    -- sentinels for UTF-8 decoding
+    pokeArray (base `plusPtr` nbytes :: Ptr Word8) [0,0,0]
+    return (StringBuffer fp nbytes 0)
+
+-- -----------------------------------------------------------------------------
+-- Grab a character
+
+-- Getting our fingers dirty a little here, but this is performance-critical
+{-# INLINE nextChar #-}
+-- | Decode the character at the cursor and return it together with a
+-- buffer whose cursor has been moved to the address 'utf8DecodeChar#'
+-- reports as following that character.  The underlying storage and the
+-- length are shared with the input buffer.
+nextChar :: StringBuffer -> (Char,StringBuffer)
+nextChar (StringBuffer fp size (I# pos#)) =
+  inlinePerformIO $
+    withForeignPtr fp $ \(Ptr base#) ->
+      case utf8DecodeChar# (base# `plusAddr#` pos#) of
+        (# ch#, after# #) ->
+          -- the new cursor is the decoder's end address, made relative
+          -- to the start of the buffer
+          return (C# ch#, StringBuffer fp size (I# (after# `minusAddr#` base#)))
+
+-- | The character at the cursor: the first component of 'nextChar',
+-- with the advanced buffer discarded.
+currentChar :: StringBuffer -> Char
+currentChar sb = case nextChar sb of
+  (c, _) -> c
+
+-- | The character just before the cursor.
+--
+-- When the cursor is at offset 0 the supplied default is returned.
+-- Otherwise 'utf8PrevChar' is used to step back from the cursor address
+-- and the character found there is decoded with 'utf8DecodeChar'.
+prevChar :: StringBuffer -> Char -> Char
+prevChar (StringBuffer _ _ 0) fallback = fallback
+prevChar (StringBuffer fp _ pos) _ =
+  inlinePerformIO $
+    withForeignPtr fp $ \base -> do
+      start <- utf8PrevChar (base `plusPtr` pos)
+      return (fst (utf8DecodeChar start))
+
+-- -----------------------------------------------------------------------------
+-- Moving