X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2Futils%2FStringBuffer.lhs;h=cbf7d618b25ea8c51cba424de8329c884494b878;hb=b3a84c8ba18a2f30da3bd8ee470a67c323b6abd2;hp=28a7f6728d7ee565ddb82e8dec93d19f8e2f0ce3;hpb=4a1aca1033549f95cbdb62cbc0aac331610c91ea;p=ghc-hetmet.git

diff --git a/compiler/utils/StringBuffer.lhs b/compiler/utils/StringBuffer.lhs
index 28a7f67..cbf7d61 100644
--- a/compiler/utils/StringBuffer.lhs
+++ b/compiler/utils/StringBuffer.lhs
@@ -6,6 +6,13 @@
 Buffers for scanning string input stored in external arrays.
 
 \begin{code}
+{-# OPTIONS -w #-}
+-- The above warning suppression flag is a temporary kludge.
+-- While working on this module you are encouraged to remove it and fix
+-- any warnings in the module. See
+--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
+-- for details
+
 module StringBuffer
        (
         StringBuffer(..),
@@ -39,24 +46,23 @@ module StringBuffer
 #include "HsVersions.h"
 
 import Encoding
-import FastString		( FastString,mkFastString,mkFastStringBytes )
+import FastString hiding ( buf )
+import FastTypes
+import FastFunctions
 
 import Foreign
 import System.IO		( hGetBuf, hFileSize,IOMode(ReadMode), hClose
                                 , Handle, hTell )
 
-import GHC.Ptr			( Ptr(..) )
 import GHC.Exts
-import GHC.IOBase		( IO(..) )
-import GHC.Base			( unsafeChr )
 
-#if __GLASGOW_HASKELL__ >= 601
+#if !defined(__GLASGOW_HASKELL__) || __GLASGOW_HASKELL__ >= 601
 import System.IO		( openBinaryFile )
 #else
 import IOExts                   ( openFileEx, IOModeEx(..) )
 #endif
 
-#if __GLASGOW_HASKELL__ < 601
+#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ < 601
 openBinaryFile fp mode = openFileEx fp (BinaryMode mode)
 #endif
 
@@ -100,10 +106,7 @@ hGetStringBuffer fname = do
      hClose h
      if (r /= size)
 	then ioError (userError "short read of file")
-	else do
-	  pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
-		 -- sentinels for UTF-8 decoding
-	  return (StringBuffer buf size 0)
+	else newUTF8StringBuffer buf ptr size
 
 hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer
 hGetStringBufferBlock handle wanted
@@ -114,9 +117,22 @@ hGetStringBufferBlock handle wanted
          withForeignPtr buf $ \ptr ->
              do r <- if size == 0 then return 0 else hGetBuf handle ptr size
                 if r /= size
-                   then ioError (userError $ "short read of file: "++show(r,size,fromIntegral size_i,handle))
-                   else do pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
-                           return (StringBuffer buf size 0)
+                   then ioError (userError $ "short read of file: "++show(r,size,size_i,handle))
+                   else newUTF8StringBuffer buf ptr size
+
+newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
+newUTF8StringBuffer buf ptr size = do
+  pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
+	 -- three zero bytes written at offset 'size': sentinels for UTF-8 decoding
+  let 
+      sb0 = StringBuffer buf size 0
+      (first_char, sb1) = nextChar sb0
+        -- Skip a leading byte-order mark (U+FEFF) if there is one (see #1744).
+        -- This is better than treating U+FEFF as whitespace,
+        -- because that would mess up layout.  We don't have a concept
+        -- of zero-width whitespace in Haskell: all whitespace codepoints
+        -- have a width of one column.
+  return (if first_char == '\xfeff' then sb1 else sb0)
 
 appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer
 appendStringBuffers sb1 sb2
@@ -192,7 +208,7 @@ lexemeToString (StringBuffer buf _ cur) bytes =
       utf8DecodeString (ptr `plusPtr` cur) bytes
 
 lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString
-lexemeToFastString _ 0 = mkFastString ""
+lexemeToFastString _ 0 = nilFS
 lexemeToFastString (StringBuffer buf _ cur) len =
    inlinePerformIO $
      withForeignPtr buf $ \ptr ->
@@ -200,43 +216,28 @@ lexemeToFastString (StringBuffer buf _ cur) len =
 
 -- -----------------------------------------------------------------------------
 -- Parsing integer strings in various bases
-
+{-
 byteOff :: StringBuffer -> Int -> Char
 byteOff (StringBuffer buf _ cur) i = 
   inlinePerformIO $ withForeignPtr buf $ \ptr -> do
-    w <- peek (ptr `plusPtr` (cur+i))
-    return (unsafeChr (fromIntegral (w::Word8)))
-
+--    return $! cBox (indexWord8OffFastPtrAsFastChar
+--                         (pUnbox ptr) (iUnbox (cur+i)))
+--or
+--    w <- peek (ptr `plusPtr` (cur+i))
+--    return (unsafeChr (fromIntegral (w::Word8)))
+-}
 -- | XXX assumes ASCII digits only (by using byteOff)
 parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
-parseUnsignedInteger buf len radix char_to_int 
-  = go 0 0
-  where
+parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int 
+  = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let
+    --LOL, in implementations where the indexing needs slow unsafePerformIO,
+    --this is less (not more) efficient than using the IO monad explicitly
+    --here.
+    byteOff p i = cBox (indexWord8OffFastPtrAsFastChar
+                         (pUnbox ptr) (iUnbox (cur+i)))
     go i x | i == len  = x
-           | otherwise = go (i+1)
-              (x * radix + toInteger (char_to_int (byteOff buf i)))
+           | otherwise = case byteOff ptr i of
+               char -> go (i+1) (x * radix + toInteger (char_to_int char))
+  in go 0 0
 
--- -----------------------------------------------------------------------------
--- under the carpet
-
--- Just like unsafePerformIO, but we inline it.
-{-# INLINE inlinePerformIO #-}
-inlinePerformIO :: IO a -> a
-inlinePerformIO (IO m) = case m realWorld# of (# _, r #)   -> r
-
-#if __GLASGOW_HASKELL__ < 600
-mallocForeignPtrArray :: Storable a => Int -> IO (ForeignPtr a)
-mallocForeignPtrArray  = doMalloc undefined
-  where
-    doMalloc            :: Storable b => b -> Int -> IO (ForeignPtr b)
-    doMalloc dummy size  = mallocForeignPtrBytes (size * sizeOf dummy)
-
-mallocForeignPtrBytes :: Int -> IO (ForeignPtr a)
-mallocForeignPtrBytes n = do
-  r <- mallocBytes n
-  newForeignPtr r (finalizerFree r)
-
-foreign import ccall unsafe "stdlib.h free" 
-  finalizerFree :: Ptr a -> IO ()
-#endif
 \end{code}