+
+-- ---------------------------------------------------------------------
+--
+-- Functional array fusion for ByteStrings.
+--
+-- From the Data Parallel Haskell project,
+-- http://www.cse.unsw.edu.au/~chak/project/dph/
+--
+
+-- |Data type for accumulators which can be ignored. The rewrite rules rely on
+-- the fact that no bottoms of this type are ever constructed; hence, we can
+-- assume @(_ :: NoAL) `seq` x = x@.
+--
+data NoAL = NoAL
+
+-- | Special forms of loop arguments
+--
+-- * These are common special cases for the three function arguments of gen
+-- and loop; we give them special names to make it easier to trigger RULES
+-- applying in the special cases represented by these arguments. The
+-- "INLINE [1]" makes sure that these functions are only inlined in the last
+-- two simplifier phases.
+--
+-- * In the case where the accumulator is not needed, it is better to always
+-- explicitly return a value `()', rather than just copy the input to the
+-- output, as the former gives GHC better local information.
+--
+
+-- | Element function expressing a mapping only
+mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
+mapEFL f = \_ e -> (noAL, (Just $ f e))
+{-# INLINE [1] mapEFL #-}
+
+-- | Element function implementing a filter function only
+filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
+filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing)
+{-# INLINE [1] filterEFL #-}
+
+-- |Element function expressing a reduction only
+foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
+foldEFL f = \a e -> (f a e, Nothing)
+{-# INLINE [1] foldEFL #-}
+
+-- | No accumulator
+noAL :: NoAL
+noAL = NoAL
+{-# INLINE [1] noAL #-}
+
+-- | Projection functions that are fusion friendly (as in, we determine when
+-- they are inlined)
+loopArr :: (ByteString, acc) -> ByteString
+loopArr (arr, _) = arr
+{-# INLINE [1] loopArr #-}
+
+loopAcc :: (ByteString, acc) -> acc
+loopAcc (_, acc) = acc
+{-# INLINE [1] loopAcc #-}
+
+loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
+loopSndAcc (arr, (_, acc)) = (arr, acc)
+{-# INLINE [1] loopSndAcc #-}
+
+------------------------------------------------------------------------
+
+-- | Iteration over over ByteStrings
+loopU :: (acc -> Word8 -> (acc, Maybe Word8)) -- ^ mapping & folding, once per elem
+ -> acc -- ^ initial acc value
+ -> ByteString -- ^ input ByteString
+ -> (ByteString, acc)
+
+loopU f start (PS fp s i) = inlinePerformIO $ withForeignPtr fp $ \a -> do
+ p <- mallocArray (i+1)
+ (acc, i') <- go (a `plusPtr` s) p start
+ p' <- if i == i' then return p else reallocArray p (i'+1) -- avoid realloc for maps
+ poke (p' `plusPtr` i') (0::Word8)
+ fp' <- newForeignFreePtr p'
+ return (PS fp' 0 i', acc)
+ where
+ go p ma = trans 0 0
+ where
+ STRICT3(trans)
+ trans a_off ma_off acc
+ | a_off >= i = return (acc, ma_off)
+ | otherwise = do
+ x <- peekByteOff p a_off
+ let (acc', oe) = f acc x
+ ma_off' <- case oe of
+ Nothing -> return ma_off
+ Just e -> do pokeByteOff ma ma_off e
+ return $ ma_off + 1
+ trans (a_off+1) ma_off' acc'
+
+{-# INLINE [1] loopU #-}
+
+{-# RULES
+
+"array fusion!" forall em1 em2 start1 start2 arr.
+ loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
+ let em (acc1, acc2) e =
+ case em1 acc1 e of
+ (acc1', Nothing) -> ((acc1', acc2), Nothing)
+ (acc1', Just e') ->
+ case em2 acc2 e' of
+ (acc2', res) -> ((acc1', acc2'), res)
+ in loopSndAcc (loopU em (start1, start2) arr)
+
+"loopArr/loopSndAcc" forall x.
+ loopArr (loopSndAcc x) = loopArr x
+
+-- orphan?
+-- "seq/NoAL" forall (u::NoAL) e.
+-- u `seq` e = e
+
+ #-}
+