GHC/PArr.hs

   1 {-# OPTIONS_GHC -fparr -funbox-strict-fields #-}
   2
   3 -----------------------------------------------------------------------------
   4 -- |
   5 -- Module      :  GHC.PArr
   6 -- Copyright   :  (c) 2001-2002 Manuel M T Chakravarty & Gabriele Keller
   7 -- License     :  see libraries/base/LICENSE
   8 --
   9 -- Maintainer  :  Manuel M. T. Chakravarty <chak@cse.unsw.edu.au>
  10 -- Stability   :  internal
  11 -- Portability :  non-portable (GHC Extensions)
  12 --
  13 --  Basic implementation of Parallel Arrays.
  14 --
  15 --  This module has two functions: (1) It defines the interface to the
  16 --  parallel array extension of the Prelude and (2) it provides a vanilla
  17 --  implementation of parallel arrays that does not require to flatten the
  18 --  array code.  The implementation is not very optimised.
  19 --
  20 --- DOCU ----------------------------------------------------------------------
  21 --
  22 --  Language: Haskell 98 plus unboxed values and parallel arrays
  23 --
  24 --  The semantic difference between standard Haskell arrays (aka "lazy
  25 --  arrays") and parallel arrays (aka "strict arrays") is that the evaluation
  26 --  of two different elements of a lazy array is independent, whereas in a
  27 --  strict array either non or all elements are evaluated.  In other words,
  28 --  when a parallel array is evaluated to WHNF, all its elements will be
  29 --  evaluated to WHNF.  The name parallel array indicates that all array
  30 --  elements may, in general, be evaluated to WHNF in parallel without any
  31 --  need to resort to speculative evaluation.  This parallel evaluation
  32 --  semantics is also beneficial in the sequential case, as it facilitates
  33 --  loop-based array processing as known from classic array-based languages,
  34 --  such as Fortran.
  35 --
  36 --  The interface of this module is essentially a variant of the list
  37 --  component of the Prelude, but also includes some functions (such as
  38 --  permutations) that are not provided for lists.  The following list
  39 --  operations are not supported on parallel arrays, as they would require the
  40 --  availability of infinite parallel arrays: `iterate', `repeat', and `cycle'.
  41 --
  42 --  The current implementation is quite simple and entirely based on boxed
  43 --  arrays.  One disadvantage of boxed arrays is that they require to
  44 --  immediately initialise all newly allocated arrays with an error thunk to
  45 --  keep the garbage collector happy, even if it is guaranteed that the array
  46 --  is fully initialised with different values before passing over the
  47 --  user-visible interface boundary.  Currently, no effort is made to use
  48 --  raw memory copy operations to speed things up.
  49 --
  50 --- TODO ----------------------------------------------------------------------
  51 --
  52 --  * We probably want a standard library `PArray' in addition to the prelude
  53 --    extension in the same way as the standard library `List' complements the
  54 --    list functions from the prelude.
  55 --
  56 --  * Currently, functions that emphasis the constructor-based definition of
  57 --    lists (such as, head, last, tail, and init) are not supported.
  58 --
  59 --    Is it worthwhile to support the string processing functions lines,
  60 --    words, unlines, and unwords?  (Currently, they are not implemented.)
  61 --
  62 --    It can, however, be argued that it would be worthwhile to include them
  63 --    for completeness' sake; maybe only in the standard library `PArray'.
  64 --
  65 --  * Prescans are often more useful for array programming than scans.  Shall
  66 --    we include them into the Prelude or the library?
  67 --
  68 --  * Due to the use of the iterator `loop', we could define some fusion rules
  69 --    in this module.
  70 --
  71 --  * We might want to add bounds checks that can be deactivated.
  72 --
  73
  74 module GHC.PArr (
  75   -- [::],              -- Built-in syntax
  76
  77   mapP,                 -- :: (a -> b) -> [:a:] -> [:b:]
  78   (+:+),                -- :: [:a:] -> [:a:] -> [:a:]
  79   filterP,              -- :: (a -> Bool) -> [:a:] -> [:a:]
  80   concatP,              -- :: [:[:a:]:] -> [:a:]
  81   concatMapP,           -- :: (a -> [:b:]) -> [:a:] -> [:b:]
  82 --  head, last, tail, init,   -- it's not wise to use them on arrays
  83   nullP,                -- :: [:a:] -> Bool
  84   lengthP,              -- :: [:a:] -> Int
  85   (!:),                 -- :: [:a:] -> Int -> a
  86   foldlP,               -- :: (a -> b -> a) -> a -> [:b:] -> a
  87   foldl1P,              -- :: (a -> a -> a) ->      [:a:] -> a
  88   scanlP,               -- :: (a -> b -> a) -> a -> [:b:] -> [:a:]
  89   scanl1P,              -- :: (a -> a -> a) ->      [:a:] -> [:a:]
  90   foldrP,               -- :: (a -> b -> b) -> b -> [:a:] -> b
  91   foldr1P,              -- :: (a -> a -> a) ->      [:a:] -> a
  92   scanrP,               -- :: (a -> b -> b) -> b -> [:a:] -> [:b:]
  93   scanr1P,              -- :: (a -> a -> a) ->      [:a:] -> [:a:]
  94 --  iterate, repeat,          -- parallel arrays must be finite
  95   replicateP,           -- :: Int -> a -> [:a:]
  96 --  cycle,                    -- parallel arrays must be finite
  97   takeP,                -- :: Int -> [:a:] -> [:a:]
  98   dropP,                -- :: Int -> [:a:] -> [:a:]
  99   splitAtP,             -- :: Int -> [:a:] -> ([:a:],[:a:])
 100   takeWhileP,           -- :: (a -> Bool) -> [:a:] -> [:a:]
 101   dropWhileP,           -- :: (a -> Bool) -> [:a:] -> [:a:]
 102   spanP,                -- :: (a -> Bool) -> [:a:] -> ([:a:], [:a:])
 103   breakP,               -- :: (a -> Bool) -> [:a:] -> ([:a:], [:a:])
 104 --  lines, words, unlines, unwords,  -- is string processing really needed
 105   reverseP,             -- :: [:a:] -> [:a:]
 106   andP,                 -- :: [:Bool:] -> Bool
 107   orP,                  -- :: [:Bool:] -> Bool
 108   anyP,                 -- :: (a -> Bool) -> [:a:] -> Bool
 109   allP,                 -- :: (a -> Bool) -> [:a:] -> Bool
 110   elemP,                -- :: (Eq a) => a -> [:a:] -> Bool
 111   notElemP,             -- :: (Eq a) => a -> [:a:] -> Bool
 112   lookupP,              -- :: (Eq a) => a -> [:(a, b):] -> Maybe b
 113   sumP,                 -- :: (Num a) => [:a:] -> a
 114   productP,             -- :: (Num a) => [:a:] -> a
 115   maximumP,             -- :: (Ord a) => [:a:] -> a
 116   minimumP,             -- :: (Ord a) => [:a:] -> a
 117   zipP,                 -- :: [:a:] -> [:b:]          -> [:(a, b)   :]
 118   zip3P,                -- :: [:a:] -> [:b:] -> [:c:] -> [:(a, b, c):]
 119   zipWithP,             -- :: (a -> b -> c)      -> [:a:] -> [:b:] -> [:c:]
 120   zipWith3P,            -- :: (a -> b -> c -> d) -> [:a:]->[:b:]->[:c:]->[:d:]
 121   unzipP,               -- :: [:(a, b)   :] -> ([:a:], [:b:])
 122   unzip3P,              -- :: [:(a, b, c):] -> ([:a:], [:b:], [:c:])
 123
 124   -- overloaded functions
 125   --
 126   enumFromToP,          -- :: Enum a => a -> a      -> [:a:]
 127   enumFromThenToP,      -- :: Enum a => a -> a -> a -> [:a:]
 128
 129   -- the following functions are not available on lists
 130   --
 131   toP,                  -- :: [a] -> [:a:]
 132   fromP,                -- :: [:a:] -> [a]
 133   sliceP,               -- :: Int -> Int -> [:e:] -> [:e:]
 134   foldP,                -- :: (e -> e -> e) -> e -> [:e:] -> e
 135   fold1P,               -- :: (e -> e -> e) ->      [:e:] -> e
 136   permuteP,             -- :: [:Int:] -> [:e:] ->          [:e:]
 137   bpermuteP,            -- :: [:Int:] -> [:e:] ->          [:e:]
 138   dpermuteP,            -- :: [:Int:] -> [:e:] -> [:e:] -> [:e:]
 139   crossP,               -- :: [:a:] -> [:b:] -> [:(a, b):]
 140   indexOfP              -- :: (a -> Bool) -> [:a:] -> [:Int:]
 141 ) where
 142
 143 import Prelude
 144
 145 import GHC.ST   ( ST(..), STRep, runST )
 146 import GHC.Exts ( Int#, Array#, Int(I#), MutableArray#, newArray#,
 147                   unsafeFreezeArray#, indexArray#, writeArray#, (<#), (>=#) )
 148
 149 infixl 9  !:
 150 infixr 5  +:+
 151 infix  4  `elemP`, `notElemP`
 152
 153
 154 -- representation of parallel arrays
 155 -- ---------------------------------
 156
 157 -- this rather straight forward implementation maps parallel arrays to the
 158 -- internal representation used for standard Haskell arrays in GHC's Prelude
 159 -- (EXPORTED ABSTRACTLY)
 160 --
 161 -- * This definition *must* be kept in sync with `TysWiredIn.parrTyCon'!
 162 --
 163 data [::] e = PArr Int# (Array# e)
 164
 165
 166 -- exported operations on parallel arrays
 167 -- --------------------------------------
 168
 169 -- operations corresponding to list operations
 170 --
 171
 172 mapP   :: (a -> b) -> [:a:] -> [:b:]
 173 mapP f  = fst . loop (mapEFL f) noAL
 174
 175 (+:+)     :: [:a:] -> [:a:] -> [:a:]
 176 a1 +:+ a2  = fst $ loop (mapEFL sel) noAL (enumFromToP 0 (len1 + len2 - 1))
 177                        -- we can't use the [:x..y:] form here for tedious
 178                        -- reasons to do with the typechecker and the fact that
 179                        -- `enumFromToP' is defined in the same module
 180              where
 181                len1 = lengthP a1
 182                len2 = lengthP a2
 183                --
 184                sel i | i < len1  = a1!:i
 185                      | otherwise = a2!:(i - len1)
 186
 187 filterP   :: (a -> Bool) -> [:a:] -> [:a:]
 188 filterP p  = fst . loop (filterEFL p) noAL
 189
 190 concatP     :: [:[:a:]:] -> [:a:]
 191 concatP xss  = foldlP (+:+) [::] xss
 192
 193 concatMapP   :: (a -> [:b:]) -> [:a:] -> [:b:]
 194 concatMapP f  = concatP . mapP f
 195
 196 --  head, last, tail, init,   -- it's not wise to use them on arrays
 197
 198 nullP      :: [:a:] -> Bool
 199 nullP [::]  = True
 200 nullP _     = False
 201
 202 lengthP             :: [:a:] -> Int
 203 lengthP (PArr n# _)  = I# n#
 204
 205 (!:) :: [:a:] -> Int -> a
 206 (!:)  = indexPArr
 207
 208 foldlP     :: (a -> b -> a) -> a -> [:b:] -> a
 209 foldlP f z  = snd . loop (foldEFL (flip f)) z
 210
 211 foldl1P        :: (a -> a -> a) -> [:a:] -> a
 212 foldl1P f [::]  = error "Prelude.foldl1P: empty array"
 213 foldl1P f a     = snd $ loopFromTo 1 (lengthP a - 1) (foldEFL f) (a!:0) a
 214
 215 scanlP     :: (a -> b -> a) -> a -> [:b:] -> [:a:]
 216 scanlP f z  = fst . loop (scanEFL (flip f)) z
 217
 218 scanl1P        :: (a -> a -> a) -> [:a:] -> [:a:]
 219 scanl1P f [::]  = error "Prelude.scanl1P: empty array"
 220 scanl1P f a     = fst $ loopFromTo 1 (lengthP a - 1) (scanEFL f) (a!:0) a
 221
 222 foldrP :: (a -> b -> b) -> b -> [:a:] -> b
 223 foldrP  = error "Prelude.foldrP: not implemented yet" -- FIXME
 224
 225 foldr1P :: (a -> a -> a) -> [:a:] -> a
 226 foldr1P  = error "Prelude.foldr1P: not implemented yet" -- FIXME
 227
 228 scanrP :: (a -> b -> b) -> b -> [:a:] -> [:b:]
 229 scanrP  = error "Prelude.scanrP: not implemented yet" -- FIXME
 230
 231 scanr1P :: (a -> a -> a) -> [:a:] -> [:a:]
 232 scanr1P  = error "Prelude.scanr1P: not implemented yet" -- FIXME
 233
 234 --  iterate, repeat           -- parallel arrays must be finite
 235
 236 replicateP             :: Int -> a -> [:a:]
 237 {-# INLINE replicateP #-}
 238 replicateP n e  = runST (do
 239   marr# <- newArray n e
 240   mkPArr n marr#)
 241
 242 --  cycle                     -- parallel arrays must be finite
 243
 244 takeP   :: Int -> [:a:] -> [:a:]
 245 takeP n  = sliceP 0 (n - 1)
 246
 247 dropP     :: Int -> [:a:] -> [:a:]
 248 dropP n a  = sliceP n (lengthP a - 1) a
 249
 250 splitAtP      :: Int -> [:a:] -> ([:a:],[:a:])
 251 splitAtP n xs  = (takeP n xs, dropP n xs)
 252
 253 takeWhileP :: (a -> Bool) -> [:a:] -> [:a:]
 254 takeWhileP  = error "Prelude.takeWhileP: not implemented yet" -- FIXME
 255
 256 dropWhileP :: (a -> Bool) -> [:a:] -> [:a:]
 257 dropWhileP  = error "Prelude.dropWhileP: not implemented yet" -- FIXME
 258
 259 spanP :: (a -> Bool) -> [:a:] -> ([:a:], [:a:])
 260 spanP  = error "Prelude.spanP: not implemented yet" -- FIXME
 261
 262 breakP   :: (a -> Bool) -> [:a:] -> ([:a:], [:a:])
 263 breakP p  = spanP (not . p)
 264
 265 --  lines, words, unlines, unwords,  -- is string processing really needed
 266
 267 reverseP   :: [:a:] -> [:a:]
 268 reverseP a  = permuteP (enumFromThenToP (len - 1) (len - 2) 0) a
 269                        -- we can't use the [:x, y..z:] form here for tedious
 270                        -- reasons to do with the typechecker and the fact that
 271                        -- `enumFromThenToP' is defined in the same module
 272               where
 273                 len = lengthP a
 274
 275 andP :: [:Bool:] -> Bool
 276 andP  = foldP (&&) True
 277
 278 orP :: [:Bool:] -> Bool
 279 orP  = foldP (||) True
 280
 281 anyP   :: (a -> Bool) -> [:a:] -> Bool
 282 anyP p  = orP . mapP p
 283
 284 allP :: (a -> Bool) -> [:a:] -> Bool
 285 allP p  = andP . mapP p
 286
 287 elemP   :: (Eq a) => a -> [:a:] -> Bool
 288 elemP x  = anyP (== x)
 289
 290 notElemP   :: (Eq a) => a -> [:a:] -> Bool
 291 notElemP x  = allP (/= x)
 292
 293 lookupP :: (Eq a) => a -> [:(a, b):] -> Maybe b
 294 lookupP  = error "Prelude.lookupP: not implemented yet" -- FIXME
 295
 296 sumP :: (Num a) => [:a:] -> a
 297 sumP  = foldP (+) 0
 298
 299 productP :: (Num a) => [:a:] -> a
 300 productP  = foldP (*) 1
 301
 302 maximumP      :: (Ord a) => [:a:] -> a
 303 maximumP [::]  = error "Prelude.maximumP: empty parallel array"
 304 maximumP xs    = fold1P max xs
 305
 306 minimumP :: (Ord a) => [:a:] -> a
 307 minimumP [::]  = error "Prelude.minimumP: empty parallel array"
 308 minimumP xs    = fold1P min xs
 309
 310 zipP :: [:a:] -> [:b:] -> [:(a, b):]
 311 zipP  = zipWithP (,)
 312
 313 zip3P :: [:a:] -> [:b:] -> [:c:] -> [:(a, b, c):]
 314 zip3P  = zipWith3P (,,)
 315
 316 zipWithP         :: (a -> b -> c) -> [:a:] -> [:b:] -> [:c:]
 317 zipWithP f a1 a2  = let
 318                       len1 = lengthP a1
 319                       len2 = lengthP a2
 320                       len  = len1 `min` len2
 321                     in
 322                     fst $ loopFromTo 0 (len - 1) combine 0 a1
 323                     where
 324                       combine e1 i = (Just $ f e1 (a2!:i), i + 1)
 325
 326 zipWith3P :: (a -> b -> c -> d) -> [:a:]->[:b:]->[:c:]->[:d:]
 327 zipWith3P f a1 a2 a3 = let
 328                         len1 = lengthP a1
 329                         len2 = lengthP a2
 330                         len3 = lengthP a3
 331                         len  = len1 `min` len2 `min` len3
 332                       in
 333                       fst $ loopFromTo 0 (len - 1) combine 0 a1
 334                       where
 335                         combine e1 i = (Just $ f e1 (a2!:i) (a3!:i), i + 1)
 336
 337 unzipP   :: [:(a, b):] -> ([:a:], [:b:])
 338 unzipP a  = (fst $ loop (mapEFL fst) noAL a, fst $ loop (mapEFL snd) noAL a)
 339 -- FIXME: these two functions should be optimised using a tupled custom loop
 340 unzip3P   :: [:(a, b, c):] -> ([:a:], [:b:], [:c:])
 341 unzip3P a  = (fst $ loop (mapEFL fst3) noAL a,
 342               fst $ loop (mapEFL snd3) noAL a,
 343               fst $ loop (mapEFL trd3) noAL a)
 344              where
 345                fst3 (a, _, _) = a
 346                snd3 (_, b, _) = b
 347                trd3 (_, _, c) = c
 348
 349 -- instances
 350 --
 351
 352 instance Eq a => Eq [:a:] where
 353   a1 == a2 | lengthP a1 == lengthP a2 = andP (zipWithP (==) a1 a2)
 354            | otherwise                = False
 355
 356 instance Ord a => Ord [:a:] where
 357   compare a1 a2 = case foldlP combineOrdering EQ (zipWithP compare a1 a2) of
 358                     EQ | lengthP a1 == lengthP a2 -> EQ
 359                        | lengthP a1 <  lengthP a2 -> LT
 360                        | otherwise                -> GT
 361                   where
 362                     combineOrdering EQ    EQ    = EQ
 363                     combineOrdering EQ    other = other
 364                     combineOrdering other _     = other
 365
 366 instance Functor [::] where
 367   fmap = mapP
 368
 369 instance Monad [::] where
 370   m >>= k  = foldrP ((+:+) . k      ) [::] m
 371   m >>  k  = foldrP ((+:+) . const k) [::] m
 372   return x = [:x:]
 373   fail _   = [::]
 374
 375 instance Show a => Show [:a:]  where
 376   showsPrec _  = showPArr . fromP
 377     where
 378       showPArr []     s = "[::]" ++ s
 379       showPArr (x:xs) s = "[:" ++ shows x (showPArr' xs s)
 380
 381       showPArr' []     s = ":]" ++ s
 382       showPArr' (y:ys) s = ',' : shows y (showPArr' ys s)
 383
 384 instance Read a => Read [:a:]  where
 385   readsPrec _ a = [(toP v, rest) | (v, rest) <- readPArr a]
 386     where
 387       readPArr = readParen False (\r -> do
 388                                           ("[:",s) <- lex r
 389                                           readPArr1 s)
 390       readPArr1 s =
 391         (do { (":]", t) <- lex s; return ([], t) }) ++
 392         (do { (x, t) <- reads s; (xs, u) <- readPArr2 t; return (x:xs, u) })
 393
 394       readPArr2 s =
 395         (do { (":]", t) <- lex s; return ([], t) }) ++
 396         (do { (",", t) <- lex s; (x, u) <- reads t; (xs, v) <- readPArr2 u;
 397               return (x:xs, v) })
 398
 399 -- overloaded functions
 400 --
 401
 402 -- Ideally, we would like `enumFromToP' and `enumFromThenToP' to be members of
 403 -- `Enum'.  On the other hand, we really do not want to change `Enum'.  Thus,
 404 -- for the moment, we hope that the compiler is sufficiently clever to
 405 -- properly fuse the following definitions.
 406
 407 enumFromToP     :: Enum a => a -> a -> [:a:]
 408 enumFromToP x y  = mapP toEnum (eftInt (fromEnum x) (fromEnum y))
 409   where
 410     eftInt x y = scanlP (+) x $ replicateP (y - x + 1) 1
 411
 412 enumFromThenToP       :: Enum a => a -> a -> a -> [:a:]
 413 enumFromThenToP x y z  =
 414   mapP toEnum (efttInt (fromEnum x) (fromEnum y) (fromEnum z))
 415   where
 416     efttInt x y z = scanlP (+) x $
 417                       replicateP (abs (z - x) `div` abs delta + 1) delta
 418       where
 419        delta = y - x
 420
 421 -- the following functions are not available on lists
 422 --
 423
 424 -- create an array from a list (EXPORTED)
 425 --
 426 toP   :: [a] -> [:a:]
 427 toP l  = fst $ loop store l (replicateP (length l) ())
 428          where
 429            store _ (x:xs) = (Just x, xs)
 430
 431 -- convert an array to a list (EXPORTED)
 432 --
 433 fromP   :: [:a:] -> [a]
 434 fromP a  = [a!:i | i <- [0..lengthP a - 1]]
 435
 436 -- cut a subarray out of an array (EXPORTED)
 437 --
 438 sliceP :: Int -> Int -> [:e:] -> [:e:]
 439 sliceP from to a =
 440   fst $ loopFromTo (0 `max` from) (to `min` (lengthP a - 1)) (mapEFL id) noAL a
 441
 442 -- parallel folding (EXPORTED)
 443 --
 444 -- * the first argument must be associative; otherwise, the result is undefined
 445 --
 446 foldP :: (e -> e -> e) -> e -> [:e:] -> e
 447 foldP  = foldlP
 448
 449 -- parallel folding without explicit neutral (EXPORTED)
 450 --
 451 -- * the first argument must be associative; otherwise, the result is undefined
 452 --
 453 fold1P :: (e -> e -> e) -> [:e:] -> e
 454 fold1P  = foldl1P
 455
 456 -- permute an array according to the permutation vector in the first argument
 457 -- (EXPORTED)
 458 --
 459 permuteP       :: [:Int:] -> [:e:] -> [:e:]
 460 permuteP is es
 461   | isLen /= esLen = error "GHC.PArr: arguments must be of the same length"
 462   | otherwise      = runST (do
 463                        marr <- newArray isLen noElem
 464                        permute marr is es
 465                        mkPArr isLen marr)
 466   where
 467     noElem = error "GHC.PArr.permuteP: I do not exist!"
 468              -- unlike standard Haskell arrays, this value represents an
 469              -- internal error
 470     isLen = lengthP is
 471     esLen = lengthP es
 472
 473 -- permute an array according to the back-permutation vector in the first
 474 -- argument (EXPORTED)
 475 --
 476 -- * the permutation vector must represent a surjective function; otherwise,
 477 --   the result is undefined
 478 --
 479 bpermuteP       :: [:Int:] -> [:e:] -> [:e:]
 480 bpermuteP is es  = fst $ loop (mapEFL (es!:)) noAL is
 481
 482 -- permute an array according to the permutation vector in the first
 483 -- argument, which need not be surjective (EXPORTED)
 484 --
 485 -- * any elements in the result that are not covered by the permutation
 486 --   vector assume the value of the corresponding position of the third
 487 --   argument
 488 --
 489 dpermuteP :: [:Int:] -> [:e:] -> [:e:] -> [:e:]
 490 dpermuteP is es dft
 491   | isLen /= esLen = error "GHC.PArr: arguments must be of the same length"
 492   | otherwise      = runST (do
 493                        marr <- newArray dftLen noElem
 494                        trans 0 (isLen - 1) marr dft copyOne noAL
 495                        permute marr is es
 496                        mkPArr dftLen marr)
 497   where
 498     noElem = error "GHC.PArr.permuteP: I do not exist!"
 499              -- unlike standard Haskell arrays, this value represents an
 500              -- internal error
 501     isLen  = lengthP is
 502     esLen  = lengthP es
 503     dftLen = lengthP dft
 504
 505     copyOne e _ = (Just e, noAL)
 506
 507 -- computes the cross combination of two arrays (EXPORTED)
 508 --
 509 crossP       :: [:a:] -> [:b:] -> [:(a, b):]
 510 crossP a1 a2  = fst $ loop combine (0, 0) $ replicateP len ()
 511                 where
 512                   len1 = lengthP a1
 513                   len2 = lengthP a2
 514                   len  = len1 * len2
 515                   --
 516                   combine _ (i, j) = (Just $ (a1!:i, a2!:j), next)
 517                                      where
 518                                        next | (i + 1) == len1 = (0    , j + 1)
 519                                             | otherwise       = (i + 1, j)
 520
 521 {- An alternative implementation
 522    * The one above is certainly better for flattened code, but here where we
 523      are handling boxed arrays, the trade off is less clear.  However, I
 524      think, the above one is still better.
 525
 526 crossP a1 a2  = let
 527                   len1 = lengthP a1
 528                   len2 = lengthP a2
 529                   x1   = concatP $ mapP (replicateP len2) a1
 530                   x2   = concatP $ replicateP len1 a2
 531                 in
 532                 zipP x1 x2
 533  -}
 534
 535 -- computes an index array for all elements of the second argument for which
 536 -- the predicate yields `True' (EXPORTED)
 537 --
 538 indexOfP     :: (a -> Bool) -> [:a:] -> [:Int:]
 539 indexOfP p a  = fst $ loop calcIdx 0 a
 540                 where
 541                   calcIdx e idx | p e       = (Just idx, idx + 1)
 542                                 | otherwise = (Nothing , idx    )
 543
 544
 545 -- auxiliary functions
 546 -- -------------------
 547
 548 -- internally used mutable boxed arrays
 549 --
 550 data MPArr s e = MPArr Int# (MutableArray# s e)
 551
 552 -- allocate a new mutable array that is pre-initialised with a given value
 553 --
 554 newArray             :: Int -> e -> ST s (MPArr s e)
 555 {-# INLINE newArray #-}
 556 newArray (I# n#) e  = ST $ \s1# ->
 557   case newArray# n# e s1# of { (# s2#, marr# #) ->
 558   (# s2#, MPArr n# marr# #)}
 559
 560 -- convert a mutable array into the external parallel array representation
 561 --
 562 mkPArr                           :: Int -> MPArr s e -> ST s [:e:]
 563 {-# INLINE mkPArr #-}
 564 mkPArr (I# n#) (MPArr _ marr#)  = ST $ \s1# ->
 565   case unsafeFreezeArray# marr# s1#   of { (# s2#, arr# #) ->
 566   (# s2#, PArr n# arr# #) }
 567
 568 -- general array iterator
 569 --
 570 -- * corresponds to `loopA' from ``Functional Array Fusion'', Chakravarty &
 571 --   Keller, ICFP 2001
 572 --
 573 loop :: (e -> acc -> (Maybe e', acc))    -- mapping & folding, once per element
 574      -> acc                              -- initial acc value
 575      -> [:e:]                            -- input array
 576      -> ([:e':], acc)
 577 {-# INLINE loop #-}
 578 loop mf acc arr = loopFromTo 0 (lengthP arr - 1) mf acc arr
 579
 580 -- general array iterator with bounds
 581 --
 582 loopFromTo :: Int                        -- from index
 583            -> Int                        -- to index
 584            -> (e -> acc -> (Maybe e', acc))
 585            -> acc
 586            -> [:e:]
 587            -> ([:e':], acc)
 588 {-# INLINE loopFromTo #-}
 589 loopFromTo from to mf start arr = runST (do
 590   marr      <- newArray (to - from + 1) noElem
 591   (n', acc) <- trans from to marr arr mf start
 592   arr       <- mkPArr n' marr
 593   return (arr, acc))
 594   where
 595     noElem = error "GHC.PArr.loopFromTo: I do not exist!"
 596              -- unlike standard Haskell arrays, this value represents an
 597              -- internal error
 598
 599 -- actual loop body of `loop'
 600 --
 601 -- * for this to be really efficient, it has to be translated with the
 602 --   constructor specialisation phase "SpecConstr" switched on; as of GHC 5.03
 603 --   this requires an optimisation level of at least -O2
 604 --
 605 trans :: Int                            -- index of first elem to process
 606       -> Int                            -- index of last elem to process
 607       -> MPArr s e'                     -- destination array
 608       -> [:e:]                          -- source array
 609       -> (e -> acc -> (Maybe e', acc))  -- mutator
 610       -> acc                            -- initial accumulator
 611       -> ST s (Int, acc)                -- final destination length/final acc
 612 {-# INLINE trans #-}
 613 trans from to marr arr mf start = trans' from 0 start
 614   where
 615     trans' arrOff marrOff acc
 616       | arrOff > to = return (marrOff, acc)
 617       | otherwise   = do
 618                         let (oe', acc') = mf (arr `indexPArr` arrOff) acc
 619                         marrOff' <- case oe' of
 620                                       Nothing -> return marrOff
 621                                       Just e' -> do
 622                                         writeMPArr marr marrOff e'
 623                                         return $ marrOff + 1
 624                         trans' (arrOff + 1) marrOff' acc'
 625
 626 -- Permute the given elements into the mutable array.
 627 --
 628 permute :: MPArr s e -> [:Int:] -> [:e:] -> ST s ()
 629 permute marr is es = perm 0
 630   where
 631     perm i
 632       | i == n = return ()
 633       | otherwise  = writeMPArr marr (is!:i) (es!:i) >> perm (i + 1)
 634       where
 635         n = lengthP is
 636
 637
 638 -- common patterns for using `loop'
 639 --
 640
 641 -- initial value for the accumulator when the accumulator is not needed
 642 --
 643 noAL :: ()
 644 noAL  = ()
 645
 646 -- `loop' mutator maps a function over array elements
 647 --
 648 mapEFL   :: (e -> e') -> (e -> () -> (Maybe e', ()))
 649 {-# INLINE mapEFL #-}
 650 mapEFL f  = \e a -> (Just $ f e, ())
 651
 652 -- `loop' mutator that filter elements according to a predicate
 653 --
 654 filterEFL   :: (e -> Bool) -> (e -> () -> (Maybe e, ()))
 655 {-# INLINE filterEFL #-}
 656 filterEFL p  = \e a -> if p e then (Just e, ()) else (Nothing, ())
 657
 658 -- `loop' mutator for array folding
 659 --
 660 foldEFL   :: (e -> acc -> acc) -> (e -> acc -> (Maybe (), acc))
 661 {-# INLINE foldEFL #-}
 662 foldEFL f  = \e a -> (Nothing, f e a)
 663
 664 -- `loop' mutator for array scanning
 665 --
 666 scanEFL   :: (e -> acc -> acc) -> (e -> acc -> (Maybe acc, acc))
 667 {-# INLINE scanEFL #-}
 668 scanEFL f  = \e a -> (Just a, f e a)
 669
 670 -- elementary array operations
 671 --
 672
 673 -- unlifted array indexing
 674 --
 675 indexPArr                       :: [:e:] -> Int -> e
 676 {-# INLINE indexPArr #-}
 677 indexPArr (PArr n# arr#) (I# i#)
 678   | i# >=# 0# && i# <# n# =
 679     case indexArray# arr# i# of (# e #) -> e
 680   | otherwise = error $ "indexPArr: out of bounds parallel array index; " ++
 681                         "idx = " ++ show (I# i#) ++ ", arr len = "
 682                         ++ show (I# n#)
 683
 684 -- encapsulate writing into a mutable array into the `ST' monad
 685 --
 686 writeMPArr                           :: MPArr s e -> Int -> e -> ST s ()
 687 {-# INLINE writeMPArr #-}
 688 writeMPArr (MPArr n# marr#) (I# i#) e
 689   | i# >=# 0# && i# <# n# =
 690     ST $ \s# ->
 691     case writeArray# marr# i# e s# of s'# -> (# s'#, () #)
 692   | otherwise = error $ "writeMPArr: out of bounds parallel array index; " ++
 693                         "idx = " ++ show (I# i#) ++ ", arr len = "
 694                         ++ show (I# n#)