FIX #4381

[ghc-base.git] / GHC / Arr.lhs
diff --git a/GHC/Arr.lhs b/GHC/Arr.lhs

index 25505fc..b9ce6b7 100644 (file)
--- a/GHC/Arr.lhs
+++ b/GHC/Arr.lhs
@@ -1,5 +1,7 @@
  \begin{code}
-{-# OPTIONS_GHC -fno-implicit-prelude -fno-bang-patterns #-}
+{-# OPTIONS_GHC -funbox-strict-fields #-}
+{-# LANGUAGE NoImplicitPrelude, NoBangPatterns #-}
+{-# OPTIONS_HADDOCK hide #-}
  -----------------------------------------------------------------------------
  -- |
  -- Module      :  GHC.Arr
@@ -17,7 +19,6 @@
  -- #hide
  module GHC.Arr where
  
-import {-# SOURCE #-} GHC.Err ( error )
  import GHC.Enum
  import GHC.Num
  import GHC.ST
@@ -32,15 +33,15 @@ default ()
  
  
  %*********************************************************
-%*                                                     *
+%*                                                      *
  \subsection{The @Ix@ class}
-%*                                                     *
+%*                                                      *
  %*********************************************************
  
  \begin{code}
  -- | The 'Ix' class is used to map a contiguous subrange of values in
  -- a type onto integers.  It is used primarily for array indexing
--- (see "Data.Array", "Data.Array.IArray" and "Data.Array.MArray").
+-- (see the array package).
  --
  -- The first argument @(l,u)@ of each of these operations is a pair
  -- specifying the lower and upper bounds of a contiguous subrange of values.
@@ -48,64 +49,116 @@ default ()
  -- An implementation is entitled to assume the following laws about these
  -- operations:
  --
--- * @'inRange' (l,u) i == 'elem' i ('range' (l,u))@
+-- * @'inRange' (l,u) i == 'elem' i ('range' (l,u))@ @ @
  --
  -- * @'range' (l,u) '!!' 'index' (l,u) i == i@, when @'inRange' (l,u) i@
  --
--- * @'map' ('index' (l,u)) ('range' (l,u))) == [0..'rangeSize' (l,u)-1]@
+-- * @'map' ('index' (l,u)) ('range' (l,u))) == [0..'rangeSize' (l,u)-1]@ @ @
  --
--- * @'rangeSize' (l,u) == 'length' ('range' (l,u))@
+-- * @'rangeSize' (l,u) == 'length' ('range' (l,u))@ @ @
  --
  -- Minimal complete instance: 'range', 'index' and 'inRange'.
  --
  class (Ord a) => Ix a where
      -- | The list of values in the subrange defined by a bounding pair.
-    range              :: (a,a) -> [a]
+    range               :: (a,a) -> [a]
      -- | The position of a subscript in the subrange.
-    index              :: (a,a) -> a -> Int
+    index               :: (a,a) -> a -> Int
      -- | Like 'index', but without checking that the value is in range.
-    unsafeIndex                :: (a,a) -> a -> Int
+    unsafeIndex         :: (a,a) -> a -> Int
      -- | Returns 'True' the given subscript lies in the range defined
      -- the bounding pair.
-    inRange            :: (a,a) -> a -> Bool
+    inRange             :: (a,a) -> a -> Bool
      -- | The size of the subrange defined by a bounding pair.
-    rangeSize          :: (a,a) -> Int
+    rangeSize           :: (a,a) -> Int
      -- | like 'rangeSize', but without checking that the upper bound is
      -- in range.
      unsafeRangeSize     :: (a,a) -> Int
  
-       -- Must specify one of index, unsafeIndex
-    index b i | inRange b i = unsafeIndex b i  
-             | otherwise   = error "Error in array index"
+        -- Must specify one of index, unsafeIndex
+
+       -- 'index' is typically over-ridden in instances, with essentially
+       -- the same code, but using indexError instead of hopelessIndexError
+       -- Reason: we have 'Show' at the instances
+    {-# INLINE index #-}  -- See Note [Inlining index]
+    index b i | inRange b i = unsafeIndex b i   
+              | otherwise   = hopelessIndexError
+
      unsafeIndex b i = index b i
  
      rangeSize b@(_l,h) | inRange b h = unsafeIndex b h + 1
-                      | otherwise   = 0        -- This case is only here to
-                                               -- check for an empty range
-       -- NB: replacing (inRange b h) by (l <= h) fails for
-       --     tuples.  E.g.  (1,2) <= (2,1) but the range is empty
+                       | otherwise   = 0        -- This case is only here to
+                                                -- check for an empty range
+        -- NB: replacing (inRange b h) by (l <= h) fails for
+        --     tuples.  E.g.  (1,2) <= (2,1) but the range is empty
  
      unsafeRangeSize b@(_l,h) = unsafeIndex b h + 1
  \end{code}
  
  Note that the following is NOT right
-       rangeSize (l,h) | l <= h    = index b h + 1
-                       | otherwise = 0
+        rangeSize (l,h) | l <= h    = index b h + 1
+                        | otherwise = 0
  
  Because it might be the case that l<h, but the range
  is nevertheless empty.  Consider
-       ((1,2),(2,1))
+        ((1,2),(2,1))
  Here l<h, but the second index ranges from 2..1 and
  hence is empty
  
  %*********************************************************
-%*                                                     *
+%*                                                      *
  \subsection{Instances of @Ix@}
-%*                                                     *
+%*                                                      *
  %*********************************************************
  
+Note [Inlining index]
+~~~~~~~~~~~~~~~~~~~~~
+We inline the 'index' operation, 
+
+ * Partly because it generates much faster code 
+   (although bigger); see Trac #1216
+
+ * Partly because it exposes the bounds checks to the simplifier which
+   might help a big.
+
+If you make a per-instance index method, you may consider inlining it.
+
+Note [Double bounds-checking of index values]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When you index an array, a!x, there are two possible bounds checks we might make:
+
+  (A) Check that (inRange (bounds a) x) holds.  
+
+      (A) is checked in the method for 'index'
+
+  (B) Check that (index (bounds a) x) lies in the range 0..n, 
+      where n is the size of the underlying array
+
+      (B) is checked in the top-level function (!), in safeIndex.
+
+Of course it *should* be the case that (A) holds iff (B) holds, but that 
+is a property of the particular instances of index, bounds, and inRange,
+so GHC cannot guarantee it.
+
+ * If you do (A) and not (B), then you might get a seg-fault, 
+   by indexing at some bizarre location.  Trac #1610
+
+ * If you do (B) but not (A), you may get no complaint when you index
+   an array out of its semantic bounds.  Trac #2120
+
+At various times we have had (A) and not (B), or (B) and not (A); both
+led to complaints.  So now we implement *both* checks (Trac #2669).
+
+For 1-d, 2-d, and 3-d arrays of Int we have specialised instances to avoid this.
+
+Note [Out-of-bounds error messages]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The default method for 'index' generates hoplelessIndexError, because
+Ix doesn't have Show as a superclass.  For particular base types we
+can do better, so we override the default method for index.
+
  \begin{code}
--- abstract these errors from the relevant index functions so that
+-- Abstract these errors from the relevant index functions so that
  -- the guts of the function will be small enough to inline.
  
  {-# NOINLINE indexError #-}
@@ -113,8 +166,11 @@ indexError :: Show a => (a,a) -> a -> String -> b
  indexError rng i tp
    = error (showString "Ix{" . showString tp . showString "}.index: Index " .
             showParen True (showsPrec 0 i) .
-          showString " out of range " $
-          showParen True (showsPrec 0 rng) "")
+           showString " out of range " $
+           showParen True (showsPrec 0 rng) "")
+
+hopelessIndexError :: Int -- Try to use 'indexError' instead!
+hopelessIndexError = error "Error in array index"
  
  ----------------------------------------------------------------------
  instance  Ix Char  where
@@ -124,23 +180,27 @@ instance  Ix Char  where
      {-# INLINE unsafeIndex #-}
      unsafeIndex (m,_n) i = fromEnum i - fromEnum m
  
+    {-# INLINE index #-}  -- See Note [Out-of-bounds error messages]
+                          -- and Note [Inlining index]
      index b i | inRange b i =  unsafeIndex b i
-             | otherwise   =  indexError b i "Char"
+              | otherwise   =  indexError b i "Char"
  
-    inRange (m,n) i    =  m <= i && i <= n
+    inRange (m,n) i     =  m <= i && i <= n
  
  ----------------------------------------------------------------------
  instance  Ix Int  where
      {-# INLINE range #-}
-       -- The INLINE stops the build in the RHS from getting inlined,
-       -- so that callers can fuse with the result of range
+        -- The INLINE stops the build in the RHS from getting inlined,
+        -- so that callers can fuse with the result of range
      range (m,n) = [m..n]
  
      {-# INLINE unsafeIndex #-}
      unsafeIndex (m,_n) i = i - m
  
+    {-# INLINE index #-}  -- See Note [Out-of-bounds error messages]
+                          -- and Note [Inlining index]
      index b i | inRange b i =  unsafeIndex b i
-             | otherwise   =  indexError b i "Int"
+              | otherwise   =  indexError b i "Int"
  
      {-# INLINE inRange #-}
      inRange (I# m,I# n) (I# i) =  m <=# i && i <=# n
@@ -153,10 +213,12 @@ instance  Ix Integer  where
      {-# INLINE unsafeIndex #-}
      unsafeIndex (m,_n) i   = fromInteger (i - m)
  
+    {-# INLINE index #-}  -- See Note [Out-of-bounds error messages]
+                          -- and Note [Inlining index]
      index b i | inRange b i =  unsafeIndex b i
-             | otherwise   =  indexError b i "Integer"
+              | otherwise   =  indexError b i "Integer"
  
-    inRange (m,n) i    =  m <= i && i <= n
+    inRange (m,n) i     =  m <= i && i <= n
  
  ----------------------------------------------------------------------
  instance Ix Bool where -- as derived
@@ -166,8 +228,10 @@ instance Ix Bool where -- as derived
      {-# INLINE unsafeIndex #-}
      unsafeIndex (l,_) i = fromEnum i - fromEnum l
  
+    {-# INLINE index #-}  -- See Note [Out-of-bounds error messages]
+                          -- and Note [Inlining index]
      index b i | inRange b i =  unsafeIndex b i
-             | otherwise   =  indexError b i "Bool"
+              | otherwise   =  indexError b i "Bool"
  
      inRange (l,u) i = fromEnum i >= fromEnum l && fromEnum i <= fromEnum u
  
@@ -179,8 +243,10 @@ instance Ix Ordering where -- as derived
      {-# INLINE unsafeIndex #-}
      unsafeIndex (l,_) i = fromEnum i - fromEnum l
  
+    {-# INLINE index #-}  -- See Note [Out-of-bounds error messages]
+                          -- and Note [Inlining index]
      index b i | inRange b i =  unsafeIndex b i
-             | otherwise   =  indexError b i "Ordering"
+              | otherwise   =  indexError b i "Ordering"
  
      inRange (l,u) i = fromEnum i >= fromEnum l && fromEnum i <= fromEnum u
  
@@ -192,7 +258,8 @@ instance Ix () where
      unsafeIndex   ((), ()) () = 0
      {-# INLINE inRange #-}
      inRange ((), ()) () = True
-    {-# INLINE index #-}
+
+    {-# INLINE index #-}  -- See Note [Inlining index]
      index b i = unsafeIndex b i
  
  ----------------------------------------------------------------------
@@ -277,9 +344,9 @@ instance  (Ix a1, Ix a2, Ix a3, Ix a4, Ix a5) => Ix (a1,a2,a3,a4,a5)  where
  \end{code}
  
  %*********************************************************
-%*                                                     *
+%*                                                      *
  \subsection{The @Array@ types}
-%*                                                     *
+%*                                                      *
  %*********************************************************
  
  \begin{code}
@@ -287,7 +354,6 @@ type IPr = (Int, Int)
  
  -- | The type of immutable non-strict (boxed) arrays
  -- with indices in @i@ and elements in @e@.
--- The Int is the number of elements in the Array.
  data Ix i => Array i e
                   = Array !i         -- the lower bound, l
                           !i         -- the upper bound, u
@@ -312,8 +378,8 @@ data STArray s i e
                                         -- used to make sure an index is
                                         -- really in range
                     (MutableArray# s e) -- The actual elements
-       -- No Ix context for STArray.  They are stupid,
-       -- and force an Ix context on the equality instance.
+        -- No Ix context for STArray.  They are stupid,
+        -- and force an Ix context on the equality instance.
  
  -- Just pointer equality on mutable arrays:
  instance Eq (STArray s i e) where
@@ -323,9 +389,9 @@ instance Eq (STArray s i e) where
  
  
  %*********************************************************
-%*                                                     *
+%*                                                      *
  \subsection{Operations on immutable arrays}
-%*                                                     *
+%*                                                      *
  %*********************************************************
  
  \begin{code}
@@ -345,7 +411,7 @@ arrEleBottom = error "(Array.!): undefined array element"
  --
  -- Because the indices must be checked for these errors, 'array' is
  -- strict in the bounds argument and in the indices of the association
--- list, but nonstrict in the values.  Thus, recurrences such as the
+-- list, but non-strict in the values.  Thus, recurrences such as the
  -- following are possible:
  --
  -- > a = array (1,100) ((1,1) : [(i, i * a!(i-1)) | i <- [2..100]])
@@ -360,18 +426,18 @@ arrEleBottom = error "(Array.!): undefined array element"
  -- with which the array was constructed.
  {-# INLINE array #-}
  array :: Ix i
-       => (i,i)        -- ^ a pair of /bounds/, each of the index type
-                       -- of the array.  These bounds are the lowest and
-                       -- highest indices in the array, in that order.
-                       -- For example, a one-origin vector of length
-                       -- '10' has bounds '(1,10)', and a one-origin '10'
-                       -- by '10' matrix has bounds '((1,1),(10,10))'.
-       -> [(i, e)]     -- ^ a list of /associations/ of the form
-                       -- (/index/, /value/).  Typically, this list will
-                       -- be expressed as a comprehension.  An
-                       -- association '(i, x)' defines the value of
-                       -- the array at index 'i' to be 'x'.
-       -> Array i e
+        => (i,i)        -- ^ a pair of /bounds/, each of the index type
+                        -- of the array.  These bounds are the lowest and
+                        -- highest indices in the array, in that order.
+                        -- For example, a one-origin vector of length
+                        -- '10' has bounds '(1,10)', and a one-origin '10'
+                        -- by '10' matrix has bounds '((1,1),(10,10))'.
+        -> [(i, e)]     -- ^ a list of /associations/ of the form
+                        -- (/index/, /value/).  Typically, this list will
+                        -- be expressed as a comprehension.  An
+                        -- association '(i, x)' defines the value of
+                        -- the array at index 'i' to be 'x'.
+        -> Array i e
  array (l,u) ies
      = let n = safeRangeSize (l,u)
        in unsafeArray' (l,u) n
@@ -390,15 +456,18 @@ unsafeArray' (l,u) n@(I# n#) ies = runST (ST $ \s1# ->
  
  {-# INLINE fill #-}
  fill :: MutableArray# s e -> (Int, e) -> STRep s a -> STRep s a
-fill marr# (I# i#, e) next s1# =
-    case writeArray# marr# i# e s1#     of { s2# ->
-    next s2# }
+-- NB: put the \s after the "=" so that 'fill' 
+--     inlines when applied to three args 
+fill marr# (I# i#, e) next 
+ = \s1# -> case writeArray# marr# i# e s1# of 
+             s2# -> next s2# 
  
  {-# INLINE done #-}
  done :: Ix i => i -> i -> Int -> MutableArray# s e -> STRep s (Array i e)
-done l u n marr# s1# =
-    case unsafeFreezeArray# marr# s1# of
-        (# s2#, arr# #) -> (# s2#, Array l u n arr# #)
+-- See NB on 'fill'
+done l u n marr# 
+  = \s1# -> case unsafeFreezeArray# marr# s1# of
+              (# s2#, arr# #) -> (# s2#, Array l u n arr# #)
  
  -- This is inefficient and I'm not sure why:
  -- listArray (l,u) es = unsafeArray (l,u) (zip [0 .. rangeSize (l,u) - 1] es)
@@ -429,15 +498,38 @@ arr@(Array l u n _) ! i = unsafeAt arr $ safeIndex (l,u) n i
  {-# INLINE safeRangeSize #-}
  safeRangeSize :: Ix i => (i, i) -> Int
  safeRangeSize (l,u) = let r = rangeSize (l, u)
-                      in if r < 0 then error "Negative range size"
+                      in if r < 0 then negRange
                                    else r
  
-{-# INLINE safeIndex #-}
+-- Don't inline this error message everywhere!!
+negRange :: Int          -- Uninformative, but Ix does not provide Show
+negRange = error "Negative range size"
+
+{-# INLINE[1] safeIndex #-}
+-- See Note [Double bounds-checking of index values]
+-- Inline *after* (!) so the rules can fire
  safeIndex :: Ix i => (i, i) -> Int -> i -> Int
-safeIndex (l,u) n i = let i' = unsafeIndex (l,u) i
+safeIndex (l,u) n i = let i' = index (l,u) i
                        in if (0 <= i') && (i' < n)
                           then i'
-                         else error "Error in array index"
+                         else badSafeIndex i' n
+
+-- See Note [Double bounds-checking of index values]
+{-# RULES
+"safeIndex/I"       safeIndex = lessSafeIndex :: (Int,Int) -> Int -> Int -> Int
+"safeIndex/(I,I)"   safeIndex = lessSafeIndex :: ((Int,Int),(Int,Int)) -> Int -> (Int,Int) -> Int
+"safeIndex/(I,I,I)" safeIndex = lessSafeIndex :: ((Int,Int,Int),(Int,Int,Int)) -> Int -> (Int,Int,Int) -> Int
+  #-}
+
+lessSafeIndex :: Ix i => (i, i) -> Int -> i -> Int
+-- See Note [Double bounds-checking of index values]
+-- Do only (A), the semantic check
+lessSafeIndex (l,u) _ i = index (l,u) i  
+
+-- Don't inline this long error message everywhere!!
+badSafeIndex :: Int -> Int -> Int
+badSafeIndex i' n = error ("Error in array index; " ++ show i' ++
+                        " not in range [0.." ++ show n ++ ")")
  
  {-# INLINE unsafeAt #-}
  unsafeAt :: Ix i => Array i e -> Int -> e
@@ -462,7 +554,7 @@ indices (Array l u _ _) = range (l,u)
  -- | The list of elements of an array in index order.
  {-# INLINE elems #-}
  elems :: Ix i => Array i e -> [e]
-elems arr@(Array l u n _) =
+elems arr@(Array _ _ n _) =
      [unsafeAt arr i | i <- [0 .. n - 1]]
  
  -- | The list of associations of an array in index order.
@@ -471,7 +563,7 @@ assocs :: Ix i => Array i e -> [(i, e)]
  assocs arr@(Array l u _ _) =
      [(i, arr ! i) | i <- range (l,u)]
  
--- | The 'accumArray' deals with repeated indices in the association
+-- | The 'accumArray' function deals with repeated indices in the association
  -- list using an /accumulating function/ which combines the values of
  -- associations with the same index.
  -- For example, given a list of values of some index type, @hist@
@@ -487,33 +579,34 @@ assocs arr@(Array l u _ _) =
  -- not in general be recursive.
  {-# INLINE accumArray #-}
  accumArray :: Ix i
-       => (e -> a -> e)        -- ^ accumulating function
-       -> e                    -- ^ initial value
-       -> (i,i)                -- ^ bounds of the array
-       -> [(i, a)]             -- ^ association list
-       -> Array i e
-accumArray f init (l,u) ies =
+        => (e -> a -> e)        -- ^ accumulating function
+        -> e                    -- ^ initial value
+        -> (i,i)                -- ^ bounds of the array
+        -> [(i, a)]             -- ^ association list
+        -> Array i e
+accumArray f initial (l,u) ies =
      let n = safeRangeSize (l,u)
-    in unsafeAccumArray' f init (l,u) n
+    in unsafeAccumArray' f initial (l,u) n
                           [(safeIndex (l,u) n i, e) | (i, e) <- ies]
  
  {-# INLINE unsafeAccumArray #-}
  unsafeAccumArray :: Ix i => (e -> a -> e) -> e -> (i,i) -> [(Int, a)] -> Array i e
-unsafeAccumArray f init b ies = unsafeAccumArray' f init b (rangeSize b) ies
+unsafeAccumArray f initial b ies = unsafeAccumArray' f initial b (rangeSize b) ies
  
  {-# INLINE unsafeAccumArray' #-}
  unsafeAccumArray' :: Ix i => (e -> a -> e) -> e -> (i,i) -> Int -> [(Int, a)] -> Array i e
-unsafeAccumArray' f init (l,u) n@(I# n#) ies = runST (ST $ \s1# ->
-    case newArray# n# init s1#          of { (# s2#, marr# #) ->
+unsafeAccumArray' f initial (l,u) n@(I# n#) ies = runST (ST $ \s1# ->
+    case newArray# n# initial s1#          of { (# s2#, marr# #) ->
      foldr (adjust f marr#) (done l u n marr#) ies s2# })
  
  {-# INLINE adjust #-}
  adjust :: (e -> a -> e) -> MutableArray# s e -> (Int, a) -> STRep s b -> STRep s b
-adjust f marr# (I# i#, new) next s1# =
-    case readArray# marr# i# s1# of
-        (# s2#, old #) ->
-            case writeArray# marr# i# (f old new) s2# of
-                s3# -> next s3#
+-- See NB on 'fill'
+adjust f marr# (I# i#, new) next
+  = \s1# -> case readArray# marr# i# s1# of
+               (# s2#, old #) ->
+                   case writeArray# marr# i# (f old new) s2# of
+                       s3# -> next s3#
  
  -- | Constructs an array identical to the first argument except that it has
  -- been updated by the associations in the right argument.
@@ -600,9 +693,9 @@ cmpIntArray arr1@(Array l1 u1 n1 _) arr2@(Array l2 u2 n2 _) =
  
  
  %*********************************************************
-%*                                                     *
+%*                                                      *
  \subsection{Array instances}
-%*                                                     *
+%*                                                      *
  %*********************************************************
  
  \begin{code}
@@ -622,16 +715,16 @@ instance (Ix a, Show a, Show b) => Show (Array a b) where
          showsPrec appPrec1 (bounds a) .
          showChar ' ' .
          showsPrec appPrec1 (assocs a)
-       -- Precedence of 'array' is the precedence of application
+        -- Precedence of 'array' is the precedence of application
  
  -- The Read instance is in GHC.Read
  \end{code}
  
  
  %*********************************************************
-%*                                                     *
+%*                                                      *
  \subsection{Operations on mutable arrays}
-%*                                                     *
+%*                                                      *
  %*********************************************************
  
  Idle ADR question: What's the tradeoff here between flattening these
@@ -650,9 +743,9 @@ might be different, though.
  \begin{code}
  {-# INLINE newSTArray #-}
  newSTArray :: Ix i => (i,i) -> e -> ST s (STArray s i e)
-newSTArray (l,u) init = ST $ \s1# ->
+newSTArray (l,u) initial = ST $ \s1# ->
      case safeRangeSize (l,u)            of { n@(I# n#) ->
-    case newArray# n# init s1#          of { (# s2#, marr# #) ->
+    case newArray# n# initial s1#       of { (# s2#, marr# #) ->
      (# s2#, STArray l u n marr# #) }}
  
  {-# INLINE boundsSTArray #-}
@@ -687,9 +780,9 @@ unsafeWriteSTArray (STArray _ _ _ marr#) (I# i#) e = ST $ \s1# ->
  
  
  %*********************************************************
-%*                                                     *
+%*                                                      *
  \subsection{Moving between mutable and immutable}
-%*                                                     *
+%*                                                      *
  %*********************************************************
  
  \begin{code}