{-# OPTIONS -cpp -fglasgow-exts #-}
---------------------------------------------------------------------------------
-{-| Module : Data.IntSet
- Copyright : (c) Daan Leijen 2002
- License : BSD-style
- Maintainer : libraries@haskell.org
- Stability : provisional
- Portability : portable
-
- An efficient implementation of integer sets.
-
- This module is intended to be imported @qualified@, to avoid name
- clashes with Prelude functions. eg.
-
- > import Data.IntSet as Set
-
- The implementation is based on /big-endian patricia trees/. This data structure
- performs especially well on binary operations like 'union' and 'intersection'. However,
- my benchmarks show that it is also (much) faster on insertions and deletions when
- compared to a generic size-balanced set implementation (see "Set").
-
- * Chris Okasaki and Andy Gill, \"/Fast Mergeable Integer Maps/\",
- Workshop on ML, September 1998, pages 77--86, <http://www.cse.ogi.edu/~andy/pub/finite.htm>
-
- * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve Information
- Coded In Alphanumeric/\", Journal of the ACM, 15(4), October 1968, pages 514--534.
+-----------------------------------------------------------------------------
+-- |
+-- Module : Data.IntSet
+-- Copyright : (c) Daan Leijen 2002
+-- License : BSD-style
+-- Maintainer : libraries@haskell.org
+-- Stability : provisional
+-- Portability : portable
+--
+-- An efficient implementation of integer sets.
+--
+-- Since many function names (but not the type name) clash with
+-- "Prelude" names, this module is usually imported @qualified@, e.g.
+--
+-- > import Data.IntSet (IntSet)
+-- > import qualified Data.IntSet as IntSet
+--
+-- The implementation is based on /big-endian patricia trees/. This data
+-- structure performs especially well on binary operations like 'union'
+-- and 'intersection'. However, my benchmarks show that it is also
+-- (much) faster on insertions and deletions when compared to a generic
+-- size-balanced set implementation (see "Data.Set").
+--
+-- * Chris Okasaki and Andy Gill, \"/Fast Mergeable Integer Maps/\",
+-- Workshop on ML, September 1998, pages 77-86,
+-- <http://www.cse.ogi.edu/~andy/pub/finite.htm>
+--
+-- * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
+-- Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
+-- October 1968, pages 514-534.
+--
+-- Many operations have a worst-case complexity of /O(min(n,W))/.
+-- This means that the operation can become linear in the number of
+-- elements with a maximum of /W/ -- the number of bits in an 'Int'
+-- (32 or 64).
+-----------------------------------------------------------------------------
- Many operations have a worst-case complexity of /O(min(n,W))/. This means that the
- operation can become linear in the number of elements
- with a maximum of /W/ -- the number of bits in an 'Int' (32 or 64).
--}
----------------------------------------------------------------------------------}
module Data.IntSet (
-- * Set type
IntSet -- instance Eq,Show
, null
, size
, member
+ , notMember
, isSubsetOf
, isProperSubsetOf
, split
, splitMember
+ -- * Min\/Max
+ , findMin
+ , findMax
+ , deleteMin
+ , deleteMax
+ , deleteFindMin
+ , deleteFindMax
+ , maxView
+ , minView
+
-- * Map
, map
import Prelude hiding (lookup,filter,foldr,foldl,null,map)
import Data.Bits
-import Data.Int
import qualified Data.List as List
-import Data.Monoid
+import Data.Monoid (Monoid(..))
+import Data.Typeable
{-
-- just for testing
import qualified List
-}
+#if __GLASGOW_HASKELL__
+import Text.Read
+import Data.Generics.Basics (Data(..), mkNorepType)
+import Data.Generics.Instances ()
+#endif
-#ifdef __GLASGOW_HASKELL__
-{--------------------------------------------------------------------
- GHC: use unboxing to get @shiftRL@ inlined.
---------------------------------------------------------------------}
#if __GLASGOW_HASKELL__ >= 503
-import GHC.Word
import GHC.Exts ( Word(..), Int(..), shiftRL# )
-#else
+#elif __GLASGOW_HASKELL__
import Word
import GlaExts ( Word(..), Int(..), shiftRL# )
+#else
+import Data.Word
#endif
infixl 9 \\{-This comment teaches CPP correct behaviour -}
+-- A "Nat" is a natural machine word (an unsigned Int)
type Nat = Word
natFromInt :: Int -> Nat
intFromNat w = fromIntegral w
shiftRL :: Nat -> Int -> Nat
-shiftRL (W# x) (I# i)
- = W# (shiftRL# x i)
-
-#elif __HUGS__
+#if __GLASGOW_HASKELL__
{--------------------------------------------------------------------
- Hugs:
- * raises errors on boundary values when using 'fromIntegral'
- but not with the deprecated 'fromInt/toInt'.
- * Older Hugs doesn't define 'Word'.
- * Newer Hugs defines 'Word' in the Prelude but no operations.
+ GHC: use unboxing to get @shiftRL@ inlined.
--------------------------------------------------------------------}
-import Data.Word
-infixl 9 \\
-
-type Nat = Word32 -- illegal on 64-bit platforms!
-
-natFromInt :: Int -> Nat
-natFromInt i = fromInt i
-
-intFromNat :: Nat -> Int
-intFromNat w = toInt w
-
-shiftRL :: Nat -> Int -> Nat
-shiftRL x i = shiftR x i
-
+shiftRL (W# x) (I# i)
+ = W# (shiftRL# x i)
#else
-{--------------------------------------------------------------------
- 'Standard' Haskell
- * A "Nat" is a natural machine word (an unsigned Int)
---------------------------------------------------------------------}
-import Data.Word
-infixl 9 \\ -- comment to fool cpp
-
-type Nat = Word
-
-natFromInt :: Int -> Nat
-natFromInt i = fromIntegral i
-
-intFromNat :: Nat -> Int
-intFromNat w = fromIntegral w
-
-shiftRL :: Nat -> Int -> Nat
-shiftRL w i = shiftR w i
-
+shiftRL x i = shiftR x i
#endif
{--------------------------------------------------------------------
data IntSet = Nil
| Tip {-# UNPACK #-} !Int
| Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
+-- Invariant: Nil is never found as a child of Bin.
+
type Prefix = Int
type Mask = Int
+instance Monoid IntSet where
+ mempty = empty
+ mappend = union
+ mconcat = unions
+
+#if __GLASGOW_HASKELL__
+
+{--------------------------------------------------------------------
+ A Data instance
+--------------------------------------------------------------------}
+
+-- This instance preserves data abstraction at the cost of inefficiency.
+-- We omit reflection services for the sake of data abstraction.
+
+instance Data IntSet where
+ gfoldl f z is = z fromList `f` (toList is)
+ toConstr _ = error "toConstr"
+ gunfold _ _ = error "gunfold"
+ dataTypeOf _ = mkNorepType "Data.IntSet.IntSet"
+
+#endif
+
{--------------------------------------------------------------------
Query
--------------------------------------------------------------------}
Tip y -> (x==y)
Nil -> False
+-- | /O(min(n,W))/. Is the element not in the set?
+notMember :: Int -> IntSet -> Bool
+notMember k = not . member k
+
-- 'lookup' is used by 'intersection' for left-biasing
lookup :: Int -> IntSet -> Maybe Int
lookup k t
subsetCmp Nil t = LT
-- | /O(n+m)/. Is this a subset?
--- @(s1 `isSubsetOf` s2)@ tells whether s1 is a subset of s2.
+-- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
isSubsetOf :: IntSet -> IntSet -> Bool
isSubsetOf t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
Nil -> (Nil,Nil)
--- | /O(log n)/. The expression (@split x set@) is a pair @(set1,set2)@
+-- | /O(min(n,W))/. The expression (@'split' x set@) is a pair @(set1,set2)@
-- where all elements in @set1@ are lower than @x@ and all elements in
-- @set2@ larger than @x@.
--
split x t
= case t of
Bin p m l r
- | zero x m -> let (lt,gt) = split x l in (lt,union gt r)
- | otherwise -> let (lt,gt) = split x r in (union l lt,gt)
+ | m < 0 -> if x >= 0 then let (lt,gt) = split' x l in (union r lt, gt)
+ else let (lt,gt) = split' x r in (lt, union gt l)
+ -- handle negative numbers.
+ | otherwise -> split' x t
+ Tip y
+ | x>y -> (t,Nil)
+ | x<y -> (Nil,t)
+ | otherwise -> (Nil,Nil)
+ Nil -> (Nil, Nil)
+
+split' :: Int -> IntSet -> (IntSet,IntSet)
+split' x t
+ = case t of
+ Bin p m l r
+ | match x p m -> if zero x m then let (lt,gt) = split' x l in (lt,union gt r)
+ else let (lt,gt) = split' x r in (union l lt,gt)
+ | otherwise -> if x < p then (Nil, t)
+ else (t, Nil)
Tip y
| x>y -> (t,Nil)
| x<y -> (Nil,t)
| otherwise -> (Nil,Nil)
Nil -> (Nil,Nil)
--- | /O(log n)/. Performs a 'split' but also returns whether the pivot
+-- | /O(min(n,W))/. Performs a 'split' but also returns whether the pivot
-- element was found in the original set.
-splitMember :: Int -> IntSet -> (Bool,IntSet,IntSet)
+splitMember :: Int -> IntSet -> (IntSet,Bool,IntSet)
splitMember x t
= case t of
Bin p m l r
- | zero x m -> let (found,lt,gt) = splitMember x l in (found,lt,union gt r)
- | otherwise -> let (found,lt,gt) = splitMember x r in (found,union l lt,gt)
+ | m < 0 -> if x >= 0 then let (lt,found,gt) = splitMember' x l in (union r lt, found, gt)
+ else let (lt,found,gt) = splitMember' x r in (lt, found, union gt l)
+ -- handle negative numbers.
+ | otherwise -> splitMember' x t
Tip y
- | x>y -> (False,t,Nil)
- | x<y -> (False,Nil,t)
- | otherwise -> (True,Nil,Nil)
- Nil -> (False,Nil,Nil)
+ | x>y -> (t,False,Nil)
+ | x<y -> (Nil,False,t)
+ | otherwise -> (Nil,True,Nil)
+ Nil -> (Nil,False,Nil)
+
+splitMember' :: Int -> IntSet -> (IntSet,Bool,IntSet)
+splitMember' x t
+ = case t of
+ Bin p m l r
+ | match x p m -> if zero x m then let (lt,found,gt) = splitMember x l in (lt,found,union gt r)
+ else let (lt,found,gt) = splitMember x r in (union l lt,found,gt)
+ | otherwise -> if x < p then (Nil, False, t)
+ else (t, False, Nil)
+ Tip y
+ | x>y -> (t,False,Nil)
+ | x<y -> (Nil,False,t)
+ | otherwise -> (Nil,True,Nil)
+ Nil -> (Nil,False,Nil)
+
+{----------------------------------------------------------------------
+ Min/Max
+----------------------------------------------------------------------}
+
+-- | /O(min(n,W))/. Retrieves the maximal key of the set, and the set stripped from that element
+-- @fail@s (in the monad) when passed an empty set.
+maxView :: (Monad m) => IntSet -> m (Int, IntSet)
+maxView t
+ = case t of
+ Bin p m l r | m < 0 -> let (result,t') = maxViewUnsigned l in return (result, bin p m t' r)
+ Bin p m l r -> let (result,t') = maxViewUnsigned r in return (result, bin p m l t')
+ Tip y -> return (y,Nil)
+ Nil -> fail "maxView: empty set has no maximal element"
+
+maxViewUnsigned :: IntSet -> (Int, IntSet)
+maxViewUnsigned t
+ = case t of
+ Bin p m l r -> let (result,t') = maxViewUnsigned r in (result, bin p m l t')
+ Tip y -> (y, Nil)
+
+-- | /O(min(n,W))/. Retrieves the minimal key of the set, and the set stripped from that element
+-- @fail@s (in the monad) when passed an empty set.
+minView :: (Monad m) => IntSet -> m (Int, IntSet)
+minView t
+ = case t of
+ Bin p m l r | m < 0 -> let (result,t') = minViewUnsigned r in return (result, bin p m l t')
+ Bin p m l r -> let (result,t') = minViewUnsigned l in return (result, bin p m t' r)
+ Tip y -> return (y, Nil)
+ Nil -> fail "minView: empty set has no minimal element"
+
+minViewUnsigned :: IntSet -> (Int, IntSet)
+minViewUnsigned t
+ = case t of
+ Bin p m l r -> let (result,t') = minViewUnsigned l in (result, bin p m t' r)
+ Tip y -> (y, Nil)
+
+
+-- Duplicate the Identity monad here because base < mtl.
+newtype Identity a = Identity { runIdentity :: a }
+instance Monad Identity where
+ return a = Identity a
+ m >>= k = k (runIdentity m)
+
+
+-- | /O(min(n,W))/. Delete and find the minimal element.
+--
+-- > deleteFindMin set = (findMin set, deleteMin set)
+deleteFindMin :: IntSet -> (Int, IntSet)
+deleteFindMin = runIdentity . minView
+
+-- | /O(min(n,W))/. Delete and find the maximal element.
+--
+-- > deleteFindMax set = (findMax set, deleteMax set)
+deleteFindMax :: IntSet -> (Int, IntSet)
+deleteFindMax = runIdentity . maxView
+
+-- | /O(min(n,W))/. The minimal element of a set.
+findMin :: IntSet -> Int
+findMin = fst . runIdentity . minView
+
+-- | /O(min(n,W))/. The maximal element of a set.
+findMax :: IntSet -> Int
+findMax = fst . runIdentity . maxView
+
+-- | /O(min(n,W))/. Delete the minimal element.
+deleteMin :: IntSet -> IntSet
+deleteMin = snd . runIdentity . minView
+
+-- | /O(min(n,W))/. Delete the maximal element.
+deleteMax :: IntSet -> IntSet
+deleteMax = snd . runIdentity . maxView
+
+
{----------------------------------------------------------------------
Map
----------------------------------------------------------------------}
-- | /O(n*min(n,W))/.
--- @map f s@ is the set obtained by applying @f@ to each element of @s@.
+-- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
--
-- It's worth noting that the size of the result may be smaller if,
-- for some @(x,y)@, @x \/= y && f x == f y@
-- > elems set == fold (:) [] set
fold :: (Int -> b -> b) -> b -> IntSet -> b
fold f z t
- = foldr f z t
+ = case t of
+ Bin 0 m l r | m < 0 -> foldr f (foldr f z l) r
+ -- put negative numbers before.
+ Bin p m l r -> foldr f z t
+ Tip x -> f x z
+ Nil -> z
foldr :: (Int -> b -> b) -> b -> IntSet -> b
foldr f z t
-- | /O(n)/. Convert the set to an ascending list of elements.
toAscList :: IntSet -> [Int]
-toAscList t
- = -- NOTE: the following algorithm only works for big-endian trees
- let (pos,neg) = span (>=0) (foldr (:) [] t) in neg ++ pos
+toAscList t = toList t
-- | /O(n*min(n,W))/. Create a set from a list of integers.
fromList :: [Int] -> IntSet
-- tentative implementation. See if more efficient exists.
{--------------------------------------------------------------------
- Monoid
---------------------------------------------------------------------}
-
-instance Monoid IntSet where
- mempty = empty
- mappend = union
- mconcat = unions
-
-{--------------------------------------------------------------------
Show
--------------------------------------------------------------------}
instance Show IntSet where
- showsPrec d s = showSet (toList s)
+ showsPrec p xs = showParen (p > 10) $
+ showString "fromList " . shows (toList xs)
showSet :: [Int] -> ShowS
showSet []
showTail (x:xs) = showChar ',' . shows x . showTail xs
{--------------------------------------------------------------------
+ Read
+--------------------------------------------------------------------}
+instance Read IntSet where
+#ifdef __GLASGOW_HASKELL__
+ readPrec = parens $ prec 10 $ do
+ Ident "fromList" <- lexP
+ xs <- readPrec
+ return (fromList xs)
+
+ readListPrec = readListPrecDefault
+#else
+ readsPrec p = readParen (p > 10) $ \ r -> do
+ ("fromList",s) <- lex r
+ (xs,t) <- reads s
+ return (fromList xs,t)
+#endif
+
+{--------------------------------------------------------------------
+ Typeable
+--------------------------------------------------------------------}
+
+#include "Typeable.h"
+INSTANCE_TYPEABLE0(IntSet,intSetTc,"IntSet")
+
+{--------------------------------------------------------------------
Debugging
--------------------------------------------------------------------}
-- | /O(n)/. Show the tree that implements the set. The tree is shown
= showTreeWith True False s
-{- | /O(n)/. The expression (@showTreeWith hang wide map@) shows
+{- | /O(n)/. The expression (@'showTreeWith' hang wide map@) shows
the tree that implements the set. If @hang@ is
- @True@, a /hanging/ tree is shown otherwise a rotated tree is shown. If
- @wide@ is true, an extra wide version is shown.
+ 'True', a /hanging/ tree is shown otherwise a rotated tree is shown. If
+ @wide@ is 'True', an extra wide version is shown.
-}
showTreeWith :: Bool -> Bool -> IntSet -> String
showTreeWith hang wide t