1 {-# OPTIONS -cpp -fglasgow-exts #-}
2 -----------------------------------------------------------------------------
4 -- Module : Data.IntSet
5 -- Copyright : (c) Daan Leijen 2002
7 -- Maintainer : libraries@haskell.org
8 -- Stability : provisional
9 -- Portability : portable
11 -- An efficient implementation of integer sets.
13 -- This module is intended to be imported @qualified@, to avoid name
14 -- clashes with "Prelude" functions. eg.
16 -- > import Data.IntSet as Set
18 -- The implementation is based on /big-endian patricia trees/. This data
19 -- structure performs especially well on binary operations like 'union'
20 -- and 'intersection'. However, my benchmarks show that it is also
21 -- (much) faster on insertions and deletions when compared to a generic
22 -- size-balanced set implementation (see "Data.Set").
24 -- * Chris Okasaki and Andy Gill, \"/Fast Mergeable Integer Maps/\",
25 -- Workshop on ML, September 1998, pages 77-86,
26 -- <http://www.cse.ogi.edu/~andy/pub/finite.htm>
28 -- * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
29 -- Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
30 -- October 1968, pages 514-534.
32 -- Many operations have a worst-case complexity of /O(min(n,W))/.
33 -- This means that the operation can become linear in the number of
34 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
36 -----------------------------------------------------------------------------
40 IntSet -- instance Eq,Show
92 import Prelude hiding (lookup,filter,foldr,foldl,null,map)
96 import qualified Data.List as List
102 import List (nub,sort)
103 import qualified List
106 #if __GLASGOW_HASKELL__
107 import Data.Generics.Basics
108 import Data.Generics.Instances
111 #if __GLASGOW_HASKELL__ >= 503
113 import GHC.Exts ( Word(..), Int(..), shiftRL# )
114 #elif __GLASGOW_HASKELL__
116 import GlaExts ( Word(..), Int(..), shiftRL# )
121 infixl 9 \\{-This comment teaches CPP correct behaviour -}
123 -- A "Nat" is a natural machine word (an unsigned Int)
126 natFromInt :: Int -> Nat
127 natFromInt i = fromIntegral i
129 intFromNat :: Nat -> Int
130 intFromNat w = fromIntegral w
132 shiftRL :: Nat -> Int -> Nat
133 #if __GLASGOW_HASKELL__
134 {--------------------------------------------------------------------
135 GHC: use unboxing to get @shiftRL@ inlined.
136 --------------------------------------------------------------------}
137 shiftRL (W# x) (I# i)
140 shiftRL x i = shiftR x i
143 {--------------------------------------------------------------------
145 --------------------------------------------------------------------}
146 -- | /O(n+m)/. See 'difference'.
147 (\\) :: IntSet -> IntSet -> IntSet
148 m1 \\ m2 = difference m1 m2
150 {--------------------------------------------------------------------
152 --------------------------------------------------------------------}
153 -- | A set of integers.
155 | Tip {-# UNPACK #-} !Int
156 | Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
161 #if __GLASGOW_HASKELL__
163 {--------------------------------------------------------------------
165 --------------------------------------------------------------------}
167 -- This instance preserves data abstraction at the cost of inefficiency.
168 -- We omit reflection services for the sake of data abstraction.
170 instance Data IntSet where
171 gfoldl f z is = z fromList `f` (toList is)
172 toConstr _ = error "toConstr"
173 gunfold _ _ = error "gunfold"
174 dataTypeOf _ = mkNorepType "Data.IntSet.IntSet"
178 {--------------------------------------------------------------------
180 --------------------------------------------------------------------}
181 -- | /O(1)/. Is the set empty?
182 null :: IntSet -> Bool
186 -- | /O(n)/. Cardinality of the set.
187 size :: IntSet -> Int
190 Bin p m l r -> size l + size r
194 -- | /O(min(n,W))/. Is the value a member of the set?
195 member :: Int -> IntSet -> Bool
199 | nomatch x p m -> False
200 | zero x m -> member x l
201 | otherwise -> member x r
205 -- 'lookup' is used by 'intersection' for left-biasing
206 lookup :: Int -> IntSet -> Maybe Int
208 = let nk = natFromInt k in seq nk (lookupN nk t)
210 lookupN :: Nat -> IntSet -> Maybe Int
214 | zeroN k (natFromInt m) -> lookupN k l
215 | otherwise -> lookupN k r
217 | (k == natFromInt kx) -> Just kx
218 | otherwise -> Nothing
221 {--------------------------------------------------------------------
223 --------------------------------------------------------------------}
224 -- | /O(1)/. The empty set.
229 -- | /O(1)/. A set of one element.
230 singleton :: Int -> IntSet
234 {--------------------------------------------------------------------
236 --------------------------------------------------------------------}
237 -- | /O(min(n,W))/. Add a value to the set. When the value is already
238 -- an element of the set, it is replaced by the new one, ie. 'insert'
240 insert :: Int -> IntSet -> IntSet
244 | nomatch x p m -> join x (Tip x) p t
245 | zero x m -> Bin p m (insert x l) r
246 | otherwise -> Bin p m l (insert x r)
249 | otherwise -> join x (Tip x) y t
252 -- right-biased insertion, used by 'union'
253 insertR :: Int -> IntSet -> IntSet
257 | nomatch x p m -> join x (Tip x) p t
258 | zero x m -> Bin p m (insert x l) r
259 | otherwise -> Bin p m l (insert x r)
262 | otherwise -> join x (Tip x) y t
265 -- | /O(min(n,W))/. Delete a value in the set. Returns the
266 -- original set when the value was not present.
267 delete :: Int -> IntSet -> IntSet
272 | zero x m -> bin p m (delete x l) r
273 | otherwise -> bin p m l (delete x r)
280 {--------------------------------------------------------------------
282 --------------------------------------------------------------------}
283 -- | The union of a list of sets.
284 unions :: [IntSet] -> IntSet
286 = foldlStrict union empty xs
289 -- | /O(n+m)/. The union of two sets.
290 union :: IntSet -> IntSet -> IntSet
291 union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
292 | shorter m1 m2 = union1
293 | shorter m2 m1 = union2
294 | p1 == p2 = Bin p1 m1 (union l1 l2) (union r1 r2)
295 | otherwise = join p1 t1 p2 t2
297 union1 | nomatch p2 p1 m1 = join p1 t1 p2 t2
298 | zero p2 m1 = Bin p1 m1 (union l1 t2) r1
299 | otherwise = Bin p1 m1 l1 (union r1 t2)
301 union2 | nomatch p1 p2 m2 = join p1 t1 p2 t2
302 | zero p1 m2 = Bin p2 m2 (union t1 l2) r2
303 | otherwise = Bin p2 m2 l2 (union t1 r2)
305 union (Tip x) t = insert x t
306 union t (Tip x) = insertR x t -- right bias
311 {--------------------------------------------------------------------
313 --------------------------------------------------------------------}
314 -- | /O(n+m)/. Difference between two sets.
315 difference :: IntSet -> IntSet -> IntSet
316 difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
317 | shorter m1 m2 = difference1
318 | shorter m2 m1 = difference2
319 | p1 == p2 = bin p1 m1 (difference l1 l2) (difference r1 r2)
322 difference1 | nomatch p2 p1 m1 = t1
323 | zero p2 m1 = bin p1 m1 (difference l1 t2) r1
324 | otherwise = bin p1 m1 l1 (difference r1 t2)
326 difference2 | nomatch p1 p2 m2 = t1
327 | zero p1 m2 = difference t1 l2
328 | otherwise = difference t1 r2
330 difference t1@(Tip x) t2
334 difference Nil t = Nil
335 difference t (Tip x) = delete x t
340 {--------------------------------------------------------------------
342 --------------------------------------------------------------------}
343 -- | /O(n+m)/. The intersection of two sets.
344 intersection :: IntSet -> IntSet -> IntSet
345 intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
346 | shorter m1 m2 = intersection1
347 | shorter m2 m1 = intersection2
348 | p1 == p2 = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
351 intersection1 | nomatch p2 p1 m1 = Nil
352 | zero p2 m1 = intersection l1 t2
353 | otherwise = intersection r1 t2
355 intersection2 | nomatch p1 p2 m2 = Nil
356 | zero p1 m2 = intersection t1 l2
357 | otherwise = intersection t1 r2
359 intersection t1@(Tip x) t2
362 intersection t (Tip x)
366 intersection Nil t = Nil
367 intersection t Nil = Nil
371 {--------------------------------------------------------------------
373 --------------------------------------------------------------------}
374 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
375 isProperSubsetOf :: IntSet -> IntSet -> Bool
376 isProperSubsetOf t1 t2
377 = case subsetCmp t1 t2 of
381 subsetCmp t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
383 | shorter m2 m1 = subsetCmpLt
384 | p1 == p2 = subsetCmpEq
385 | otherwise = GT -- disjoint
387 subsetCmpLt | nomatch p1 p2 m2 = GT
388 | zero p1 m2 = subsetCmp t1 l2
389 | otherwise = subsetCmp t1 r2
390 subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
396 subsetCmp (Bin p m l r) t = GT
397 subsetCmp (Tip x) (Tip y)
399 | otherwise = GT -- disjoint
402 | otherwise = GT -- disjoint
403 subsetCmp Nil Nil = EQ
406 -- | /O(n+m)/. Is this a subset?
407 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
409 isSubsetOf :: IntSet -> IntSet -> Bool
410 isSubsetOf t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
411 | shorter m1 m2 = False
412 | shorter m2 m1 = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2
413 else isSubsetOf t1 r2)
414 | otherwise = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2
415 isSubsetOf (Bin p m l r) t = False
416 isSubsetOf (Tip x) t = member x t
417 isSubsetOf Nil t = True
420 {--------------------------------------------------------------------
422 --------------------------------------------------------------------}
423 -- | /O(n)/. Filter all elements that satisfy some predicate.
424 filter :: (Int -> Bool) -> IntSet -> IntSet
428 -> bin p m (filter pred l) (filter pred r)
434 -- | /O(n)/. partition the set according to some predicate.
435 partition :: (Int -> Bool) -> IntSet -> (IntSet,IntSet)
439 -> let (l1,l2) = partition pred l
440 (r1,r2) = partition pred r
441 in (bin p m l1 r1, bin p m l2 r2)
444 | otherwise -> (Nil,t)
448 -- | /O(log n)/. The expression (@'split' x set@) is a pair @(set1,set2)@
449 -- where all elements in @set1@ are lower than @x@ and all elements in
450 -- @set2@ larger than @x@.
452 -- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [3,4])
453 split :: Int -> IntSet -> (IntSet,IntSet)
457 | zero x m -> let (lt,gt) = split x l in (lt,union gt r)
458 | otherwise -> let (lt,gt) = split x r in (union l lt,gt)
462 | otherwise -> (Nil,Nil)
465 -- | /O(log n)/. Performs a 'split' but also returns whether the pivot
466 -- element was found in the original set.
467 splitMember :: Int -> IntSet -> (IntSet,Bool,IntSet)
471 | zero x m -> let (lt,found,gt) = splitMember x l in (lt,found,union gt r)
472 | otherwise -> let (lt,found,gt) = splitMember x r in (union l lt,found,gt)
474 | x>y -> (t,False,Nil)
475 | x<y -> (Nil,False,t)
476 | otherwise -> (Nil,True,Nil)
477 Nil -> (Nil,False,Nil)
479 {----------------------------------------------------------------------
481 ----------------------------------------------------------------------}
483 -- | /O(n*min(n,W))/.
484 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
486 -- It's worth noting that the size of the result may be smaller if,
487 -- for some @(x,y)@, @x \/= y && f x == f y@
489 map :: (Int->Int) -> IntSet -> IntSet
490 map f = fromList . List.map f . toList
492 {--------------------------------------------------------------------
494 --------------------------------------------------------------------}
495 -- | /O(n)/. Fold over the elements of a set in an unspecified order.
497 -- > sum set == fold (+) 0 set
498 -- > elems set == fold (:) [] set
499 fold :: (Int -> b -> b) -> b -> IntSet -> b
503 foldr :: (Int -> b -> b) -> b -> IntSet -> b
506 Bin p m l r -> foldr f (foldr f z r) l
510 {--------------------------------------------------------------------
512 --------------------------------------------------------------------}
513 -- | /O(n)/. The elements of a set. (For sets, this is equivalent to toList)
514 elems :: IntSet -> [Int]
518 {--------------------------------------------------------------------
520 --------------------------------------------------------------------}
521 -- | /O(n)/. Convert the set to a list of elements.
522 toList :: IntSet -> [Int]
526 -- | /O(n)/. Convert the set to an ascending list of elements.
527 toAscList :: IntSet -> [Int]
529 = -- NOTE: the following algorithm only works for big-endian trees
530 let (pos,neg) = span (>=0) (foldr (:) [] t) in neg ++ pos
532 -- | /O(n*min(n,W))/. Create a set from a list of integers.
533 fromList :: [Int] -> IntSet
535 = foldlStrict ins empty xs
539 -- | /O(n*min(n,W))/. Build a set from an ascending list of elements.
540 fromAscList :: [Int] -> IntSet
544 -- | /O(n*min(n,W))/. Build a set from an ascending list of distinct elements.
545 fromDistinctAscList :: [Int] -> IntSet
546 fromDistinctAscList xs
550 {--------------------------------------------------------------------
552 --------------------------------------------------------------------}
553 instance Eq IntSet where
554 t1 == t2 = equal t1 t2
555 t1 /= t2 = nequal t1 t2
557 equal :: IntSet -> IntSet -> Bool
558 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
559 = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
560 equal (Tip x) (Tip y)
565 nequal :: IntSet -> IntSet -> Bool
566 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
567 = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
568 nequal (Tip x) (Tip y)
570 nequal Nil Nil = False
573 {--------------------------------------------------------------------
575 --------------------------------------------------------------------}
577 instance Ord IntSet where
578 compare s1 s2 = compare (toAscList s1) (toAscList s2)
579 -- tentative implementation. See if more efficient exists.
581 {--------------------------------------------------------------------
583 --------------------------------------------------------------------}
584 instance Show IntSet where
585 showsPrec d s = showSet (toList s)
587 showSet :: [Int] -> ShowS
591 = showChar '{' . shows x . showTail xs
593 showTail [] = showChar '}'
594 showTail (x:xs) = showChar ',' . shows x . showTail xs
596 {--------------------------------------------------------------------
598 --------------------------------------------------------------------}
600 #include "Typeable.h"
601 INSTANCE_TYPEABLE0(IntSet,intSetTc,"IntSet")
603 {--------------------------------------------------------------------
605 --------------------------------------------------------------------}
606 -- | /O(n)/. Show the tree that implements the set. The tree is shown
607 -- in a compressed, hanging format.
608 showTree :: IntSet -> String
610 = showTreeWith True False s
613 {- | /O(n)/. The expression (@'showTreeWith' hang wide map@) shows
614 the tree that implements the set. If @hang@ is
615 'True', a /hanging/ tree is shown otherwise a rotated tree is shown. If
616 @wide@ is 'True', an extra wide version is shown.
618 showTreeWith :: Bool -> Bool -> IntSet -> String
619 showTreeWith hang wide t
620 | hang = (showsTreeHang wide [] t) ""
621 | otherwise = (showsTree wide [] [] t) ""
623 showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
624 showsTree wide lbars rbars t
627 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
628 showWide wide rbars .
629 showsBars lbars . showString (showBin p m) . showString "\n" .
630 showWide wide lbars .
631 showsTree wide (withEmpty lbars) (withBar lbars) l
633 -> showsBars lbars . showString " " . shows x . showString "\n"
634 Nil -> showsBars lbars . showString "|\n"
636 showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
637 showsTreeHang wide bars t
640 -> showsBars bars . showString (showBin p m) . showString "\n" .
642 showsTreeHang wide (withBar bars) l .
644 showsTreeHang wide (withEmpty bars) r
646 -> showsBars bars . showString " " . shows x . showString "\n"
647 Nil -> showsBars bars . showString "|\n"
650 = "*" -- ++ show (p,m)
653 | wide = showString (concat (reverse bars)) . showString "|\n"
656 showsBars :: [String] -> ShowS
660 _ -> showString (concat (reverse (tail bars))) . showString node
663 withBar bars = "| ":bars
664 withEmpty bars = " ":bars
667 {--------------------------------------------------------------------
669 --------------------------------------------------------------------}
670 {--------------------------------------------------------------------
672 --------------------------------------------------------------------}
673 join :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
675 | zero p1 m = Bin p m t1 t2
676 | otherwise = Bin p m t2 t1
681 {--------------------------------------------------------------------
682 @bin@ assures that we never have empty trees within a tree.
683 --------------------------------------------------------------------}
684 bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
687 bin p m l r = Bin p m l r
690 {--------------------------------------------------------------------
691 Endian independent bit twiddling
692 --------------------------------------------------------------------}
693 zero :: Int -> Mask -> Bool
695 = (natFromInt i) .&. (natFromInt m) == 0
697 nomatch,match :: Int -> Prefix -> Mask -> Bool
704 mask :: Int -> Mask -> Prefix
706 = maskW (natFromInt i) (natFromInt m)
708 zeroN :: Nat -> Nat -> Bool
709 zeroN i m = (i .&. m) == 0
711 {--------------------------------------------------------------------
712 Big endian operations
713 --------------------------------------------------------------------}
714 maskW :: Nat -> Nat -> Prefix
716 = intFromNat (i .&. (complement (m-1) `xor` m))
718 shorter :: Mask -> Mask -> Bool
720 = (natFromInt m1) > (natFromInt m2)
722 branchMask :: Prefix -> Prefix -> Mask
724 = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
726 {----------------------------------------------------------------------
727 Finding the highest bit (mask) in a word [x] can be done efficiently in
729 * convert to a floating point value and the mantissa tells us the
730 [log2(x)] that corresponds with the highest bit position. The mantissa
731 is retrieved either via the standard C function [frexp] or by some bit
732 twiddling on IEEE compatible numbers (float). Note that one needs to
733 use at least [double] precision for an accurate mantissa of 32 bit
735 * use bit twiddling, a logarithmic sequence of bitwise or's and shifts (bit).
736 * use processor specific assembler instruction (asm).
738 The most portable way would be [bit], but is it efficient enough?
739 I have measured the cycle counts of the different methods on an AMD
740 Athlon-XP 1800 (~ Pentium III 1.8Ghz) using the RDTSC instruction:
742 highestBitMask: method cycles
749 highestBit: method cycles
756 Wow, the bit twiddling is on today's RISC like machines even faster
757 than a single CISC instruction (BSR)!
758 ----------------------------------------------------------------------}
760 {----------------------------------------------------------------------
761 [highestBitMask] returns a word where only the highest bit is set.
762 It is found by first setting all bits in lower positions than the
763 highest bit and than taking an exclusive or with the original value.
764 Allthough the function may look expensive, GHC compiles this into
765 excellent C code that subsequently compiled into highly efficient
766 machine code. The algorithm is derived from Jorg Arndt's FXT library.
767 ----------------------------------------------------------------------}
768 highestBitMask :: Nat -> Nat
770 = case (x .|. shiftRL x 1) of
771 x -> case (x .|. shiftRL x 2) of
772 x -> case (x .|. shiftRL x 4) of
773 x -> case (x .|. shiftRL x 8) of
774 x -> case (x .|. shiftRL x 16) of
775 x -> case (x .|. shiftRL x 32) of -- for 64 bit platforms
776 x -> (x `xor` (shiftRL x 1))
779 {--------------------------------------------------------------------
781 --------------------------------------------------------------------}
785 (x:xx) -> let z' = f z x in seq z' (foldlStrict f z' xx)
789 {--------------------------------------------------------------------
791 --------------------------------------------------------------------}
792 testTree :: [Int] -> IntSet
793 testTree xs = fromList xs
794 test1 = testTree [1..20]
795 test2 = testTree [30,29..10]
796 test3 = testTree [1,4,6,89,2323,53,43,234,5,79,12,9,24,9,8,423,8,42,4,8,9,3]
798 {--------------------------------------------------------------------
800 --------------------------------------------------------------------}
805 { configMaxTest = 500
806 , configMaxFail = 5000
807 , configSize = \n -> (div n 2 + 3)
808 , configEvery = \n args -> let s = show n in s ++ [ '\b' | _ <- s ]
812 {--------------------------------------------------------------------
813 Arbitrary, reasonably balanced trees
814 --------------------------------------------------------------------}
815 instance Arbitrary IntSet where
816 arbitrary = do{ xs <- arbitrary
817 ; return (fromList xs)
821 {--------------------------------------------------------------------
822 Single, Insert, Delete
823 --------------------------------------------------------------------}
824 prop_Single :: Int -> Bool
826 = (insert x empty == singleton x)
828 prop_InsertDelete :: Int -> IntSet -> Property
829 prop_InsertDelete k t
830 = not (member k t) ==> delete k (insert k t) == t
833 {--------------------------------------------------------------------
835 --------------------------------------------------------------------}
836 prop_UnionInsert :: Int -> IntSet -> Bool
838 = union t (singleton x) == insert x t
840 prop_UnionAssoc :: IntSet -> IntSet -> IntSet -> Bool
841 prop_UnionAssoc t1 t2 t3
842 = union t1 (union t2 t3) == union (union t1 t2) t3
844 prop_UnionComm :: IntSet -> IntSet -> Bool
846 = (union t1 t2 == union t2 t1)
848 prop_Diff :: [Int] -> [Int] -> Bool
850 = toAscList (difference (fromList xs) (fromList ys))
851 == List.sort ((List.\\) (nub xs) (nub ys))
853 prop_Int :: [Int] -> [Int] -> Bool
855 = toAscList (intersection (fromList xs) (fromList ys))
856 == List.sort (nub ((List.intersect) (xs) (ys)))
858 {--------------------------------------------------------------------
860 --------------------------------------------------------------------}
862 = forAll (choose (5,100)) $ \n ->
864 in fromAscList xs == fromList xs
866 prop_List :: [Int] -> Bool
868 = (sort (nub xs) == toAscList (fromList xs))