Data/IntSet.hs

   1 {-# OPTIONS -cpp -fglasgow-exts #-}
   2 -----------------------------------------------------------------------------
   3 -- |
   4 -- Module      :  Data.IntSet
   5 -- Copyright   :  (c) Daan Leijen 2002
   6 -- License     :  BSD-style
   7 -- Maintainer  :  libraries@haskell.org
   8 -- Stability   :  provisional
   9 -- Portability :  portable
  10 --
  11 -- An efficient implementation of integer sets.
  12 --
  13 -- This module is intended to be imported @qualified@, to avoid name
  14 -- clashes with "Prelude" functions.  eg.
  15 --
  16 -- >  import Data.IntSet as Set
  17 --
  18 -- The implementation is based on /big-endian patricia trees/.  This data
  19 -- structure performs especially well on binary operations like 'union'
  20 -- and 'intersection'.  However, my benchmarks show that it is also
  21 -- (much) faster on insertions and deletions when compared to a generic
  22 -- size-balanced set implementation (see "Data.Set").
  23 --
  24 --    * Chris Okasaki and Andy Gill,  \"/Fast Mergeable Integer Maps/\",
  25 --      Workshop on ML, September 1998, pages 77-86,
  26 --      <http://www.cse.ogi.edu/~andy/pub/finite.htm>
  27 --
  28 --    * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
  29 --      Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
  30 --      October 1968, pages 514-534.
  31 --
  32 -- Many operations have a worst-case complexity of /O(min(n,W))/.
  33 -- This means that the operation can become linear in the number of
  34 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
  35 -- (32 or 64).
  36 -----------------------------------------------------------------------------
  37
  38 module Data.IntSet  (
  39             -- * Set type
  40               IntSet          -- instance Eq,Show
  41
  42             -- * Operators
  43             , (\\)
  44
  45             -- * Query
  46             , null
  47             , size
  48             , member
  49             , isSubsetOf
  50             , isProperSubsetOf
  51
  52             -- * Construction
  53             , empty
  54             , singleton
  55             , insert
  56             , delete
  57
  58             -- * Combine
  59             , union, unions
  60             , difference
  61             , intersection
  62
  63             -- * Filter
  64             , filter
  65             , partition
  66             , split
  67             , splitMember
  68
  69             -- * Map
  70             , map
  71
  72             -- * Fold
  73             , fold
  74
  75             -- * Conversion
  76             -- ** List
  77             , elems
  78             , toList
  79             , fromList
  80
  81             -- ** Ordered list
  82             , toAscList
  83             , fromAscList
  84             , fromDistinctAscList
  85
  86             -- * Debugging
  87             , showTree
  88             , showTreeWith
  89             ) where
  90
  91
  92 import Prelude hiding (lookup,filter,foldr,foldl,null,map)
  93 import Data.Bits
  94 import Data.Int
  95
  96 import qualified Data.List as List
  97 import Data.Monoid
  98 import Data.Typeable
  99
 100 {-
 101 -- just for testing
 102 import QuickCheck
 103 import List (nub,sort)
 104 import qualified List
 105 -}
 106
 107 #if __GLASGOW_HASKELL__ >= 503
 108 import GHC.Word
 109 import GHC.Exts ( Word(..), Int(..), shiftRL# )
 110 #elif __GLASGOW_HASKELL__
 111 import Word
 112 import GlaExts ( Word(..), Int(..), shiftRL# )
 113 #else
 114 import Data.Word
 115 #endif
 116
 117 infixl 9 \\{-This comment teaches CPP correct behaviour -}
 118
 119 -- A "Nat" is a natural machine word (an unsigned Int)
 120 type Nat = Word
 121
 122 natFromInt :: Int -> Nat
 123 natFromInt i = fromIntegral i
 124
 125 intFromNat :: Nat -> Int
 126 intFromNat w = fromIntegral w
 127
 128 shiftRL :: Nat -> Int -> Nat
 129 #if __GLASGOW_HASKELL__
 130 {--------------------------------------------------------------------
 131   GHC: use unboxing to get @shiftRL@ inlined.
 132 --------------------------------------------------------------------}
 133 shiftRL (W# x) (I# i)
 134   = W# (shiftRL# x i)
 135 #else
 136 shiftRL x i   = shiftR x i
 137 #endif
 138
 139 {--------------------------------------------------------------------
 140   Operators
 141 --------------------------------------------------------------------}
 142 -- | /O(n+m)/. See 'difference'.
 143 (\\) :: IntSet -> IntSet -> IntSet
 144 m1 \\ m2 = difference m1 m2
 145
 146 {--------------------------------------------------------------------
 147   Types
 148 --------------------------------------------------------------------}
 149 -- | A set of integers.
 150 data IntSet = Nil
 151             | Tip {-# UNPACK #-} !Int
 152             | Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
 153
 154 type Prefix = Int
 155 type Mask   = Int
 156
 157 {--------------------------------------------------------------------
 158   Query
 159 --------------------------------------------------------------------}
 160 -- | /O(1)/. Is the set empty?
 161 null :: IntSet -> Bool
 162 null Nil   = True
 163 null other = False
 164
 165 -- | /O(n)/. Cardinality of the set.
 166 size :: IntSet -> Int
 167 size t
 168   = case t of
 169       Bin p m l r -> size l + size r
 170       Tip y -> 1
 171       Nil   -> 0
 172
 173 -- | /O(min(n,W))/. Is the value a member of the set?
 174 member :: Int -> IntSet -> Bool
 175 member x t
 176   = case t of
 177       Bin p m l r
 178         | nomatch x p m -> False
 179         | zero x m      -> member x l
 180         | otherwise     -> member x r
 181       Tip y -> (x==y)
 182       Nil   -> False
 183
 184 -- 'lookup' is used by 'intersection' for left-biasing
 185 lookup :: Int -> IntSet -> Maybe Int
 186 lookup k t
 187   = let nk = natFromInt k  in seq nk (lookupN nk t)
 188
 189 lookupN :: Nat -> IntSet -> Maybe Int
 190 lookupN k t
 191   = case t of
 192       Bin p m l r
 193         | zeroN k (natFromInt m) -> lookupN k l
 194         | otherwise              -> lookupN k r
 195       Tip kx
 196         | (k == natFromInt kx)  -> Just kx
 197         | otherwise             -> Nothing
 198       Nil -> Nothing
 199
 200 {--------------------------------------------------------------------
 201   Construction
 202 --------------------------------------------------------------------}
 203 -- | /O(1)/. The empty set.
 204 empty :: IntSet
 205 empty
 206   = Nil
 207
 208 -- | /O(1)/. A set of one element.
 209 singleton :: Int -> IntSet
 210 singleton x
 211   = Tip x
 212
 213 {--------------------------------------------------------------------
 214   Insert
 215 --------------------------------------------------------------------}
 216 -- | /O(min(n,W))/. Add a value to the set. When the value is already
 217 -- an element of the set, it is replaced by the new one, ie. 'insert'
 218 -- is left-biased.
 219 insert :: Int -> IntSet -> IntSet
 220 insert x t
 221   = case t of
 222       Bin p m l r
 223         | nomatch x p m -> join x (Tip x) p t
 224         | zero x m      -> Bin p m (insert x l) r
 225         | otherwise     -> Bin p m l (insert x r)
 226       Tip y
 227         | x==y          -> Tip x
 228         | otherwise     -> join x (Tip x) y t
 229       Nil -> Tip x
 230
 231 -- right-biased insertion, used by 'union'
 232 insertR :: Int -> IntSet -> IntSet
 233 insertR x t
 234   = case t of
 235       Bin p m l r
 236         | nomatch x p m -> join x (Tip x) p t
 237         | zero x m      -> Bin p m (insert x l) r
 238         | otherwise     -> Bin p m l (insert x r)
 239       Tip y
 240         | x==y          -> t
 241         | otherwise     -> join x (Tip x) y t
 242       Nil -> Tip x
 243
 244 -- | /O(min(n,W))/. Delete a value in the set. Returns the
 245 -- original set when the value was not present.
 246 delete :: Int -> IntSet -> IntSet
 247 delete x t
 248   = case t of
 249       Bin p m l r
 250         | nomatch x p m -> t
 251         | zero x m      -> bin p m (delete x l) r
 252         | otherwise     -> bin p m l (delete x r)
 253       Tip y
 254         | x==y          -> Nil
 255         | otherwise     -> t
 256       Nil -> Nil
 257
 258
 259 {--------------------------------------------------------------------
 260   Union
 261 --------------------------------------------------------------------}
 262 -- | The union of a list of sets.
 263 unions :: [IntSet] -> IntSet
 264 unions xs
 265   = foldlStrict union empty xs
 266
 267
 268 -- | /O(n+m)/. The union of two sets.
 269 union :: IntSet -> IntSet -> IntSet
 270 union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 271   | shorter m1 m2  = union1
 272   | shorter m2 m1  = union2
 273   | p1 == p2       = Bin p1 m1 (union l1 l2) (union r1 r2)
 274   | otherwise      = join p1 t1 p2 t2
 275   where
 276     union1  | nomatch p2 p1 m1  = join p1 t1 p2 t2
 277             | zero p2 m1        = Bin p1 m1 (union l1 t2) r1
 278             | otherwise         = Bin p1 m1 l1 (union r1 t2)
 279
 280     union2  | nomatch p1 p2 m2  = join p1 t1 p2 t2
 281             | zero p1 m2        = Bin p2 m2 (union t1 l2) r2
 282             | otherwise         = Bin p2 m2 l2 (union t1 r2)
 283
 284 union (Tip x) t = insert x t
 285 union t (Tip x) = insertR x t  -- right bias
 286 union Nil t     = t
 287 union t Nil     = t
 288
 289
 290 {--------------------------------------------------------------------
 291   Difference
 292 --------------------------------------------------------------------}
 293 -- | /O(n+m)/. Difference between two sets.
 294 difference :: IntSet -> IntSet -> IntSet
 295 difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 296   | shorter m1 m2  = difference1
 297   | shorter m2 m1  = difference2
 298   | p1 == p2       = bin p1 m1 (difference l1 l2) (difference r1 r2)
 299   | otherwise      = t1
 300   where
 301     difference1 | nomatch p2 p1 m1  = t1
 302                 | zero p2 m1        = bin p1 m1 (difference l1 t2) r1
 303                 | otherwise         = bin p1 m1 l1 (difference r1 t2)
 304
 305     difference2 | nomatch p1 p2 m2  = t1
 306                 | zero p1 m2        = difference t1 l2
 307                 | otherwise         = difference t1 r2
 308
 309 difference t1@(Tip x) t2
 310   | member x t2  = Nil
 311   | otherwise    = t1
 312
 313 difference Nil t     = Nil
 314 difference t (Tip x) = delete x t
 315 difference t Nil     = t
 316
 317
 318
 319 {--------------------------------------------------------------------
 320   Intersection
 321 --------------------------------------------------------------------}
 322 -- | /O(n+m)/. The intersection of two sets.
 323 intersection :: IntSet -> IntSet -> IntSet
 324 intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 325   | shorter m1 m2  = intersection1
 326   | shorter m2 m1  = intersection2
 327   | p1 == p2       = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
 328   | otherwise      = Nil
 329   where
 330     intersection1 | nomatch p2 p1 m1  = Nil
 331                   | zero p2 m1        = intersection l1 t2
 332                   | otherwise         = intersection r1 t2
 333
 334     intersection2 | nomatch p1 p2 m2  = Nil
 335                   | zero p1 m2        = intersection t1 l2
 336                   | otherwise         = intersection t1 r2
 337
 338 intersection t1@(Tip x) t2
 339   | member x t2  = t1
 340   | otherwise    = Nil
 341 intersection t (Tip x)
 342   = case lookup x t of
 343       Just y  -> Tip y
 344       Nothing -> Nil
 345 intersection Nil t = Nil
 346 intersection t Nil = Nil
 347
 348
 349
 350 {--------------------------------------------------------------------
 351   Subset
 352 --------------------------------------------------------------------}
 353 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
 354 isProperSubsetOf :: IntSet -> IntSet -> Bool
 355 isProperSubsetOf t1 t2
 356   = case subsetCmp t1 t2 of
 357       LT -> True
 358       ge -> False
 359
 360 subsetCmp t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 361   | shorter m1 m2  = GT
 362   | shorter m2 m1  = subsetCmpLt
 363   | p1 == p2       = subsetCmpEq
 364   | otherwise      = GT  -- disjoint
 365   where
 366     subsetCmpLt | nomatch p1 p2 m2  = GT
 367                 | zero p1 m2        = subsetCmp t1 l2
 368                 | otherwise         = subsetCmp t1 r2
 369     subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
 370                     (GT,_ ) -> GT
 371                     (_ ,GT) -> GT
 372                     (EQ,EQ) -> EQ
 373                     other   -> LT
 374
 375 subsetCmp (Bin p m l r) t  = GT
 376 subsetCmp (Tip x) (Tip y)
 377   | x==y       = EQ
 378   | otherwise  = GT  -- disjoint
 379 subsetCmp (Tip x) t
 380   | member x t = LT
 381   | otherwise  = GT  -- disjoint
 382 subsetCmp Nil Nil = EQ
 383 subsetCmp Nil t   = LT
 384
 385 -- | /O(n+m)/. Is this a subset?
 386 -- @(s1 `isSubsetOf` s2)@ tells whether s1 is a subset of s2.
 387
 388 isSubsetOf :: IntSet -> IntSet -> Bool
 389 isSubsetOf t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 390   | shorter m1 m2  = False
 391   | shorter m2 m1  = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2
 392                                                       else isSubsetOf t1 r2)
 393   | otherwise      = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2
 394 isSubsetOf (Bin p m l r) t  = False
 395 isSubsetOf (Tip x) t        = member x t
 396 isSubsetOf Nil t            = True
 397
 398
 399 {--------------------------------------------------------------------
 400   Filter
 401 --------------------------------------------------------------------}
 402 -- | /O(n)/. Filter all elements that satisfy some predicate.
 403 filter :: (Int -> Bool) -> IntSet -> IntSet
 404 filter pred t
 405   = case t of
 406       Bin p m l r
 407         -> bin p m (filter pred l) (filter pred r)
 408       Tip x
 409         | pred x    -> t
 410         | otherwise -> Nil
 411       Nil -> Nil
 412
 413 -- | /O(n)/. partition the set according to some predicate.
 414 partition :: (Int -> Bool) -> IntSet -> (IntSet,IntSet)
 415 partition pred t
 416   = case t of
 417       Bin p m l r
 418         -> let (l1,l2) = partition pred l
 419                (r1,r2) = partition pred r
 420            in (bin p m l1 r1, bin p m l2 r2)
 421       Tip x
 422         | pred x    -> (t,Nil)
 423         | otherwise -> (Nil,t)
 424       Nil -> (Nil,Nil)
 425
 426
 427 -- | /O(log n)/. The expression (@split x set@) is a pair @(set1,set2)@
 428 -- where all elements in @set1@ are lower than @x@ and all elements in
 429 -- @set2@ larger than @x@.
 430 --
 431 -- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [3,4])
 432 split :: Int -> IntSet -> (IntSet,IntSet)
 433 split x t
 434   = case t of
 435       Bin p m l r
 436         | zero x m  -> let (lt,gt) = split x l in (lt,union gt r)
 437         | otherwise -> let (lt,gt) = split x r in (union l lt,gt)
 438       Tip y
 439         | x>y       -> (t,Nil)
 440         | x<y       -> (Nil,t)
 441         | otherwise -> (Nil,Nil)
 442       Nil -> (Nil,Nil)
 443
 444 -- | /O(log n)/. Performs a 'split' but also returns whether the pivot
 445 -- element was found in the original set.
 446 splitMember :: Int -> IntSet -> (Bool,IntSet,IntSet)
 447 splitMember x t
 448   = case t of
 449       Bin p m l r
 450         | zero x m  -> let (found,lt,gt) = splitMember x l in (found,lt,union gt r)
 451         | otherwise -> let (found,lt,gt) = splitMember x r in (found,union l lt,gt)
 452       Tip y
 453         | x>y       -> (False,t,Nil)
 454         | x<y       -> (False,Nil,t)
 455         | otherwise -> (True,Nil,Nil)
 456       Nil -> (False,Nil,Nil)
 457
 458 {----------------------------------------------------------------------
 459   Map
 460 ----------------------------------------------------------------------}
 461
 462 -- | /O(n*min(n,W))/.
 463 -- @map f s@ is the set obtained by applying @f@ to each element of @s@.
 464 --
 465 -- It's worth noting that the size of the result may be smaller if,
 466 -- for some @(x,y)@, @x \/= y && f x == f y@
 467
 468 map :: (Int->Int) -> IntSet -> IntSet
 469 map f = fromList . List.map f . toList
 470
 471 {--------------------------------------------------------------------
 472   Fold
 473 --------------------------------------------------------------------}
 474 -- | /O(n)/. Fold over the elements of a set in an unspecified order.
 475 --
 476 -- > sum set   == fold (+) 0 set
 477 -- > elems set == fold (:) [] set
 478 fold :: (Int -> b -> b) -> b -> IntSet -> b
 479 fold f z t
 480   = foldr f z t
 481
 482 foldr :: (Int -> b -> b) -> b -> IntSet -> b
 483 foldr f z t
 484   = case t of
 485       Bin p m l r -> foldr f (foldr f z r) l
 486       Tip x       -> f x z
 487       Nil         -> z
 488
 489 {--------------------------------------------------------------------
 490   List variations
 491 --------------------------------------------------------------------}
 492 -- | /O(n)/. The elements of a set. (For sets, this is equivalent to toList)
 493 elems :: IntSet -> [Int]
 494 elems s
 495   = toList s
 496
 497 {--------------------------------------------------------------------
 498   Lists
 499 --------------------------------------------------------------------}
 500 -- | /O(n)/. Convert the set to a list of elements.
 501 toList :: IntSet -> [Int]
 502 toList t
 503   = fold (:) [] t
 504
 505 -- | /O(n)/. Convert the set to an ascending list of elements.
 506 toAscList :: IntSet -> [Int]
 507 toAscList t
 508   = -- NOTE: the following algorithm only works for big-endian trees
 509     let (pos,neg) = span (>=0) (foldr (:) [] t) in neg ++ pos
 510
 511 -- | /O(n*min(n,W))/. Create a set from a list of integers.
 512 fromList :: [Int] -> IntSet
 513 fromList xs
 514   = foldlStrict ins empty xs
 515   where
 516     ins t x  = insert x t
 517
 518 -- | /O(n*min(n,W))/. Build a set from an ascending list of elements.
 519 fromAscList :: [Int] -> IntSet
 520 fromAscList xs
 521   = fromList xs
 522
 523 -- | /O(n*min(n,W))/. Build a set from an ascending list of distinct elements.
 524 fromDistinctAscList :: [Int] -> IntSet
 525 fromDistinctAscList xs
 526   = fromList xs
 527
 528
 529 {--------------------------------------------------------------------
 530   Eq
 531 --------------------------------------------------------------------}
 532 instance Eq IntSet where
 533   t1 == t2  = equal t1 t2
 534   t1 /= t2  = nequal t1 t2
 535
 536 equal :: IntSet -> IntSet -> Bool
 537 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
 538   = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
 539 equal (Tip x) (Tip y)
 540   = (x==y)
 541 equal Nil Nil = True
 542 equal t1 t2   = False
 543
 544 nequal :: IntSet -> IntSet -> Bool
 545 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
 546   = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
 547 nequal (Tip x) (Tip y)
 548   = (x/=y)
 549 nequal Nil Nil = False
 550 nequal t1 t2   = True
 551
 552 {--------------------------------------------------------------------
 553   Ord
 554 --------------------------------------------------------------------}
 555
 556 instance Ord IntSet where
 557     compare s1 s2 = compare (toAscList s1) (toAscList s2)
 558     -- tentative implementation. See if more efficient exists.
 559
 560 {--------------------------------------------------------------------
 561   Monoid
 562 --------------------------------------------------------------------}
 563
 564 instance Monoid IntSet where
 565     mempty = empty
 566     mappend = union
 567     mconcat = unions
 568
 569 {--------------------------------------------------------------------
 570   Show
 571 --------------------------------------------------------------------}
 572 instance Show IntSet where
 573   showsPrec d s  = showSet (toList s)
 574
 575 showSet :: [Int] -> ShowS
 576 showSet []
 577   = showString "{}"
 578 showSet (x:xs)
 579   = showChar '{' . shows x . showTail xs
 580   where
 581     showTail []     = showChar '}'
 582     showTail (x:xs) = showChar ',' . shows x . showTail xs
 583
 584 {--------------------------------------------------------------------
 585   Typeable
 586 --------------------------------------------------------------------}
 587
 588 #include "Typeable.h"
 589 INSTANCE_TYPEABLE0(IntSet,intSetTc,"IntSet")
 590
 591 {--------------------------------------------------------------------
 592   Debugging
 593 --------------------------------------------------------------------}
 594 -- | /O(n)/. Show the tree that implements the set. The tree is shown
 595 -- in a compressed, hanging format.
 596 showTree :: IntSet -> String
 597 showTree s
 598   = showTreeWith True False s
 599
 600
 601 {- | /O(n)/. The expression (@showTreeWith hang wide map@) shows
 602  the tree that implements the set. If @hang@ is
 603  @True@, a /hanging/ tree is shown otherwise a rotated tree is shown. If
 604  @wide@ is true, an extra wide version is shown.
 605 -}
 606 showTreeWith :: Bool -> Bool -> IntSet -> String
 607 showTreeWith hang wide t
 608   | hang      = (showsTreeHang wide [] t) ""
 609   | otherwise = (showsTree wide [] [] t) ""
 610
 611 showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
 612 showsTree wide lbars rbars t
 613   = case t of
 614       Bin p m l r
 615           -> showsTree wide (withBar rbars) (withEmpty rbars) r .
 616              showWide wide rbars .
 617              showsBars lbars . showString (showBin p m) . showString "\n" .
 618              showWide wide lbars .
 619              showsTree wide (withEmpty lbars) (withBar lbars) l
 620       Tip x
 621           -> showsBars lbars . showString " " . shows x . showString "\n"
 622       Nil -> showsBars lbars . showString "|\n"
 623
 624 showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
 625 showsTreeHang wide bars t
 626   = case t of
 627       Bin p m l r
 628           -> showsBars bars . showString (showBin p m) . showString "\n" .
 629              showWide wide bars .
 630              showsTreeHang wide (withBar bars) l .
 631              showWide wide bars .
 632              showsTreeHang wide (withEmpty bars) r
 633       Tip x
 634           -> showsBars bars . showString " " . shows x . showString "\n"
 635       Nil -> showsBars bars . showString "|\n"
 636
 637 showBin p m
 638   = "*" -- ++ show (p,m)
 639
 640 showWide wide bars
 641   | wide      = showString (concat (reverse bars)) . showString "|\n"
 642   | otherwise = id
 643
 644 showsBars :: [String] -> ShowS
 645 showsBars bars
 646   = case bars of
 647       [] -> id
 648       _  -> showString (concat (reverse (tail bars))) . showString node
 649
 650 node           = "+--"
 651 withBar bars   = "|  ":bars
 652 withEmpty bars = "   ":bars
 653
 654
 655 {--------------------------------------------------------------------
 656   Helpers
 657 --------------------------------------------------------------------}
 658 {--------------------------------------------------------------------
 659   Join
 660 --------------------------------------------------------------------}
 661 join :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
 662 join p1 t1 p2 t2
 663   | zero p1 m = Bin p m t1 t2
 664   | otherwise = Bin p m t2 t1
 665   where
 666     m = branchMask p1 p2
 667     p = mask p1 m
 668
 669 {--------------------------------------------------------------------
 670   @bin@ assures that we never have empty trees within a tree.
 671 --------------------------------------------------------------------}
 672 bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
 673 bin p m l Nil = l
 674 bin p m Nil r = r
 675 bin p m l r   = Bin p m l r
 676
 677
 678 {--------------------------------------------------------------------
 679   Endian independent bit twiddling
 680 --------------------------------------------------------------------}
 681 zero :: Int -> Mask -> Bool
 682 zero i m
 683   = (natFromInt i) .&. (natFromInt m) == 0
 684
 685 nomatch,match :: Int -> Prefix -> Mask -> Bool
 686 nomatch i p m
 687   = (mask i m) /= p
 688
 689 match i p m
 690   = (mask i m) == p
 691
 692 mask :: Int -> Mask -> Prefix
 693 mask i m
 694   = maskW (natFromInt i) (natFromInt m)
 695
 696 zeroN :: Nat -> Nat -> Bool
 697 zeroN i m = (i .&. m) == 0
 698
 699 {--------------------------------------------------------------------
 700   Big endian operations
 701 --------------------------------------------------------------------}
 702 maskW :: Nat -> Nat -> Prefix
 703 maskW i m
 704   = intFromNat (i .&. (complement (m-1) `xor` m))
 705
 706 shorter :: Mask -> Mask -> Bool
 707 shorter m1 m2
 708   = (natFromInt m1) > (natFromInt m2)
 709
 710 branchMask :: Prefix -> Prefix -> Mask
 711 branchMask p1 p2
 712   = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
 713
 714 {----------------------------------------------------------------------
 715   Finding the highest bit (mask) in a word [x] can be done efficiently in
 716   three ways:
 717   * convert to a floating point value and the mantissa tells us the
 718     [log2(x)] that corresponds with the highest bit position. The mantissa
 719     is retrieved either via the standard C function [frexp] or by some bit
 720     twiddling on IEEE compatible numbers (float). Note that one needs to
 721     use at least [double] precision for an accurate mantissa of 32 bit
 722     numbers.
 723   * use bit twiddling, a logarithmic sequence of bitwise or's and shifts (bit).
 724   * use processor specific assembler instruction (asm).
 725
 726   The most portable way would be [bit], but is it efficient enough?
 727   I have measured the cycle counts of the different methods on an AMD
 728   Athlon-XP 1800 (~ Pentium III 1.8Ghz) using the RDTSC instruction:
 729
 730   highestBitMask: method  cycles
 731                   --------------
 732                    frexp   200
 733                    float    33
 734                    bit      11
 735                    asm      12
 736
 737   highestBit:     method  cycles
 738                   --------------
 739                    frexp   195
 740                    float    33
 741                    bit      11
 742                    asm      11
 743
 744   Wow, the bit twiddling is on today's RISC like machines even faster
 745   than a single CISC instruction (BSR)!
 746 ----------------------------------------------------------------------}
 747
 748 {----------------------------------------------------------------------
 749   [highestBitMask] returns a word where only the highest bit is set.
 750   It is found by first setting all bits in lower positions than the
 751   highest bit and than taking an exclusive or with the original value.
 752   Allthough the function may look expensive, GHC compiles this into
 753   excellent C code that subsequently compiled into highly efficient
 754   machine code. The algorithm is derived from Jorg Arndt's FXT library.
 755 ----------------------------------------------------------------------}
 756 highestBitMask :: Nat -> Nat
 757 highestBitMask x
 758   = case (x .|. shiftRL x 1) of
 759      x -> case (x .|. shiftRL x 2) of
 760       x -> case (x .|. shiftRL x 4) of
 761        x -> case (x .|. shiftRL x 8) of
 762         x -> case (x .|. shiftRL x 16) of
 763          x -> case (x .|. shiftRL x 32) of   -- for 64 bit platforms
 764           x -> (x `xor` (shiftRL x 1))
 765
 766
 767 {--------------------------------------------------------------------
 768   Utilities
 769 --------------------------------------------------------------------}
 770 foldlStrict f z xs
 771   = case xs of
 772       []     -> z
 773       (x:xx) -> let z' = f z x in seq z' (foldlStrict f z' xx)
 774
 775
 776 {-
 777 {--------------------------------------------------------------------
 778   Testing
 779 --------------------------------------------------------------------}
 780 testTree :: [Int] -> IntSet
 781 testTree xs   = fromList xs
 782 test1 = testTree [1..20]
 783 test2 = testTree [30,29..10]
 784 test3 = testTree [1,4,6,89,2323,53,43,234,5,79,12,9,24,9,8,423,8,42,4,8,9,3]
 785
 786 {--------------------------------------------------------------------
 787   QuickCheck
 788 --------------------------------------------------------------------}
 789 qcheck prop
 790   = check config prop
 791   where
 792     config = Config
 793       { configMaxTest = 500
 794       , configMaxFail = 5000
 795       , configSize    = \n -> (div n 2 + 3)
 796       , configEvery   = \n args -> let s = show n in s ++ [ '\b' | _ <- s ]
 797       }
 798
 799
 800 {--------------------------------------------------------------------
 801   Arbitrary, reasonably balanced trees
 802 --------------------------------------------------------------------}
 803 instance Arbitrary IntSet where
 804   arbitrary = do{ xs <- arbitrary
 805                 ; return (fromList xs)
 806                 }
 807
 808
 809 {--------------------------------------------------------------------
 810   Single, Insert, Delete
 811 --------------------------------------------------------------------}
 812 prop_Single :: Int -> Bool
 813 prop_Single x
 814   = (insert x empty == singleton x)
 815
 816 prop_InsertDelete :: Int -> IntSet -> Property
 817 prop_InsertDelete k t
 818   = not (member k t) ==> delete k (insert k t) == t
 819
 820
 821 {--------------------------------------------------------------------
 822   Union
 823 --------------------------------------------------------------------}
 824 prop_UnionInsert :: Int -> IntSet -> Bool
 825 prop_UnionInsert x t
 826   = union t (singleton x) == insert x t
 827
 828 prop_UnionAssoc :: IntSet -> IntSet -> IntSet -> Bool
 829 prop_UnionAssoc t1 t2 t3
 830   = union t1 (union t2 t3) == union (union t1 t2) t3
 831
 832 prop_UnionComm :: IntSet -> IntSet -> Bool
 833 prop_UnionComm t1 t2
 834   = (union t1 t2 == union t2 t1)
 835
 836 prop_Diff :: [Int] -> [Int] -> Bool
 837 prop_Diff xs ys
 838   =  toAscList (difference (fromList xs) (fromList ys))
 839     == List.sort ((List.\\) (nub xs)  (nub ys))
 840
 841 prop_Int :: [Int] -> [Int] -> Bool
 842 prop_Int xs ys
 843   =  toAscList (intersection (fromList xs) (fromList ys))
 844     == List.sort (nub ((List.intersect) (xs)  (ys)))
 845
 846 {--------------------------------------------------------------------
 847   Lists
 848 --------------------------------------------------------------------}
 849 prop_Ordered
 850   = forAll (choose (5,100)) $ \n ->
 851     let xs = [0..n::Int]
 852     in fromAscList xs == fromList xs
 853
 854 prop_List :: [Int] -> Bool
 855 prop_List xs
 856   = (sort (nub xs) == toAscList (fromList xs))
 857 -}