Data/IntSet.hs

   1 {-# OPTIONS -cpp -fglasgow-exts #-}
   2 -----------------------------------------------------------------------------
   3 -- |
   4 -- Module      :  Data.IntSet
   5 -- Copyright   :  (c) Daan Leijen 2002
   6 -- License     :  BSD-style
   7 -- Maintainer  :  libraries@haskell.org
   8 -- Stability   :  provisional
   9 -- Portability :  portable
  10 --
  11 -- An efficient implementation of integer sets.
  12 --
  13 -- This module is intended to be imported @qualified@, to avoid name
  14 -- clashes with "Prelude" functions.  eg.
  15 --
  16 -- >  import Data.IntSet as Set
  17 --
  18 -- The implementation is based on /big-endian patricia trees/.  This data
  19 -- structure performs especially well on binary operations like 'union'
  20 -- and 'intersection'.  However, my benchmarks show that it is also
  21 -- (much) faster on insertions and deletions when compared to a generic
  22 -- size-balanced set implementation (see "Data.Set").
  23 --
  24 --    * Chris Okasaki and Andy Gill,  \"/Fast Mergeable Integer Maps/\",
  25 --      Workshop on ML, September 1998, pages 77-86,
  26 --      <http://www.cse.ogi.edu/~andy/pub/finite.htm>
  27 --
  28 --    * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
  29 --      Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
  30 --      October 1968, pages 514-534.
  31 --
  32 -- Many operations have a worst-case complexity of /O(min(n,W))/.
  33 -- This means that the operation can become linear in the number of
  34 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
  35 -- (32 or 64).
  36 -----------------------------------------------------------------------------
  37
  38 module Data.IntSet  (
  39             -- * Set type
  40               IntSet          -- instance Eq,Show
  41
  42             -- * Operators
  43             , (\\)
  44
  45             -- * Query
  46             , null
  47             , size
  48             , member
  49             , isSubsetOf
  50             , isProperSubsetOf
  51
  52             -- * Construction
  53             , empty
  54             , singleton
  55             , insert
  56             , delete
  57
  58             -- * Combine
  59             , union, unions
  60             , difference
  61             , intersection
  62
  63             -- * Filter
  64             , filter
  65             , partition
  66             , split
  67             , splitMember
  68
  69             -- * Map
  70             , map
  71
  72             -- * Fold
  73             , fold
  74
  75             -- * Conversion
  76             -- ** List
  77             , elems
  78             , toList
  79             , fromList
  80
  81             -- ** Ordered list
  82             , toAscList
  83             , fromAscList
  84             , fromDistinctAscList
  85
  86             -- * Debugging
  87             , showTree
  88             , showTreeWith
  89             ) where
  90
  91
  92 import Prelude hiding (lookup,filter,foldr,foldl,null,map)
  93 import Data.Bits
  94 import Data.Int
  95
  96 import qualified Data.List as List
  97 import Data.Monoid
  98
  99 {-
 100 -- just for testing
 101 import QuickCheck
 102 import List (nub,sort)
 103 import qualified List
 104 -}
 105
 106 #if __GLASGOW_HASKELL__ >= 503
 107 import GHC.Word
 108 import GHC.Exts ( Word(..), Int(..), shiftRL# )
 109 #elif __GLASGOW_HASKELL__
 110 import Word
 111 import GlaExts ( Word(..), Int(..), shiftRL# )
 112 #else
 113 import Data.Word
 114 #endif
 115
 116 infixl 9 \\{-This comment teaches CPP correct behaviour -}
 117
 118 -- A "Nat" is a natural machine word (an unsigned Int)
 119 type Nat = Word
 120
 121 natFromInt :: Int -> Nat
 122 natFromInt i = fromIntegral i
 123
 124 intFromNat :: Nat -> Int
 125 intFromNat w = fromIntegral w
 126
 127 shiftRL :: Nat -> Int -> Nat
 128 #if __GLASGOW_HASKELL__
 129 {--------------------------------------------------------------------
 130   GHC: use unboxing to get @shiftRL@ inlined.
 131 --------------------------------------------------------------------}
 132 shiftRL (W# x) (I# i)
 133   = W# (shiftRL# x i)
 134 #else
 135 shiftRL x i   = shiftR x i
 136 #endif
 137
 138 {--------------------------------------------------------------------
 139   Operators
 140 --------------------------------------------------------------------}
 141 -- | /O(n+m)/. See 'difference'.
 142 (\\) :: IntSet -> IntSet -> IntSet
 143 m1 \\ m2 = difference m1 m2
 144
 145 {--------------------------------------------------------------------
 146   Types
 147 --------------------------------------------------------------------}
 148 -- | A set of integers.
 149 data IntSet = Nil
 150             | Tip {-# UNPACK #-} !Int
 151             | Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
 152
 153 type Prefix = Int
 154 type Mask   = Int
 155
 156 {--------------------------------------------------------------------
 157   Query
 158 --------------------------------------------------------------------}
 159 -- | /O(1)/. Is the set empty?
 160 null :: IntSet -> Bool
 161 null Nil   = True
 162 null other = False
 163
 164 -- | /O(n)/. Cardinality of the set.
 165 size :: IntSet -> Int
 166 size t
 167   = case t of
 168       Bin p m l r -> size l + size r
 169       Tip y -> 1
 170       Nil   -> 0
 171
 172 -- | /O(min(n,W))/. Is the value a member of the set?
 173 member :: Int -> IntSet -> Bool
 174 member x t
 175   = case t of
 176       Bin p m l r
 177         | nomatch x p m -> False
 178         | zero x m      -> member x l
 179         | otherwise     -> member x r
 180       Tip y -> (x==y)
 181       Nil   -> False
 182
 183 -- 'lookup' is used by 'intersection' for left-biasing
 184 lookup :: Int -> IntSet -> Maybe Int
 185 lookup k t
 186   = let nk = natFromInt k  in seq nk (lookupN nk t)
 187
 188 lookupN :: Nat -> IntSet -> Maybe Int
 189 lookupN k t
 190   = case t of
 191       Bin p m l r
 192         | zeroN k (natFromInt m) -> lookupN k l
 193         | otherwise              -> lookupN k r
 194       Tip kx
 195         | (k == natFromInt kx)  -> Just kx
 196         | otherwise             -> Nothing
 197       Nil -> Nothing
 198
 199 {--------------------------------------------------------------------
 200   Construction
 201 --------------------------------------------------------------------}
 202 -- | /O(1)/. The empty set.
 203 empty :: IntSet
 204 empty
 205   = Nil
 206
 207 -- | /O(1)/. A set of one element.
 208 singleton :: Int -> IntSet
 209 singleton x
 210   = Tip x
 211
 212 {--------------------------------------------------------------------
 213   Insert
 214 --------------------------------------------------------------------}
 215 -- | /O(min(n,W))/. Add a value to the set. When the value is already
 216 -- an element of the set, it is replaced by the new one, ie. 'insert'
 217 -- is left-biased.
 218 insert :: Int -> IntSet -> IntSet
 219 insert x t
 220   = case t of
 221       Bin p m l r
 222         | nomatch x p m -> join x (Tip x) p t
 223         | zero x m      -> Bin p m (insert x l) r
 224         | otherwise     -> Bin p m l (insert x r)
 225       Tip y
 226         | x==y          -> Tip x
 227         | otherwise     -> join x (Tip x) y t
 228       Nil -> Tip x
 229
 230 -- right-biased insertion, used by 'union'
 231 insertR :: Int -> IntSet -> IntSet
 232 insertR x t
 233   = case t of
 234       Bin p m l r
 235         | nomatch x p m -> join x (Tip x) p t
 236         | zero x m      -> Bin p m (insert x l) r
 237         | otherwise     -> Bin p m l (insert x r)
 238       Tip y
 239         | x==y          -> t
 240         | otherwise     -> join x (Tip x) y t
 241       Nil -> Tip x
 242
 243 -- | /O(min(n,W))/. Delete a value in the set. Returns the
 244 -- original set when the value was not present.
 245 delete :: Int -> IntSet -> IntSet
 246 delete x t
 247   = case t of
 248       Bin p m l r
 249         | nomatch x p m -> t
 250         | zero x m      -> bin p m (delete x l) r
 251         | otherwise     -> bin p m l (delete x r)
 252       Tip y
 253         | x==y          -> Nil
 254         | otherwise     -> t
 255       Nil -> Nil
 256
 257
 258 {--------------------------------------------------------------------
 259   Union
 260 --------------------------------------------------------------------}
 261 -- | The union of a list of sets.
 262 unions :: [IntSet] -> IntSet
 263 unions xs
 264   = foldlStrict union empty xs
 265
 266
 267 -- | /O(n+m)/. The union of two sets.
 268 union :: IntSet -> IntSet -> IntSet
 269 union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 270   | shorter m1 m2  = union1
 271   | shorter m2 m1  = union2
 272   | p1 == p2       = Bin p1 m1 (union l1 l2) (union r1 r2)
 273   | otherwise      = join p1 t1 p2 t2
 274   where
 275     union1  | nomatch p2 p1 m1  = join p1 t1 p2 t2
 276             | zero p2 m1        = Bin p1 m1 (union l1 t2) r1
 277             | otherwise         = Bin p1 m1 l1 (union r1 t2)
 278
 279     union2  | nomatch p1 p2 m2  = join p1 t1 p2 t2
 280             | zero p1 m2        = Bin p2 m2 (union t1 l2) r2
 281             | otherwise         = Bin p2 m2 l2 (union t1 r2)
 282
 283 union (Tip x) t = insert x t
 284 union t (Tip x) = insertR x t  -- right bias
 285 union Nil t     = t
 286 union t Nil     = t
 287
 288
 289 {--------------------------------------------------------------------
 290   Difference
 291 --------------------------------------------------------------------}
 292 -- | /O(n+m)/. Difference between two sets.
 293 difference :: IntSet -> IntSet -> IntSet
 294 difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 295   | shorter m1 m2  = difference1
 296   | shorter m2 m1  = difference2
 297   | p1 == p2       = bin p1 m1 (difference l1 l2) (difference r1 r2)
 298   | otherwise      = t1
 299   where
 300     difference1 | nomatch p2 p1 m1  = t1
 301                 | zero p2 m1        = bin p1 m1 (difference l1 t2) r1
 302                 | otherwise         = bin p1 m1 l1 (difference r1 t2)
 303
 304     difference2 | nomatch p1 p2 m2  = t1
 305                 | zero p1 m2        = difference t1 l2
 306                 | otherwise         = difference t1 r2
 307
 308 difference t1@(Tip x) t2
 309   | member x t2  = Nil
 310   | otherwise    = t1
 311
 312 difference Nil t     = Nil
 313 difference t (Tip x) = delete x t
 314 difference t Nil     = t
 315
 316
 317
 318 {--------------------------------------------------------------------
 319   Intersection
 320 --------------------------------------------------------------------}
 321 -- | /O(n+m)/. The intersection of two sets.
 322 intersection :: IntSet -> IntSet -> IntSet
 323 intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 324   | shorter m1 m2  = intersection1
 325   | shorter m2 m1  = intersection2
 326   | p1 == p2       = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
 327   | otherwise      = Nil
 328   where
 329     intersection1 | nomatch p2 p1 m1  = Nil
 330                   | zero p2 m1        = intersection l1 t2
 331                   | otherwise         = intersection r1 t2
 332
 333     intersection2 | nomatch p1 p2 m2  = Nil
 334                   | zero p1 m2        = intersection t1 l2
 335                   | otherwise         = intersection t1 r2
 336
 337 intersection t1@(Tip x) t2
 338   | member x t2  = t1
 339   | otherwise    = Nil
 340 intersection t (Tip x)
 341   = case lookup x t of
 342       Just y  -> Tip y
 343       Nothing -> Nil
 344 intersection Nil t = Nil
 345 intersection t Nil = Nil
 346
 347
 348
 349 {--------------------------------------------------------------------
 350   Subset
 351 --------------------------------------------------------------------}
 352 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
 353 isProperSubsetOf :: IntSet -> IntSet -> Bool
 354 isProperSubsetOf t1 t2
 355   = case subsetCmp t1 t2 of
 356       LT -> True
 357       ge -> False
 358
 359 subsetCmp t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 360   | shorter m1 m2  = GT
 361   | shorter m2 m1  = subsetCmpLt
 362   | p1 == p2       = subsetCmpEq
 363   | otherwise      = GT  -- disjoint
 364   where
 365     subsetCmpLt | nomatch p1 p2 m2  = GT
 366                 | zero p1 m2        = subsetCmp t1 l2
 367                 | otherwise         = subsetCmp t1 r2
 368     subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
 369                     (GT,_ ) -> GT
 370                     (_ ,GT) -> GT
 371                     (EQ,EQ) -> EQ
 372                     other   -> LT
 373
 374 subsetCmp (Bin p m l r) t  = GT
 375 subsetCmp (Tip x) (Tip y)
 376   | x==y       = EQ
 377   | otherwise  = GT  -- disjoint
 378 subsetCmp (Tip x) t
 379   | member x t = LT
 380   | otherwise  = GT  -- disjoint
 381 subsetCmp Nil Nil = EQ
 382 subsetCmp Nil t   = LT
 383
 384 -- | /O(n+m)/. Is this a subset?
 385 -- @(s1 `isSubsetOf` s2)@ tells whether s1 is a subset of s2.
 386
 387 isSubsetOf :: IntSet -> IntSet -> Bool
 388 isSubsetOf t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 389   | shorter m1 m2  = False
 390   | shorter m2 m1  = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2
 391                                                       else isSubsetOf t1 r2)
 392   | otherwise      = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2
 393 isSubsetOf (Bin p m l r) t  = False
 394 isSubsetOf (Tip x) t        = member x t
 395 isSubsetOf Nil t            = True
 396
 397
 398 {--------------------------------------------------------------------
 399   Filter
 400 --------------------------------------------------------------------}
 401 -- | /O(n)/. Filter all elements that satisfy some predicate.
 402 filter :: (Int -> Bool) -> IntSet -> IntSet
 403 filter pred t
 404   = case t of
 405       Bin p m l r
 406         -> bin p m (filter pred l) (filter pred r)
 407       Tip x
 408         | pred x    -> t
 409         | otherwise -> Nil
 410       Nil -> Nil
 411
 412 -- | /O(n)/. partition the set according to some predicate.
 413 partition :: (Int -> Bool) -> IntSet -> (IntSet,IntSet)
 414 partition pred t
 415   = case t of
 416       Bin p m l r
 417         -> let (l1,l2) = partition pred l
 418                (r1,r2) = partition pred r
 419            in (bin p m l1 r1, bin p m l2 r2)
 420       Tip x
 421         | pred x    -> (t,Nil)
 422         | otherwise -> (Nil,t)
 423       Nil -> (Nil,Nil)
 424
 425
 426 -- | /O(log n)/. The expression (@split x set@) is a pair @(set1,set2)@
 427 -- where all elements in @set1@ are lower than @x@ and all elements in
 428 -- @set2@ larger than @x@.
 429 --
 430 -- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [3,4])
 431 split :: Int -> IntSet -> (IntSet,IntSet)
 432 split x t
 433   = case t of
 434       Bin p m l r
 435         | zero x m  -> let (lt,gt) = split x l in (lt,union gt r)
 436         | otherwise -> let (lt,gt) = split x r in (union l lt,gt)
 437       Tip y
 438         | x>y       -> (t,Nil)
 439         | x<y       -> (Nil,t)
 440         | otherwise -> (Nil,Nil)
 441       Nil -> (Nil,Nil)
 442
 443 -- | /O(log n)/. Performs a 'split' but also returns whether the pivot
 444 -- element was found in the original set.
 445 splitMember :: Int -> IntSet -> (Bool,IntSet,IntSet)
 446 splitMember x t
 447   = case t of
 448       Bin p m l r
 449         | zero x m  -> let (found,lt,gt) = splitMember x l in (found,lt,union gt r)
 450         | otherwise -> let (found,lt,gt) = splitMember x r in (found,union l lt,gt)
 451       Tip y
 452         | x>y       -> (False,t,Nil)
 453         | x<y       -> (False,Nil,t)
 454         | otherwise -> (True,Nil,Nil)
 455       Nil -> (False,Nil,Nil)
 456
 457 {----------------------------------------------------------------------
 458   Map
 459 ----------------------------------------------------------------------}
 460
 461 -- | /O(n*min(n,W))/.
 462 -- @map f s@ is the set obtained by applying @f@ to each element of @s@.
 463 --
 464 -- It's worth noting that the size of the result may be smaller if,
 465 -- for some @(x,y)@, @x \/= y && f x == f y@
 466
 467 map :: (Int->Int) -> IntSet -> IntSet
 468 map f = fromList . List.map f . toList
 469
 470 {--------------------------------------------------------------------
 471   Fold
 472 --------------------------------------------------------------------}
 473 -- | /O(n)/. Fold over the elements of a set in an unspecified order.
 474 --
 475 -- > sum set   == fold (+) 0 set
 476 -- > elems set == fold (:) [] set
 477 fold :: (Int -> b -> b) -> b -> IntSet -> b
 478 fold f z t
 479   = foldr f z t
 480
 481 foldr :: (Int -> b -> b) -> b -> IntSet -> b
 482 foldr f z t
 483   = case t of
 484       Bin p m l r -> foldr f (foldr f z r) l
 485       Tip x       -> f x z
 486       Nil         -> z
 487
 488 {--------------------------------------------------------------------
 489   List variations
 490 --------------------------------------------------------------------}
 491 -- | /O(n)/. The elements of a set. (For sets, this is equivalent to toList)
 492 elems :: IntSet -> [Int]
 493 elems s
 494   = toList s
 495
 496 {--------------------------------------------------------------------
 497   Lists
 498 --------------------------------------------------------------------}
 499 -- | /O(n)/. Convert the set to a list of elements.
 500 toList :: IntSet -> [Int]
 501 toList t
 502   = fold (:) [] t
 503
 504 -- | /O(n)/. Convert the set to an ascending list of elements.
 505 toAscList :: IntSet -> [Int]
 506 toAscList t
 507   = -- NOTE: the following algorithm only works for big-endian trees
 508     let (pos,neg) = span (>=0) (foldr (:) [] t) in neg ++ pos
 509
 510 -- | /O(n*min(n,W))/. Create a set from a list of integers.
 511 fromList :: [Int] -> IntSet
 512 fromList xs
 513   = foldlStrict ins empty xs
 514   where
 515     ins t x  = insert x t
 516
 517 -- | /O(n*min(n,W))/. Build a set from an ascending list of elements.
 518 fromAscList :: [Int] -> IntSet
 519 fromAscList xs
 520   = fromList xs
 521
 522 -- | /O(n*min(n,W))/. Build a set from an ascending list of distinct elements.
 523 fromDistinctAscList :: [Int] -> IntSet
 524 fromDistinctAscList xs
 525   = fromList xs
 526
 527
 528 {--------------------------------------------------------------------
 529   Eq
 530 --------------------------------------------------------------------}
 531 instance Eq IntSet where
 532   t1 == t2  = equal t1 t2
 533   t1 /= t2  = nequal t1 t2
 534
 535 equal :: IntSet -> IntSet -> Bool
 536 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
 537   = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
 538 equal (Tip x) (Tip y)
 539   = (x==y)
 540 equal Nil Nil = True
 541 equal t1 t2   = False
 542
 543 nequal :: IntSet -> IntSet -> Bool
 544 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
 545   = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
 546 nequal (Tip x) (Tip y)
 547   = (x/=y)
 548 nequal Nil Nil = False
 549 nequal t1 t2   = True
 550
 551 {--------------------------------------------------------------------
 552   Ord
 553 --------------------------------------------------------------------}
 554
 555 instance Ord IntSet where
 556     compare s1 s2 = compare (toAscList s1) (toAscList s2)
 557     -- tentative implementation. See if more efficient exists.
 558
 559 {--------------------------------------------------------------------
 560   Monoid
 561 --------------------------------------------------------------------}
 562
 563 instance Monoid IntSet where
 564     mempty = empty
 565     mappend = union
 566     mconcat = unions
 567
 568 {--------------------------------------------------------------------
 569   Show
 570 --------------------------------------------------------------------}
 571 instance Show IntSet where
 572   showsPrec d s  = showSet (toList s)
 573
 574 showSet :: [Int] -> ShowS
 575 showSet []
 576   = showString "{}"
 577 showSet (x:xs)
 578   = showChar '{' . shows x . showTail xs
 579   where
 580     showTail []     = showChar '}'
 581     showTail (x:xs) = showChar ',' . shows x . showTail xs
 582
 583 {--------------------------------------------------------------------
 584   Debugging
 585 --------------------------------------------------------------------}
 586 -- | /O(n)/. Show the tree that implements the set. The tree is shown
 587 -- in a compressed, hanging format.
 588 showTree :: IntSet -> String
 589 showTree s
 590   = showTreeWith True False s
 591
 592
 593 {- | /O(n)/. The expression (@showTreeWith hang wide map@) shows
 594  the tree that implements the set. If @hang@ is
 595  @True@, a /hanging/ tree is shown otherwise a rotated tree is shown. If
 596  @wide@ is true, an extra wide version is shown.
 597 -}
 598 showTreeWith :: Bool -> Bool -> IntSet -> String
 599 showTreeWith hang wide t
 600   | hang      = (showsTreeHang wide [] t) ""
 601   | otherwise = (showsTree wide [] [] t) ""
 602
 603 showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
 604 showsTree wide lbars rbars t
 605   = case t of
 606       Bin p m l r
 607           -> showsTree wide (withBar rbars) (withEmpty rbars) r .
 608              showWide wide rbars .
 609              showsBars lbars . showString (showBin p m) . showString "\n" .
 610              showWide wide lbars .
 611              showsTree wide (withEmpty lbars) (withBar lbars) l
 612       Tip x
 613           -> showsBars lbars . showString " " . shows x . showString "\n"
 614       Nil -> showsBars lbars . showString "|\n"
 615
 616 showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
 617 showsTreeHang wide bars t
 618   = case t of
 619       Bin p m l r
 620           -> showsBars bars . showString (showBin p m) . showString "\n" .
 621              showWide wide bars .
 622              showsTreeHang wide (withBar bars) l .
 623              showWide wide bars .
 624              showsTreeHang wide (withEmpty bars) r
 625       Tip x
 626           -> showsBars bars . showString " " . shows x . showString "\n"
 627       Nil -> showsBars bars . showString "|\n"
 628
 629 showBin p m
 630   = "*" -- ++ show (p,m)
 631
 632 showWide wide bars
 633   | wide      = showString (concat (reverse bars)) . showString "|\n"
 634   | otherwise = id
 635
 636 showsBars :: [String] -> ShowS
 637 showsBars bars
 638   = case bars of
 639       [] -> id
 640       _  -> showString (concat (reverse (tail bars))) . showString node
 641
 642 node           = "+--"
 643 withBar bars   = "|  ":bars
 644 withEmpty bars = "   ":bars
 645
 646
 647 {--------------------------------------------------------------------
 648   Helpers
 649 --------------------------------------------------------------------}
 650 {--------------------------------------------------------------------
 651   Join
 652 --------------------------------------------------------------------}
 653 join :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
 654 join p1 t1 p2 t2
 655   | zero p1 m = Bin p m t1 t2
 656   | otherwise = Bin p m t2 t1
 657   where
 658     m = branchMask p1 p2
 659     p = mask p1 m
 660
 661 {--------------------------------------------------------------------
 662   @bin@ assures that we never have empty trees within a tree.
 663 --------------------------------------------------------------------}
 664 bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
 665 bin p m l Nil = l
 666 bin p m Nil r = r
 667 bin p m l r   = Bin p m l r
 668
 669
 670 {--------------------------------------------------------------------
 671   Endian independent bit twiddling
 672 --------------------------------------------------------------------}
 673 zero :: Int -> Mask -> Bool
 674 zero i m
 675   = (natFromInt i) .&. (natFromInt m) == 0
 676
 677 nomatch,match :: Int -> Prefix -> Mask -> Bool
 678 nomatch i p m
 679   = (mask i m) /= p
 680
 681 match i p m
 682   = (mask i m) == p
 683
 684 mask :: Int -> Mask -> Prefix
 685 mask i m
 686   = maskW (natFromInt i) (natFromInt m)
 687
 688 zeroN :: Nat -> Nat -> Bool
 689 zeroN i m = (i .&. m) == 0
 690
 691 {--------------------------------------------------------------------
 692   Big endian operations
 693 --------------------------------------------------------------------}
 694 maskW :: Nat -> Nat -> Prefix
 695 maskW i m
 696   = intFromNat (i .&. (complement (m-1) `xor` m))
 697
 698 shorter :: Mask -> Mask -> Bool
 699 shorter m1 m2
 700   = (natFromInt m1) > (natFromInt m2)
 701
 702 branchMask :: Prefix -> Prefix -> Mask
 703 branchMask p1 p2
 704   = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
 705
 706 {----------------------------------------------------------------------
 707   Finding the highest bit (mask) in a word [x] can be done efficiently in
 708   three ways:
 709   * convert to a floating point value and the mantissa tells us the
 710     [log2(x)] that corresponds with the highest bit position. The mantissa
 711     is retrieved either via the standard C function [frexp] or by some bit
 712     twiddling on IEEE compatible numbers (float). Note that one needs to
 713     use at least [double] precision for an accurate mantissa of 32 bit
 714     numbers.
 715   * use bit twiddling, a logarithmic sequence of bitwise or's and shifts (bit).
 716   * use processor specific assembler instruction (asm).
 717
 718   The most portable way would be [bit], but is it efficient enough?
 719   I have measured the cycle counts of the different methods on an AMD
 720   Athlon-XP 1800 (~ Pentium III 1.8Ghz) using the RDTSC instruction:
 721
 722   highestBitMask: method  cycles
 723                   --------------
 724                    frexp   200
 725                    float    33
 726                    bit      11
 727                    asm      12
 728
 729   highestBit:     method  cycles
 730                   --------------
 731                    frexp   195
 732                    float    33
 733                    bit      11
 734                    asm      11
 735
 736   Wow, the bit twiddling is on today's RISC like machines even faster
 737   than a single CISC instruction (BSR)!
 738 ----------------------------------------------------------------------}
 739
 740 {----------------------------------------------------------------------
 741   [highestBitMask] returns a word where only the highest bit is set.
 742   It is found by first setting all bits in lower positions than the
 743   highest bit and than taking an exclusive or with the original value.
 744   Allthough the function may look expensive, GHC compiles this into
 745   excellent C code that subsequently compiled into highly efficient
 746   machine code. The algorithm is derived from Jorg Arndt's FXT library.
 747 ----------------------------------------------------------------------}
 748 highestBitMask :: Nat -> Nat
 749 highestBitMask x
 750   = case (x .|. shiftRL x 1) of
 751      x -> case (x .|. shiftRL x 2) of
 752       x -> case (x .|. shiftRL x 4) of
 753        x -> case (x .|. shiftRL x 8) of
 754         x -> case (x .|. shiftRL x 16) of
 755          x -> case (x .|. shiftRL x 32) of   -- for 64 bit platforms
 756           x -> (x `xor` (shiftRL x 1))
 757
 758
 759 {--------------------------------------------------------------------
 760   Utilities
 761 --------------------------------------------------------------------}
 762 foldlStrict f z xs
 763   = case xs of
 764       []     -> z
 765       (x:xx) -> let z' = f z x in seq z' (foldlStrict f z' xx)
 766
 767
 768 {-
 769 {--------------------------------------------------------------------
 770   Testing
 771 --------------------------------------------------------------------}
 772 testTree :: [Int] -> IntSet
 773 testTree xs   = fromList xs
 774 test1 = testTree [1..20]
 775 test2 = testTree [30,29..10]
 776 test3 = testTree [1,4,6,89,2323,53,43,234,5,79,12,9,24,9,8,423,8,42,4,8,9,3]
 777
 778 {--------------------------------------------------------------------
 779   QuickCheck
 780 --------------------------------------------------------------------}
 781 qcheck prop
 782   = check config prop
 783   where
 784     config = Config
 785       { configMaxTest = 500
 786       , configMaxFail = 5000
 787       , configSize    = \n -> (div n 2 + 3)
 788       , configEvery   = \n args -> let s = show n in s ++ [ '\b' | _ <- s ]
 789       }
 790
 791
 792 {--------------------------------------------------------------------
 793   Arbitrary, reasonably balanced trees
 794 --------------------------------------------------------------------}
 795 instance Arbitrary IntSet where
 796   arbitrary = do{ xs <- arbitrary
 797                 ; return (fromList xs)
 798                 }
 799
 800
 801 {--------------------------------------------------------------------
 802   Single, Insert, Delete
 803 --------------------------------------------------------------------}
 804 prop_Single :: Int -> Bool
 805 prop_Single x
 806   = (insert x empty == singleton x)
 807
 808 prop_InsertDelete :: Int -> IntSet -> Property
 809 prop_InsertDelete k t
 810   = not (member k t) ==> delete k (insert k t) == t
 811
 812
 813 {--------------------------------------------------------------------
 814   Union
 815 --------------------------------------------------------------------}
 816 prop_UnionInsert :: Int -> IntSet -> Bool
 817 prop_UnionInsert x t
 818   = union t (singleton x) == insert x t
 819
 820 prop_UnionAssoc :: IntSet -> IntSet -> IntSet -> Bool
 821 prop_UnionAssoc t1 t2 t3
 822   = union t1 (union t2 t3) == union (union t1 t2) t3
 823
 824 prop_UnionComm :: IntSet -> IntSet -> Bool
 825 prop_UnionComm t1 t2
 826   = (union t1 t2 == union t2 t1)
 827
 828 prop_Diff :: [Int] -> [Int] -> Bool
 829 prop_Diff xs ys
 830   =  toAscList (difference (fromList xs) (fromList ys))
 831     == List.sort ((List.\\) (nub xs)  (nub ys))
 832
 833 prop_Int :: [Int] -> [Int] -> Bool
 834 prop_Int xs ys
 835   =  toAscList (intersection (fromList xs) (fromList ys))
 836     == List.sort (nub ((List.intersect) (xs)  (ys)))
 837
 838 {--------------------------------------------------------------------
 839   Lists
 840 --------------------------------------------------------------------}
 841 prop_Ordered
 842   = forAll (choose (5,100)) $ \n ->
 843     let xs = [0..n::Int]
 844     in fromAscList xs == fromList xs
 845
 846 prop_List :: [Int] -> Bool
 847 prop_List xs
 848   = (sort (nub xs) == toAscList (fromList xs))
 849 -}