Data/IntSet.hs

   1 {-# OPTIONS -cpp -fglasgow-exts #-}
   2 --------------------------------------------------------------------------------
   3 {-| Module      :  Data.IntSet
   4     Copyright   :  (c) Daan Leijen 2002
   5     License     :  BSD-style
   6     Maintainer  :  libraries@haskell.org
   7     Stability   :  provisional
   8     Portability :  portable
   9
  10   An efficient implementation of integer sets.
  11
  12   This module is intended to be imported @qualified@, to avoid name
  13   clashes with Prelude functions.  eg.
  14
  15   >  import Data.IntSet as Set
  16
  17   The implementation is based on /big-endian patricia trees/. This data structure
  18   performs especially well on binary operations like 'union' and 'intersection'. However,
  19   my benchmarks show that it is also (much) faster on insertions and deletions when
  20   compared to a generic size-balanced set implementation (see "Set").
  21
  22   *  Chris Okasaki and Andy Gill,  \"/Fast Mergeable Integer Maps/\",
  23      Workshop on ML, September 1998, pages 77--86, <http://www.cse.ogi.edu/~andy/pub/finite.htm>
  24
  25   *  D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve Information
  26      Coded In Alphanumeric/\", Journal of the ACM, 15(4), October 1968, pages 514--534.
  27
  28   Many operations have a worst-case complexity of /O(min(n,W))/. This means that the
  29   operation can become linear in the number of elements
  30   with a maximum of /W/ -- the number of bits in an 'Int' (32 or 64).
  31 -}
  32 ---------------------------------------------------------------------------------}
  33 module Data.IntSet  (
  34             -- * Set type
  35               IntSet          -- instance Eq,Show
  36
  37             -- * Operators
  38             , (\\)
  39
  40             -- * Query
  41             , null
  42             , size
  43             , member
  44             , isSubsetOf
  45             , isProperSubsetOf
  46
  47             -- * Construction
  48             , empty
  49             , singleton
  50             , insert
  51             , delete
  52
  53             -- * Combine
  54             , union, unions
  55             , difference
  56             , intersection
  57
  58             -- * Filter
  59             , filter
  60             , partition
  61             , split
  62             , splitMember
  63
  64             -- * Map
  65             , map
  66
  67             -- * Fold
  68             , fold
  69
  70             -- * Conversion
  71             -- ** List
  72             , elems
  73             , toList
  74             , fromList
  75
  76             -- ** Ordered list
  77             , toAscList
  78             , fromAscList
  79             , fromDistinctAscList
  80
  81             -- * Debugging
  82             , showTree
  83             , showTreeWith
  84             ) where
  85
  86
  87 import Prelude hiding (lookup,filter,foldr,foldl,null,map)
  88 import Data.Bits
  89 import Data.Int
  90
  91 import qualified Data.List as List
  92 import Data.Monoid
  93
  94 {-
  95 -- just for testing
  96 import QuickCheck
  97 import List (nub,sort)
  98 import qualified List
  99 -}
 100
 101
 102 #ifdef __GLASGOW_HASKELL__
 103 {--------------------------------------------------------------------
 104   GHC: use unboxing to get @shiftRL@ inlined.
 105 --------------------------------------------------------------------}
 106 #if __GLASGOW_HASKELL__ >= 503
 107 import GHC.Word
 108 import GHC.Exts ( Word(..), Int(..), shiftRL# )
 109 #else
 110 import Word
 111 import GlaExts ( Word(..), Int(..), shiftRL# )
 112 #endif
 113
 114 infixl 9 \\{-This comment teaches CPP correct behaviour -}
 115
 116 type Nat = Word
 117
 118 natFromInt :: Int -> Nat
 119 natFromInt i = fromIntegral i
 120
 121 intFromNat :: Nat -> Int
 122 intFromNat w = fromIntegral w
 123
 124 shiftRL :: Nat -> Int -> Nat
 125 shiftRL (W# x) (I# i)
 126   = W# (shiftRL# x i)
 127
 128 #elif __HUGS__
 129 {--------------------------------------------------------------------
 130  Hugs:
 131  * raises errors on boundary values when using 'fromIntegral'
 132    but not with the deprecated 'fromInt/toInt'.
 133  * Older Hugs doesn't define 'Word'.
 134  * Newer Hugs defines 'Word' in the Prelude but no operations.
 135 --------------------------------------------------------------------}
 136 import Data.Word
 137 infixl 9 \\
 138
 139 type Nat = Word32   -- illegal on 64-bit platforms!
 140
 141 natFromInt :: Int -> Nat
 142 natFromInt i = fromInt i
 143
 144 intFromNat :: Nat -> Int
 145 intFromNat w = toInt w
 146
 147 shiftRL :: Nat -> Int -> Nat
 148 shiftRL x i   = shiftR x i
 149
 150 #else
 151 {--------------------------------------------------------------------
 152   'Standard' Haskell
 153   * A "Nat" is a natural machine word (an unsigned Int)
 154 --------------------------------------------------------------------}
 155 import Data.Word
 156 infixl 9 \\
 157
 158 type Nat = Word
 159
 160 natFromInt :: Int -> Nat
 161 natFromInt i = fromIntegral i
 162
 163 intFromNat :: Nat -> Int
 164 intFromNat w = fromIntegral w
 165
 166 shiftRL :: Nat -> Int -> Nat
 167 shiftRL w i   = shiftR w i
 168
 169 #endif
 170
 171 {--------------------------------------------------------------------
 172   Operators
 173 --------------------------------------------------------------------}
 174 -- | /O(n+m)/. See 'difference'.
 175 (\\) :: IntSet -> IntSet -> IntSet
 176 m1 \\ m2 = difference m1 m2
 177
 178 {--------------------------------------------------------------------
 179   Types
 180 --------------------------------------------------------------------}
 181 -- | A set of integers.
 182 data IntSet = Nil
 183             | Tip {-# UNPACK #-} !Int
 184             | Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
 185
 186 type Prefix = Int
 187 type Mask   = Int
 188
 189 {--------------------------------------------------------------------
 190   Query
 191 --------------------------------------------------------------------}
 192 -- | /O(1)/. Is the set empty?
 193 null :: IntSet -> Bool
 194 null Nil   = True
 195 null other = False
 196
 197 -- | /O(n)/. Cardinality of the set.
 198 size :: IntSet -> Int
 199 size t
 200   = case t of
 201       Bin p m l r -> size l + size r
 202       Tip y -> 1
 203       Nil   -> 0
 204
 205 -- | /O(min(n,W))/. Is the value a member of the set?
 206 member :: Int -> IntSet -> Bool
 207 member x t
 208   = case t of
 209       Bin p m l r
 210         | nomatch x p m -> False
 211         | zero x m      -> member x l
 212         | otherwise     -> member x r
 213       Tip y -> (x==y)
 214       Nil   -> False
 215
 216 -- 'lookup' is used by 'intersection' for left-biasing
 217 lookup :: Int -> IntSet -> Maybe Int
 218 lookup k t
 219   = let nk = natFromInt k  in seq nk (lookupN nk t)
 220
 221 lookupN :: Nat -> IntSet -> Maybe Int
 222 lookupN k t
 223   = case t of
 224       Bin p m l r
 225         | zeroN k (natFromInt m) -> lookupN k l
 226         | otherwise              -> lookupN k r
 227       Tip kx
 228         | (k == natFromInt kx)  -> Just kx
 229         | otherwise             -> Nothing
 230       Nil -> Nothing
 231
 232 {--------------------------------------------------------------------
 233   Construction
 234 --------------------------------------------------------------------}
 235 -- | /O(1)/. The empty set.
 236 empty :: IntSet
 237 empty
 238   = Nil
 239
 240 -- | /O(1)/. A set of one element.
 241 singleton :: Int -> IntSet
 242 singleton x
 243   = Tip x
 244
 245 {--------------------------------------------------------------------
 246   Insert
 247 --------------------------------------------------------------------}
 248 -- | /O(min(n,W))/. Add a value to the set. When the value is already
 249 -- an element of the set, it is replaced by the new one, ie. 'insert'
 250 -- is left-biased.
 251 insert :: Int -> IntSet -> IntSet
 252 insert x t
 253   = case t of
 254       Bin p m l r
 255         | nomatch x p m -> join x (Tip x) p t
 256         | zero x m      -> Bin p m (insert x l) r
 257         | otherwise     -> Bin p m l (insert x r)
 258       Tip y
 259         | x==y          -> Tip x
 260         | otherwise     -> join x (Tip x) y t
 261       Nil -> Tip x
 262
 263 -- right-biased insertion, used by 'union'
 264 insertR :: Int -> IntSet -> IntSet
 265 insertR x t
 266   = case t of
 267       Bin p m l r
 268         | nomatch x p m -> join x (Tip x) p t
 269         | zero x m      -> Bin p m (insert x l) r
 270         | otherwise     -> Bin p m l (insert x r)
 271       Tip y
 272         | x==y          -> t
 273         | otherwise     -> join x (Tip x) y t
 274       Nil -> Tip x
 275
 276 -- | /O(min(n,W))/. Delete a value in the set. Returns the
 277 -- original set when the value was not present.
 278 delete :: Int -> IntSet -> IntSet
 279 delete x t
 280   = case t of
 281       Bin p m l r
 282         | nomatch x p m -> t
 283         | zero x m      -> bin p m (delete x l) r
 284         | otherwise     -> bin p m l (delete x r)
 285       Tip y
 286         | x==y          -> Nil
 287         | otherwise     -> t
 288       Nil -> Nil
 289
 290
 291 {--------------------------------------------------------------------
 292   Union
 293 --------------------------------------------------------------------}
 294 -- | The union of a list of sets.
 295 unions :: [IntSet] -> IntSet
 296 unions xs
 297   = foldlStrict union empty xs
 298
 299
 300 -- | /O(n+m)/. The union of two sets.
 301 union :: IntSet -> IntSet -> IntSet
 302 union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 303   | shorter m1 m2  = union1
 304   | shorter m2 m1  = union2
 305   | p1 == p2       = Bin p1 m1 (union l1 l2) (union r1 r2)
 306   | otherwise      = join p1 t1 p2 t2
 307   where
 308     union1  | nomatch p2 p1 m1  = join p1 t1 p2 t2
 309             | zero p2 m1        = Bin p1 m1 (union l1 t2) r1
 310             | otherwise         = Bin p1 m1 l1 (union r1 t2)
 311
 312     union2  | nomatch p1 p2 m2  = join p1 t1 p2 t2
 313             | zero p1 m2        = Bin p2 m2 (union t1 l2) r2
 314             | otherwise         = Bin p2 m2 l2 (union t1 r2)
 315
 316 union (Tip x) t = insert x t
 317 union t (Tip x) = insertR x t  -- right bias
 318 union Nil t     = t
 319 union t Nil     = t
 320
 321
 322 {--------------------------------------------------------------------
 323   Difference
 324 --------------------------------------------------------------------}
 325 -- | /O(n+m)/. Difference between two sets.
 326 difference :: IntSet -> IntSet -> IntSet
 327 difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 328   | shorter m1 m2  = difference1
 329   | shorter m2 m1  = difference2
 330   | p1 == p2       = bin p1 m1 (difference l1 l2) (difference r1 r2)
 331   | otherwise      = t1
 332   where
 333     difference1 | nomatch p2 p1 m1  = t1
 334                 | zero p2 m1        = bin p1 m1 (difference l1 t2) r1
 335                 | otherwise         = bin p1 m1 l1 (difference r1 t2)
 336
 337     difference2 | nomatch p1 p2 m2  = t1
 338                 | zero p1 m2        = difference t1 l2
 339                 | otherwise         = difference t1 r2
 340
 341 difference t1@(Tip x) t2
 342   | member x t2  = Nil
 343   | otherwise    = t1
 344
 345 difference Nil t     = Nil
 346 difference t (Tip x) = delete x t
 347 difference t Nil     = t
 348
 349
 350
 351 {--------------------------------------------------------------------
 352   Intersection
 353 --------------------------------------------------------------------}
 354 -- | /O(n+m)/. The intersection of two sets.
 355 intersection :: IntSet -> IntSet -> IntSet
 356 intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 357   | shorter m1 m2  = intersection1
 358   | shorter m2 m1  = intersection2
 359   | p1 == p2       = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
 360   | otherwise      = Nil
 361   where
 362     intersection1 | nomatch p2 p1 m1  = Nil
 363                   | zero p2 m1        = intersection l1 t2
 364                   | otherwise         = intersection r1 t2
 365
 366     intersection2 | nomatch p1 p2 m2  = Nil
 367                   | zero p1 m2        = intersection t1 l2
 368                   | otherwise         = intersection t1 r2
 369
 370 intersection t1@(Tip x) t2
 371   | member x t2  = t1
 372   | otherwise    = Nil
 373 intersection t (Tip x)
 374   = case lookup x t of
 375       Just y  -> Tip y
 376       Nothing -> Nil
 377 intersection Nil t = Nil
 378 intersection t Nil = Nil
 379
 380
 381
 382 {--------------------------------------------------------------------
 383   Subset
 384 --------------------------------------------------------------------}
 385 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
 386 isProperSubsetOf :: IntSet -> IntSet -> Bool
 387 isProperSubsetOf t1 t2
 388   = case subsetCmp t1 t2 of
 389       LT -> True
 390       ge -> False
 391
 392 subsetCmp t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 393   | shorter m1 m2  = GT
 394   | shorter m2 m1  = subsetCmpLt
 395   | p1 == p2       = subsetCmpEq
 396   | otherwise      = GT  -- disjoint
 397   where
 398     subsetCmpLt | nomatch p1 p2 m2  = GT
 399                 | zero p1 m2        = subsetCmp t1 l2
 400                 | otherwise         = subsetCmp t1 r2
 401     subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
 402                     (GT,_ ) -> GT
 403                     (_ ,GT) -> GT
 404                     (EQ,EQ) -> EQ
 405                     other   -> LT
 406
 407 subsetCmp (Bin p m l r) t  = GT
 408 subsetCmp (Tip x) (Tip y)
 409   | x==y       = EQ
 410   | otherwise  = GT  -- disjoint
 411 subsetCmp (Tip x) t
 412   | member x t = LT
 413   | otherwise  = GT  -- disjoint
 414 subsetCmp Nil Nil = EQ
 415 subsetCmp Nil t   = LT
 416
 417 -- | /O(n+m)/. Is this a subset?
 418 -- @(s1 `isSubsetOf` s2)@ tells whether s1 is a subset of s2.
 419
 420 isSubsetOf :: IntSet -> IntSet -> Bool
 421 isSubsetOf t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 422   | shorter m1 m2  = False
 423   | shorter m2 m1  = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2
 424                                                       else isSubsetOf t1 r2)
 425   | otherwise      = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2
 426 isSubsetOf (Bin p m l r) t  = False
 427 isSubsetOf (Tip x) t        = member x t
 428 isSubsetOf Nil t            = True
 429
 430
 431 {--------------------------------------------------------------------
 432   Filter
 433 --------------------------------------------------------------------}
 434 -- | /O(n)/. Filter all elements that satisfy some predicate.
 435 filter :: (Int -> Bool) -> IntSet -> IntSet
 436 filter pred t
 437   = case t of
 438       Bin p m l r
 439         -> bin p m (filter pred l) (filter pred r)
 440       Tip x
 441         | pred x    -> t
 442         | otherwise -> Nil
 443       Nil -> Nil
 444
 445 -- | /O(n)/. partition the set according to some predicate.
 446 partition :: (Int -> Bool) -> IntSet -> (IntSet,IntSet)
 447 partition pred t
 448   = case t of
 449       Bin p m l r
 450         -> let (l1,l2) = partition pred l
 451                (r1,r2) = partition pred r
 452            in (bin p m l1 r1, bin p m l2 r2)
 453       Tip x
 454         | pred x    -> (t,Nil)
 455         | otherwise -> (Nil,t)
 456       Nil -> (Nil,Nil)
 457
 458
 459 -- | /O(log n)/. The expression (@split x set@) is a pair @(set1,set2)@
 460 -- where all elements in @set1@ are lower than @x@ and all elements in
 461 -- @set2@ larger than @x@.
 462 --
 463 -- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [3,4])
 464 split :: Int -> IntSet -> (IntSet,IntSet)
 465 split x t
 466   = case t of
 467       Bin p m l r
 468         | zero x m  -> let (lt,gt) = split x l in (lt,union gt r)
 469         | otherwise -> let (lt,gt) = split x r in (union l lt,gt)
 470       Tip y
 471         | x>y       -> (t,Nil)
 472         | x<y       -> (Nil,t)
 473         | otherwise -> (Nil,Nil)
 474       Nil -> (Nil,Nil)
 475
 476 -- | /O(log n)/. Performs a 'split' but also returns whether the pivot
 477 -- element was found in the original set.
 478 splitMember :: Int -> IntSet -> (Bool,IntSet,IntSet)
 479 splitMember x t
 480   = case t of
 481       Bin p m l r
 482         | zero x m  -> let (found,lt,gt) = splitMember x l in (found,lt,union gt r)
 483         | otherwise -> let (found,lt,gt) = splitMember x r in (found,union l lt,gt)
 484       Tip y
 485         | x>y       -> (False,t,Nil)
 486         | x<y       -> (False,Nil,t)
 487         | otherwise -> (True,Nil,Nil)
 488       Nil -> (False,Nil,Nil)
 489
 490 {----------------------------------------------------------------------
 491   Map
 492 ----------------------------------------------------------------------}
 493
 494 -- | /O(n*min(n,W))/.
 495 -- @map f s@ is the set obtained by applying @f@ to each element of @s@.
 496 --
 497 -- It's worth noting that the size of the result may be smaller if,
 498 -- for some @(x,y)@, @x \/= y && f x == f y@
 499
 500 map :: (Int->Int) -> IntSet -> IntSet
 501 map f = fromList . List.map f . toList
 502
 503 {--------------------------------------------------------------------
 504   Fold
 505 --------------------------------------------------------------------}
 506 -- | /O(n)/. Fold over the elements of a set in an unspecified order.
 507 --
 508 -- > sum set   == fold (+) 0 set
 509 -- > elems set == fold (:) [] set
 510 fold :: (Int -> b -> b) -> b -> IntSet -> b
 511 fold f z t
 512   = foldr f z t
 513
 514 foldr :: (Int -> b -> b) -> b -> IntSet -> b
 515 foldr f z t
 516   = case t of
 517       Bin p m l r -> foldr f (foldr f z r) l
 518       Tip x       -> f x z
 519       Nil         -> z
 520
 521 {--------------------------------------------------------------------
 522   List variations
 523 --------------------------------------------------------------------}
 524 -- | /O(n)/. The elements of a set. (For sets, this is equivalent to toList)
 525 elems :: IntSet -> [Int]
 526 elems s
 527   = toList s
 528
 529 {--------------------------------------------------------------------
 530   Lists
 531 --------------------------------------------------------------------}
 532 -- | /O(n)/. Convert the set to a list of elements.
 533 toList :: IntSet -> [Int]
 534 toList t
 535   = fold (:) [] t
 536
 537 -- | /O(n)/. Convert the set to an ascending list of elements.
 538 toAscList :: IntSet -> [Int]
 539 toAscList t
 540   = -- NOTE: the following algorithm only works for big-endian trees
 541     let (pos,neg) = span (>=0) (foldr (:) [] t) in neg ++ pos
 542
 543 -- | /O(n*min(n,W))/. Create a set from a list of integers.
 544 fromList :: [Int] -> IntSet
 545 fromList xs
 546   = foldlStrict ins empty xs
 547   where
 548     ins t x  = insert x t
 549
 550 -- | /O(n*min(n,W))/. Build a set from an ascending list of elements.
 551 fromAscList :: [Int] -> IntSet
 552 fromAscList xs
 553   = fromList xs
 554
 555 -- | /O(n*min(n,W))/. Build a set from an ascending list of distinct elements.
 556 fromDistinctAscList :: [Int] -> IntSet
 557 fromDistinctAscList xs
 558   = fromList xs
 559
 560
 561 {--------------------------------------------------------------------
 562   Eq
 563 --------------------------------------------------------------------}
 564 instance Eq IntSet where
 565   t1 == t2  = equal t1 t2
 566   t1 /= t2  = nequal t1 t2
 567
 568 equal :: IntSet -> IntSet -> Bool
 569 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
 570   = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
 571 equal (Tip x) (Tip y)
 572   = (x==y)
 573 equal Nil Nil = True
 574 equal t1 t2   = False
 575
 576 nequal :: IntSet -> IntSet -> Bool
 577 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
 578   = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
 579 nequal (Tip x) (Tip y)
 580   = (x/=y)
 581 nequal Nil Nil = False
 582 nequal t1 t2   = True
 583
 584 {--------------------------------------------------------------------
 585   Ord
 586 --------------------------------------------------------------------}
 587
 588 instance Ord IntSet where
 589     compare s1 s2 = compare (toAscList s1) (toAscList s2)
 590     -- tentative implementation. See if more efficient exists.
 591
 592 {--------------------------------------------------------------------
 593   Monoid
 594 --------------------------------------------------------------------}
 595
 596 instance Monoid IntSet where
 597     mempty = empty
 598     mappend = union
 599     mconcat = unions
 600
 601 {--------------------------------------------------------------------
 602   Show
 603 --------------------------------------------------------------------}
 604 instance Show IntSet where
 605   showsPrec d s  = showSet (toList s)
 606
 607 showSet :: [Int] -> ShowS
 608 showSet []
 609   = showString "{}"
 610 showSet (x:xs)
 611   = showChar '{' . shows x . showTail xs
 612   where
 613     showTail []     = showChar '}'
 614     showTail (x:xs) = showChar ',' . shows x . showTail xs
 615
 616 {--------------------------------------------------------------------
 617   Debugging
 618 --------------------------------------------------------------------}
 619 -- | /O(n)/. Show the tree that implements the set. The tree is shown
 620 -- in a compressed, hanging format.
 621 showTree :: IntSet -> String
 622 showTree s
 623   = showTreeWith True False s
 624
 625
 626 {- | /O(n)/. The expression (@showTreeWith hang wide map@) shows
 627  the tree that implements the set. If @hang@ is
 628  @True@, a /hanging/ tree is shown otherwise a rotated tree is shown. If
 629  @wide@ is true, an extra wide version is shown.
 630 -}
 631 showTreeWith :: Bool -> Bool -> IntSet -> String
 632 showTreeWith hang wide t
 633   | hang      = (showsTreeHang wide [] t) ""
 634   | otherwise = (showsTree wide [] [] t) ""
 635
 636 showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
 637 showsTree wide lbars rbars t
 638   = case t of
 639       Bin p m l r
 640           -> showsTree wide (withBar rbars) (withEmpty rbars) r .
 641              showWide wide rbars .
 642              showsBars lbars . showString (showBin p m) . showString "\n" .
 643              showWide wide lbars .
 644              showsTree wide (withEmpty lbars) (withBar lbars) l
 645       Tip x
 646           -> showsBars lbars . showString " " . shows x . showString "\n"
 647       Nil -> showsBars lbars . showString "|\n"
 648
 649 showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
 650 showsTreeHang wide bars t
 651   = case t of
 652       Bin p m l r
 653           -> showsBars bars . showString (showBin p m) . showString "\n" .
 654              showWide wide bars .
 655              showsTreeHang wide (withBar bars) l .
 656              showWide wide bars .
 657              showsTreeHang wide (withEmpty bars) r
 658       Tip x
 659           -> showsBars bars . showString " " . shows x . showString "\n"
 660       Nil -> showsBars bars . showString "|\n"
 661
 662 showBin p m
 663   = "*" -- ++ show (p,m)
 664
 665 showWide wide bars
 666   | wide      = showString (concat (reverse bars)) . showString "|\n"
 667   | otherwise = id
 668
 669 showsBars :: [String] -> ShowS
 670 showsBars bars
 671   = case bars of
 672       [] -> id
 673       _  -> showString (concat (reverse (tail bars))) . showString node
 674
 675 node           = "+--"
 676 withBar bars   = "|  ":bars
 677 withEmpty bars = "   ":bars
 678
 679
 680 {--------------------------------------------------------------------
 681   Helpers
 682 --------------------------------------------------------------------}
 683 {--------------------------------------------------------------------
 684   Join
 685 --------------------------------------------------------------------}
 686 join :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
 687 join p1 t1 p2 t2
 688   | zero p1 m = Bin p m t1 t2
 689   | otherwise = Bin p m t2 t1
 690   where
 691     m = branchMask p1 p2
 692     p = mask p1 m
 693
 694 {--------------------------------------------------------------------
 695   @bin@ assures that we never have empty trees within a tree.
 696 --------------------------------------------------------------------}
 697 bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
 698 bin p m l Nil = l
 699 bin p m Nil r = r
 700 bin p m l r   = Bin p m l r
 701
 702
 703 {--------------------------------------------------------------------
 704   Endian independent bit twiddling
 705 --------------------------------------------------------------------}
 706 zero :: Int -> Mask -> Bool
 707 zero i m
 708   = (natFromInt i) .&. (natFromInt m) == 0
 709
 710 nomatch,match :: Int -> Prefix -> Mask -> Bool
 711 nomatch i p m
 712   = (mask i m) /= p
 713
 714 match i p m
 715   = (mask i m) == p
 716
 717 mask :: Int -> Mask -> Prefix
 718 mask i m
 719   = maskW (natFromInt i) (natFromInt m)
 720
 721 zeroN :: Nat -> Nat -> Bool
 722 zeroN i m = (i .&. m) == 0
 723
 724 {--------------------------------------------------------------------
 725   Big endian operations
 726 --------------------------------------------------------------------}
 727 maskW :: Nat -> Nat -> Prefix
 728 maskW i m
 729   = intFromNat (i .&. (complement (m-1) `xor` m))
 730
 731 shorter :: Mask -> Mask -> Bool
 732 shorter m1 m2
 733   = (natFromInt m1) > (natFromInt m2)
 734
 735 branchMask :: Prefix -> Prefix -> Mask
 736 branchMask p1 p2
 737   = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
 738
 739 {----------------------------------------------------------------------
 740   Finding the highest bit (mask) in a word [x] can be done efficiently in
 741   three ways:
 742   * convert to a floating point value and the mantissa tells us the
 743     [log2(x)] that corresponds with the highest bit position. The mantissa
 744     is retrieved either via the standard C function [frexp] or by some bit
 745     twiddling on IEEE compatible numbers (float). Note that one needs to
 746     use at least [double] precision for an accurate mantissa of 32 bit
 747     numbers.
 748   * use bit twiddling, a logarithmic sequence of bitwise or's and shifts (bit).
 749   * use processor specific assembler instruction (asm).
 750
 751   The most portable way would be [bit], but is it efficient enough?
 752   I have measured the cycle counts of the different methods on an AMD
 753   Athlon-XP 1800 (~ Pentium III 1.8Ghz) using the RDTSC instruction:
 754
 755   highestBitMask: method  cycles
 756                   --------------
 757                    frexp   200
 758                    float    33
 759                    bit      11
 760                    asm      12
 761
 762   highestBit:     method  cycles
 763                   --------------
 764                    frexp   195
 765                    float    33
 766                    bit      11
 767                    asm      11
 768
 769   Wow, the bit twiddling is on today's RISC like machines even faster
 770   than a single CISC instruction (BSR)!
 771 ----------------------------------------------------------------------}
 772
 773 {----------------------------------------------------------------------
 774   [highestBitMask] returns a word where only the highest bit is set.
 775   It is found by first setting all bits in lower positions than the
 776   highest bit and than taking an exclusive or with the original value.
 777   Allthough the function may look expensive, GHC compiles this into
 778   excellent C code that subsequently compiled into highly efficient
 779   machine code. The algorithm is derived from Jorg Arndt's FXT library.
 780 ----------------------------------------------------------------------}
 781 highestBitMask :: Nat -> Nat
 782 highestBitMask x
 783   = case (x .|. shiftRL x 1) of
 784      x -> case (x .|. shiftRL x 2) of
 785       x -> case (x .|. shiftRL x 4) of
 786        x -> case (x .|. shiftRL x 8) of
 787         x -> case (x .|. shiftRL x 16) of
 788          x -> case (x .|. shiftRL x 32) of   -- for 64 bit platforms
 789           x -> (x `xor` (shiftRL x 1))
 790
 791
 792 {--------------------------------------------------------------------
 793   Utilities
 794 --------------------------------------------------------------------}
 795 foldlStrict f z xs
 796   = case xs of
 797       []     -> z
 798       (x:xx) -> let z' = f z x in seq z' (foldlStrict f z' xx)
 799
 800
 801 {-
 802 {--------------------------------------------------------------------
 803   Testing
 804 --------------------------------------------------------------------}
 805 testTree :: [Int] -> IntSet
 806 testTree xs   = fromList xs
 807 test1 = testTree [1..20]
 808 test2 = testTree [30,29..10]
 809 test3 = testTree [1,4,6,89,2323,53,43,234,5,79,12,9,24,9,8,423,8,42,4,8,9,3]
 810
 811 {--------------------------------------------------------------------
 812   QuickCheck
 813 --------------------------------------------------------------------}
 814 qcheck prop
 815   = check config prop
 816   where
 817     config = Config
 818       { configMaxTest = 500
 819       , configMaxFail = 5000
 820       , configSize    = \n -> (div n 2 + 3)
 821       , configEvery   = \n args -> let s = show n in s ++ [ '\b' | _ <- s ]
 822       }
 823
 824
 825 {--------------------------------------------------------------------
 826   Arbitrary, reasonably balanced trees
 827 --------------------------------------------------------------------}
 828 instance Arbitrary IntSet where
 829   arbitrary = do{ xs <- arbitrary
 830                 ; return (fromList xs)
 831                 }
 832
 833
 834 {--------------------------------------------------------------------
 835   Single, Insert, Delete
 836 --------------------------------------------------------------------}
 837 prop_Single :: Int -> Bool
 838 prop_Single x
 839   = (insert x empty == singleton x)
 840
 841 prop_InsertDelete :: Int -> IntSet -> Property
 842 prop_InsertDelete k t
 843   = not (member k t) ==> delete k (insert k t) == t
 844
 845
 846 {--------------------------------------------------------------------
 847   Union
 848 --------------------------------------------------------------------}
 849 prop_UnionInsert :: Int -> IntSet -> Bool
 850 prop_UnionInsert x t
 851   = union t (singleton x) == insert x t
 852
 853 prop_UnionAssoc :: IntSet -> IntSet -> IntSet -> Bool
 854 prop_UnionAssoc t1 t2 t3
 855   = union t1 (union t2 t3) == union (union t1 t2) t3
 856
 857 prop_UnionComm :: IntSet -> IntSet -> Bool
 858 prop_UnionComm t1 t2
 859   = (union t1 t2 == union t2 t1)
 860
 861 prop_Diff :: [Int] -> [Int] -> Bool
 862 prop_Diff xs ys
 863   =  toAscList (difference (fromList xs) (fromList ys))
 864     == List.sort ((List.\\) (nub xs)  (nub ys))
 865
 866 prop_Int :: [Int] -> [Int] -> Bool
 867 prop_Int xs ys
 868   =  toAscList (intersection (fromList xs) (fromList ys))
 869     == List.sort (nub ((List.intersect) (xs)  (ys)))
 870
 871 {--------------------------------------------------------------------
 872   Lists
 873 --------------------------------------------------------------------}
 874 prop_Ordered
 875   = forAll (choose (5,100)) $ \n ->
 876     let xs = [0..n::Int]
 877     in fromAscList xs == fromList xs
 878
 879 prop_List :: [Int] -> Bool
 880 prop_List xs
 881   = (sort (nub xs) == toAscList (fromList xs))
 882 -}