Data/IntSet.hs

   1 {-# OPTIONS -cpp -fglasgow-exts #-}
   2 -----------------------------------------------------------------------------
   3 -- |
   4 -- Module      :  Data.IntSet
   5 -- Copyright   :  (c) Daan Leijen 2002
   6 -- License     :  BSD-style
   7 -- Maintainer  :  libraries@haskell.org
   8 -- Stability   :  provisional
   9 -- Portability :  portable
  10 --
  11 -- An efficient implementation of integer sets.
  12 --
  13 -- This module is intended to be imported @qualified@, to avoid name
  14 -- clashes with "Prelude" functions.  eg.
  15 --
  16 -- >  import Data.IntSet as Set
  17 --
  18 -- The implementation is based on /big-endian patricia trees/.  This data
  19 -- structure performs especially well on binary operations like 'union'
  20 -- and 'intersection'.  However, my benchmarks show that it is also
  21 -- (much) faster on insertions and deletions when compared to a generic
  22 -- size-balanced set implementation (see "Data.Set").
  23 --
  24 --    * Chris Okasaki and Andy Gill,  \"/Fast Mergeable Integer Maps/\",
  25 --      Workshop on ML, September 1998, pages 77-86,
  26 --      <http://www.cse.ogi.edu/~andy/pub/finite.htm>
  27 --
  28 --    * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
  29 --      Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
  30 --      October 1968, pages 514-534.
  31 --
  32 -- Many operations have a worst-case complexity of /O(min(n,W))/.
  33 -- This means that the operation can become linear in the number of
  34 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
  35 -- (32 or 64).
  36 -----------------------------------------------------------------------------
  37
  38 module Data.IntSet  (
  39             -- * Set type
  40               IntSet          -- instance Eq,Show
  41
  42             -- * Operators
  43             , (\\)
  44
  45             -- * Query
  46             , null
  47             , size
  48             , member
  49             , isSubsetOf
  50             , isProperSubsetOf
  51
  52             -- * Construction
  53             , empty
  54             , singleton
  55             , insert
  56             , delete
  57
  58             -- * Combine
  59             , union, unions
  60             , difference
  61             , intersection
  62
  63             -- * Filter
  64             , filter
  65             , partition
  66             , split
  67             , splitMember
  68
  69             -- * Map
  70             , map
  71
  72             -- * Fold
  73             , fold
  74
  75             -- * Conversion
  76             -- ** List
  77             , elems
  78             , toList
  79             , fromList
  80
  81             -- ** Ordered list
  82             , toAscList
  83             , fromAscList
  84             , fromDistinctAscList
  85
  86             -- * Debugging
  87             , showTree
  88             , showTreeWith
  89             ) where
  90
  91
  92 import Prelude hiding (lookup,filter,foldr,foldl,null,map)
  93 import Data.Bits
  94 import Data.Int
  95
  96 import qualified Data.List as List
  97 import Data.Monoid
  98
  99 {-
 100 -- just for testing
 101 import QuickCheck
 102 import List (nub,sort)
 103 import qualified List
 104 -}
 105
 106 #if __GLASGOW_HASKELL__ >= 503
 107 import GHC.Word
 108 import GHC.Exts ( Word(..), Int(..), shiftRL# )
 109 #elif __GLASGOW_HASKELL__
 110 import Word
 111 import GlaExts ( Word(..), Int(..), shiftRL# )
 112 #else
 113 import Data.Word
 114 #endif
 115
 116 infixl 9 \\{-This comment teaches CPP correct behaviour -}
 117
 118 #if __HUGS__
 119 {--------------------------------------------------------------------
 120   Hugs:
 121   * Older Hugs doesn't define 'Word'.
 122   * Newer Hugs defines 'Word' in the Prelude but no operations.
 123 --------------------------------------------------------------------}
 124 type Nat = Word32   -- illegal on 64-bit platforms!
 125 #else
 126 {--------------------------------------------------------------------
 127   'Standard' Haskell
 128   * A "Nat" is a natural machine word (an unsigned Int)
 129 --------------------------------------------------------------------}
 130 type Nat = Word
 131 #endif
 132
 133 natFromInt :: Int -> Nat
 134 natFromInt i = fromIntegral i
 135
 136 intFromNat :: Nat -> Int
 137 intFromNat w = fromIntegral w
 138
 139 shiftRL :: Nat -> Int -> Nat
 140 #if __GLASGOW_HASKELL__
 141 {--------------------------------------------------------------------
 142   GHC: use unboxing to get @shiftRL@ inlined.
 143 --------------------------------------------------------------------}
 144 shiftRL (W# x) (I# i)
 145   = W# (shiftRL# x i)
 146 #else
 147 shiftRL x i   = shiftR x i
 148 #endif
 149
 150 {--------------------------------------------------------------------
 151   Operators
 152 --------------------------------------------------------------------}
 153 -- | /O(n+m)/. See 'difference'.
 154 (\\) :: IntSet -> IntSet -> IntSet
 155 m1 \\ m2 = difference m1 m2
 156
 157 {--------------------------------------------------------------------
 158   Types
 159 --------------------------------------------------------------------}
 160 -- | A set of integers.
 161 data IntSet = Nil
 162             | Tip {-# UNPACK #-} !Int
 163             | Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
 164
 165 type Prefix = Int
 166 type Mask   = Int
 167
 168 {--------------------------------------------------------------------
 169   Query
 170 --------------------------------------------------------------------}
 171 -- | /O(1)/. Is the set empty?
 172 null :: IntSet -> Bool
 173 null Nil   = True
 174 null other = False
 175
 176 -- | /O(n)/. Cardinality of the set.
 177 size :: IntSet -> Int
 178 size t
 179   = case t of
 180       Bin p m l r -> size l + size r
 181       Tip y -> 1
 182       Nil   -> 0
 183
 184 -- | /O(min(n,W))/. Is the value a member of the set?
 185 member :: Int -> IntSet -> Bool
 186 member x t
 187   = case t of
 188       Bin p m l r
 189         | nomatch x p m -> False
 190         | zero x m      -> member x l
 191         | otherwise     -> member x r
 192       Tip y -> (x==y)
 193       Nil   -> False
 194
 195 -- 'lookup' is used by 'intersection' for left-biasing
 196 lookup :: Int -> IntSet -> Maybe Int
 197 lookup k t
 198   = let nk = natFromInt k  in seq nk (lookupN nk t)
 199
 200 lookupN :: Nat -> IntSet -> Maybe Int
 201 lookupN k t
 202   = case t of
 203       Bin p m l r
 204         | zeroN k (natFromInt m) -> lookupN k l
 205         | otherwise              -> lookupN k r
 206       Tip kx
 207         | (k == natFromInt kx)  -> Just kx
 208         | otherwise             -> Nothing
 209       Nil -> Nothing
 210
 211 {--------------------------------------------------------------------
 212   Construction
 213 --------------------------------------------------------------------}
 214 -- | /O(1)/. The empty set.
 215 empty :: IntSet
 216 empty
 217   = Nil
 218
 219 -- | /O(1)/. A set of one element.
 220 singleton :: Int -> IntSet
 221 singleton x
 222   = Tip x
 223
 224 {--------------------------------------------------------------------
 225   Insert
 226 --------------------------------------------------------------------}
 227 -- | /O(min(n,W))/. Add a value to the set. When the value is already
 228 -- an element of the set, it is replaced by the new one, ie. 'insert'
 229 -- is left-biased.
 230 insert :: Int -> IntSet -> IntSet
 231 insert x t
 232   = case t of
 233       Bin p m l r
 234         | nomatch x p m -> join x (Tip x) p t
 235         | zero x m      -> Bin p m (insert x l) r
 236         | otherwise     -> Bin p m l (insert x r)
 237       Tip y
 238         | x==y          -> Tip x
 239         | otherwise     -> join x (Tip x) y t
 240       Nil -> Tip x
 241
 242 -- right-biased insertion, used by 'union'
 243 insertR :: Int -> IntSet -> IntSet
 244 insertR x t
 245   = case t of
 246       Bin p m l r
 247         | nomatch x p m -> join x (Tip x) p t
 248         | zero x m      -> Bin p m (insert x l) r
 249         | otherwise     -> Bin p m l (insert x r)
 250       Tip y
 251         | x==y          -> t
 252         | otherwise     -> join x (Tip x) y t
 253       Nil -> Tip x
 254
 255 -- | /O(min(n,W))/. Delete a value in the set. Returns the
 256 -- original set when the value was not present.
 257 delete :: Int -> IntSet -> IntSet
 258 delete x t
 259   = case t of
 260       Bin p m l r
 261         | nomatch x p m -> t
 262         | zero x m      -> bin p m (delete x l) r
 263         | otherwise     -> bin p m l (delete x r)
 264       Tip y
 265         | x==y          -> Nil
 266         | otherwise     -> t
 267       Nil -> Nil
 268
 269
 270 {--------------------------------------------------------------------
 271   Union
 272 --------------------------------------------------------------------}
 273 -- | The union of a list of sets.
 274 unions :: [IntSet] -> IntSet
 275 unions xs
 276   = foldlStrict union empty xs
 277
 278
 279 -- | /O(n+m)/. The union of two sets.
 280 union :: IntSet -> IntSet -> IntSet
 281 union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 282   | shorter m1 m2  = union1
 283   | shorter m2 m1  = union2
 284   | p1 == p2       = Bin p1 m1 (union l1 l2) (union r1 r2)
 285   | otherwise      = join p1 t1 p2 t2
 286   where
 287     union1  | nomatch p2 p1 m1  = join p1 t1 p2 t2
 288             | zero p2 m1        = Bin p1 m1 (union l1 t2) r1
 289             | otherwise         = Bin p1 m1 l1 (union r1 t2)
 290
 291     union2  | nomatch p1 p2 m2  = join p1 t1 p2 t2
 292             | zero p1 m2        = Bin p2 m2 (union t1 l2) r2
 293             | otherwise         = Bin p2 m2 l2 (union t1 r2)
 294
 295 union (Tip x) t = insert x t
 296 union t (Tip x) = insertR x t  -- right bias
 297 union Nil t     = t
 298 union t Nil     = t
 299
 300
 301 {--------------------------------------------------------------------
 302   Difference
 303 --------------------------------------------------------------------}
 304 -- | /O(n+m)/. Difference between two sets.
 305 difference :: IntSet -> IntSet -> IntSet
 306 difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 307   | shorter m1 m2  = difference1
 308   | shorter m2 m1  = difference2
 309   | p1 == p2       = bin p1 m1 (difference l1 l2) (difference r1 r2)
 310   | otherwise      = t1
 311   where
 312     difference1 | nomatch p2 p1 m1  = t1
 313                 | zero p2 m1        = bin p1 m1 (difference l1 t2) r1
 314                 | otherwise         = bin p1 m1 l1 (difference r1 t2)
 315
 316     difference2 | nomatch p1 p2 m2  = t1
 317                 | zero p1 m2        = difference t1 l2
 318                 | otherwise         = difference t1 r2
 319
 320 difference t1@(Tip x) t2
 321   | member x t2  = Nil
 322   | otherwise    = t1
 323
 324 difference Nil t     = Nil
 325 difference t (Tip x) = delete x t
 326 difference t Nil     = t
 327
 328
 329
 330 {--------------------------------------------------------------------
 331   Intersection
 332 --------------------------------------------------------------------}
 333 -- | /O(n+m)/. The intersection of two sets.
 334 intersection :: IntSet -> IntSet -> IntSet
 335 intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 336   | shorter m1 m2  = intersection1
 337   | shorter m2 m1  = intersection2
 338   | p1 == p2       = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
 339   | otherwise      = Nil
 340   where
 341     intersection1 | nomatch p2 p1 m1  = Nil
 342                   | zero p2 m1        = intersection l1 t2
 343                   | otherwise         = intersection r1 t2
 344
 345     intersection2 | nomatch p1 p2 m2  = Nil
 346                   | zero p1 m2        = intersection t1 l2
 347                   | otherwise         = intersection t1 r2
 348
 349 intersection t1@(Tip x) t2
 350   | member x t2  = t1
 351   | otherwise    = Nil
 352 intersection t (Tip x)
 353   = case lookup x t of
 354       Just y  -> Tip y
 355       Nothing -> Nil
 356 intersection Nil t = Nil
 357 intersection t Nil = Nil
 358
 359
 360
 361 {--------------------------------------------------------------------
 362   Subset
 363 --------------------------------------------------------------------}
 364 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
 365 isProperSubsetOf :: IntSet -> IntSet -> Bool
 366 isProperSubsetOf t1 t2
 367   = case subsetCmp t1 t2 of
 368       LT -> True
 369       ge -> False
 370
 371 subsetCmp t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 372   | shorter m1 m2  = GT
 373   | shorter m2 m1  = subsetCmpLt
 374   | p1 == p2       = subsetCmpEq
 375   | otherwise      = GT  -- disjoint
 376   where
 377     subsetCmpLt | nomatch p1 p2 m2  = GT
 378                 | zero p1 m2        = subsetCmp t1 l2
 379                 | otherwise         = subsetCmp t1 r2
 380     subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
 381                     (GT,_ ) -> GT
 382                     (_ ,GT) -> GT
 383                     (EQ,EQ) -> EQ
 384                     other   -> LT
 385
 386 subsetCmp (Bin p m l r) t  = GT
 387 subsetCmp (Tip x) (Tip y)
 388   | x==y       = EQ
 389   | otherwise  = GT  -- disjoint
 390 subsetCmp (Tip x) t
 391   | member x t = LT
 392   | otherwise  = GT  -- disjoint
 393 subsetCmp Nil Nil = EQ
 394 subsetCmp Nil t   = LT
 395
 396 -- | /O(n+m)/. Is this a subset?
 397 -- @(s1 `isSubsetOf` s2)@ tells whether s1 is a subset of s2.
 398
 399 isSubsetOf :: IntSet -> IntSet -> Bool
 400 isSubsetOf t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 401   | shorter m1 m2  = False
 402   | shorter m2 m1  = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2
 403                                                       else isSubsetOf t1 r2)
 404   | otherwise      = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2
 405 isSubsetOf (Bin p m l r) t  = False
 406 isSubsetOf (Tip x) t        = member x t
 407 isSubsetOf Nil t            = True
 408
 409
 410 {--------------------------------------------------------------------
 411   Filter
 412 --------------------------------------------------------------------}
 413 -- | /O(n)/. Filter all elements that satisfy some predicate.
 414 filter :: (Int -> Bool) -> IntSet -> IntSet
 415 filter pred t
 416   = case t of
 417       Bin p m l r
 418         -> bin p m (filter pred l) (filter pred r)
 419       Tip x
 420         | pred x    -> t
 421         | otherwise -> Nil
 422       Nil -> Nil
 423
 424 -- | /O(n)/. partition the set according to some predicate.
 425 partition :: (Int -> Bool) -> IntSet -> (IntSet,IntSet)
 426 partition pred t
 427   = case t of
 428       Bin p m l r
 429         -> let (l1,l2) = partition pred l
 430                (r1,r2) = partition pred r
 431            in (bin p m l1 r1, bin p m l2 r2)
 432       Tip x
 433         | pred x    -> (t,Nil)
 434         | otherwise -> (Nil,t)
 435       Nil -> (Nil,Nil)
 436
 437
 438 -- | /O(log n)/. The expression (@split x set@) is a pair @(set1,set2)@
 439 -- where all elements in @set1@ are lower than @x@ and all elements in
 440 -- @set2@ larger than @x@.
 441 --
 442 -- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [3,4])
 443 split :: Int -> IntSet -> (IntSet,IntSet)
 444 split x t
 445   = case t of
 446       Bin p m l r
 447         | zero x m  -> let (lt,gt) = split x l in (lt,union gt r)
 448         | otherwise -> let (lt,gt) = split x r in (union l lt,gt)
 449       Tip y
 450         | x>y       -> (t,Nil)
 451         | x<y       -> (Nil,t)
 452         | otherwise -> (Nil,Nil)
 453       Nil -> (Nil,Nil)
 454
 455 -- | /O(log n)/. Performs a 'split' but also returns whether the pivot
 456 -- element was found in the original set.
 457 splitMember :: Int -> IntSet -> (Bool,IntSet,IntSet)
 458 splitMember x t
 459   = case t of
 460       Bin p m l r
 461         | zero x m  -> let (found,lt,gt) = splitMember x l in (found,lt,union gt r)
 462         | otherwise -> let (found,lt,gt) = splitMember x r in (found,union l lt,gt)
 463       Tip y
 464         | x>y       -> (False,t,Nil)
 465         | x<y       -> (False,Nil,t)
 466         | otherwise -> (True,Nil,Nil)
 467       Nil -> (False,Nil,Nil)
 468
 469 {----------------------------------------------------------------------
 470   Map
 471 ----------------------------------------------------------------------}
 472
 473 -- | /O(n*min(n,W))/.
 474 -- @map f s@ is the set obtained by applying @f@ to each element of @s@.
 475 --
 476 -- It's worth noting that the size of the result may be smaller if,
 477 -- for some @(x,y)@, @x \/= y && f x == f y@
 478
 479 map :: (Int->Int) -> IntSet -> IntSet
 480 map f = fromList . List.map f . toList
 481
 482 {--------------------------------------------------------------------
 483   Fold
 484 --------------------------------------------------------------------}
 485 -- | /O(n)/. Fold over the elements of a set in an unspecified order.
 486 --
 487 -- > sum set   == fold (+) 0 set
 488 -- > elems set == fold (:) [] set
 489 fold :: (Int -> b -> b) -> b -> IntSet -> b
 490 fold f z t
 491   = foldr f z t
 492
 493 foldr :: (Int -> b -> b) -> b -> IntSet -> b
 494 foldr f z t
 495   = case t of
 496       Bin p m l r -> foldr f (foldr f z r) l
 497       Tip x       -> f x z
 498       Nil         -> z
 499
 500 {--------------------------------------------------------------------
 501   List variations
 502 --------------------------------------------------------------------}
 503 -- | /O(n)/. The elements of a set. (For sets, this is equivalent to toList)
 504 elems :: IntSet -> [Int]
 505 elems s
 506   = toList s
 507
 508 {--------------------------------------------------------------------
 509   Lists
 510 --------------------------------------------------------------------}
 511 -- | /O(n)/. Convert the set to a list of elements.
 512 toList :: IntSet -> [Int]
 513 toList t
 514   = fold (:) [] t
 515
 516 -- | /O(n)/. Convert the set to an ascending list of elements.
 517 toAscList :: IntSet -> [Int]
 518 toAscList t
 519   = -- NOTE: the following algorithm only works for big-endian trees
 520     let (pos,neg) = span (>=0) (foldr (:) [] t) in neg ++ pos
 521
 522 -- | /O(n*min(n,W))/. Create a set from a list of integers.
 523 fromList :: [Int] -> IntSet
 524 fromList xs
 525   = foldlStrict ins empty xs
 526   where
 527     ins t x  = insert x t
 528
 529 -- | /O(n*min(n,W))/. Build a set from an ascending list of elements.
 530 fromAscList :: [Int] -> IntSet
 531 fromAscList xs
 532   = fromList xs
 533
 534 -- | /O(n*min(n,W))/. Build a set from an ascending list of distinct elements.
 535 fromDistinctAscList :: [Int] -> IntSet
 536 fromDistinctAscList xs
 537   = fromList xs
 538
 539
 540 {--------------------------------------------------------------------
 541   Eq
 542 --------------------------------------------------------------------}
 543 instance Eq IntSet where
 544   t1 == t2  = equal t1 t2
 545   t1 /= t2  = nequal t1 t2
 546
 547 equal :: IntSet -> IntSet -> Bool
 548 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
 549   = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
 550 equal (Tip x) (Tip y)
 551   = (x==y)
 552 equal Nil Nil = True
 553 equal t1 t2   = False
 554
 555 nequal :: IntSet -> IntSet -> Bool
 556 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
 557   = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
 558 nequal (Tip x) (Tip y)
 559   = (x/=y)
 560 nequal Nil Nil = False
 561 nequal t1 t2   = True
 562
 563 {--------------------------------------------------------------------
 564   Ord
 565 --------------------------------------------------------------------}
 566
 567 instance Ord IntSet where
 568     compare s1 s2 = compare (toAscList s1) (toAscList s2)
 569     -- tentative implementation. See if more efficient exists.
 570
 571 {--------------------------------------------------------------------
 572   Monoid
 573 --------------------------------------------------------------------}
 574
 575 instance Monoid IntSet where
 576     mempty = empty
 577     mappend = union
 578     mconcat = unions
 579
 580 {--------------------------------------------------------------------
 581   Show
 582 --------------------------------------------------------------------}
 583 instance Show IntSet where
 584   showsPrec d s  = showSet (toList s)
 585
 586 showSet :: [Int] -> ShowS
 587 showSet []
 588   = showString "{}"
 589 showSet (x:xs)
 590   = showChar '{' . shows x . showTail xs
 591   where
 592     showTail []     = showChar '}'
 593     showTail (x:xs) = showChar ',' . shows x . showTail xs
 594
 595 {--------------------------------------------------------------------
 596   Debugging
 597 --------------------------------------------------------------------}
 598 -- | /O(n)/. Show the tree that implements the set. The tree is shown
 599 -- in a compressed, hanging format.
 600 showTree :: IntSet -> String
 601 showTree s
 602   = showTreeWith True False s
 603
 604
 605 {- | /O(n)/. The expression (@showTreeWith hang wide map@) shows
 606  the tree that implements the set. If @hang@ is
 607  @True@, a /hanging/ tree is shown otherwise a rotated tree is shown. If
 608  @wide@ is true, an extra wide version is shown.
 609 -}
 610 showTreeWith :: Bool -> Bool -> IntSet -> String
 611 showTreeWith hang wide t
 612   | hang      = (showsTreeHang wide [] t) ""
 613   | otherwise = (showsTree wide [] [] t) ""
 614
 615 showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
 616 showsTree wide lbars rbars t
 617   = case t of
 618       Bin p m l r
 619           -> showsTree wide (withBar rbars) (withEmpty rbars) r .
 620              showWide wide rbars .
 621              showsBars lbars . showString (showBin p m) . showString "\n" .
 622              showWide wide lbars .
 623              showsTree wide (withEmpty lbars) (withBar lbars) l
 624       Tip x
 625           -> showsBars lbars . showString " " . shows x . showString "\n"
 626       Nil -> showsBars lbars . showString "|\n"
 627
 628 showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
 629 showsTreeHang wide bars t
 630   = case t of
 631       Bin p m l r
 632           -> showsBars bars . showString (showBin p m) . showString "\n" .
 633              showWide wide bars .
 634              showsTreeHang wide (withBar bars) l .
 635              showWide wide bars .
 636              showsTreeHang wide (withEmpty bars) r
 637       Tip x
 638           -> showsBars bars . showString " " . shows x . showString "\n"
 639       Nil -> showsBars bars . showString "|\n"
 640
 641 showBin p m
 642   = "*" -- ++ show (p,m)
 643
 644 showWide wide bars
 645   | wide      = showString (concat (reverse bars)) . showString "|\n"
 646   | otherwise = id
 647
 648 showsBars :: [String] -> ShowS
 649 showsBars bars
 650   = case bars of
 651       [] -> id
 652       _  -> showString (concat (reverse (tail bars))) . showString node
 653
 654 node           = "+--"
 655 withBar bars   = "|  ":bars
 656 withEmpty bars = "   ":bars
 657
 658
 659 {--------------------------------------------------------------------
 660   Helpers
 661 --------------------------------------------------------------------}
 662 {--------------------------------------------------------------------
 663   Join
 664 --------------------------------------------------------------------}
 665 join :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
 666 join p1 t1 p2 t2
 667   | zero p1 m = Bin p m t1 t2
 668   | otherwise = Bin p m t2 t1
 669   where
 670     m = branchMask p1 p2
 671     p = mask p1 m
 672
 673 {--------------------------------------------------------------------
 674   @bin@ assures that we never have empty trees within a tree.
 675 --------------------------------------------------------------------}
 676 bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
 677 bin p m l Nil = l
 678 bin p m Nil r = r
 679 bin p m l r   = Bin p m l r
 680
 681
 682 {--------------------------------------------------------------------
 683   Endian independent bit twiddling
 684 --------------------------------------------------------------------}
 685 zero :: Int -> Mask -> Bool
 686 zero i m
 687   = (natFromInt i) .&. (natFromInt m) == 0
 688
 689 nomatch,match :: Int -> Prefix -> Mask -> Bool
 690 nomatch i p m
 691   = (mask i m) /= p
 692
 693 match i p m
 694   = (mask i m) == p
 695
 696 mask :: Int -> Mask -> Prefix
 697 mask i m
 698   = maskW (natFromInt i) (natFromInt m)
 699
 700 zeroN :: Nat -> Nat -> Bool
 701 zeroN i m = (i .&. m) == 0
 702
 703 {--------------------------------------------------------------------
 704   Big endian operations
 705 --------------------------------------------------------------------}
 706 maskW :: Nat -> Nat -> Prefix
 707 maskW i m
 708   = intFromNat (i .&. (complement (m-1) `xor` m))
 709
 710 shorter :: Mask -> Mask -> Bool
 711 shorter m1 m2
 712   = (natFromInt m1) > (natFromInt m2)
 713
 714 branchMask :: Prefix -> Prefix -> Mask
 715 branchMask p1 p2
 716   = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
 717
 718 {----------------------------------------------------------------------
 719   Finding the highest bit (mask) in a word [x] can be done efficiently in
 720   three ways:
 721   * convert to a floating point value and the mantissa tells us the
 722     [log2(x)] that corresponds with the highest bit position. The mantissa
 723     is retrieved either via the standard C function [frexp] or by some bit
 724     twiddling on IEEE compatible numbers (float). Note that one needs to
 725     use at least [double] precision for an accurate mantissa of 32 bit
 726     numbers.
 727   * use bit twiddling, a logarithmic sequence of bitwise or's and shifts (bit).
 728   * use processor specific assembler instruction (asm).
 729
 730   The most portable way would be [bit], but is it efficient enough?
 731   I have measured the cycle counts of the different methods on an AMD
 732   Athlon-XP 1800 (~ Pentium III 1.8Ghz) using the RDTSC instruction:
 733
 734   highestBitMask: method  cycles
 735                   --------------
 736                    frexp   200
 737                    float    33
 738                    bit      11
 739                    asm      12
 740
 741   highestBit:     method  cycles
 742                   --------------
 743                    frexp   195
 744                    float    33
 745                    bit      11
 746                    asm      11
 747
 748   Wow, the bit twiddling is on today's RISC like machines even faster
 749   than a single CISC instruction (BSR)!
 750 ----------------------------------------------------------------------}
 751
 752 {----------------------------------------------------------------------
 753   [highestBitMask] returns a word where only the highest bit is set.
 754   It is found by first setting all bits in lower positions than the
 755   highest bit and then taking an exclusive or with that value shifted
 756   right by one. Although the function may look expensive, GHC compiles
 757   this into excellent C code that is subsequently compiled into highly
 758   efficient machine code. The algorithm is derived from Jorg Arndt's FXT library.
 759 ----------------------------------------------------------------------}
 760 highestBitMask :: Nat -> Nat
 761 highestBitMask x
 762   = case (x .|. shiftRL x 1) of
 763      x -> case (x .|. shiftRL x 2) of
 764       x -> case (x .|. shiftRL x 4) of
 765        x -> case (x .|. shiftRL x 8) of
 766         x -> case (x .|. shiftRL x 16) of
 767          x -> case (x .|. shiftRL x 32) of   -- for 64 bit platforms
 768           x -> (x `xor` (shiftRL x 1))
 769
 770
 771 {--------------------------------------------------------------------
 772   Utilities
 773 --------------------------------------------------------------------}
 774 foldlStrict f z xs
 775   = case xs of
 776       []     -> z
 777       (x:xx) -> let z' = f z x in seq z' (foldlStrict f z' xx)
 778
 779
 780 {-
 781 {--------------------------------------------------------------------
 782   Testing
 783 --------------------------------------------------------------------}
 784 testTree :: [Int] -> IntSet
 785 testTree xs   = fromList xs
 786 test1 = testTree [1..20]
 787 test2 = testTree [30,29..10]
 788 test3 = testTree [1,4,6,89,2323,53,43,234,5,79,12,9,24,9,8,423,8,42,4,8,9,3]
 789
 790 {--------------------------------------------------------------------
 791   QuickCheck
 792 --------------------------------------------------------------------}
 793 qcheck prop
 794   = check config prop
 795   where
 796     config = Config
 797       { configMaxTest = 500
 798       , configMaxFail = 5000
 799       , configSize    = \n -> (div n 2 + 3)
 800       , configEvery   = \n args -> let s = show n in s ++ [ '\b' | _ <- s ]
 801       }
 802
 803
 804 {--------------------------------------------------------------------
 805   Arbitrary, reasonably balanced trees
 806 --------------------------------------------------------------------}
 807 instance Arbitrary IntSet where
 808   arbitrary = do{ xs <- arbitrary
 809                 ; return (fromList xs)
 810                 }
 811
 812
 813 {--------------------------------------------------------------------
 814   Single, Insert, Delete
 815 --------------------------------------------------------------------}
 816 prop_Single :: Int -> Bool
 817 prop_Single x
 818   = (insert x empty == singleton x)
 819
 820 prop_InsertDelete :: Int -> IntSet -> Property
 821 prop_InsertDelete k t
 822   = not (member k t) ==> delete k (insert k t) == t
 823
 824
 825 {--------------------------------------------------------------------
 826   Union
 827 --------------------------------------------------------------------}
 828 prop_UnionInsert :: Int -> IntSet -> Bool
 829 prop_UnionInsert x t
 830   = union t (singleton x) == insert x t
 831
 832 prop_UnionAssoc :: IntSet -> IntSet -> IntSet -> Bool
 833 prop_UnionAssoc t1 t2 t3
 834   = union t1 (union t2 t3) == union (union t1 t2) t3
 835
 836 prop_UnionComm :: IntSet -> IntSet -> Bool
 837 prop_UnionComm t1 t2
 838   = (union t1 t2 == union t2 t1)
 839
 840 prop_Diff :: [Int] -> [Int] -> Bool
 841 prop_Diff xs ys
 842   =  toAscList (difference (fromList xs) (fromList ys))
 843     == List.sort ((List.\\) (nub xs)  (nub ys))
 844
 845 prop_Int :: [Int] -> [Int] -> Bool
 846 prop_Int xs ys
 847   =  toAscList (intersection (fromList xs) (fromList ys))
 848     == List.sort (nub ((List.intersect) (xs)  (ys)))
 849
 850 {--------------------------------------------------------------------
 851   Lists
 852 --------------------------------------------------------------------}
 853 prop_Ordered
 854   = forAll (choose (5,100)) $ \n ->
 855     let xs = [0..n::Int]
 856     in fromAscList xs == fromList xs
 857
 858 prop_List :: [Int] -> Bool
 859 prop_List xs
 860   = (sort (nub xs) == toAscList (fromList xs))
 861 -}