Data/IntSet.hs

   1 {-# OPTIONS -cpp -fglasgow-exts #-}
   2 -----------------------------------------------------------------------------
   3 -- |
   4 -- Module      :  Data.IntSet
   5 -- Copyright   :  (c) Daan Leijen 2002
   6 -- License     :  BSD-style
   7 -- Maintainer  :  libraries@haskell.org
   8 -- Stability   :  provisional
   9 -- Portability :  portable
  10 --
  11 -- An efficient implementation of integer sets.
  12 --
  13 -- This module is intended to be imported @qualified@, to avoid name
  14 -- clashes with "Prelude" functions.  eg.
  15 --
  16 -- >  import Data.IntSet as Set
  17 --
  18 -- The implementation is based on /big-endian patricia trees/.  This data
  19 -- structure performs especially well on binary operations like 'union'
  20 -- and 'intersection'.  However, my benchmarks show that it is also
  21 -- (much) faster on insertions and deletions when compared to a generic
  22 -- size-balanced set implementation (see "Data.Set").
  23 --
  24 --    * Chris Okasaki and Andy Gill,  \"/Fast Mergeable Integer Maps/\",
  25 --      Workshop on ML, September 1998, pages 77-86,
  26 --      <http://www.cse.ogi.edu/~andy/pub/finite.htm>
  27 --
  28 --    * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
  29 --      Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
  30 --      October 1968, pages 514-534.
  31 --
  32 -- Many operations have a worst-case complexity of /O(min(n,W))/.
  33 -- This means that the operation can become linear in the number of
  34 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
  35 -- (32 or 64).
  36 -----------------------------------------------------------------------------
  37
  38 module Data.IntSet  (
  39             -- * Set type
  40               IntSet          -- instance Eq,Show
  41
  42             -- * Operators
  43             , (\\)
  44
  45             -- * Query
  46             , null
  47             , size
  48             , member
  49             , isSubsetOf
  50             , isProperSubsetOf
  51
  52             -- * Construction
  53             , empty
  54             , singleton
  55             , insert
  56             , delete
  57
  58             -- * Combine
  59             , union, unions
  60             , difference
  61             , intersection
  62
  63             -- * Filter
  64             , filter
  65             , partition
  66             , split
  67             , splitMember
  68
  69             -- * Map
  70             , map
  71
  72             -- * Fold
  73             , fold
  74
  75             -- * Conversion
  76             -- ** List
  77             , elems
  78             , toList
  79             , fromList
  80
  81             -- ** Ordered list
  82             , toAscList
  83             , fromAscList
  84             , fromDistinctAscList
  85
  86             -- * Debugging
  87             , showTree
  88             , showTreeWith
  89             ) where
  90
  91
  92 import Prelude hiding (lookup,filter,foldr,foldl,null,map)
  93 import Data.Bits
  94 import Data.Int
  95
  96 import qualified Data.List as List
  97 import Data.Typeable
  98
  99 {-
 100 -- just for testing
 101 import QuickCheck
 102 import List (nub,sort)
 103 import qualified List
 104 -}
 105
 106 #if __GLASGOW_HASKELL__
 107 import Data.Generics.Basics
 108 import Data.Generics.Instances
 109 #endif
 110
 111 #if __GLASGOW_HASKELL__ >= 503
 112 import GHC.Word
 113 import GHC.Exts ( Word(..), Int(..), shiftRL# )
 114 #elif __GLASGOW_HASKELL__
 115 import Word
 116 import GlaExts ( Word(..), Int(..), shiftRL# )
 117 #else
 118 import Data.Word
 119 #endif
 120
 121 infixl 9 \\{-This comment teaches CPP correct behaviour -}
 122
 123 -- A "Nat" is a natural machine word (an unsigned Int)
 124 type Nat = Word
 125
 126 natFromInt :: Int -> Nat
 127 natFromInt i = fromIntegral i
 128
 129 intFromNat :: Nat -> Int
 130 intFromNat w = fromIntegral w
 131
 132 shiftRL :: Nat -> Int -> Nat
 133 #if __GLASGOW_HASKELL__
 134 {--------------------------------------------------------------------
 135   GHC: use unboxing to get @shiftRL@ inlined.
 136 --------------------------------------------------------------------}
 137 shiftRL (W# x) (I# i)
 138   = W# (shiftRL# x i)
 139 #else
 140 shiftRL x i   = shiftR x i
 141 #endif
 142
 143 {--------------------------------------------------------------------
 144   Operators
 145 --------------------------------------------------------------------}
 146 -- | /O(n+m)/. See 'difference'.
 147 (\\) :: IntSet -> IntSet -> IntSet
 148 m1 \\ m2 = difference m1 m2
 149
 150 {--------------------------------------------------------------------
 151   Types
 152 --------------------------------------------------------------------}
 153 -- | A set of integers.
 154 data IntSet = Nil
 155             | Tip {-# UNPACK #-} !Int
 156             | Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
 157
 158 type Prefix = Int
 159 type Mask   = Int
 160
 161 #if __GLASGOW_HASKELL__
 162
 163 {--------------------------------------------------------------------
 164   A Data instance
 165 --------------------------------------------------------------------}
 166
 167 -- This instance preserves data abstraction at the cost of inefficiency.
 168 -- We omit reflection services for the sake of data abstraction.
 169
 170 instance Data IntSet where
 171   gfoldl f z is = z fromList `f` (toList is)
 172   toConstr _    = error "toConstr"
 173   gunfold _ _   = error "gunfold"
 174   dataTypeOf _  = mkNorepType "Data.IntSet.IntSet"
 175
 176 #endif
 177
 178 {--------------------------------------------------------------------
 179   Query
 180 --------------------------------------------------------------------}
 181 -- | /O(1)/. Is the set empty?
 182 null :: IntSet -> Bool
 183 null Nil   = True
 184 null other = False
 185
 186 -- | /O(n)/. Cardinality of the set.
 187 size :: IntSet -> Int
 188 size t
 189   = case t of
 190       Bin p m l r -> size l + size r
 191       Tip y -> 1
 192       Nil   -> 0
 193
 194 -- | /O(min(n,W))/. Is the value a member of the set?
 195 member :: Int -> IntSet -> Bool
 196 member x t
 197   = case t of
 198       Bin p m l r
 199         | nomatch x p m -> False
 200         | zero x m      -> member x l
 201         | otherwise     -> member x r
 202       Tip y -> (x==y)
 203       Nil   -> False
 204
 205 -- 'lookup' is used by 'intersection' for left-biasing
 206 lookup :: Int -> IntSet -> Maybe Int
 207 lookup k t
 208   = let nk = natFromInt k  in seq nk (lookupN nk t)
 209
 210 lookupN :: Nat -> IntSet -> Maybe Int
 211 lookupN k t
 212   = case t of
 213       Bin p m l r
 214         | zeroN k (natFromInt m) -> lookupN k l
 215         | otherwise              -> lookupN k r
 216       Tip kx
 217         | (k == natFromInt kx)  -> Just kx
 218         | otherwise             -> Nothing
 219       Nil -> Nothing
 220
 221 {--------------------------------------------------------------------
 222   Construction
 223 --------------------------------------------------------------------}
 224 -- | /O(1)/. The empty set.
 225 empty :: IntSet
 226 empty
 227   = Nil
 228
 229 -- | /O(1)/. A set of one element.
 230 singleton :: Int -> IntSet
 231 singleton x
 232   = Tip x
 233
 234 {--------------------------------------------------------------------
 235   Insert
 236 --------------------------------------------------------------------}
 237 -- | /O(min(n,W))/. Add a value to the set. When the value is already
 238 -- an element of the set, it is replaced by the new one, ie. 'insert'
 239 -- is left-biased.
 240 insert :: Int -> IntSet -> IntSet
 241 insert x t
 242   = case t of
 243       Bin p m l r
 244         | nomatch x p m -> join x (Tip x) p t
 245         | zero x m      -> Bin p m (insert x l) r
 246         | otherwise     -> Bin p m l (insert x r)
 247       Tip y
 248         | x==y          -> Tip x
 249         | otherwise     -> join x (Tip x) y t
 250       Nil -> Tip x
 251
 252 -- right-biased insertion, used by 'union'
 253 insertR :: Int -> IntSet -> IntSet
 254 insertR x t
 255   = case t of
 256       Bin p m l r
 257         | nomatch x p m -> join x (Tip x) p t
 258         | zero x m      -> Bin p m (insert x l) r
 259         | otherwise     -> Bin p m l (insert x r)
 260       Tip y
 261         | x==y          -> t
 262         | otherwise     -> join x (Tip x) y t
 263       Nil -> Tip x
 264
 265 -- | /O(min(n,W))/. Delete a value in the set. Returns the
 266 -- original set when the value was not present.
 267 delete :: Int -> IntSet -> IntSet
 268 delete x t
 269   = case t of
 270       Bin p m l r
 271         | nomatch x p m -> t
 272         | zero x m      -> bin p m (delete x l) r
 273         | otherwise     -> bin p m l (delete x r)
 274       Tip y
 275         | x==y          -> Nil
 276         | otherwise     -> t
 277       Nil -> Nil
 278
 279
 280 {--------------------------------------------------------------------
 281   Union
 282 --------------------------------------------------------------------}
 283 -- | The union of a list of sets.
 284 unions :: [IntSet] -> IntSet
 285 unions xs
 286   = foldlStrict union empty xs
 287
 288
 289 -- | /O(n+m)/. The union of two sets.
 290 union :: IntSet -> IntSet -> IntSet
 291 union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 292   | shorter m1 m2  = union1
 293   | shorter m2 m1  = union2
 294   | p1 == p2       = Bin p1 m1 (union l1 l2) (union r1 r2)
 295   | otherwise      = join p1 t1 p2 t2
 296   where
 297     union1  | nomatch p2 p1 m1  = join p1 t1 p2 t2
 298             | zero p2 m1        = Bin p1 m1 (union l1 t2) r1
 299             | otherwise         = Bin p1 m1 l1 (union r1 t2)
 300
 301     union2  | nomatch p1 p2 m2  = join p1 t1 p2 t2
 302             | zero p1 m2        = Bin p2 m2 (union t1 l2) r2
 303             | otherwise         = Bin p2 m2 l2 (union t1 r2)
 304
 305 union (Tip x) t = insert x t
 306 union t (Tip x) = insertR x t  -- right bias
 307 union Nil t     = t
 308 union t Nil     = t
 309
 310
 311 {--------------------------------------------------------------------
 312   Difference
 313 --------------------------------------------------------------------}
 314 -- | /O(n+m)/. Difference between two sets.
 315 difference :: IntSet -> IntSet -> IntSet
 316 difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 317   | shorter m1 m2  = difference1
 318   | shorter m2 m1  = difference2
 319   | p1 == p2       = bin p1 m1 (difference l1 l2) (difference r1 r2)
 320   | otherwise      = t1
 321   where
 322     difference1 | nomatch p2 p1 m1  = t1
 323                 | zero p2 m1        = bin p1 m1 (difference l1 t2) r1
 324                 | otherwise         = bin p1 m1 l1 (difference r1 t2)
 325
 326     difference2 | nomatch p1 p2 m2  = t1
 327                 | zero p1 m2        = difference t1 l2
 328                 | otherwise         = difference t1 r2
 329
 330 difference t1@(Tip x) t2
 331   | member x t2  = Nil
 332   | otherwise    = t1
 333
 334 difference Nil t     = Nil
 335 difference t (Tip x) = delete x t
 336 difference t Nil     = t
 337
 338
 339
 340 {--------------------------------------------------------------------
 341   Intersection
 342 --------------------------------------------------------------------}
 343 -- | /O(n+m)/. The intersection of two sets.
 344 intersection :: IntSet -> IntSet -> IntSet
 345 intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 346   | shorter m1 m2  = intersection1
 347   | shorter m2 m1  = intersection2
 348   | p1 == p2       = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
 349   | otherwise      = Nil
 350   where
 351     intersection1 | nomatch p2 p1 m1  = Nil
 352                   | zero p2 m1        = intersection l1 t2
 353                   | otherwise         = intersection r1 t2
 354
 355     intersection2 | nomatch p1 p2 m2  = Nil
 356                   | zero p1 m2        = intersection t1 l2
 357                   | otherwise         = intersection t1 r2
 358
 359 intersection t1@(Tip x) t2
 360   | member x t2  = t1
 361   | otherwise    = Nil
 362 intersection t (Tip x)
 363   = case lookup x t of
 364       Just y  -> Tip y
 365       Nothing -> Nil
 366 intersection Nil t = Nil
 367 intersection t Nil = Nil
 368
 369
 370
 371 {--------------------------------------------------------------------
 372   Subset
 373 --------------------------------------------------------------------}
 374 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
 375 isProperSubsetOf :: IntSet -> IntSet -> Bool
 376 isProperSubsetOf t1 t2
 377   = case subsetCmp t1 t2 of
 378       LT -> True
 379       ge -> False
 380
 381 subsetCmp t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 382   | shorter m1 m2  = GT
 383   | shorter m2 m1  = subsetCmpLt
 384   | p1 == p2       = subsetCmpEq
 385   | otherwise      = GT  -- disjoint
 386   where
 387     subsetCmpLt | nomatch p1 p2 m2  = GT
 388                 | zero p1 m2        = subsetCmp t1 l2
 389                 | otherwise         = subsetCmp t1 r2
 390     subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
 391                     (GT,_ ) -> GT
 392                     (_ ,GT) -> GT
 393                     (EQ,EQ) -> EQ
 394                     other   -> LT
 395
 396 subsetCmp (Bin p m l r) t  = GT
 397 subsetCmp (Tip x) (Tip y)
 398   | x==y       = EQ
 399   | otherwise  = GT  -- disjoint
 400 subsetCmp (Tip x) t
 401   | member x t = LT
 402   | otherwise  = GT  -- disjoint
 403 subsetCmp Nil Nil = EQ
 404 subsetCmp Nil t   = LT
 405
 406 -- | /O(n+m)/. Is this a subset?
 407 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
 408
 409 isSubsetOf :: IntSet -> IntSet -> Bool
 410 isSubsetOf t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 411   | shorter m1 m2  = False
 412   | shorter m2 m1  = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2
 413                                                       else isSubsetOf t1 r2)
 414   | otherwise      = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2
 415 isSubsetOf (Bin p m l r) t  = False
 416 isSubsetOf (Tip x) t        = member x t
 417 isSubsetOf Nil t            = True
 418
 419
 420 {--------------------------------------------------------------------
 421   Filter
 422 --------------------------------------------------------------------}
 423 -- | /O(n)/. Filter all elements that satisfy some predicate.
 424 filter :: (Int -> Bool) -> IntSet -> IntSet
 425 filter pred t
 426   = case t of
 427       Bin p m l r
 428         -> bin p m (filter pred l) (filter pred r)
 429       Tip x
 430         | pred x    -> t
 431         | otherwise -> Nil
 432       Nil -> Nil
 433
 434 -- | /O(n)/. partition the set according to some predicate.
 435 partition :: (Int -> Bool) -> IntSet -> (IntSet,IntSet)
 436 partition pred t
 437   = case t of
 438       Bin p m l r
 439         -> let (l1,l2) = partition pred l
 440                (r1,r2) = partition pred r
 441            in (bin p m l1 r1, bin p m l2 r2)
 442       Tip x
 443         | pred x    -> (t,Nil)
 444         | otherwise -> (Nil,t)
 445       Nil -> (Nil,Nil)
 446
 447
 448 -- | /O(log n)/. The expression (@'split' x set@) is a pair @(set1,set2)@
 449 -- where all elements in @set1@ are lower than @x@ and all elements in
 450 -- @set2@ larger than @x@.
 451 --
 452 -- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [3,4])
 453 split :: Int -> IntSet -> (IntSet,IntSet)
 454 split x t
 455   = case t of
 456       Bin p m l r
 457         | zero x m  -> let (lt,gt) = split x l in (lt,union gt r)
 458         | otherwise -> let (lt,gt) = split x r in (union l lt,gt)
 459       Tip y
 460         | x>y       -> (t,Nil)
 461         | x<y       -> (Nil,t)
 462         | otherwise -> (Nil,Nil)
 463       Nil -> (Nil,Nil)
 464
 465 -- | /O(log n)/. Performs a 'split' but also returns whether the pivot
 466 -- element was found in the original set.
 467 splitMember :: Int -> IntSet -> (IntSet,Bool,IntSet)
 468 splitMember x t
 469   = case t of
 470       Bin p m l r
 471         | zero x m  -> let (lt,found,gt) = splitMember x l in (lt,found,union gt r)
 472         | otherwise -> let (lt,found,gt) = splitMember x r in (union l lt,found,gt)
 473       Tip y
 474         | x>y       -> (t,False,Nil)
 475         | x<y       -> (Nil,False,t)
 476         | otherwise -> (Nil,True,Nil)
 477       Nil -> (Nil,False,Nil)
 478
 479 {----------------------------------------------------------------------
 480   Map
 481 ----------------------------------------------------------------------}
 482
 483 -- | /O(n*min(n,W))/.
 484 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
 485 --
 486 -- It's worth noting that the size of the result may be smaller if,
 487 -- for some @(x,y)@, @x \/= y && f x == f y@
 488
 489 map :: (Int->Int) -> IntSet -> IntSet
 490 map f = fromList . List.map f . toList
 491
 492 {--------------------------------------------------------------------
 493   Fold
 494 --------------------------------------------------------------------}
 495 -- | /O(n)/. Fold over the elements of a set in an unspecified order.
 496 --
 497 -- > sum set   == fold (+) 0 set
 498 -- > elems set == fold (:) [] set
 499 fold :: (Int -> b -> b) -> b -> IntSet -> b
 500 fold f z t
 501   = foldr f z t
 502
 503 foldr :: (Int -> b -> b) -> b -> IntSet -> b
 504 foldr f z t
 505   = case t of
 506       Bin p m l r -> foldr f (foldr f z r) l
 507       Tip x       -> f x z
 508       Nil         -> z
 509
 510 {--------------------------------------------------------------------
 511   List variations
 512 --------------------------------------------------------------------}
 513 -- | /O(n)/. The elements of a set. (For sets, this is equivalent to toList)
 514 elems :: IntSet -> [Int]
 515 elems s
 516   = toList s
 517
 518 {--------------------------------------------------------------------
 519   Lists
 520 --------------------------------------------------------------------}
 521 -- | /O(n)/. Convert the set to a list of elements.
 522 toList :: IntSet -> [Int]
 523 toList t
 524   = fold (:) [] t
 525
 526 -- | /O(n)/. Convert the set to an ascending list of elements.
 527 toAscList :: IntSet -> [Int]
 528 toAscList t
 529   = -- NOTE: the following algorithm only works for big-endian trees
 530     let (pos,neg) = span (>=0) (foldr (:) [] t) in neg ++ pos
 531
 532 -- | /O(n*min(n,W))/. Create a set from a list of integers.
 533 fromList :: [Int] -> IntSet
 534 fromList xs
 535   = foldlStrict ins empty xs
 536   where
 537     ins t x  = insert x t
 538
 539 -- | /O(n*min(n,W))/. Build a set from an ascending list of elements.
 540 fromAscList :: [Int] -> IntSet
 541 fromAscList xs
 542   = fromList xs
 543
 544 -- | /O(n*min(n,W))/. Build a set from an ascending list of distinct elements.
 545 fromDistinctAscList :: [Int] -> IntSet
 546 fromDistinctAscList xs
 547   = fromList xs
 548
 549
 550 {--------------------------------------------------------------------
 551   Eq
 552 --------------------------------------------------------------------}
 553 instance Eq IntSet where
 554   t1 == t2  = equal t1 t2
 555   t1 /= t2  = nequal t1 t2
 556
 557 equal :: IntSet -> IntSet -> Bool
 558 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
 559   = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
 560 equal (Tip x) (Tip y)
 561   = (x==y)
 562 equal Nil Nil = True
 563 equal t1 t2   = False
 564
 565 nequal :: IntSet -> IntSet -> Bool
 566 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
 567   = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
 568 nequal (Tip x) (Tip y)
 569   = (x/=y)
 570 nequal Nil Nil = False
 571 nequal t1 t2   = True
 572
 573 {--------------------------------------------------------------------
 574   Ord
 575 --------------------------------------------------------------------}
 576
 577 instance Ord IntSet where
 578     compare s1 s2 = compare (toAscList s1) (toAscList s2)
 579     -- tentative implementation. See if more efficient exists.
 580
 581 {--------------------------------------------------------------------
 582   Show
 583 --------------------------------------------------------------------}
 584 instance Show IntSet where
 585   showsPrec d s  = showSet (toList s)
 586
 587 showSet :: [Int] -> ShowS
 588 showSet []
 589   = showString "{}"
 590 showSet (x:xs)
 591   = showChar '{' . shows x . showTail xs
 592   where
 593     showTail []     = showChar '}'
 594     showTail (x:xs) = showChar ',' . shows x . showTail xs
 595
 596 {--------------------------------------------------------------------
 597   Typeable
 598 --------------------------------------------------------------------}
 599
 600 #include "Typeable.h"
 601 INSTANCE_TYPEABLE0(IntSet,intSetTc,"IntSet")
 602
 603 {--------------------------------------------------------------------
 604   Debugging
 605 --------------------------------------------------------------------}
 606 -- | /O(n)/. Show the tree that implements the set. The tree is shown
 607 -- in a compressed, hanging format.
 608 showTree :: IntSet -> String
 609 showTree s
 610   = showTreeWith True False s
 611
 612
 613 {- | /O(n)/. The expression (@'showTreeWith' hang wide map@) shows
 614  the tree that implements the set. If @hang@ is
 615  'True', a /hanging/ tree is shown otherwise a rotated tree is shown. If
 616  @wide@ is 'True', an extra wide version is shown.
 617 -}
 618 showTreeWith :: Bool -> Bool -> IntSet -> String
 619 showTreeWith hang wide t
 620   | hang      = (showsTreeHang wide [] t) ""
 621   | otherwise = (showsTree wide [] [] t) ""
 622
 623 showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
 624 showsTree wide lbars rbars t
 625   = case t of
 626       Bin p m l r
 627           -> showsTree wide (withBar rbars) (withEmpty rbars) r .
 628              showWide wide rbars .
 629              showsBars lbars . showString (showBin p m) . showString "\n" .
 630              showWide wide lbars .
 631              showsTree wide (withEmpty lbars) (withBar lbars) l
 632       Tip x
 633           -> showsBars lbars . showString " " . shows x . showString "\n"
 634       Nil -> showsBars lbars . showString "|\n"
 635
 636 showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
 637 showsTreeHang wide bars t
 638   = case t of
 639       Bin p m l r
 640           -> showsBars bars . showString (showBin p m) . showString "\n" .
 641              showWide wide bars .
 642              showsTreeHang wide (withBar bars) l .
 643              showWide wide bars .
 644              showsTreeHang wide (withEmpty bars) r
 645       Tip x
 646           -> showsBars bars . showString " " . shows x . showString "\n"
 647       Nil -> showsBars bars . showString "|\n"
 648
 649 showBin p m
 650   = "*" -- ++ show (p,m)
 651
 652 showWide wide bars
 653   | wide      = showString (concat (reverse bars)) . showString "|\n"
 654   | otherwise = id
 655
 656 showsBars :: [String] -> ShowS
 657 showsBars bars
 658   = case bars of
 659       [] -> id
 660       _  -> showString (concat (reverse (tail bars))) . showString node
 661
 662 node           = "+--"
 663 withBar bars   = "|  ":bars
 664 withEmpty bars = "   ":bars
 665
 666
 667 {--------------------------------------------------------------------
 668   Helpers
 669 --------------------------------------------------------------------}
 670 {--------------------------------------------------------------------
 671   Join
 672 --------------------------------------------------------------------}
 673 join :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
 674 join p1 t1 p2 t2
 675   | zero p1 m = Bin p m t1 t2
 676   | otherwise = Bin p m t2 t1
 677   where
 678     m = branchMask p1 p2
 679     p = mask p1 m
 680
 681 {--------------------------------------------------------------------
 682   @bin@ assures that we never have empty trees within a tree.
 683 --------------------------------------------------------------------}
 684 bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
 685 bin p m l Nil = l
 686 bin p m Nil r = r
 687 bin p m l r   = Bin p m l r
 688
 689
 690 {--------------------------------------------------------------------
 691   Endian independent bit twiddling
 692 --------------------------------------------------------------------}
 693 zero :: Int -> Mask -> Bool
 694 zero i m
 695   = (natFromInt i) .&. (natFromInt m) == 0
 696
 697 nomatch,match :: Int -> Prefix -> Mask -> Bool
 698 nomatch i p m
 699   = (mask i m) /= p
 700
 701 match i p m
 702   = (mask i m) == p
 703
 704 mask :: Int -> Mask -> Prefix
 705 mask i m
 706   = maskW (natFromInt i) (natFromInt m)
 707
 708 zeroN :: Nat -> Nat -> Bool
 709 zeroN i m = (i .&. m) == 0
 710
 711 {--------------------------------------------------------------------
 712   Big endian operations
 713 --------------------------------------------------------------------}
 714 maskW :: Nat -> Nat -> Prefix
 715 maskW i m
 716   = intFromNat (i .&. (complement (m-1) `xor` m))
 717
 718 shorter :: Mask -> Mask -> Bool
 719 shorter m1 m2
 720   = (natFromInt m1) > (natFromInt m2)
 721
 722 branchMask :: Prefix -> Prefix -> Mask
 723 branchMask p1 p2
 724   = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
 725
 726 {----------------------------------------------------------------------
 727   Finding the highest bit (mask) in a word [x] can be done efficiently in
 728   three ways:
 729   * convert to a floating point value and the mantissa tells us the
 730     [log2(x)] that corresponds with the highest bit position. The mantissa
 731     is retrieved either via the standard C function [frexp] or by some bit
 732     twiddling on IEEE compatible numbers (float). Note that one needs to
 733     use at least [double] precision for an accurate mantissa of 32 bit
 734     numbers.
 735   * use bit twiddling, a logarithmic sequence of bitwise or's and shifts (bit).
 736   * use processor specific assembler instruction (asm).
 737
 738   The most portable way would be [bit], but is it efficient enough?
 739   I have measured the cycle counts of the different methods on an AMD
 740   Athlon-XP 1800 (~ Pentium III 1.8Ghz) using the RDTSC instruction:
 741
 742   highestBitMask: method  cycles
 743                   --------------
 744                    frexp   200
 745                    float    33
 746                    bit      11
 747                    asm      12
 748
 749   highestBit:     method  cycles
 750                   --------------
 751                    frexp   195
 752                    float    33
 753                    bit      11
 754                    asm      11
 755
 756   Wow, the bit twiddling is on today's RISC like machines even faster
 757   than a single CISC instruction (BSR)!
 758 ----------------------------------------------------------------------}
 759
 760 {----------------------------------------------------------------------
 761   [highestBitMask] returns a word where only the highest bit is set.
 762   It is found by first setting all bits in lower positions than the
 763   highest bit and than taking an exclusive or with the original value.
 764   Allthough the function may look expensive, GHC compiles this into
 765   excellent C code that subsequently compiled into highly efficient
 766   machine code. The algorithm is derived from Jorg Arndt's FXT library.
 767 ----------------------------------------------------------------------}
 768 highestBitMask :: Nat -> Nat
 769 highestBitMask x
 770   = case (x .|. shiftRL x 1) of
 771      x -> case (x .|. shiftRL x 2) of
 772       x -> case (x .|. shiftRL x 4) of
 773        x -> case (x .|. shiftRL x 8) of
 774         x -> case (x .|. shiftRL x 16) of
 775          x -> case (x .|. shiftRL x 32) of   -- for 64 bit platforms
 776           x -> (x `xor` (shiftRL x 1))
 777
 778
 779 {--------------------------------------------------------------------
 780   Utilities
 781 --------------------------------------------------------------------}
 782 foldlStrict f z xs
 783   = case xs of
 784       []     -> z
 785       (x:xx) -> let z' = f z x in seq z' (foldlStrict f z' xx)
 786
 787
 788 {-
 789 {--------------------------------------------------------------------
 790   Testing
 791 --------------------------------------------------------------------}
 792 testTree :: [Int] -> IntSet
 793 testTree xs   = fromList xs
 794 test1 = testTree [1..20]
 795 test2 = testTree [30,29..10]
 796 test3 = testTree [1,4,6,89,2323,53,43,234,5,79,12,9,24,9,8,423,8,42,4,8,9,3]
 797
 798 {--------------------------------------------------------------------
 799   QuickCheck
 800 --------------------------------------------------------------------}
 801 qcheck prop
 802   = check config prop
 803   where
 804     config = Config
 805       { configMaxTest = 500
 806       , configMaxFail = 5000
 807       , configSize    = \n -> (div n 2 + 3)
 808       , configEvery   = \n args -> let s = show n in s ++ [ '\b' | _ <- s ]
 809       }
 810
 811
 812 {--------------------------------------------------------------------
 813   Arbitrary, reasonably balanced trees
 814 --------------------------------------------------------------------}
 815 instance Arbitrary IntSet where
 816   arbitrary = do{ xs <- arbitrary
 817                 ; return (fromList xs)
 818                 }
 819
 820
 821 {--------------------------------------------------------------------
 822   Single, Insert, Delete
 823 --------------------------------------------------------------------}
 824 prop_Single :: Int -> Bool
 825 prop_Single x
 826   = (insert x empty == singleton x)
 827
 828 prop_InsertDelete :: Int -> IntSet -> Property
 829 prop_InsertDelete k t
 830   = not (member k t) ==> delete k (insert k t) == t
 831
 832
 833 {--------------------------------------------------------------------
 834   Union
 835 --------------------------------------------------------------------}
 836 prop_UnionInsert :: Int -> IntSet -> Bool
 837 prop_UnionInsert x t
 838   = union t (singleton x) == insert x t
 839
 840 prop_UnionAssoc :: IntSet -> IntSet -> IntSet -> Bool
 841 prop_UnionAssoc t1 t2 t3
 842   = union t1 (union t2 t3) == union (union t1 t2) t3
 843
 844 prop_UnionComm :: IntSet -> IntSet -> Bool
 845 prop_UnionComm t1 t2
 846   = (union t1 t2 == union t2 t1)
 847
 848 prop_Diff :: [Int] -> [Int] -> Bool
 849 prop_Diff xs ys
 850   =  toAscList (difference (fromList xs) (fromList ys))
 851     == List.sort ((List.\\) (nub xs)  (nub ys))
 852
 853 prop_Int :: [Int] -> [Int] -> Bool
 854 prop_Int xs ys
 855   =  toAscList (intersection (fromList xs) (fromList ys))
 856     == List.sort (nub ((List.intersect) (xs)  (ys)))
 857
 858 {--------------------------------------------------------------------
 859   Lists
 860 --------------------------------------------------------------------}
 861 prop_Ordered
 862   = forAll (choose (5,100)) $ \n ->
 863     let xs = [0..n::Int]
 864     in fromAscList xs == fromList xs
 865
 866 prop_List :: [Int] -> Bool
 867 prop_List xs
 868   = (sort (nub xs) == toAscList (fromList xs))
 869 -}