Data/IntSet.hs

   1 {-# OPTIONS -cpp -fglasgow-exts #-}
   2 -----------------------------------------------------------------------------
   3 -- |
   4 -- Module      :  Data.IntSet
   5 -- Copyright   :  (c) Daan Leijen 2002
   6 -- License     :  BSD-style
   7 -- Maintainer  :  libraries@haskell.org
   8 -- Stability   :  provisional
   9 -- Portability :  portable
  10 --
  11 -- An efficient implementation of integer sets.
  12 --
  13 -- Since many function names (but not the type name) clash with
  14 -- "Prelude" names, this module is usually imported @qualified@, e.g.
  15 --
  16 -- >  import Data.IntSet (IntSet)
  17 -- >  import qualified Data.IntSet as IntSet
  18 --
  19 -- The implementation is based on /big-endian patricia trees/.  This data
  20 -- structure performs especially well on binary operations like 'union'
  21 -- and 'intersection'.  However, my benchmarks show that it is also
  22 -- (much) faster on insertions and deletions when compared to a generic
  23 -- size-balanced set implementation (see "Data.Set").
  24 --
  25 --    * Chris Okasaki and Andy Gill,  \"/Fast Mergeable Integer Maps/\",
  26 --      Workshop on ML, September 1998, pages 77-86,
  27 --      <http://www.cse.ogi.edu/~andy/pub/finite.htm>
  28 --
  29 --    * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
  30 --      Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
  31 --      October 1968, pages 514-534.
  32 --
  33 -- Many operations have a worst-case complexity of /O(min(n,W))/.
  34 -- This means that the operation can become linear in the number of
  35 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
  36 -- (32 or 64).
  37 -----------------------------------------------------------------------------
  38
  39 module Data.IntSet  (
  40             -- * Set type
  41               IntSet          -- instance Eq,Show
  42
  43             -- * Operators
  44             , (\\)
  45
  46             -- * Query
  47             , null
  48             , size
  49             , member
  50             , notMember
  51             , isSubsetOf
  52             , isProperSubsetOf
  53
  54             -- * Construction
  55             , empty
  56             , singleton
  57             , insert
  58             , delete
  59
  60             -- * Combine
  61             , union, unions
  62             , difference
  63             , intersection
  64
  65             -- * Filter
  66             , filter
  67             , partition
  68             , split
  69             , splitMember
  70
  71             -- * Min\/Max
  72             , findMin
  73             , findMax
  74             , deleteMin
  75             , deleteMax
  76             , deleteFindMin
  77             , deleteFindMax
  78             , maxView
  79             , minView
  80
  81             -- * Map
  82             , map
  83
  84             -- * Fold
  85             , fold
  86
  87             -- * Conversion
  88             -- ** List
  89             , elems
  90             , toList
  91             , fromList
  92
  93             -- ** Ordered list
  94             , toAscList
  95             , fromAscList
  96             , fromDistinctAscList
  97
  98             -- * Debugging
  99             , showTree
 100             , showTreeWith
 101             ) where
 102
 103
 104 import Prelude hiding (lookup,filter,foldr,foldl,null,map)
 105 import Data.Bits
 106
 107 import qualified Data.List as List
 108 import Data.Monoid (Monoid(..))
 109 import Data.Typeable
 110
 111 {-
 112 -- just for testing
 113 import QuickCheck
 114 import List (nub,sort)
 115 import qualified List
 116 -}
 117
 118 #if __GLASGOW_HASKELL__
 119 import Text.Read
 120 import Data.Generics.Basics (Data(..), mkNorepType)
 121 import Data.Generics.Instances ()
 122 #endif
 123
 124 #if __GLASGOW_HASKELL__ >= 503
 125 import GHC.Exts ( Word(..), Int(..), shiftRL# )
 126 #elif __GLASGOW_HASKELL__
 127 import Word
 128 import GlaExts ( Word(..), Int(..), shiftRL# )
 129 #else
 130 import Data.Word
 131 #endif
 132
 133 infixl 9 \\{-This comment teaches CPP correct behaviour -}
 134
 135 -- A "Nat" is a natural machine word (an unsigned Int)
 136 type Nat = Word
 137
 138 natFromInt :: Int -> Nat
 139 natFromInt i = fromIntegral i
 140
 141 intFromNat :: Nat -> Int
 142 intFromNat w = fromIntegral w
 143
 144 shiftRL :: Nat -> Int -> Nat
 145 #if __GLASGOW_HASKELL__
 146 {--------------------------------------------------------------------
 147   GHC: use unboxing to get @shiftRL@ inlined.
 148 --------------------------------------------------------------------}
 149 shiftRL (W# x) (I# i)
 150   = W# (shiftRL# x i)
 151 #else
 152 shiftRL x i   = shiftR x i
 153 #endif
 154
 155 {--------------------------------------------------------------------
 156   Operators
 157 --------------------------------------------------------------------}
 158 -- | /O(n+m)/. See 'difference'.
 159 (\\) :: IntSet -> IntSet -> IntSet
 160 m1 \\ m2 = difference m1 m2
 161
 162 {--------------------------------------------------------------------
 163   Types
 164 --------------------------------------------------------------------}
 165 -- | A set of integers.
 166 data IntSet = Nil
 167             | Tip {-# UNPACK #-} !Int
 168             | Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
 169 -- Invariant: Nil is never found as a child of Bin.
 170
 171
 172 type Prefix = Int
 173 type Mask   = Int
 174
 175 instance Monoid IntSet where
 176     mempty  = empty
 177     mappend = union
 178     mconcat = unions
 179
 180 #if __GLASGOW_HASKELL__
 181
 182 {--------------------------------------------------------------------
 183   A Data instance
 184 --------------------------------------------------------------------}
 185
 186 -- This instance preserves data abstraction at the cost of inefficiency.
 187 -- We omit reflection services for the sake of data abstraction.
 188
 189 instance Data IntSet where
 190   gfoldl f z is = z fromList `f` (toList is)
 191   toConstr _    = error "toConstr"
 192   gunfold _ _   = error "gunfold"
 193   dataTypeOf _  = mkNorepType "Data.IntSet.IntSet"
 194
 195 #endif
 196
 197 {--------------------------------------------------------------------
 198   Query
 199 --------------------------------------------------------------------}
 200 -- | /O(1)/. Is the set empty?
 201 null :: IntSet -> Bool
 202 null Nil   = True
 203 null other = False
 204
 205 -- | /O(n)/. Cardinality of the set.
 206 size :: IntSet -> Int
 207 size t
 208   = case t of
 209       Bin p m l r -> size l + size r
 210       Tip y -> 1
 211       Nil   -> 0
 212
 213 -- | /O(min(n,W))/. Is the value a member of the set?
 214 member :: Int -> IntSet -> Bool
 215 member x t
 216   = case t of
 217       Bin p m l r
 218         | nomatch x p m -> False
 219         | zero x m      -> member x l
 220         | otherwise     -> member x r
 221       Tip y -> (x==y)
 222       Nil   -> False
 223
 224 -- | /O(min(n,W))/. Is the element not in the set?
 225 notMember :: Int -> IntSet -> Bool
 226 notMember k = not . member k
 227
 228 -- 'lookup' is used by 'intersection' for left-biasing
 229 lookup :: Int -> IntSet -> Maybe Int
 230 lookup k t
 231   = let nk = natFromInt k  in seq nk (lookupN nk t)
 232
 233 lookupN :: Nat -> IntSet -> Maybe Int
 234 lookupN k t
 235   = case t of
 236       Bin p m l r
 237         | zeroN k (natFromInt m) -> lookupN k l
 238         | otherwise              -> lookupN k r
 239       Tip kx
 240         | (k == natFromInt kx)  -> Just kx
 241         | otherwise             -> Nothing
 242       Nil -> Nothing
 243
 244 {--------------------------------------------------------------------
 245   Construction
 246 --------------------------------------------------------------------}
 247 -- | /O(1)/. The empty set.
 248 empty :: IntSet
 249 empty
 250   = Nil
 251
 252 -- | /O(1)/. A set of one element.
 253 singleton :: Int -> IntSet
 254 singleton x
 255   = Tip x
 256
 257 {--------------------------------------------------------------------
 258   Insert
 259 --------------------------------------------------------------------}
 260 -- | /O(min(n,W))/. Add a value to the set. When the value is already
 261 -- an element of the set, it is replaced by the new one, ie. 'insert'
 262 -- is left-biased.
 263 insert :: Int -> IntSet -> IntSet
 264 insert x t
 265   = case t of
 266       Bin p m l r
 267         | nomatch x p m -> join x (Tip x) p t
 268         | zero x m      -> Bin p m (insert x l) r
 269         | otherwise     -> Bin p m l (insert x r)
 270       Tip y
 271         | x==y          -> Tip x
 272         | otherwise     -> join x (Tip x) y t
 273       Nil -> Tip x
 274
 275 -- right-biased insertion, used by 'union'
 276 insertR :: Int -> IntSet -> IntSet
 277 insertR x t
 278   = case t of
 279       Bin p m l r
 280         | nomatch x p m -> join x (Tip x) p t
 281         | zero x m      -> Bin p m (insert x l) r
 282         | otherwise     -> Bin p m l (insert x r)
 283       Tip y
 284         | x==y          -> t
 285         | otherwise     -> join x (Tip x) y t
 286       Nil -> Tip x
 287
 288 -- | /O(min(n,W))/. Delete a value in the set. Returns the
 289 -- original set when the value was not present.
 290 delete :: Int -> IntSet -> IntSet
 291 delete x t
 292   = case t of
 293       Bin p m l r
 294         | nomatch x p m -> t
 295         | zero x m      -> bin p m (delete x l) r
 296         | otherwise     -> bin p m l (delete x r)
 297       Tip y
 298         | x==y          -> Nil
 299         | otherwise     -> t
 300       Nil -> Nil
 301
 302
 303 {--------------------------------------------------------------------
 304   Union
 305 --------------------------------------------------------------------}
 306 -- | The union of a list of sets.
 307 unions :: [IntSet] -> IntSet
 308 unions xs
 309   = foldlStrict union empty xs
 310
 311
 312 -- | /O(n+m)/. The union of two sets.
 313 union :: IntSet -> IntSet -> IntSet
 314 union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 315   | shorter m1 m2  = union1
 316   | shorter m2 m1  = union2
 317   | p1 == p2       = Bin p1 m1 (union l1 l2) (union r1 r2)
 318   | otherwise      = join p1 t1 p2 t2
 319   where
 320     union1  | nomatch p2 p1 m1  = join p1 t1 p2 t2
 321             | zero p2 m1        = Bin p1 m1 (union l1 t2) r1
 322             | otherwise         = Bin p1 m1 l1 (union r1 t2)
 323
 324     union2  | nomatch p1 p2 m2  = join p1 t1 p2 t2
 325             | zero p1 m2        = Bin p2 m2 (union t1 l2) r2
 326             | otherwise         = Bin p2 m2 l2 (union t1 r2)
 327
 328 union (Tip x) t = insert x t
 329 union t (Tip x) = insertR x t  -- right bias
 330 union Nil t     = t
 331 union t Nil     = t
 332
 333
 334 {--------------------------------------------------------------------
 335   Difference
 336 --------------------------------------------------------------------}
 337 -- | /O(n+m)/. Difference between two sets.
 338 difference :: IntSet -> IntSet -> IntSet
 339 difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 340   | shorter m1 m2  = difference1
 341   | shorter m2 m1  = difference2
 342   | p1 == p2       = bin p1 m1 (difference l1 l2) (difference r1 r2)
 343   | otherwise      = t1
 344   where
 345     difference1 | nomatch p2 p1 m1  = t1
 346                 | zero p2 m1        = bin p1 m1 (difference l1 t2) r1
 347                 | otherwise         = bin p1 m1 l1 (difference r1 t2)
 348
 349     difference2 | nomatch p1 p2 m2  = t1
 350                 | zero p1 m2        = difference t1 l2
 351                 | otherwise         = difference t1 r2
 352
 353 difference t1@(Tip x) t2
 354   | member x t2  = Nil
 355   | otherwise    = t1
 356
 357 difference Nil t     = Nil
 358 difference t (Tip x) = delete x t
 359 difference t Nil     = t
 360
 361
 362
 363 {--------------------------------------------------------------------
 364   Intersection
 365 --------------------------------------------------------------------}
 366 -- | /O(n+m)/. The intersection of two sets.
 367 intersection :: IntSet -> IntSet -> IntSet
 368 intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 369   | shorter m1 m2  = intersection1
 370   | shorter m2 m1  = intersection2
 371   | p1 == p2       = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
 372   | otherwise      = Nil
 373   where
 374     intersection1 | nomatch p2 p1 m1  = Nil
 375                   | zero p2 m1        = intersection l1 t2
 376                   | otherwise         = intersection r1 t2
 377
 378     intersection2 | nomatch p1 p2 m2  = Nil
 379                   | zero p1 m2        = intersection t1 l2
 380                   | otherwise         = intersection t1 r2
 381
 382 intersection t1@(Tip x) t2
 383   | member x t2  = t1
 384   | otherwise    = Nil
 385 intersection t (Tip x)
 386   = case lookup x t of
 387       Just y  -> Tip y
 388       Nothing -> Nil
 389 intersection Nil t = Nil
 390 intersection t Nil = Nil
 391
 392
 393
 394 {--------------------------------------------------------------------
 395   Subset
 396 --------------------------------------------------------------------}
 397 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
 398 isProperSubsetOf :: IntSet -> IntSet -> Bool
 399 isProperSubsetOf t1 t2
 400   = case subsetCmp t1 t2 of
 401       LT -> True
 402       ge -> False
 403
 404 subsetCmp t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 405   | shorter m1 m2  = GT
 406   | shorter m2 m1  = subsetCmpLt
 407   | p1 == p2       = subsetCmpEq
 408   | otherwise      = GT  -- disjoint
 409   where
 410     subsetCmpLt | nomatch p1 p2 m2  = GT
 411                 | zero p1 m2        = subsetCmp t1 l2
 412                 | otherwise         = subsetCmp t1 r2
 413     subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
 414                     (GT,_ ) -> GT
 415                     (_ ,GT) -> GT
 416                     (EQ,EQ) -> EQ
 417                     other   -> LT
 418
 419 subsetCmp (Bin p m l r) t  = GT
 420 subsetCmp (Tip x) (Tip y)
 421   | x==y       = EQ
 422   | otherwise  = GT  -- disjoint
 423 subsetCmp (Tip x) t
 424   | member x t = LT
 425   | otherwise  = GT  -- disjoint
 426 subsetCmp Nil Nil = EQ
 427 subsetCmp Nil t   = LT
 428
 429 -- | /O(n+m)/. Is this a subset?
 430 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
 431
 432 isSubsetOf :: IntSet -> IntSet -> Bool
 433 isSubsetOf t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
 434   | shorter m1 m2  = False
 435   | shorter m2 m1  = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2
 436                                                       else isSubsetOf t1 r2)
 437   | otherwise      = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2
 438 isSubsetOf (Bin p m l r) t  = False
 439 isSubsetOf (Tip x) t        = member x t
 440 isSubsetOf Nil t            = True
 441
 442
 443 {--------------------------------------------------------------------
 444   Filter
 445 --------------------------------------------------------------------}
 446 -- | /O(n)/. Filter all elements that satisfy some predicate.
 447 filter :: (Int -> Bool) -> IntSet -> IntSet
 448 filter pred t
 449   = case t of
 450       Bin p m l r
 451         -> bin p m (filter pred l) (filter pred r)
 452       Tip x
 453         | pred x    -> t
 454         | otherwise -> Nil
 455       Nil -> Nil
 456
 457 -- | /O(n)/. partition the set according to some predicate.
 458 partition :: (Int -> Bool) -> IntSet -> (IntSet,IntSet)
 459 partition pred t
 460   = case t of
 461       Bin p m l r
 462         -> let (l1,l2) = partition pred l
 463                (r1,r2) = partition pred r
 464            in (bin p m l1 r1, bin p m l2 r2)
 465       Tip x
 466         | pred x    -> (t,Nil)
 467         | otherwise -> (Nil,t)
 468       Nil -> (Nil,Nil)
 469
 470
 471 -- | /O(min(n,W))/. The expression (@'split' x set@) is a pair @(set1,set2)@
 472 -- where all elements in @set1@ are lower than @x@ and all elements in
 473 -- @set2@ larger than @x@.
 474 --
 475 -- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [3,4])
 476 split :: Int -> IntSet -> (IntSet,IntSet)
 477 split x t
 478   = case t of
 479       Bin p m l r
 480         | m < 0       -> if x >= 0 then let (lt,gt) = split' x l in (union r lt, gt)
 481                                    else let (lt,gt) = split' x r in (lt, union gt l)
 482                                    -- handle negative numbers.
 483         | otherwise   -> split' x t
 484       Tip y
 485         | x>y         -> (t,Nil)
 486         | x<y         -> (Nil,t)
 487         | otherwise   -> (Nil,Nil)
 488       Nil             -> (Nil, Nil)
 489
 490 split' :: Int -> IntSet -> (IntSet,IntSet)
 491 split' x t
 492   = case t of
 493       Bin p m l r
 494         | match x p m -> if zero x m then let (lt,gt) = split' x l in (lt,union gt r)
 495                                      else let (lt,gt) = split' x r in (union l lt,gt)
 496         | otherwise   -> if x < p then (Nil, t)
 497                                   else (t, Nil)
 498       Tip y
 499         | x>y       -> (t,Nil)
 500         | x<y       -> (Nil,t)
 501         | otherwise -> (Nil,Nil)
 502       Nil -> (Nil,Nil)
 503
 504 -- | /O(min(n,W))/. Performs a 'split' but also returns whether the pivot
 505 -- element was found in the original set.
 506 splitMember :: Int -> IntSet -> (IntSet,Bool,IntSet)
 507 splitMember x t
 508   = case t of
 509       Bin p m l r
 510         | m < 0       -> if x >= 0 then let (lt,found,gt) = splitMember' x l in (union r lt, found, gt)
 511                                    else let (lt,found,gt) = splitMember' x r in (lt, found, union gt l)
 512                                    -- handle negative numbers.
 513         | otherwise   -> splitMember' x t
 514       Tip y
 515         | x>y       -> (t,False,Nil)
 516         | x<y       -> (Nil,False,t)
 517         | otherwise -> (Nil,True,Nil)
 518       Nil -> (Nil,False,Nil)
 519
 520 splitMember' :: Int -> IntSet -> (IntSet,Bool,IntSet)
 521 splitMember' x t
 522   = case t of
 523       Bin p m l r
 524          | match x p m ->  if zero x m then let (lt,found,gt) = splitMember x l in (lt,found,union gt r)
 525                                        else let (lt,found,gt) = splitMember x r in (union l lt,found,gt)
 526          | otherwise   -> if x < p then (Nil, False, t)
 527                                    else (t, False, Nil)
 528       Tip y
 529         | x>y       -> (t,False,Nil)
 530         | x<y       -> (Nil,False,t)
 531         | otherwise -> (Nil,True,Nil)
 532       Nil -> (Nil,False,Nil)
 533
 534 {----------------------------------------------------------------------
 535   Min/Max
 536 ----------------------------------------------------------------------}
 537
 538 -- | /O(min(n,W))/. Retrieves the maximal key of the set, and the set stripped from that element
 539 -- @fail@s (in the monad) when passed an empty set.
 540 maxView :: (Monad m) => IntSet -> m (Int, IntSet)
 541 maxView t
 542     = case t of
 543         Bin p m l r | m < 0 -> let (result,t') = maxViewUnsigned l in return (result, bin p m t' r)
 544         Bin p m l r         -> let (result,t') = maxViewUnsigned r in return (result, bin p m l t')
 545         Tip y -> return (y,Nil)
 546         Nil -> fail "maxView: empty set has no maximal element"
 547
 548 maxViewUnsigned :: IntSet -> (Int, IntSet)
 549 maxViewUnsigned t
 550     = case t of
 551         Bin p m l r -> let (result,t') = maxViewUnsigned r in (result, bin p m l t')
 552         Tip y -> (y, Nil)
 553
 554 -- | /O(min(n,W))/. Retrieves the minimal key of the set, and the set stripped from that element
 555 -- @fail@s (in the monad) when passed an empty set.
 556 minView :: (Monad m) => IntSet -> m (Int, IntSet)
 557 minView t
 558     = case t of
 559         Bin p m l r | m < 0 -> let (result,t') = minViewUnsigned r in return (result, bin p m l t')
 560         Bin p m l r         -> let (result,t') = minViewUnsigned l in return (result, bin p m t' r)
 561         Tip y -> return (y, Nil)
 562         Nil -> fail "minView: empty set has no minimal element"
 563
 564 minViewUnsigned :: IntSet -> (Int, IntSet)
 565 minViewUnsigned t
 566     = case t of
 567         Bin p m l r -> let (result,t') = minViewUnsigned l in (result, bin p m t' r)
 568         Tip y -> (y, Nil)
 569
 570
 571 -- Duplicate the Identity monad here because base < mtl.
 572 newtype Identity a = Identity { runIdentity :: a }
 573 instance Monad Identity where
 574         return a = Identity a
 575         m >>= k  = k (runIdentity m)
 576
 577
 578 -- | /O(min(n,W))/. Delete and find the minimal element.
 579 --
 580 -- > deleteFindMin set = (findMin set, deleteMin set)
 581 deleteFindMin :: IntSet -> (Int, IntSet)
 582 deleteFindMin = runIdentity . minView
 583
 584 -- | /O(min(n,W))/. Delete and find the maximal element.
 585 --
 586 -- > deleteFindMax set = (findMax set, deleteMax set)
 587 deleteFindMax :: IntSet -> (Int, IntSet)
 588 deleteFindMax = runIdentity . maxView
 589
 590 -- | /O(min(n,W))/. The minimal element of a set.
 591 findMin :: IntSet -> Int
 592 findMin = fst . runIdentity . minView
 593
 594 -- | /O(min(n,W))/. The maximal element of a set.
 595 findMax :: IntSet -> Int
 596 findMax = fst . runIdentity . maxView
 597
 598 -- | /O(min(n,W))/. Delete the minimal element.
 599 deleteMin :: IntSet -> IntSet
 600 deleteMin = snd . runIdentity . minView
 601
 602 -- | /O(min(n,W))/. Delete the maximal element.
 603 deleteMax :: IntSet -> IntSet
 604 deleteMax = snd . runIdentity . maxView
 605
 606
 607
 608 {----------------------------------------------------------------------
 609   Map
 610 ----------------------------------------------------------------------}
 611
 612 -- | /O(n*min(n,W))/.
 613 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
 614 --
 615 -- It's worth noting that the size of the result may be smaller if,
 616 -- for some @(x,y)@, @x \/= y && f x == f y@
 617
 618 map :: (Int->Int) -> IntSet -> IntSet
 619 map f = fromList . List.map f . toList
 620
 621 {--------------------------------------------------------------------
 622   Fold
 623 --------------------------------------------------------------------}
 624 -- | /O(n)/. Fold over the elements of a set in an unspecified order.
 625 --
 626 -- > sum set   == fold (+) 0 set
 627 -- > elems set == fold (:) [] set
 628 fold :: (Int -> b -> b) -> b -> IntSet -> b
 629 fold f z t
 630   = case t of
 631       Bin 0 m l r | m < 0 -> foldr f (foldr f z l) r
 632       -- put negative numbers before.
 633       Bin p m l r -> foldr f z t
 634       Tip x       -> f x z
 635       Nil         -> z
 636
 637 foldr :: (Int -> b -> b) -> b -> IntSet -> b
 638 foldr f z t
 639   = case t of
 640       Bin p m l r -> foldr f (foldr f z r) l
 641       Tip x       -> f x z
 642       Nil         -> z
 643
 644 {--------------------------------------------------------------------
 645   List variations
 646 --------------------------------------------------------------------}
 647 -- | /O(n)/. The elements of a set. (For sets, this is equivalent to toList)
 648 elems :: IntSet -> [Int]
 649 elems s
 650   = toList s
 651
 652 {--------------------------------------------------------------------
 653   Lists
 654 --------------------------------------------------------------------}
 655 -- | /O(n)/. Convert the set to a list of elements.
 656 toList :: IntSet -> [Int]
 657 toList t
 658   = fold (:) [] t
 659
 660 -- | /O(n)/. Convert the set to an ascending list of elements.
 661 toAscList :: IntSet -> [Int]
 662 toAscList t = toList t
 663
 664 -- | /O(n*min(n,W))/. Create a set from a list of integers.
 665 fromList :: [Int] -> IntSet
 666 fromList xs
 667   = foldlStrict ins empty xs
 668   where
 669     ins t x  = insert x t
 670
 671 -- | /O(n*min(n,W))/. Build a set from an ascending list of elements.
 672 fromAscList :: [Int] -> IntSet
 673 fromAscList xs
 674   = fromList xs
 675
 676 -- | /O(n*min(n,W))/. Build a set from an ascending list of distinct elements.
 677 fromDistinctAscList :: [Int] -> IntSet
 678 fromDistinctAscList xs
 679   = fromList xs
 680
 681
 682 {--------------------------------------------------------------------
 683   Eq
 684 --------------------------------------------------------------------}
 685 instance Eq IntSet where
 686   t1 == t2  = equal t1 t2
 687   t1 /= t2  = nequal t1 t2
 688
 689 equal :: IntSet -> IntSet -> Bool
 690 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
 691   = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
 692 equal (Tip x) (Tip y)
 693   = (x==y)
 694 equal Nil Nil = True
 695 equal t1 t2   = False
 696
 697 nequal :: IntSet -> IntSet -> Bool
 698 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
 699   = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
 700 nequal (Tip x) (Tip y)
 701   = (x/=y)
 702 nequal Nil Nil = False
 703 nequal t1 t2   = True
 704
 705 {--------------------------------------------------------------------
 706   Ord
 707 --------------------------------------------------------------------}
 708
 709 instance Ord IntSet where
 710     compare s1 s2 = compare (toAscList s1) (toAscList s2)
 711     -- tentative implementation. See if more efficient exists.
 712
 713 {--------------------------------------------------------------------
 714   Show
 715 --------------------------------------------------------------------}
 716 instance Show IntSet where
 717   showsPrec p xs = showParen (p > 10) $
 718     showString "fromList " . shows (toList xs)
 719
 720 showSet :: [Int] -> ShowS
 721 showSet []
 722   = showString "{}"
 723 showSet (x:xs)
 724   = showChar '{' . shows x . showTail xs
 725   where
 726     showTail []     = showChar '}'
 727     showTail (x:xs) = showChar ',' . shows x . showTail xs
 728
 729 {--------------------------------------------------------------------
 730   Read
 731 --------------------------------------------------------------------}
 732 instance Read IntSet where
 733 #ifdef __GLASGOW_HASKELL__
 734   readPrec = parens $ prec 10 $ do
 735     Ident "fromList" <- lexP
 736     xs <- readPrec
 737     return (fromList xs)
 738
 739   readListPrec = readListPrecDefault
 740 #else
 741   readsPrec p = readParen (p > 10) $ \ r -> do
 742     ("fromList",s) <- lex r
 743     (xs,t) <- reads s
 744     return (fromList xs,t)
 745 #endif
 746
 747 {--------------------------------------------------------------------
 748   Typeable
 749 --------------------------------------------------------------------}
 750
 751 #include "Typeable.h"
 752 INSTANCE_TYPEABLE0(IntSet,intSetTc,"IntSet")
 753
 754 {--------------------------------------------------------------------
 755   Debugging
 756 --------------------------------------------------------------------}
 757 -- | /O(n)/. Show the tree that implements the set. The tree is shown
 758 -- in a compressed, hanging format.
 759 showTree :: IntSet -> String
 760 showTree s
 761   = showTreeWith True False s
 762
 763
 764 {- | /O(n)/. The expression (@'showTreeWith' hang wide map@) shows
 765  the tree that implements the set. If @hang@ is
 766  'True', a /hanging/ tree is shown otherwise a rotated tree is shown. If
 767  @wide@ is 'True', an extra wide version is shown.
 768 -}
 769 showTreeWith :: Bool -> Bool -> IntSet -> String
 770 showTreeWith hang wide t
 771   | hang      = (showsTreeHang wide [] t) ""
 772   | otherwise = (showsTree wide [] [] t) ""
 773
 774 showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
 775 showsTree wide lbars rbars t
 776   = case t of
 777       Bin p m l r
 778           -> showsTree wide (withBar rbars) (withEmpty rbars) r .
 779              showWide wide rbars .
 780              showsBars lbars . showString (showBin p m) . showString "\n" .
 781              showWide wide lbars .
 782              showsTree wide (withEmpty lbars) (withBar lbars) l
 783       Tip x
 784           -> showsBars lbars . showString " " . shows x . showString "\n"
 785       Nil -> showsBars lbars . showString "|\n"
 786
 787 showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
 788 showsTreeHang wide bars t
 789   = case t of
 790       Bin p m l r
 791           -> showsBars bars . showString (showBin p m) . showString "\n" .
 792              showWide wide bars .
 793              showsTreeHang wide (withBar bars) l .
 794              showWide wide bars .
 795              showsTreeHang wide (withEmpty bars) r
 796       Tip x
 797           -> showsBars bars . showString " " . shows x . showString "\n"
 798       Nil -> showsBars bars . showString "|\n"
 799
 800 showBin p m
 801   = "*" -- ++ show (p,m)
 802
 803 showWide wide bars
 804   | wide      = showString (concat (reverse bars)) . showString "|\n"
 805   | otherwise = id
 806
 807 showsBars :: [String] -> ShowS
 808 showsBars bars
 809   = case bars of
 810       [] -> id
 811       _  -> showString (concat (reverse (tail bars))) . showString node
 812
 813 node           = "+--"
 814 withBar bars   = "|  ":bars
 815 withEmpty bars = "   ":bars
 816
 817
 818 {--------------------------------------------------------------------
 819   Helpers
 820 --------------------------------------------------------------------}
 821 {--------------------------------------------------------------------
 822   Join
 823 --------------------------------------------------------------------}
 824 join :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
 825 join p1 t1 p2 t2
 826   | zero p1 m = Bin p m t1 t2
 827   | otherwise = Bin p m t2 t1
 828   where
 829     m = branchMask p1 p2
 830     p = mask p1 m
 831
 832 {--------------------------------------------------------------------
 833   @bin@ assures that we never have empty trees within a tree.
 834 --------------------------------------------------------------------}
 835 bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
 836 bin p m l Nil = l
 837 bin p m Nil r = r
 838 bin p m l r   = Bin p m l r
 839
 840
 841 {--------------------------------------------------------------------
 842   Endian independent bit twiddling
 843 --------------------------------------------------------------------}
 844 zero :: Int -> Mask -> Bool
 845 zero i m
 846   = (natFromInt i) .&. (natFromInt m) == 0
 847
 848 nomatch,match :: Int -> Prefix -> Mask -> Bool
 849 nomatch i p m
 850   = (mask i m) /= p
 851
 852 match i p m
 853   = (mask i m) == p
 854
 855 mask :: Int -> Mask -> Prefix
 856 mask i m
 857   = maskW (natFromInt i) (natFromInt m)
 858
 859 zeroN :: Nat -> Nat -> Bool
 860 zeroN i m = (i .&. m) == 0
 861
 862 {--------------------------------------------------------------------
 863   Big endian operations
 864 --------------------------------------------------------------------}
 865 maskW :: Nat -> Nat -> Prefix
 866 maskW i m
 867   = intFromNat (i .&. (complement (m-1) `xor` m))
 868
 869 shorter :: Mask -> Mask -> Bool
 870 shorter m1 m2
 871   = (natFromInt m1) > (natFromInt m2)
 872
 873 branchMask :: Prefix -> Prefix -> Mask
 874 branchMask p1 p2
 875   = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
 876
 877 {----------------------------------------------------------------------
 878   Finding the highest bit (mask) in a word [x] can be done efficiently in
 879   three ways:
 880   * convert to a floating point value and the mantissa tells us the
 881     [log2(x)] that corresponds with the highest bit position. The mantissa
 882     is retrieved either via the standard C function [frexp] or by some bit
 883     twiddling on IEEE compatible numbers (float). Note that one needs to
 884     use at least [double] precision for an accurate mantissa of 32 bit
 885     numbers.
 886   * use bit twiddling, a logarithmic sequence of bitwise or's and shifts (bit).
 887   * use processor specific assembler instruction (asm).
 888
 889   The most portable way would be [bit], but is it efficient enough?
 890   I have measured the cycle counts of the different methods on an AMD
 891   Athlon-XP 1800 (~ Pentium III 1.8Ghz) using the RDTSC instruction:
 892
 893   highestBitMask: method  cycles
 894                   --------------
 895                    frexp   200
 896                    float    33
 897                    bit      11
 898                    asm      12
 899
 900   highestBit:     method  cycles
 901                   --------------
 902                    frexp   195
 903                    float    33
 904                    bit      11
 905                    asm      11
 906
 907   Wow, the bit twiddling is on today's RISC like machines even faster
 908   than a single CISC instruction (BSR)!
 909 ----------------------------------------------------------------------}
 910
 911 {----------------------------------------------------------------------
 912   [highestBitMask] returns a word where only the highest bit is set.
 913   It is found by first setting all bits in lower positions than the
 914   highest bit and than taking an exclusive or with the original value.
 915   Allthough the function may look expensive, GHC compiles this into
 916   excellent C code that subsequently compiled into highly efficient
 917   machine code. The algorithm is derived from Jorg Arndt's FXT library.
 918 ----------------------------------------------------------------------}
 919 highestBitMask :: Nat -> Nat
 920 highestBitMask x
 921   = case (x .|. shiftRL x 1) of
 922      x -> case (x .|. shiftRL x 2) of
 923       x -> case (x .|. shiftRL x 4) of
 924        x -> case (x .|. shiftRL x 8) of
 925         x -> case (x .|. shiftRL x 16) of
 926          x -> case (x .|. shiftRL x 32) of   -- for 64 bit platforms
 927           x -> (x `xor` (shiftRL x 1))
 928
 929
 930 {--------------------------------------------------------------------
 931   Utilities
 932 --------------------------------------------------------------------}
 933 foldlStrict f z xs
 934   = case xs of
 935       []     -> z
 936       (x:xx) -> let z' = f z x in seq z' (foldlStrict f z' xx)
 937
 938
 939 {-
 940 {--------------------------------------------------------------------
 941   Testing
 942 --------------------------------------------------------------------}
 943 testTree :: [Int] -> IntSet
 944 testTree xs   = fromList xs
 945 test1 = testTree [1..20]
 946 test2 = testTree [30,29..10]
 947 test3 = testTree [1,4,6,89,2323,53,43,234,5,79,12,9,24,9,8,423,8,42,4,8,9,3]
 948
 949 {--------------------------------------------------------------------
 950   QuickCheck
 951 --------------------------------------------------------------------}
 952 qcheck prop
 953   = check config prop
 954   where
 955     config = Config
 956       { configMaxTest = 500
 957       , configMaxFail = 5000
 958       , configSize    = \n -> (div n 2 + 3)
 959       , configEvery   = \n args -> let s = show n in s ++ [ '\b' | _ <- s ]
 960       }
 961
 962
 963 {--------------------------------------------------------------------
 964   Arbitrary, reasonably balanced trees
 965 --------------------------------------------------------------------}
 966 instance Arbitrary IntSet where
 967   arbitrary = do{ xs <- arbitrary
 968                 ; return (fromList xs)
 969                 }
 970
 971
 972 {--------------------------------------------------------------------
 973   Single, Insert, Delete
 974 --------------------------------------------------------------------}
 975 prop_Single :: Int -> Bool
 976 prop_Single x
 977   = (insert x empty == singleton x)
 978
 979 prop_InsertDelete :: Int -> IntSet -> Property
 980 prop_InsertDelete k t
 981   = not (member k t) ==> delete k (insert k t) == t
 982
 983
 984 {--------------------------------------------------------------------
 985   Union
 986 --------------------------------------------------------------------}
 987 prop_UnionInsert :: Int -> IntSet -> Bool
 988 prop_UnionInsert x t
 989   = union t (singleton x) == insert x t
 990
 991 prop_UnionAssoc :: IntSet -> IntSet -> IntSet -> Bool
 992 prop_UnionAssoc t1 t2 t3
 993   = union t1 (union t2 t3) == union (union t1 t2) t3
 994
 995 prop_UnionComm :: IntSet -> IntSet -> Bool
 996 prop_UnionComm t1 t2
 997   = (union t1 t2 == union t2 t1)
 998
 999 prop_Diff :: [Int] -> [Int] -> Bool
1000 prop_Diff xs ys
1001   =  toAscList (difference (fromList xs) (fromList ys))
1002     == List.sort ((List.\\) (nub xs)  (nub ys))
1003
1004 prop_Int :: [Int] -> [Int] -> Bool
1005 prop_Int xs ys
1006   =  toAscList (intersection (fromList xs) (fromList ys))
1007     == List.sort (nub ((List.intersect) (xs)  (ys)))
1008
1009 {--------------------------------------------------------------------
1010   Lists
1011 --------------------------------------------------------------------}
1012 prop_Ordered
1013   = forAll (choose (5,100)) $ \n ->
1014     let xs = [0..n::Int]
1015     in fromAscList xs == fromList xs
1016
1017 prop_List :: [Int] -> Bool
1018 prop_List xs
1019   = (sort (nub xs) == toAscList (fromList xs))
1020 -}