X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=Data%2FIntSet.hs;h=16226082b7f98e8f8475456341746321dc3fd8c5;hb=4b26136ab82fb1ff12e49477c4833a9586d368c5;hp=90b9bd9f7c6358a4361ba33fe490482fb849a998;hpb=faa067c418ce36f4cb4e54e2fb19012736ef49ed;p=haskell-directory.git diff --git a/Data/IntSet.hs b/Data/IntSet.hs index 90b9bd9..1622608 100644 --- a/Data/IntSet.hs +++ b/Data/IntSet.hs @@ -10,10 +10,11 @@ -- -- An efficient implementation of integer sets. -- --- This module is intended to be imported @qualified@, to avoid name --- clashes with "Prelude" functions. eg. +-- Since many function names (but not the type name) clash with +-- "Prelude" names, this module is usually imported @qualified@, e.g. -- --- > import Data.IntSet as Set +-- > import Data.IntSet (IntSet) +-- > import qualified Data.IntSet as IntSet -- -- The implementation is based on /big-endian patricia trees/. This data -- structure performs especially well on binary operations like 'union' @@ -46,6 +47,7 @@ module Data.IntSet ( , null , size , member + , notMember , isSubsetOf , isProperSubsetOf @@ -66,6 +68,16 @@ module Data.IntSet ( , split , splitMember + -- * Min\/Max + , findMin + , findMax + , deleteMin + , deleteMax + , deleteFindMin + , deleteFindMax + , maxView + , minView + -- * Map , map @@ -91,10 +103,10 @@ module Data.IntSet ( import Prelude hiding (lookup,filter,foldr,foldl,null,map) import Data.Bits -import Data.Int import qualified Data.List as List -import Data.Monoid +import Data.Monoid (Monoid(..)) +import Data.Typeable {- -- just for testing @@ -103,21 +115,24 @@ import List (nub,sort) import qualified List -} +#if __GLASGOW_HASKELL__ +import Text.Read +import Data.Generics.Basics (Data(..), mkNorepType) +import Data.Generics.Instances () +#endif -#ifdef __GLASGOW_HASKELL__ -{-------------------------------------------------------------------- - GHC: use unboxing to get @shiftRL@ inlined. ---------------------------------------------------------------------} #if __GLASGOW_HASKELL__ >= 503 -import GHC.Word import GHC.Exts ( Word(..), Int(..), shiftRL# ) -#else +#elif __GLASGOW_HASKELL__ import Word import GlaExts ( Word(..), Int(..), shiftRL# ) +#else +import Data.Word #endif infixl 9 \\{-This comment teaches CPP correct behaviour -} +-- A "Nat" is a natural machine word (an unsigned Int) type Nat = Word natFromInt :: Int -> Nat @@ -127,50 +142,14 @@ intFromNat :: Nat -> Int intFromNat w = fromIntegral w shiftRL :: Nat -> Int -> Nat -shiftRL (W# x) (I# i) - = W# (shiftRL# x i) - -#elif __HUGS__ +#if __GLASGOW_HASKELL__ {-------------------------------------------------------------------- - Hugs: - * raises errors on boundary values when using 'fromIntegral' - but not with the deprecated 'fromInt/toInt'. - * Older Hugs doesn't define 'Word'. - * Newer Hugs defines 'Word' in the Prelude but no operations. + GHC: use unboxing to get @shiftRL@ inlined. --------------------------------------------------------------------} -import Data.Word -infixl 9 \\ -- comment to fool cpp - -type Nat = Word32 -- illegal on 64-bit platforms! - -natFromInt :: Int -> Nat -natFromInt i = fromInt i - -intFromNat :: Nat -> Int -intFromNat w = toInt w - -shiftRL :: Nat -> Int -> Nat -shiftRL x i = shiftR x i - +shiftRL (W# x) (I# i) + = W# (shiftRL# x i) #else -{-------------------------------------------------------------------- - 'Standard' Haskell - * A "Nat" is a natural machine word (an unsigned Int) ---------------------------------------------------------------------} -import Data.Word -infixl 9 \\ -- comment to fool cpp - -type Nat = Word - -natFromInt :: Int -> Nat -natFromInt i = fromIntegral i - -intFromNat :: Nat -> Int -intFromNat w = fromIntegral w - -shiftRL :: Nat -> Int -> Nat -shiftRL w i = shiftR w i - +shiftRL x i = shiftR x i #endif {-------------------------------------------------------------------- @@ -187,10 +166,34 @@ m1 \\ m2 = difference m1 m2 data IntSet = Nil | Tip {-# UNPACK #-} !Int | Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet +-- Invariant: Nil is never found as a child of Bin. + type Prefix = Int type Mask = Int +instance Monoid IntSet where + mempty = empty + mappend = union + mconcat = unions + +#if __GLASGOW_HASKELL__ + +{-------------------------------------------------------------------- + A Data instance +--------------------------------------------------------------------} + +-- This instance preserves data abstraction at the cost of inefficiency. +-- We omit reflection services for the sake of data abstraction. + +instance Data IntSet where + gfoldl f z is = z fromList `f` (toList is) + toConstr _ = error "toConstr" + gunfold _ _ = error "gunfold" + dataTypeOf _ = mkNorepType "Data.IntSet.IntSet" + +#endif + {-------------------------------------------------------------------- Query --------------------------------------------------------------------} @@ -218,6 +221,10 @@ member x t Tip y -> (x==y) Nil -> False +-- | /O(min(n,W))/. Is the element not in the set? +notMember :: Int -> IntSet -> Bool +notMember k = not . member k + -- 'lookup' is used by 'intersection' for left-biasing lookup :: Int -> IntSet -> Maybe Int lookup k t @@ -420,7 +427,7 @@ subsetCmp Nil Nil = EQ subsetCmp Nil t = LT -- | /O(n+m)/. Is this a subset? --- @(s1 `isSubsetOf` s2)@ tells whether s1 is a subset of s2. +-- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@. isSubsetOf :: IntSet -> IntSet -> Bool isSubsetOf t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2) @@ -461,7 +468,7 @@ partition pred t Nil -> (Nil,Nil) --- | /O(log n)/. The expression (@split x set@) is a pair @(set1,set2)@ +-- | /O(min(n,W))/. The expression (@'split' x set@) is a pair @(set1,set2)@ -- where all elements in @set1@ are lower than @x@ and all elements in -- @set2@ larger than @x@. -- @@ -470,34 +477,140 @@ split :: Int -> IntSet -> (IntSet,IntSet) split x t = case t of Bin p m l r - | zero x m -> let (lt,gt) = split x l in (lt,union gt r) - | otherwise -> let (lt,gt) = split x r in (union l lt,gt) + | m < 0 -> if x >= 0 then let (lt,gt) = split' x l in (union r lt, gt) + else let (lt,gt) = split' x r in (lt, union gt l) + -- handle negative numbers. + | otherwise -> split' x t + Tip y + | x>y -> (t,Nil) + | x (Nil,t) + | otherwise -> (Nil,Nil) + Nil -> (Nil, Nil) + +split' :: Int -> IntSet -> (IntSet,IntSet) +split' x t + = case t of + Bin p m l r + | match x p m -> if zero x m then let (lt,gt) = split' x l in (lt,union gt r) + else let (lt,gt) = split' x r in (union l lt,gt) + | otherwise -> if x < p then (Nil, t) + else (t, Nil) Tip y | x>y -> (t,Nil) | x (Nil,t) | otherwise -> (Nil,Nil) Nil -> (Nil,Nil) --- | /O(log n)/. Performs a 'split' but also returns whether the pivot +-- | /O(min(n,W))/. Performs a 'split' but also returns whether the pivot -- element was found in the original set. -splitMember :: Int -> IntSet -> (Bool,IntSet,IntSet) +splitMember :: Int -> IntSet -> (IntSet,Bool,IntSet) splitMember x t = case t of Bin p m l r - | zero x m -> let (found,lt,gt) = splitMember x l in (found,lt,union gt r) - | otherwise -> let (found,lt,gt) = splitMember x r in (found,union l lt,gt) + | m < 0 -> if x >= 0 then let (lt,found,gt) = splitMember' x l in (union r lt, found, gt) + else let (lt,found,gt) = splitMember' x r in (lt, found, union gt l) + -- handle negative numbers. + | otherwise -> splitMember' x t + Tip y + | x>y -> (t,False,Nil) + | x (Nil,False,t) + | otherwise -> (Nil,True,Nil) + Nil -> (Nil,False,Nil) + +splitMember' :: Int -> IntSet -> (IntSet,Bool,IntSet) +splitMember' x t + = case t of + Bin p m l r + | match x p m -> if zero x m then let (lt,found,gt) = splitMember x l in (lt,found,union gt r) + else let (lt,found,gt) = splitMember x r in (union l lt,found,gt) + | otherwise -> if x < p then (Nil, False, t) + else (t, False, Nil) Tip y - | x>y -> (False,t,Nil) - | x (False,Nil,t) - | otherwise -> (True,Nil,Nil) - Nil -> (False,Nil,Nil) + | x>y -> (t,False,Nil) + | x (Nil,False,t) + | otherwise -> (Nil,True,Nil) + Nil -> (Nil,False,Nil) + +{---------------------------------------------------------------------- + Min/Max +----------------------------------------------------------------------} + +-- | /O(min(n,W))/. Retrieves the maximal key of the set, and the set stripped from that element +-- @fail@s (in the monad) when passed an empty set. +maxView :: (Monad m) => IntSet -> m (Int, IntSet) +maxView t + = case t of + Bin p m l r | m < 0 -> let (result,t') = maxViewUnsigned l in return (result, bin p m t' r) + Bin p m l r -> let (result,t') = maxViewUnsigned r in return (result, bin p m l t') + Tip y -> return (y,Nil) + Nil -> fail "maxView: empty set has no maximal element" + +maxViewUnsigned :: IntSet -> (Int, IntSet) +maxViewUnsigned t + = case t of + Bin p m l r -> let (result,t') = maxViewUnsigned r in (result, bin p m l t') + Tip y -> (y, Nil) + +-- | /O(min(n,W))/. Retrieves the minimal key of the set, and the set stripped from that element +-- @fail@s (in the monad) when passed an empty set. +minView :: (Monad m) => IntSet -> m (Int, IntSet) +minView t + = case t of + Bin p m l r | m < 0 -> let (result,t') = minViewUnsigned r in return (result, bin p m l t') + Bin p m l r -> let (result,t') = minViewUnsigned l in return (result, bin p m t' r) + Tip y -> return (y, Nil) + Nil -> fail "minView: empty set has no minimal element" + +minViewUnsigned :: IntSet -> (Int, IntSet) +minViewUnsigned t + = case t of + Bin p m l r -> let (result,t') = minViewUnsigned l in (result, bin p m t' r) + Tip y -> (y, Nil) + + +-- Duplicate the Identity monad here because base < mtl. +newtype Identity a = Identity { runIdentity :: a } +instance Monad Identity where + return a = Identity a + m >>= k = k (runIdentity m) + + +-- | /O(min(n,W))/. Delete and find the minimal element. +-- +-- > deleteFindMin set = (findMin set, deleteMin set) +deleteFindMin :: IntSet -> (Int, IntSet) +deleteFindMin = runIdentity . minView + +-- | /O(min(n,W))/. Delete and find the maximal element. +-- +-- > deleteFindMax set = (findMax set, deleteMax set) +deleteFindMax :: IntSet -> (Int, IntSet) +deleteFindMax = runIdentity . maxView + +-- | /O(min(n,W))/. The minimal element of a set. +findMin :: IntSet -> Int +findMin = fst . runIdentity . minView + +-- | /O(min(n,W))/. The maximal element of a set. +findMax :: IntSet -> Int +findMax = fst . runIdentity . maxView + +-- | /O(min(n,W))/. Delete the minimal element. +deleteMin :: IntSet -> IntSet +deleteMin = snd . runIdentity . minView + +-- | /O(min(n,W))/. Delete the maximal element. +deleteMax :: IntSet -> IntSet +deleteMax = snd . runIdentity . maxView + + {---------------------------------------------------------------------- Map ----------------------------------------------------------------------} -- | /O(n*min(n,W))/. --- @map f s@ is the set obtained by applying @f@ to each element of @s@. +-- @'map' f s@ is the set obtained by applying @f@ to each element of @s@. -- -- It's worth noting that the size of the result may be smaller if, -- for some @(x,y)@, @x \/= y && f x == f y@ @@ -514,7 +627,12 @@ map f = fromList . List.map f . toList -- > elems set == fold (:) [] set fold :: (Int -> b -> b) -> b -> IntSet -> b fold f z t - = foldr f z t + = case t of + Bin 0 m l r | m < 0 -> foldr f (foldr f z l) r + -- put negative numbers before. + Bin p m l r -> foldr f z t + Tip x -> f x z + Nil -> z foldr :: (Int -> b -> b) -> b -> IntSet -> b foldr f z t @@ -541,9 +659,7 @@ toList t -- | /O(n)/. Convert the set to an ascending list of elements. toAscList :: IntSet -> [Int] -toAscList t - = -- NOTE: the following algorithm only works for big-endian trees - let (pos,neg) = span (>=0) (foldr (:) [] t) in neg ++ pos +toAscList t = toList t -- | /O(n*min(n,W))/. Create a set from a list of integers. fromList :: [Int] -> IntSet @@ -595,19 +711,11 @@ instance Ord IntSet where -- tentative implementation. See if more efficient exists. {-------------------------------------------------------------------- - Monoid ---------------------------------------------------------------------} - -instance Monoid IntSet where - mempty = empty - mappend = union - mconcat = unions - -{-------------------------------------------------------------------- Show --------------------------------------------------------------------} instance Show IntSet where - showsPrec d s = showSet (toList s) + showsPrec p xs = showParen (p > 10) $ + showString "fromList " . shows (toList xs) showSet :: [Int] -> ShowS showSet [] @@ -619,6 +727,31 @@ showSet (x:xs) showTail (x:xs) = showChar ',' . shows x . showTail xs {-------------------------------------------------------------------- + Read +--------------------------------------------------------------------} +instance Read IntSet where +#ifdef __GLASGOW_HASKELL__ + readPrec = parens $ prec 10 $ do + Ident "fromList" <- lexP + xs <- readPrec + return (fromList xs) + + readListPrec = readListPrecDefault +#else + readsPrec p = readParen (p > 10) $ \ r -> do + ("fromList",s) <- lex r + (xs,t) <- reads s + return (fromList xs,t) +#endif + +{-------------------------------------------------------------------- + Typeable +--------------------------------------------------------------------} + +#include "Typeable.h" +INSTANCE_TYPEABLE0(IntSet,intSetTc,"IntSet") + +{-------------------------------------------------------------------- Debugging --------------------------------------------------------------------} -- | /O(n)/. Show the tree that implements the set. The tree is shown @@ -628,10 +761,10 @@ showTree s = showTreeWith True False s -{- | /O(n)/. The expression (@showTreeWith hang wide map@) shows +{- | /O(n)/. The expression (@'showTreeWith' hang wide map@) shows the tree that implements the set. If @hang@ is - @True@, a /hanging/ tree is shown otherwise a rotated tree is shown. If - @wide@ is true, an extra wide version is shown. + 'True', a /hanging/ tree is shown otherwise a rotated tree is shown. If + @wide@ is 'True', an extra wide version is shown. -} showTreeWith :: Bool -> Bool -> IntSet -> String showTreeWith hang wide t