2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
4 \section[Util]{Highly random utility functions}
7 -- IF_NOT_GHC is meant to make this module useful outside the context of GHC
12 Eager, thenEager, returnEager, mapEager, appEager, runEager,
14 -- general list processing
15 zipEqual, zipWithEqual, zipWith3Equal, zipWith4Equal,
16 zipLazy, stretchZipEqual,
17 mapAndUnzip, mapAndUnzip3,
18 nOfThem, lengthExceeds, isSingleton,
23 assoc, assocUsing, assocDefault, assocDefaultUsing,
26 hasNoDups, equivClasses, runs, removeDups, equivClassesByUniq,
29 IF_NOT_GHC(quicksort COMMA stableSortLt COMMA mergesort COMMA)
31 IF_NOT_GHC(mergeSort COMMA) naturalMergeSortLe, -- from Carsten
32 IF_NOT_GHC(naturalMergeSort COMMA mergeSortLe COMMA)
34 -- transitive closures
38 mapAccumL, mapAccumR, mapAccumB,
44 IF_NOT_GHC(cfst COMMA applyToPair COMMA applyToFst COMMA)
45 IF_NOT_GHC(applyToSnd COMMA foldPair COMMA)
49 #include "HsVersions.h"
51 import List ( zipWith4 )
52 import Panic ( panic )
53 import Unique ( Unique )
54 import UniqFM ( eltsUFM, emptyUFM, addToUFM_C )
59 %************************************************************************
61 \subsection{The Eager monad}
63 %************************************************************************
65 The @Eager@ monad is just an encoding of continuation-passing style,
66 used to allow you to express "do this and then that", mainly to avoid
67 space leaks. It's done with a type synonym to save bureaucracy.
70 type Eager ans a = (a -> ans) -> ans
72 runEager :: Eager a a -> a
73 runEager m = m (\x -> x)
75 appEager :: Eager ans a -> (a -> ans) -> ans
76 appEager m cont = m cont
78 thenEager :: Eager ans a -> (a -> Eager ans b) -> Eager ans b
79 thenEager m k cont = m (\r -> k r cont)
81 returnEager :: a -> Eager ans a
82 returnEager v cont = cont v
84 mapEager :: (a -> Eager ans b) -> [a] -> Eager ans [b]
85 mapEager f [] = returnEager []
86 mapEager f (x:xs) = f x `thenEager` \ y ->
87 mapEager f xs `thenEager` \ ys ->
91 %************************************************************************
93 \subsection[Utils-lists]{General list processing}
95 %************************************************************************
97 A paranoid @zip@ (and some @zipWith@ friends) that checks the lists
98 are of equal length. Alastair Reid thinks this should only happen if
99 DEBUGging on; hey, why not?
102 zipEqual :: String -> [a] -> [b] -> [(a,b)]
103 zipWithEqual :: String -> (a->b->c) -> [a]->[b]->[c]
104 zipWith3Equal :: String -> (a->b->c->d) -> [a]->[b]->[c]->[d]
105 zipWith4Equal :: String -> (a->b->c->d->e) -> [a]->[b]->[c]->[d]->[e]
109 zipWithEqual _ = zipWith
110 zipWith3Equal _ = zipWith3
111 zipWith4Equal _ = zipWith4
113 zipEqual msg [] [] = []
114 zipEqual msg (a:as) (b:bs) = (a,b) : zipEqual msg as bs
115 zipEqual msg as bs = panic ("zipEqual: unequal lists:"++msg)
117 zipWithEqual msg z (a:as) (b:bs)= z a b : zipWithEqual msg z as bs
118 zipWithEqual msg _ [] [] = []
119 zipWithEqual msg _ _ _ = panic ("zipWithEqual: unequal lists:"++msg)
121 zipWith3Equal msg z (a:as) (b:bs) (c:cs)
122 = z a b c : zipWith3Equal msg z as bs cs
123 zipWith3Equal msg _ [] [] [] = []
124 zipWith3Equal msg _ _ _ _ = panic ("zipWith3Equal: unequal lists:"++msg)
126 zipWith4Equal msg z (a:as) (b:bs) (c:cs) (d:ds)
127 = z a b c d : zipWith4Equal msg z as bs cs ds
128 zipWith4Equal msg _ [] [] [] [] = []
129 zipWith4Equal msg _ _ _ _ _ = panic ("zipWith4Equal: unequal lists:"++msg)
134 -- zipLazy is lazy in the second list (observe the ~)
136 zipLazy :: [a] -> [b] -> [(a,b)]
138 zipLazy (x:xs) ~(y:ys) = (x,y) : zipLazy xs ys
143 stretchZipEqual :: (a -> b -> Maybe a) -> [a] -> [b] -> [a]
144 -- (stretchZipEqual f xs ys) stretches ys to "fit" the places where f returns a Just
146 stretchZipEqual f [] [] = []
147 stretchZipEqual f (x:xs) (y:ys) = case f x y of
148 Just x' -> x' : stretchZipEqual f xs ys
149 Nothing -> x : stretchZipEqual f xs (y:ys)
154 mapAndUnzip :: (a -> (b, c)) -> [a] -> ([b], [c])
156 mapAndUnzip f [] = ([],[])
160 (rs1, rs2) = mapAndUnzip f xs
164 mapAndUnzip3 :: (a -> (b, c, d)) -> [a] -> ([b], [c], [d])
166 mapAndUnzip3 f [] = ([],[],[])
167 mapAndUnzip3 f (x:xs)
170 (rs1, rs2, rs3) = mapAndUnzip3 f xs
172 (r1:rs1, r2:rs2, r3:rs3)
176 nOfThem :: Int -> a -> [a]
177 nOfThem n thing = replicate n thing
179 lengthExceeds :: [a] -> Int -> Bool
181 [] `lengthExceeds` n = 0 > n
182 (x:xs) `lengthExceeds` n = (1 > n) || (xs `lengthExceeds` (n - 1))
184 isSingleton :: [a] -> Bool
186 isSingleton [x] = True
187 isSingleton _ = False
191 snocView :: [a] -> ([a], a) -- Split off the last element
192 snocView xs = go xs []
194 go [x] acc = (reverse acc, x)
195 go (x:xs) acc = go xs (x:acc)
198 Debugging/specialising versions of \tr{elem} and \tr{notElem}
201 isIn, isn'tIn :: (Eq a) => String -> a -> [a] -> Bool
204 isIn msg x ys = elem__ x ys
205 isn'tIn msg x ys = notElem__ x ys
207 --these are here to be SPECIALIZEd (automagically)
209 elem__ x (y:ys) = x==y || elem__ x ys
211 notElem__ x [] = True
212 notElem__ x (y:ys) = x /= y && notElem__ x ys
220 | i _GE_ ILIT(100) = panic ("Over-long elem in: " ++ msg)
221 | otherwise = x == y || elem (i _ADD_ ILIT(1)) x ys
224 = notElem ILIT(0) x ys
226 notElem i x [] = True
228 | i _GE_ ILIT(100) = panic ("Over-long notElem in: " ++ msg)
229 | otherwise = x /= y && notElem (i _ADD_ ILIT(1)) x ys
235 %************************************************************************
237 \subsection[Utils-assoc]{Association lists}
239 %************************************************************************
241 See also @assocMaybe@ and @mkLookupFun@ in module @Maybes@.
244 assoc :: (Eq a) => String -> [(a, b)] -> a -> b
245 assocDefault :: (Eq a) => b -> [(a, b)] -> a -> b
246 assocUsing :: (a -> a -> Bool) -> String -> [(a, b)] -> a -> b
247 assocDefaultUsing :: (a -> a -> Bool) -> b -> [(a, b)] -> a -> b
249 assocDefaultUsing eq deflt ((k,v) : rest) key
251 | otherwise = assocDefaultUsing eq deflt rest key
253 assocDefaultUsing eq deflt [] key = deflt
255 assoc crash_msg list key = assocDefaultUsing (==) (panic ("Failed in assoc: " ++ crash_msg)) list key
256 assocDefault deflt list key = assocDefaultUsing (==) deflt list key
257 assocUsing eq crash_msg list key = assocDefaultUsing eq (panic ("Failed in assoc: " ++ crash_msg)) list key
260 %************************************************************************
262 \subsection[Utils-dups]{Duplicate-handling}
264 %************************************************************************
267 hasNoDups :: (Eq a) => [a] -> Bool
269 hasNoDups xs = f [] xs
271 f seen_so_far [] = True
272 f seen_so_far (x:xs) = if x `is_elem` seen_so_far then
277 is_elem = isIn "hasNoDups"
281 equivClasses :: (a -> a -> Ordering) -- Comparison
285 equivClasses cmp stuff@[] = []
286 equivClasses cmp stuff@[item] = [stuff]
287 equivClasses cmp items
288 = runs eq (sortLt lt items)
290 eq a b = case cmp a b of { EQ -> True; _ -> False }
291 lt a b = case cmp a b of { LT -> True; _ -> False }
294 The first cases in @equivClasses@ above are just to cut to the point
297 @runs@ groups a list into a list of lists, each sublist being a run of
298 identical elements of the input list. It is passed a predicate @p@ which
299 tells when two elements are equal.
302 runs :: (a -> a -> Bool) -- Equality
307 runs p (x:xs) = case (span (p x) xs) of
308 (first, rest) -> (x:first) : (runs p rest)
312 removeDups :: (a -> a -> Ordering) -- Comparison function
314 -> ([a], -- List with no duplicates
315 [[a]]) -- List of duplicate groups. One representative from
316 -- each group appears in the first result
318 removeDups cmp [] = ([], [])
319 removeDups cmp [x] = ([x],[])
321 = case (mapAccumR collect_dups [] (equivClasses cmp xs)) of { (dups, xs') ->
324 collect_dups dups_so_far [x] = (dups_so_far, x)
325 collect_dups dups_so_far dups@(x:xs) = (dups:dups_so_far, x)
330 equivClassesByUniq :: (a -> Unique) -> [a] -> [[a]]
331 -- NB: it's *very* important that if we have the input list [a,b,c],
332 -- where a,b,c all have the same unique, then we get back the list
336 -- Hence the use of foldr, plus the reversed-args tack_on below
337 equivClassesByUniq get_uniq xs
338 = eltsUFM (foldr add emptyUFM xs)
340 add a ufm = addToUFM_C tack_on ufm (get_uniq a) [a]
341 tack_on old new = new++old
344 %************************************************************************
346 \subsection[Utils-sorting]{Sorting}
348 %************************************************************************
350 %************************************************************************
352 \subsubsection[Utils-quicksorting]{Quicksorts}
354 %************************************************************************
357 -- tail-recursive, etc., "quicker sort" [as per Meira thesis]
358 quicksort :: (a -> a -> Bool) -- Less-than predicate
360 -> [a] -- Result list in increasing order
363 quicksort lt [x] = [x]
364 quicksort lt (x:xs) = split x [] [] xs
366 split x lo hi [] = quicksort lt lo ++ (x : quicksort lt hi)
367 split x lo hi (y:ys) | y `lt` x = split x (y:lo) hi ys
368 | True = split x lo (y:hi) ys
371 Quicksort variant from Lennart's Haskell-library contribution. This
372 is a {\em stable} sort.
375 stableSortLt = sortLt -- synonym; when we want to highlight stable-ness
377 sortLt :: (a -> a -> Bool) -- Less-than predicate
379 -> [a] -- Result list
381 sortLt lt l = qsort lt l []
383 -- qsort is stable and does not concatenate.
384 qsort :: (a -> a -> Bool) -- Less-than predicate
385 -> [a] -- xs, Input list
386 -> [a] -- r, Concatenate this list to the sorted input list
387 -> [a] -- Result = sort xs ++ r
391 qsort lt (x:xs) r = qpart lt x xs [] [] r
393 -- qpart partitions and sorts the sublists
394 -- rlt contains things less than x,
395 -- rge contains the ones greater than or equal to x.
396 -- Both have equal elements reversed with respect to the original list.
398 qpart lt x [] rlt rge r =
399 -- rlt and rge are in reverse order and must be sorted with an
400 -- anti-stable sorting
401 rqsort lt rlt (x : rqsort lt rge r)
403 qpart lt x (y:ys) rlt rge r =
406 qpart lt x ys (y:rlt) rge r
409 qpart lt x ys rlt (y:rge) r
411 -- rqsort is as qsort but anti-stable, i.e. reverses equal elements
413 rqsort lt [x] r = x:r
414 rqsort lt (x:xs) r = rqpart lt x xs [] [] r
416 rqpart lt x [] rle rgt r =
417 qsort lt rle (x : qsort lt rgt r)
419 rqpart lt x (y:ys) rle rgt r =
422 rqpart lt x ys rle (y:rgt) r
425 rqpart lt x ys (y:rle) rgt r
428 %************************************************************************
430 \subsubsection[Utils-dull-mergesort]{A rather dull mergesort}
432 %************************************************************************
435 mergesort :: (a -> a -> Ordering) -> [a] -> [a]
437 mergesort cmp xs = merge_lists (split_into_runs [] xs)
439 a `le` b = case cmp a b of { LT -> True; EQ -> True; GT -> False }
440 a `ge` b = case cmp a b of { LT -> False; EQ -> True; GT -> True }
442 split_into_runs [] [] = []
443 split_into_runs run [] = [run]
444 split_into_runs [] (x:xs) = split_into_runs [x] xs
445 split_into_runs [r] (x:xs) | x `ge` r = split_into_runs [r,x] xs
446 split_into_runs rl@(r:rs) (x:xs) | x `le` r = split_into_runs (x:rl) xs
447 | True = rl : (split_into_runs [x] xs)
450 merge_lists (x:xs) = merge x (merge_lists xs)
454 merge xl@(x:xs) yl@(y:ys)
456 EQ -> x : y : (merge xs ys)
457 LT -> x : (merge xs yl)
458 GT -> y : (merge xl ys)
461 %************************************************************************
463 \subsubsection[Utils-Carsten-mergesort]{A mergesort from Carsten}
465 %************************************************************************
468 Date: Mon, 3 May 93 20:45:23 +0200
469 From: Carsten Kehler Holst <kehler@cs.chalmers.se>
470 To: partain@dcs.gla.ac.uk
471 Subject: natural merge sort beats quick sort [ and it is prettier ]
473 Here is a piece of Haskell code that I'm rather fond of. See it as an
474 attempt to get rid of the ridiculous quick-sort routine. group is
475 quite useful by itself I think it was John's idea originally though I
476 believe the lazy version is due to me [surprisingly complicated].
477 gamma [used to be called] is called gamma because I got inspired by
478 the Gamma calculus. It is not very close to the calculus but does
479 behave less sequentially than both foldr and foldl. One could imagine
480 a version of gamma that took a unit element as well thereby avoiding
481 the problem with empty lists.
483 I've tried this code against
485 1) insertion sort - as provided by haskell
486 2) the normal implementation of quick sort
487 3) a deforested version of quick sort due to Jan Sparud
488 4) a super-optimized-quick-sort of Lennart's
490 If the list is partially sorted both merge sort and in particular
491 natural merge sort wins. If the list is random [ average length of
492 rising subsequences = approx 2 ] mergesort still wins and natural
493 merge sort is marginally beaten by Lennart's soqs. The space
494 consumption of merge sort is a bit worse than Lennart's quick sort
495 approx a factor of 2. And a lot worse if Sparud's bug-fix [see his
496 fpca article ] isn't used because of group.
503 group :: (a -> a -> Bool) -> [a] -> [[a]]
506 Date: Mon, 12 Feb 1996 15:09:41 +0000
507 From: Andy Gill <andy@dcs.gla.ac.uk>
509 Here is a `better' definition of group.
512 group p (x:xs) = group' xs x x (x :)
514 group' [] _ _ s = [s []]
515 group' (x:xs) x_min x_max s
516 | not (x `p` x_max) = group' xs x_min x (s . (x :))
517 | x `p` x_min = group' xs x x_max ((x :) . s)
518 | otherwise = s [] : group' xs x x (x :)
520 -- This one works forwards *and* backwards, as well as also being
521 -- faster that the one in Util.lhs.
526 let ((h1:t1):tt1) = group p xs
527 (t,tt) = if null xs then ([],[]) else
528 if x `p` h1 then (h1:t1,tt1) else
533 generalMerge :: (a -> a -> Bool) -> [a] -> [a] -> [a]
534 generalMerge p xs [] = xs
535 generalMerge p [] ys = ys
536 generalMerge p (x:xs) (y:ys) | x `p` y = x : generalMerge p xs (y:ys)
537 | otherwise = y : generalMerge p (x:xs) ys
539 -- gamma is now called balancedFold
541 balancedFold :: (a -> a -> a) -> [a] -> a
542 balancedFold f [] = error "can't reduce an empty list using balancedFold"
543 balancedFold f [x] = x
544 balancedFold f l = balancedFold f (balancedFold' f l)
546 balancedFold' :: (a -> a -> a) -> [a] -> [a]
547 balancedFold' f (x:y:xs) = f x y : balancedFold' f xs
548 balancedFold' f xs = xs
550 generalMergeSort p [] = []
551 generalMergeSort p xs = (balancedFold (generalMerge p) . map (: [])) xs
553 generalNaturalMergeSort p [] = []
554 generalNaturalMergeSort p xs = (balancedFold (generalMerge p) . group p) xs
556 mergeSort, naturalMergeSort :: Ord a => [a] -> [a]
558 mergeSort = generalMergeSort (<=)
559 naturalMergeSort = generalNaturalMergeSort (<=)
561 mergeSortLe le = generalMergeSort le
562 naturalMergeSortLe le = generalNaturalMergeSort le
565 %************************************************************************
567 \subsection[Utils-transitive-closure]{Transitive closure}
569 %************************************************************************
571 This algorithm for transitive closure is straightforward, albeit quadratic.
574 transitiveClosure :: (a -> [a]) -- Successor function
575 -> (a -> a -> Bool) -- Equality predicate
577 -> [a] -- The transitive closure
579 transitiveClosure succ eq xs
583 go done (x:xs) | x `is_in` done = go done xs
584 | otherwise = go (x:done) (succ x ++ xs)
587 x `is_in` (y:ys) | eq x y = True
588 | otherwise = x `is_in` ys
591 %************************************************************************
593 \subsection[Utils-accum]{Accumulating}
595 %************************************************************************
597 @mapAccumL@ behaves like a combination
598 of @map@ and @foldl@;
599 it applies a function to each element of a list, passing an accumulating
600 parameter from left to right, and returning a final value of this
601 accumulator together with the new list.
604 mapAccumL :: (acc -> x -> (acc, y)) -- Function of elt of input list
605 -- and accumulator, returning new
606 -- accumulator and elt of result list
607 -> acc -- Initial accumulator
609 -> (acc, [y]) -- Final accumulator and result list
611 mapAccumL f b [] = (b, [])
612 mapAccumL f b (x:xs) = (b'', x':xs') where
614 (b'', xs') = mapAccumL f b' xs
617 @mapAccumR@ does the same, but working from right to left instead. Its type is
618 the same as @mapAccumL@, though.
621 mapAccumR :: (acc -> x -> (acc, y)) -- Function of elt of input list
622 -- and accumulator, returning new
623 -- accumulator and elt of result list
624 -> acc -- Initial accumulator
626 -> (acc, [y]) -- Final accumulator and result list
628 mapAccumR f b [] = (b, [])
629 mapAccumR f b (x:xs) = (b'', x':xs') where
631 (b', xs') = mapAccumR f b xs
634 Here is the bi-directional version, that works from both left and right.
637 mapAccumB :: (accl -> accr -> x -> (accl, accr,y))
638 -- Function of elt of input list
639 -- and accumulator, returning new
640 -- accumulator and elt of result list
641 -> accl -- Initial accumulator from left
642 -> accr -- Initial accumulator from right
644 -> (accl, accr, [y]) -- Final accumulators and result list
646 mapAccumB f a b [] = (a,b,[])
647 mapAccumB f a b (x:xs) = (a'',b'',y:ys)
649 (a',b'',y) = f a b' x
650 (a'',b',ys) = mapAccumB f a' b xs
653 %************************************************************************
655 \subsection[Utils-comparison]{Comparisons}
657 %************************************************************************
660 thenCmp :: Ordering -> Ordering -> Ordering
661 {-# INLINE thenCmp #-}
663 thenCmp other any = other
665 cmpList :: (a -> a -> Ordering) -> [a] -> [a] -> Ordering
666 -- `cmpList' uses a user-specified comparer
668 cmpList cmp [] [] = EQ
669 cmpList cmp [] _ = LT
670 cmpList cmp _ [] = GT
671 cmpList cmp (a:as) (b:bs)
672 = case cmp a b of { EQ -> cmpList cmp as bs; xxx -> xxx }
676 cmpString :: String -> String -> Ordering
679 cmpString (x:xs) (y:ys) = if x == y then cmpString xs ys
680 else if x < y then LT
688 %************************************************************************
690 \subsection[Utils-pairs]{Pairs}
692 %************************************************************************
694 The following are curried versions of @fst@ and @snd@.
697 cfst :: a -> b -> a -- stranal-sem only (Note)
701 The following provide us higher order functions that, when applied
702 to a function, operate on pairs.
705 applyToPair :: ((a -> c),(b -> d)) -> (a,b) -> (c,d)
706 applyToPair (f,g) (x,y) = (f x, g y)
708 applyToFst :: (a -> c) -> (a,b)-> (c,b)
709 applyToFst f (x,y) = (f x,y)
711 applyToSnd :: (b -> d) -> (a,b) -> (a,d)
712 applyToSnd f (x,y) = (x,f y)
714 foldPair :: (a->a->a,b->b->b) -> (a,b) -> [(a,b)] -> (a,b)
715 foldPair fg ab [] = ab
716 foldPair fg@(f,g) ab ((a,b):abs) = (f a u,g b v)
717 where (u,v) = foldPair fg ab abs
721 unzipWith :: (a -> b -> c) -> [(a, b)] -> [c]
722 unzipWith f pairs = map ( \ (a, b) -> f a b ) pairs