X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Futils%2FUtil.lhs;h=11d1b5e545700e1552773d0870de7ecd96aa603c;hb=153b9cb9b11e05c4edb1b6bc0a7b972660e41f70;hp=816cb2491903133b51754698dc0a06ab839a7505;hpb=10521d8418fd3a1cf32882718b5bd28992db36fd;p=ghc-hetmet.git diff --git a/ghc/compiler/utils/Util.lhs b/ghc/compiler/utils/Util.lhs index 816cb24..11d1b5e 100644 --- a/ghc/compiler/utils/Util.lhs +++ b/ghc/compiler/utils/Util.lhs @@ -1,380 +1,128 @@ % -% (c) The GRASP/AQUA Project, Glasgow University, 1992-1995 +% (c) The University of Glasgow 1992-2002 % \section[Util]{Highly random utility functions} \begin{code} -#if defined(COMPILING_GHC) -# include "HsVersions.h" -# define IF_NOT_GHC(a) {--} -#else -# define panic error -# define TAG_ _CMP_TAG -# define LT_ _LT -# define EQ_ _EQ -# define GT_ _GT -# define GT__ _ -# define tagCmp_ _tagCmp -# define FAST_STRING String -# define ASSERT(x) {-nothing-} -# define IF_NOT_GHC(a) a -# define COMMA , -#endif - -#ifndef __GLASGOW_HASKELL__ -# undef TAG_ -# undef LT_ -# undef EQ_ -# undef GT_ -# undef tagCmp_ -#endif - module Util ( - -- Haskell-version support -#ifndef __GLASGOW_HASKELL__ - tagCmp_, - TAG_(..), -#endif + -- general list processing - IF_NOT_GHC(forall COMMA exists COMMA) - zipEqual, nOfThem, lengthExceeds, isSingleton, -#if defined(COMPILING_GHC) - isIn, isn'tIn, -#endif + zipEqual, zipWithEqual, zipWith3Equal, zipWith4Equal, + zipLazy, stretchZipWith, + mapAndUnzip, mapAndUnzip3, + nOfThem, filterOut, + lengthExceeds, lengthIs, lengthAtLeast, listLengthCmp, atLength, + isSingleton, only, + notNull, snocView, - -- association lists - assoc, -#ifdef USE_SEMANTIQUE_STRANAL - clookup, clookrepl, elemIndex, (\\\), -#endif + isIn, isn'tIn, - -- duplicate handling - hasNoDups, equivClasses, runs, removeDups, + -- for-loop + nTimes, -- sorting - IF_NOT_GHC(quicksort COMMA stableSortLt COMMA mergesort COMMA) - sortLt, - IF_NOT_GHC(mergeSort COMMA) naturalMergeSortLe, -- from Carsten - IF_NOT_GHC(naturalMergeSort COMMA mergeSortLe COMMA) + sortLe, sortWith, -- transitive closures transitiveClosure, -- accumulating - mapAccumL, mapAccumR, mapAccumB, + mapAccumL, mapAccumR, mapAccumB, + foldl2, count, + + takeList, dropList, splitAtList, -- comparisons - IF_NOT_GHC(cmpString COMMA) -#ifdef USE_FAST_STRINGS - cmpPString, -#else - substr, -#endif + isEqual, eqListBy, equalLength, compareLength, + thenCmp, cmpList, prefixMatch, suffixMatch, maybePrefixMatch, + + -- strictness + foldl', seqList, + -- pairs - IF_NOT_GHC(cfst COMMA applyToPair COMMA applyToFst COMMA) - IF_NOT_GHC(applyToSnd COMMA foldPair COMMA) - unzipWith - - -- error handling -#if defined(COMPILING_GHC) - , panic, pprPanic, pprTrace -# ifdef DEBUG - , assertPanic -# endif -#endif {- COMPILING_GHC -} - - -- and to make the interface self-sufficient... -#if __HASKELL1__ < 3 -# if defined(COMPILING_GHC) - , Maybe(..){-.. for pragmas...-}, PrettyRep, Pretty(..) -# else - , Maybe -# endif -#endif + unzipWith, -#ifdef USE_ATTACK_PRAGMAS - -- as more-or-less of a *HACK*, Util exports - -- many types abstractly, so that pragmas will be - -- able to see them (given that most modules - -- import Util). - , - AbstractC, - ArgUsage, - ArgUsageInfo, - ArithSeqInfo, - ArityInfo, - Bag, - BasicLit, - Bind, - BinderInfo, - Binds, - CAddrMode, - CExprMacro, - CLabel, - CSeq, - CStmtMacro, - CcKind, - Class, - ClassDecl, - ClassOp, - ClassOpPragmas, - ClassPragmas, - ClosureInfo, - ConDecl, - CoreArg, - CoreAtom, - CoreBinding, - CoreCaseAlternatives, - CoreCaseDefault, - CoreExpr, - CostCentre, - DataPragmas, - DataTypeSig, - DefaultDecl, - DeforestInfo, - Delay, - Demand, - DemandInfo, - DuplicationDanger, - EnclosingCcDetails, - EndOfBlockInfo, - ExportFlag, - Expr, - FBConsum, - FBProd, - FBType, - FBTypeInfo, - FiniteMap, - FixityDecl, - FormSummary, - FullName, - FunOrArg, - GRHS, - GRHSsAndBinds, - GenPragmas, - GlobalSwitch, - HeapOffset, - IE, - Id, - IdDetails, - IdEnv(..), -- UGH - IdInfo, - IdVal, - IfaceImportDecl, - ImpStrictness, - ImpUnfolding, - ImportedInterface, - InPat, - InsideSCC, - Inst, - InstDecl, - InstOrigin, - InstTemplate, - InstTy, - InstancePragmas, - Interface, - IsDupdCC, IsCafCC, - LambdaFormInfo, - Literal, - MagicId, - MagicUnfoldingFun, - Match, - Module, - MonoBinds, - MonoType, - Name, - NamedThing(..), -- SIGH - OptIdInfo(..), -- SIGH - OrdList, - Outputable(..), -- SIGH - OverloadedLit, - PolyType, - PprStyle, - PrimKind, - PrimOp, - ProtoName, - Provenance, - Qual, - RegRelative, - Renaming, - ReturnInfo, - SMRep, - SMSpecRepKind, - SMUpdateKind, - Sequel, - ShortName, - Sig, - SimplCount, - SimplEnv, - SimplifierSwitch, - SpecEnv, - SpecInfo, - SpecialisedInstanceSig, - SplitUniqSupply, - SrcLoc, - StableLoc, - StandardFormInfo, - StgAtom, - StgBinderInfo, - StgBinding, - StgCaseAlternatives, - StgCaseDefault, - StgExpr, - StgRhs, - StrictnessInfo, - StubFlag, - SwitchResult, - TickType, - TyCon, - TyDecl, - TyVar, - TyVarEnv(..), - TyVarTemplate, - TypePragmas, - TypecheckedPat, - UfCostCentre, - UfId, - UnfoldEnv, - UnfoldItem, - UnfoldConApp, - UnfoldingCoreAlts, - UnfoldingCoreAtom, - UnfoldingCoreBinding, - UnfoldingCoreDefault, - UnfoldingCoreExpr, - UnfoldingDetails, - UnfoldingGuidance, - UnfoldingPrimOp, - UniType, - UniqFM, - Unique, - UniqueSupply, - UpdateFlag, - UpdateInfo, - VolatileLoc, - -#if ! OMIT_NATIVE_CODEGEN - Reg, - CodeSegment, - RegLoc, - StixReg, - StixTree, -#endif + global, - getIdUniType, typeOfBasicLit, typeOfPat, - getIdKind, kindOfBasicLit, - kindFromType, + -- module names + looksLikeModuleName, + + toArgs, + + -- Floating point stuff + readRational, + ) where - eqId, cmpId, - eqName, cmpName, - cmpProtoName, eqProtoName, - cmpByLocalName, eqByLocalName, - eqUnique, cmpUnique, - showUnique, +#include "HsVersions.h" - switchIsOn, +import Panic ( panic, trace ) +import FastTypes - ppNil, ppStr, ppInt, ppInteger, ppDouble, -#if __GLASGOW_HASKELL__ >= 23 - ppRational, --- ??? +#if __GLASGOW_HASKELL__ <= 408 +import EXCEPTION ( catchIO, justIoErrors, raiseInThread ) #endif - cNil, cStr, cAppend, cCh, cShow, -#if __GLASGOW_HASKELL__ >= 23 - cPStr, +import DATA_IOREF ( IORef, newIORef ) +import UNSAFE_IO ( unsafePerformIO ) + +import qualified List ( elem, notElem ) + +#ifndef DEBUG +import List ( zipWith4 ) #endif --- mkBlackHoleCLabel, +import Char ( isUpper, isAlphaNum, isSpace, ord, isDigit ) +import Ratio ( (%) ) - emptyBag, snocBag, - emptyFM, ---OLD: emptySet, - nullSpecEnv, - - mkUnknownSrcLoc, - - pprCoreBinding, pprCoreExpr, pprTyCon, pprUniType, +infixr 9 `thenCmp` +\end{code} - tagOf_PrimOp, - pprPrimOp +%************************************************************************ +%* * +\subsection{The Eager monad} +%* * +%************************************************************************ -#endif {-USE_ATTACK_PRAGMAS-} - ) where +The @Eager@ monad is just an encoding of continuation-passing style, +used to allow you to express "do this and then that", mainly to avoid +space leaks. It's done with a type synonym to save bureaucracy. -#if defined(COMPILING_GHC) -IMPORT_Trace -import Pretty -#endif -#if __HASKELL1__ < 3 -import Maybes ( Maybe(..) ) -#endif +\begin{code} +#if NOT_USED + +type Eager ans a = (a -> ans) -> ans + +runEager :: Eager a a -> a +runEager m = m (\x -> x) + +appEager :: Eager ans a -> (a -> ans) -> ans +appEager m cont = m cont -#if defined(COMPILING_GHC) -import Id -import IdInfo -import Outputable - -# ifdef USE_ATTACK_PRAGMAS - -import AbsCSyn -import AbsSyn -import AbsUniType -import Bag -import BasicLit -import BinderInfo -import CLabelInfo -import CgBindery -import CgMonad -import CharSeq -import ClosureInfo -import CmdLineOpts -import CoreSyn -import FiniteMap -import HsCore -import HsPragmas -import Inst -import InstEnv -import Name -import NameTypes -import OrdList -import PlainCore -import PrimOps -import ProtoName -import CostCentre -import SMRep -import SimplEnv -import SimplMonad -import SplitUniq -import SrcLoc -import StgSyn -import TyVarEnv -import UniqFM -import Unique - -# if ! OMIT_NATIVE_CODEGEN -import AsmRegAlloc ( Reg ) -import MachDesc -import Stix -# endif - -# endif {-USE_ATTACK_PRAGMAS-} +thenEager :: Eager ans a -> (a -> Eager ans b) -> Eager ans b +thenEager m k cont = m (\r -> k r cont) +returnEager :: a -> Eager ans a +returnEager v cont = cont v + +mapEager :: (a -> Eager ans b) -> [a] -> Eager ans [b] +mapEager f [] = returnEager [] +mapEager f (x:xs) = f x `thenEager` \ y -> + mapEager f xs `thenEager` \ ys -> + returnEager (y:ys) #endif \end{code} %************************************************************************ %* * -\subsection[Utils-version-support]{Functions to help pre-1.2 versions of (non-Glasgow) Haskell} +\subsection{A for loop} %* * %************************************************************************ -This is our own idea: \begin{code} -#ifndef __GLASGOW_HASKELL__ -data TAG_ = LT_ | EQ_ | GT_ - -tagCmp_ :: Ord a => a -> a -> TAG_ -tagCmp_ a b = if a == b then EQ_ else if a < b then LT_ else GT_ -#endif +-- Compose a function with itself n times. (nth rather than twice) +nTimes :: Int -> (a -> a) -> (a -> a) +nTimes 0 _ = id +nTimes 1 f = f +nTimes n f = f . nTimes (n-1) f \end{code} %************************************************************************ @@ -383,52 +131,170 @@ tagCmp_ a b = if a == b then EQ_ else if a < b then LT_ else GT_ %* * %************************************************************************ -Quantifiers are not standard in Haskell. The following fill in the gap. - \begin{code} -forall :: (a -> Bool) -> [a] -> Bool -forall pred [] = True -forall pred (x:xs) = pred x && forall pred xs - -exists :: (a -> Bool) -> [a] -> Bool -exists pred [] = False -exists pred (x:xs) = pred x || exists pred xs +filterOut :: (a->Bool) -> [a] -> [a] +-- Like filter, only reverses the sense of the test +filterOut p [] = [] +filterOut p (x:xs) | p x = filterOut p xs + | otherwise = x : filterOut p xs \end{code} -A paranoid @zip@ that checks the lists are of equal length. -Alastair Reid thinks this should only happen if DEBUGging on; -hey, why not? +A paranoid @zip@ (and some @zipWith@ friends) that checks the lists +are of equal length. Alastair Reid thinks this should only happen if +DEBUGging on; hey, why not? \begin{code} -zipEqual :: [a] -> [b] -> [(a,b)] +zipEqual :: String -> [a] -> [b] -> [(a,b)] +zipWithEqual :: String -> (a->b->c) -> [a]->[b]->[c] +zipWith3Equal :: String -> (a->b->c->d) -> [a]->[b]->[c]->[d] +zipWith4Equal :: String -> (a->b->c->d->e) -> [a]->[b]->[c]->[d]->[e] #ifndef DEBUG -zipEqual a b = zip a b +zipEqual _ = zip +zipWithEqual _ = zipWith +zipWith3Equal _ = zipWith3 +zipWith4Equal _ = zipWith4 #else -zipEqual [] [] = [] -zipEqual (a:as) (b:bs) = (a,b) : zipEqual as bs -zipEqual as bs = panic "zipEqual: unequal lists" +zipEqual msg [] [] = [] +zipEqual msg (a:as) (b:bs) = (a,b) : zipEqual msg as bs +zipEqual msg as bs = panic ("zipEqual: unequal lists:"++msg) + +zipWithEqual msg z (a:as) (b:bs)= z a b : zipWithEqual msg z as bs +zipWithEqual msg _ [] [] = [] +zipWithEqual msg _ _ _ = panic ("zipWithEqual: unequal lists:"++msg) + +zipWith3Equal msg z (a:as) (b:bs) (c:cs) + = z a b c : zipWith3Equal msg z as bs cs +zipWith3Equal msg _ [] [] [] = [] +zipWith3Equal msg _ _ _ _ = panic ("zipWith3Equal: unequal lists:"++msg) + +zipWith4Equal msg z (a:as) (b:bs) (c:cs) (d:ds) + = z a b c d : zipWith4Equal msg z as bs cs ds +zipWith4Equal msg _ [] [] [] [] = [] +zipWith4Equal msg _ _ _ _ _ = panic ("zipWith4Equal: unequal lists:"++msg) #endif \end{code} \begin{code} +-- zipLazy is lazy in the second list (observe the ~) + +zipLazy :: [a] -> [b] -> [(a,b)] +zipLazy [] ys = [] +zipLazy (x:xs) ~(y:ys) = (x,y) : zipLazy xs ys +\end{code} + + +\begin{code} +stretchZipWith :: (a -> Bool) -> b -> (a->b->c) -> [a] -> [b] -> [c] +-- (stretchZipWith p z f xs ys) stretches ys by inserting z in +-- the places where p returns *True* + +stretchZipWith p z f [] ys = [] +stretchZipWith p z f (x:xs) ys + | p x = f x z : stretchZipWith p z f xs ys + | otherwise = case ys of + [] -> [] + (y:ys) -> f x y : stretchZipWith p z f xs ys +\end{code} + + +\begin{code} +mapAndUnzip :: (a -> (b, c)) -> [a] -> ([b], [c]) + +mapAndUnzip f [] = ([],[]) +mapAndUnzip f (x:xs) + = let + (r1, r2) = f x + (rs1, rs2) = mapAndUnzip f xs + in + (r1:rs1, r2:rs2) + +mapAndUnzip3 :: (a -> (b, c, d)) -> [a] -> ([b], [c], [d]) + +mapAndUnzip3 f [] = ([],[],[]) +mapAndUnzip3 f (x:xs) + = let + (r1, r2, r3) = f x + (rs1, rs2, rs3) = mapAndUnzip3 f xs + in + (r1:rs1, r2:rs2, r3:rs3) +\end{code} + +\begin{code} nOfThem :: Int -> a -> [a] -nOfThem n thing = take n (repeat thing) +nOfThem n thing = replicate n thing + +-- 'atLength atLen atEnd ls n' unravels list 'ls' to position 'n'; +-- specification: +-- +-- atLength atLenPred atEndPred ls n +-- | n < 0 = atLenPred n +-- | length ls < n = atEndPred (n - length ls) +-- | otherwise = atLenPred (drop n ls) +-- +atLength :: ([a] -> b) + -> (Int -> b) + -> [a] + -> Int + -> b +atLength atLenPred atEndPred ls n + | n < 0 = atEndPred n + | otherwise = go n ls + where + go n [] = atEndPred n + go 0 ls = atLenPred ls + go n (_:xs) = go (n-1) xs +-- special cases. lengthExceeds :: [a] -> Int -> Bool +-- (lengthExceeds xs n) = (length xs > n) +lengthExceeds = atLength notNull (const False) -[] `lengthExceeds` n = 0 > n -(x:xs) `lengthExceeds` n = (1 > n) || (xs `lengthExceeds` (n - 1)) +lengthAtLeast :: [a] -> Int -> Bool +lengthAtLeast = atLength notNull (== 0) -isSingleton :: [a] -> Bool +lengthIs :: [a] -> Int -> Bool +lengthIs = atLength null (==0) +listLengthCmp :: [a] -> Int -> Ordering +listLengthCmp = atLength atLen atEnd + where + atEnd 0 = EQ + atEnd x + | x > 0 = LT -- not yet seen 'n' elts, so list length is < n. + | otherwise = GT + + atLen [] = EQ + atLen _ = GT + +isSingleton :: [a] -> Bool isSingleton [x] = True isSingleton _ = False + +notNull :: [a] -> Bool +notNull [] = False +notNull _ = True + +snocView :: [a] -> Maybe ([a],a) + -- Split off the last element +snocView [] = Nothing +snocView xs = go [] xs + where + -- Invariant: second arg is non-empty + go acc [x] = Just (reverse acc, x) + go acc (x:xs) = go (x:acc) xs + +only :: [a] -> a +#ifdef DEBUG +only [a] = a +#else +only (a:_) = a +#endif \end{code} Debugging/specialising versions of \tr{elem} and \tr{notElem} + \begin{code} -#if defined(COMPILING_GHC) isIn, isn'tIn :: (Eq a) => String -> a -> [a] -> Bool # ifndef DEBUG @@ -442,347 +308,25 @@ elem__ x (y:ys) = x==y || elem__ x ys notElem__ x [] = True notElem__ x (y:ys) = x /= y && notElem__ x ys -# else {- DEBUG -} +# else /* DEBUG */ isIn msg x ys - = elem ILIT(0) x ys + = elem (_ILIT 0) x ys where elem i _ [] = False elem i x (y:ys) - | i _GE_ ILIT(100) = panic ("Over-long elem in: " ++ msg) - | otherwise = x == y || elem (i _ADD_ ILIT(1)) x ys + | i ># _ILIT 100 = trace ("Over-long elem in " ++ msg) $ + x `List.elem` (y:ys) + | otherwise = x == y || elem (i +# _ILIT(1)) x ys isn'tIn msg x ys - = notElem ILIT(0) x ys + = notElem (_ILIT 0) x ys where notElem i x [] = True notElem i x (y:ys) - | i _GE_ ILIT(100) = panic ("Over-long notElem in: " ++ msg) - | otherwise = x /= y && notElem (i _ADD_ ILIT(1)) x ys - -# endif {- DEBUG -} - -# ifdef USE_ATTACK_PRAGMAS -{-# SPECIALIZE isIn :: String -> BasicLit -> [BasicLit] -> Bool #-} -{-# SPECIALIZE isIn :: String -> Class -> [Class] -> Bool #-} -{-# SPECIALIZE isIn :: String -> Id -> [Id] -> Bool #-} -{-# SPECIALIZE isIn :: String -> Int -> [Int] -> Bool #-} -{-# SPECIALIZE isIn :: String -> MagicId -> [MagicId] -> Bool #-} -{-# SPECIALIZE isIn :: String -> Name -> [Name] -> Bool #-} -{-# SPECIALIZE isIn :: String -> TyCon -> [TyCon] -> Bool #-} -{-# SPECIALIZE isIn :: String -> TyVar -> [TyVar] -> Bool #-} -{-# SPECIALIZE isIn :: String -> TyVarTemplate -> [TyVarTemplate] -> Bool #-} -{-# SPECIALIZE isIn :: String -> Unique -> [Unique] -> Bool #-} -{-# SPECIALIZE isIn :: String -> _PackedString -> [_PackedString] -> Bool #-} -{-# SPECIALIZE isn'tIn :: String -> (Id, Id) -> [(Id, Id)] -> Bool #-} -{-# SPECIALIZE isn'tIn :: String -> Int -> [Int] -> Bool #-} -{-# SPECIALIZE isn'tIn :: String -> Id -> [Id] -> Bool #-} -{-# SPECIALIZE isn'tIn :: String -> MagicId -> [MagicId] -> Bool #-} -{-# SPECIALIZE isn'tIn :: String -> TyCon -> [TyCon] -> Bool #-} -{-# SPECIALIZE isn'tIn :: String -> TyVar -> [TyVar] -> Bool #-} -{-# SPECIALIZE isn'tIn :: String -> TyVarTemplate -> [TyVarTemplate] -> Bool #-} -# endif - -#endif {- COMPILING_GHC -} -\end{code} - -%************************************************************************ -%* * -\subsection[Utils-assoc]{Association lists} -%* * -%************************************************************************ - -See also @assocMaybe@ and @mkLookupFun@ in module @Maybes@. - -\begin{code} -assoc :: (Eq a) => String -> [(a, b)] -> a -> b - -assoc crash_msg lst key - = if (null res) - then panic ("Failed in assoc: " ++ crash_msg) - else head res - where res = [ val | (key', val) <- lst, key == key'] - -#if defined(COMPILING_GHC) -# ifdef USE_ATTACK_PRAGMAS -{-# SPECIALIZE assoc :: String -> [(Id, a)] -> Id -> a #-} -{-# SPECIALIZE assoc :: String -> [(Class, a)] -> Class -> a #-} -{-# SPECIALIZE assoc :: String -> [(Name, a)] -> Name -> a #-} -{-# SPECIALIZE assoc :: String -> [(PrimKind, a)] -> PrimKind -> a #-} -{-# SPECIALIZE assoc :: String -> [(String, a)] -> String -> a #-} -{-# SPECIALIZE assoc :: String -> [(TyCon, a)] -> TyCon -> a #-} -{-# SPECIALIZE assoc :: String -> [(TyVar, a)] -> TyVar -> a #-} -{-# SPECIALIZE assoc :: String -> [(TyVarTemplate, a)] -> TyVarTemplate -> a #-} -{-# SPECIALIZE assoc :: String -> [(UniType, a)] -> UniType -> a #-} -{-# SPECIALIZE assoc :: String -> [(_PackedString, a)] -> _PackedString -> a #-} -# endif -#endif -\end{code} - -Given a list of associations one wants to look for the most recent -association for a given key. A couple of functions follow that cover -the simple lookup, the lookup with a default value when the key not -found, and two corresponding functions operating on unzipped lists -of associations. - -\begin{code} -#ifdef USE_SEMANTIQUE_STRANAL - -clookup :: (Eq a) => [a] -> [b] -> a -> b -clookup = clookupElse (panic "clookup") - where - -- clookupElse :: (Eq a) => b -> [a] -> [b] -> a -> b - clookupElse d [] [] a = d - clookupElse d (x:xs) (y:ys) a - | a==x = y - | True = clookupElse d xs ys a -#endif -\end{code} - -The following routine given a curried environment replaces the entry -labelled with a given name with a new value given. The new value is -given in the form of a function that allows to transform the old entry. - -Assumption is that the list of labels contains the given one and that -the two lists of the curried environment are of equal lengths. - -\begin{code} -#ifdef USE_SEMANTIQUE_STRANAL -clookrepl :: Eq a => [a] -> [b] -> a -> (b -> b) -> [b] -clookrepl (a:as) (b:bs) x f - = if x == a then (f b:bs) else (b:clookrepl as bs x f) -#endif -\end{code} - -The following returns the index of an element in a list. - -\begin{code} -#ifdef USE_SEMANTIQUE_STRANAL - -elemIndex :: Eq a => [a] -> a -> Int -elemIndex as x = indx as x 0 - where - indx :: Eq a => [a] -> a -> Int -> Int - indx (a:as) x n = if a==x then n else indx as x ((n+1)::Int) -# if defined(COMPILING_GHC) - indx [] x n = pprPanic "element not in list in elemIndex" ppNil -# else - indx [] x n = error "element not in list in elemIndex" -# endif -#endif -\end{code} - -%************************************************************************ -%* * -\subsection[Utils-dups]{Duplicate-handling} -%* * -%************************************************************************ - -List difference (non-associative). In the result of @xs \\\ ys@, the -first occurrence of each element of ys in turn (if any) has been -removed from xs. Thus, @(xs ++ ys) \\\ xs == ys@. This function is -a copy of @\\@ from report 1.1 and is added to overshade the buggy -version from the 1.0 version of Haskell. - -This routine can be removed after the compiler bootstraps itself and -a proper @\\@ is can be applied. - -\begin{code} -#ifdef USE_SEMANTIQUE_STRANAL -(\\\) :: (Eq a) => [a] -> [a] -> [a] -(\\\) = foldl del - where - [] `del` _ = [] - (x:xs) `del` y - | x == y = xs - | otherwise = x : xs `del` y -#endif -\end{code} - -\begin{code} -hasNoDups :: (Eq a) => [a] -> Bool -hasNoDups xs = f [] xs - where - f seen_so_far [] = True - f seen_so_far (x:xs) = if x `is_elem` seen_so_far then - False - else - f (x:seen_so_far) xs - -#if defined(COMPILING_GHC) - is_elem = isIn "hasNoDups" -#else - is_elem = elem -#endif -#if defined(COMPILING_GHC) -# ifdef USE_ATTACK_PRAGMAS -{-# SPECIALIZE hasNoDups :: [TyVar] -> Bool #-} -# endif -#endif -\end{code} - -\begin{code} -equivClasses :: (a -> a -> TAG_) -- Comparison - -> [a] - -> [[a]] - -equivClasses cmp stuff@[] = [] -equivClasses cmp stuff@[item] = [stuff] -equivClasses cmp items - = runs eq (sortLt lt items) - where - eq a b = case cmp a b of { EQ_ -> True; _ -> False } - lt a b = case cmp a b of { LT_ -> True; _ -> False } -\end{code} - -The first cases in @equivClasses@ above are just to cut to the point -more quickly... - -@runs@ groups a list into a list of lists, each sublist being a run of -identical elements of the input list. It is passed a predicate @p@ which -tells when two elements are equal. - -\begin{code} -runs :: (a -> a -> Bool) -- Equality - -> [a] - -> [[a]] - -runs p [] = [] -runs p (x:xs) = case (span (p x) xs) of - (first, rest) -> (x:first) : (runs p rest) -\end{code} - -\begin{code} -removeDups :: (a -> a -> TAG_) -- Comparison function - -> [a] - -> ([a], -- List with no duplicates - [[a]]) -- List of duplicate groups. One representative from - -- each group appears in the first result - -removeDups cmp [] = ([], []) -removeDups cmp [x] = ([x],[]) -removeDups cmp xs - = case (mapAccumR collect_dups [] (equivClasses cmp xs)) of { (dups, xs') -> - (xs', dups) } - where - collect_dups dups_so_far [x] = (dups_so_far, x) - collect_dups dups_so_far dups@(x:xs) = (dups:dups_so_far, x) -\end{code} - -%************************************************************************ -%* * -\subsection[Utils-sorting]{Sorting} -%* * -%************************************************************************ - -%************************************************************************ -%* * -\subsubsection[Utils-quicksorting]{Quicksorts} -%* * -%************************************************************************ - -\begin{code} --- tail-recursive, etc., "quicker sort" [as per Meira thesis] -quicksort :: (a -> a -> Bool) -- Less-than predicate - -> [a] -- Input list - -> [a] -- Result list in increasing order - -quicksort lt [] = [] -quicksort lt [x] = [x] -quicksort lt (x:xs) = split x [] [] xs - where - split x lo hi [] = quicksort lt lo ++ (x : quicksort lt hi) - split x lo hi (y:ys) | y `lt` x = split x (y:lo) hi ys - | True = split x lo (y:hi) ys -\end{code} - -Quicksort variant from Lennart's Haskell-library contribution. This -is a {\em stable} sort. - -\begin{code} -stableSortLt = sortLt -- synonym; when we want to highlight stable-ness - -sortLt :: (a -> a -> Bool) -- Less-than predicate - -> [a] -- Input list - -> [a] -- Result list - -sortLt lt l = qsort lt l [] - --- qsort is stable and does not concatenate. -qsort :: (a -> a -> Bool) -- Less-than predicate - -> [a] -- xs, Input list - -> [a] -- r, Concatenate this list to the sorted input list - -> [a] -- Result = sort xs ++ r - -qsort lt [] r = r -qsort lt [x] r = x:r -qsort lt (x:xs) r = qpart lt x xs [] [] r - --- qpart partitions and sorts the sublists --- rlt contains things less than x, --- rge contains the ones greater than or equal to x. --- Both have equal elements reversed with respect to the original list. - -qpart lt x [] rlt rge r = - -- rlt and rge are in reverse order and must be sorted with an - -- anti-stable sorting - rqsort lt rlt (x : rqsort lt rge r) - -qpart lt x (y:ys) rlt rge r = - if lt y x then - -- y < x - qpart lt x ys (y:rlt) rge r - else - -- y >= x - qpart lt x ys rlt (y:rge) r - --- rqsort is as qsort but anti-stable, i.e. reverses equal elements -rqsort lt [] r = r -rqsort lt [x] r = x:r -rqsort lt (x:xs) r = rqpart lt x xs [] [] r - -rqpart lt x [] rle rgt r = - qsort lt rle (x : qsort lt rgt r) - -rqpart lt x (y:ys) rle rgt r = - if lt x y then - -- y > x - rqpart lt x ys rle (y:rgt) r - else - -- y <= x - rqpart lt x ys (y:rle) rgt r -\end{code} - -%************************************************************************ -%* * -\subsubsection[Utils-dull-mergesort]{A rather dull mergesort} -%* * -%************************************************************************ - -\begin{code} -mergesort :: (a -> a -> TAG_) -> [a] -> [a] - -mergesort cmp xs = merge_lists (split_into_runs [] xs) - where - a `le` b = case cmp a b of { LT_ -> True; EQ_ -> True; GT__ -> False } - a `ge` b = case cmp a b of { LT_ -> False; EQ_ -> True; GT__ -> True } - - split_into_runs [] [] = [] - split_into_runs run [] = [run] - split_into_runs [] (x:xs) = split_into_runs [x] xs - split_into_runs [r] (x:xs) | x `ge` r = split_into_runs [r,x] xs - split_into_runs rl@(r:rs) (x:xs) | x `le` r = split_into_runs (x:rl) xs - | True = rl : (split_into_runs [x] xs) - - merge_lists [] = [] - merge_lists (x:xs) = merge x (merge_lists xs) - - merge [] ys = ys - merge xs [] = xs - merge xl@(x:xs) yl@(y:ys) - = case cmp x y of - EQ_ -> x : y : (merge xs ys) - LT_ -> x : (merge xs yl) - GT__ -> y : (merge xl ys) + | i ># _ILIT 100 = trace ("Over-long notElem in " ++ msg) $ + x `List.notElem` (y:ys) + | otherwise = x /= y && notElem (i +# _ILIT(1)) x ys +# endif /* DEBUG */ \end{code} %************************************************************************ @@ -797,51 +341,61 @@ From: Carsten Kehler Holst To: partain@dcs.gla.ac.uk Subject: natural merge sort beats quick sort [ and it is prettier ] -Here a piece of Haskell code that I'm rather fond of. See it as an +Here is a piece of Haskell code that I'm rather fond of. See it as an attempt to get rid of the ridiculous quick-sort routine. group is quite useful by itself I think it was John's idea originally though I believe the lazy version is due to me [surprisingly complicated]. gamma [used to be called] is called gamma because I got inspired by the Gamma calculus. It is not very close to the calculus but does -behave less sequential that both foldr and foldl. One could imagine a -version of gamma that took a unit element as well thereby avoiding the -problem with empty lists. +behave less sequentially than both foldr and foldl. One could imagine +a version of gamma that took a unit element as well thereby avoiding +the problem with empty lists. I've tried this code against 1) insertion sort - as provided by haskell 2) the normal implementation of quick sort 3) a deforested version of quick sort due to Jan Sparud - 4) a super-optimized-quick-sort of Lennarts + 4) a super-optimized-quick-sort of Lennart's If the list is partially sorted both merge sort and in particular natural merge sort wins. If the list is random [ average length of rising subsequences = approx 2 ] mergesort still wins and natural -merge sort is marginally beeten by lennart's soqs. The space +merge sort is marginally beaten by Lennart's soqs. The space consumption of merge sort is a bit worse than Lennart's quick sort approx a factor of 2. And a lot worse if Sparud's bug-fix [see his fpca article ] isn't used because of group. -have fun +have fun Carsten \end{display} \begin{code} group :: (a -> a -> Bool) -> [a] -> [[a]] - -group p [] = [[]] -group p (x:xs) = - let ((h1:t1):tt1) = group p xs - (t,tt) = if null xs then ([],[]) else - if x `p` h1 then (h1:t1,tt1) else - ([], (h1:t1):tt1) - in ((x:t):tt) +-- Given a <= function, group finds maximal contiguous up-runs +-- or down-runs in the input list. +-- It's stable, in the sense that it never re-orders equal elements +-- +-- Date: Mon, 12 Feb 1996 15:09:41 +0000 +-- From: Andy Gill +-- Here is a `better' definition of group. + +group p [] = [] +group p (x:xs) = group' xs x x (x :) + where + group' [] _ _ s = [s []] + group' (x:xs) x_min x_max s + | x_max `p` x = group' xs x_min x (s . (x :)) + | not (x_min `p` x) = group' xs x x_max ((x :) . s) + | otherwise = s [] : group' xs x x (x :) + -- NB: the 'not' is essential for stablity + -- x `p` x_min would reverse equal elements generalMerge :: (a -> a -> Bool) -> [a] -> [a] -> [a] generalMerge p xs [] = xs generalMerge p [] ys = ys generalMerge p (x:xs) (y:ys) | x `p` y = x : generalMerge p xs (y:ys) - | otherwise = y : generalMerge p (x:xs) ys + | otherwise = y : generalMerge p (x:xs) ys -- gamma is now called balancedFold @@ -854,19 +408,28 @@ balancedFold' :: (a -> a -> a) -> [a] -> [a] balancedFold' f (x:y:xs) = f x y : balancedFold' f xs balancedFold' f xs = xs -generalMergeSort p [] = [] -generalMergeSort p xs = (balancedFold (generalMerge p) . map (: [])) xs - generalNaturalMergeSort p [] = [] generalNaturalMergeSort p xs = (balancedFold (generalMerge p) . group p) xs +#if NOT_USED +generalMergeSort p [] = [] +generalMergeSort p xs = (balancedFold (generalMerge p) . map (: [])) xs + mergeSort, naturalMergeSort :: Ord a => [a] -> [a] mergeSort = generalMergeSort (<=) naturalMergeSort = generalNaturalMergeSort (<=) mergeSortLe le = generalMergeSort le -naturalMergeSortLe le = generalNaturalMergeSort le +#endif + +sortLe :: (a->a->Bool) -> [a] -> [a] +sortLe le = generalNaturalMergeSort le + +sortWith :: Ord b => (a->b) -> [a] -> [a] +sortWith get_key xs = sortLe le xs + where + x `le` y = get_key x < get_key y \end{code} %************************************************************************ @@ -880,15 +443,15 @@ This algorithm for transitive closure is straightforward, albeit quadratic. \begin{code} transitiveClosure :: (a -> [a]) -- Successor function -> (a -> a -> Bool) -- Equality predicate - -> [a] + -> [a] -> [a] -- The transitive closure transitiveClosure succ eq xs - = do [] xs + = go [] xs where - do done [] = done - do done (x:xs) | x `is_in` done = do done xs - | otherwise = do (x:done) (succ x ++ xs) + go done [] = done + go done (x:xs) | x `is_in` done = go done xs + | otherwise = go (x:done) (succ x ++ xs) x `is_in` [] = False x `is_in` (y:ys) | eq x y = True @@ -945,10 +508,10 @@ mapAccumB :: (accl -> accr -> x -> (accl, accr,y)) -- Function of elt of input list -- and accumulator, returning new -- accumulator and elt of result list - -> accl -- Initial accumulator from left - -> accr -- Initial accumulator from right - -> [x] -- Input list - -> (accl, accr, [y]) -- Final accumulators and result list + -> accl -- Initial accumulator from left + -> accr -- Initial accumulator from right + -> [x] -- Input list + -> (accl, accr, [y]) -- Final accumulators and result list mapAccumB f a b [] = (a,b,[]) mapAccumB f a b (x:xs) = (a'',b'',y:ys) @@ -957,46 +520,120 @@ mapAccumB f a b (x:xs) = (a'',b'',y:ys) (a'',b',ys) = mapAccumB f a' b xs \end{code} -%************************************************************************ -%* * -\subsection[Utils-comparison]{Comparisons} -%* * -%************************************************************************ +A strict version of foldl. -See also @tagCmp_@ near the versions-compatibility section. +\begin{code} +foldl' :: (a -> b -> a) -> a -> [b] -> a +foldl' f z xs = lgo z xs + where + lgo z [] = z + lgo z (x:xs) = (lgo $! (f z x)) xs +\end{code} + +A combination of foldl with zip. It works with equal length lists. \begin{code} -cmpString :: String -> String -> TAG_ - -cmpString [] [] = EQ_ -cmpString (x:xs) (y:ys) = if x == y then cmpString xs ys - else if x < y then LT_ - else GT_ -cmpString [] ys = LT_ -cmpString xs [] = GT_ - -cmpString _ _ = case (panic "cmpString") of { s -> -- BUG avoidance: never get here - cmpString s "" -- will never get here - } +foldl2 :: (acc -> a -> b -> acc) -> acc -> [a] -> [b] -> acc +foldl2 k z [] [] = z +foldl2 k z (a:as) (b:bs) = foldl2 k (k z a b) as bs \end{code} +Count the number of times a predicate is true + \begin{code} -#ifdef USE_FAST_STRINGS -cmpPString :: FAST_STRING -> FAST_STRING -> TAG_ +count :: (a -> Bool) -> [a] -> Int +count p [] = 0 +count p (x:xs) | p x = 1 + count p xs + | otherwise = count p xs +\end{code} + +@splitAt@, @take@, and @drop@ but with length of another +list giving the break-off point: + +\begin{code} +takeList :: [b] -> [a] -> [a] +takeList [] _ = [] +takeList (_:xs) ls = + case ls of + [] -> [] + (y:ys) -> y : takeList xs ys + +dropList :: [b] -> [a] -> [a] +dropList [] xs = xs +dropList _ xs@[] = xs +dropList (_:xs) (_:ys) = dropList xs ys + + +splitAtList :: [b] -> [a] -> ([a], [a]) +splitAtList [] xs = ([], xs) +splitAtList _ xs@[] = (xs, xs) +splitAtList (_:xs) (y:ys) = (y:ys', ys'') + where + (ys', ys'') = splitAtList xs ys -cmpPString x y - = case (_tagCmp x y) of { _LT -> LT_ ; _EQ -> EQ_ ; _GT -> GT_ } -#endif \end{code} + +%************************************************************************ +%* * +\subsection[Utils-comparison]{Comparisons} +%* * +%************************************************************************ + \begin{code} -#ifndef USE_FAST_STRINGS -substr :: FAST_STRING -> Int -> Int -> FAST_STRING +isEqual :: Ordering -> Bool +-- Often used in (isEqual (a `compare` b)) +isEqual GT = False +isEqual EQ = True +isEqual LT = False + +thenCmp :: Ordering -> Ordering -> Ordering +{-# INLINE thenCmp #-} +thenCmp EQ any = any +thenCmp other any = other + +eqListBy :: (a->a->Bool) -> [a] -> [a] -> Bool +eqListBy eq [] [] = True +eqListBy eq (x:xs) (y:ys) = eq x y && eqListBy eq xs ys +eqListBy eq xs ys = False + +equalLength :: [a] -> [b] -> Bool +equalLength [] [] = True +equalLength (_:xs) (_:ys) = equalLength xs ys +equalLength xs ys = False + +compareLength :: [a] -> [b] -> Ordering +compareLength [] [] = EQ +compareLength (_:xs) (_:ys) = compareLength xs ys +compareLength [] _ys = LT +compareLength _xs [] = GT + +cmpList :: (a -> a -> Ordering) -> [a] -> [a] -> Ordering + -- `cmpList' uses a user-specified comparer + +cmpList cmp [] [] = EQ +cmpList cmp [] _ = LT +cmpList cmp _ [] = GT +cmpList cmp (a:as) (b:bs) + = case cmp a b of { EQ -> cmpList cmp as bs; xxx -> xxx } +\end{code} -substr str beg end - = ASSERT (beg >= 0 && beg <= end) - take (end - beg + 1) (drop beg str) -#endif +\begin{code} +prefixMatch :: Eq a => [a] -> [a] -> Bool +prefixMatch [] _str = True +prefixMatch _pat [] = False +prefixMatch (p:ps) (s:ss) | p == s = prefixMatch ps ss + | otherwise = False + +maybePrefixMatch :: String -> String -> Maybe String +maybePrefixMatch [] rest = Just rest +maybePrefixMatch (_:_) [] = Nothing +maybePrefixMatch (p:pat) (r:rest) + | p == r = maybePrefixMatch pat rest + | otherwise = Nothing + +suffixMatch :: Eq a => [a] -> [a] -> Bool +suffixMatch pat str = prefixMatch (reverse pat) (reverse str) \end{code} %************************************************************************ @@ -1008,14 +645,17 @@ substr str beg end The following are curried versions of @fst@ and @snd@. \begin{code} +#if NOT_USED cfst :: a -> b -> a -- stranal-sem only (Note) cfst x y = x +#endif \end{code} The following provide us higher order functions that, when applied to a function, operate on pairs. \begin{code} +#if NOT_USED applyToPair :: ((a -> c),(b -> d)) -> (a,b) -> (c,d) applyToPair (f,g) (x,y) = (f x, g y) @@ -1024,11 +664,7 @@ applyToFst f (x,y) = (f x,y) applyToSnd :: (b -> d) -> (a,b) -> (a,d) applyToSnd f (x,y) = (x,f y) - -foldPair :: (a->a->a,b->b->b) -> (a,b) -> [(a,b)] -> (a,b) -foldPair fg ab [] = ab -foldPair fg@(f,g) ab ((a,b):abs) = (f a u,g b v) - where (u,v) = foldPair fg ab abs +#endif \end{code} \begin{code} @@ -1036,26 +672,100 @@ unzipWith :: (a -> b -> c) -> [(a, b)] -> [c] unzipWith f pairs = map ( \ (a, b) -> f a b ) pairs \end{code} -%************************************************************************ -%* * -\subsection[Utils-errors]{Error handling} -%* * -%************************************************************************ +\begin{code} +seqList :: [a] -> b -> b +seqList [] b = b +seqList (x:xs) b = x `seq` seqList xs b +\end{code} + +Global variables: \begin{code} -#if defined(COMPILING_GHC) -panic x = error ("panic! (the `impossible' happened):\n\t" - ++ x ++ "\n\n" - ++ "Please report it as a compiler bug " - ++ "to glasgow-haskell-bugs@dcs.glasgow.ac.uk.\n\n" ) +global :: a -> IORef a +global a = unsafePerformIO (newIORef a) +\end{code} -pprPanic heading pretty_msg = panic (heading++(ppShow 80 pretty_msg)) +Module names: -pprTrace heading pretty_msg = trace (heading++(ppShow 80 pretty_msg)) +\begin{code} +looksLikeModuleName [] = False +looksLikeModuleName (c:cs) = isUpper c && go cs + where go [] = True + go ('.':cs) = looksLikeModuleName cs + go (c:cs) = (isAlphaNum c || c == '_') && go cs +\end{code} -# ifdef DEBUG -assertPanic :: String -> Int -> a -assertPanic file line = panic ("ASSERT failed! file "++file++", line "++show line) -# endif -#endif {- COMPILING_GHC -} +Akin to @Prelude.words@, but sensitive to dquoted entities treating +them as single words. + +\begin{code} +toArgs :: String -> [String] +toArgs "" = [] +toArgs s = + case break (\ ch -> isSpace ch || ch == '"') (dropWhile isSpace s) of -- " + (w,aft) -> + (\ ws -> if null w then ws else w : ws) $ + case aft of + [] -> [] + (x:xs) + | x /= '"' -> toArgs xs + | otherwise -> + case lex aft of + ((str,rs):_) -> stripQuotes str : toArgs rs + _ -> [aft] + where + -- strip away dquotes; assume first and last chars contain quotes. + stripQuotes :: String -> String + stripQuotes ('"':xs) = init xs + stripQuotes xs = xs +\end{code} + +-- ----------------------------------------------------------------------------- +-- Floats + +\begin{code} +readRational__ :: ReadS Rational -- NB: doesn't handle leading "-" +readRational__ r = do + (n,d,s) <- readFix r + (k,t) <- readExp s + return ((n%1)*10^^(k-d), t) + where + readFix r = do + (ds,s) <- lexDecDigits r + (ds',t) <- lexDotDigits s + return (read (ds++ds'), length ds', t) + + readExp (e:s) | e `elem` "eE" = readExp' s + readExp s = return (0,s) + + readExp' ('+':s) = readDec s + readExp' ('-':s) = do + (k,t) <- readDec s + return (-k,t) + readExp' s = readDec s + + readDec s = do + (ds,r) <- nonnull isDigit s + return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ], + r) + + lexDecDigits = nonnull isDigit + + lexDotDigits ('.':s) = return (span isDigit s) + lexDotDigits s = return ("",s) + + nonnull p s = do (cs@(_:_),t) <- return (span p s) + return (cs,t) + +readRational :: String -> Rational -- NB: *does* handle a leading "-" +readRational top_s + = case top_s of + '-' : xs -> - (read_me xs) + xs -> read_me xs + where + read_me s + = case (do { (x,"") <- readRational__ s ; return x }) of + [x] -> x + [] -> error ("readRational: no parse:" ++ top_s) + _ -> error ("readRational: ambiguous parse:" ++ top_s) \end{code}