X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Futils%2FUtil.lhs;h=dc4f32ec5e7d7f4dffec59df200cd8fd6db7cf3c;hp=1ecf82c03848d507a452257fb6502c770b92d614;hb=d0faaa6fa0cecd23c5670fd199e9206275313666;hpb=43d9be898eadb04950bc0b0b272e0f05aedb463d diff --git a/compiler/utils/Util.lhs b/compiler/utils/Util.lhs index 1ecf82c..dc4f32e 100644 --- a/compiler/utils/Util.lhs +++ b/compiler/utils/Util.lhs @@ -2,20 +2,25 @@ % (c) The University of Glasgow 2006 % (c) The University of Glasgow 1992-2002 % -\section[Util]{Highly random utility functions} \begin{code} +-- | Highly random utility functions module Util ( - debugIsOn, ghciTablesNextToCode, + -- * Flags dependent on the compiler build + ghciSupported, debugIsOn, ghciTablesNextToCode, isDynamicGhcLib, isWindowsHost, isWindowsTarget, isDarwinTarget, - -- general list processing + -- * General list processing zipEqual, zipWithEqual, zipWith3Equal, zipWith4Equal, zipLazy, stretchZipWith, + + unzipWith, + mapFst, mapSnd, mapAndUnzip, mapAndUnzip3, nOfThem, filterOut, partitionWith, splitEithers, - foldl1', + + foldl1', foldl2, count, all2, lengthExceeds, lengthIs, lengthAtLeast, listLengthCmp, atLength, equalLength, compareLength, @@ -25,93 +30,121 @@ module Util ( isIn, isn'tIn, - -- for-loop - nTimes, - - -- sorting - sortLe, sortWith, on, + -- * Tuples + fstOf3, sndOf3, thirdOf3, - -- transitive closures - transitiveClosure, + -- * List operations controlled by another list + takeList, dropList, splitAtList, split, + dropTail, - -- accumulating - foldl2, count, all2, + -- * For loop + nTimes, - takeList, dropList, splitAtList, split, + -- * Sorting + sortLe, sortWith, on, - -- comparisons + -- * Comparisons isEqual, eqListBy, - thenCmp, cmpList, maybePrefixMatch, + thenCmp, cmpList, removeSpaces, + + -- * Edit distance + fuzzyMatch, fuzzyLookup, - -- strictness - seqList, - - -- pairs - unzipWith, + -- * Transitive closures + transitiveClosure, - global, consIORef, + -- * Strictness + seqList, - -- module names + -- * Module names looksLikeModuleName, + -- * Argument processing getCmd, toCmdArgs, toArgs, - -- Floating point stuff + -- * Floating point readRational, - -- IO-ish utilities + -- * read helpers + maybeReadFuzzy, + + -- * IO-ish utilities createDirectoryHierarchy, doesDirNameExist, modificationTimeIfExists, - later, handleDyn, handle, + global, consIORef, globalMVar, globalEmptyMVar, - -- Filename utils + -- * Filenames and paths Suffix, splitLongestPrefix, escapeSpaces, parseSearchPath, Direction(..), reslash, + + -- * Utils for defining Data instances + abstractConstr, abstractDataType, mkNoRepType, + + -- * Utils for printing C code + charToC ) where #include "HsVersions.h" +import Exception import Panic -import Control.Exception ( Exception(..), finally, catchDyn, throw ) -import qualified Control.Exception as Exception -import Data.Dynamic ( Typeable ) -import Data.IORef ( IORef, newIORef ) +import Data.Data +import Data.IORef ( IORef, newIORef, atomicModifyIORef ) import System.IO.Unsafe ( unsafePerformIO ) -import Data.IORef ( readIORef, writeIORef ) import Data.List hiding (group) +import Control.Concurrent.MVar ( MVar, newMVar, newEmptyMVar ) -import qualified Data.List as List ( elem ) #ifdef DEBUG -import qualified Data.List as List ( notElem ) import FastTypes #endif import Control.Monad ( unless ) -import SYSTEM_IO_ERROR as IO ( catch, isDoesNotExistError ) +import System.IO.Error as IO ( isDoesNotExistError ) import System.Directory ( doesDirectoryExist, createDirectory, getModificationTime ) -import System.FilePath hiding ( searchPathSeparator ) -import Data.Char ( isUpper, isAlphaNum, isSpace, ord, isDigit ) -import Data.Ratio ( (%) ) +import System.FilePath import System.Time ( ClockTime ) +import Data.Char ( isUpper, isAlphaNum, isSpace, chr, ord, isDigit ) +import Data.Ratio ( (%) ) +import Data.Ord ( comparing ) +import Data.Bits +import Data.Word +import qualified Data.IntMap as IM + infixr 9 `thenCmp` \end{code} %************************************************************************ %* * -\subsection{Is DEBUG on, are we on Windows?} +\subsection{Is DEBUG on, are we on Windows, etc?} %* * %************************************************************************ +These booleans are global constants, set by CPP flags. They allow us to +recompile a single module (this one) to change whether or not debug output +appears. They sometimes let us avoid even running CPP elsewhere. + +It's important that the flags are literal constants (True/False). Then, +with -0, tests of the flags in other modules will simplify to the correct +branch of the conditional, thereby dropping debug code altogether when +the flags are off. + \begin{code} +ghciSupported :: Bool +#ifdef GHCI +ghciSupported = True +#else +ghciSupported = False +#endif + debugIsOn :: Bool #ifdef DEBUG debugIsOn = True @@ -126,6 +159,13 @@ ghciTablesNextToCode = True ghciTablesNextToCode = False #endif +isDynamicGhcLib :: Bool +#ifdef DYNAMIC +isDynamicGhcLib = True +#else +isDynamicGhcLib = False +#endif + isWindowsHost :: Bool #ifdef mingw32_HOST_OS isWindowsHost = True @@ -155,13 +195,22 @@ isDarwinTarget = False %************************************************************************ \begin{code} --- Compose a function with itself n times. (nth rather than twice) +-- | Compose a function with itself n times. (nth rather than twice) nTimes :: Int -> (a -> a) -> (a -> a) nTimes 0 _ = id nTimes 1 f = f nTimes n f = f . nTimes (n-1) f \end{code} +\begin{code} +fstOf3 :: (a,b,c) -> a +sndOf3 :: (a,b,c) -> b +thirdOf3 :: (a,b,c) -> c +fstOf3 (a,_,_) = a +sndOf3 (_,b,_) = b +thirdOf3 (_,_,c) = c +\end{code} + %************************************************************************ %* * \subsection[Utils-lists]{General list processing} @@ -170,12 +219,13 @@ nTimes n f = f . nTimes (n-1) f \begin{code} filterOut :: (a->Bool) -> [a] -> [a] --- Like filter, only reverses the sense of the test +-- ^ Like filter, only it reverses the sense of the test filterOut _ [] = [] filterOut p (x:xs) | p x = filterOut p xs | otherwise = x : filterOut p xs partitionWith :: (a -> Either b c) -> [a] -> ([b], [c]) +-- ^ Uses a function to determine which of two output lists an input element should join partitionWith _ [] = ([],[]) partitionWith f (x:xs) = case f x of Left b -> (b:bs, cs) @@ -183,6 +233,7 @@ partitionWith f (x:xs) = case f x of where (bs,cs) = partitionWith f xs splitEithers :: [Either a b] -> ([a], [b]) +-- ^ Teases a list of 'Either's apart into two lists splitEithers [] = ([],[]) splitEithers (e : es) = case e of Left x -> (x:xs, ys) @@ -227,8 +278,7 @@ zipWith4Equal msg _ _ _ _ _ = panic ("zipWith4Equal: unequal lists:"++msg) \end{code} \begin{code} --- zipLazy is lazy in the second list (observe the ~) - +-- | 'zipLazy' is a kind of 'zip' that is lazy in the second list (observe the ~) zipLazy :: [a] -> [b] -> [(a,b)] zipLazy [] _ = [] -- We want to write this, but with GHC 6.4 we get a warning, so it @@ -242,8 +292,8 @@ zipLazy (x:xs) zs = let y : ys = zs \begin{code} stretchZipWith :: (a -> Bool) -> b -> (a->b->c) -> [a] -> [b] -> [c] --- (stretchZipWith p z f xs ys) stretches ys by inserting z in --- the places where p returns *True* +-- ^ @stretchZipWith p z f xs ys@ stretches @ys@ by inserting @z@ in +-- the places where @p@ returns @True@ stretchZipWith _ _ _ [] _ = [] stretchZipWith p z f (x:xs) ys @@ -284,14 +334,14 @@ mapAndUnzip3 f (x:xs) nOfThem :: Int -> a -> [a] nOfThem n thing = replicate n thing --- 'atLength atLen atEnd ls n' unravels list 'ls' to position 'n'; --- specification: +-- | @atLength atLen atEnd ls n@ unravels list @ls@ to position @n@. Precisely: -- +-- @ -- atLength atLenPred atEndPred ls n -- | n < 0 = atLenPred n -- | length ls < n = atEndPred (n - length ls) -- | otherwise = atLenPred (drop n ls) --- +-- @ atLength :: ([a] -> b) -> (Int -> b) -> [a] @@ -305,9 +355,10 @@ atLength atLenPred atEndPred ls n go 0 ls = atLenPred ls go n (_:xs) = go (n-1) xs --- special cases. +-- Some special cases of atLength: + lengthExceeds :: [a] -> Int -> Bool --- (lengthExceeds xs n) = (length xs > n) +-- ^ > (lengthExceeds xs n) = (length xs > n) lengthExceeds = atLength notNull (const False) lengthAtLeast :: [a] -> Int -> Bool @@ -365,49 +416,30 @@ Debugging/specialising versions of \tr{elem} and \tr{notElem} isIn, isn'tIn :: Eq a => String -> a -> [a] -> Bool # ifndef DEBUG -isIn _msg x ys = elem__ x ys -isn'tIn _msg x ys = notElem__ x ys - ---these are here to be SPECIALIZEd (automagically) -elem__ :: Eq a => a -> [a] -> Bool -elem__ _ [] = False -elem__ x (y:ys) = x == y || elem__ x ys - -notElem__ :: Eq a => a -> [a] -> Bool -notElem__ _ [] = True -notElem__ x (y:ys) = x /= y && notElem__ x ys +isIn _msg x ys = x `elem` ys +isn'tIn _msg x ys = x `notElem` ys # else /* DEBUG */ isIn msg x ys - = elem (_ILIT(0)) x ys + = elem100 (_ILIT(0)) x ys where - elem _ _ [] = False - elem i x (y:ys) + elem100 _ _ [] = False + elem100 i x (y:ys) | i ># _ILIT(100) = trace ("Over-long elem in " ++ msg) - (x `List.elem` (y:ys)) - | otherwise = x == y || elem (i +# _ILIT(1)) x ys + (x `elem` (y:ys)) + | otherwise = x == y || elem100 (i +# _ILIT(1)) x ys isn'tIn msg x ys - = notElem (_ILIT(0)) x ys + = notElem100 (_ILIT(0)) x ys where - notElem _ _ [] = True - notElem i x (y:ys) + notElem100 _ _ [] = True + notElem100 i x (y:ys) | i ># _ILIT(100) = trace ("Over-long notElem in " ++ msg) - (x `List.notElem` (y:ys)) - | otherwise = x /= y && notElem (i +# _ILIT(1)) x ys + (x `notElem` (y:ys)) + | otherwise = x /= y && notElem100 (i +# _ILIT(1)) x ys # endif /* DEBUG */ \end{code} -foldl1' was added in GHC 6.4 - -\begin{code} -#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ < 604 -foldl1' :: (a -> a -> a) -> [a] -> a -foldl1' f (x:xs) = foldl' f x xs -foldl1' _ [] = panic "foldl1'" -#endif -\end{code} - %************************************************************************ %* * \subsubsection[Utils-Carsten-mergesort]{A mergesort from Carsten} @@ -511,7 +543,7 @@ sortWith get_key xs = sortLe le xs where x `le` y = get_key x < get_key y -on :: (a -> a -> Ordering) -> (b -> a) -> b -> b -> Ordering +on :: (a -> a -> c) -> (b -> a) -> b -> b -> c on cmp sel = \x y -> sel x `cmp` sel y \end{code} @@ -597,6 +629,10 @@ splitAtList (_:xs) (y:ys) = (y:ys', ys'') where (ys', ys'') = splitAtList xs ys +-- drop from the end of a list +dropTail :: Int -> [a] -> [a] +dropTail n = reverse . drop n . reverse + snocView :: [a] -> Maybe ([a],a) -- Split off the last element snocView [] = Nothing @@ -649,21 +685,145 @@ cmpList cmp (a:as) (b:bs) \end{code} \begin{code} --- This (with a more general type) is Data.List.stripPrefix from GHC 6.8. --- This definition can be removed once we require at least 6.8 to build. -maybePrefixMatch :: String -> String -> Maybe String -maybePrefixMatch [] rest = Just rest -maybePrefixMatch (_:_) [] = Nothing -maybePrefixMatch (p:pat) (r:rest) - | p == r = maybePrefixMatch pat rest - | otherwise = Nothing - removeSpaces :: String -> String removeSpaces = reverse . dropWhile isSpace . reverse . dropWhile isSpace \end{code} %************************************************************************ %* * +\subsection{Edit distance} +%* * +%************************************************************************ + +\begin{code} +-- | Find the "restricted" Damerau-Levenshtein edit distance between two strings. +-- See: . +-- Based on the algorithm presented in "A Bit-Vector Algorithm for Computing +-- Levenshtein and Damerau Edit Distances" in PSC'02 (Heikki Hyyro). +-- See http://www.cs.uta.fi/~helmu/pubs/psc02.pdf and +-- http://www.cs.uta.fi/~helmu/pubs/PSCerr.html for an explanation +restrictedDamerauLevenshteinDistance :: String -> String -> Int +restrictedDamerauLevenshteinDistance str1 str2 + = restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2 + where + m = length str1 + n = length str2 + +restrictedDamerauLevenshteinDistanceWithLengths + :: Int -> Int -> String -> String -> Int +restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2 + | m <= n + = if n <= 32 -- n must be larger so this check is sufficient + then restrictedDamerauLevenshteinDistance' (undefined :: Word32) m n str1 str2 + else restrictedDamerauLevenshteinDistance' (undefined :: Integer) m n str1 str2 + + | otherwise + = if m <= 32 -- m must be larger so this check is sufficient + then restrictedDamerauLevenshteinDistance' (undefined :: Word32) n m str2 str1 + else restrictedDamerauLevenshteinDistance' (undefined :: Integer) n m str2 str1 + +restrictedDamerauLevenshteinDistance' + :: (Bits bv) => bv -> Int -> Int -> String -> String -> Int +restrictedDamerauLevenshteinDistance' _bv_dummy m n str1 str2 + | [] <- str1 = n + | otherwise = extractAnswer $ + foldl' (restrictedDamerauLevenshteinDistanceWorker + (matchVectors str1) top_bit_mask vector_mask) + (0, 0, m_ones, 0, m) str2 + where + m_ones@vector_mask = (2 ^ m) - 1 + top_bit_mask = (1 `shiftL` (m - 1)) `asTypeOf` _bv_dummy + extractAnswer (_, _, _, _, distance) = distance + +restrictedDamerauLevenshteinDistanceWorker + :: (Bits bv) => IM.IntMap bv -> bv -> bv + -> (bv, bv, bv, bv, Int) -> Char -> (bv, bv, bv, bv, Int) +restrictedDamerauLevenshteinDistanceWorker str1_mvs top_bit_mask vector_mask + (pm, d0, vp, vn, distance) char2 + = seq str1_mvs $ seq top_bit_mask $ seq vector_mask $ + seq pm' $ seq d0' $ seq vp' $ seq vn' $ + seq distance'' $ seq char2 $ + (pm', d0', vp', vn', distance'') + where + pm' = IM.findWithDefault 0 (ord char2) str1_mvs + + d0' = ((((sizedComplement vector_mask d0) .&. pm') `shiftL` 1) .&. pm) + .|. ((((pm' .&. vp) + vp) .&. vector_mask) `xor` vp) .|. pm' .|. vn + -- No need to mask the shiftL because of the restricted range of pm + + hp' = vn .|. sizedComplement vector_mask (d0' .|. vp) + hn' = d0' .&. vp + + hp'_shift = ((hp' `shiftL` 1) .|. 1) .&. vector_mask + hn'_shift = (hn' `shiftL` 1) .&. vector_mask + vp' = hn'_shift .|. sizedComplement vector_mask (d0' .|. hp'_shift) + vn' = d0' .&. hp'_shift + + distance' = if hp' .&. top_bit_mask /= 0 then distance + 1 else distance + distance'' = if hn' .&. top_bit_mask /= 0 then distance' - 1 else distance' + +sizedComplement :: Bits bv => bv -> bv -> bv +sizedComplement vector_mask vect = vector_mask `xor` vect + +matchVectors :: Bits bv => String -> IM.IntMap bv +matchVectors = snd . foldl' go (0 :: Int, IM.empty) + where + go (ix, im) char = let ix' = ix + 1 + im' = IM.insertWith (.|.) (ord char) (2 ^ ix) im + in seq ix' $ seq im' $ (ix', im') + +#ifdef __GLASGOW_HASKELL__ +{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance' + :: Word32 -> Int -> Int -> String -> String -> Int #-} +{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance' + :: Integer -> Int -> Int -> String -> String -> Int #-} + +{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker + :: IM.IntMap Word32 -> Word32 -> Word32 + -> (Word32, Word32, Word32, Word32, Int) + -> Char -> (Word32, Word32, Word32, Word32, Int) #-} +{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker + :: IM.IntMap Integer -> Integer -> Integer + -> (Integer, Integer, Integer, Integer, Int) + -> Char -> (Integer, Integer, Integer, Integer, Int) #-} + +{-# SPECIALIZE INLINE sizedComplement :: Word32 -> Word32 -> Word32 #-} +{-# SPECIALIZE INLINE sizedComplement :: Integer -> Integer -> Integer #-} + +{-# SPECIALIZE matchVectors :: String -> IM.IntMap Word32 #-} +{-# SPECIALIZE matchVectors :: String -> IM.IntMap Integer #-} +#endif + +fuzzyMatch :: String -> [String] -> [String] +fuzzyMatch key vals = fuzzyLookup key [(v,v) | v <- vals] + +-- | Search for possible matches to the users input in the given list, +-- returning a small number of ranked results +fuzzyLookup :: String -> [(String,a)] -> [a] +fuzzyLookup user_entered possibilites + = map fst $ take mAX_RESULTS $ sortBy (comparing snd) + [ (poss_val, distance) | (poss_str, poss_val) <- possibilites + , let distance = restrictedDamerauLevenshteinDistance + poss_str user_entered + , distance <= fuzzy_threshold ] + where + -- Work out an approriate match threshold: + -- We report a candidate if its edit distance is <= the threshold, + -- The threshhold is set to about a quarter of the # of characters the user entered + -- Length Threshold + -- 1 0 -- Don't suggest *any* candidates + -- 2 1 -- for single-char identifiers + -- 3 1 + -- 4 1 + -- 5 1 + -- 6 2 + -- + fuzzy_threshold = truncate $ fromIntegral (length user_entered + 2) / (4 :: Rational) + mAX_RESULTS = 3 +\end{code} + +%************************************************************************ +%* * \subsection[Utils-pairs]{Pairs} %* * %************************************************************************ @@ -689,8 +849,15 @@ global a = unsafePerformIO (newIORef a) \begin{code} consIORef :: IORef [a] -> a -> IO () consIORef var x = do - xs <- readIORef var - writeIORef var (x:xs) + atomicModifyIORef var (\xs -> (x:xs,())) +\end{code} + +\begin{code} +globalMVar :: a -> MVar a +globalMVar a = unsafePerformIO (newMVar a) + +globalEmptyMVar :: MVar a +globalEmptyMVar = unsafePerformIO newEmptyMVar \end{code} Module names: @@ -701,7 +868,7 @@ looksLikeModuleName [] = False looksLikeModuleName (c:cs) = isUpper c && go cs where go [] = True go ('.':cs) = looksLikeModuleName cs - go (c:cs) = (isAlphaNum c || c == '_') && go cs + go (c:cs) = (isAlphaNum c || c == '_' || c == '\'') && go cs \end{code} Akin to @Prelude.words@, but acts like the Bourne shell, treating @@ -802,6 +969,17 @@ readRational top_s ----------------------------------------------------------------------------- +-- read helpers + +maybeReadFuzzy :: Read a => String -> Maybe a +maybeReadFuzzy str = case reads str of + [(x, s)] + | all isSpace s -> + Just x + _ -> + Nothing + +----------------------------------------------------------------------------- -- Create a hierarchy of directories createDirectoryHierarchy :: FilePath -> IO () @@ -819,29 +997,15 @@ doesDirNameExist fpath = case takeDirectory fpath of "" -> return True -- XXX Hack _ -> doesDirectoryExist (takeDirectory fpath) --- ----------------------------------------------------------------------------- --- Exception utils - -later :: IO b -> IO a -> IO a -later = flip finally - -handleDyn :: Typeable ex => (ex -> IO a) -> IO a -> IO a -handleDyn = flip catchDyn - -handle :: (Exception -> IO a) -> IO a -> IO a -handle h f = f `Exception.catch` \e -> case e of - ExitException _ -> throw e - _ -> h e - -- -------------------------------------------------------------- -- check existence & modification time at the same time modificationTimeIfExists :: FilePath -> IO (Maybe ClockTime) modificationTimeIfExists f = do (do t <- getModificationTime f; return (Just t)) - `IO.catch` \e -> if isDoesNotExistError e - then return Nothing - else ioError e + `catchIO` \e -> if isDoesNotExistError e + then return Nothing + else ioError e -- split a string at the last character where 'pred' is True, -- returning a pair of strings. The first component holds the string @@ -886,17 +1050,7 @@ parseSearchPath path = split path #endif _ -> chunk' - (chunk', rest') = break (==searchPathSeparator) s - --- | A platform-specific character used to separate search path strings in --- environment variables. The separator is a colon (\":\") on Unix and --- Macintosh, and a semicolon (\";\") on the Windows operating system. -searchPathSeparator :: Char -#if mingw32_HOST_OS || mingw32_TARGET_OS -searchPathSeparator = ';' -#else -searchPathSeparator = ':' -#endif + (chunk', rest') = break isSearchPathSeparator s data Direction = Forwards | Backwards @@ -911,3 +1065,40 @@ reslash d = f Backwards -> '\\' \end{code} +%************************************************************************ +%* * +\subsection[Utils-Data]{Utils for defining Data instances} +%* * +%************************************************************************ + +These functions helps us to define Data instances for abstract types. + +\begin{code} +abstractConstr :: String -> Constr +abstractConstr n = mkConstr (abstractDataType n) ("{abstract:"++n++"}") [] Prefix +\end{code} + +\begin{code} +abstractDataType :: String -> DataType +abstractDataType n = mkDataType n [abstractConstr n] +\end{code} + +%************************************************************************ +%* * +\subsection[Utils-C]{Utils for printing C code} +%* * +%************************************************************************ + +\begin{code} +charToC :: Word8 -> String +charToC w = + case chr (fromIntegral w) of + '\"' -> "\\\"" + '\'' -> "\\\'" + '\\' -> "\\\\" + c | c >= ' ' && c <= '~' -> [c] + | otherwise -> ['\\', + chr (ord '0' + ord c `div` 64), + chr (ord '0' + ord c `div` 8 `mod` 8), + chr (ord '0' + ord c `mod` 8)] +\end{code}