% (c) The University of Glasgow 2006
% (c) The University of Glasgow 1992-2002
%
-\section[Util]{Highly random utility functions}
\begin{code}
+-- | Highly random utility functions
module Util (
- debugIsOn, ghciTablesNextToCode, picIsOn,
+ -- * Flags dependent on the compiler build
+ ghciSupported, debugIsOn, ghciTablesNextToCode, isDynamicGhcLib,
isWindowsHost, isWindowsTarget, isDarwinTarget,
- -- general list processing
+ -- * General list processing
zipEqual, zipWithEqual, zipWith3Equal, zipWith4Equal,
zipLazy, stretchZipWith,
+
+ unzipWith,
+
mapFst, mapSnd,
mapAndUnzip, mapAndUnzip3,
nOfThem, filterOut, partitionWith, splitEithers,
- foldl1',
+
+ foldl1', foldl2, count, all2,
lengthExceeds, lengthIs, lengthAtLeast,
listLengthCmp, atLength, equalLength, compareLength,
isIn, isn'tIn,
- -- for-loop
- nTimes,
+ -- * Tuples
+ fstOf3, sndOf3, thirdOf3,
- -- sorting
- sortLe, sortWith, on,
-
- -- transitive closures
- transitiveClosure,
+ -- * List operations controlled by another list
+ takeList, dropList, splitAtList, split,
+ dropTail,
- -- accumulating
- foldl2, count, all2,
+ -- * For loop
+ nTimes,
- takeList, dropList, splitAtList, split,
+ -- * Sorting
+ sortLe, sortWith, on,
- -- comparisons
+ -- * Comparisons
isEqual, eqListBy,
- thenCmp, cmpList, maybePrefixMatch,
+ thenCmp, cmpList,
removeSpaces,
+
+ -- * Edit distance
+ fuzzyMatch, fuzzyLookup,
- -- strictness
- seqList,
-
- -- pairs
- unzipWith,
+ -- * Transitive closures
+ transitiveClosure,
- global, consIORef,
+ -- * Strictness
+ seqList,
- -- module names
+ -- * Module names
looksLikeModuleName,
+ -- * Argument processing
getCmd, toCmdArgs, toArgs,
- -- Floating point stuff
+ -- * Floating point
readRational,
- -- IO-ish utilities
+ -- * IO-ish utilities
createDirectoryHierarchy,
doesDirNameExist,
modificationTimeIfExists,
- later, handleDyn, handle,
+ global, consIORef, globalMVar, globalEmptyMVar,
- -- Filename utils
+ -- * Filenames and paths
Suffix,
splitLongestPrefix,
escapeSpaces,
parseSearchPath,
Direction(..), reslash,
+
+ -- * Utils for defining Data instances
+ abstractConstr, abstractDataType, mkNoRepType
) where
#include "HsVersions.h"
+import Exception
import Panic
-import Control.Exception ( Exception(..), finally, catchDyn, throw )
-import qualified Control.Exception as Exception
-import Data.Dynamic ( Typeable )
-import Data.IORef ( IORef, newIORef )
+import Data.Data
+import Data.IORef ( IORef, newIORef, atomicModifyIORef )
import System.IO.Unsafe ( unsafePerformIO )
-import Data.IORef ( readIORef, writeIORef )
import Data.List hiding (group)
+import Control.Concurrent.MVar ( MVar, newMVar, newEmptyMVar )
-import qualified Data.List as List ( elem )
#ifdef DEBUG
-import qualified Data.List as List ( notElem )
import FastTypes
#endif
import Control.Monad ( unless )
-import System.IO.Error as IO ( catch, isDoesNotExistError )
+import System.IO.Error as IO ( isDoesNotExistError )
import System.Directory ( doesDirectoryExist, createDirectory,
getModificationTime )
-import System.FilePath hiding ( searchPathSeparator )
+import System.FilePath
+import System.Time ( ClockTime )
+
import Data.Char ( isUpper, isAlphaNum, isSpace, ord, isDigit )
import Data.Ratio ( (%) )
-import System.Time ( ClockTime )
+import Data.Ord ( comparing )
+import Data.Bits
+import Data.Word
+import qualified Data.IntMap as IM
infixr 9 `thenCmp`
\end{code}
%************************************************************************
%* *
-\subsection{Is DEBUG on, are we on Windows?}
+\subsection{Is DEBUG on, are we on Windows, etc?}
%* *
%************************************************************************
+These booleans are global constants, set by CPP flags. They allow us to
+recompile a single module (this one) to change whether or not debug output
+appears. They sometimes let us avoid even running CPP elsewhere.
+
+It's important that the flags are literal constants (True/False). Then,
+with -0, tests of the flags in other modules will simplify to the correct
+branch of the conditional, thereby dropping debug code altogether when
+the flags are off.
+
\begin{code}
+ghciSupported :: Bool
+#ifdef GHCI
+ghciSupported = True
+#else
+ghciSupported = False
+#endif
+
debugIsOn :: Bool
#ifdef DEBUG
debugIsOn = True
ghciTablesNextToCode = False
#endif
-picIsOn :: Bool
-#ifdef __PIC__
-picIsOn = True
+isDynamicGhcLib :: Bool
+#ifdef DYNAMIC
+isDynamicGhcLib = True
#else
-picIsOn = False
+isDynamicGhcLib = False
#endif
isWindowsHost :: Bool
%************************************************************************
\begin{code}
--- Compose a function with itself n times. (nth rather than twice)
+-- | Compose a function with itself n times. (nth rather than twice)
nTimes :: Int -> (a -> a) -> (a -> a)
nTimes 0 _ = id
nTimes 1 f = f
nTimes n f = f . nTimes (n-1) f
\end{code}
+\begin{code}
+fstOf3 :: (a,b,c) -> a
+sndOf3 :: (a,b,c) -> b
+thirdOf3 :: (a,b,c) -> c
+fstOf3 (a,_,_) = a
+sndOf3 (_,b,_) = b
+thirdOf3 (_,_,c) = c
+\end{code}
+
%************************************************************************
%* *
\subsection[Utils-lists]{General list processing}
\begin{code}
filterOut :: (a->Bool) -> [a] -> [a]
--- Like filter, only reverses the sense of the test
+-- ^ Like filter, only it reverses the sense of the test
filterOut _ [] = []
filterOut p (x:xs) | p x = filterOut p xs
| otherwise = x : filterOut p xs
partitionWith :: (a -> Either b c) -> [a] -> ([b], [c])
+-- ^ Uses a function to determine which of two output lists an input element should join
partitionWith _ [] = ([],[])
partitionWith f (x:xs) = case f x of
Left b -> (b:bs, cs)
where (bs,cs) = partitionWith f xs
splitEithers :: [Either a b] -> ([a], [b])
+-- ^ Teases a list of 'Either's apart into two lists
splitEithers [] = ([],[])
splitEithers (e : es) = case e of
Left x -> (x:xs, ys)
\end{code}
\begin{code}
--- zipLazy is lazy in the second list (observe the ~)
-
+-- | 'zipLazy' is a kind of 'zip' that is lazy in the second list (observe the ~)
zipLazy :: [a] -> [b] -> [(a,b)]
zipLazy [] _ = []
-- We want to write this, but with GHC 6.4 we get a warning, so it
\begin{code}
stretchZipWith :: (a -> Bool) -> b -> (a->b->c) -> [a] -> [b] -> [c]
--- (stretchZipWith p z f xs ys) stretches ys by inserting z in
--- the places where p returns *True*
+-- ^ @stretchZipWith p z f xs ys@ stretches @ys@ by inserting @z@ in
+-- the places where @p@ returns @True@
stretchZipWith _ _ _ [] _ = []
stretchZipWith p z f (x:xs) ys
nOfThem :: Int -> a -> [a]
nOfThem n thing = replicate n thing
--- 'atLength atLen atEnd ls n' unravels list 'ls' to position 'n';
--- specification:
+-- | @atLength atLen atEnd ls n@ unravels list @ls@ to position @n@. Precisely:
--
+-- @
-- atLength atLenPred atEndPred ls n
-- | n < 0 = atLenPred n
-- | length ls < n = atEndPred (n - length ls)
-- | otherwise = atLenPred (drop n ls)
---
+-- @
atLength :: ([a] -> b)
-> (Int -> b)
-> [a]
go 0 ls = atLenPred ls
go n (_:xs) = go (n-1) xs
--- special cases.
+-- Some special cases of atLength:
+
lengthExceeds :: [a] -> Int -> Bool
--- (lengthExceeds xs n) = (length xs > n)
+-- ^ > (lengthExceeds xs n) = (length xs > n)
lengthExceeds = atLength notNull (const False)
lengthAtLeast :: [a] -> Int -> Bool
isIn, isn'tIn :: Eq a => String -> a -> [a] -> Bool
# ifndef DEBUG
-isIn _msg x ys = elem__ x ys
-isn'tIn _msg x ys = notElem__ x ys
-
---these are here to be SPECIALIZEd (automagically)
-elem__ :: Eq a => a -> [a] -> Bool
-elem__ _ [] = False
-elem__ x (y:ys) = x == y || elem__ x ys
-
-notElem__ :: Eq a => a -> [a] -> Bool
-notElem__ _ [] = True
-notElem__ x (y:ys) = x /= y && notElem__ x ys
+isIn _msg x ys = x `elem` ys
+isn'tIn _msg x ys = x `notElem` ys
# else /* DEBUG */
isIn msg x ys
- = elem (_ILIT(0)) x ys
+ = elem100 (_ILIT(0)) x ys
where
- elem _ _ [] = False
- elem i x (y:ys)
+ elem100 _ _ [] = False
+ elem100 i x (y:ys)
| i ># _ILIT(100) = trace ("Over-long elem in " ++ msg)
- (x `List.elem` (y:ys))
- | otherwise = x == y || elem (i +# _ILIT(1)) x ys
+ (x `elem` (y:ys))
+ | otherwise = x == y || elem100 (i +# _ILIT(1)) x ys
isn'tIn msg x ys
- = notElem (_ILIT(0)) x ys
+ = notElem100 (_ILIT(0)) x ys
where
- notElem _ _ [] = True
- notElem i x (y:ys)
+ notElem100 _ _ [] = True
+ notElem100 i x (y:ys)
| i ># _ILIT(100) = trace ("Over-long notElem in " ++ msg)
- (x `List.notElem` (y:ys))
- | otherwise = x /= y && notElem (i +# _ILIT(1)) x ys
+ (x `notElem` (y:ys))
+ | otherwise = x /= y && notElem100 (i +# _ILIT(1)) x ys
# endif /* DEBUG */
\end{code}
where
x `le` y = get_key x < get_key y
-on :: (a -> a -> Ordering) -> (b -> a) -> b -> b -> Ordering
+on :: (a -> a -> c) -> (b -> a) -> b -> b -> c
on cmp sel = \x y -> sel x `cmp` sel y
\end{code}
where
(ys', ys'') = splitAtList xs ys
+-- drop from the end of a list
+dropTail :: Int -> [a] -> [a]
+dropTail n = reverse . drop n . reverse
+
snocView :: [a] -> Maybe ([a],a)
-- Split off the last element
snocView [] = Nothing
\end{code}
\begin{code}
--- This (with a more general type) is Data.List.stripPrefix from GHC 6.8.
--- This definition can be removed once we require at least 6.8 to build.
-maybePrefixMatch :: String -> String -> Maybe String
-maybePrefixMatch [] rest = Just rest
-maybePrefixMatch (_:_) [] = Nothing
-maybePrefixMatch (p:pat) (r:rest)
- | p == r = maybePrefixMatch pat rest
- | otherwise = Nothing
-
removeSpaces :: String -> String
removeSpaces = reverse . dropWhile isSpace . reverse . dropWhile isSpace
\end{code}
%************************************************************************
%* *
+\subsection{Edit distance}
+%* *
+%************************************************************************
+
+\begin{code}
+-- | Find the "restricted" Damerau-Levenshtein edit distance between two strings.
+-- See: <http://en.wikipedia.org/wiki/Damerau-Levenshtein_distance>.
+-- Based on the algorithm presented in "A Bit-Vector Algorithm for Computing
+-- Levenshtein and Damerau Edit Distances" in PSC'02 (Heikki Hyyro).
+-- See http://www.cs.uta.fi/~helmu/pubs/psc02.pdf and
+-- http://www.cs.uta.fi/~helmu/pubs/PSCerr.html for an explanation
+restrictedDamerauLevenshteinDistance :: String -> String -> Int
+restrictedDamerauLevenshteinDistance str1 str2
+ = restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
+ where
+ m = length str1
+ n = length str2
+
+restrictedDamerauLevenshteinDistanceWithLengths
+ :: Int -> Int -> String -> String -> Int
+restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
+ | m <= n
+ = if n <= 32 -- n must be larger so this check is sufficient
+ then restrictedDamerauLevenshteinDistance' (undefined :: Word32) m n str1 str2
+ else restrictedDamerauLevenshteinDistance' (undefined :: Integer) m n str1 str2
+
+ | otherwise
+ = if m <= 32 -- m must be larger so this check is sufficient
+ then restrictedDamerauLevenshteinDistance' (undefined :: Word32) n m str2 str1
+ else restrictedDamerauLevenshteinDistance' (undefined :: Integer) n m str2 str1
+
+restrictedDamerauLevenshteinDistance'
+ :: (Bits bv) => bv -> Int -> Int -> String -> String -> Int
+restrictedDamerauLevenshteinDistance' _bv_dummy m n str1 str2
+ | [] <- str1 = n
+ | otherwise = extractAnswer $
+ foldl' (restrictedDamerauLevenshteinDistanceWorker
+ (matchVectors str1) top_bit_mask vector_mask)
+ (0, 0, m_ones, 0, m) str2
+ where
+ m_ones@vector_mask = (2 ^ m) - 1
+ top_bit_mask = (1 `shiftL` (m - 1)) `asTypeOf` _bv_dummy
+ extractAnswer (_, _, _, _, distance) = distance
+
+restrictedDamerauLevenshteinDistanceWorker
+ :: (Bits bv) => IM.IntMap bv -> bv -> bv
+ -> (bv, bv, bv, bv, Int) -> Char -> (bv, bv, bv, bv, Int)
+restrictedDamerauLevenshteinDistanceWorker str1_mvs top_bit_mask vector_mask
+ (pm, d0, vp, vn, distance) char2
+ = seq str1_mvs $ seq top_bit_mask $ seq vector_mask $
+ seq pm' $ seq d0' $ seq vp' $ seq vn' $
+ seq distance'' $ seq char2 $
+ (pm', d0', vp', vn', distance'')
+ where
+ pm' = IM.findWithDefault 0 (ord char2) str1_mvs
+
+ d0' = ((((sizedComplement vector_mask d0) .&. pm') `shiftL` 1) .&. pm)
+ .|. ((((pm' .&. vp) + vp) .&. vector_mask) `xor` vp) .|. pm' .|. vn
+ -- No need to mask the shiftL because of the restricted range of pm
+
+ hp' = vn .|. sizedComplement vector_mask (d0' .|. vp)
+ hn' = d0' .&. vp
+
+ hp'_shift = ((hp' `shiftL` 1) .|. 1) .&. vector_mask
+ hn'_shift = (hn' `shiftL` 1) .&. vector_mask
+ vp' = hn'_shift .|. sizedComplement vector_mask (d0' .|. hp'_shift)
+ vn' = d0' .&. hp'_shift
+
+ distance' = if hp' .&. top_bit_mask /= 0 then distance + 1 else distance
+ distance'' = if hn' .&. top_bit_mask /= 0 then distance' - 1 else distance'
+
+sizedComplement :: Bits bv => bv -> bv -> bv
+sizedComplement vector_mask vect = vector_mask `xor` vect
+
+matchVectors :: Bits bv => String -> IM.IntMap bv
+matchVectors = snd . foldl' go (0 :: Int, IM.empty)
+ where
+ go (ix, im) char = let ix' = ix + 1
+ im' = IM.insertWith (.|.) (ord char) (2 ^ ix) im
+ in seq ix' $ seq im' $ (ix', im')
+
+#ifdef __GLASGOW_HASKELL__
+{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
+ :: Word32 -> Int -> Int -> String -> String -> Int #-}
+{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
+ :: Integer -> Int -> Int -> String -> String -> Int #-}
+
+{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
+ :: IM.IntMap Word32 -> Word32 -> Word32
+ -> (Word32, Word32, Word32, Word32, Int)
+ -> Char -> (Word32, Word32, Word32, Word32, Int) #-}
+{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
+ :: IM.IntMap Integer -> Integer -> Integer
+ -> (Integer, Integer, Integer, Integer, Int)
+ -> Char -> (Integer, Integer, Integer, Integer, Int) #-}
+
+{-# SPECIALIZE INLINE sizedComplement :: Word32 -> Word32 -> Word32 #-}
+{-# SPECIALIZE INLINE sizedComplement :: Integer -> Integer -> Integer #-}
+
+{-# SPECIALIZE matchVectors :: String -> IM.IntMap Word32 #-}
+{-# SPECIALIZE matchVectors :: String -> IM.IntMap Integer #-}
+#endif
+
+fuzzyMatch :: String -> [String] -> [String]
+fuzzyMatch key vals = fuzzyLookup key [(v,v) | v <- vals]
+
+-- | Search for possible matches to the users input in the given list,
+-- returning a small number of ranked results
+fuzzyLookup :: String -> [(String,a)] -> [a]
+fuzzyLookup user_entered possibilites
+ = map fst $ take mAX_RESULTS $ sortBy (comparing snd)
+ [ (poss_val, distance) | (poss_str, poss_val) <- possibilites
+ , let distance = restrictedDamerauLevenshteinDistance
+ poss_str user_entered
+ , distance <= fuzzy_threshold ]
+ where
+ -- Work out an approriate match threshold:
+ -- We report a candidate if its edit distance is <= the threshold,
+ -- The threshhold is set to about a quarter of the # of characters the user entered
+ -- Length Threshold
+ -- 1 0 -- Don't suggest *any* candidates
+ -- 2 1 -- for single-char identifiers
+ -- 3 1
+ -- 4 1
+ -- 5 1
+ -- 6 2
+ --
+ fuzzy_threshold = truncate $ fromIntegral (length user_entered + 2) / (4 :: Rational)
+ mAX_RESULTS = 3
+\end{code}
+
+%************************************************************************
+%* *
\subsection[Utils-pairs]{Pairs}
%* *
%************************************************************************
\begin{code}
consIORef :: IORef [a] -> a -> IO ()
consIORef var x = do
- xs <- readIORef var
- writeIORef var (x:xs)
+ atomicModifyIORef var (\xs -> (x:xs,()))
+\end{code}
+
+\begin{code}
+globalMVar :: a -> MVar a
+globalMVar a = unsafePerformIO (newMVar a)
+
+globalEmptyMVar :: MVar a
+globalEmptyMVar = unsafePerformIO newEmptyMVar
\end{code}
Module names:
looksLikeModuleName (c:cs) = isUpper c && go cs
where go [] = True
go ('.':cs) = looksLikeModuleName cs
- go (c:cs) = (isAlphaNum c || c == '_') && go cs
+ go (c:cs) = (isAlphaNum c || c == '_' || c == '\'') && go cs
\end{code}
Akin to @Prelude.words@, but acts like the Bourne shell, treating
"" -> return True -- XXX Hack
_ -> doesDirectoryExist (takeDirectory fpath)
--- -----------------------------------------------------------------------------
--- Exception utils
-
-later :: IO b -> IO a -> IO a
-later = flip finally
-
-handleDyn :: Typeable ex => (ex -> IO a) -> IO a -> IO a
-handleDyn = flip catchDyn
-
-handle :: (Exception -> IO a) -> IO a -> IO a
-handle h f = f `Exception.catch` \e -> case e of
- ExitException _ -> throw e
- _ -> h e
-
-- --------------------------------------------------------------
-- check existence & modification time at the same time
modificationTimeIfExists :: FilePath -> IO (Maybe ClockTime)
modificationTimeIfExists f = do
(do t <- getModificationTime f; return (Just t))
- `IO.catch` \e -> if isDoesNotExistError e
- then return Nothing
- else ioError e
+ `catchIO` \e -> if isDoesNotExistError e
+ then return Nothing
+ else ioError e
-- split a string at the last character where 'pred' is True,
-- returning a pair of strings. The first component holds the string
#endif
_ -> chunk'
- (chunk', rest') = break (==searchPathSeparator) s
-
--- | A platform-specific character used to separate search path strings in
--- environment variables. The separator is a colon (\":\") on Unix and
--- Macintosh, and a semicolon (\";\") on the Windows operating system.
-searchPathSeparator :: Char
-#if mingw32_HOST_OS || mingw32_TARGET_OS
-searchPathSeparator = ';'
-#else
-searchPathSeparator = ':'
-#endif
+ (chunk', rest') = break isSearchPathSeparator s
data Direction = Forwards | Backwards
Backwards -> '\\'
\end{code}
+%************************************************************************
+%* *
+\subsection[Utils-Data]{Utils for defining Data instances}
+%* *
+%************************************************************************
+
+These functions helps us to define Data instances for abstract types.
+
+\begin{code}
+abstractConstr :: String -> Constr
+abstractConstr n = mkConstr (abstractDataType n) ("{abstract:"++n++"}") [] Prefix
+\end{code}
+
+\begin{code}
+abstractDataType :: String -> DataType
+abstractDataType n = mkDataType n [abstractConstr n]
+\end{code}
+