X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Futils%2FUtil.lhs;h=ec5070f26a637a838a9d8c86b881a9ab23b85b84;hp=5d847213cc5287057865a9ab60ee0f8c7a08d135;hb=00022894bbb2dfa33fd213eedbac0f28b4c4b7b4;hpb=81466110ff8104ca60e20d617bab83f6f78f0ec2

diff --git a/compiler/utils/Util.lhs b/compiler/utils/Util.lhs
index 5d84721..ec5070f 100644
--- a/compiler/utils/Util.lhs
+++ b/compiler/utils/Util.lhs
@@ -2,20 +2,25 @@
 % (c) The University of Glasgow 2006
 % (c) The University of Glasgow 1992-2002
 %
-\section[Util]{Highly random utility functions}
 
 \begin{code}
+-- | Highly random utility functions
 module Util (
-        ghciSupported, debugIsOn, ghciTablesNextToCode, picIsOn,
+        -- * Flags dependent on the compiler build
+        ghciSupported, debugIsOn, ghciTablesNextToCode, isDynamicGhcLib,
         isWindowsHost, isWindowsTarget, isDarwinTarget,
 
-        -- general list processing
+        -- * General list processing
         zipEqual, zipWithEqual, zipWith3Equal, zipWith4Equal,
         zipLazy, stretchZipWith,
+        
+        unzipWith,
+        
         mapFst, mapSnd,
         mapAndUnzip, mapAndUnzip3,
         nOfThem, filterOut, partitionWith, splitEithers,
-        foldl1',
+        
+        foldl1', foldl2, count, all2,
 
         lengthExceeds, lengthIs, lengthAtLeast,
         listLengthCmp, atLength, equalLength, compareLength,
@@ -25,66 +30,71 @@ module Util (
 
         isIn, isn'tIn,
 
-        -- for-loop
-        nTimes,
+        -- * Tuples
+        fstOf3, sndOf3, thirdOf3,
 
-        -- sorting
-        sortLe, sortWith, on,
-
-        -- transitive closures
-        transitiveClosure,
+        -- * List operations controlled by another list
+        takeList, dropList, splitAtList, split,
+        dropTail,
 
-        -- accumulating
-        foldl2, count, all2,
+        -- * For loop
+        nTimes,
 
-        takeList, dropList, splitAtList, split,
+        -- * Sorting
+        sortLe, sortWith, on,
 
-        -- comparisons
+        -- * Comparisons
         isEqual, eqListBy,
-        thenCmp, cmpList, maybePrefixMatch,
+        thenCmp, cmpList,
         removeSpaces,
+        
+        -- * Edit distance
+        fuzzyMatch,
 
-        -- strictness
-        seqList,
-
-        -- pairs
-        unzipWith,
+        -- * Transitive closures
+        transitiveClosure,
 
-        global, consIORef,
+        -- * Strictness
+        seqList,
 
-        -- module names
+        -- * Module names
         looksLikeModuleName,
 
+        -- * Argument processing
         getCmd, toCmdArgs, toArgs,
 
-        -- Floating point stuff
+        -- * Floating point
         readRational,
 
-        -- IO-ish utilities
+        -- * IO-ish utilities
         createDirectoryHierarchy,
         doesDirNameExist,
         modificationTimeIfExists,
 
-        -- Filename utils
+        global, consIORef, globalMVar, globalEmptyMVar,
+
+        -- * Filenames and paths
         Suffix,
         splitLongestPrefix,
         escapeSpaces,
         parseSearchPath,
         Direction(..), reslash,
+
+        -- * Utils for defining Data instances
+        abstractConstr, abstractDataType, mkNoRepType
     ) where
 
 #include "HsVersions.h"
 
 import Panic
 
-import Data.IORef       ( IORef, newIORef )
+import Data.Data
+import Data.IORef       ( IORef, newIORef, atomicModifyIORef )
 import System.IO.Unsafe ( unsafePerformIO )
-import Data.IORef       ( readIORef, writeIORef )
 import Data.List        hiding (group)
+import Control.Concurrent.MVar ( MVar, newMVar, newEmptyMVar )
 
-import qualified Data.List as List ( elem )
 #ifdef DEBUG
-import qualified Data.List as List ( notElem )
 import FastTypes
 #endif
 
@@ -92,10 +102,15 @@ import Control.Monad    ( unless )
 import System.IO.Error as IO ( catch, isDoesNotExistError )
 import System.Directory ( doesDirectoryExist, createDirectory,
                           getModificationTime )
-import System.FilePath hiding ( searchPathSeparator )
+import System.FilePath
+import System.Time      ( ClockTime )
+
 import Data.Char        ( isUpper, isAlphaNum, isSpace, ord, isDigit )
 import Data.Ratio       ( (%) )
-import System.Time      ( ClockTime )
+import Data.Ord         ( comparing )
+import Data.Bits
+import Data.Word
+import qualified Data.IntMap as IM
 
 infixr 9 `thenCmp`
 \end{code}
@@ -106,6 +121,15 @@ infixr 9 `thenCmp`
 %*                                                                      *
 %************************************************************************
 
+These booleans are global constants, set by CPP flags.  They allow us to
+recompile a single module (this one) to change whether or not debug output
+appears. They sometimes let us avoid even running CPP elsewhere.
+
+It's important that the flags are literal constants (True/False). Then,
+with -0, tests of the flags in other modules will simplify to the correct
+branch of the conditional, thereby dropping debug code altogether when
+the flags are off.
+
 \begin{code}
 ghciSupported :: Bool
 #ifdef GHCI
@@ -128,11 +152,11 @@ ghciTablesNextToCode = True
 ghciTablesNextToCode = False
 #endif
 
-picIsOn :: Bool
-#ifdef __PIC__
-picIsOn = True
+isDynamicGhcLib :: Bool
+#ifdef DYNAMIC
+isDynamicGhcLib = True
 #else
-picIsOn = False
+isDynamicGhcLib = False
 #endif
 
 isWindowsHost :: Bool
@@ -164,13 +188,22 @@ isDarwinTarget = False
 %************************************************************************
 
 \begin{code}
--- Compose a function with itself n times.  (nth rather than twice)
+-- | Compose a function with itself n times.  (nth rather than twice)
 nTimes :: Int -> (a -> a) -> (a -> a)
 nTimes 0 _ = id
 nTimes 1 f = f
 nTimes n f = f . nTimes (n-1) f
 \end{code}
 
+\begin{code}
+fstOf3   :: (a,b,c) -> a    
+sndOf3   :: (a,b,c) -> b    
+thirdOf3 :: (a,b,c) -> c    
+fstOf3      (a,_,_) =  a
+sndOf3      (_,b,_) =  b
+thirdOf3    (_,_,c) =  c
+\end{code}
+
 %************************************************************************
 %*                                                                      *
 \subsection[Utils-lists]{General list processing}
@@ -179,12 +212,13 @@ nTimes n f = f . nTimes (n-1) f
 
 \begin{code}
 filterOut :: (a->Bool) -> [a] -> [a]
--- Like filter, only reverses the sense of the test
+-- ^ Like filter, only it reverses the sense of the test
 filterOut _ [] = []
 filterOut p (x:xs) | p x       = filterOut p xs
                    | otherwise = x : filterOut p xs
 
 partitionWith :: (a -> Either b c) -> [a] -> ([b], [c])
+-- ^ Uses a function to determine which of two output lists an input element should join
 partitionWith _ [] = ([],[])
 partitionWith f (x:xs) = case f x of
                          Left  b -> (b:bs, cs)
@@ -192,6 +226,7 @@ partitionWith f (x:xs) = case f x of
     where (bs,cs) = partitionWith f xs
 
 splitEithers :: [Either a b] -> ([a], [b])
+-- ^ Teases a list of 'Either's apart into two lists
 splitEithers [] = ([],[])
 splitEithers (e : es) = case e of
                         Left x -> (x:xs, ys)
@@ -236,8 +271,7 @@ zipWith4Equal msg _ _  _  _  _  =  panic ("zipWith4Equal: unequal lists:"++msg)
 \end{code}
 
 \begin{code}
--- zipLazy is lazy in the second list (observe the ~)
-
+-- | 'zipLazy' is a kind of 'zip' that is lazy in the second list (observe the ~)
 zipLazy :: [a] -> [b] -> [(a,b)]
 zipLazy []     _       = []
 -- We want to write this, but with GHC 6.4 we get a warning, so it
@@ -251,8 +285,8 @@ zipLazy (x:xs) zs = let y : ys = zs
 
 \begin{code}
 stretchZipWith :: (a -> Bool) -> b -> (a->b->c) -> [a] -> [b] -> [c]
--- (stretchZipWith p z f xs ys) stretches ys by inserting z in
--- the places where p returns *True*
+-- ^ @stretchZipWith p z f xs ys@ stretches @ys@ by inserting @z@ in
+-- the places where @p@ returns @True@
 
 stretchZipWith _ _ _ []     _ = []
 stretchZipWith p z f (x:xs) ys
@@ -293,14 +327,14 @@ mapAndUnzip3 f (x:xs)
 nOfThem :: Int -> a -> [a]
 nOfThem n thing = replicate n thing
 
--- 'atLength atLen atEnd ls n' unravels list 'ls' to position 'n';
--- specification:
+-- | @atLength atLen atEnd ls n@ unravels list @ls@ to position @n@. Precisely:
 --
+-- @
 --  atLength atLenPred atEndPred ls n
 --   | n < 0         = atLenPred n
 --   | length ls < n = atEndPred (n - length ls)
 --   | otherwise     = atLenPred (drop n ls)
---
+-- @
 atLength :: ([a] -> b)
          -> (Int -> b)
          -> [a]
@@ -314,9 +348,10 @@ atLength atLenPred atEndPred ls n
     go 0 ls = atLenPred ls
     go n (_:xs) = go (n-1) xs
 
--- special cases.
+-- Some special cases of atLength:
+
 lengthExceeds :: [a] -> Int -> Bool
--- (lengthExceeds xs n) = (length xs > n)
+-- ^ > (lengthExceeds xs n) = (length xs > n)
 lengthExceeds = atLength notNull (const False)
 
 lengthAtLeast :: [a] -> Int -> Bool
@@ -374,36 +409,27 @@ Debugging/specialising versions of \tr{elem} and \tr{notElem}
 isIn, isn'tIn :: Eq a => String -> a -> [a] -> Bool
 
 # ifndef DEBUG
-isIn    _msg x ys = elem__    x ys
-isn'tIn _msg x ys = notElem__ x ys
-
---these are here to be SPECIALIZEd (automagically)
-elem__ :: Eq a => a -> [a] -> Bool
-elem__ _ []     = False
-elem__ x (y:ys) = x == y || elem__ x ys
-
-notElem__ :: Eq a => a -> [a] -> Bool
-notElem__ _ []     = True
-notElem__ x (y:ys) = x /= y && notElem__ x ys
+isIn    _msg x ys = x `elem` ys
+isn'tIn _msg x ys = x `notElem` ys
 
 # else /* DEBUG */
 isIn msg x ys
-  = elem (_ILIT(0)) x ys
+  = elem100 (_ILIT(0)) x ys
   where
-    elem _ _ []        = False
-    elem i x (y:ys)
+    elem100 _ _ []        = False
+    elem100 i x (y:ys)
       | i ># _ILIT(100) = trace ("Over-long elem in " ++ msg)
-                                (x `List.elem` (y:ys))
-      | otherwise       = x == y || elem (i +# _ILIT(1)) x ys
+                                (x `elem` (y:ys))
+      | otherwise       = x == y || elem100 (i +# _ILIT(1)) x ys
 
 isn'tIn msg x ys
-  = notElem (_ILIT(0)) x ys
+  = notElem100 (_ILIT(0)) x ys
   where
-    notElem _ _ [] =  True
-    notElem i x (y:ys)
+    notElem100 _ _ [] =  True
+    notElem100 i x (y:ys)
       | i ># _ILIT(100) = trace ("Over-long notElem in " ++ msg)
-                                (x `List.notElem` (y:ys))
-      | otherwise      =  x /= y && notElem (i +# _ILIT(1)) x ys
+                                (x `notElem` (y:ys))
+      | otherwise      =  x /= y && notElem100 (i +# _ILIT(1)) x ys
 # endif /* DEBUG */
 \end{code}
 
@@ -510,7 +536,7 @@ sortWith get_key xs = sortLe le xs
   where
     x `le` y = get_key x < get_key y
 
-on :: (a -> a -> Ordering) -> (b -> a) -> b -> b -> Ordering
+on :: (a -> a -> c) -> (b -> a) -> b -> b -> c
 on cmp sel = \x y -> sel x `cmp` sel y
 
 \end{code}
@@ -596,6 +622,10 @@ splitAtList (_:xs) (y:ys) = (y:ys', ys'')
     where
       (ys', ys'') = splitAtList xs ys
 
+-- drop from the end of a list
+dropTail :: Int -> [a] -> [a]
+dropTail n = reverse . drop n . reverse
+
 snocView :: [a] -> Maybe ([a],a)
         -- Split off the last element
 snocView [] = Nothing
@@ -648,21 +678,99 @@ cmpList cmp (a:as) (b:bs)
 \end{code}
 
 \begin{code}
--- This (with a more general type) is Data.List.stripPrefix from GHC 6.8.
--- This definition can be removed once we require at least 6.8 to build.
-maybePrefixMatch :: String -> String -> Maybe String
-maybePrefixMatch []    rest = Just rest
-maybePrefixMatch (_:_) []   = Nothing
-maybePrefixMatch (p:pat) (r:rest)
-  | p == r    = maybePrefixMatch pat rest
-  | otherwise = Nothing
-
 removeSpaces :: String -> String
 removeSpaces = reverse . dropWhile isSpace . reverse . dropWhile isSpace
 \end{code}
 
 %************************************************************************
 %*                                                                      *
+\subsection{Edit distance}
+%*                                                                      *
+%************************************************************************
+
+\begin{code}
+-- | Find the "restricted" Damerau-Levenshtein edit distance between two strings. See: <http://en.wikipedia.org/wiki/Damerau-Levenshtein_distance>.
+-- Based on the algorithm presented in "A Bit-Vector Algorithm for Computing Levenshtein and Damerau Edit Distances" in PSC'02 (Heikki Hyyro).
+-- See http://www.cs.uta.fi/~helmu/pubs/psc02.pdf and http://www.cs.uta.fi/~helmu/pubs/PSCerr.html for an explanation
+restrictedDamerauLevenshteinDistance :: String -> String -> Int
+restrictedDamerauLevenshteinDistance str1 str2 = restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
+  where
+    m = length str1
+    n = length str2
+
+restrictedDamerauLevenshteinDistanceWithLengths :: Int -> Int -> String -> String -> Int
+restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
+  | m <= n    = if n <= 32 -- n must be larger so this check is sufficient
+                then restrictedDamerauLevenshteinDistance' (undefined :: Word32) m n str1 str2
+                else restrictedDamerauLevenshteinDistance' (undefined :: Integer) m n str1 str2
+  | otherwise = if m <= 32 -- m must be larger so this check is sufficient
+                then restrictedDamerauLevenshteinDistance' (undefined :: Word32) n m str2 str1
+                else restrictedDamerauLevenshteinDistance' (undefined :: Integer) n m str2 str1
+
+restrictedDamerauLevenshteinDistance' :: (Bits bv) => bv -> Int -> Int -> String -> String -> Int
+restrictedDamerauLevenshteinDistance' _bv_dummy m n str1 str2 
+  | [] <- str1 = n
+  | otherwise  = extractAnswer $ foldl' (restrictedDamerauLevenshteinDistanceWorker (matchVectors str1) top_bit_mask vector_mask) (0, 0, m_ones, 0, m) str2
+  where m_ones@vector_mask = (2 ^ m) - 1
+        top_bit_mask = (1 `shiftL` (m - 1)) `asTypeOf` _bv_dummy
+        extractAnswer (_, _, _, _, distance) = distance
+
+restrictedDamerauLevenshteinDistanceWorker :: (Bits bv) => IM.IntMap bv -> bv -> bv -> (bv, bv, bv, bv, Int) -> Char -> (bv, bv, bv, bv, Int)
+restrictedDamerauLevenshteinDistanceWorker str1_mvs top_bit_mask vector_mask (pm, d0, vp, vn, distance) char2 
+  = seq str1_mvs $ seq top_bit_mask $ seq vector_mask $ seq pm' $ seq d0' $ seq vp' $ seq vn' $ seq distance'' $ seq char2 $ (pm', d0', vp', vn', distance'')
+  where
+    pm' = IM.findWithDefault 0 (ord char2) str1_mvs
+    
+    d0' = ((((sizedComplement vector_mask d0) .&. pm') `shiftL` 1) .&. pm) -- No need to mask the shiftL because of the restricted range of pm
+      .|. ((((pm' .&. vp) + vp) .&. vector_mask) `xor` vp) .|. pm' .|. vn
+    hp' = vn .|. sizedComplement vector_mask (d0' .|. vp)
+    hn' = d0' .&. vp
+    
+    hp'_shift = ((hp' `shiftL` 1) .|. 1) .&. vector_mask
+    hn'_shift = (hn' `shiftL` 1) .&. vector_mask
+    vp' = hn'_shift .|. sizedComplement vector_mask (d0' .|. hp'_shift)
+    vn' = d0' .&. hp'_shift
+    
+    distance' = if hp' .&. top_bit_mask /= 0 then distance + 1 else distance
+    distance'' = if hn' .&. top_bit_mask /= 0 then distance' - 1 else distance'
+
+sizedComplement :: Bits bv => bv -> bv -> bv
+sizedComplement vector_mask vect = vector_mask `xor` vect
+
+matchVectors :: Bits bv => String -> IM.IntMap bv
+matchVectors = snd . foldl' go (0 :: Int, IM.empty)
+  where
+    go (ix, im) char = let ix' = ix + 1
+                           im' = IM.insertWith (.|.) (ord char) (2 ^ ix) im
+                       in seq ix' $ seq im' $ (ix', im')
+
+#ifdef __GLASGOW_HASKELL__
+{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance' :: Word32 -> Int -> Int -> String -> String -> Int #-}
+{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance' :: Integer -> Int -> Int -> String -> String -> Int #-}
+
+{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker :: IM.IntMap Word32 -> Word32 -> Word32 -> (Word32, Word32, Word32, Word32, Int) -> Char -> (Word32, Word32, Word32, Word32, Int) #-}
+{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker :: IM.IntMap Integer -> Integer -> Integer -> (Integer, Integer, Integer, Integer, Int) -> Char -> (Integer, Integer, Integer, Integer, Int) #-}
+
+{-# SPECIALIZE INLINE sizedComplement :: Word32 -> Word32 -> Word32 #-}
+{-# SPECIALIZE INLINE sizedComplement :: Integer -> Integer -> Integer #-}
+
+{-# SPECIALIZE matchVectors :: String -> IM.IntMap Word32 #-}
+{-# SPECIALIZE matchVectors :: String -> IM.IntMap Integer #-}
+#endif
+
+-- | Search for possible matches to the users input in the given list, returning a small number of ranked results
+fuzzyMatch :: String -> [String] -> [String]
+fuzzyMatch user_entered possibilites = map fst $ take mAX_RESULTS $ sortBy (comparing snd) 
+                                                [ (poss, distance) | poss <- possibilites
+                                                                   , let distance = restrictedDamerauLevenshteinDistance poss user_entered
+                                                                   , distance <= fuzzy_threshold ]
+  where -- Work out an approriate match threshold (about a quarter of the # of characters the user entered)
+        fuzzy_threshold = max (round $ fromInteger (genericLength user_entered) / (4 :: Rational)) 1
+        mAX_RESULTS = 3
+\end{code}
+
+%************************************************************************
+%*                                                                      *
 \subsection[Utils-pairs]{Pairs}
 %*                                                                      *
 %************************************************************************
@@ -688,8 +796,15 @@ global a = unsafePerformIO (newIORef a)
 \begin{code}
 consIORef :: IORef [a] -> a -> IO ()
 consIORef var x = do
-  xs <- readIORef var
-  writeIORef var (x:xs)
+  atomicModifyIORef var (\xs -> (x:xs,()))
+\end{code}
+
+\begin{code}
+globalMVar :: a -> MVar a
+globalMVar a = unsafePerformIO (newMVar a)
+
+globalEmptyMVar :: MVar a
+globalEmptyMVar = unsafePerformIO newEmptyMVar
 \end{code}
 
 Module names:
@@ -871,17 +986,7 @@ parseSearchPath path = split path
 #endif
             _                                 -> chunk'
 
-        (chunk', rest') = break (==searchPathSeparator) s
-
--- | A platform-specific character used to separate search path strings in
--- environment variables. The separator is a colon (\":\") on Unix and
--- Macintosh, and a semicolon (\";\") on the Windows operating system.
-searchPathSeparator :: Char
-#if mingw32_HOST_OS || mingw32_TARGET_OS
-searchPathSeparator = ';'
-#else
-searchPathSeparator = ':'
-#endif
+        (chunk', rest') = break isSearchPathSeparator s
 
 data Direction = Forwards | Backwards
 
@@ -896,3 +1001,29 @@ reslash d = f
                   Backwards -> '\\'
 \end{code}
 
+%************************************************************************
+%*                                                                      *
+\subsection[Utils-Data]{Utils for defining Data instances}
+%*                                                                      *
+%************************************************************************
+
+These functions helps us to define Data instances for abstract types.
+
+\begin{code}
+abstractConstr :: String -> Constr
+abstractConstr n = mkConstr (abstractDataType n) ("{abstract:"++n++"}") [] Prefix
+\end{code}
+
+\begin{code}
+abstractDataType :: String -> DataType
+abstractDataType n = mkDataType n [abstractConstr n]
+\end{code}
+
+\begin{code}
+-- Old GHC versions come with a base library with this function misspelled.
+#if __GLASGOW_HASKELL__ < 612
+mkNoRepType :: String -> DataType
+mkNoRepType = mkNorepType
+#endif
+\end{code}
+