X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Futils%2FUtil.lhs;h=dc4f32ec5e7d7f4dffec59df200cd8fd6db7cf3c;hp=fbbe76717849e76e90affe581da81cb4383dafb2;hb=d0faaa6fa0cecd23c5670fd199e9206275313666;hpb=f278f0676579f67075033a4f9857715909c4b71e diff --git a/compiler/utils/Util.lhs b/compiler/utils/Util.lhs index fbbe767..dc4f32e 100644 --- a/compiler/utils/Util.lhs +++ b/compiler/utils/Util.lhs @@ -47,6 +47,9 @@ module Util ( isEqual, eqListBy, thenCmp, cmpList, removeSpaces, + + -- * Edit distance + fuzzyMatch, fuzzyLookup, -- * Transitive closures transitiveClosure, @@ -63,6 +66,9 @@ module Util ( -- * Floating point readRational, + -- * read helpers + maybeReadFuzzy, + -- * IO-ish utilities createDirectoryHierarchy, doesDirNameExist, @@ -78,11 +84,15 @@ module Util ( Direction(..), reslash, -- * Utils for defining Data instances - abstractConstr, abstractDataType, mkNoRepType + abstractConstr, abstractDataType, mkNoRepType, + + -- * Utils for printing C code + charToC ) where #include "HsVersions.h" +import Exception import Panic import Data.Data @@ -96,14 +106,19 @@ import FastTypes #endif import Control.Monad ( unless ) -import System.IO.Error as IO ( catch, isDoesNotExistError ) +import System.IO.Error as IO ( isDoesNotExistError ) import System.Directory ( doesDirectoryExist, createDirectory, getModificationTime ) import System.FilePath -import Data.Char ( isUpper, isAlphaNum, isSpace, ord, isDigit ) -import Data.Ratio ( (%) ) import System.Time ( ClockTime ) +import Data.Char ( isUpper, isAlphaNum, isSpace, chr, ord, isDigit ) +import Data.Ratio ( (%) ) +import Data.Ord ( comparing ) +import Data.Bits +import Data.Word +import qualified Data.IntMap as IM + infixr 9 `thenCmp` \end{code} @@ -676,6 +691,139 @@ removeSpaces = reverse . dropWhile isSpace . reverse . dropWhile isSpace %************************************************************************ %* * +\subsection{Edit distance} +%* * +%************************************************************************ + +\begin{code} +-- | Find the "restricted" Damerau-Levenshtein edit distance between two strings. +-- See: . +-- Based on the algorithm presented in "A Bit-Vector Algorithm for Computing +-- Levenshtein and Damerau Edit Distances" in PSC'02 (Heikki Hyyro). +-- See http://www.cs.uta.fi/~helmu/pubs/psc02.pdf and +-- http://www.cs.uta.fi/~helmu/pubs/PSCerr.html for an explanation +restrictedDamerauLevenshteinDistance :: String -> String -> Int +restrictedDamerauLevenshteinDistance str1 str2 + = restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2 + where + m = length str1 + n = length str2 + +restrictedDamerauLevenshteinDistanceWithLengths + :: Int -> Int -> String -> String -> Int +restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2 + | m <= n + = if n <= 32 -- n must be larger so this check is sufficient + then restrictedDamerauLevenshteinDistance' (undefined :: Word32) m n str1 str2 + else restrictedDamerauLevenshteinDistance' (undefined :: Integer) m n str1 str2 + + | otherwise + = if m <= 32 -- m must be larger so this check is sufficient + then restrictedDamerauLevenshteinDistance' (undefined :: Word32) n m str2 str1 + else restrictedDamerauLevenshteinDistance' (undefined :: Integer) n m str2 str1 + +restrictedDamerauLevenshteinDistance' + :: (Bits bv) => bv -> Int -> Int -> String -> String -> Int +restrictedDamerauLevenshteinDistance' _bv_dummy m n str1 str2 + | [] <- str1 = n + | otherwise = extractAnswer $ + foldl' (restrictedDamerauLevenshteinDistanceWorker + (matchVectors str1) top_bit_mask vector_mask) + (0, 0, m_ones, 0, m) str2 + where + m_ones@vector_mask = (2 ^ m) - 1 + top_bit_mask = (1 `shiftL` (m - 1)) `asTypeOf` _bv_dummy + extractAnswer (_, _, _, _, distance) = distance + +restrictedDamerauLevenshteinDistanceWorker + :: (Bits bv) => IM.IntMap bv -> bv -> bv + -> (bv, bv, bv, bv, Int) -> Char -> (bv, bv, bv, bv, Int) +restrictedDamerauLevenshteinDistanceWorker str1_mvs top_bit_mask vector_mask + (pm, d0, vp, vn, distance) char2 + = seq str1_mvs $ seq top_bit_mask $ seq vector_mask $ + seq pm' $ seq d0' $ seq vp' $ seq vn' $ + seq distance'' $ seq char2 $ + (pm', d0', vp', vn', distance'') + where + pm' = IM.findWithDefault 0 (ord char2) str1_mvs + + d0' = ((((sizedComplement vector_mask d0) .&. pm') `shiftL` 1) .&. pm) + .|. ((((pm' .&. vp) + vp) .&. vector_mask) `xor` vp) .|. pm' .|. vn + -- No need to mask the shiftL because of the restricted range of pm + + hp' = vn .|. sizedComplement vector_mask (d0' .|. vp) + hn' = d0' .&. vp + + hp'_shift = ((hp' `shiftL` 1) .|. 1) .&. vector_mask + hn'_shift = (hn' `shiftL` 1) .&. vector_mask + vp' = hn'_shift .|. sizedComplement vector_mask (d0' .|. hp'_shift) + vn' = d0' .&. hp'_shift + + distance' = if hp' .&. top_bit_mask /= 0 then distance + 1 else distance + distance'' = if hn' .&. top_bit_mask /= 0 then distance' - 1 else distance' + +sizedComplement :: Bits bv => bv -> bv -> bv +sizedComplement vector_mask vect = vector_mask `xor` vect + +matchVectors :: Bits bv => String -> IM.IntMap bv +matchVectors = snd . foldl' go (0 :: Int, IM.empty) + where + go (ix, im) char = let ix' = ix + 1 + im' = IM.insertWith (.|.) (ord char) (2 ^ ix) im + in seq ix' $ seq im' $ (ix', im') + +#ifdef __GLASGOW_HASKELL__ +{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance' + :: Word32 -> Int -> Int -> String -> String -> Int #-} +{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance' + :: Integer -> Int -> Int -> String -> String -> Int #-} + +{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker + :: IM.IntMap Word32 -> Word32 -> Word32 + -> (Word32, Word32, Word32, Word32, Int) + -> Char -> (Word32, Word32, Word32, Word32, Int) #-} +{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker + :: IM.IntMap Integer -> Integer -> Integer + -> (Integer, Integer, Integer, Integer, Int) + -> Char -> (Integer, Integer, Integer, Integer, Int) #-} + +{-# SPECIALIZE INLINE sizedComplement :: Word32 -> Word32 -> Word32 #-} +{-# SPECIALIZE INLINE sizedComplement :: Integer -> Integer -> Integer #-} + +{-# SPECIALIZE matchVectors :: String -> IM.IntMap Word32 #-} +{-# SPECIALIZE matchVectors :: String -> IM.IntMap Integer #-} +#endif + +fuzzyMatch :: String -> [String] -> [String] +fuzzyMatch key vals = fuzzyLookup key [(v,v) | v <- vals] + +-- | Search for possible matches to the users input in the given list, +-- returning a small number of ranked results +fuzzyLookup :: String -> [(String,a)] -> [a] +fuzzyLookup user_entered possibilites + = map fst $ take mAX_RESULTS $ sortBy (comparing snd) + [ (poss_val, distance) | (poss_str, poss_val) <- possibilites + , let distance = restrictedDamerauLevenshteinDistance + poss_str user_entered + , distance <= fuzzy_threshold ] + where + -- Work out an approriate match threshold: + -- We report a candidate if its edit distance is <= the threshold, + -- The threshhold is set to about a quarter of the # of characters the user entered + -- Length Threshold + -- 1 0 -- Don't suggest *any* candidates + -- 2 1 -- for single-char identifiers + -- 3 1 + -- 4 1 + -- 5 1 + -- 6 2 + -- + fuzzy_threshold = truncate $ fromIntegral (length user_entered + 2) / (4 :: Rational) + mAX_RESULTS = 3 +\end{code} + +%************************************************************************ +%* * \subsection[Utils-pairs]{Pairs} %* * %************************************************************************ @@ -720,7 +868,7 @@ looksLikeModuleName [] = False looksLikeModuleName (c:cs) = isUpper c && go cs where go [] = True go ('.':cs) = looksLikeModuleName cs - go (c:cs) = (isAlphaNum c || c == '_') && go cs + go (c:cs) = (isAlphaNum c || c == '_' || c == '\'') && go cs \end{code} Akin to @Prelude.words@, but acts like the Bourne shell, treating @@ -821,6 +969,17 @@ readRational top_s ----------------------------------------------------------------------------- +-- read helpers + +maybeReadFuzzy :: Read a => String -> Maybe a +maybeReadFuzzy str = case reads str of + [(x, s)] + | all isSpace s -> + Just x + _ -> + Nothing + +----------------------------------------------------------------------------- -- Create a hierarchy of directories createDirectoryHierarchy :: FilePath -> IO () @@ -844,9 +1003,9 @@ doesDirNameExist fpath = case takeDirectory fpath of modificationTimeIfExists :: FilePath -> IO (Maybe ClockTime) modificationTimeIfExists f = do (do t <- getModificationTime f; return (Just t)) - `IO.catch` \e -> if isDoesNotExistError e - then return Nothing - else ioError e + `catchIO` \e -> if isDoesNotExistError e + then return Nothing + else ioError e -- split a string at the last character where 'pred' is True, -- returning a pair of strings. The first component holds the string @@ -924,11 +1083,22 @@ abstractDataType :: String -> DataType abstractDataType n = mkDataType n [abstractConstr n] \end{code} +%************************************************************************ +%* * +\subsection[Utils-C]{Utils for printing C code} +%* * +%************************************************************************ + \begin{code} --- Old GHC versions come with a base library with this function misspelled. -#if __GLASGOW_HASKELL__ < 612 -mkNoRepType :: String -> DataType -mkNoRepType = mkNorepType -#endif +charToC :: Word8 -> String +charToC w = + case chr (fromIntegral w) of + '\"' -> "\\\"" + '\'' -> "\\\'" + '\\' -> "\\\\" + c | c >= ' ' && c <= '~' -> [c] + | otherwise -> ['\\', + chr (ord '0' + ord c `div` 64), + chr (ord '0' + ord c `div` 8 `mod` 8), + chr (ord '0' + ord c `mod` 8)] \end{code} -