From 00022894bbb2dfa33fd213eedbac0f28b4c4b7b4 Mon Sep 17 00:00:00 2001 From: Max Bolingbroke Date: Tue, 13 Apr 2010 19:28:25 +0000 Subject: [PATCH] Spelling correction for LANGUAGE pragmas --- compiler/main/DynFlags.hs | 5 +-- compiler/main/HeaderInfo.hs | 5 ++- compiler/utils/Outputable.lhs | 12 ++++- compiler/utils/Util.lhs | 97 ++++++++++++++++++++++++++++++++++++++++- 4 files changed, 111 insertions(+), 8 deletions(-) diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index 8958eb7..8a8b052 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -1003,8 +1003,7 @@ allFlags = map ('-':) $ [ flagName flag | flag <- dynamic_flags, ok (flagOptKind flag) ] ++ map ("fno-"++) flags ++ map ("f"++) flags ++ - map ("X"++) supportedLanguages ++ - map ("XNo"++) supportedLanguages + map ("X"++) supportedLanguages where ok (PrefixPred _ _) = False ok _ = True flags = [ name | (name, _, _) <- fFlags ] @@ -1553,7 +1552,7 @@ fFlags = [ ] supportedLanguages :: [String] -supportedLanguages = [ name | (name, _, _) <- xFlags ] +supportedLanguages = [ name' | (name, _, _) <- xFlags, name' <- [name, "No" ++ name] ] -- This may contain duplicates languageOptions :: [DynFlag] diff --git a/compiler/main/HeaderInfo.hs b/compiler/main/HeaderInfo.hs index e16c2ce..b520098 100644 --- a/compiler/main/HeaderInfo.hs +++ b/compiler/main/HeaderInfo.hs @@ -264,7 +264,6 @@ checkExtension (L l ext) -- its corresponding flag. Otherwise it throws an exception. = let ext' = unpackFS ext in if ext' `elem` supportedLanguages - || ext' `elem` (map ("No"++) supportedLanguages) then L l ("-X"++ext') else unsupportedExtnError l ext' @@ -281,7 +280,9 @@ unsupportedExtnError :: SrcSpan -> String -> a unsupportedExtnError loc unsup = throw $ mkSrcErr $ unitBag $ mkPlainErrMsg loc $ - text "Unsupported extension: " <> text unsup + text "Unsupported extension: " <> text unsup $$ + if null suggestions then empty else text "Perhaps you meant" <+> quotedListWithOr (map text suggestions) + where suggestions = fuzzyMatch unsup supportedLanguages optionsErrorMsgs :: [String] -> [Located String] -> FilePath -> Messages diff --git a/compiler/utils/Outputable.lhs b/compiler/utils/Outputable.lhs index 84e8b9d..b948990 100644 --- a/compiler/utils/Outputable.lhs +++ b/compiler/utils/Outputable.lhs @@ -17,7 +17,7 @@ module Outputable ( -- * Pretty printing combinators SDoc, docToSDoc, - interppSP, interpp'SP, pprQuotedList, pprWithCommas, + interppSP, interpp'SP, pprQuotedList, pprWithCommas, quotedListWithOr, empty, nest, char, text, ftext, ptext, @@ -660,7 +660,15 @@ interpp'SP xs = sep (punctuate comma (map ppr xs)) -- -- > [x,y,z] ==> `x', `y', `z' pprQuotedList :: Outputable a => [a] -> SDoc -pprQuotedList xs = hsep (punctuate comma (map (quotes . ppr) xs)) +pprQuotedList = quotedList . map ppr + +quotedList :: [SDoc] -> SDoc +quotedList xs = hsep (punctuate comma (map quotes xs)) + +quotedListWithOr :: [SDoc] -> SDoc +-- [x,y,z] ==> `x', `y' or `z' +quotedListWithOr xs@(_:_:_) = quotedList (init xs) <+> ptext (sLit "or") <+> quotes (last xs) +quotedListWithOr xs = quotedList xs \end{code} diff --git a/compiler/utils/Util.lhs b/compiler/utils/Util.lhs index fbbe767..ec5070f 100644 --- a/compiler/utils/Util.lhs +++ b/compiler/utils/Util.lhs @@ -47,6 +47,9 @@ module Util ( isEqual, eqListBy, thenCmp, cmpList, removeSpaces, + + -- * Edit distance + fuzzyMatch, -- * Transitive closures transitiveClosure, @@ -100,9 +103,14 @@ import System.IO.Error as IO ( catch, isDoesNotExistError ) import System.Directory ( doesDirectoryExist, createDirectory, getModificationTime ) import System.FilePath +import System.Time ( ClockTime ) + import Data.Char ( isUpper, isAlphaNum, isSpace, ord, isDigit ) import Data.Ratio ( (%) ) -import System.Time ( ClockTime ) +import Data.Ord ( comparing ) +import Data.Bits +import Data.Word +import qualified Data.IntMap as IM infixr 9 `thenCmp` \end{code} @@ -676,6 +684,93 @@ removeSpaces = reverse . dropWhile isSpace . reverse . dropWhile isSpace %************************************************************************ %* * +\subsection{Edit distance} +%* * +%************************************************************************ + +\begin{code} +-- | Find the "restricted" Damerau-Levenshtein edit distance between two strings. See: . +-- Based on the algorithm presented in "A Bit-Vector Algorithm for Computing Levenshtein and Damerau Edit Distances" in PSC'02 (Heikki Hyyro). +-- See http://www.cs.uta.fi/~helmu/pubs/psc02.pdf and http://www.cs.uta.fi/~helmu/pubs/PSCerr.html for an explanation +restrictedDamerauLevenshteinDistance :: String -> String -> Int +restrictedDamerauLevenshteinDistance str1 str2 = restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2 + where + m = length str1 + n = length str2 + +restrictedDamerauLevenshteinDistanceWithLengths :: Int -> Int -> String -> String -> Int +restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2 + | m <= n = if n <= 32 -- n must be larger so this check is sufficient + then restrictedDamerauLevenshteinDistance' (undefined :: Word32) m n str1 str2 + else restrictedDamerauLevenshteinDistance' (undefined :: Integer) m n str1 str2 + | otherwise = if m <= 32 -- m must be larger so this check is sufficient + then restrictedDamerauLevenshteinDistance' (undefined :: Word32) n m str2 str1 + else restrictedDamerauLevenshteinDistance' (undefined :: Integer) n m str2 str1 + +restrictedDamerauLevenshteinDistance' :: (Bits bv) => bv -> Int -> Int -> String -> String -> Int +restrictedDamerauLevenshteinDistance' _bv_dummy m n str1 str2 + | [] <- str1 = n + | otherwise = extractAnswer $ foldl' (restrictedDamerauLevenshteinDistanceWorker (matchVectors str1) top_bit_mask vector_mask) (0, 0, m_ones, 0, m) str2 + where m_ones@vector_mask = (2 ^ m) - 1 + top_bit_mask = (1 `shiftL` (m - 1)) `asTypeOf` _bv_dummy + extractAnswer (_, _, _, _, distance) = distance + +restrictedDamerauLevenshteinDistanceWorker :: (Bits bv) => IM.IntMap bv -> bv -> bv -> (bv, bv, bv, bv, Int) -> Char -> (bv, bv, bv, bv, Int) +restrictedDamerauLevenshteinDistanceWorker str1_mvs top_bit_mask vector_mask (pm, d0, vp, vn, distance) char2 + = seq str1_mvs $ seq top_bit_mask $ seq vector_mask $ seq pm' $ seq d0' $ seq vp' $ seq vn' $ seq distance'' $ seq char2 $ (pm', d0', vp', vn', distance'') + where + pm' = IM.findWithDefault 0 (ord char2) str1_mvs + + d0' = ((((sizedComplement vector_mask d0) .&. pm') `shiftL` 1) .&. pm) -- No need to mask the shiftL because of the restricted range of pm + .|. ((((pm' .&. vp) + vp) .&. vector_mask) `xor` vp) .|. pm' .|. vn + hp' = vn .|. sizedComplement vector_mask (d0' .|. vp) + hn' = d0' .&. vp + + hp'_shift = ((hp' `shiftL` 1) .|. 1) .&. vector_mask + hn'_shift = (hn' `shiftL` 1) .&. vector_mask + vp' = hn'_shift .|. sizedComplement vector_mask (d0' .|. hp'_shift) + vn' = d0' .&. hp'_shift + + distance' = if hp' .&. top_bit_mask /= 0 then distance + 1 else distance + distance'' = if hn' .&. top_bit_mask /= 0 then distance' - 1 else distance' + +sizedComplement :: Bits bv => bv -> bv -> bv +sizedComplement vector_mask vect = vector_mask `xor` vect + +matchVectors :: Bits bv => String -> IM.IntMap bv +matchVectors = snd . foldl' go (0 :: Int, IM.empty) + where + go (ix, im) char = let ix' = ix + 1 + im' = IM.insertWith (.|.) (ord char) (2 ^ ix) im + in seq ix' $ seq im' $ (ix', im') + +#ifdef __GLASGOW_HASKELL__ +{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance' :: Word32 -> Int -> Int -> String -> String -> Int #-} +{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance' :: Integer -> Int -> Int -> String -> String -> Int #-} + +{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker :: IM.IntMap Word32 -> Word32 -> Word32 -> (Word32, Word32, Word32, Word32, Int) -> Char -> (Word32, Word32, Word32, Word32, Int) #-} +{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker :: IM.IntMap Integer -> Integer -> Integer -> (Integer, Integer, Integer, Integer, Int) -> Char -> (Integer, Integer, Integer, Integer, Int) #-} + +{-# SPECIALIZE INLINE sizedComplement :: Word32 -> Word32 -> Word32 #-} +{-# SPECIALIZE INLINE sizedComplement :: Integer -> Integer -> Integer #-} + +{-# SPECIALIZE matchVectors :: String -> IM.IntMap Word32 #-} +{-# SPECIALIZE matchVectors :: String -> IM.IntMap Integer #-} +#endif + +-- | Search for possible matches to the users input in the given list, returning a small number of ranked results +fuzzyMatch :: String -> [String] -> [String] +fuzzyMatch user_entered possibilites = map fst $ take mAX_RESULTS $ sortBy (comparing snd) + [ (poss, distance) | poss <- possibilites + , let distance = restrictedDamerauLevenshteinDistance poss user_entered + , distance <= fuzzy_threshold ] + where -- Work out an approriate match threshold (about a quarter of the # of characters the user entered) + fuzzy_threshold = max (round $ fromInteger (genericLength user_entered) / (4 :: Rational)) 1 + mAX_RESULTS = 3 +\end{code} + +%************************************************************************ +%* * \subsection[Utils-pairs]{Pairs} %* * %************************************************************************ -- 1.7.10.4