+-- | Find the "restricted" Damerau-Levenshtein edit distance between two strings.
+-- See: <http://en.wikipedia.org/wiki/Damerau-Levenshtein_distance>.
+-- Based on the algorithm presented in "A Bit-Vector Algorithm for Computing
+-- Levenshtein and Damerau Edit Distances" in PSC'02 (Heikki Hyyro).
+-- See http://www.cs.uta.fi/~helmu/pubs/psc02.pdf and
+-- http://www.cs.uta.fi/~helmu/pubs/PSCerr.html for an explanation
+restrictedDamerauLevenshteinDistance :: String -> String -> Int
+restrictedDamerauLevenshteinDistance str1 str2
+ = restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
+ where
+ m = length str1
+ n = length str2
+
+restrictedDamerauLevenshteinDistanceWithLengths
+ :: Int -> Int -> String -> String -> Int
+restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
+ | m <= n
+ = if n <= 32 -- n must be larger so this check is sufficient
+ then restrictedDamerauLevenshteinDistance' (undefined :: Word32) m n str1 str2
+ else restrictedDamerauLevenshteinDistance' (undefined :: Integer) m n str1 str2
+
+ | otherwise
+ = if m <= 32 -- m must be larger so this check is sufficient
+ then restrictedDamerauLevenshteinDistance' (undefined :: Word32) n m str2 str1
+ else restrictedDamerauLevenshteinDistance' (undefined :: Integer) n m str2 str1
+
+restrictedDamerauLevenshteinDistance'
+ :: (Bits bv) => bv -> Int -> Int -> String -> String -> Int
+restrictedDamerauLevenshteinDistance' _bv_dummy m n str1 str2
+ | [] <- str1 = n
+ | otherwise = extractAnswer $
+ foldl' (restrictedDamerauLevenshteinDistanceWorker
+ (matchVectors str1) top_bit_mask vector_mask)
+ (0, 0, m_ones, 0, m) str2
+ where
+ m_ones@vector_mask = (2 ^ m) - 1
+ top_bit_mask = (1 `shiftL` (m - 1)) `asTypeOf` _bv_dummy
+ extractAnswer (_, _, _, _, distance) = distance
+
+restrictedDamerauLevenshteinDistanceWorker
+ :: (Bits bv) => IM.IntMap bv -> bv -> bv
+ -> (bv, bv, bv, bv, Int) -> Char -> (bv, bv, bv, bv, Int)
+restrictedDamerauLevenshteinDistanceWorker str1_mvs top_bit_mask vector_mask
+ (pm, d0, vp, vn, distance) char2
+ = seq str1_mvs $ seq top_bit_mask $ seq vector_mask $
+ seq pm' $ seq d0' $ seq vp' $ seq vn' $
+ seq distance'' $ seq char2 $
+ (pm', d0', vp', vn', distance'')
+ where
+ pm' = IM.findWithDefault 0 (ord char2) str1_mvs
+
+ d0' = ((((sizedComplement vector_mask d0) .&. pm') `shiftL` 1) .&. pm)
+ .|. ((((pm' .&. vp) + vp) .&. vector_mask) `xor` vp) .|. pm' .|. vn
+ -- No need to mask the shiftL because of the restricted range of pm
+
+ hp' = vn .|. sizedComplement vector_mask (d0' .|. vp)
+ hn' = d0' .&. vp
+
+ hp'_shift = ((hp' `shiftL` 1) .|. 1) .&. vector_mask
+ hn'_shift = (hn' `shiftL` 1) .&. vector_mask
+ vp' = hn'_shift .|. sizedComplement vector_mask (d0' .|. hp'_shift)
+ vn' = d0' .&. hp'_shift
+
+ distance' = if hp' .&. top_bit_mask /= 0 then distance + 1 else distance
+ distance'' = if hn' .&. top_bit_mask /= 0 then distance' - 1 else distance'
+
+sizedComplement :: Bits bv => bv -> bv -> bv
+sizedComplement vector_mask vect = vector_mask `xor` vect
+
+matchVectors :: Bits bv => String -> IM.IntMap bv
+matchVectors = snd . foldl' go (0 :: Int, IM.empty)
+ where
+ go (ix, im) char = let ix' = ix + 1
+ im' = IM.insertWith (.|.) (ord char) (2 ^ ix) im
+ in seq ix' $ seq im' $ (ix', im')
+
+#ifdef __GLASGOW_HASKELL__
+{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
+ :: Word32 -> Int -> Int -> String -> String -> Int #-}
+{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
+ :: Integer -> Int -> Int -> String -> String -> Int #-}
+
+{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
+ :: IM.IntMap Word32 -> Word32 -> Word32
+ -> (Word32, Word32, Word32, Word32, Int)
+ -> Char -> (Word32, Word32, Word32, Word32, Int) #-}
+{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
+ :: IM.IntMap Integer -> Integer -> Integer
+ -> (Integer, Integer, Integer, Integer, Int)
+ -> Char -> (Integer, Integer, Integer, Integer, Int) #-}
+
+{-# SPECIALIZE INLINE sizedComplement :: Word32 -> Word32 -> Word32 #-}
+{-# SPECIALIZE INLINE sizedComplement :: Integer -> Integer -> Integer #-}
+
+{-# SPECIALIZE matchVectors :: String -> IM.IntMap Word32 #-}
+{-# SPECIALIZE matchVectors :: String -> IM.IntMap Integer #-}