From 29a05730930cd2c5986ebb22d550e893d9fa20cc Mon Sep 17 00:00:00 2001 From: Ian Lynagh Date: Tue, 15 Mar 2011 16:29:12 +0000 Subject: [PATCH 1/1] Initial implementation of bindist comparison tool --- distrib/compare/FilenameDescr.hs | 58 ++++++++++++++ distrib/compare/Makefile | 12 +++ distrib/compare/Problem.hs | 31 ++++++++ distrib/compare/Tar.hs | 58 ++++++++++++++ distrib/compare/Utils.hs | 28 +++++++ distrib/compare/compare.hs | 159 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 346 insertions(+) create mode 100644 distrib/compare/FilenameDescr.hs create mode 100644 distrib/compare/Makefile create mode 100644 distrib/compare/Problem.hs create mode 100644 distrib/compare/Tar.hs create mode 100644 distrib/compare/Utils.hs create mode 100644 distrib/compare/compare.hs diff --git a/distrib/compare/FilenameDescr.hs b/distrib/compare/FilenameDescr.hs new file mode 100644 index 0000000..5952058 --- /dev/null +++ b/distrib/compare/FilenameDescr.hs @@ -0,0 +1,58 @@ + +module FilenameDescr where + +import Data.Either + +import Utils +import Tar + +-- We can't just compare plain filenames, because versions numbers of GHC +-- and the libaries will vary. So we use FilenameDescr instead, which +-- abstracts out the version numbers. +type FilenameDescr = [FilenameDescrBit] +data FilenameDescrBit = VersionOf String + | FP String + deriving (Show, Eq, Ord) + +normalise :: FilenameDescr -> FilenameDescr +normalise [] = [] +normalise [x] = [x] +normalise (FP x1 : FP x2 : xs) = normalise (FP (x1 ++ x2) : xs) +normalise (x : xs) = x : normalise xs + +-- Mapping from thing (e.g. "Cabal") to version (e.g. "1.10.0.0") +type ThingVersionMap = [(String, String)] + +addThingVersion :: ThingVersionMap -> String -> String -> Maybe ThingVersionMap +addThingVersion mapping thing version + = case lookup thing mapping of + Just version' -> + if version == version' + then Just mapping + else Nothing + Nothing -> + Just ((thing, version) : mapping) + +-- Sanity check that the FilenameDescr matches the filename in the tar line +checkContent :: ThingVersionMap -> (FilenameDescr, TarLine) -> Errors +checkContent mapping (fd, tl) + = let fn = tlFileName tl + in case flattenFilenameDescr mapping fd of + Right fn' -> + if fn' == fn + then [] + else ["checkContent: Can't happen: filename mismatch: " ++ show fn] + Left errs -> + errs + +flattenFilenameDescr :: ThingVersionMap -> FilenameDescr + -> Either Errors FilePath +flattenFilenameDescr mapping fd = case partitionEithers (map f fd) of + ([], strs) -> Right (concat strs) + (errs, _) -> Left (concat errs) + where f (FP fp) = Right fp + f (VersionOf thing) + = case lookup thing mapping of + Just v -> Right v + Nothing -> Left ["Can't happen: thing has no version in mapping"] + diff --git a/distrib/compare/Makefile b/distrib/compare/Makefile new file mode 100644 index 0000000..3099bc9 --- /dev/null +++ b/distrib/compare/Makefile @@ -0,0 +1,12 @@ + +GHC = ghc + +compare: *.hs + "$(GHC)" --make -Wall -Werror $@ + +.PHONY: clean +clean: + rm -f *.o + rm -f *.hi + rm -f compare compare.exe + diff --git a/distrib/compare/Problem.hs b/distrib/compare/Problem.hs new file mode 100644 index 0000000..f80c856 --- /dev/null +++ b/distrib/compare/Problem.hs @@ -0,0 +1,31 @@ + +module Problem where + +data FileProblem = First Problem + | Second Problem + | Change Problem + +data Problem = DuplicateFile FilePath + | ExtraFile FilePath + | PermissionsChanged FilePath FilePath String String + | FileSizeChanged FilePath FilePath Integer Integer + +pprFileProblem :: FileProblem -> String +pprFileProblem (First p) = "First " ++ pprProblem p +pprFileProblem (Second p) = "Second " ++ pprProblem p +pprFileProblem (Change p) = "Change " ++ pprProblem p + +pprProblem :: Problem -> String +pprProblem (DuplicateFile fp) = "Duplicate file: " ++ show fp +pprProblem (ExtraFile fp) = "Extra file: " ++ show fp +pprProblem (PermissionsChanged fp1 fp2 p1 p2) + = "Permissions changed:\n" + ++ " " ++ show fp1 + ++ " " ++ show fp2 + ++ " " ++ p1 ++ " -> " ++ p2 +pprProblem (FileSizeChanged fp1 fp2 s1 s2) + = "Size changed:\n" + ++ " " ++ show fp1 ++ "\n" + ++ " " ++ show fp2 ++ "\n" + ++ " " ++ show s1 ++ " -> " ++ show s2 + diff --git a/distrib/compare/Tar.hs b/distrib/compare/Tar.hs new file mode 100644 index 0000000..50b238a --- /dev/null +++ b/distrib/compare/Tar.hs @@ -0,0 +1,58 @@ + +module Tar where + +import Data.Either +import Data.List +import System.Exit +import System.Process + +import Utils + +readTarLines :: FilePath -> IO [TarLine] +readTarLines fp + = do (ec, out, err) <- readProcessWithExitCode "tar" ["-jtvf", fp] "" + case (ec, err) of + (ExitSuccess, []) -> + case parseTarLines fp out of + Left errs -> die errs + Right tls -> return tls + _ -> + die ["Failed running tar -jtvf " ++ show fp, + "Exit code: " ++ show ec, + "Stderr: " ++ show err] + +parseTarLines :: FilePath -> String -> Either Errors [TarLine] +parseTarLines fp xs + = case partitionEithers (zipWith (parseTarLine fp) [1..] (lines xs)) of + ([], tls) -> Right tls + (errss, _) -> Left (intercalate [""] errss) + +data TarLine = TarLine { + tlPermissions :: String, + tlUser :: String, + tlGroup :: String, + tlSize :: Integer, + tlDateTime :: String, + tlFileName :: FilePath + } + +parseTarLine :: FilePath -> Int -> String -> Either Errors TarLine +parseTarLine fp line str + = case re "^([^ ]+) ([^ ]+)/([^ ]+) +([0-9]+) ([^ ]+ [^ ]+) ([^ ]+)$" + str of + Just [perms, user, grp, sizeStr, dateTime, filename] -> + case maybeRead sizeStr of + Just size -> + Right $ TarLine { + tlPermissions = perms, + tlUser = user, + tlGroup = grp, + tlSize = size, + tlDateTime = dateTime, + tlFileName = filename + } + _ -> error "Can't happen: Can't parse size" + _ -> + Left ["In " ++ show fp ++ ", at line " ++ show line, + "Tar line doesn't parse: " ++ show str] + diff --git a/distrib/compare/Utils.hs b/distrib/compare/Utils.hs new file mode 100644 index 0000000..58298c1 --- /dev/null +++ b/distrib/compare/Utils.hs @@ -0,0 +1,28 @@ + +module Utils where + +import System.Exit +import System.IO +import Text.Regex.Posix + +die :: Errors -> IO a +die errs = do mapM_ (hPutStrLn stderr) errs + exitFailure + +dieOnErrors :: Either Errors a -> IO a +dieOnErrors (Left errs) = die errs +dieOnErrors (Right x) = return x + +type Errors = [String] + +maybeRead :: Read a => String -> Maybe a +maybeRead str = case reads str of + [(x, "")] -> Just x + _ -> Nothing + +re :: String -> String -> Maybe [String] +re r str = case matchM r' str :: Maybe (String, String, String, [String]) of + Just (_, _, _, ms) -> Just ms + Nothing -> Nothing + where r' = makeRegex r :: Regex + diff --git a/distrib/compare/compare.hs b/distrib/compare/compare.hs new file mode 100644 index 0000000..58f914c --- /dev/null +++ b/distrib/compare/compare.hs @@ -0,0 +1,159 @@ +{-# LANGUAGE PatternGuards #-} + +module Main (main) where + +import Control.Monad.State +import Data.Function +import Data.List +import System.Environment + +import FilenameDescr +import Problem +import Utils +import Tar + +-- TODO: +-- * Check installed trees too +-- * Check hashbangs + +-- Only size changes > sizeAbs are considered an issue +sizeAbs :: Integer +sizeAbs = 1000 + +-- Only a size change of sizePercentage% or more is considered an issue +sizePercentage :: Integer +sizePercentage = 150 + +main :: IO () +main = do args <- getArgs + case args of + [bd1, bd2] -> doit bd1 bd2 + _ -> die ["Bad args. Need 2 bindists."] + +doit :: FilePath -> FilePath -> IO () +doit bd1 bd2 + = do tls1 <- readTarLines bd1 + tls2 <- readTarLines bd2 + content1 <- dieOnErrors $ mkContents tls1 + content2 <- dieOnErrors $ mkContents tls2 + let mySort = sortBy (compare `on` fst) + sortedContent1 = mySort content1 + sortedContent2 = mySort content2 + (nubProbs1, nubbedContent1) = nubContents sortedContent1 + (nubProbs2, nubbedContent2) = nubContents sortedContent2 + differences = compareContent nubbedContent1 + nubbedContent2 + allProbs = map First nubProbs1 ++ map Second nubProbs2 + ++ differences + mapM_ (putStrLn . pprFileProblem) allProbs + +mkContents :: [TarLine] -> Either Errors [(FilenameDescr, TarLine)] +mkContents tls = case runState (mapM f tls) [] of + (xs, mapping) -> + case concat $ map (checkContent mapping) xs of + [] -> Right xs + errs -> Left errs + where f tl = do fnd <- mkFilePathDescr (tlFileName tl) + return (fnd, tl) + +nubContents :: [(FilenameDescr, TarLine)] + -> ([Problem], [(FilenameDescr, TarLine)]) +nubContents [] = ([], []) +nubContents [x] = ([], [x]) +nubContents (x1@(fd1, tl1) : xs@((fd2, _) : _)) + | fd1 == fd2 = (DuplicateFile (tlFileName tl1) : ps, xs') + | otherwise = (ps, x1 : xs') + where (ps, xs') = nubContents xs + +mkFilePathDescr :: FilePath -> State ThingVersionMap FilenameDescr +mkFilePathDescr fp + | Just [ghcVersion, _, middle, filename] + <- re ("^ghc-" ++ versionRE ++ "(/.*)?/([^/]*)$") fp + = do ghcVersionDescr <- do mapping <- get + case addThingVersion mapping "ghc" ghcVersion of + Just mapping' -> + do put mapping' + return (VersionOf "ghc") + Nothing -> + return (FP ghcVersion) + filename' <- mkFileNameDescr filename + let fd = FP "ghc-" : ghcVersionDescr : FP middle : FP "/" : filename' + return $ normalise fd + | otherwise = return [FP fp] + +mkFileNameDescr :: FilePath -> State ThingVersionMap FilenameDescr +mkFileNameDescr filename + | Just [thing, thingVersion, _, ghcVersion, _] + <- re ("^libHS(.*)-" ++ versionRE ++ "-ghc" ++ versionRE ++ "\\.so$") + filename + = do mapping <- get + case addThingVersion mapping "ghc" ghcVersion of + Just m -> + case addThingVersion m thing thingVersion of + Just m' -> + do put m' + return [FP "libHS", FP thing, FP "-", VersionOf thing, + FP "-ghc", VersionOf "ghc", FP ".so"] + _ -> unchanged + _ -> unchanged + | Just [way, thingVersion, _] + <- re ("^libHSrts(_.*)?-ghc" ++ versionRE ++ "\\.so$") + filename + = do mapping <- get + case addThingVersion mapping "ghc" thingVersion of + Just mapping' -> + do put mapping' + return [FP "libHSrts", FP way, FP "-ghc", VersionOf "ghc", + FP ".so"] + _ -> unchanged + | Just [thing, thingVersion, _, way] + <- re ("^libHS(.*)-" ++ versionRE ++ "(_.*)?\\.a$") + filename + = do mapping <- get + case addThingVersion mapping thing thingVersion of + Just mapping' -> + do put mapping' + return [FP "libHS", FP thing, FP "-", VersionOf thing, + FP way, FP ".a"] + _ -> unchanged + | Just [thing, thingVersion, _] + <- re ("^HS(.*)-" ++ versionRE ++ "\\.o$") + filename + = do mapping <- get + case addThingVersion mapping thing thingVersion of + Just mapping' -> + do put mapping' + return [FP "HS", FP thing, FP "-", VersionOf thing, + FP ".o"] + _ -> unchanged + | otherwise = unchanged + where unchanged = return [FP filename] + +compareContent :: [(FilenameDescr, TarLine)] -> [(FilenameDescr, TarLine)] + -> [FileProblem] +compareContent [] [] = [] +compareContent xs [] = map (First . ExtraFile . tlFileName . snd) xs +compareContent [] ys = map (Second . ExtraFile . tlFileName . snd) ys +compareContent xs1@((fd1, tl1) : xs1') xs2@((fd2, tl2) : xs2') + = case fd1 `compare` fd2 of + EQ -> map Change (compareTarLine tl1 tl2) ++ compareContent xs1' xs2' + LT -> First (ExtraFile (tlFileName tl1)) : compareContent xs1' xs2 + GT -> Second (ExtraFile (tlFileName tl2)) : compareContent xs1 xs2' + +compareTarLine :: TarLine -> TarLine -> [Problem] +compareTarLine tl1 tl2 + = [ PermissionsChanged fn1 fn2 perms1 perms2 | perms1 /= perms2 ] + ++ [ FileSizeChanged fn1 fn2 size1 size2 | sizeChanged ] + where fn1 = tlFileName tl1 + fn2 = tlFileName tl2 + perms1 = tlPermissions tl1 + perms2 = tlPermissions tl2 + size1 = tlSize tl1 + size2 = tlSize tl2 + sizeChanged = abs (size1 - size2) > sizeAbs + && (((100 * size1) `div` size2) > sizePercentage || + ((100 * size2) `div` size1) > sizePercentage) + +versionRE :: String +versionRE = "([0-9]+(\\.[0-9]+)*)" + -- 1.7.10.4