From efd78d207a322d05a183e2021b6646b7fe02bd00 Mon Sep 17 00:00:00 2001 From: Michael Snoyman Date: Fri, 3 Jul 2015 16:15:52 +0300 Subject: [PATCH] Use hashes to check file dirtiness #502 --- ChangeLog.md | 1 + src/Stack/Build/Cache.hs | 43 ++++---------------- src/Stack/Build/Execute.hs | 17 ++++---- src/Stack/Build/Source.hs | 81 ++++++++++++++++++++++++++++++++++---- src/Stack/Build/Types.hs | 13 +++++- stack.cabal | 1 + 6 files changed, 104 insertions(+), 52 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 443c8243fd..6806019db5 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -15,6 +15,7 @@ * Add the `--skip-msys` flag [#377](https://github.com/commercialhaskell/stack/issues/377) * `--keep-going`, turned on by default for tests and benchmarks [#478](https://github.com/commercialhaskell/stack/issues/478) * `concurrent-tests: BOOL` [#492](https://github.com/commercialhaskell/stack/issues/492) +* Use hashes to check file dirtiness [#502](https://github.com/commercialhaskell/stack/issues/502) ## 0.1.1.0 diff --git a/src/Stack/Build/Cache.hs b/src/Stack/Build/Cache.hs index 5eee06ac9e..558730ae7f 100644 --- a/src/Stack/Build/Cache.hs +++ b/src/Stack/Build/Cache.hs @@ -7,7 +7,6 @@ module Stack.Build.Cache ( tryGetBuildCache , tryGetConfigCache , tryGetCabalMod - , getPackageFileModTimes , getInstalledExes , buildCacheTimes , tryGetFlagCache @@ -24,8 +23,7 @@ module Stack.Build.Cache ) where import Control.Exception.Enclosed (catchIO, handleIO, tryIO) -import Control.Monad.Catch (MonadCatch, MonadThrow, catch, - throwM) +import Control.Monad.Catch (MonadThrow, catch, throwM) import Control.Monad.IO.Class import Control.Monad.Logger (MonadLogger) import Control.Monad.Reader @@ -34,19 +32,15 @@ import qualified Data.Binary as Binary import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L import Data.Map (Map) -import qualified Data.Map as Map -import Data.Maybe (catMaybes, fromMaybe, mapMaybe) -import qualified Data.Set as Set +import Data.Maybe (fromMaybe, mapMaybe) import GHC.Generics (Generic) import Path import Path.IO import Stack.Build.Types import Stack.Constants -import Stack.Package import Stack.Types import System.Directory (createDirectoryIfMissing, getDirectoryContents, - getModificationTime, removeFile) import System.IO.Error (isDoesNotExistError) @@ -89,16 +83,16 @@ markExeNotInstalled loc ident = do -- | Stored on disk to know whether the flags have changed or any -- files have changed. data BuildCache = BuildCache - { buildCacheTimes :: !(Map FilePath ModTime) + { buildCacheTimes :: !(Map FilePath FileCacheInfo) -- ^ Modification times of files. } - deriving (Generic,Eq) + deriving (Generic) instance Binary BuildCache -- | Try to read the dirtiness cache for the given package directory. tryGetBuildCache :: (MonadIO m, MonadReader env m, HasConfig env, MonadThrow m, MonadLogger m, HasEnvConfig env) - => Path Abs Dir -> m (Maybe BuildCache) -tryGetBuildCache = tryGetCache buildCacheFile + => Path Abs Dir -> m (Maybe (Map FilePath FileCacheInfo)) +tryGetBuildCache = liftM (fmap buildCacheTimes) . tryGetCache buildCacheFile -- | Try to read the dirtiness cache for the given package directory. tryGetConfigCache :: (MonadIO m, MonadReader env m, HasConfig env, MonadThrow m, MonadLogger m, HasEnvConfig env) @@ -129,7 +123,7 @@ tryGetCache get' dir = do -- | Write the dirtiness cache for this package's files. writeBuildCache :: (MonadIO m, MonadReader env m, HasConfig env, MonadThrow m, MonadLogger m, HasEnvConfig env) - => Path Abs Dir -> Map FilePath ModTime -> m () + => Path Abs Dir -> Map FilePath FileCacheInfo -> m () writeBuildCache dir times = writeCache dir @@ -209,29 +203,6 @@ writeFlagCache gid cache = do Binary.encodeFile (toFilePath file) cache --- | Get the modified times of all known files in the package, --- including the package's cabal file itself. -getPackageFileModTimes :: (MonadIO m, MonadLogger m, MonadThrow m, MonadCatch m) - => Package - -> Path Abs File -- ^ cabal file - -> m (Map FilePath ModTime) -getPackageFileModTimes pkg cabalfp = do - files <- getPackageFiles (packageFiles pkg) AllFiles cabalfp - liftM (Map.fromList . catMaybes) - $ mapM getModTimeMaybe - $ Set.toList files - where - getModTimeMaybe fp = - liftIO - (catch - (liftM - (Just . (toFilePath fp,) . modTime) - (getModificationTime (toFilePath fp))) - (\e -> - if isDoesNotExistError e - then return Nothing - else throwM e)) - -- | Mark a test suite as having succeeded setTestSuccess :: (MonadIO m, MonadLogger m, MonadThrow m, MonadReader env m, HasConfig env, HasEnvConfig env) => Path Abs Dir diff --git a/src/Stack/Build/Execute.hs b/src/Stack/Build/Execute.hs index 78f0ddc1b3..e94cb2ddc6 100644 --- a/src/Stack/Build/Execute.hs +++ b/src/Stack/Build/Execute.hs @@ -630,9 +630,10 @@ singleBuild ac@ActionContext {..} ee@ExecuteEnv {..} task@Task {..} = withSingleContext ac ee task $ \package cabalfp pkgDir cabal announce console _mlogFile -> do (cache, _neededConfig) <- ensureConfig pkgDir ee task (announce "configure") cabal cabalfp [] - fileModTimes <- getPackageFileModTimes package cabalfp markExeNotInstalled (taskLocation task) taskProvides - writeBuildCache pkgDir fileModTimes + case taskType of + TTLocal lp -> writeBuildCache pkgDir $ lpNewBuildCache lp + TTUpstream _ _ -> return () announce "build" config <- asks getConfig @@ -709,8 +710,9 @@ singleTest rerunTests ac ee task = when needBuild $ do announce "build (test)" unsetTestSuccess pkgDir - fileModTimes <- getPackageFileModTimes package cabalfp - writeBuildCache pkgDir fileModTimes + case taskType task of + TTLocal lp -> writeBuildCache pkgDir $ lpNewBuildCache lp + TTUpstream _ _ -> assert False $ return () cabal (console && configHideTHLoading config) $ "build" : components toRun <- @@ -866,7 +868,7 @@ singleBench :: M env m -> Task -> m () singleBench ac ee task = - withSingleContext ac ee task $ \package cabalfp pkgDir cabal announce console _mlogFile -> do + withSingleContext ac ee task $ \_package cabalfp pkgDir cabal announce console _mlogFile -> do (_cache, neededConfig) <- ensureConfig pkgDir ee task (announce "configure (benchmarks)") cabal cabalfp ["--enable-benchmarks"] let needBuild = neededConfig || @@ -875,8 +877,9 @@ singleBench ac ee task = _ -> assert False True) when needBuild $ do announce "build (benchmarks)" - fileModTimes <- getPackageFileModTimes package cabalfp - writeBuildCache pkgDir fileModTimes + case taskType task of + TTLocal lp -> writeBuildCache pkgDir $ lpNewBuildCache lp + TTUpstream _ _ -> assert False $ return () config <- asks getConfig cabal (console && configHideTHLoading config) ["build"] diff --git a/src/Stack/Build/Source.hs b/src/Stack/Build/Source.hs index 8bceca3181..1b7cb5692d 100644 --- a/src/Stack/Build/Source.hs +++ b/src/Stack/Build/Source.hs @@ -14,13 +14,21 @@ module Stack.Build.Source ) where import Network.HTTP.Client.Conduit (HasHttpManager) -import Control.Applicative ((<|>), (<$>)) +import Control.Applicative ((<|>), (<$>), (<*>)) +import Control.Exception (catch) import Control.Monad import Control.Monad.Catch (MonadCatch) import Control.Monad.IO.Class import Control.Monad.Logger import Control.Monad.Reader (MonadReader, asks) import Control.Monad.Trans.Resource +import Crypto.Hash (Digest, SHA256) +import Crypto.Hash.Conduit (sinkHash) +import Data.Byteable (toBytes) +import qualified Data.ByteString as S +import Data.Conduit (($$), ZipSink (..)) +import qualified Data.Conduit.Binary as CB +import qualified Data.Conduit.List as CL import Data.Either import qualified Data.Foldable as F import Data.Function @@ -29,13 +37,13 @@ import Data.List import qualified Data.Map as Map import Data.Map.Strict (Map) import Data.Maybe -import Data.Monoid ((<>)) +import Data.Monoid ((<>), Any (..), mconcat) import Data.Set (Set) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import Path -import Prelude hiding (FilePath, writeFile) +import Prelude hiding (writeFile) import Stack.Build.Cache import Stack.Build.Types import Stack.BuildPlan (loadMiniBuildPlan, @@ -45,6 +53,8 @@ import Stack.Package import Stack.PackageIndex import Stack.Types import System.Directory hiding (findExecutable, findFiles) +import System.IO (withBinaryFile, IOMode (ReadMode)) +import System.IO.Error (isDoesNotExistError) type SourceMap = Map PackageName PackageSource @@ -202,16 +212,16 @@ loadLocals bopts latestVersion = do $ MismatchedCabalName cabalfp (packageName pkg) mbuildCache <- tryGetBuildCache dir files <- getPackageFiles (packageFiles pkg) AllFiles cabalfp - fileModTimes <- getPackageFileModTimes pkg cabalfp + (isDirty, newBuildCache) <- checkBuildCache + (fromMaybe Map.empty mbuildCache) + (map toFilePath $ Set.toList files) return LocalPackage { lpPackage = pkg , lpPackageFinal = pkgFinal , lpWanted = wanted , lpFiles = files - , lpDirtyFiles = - maybe True - ((/= fileModTimes) . buildCacheTimes) - mbuildCache + , lpDirtyFiles = isDirty + , lpNewBuildCache = newBuildCache , lpCabalFile = cabalfp , lpDir = dir , lpComponents = fromMaybe Set.empty $ Map.lookup name names @@ -314,3 +324,58 @@ extendExtraDeps extraDeps0 mbp latestVersion extraNames extraIdents = -- the version matches what's in the snapshot, so just use the snapshot version Just version' | version == version' -> m _ -> Map.insert name version m + +-- | Compare the current filesystem state to the cached information, and +-- determine (1) if the files are dirty, and (2) the new cache values. +checkBuildCache :: MonadIO m + => Map FilePath FileCacheInfo -- ^ old cache + -> [FilePath] -- ^ files in package + -> m (Bool, Map FilePath FileCacheInfo) +checkBuildCache oldCache files = liftIO $ do + (Any isDirty, m) <- fmap mconcat $ mapM go files + return (isDirty, m) + where + go fp = do + mmodTime <- getModTimeMaybe fp + case mmodTime of + Nothing -> return (Any False, Map.empty) + Just modTime' -> do + (isDirty, newFci) <- + case Map.lookup fp oldCache of + Just fci + | fciModTime fci == modTime' -> return (False, fci) + | otherwise -> do + newFci <- calcFci modTime' fp + let isDirty = + fciSize fci /= fciSize newFci || + fciHash fci /= fciHash newFci + return (isDirty, newFci) + Nothing -> do + newFci <- calcFci modTime' fp + return (True, newFci) + return (Any isDirty, Map.singleton fp newFci) + + getModTimeMaybe fp = + liftIO + (catch + (liftM + (Just . modTime) + (getModificationTime fp)) + (\e -> + if isDoesNotExistError e + then return Nothing + else throwM e)) + + calcFci modTime' fp = + withBinaryFile fp ReadMode $ \h -> do + (size, digest) <- CB.sourceHandle h $$ getZipSink + ((,) + <$> ZipSink (CL.fold + (\x y -> x + fromIntegral (S.length y)) + 0) + <*> ZipSink sinkHash) + return FileCacheInfo + { fciModTime = modTime' + , fciSize = size + , fciHash = toBytes (digest :: Digest SHA256) + } diff --git a/src/Stack/Build/Types.hs b/src/Stack/Build/Types.hs index d931054357..9191c8747a 100644 --- a/src/Stack/Build/Types.hs +++ b/src/Stack/Build/Types.hs @@ -29,7 +29,8 @@ module Stack.Build.Types ,ConstructPlanException(..) ,configureOpts ,BadDependency(..) - ,wantedLocalPackages) + ,wantedLocalPackages + ,FileCacheInfo (..)) where import Control.DeepSeq @@ -53,6 +54,7 @@ import Data.Text.Encoding (decodeUtf8With) import Data.Text.Encoding.Error (lenientDecode) import Data.Time.Calendar import Data.Time.Clock +import Data.Word (Word64) import Distribution.System (Arch) import Distribution.Text (display) import GHC.Generics @@ -359,6 +361,7 @@ data LocalPackage = LocalPackage , lpDir :: !(Path Abs Dir) -- ^ Directory of the package. , lpCabalFile :: !(Path Abs File) -- ^ The .cabal file , lpDirtyFiles :: !Bool -- ^ are there files that have changed since the last build? + , lpNewBuildCache :: !(Map FilePath FileCacheInfo) -- ^ current state of the files , lpFiles :: !(Set (Path Abs File)) -- ^ all files used by this package , lpComponents :: !(Set Text) -- ^ components to build, passed directly to Setup.hs build } @@ -531,3 +534,11 @@ modTime x = data Installed = Library GhcPkgId | Executable PackageIdentifier deriving (Show, Eq, Ord) + +data FileCacheInfo = FileCacheInfo + { fciModTime :: !ModTime + , fciSize :: !Word64 + , fciHash :: !S.ByteString + } + deriving (Generic, Show) +instance Binary FileCacheInfo diff --git a/stack.cabal b/stack.cabal index bbaabc0599..17129a71b0 100644 --- a/stack.cabal +++ b/stack.cabal @@ -108,6 +108,7 @@ library , bifunctors >= 4.2.1 , binary >= 0.7 , blaze-builder + , byteable , bytestring , conduit-combinators >= 0.3.1 , conduit >= 1.2.4