Skip to content

Commit

Permalink
dvc: get rid of WorkingTree
Browse files Browse the repository at this point in the history
Working tree is really just a regular local tree and should be
used by outputs when trying to compute a hash for themselves.
We didn't use it previously because local tree was embeded into
the local remote class.

Related to #4050
  • Loading branch information
efiop committed Jul 16, 2020
1 parent 424ee7f commit 394f0fe
Show file tree
Hide file tree
Showing 10 changed files with 73 additions and 109 deletions.
6 changes: 3 additions & 3 deletions dvc/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ class Config(dict):
def __init__(
self, dvc_dir=None, validate=True, tree=None,
): # pylint: disable=super-init-not-called
from dvc.scm.tree import WorkingTree
from dvc.tree.local import LocalRemoteTree

self.dvc_dir = dvc_dir

Expand All @@ -248,7 +248,7 @@ def __init__(
else:
self.dvc_dir = os.path.abspath(os.path.realpath(dvc_dir))

self.wtree = WorkingTree(self.dvc_dir)
self.wtree = LocalRemoteTree(None, {"url": self.dvc_dir})
self.tree = tree.tree if tree else self.wtree

self.load(validate=validate)
Expand Down Expand Up @@ -326,7 +326,7 @@ def _save_config(self, level, conf_dict):

logger.debug(f"Writing '{filename}'.")

tree.makedirs(os.path.dirname(filename), exist_ok=True)
tree.makedirs(os.path.dirname(filename))

config = configobj.ConfigObj(_pack_remotes(conf_dict))
with tree.open(filename, "wb") as fobj:
Expand Down
5 changes: 2 additions & 3 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,11 @@ def __init__(self, root_dir=None, scm=None, rev=None):
from dvc.repo.metrics import Metrics
from dvc.repo.plots import Plots
from dvc.repo.params import Params
from dvc.scm.tree import WorkingTree
from dvc.tree.local import LocalRemoteTree
from dvc.utils.fs import makedirs
from dvc.stage.cache import StageCache

if scm:
# use GitTree instead of WorkingTree as default repo tree instance
tree = scm.get_tree(rev)
self.root_dir = self.find_root(root_dir, tree)
self.scm = scm
Expand All @@ -91,7 +90,7 @@ def __init__(self, root_dir=None, scm=None, rev=None):
else:
root_dir = self.find_root(root_dir)
self.root_dir = os.path.abspath(os.path.realpath(root_dir))
self.tree = WorkingTree(self.root_dir)
self.tree = LocalRemoteTree(None, {"url": self.root_dir})

self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)
self.config = Config(self.dvc_dir, tree=self.tree)
Expand Down
4 changes: 2 additions & 2 deletions dvc/repo/brancher.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from funcy import group_by

from dvc.scm.tree import WorkingTree
from dvc.tree.local import LocalRemoteTree


def brancher( # noqa: E302
Expand Down Expand Up @@ -29,7 +29,7 @@ def brancher( # noqa: E302

scm = self.scm

self.tree = WorkingTree(self.root_dir)
self.tree = LocalRemoteTree(self, {"url": self.root_dir})
yield "workspace"

if revs and "workspace" in revs:
Expand Down
61 changes: 3 additions & 58 deletions dvc/scm/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,63 +41,8 @@ def makedirs(self, path, mode=0o777, exist_ok=True):
raise NotImplementedError


class WorkingTree(BaseTree):
"""Proxies the repo file access methods to working tree files"""

def __init__(self, repo_root=None):
repo_root = repo_root or os.getcwd()
self.repo_root = repo_root

@property
def tree_root(self):
return self.repo_root

def open(self, path, mode="r", encoding="utf-8"):
"""Open file and return a stream."""
if "b" in mode:
encoding = None
return open(path, mode=mode, encoding=encoding)

def exists(self, path):
"""Test whether a path exists."""
return os.path.lexists(path)

def isdir(self, path):
"""Return true if the pathname refers to an existing directory."""
return os.path.isdir(path)

def isfile(self, path):
"""Test whether a path is a regular file"""
return os.path.isfile(path)

def walk(self, top, topdown=True, onerror=None):
"""Directory tree generator.
See `os.walk` for the docs. Differences:
- no support for symlinks
"""
for root, dirs, files in os.walk(
top, topdown=topdown, onerror=onerror
):
yield os.path.normpath(root), dirs, files

def isexec(self, path):
mode = os.stat(path).st_mode
return mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

@staticmethod
def stat(path):
return os.stat(path)

@cached_property
def hash_jobs(self):
return max(1, min(4, cpu_count() // 2))

def makedirs(self, path, mode=0o777, exist_ok=True):
os.makedirs(path, mode=mode, exist_ok=exist_ok)


def is_working_tree(tree):
return isinstance(tree, WorkingTree) or isinstance(
getattr(tree, "tree", None), WorkingTree
from dvc.tree.local import LocalRemoteTree
return isinstance(tree, LocalRemoteTree) or isinstance(
getattr(tree, "tree", None), LocalRemoteTree
)
8 changes: 5 additions & 3 deletions dvc/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from dvc.utils import current_timestamp, relpath, to_chunks
from dvc.utils.fs import get_inode, get_mtime_and_size, remove


SQLITE_MAX_VARIABLES_NUMBER = 999

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -89,10 +90,11 @@ class State: # pylint: disable=too-many-instance-attributes
MAX_UINT = 2 ** 64 - 2

def __init__(self, local_cache):
from dvc.tree.local import LocalRemoteTree
repo = local_cache.repo
self.repo = repo
self.root_dir = repo.root_dir
self.tree = local_cache.tree.work_tree
self.tree = LocalRemoteTree(None, {})

state_config = repo.config.get("state", {})
self.row_limit = state_config.get("row_limit", self.STATE_ROW_LIMIT)
Expand Down Expand Up @@ -394,8 +396,8 @@ def get(self, path_info):
assert isinstance(path_info, str) or path_info.scheme == "local"
path = os.fspath(path_info)

# NOTE: use os.path.exists instead of WorkingTree.exists
# WorkingTree.exists uses lexists() and will return True for broken
# NOTE: use os.path.exists instead of LocalRemoteTree.exists
# because it uses lexists() and will return True for broken
# symlinks that we cannot stat() in get_mtime_and_size
if not os.path.exists(path):
return None
Expand Down
49 changes: 31 additions & 18 deletions dvc/tree/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from dvc.exceptions import DvcException
from dvc.path_info import PathInfo
from dvc.scheme import Schemes
from dvc.scm.tree import WorkingTree, is_working_tree
from dvc.system import System
from dvc.utils import file_md5, relpath, tmp_fname
from dvc.utils.fs import (
Expand Down Expand Up @@ -42,21 +41,16 @@ def __init__(self, repo, config):
url = config.get("url")
self.path_info = self.PATH_CLS(url) if url else None

@property
def tree_root(self):
return self.config.get("url")

@property
def state(self):
from dvc.state import StateNoop

return self.repo.state if self.repo else StateNoop()

@cached_property
def work_tree(self):
# When using repo.brancher, repo.tree may change to/from WorkingTree to
# GitTree arbitarily. When repo.tree is GitTree, local cache needs to
# use its own WorkingTree instance.
if self.repo:
return WorkingTree(self.repo.root_dir)
return None

@staticmethod
def open(path_info, mode="r", encoding=None):
return open(path_info, mode=mode, encoding=encoding)
Expand All @@ -65,26 +59,39 @@ def exists(self, path_info):
assert isinstance(path_info, str) or path_info.scheme == "local"
if not self.repo:
return os.path.exists(path_info)
return self.work_tree.exists(path_info)
return os.path.lexists(path_info)

def isfile(self, path_info):
if not self.repo:
return os.path.isfile(path_info)
return self.work_tree.isfile(path_info)
return os.path.isfile(path_info)

def isdir(self, path_info):
if not self.repo:
return os.path.isdir(path_info)
return self.work_tree.isdir(path_info)
return os.path.isdir(path_info)

def iscopy(self, path_info):
return not (
System.is_symlink(path_info) or System.is_hardlink(path_info)
)

def walk(self, top, topdown=True, onerror=None):
"""Directory tree generator.
See `os.walk` for the docs. Differences:
- no support for symlinks
"""
for root, dirs, files in os.walk(
top, topdown=topdown, onerror=onerror
):
yield os.path.normpath(root), dirs, files

def walk_files(self, path_info, **kwargs):
for fname in self.work_tree.walk_files(path_info):
yield PathInfo(fname)
for root, _, files in self.walk(path_info):
for file in files:
# NOTE: os.path.join is ~5.5 times slower
yield PathInfo(f"{root}{os.sep}{file}")

def is_empty(self, path_info):
path = path_info.fspath
Expand All @@ -111,6 +118,14 @@ def remove(self, path_info):
def makedirs(self, path_info):
makedirs(path_info, exist_ok=True, mode=self.dir_mode)

def isexec(self, path):
mode = os.stat(path).st_mode
return mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

@staticmethod
def stat(path):
return os.stat(path)

def move(self, from_info, to_info, mode=None):
if from_info.scheme != "local" or to_info.scheme != "local":
raise NotImplementedError
Expand Down Expand Up @@ -215,9 +230,7 @@ def _unprotect_file(self, path):
os.chmod(path, self.file_mode)

def _unprotect_dir(self, path):
assert is_working_tree(self.repo.tree)

for fname in self.repo.tree.walk_files(path):
for fname in self.walk_files(path):
self._unprotect_file(fname)

def unprotect(self, path_info):
Expand Down
2 changes: 1 addition & 1 deletion dvc/utils/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def get_mtime_and_size(path, tree):
raise
continue
size += stats.st_size
files_mtimes[file_path] = stats.st_mtime
files_mtimes[os.fspath(file_path)] = stats.st_mtime

# We track file changes and moves, which cannot be detected with simply
# max(mtime(f) for f in non_ignored_files)
Expand Down
9 changes: 5 additions & 4 deletions tests/func/test_ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
DvcIgnoreRepo,
)
from dvc.repo import Repo
from dvc.scm.tree import WorkingTree
from dvc.utils import relpath
from dvc.utils.fs import get_mtime_and_size
from dvc.path_info import PathInfo
from dvc.tree.local import LocalRemoteTree
from tests.dir_helpers import TmpDir
from tests.utils import to_posixpath

Expand Down Expand Up @@ -107,7 +108,7 @@ def test_ignore_collecting_dvcignores(tmp_dir, dvc, dname):
assert ignore_pattern_trie is not None
assert (
DvcIgnorePatterns.from_files(
os.fspath(top_ignore_file), WorkingTree(dvc.root_dir)
os.fspath(top_ignore_file), LocalRemoteTree(None, {"url": dvc.root_dir})
)
== ignore_pattern_trie[os.fspath(ignore_file)]
)
Expand Down Expand Up @@ -165,15 +166,15 @@ def test_ignore_subrepo(tmp_dir, scm, dvc):
scm.commit("init parent dvcignore")

subrepo_dir = tmp_dir / "subdir"
assert not dvc.tree.exists(subrepo_dir / "foo")
assert not dvc.tree.exists(PathInfo(subrepo_dir / "foo"))

with subrepo_dir.chdir():
subrepo = Repo.init(subdir=True)
scm.add(str(subrepo_dir / "foo"))
scm.commit("subrepo init")

for _ in subrepo.brancher(all_commits=True):
assert subrepo.tree.exists(subrepo_dir / "foo")
assert subrepo.tree.exists(PathInfo(subrepo_dir / "foo"))


def test_ignore_blank_line(tmp_dir, dvc):
Expand Down
32 changes: 18 additions & 14 deletions tests/func/test_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@
from dvc.repo.tree import RepoTree
from dvc.scm import SCM
from dvc.scm.git import GitTree
from dvc.scm.tree import WorkingTree
from dvc.tree.local import LocalRemoteTree
from dvc.utils.fs import remove
from tests.basic_env import TestDir, TestGit, TestGitSubmodule


class TestWorkingTree(TestDir):
class TestLocalRemoteTree(TestDir):
def setUp(self):
super().setUp()
self.tree = WorkingTree()
self.tree = LocalRemoteTree(None, {})

def test_open(self):
with self.tree.open(self.FOO) as fd:
Expand Down Expand Up @@ -109,7 +109,7 @@ def convert_to_sets(walk_results):

class TestWalkInNoSCM(AssertWalkEqualMixin, TestDir):
def test(self):
tree = WorkingTree(self._root_dir)
tree = LocalRemoteTree(None, {"url": self._root_dir})
self.assertWalkEqual(
tree.walk(self._root_dir),
[
Expand All @@ -128,7 +128,7 @@ def test(self):
)

def test_subdir(self):
tree = WorkingTree(self._root_dir)
tree = LocalRemoteTree(None, {"url": self._root_dir})
self.assertWalkEqual(
tree.walk(join("data_dir", "data_sub_dir")),
[(join("data_dir", "data_sub_dir"), [], ["data_sub"])],
Expand All @@ -137,7 +137,7 @@ def test_subdir(self):

class TestWalkInGit(AssertWalkEqualMixin, TestGit):
def test_nobranch(self):
tree = CleanTree(WorkingTree(self._root_dir))
tree = CleanTree(LocalRemoteTree(None, {"url": self._root_dir}))
self.assertWalkEqual(
tree.walk("."),
[
Expand Down Expand Up @@ -224,20 +224,24 @@ def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, local_cloud):


def test_cleantree_subrepo(tmp_dir, dvc, scm, monkeypatch):
from dvc.path_info import PathInfo

tmp_dir.gen({"subdir": {}})
subrepo_dir = tmp_dir / "subdir"
with subrepo_dir.chdir():
subrepo = Repo.init(subdir=True)
subrepo_dir.gen({"foo": "foo", "dir": {"bar": "bar"}})

path = PathInfo(subrepo_dir)

assert isinstance(dvc.tree, CleanTree)
assert not dvc.tree.exists(subrepo_dir / "foo")
assert not dvc.tree.isfile(subrepo_dir / "foo")
assert not dvc.tree.exists(subrepo_dir / "dir")
assert not dvc.tree.isdir(subrepo_dir / "dir")
assert not dvc.tree.exists(path / "foo")
assert not dvc.tree.isfile(path / "foo")
assert not dvc.tree.exists(path / "dir")
assert not dvc.tree.isdir(path / "dir")

assert isinstance(subrepo.tree, CleanTree)
assert subrepo.tree.exists(subrepo_dir / "foo")
assert subrepo.tree.isfile(subrepo_dir / "foo")
assert subrepo.tree.exists(subrepo_dir / "dir")
assert subrepo.tree.isdir(subrepo_dir / "dir")
assert subrepo.tree.exists(path / "foo")
assert subrepo.tree.isfile(path / "foo")
assert subrepo.tree.exists(path / "dir")
assert subrepo.tree.isdir(path / "dir")
Loading

0 comments on commit 394f0fe

Please sign in to comment.