From 3187040fe8ac52450d3485c44afdb1480de13d31 Mon Sep 17 00:00:00 2001 From: Alexander Schepanovski Date: Thu, 27 Jun 2019 22:45:14 +0700 Subject: [PATCH 1/3] dvc: raise when Repo.open() fails --- dvc/exceptions.py | 7 +++++++ dvc/repo/__init__.py | 13 ++++++++++--- tests/func/test_api.py | 10 ++++++++++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/dvc/exceptions.py b/dvc/exceptions.py index c5496fa4ee..6b38af4ead 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -263,3 +263,10 @@ def __init__(self, etag, cached_etag): "ETag mismatch detected when copying file to cache! " "(expected: '{}', actual: '{}')".format(etag, cached_etag) ) + + +class OutputFileMissingError(DvcException): + def __init__(self, path): + super(OutputFileMissingError, self).__init__( + "Can't find {} neither locally nor on remote".format(path) + ) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 2a8daba292..df0124495f 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -8,10 +8,11 @@ NotDvcRepoError, OutputNotFoundError, TargetNotDirectoryError, + OutputFileMissingError, ) from dvc.ignore import DvcIgnoreFileHandler from dvc.path_info import PathInfo -from dvc.utils.compat import open as _open +from dvc.utils.compat import open as _open, fspath_py35 from dvc.utils import relpath logger = logging.getLogger(__name__) @@ -455,9 +456,15 @@ def open(self, path, remote=None, mode="r", encoding=None): if out.isdir(): raise ValueError("Can't open a dir") + cache_file = self.cache.local.checksum_to_path_info(out.checksum) + cache_file = fspath_py35(cache_file) + with self.state: cache_info = out.get_used_cache(remote=remote) self.cloud.pull(cache_info, remote=remote) - cache_file = self.cache.local.checksum_to_path_info(out.checksum) - return _open(cache_file.fspath, mode=mode, encoding=encoding) + # Since pull may just skip with a warning, we need to check it here + if not os.path.exists(cache_file): + raise OutputFileMissingError(relpath(path, self.root_dir)) + + return _open(cache_file, mode=mode, encoding=encoding) diff --git a/tests/func/test_api.py b/tests/func/test_api.py index ab1437bee9..84f58044b4 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -2,6 +2,7 @@ import shutil from dvc import api +from dvc.exceptions import OutputFileMissingError from dvc.main import main from dvc.path_info import URLInfo from dvc.remote.config import RemoteConfig @@ -113,6 +114,15 @@ def test_open_external(repo_dir, dvc_repo, erepo, remote_url): assert fd.read() == repo_dir.FOO_CONTENTS +def test_open_missing(erepo): + # Remove cache to make foo missing + shutil.rmtree(erepo.dvc.cache.local.cache_dir) + + repo_url = "file://" + erepo.dvc.root_dir + with pytest.raises(OutputFileMissingError): + api.read(erepo.FOO, repo=repo_url) + + def _set_remote_url_and_commit(repo, remote_url): rconfig = RemoteConfig(repo.config) rconfig.modify("upstream", "url", remote_url) From 9ba54c1f56739e9d50bde2c9793a05401f707d94 Mon Sep 17 00:00:00 2001 From: Alexander Schepanovski Date: Thu, 27 Jun 2019 22:47:14 +0700 Subject: [PATCH 2/3] dvc: add rev param to api.open()/read()/get_url() --- dvc/api.py | 23 ++++++++++++++--------- tests/func/test_api.py | 12 +++++++++--- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index e4f4a064d6..a2235653a6 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -13,21 +13,22 @@ from dvc.external_repo import ExternalRepo -def get_url(path, repo=None, remote=None): +def get_url(path, repo=None, rev=None, remote=None): """Returns an url of a resource specified by path in repo""" - with _make_repo(repo) as _repo: + with _make_repo(repo, rev=rev) as _repo: abspath = os.path.join(_repo.root_dir, path) out, = _repo.find_outs_by_path(abspath) remote_obj = _repo.cloud.get_remote(remote) return str(remote_obj.checksum_to_path_info(out.checksum)) -def open(path, repo=None, remote=None, mode="r", encoding=None): +def open(path, repo=None, rev=None, remote=None, mode="r", encoding=None): """Opens a specified resource as a file descriptor""" args = (path,) kwargs = { "repo": repo, "remote": remote, + "rev": rev, "mode": mode, "encoding": encoding, } @@ -45,8 +46,8 @@ def __getattr__(self, name): ) -def _open(path, repo=None, remote=None, mode="r", encoding=None): - with _make_repo(repo) as _repo: +def _open(path, repo=None, rev=None, remote=None, mode="r", encoding=None): + with _make_repo(repo, rev=rev) as _repo: abspath = os.path.join(_repo.root_dir, path) with _repo.open( abspath, remote=remote, mode=mode, encoding=encoding @@ -54,21 +55,25 @@ def _open(path, repo=None, remote=None, mode="r", encoding=None): yield fd -def read(path, repo=None, remote=None, mode="r", encoding=None): +def read(path, repo=None, rev=None, remote=None, mode="r", encoding=None): """Read a specified resource into string""" - with open(path, repo, remote=remote, mode=mode, encoding=encoding) as fd: + with open( + path, repo=repo, rev=rev, remote=remote, mode=mode, encoding=encoding + ) as fd: return fd.read() @contextmanager -def _make_repo(repo_url): +def _make_repo(repo_url, rev=None): if not repo_url or urlparse(repo_url).scheme == "": + assert rev is None, "Custom revision is not supported for local repo" yield Repo(repo_url) else: tmp_dir = tempfile.mkdtemp("dvc-repo") try: - ext_repo = ExternalRepo(tmp_dir, url=repo_url) + ext_repo = ExternalRepo(tmp_dir, url=repo_url, rev=rev) ext_repo.install() yield ext_repo.repo finally: + ext_repo.repo.scm.git.close() remove(tmp_dir) diff --git a/tests/func/test_api.py b/tests/func/test_api.py index 84f58044b4..afc71d9eca 100644 --- a/tests/func/test_api.py +++ b/tests/func/test_api.py @@ -102,16 +102,22 @@ def test_open(repo_dir, dvc_repo, remote_url): def test_open_external(repo_dir, dvc_repo, erepo, remote_url): + erepo.dvc.scm.checkout("branch") _set_remote_url_and_commit(erepo.dvc, remote_url) - erepo.dvc.push() + erepo.dvc.scm.checkout("master") + _set_remote_url_and_commit(erepo.dvc, remote_url) + + erepo.dvc.push(all_branches=True) # Remove cache to force download shutil.rmtree(erepo.dvc.cache.local.cache_dir) # Using file url to force clone to tmp repo repo_url = "file://" + erepo.dvc.root_dir - with api.open(repo_dir.FOO, repo=repo_url) as fd: - assert fd.read() == repo_dir.FOO_CONTENTS + with api.open("version", repo=repo_url) as fd: + assert fd.read() == "master" + + assert api.read("version", repo=repo_url, rev="branch") == "branch" def test_open_missing(erepo): From 766b7d8818e1450fc66c20fe1ae0ce25a2bf38f2 Mon Sep 17 00:00:00 2001 From: Alexander Schepanovski Date: Mon, 1 Jul 2019 18:04:34 +0700 Subject: [PATCH 3/3] dvc: uninstall external repo properly --- dvc/api.py | 6 ++---- dvc/external_repo.py | 13 +++++++++++-- dvc/repo/get.py | 7 ++----- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/dvc/api.py b/dvc/api.py index a2235653a6..506baa6c82 100644 --- a/dvc/api.py +++ b/dvc/api.py @@ -7,7 +7,6 @@ except ImportError: from contextlib import GeneratorContextManager as GCM -from dvc.utils import remove from dvc.utils.compat import urlparse from dvc.repo import Repo from dvc.external_repo import ExternalRepo @@ -70,10 +69,9 @@ def _make_repo(repo_url, rev=None): yield Repo(repo_url) else: tmp_dir = tempfile.mkdtemp("dvc-repo") + ext_repo = ExternalRepo(tmp_dir, url=repo_url, rev=rev) try: - ext_repo = ExternalRepo(tmp_dir, url=repo_url, rev=rev) ext_repo.install() yield ext_repo.repo finally: - ext_repo.repo.scm.git.close() - remove(tmp_dir) + ext_repo.uninstall() diff --git a/dvc/external_repo.py b/dvc/external_repo.py index cbffeee872..2bc2668f9d 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -5,7 +5,7 @@ import logging import shortuuid -from funcy import cached_property +from funcy import cached_property, retry from schema import Optional from dvc.config import Config @@ -138,7 +138,16 @@ def uninstall(self): ) return - remove(self.path) + # If repo has been initialized then we need to close its git repo + if "repo" in self.__dict__: + self.repo.scm.git.close() + + if os.name == "nt": + # git.exe may hang for a while not permitting to remove temp dir + os_retry = retry(5, errors=OSError, timeout=0.1) + os_retry(remove)(self.path) + else: + remove(self.path) def update(self): self.repo.scm.fetch(self.rev) diff --git a/dvc/repo/get.py b/dvc/repo/get.py index f234c61a7c..0871b40232 100644 --- a/dvc/repo/get.py +++ b/dvc/repo/get.py @@ -5,7 +5,6 @@ from dvc.path_info import PathInfo from dvc.external_repo import ExternalRepo from dvc.utils.compat import urlparse -from dvc.utils import remove @staticmethod @@ -19,8 +18,8 @@ def get(url, path, out=None, rev=None): # and won't work with reflink/hardlink. dpath = os.path.dirname(os.path.abspath(out)) tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid())) + erepo = ExternalRepo(tmp_dir, url=url, rev=rev) try: - erepo = ExternalRepo(tmp_dir, url=url, rev=rev) erepo.install() # Try any links possible to avoid data duplication. # @@ -42,7 +41,5 @@ def get(url, path, out=None, rev=None): o.path_info = PathInfo(os.path.abspath(out)) with o.repo.state: o.checkout() - erepo.repo.scm.git.close() finally: - if os.path.exists(tmp_dir): - remove(tmp_dir) + erepo.uninstall()