Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

api: add rev param to api calls #2202

Merged
merged 3 commits into from
Jul 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 14 additions & 11 deletions dvc/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,27 @@
except ImportError:
from contextlib import GeneratorContextManager as GCM

from dvc.utils import remove
from dvc.utils.compat import urlparse
from dvc.repo import Repo
from dvc.external_repo import ExternalRepo


def get_url(path, repo=None, remote=None):
def get_url(path, repo=None, rev=None, remote=None):
"""Returns an url of a resource specified by path in repo"""
with _make_repo(repo) as _repo:
with _make_repo(repo, rev=rev) as _repo:
abspath = os.path.join(_repo.root_dir, path)
out, = _repo.find_outs_by_path(abspath)
remote_obj = _repo.cloud.get_remote(remote)
return str(remote_obj.checksum_to_path_info(out.checksum))


def open(path, repo=None, remote=None, mode="r", encoding=None):
def open(path, repo=None, rev=None, remote=None, mode="r", encoding=None):
"""Opens a specified resource as a file descriptor"""
args = (path,)
kwargs = {
"repo": repo,
"remote": remote,
"rev": rev,
"mode": mode,
"encoding": encoding,
}
Expand All @@ -45,30 +45,33 @@ def __getattr__(self, name):
)


def _open(path, repo=None, remote=None, mode="r", encoding=None):
with _make_repo(repo) as _repo:
def _open(path, repo=None, rev=None, remote=None, mode="r", encoding=None):
with _make_repo(repo, rev=rev) as _repo:
abspath = os.path.join(_repo.root_dir, path)
with _repo.open(
abspath, remote=remote, mode=mode, encoding=encoding
) as fd:
yield fd


def read(path, repo=None, remote=None, mode="r", encoding=None):
def read(path, repo=None, rev=None, remote=None, mode="r", encoding=None):
Suor marked this conversation as resolved.
Show resolved Hide resolved
"""Read a specified resource into string"""
with open(path, repo, remote=remote, mode=mode, encoding=encoding) as fd:
with open(
path, repo=repo, rev=rev, remote=remote, mode=mode, encoding=encoding
) as fd:
return fd.read()


@contextmanager
def _make_repo(repo_url):
def _make_repo(repo_url, rev=None):
if not repo_url or urlparse(repo_url).scheme == "":
assert rev is None, "Custom revision is not supported for local repo"
yield Repo(repo_url)
else:
tmp_dir = tempfile.mkdtemp("dvc-repo")
ext_repo = ExternalRepo(tmp_dir, url=repo_url, rev=rev)
try:
ext_repo = ExternalRepo(tmp_dir, url=repo_url)
ext_repo.install()
yield ext_repo.repo
finally:
remove(tmp_dir)
ext_repo.uninstall()
7 changes: 7 additions & 0 deletions dvc/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,3 +263,10 @@ def __init__(self, etag, cached_etag):
"ETag mismatch detected when copying file to cache! "
"(expected: '{}', actual: '{}')".format(etag, cached_etag)
)


class OutputFileMissingError(DvcException):
def __init__(self, path):
super(OutputFileMissingError, self).__init__(
"Can't find {} neither locally nor on remote".format(path)
)
13 changes: 11 additions & 2 deletions dvc/external_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import logging
import shortuuid

from funcy import cached_property
from funcy import cached_property, retry
from schema import Optional

from dvc.config import Config
Expand Down Expand Up @@ -138,7 +138,16 @@ def uninstall(self):
)
return

remove(self.path)
# If repo has been initialized then we need to close its git repo
if "repo" in self.__dict__:
self.repo.scm.git.close()

if os.name == "nt":
# git.exe may linger for a while, preventing removal of the temp dir
os_retry = retry(5, errors=OSError, timeout=0.1)
os_retry(remove)(self.path)
else:
remove(self.path)

def update(self):
self.repo.scm.fetch(self.rev)
Expand Down
13 changes: 10 additions & 3 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
NotDvcRepoError,
OutputNotFoundError,
TargetNotDirectoryError,
OutputFileMissingError,
)
from dvc.ignore import DvcIgnoreFileHandler
from dvc.path_info import PathInfo
from dvc.utils.compat import open as _open
from dvc.utils.compat import open as _open, fspath_py35
from dvc.utils import relpath

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -455,9 +456,15 @@ def open(self, path, remote=None, mode="r", encoding=None):
if out.isdir():
raise ValueError("Can't open a dir")

cache_file = self.cache.local.checksum_to_path_info(out.checksum)
cache_file = fspath_py35(cache_file)

with self.state:
cache_info = out.get_used_cache(remote=remote)
self.cloud.pull(cache_info, remote=remote)

cache_file = self.cache.local.checksum_to_path_info(out.checksum)
return _open(cache_file.fspath, mode=mode, encoding=encoding)
# Since pull may just skip with a warning, we need to check here that the cache file actually exists
if not os.path.exists(cache_file):
raise OutputFileMissingError(relpath(path, self.root_dir))

return _open(cache_file, mode=mode, encoding=encoding)
7 changes: 2 additions & 5 deletions dvc/repo/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from dvc.path_info import PathInfo
from dvc.external_repo import ExternalRepo
from dvc.utils.compat import urlparse
from dvc.utils import remove


@staticmethod
Expand All @@ -19,8 +18,8 @@ def get(url, path, out=None, rev=None):
# and won't work with reflink/hardlink.
dpath = os.path.dirname(os.path.abspath(out))
tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid()))
erepo = ExternalRepo(tmp_dir, url=url, rev=rev)
try:
erepo = ExternalRepo(tmp_dir, url=url, rev=rev)
erepo.install()
# Try any links possible to avoid data duplication.
#
Expand All @@ -42,7 +41,5 @@ def get(url, path, out=None, rev=None):
o.path_info = PathInfo(os.path.abspath(out))
with o.repo.state:
o.checkout()
erepo.repo.scm.git.close()
finally:
if os.path.exists(tmp_dir):
remove(tmp_dir)
erepo.uninstall()
22 changes: 19 additions & 3 deletions tests/func/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import shutil

from dvc import api
from dvc.exceptions import OutputFileMissingError
from dvc.main import main
from dvc.path_info import URLInfo
from dvc.remote.config import RemoteConfig
Expand Down Expand Up @@ -101,16 +102,31 @@ def test_open(repo_dir, dvc_repo, remote_url):


def test_open_external(repo_dir, dvc_repo, erepo, remote_url):
erepo.dvc.scm.checkout("branch")
_set_remote_url_and_commit(erepo.dvc, remote_url)
erepo.dvc.push()
erepo.dvc.scm.checkout("master")
_set_remote_url_and_commit(erepo.dvc, remote_url)

erepo.dvc.push(all_branches=True)

# Remove cache to force download
shutil.rmtree(erepo.dvc.cache.local.cache_dir)

# Using file url to force clone to tmp repo
repo_url = "file://" + erepo.dvc.root_dir
with api.open(repo_dir.FOO, repo=repo_url) as fd:
assert fd.read() == repo_dir.FOO_CONTENTS
with api.open("version", repo=repo_url) as fd:
assert fd.read() == "master"

assert api.read("version", repo=repo_url, rev="branch") == "branch"


def test_open_missing(erepo):
# Remove cache to make foo missing
shutil.rmtree(erepo.dvc.cache.local.cache_dir)

repo_url = "file://" + erepo.dvc.root_dir
with pytest.raises(OutputFileMissingError):
api.read(erepo.FOO, repo=repo_url)


def _set_remote_url_and_commit(repo, remote_url):
Expand Down