Skip to content

Commit

Permalink
dvcfs: implement get_file
Browse files Browse the repository at this point in the history
Currently we use `open`, which is much slower. For example,

```
$ dvc get dvc-bench data/small/dataset
```

the runtime goes down from 110 sec to 50 sec.

Discovered as part of https://github.com/iterative/dvc-bench/issues/419
  • Loading branch information
efiop committed Feb 20, 2023
1 parent 8f549c8 commit 54b8cbc
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions dvc/fs/dvc.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,19 @@ def _info(self, key, path, ignore_subrepos=True, check_ignored=True): # noqa: C
info["name"] = path
return info

def get_file(self, rpath, lpath, **kwargs):  # pylint: disable=arguments-differ
    """Fetch ``rpath`` into ``lpath``, preferring the repo's own filesystem.

    The relative path is first converted to a key and then to a path on
    ``self.repo.fs``. If that filesystem raises ``FileNotFoundError``, the
    filesystem returned by ``self._get_subrepo_info`` for the same key is
    used instead.

    Returns:
        Whatever the ``get_file`` call that served the request returns.

    Raises:
        FileNotFoundError: re-raised unchanged when ``_get_subrepo_info``
            yields no fallback filesystem for the key.
    """
    repo_key = self._get_key_from_relative(rpath)
    repo_fs_path = self._from_key(repo_key)

    try:
        return self.repo.fs.get_file(repo_fs_path, lpath, **kwargs)
    except FileNotFoundError:
        # Not present on the repo filesystem: look for a fallback
        # filesystem, and propagate the original error if there is none.
        _repo, fallback_fs, fallback_key = self._get_subrepo_info(repo_key)
        if not fallback_fs:
            raise

    fallback_path = _get_dvc_path(fallback_fs, fallback_key)
    return fallback_fs.get_file(fallback_path, lpath, **kwargs)


class DVCFileSystem(FileSystem):
protocol = "local"
Expand Down

0 comments on commit 54b8cbc

Please sign in to comment.