Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

introduce dvc list command #3246

Merged
merged 1 commit into from
Feb 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ innosetup/config.ini
.coverage
.coverage.*

*.swp
*.sw?

pip-wheel-metadata/
.vscode/
Expand Down
2 changes: 2 additions & 0 deletions dvc/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
imp_url,
init,
install,
ls,
lock,
metrics,
move,
Expand Down Expand Up @@ -62,6 +63,7 @@
metrics,
install,
root,
ls,
lock,
pipeline,
daemon,
Expand Down
68 changes: 68 additions & 0 deletions dvc/command/ls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import argparse
import logging

from dvc.command.base import append_doc_link
from dvc.command.base import CmdBaseNoRepo
from dvc.exceptions import DvcException


logger = logging.getLogger(__name__)


class CmdList(CmdBaseNoRepo):
    """CLI command that logs the entries returned by ``Repo.ls``."""

    def run(self):
        """List the requested location and log one entry per line.

        Returns:
            0 on success, 1 when a ``DvcException`` is raised.
        """
        # Imported inside the method rather than at module level.
        from dvc.repo import Repo

        try:
            entries = Repo.ls(
                self.args.url,
                self.args.target,
                rev=self.args.rev,
                recursive=self.args.recursive,
                outs_only=self.args.outs_only,
            )
            # Nothing is logged for an empty listing.
            if entries:
                listing = "\n".join(entries)
                logger.info(listing)
            return 0
        except DvcException:
            message = "failed to list '{}'".format(self.args.url)
            logger.exception(message)
            return 1


def add_parser(subparsers, parent_parser):
LIST_HELP = "List files and DVC outputs in the repo."
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
LIST_HELP = "List files and DVC outputs in the repo."
List repository contents, including files and directories tracked by DVC and by Git.

per iterative/dvc.org/pull/967

list_parser = subparsers.add_parser(
"list",
parents=[parent_parser],
description=append_doc_link(LIST_HELP, "list"),
help=LIST_HELP,
formatter_class=argparse.RawTextHelpFormatter,
)
list_parser.add_argument(
"url",
help="Supported urls:\n"
"/path/to/file\n"
"/path/to/directory\n"
"C:\\\\path\\to\\file\n"
"C:\\\\path\\to\\directory\n"
"https://github.com/path/to/repo\n"
"git@github.com:path/to/repo.git\n",
Comment on lines +43 to +49
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

help="Location of DVC repository to list"
similar to dvc import. Should be obvious for most people what it means and we'll explain it in docs (again, same as with get/import).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW the args in the -h output of get and import are slightly different (2 lines vs 1 line):

$ dvc get -h
...
positional arguments:
  url                   Location of DVC project or Git repository to download
                        from
  path                  Path to a file or directory within the project or
                        repository

$ dvc import -h
...
positional arguments:
  url                   Location of DVC project or Git repository to download from
  path                  Path to a file or directory within the project or repository

)
list_parser.add_argument(
"-R",
"--recursive",
action="store_true",
help="Recursively list files.",
)
list_parser.add_argument(
"--outs-only", action="store_true", help="Show only DVC outputs."
)
list_parser.add_argument(
"--rev", nargs="?", help="Git revision (e.g. branch, tag, SHA)"
)
list_parser.add_argument(
"target",
nargs="?",
help="Path to directory within the repository to list outputs for",
jorgeorpinel marked this conversation as resolved.
Show resolved Hide resolved
)
list_parser.set_defaults(func=CmdList)
17 changes: 14 additions & 3 deletions dvc/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,12 +294,23 @@ def __init__(self, code, reason):


class PathMissingError(DvcException):
def __init__(self, path, repo):
default_msg = (
"The path '{}' does not exist in the target repository '{}'"
" neither as an output nor a git-handled file."
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"...as a DVC output nor as a Git-tracked file."

)
default_msg_output_only = (
"The path '{}' does not exist in the target repository '{}'"
" as an output."
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as a DVC output

)

def __init__(self, path, repo, output_only=False):
msg = (
"The path '{}' does not exist in the target repository '{}'"
" neither as an output nor a git-handled file."
self.default_msg
if not output_only
else self.default_msg_output_only
)
super().__init__(msg.format(path, repo))
self.output_only = output_only


class RemoteCacheRequiredError(DvcException):
Expand Down
1 change: 1 addition & 0 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class Repo(object):
from dvc.repo.install import install
from dvc.repo.add import add
from dvc.repo.remove import remove
from dvc.repo.ls import ls
from dvc.repo.lock import lock as lock_stage
from dvc.repo.move import move
from dvc.repo.run import run
Expand Down
84 changes: 84 additions & 0 deletions dvc/repo/ls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import os

from dvc.exceptions import PathMissingError, OutputNotFoundError


@staticmethod
def ls(url, target=None, rev=None, recursive=None, outs_only=False):
from dvc.external_repo import external_repo
from dvc.repo import Repo
from dvc.utils import relpath

with external_repo(url, rev) as repo:
skshetry marked this conversation as resolved.
Show resolved Hide resolved
target_path_info = _get_target_path_info(repo, target)
result = []
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not set() right away? You are converting it later anyway

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because I use lists everywhere (as the outputs of _ls_outs_repo and _ls_files_repo) and expect to return sorted output. I'm using a set only to skip possible duplicates, not as a general data structure.

if isinstance(repo, Repo):
result.extend(_ls_outs_repo(repo, target_path_info, recursive))

if not outs_only:
result.extend(_ls_files_repo(target_path_info, recursive))

if target and not result:
raise PathMissingError(target, repo, output_only=outs_only)

def prettify(path_info):
if path_info == target_path_info:
return path_info.name
return relpath(path_info, target_path_info)

result = list(set(map(prettify, result)))
result.sort()
return result


def _ls_files_repo(target_path_info, recursive=None):
from dvc.compat import fspath
from dvc.ignore import CleanTree
from dvc.path_info import PathInfo
from dvc.scm.tree import WorkingTree

if not os.path.exists(fspath(target_path_info)):
return []

files = []
tree = CleanTree(WorkingTree(target_path_info))
try:
for dirpath, dirnames, filenames in tree.walk(target_path_info):
files.extend(map(lambda f: PathInfo(dirpath, f), filenames))
if not recursive:
files.extend(map(lambda d: PathInfo(dirpath, d), dirnames))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is probably the cause of that bug. This line should be outside of if, right?

break
except NotADirectoryError:
gurobokum marked this conversation as resolved.
Show resolved Hide resolved
if os.path.isfile(fspath(target_path_info)):
return [target_path_info]

return files
gurobokum marked this conversation as resolved.
Show resolved Hide resolved


def _ls_outs_repo(repo, target_path_info, recursive=None):
    """Collect path infos of outputs found under ``target_path_info``.

    Returns an empty list when ``repo.find_outs_by_path`` raises
    ``OutputNotFoundError``. With ``recursive`` truthy, the ``path_info`` of
    every output found is returned. Otherwise each output is reduced to its
    first path component below the target (or kept as is when its relative
    path has no parts) and duplicates are dropped.
    """
    from dvc.compat import fspath
    from dvc.path_info import PathInfo

    try:
        found = repo.find_outs_by_path(fspath(target_path_info), recursive=True)
    except OutputNotFoundError:
        return []

    if recursive:
        return [out.path_info for out in found]

    # A set removes duplicates when several outputs share a top-level entry.
    top_level = set()
    for out in found:
        parts = out.path_info.relpath(target_path_info).parts
        if parts:
            top_level.add(PathInfo(target_path_info, parts[0]))
        else:
            top_level.add(out.path_info)
    return list(top_level)


def _get_target_path_info(repo, target=None):
    """Return a ``PathInfo`` for ``repo.root_dir``, joined with ``target`` if given."""
    from dvc.path_info import PathInfo

    root = repo.root_dir
    return PathInfo(root, target) if target else PathInfo(root)
32 changes: 24 additions & 8 deletions scripts/completion/dvc.bash
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#----------------------------------------------------------

_dvc_commands='add cache checkout commit config destroy diff fetch get-url get gc \
import-url import init install lock metrics move pipeline pull push \
import-url import init install lock list metrics move pipeline pull push \
remote remove repro root run status unlock unprotect update version'

_dvc_options='-h --help -V --version'
Expand All @@ -31,6 +31,7 @@ _dvc_import_url='-f --file'
_dvc_import='-o --out --rev'
_dvc_init='--no-scm -f --force'
_dvc_install=''
_dvc_list='-R --recursive --outs-only --rev $(compgen -G *)'
_dvc_lock='$(compgen -G *.dvc)'
# Sub-commands offered after "dvc metrics".
_dvc_metrics='add modify remove show'
_dvc_metrics_add='-t --type -x --xpath $(compgen -G *)'
Expand Down Expand Up @@ -60,6 +61,26 @@ _dvc_unprotect='$(compgen -G *)'
_dvc_update='$(compgen -G *.dvc)'
_dvc_version=''

# Complete the options of a top-level command.
#
# Params
#   $1 - COMP_WORDS[1], the command name
#
# Reads `word` from the calling scope and writes the candidates to COMPREPLY.
comp_command() {
    # Option lists live in variables named _dvc_<command> (hyphens replaced
    # by underscores); the list is fetched through indirect expansion.
    local options_list="_dvc_$(replace_hyphen "$1")"

    COMPREPLY=( $(compgen -W "$_dvc_global_options ${!options_list}" -- "$word") )
}

# Complete the options of a sub-command, falling back to the parent
# command's options when the sub-command's option list is empty or unset.
#
# Params
#   $1 - COMP_WORDS[1], the command name
#   $2 - COMP_WORDS[2], the sub-command name
#
# Reads `word` from the calling scope and writes the candidates to COMPREPLY.
comp_subcommand() {
    # Option lists live in variables named _dvc_<command>_<subcommand>.
    local options_list="_dvc_$(replace_hyphen "$1")_$(replace_hyphen "$2")"
    if [ -z "${!options_list}" ]; then
        comp_command "$1"
    else
        COMPREPLY=( $(compgen -W "$_dvc_global_options ${!options_list}" -- "$word") )
    fi
}

# Notes:
#
# `COMPREPLY` contains what will be rendered after completion is triggered
Expand All @@ -76,7 +97,6 @@ _dvc() {
replace_hyphen() {
echo $(echo $1 | sed 's/-/_/g')
}

local word="${COMP_WORDS[COMP_CWORD]}"

COMPREPLY=()
Expand All @@ -87,13 +107,9 @@ _dvc() {
*) COMPREPLY=($(compgen -W "$_dvc_commands" -- "$word")) ;;
esac
elif [ "${COMP_CWORD}" -eq 2 ]; then
local options_list="_dvc_$(replace_hyphen ${COMP_WORDS[1]})"

COMPREPLY=($(compgen -W "$_dvc_global_options ${!options_list}" -- "$word"))
comp_command ${COMP_WORDS[1]}
elif [ "${COMP_CWORD}" -eq 3 ]; then
local options_list="_dvc_$(replace_hyphen ${COMP_WORDS[1]})_$(replace_hyphen ${COMP_WORDS[2]})"

COMPREPLY=($(compgen -W "$_dvc_global_options ${!options_list}" -- "$word"))
comp_subcommand ${COMP_WORDS[1]} ${COMP_WORDS[2]}
fi

return 0
Expand Down
10 changes: 10 additions & 0 deletions scripts/completion/dvc.zsh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ _dvc_commands() {
"import:Download data from DVC repository and take it under DVC control."
"init:Initialize DVC in the current directory."
"install:Install DVC git hooks into the repository."
"list:List files."
"lock:Lock DVC-file."
"metrics:Commands to add, manage, collect and display metrics."
"move:Rename or move a DVC controlled data file or a directory."
Expand Down Expand Up @@ -160,6 +161,14 @@ _dvc_lock=(
"*:Stages:_files -g '(*.dvc|Dvcfile)'"
)

# Options and positional arguments completed for "dvc list".
_dvc_list=(
  "--rev[Git revision (e.g. branch, tag, SHA)]:Revision:"
  {-R,--recursive}"[Recursively list files.]"
  "--outs-only[Show only DVC outputs.]"
  "1:URL:"
  "2:Target:"
)

_dvc_metrics=(
"1:Sub command:(add show diff modify remove)"
)
Expand Down Expand Up @@ -292,6 +301,7 @@ case $words[1] in
init) _arguments $_dvc_global_options $_dvc_init ;;
install) _arguments $_dvc_global_options $_dvc_install ;;
lock) _arguments $_dvc_global_options $_dvc_lock ;;
list) _arguments $_dvc_global_options $_dvc_list ;;
metrics) _arguments $_dvc_global_options $_dvc_metrics ;;
move) _arguments $_dvc_global_options $_dvc_move ;;
pipeline) _arguments $_dvc_global_options $_dvc_pipeline ;;
Expand Down
Loading