class LsColors(object):
    """Colorize listing entries using an ``LS_COLORS``-style specification.

    The specification is a colon-separated list of ``key=value`` items.
    Keys starting with ``*.`` are stored as extension rules (``*.xml`` is
    kept under ``.xml``); every other key (``di``, ``ex``, ``rs``, ``out``,
    ...) is stored as a named code. Values are placed verbatim between
    ``ESC[`` and ``m`` when an entry is formatted.
    """

    # Fallback used when neither the constructor argument nor the
    # LS_COLORS environment variable provides a non-empty specification.
    default = "rs=0:di=01;34:ex=01;32"

    def __init__(self, lscolors=None):
        """Load rules from `lscolors`, else ``$LS_COLORS``, else `default`.

        Args:
            lscolors (str, optional): specification string; an empty value
                falls through to the environment and then to `default`.
        """
        self._extensions = {}
        self._codes = {}
        self._load(lscolors or os.environ.get("LS_COLORS") or LsColors.default)

    def _load(self, lscolors):
        """Parse `lscolors` into the extension and named-code tables."""
        for item in lscolors.split(":"):
            code, sep, color = item.partition("=")
            if not sep:
                # No "=" in the item (this includes the empty item left by
                # a trailing ":"), so there is nothing to register.
                continue
            if code.startswith("*."):
                # Drop only the "*" so the key matches os.path.splitext().
                self._extensions[code[1:]] = color
            else:
                self._codes[code] = color

    def format(self, entry):
        """Return ``entry["path"]`` wrapped in the matching color sequence.

        Precedence: the ``out`` code (only when the entry is an output and
        the specification defines ``out``), then ``di`` for directories,
        then ``ex`` for executables, then the file-extension rule.

        Args:
            entry (dict): must contain ``"path"``; ``"isout"``, ``"isdir"``
                and ``"isexec"`` are optional and default to False.

        Returns:
            str: the colored path, or the plain path when no rule applies.
        """
        text = entry["path"]

        if entry.get("isout", False) and "out" in self._codes:
            return self._format(text, code="out")

        if entry.get("isdir", False):
            return self._format(text, code="di")

        if entry.get("isexec", False):
            return self._format(text, code="ex")

        _, ext = os.path.splitext(text)
        return self._format(text, ext=ext)

    def _format(self, text, code=None, ext=None):
        """Wrap `text` in the color registered for `code` or `ext`.

        A named code wins when it resolves to a non-empty value; otherwise
        the extension rule is used, so an unknown code no longer discards
        a valid extension match. Returns `text` unchanged when neither
        selector resolves.
        """
        val = None
        if code:
            val = self._codes.get(code)
        if not val and ext:
            val = self._extensions.get(ext)

        if not val:
            return text
        # "rs" is the reset code; fall back to 0 when the spec omits it.
        rs = self._codes.get("rs", 0)
        return "\033[{}m{}\033[{}m".format(val, text, rs)
def _ls_files_repo(path_info, recursive=None):
    """Collect filesystem nodes found under `path_info`.

    Args:
        path_info: location to list; converted with `fspath` for the
            `os.path` checks and handed as-is to the tree walker.
        recursive (bool, optional): when truthy, every walked directory
            contributes its files; otherwise only the first walked level
            is used and its sub-directories are listed as entries too.

    Returns:
        list: one `_get_fs_node` dict per collected path; empty when the
        path does not exist.
    """
    from dvc.compat import fspath
    from dvc.ignore import CleanTree
    from dvc.path_info import PathInfo
    from dvc.scm.tree import WorkingTree

    if not os.path.exists(fspath(path_info)):
        return []

    found = []
    # NOTE(review): CleanTree presumably filters ignored paths - confirm.
    walker = CleanTree(WorkingTree(path_info))
    try:
        for current_dir, subdirs, names in walker.walk(path_info):
            found += [PathInfo(current_dir, name) for name in names]
            if recursive:
                continue
            # Non-recursive listing: show the sub-directories themselves
            # and stop after the first level.
            found += [PathInfo(current_dir, sub) for sub in subdirs]
            break
    except NotADirectoryError:
        # The target is not a directory; list it alone if it is a file.
        if os.path.isfile(fspath(path_info)):
            found = [path_info]

    return list(map(_get_fs_node, found))
def _get_fs_node(path_info, out=None):
    """Describe one listing entry as a plain dict.

    Args:
        path_info: location of the entry; stored unchanged in the result.
        out (optional): the output object backing this entry. When given
            (truthy), the flags come from its `checksum` and
            `is_dir_checksum` attributes and the filesystem is not read.

    Returns:
        dict: ``{"path_info", "isout", "isdir", "isexec"}`` where the
        three flags are always real booleans.
    """
    if out:
        # Without a checksum the output cannot be identified as a
        # directory; outputs are never reported as executable.
        isdir = bool(out.is_dir_checksum) if out.checksum else False
        isexec = False
    else:
        # Imported here because only the filesystem branch needs it.
        from dvc.compat import fspath

        try:
            # One stat() call supplies both the type and the mode bits.
            mode = os.stat(fspath(path_info)).st_mode
        except FileNotFoundError:
            isdir = False
            isexec = False
        else:
            isdir = stat.S_ISDIR(mode)
            # Executable for user, group or others; bool() keeps the raw
            # bitmask from leaking into the entry.
            isexec = bool(
                mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
            )

    return {
        "path_info": path_info,
        "isout": bool(out),
        "isdir": isdir,
        "isexec": isexec,
    }
def match_files(files, expected_files):
    """Assert that listed entries equal the expected (path, isout) pairs.

    Args:
        files: iterable of entry dicts with ``"path"`` and ``"isout"``.
        expected_files: iterable of ``(path_parts, isout)`` tuples, where
            `path_parts` is joined with `os.path.join`.

    Both sides are compared as sets, so order and duplicates are ignored.
    """
    left = {(f["path"], f["isout"]) for f in files}
    right = {(os.path.join(*args), isout) for (args, isout) in expected_files}
    assert left == right


def test_ls_repo_with_color(tmp_dir, dvc, scm, mocker, monkeypatch, caplog):
    """`dvc list` colors its output via LS_COLORS when stdout is a tty."""
    import logging
    from dvc.cli import parse_args

    tmp_dir.scm_gen(FS_STRUCTURE, commit="init")
    tmp_dir.dvc_gen(DVC_STRUCTURE, commit="dvc")

    monkeypatch.setenv("LS_COLORS", "rs=0:di=01;34:*.xml=01;31:*.dvc=01;33:")
    cli_args = parse_args(["list", fspath(tmp_dir)])
    cmd = cli_args.func(cli_args)

    caplog.clear()
    # mocker.patch() takes effect as soon as it is called and is undone by
    # the fixture at teardown; wrapping it in `with` is not supported.
    mocker.patch("sys.stdout.isatty", return_value=True)
    with caplog.at_level(logging.INFO, logger="dvc.command.ls"):
        assert cmd.run() == 0

    assert caplog.records[-1].msg == "\n".join(
        [
            ".gitignore",
            "README.md",
            "\x1b[01;34mdata\x1b[0m",
            "\x1b[01;34mmodel\x1b[0m",
            "\x1b[01;31mstructure.xml\x1b[0m",
            "\x1b[01;33mstructure.xml.dvc\x1b[0m",
        ]
    )
"subcontent", "data.xml"), - ("model", "people.csv"), - ("structure.xml",), + ((".gitignore",), False), + (("README.md",), False), + (("structure.xml.dvc",), False), + (("model", "script.py"), False), + (("model", "train.py"), False), + (("model", "people.csv.dvc"), False), + (("data", "subcontent", "data.xml.dvc"), False), + (("data", "subcontent", "statistics", "data.csv.dvc"), False), + (("data", "subcontent", "statistics", "data.csv"), True), + (("data", "subcontent", "data.xml"), True), + (("model", "people.csv"), True), + (("structure.xml",), True), ), ) @@ -99,10 +127,10 @@ def test_ls_repo_outs_only_recursive(tmp_dir, dvc, scm): match_files( files, ( - ("data", "subcontent", "statistics", "data.csv"), - ("data", "subcontent", "data.xml"), - ("model", "people.csv"), - ("structure.xml",), + (("data", "subcontent", "statistics", "data.csv"), True), + (("data", "subcontent", "data.xml"), True), + (("model", "people.csv"), True), + (("structure.xml",), True), ), ) @@ -114,7 +142,12 @@ def test_ls_repo_with_target_dir(tmp_dir, dvc, scm): files = Repo.ls(fspath(tmp_dir), target="model") match_files( files, - (("script.py",), ("train.py",), ("people.csv",), ("people.csv.dvc",)), + ( + (("script.py",), False), + (("train.py",), False), + (("people.csv",), True), + (("people.csv.dvc",), False), + ), ) @@ -133,7 +166,14 @@ def test_ls_repo_with_target_subdir(tmp_dir, dvc, scm): target = os.path.join("data", "subcontent") files = Repo.ls(fspath(tmp_dir), target) - match_files(files, (("data.xml",), ("data.xml.dvc",), ("statistics",))) + match_files( + files, + ( + (("data.xml",), True), + (("data.xml.dvc",), False), + (("statistics",), False), + ), + ) def test_ls_repo_with_target_subdir_outs_only(tmp_dir, dvc, scm): @@ -142,7 +182,7 @@ def test_ls_repo_with_target_subdir_outs_only(tmp_dir, dvc, scm): target = os.path.join("data", "subcontent") files = Repo.ls(fspath(tmp_dir), target, outs_only=True) - match_files(files, (("data.xml",), ("statistics",))) + 
match_files(files, ((("data.xml",), True), (("statistics",), False),)) def test_ls_repo_with_target_subdir_outs_only_recursive(tmp_dir, dvc, scm): @@ -151,7 +191,9 @@ def test_ls_repo_with_target_subdir_outs_only_recursive(tmp_dir, dvc, scm): target = os.path.join("data", "subcontent") files = Repo.ls(fspath(tmp_dir), target, outs_only=True, recursive=True) - match_files(files, (("data.xml",), ("statistics", "data.csv"))) + match_files( + files, ((("data.xml",), True), (("statistics", "data.csv"), True),) + ) def test_ls_repo_with_target_file_out(tmp_dir, dvc, scm): @@ -160,7 +202,7 @@ def test_ls_repo_with_target_file_out(tmp_dir, dvc, scm): target = os.path.join("data", "subcontent", "data.xml") files = Repo.ls(fspath(tmp_dir), target) - match_files(files, (("data.xml",),)) + match_files(files, ((("data.xml",), True),)) def test_ls_repo_with_file_target_fs(tmp_dir, dvc, scm): @@ -169,7 +211,7 @@ def test_ls_repo_with_file_target_fs(tmp_dir, dvc, scm): target = "README.md" files = Repo.ls(fspath(tmp_dir), target, recursive=True) - match_files(files, (("README.md",),)) + match_files(files, ((("README.md",), False),)) def test_ls_repo_with_missed_target(tmp_dir, dvc, scm): @@ -200,7 +242,14 @@ def test_ls_repo_with_removed_dvc_dir(tmp_dir, dvc, scm): files = Repo.ls(fspath(tmp_dir)) match_files( - files, (("script.py",), ("dep.dvc",), ("out.dvc",), ("dep",), ("out",)) + files, + ( + (("script.py",), False), + (("dep.dvc",), False), + (("out.dvc",), False), + (("dep",), True), + (("out",), False), + ), ) @@ -211,11 +260,11 @@ def test_ls_repo_with_removed_dvc_dir_recursive(tmp_dir, dvc, scm): match_files( files, ( - ("script.py",), - ("dep.dvc",), - ("out.dvc",), - ("dep",), - ("out", "file"), + (("script.py",), False), + (("dep.dvc",), False), + (("out.dvc",), False), + (("dep",), True), + (("out", "file"), True), ), ) @@ -225,7 +274,7 @@ def test_ls_repo_with_removed_dvc_dir_with_target_dir(tmp_dir, dvc, scm): target = "out" files = Repo.ls(fspath(tmp_dir), target) 
- match_files(files, (("file",),)) + match_files(files, ((("file",), True),)) def test_ls_repo_with_removed_dvc_dir_with_target_file(tmp_dir, dvc, scm): @@ -233,7 +282,7 @@ def test_ls_repo_with_removed_dvc_dir_with_target_file(tmp_dir, dvc, scm): target = os.path.join("out", "file") files = Repo.ls(fspath(tmp_dir), target) - match_files(files, (("file",),)) + match_files(files, ((("file",), True),)) def test_ls_remote_repo(erepo_dir): @@ -246,12 +295,12 @@ def test_ls_remote_repo(erepo_dir): match_files( files, ( - (".gitignore",), - ("README.md",), - ("structure.xml.dvc",), - ("model",), - ("data",), - ("structure.xml",), + ((".gitignore",), False), + (("README.md",), False), + (("structure.xml.dvc",), False), + (("model",), False), + (("data",), False), + (("structure.xml",), True), ), ) @@ -266,18 +315,18 @@ def test_ls_remote_repo_recursive(erepo_dir): match_files( files, ( - (".gitignore",), - ("README.md",), - ("structure.xml.dvc",), - ("model", "script.py"), - ("model", "train.py"), - ("model", "people.csv.dvc"), - ("data", "subcontent", "data.xml.dvc"), - ("data", "subcontent", "statistics", "data.csv.dvc"), - ("data", "subcontent", "statistics", "data.csv"), - ("data", "subcontent", "data.xml"), - ("model", "people.csv"), - ("structure.xml",), + ((".gitignore",), False), + (("README.md",), False), + (("structure.xml.dvc",), False), + (("model", "script.py"), False), + (("model", "train.py"), False), + (("model", "people.csv.dvc"), False), + (("data", "subcontent", "data.xml.dvc"), False), + (("data", "subcontent", "statistics", "data.csv.dvc"), False), + (("data", "subcontent", "statistics", "data.csv"), True), + (("data", "subcontent", "data.xml"), True), + (("model", "people.csv"), True), + (("structure.xml",), True), ), ) @@ -291,10 +340,10 @@ def test_ls_remote_git_only_repo_recursive(git_dir): match_files( files, ( - (".gitignore",), - ("README.md",), - ("model", "script.py"), - ("model", "train.py"), + ((".gitignore",), False), + (("README.md",), 
False), + (("model", "script.py"), False), + (("model", "train.py"), False), ), ) @@ -309,7 +358,12 @@ def test_ls_remote_repo_with_target_dir(erepo_dir): files = Repo.ls(url, target) match_files( files, - (("script.py",), ("train.py",), ("people.csv",), ("people.csv.dvc",)), + ( + (("script.py",), False), + (("train.py",), False), + (("people.csv",), True), + (("people.csv.dvc",), False), + ), ) @@ -321,7 +375,14 @@ def test_ls_remote_repo_with_rev(erepo_dir): rev = erepo_dir.scm.list_all_commits()[1] url = "file://{}".format(erepo_dir) files = Repo.ls(url, rev=rev) - match_files(files, ((".gitignore",), ("README.md",), ("model",))) + match_files( + files, + ( + ((".gitignore",), False), + (("README.md",), False), + (("model",), False), + ), + ) def test_ls_remote_repo_with_rev_recursive(erepo_dir): @@ -335,14 +396,14 @@ def test_ls_remote_repo_with_rev_recursive(erepo_dir): match_files( files, ( - ("structure.xml.dvc",), - ("model", "people.csv.dvc"), - ("data", "subcontent", "data.xml.dvc"), - ("data", "subcontent", "statistics", "data.csv.dvc"), - ("data", "subcontent", "statistics", "data.csv"), - ("data", "subcontent", "data.xml"), - ("model", "people.csv"), - ("structure.xml",), + (("structure.xml.dvc",), False), + (("model", "people.csv.dvc"), False), + (("data", "subcontent", "data.xml.dvc"), False), + (("data", "subcontent", "statistics", "data.csv.dvc"), False), + (("data", "subcontent", "statistics", "data.csv"), True), + (("data", "subcontent", "data.xml"), True), + (("model", "people.csv"), True), + (("structure.xml",), True), ), ) diff --git a/tests/unit/command/ls/__init__.py b/tests/unit/command/ls/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/command/test_ls.py b/tests/unit/command/ls/test_ls.py similarity index 100% rename from tests/unit/command/test_ls.py rename to tests/unit/command/ls/test_ls.py diff --git a/tests/unit/command/ls/test_ls_colors.py b/tests/unit/command/ls/test_ls_colors.py new file mode 
from dvc.command.ls.ls_colors import LsColors


def colorize(ls_colors):
    """Return a formatter that builds an entry from a path and a flag spec.

    The spec is a string of single-letter flags: ``e`` marks the entry as
    executable, ``d`` as a directory and ``o`` as an output. The built
    entry is passed to ``ls_colors.format``.
    """
    flag_letters = {"isexec": "e", "isdir": "d", "isout": "o"}

    def _colorize(f, spec=""):
        fs_path = {key: letter in spec for key, letter in flag_letters.items()}
        fs_path["path"] = f
        return ls_colors.format(fs_path)

    return _colorize


def test_ls_colors_out_file():
    fmt = colorize(LsColors(LsColors.default))
    assert fmt("file", "o") == "file"


def test_ls_colors_out_dir():
    fmt = colorize(LsColors(LsColors.default))
    assert fmt("dir", "do") == "\x1b[01;34mdir\x1b[0m"


def test_ls_colors_out_exec():
    fmt = colorize(LsColors(LsColors.default))
    assert fmt("script.sh", "eo") == "\x1b[01;32mscript.sh\x1b[0m"


def test_ls_colors_out_ext():
    fmt = colorize(LsColors(LsColors.default + ":*.xml=01;33"))
    assert fmt("file.xml", "o") == "\x1b[01;33mfile.xml\x1b[0m"


def test_ls_colors_file():
    fmt = colorize(LsColors(LsColors.default))
    assert fmt("file") == "file"


def test_ls_colors_dir():
    fmt = colorize(LsColors(LsColors.default))
    assert fmt("dir", "d") == "\x1b[01;34mdir\x1b[0m"


def test_ls_colors_exec():
    fmt = colorize(LsColors(LsColors.default))
    assert fmt("script.sh", "e") == "\x1b[01;32mscript.sh\x1b[0m"


def test_ls_colors_ext():
    fmt = colorize(LsColors(LsColors.default + ":*.xml=01;33"))
    assert fmt("file.xml") == "\x1b[01;33mfile.xml\x1b[0m"