Skip to content

Commit

Permalink
Add more tests according to gitignore (#4166)
Browse files Browse the repository at this point in the history
* Add more tests according to .gitignore

* Test update

* Update test

* Solve question of path

* update one test

* Update 2 tests

* Add one tests

* comment those failed tests caused by other package

* parametrize some tests

Co-authored-by: karajan1001 <[email protected]>
  • Loading branch information
karajan1001 and karajan1001 authored Jul 7, 2020
1 parent 8964967 commit 253a0ea
Show file tree
Hide file tree
Showing 3 changed files with 195 additions and 48 deletions.
20 changes: 13 additions & 7 deletions dvc/ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ def __init__(self, ignore_file_path, tree):

def __call__(self, root, dirs, files):
files = [f for f in files if not self.matches(root, f)]
dirs = [d for d in dirs if not self.matches(root, d)]
dirs = [d for d in dirs if not self.matches(root, d, True)]

return dirs, files

def matches(self, dirname, basename):
def matches(self, dirname, basename, is_dir=False):
# NOTE: `relpath` is too slow, so we have to assume that both
# `dirname` and `self.dirname` are relative or absolute together.
prefix = self.dirname + os.sep
Expand All @@ -63,13 +63,19 @@ def matches(self, dirname, basename):

if not System.is_unix():
path = normalize_file(path)
return self.ignore(path)
return self.ignore(path, is_dir)

def ignore(self, path):
def ignore(self, path, is_dir):
result = False
for ignore, pattern in self.ignore_spec:
if pattern.match(path):
result = ignore
if is_dir:
path_dir = f"{path}/"
for ignore, pattern in self.ignore_spec:
if pattern.match(path) or pattern.match(path_dir):
result = ignore
else:
for ignore, pattern in self.ignore_spec:
if pattern.match(path):
result = ignore
return result

def __hash__(self):
Expand Down
63 changes: 63 additions & 0 deletions tests/func/test_ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,66 @@ def test_ignore_blank_line(tmp_dir, dvc):
tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "foo\n\ndir/ignored")

assert _files_set("dir", dvc.tree) == {"dir/other"}


# It is not possible to re-include a file if a parent directory of
# that file is excluded.
# Git doesn’t list excluded directories for performance reasons,
# so any patterns on contained files have no effect,
# no matter where they are defined.
@pytest.mark.parametrize(
"data_struct, pattern_list, result_set",
[
(
{"dir": {"subdir": {"not_ignore": "121"}}},
["subdir/*", "!not_ignore"],
{"dir/subdir/not_ignore"},
),
(
{"dir": {"subdir": {"should_ignore": "121"}}},
["subdir", "!should_ignore"],
set(),
),
(
{"dir": {"subdir": {"should_ignore": "121"}}},
["subdir/", "!should_ignore"],
set(),
),
],
)
def test_ignore_file_in_parent_path(
tmp_dir, dvc, data_struct, pattern_list, result_set
):
tmp_dir.gen(data_struct)
tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "\n".join(pattern_list))
assert _files_set("dir", dvc.tree) == result_set


# If there is a separator at the end of the pattern then the pattern
# will only match directories,
# otherwise the pattern can match both files and directories.
# For example, a pattern doc/frotz/ matches doc/frotz directory,
# but not a/doc/frotz directory;
def test_ignore_sub_directory(tmp_dir, dvc):
tmp_dir.gen(
{
"dir": {
"doc": {"fortz": {"b": "b"}},
"a": {"doc": {"fortz": {"a": "a"}}},
}
}
)
tmp_dir.gen({"dir": {DvcIgnore.DVCIGNORE_FILE: "doc/fortz"}})
assert _files_set("dir", dvc.tree) == {
"dir/a/doc/fortz/a",
"dir/{}".format(DvcIgnore.DVCIGNORE_FILE),
}


# however frotz/ matches frotz and a/frotz that is a directory
def test_ignore_directory(tmp_dir, dvc):
tmp_dir.gen({"dir": {"fortz": {}, "a": {"fortz": {}}}})
tmp_dir.gen({"dir": {DvcIgnore.DVCIGNORE_FILE: "fortz"}})
assert _files_set("dir", dvc.tree) == {
"dir/{}".format(DvcIgnore.DVCIGNORE_FILE),
}
160 changes: 119 additions & 41 deletions tests/unit/test_ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,34 +14,51 @@ def mock_dvcignore(dvcignore_path, patterns):
return ignore_patterns


def test_ignore_from_file_should_filter_dirs_and_files():
dvcignore_path = os.path.join(
os.path.sep, "full", "path", "to", "ignore", "file", ".dvcignore"
)

patterns = ["dir_to_ignore", "file_to_ignore"]

root = os.path.dirname(dvcignore_path)
dirs = ["dir1", "dir2", "dir_to_ignore"]
files = ["file1", "file2", "file_to_ignore"]

ignore = mock_dvcignore(dvcignore_path, patterns)
new_dirs, new_files = ignore(root, dirs, files)

assert {"dir1", "dir2"} == set(new_dirs)
assert {"file1", "file2"} == set(new_files)


@pytest.mark.parametrize(
"file_to_ignore_relpath, patterns, expected_match",
[
# all rules from https://git-scm.com/docs/gitignore
("to_ignore", ["to_ignore"], True),
("dont_ignore.txt", ["dont_ignore"], False),
# A blank line matches no files, so it can serve as a separator for
# readability.
("to_ignore", ["", "to_ignore"], True),
# A line starting with # serves as a comment.
# Put a backslash ("\") in front of the first hash for patterns
# that begin with a hash.
("#to_ignore", ["\\#to_ignore"], True),
("#to_ignore", ["#to_ignore"], False),
# Trailing spaces are ignored unless they are quoted with
# backslash ("\").
(" to_ignore", [" to_ignore"], False),
(" to_ignore", ["\\ to_ignore"], True),
# An optional prefix "!" which negates the pattern; any matching file
# excluded by a previous pattern will become included again.
("to_ignore.txt", ["to_ignore*"], True),
(
os.path.join("rel", "p", "p2", "to_ignore"),
["rel/**/to_ignore"],
True,
),
("to_ignore.txt", ["to_ignore*", "!to_ignore.txt"], False),
("to_ignore.txt", ["!to_ignore.txt", "to_ignore*"], True),
# It is not possible to re-include a file if a parent directory of
# that file is excluded.
# Git doesn’t list excluded directories for performance reasons,
# so any patterns on contained files have no effect,
# no matter where they are defined.
# see (`tests/func/test_ignore.py::test_ignore_parent_path`)
# Put a backslash ("\") in front of the first "!"
# for patterns that begin with a literal "!",
# for example, "\!important!.txt".
("!to_ignore.txt", ["\\!to_ignore.txt"], True),
# The slash / is used as the directory separator.
# Separators may occur at the beginning, middle or end of the
# .gitignore search pattern.
# If there is a separator at the beginning or middle (or both)
# of the pattern, then the pattern is relative to the directory
# level of the particular .gitignore file itself.
# Otherwise the pattern may also match at any level below
# the .gitignore level.
("file", ["/file"], True),
(os.path.join("data", "file"), ["/file"], False),
(os.path.join("data", "file"), ["data/file"], True),
(os.path.join("other", "data", "file"), ["data/file"], False),
(
os.path.join(
os.path.sep,
Expand All @@ -55,20 +72,96 @@ def test_ignore_from_file_should_filter_dirs_and_files():
["to_ignore"],
True,
),
# If there is a separator at the end of the pattern then the pattern
# will only match directories,
# otherwise the pattern can match both files and directories.
# For example, a pattern doc/frotz/ matches doc/frotz directory,
# but not a/doc/frotz directory;
# see (`tests/func/test_ignore.py::test_ignore_sub_directory`)
# however frotz/ matches frotz and a/frotz that is a directory
# (all paths are relative from the .gitignore file).
# see (`tests/func/test_ignore.py::test_ignore_directory`)
# An asterisk "*" matches anything except a slash.
("to_ignore.txt", ["/*.txt"], True),
(os.path.join("path", "to_ignore.txt"), ["/*.txt"], False),
(os.path.join("data", "file.txt"), ["data/*"], True),
# wait for Git
# (os.path.join("data", "sub", "file.txt"), ["data/*"], True),
(
os.path.join("rel", "path", "path2", "to_ignore"),
["rel/*/to_ignore"],
False,
),
(os.path.join("path", "to_ignore.txt"), ["/*.txt"], False),
("file.txt", ["file.*"], True),
# The character "?" matches any one character except "/".
("file.txt", ["fi?e.t?t"], True),
("fi/e.txt", ["fi?e.t?t"], False),
# The range notation, e.g. [a-zA-Z], can be used
# to match one of the characters in a range. See fnmatch(3) and
# the FNM_PATHNAME flag for a more detailed description.
("file.txt", ["[a-zA-Z]ile.txt"], True),
("2ile.txt", ["[a-zA-Z]ile.txt"], False),
# Two consecutive asterisks ("**") in patterns matched against
# full pathname may have special meaning:
# A leading "**" followed by a slash means match in all directories.
# For example, "**/foo" matches file or directory "foo" anywhere, the
# same as pattern "foo".
# "**/foo/bar" matches file or directory "bar" anywhere that is
# directly under directory "foo".
(os.path.join("rel", "p", "p2", "to_ignore"), ["**/to_ignore"], True,),
(
os.path.join("rel", "p", "p2", "to_ignore"),
["**/p2/to_ignore"],
True,
),
(
os.path.join("rel", "path", "path2", "dont_ignore"),
["**/to_ignore"],
False,
),
# A trailing "/**" matches everything inside.
# For example, "abc/**" matches all files inside directory "abc",
# relative to the location of the .gitignore file, with infinite depth.
(os.path.join("rel", "p", "p2", "to_ignore"), ["rel/**"], True,),
(os.path.join("rel", "p", "p2", "to_ignore"), ["p/**"], False,),
(
os.path.join("rel", "path", "path2", "dont_ignore"),
["rel/**"],
True,
),
# A slash followed by two consecutive asterisks then a slash matches
# zero or more directories.
# For example, "a/**/b" matches "a/b", "a/x/b", "a/x/y/b" and so on.
(os.path.join("rel", "p", "to_ignore"), ["rel/**/to_ignore"], True,),
(
os.path.join("rel", "p", "p2", "to_ignore"),
["rel/**/to_ignore"],
True,
),
(
os.path.join("rel", "path", "path2", "dont_ignore"),
["rel/**/to_ignore"],
False,
),
("dont_ignore.txt", ["dont_ignore"], False),
("dont_ignore.txt", ["dont*", "!dont_ignore.txt"], False),
(
os.path.join("rel", "path", "path2", "dont_ignore"),
["path/**/dont_ignore"],
False,
),
# Other consecutive asterisks are considered regular asterisks
# and will match according to the previous rules.
("to_ignore.txt", ["/***.txt"], True),
(os.path.join("path", "to_ignore.txt"), ["/****.txt"], False),
(os.path.join("path", "to_ignore.txt"), ["****.txt"], True),
(os.path.join("data", "file.txt"), ["data/***"], True),
# bug from PathSpec
# (os.path.join("data", "p", "file.txt"), ["data/***"], False),
(os.path.join("data", "p", "file.txt"), ["***/file.txt"], False),
(
os.path.join("rel", "path", "path2", "to_ignore"),
["rel/***/to_ignore"],
False,
),
],
)
def test_match_ignore_from_file(
Expand Down Expand Up @@ -99,18 +192,3 @@ def test_should_ignore_dir(omit_dir):
new_dirs, _ = ignore(root, dirs, files)

assert set(new_dirs) == {"dir1", "dir2"}


def test_ignore_order():
dvcignore_path = os.path.join(os.path.sep, "ignore_order", ".dvcignore")

patterns = ["!ac*", "a*", "!ab*"]

root = os.path.dirname(dvcignore_path)
dirs = ["ignore_order"]
files = ["ac", "ab", "aa"]

ignore = mock_dvcignore(dvcignore_path, patterns)
_, new_files = ignore(root, dirs, files)

assert {"ab"} == set(new_files)

0 comments on commit 253a0ea

Please sign in to comment.