From 253a0eae1374ff0bc81046507714353af977d8f3 Mon Sep 17 00:00:00 2001 From: karajan1001 Date: Wed, 8 Jul 2020 04:04:52 +0800 Subject: [PATCH] Add more tests according to gitignore (#4166) * Add more tests according to .gitignore * Test update * Update test * Solve question of path * update one test * Update 2 tests * Add one tests * comment those failed tests caused by other package * parametrize some tests Co-authored-by: karajan1001 --- dvc/ignore.py | 20 +++-- tests/func/test_ignore.py | 63 +++++++++++++++ tests/unit/test_ignore.py | 160 ++++++++++++++++++++++++++++---------- 3 files changed, 195 insertions(+), 48 deletions(-) diff --git a/dvc/ignore.py b/dvc/ignore.py index c9257e5204..35d7ede2d4 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -44,11 +44,11 @@ def __init__(self, ignore_file_path, tree): def __call__(self, root, dirs, files): files = [f for f in files if not self.matches(root, f)] - dirs = [d for d in dirs if not self.matches(root, d)] + dirs = [d for d in dirs if not self.matches(root, d, True)] return dirs, files - def matches(self, dirname, basename): + def matches(self, dirname, basename, is_dir=False): # NOTE: `relpath` is too slow, so we have to assume that both # `dirname` and `self.dirname` are relative or absolute together. prefix = self.dirname + os.sep @@ -63,13 +63,19 @@ def matches(self, dirname, basename): if not System.is_unix(): path = normalize_file(path) - return self.ignore(path) + return self.ignore(path, is_dir) - def ignore(self, path): + def ignore(self, path, is_dir): result = False - for ignore, pattern in self.ignore_spec: - if pattern.match(path): - result = ignore + if is_dir: + path_dir = f"{path}/" + for ignore, pattern in self.ignore_spec: + if pattern.match(path) or pattern.match(path_dir): + result = ignore + else: + for ignore, pattern in self.ignore_spec: + if pattern.match(path): + result = ignore return result def __hash__(self): diff --git a/tests/func/test_ignore.py b/tests/func/test_ignore.py index 03b1e95678..ee9abd9431 100644 --- a/tests/func/test_ignore.py +++ b/tests/func/test_ignore.py @@ -173,3 +173,66 @@ def test_ignore_blank_line(tmp_dir, dvc): tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "foo\n\ndir/ignored") assert _files_set("dir", dvc.tree) == {"dir/other"} + + +# It is not possible to re-include a file if a parent directory of +# that file is excluded. +# Git doesn’t list excluded directories for performance reasons, +# so any patterns on contained files have no effect, +# no matter where they are defined. +@pytest.mark.parametrize( + "data_struct, pattern_list, result_set", + [ + ( + {"dir": {"subdir": {"not_ignore": "121"}}}, + ["subdir/*", "!not_ignore"], + {"dir/subdir/not_ignore"}, + ), + ( + {"dir": {"subdir": {"should_ignore": "121"}}}, + ["subdir", "!should_ignore"], + set(), + ), + ( + {"dir": {"subdir": {"should_ignore": "121"}}}, + ["subdir/", "!should_ignore"], + set(), + ), + ], +) +def test_ignore_file_in_parent_path( + tmp_dir, dvc, data_struct, pattern_list, result_set +): + tmp_dir.gen(data_struct) + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "\n".join(pattern_list)) + assert _files_set("dir", dvc.tree) == result_set + + +# If there is a separator at the end of the pattern then the pattern +# will only match directories, +# otherwise the pattern can match both files and directories. +# For example, a pattern doc/frotz/ matches doc/frotz directory, +# but not a/doc/frotz directory; +def test_ignore_sub_directory(tmp_dir, dvc): + tmp_dir.gen( + { + "dir": { + "doc": {"fortz": {"b": "b"}}, + "a": {"doc": {"fortz": {"a": "a"}}}, + } + } + ) + tmp_dir.gen({"dir": {DvcIgnore.DVCIGNORE_FILE: "doc/fortz"}}) + assert _files_set("dir", dvc.tree) == { + "dir/a/doc/fortz/a", + "dir/{}".format(DvcIgnore.DVCIGNORE_FILE), + } + + +# however frotz/ matches frotz and a/frotz that is a directory +def test_ignore_directory(tmp_dir, dvc): + tmp_dir.gen({"dir": {"fortz": {}, "a": {"fortz": {}}}}) + tmp_dir.gen({"dir": {DvcIgnore.DVCIGNORE_FILE: "fortz"}}) + assert _files_set("dir", dvc.tree) == { + "dir/{}".format(DvcIgnore.DVCIGNORE_FILE), + } diff --git a/tests/unit/test_ignore.py b/tests/unit/test_ignore.py index 40f936d091..85c46edee4 100644 --- a/tests/unit/test_ignore.py +++ b/tests/unit/test_ignore.py @@ -14,34 +14,51 @@ def mock_dvcignore(dvcignore_path, patterns): return ignore_patterns -def test_ignore_from_file_should_filter_dirs_and_files(): - dvcignore_path = os.path.join( - os.path.sep, "full", "path", "to", "ignore", "file", ".dvcignore" - ) - - patterns = ["dir_to_ignore", "file_to_ignore"] - - root = os.path.dirname(dvcignore_path) - dirs = ["dir1", "dir2", "dir_to_ignore"] - files = ["file1", "file2", "file_to_ignore"] - - ignore = mock_dvcignore(dvcignore_path, patterns) - new_dirs, new_files = ignore(root, dirs, files) - - assert {"dir1", "dir2"} == set(new_dirs) - assert {"file1", "file2"} == set(new_files) - - @pytest.mark.parametrize( "file_to_ignore_relpath, patterns, expected_match", [ + # all rules from https://git-scm.com/docs/gitignore ("to_ignore", ["to_ignore"], True), + ("dont_ignore.txt", ["dont_ignore"], False), + # A blank line matches no files, so it can serve as a separator for + # readability. + ("to_ignore", ["", "to_ignore"], True), + # A line starting with # serves as a comment. + # Put a backslash ("\") in front of the first hash for patterns + # that begin with a hash. + ("#to_ignore", ["\\#to_ignore"], True), + ("#to_ignore", ["#to_ignore"], False), + # Trailing spaces are ignored unless they are quoted with + # backslash ("\"). + (" to_ignore", [" to_ignore"], False), + (" to_ignore", ["\\ to_ignore"], True), + # An optional prefix "!" which negates the pattern; any matching file + # excluded by a previous pattern will become included again. ("to_ignore.txt", ["to_ignore*"], True), - ( - os.path.join("rel", "p", "p2", "to_ignore"), - ["rel/**/to_ignore"], - True, - ), + ("to_ignore.txt", ["to_ignore*", "!to_ignore.txt"], False), + ("to_ignore.txt", ["!to_ignore.txt", "to_ignore*"], True), + # It is not possible to re-include a file if a parent directory of + # that file is excluded. + # Git doesn’t list excluded directories for performance reasons, + # so any patterns on contained files have no effect, + # no matter where they are defined. + # see (`tests/func/test_ignore.py::test_ignore_parent_path`) + # Put a backslash ("\") in front of the first "!" + # for patterns that begin with a literal "!", + # for example, "\!important!.txt". + ("!to_ignore.txt", ["\\!to_ignore.txt"], True), + # The slash / is used as the directory separator. + # Separators may occur at the beginning, middle or end of the + # .gitignore search pattern. + # If there is a separator at the beginning or middle (or both) + # of the pattern, then the pattern is relative to the directory + # level of the particular .gitignore file itself. + # Otherwise the pattern may also match at any level below + # the .gitignore level. + ("file", ["/file"], True), + (os.path.join("data", "file"), ["/file"], False), + (os.path.join("data", "file"), ["data/file"], True), + (os.path.join("other", "data", "file"), ["data/file"], False), ( os.path.join( os.path.sep, @@ -55,20 +72,96 @@ def test_ignore_from_file_should_filter_dirs_and_files(): ["to_ignore"], True, ), + # If there is a separator at the end of the pattern then the pattern + # will only match directories, + # otherwise the pattern can match both files and directories. + # For example, a pattern doc/frotz/ matches doc/frotz directory, + # but not a/doc/frotz directory; + # see (`tests/func/test_ignore.py::test_ignore_sub_directory`) + # however frotz/ matches frotz and a/frotz that is a directory + # (all paths are relative from the .gitignore file). + # see (`tests/func/test_ignore.py::test_ignore_directory`) + # An asterisk "*" matches anything except a slash. ("to_ignore.txt", ["/*.txt"], True), + (os.path.join("path", "to_ignore.txt"), ["/*.txt"], False), + (os.path.join("data", "file.txt"), ["data/*"], True), + # wait for Git + # (os.path.join("data", "sub", "file.txt"), ["data/*"], True), ( os.path.join("rel", "path", "path2", "to_ignore"), ["rel/*/to_ignore"], False, ), - (os.path.join("path", "to_ignore.txt"), ["/*.txt"], False), + ("file.txt", ["file.*"], True), + # The character "?" matches any one character except "/". + ("file.txt", ["fi?e.t?t"], True), + ("fi/e.txt", ["fi?e.t?t"], False), + # The range notation, e.g. [a-zA-Z], can be used + # to match one of the characters in a range. See fnmatch(3) and + # the FNM_PATHNAME flag for a more detailed description. + ("file.txt", ["[a-zA-Z]ile.txt"], True), + ("2ile.txt", ["[a-zA-Z]ile.txt"], False), + # Two consecutive asterisks ("**") in patterns matched against + # full pathname may have special meaning: + # A leading "**" followed by a slash means match in all directories. + # For example, "**/foo" matches file or directory "foo" anywhere, the + # same as pattern "foo". + # "**/foo/bar" matches file or directory "bar" anywhere that is + # directly under directory "foo". + (os.path.join("rel", "p", "p2", "to_ignore"), ["**/to_ignore"], True,), + ( + os.path.join("rel", "p", "p2", "to_ignore"), + ["**/p2/to_ignore"], + True, + ), + ( + os.path.join("rel", "path", "path2", "dont_ignore"), + ["**/to_ignore"], + False, + ), + # A trailing "/**" matches everything inside. + # For example, "abc/**" matches all files inside directory "abc", + # relative to the location of the .gitignore file, with infinite depth. + (os.path.join("rel", "p", "p2", "to_ignore"), ["rel/**"], True,), + (os.path.join("rel", "p", "p2", "to_ignore"), ["p/**"], False,), + ( + os.path.join("rel", "path", "path2", "dont_ignore"), + ["rel/**"], + True, + ), + # A slash followed by two consecutive asterisks then a slash matches + # zero or more directories. + # For example, "a/**/b" matches "a/b", "a/x/b", "a/x/y/b" and so on. + (os.path.join("rel", "p", "to_ignore"), ["rel/**/to_ignore"], True,), + ( + os.path.join("rel", "p", "p2", "to_ignore"), + ["rel/**/to_ignore"], + True, + ), ( os.path.join("rel", "path", "path2", "dont_ignore"), ["rel/**/to_ignore"], False, ), - ("dont_ignore.txt", ["dont_ignore"], False), - ("dont_ignore.txt", ["dont*", "!dont_ignore.txt"], False), + ( + os.path.join("rel", "path", "path2", "dont_ignore"), + ["path/**/dont_ignore"], + False, + ), + # Other consecutive asterisks are considered regular asterisks + # and will match according to the previous rules. + ("to_ignore.txt", ["/***.txt"], True), + (os.path.join("path", "to_ignore.txt"), ["/****.txt"], False), + (os.path.join("path", "to_ignore.txt"), ["****.txt"], True), + (os.path.join("data", "file.txt"), ["data/***"], True), + # bug from PathSpec + # (os.path.join("data", "p", "file.txt"), ["data/***"], False), + (os.path.join("data", "p", "file.txt"), ["***/file.txt"], False), + ( + os.path.join("rel", "path", "path2", "to_ignore"), + ["rel/***/to_ignore"], + False, + ), ], ) def test_match_ignore_from_file( @@ -99,18 +192,3 @@ def test_should_ignore_dir(omit_dir): new_dirs, _ = ignore(root, dirs, files) assert set(new_dirs) == {"dir1", "dir2"} - - -def test_ignore_order(): - dvcignore_path = os.path.join(os.path.sep, "ignore_order", ".dvcignore") - - patterns = ["!ac*", "a*", "!ab*"] - - root = os.path.dirname(dvcignore_path) - dirs = ["ignore_order"] - files = ["ac", "ab", "aa"] - - ignore = mock_dvcignore(dvcignore_path, patterns) - _, new_files = ignore(root, dirs, files) - - assert {"ab"} == set(new_files)