From 8028f1b3ec4faff26e88133a69bf11e09b5f5d5f Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Mon, 26 Aug 2024 15:26:00 -0400 Subject: [PATCH 1/8] Pass more change information to apply_copyright_check --- src/rapids_pre_commit_hooks/copyright.py | 40 +- src/rapids_pre_commit_hooks/lint.py | 4 +- .../rapids_pre_commit_hooks/test_copyright.py | 362 ++++++++++++------ 3 files changed, 267 insertions(+), 139 deletions(-) diff --git a/src/rapids_pre_commit_hooks/copyright.py b/src/rapids_pre_commit_hooks/copyright.py index 9c46ad4..d02c600 100644 --- a/src/rapids_pre_commit_hooks/copyright.py +++ b/src/rapids_pre_commit_hooks/copyright.py @@ -81,7 +81,12 @@ def apply_copyright_update(linter: Linter, match: re.Match, year: int) -> None: ) -def apply_copyright_check(linter: Linter, old_content: Optional[str]) -> None: +def apply_copyright_check( + linter: Linter, + change_type: str, + old_filename: Optional[str], + old_content: Optional[str], +) -> None: if linter.content != old_content: current_year = datetime.datetime.now().year new_copyright_matches = match_copyright(linter.content) @@ -232,22 +237,24 @@ def try_get_ref(remote: "git.Remote") -> Optional["git.Reference"]: def get_changed_files( args: argparse.Namespace, -) -> dict[Union[str, os.PathLike[str]], Optional["git.Blob"]]: +) -> dict[Union[str, os.PathLike[str]], tuple[str, Optional["git.Blob"]]]: try: repo = git.Repo() except git.InvalidGitRepositoryError: return { - os.path.relpath(os.path.join(dirpath, filename), "."): None + os.path.relpath(os.path.join(dirpath, filename), "."): ("A", None) for dirpath, dirnames, filenames in os.walk(".") for filename in filenames } - changed_files: dict[Union[str, os.PathLike[str]], Optional["git.Blob"]] = { - f: None for f in repo.untracked_files - } + changed_files: dict[ + Union[str, os.PathLike[str]], tuple[str, Optional["git.Blob"]] + ] = {f: ("A", None) for f in repo.untracked_files} target_branch_upstream_commit = get_target_branch_upstream_commit(repo, args) if target_branch_upstream_commit is None: - changed_files.update({blob.path: None for _, blob in repo.index.iter_blobs()}) + changed_files.update( + {blob.path: ("A", None) for _, blob in repo.index.iter_blobs()} + ) return changed_files for merge_base in repo.merge_base( @@ -261,9 +268,9 @@ def get_changed_files( ) for diff in diffs: if diff.change_type == "A": - changed_files[diff.b_path] = None + changed_files[diff.b_path] = (diff.change_type, None) elif diff.change_type != "D": - changed_files[diff.b_path] = diff.a_blob + changed_files[diff.b_path] = (diff.change_type, diff.a_blob) return changed_files @@ -312,16 +319,17 @@ def the_check(linter: Linter, args: argparse.Namespace): return try: - changed_file = changed_files[git_filename] + change_type, changed_file = changed_files[git_filename] except KeyError: return - old_content = ( - changed_file.data_stream.read().decode() - if changed_file is not None - else None - ) - apply_copyright_check(linter, old_content) + if changed_file is None: + old_filename = None + old_content = None + else: + old_filename = changed_file.name + old_content = changed_file.data_stream.read().decode() + apply_copyright_check(linter, change_type, old_filename, old_content) return the_check diff --git a/src/rapids_pre_commit_hooks/lint.py b/src/rapids_pre_commit_hooks/lint.py index 781cecb..3a3b033 100644 --- a/src/rapids_pre_commit_hooks/lint.py +++ b/src/rapids_pre_commit_hooks/lint.py @@ -65,9 +65,9 @@ class LintWarning: pos: _PosType msg: str replacements: list[Replacement] = dataclasses.field( - default_factory=list, init=False + default_factory=list, kw_only=True ) - notes: list[Note] = dataclasses.field(default_factory=list, init=False) + notes: list[Note] = dataclasses.field(default_factory=list, kw_only=True) def add_replacement(self, pos: _PosType, newtext: str) -> None: self.replacements.append(Replacement(pos, newtext)) diff --git a/test/rapids_pre_commit_hooks/test_copyright.py b/test/rapids_pre_commit_hooks/test_copyright.py index 340c9cc..6044163 100644 --- a/test/rapids_pre_commit_hooks/test_copyright.py +++ b/test/rapids_pre_commit_hooks/test_copyright.py @@ -23,7 +23,7 @@ from freezegun import freeze_time from rapids_pre_commit_hooks import copyright -from rapids_pre_commit_hooks.lint import Linter +from rapids_pre_commit_hooks.lint import Linter, LintWarning, Replacement def test_match_copyright(): @@ -92,84 +92,188 @@ def test_strip_copyright(): assert stripped == ["No copyright here"] +@pytest.mark.parametrize( + [ + "change_type", + "old_filename", + "old_content", + "new_filename", + "new_content", + "warnings", + ], + [ + ( + "A", + None, + None, + "file.txt", + "No copyright notice", + [ + LintWarning((0, 0), "no copyright notice found"), + ], + ), + ( + "M", + "file.txt", + "No copyright notice", + "file.txt", + "No copyright notice", + [], + ), + ( + "M", + "file.txt", + dedent( + r""" + Copyright (c) 2021-2023 NVIDIA CORPORATION + Copyright (c) 2023 NVIDIA CORPORATION + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2025 NVIDIA CORPORATION + This file has not been changed + """ + ), + "file.txt", + dedent( + r""" + Copyright (c) 2021-2023 NVIDIA CORPORATION + Copyright (c) 2023 NVIDIA CORPORATION + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2025 NVIDIA CORPORATION + This file has not been changed + """ + ), + [], + ), + ( + "M", + "file.txt", + dedent( + r""" + Copyright (c) 2021-2023 NVIDIA CORPORATION + Copyright (c) 2023 NVIDIA CORPORATION + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2025 NVIDIA CORPORATION + This file has not been changed + """ + ), + "file.txt", + dedent( + r""" + Copyright (c) 2021-2023 NVIDIA CORPORATION + Copyright (c) 2023 NVIDIA CORPORATION + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2025 NVIDIA CORPORATION + This file has been changed + """ + ), + [ + LintWarning( + (15, 24), + "copyright is out of date", + replacements=[ + Replacement( + (1, 43), "Copyright (c) 2021-2024, NVIDIA CORPORATION" + ), + ], + ), + LintWarning( + (58, 62), + "copyright is out of date", + replacements=[ + Replacement( + (44, 81), "Copyright (c) 2023-2024, NVIDIA CORPORATION" + ), + ], + ), + ], + ), + ( + "A", + None, + None, + "file.txt", + dedent( + r""" + Copyright (c) 2021-2023 NVIDIA CORPORATION + Copyright (c) 2023 NVIDIA CORPORATION + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2025 NVIDIA CORPORATION + This file has been changed + """ + ), + [ + LintWarning( + (15, 24), + "copyright is out of date", + replacements=[ + Replacement( + (1, 43), "Copyright (c) 2021-2024, NVIDIA CORPORATION" + ), + ], + ), + LintWarning( + (58, 62), + "copyright is out of date", + replacements=[ + Replacement( + (44, 81), "Copyright (c) 2023-2024, NVIDIA CORPORATION" + ), + ], + ), + ], + ), + ( + "M", + "file.txt", + dedent( + r""" + Copyright (c) 2021-2023 NVIDIA CORPORATION + Copyright (c) 2023 NVIDIA CORPORATION + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2025 NVIDIA CORPORATION + This file has not been changed + """ + ), + "file.txt", + dedent( + r""" + Copyright (c) 2021-2024 NVIDIA CORPORATION + Copyright (c) 2023 NVIDIA CORPORATION + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2025 NVIDIA Corporation + This file has not been changed + """ + ), + [ + LintWarning( + (15, 24), + "copyright is not out of date and should not be updated", + replacements=[ + Replacement( + (1, 43), "Copyright (c) 2021-2023 NVIDIA CORPORATION" + ), + ], + ), + LintWarning( + (120, 157), + "copyright is not out of date and should not be updated", + replacements=[ + Replacement( + (120, 157), "Copyright (c) 2025 NVIDIA CORPORATION" + ), + ], + ), + ], + ), + ], +) @freeze_time("2024-01-18") -def test_apply_copyright_check(): - def run_apply_copyright_check(old_content, new_content): - linter = Linter("file.txt", new_content) - copyright.apply_copyright_check(linter, old_content) - return linter - - expected_linter = Linter("file.txt", "No copyright notice") - expected_linter.add_warning((0, 0), "no copyright notice found") - - linter = run_apply_copyright_check(None, "No copyright notice") - assert linter.warnings == expected_linter.warnings - - linter = run_apply_copyright_check("No copyright notice", "No copyright notice") - assert linter.warnings == [] - - OLD_CONTENT = dedent( - r""" - Copyright (c) 2021-2023 NVIDIA CORPORATION - Copyright (c) 2023 NVIDIA CORPORATION - Copyright (c) 2024 NVIDIA CORPORATION - Copyright (c) 2025 NVIDIA CORPORATION - This file has not been changed - """ - ) - linter = run_apply_copyright_check(OLD_CONTENT, OLD_CONTENT) - assert linter.warnings == [] - - NEW_CONTENT = dedent( - r""" - Copyright (c) 2021-2023 NVIDIA CORPORATION - Copyright (c) 2023 NVIDIA CORPORATION - Copyright (c) 2024 NVIDIA CORPORATION - Copyright (c) 2025 NVIDIA CORPORATION - This file has been changed - """ - ) - expected_linter = Linter("file.txt", NEW_CONTENT) - expected_linter.add_warning((15, 24), "copyright is out of date").add_replacement( - (1, 43), "Copyright (c) 2021-2024, NVIDIA CORPORATION" - ) - expected_linter.add_warning((58, 62), "copyright is out of date").add_replacement( - (44, 81), "Copyright (c) 2023-2024, NVIDIA CORPORATION" - ) - - linter = run_apply_copyright_check(OLD_CONTENT, NEW_CONTENT) - assert linter.warnings == expected_linter.warnings - - expected_linter = Linter("file.txt", NEW_CONTENT) - expected_linter.add_warning((15, 24), "copyright is out of date").add_replacement( - (1, 43), "Copyright (c) 2021-2024, NVIDIA CORPORATION" - ) - expected_linter.add_warning((58, 62), "copyright is out of date").add_replacement( - (44, 81), "Copyright (c) 2023-2024, NVIDIA CORPORATION" - ) - - linter = run_apply_copyright_check(None, NEW_CONTENT) - assert linter.warnings == expected_linter.warnings - - NEW_CONTENT = dedent( - r""" - Copyright (c) 2021-2024 NVIDIA CORPORATION - Copyright (c) 2023 NVIDIA CORPORATION - Copyright (c) 2024 NVIDIA CORPORATION - Copyright (c) 2025 NVIDIA Corporation - This file has not been changed - """ - ) - expected_linter = Linter("file.txt", NEW_CONTENT) - expected_linter.add_warning( - (15, 24), "copyright is not out of date and should not be updated" - ).add_replacement((1, 43), "Copyright (c) 2021-2023 NVIDIA CORPORATION") - expected_linter.add_warning( - (120, 157), "copyright is not out of date and should not be updated" - ).add_replacement((120, 157), "Copyright (c) 2025 NVIDIA CORPORATION") - - linter = run_apply_copyright_check(OLD_CONTENT, NEW_CONTENT) - assert linter.warnings == expected_linter.warnings +def test_apply_copyright_check( + change_type, old_filename, old_content, new_filename, new_content, warnings +): + linter = Linter(new_filename, new_content) + copyright.apply_copyright_check(linter, change_type, old_filename, old_content) + assert linter.warnings == warnings @pytest.fixture @@ -537,8 +641,8 @@ def mock_os_walk(top): with open(os.path.join(non_git_dir, "subdir1", "subdir2", "sub.txt"), "w") as f: f.write("Subdir file\n") assert copyright.get_changed_files(Mock()) == { - "top.txt": None, - "subdir1/subdir2/sub.txt": None, + "top.txt": ("A", None), + "subdir1/subdir2/sub.txt": ("A", None), } def fn(filename): @@ -582,16 +686,16 @@ def file_contents(verbed): Mock(return_value=None), ): assert copyright.get_changed_files(Mock()) == { - "untouched.txt": None, - "copied.txt": None, - "modified_and_copied.txt": None, - "copied_and_modified.txt": None, - "deleted.txt": None, - "renamed.txt": None, - "modified_and_renamed.txt": None, - "modified.txt": None, - "chmodded.txt": None, - "untracked.txt": None, + "untouched.txt": ("A", None), + "copied.txt": ("A", None), + "modified_and_copied.txt": ("A", None), + "copied_and_modified.txt": ("A", None), + "deleted.txt": ("A", None), + "renamed.txt": ("A", None), + "modified_and_renamed.txt": ("A", None), + "modified.txt": ("A", None), + "chmodded.txt": ("A", None), + "untracked.txt": ("A", None), } git_repo.index.commit("Initial commit") @@ -655,20 +759,20 @@ def file_contents(verbed): # Truly need to be checked changed = { - "added.txt": None, - "untracked.txt": None, - "modified_and_renamed_2.txt": "modified_and_renamed.txt", - "modified.txt": "modified.txt", - "copied_and_modified_2.txt": "copied_and_modified.txt", - "modified_and_copied.txt": "modified_and_copied.txt", + "added.txt": ("A", None), + "untracked.txt": ("A", None), + "modified_and_renamed_2.txt": ("R", "modified_and_renamed.txt"), + "modified.txt": ("M", "modified.txt"), + "copied_and_modified_2.txt": ("C", "copied_and_modified.txt"), + "modified_and_copied.txt": ("M", "modified_and_copied.txt"), } # Superfluous, but harmless because the content is identical superfluous = { - "chmodded.txt": "chmodded.txt", - "modified_and_copied_2.txt": "modified_and_copied.txt", - "copied_2.txt": "copied.txt", - "renamed_2.txt": "renamed.txt", + "chmodded.txt": ("M", "chmodded.txt"), + "modified_and_copied_2.txt": ("C", "modified_and_copied.txt"), + "copied_2.txt": ("C", "copied.txt"), + "renamed_2.txt": ("R", "renamed.txt"), } with patch("os.getcwd", Mock(return_value=git_repo.working_tree_dir)), patch( @@ -677,25 +781,25 @@ def file_contents(verbed): ): changed_files = copyright.get_changed_files(Mock()) assert { - path: old_blob.path if old_blob else None + path: (old_blob[0], old_blob[1].path if old_blob[1] else None) for path, old_blob in changed_files.items() } == changed | superfluous for new, old in changed.items(): - if old: + if old[1]: with open(fn(new), "rb") as f: new_contents = f.read() - old_contents = old_files[old].data_stream.read() + old_contents = old_files[old[1]].data_stream.read() assert new_contents != old_contents - assert changed_files[new].data_stream.read() == old_contents + assert changed_files[new][1].data_stream.read() == old_contents for new, old in superfluous.items(): - if old: + if old[1]: with open(fn(new), "rb") as f: new_contents = f.read() - old_contents = old_files[old].data_stream.read() + old_contents = old_files[old[1]].data_stream.read() assert new_contents == old_contents - assert changed_files[new].data_stream.read() == old_contents + assert changed_files[new][1].data_stream.read() == old_contents def test_get_changed_files_multiple_merge_bases(git_repo): @@ -768,12 +872,12 @@ def write_file(filename, contents): ): changed_files = copyright.get_changed_files(Mock()) assert { - path: old_blob.path if old_blob else None + path: (old_blob[0], old_blob[1].path if old_blob[1] else None) for path, old_blob in changed_files.items() } == { - "file1.txt": "file1.txt", - "file2.txt": "file2.txt", - "file3.txt": "file3.txt", + "file1.txt": ("M", "file1.txt"), + "file2.txt": ("M", "file2.txt"), + "file3.txt": ("M", "file3.txt"), } @@ -914,22 +1018,28 @@ def mock_apply_copyright_check(): linter = Linter("file5.txt", file_contents(2)) with mock_apply_copyright_check() as apply_copyright_check: copyright_checker(linter, mock_args) - apply_copyright_check.assert_called_once_with(linter, file_contents(2)) + apply_copyright_check.assert_called_once_with( + linter, "R", "file2.txt", file_contents(2) + ) linter = Linter("file3.txt", file_contents_modified(3)) with mock_apply_copyright_check() as apply_copyright_check: copyright_checker(linter, mock_args) - apply_copyright_check.assert_called_once_with(linter, file_contents(3)) + apply_copyright_check.assert_called_once_with( + linter, "M", "file3.txt", file_contents(3) + ) linter = Linter("file4.txt", file_contents_modified(4)) with mock_apply_copyright_check() as apply_copyright_check: copyright_checker(linter, mock_args) - apply_copyright_check.assert_called_once_with(linter, file_contents(4)) + apply_copyright_check.assert_called_once_with( + linter, "M", "file4.txt", file_contents(4) + ) linter = Linter("file6.txt", file_contents(6)) with mock_apply_copyright_check() as apply_copyright_check: copyright_checker(linter, mock_args) - apply_copyright_check.assert_called_once_with(linter, None) + apply_copyright_check.assert_called_once_with(linter, "A", None, None) ############################# # branch-2 is target branch @@ -943,12 +1053,16 @@ def mock_apply_copyright_check(): linter = Linter("file1.txt", file_contents_modified(1)) with mock_apply_copyright_check() as apply_copyright_check: copyright_checker(linter, mock_args) - apply_copyright_check.assert_called_once_with(linter, file_contents(1)) + apply_copyright_check.assert_called_once_with( + linter, "M", "file1.txt", file_contents(1) + ) linter = Linter("./file1.txt", file_contents_modified(1)) with mock_apply_copyright_check() as apply_copyright_check: copyright_checker(linter, mock_args) - apply_copyright_check.assert_called_once_with(linter, file_contents(1)) + apply_copyright_check.assert_called_once_with( + linter, "M", "file1.txt", file_contents(1) + ) linter = Linter("../file1.txt", file_contents_modified(1)) with mock_apply_copyright_check() as apply_copyright_check: @@ -963,19 +1077,25 @@ def mock_apply_copyright_check(): linter = Linter("file5.txt", file_contents(2)) with mock_apply_copyright_check() as apply_copyright_check: copyright_checker(linter, mock_args) - apply_copyright_check.assert_called_once_with(linter, file_contents(2)) + apply_copyright_check.assert_called_once_with( + linter, "R", "file2.txt", file_contents(2) + ) linter = Linter("file3.txt", file_contents_modified(3)) with mock_apply_copyright_check() as apply_copyright_check: copyright_checker(linter, mock_args) - apply_copyright_check.assert_called_once_with(linter, file_contents(3)) + apply_copyright_check.assert_called_once_with( + linter, "M", "file3.txt", file_contents(3) + ) linter = Linter("file4.txt", file_contents_modified(4)) with mock_apply_copyright_check() as apply_copyright_check: copyright_checker(linter, mock_args) - apply_copyright_check.assert_called_once_with(linter, file_contents(4)) + apply_copyright_check.assert_called_once_with( + linter, "M", "file4.txt", file_contents(4) + ) linter = Linter("file6.txt", file_contents(6)) with mock_apply_copyright_check() as apply_copyright_check: copyright_checker(linter, mock_args) - apply_copyright_check.assert_called_once_with(linter, None) + apply_copyright_check.assert_called_once_with(linter, "A", None, None) From be50381e59fb962bca9b8abd2e043442f0abbf1a Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Mon, 26 Aug 2024 15:39:44 -0400 Subject: [PATCH 2/8] Display notice when copyrighted file is copied or renamed Fixes https://github.com/rapidsai/pre-commit-hooks/issues/48 --- src/rapids_pre_commit_hooks/copyright.py | 27 ++++- .../rapids_pre_commit_hooks/test_copyright.py | 100 +++++++++++++++++- 2 files changed, 123 insertions(+), 4 deletions(-) diff --git a/src/rapids_pre_commit_hooks/copyright.py b/src/rapids_pre_commit_hooks/copyright.py index d02c600..8af24af 100644 --- a/src/rapids_pre_commit_hooks/copyright.py +++ b/src/rapids_pre_commit_hooks/copyright.py @@ -71,14 +71,33 @@ def apply_copyright_revert( ).add_replacement(new_match.span(), old_match.group()) -def apply_copyright_update(linter: Linter, match: re.Match, year: int) -> None: - linter.add_warning(match.span("years"), "copyright is out of date").add_replacement( +def apply_copyright_update( + linter: Linter, + change_type: str, + old_filename: Optional[str], + match: re.Match, + year: int, +) -> None: + CHANGE_VERBS = { + "C": "copied", + "R": "renamed", + } + w = linter.add_warning(match.span("years"), "copyright is out of date") + w.add_replacement( match.span(), COPYRIGHT_REPLACEMENT.format( first_year=match.group("first_year"), last_year=year, ), ) + try: + change_verb = CHANGE_VERBS[change_type] + except KeyError: + pass + else: + w.add_note( + (0, len(linter.content)), f"file was {change_verb} from '{old_filename}'" + ) def apply_copyright_check( @@ -108,7 +127,9 @@ def apply_copyright_check( int(match.group("last_year") or match.group("first_year")) < current_year ): - apply_copyright_update(linter, match, current_year) + apply_copyright_update( + linter, change_type, old_filename, match, current_year + ) else: linter.add_warning((0, 0), "no copyright notice found") diff --git a/test/rapids_pre_commit_hooks/test_copyright.py b/test/rapids_pre_commit_hooks/test_copyright.py index 6044163..0d2e210 100644 --- a/test/rapids_pre_commit_hooks/test_copyright.py +++ b/test/rapids_pre_commit_hooks/test_copyright.py @@ -23,7 +23,7 @@ from freezegun import freeze_time from rapids_pre_commit_hooks import copyright -from rapids_pre_commit_hooks.lint import Linter, LintWarning, Replacement +from rapids_pre_commit_hooks.lint import Linter, LintWarning, Note, Replacement def test_match_copyright(): @@ -265,6 +265,104 @@ def test_strip_copyright(): ), ], ), + ( + "R", + "file1.txt", + dedent( + r""" + Copyright (c) 2021-2023 NVIDIA CORPORATION + Copyright (c) 2023 NVIDIA CORPORATION + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2025 NVIDIA CORPORATION + This file has not been changed + """ + ), + "file2.txt", + dedent( + r""" + Copyright (c) 2021-2023 NVIDIA CORPORATION + Copyright (c) 2023 NVIDIA CORPORATION + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2025 NVIDIA CORPORATION + This file has been changed + """ + ), + [ + LintWarning( + (15, 24), + "copyright is out of date", + notes=[ + Note((0, 185), "file was renamed from 'file1.txt'"), + ], + replacements=[ + Replacement( + (1, 43), "Copyright (c) 2021-2024, NVIDIA CORPORATION" + ), + ], + ), + LintWarning( + (58, 62), + "copyright is out of date", + notes=[ + Note((0, 185), "file was renamed from 'file1.txt'"), + ], + replacements=[ + Replacement( + (44, 81), "Copyright (c) 2023-2024, NVIDIA CORPORATION" + ), + ], + ), + ], + ), + ( + "C", + "file1.txt", + dedent( + r""" + Copyright (c) 2021-2023 NVIDIA CORPORATION + Copyright (c) 2023 NVIDIA CORPORATION + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2025 NVIDIA CORPORATION + This file has not been changed + """ + ), + "file2.txt", + dedent( + r""" + Copyright (c) 2021-2023 NVIDIA CORPORATION + Copyright (c) 2023 NVIDIA CORPORATION + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2025 NVIDIA CORPORATION + This file has been changed + """ + ), + [ + LintWarning( + (15, 24), + "copyright is out of date", + notes=[ + Note((0, 185), "file was copied from 'file1.txt'"), + ], + replacements=[ + Replacement( + (1, 43), "Copyright (c) 2021-2024, NVIDIA CORPORATION" + ), + ], + ), + LintWarning( + (58, 62), + "copyright is out of date", + notes=[ + Note((0, 185), "file was copied from 'file1.txt'"), + ], + replacements=[ + Replacement( + (44, 81), "Copyright (c) 2023-2024, NVIDIA CORPORATION" + ), + ], + ), + ], + ), ], ) @freeze_time("2024-01-18") From 10a766e50e7196314c4c918f1a6de76b67ee1ef5 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Mon, 26 Aug 2024 15:42:57 -0400 Subject: [PATCH 3/8] Require Python 3.10 --- .github/workflows/run-tests.yaml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml index 75178b1..d48a350 100644 --- a/.github/workflows/run-tests.yaml +++ b/.github/workflows/run-tests.yaml @@ -20,6 +20,6 @@ jobs: fetch-depth: 0 - uses: actions/setup-python@v5 with: - python-version: '3.9' + python-version: '3.10' - name: Build & Test run: ./ci/build-test.sh diff --git a/pyproject.toml b/pyproject.toml index 994bb5c..763113e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ classifiers = [ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3", ] -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = [ "PyYAML", "bashlex", From 89731fe80b8a3c997c5064c46a1f4ba3b91230a4 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Mon, 26 Aug 2024 15:46:34 -0400 Subject: [PATCH 4/8] Use itertools.pairwise() --- src/rapids_pre_commit_hooks/lint.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/src/rapids_pre_commit_hooks/lint.py b/src/rapids_pre_commit_hooks/lint.py index 3a3b033..5a6f3f1 100644 --- a/src/rapids_pre_commit_hooks/lint.py +++ b/src/rapids_pre_commit_hooks/lint.py @@ -19,24 +19,12 @@ import functools import re import warnings -from typing import Callable, Generator, Iterable, Optional +from itertools import pairwise +from typing import Callable, Optional from rich.console import Console from rich.markup import escape - -# Taken from Python docs -# (https://docs.python.org/3.12/library/itertools.html#itertools.pairwise) -# Replace with itertools.pairwise after dropping Python 3.9 support -def _pairwise(iterable: Iterable) -> Generator: - # pairwise('ABCDEFG') → AB BC CD DE EF FG - iterator = iter(iterable) - a = next(iterator, None) - for b in iterator: - yield a, b - a = b - - _PosType = tuple[int, int] @@ -101,7 +89,7 @@ def fix(self) -> str: key=lambda replacement: replacement.pos, ) - for r1, r2 in _pairwise(sorted_replacements): + for r1, r2 in pairwise(sorted_replacements): if r1.pos[1] > r2.pos[0]: raise OverlappingReplacementsError(f"{r1} overlaps with {r2}") From da23822e23d38dc578f1f96980daece0fea7ed10 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Mon, 26 Aug 2024 17:49:13 -0400 Subject: [PATCH 5/8] Smarter tuple unpacking --- .../rapids_pre_commit_hooks/test_copyright.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/rapids_pre_commit_hooks/test_copyright.py b/test/rapids_pre_commit_hooks/test_copyright.py index 0d2e210..9ac74f0 100644 --- a/test/rapids_pre_commit_hooks/test_copyright.py +++ b/test/rapids_pre_commit_hooks/test_copyright.py @@ -879,23 +879,23 @@ def file_contents(verbed): ): changed_files = copyright.get_changed_files(Mock()) assert { - path: (old_blob[0], old_blob[1].path if old_blob[1] else None) - for path, old_blob in changed_files.items() + path: (change_type, old_blob.path if old_blob else None) + for path, (change_type, old_blob) in changed_files.items() } == changed | superfluous - for new, old in changed.items(): - if old[1]: + for new, (_, old) in changed.items(): + if old: with open(fn(new), "rb") as f: new_contents = f.read() - old_contents = old_files[old[1]].data_stream.read() + old_contents = old_files[old].data_stream.read() assert new_contents != old_contents assert changed_files[new][1].data_stream.read() == old_contents - for new, old in superfluous.items(): - if old[1]: + for new, (_, old) in superfluous.items(): + if old: with open(fn(new), "rb") as f: new_contents = f.read() - old_contents = old_files[old[1]].data_stream.read() + old_contents = old_files[old].data_stream.read() assert new_contents == old_contents assert changed_files[new][1].data_stream.read() == old_contents @@ -970,8 +970,8 @@ def write_file(filename, contents): ): changed_files = copyright.get_changed_files(Mock()) assert { - path: (old_blob[0], old_blob[1].path if old_blob[1] else None) - for path, old_blob in changed_files.items() + path: (change_type, old_blob.path if old_blob else None) + for path, (change_type, old_blob) in changed_files.items() } == { "file1.txt": ("M", "file1.txt"), "file2.txt": ("M", "file2.txt"), From db8e686c9aed2907bc7868e6fba8f378f7ef8855 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Tue, 27 Aug 2024 10:38:42 -0400 Subject: [PATCH 6/8] Show notice if file was copied/renamed without changes --- src/rapids_pre_commit_hooks/copyright.py | 48 +++++---- .../rapids_pre_commit_hooks/test_copyright.py | 98 +++++++++++++++++++ 2 files changed, 129 insertions(+), 17 deletions(-) diff --git a/src/rapids_pre_commit_hooks/copyright.py b/src/rapids_pre_commit_hooks/copyright.py index 14fa353..ad5cc90 100644 --- a/src/rapids_pre_commit_hooks/copyright.py +++ b/src/rapids_pre_commit_hooks/copyright.py @@ -23,7 +23,7 @@ import git -from .lint import Linter, LintMain +from .lint import Linter, LintMain, LintWarning COPYRIGHT_RE: re.Pattern = re.compile( r"Copyright *(?:\(c\))? *(?P(?P\d{4})(-(?P\d{4}))?),?" @@ -59,17 +59,40 @@ def append_stripped(start: int, item: re.Match): return lines +def add_copy_rename_note( + linter: Linter, warning: LintWarning, change_type: str, old_filename: Optional[str] +): + CHANGE_VERBS = { + "C": "copied", + "R": "renamed", + } + try: + change_verb = CHANGE_VERBS[change_type] + except KeyError: + pass + else: + warning.add_note( + (0, len(linter.content)), f"file was {change_verb} from '{old_filename}'" + ) + + def apply_copyright_revert( - linter: Linter, old_match: re.Match, new_match: re.Match + linter: Linter, + change_type: str, + old_filename: Optional[str], + old_match: re.Match, + new_match: re.Match, ) -> None: if old_match.group("years") == new_match.group("years"): warning_pos = new_match.span() else: warning_pos = new_match.span("years") - linter.add_warning( + w = linter.add_warning( warning_pos, "copyright is not out of date and should not be updated", - ).add_replacement(new_match.span(), old_match.group()) + ) + w.add_replacement(new_match.span(), old_match.group()) + add_copy_rename_note(linter, w, change_type, old_filename) def apply_copyright_update( @@ -79,10 +102,6 @@ def apply_copyright_update( match: re.Match, year: int, ) -> None: - CHANGE_VERBS = { - "C": "copied", - "R": "renamed", - } w = linter.add_warning(match.span("years"), "copyright is out of date") w.add_replacement( match.span(), @@ -91,14 +110,7 @@ def apply_copyright_update( last_year=year, ), ) - try: - change_verb = CHANGE_VERBS[change_type] - except KeyError: - pass - else: - w.add_note( - (0, len(linter.content)), f"file was {change_verb} from '{old_filename}'" - ) + add_copy_rename_note(linter, w, change_type, old_filename) def apply_copyright_check( @@ -121,7 +133,9 @@ def apply_copyright_check( old_copyright_matches, new_copyright_matches ): if old_match.group() != new_match.group(): - apply_copyright_revert(linter, old_match, new_match) + apply_copyright_revert( + linter, change_type, old_filename, old_match, new_match + ) elif new_copyright_matches: for match in new_copyright_matches: if ( diff --git a/test/rapids_pre_commit_hooks/test_copyright.py b/test/rapids_pre_commit_hooks/test_copyright.py index 9ac74f0..782d5b2 100644 --- a/test/rapids_pre_commit_hooks/test_copyright.py +++ b/test/rapids_pre_commit_hooks/test_copyright.py @@ -363,6 +363,104 @@ def test_strip_copyright(): ), ], ), + ( + "R", + "file1.txt", + dedent( + r""" + Copyright (c) 2021-2023 NVIDIA CORPORATION + Copyright (c) 2023 NVIDIA CORPORATION + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2025 NVIDIA CORPORATION + This file has not been changed + """ + ), + "file2.txt", + dedent( + r""" + Copyright (c) 2021-2024 NVIDIA CORPORATION + Copyright (c) 2023 NVIDIA CORPORATION + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2025 NVIDIA Corporation + This file has not been changed + """ + ), + [ + LintWarning( + (15, 24), + "copyright is not out of date and should not be updated", + notes=[ + Note((0, 189), "file was renamed from 'file1.txt'"), + ], + replacements=[ + Replacement( + (1, 43), "Copyright (c) 2021-2023 NVIDIA CORPORATION" + ), + ], + ), + LintWarning( + (120, 157), + "copyright is not out of date and should not be updated", + notes=[ + Note((0, 189), "file was renamed from 'file1.txt'"), + ], + replacements=[ + Replacement( + (120, 157), "Copyright (c) 2025 NVIDIA CORPORATION" + ), + ], + ), + ], + ), + ( + "C", + "file1.txt", + dedent( + r""" + Copyright (c) 2021-2023 NVIDIA CORPORATION + Copyright (c) 2023 NVIDIA CORPORATION + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2025 NVIDIA CORPORATION + This file has not been changed + """ + ), + "file2.txt", + dedent( + r""" + Copyright (c) 2021-2024 NVIDIA CORPORATION + Copyright (c) 2023 NVIDIA CORPORATION + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2025 NVIDIA Corporation + This file has not been changed + """ + ), + [ + LintWarning( + (15, 24), + "copyright is not out of date and should not be updated", + notes=[ + Note((0, 189), "file was copied from 'file1.txt'"), + ], + replacements=[ + Replacement( + (1, 43), "Copyright (c) 2021-2023 NVIDIA CORPORATION" + ), + ], + ), + LintWarning( + (120, 157), + "copyright is not out of date and should not be updated", + notes=[ + Note((0, 189), "file was copied from 'file1.txt'"), + ], + replacements=[ + Replacement( + (120, 157), "Copyright (c) 2025 NVIDIA CORPORATION" + ), + ], + ), + ], + ), ], ) @freeze_time("2024-01-18") From 151e9d7eca5995204a01f3da0b5b8230c8478191 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Tue, 27 Aug 2024 10:48:10 -0400 Subject: [PATCH 7/8] s/name/path/ --- src/rapids_pre_commit_hooks/copyright.py | 13 ++++++++----- test/rapids_pre_commit_hooks/test_copyright.py | 15 ++++++++------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/rapids_pre_commit_hooks/copyright.py b/src/rapids_pre_commit_hooks/copyright.py index ad5cc90..4a12fdf 100644 --- a/src/rapids_pre_commit_hooks/copyright.py +++ b/src/rapids_pre_commit_hooks/copyright.py @@ -60,7 +60,10 @@ def append_stripped(start: int, item: re.Match): def add_copy_rename_note( - linter: Linter, warning: LintWarning, change_type: str, old_filename: Optional[str] + linter: Linter, + warning: LintWarning, + change_type: str, + old_filename: Optional[Union[str, os.PathLike[str]]], ): CHANGE_VERBS = { "C": "copied", @@ -79,7 +82,7 @@ def add_copy_rename_note( def apply_copyright_revert( linter: Linter, change_type: str, - old_filename: Optional[str], + old_filename: Optional[Union[str, os.PathLike[str]]], old_match: re.Match, new_match: re.Match, ) -> None: @@ -98,7 +101,7 @@ def apply_copyright_revert( def apply_copyright_update( linter: Linter, change_type: str, - old_filename: Optional[str], + old_filename: Optional[Union[str, os.PathLike[str]]], match: re.Match, year: int, ) -> None: @@ -116,7 +119,7 @@ def apply_copyright_update( def apply_copyright_check( linter: Linter, change_type: str, - old_filename: Optional[str], + old_filename: Optional[Union[str, os.PathLike[str]]], old_content: Optional[str], ) -> None: if linter.content != old_content: @@ -363,7 +366,7 @@ def the_check(linter: Linter, args: argparse.Namespace): old_filename = None old_content = None else: - old_filename = changed_file.name + old_filename = changed_file.path old_content = changed_file.data_stream.read().decode() apply_copyright_check(linter, change_type, old_filename, old_content) diff --git a/test/rapids_pre_commit_hooks/test_copyright.py b/test/rapids_pre_commit_hooks/test_copyright.py index 782d5b2..ee6c1db 100644 --- a/test/rapids_pre_commit_hooks/test_copyright.py +++ b/test/rapids_pre_commit_hooks/test_copyright.py @@ -1147,11 +1147,12 @@ def file_contents_modified(num): """ ) + os.mkdir(os.path.join(git_repo.working_tree_dir, "dir")) write_file("file1.txt", file_contents(1)) - write_file("file2.txt", file_contents(2)) + write_file("dir/file2.txt", file_contents(2)) write_file("file3.txt", file_contents(3)) write_file("file4.txt", file_contents(4)) - git_repo.index.add(["file1.txt", "file2.txt", "file3.txt", "file4.txt"]) + git_repo.index.add(["file1.txt", "dir/file2.txt", "file3.txt", "file4.txt"]) git_repo.index.commit("Initial commit") branch_1 = git_repo.create_head("branch-1", "master") @@ -1164,8 +1165,8 @@ def file_contents_modified(num): branch_2 = git_repo.create_head("branch-2", "master") git_repo.head.reference = branch_2 git_repo.head.reset(index=True, working_tree=True) - write_file("file2.txt", file_contents_modified(2)) - git_repo.index.add(["file2.txt"]) + write_file("dir/file2.txt", file_contents_modified(2)) + git_repo.index.add(["dir/file2.txt"]) git_repo.index.commit("Update file2.txt") pr = git_repo.create_head("pr", "branch-1") @@ -1177,7 +1178,7 @@ def file_contents_modified(num): write_file("file4.txt", file_contents_modified(4)) git_repo.index.add(["file4.txt"]) git_repo.index.commit("Update file4.txt") - git_repo.index.move(["file2.txt", "file5.txt"]) + git_repo.index.move(["dir/file2.txt", "file5.txt"]) git_repo.index.commit("Rename file2.txt to file5.txt") write_file("file6.txt", file_contents(6)) @@ -1215,7 +1216,7 @@ def mock_apply_copyright_check(): with mock_apply_copyright_check() as apply_copyright_check: copyright_checker(linter, mock_args) apply_copyright_check.assert_called_once_with( - linter, "R", "file2.txt", file_contents(2) + linter, "R", "dir/file2.txt", file_contents(2) ) linter = Linter("file3.txt", file_contents_modified(3)) @@ -1274,7 +1275,7 @@ def mock_apply_copyright_check(): with mock_apply_copyright_check() as apply_copyright_check: copyright_checker(linter, mock_args) apply_copyright_check.assert_called_once_with( - linter, "R", "file2.txt", file_contents(2) + linter, "R", "dir/file2.txt", file_contents(2) ) linter = Linter("file3.txt", file_contents_modified(3)) From fbc7b9e21bdb9b7e1823a780fa76877b73f94118 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Tue, 27 Aug 2024 11:12:24 -0400 Subject: [PATCH 8/8] Update wording and logic --- src/rapids_pre_commit_hooks/copyright.py | 16 +-- .../rapids_pre_commit_hooks/test_copyright.py | 104 +++++++++++++++--- 2 files changed, 97 insertions(+), 23 deletions(-) diff --git a/src/rapids_pre_commit_hooks/copyright.py b/src/rapids_pre_commit_hooks/copyright.py index 4a12fdf..53de97e 100644 --- a/src/rapids_pre_commit_hooks/copyright.py +++ b/src/rapids_pre_commit_hooks/copyright.py @@ -75,7 +75,14 @@ def add_copy_rename_note( pass else: warning.add_note( - (0, len(linter.content)), f"file was {change_verb} from '{old_filename}'" + (0, len(linter.content)), + f"file was {change_verb} from '{old_filename}' and is assumed to share " + "history with it", + ) + warning.add_note( + (0, len(linter.content)), + "change file contents if you want its copyright dates to only be " + "determined by its own edit history", ) @@ -100,8 +107,6 @@ def apply_copyright_revert( def apply_copyright_update( linter: Linter, - change_type: str, - old_filename: Optional[Union[str, os.PathLike[str]]], match: re.Match, year: int, ) -> None: @@ -113,7 +118,6 @@ def apply_copyright_update( last_year=year, ), ) - add_copy_rename_note(linter, w, change_type, old_filename) def apply_copyright_check( @@ -145,9 +149,7 @@ def apply_copyright_check( int(match.group("last_year") or match.group("first_year")) < current_year ): - apply_copyright_update( - linter, change_type, old_filename, match, current_year - ) + apply_copyright_update(linter, match, current_year) else: linter.add_warning((0, 0), "no copyright notice found") diff --git a/test/rapids_pre_commit_hooks/test_copyright.py b/test/rapids_pre_commit_hooks/test_copyright.py index ee6c1db..3fdec5e 100644 --- a/test/rapids_pre_commit_hooks/test_copyright.py +++ b/test/rapids_pre_commit_hooks/test_copyright.py @@ -291,9 +291,6 @@ def test_strip_copyright(): LintWarning( (15, 24), "copyright is out of date", - notes=[ - Note((0, 185), "file was renamed from 'file1.txt'"), - ], replacements=[ Replacement( (1, 43), "Copyright (c) 2021-2024, NVIDIA CORPORATION" @@ -303,9 +300,6 @@ def test_strip_copyright(): LintWarning( (58, 62), "copyright is out of date", - notes=[ - Note((0, 185), "file was renamed from 'file1.txt'"), - ], replacements=[ Replacement( (44, 81), "Copyright (c) 2023-2024, NVIDIA CORPORATION" @@ -340,9 +334,6 @@ def test_strip_copyright(): LintWarning( (15, 24), "copyright is out of date", - notes=[ - Note((0, 185), "file was copied from 'file1.txt'"), - ], replacements=[ Replacement( (1, 43), "Copyright (c) 2021-2024, NVIDIA CORPORATION" @@ -352,9 +343,6 @@ def test_strip_copyright(): LintWarning( (58, 62), "copyright is out of date", - notes=[ - Note((0, 185), "file was copied from 'file1.txt'"), - ], replacements=[ Replacement( (44, 81), "Copyright (c) 2023-2024, NVIDIA CORPORATION" @@ -363,6 +351,54 @@ def test_strip_copyright(): ), ], ), + ( + "R", + "file1.txt", + dedent( + r""" + Copyright (c) 2021-2023 NVIDIA CORPORATION + Copyright (c) 2023 NVIDIA CORPORATION + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2025 NVIDIA CORPORATION + This file has not been changed + """ + ), + "file2.txt", + dedent( + r""" + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2023-2024 NVIDIA CORPORATION + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2025 NVIDIA CORPORATION + This file has been changed + """ + ), + [], + ), + ( + "C", + "file1.txt", + dedent( + r""" + Copyright (c) 2021-2023 NVIDIA CORPORATION + Copyright (c) 2023 NVIDIA CORPORATION + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2025 NVIDIA CORPORATION + This file has not been changed + """ + ), + "file2.txt", + dedent( + r""" + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2023-2024 NVIDIA CORPORATION + Copyright (c) 2024 NVIDIA CORPORATION + Copyright (c) 2025 NVIDIA CORPORATION + This file has been changed + """ + ), + [], + ), ( "R", "file1.txt", @@ -390,7 +426,16 @@ def test_strip_copyright(): (15, 24), "copyright is not out of date and should not be updated", notes=[ - Note((0, 189), "file was renamed from 'file1.txt'"), + Note( + (0, 189), + "file was renamed from 'file1.txt' and is assumed to " + "share history with it", + ), + Note( + (0, 189), + "change file contents if you want its copyright dates to " + "only be determined by its own edit history", + ), ], replacements=[ Replacement( @@ -402,7 +447,16 @@ def test_strip_copyright(): (120, 157), "copyright is not out of date and should not be updated", notes=[ - Note((0, 189), "file was renamed from 'file1.txt'"), + Note( + (0, 189), + "file was renamed from 'file1.txt' and is assumed to " + "share history with it", + ), + Note( + (0, 189), + "change file contents if you want its copyright dates to " + "only be determined by its own edit history", + ), ], replacements=[ Replacement( @@ -439,7 +493,16 @@ def test_strip_copyright(): (15, 24), "copyright is not out of date and should not be updated", notes=[ - Note((0, 189), "file was copied from 'file1.txt'"), + Note( + (0, 189), + "file was copied from 'file1.txt' and is assumed to share " + "history with it", + ), + Note( + (0, 189), + "change file contents if you want its copyright dates to " + "only be determined by its own edit history", + ), ], replacements=[ Replacement( @@ -451,7 +514,16 @@ def test_strip_copyright(): (120, 157), "copyright is not out of date and should not be updated", notes=[ - Note((0, 189), "file was copied from 'file1.txt'"), + Note( + (0, 189), + "file was copied from 'file1.txt' and is assumed to share " + "history with it", + ), + Note( + (0, 189), + "change file contents if you want its copyright dates to " + "only be determined by its own edit history", + ), ], replacements=[ Replacement(