From 613d3a774674f0033e55721935d2af915fc418a2 Mon Sep 17 00:00:00 2001
From: Petr Viktorin
Date: Wed, 26 Jun 2024 14:59:49 +0200
Subject: [PATCH] untar_file: remove common leading directory before unpacking

Strip the common leading directory from every archive member before
extraction starts, rather than renaming each member inside the
extraction filter. Hardlink targets are relative to the unpack
location, so a target that shares the member's leading directory is
rewritten together with the member name.

Fixes: #12781
---
 news/12781.bugfix.rst                |  1 +
 src/pip/_internal/utils/unpacking.py | 14 ++++++++--
 tests/unit/test_utils_unpacking.py   | 41 ++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+), 2 deletions(-)
 create mode 100644 news/12781.bugfix.rst

diff --git a/news/12781.bugfix.rst b/news/12781.bugfix.rst
new file mode 100644
index 00000000000..6bd43d347db
--- /dev/null
+++ b/news/12781.bugfix.rst
@@ -0,0 +1 @@
+Fix finding hardlink targets in tar files with an ignored top-level directory.
diff --git a/src/pip/_internal/utils/unpacking.py b/src/pip/_internal/utils/unpacking.py
index 341269550ce..875e30e13ab 100644
--- a/src/pip/_internal/utils/unpacking.py
+++ b/src/pip/_internal/utils/unpacking.py
@@ -190,9 +190,19 @@ def untar_file(filename: str, location: str) -> None:
         else:
             default_mode_plus_executable = _get_default_mode_plus_executable()
 
+            if leading:
+                # Strip the leading directory from all files in the archive,
+                # including hardlink targets (which are relative to the
+                # unpack location).
+                for member in tar.getmembers():
+                    name_lead, name_rest = split_leading_dir(member.name)
+                    member.name = name_rest
+                    if member.islnk():
+                        lnk_lead, lnk_rest = split_leading_dir(member.linkname)
+                        if lnk_lead == name_lead:
+                            member.linkname = lnk_rest
+
             def pip_filter(member: tarfile.TarInfo, path: str) -> tarfile.TarInfo:
-                if leading:
-                    member.name = split_leading_dir(member.name)[1]
                 orig_mode = member.mode
                 try:
                     try:
diff --git a/tests/unit/test_utils_unpacking.py b/tests/unit/test_utils_unpacking.py
index 3fdd822e739..50500868061 100644
--- a/tests/unit/test_utils_unpacking.py
+++ b/tests/unit/test_utils_unpacking.py
@@ -197,6 +197,47 @@ def test_unpack_tar_filter(self) -> None:
 
         assert "is outside the destination" in str(e.value)
 
+    @pytest.mark.parametrize(
+        ("input_prefix", "unpack_prefix"),
+        [
+            ("", ""),
+            ("dir/", ""),  # pip ignores a common leading directory
+            ("dir/sub/", "sub/"),  # pip ignores *one* common leading directory
+        ],
+    )
+    def test_unpack_tar_links(self, input_prefix: str, unpack_prefix: str) -> None:
+        """
+        Test unpacking a *.tar file containing hard & soft links
+        """
+        test_tar = os.path.join(self.tempdir, "test_tar_links.tar")
+        content = b"file content"
+        with tarfile.open(test_tar, "w") as mytar:
+            file_tarinfo = tarfile.TarInfo(input_prefix + "regular_file.txt")
+            file_tarinfo.size = len(content)
+            mytar.addfile(file_tarinfo, io.BytesIO(content))
+
+            hardlink_tarinfo = tarfile.TarInfo(input_prefix + "hardlink.txt")
+            hardlink_tarinfo.type = tarfile.LNKTYPE
+            hardlink_tarinfo.linkname = input_prefix + "regular_file.txt"
+            mytar.addfile(hardlink_tarinfo)
+
+            symlink_tarinfo = tarfile.TarInfo(input_prefix + "symlink.txt")
+            symlink_tarinfo.type = tarfile.SYMTYPE
+            symlink_tarinfo.linkname = "regular_file.txt"
+            mytar.addfile(symlink_tarinfo)
+
+        untar_file(test_tar, self.tempdir)
+
+        unpack_dir = os.path.join(self.tempdir, unpack_prefix)
+        with open(os.path.join(unpack_dir, "regular_file.txt"), "rb") as f:
+            assert f.read() == content
+
+        with open(os.path.join(unpack_dir, "hardlink.txt"), "rb") as f:
+            assert f.read() == content
+
+        with open(os.path.join(unpack_dir, "symlink.txt"), "rb") as f:
+            assert f.read() == content
+
 
 def test_unpack_tar_unicode(tmpdir: Path) -> None:
     test_tar = tmpdir / "test.tar"