From ea8941d5b2a215d1d990f7f171e10fe9d27f6825 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 26 Jun 2024 14:59:49 +0200 Subject: [PATCH 1/4] untar_file: remove common leading directory before unpacking Fixes: #12781 --- news/12781.bugfix.rst | 1 + src/pip/_internal/utils/unpacking.py | 14 +++++++-- tests/unit/test_utils_unpacking.py | 43 ++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 news/12781.bugfix.rst diff --git a/news/12781.bugfix.rst b/news/12781.bugfix.rst new file mode 100644 index 00000000000..6bd43d347db --- /dev/null +++ b/news/12781.bugfix.rst @@ -0,0 +1 @@ +Fix finding hardlink targets in tar files with an ignored top-level directory. diff --git a/src/pip/_internal/utils/unpacking.py b/src/pip/_internal/utils/unpacking.py index 341269550ce..875e30e13ab 100644 --- a/src/pip/_internal/utils/unpacking.py +++ b/src/pip/_internal/utils/unpacking.py @@ -190,9 +190,19 @@ def untar_file(filename: str, location: str) -> None: else: default_mode_plus_executable = _get_default_mode_plus_executable() + if leading: + # Strip the leading directory from all files in the archive, + # including hardlink targets (which are relative to the + # unpack location). + for member in tar.getmembers(): + name_lead, name_rest = split_leading_dir(member.name) + member.name = name_rest + if member.islnk(): + lnk_lead, lnk_rest = split_leading_dir(member.linkname) + if lnk_lead == name_lead: + member.linkname = lnk_rest + def pip_filter(member: tarfile.TarInfo, path: str) -> tarfile.TarInfo: - if leading: - member.name = split_leading_dir(member.name)[1] orig_mode = member.mode try: try: diff --git a/tests/unit/test_utils_unpacking.py b/tests/unit/test_utils_unpacking.py index 3fdd822e739..50500868061 100644 --- a/tests/unit/test_utils_unpacking.py +++ b/tests/unit/test_utils_unpacking.py @@ -197,6 +197,49 @@ def test_unpack_tar_filter(self) -> None: assert "is outside the destination" in str(e.value) + @pytest.mark.parametrize( + ("input_prefix", "unpack_prefix"), + [ + ("", ""), + ("dir/", ""), # pip ignores a common leading directory + ("dir/sub/", "sub/"), # pip ignores *one* common leading directory + ], + ) + def test_unpack_tar_links(self, input_prefix: str, unpack_prefix: str) -> None: + """ + Test unpacking a *.tar with file containing hard & soft links + """ + test_tar = os.path.join(self.tempdir, "test_tar_links.tar") + content = b"file content" + with tarfile.open(test_tar, "w") as mytar: + file_tarinfo = tarfile.TarInfo(input_prefix + "regular_file.txt") + file_tarinfo.size = len(content) + mytar.addfile(file_tarinfo, io.BytesIO(content)) + + hardlink_tarinfo = tarfile.TarInfo(input_prefix + "hardlink.txt") + hardlink_tarinfo.type = tarfile.LNKTYPE + hardlink_tarinfo.linkname = input_prefix + "regular_file.txt" + mytar.addfile(hardlink_tarinfo) + + symlink_tarinfo = tarfile.TarInfo(input_prefix + "symlink.txt") + symlink_tarinfo.type = tarfile.SYMTYPE + symlink_tarinfo.linkname = "regular_file.txt" + mytar.addfile(symlink_tarinfo) + + untar_file(test_tar, self.tempdir) + + os.system(f"ls -alR {self.tempdir}") + + unpack_dir = os.path.join(self.tempdir, unpack_prefix) + with open(os.path.join(unpack_dir, "regular_file.txt"), "rb") as f: + assert f.read() == content + + with open(os.path.join(unpack_dir, "hardlink.txt"), "rb") as f: + assert f.read() == content + + with open(os.path.join(unpack_dir, "symlink.txt"), "rb") as f: + assert f.read() == content + def test_unpack_tar_unicode(tmpdir: Path) -> None: test_tar = tmpdir / "test.tar" From 0f9250c474f6c32fa8ebd49caee57f18209c0cc5 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 26 Jun 2024 18:03:25 +0200 Subject: [PATCH 2/4] Update tests/unit/test_utils_unpacking.py --- tests/unit/test_utils_unpacking.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/unit/test_utils_unpacking.py b/tests/unit/test_utils_unpacking.py index 50500868061..efccbdccb0d 100644 --- a/tests/unit/test_utils_unpacking.py +++ b/tests/unit/test_utils_unpacking.py @@ -228,8 +228,6 @@ def test_unpack_tar_links(self, input_prefix: str, unpack_prefix: str) -> None: untar_file(test_tar, self.tempdir) - os.system(f"ls -alR {self.tempdir}") - unpack_dir = os.path.join(self.tempdir, unpack_prefix) with open(os.path.join(unpack_dir, "regular_file.txt"), "rb") as f: assert f.read() == content From cf723877c8c3ec70bdc0d6b9f79dfde44c8bff90 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 26 Jun 2024 20:15:04 +0200 Subject: [PATCH 3/4] Add a functional test --- tests/functional/test_install.py | 122 +++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py index eaea12a163c..1603a1afff0 100644 --- a/tests/functional/test_install.py +++ b/tests/functional/test_install.py @@ -1,9 +1,11 @@ import hashlib +import io import os import re import ssl import sys import sysconfig +import tarfile import textwrap from os.path import curdir, join, pardir from pathlib import Path @@ -2590,3 +2592,123 @@ def test_install_pip_prints_req_chain_pypi(script: PipTestEnvironment) -> None: f"Collecting python-openid " f"(from Paste[openid]==1.7.5.1->-r {req_path} (line 1))" in result.stdout ) + + +@pytest.mark.parametrize("common_prefix", ("", "linktest-1.0/")) +def test_install_sdist_links(script: PipTestEnvironment, common_prefix: str) -> None: + """ + Test installing an sdist with hard and symbolic links. + """ + + # Build an unpack an sdist that contains data files: + # - root.dat + # - sub/inner.dat + # and links (symbolic and hard) to both of those, both in the top-level + # and 'sub/' directories. That's 8 links total. + + # We build the sdist from in-memory data, since the filesystem + # might not support both kinds of links. + + sdist_path = script.scratch_path.joinpath("linktest-1.0.tar.gz") + + def add_file(tar: tarfile.TarFile, name: str, content: str) -> None: + info = tarfile.TarInfo(common_prefix + name) + content_bytes = content.encode("utf-8") + info.size = len(content_bytes) + tar.addfile(info, io.BytesIO(content_bytes)) + + def add_link(tar: tarfile.TarFile, name: str, linktype: str, target: str) -> None: + info = tarfile.TarInfo(common_prefix + name) + info.type = {"sym": tarfile.SYMTYPE, "hard": tarfile.LNKTYPE}[linktype] + info.linkname = target + tar.addfile(info) + + with tarfile.open(sdist_path, "w:gz") as sdist_tar: + add_file( + sdist_tar, + "PKG-INFO", + textwrap.dedent( + """ + Metadata-Version: 2.1 + Name: linktest + Version: 1.0 + """ + ), + ) + + add_file(sdist_tar, "src/linktest/__init__.py", "") + add_file(sdist_tar, "src/linktest/root.dat", "Data") + add_file(sdist_tar, "src/linktest/sub/__init__.py", "") + add_file(sdist_tar, "src/linktest/sub/inner.dat", "Data") + linknames = [] + pkg_root = f"{common_prefix}src/linktest" + for prefix, target_tag, linktype, target in [ + ("", "root", "sym", "root.dat"), + ("", "root", "hard", f"{pkg_root}/root.dat"), + ("", "inner", "sym", "sub/inner.dat"), + ("", "inner", "hard", f"{pkg_root}/sub/inner.dat"), + ("sub/", "root", "sym", "../root.dat"), + ("sub/", "root", "hard", f"{pkg_root}/root.dat"), + ("sub/", "inner", "sym", "inner.dat"), + ("sub/", "inner", "hard", f"{pkg_root}/sub/inner.dat"), + ]: + name = f"{prefix}link.{target_tag}.{linktype}.dat" + add_link(sdist_tar, "src/linktest/" + name, linktype, target) + linknames.append(name) + + add_file( + sdist_tar, + "pyproject.toml", + textwrap.dedent( + """ + [build-system] + requires = ["setuptools"] + build-backend = "setuptools.build_meta" + [project] + name = "linktest" + version = "1.0" + [tool.setuptools] + include-package-data = true + [tool.setuptools.packages.find] + where = ["src"] + [tool.setuptools.package-data] + "*" = ["*.dat"] + """ + ), + ) + + add_file( + sdist_tar, + "src/linktest/__main__.py", + textwrap.dedent( + f""" + from pathlib import Path + linknames = {linknames!r} + + # we could use importlib.resources here once + # it has stable convenient API across supported versions + res_path = Path(__file__).parent + + for name in linknames: + data_text = res_path.joinpath(name).read_text() + assert data_text == "Data" + print(str(len(linknames)) + ' files checked') + """ + ), + ) + + # Show sdist content, for debugging the test + result = script.run("python", "-m", "tarfile", "-vl", str(sdist_path)) + print(result) + + # Install the package + result = script.pip("install", str(sdist_path)) + print(result) + + # Show installed content, for debugging the test + result = script.pip("show", "-f", "linktest") + print(result) + + # Run the internal test + result = script.run("python", "-m", "linktest") + assert result.stdout.strip() == "8 files checked" From 4fd295ed4d320a40242ffc0f1efb25a1abc7cc59 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 27 Jun 2024 14:53:28 +0200 Subject: [PATCH 4/4] Use native path separator on Windows --- tests/functional/test_install.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py index 1603a1afff0..4e220328d29 100644 --- a/tests/functional/test_install.py +++ b/tests/functional/test_install.py @@ -2641,13 +2641,20 @@ def add_link(tar: tarfile.TarFile, name: str, linktype: str, target: str) -> Non add_file(sdist_tar, "src/linktest/sub/__init__.py", "") add_file(sdist_tar, "src/linktest/sub/inner.dat", "Data") linknames = [] + + # Windows requires native path separators in symlink targets. + # (see https://github.com/python/cpython/issues/57911) + # (This is not needed for hardlinks, nor for the workaround tarfile + # uses if symlinking is disabled.) + SEP = os.path.sep + pkg_root = f"{common_prefix}src/linktest" for prefix, target_tag, linktype, target in [ ("", "root", "sym", "root.dat"), ("", "root", "hard", f"{pkg_root}/root.dat"), - ("", "inner", "sym", "sub/inner.dat"), + ("", "inner", "sym", f"sub{SEP}inner.dat"), ("", "inner", "hard", f"{pkg_root}/sub/inner.dat"), - ("sub/", "root", "sym", "../root.dat"), + ("sub/", "root", "sym", f"..{SEP}root.dat"), ("sub/", "root", "hard", f"{pkg_root}/root.dat"), ("sub/", "inner", "sym", "inner.dat"), ("sub/", "inner", "hard", f"{pkg_root}/sub/inner.dat"),