Skip to content

Commit

Permalink
untar_file: remove common leading directory before unpacking
Browse files Browse the repository at this point in the history
Fixes: pypa#12781
  • Loading branch information
encukou committed Jun 26, 2024
1 parent 300ed75 commit 5dcdd8e
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 2 deletions.
1 change: 1 addition & 0 deletions news/12781.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix finding hardlink targets in tar files with an ignored top-level directory.
14 changes: 12 additions & 2 deletions src/pip/_internal/utils/unpacking.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,19 @@ def untar_file(filename: str, location: str) -> None:
else:
default_mode_plus_executable = _get_default_mode_plus_executable()

def pip_filter(member: tarfile.TarInfo, path: str) -> tarfile.TarInfo:
if leading:
if leading:
# Strip the leading directory from all files in the archive,
# including hardlink targets (which are relative to the
# unpack location).
for member in tar.getmembers():
name_lead, name_rest = split_leading_dir(member.name)
member.name = split_leading_dir(member.name)[1]
if member.islnk():
lnk_lead, lnk_rest = split_leading_dir(member.linkname)
if lnk_lead == name_lead:
member.linkname = lnk_rest

def pip_filter(member: tarfile.TarInfo, path: str) -> tarfile.TarInfo:
orig_mode = member.mode
try:
try:
Expand Down
43 changes: 43 additions & 0 deletions tests/unit/test_utils_unpacking.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,49 @@ def test_unpack_tar_filter(self) -> None:

assert "is outside the destination" in str(e.value)

@pytest.mark.parametrize(
    ("input_prefix", "unpack_prefix"),
    [
        ("", ""),
        ("dir/", ""),  # pip ignores a common leading directory
        ("dir/sub/", "sub/"),  # pip ignores *one* common leading directory
    ],
)
def test_unpack_tar_links(self, input_prefix: str, unpack_prefix: str) -> None:
    """
    Test unpacking a *.tar file containing hard and soft links.

    The archive holds one regular file plus a hardlink and a symlink that
    both point at it, all stored under ``input_prefix``. After unpacking,
    all three names must be readable under ``unpack_prefix`` and yield the
    regular file's content.
    """
    test_tar = os.path.join(self.tempdir, "test_tar_links.tar")
    content = b"file content"
    with tarfile.open(test_tar, "w") as mytar:
        file_tarinfo = tarfile.TarInfo(input_prefix + "regular_file.txt")
        file_tarinfo.size = len(content)
        mytar.addfile(file_tarinfo, io.BytesIO(content))

        # The hardlink target is written with the same prefix as the
        # member names, i.e. as a path from the archive root.
        hardlink_tarinfo = tarfile.TarInfo(input_prefix + "hardlink.txt")
        hardlink_tarinfo.type = tarfile.LNKTYPE
        hardlink_tarinfo.linkname = input_prefix + "regular_file.txt"
        mytar.addfile(hardlink_tarinfo)

        # The symlink target is written without the prefix: it names the
        # sibling file next to the link itself.
        symlink_tarinfo = tarfile.TarInfo(input_prefix + "symlink.txt")
        symlink_tarinfo.type = tarfile.SYMTYPE
        symlink_tarinfo.linkname = "regular_file.txt"
        mytar.addfile(symlink_tarinfo)

    untar_file(test_tar, self.tempdir)

    # Every entry, whether the real file or a link to it, must resolve to
    # the same bytes once unpacked.
    unpack_dir = os.path.join(self.tempdir, unpack_prefix)
    for name in ("regular_file.txt", "hardlink.txt", "symlink.txt"):
        with open(os.path.join(unpack_dir, name), "rb") as f:
            assert f.read() == content


def test_unpack_tar_unicode(tmpdir: Path) -> None:
test_tar = tmpdir / "test.tar"
Expand Down

0 comments on commit 5dcdd8e

Please sign in to comment.