Skip to content

Commit

Permalink
warn before overwriting files owned by previously installed wheels
Browse files Browse the repository at this point in the history
Before installing a wheel, look for other distributions that have also
written files into the same top level directory. Warn if any of the
same files will be modified by the new wheel.

Addresses #4625

Signed-off-by: Doug Hellmann <[email protected]>
  • Loading branch information
dhellmann committed Apr 24, 2020
1 parent daff811 commit bf7b22b
Show file tree
Hide file tree
Showing 3 changed files with 162 additions and 0 deletions.
2 changes: 2 additions & 0 deletions news/4625.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add a FutureWarning when installing a wheel will overwrite a file
owned by another distribution.
106 changes: 106 additions & 0 deletions src/pip/_internal/operations/install/wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from pip._vendor import pkg_resources
from pip._vendor.distlib.scripts import ScriptMaker
from pip._vendor.distlib.util import get_export_entry
from pip._vendor.packaging.utils import canonicalize_name
from pip._vendor.six import StringIO

from pip._internal.exceptions import InstallationError
Expand Down Expand Up @@ -283,6 +284,104 @@ def make(self, specification, options=None):
return super(PipScriptMaker, self).make(specification, options)


def _get_file_owners(
lib_dir, # type: str
top_level, # type: str
ignore_name, # type: str
):
# type: (...) -> Dict[str, List[str]]
"""Return dict mapping distributions to files under a top level directory
Look through lib_dir for distributions that own files under the
top_level directory specified. Skip distributions that match the
ignore_name. Return a dict mapping filenames to the distributions
that own them.
"""
file_owners = {} # type: Dict[str, List[str]]
existing_env = pkg_resources.Environment([lib_dir])
canonical_name = canonicalize_name(ignore_name)
for existing_dist_name in existing_env:
for d in existing_env[existing_dist_name]:
if canonicalize_name(d.project_name) == canonical_name:
continue

existing_info_dir = os.path.join(
lib_dir,
'{}-{}.dist-info'.format(d.project_name, d.version),
)

top_level_path = os.path.join(existing_info_dir, 'top_level.txt')
if not os.path.exists(top_level_path):
continue
with open(top_level_path, 'r') as f:
existing_top_level = f.read().strip()
if existing_top_level != top_level:
continue

record_path = os.path.join(existing_info_dir, 'RECORD')
if not os.path.exists(record_path):
continue
with open(record_path, **csv_io_kwargs('r')) as record_file:
for row in csv.reader(record_file):
# Normalize the path before saving the owners,
# since the record always contains forward
# slashes, but when we look for a path later we
# will be using a value with the native path
# separator.
o = file_owners.setdefault(os.path.normpath(row[0]), [])
o.append(str(d))

return file_owners


def _report_file_owner_conflicts(
lib_dir, # type: str
name, # type: str
source_dir, # type: str
info_dir, # type: str
):
# type: (...) -> None
"""Report files owned by other distributions that are being overwritten.
Scan the lib_dir for distributions that own files under the same
top level directory as the wheel being installed and report any
files owned by those other distributions that are going to be
overwritten.
"""
installing_top_level_path = os.path.join(
source_dir, info_dir, 'top_level.txt')
if not os.path.exists(installing_top_level_path):
# We cannot determine the top level directory, so there is no
# point in continuing.
return

with open(installing_top_level_path, 'r') as fd:
installing_top_level = fd.read().strip()
files_from_other_owners = _get_file_owners(
lib_dir, installing_top_level, name)
if not files_from_other_owners:
# Nothing else owns files under this top level directory, so
# we don't need to scan the source.
return

for dir, subdirs, files in os.walk(source_dir):
basedir = dir[len(source_dir):].lstrip(os.path.sep)
for f in files:
partial_src = os.path.join(basedir, f)
if partial_src not in files_from_other_owners:
# There are no other owners for this file.
continue
destfile = os.path.join(lib_dir, basedir, f)
for owner in files_from_other_owners[partial_src]:
warnings.warn(
'Overwriting or removing {} for {} '
'which is also owned by {}'.format(
destfile, name, owner),
FutureWarning)


def install_unpacked_wheel(
name, # type: str
wheeldir, # type: str
Expand Down Expand Up @@ -415,6 +514,13 @@ def clobber(
changed = fixer(destfile)
record_installed(srcfile, destfile, changed)

# Look for packages containing files that are already using the
# same toplevel directory as the wheel we are installing but that
# have a different dist name. Do this before calling clobber(), so
# that when the warning is eventually changed to a hard error no
# partial installation occurs.
_report_file_owner_conflicts(lib_dir, name, source, info_dir)

clobber(source, lib_dir, True)

dest_info_dir = os.path.join(lib_dir, info_dir)
Expand Down
54 changes: 54 additions & 0 deletions tests/functional/test_install_wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,3 +587,57 @@ def test_wheel_install_fails_with_badly_encoded_metadata(script):
assert "Error decoding metadata for" in result.stderr
assert "simple-0.1.0-py2.py3-none-any.whl" in result.stderr
assert "METADATA" in result.stderr


def test_report_file_owner_conflicts(script, tmpdir):
"""
Test installing from a wheel that wants to overwrite
files owned by another wheel to ensure that a warning
is reported and that the second wheel does overwrite
the first.
"""
from tests.lib import TestFailure

make_wheel_with_file(
name="wheel1",
version="1.0",
extra_files={
'thetop/a.py': '# from wheel1',
},
extra_metadata_files={
'top_level.txt': 'thetop',
},
).save_to_dir(tmpdir)

make_wheel_with_file(
name="wheel2",
version="2.0",
extra_files={
'thetop/a.py': '# from wheel2',
},
extra_metadata_files={
'top_level.txt': 'thetop',
},
).save_to_dir(tmpdir)

result = script.pip(
'install', 'wheel1', 'wheel2', '--no-index',
'--find-links', tmpdir,
)

# FIXME: Both wheels should be installed, for now. In the future
# when the warning is changed to a hard error this part of the
# test needs to be updated to show that only wheel1 is installed.
with pytest.raises(TestFailure):
result.assert_installed('wheel1')
result.assert_installed('wheel2')

# FIXME: When the warning is changed to a hard error, wheel2 won't
# be installed and the file contents will be those from wheel1.
thetop = script.site_packages_path / 'thetop'
assert (thetop / 'a.py').read_text() == '# from wheel2'

full_path = thetop / 'a.py'
msg = 'Overwriting or removing {} for {} which is also owned by {}'.format(
full_path, 'wheel2', 'wheel1 1.0')
assert msg in result.stderr

0 comments on commit bf7b22b

Please sign in to comment.