Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

warn before overwriting files owned by previously installed wheels #8119

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/4625.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Warn when a file owned by another distribution is overwritten.
104 changes: 104 additions & 0 deletions src/pip/_internal/operations/install/wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,15 @@
from pip._vendor import pkg_resources
from pip._vendor.distlib.scripts import ScriptMaker
from pip._vendor.distlib.util import get_export_entry
from pip._vendor.packaging.utils import canonicalize_name
from pip._vendor.six import PY2, ensure_str, ensure_text, itervalues, text_type

from pip._internal.exceptions import InstallationError
from pip._internal.locations import get_major_minor_version
from pip._internal.models.direct_url import DIRECT_URL_METADATA_NAME, DirectUrl
from pip._internal.utils.deprecation import deprecated
from pip._internal.utils.filesystem import adjacent_tmp_file, replace
from pip._internal.utils.messages import oxford_comma_join
from pip._internal.utils.misc import captured_stdout, ensure_dir, hash_file
from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
Expand Down Expand Up @@ -324,6 +327,101 @@ def make(self, specification, options=None):
return super(PipScriptMaker, self).make(specification, options)


def _get_file_owners(lib_dir, top_level, ignore_name):
# type: (str, str, str) -> Dict[str, List[str]]
"""Return dict mapping distributions to files under a top level directory

Look through lib_dir for distributions that own files under the
top_level directory specified. Skip distributions that match the
ignore_name. Return a dict mapping filenames to the distributions
that own them.

"""
file_owners = {} # type: Dict[str, List[str]]
existing_env = pkg_resources.Environment([lib_dir])
canonical_name = canonicalize_name(ignore_name)
for existing_dist_name in existing_env:
for d in existing_env[existing_dist_name]:
if canonicalize_name(d.project_name) == canonical_name:
continue

existing_info_dir = os.path.join(
lib_dir,
'{}-{}.dist-info'.format(d.project_name, d.version),
)

top_level_path = os.path.join(existing_info_dir, 'top_level.txt')
if not os.path.exists(top_level_path):
continue
with open(top_level_path, 'r') as f:
existing_top_level = f.read().strip()
if existing_top_level != top_level:
continue

record_path = os.path.join(existing_info_dir, 'RECORD')
if not os.path.exists(record_path):
continue
with open(record_path, **csv_io_kwargs('r')) as record_file:
for row in csv.reader(record_file):
# Normalize the path before saving the owners,
# since the record always contains forward
# slashes, but when we look for a path later we
# will be using a value with the native path
# separator.
o = file_owners.setdefault(os.path.normpath(row[0]), [])
o.append(str(d))

return file_owners


def _report_file_owner_conflicts(lib_dir, name, source_dir, info_dir):
# type: (str, str, str, str) -> None
"""Report files owned by other distributions that are being overwritten.

Scan the lib_dir for distributions that own files under the same
top level directory as the wheel being installed and report any
files owned by those other distributions that are going to be
overwritten.

"""
installing_top_level_path = os.path.join(
source_dir, info_dir, 'top_level.txt')
if not os.path.exists(installing_top_level_path):
# We cannot determine the top level directory, so there is no
# point in continuing.
return

with open(installing_top_level_path, 'r') as fd:
installing_top_level = fd.read().strip()
files_from_other_owners = _get_file_owners(
lib_dir, installing_top_level, name)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it should be necessary to ignore the installing package, since we should have uninstalled the existing distribution before trying to install a new version.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Eventually a file conflict will be an error that prevents installation. We want to report the error before removing the existing version and breaking the environment.

Copy link
Member

@chrahunt chrahunt Jul 2, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We currently have a rollback mechanism that reverts an upgrading package on any installation failure. I think this should have us covered, if I understand the concern you're raising.

if not files_from_other_owners:
# Nothing else owns files under this top level directory, so
# we don't need to scan the source.
return

for dir, _subdirs, files in os.walk(source_dir):
basedir = dir[len(source_dir):].lstrip(os.path.sep)
for f in files:
partial_src = os.path.join(basedir, f)
if partial_src not in files_from_other_owners:
# There are no other owners for this file.
continue
destfile = os.path.join(lib_dir, basedir, f)
deprecated(
reason=(
'Overwriting or removing {} for {} '
'which is also owned by {}'.format(
destfile, name,
oxford_comma_join(
sorted(files_from_other_owners[partial_src])),
)),
replacement=None,
gone_in="21.0",
issue=4625,
)


def install_unpacked_wheel(
name, # type: str
wheeldir, # type: str
Expand Down Expand Up @@ -457,6 +555,12 @@ def clobber(
changed = fixer(destfile)
record_installed(srcfile, destfile, changed)

# Look for packages containing files that are already using the
# same toplevel directory as the wheel we are installing but that
# have a different dist name. Do this before calling clobber(), so
# that when the warning is eventually changed to a hard error no
# partial installation occurs.
_report_file_owner_conflicts(lib_dir, name, source, info_dir)
clobber(
ensure_text(source, encoding=sys.getfilesystemencoding()),
ensure_text(lib_dir, encoding=sys.getfilesystemencoding()),
Expand Down
26 changes: 26 additions & 0 deletions src/pip/_internal/utils/messages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
Utility functions for building messages.
"""

from pip._internal.utils.typing import MYPY_CHECK_RUNNING

if MYPY_CHECK_RUNNING:
from typing import List


def oxford_comma_join(values, conjunction='and'):
# type: (List[str], str) -> str
"Join a list of strings for output in a message."
if not values:
return ''
if len(values) == 1:
return values[0]
comma = ''
if len(values) > 2:
comma = ','
return '{}{} {} {}'.format(
', '.join(values[:-1]),
comma,
conjunction,
values[-1],
)
123 changes: 123 additions & 0 deletions tests/functional/test_install_wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,3 +635,126 @@ def test_correct_package_name_while_creating_wheel_bug(script, package_name):
package = create_basic_wheel_for_package(script, package_name, '1.0')
wheel_name = os.path.basename(package)
assert wheel_name == 'simple_package-1.0-py2.py3-none-any.whl'


def test_report_file_owner_conflicts(script, tmpdir):
"""
Test installing from a wheel that wants to overwrite
files owned by another wheel to ensure that a warning
is reported and that the second wheel does overwrite
the first.
"""
from tests.lib import TestFailure
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see a few other tests in this file importing this inside the test. Maybe it's a good time to move this import along with the others at the top?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm just following the local coding patterns in this PR. Changing all of that seems like a task for another PR.


make_wheel_with_file(
name="wheel1",
version="1.0",
extra_files={
'thetop/a.py': '# from wheel1',
},
extra_metadata_files={
'top_level.txt': 'thetop',
},
).save_to_dir(tmpdir)

make_wheel_with_file(
name="wheel2",
version="2.0",
extra_files={
'thetop/a.py': '# from wheel2',
},
extra_metadata_files={
'top_level.txt': 'thetop',
},
).save_to_dir(tmpdir)

result = script.pip(
'install', 'wheel1', 'wheel2', '--no-index',
'--find-links', tmpdir,
)

# FIXME: Both wheels should be installed, for now. In the future
# when the warning is changed to a hard error this part of the
# test needs to be updated to show that only wheel1 is installed.
with pytest.raises(TestFailure):
result.assert_installed('wheel1')
result.assert_installed('wheel2')

# FIXME: When the warning is changed to a hard error, wheel2 won't
# be installed and the file contents will be those from wheel1.
thetop = script.site_packages_path / 'thetop'
assert (thetop / 'a.py').read_text() == '# from wheel2'

full_path = thetop / 'a.py'
msg = 'Overwriting or removing {} for {} which is also owned by {}'.format(
full_path, 'wheel2', 'wheel1 1.0')
assert msg in result.stderr


def test_report_file_owner_conflicts_multiple(script, tmpdir):
"""
Test installing from a wheel that wants to overwrite
files owned by two other wheels to ensure that a warning
is reported and that the second wheel does overwrite
the first.
"""
from tests.lib import TestFailure

make_wheel_with_file(
name="wheel1",
version="1.0",
extra_files={
'thetop/a.py': '# from wheel1',
},
extra_metadata_files={
'top_level.txt': 'thetop',
},
).save_to_dir(tmpdir)

make_wheel_with_file(
name="wheel2",
version="2.0",
extra_files={
'thetop/a.py': '# from wheel2',
},
extra_metadata_files={
'top_level.txt': 'thetop',
},
).save_to_dir(tmpdir)

make_wheel_with_file(
name="wheel3",
version="3.0",
extra_files={
'thetop/a.py': '# from wheel3',
},
extra_metadata_files={
'top_level.txt': 'thetop',
},
).save_to_dir(tmpdir)

result = script.pip(
'install', 'wheel1', 'wheel2', 'wheel3', '--no-index',
'--find-links', tmpdir,
)

# FIXME: Both wheels should be installed, for now. In the future
# when the warning is changed to a hard error this part of the
# test needs to be updated to show that only wheel1 is installed.
with pytest.raises(TestFailure):
result.assert_installed('wheel1')
result.assert_installed('wheel2')

# FIXME: When the warning is changed to a hard error, wheel2 won't
# be installed and the file contents will be those from wheel1.
thetop = script.site_packages_path / 'thetop'
assert (thetop / 'a.py').read_text() == '# from wheel3'

full_path = thetop / 'a.py'
msg = 'Overwriting or removing {} for {} which is also owned by {}'.format(
full_path, 'wheel2', 'wheel1 1.0')
assert msg in result.stderr

msg = 'Overwriting or removing {} for {} which is also owned by {}'.format(
full_path, 'wheel3', 'wheel1 1.0 and wheel2 2.0')
assert msg in result.stderr
23 changes: 23 additions & 0 deletions tests/unit/test_messages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Tests for pip._internal.messages
"""

import pytest

from pip._internal.utils.messages import oxford_comma_join


@pytest.mark.parametrize("test_input,expected",
[([], ''),
(['a'], 'a'),
(['a', 'b'], 'a and b'),
(['a', 'b', 'c'], 'a, b, and c')])
def test_oxford_comma_join_implicit_conjunction(test_input, expected):
assert expected == oxford_comma_join(test_input)


@pytest.mark.parametrize("test_input,conjunction,expected",
[(['a', 'b'], 'and', 'a and b',),
(['a', 'b'], 'or', 'a or b')])
def test_oxford_comma_join_explicit_conjunction(
test_input, conjunction, expected):
assert expected == oxford_comma_join(test_input, conjunction)