Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Warn about and ignore invalid package metadata in the environment #373

Merged
merged 1 commit into from
Jun 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 36 additions & 10 deletions src/pipdeptree/_discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import sys
from importlib.metadata import Distribution, distributions
from pathlib import Path
from typing import Tuple
from typing import Iterable, Tuple

from packaging.utils import canonicalize_name

Expand Down Expand Up @@ -44,7 +44,10 @@ def get_installed_distributions(
if user_only:
paths = [p for p in paths if p.startswith(site.getusersitepackages())]

original_dists = distributions(path=paths)
return filter_valid_distributions(distributions(path=paths))


def filter_valid_distributions(iterable_dists: Iterable[Distribution]) -> list[Distribution]:
warning_printer = get_warning_printer()

# Since importlib.metadata.distributions() can return duplicate packages, we need to handle this. pip's approach is
Expand All @@ -53,8 +56,17 @@ def get_installed_distributions(
# See https://github.com/pypa/pip/blob/7c49d06ea4be4635561f16a524e3842817d1169a/src/pip/_internal/metadata/importlib/_envs.py#L34
seen_dists: dict[str, Distribution] = {}
first_seen_to_already_seen_dists_dict: dict[Distribution, list[Distribution]] = {}

# We also need to handle invalid metadata, though we can't get paths to invalid distribution metadata directly since
# importlib doesn't expose an API for it. We do have the directory they reside in, so let's use that.
site_dir_with_invalid_metadata: set[str] = set()

dists = []
for dist in original_dists:
for dist in iterable_dists:
if not has_valid_metadata(dist):
site_dir = str(dist.locate_file(""))
site_dir_with_invalid_metadata.add(site_dir)
continue
normalized_name = canonicalize_name(dist.metadata["Name"])
if normalized_name not in seen_dists:
seen_dists[normalized_name] = dist
Expand All @@ -64,17 +76,31 @@ def get_installed_distributions(
already_seen_dists = first_seen_to_already_seen_dists_dict.setdefault(seen_dists[normalized_name], [])
already_seen_dists.append(dist)

should_print_warning = warning_printer.should_warn() and first_seen_to_already_seen_dists_dict
if should_print_warning:
warning_printer.print_multi_line(
"Duplicate package metadata found",
lambda: render_duplicated_dist_metadata_text(first_seen_to_already_seen_dists_dict),
ignore_fail=True,
)
if warning_printer.should_warn():
if site_dir_with_invalid_metadata:
warning_printer.print_multi_line(
"Missing or invalid metadata found in the following site dirs",
lambda: render_invalid_metadata_text(site_dir_with_invalid_metadata),
)
if first_seen_to_already_seen_dists_dict:
warning_printer.print_multi_line(
"Duplicate package metadata found",
lambda: render_duplicated_dist_metadata_text(first_seen_to_already_seen_dists_dict),
ignore_fail=True,
)

return dists


def has_valid_metadata(dist: Distribution) -> bool:
    """
    Report whether ``dist`` declares a ``Name`` field in its metadata.

    A membership test is used rather than ``dist.metadata["Name"] is not None``: subscripting a missing key relies on
    an implicit ``None`` result, which ``importlib.metadata`` deprecates (Python 3.12+) in favour of raising
    ``KeyError``. The membership test yields the same answer without depending on that behaviour.

    :param dist: the distribution whose metadata is inspected
    :returns: ``True`` if the metadata contains a ``Name`` field, ``False`` otherwise
    """
    return "Name" in dist.metadata


def render_invalid_metadata_text(site_dirs_with_invalid_metadata: set[str]) -> None:
    """
    Print each site directory that holds missing or invalid metadata to stderr, one per line.

    The directories are emitted in sorted order because iterating a set of strings directly has no stable order
    between interpreter runs, which would make the warning text non-reproducible.

    :param site_dirs_with_invalid_metadata: site directories to list
    """
    for site_dir in sorted(site_dirs_with_invalid_metadata):
        print(site_dir, file=sys.stderr)  # noqa: T201
kemzeb marked this conversation as resolved.
Show resolved Hide resolved


# Type alias for a pair of distributions.
# NOTE(review): judging by the name, presumably (first-seen distribution, later duplicate) -- confirm against the
# code that builds and consumes these pairs.
FirstSeenWithDistsPair = Tuple[Distribution, Distribution]


Expand Down
13 changes: 12 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,18 @@ def fake_dist(tmp_path: Path) -> Path:
fake_dist_path = fake_site_pkgs / "bar-2.4.5.dist-info"
fake_dist_path.mkdir(parents=True)
fake_metadata = Path(fake_dist_path) / "METADATA"
with Path(fake_metadata).open("w", encoding=locale.getpreferredencoding(False)) as f:
with fake_metadata.open("w", encoding=locale.getpreferredencoding(False)) as f:
f.write("Metadata-Version: 2.3\n" "Name: bar\n" "Version: 2.4.5\n")

return fake_dist_path


@pytest.fixture()
def fake_dist_with_invalid_metadata(tmp_path: Path) -> Path:
    """
    Similar to `fake_dist()`, but creates an invalid METADATA file.

    The METADATA file is created empty inside ``site-packages/bar-2.4.5.dist-info`` under ``tmp_path``.

    :param tmp_path: temporary directory in which the fake site-packages tree is created
    :returns: path to the created ``.dist-info`` directory
    """
    dist_info_dir = tmp_path / "site-packages" / "bar-2.4.5.dist-info"
    dist_info_dir.mkdir(parents=True)
    # An empty file carries no fields at all, so there is no "Name" entry to read.
    (dist_info_dir / "METADATA").touch()
    return dist_info_dir
19 changes: 19 additions & 0 deletions tests/test_discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,22 @@ def test_duplicate_metadata(mocker: MockerFixture, capfd: pytest.CaptureFixture[
"---------------------------------------\n"
)
assert err == expected


def test_invalid_metadata(
    mocker: MockerFixture, capfd: pytest.CaptureFixture[str], fake_dist_with_invalid_metadata: Path
) -> None:
    """Check that a distribution with an empty METADATA file is dropped and its site dir is reported on stderr."""
    # The fixture returns the .dist-info directory; its parent is the fake site-packages directory.
    site_dir = str(fake_dist_with_invalid_metadata.parent)
    mocker.patch("pipdeptree._discovery.sys.path", [site_dir])

    found = get_installed_distributions()
    captured_out, captured_err = capfd.readouterr()

    expected = (
        "Warning!!! Missing or invalid metadata found in the following site dirs:\n"
        f"{site_dir}\n"
        "------------------------------------------------------------------------\n"
    )
    assert len(found) == 0
    assert not captured_out
    assert captured_err == expected