diff --git a/news/12042.bugfix.rst b/news/12042.bugfix.rst
new file mode 100644
index 00000000000..34d97743540
--- /dev/null
+++ b/news/12042.bugfix.rst
@@ -0,0 +1 @@
+Correctly parse ``dist-info-metadata`` values from JSON-format index data.
diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py
index e741c3283cd..4453519ad02 100644
--- a/src/pip/_internal/models/link.py
+++ b/src/pip/_internal/models/link.py
@@ -69,18 +69,6 @@ class LinkHash:
def __post_init__(self) -> None:
assert self.name in _SUPPORTED_HASHES
- @classmethod
- def parse_pep658_hash(cls, dist_info_metadata: str) -> Optional["LinkHash"]:
- """Parse a PEP 658 data-dist-info-metadata hash."""
- if dist_info_metadata == "true":
- return None
- name, sep, value = dist_info_metadata.partition("=")
- if not sep:
- return None
- if name not in _SUPPORTED_HASHES:
- return None
- return cls(name=name, value=value)
-
@classmethod
@functools.lru_cache(maxsize=None)
def find_hash_url_fragment(cls, url: str) -> Optional["LinkHash"]:
@@ -107,6 +95,28 @@ def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
return hashes.is_hash_allowed(self.name, hex_digest=self.value)
+@dataclass(frozen=True)
+class MetadataFile:
+ """Information about a core metadata file associated with a distribution."""
+
+ hashes: Optional[Dict[str, str]]
+
+ def __post_init__(self) -> None:
+ if self.hashes is not None:
+ assert all(name in _SUPPORTED_HASHES for name in self.hashes)
+
+
+def supported_hashes(hashes: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
+ # Remove any unsupported hash types from the mapping. If this leaves no
+ # supported hashes, return None.
+ if hashes is None:
+ return None
+ hashes = {n: v for n, v in hashes.items() if n in _SUPPORTED_HASHES}
+ if not hashes:
+ return None
+ return hashes
+
+
def _clean_url_path_part(part: str) -> str:
"""
Clean a "part" of a URL path (i.e. after splitting on "@" characters).
@@ -179,7 +189,7 @@ class Link(KeyBasedCompareMixin):
"comes_from",
"requires_python",
"yanked_reason",
- "dist_info_metadata",
+ "metadata_file_data",
"cache_link_parsing",
"egg_fragment",
]
@@ -190,7 +200,7 @@ def __init__(
comes_from: Optional[Union[str, "IndexContent"]] = None,
requires_python: Optional[str] = None,
yanked_reason: Optional[str] = None,
- dist_info_metadata: Optional[str] = None,
+ metadata_file_data: Optional[MetadataFile] = None,
cache_link_parsing: bool = True,
hashes: Optional[Mapping[str, str]] = None,
) -> None:
@@ -208,11 +218,10 @@ def __init__(
a simple repository HTML link. If the file has been yanked but
no reason was provided, this should be the empty string. See
PEP 592 for more information and the specification.
- :param dist_info_metadata: the metadata attached to the file, or None if no such
- metadata is provided. This is the value of the "data-dist-info-metadata"
- attribute, if present, in a simple repository HTML link. This may be parsed
- into its own `Link` by `self.metadata_link()`. See PEP 658 for more
- information and the specification.
+ :param metadata_file_data: the metadata attached to the file, or None if
+ no such metadata is provided. This argument, if not None, indicates
+ that a separate metadata file exists, and also optionally supplies
+ hashes for that file.
:param cache_link_parsing: A flag that is used elsewhere to determine
whether resources retrieved from this link should be cached. PyPI
URLs should generally have this set to False, for example.
@@ -220,6 +229,10 @@ def __init__(
determine the validity of a download.
"""
+ # The comes_from, requires_python, and metadata_file_data arguments are
+ # only used by classmethods of this class, and are not used in client
+ # code directly.
+
# url can be a UNC windows share
if url.startswith("\\\\"):
url = path_to_url(url)
@@ -239,7 +252,7 @@ def __init__(
self.comes_from = comes_from
self.requires_python = requires_python if requires_python else None
self.yanked_reason = yanked_reason
- self.dist_info_metadata = dist_info_metadata
+ self.metadata_file_data = metadata_file_data
super().__init__(key=url, defining_class=Link)
@@ -262,9 +275,25 @@ def from_json(
url = _ensure_quoted_url(urllib.parse.urljoin(page_url, file_url))
pyrequire = file_data.get("requires-python")
yanked_reason = file_data.get("yanked")
- dist_info_metadata = file_data.get("dist-info-metadata")
hashes = file_data.get("hashes", {})
+ # PEP 714: Indexes must use the name core-metadata, but
+ # clients should support the old name as a fallback for compatibility.
+ metadata_info = file_data.get("core-metadata")
+ if metadata_info is None:
+ metadata_info = file_data.get("dist-info-metadata")
+
+ # The metadata info value may be a boolean, or a dict of hashes.
+ if isinstance(metadata_info, dict):
+ # The file exists, and hashes have been supplied
+ metadata_file_data = MetadataFile(supported_hashes(metadata_info))
+ elif metadata_info:
+ # The file exists, but there are no hashes
+ metadata_file_data = MetadataFile(None)
+ else:
+ # False or not present: the file does not exist
+ metadata_file_data = None
+
# The Link.yanked_reason expects an empty string instead of a boolean.
if yanked_reason and not isinstance(yanked_reason, str):
yanked_reason = ""
@@ -278,7 +307,7 @@ def from_json(
requires_python=pyrequire,
yanked_reason=yanked_reason,
hashes=hashes,
- dist_info_metadata=dist_info_metadata,
+ metadata_file_data=metadata_file_data,
)
@classmethod
@@ -298,14 +327,39 @@ def from_element(
url = _ensure_quoted_url(urllib.parse.urljoin(base_url, href))
pyrequire = anchor_attribs.get("data-requires-python")
yanked_reason = anchor_attribs.get("data-yanked")
- dist_info_metadata = anchor_attribs.get("data-dist-info-metadata")
+
+ # PEP 714: Indexes must use the name data-core-metadata, but
+ # clients should support the old name as a fallback for compatibility.
+ metadata_info = anchor_attribs.get("data-core-metadata")
+ if metadata_info is None:
+ metadata_info = anchor_attribs.get("data-dist-info-metadata")
+ # The metadata info value may be the string "true", or a string of
+ # the form "hashname=hashval".
+ if metadata_info == "true":
+ # The file exists, but there are no hashes
+ metadata_file_data = MetadataFile(None)
+ elif metadata_info is None:
+ # The file does not exist
+ metadata_file_data = None
+ else:
+ # The file exists, and hashes have been supplied
+ hashname, sep, hashval = metadata_info.partition("=")
+ if sep == "=":
+ metadata_file_data = MetadataFile(supported_hashes({hashname: hashval}))
+ else:
+ # Malformed value (no "=" separator). Treat as no hashes supplied.
+ logger.debug(
+ "Index returned invalid data-dist-info-metadata value: %s",
+ metadata_info,
+ )
+ metadata_file_data = MetadataFile(None)
return cls(
url,
comes_from=page_url,
requires_python=pyrequire,
yanked_reason=yanked_reason,
- dist_info_metadata=dist_info_metadata,
+ metadata_file_data=metadata_file_data,
)
def __str__(self) -> str:
@@ -407,17 +461,13 @@ def subdirectory_fragment(self) -> Optional[str]:
return match.group(1)
def metadata_link(self) -> Optional["Link"]:
- """Implementation of PEP 658 parsing."""
- # Note that Link.from_element() parsing the "data-dist-info-metadata" attribute
- # from an HTML anchor tag is typically how the Link.dist_info_metadata attribute
- # gets set.
- if self.dist_info_metadata is None:
+ """Return a link to the associated core metadata file (if any)."""
+ if self.metadata_file_data is None:
return None
metadata_url = f"{self.url_without_fragment}.metadata"
- metadata_link_hash = LinkHash.parse_pep658_hash(self.dist_info_metadata)
- if metadata_link_hash is None:
+ if self.metadata_file_data.hashes is None:
return Link(metadata_url)
- return Link(metadata_url, hashes=metadata_link_hash.as_dict())
+ return Link(metadata_url, hashes=self.metadata_file_data.hashes)
def as_hashes(self) -> Hashes:
return Hashes({k: [v] for k, v in self._hashes.items()})
diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py
index e855d78e126..5410a4afc03 100644
--- a/tests/unit/test_collector.py
+++ b/tests/unit/test_collector.py
@@ -30,6 +30,7 @@
from pip._internal.models.link import (
Link,
LinkHash,
+ MetadataFile,
_clean_url_path,
_ensure_quoted_url,
)
@@ -485,13 +486,30 @@ def test_parse_links_json() -> None:
"requires-python": ">=3.7",
"dist-info-metadata": False,
},
- # Same as above, but parsing dist-info-metadata.
+ # Same as above, but parsing core-metadata.
{
"filename": "holygrail-1.0-py3-none-any.whl",
"url": "/files/holygrail-1.0-py3-none-any.whl",
"hashes": {"sha256": "sha256 hash", "blake2b": "blake2b hash"},
"requires-python": ">=3.7",
- "dist-info-metadata": "sha512=aabdd41",
+ "core-metadata": {"sha512": "aabdd41"},
+ },
+ # Ensure fallback to dist-info-metadata works.
+ {
+ "filename": "holygrail-1.0-py3-none-any.whl",
+ "url": "/files/holygrail-1.0-py3-none-any.whl",
+ "hashes": {"sha256": "sha256 hash", "blake2b": "blake2b hash"},
+ "requires-python": ">=3.7",
+ "dist-info-metadata": {"sha512": "aabdd41"},
+ },
+ # Ensure that core-metadata gets priority.
+ {
+ "filename": "holygrail-1.0-py3-none-any.whl",
+ "url": "/files/holygrail-1.0-py3-none-any.whl",
+ "hashes": {"sha256": "sha256 hash", "blake2b": "blake2b hash"},
+ "requires-python": ">=3.7",
+ "core-metadata": {"sha512": "aabdd41"},
+ "dist-info-metadata": {"sha512": "this_is_wrong"},
},
],
}
@@ -527,7 +545,23 @@ def test_parse_links_json() -> None:
requires_python=">=3.7",
yanked_reason=None,
hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"},
- dist_info_metadata="sha512=aabdd41",
+ metadata_file_data=MetadataFile({"sha512": "aabdd41"}),
+ ),
+ Link(
+ "https://example.com/files/holygrail-1.0-py3-none-any.whl",
+ comes_from=page.url,
+ requires_python=">=3.7",
+ yanked_reason=None,
+ hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"},
+ metadata_file_data=MetadataFile({"sha512": "aabdd41"}),
+ ),
+ Link(
+ "https://example.com/files/holygrail-1.0-py3-none-any.whl",
+ comes_from=page.url,
+ requires_python=">=3.7",
+ yanked_reason=None,
+ hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"},
+ metadata_file_data=MetadataFile({"sha512": "aabdd41"}),
),
]
@@ -585,30 +619,42 @@ def test_parse_links__yanked_reason(anchor_html: str, expected: Optional[str]) -
),
# Test with value "true".
(
- '',
- "true",
+ '',
+ MetadataFile(None),
{},
),
# Test with a provided hash value.
(
- '', # noqa: E501
- "sha256=aa113592bbe",
+ '', # noqa: E501
+ MetadataFile({"sha256": "aa113592bbe"}),
{},
),
# Test with a provided hash value for both the requirement as well as metadata.
(
- '', # noqa: E501
- "sha256=aa113592bbe",
+ '', # noqa: E501
+ MetadataFile({"sha256": "aa113592bbe"}),
{"sha512": "abc132409cb"},
),
+ # Ensure the fallback to the old name works.
+ (
+ '', # noqa: E501
+ MetadataFile({"sha256": "aa113592bbe"}),
+ {},
+ ),
+ # Ensure that the data-core-metadata name gets priority.
+ (
+ '', # noqa: E501
+ MetadataFile({"sha256": "aa113592bbe"}),
+ {},
+ ),
],
)
-def test_parse_links__dist_info_metadata(
+def test_parse_links__metadata_file_data(
anchor_html: str,
expected: Optional[str],
hashes: Dict[str, str],
) -> None:
- link = _test_parse_links_data_attribute(anchor_html, "dist_info_metadata", expected)
+ link = _test_parse_links_data_attribute(anchor_html, "metadata_file_data", expected)
assert link._hashes == hashes
@@ -1080,17 +1126,26 @@ def test_link_hash_parsing(url: str, result: Optional[LinkHash]) -> None:
@pytest.mark.parametrize(
- "dist_info_metadata, result",
+ "metadata_attrib, expected",
[
- ("sha256=aa113592bbe", LinkHash("sha256", "aa113592bbe")),
- ("sha256=", LinkHash("sha256", "")),
- ("sha500=aa113592bbe", None),
- ("true", None),
- ("", None),
- ("aa113592bbe", None),
+ ("sha256=aa113592bbe", MetadataFile({"sha256": "aa113592bbe"})),
+ ("sha256=", MetadataFile({"sha256": ""})),
+ ("sha500=aa113592bbe", MetadataFile(None)),
+ ("true", MetadataFile(None)),
+ (None, None),
+ # Attribute is present but invalid
+ ("", MetadataFile(None)),
+ ("aa113592bbe", MetadataFile(None)),
],
)
-def test_pep658_hash_parsing(
- dist_info_metadata: str, result: Optional[LinkHash]
+def test_metadata_file_info_parsing_html(
+ metadata_attrib: str, expected: Optional[MetadataFile]
) -> None:
- assert LinkHash.parse_pep658_hash(dist_info_metadata) == result
+ attribs: Dict[str, Optional[str]] = {
+ "href": "something",
+ "data-dist-info-metadata": metadata_attrib,
+ }
+ page_url = "dummy_for_comes_from"
+ base_url = "https://index.url/simple"
+ link = Link.from_element(attribs, page_url, base_url)
+ assert link is not None and link.metadata_file_data == expected