diff --git a/news/12042.bugfix.rst b/news/12042.bugfix.rst new file mode 100644 index 00000000000..34d97743540 --- /dev/null +++ b/news/12042.bugfix.rst @@ -0,0 +1 @@ +Correctly parse ``dist-info-metadata`` values from JSON-format index data. diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index e741c3283cd..4453519ad02 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -69,18 +69,6 @@ class LinkHash: def __post_init__(self) -> None: assert self.name in _SUPPORTED_HASHES - @classmethod - def parse_pep658_hash(cls, dist_info_metadata: str) -> Optional["LinkHash"]: - """Parse a PEP 658 data-dist-info-metadata hash.""" - if dist_info_metadata == "true": - return None - name, sep, value = dist_info_metadata.partition("=") - if not sep: - return None - if name not in _SUPPORTED_HASHES: - return None - return cls(name=name, value=value) - @classmethod @functools.lru_cache(maxsize=None) def find_hash_url_fragment(cls, url: str) -> Optional["LinkHash"]: @@ -107,6 +95,28 @@ def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool: return hashes.is_hash_allowed(self.name, hex_digest=self.value) +@dataclass(frozen=True) +class MetadataFile: + """Information about a core metadata file associated with a distribution.""" + + hashes: Optional[Dict[str, str]] + + def __post_init__(self) -> None: + if self.hashes is not None: + assert all(name in _SUPPORTED_HASHES for name in self.hashes) + + +def supported_hashes(hashes: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]: + # Remove any unsupported hash types from the mapping. If this leaves no + # supported hashes, return None + if hashes is None: + return None + hashes = {n: v for n, v in hashes.items() if n in _SUPPORTED_HASHES} + if not hashes: + return None + return hashes + + def _clean_url_path_part(part: str) -> str: """ Clean a "part" of a URL path (i.e. after splitting on "@" characters). @@ -179,7 +189,7 @@ class Link(KeyBasedCompareMixin): "comes_from", "requires_python", "yanked_reason", - "dist_info_metadata", + "metadata_file_data", "cache_link_parsing", "egg_fragment", ] @@ -190,7 +200,7 @@ def __init__( comes_from: Optional[Union[str, "IndexContent"]] = None, requires_python: Optional[str] = None, yanked_reason: Optional[str] = None, - dist_info_metadata: Optional[str] = None, + metadata_file_data: Optional[MetadataFile] = None, cache_link_parsing: bool = True, hashes: Optional[Mapping[str, str]] = None, ) -> None: @@ -208,11 +218,10 @@ def __init__( a simple repository HTML link. If the file has been yanked but no reason was provided, this should be the empty string. See PEP 592 for more information and the specification. - :param dist_info_metadata: the metadata attached to the file, or None if no such - metadata is provided. This is the value of the "data-dist-info-metadata" - attribute, if present, in a simple repository HTML link. This may be parsed - into its own `Link` by `self.metadata_link()`. See PEP 658 for more - information and the specification. + :param metadata_file_data: the metadata attached to the file, or None if + no such metadata is provided. This argument, if not None, indicates + that a separate metadata file exists, and also optionally supplies + hashes for that file. :param cache_link_parsing: A flag that is used elsewhere to determine whether resources retrieved from this link should be cached. PyPI URLs should generally have this set to False, for example. @@ -220,6 +229,10 @@ def __init__( determine the validity of a download. """ + # The comes_from, requires_python, and metadata_file_data arguments are + # only used by classmethods of this class, and are not used in client + # code directly. + # url can be a UNC windows share if url.startswith("\\\\"): url = path_to_url(url) @@ -239,7 +252,7 @@ def __init__( self.comes_from = comes_from self.requires_python = requires_python if requires_python else None self.yanked_reason = yanked_reason - self.dist_info_metadata = dist_info_metadata + self.metadata_file_data = metadata_file_data super().__init__(key=url, defining_class=Link) @@ -262,9 +275,25 @@ def from_json( url = _ensure_quoted_url(urllib.parse.urljoin(page_url, file_url)) pyrequire = file_data.get("requires-python") yanked_reason = file_data.get("yanked") - dist_info_metadata = file_data.get("dist-info-metadata") hashes = file_data.get("hashes", {}) + # PEP 714: Indexes must use the name core-metadata, but + # clients should support the old name as a fallback for compatibility. + metadata_info = file_data.get("core-metadata") + if metadata_info is None: + metadata_info = file_data.get("dist-info-metadata") + + # The metadata info value may be a boolean, or a dict of hashes. + if isinstance(metadata_info, dict): + # The file exists, and hashes have been supplied + metadata_file_data = MetadataFile(supported_hashes(metadata_info)) + elif metadata_info: + # The file exists, but there are no hashes + metadata_file_data = MetadataFile(None) + else: + # False or not present: the file does not exist + metadata_file_data = None + # The Link.yanked_reason expects an empty string instead of a boolean. if yanked_reason and not isinstance(yanked_reason, str): yanked_reason = "" @@ -278,7 +307,7 @@ def from_json( requires_python=pyrequire, yanked_reason=yanked_reason, hashes=hashes, - dist_info_metadata=dist_info_metadata, + metadata_file_data=metadata_file_data, ) @classmethod @@ -298,14 +327,39 @@ def from_element( url = _ensure_quoted_url(urllib.parse.urljoin(base_url, href)) pyrequire = anchor_attribs.get("data-requires-python") yanked_reason = anchor_attribs.get("data-yanked") - dist_info_metadata = anchor_attribs.get("data-dist-info-metadata") + + # PEP 714: Indexes must use the name data-core-metadata, but + # clients should support the old name as a fallback for compatibility. + metadata_info = anchor_attribs.get("data-core-metadata") + if metadata_info is None: + metadata_info = anchor_attribs.get("data-dist-info-metadata") + # The metadata info value may be the string "true", or a string of + # the form "hashname=hashval" + if metadata_info == "true": + # The file exists, but there are no hashes + metadata_file_data = MetadataFile(None) + elif metadata_info is None: + # The file does not exist + metadata_file_data = None + else: + # The file exists, and hashes have been supplied + hashname, sep, hashval = metadata_info.partition("=") + if sep == "=": + metadata_file_data = MetadataFile(supported_hashes({hashname: hashval})) + else: + # Error - data is wrong. Treat as no hashes supplied. + logger.debug( + "Index returned invalid data-dist-info-metadata value: %s", + metadata_info, + ) + metadata_file_data = MetadataFile(None) return cls( url, comes_from=page_url, requires_python=pyrequire, yanked_reason=yanked_reason, - dist_info_metadata=dist_info_metadata, + metadata_file_data=metadata_file_data, ) def __str__(self) -> str: @@ -407,17 +461,13 @@ def subdirectory_fragment(self) -> Optional[str]: return match.group(1) def metadata_link(self) -> Optional["Link"]: - """Implementation of PEP 658 parsing.""" - # Note that Link.from_element() parsing the "data-dist-info-metadata" attribute - # from an HTML anchor tag is typically how the Link.dist_info_metadata attribute - # gets set. - if self.dist_info_metadata is None: + """Return a link to the associated core metadata file (if any).""" + if self.metadata_file_data is None: return None metadata_url = f"{self.url_without_fragment}.metadata" - metadata_link_hash = LinkHash.parse_pep658_hash(self.dist_info_metadata) - if metadata_link_hash is None: + if self.metadata_file_data.hashes is None: return Link(metadata_url) - return Link(metadata_url, hashes=metadata_link_hash.as_dict()) + return Link(metadata_url, hashes=self.metadata_file_data.hashes) def as_hashes(self) -> Hashes: return Hashes({k: [v] for k, v in self._hashes.items()}) diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index e855d78e126..5410a4afc03 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -30,6 +30,7 @@ from pip._internal.models.link import ( Link, LinkHash, + MetadataFile, _clean_url_path, _ensure_quoted_url, ) @@ -485,13 +486,30 @@ def test_parse_links_json() -> None: "requires-python": ">=3.7", "dist-info-metadata": False, }, - # Same as above, but parsing dist-info-metadata. + # Same as above, but parsing core-metadata. { "filename": "holygrail-1.0-py3-none-any.whl", "url": "/files/holygrail-1.0-py3-none-any.whl", "hashes": {"sha256": "sha256 hash", "blake2b": "blake2b hash"}, "requires-python": ">=3.7", - "dist-info-metadata": "sha512=aabdd41", + "core-metadata": {"sha512": "aabdd41"}, + }, + # Ensure fallback to dist-info-metadata works + { + "filename": "holygrail-1.0-py3-none-any.whl", + "url": "/files/holygrail-1.0-py3-none-any.whl", + "hashes": {"sha256": "sha256 hash", "blake2b": "blake2b hash"}, + "requires-python": ">=3.7", + "dist-info-metadata": {"sha512": "aabdd41"}, + }, + # Ensure that core-metadata gets priority. + { + "filename": "holygrail-1.0-py3-none-any.whl", + "url": "/files/holygrail-1.0-py3-none-any.whl", + "hashes": {"sha256": "sha256 hash", "blake2b": "blake2b hash"}, + "requires-python": ">=3.7", + "core-metadata": {"sha512": "aabdd41"}, + "dist-info-metadata": {"sha512": "this_is_wrong"}, }, ], } @@ -527,7 +545,23 @@ def test_parse_links_json() -> None: requires_python=">=3.7", yanked_reason=None, hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"}, - dist_info_metadata="sha512=aabdd41", + metadata_file_data=MetadataFile({"sha512": "aabdd41"}), + ), + Link( + "https://example.com/files/holygrail-1.0-py3-none-any.whl", + comes_from=page.url, + requires_python=">=3.7", + yanked_reason=None, + hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"}, + metadata_file_data=MetadataFile({"sha512": "aabdd41"}), + ), + Link( + "https://example.com/files/holygrail-1.0-py3-none-any.whl", + comes_from=page.url, + requires_python=">=3.7", + yanked_reason=None, + hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"}, + metadata_file_data=MetadataFile({"sha512": "aabdd41"}), ), ] @@ -585,30 +619,42 @@ def test_parse_links__yanked_reason(anchor_html: str, expected: Optional[str]) - ), # Test with value "true". ( - '', - "true", + '', + MetadataFile(None), {}, ), # Test with a provided hash value. ( - '', # noqa: E501 - "sha256=aa113592bbe", + '', # noqa: E501 + MetadataFile({"sha256": "aa113592bbe"}), {}, ), # Test with a provided hash value for both the requirement as well as metadata. ( - '', # noqa: E501 - "sha256=aa113592bbe", + '', # noqa: E501 + MetadataFile({"sha256": "aa113592bbe"}), {"sha512": "abc132409cb"}, ), + # Ensure the fallback to the old name works. + ( + '', # noqa: E501 + MetadataFile({"sha256": "aa113592bbe"}), + {}, + ), + # Ensure that the data-core-metadata name gets priority. + ( + '', # noqa: E501 + MetadataFile({"sha256": "aa113592bbe"}), + {}, + ), ], ) -def test_parse_links__dist_info_metadata( +def test_parse_links__metadata_file_data( anchor_html: str, expected: Optional[str], hashes: Dict[str, str], ) -> None: - link = _test_parse_links_data_attribute(anchor_html, "dist_info_metadata", expected) + link = _test_parse_links_data_attribute(anchor_html, "metadata_file_data", expected) assert link._hashes == hashes @@ -1080,17 +1126,26 @@ def test_link_hash_parsing(url: str, result: Optional[LinkHash]) -> None: @pytest.mark.parametrize( - "dist_info_metadata, result", + "metadata_attrib, expected", [ - ("sha256=aa113592bbe", LinkHash("sha256", "aa113592bbe")), - ("sha256=", LinkHash("sha256", "")), - ("sha500=aa113592bbe", None), - ("true", None), - ("", None), - ("aa113592bbe", None), + ("sha256=aa113592bbe", MetadataFile({"sha256": "aa113592bbe"})), + ("sha256=", MetadataFile({"sha256": ""})), + ("sha500=aa113592bbe", MetadataFile(None)), + ("true", MetadataFile(None)), + (None, None), + # Attribute is present but invalid + ("", MetadataFile(None)), + ("aa113592bbe", MetadataFile(None)), ], ) -def test_pep658_hash_parsing( - dist_info_metadata: str, result: Optional[LinkHash] +def test_metadata_file_info_parsing_html( + metadata_attrib: str, expected: Optional[MetadataFile] ) -> None: - assert LinkHash.parse_pep658_hash(dist_info_metadata) == result + attribs: Dict[str, Optional[str]] = { + "href": "something", + "data-dist-info-metadata": metadata_attrib, + } + page_url = "dummy_for_comes_from" + base_url = "https://index.url/simple" + link = Link.from_element(attribs, page_url, base_url) + assert link is not None and link.metadata_file_data == expected