Skip to content

Commit

Permalink
Merge pull request #12078 from pfmoore/core_metadata
Browse files Browse the repository at this point in the history
Fix parsing of JSON index dist-info-metadata values
  • Loading branch information
pfmoore authored Jun 27, 2023
2 parents 98814e5 + a934f38 commit 778b42f
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 53 deletions.
1 change: 1 addition & 0 deletions news/12042.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Correctly parse ``dist-info-metadata`` values from JSON-format index data.
114 changes: 82 additions & 32 deletions src/pip/_internal/models/link.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,6 @@ class LinkHash:
def __post_init__(self) -> None:
assert self.name in _SUPPORTED_HASHES

@classmethod
def parse_pep658_hash(cls, dist_info_metadata: str) -> Optional["LinkHash"]:
"""Parse a PEP 658 data-dist-info-metadata hash."""
if dist_info_metadata == "true":
return None
name, sep, value = dist_info_metadata.partition("=")
if not sep:
return None
if name not in _SUPPORTED_HASHES:
return None
return cls(name=name, value=value)

@classmethod
@functools.lru_cache(maxsize=None)
def find_hash_url_fragment(cls, url: str) -> Optional["LinkHash"]:
Expand All @@ -107,6 +95,28 @@ def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
return hashes.is_hash_allowed(self.name, hex_digest=self.value)


@dataclass(frozen=True)
class MetadataFile:
"""Information about a core metadata file associated with a distribution."""

hashes: Optional[Dict[str, str]]

def __post_init__(self) -> None:
if self.hashes is not None:
assert all(name in _SUPPORTED_HASHES for name in self.hashes)


def supported_hashes(hashes: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
# Remove any unsupported hash types from the mapping. If this leaves no
# supported hashes, return None
if hashes is None:
return None
hashes = {n: v for n, v in hashes.items() if n in _SUPPORTED_HASHES}
if not hashes:
return None
return hashes


def _clean_url_path_part(part: str) -> str:
"""
Clean a "part" of a URL path (i.e. after splitting on "@" characters).
Expand Down Expand Up @@ -179,7 +189,7 @@ class Link(KeyBasedCompareMixin):
"comes_from",
"requires_python",
"yanked_reason",
"dist_info_metadata",
"metadata_file_data",
"cache_link_parsing",
"egg_fragment",
]
Expand All @@ -190,7 +200,7 @@ def __init__(
comes_from: Optional[Union[str, "IndexContent"]] = None,
requires_python: Optional[str] = None,
yanked_reason: Optional[str] = None,
dist_info_metadata: Optional[str] = None,
metadata_file_data: Optional[MetadataFile] = None,
cache_link_parsing: bool = True,
hashes: Optional[Mapping[str, str]] = None,
) -> None:
Expand All @@ -208,18 +218,21 @@ def __init__(
a simple repository HTML link. If the file has been yanked but
no reason was provided, this should be the empty string. See
PEP 592 for more information and the specification.
:param dist_info_metadata: the metadata attached to the file, or None if no such
metadata is provided. This is the value of the "data-dist-info-metadata"
attribute, if present, in a simple repository HTML link. This may be parsed
into its own `Link` by `self.metadata_link()`. See PEP 658 for more
information and the specification.
:param metadata_file_data: the metadata attached to the file, or None if
no such metadata is provided. This argument, if not None, indicates
that a separate metadata file exists, and also optionally supplies
hashes for that file.
:param cache_link_parsing: A flag that is used elsewhere to determine
whether resources retrieved from this link should be cached. PyPI
URLs should generally have this set to False, for example.
:param hashes: A mapping of hash names to digests to allow us to
determine the validity of a download.
"""

# The comes_from, requires_python, and metadata_file_data arguments are
# only used by classmethods of this class, and are not used in client
# code directly.

# url can be a UNC windows share
if url.startswith("\\\\"):
url = path_to_url(url)
Expand All @@ -239,7 +252,7 @@ def __init__(
self.comes_from = comes_from
self.requires_python = requires_python if requires_python else None
self.yanked_reason = yanked_reason
self.dist_info_metadata = dist_info_metadata
self.metadata_file_data = metadata_file_data

super().__init__(key=url, defining_class=Link)

Expand All @@ -262,9 +275,25 @@ def from_json(
url = _ensure_quoted_url(urllib.parse.urljoin(page_url, file_url))
pyrequire = file_data.get("requires-python")
yanked_reason = file_data.get("yanked")
dist_info_metadata = file_data.get("dist-info-metadata")
hashes = file_data.get("hashes", {})

# PEP 714: Indexes must use the name core-metadata, but
# clients should support the old name as a fallback for compatibility.
metadata_info = file_data.get("core-metadata")
if metadata_info is None:
metadata_info = file_data.get("dist-info-metadata")

# The metadata info value may be a boolean, or a dict of hashes.
if isinstance(metadata_info, dict):
# The file exists, and hashes have been supplied
metadata_file_data = MetadataFile(supported_hashes(metadata_info))
elif metadata_info:
# The file exists, but there are no hashes
metadata_file_data = MetadataFile(None)
else:
# False or not present: the file does not exist
metadata_file_data = None

# The Link.yanked_reason expects an empty string instead of a boolean.
if yanked_reason and not isinstance(yanked_reason, str):
yanked_reason = ""
Expand All @@ -278,7 +307,7 @@ def from_json(
requires_python=pyrequire,
yanked_reason=yanked_reason,
hashes=hashes,
dist_info_metadata=dist_info_metadata,
metadata_file_data=metadata_file_data,
)

@classmethod
Expand All @@ -298,14 +327,39 @@ def from_element(
url = _ensure_quoted_url(urllib.parse.urljoin(base_url, href))
pyrequire = anchor_attribs.get("data-requires-python")
yanked_reason = anchor_attribs.get("data-yanked")
dist_info_metadata = anchor_attribs.get("data-dist-info-metadata")

# PEP 714: Indexes must use the name data-core-metadata, but
# clients should support the old name as a fallback for compatibility.
metadata_info = anchor_attribs.get("data-core-metadata")
if metadata_info is None:
metadata_info = anchor_attribs.get("data-dist-info-metadata")
# The metadata info value may be the string "true", or a string of
# the form "hashname=hashval"
if metadata_info == "true":
# The file exists, but there are no hashes
metadata_file_data = MetadataFile(None)
elif metadata_info is None:
# The file does not exist
metadata_file_data = None
else:
# The file exists, and hashes have been supplied
hashname, sep, hashval = metadata_info.partition("=")
if sep == "=":
metadata_file_data = MetadataFile(supported_hashes({hashname: hashval}))
else:
# Error - data is wrong. Treat as no hashes supplied.
logger.debug(
"Index returned invalid data-dist-info-metadata value: %s",
metadata_info,
)
metadata_file_data = MetadataFile(None)

return cls(
url,
comes_from=page_url,
requires_python=pyrequire,
yanked_reason=yanked_reason,
dist_info_metadata=dist_info_metadata,
metadata_file_data=metadata_file_data,
)

def __str__(self) -> str:
Expand Down Expand Up @@ -407,17 +461,13 @@ def subdirectory_fragment(self) -> Optional[str]:
return match.group(1)

def metadata_link(self) -> Optional["Link"]:
"""Implementation of PEP 658 parsing."""
# Note that Link.from_element() parsing the "data-dist-info-metadata" attribute
# from an HTML anchor tag is typically how the Link.dist_info_metadata attribute
# gets set.
if self.dist_info_metadata is None:
"""Return a link to the associated core metadata file (if any)."""
if self.metadata_file_data is None:
return None
metadata_url = f"{self.url_without_fragment}.metadata"
metadata_link_hash = LinkHash.parse_pep658_hash(self.dist_info_metadata)
if metadata_link_hash is None:
if self.metadata_file_data.hashes is None:
return Link(metadata_url)
return Link(metadata_url, hashes=metadata_link_hash.as_dict())
return Link(metadata_url, hashes=self.metadata_file_data.hashes)

def as_hashes(self) -> Hashes:
return Hashes({k: [v] for k, v in self._hashes.items()})
Expand Down
97 changes: 76 additions & 21 deletions tests/unit/test_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from pip._internal.models.link import (
Link,
LinkHash,
MetadataFile,
_clean_url_path,
_ensure_quoted_url,
)
Expand Down Expand Up @@ -485,13 +486,30 @@ def test_parse_links_json() -> None:
"requires-python": ">=3.7",
"dist-info-metadata": False,
},
# Same as above, but parsing dist-info-metadata.
# Same as above, but parsing core-metadata.
{
"filename": "holygrail-1.0-py3-none-any.whl",
"url": "/files/holygrail-1.0-py3-none-any.whl",
"hashes": {"sha256": "sha256 hash", "blake2b": "blake2b hash"},
"requires-python": ">=3.7",
"dist-info-metadata": "sha512=aabdd41",
"core-metadata": {"sha512": "aabdd41"},
},
# Ensure fallback to dist-info-metadata works
{
"filename": "holygrail-1.0-py3-none-any.whl",
"url": "/files/holygrail-1.0-py3-none-any.whl",
"hashes": {"sha256": "sha256 hash", "blake2b": "blake2b hash"},
"requires-python": ">=3.7",
"dist-info-metadata": {"sha512": "aabdd41"},
},
# Ensure that core-metadata gets priority.
{
"filename": "holygrail-1.0-py3-none-any.whl",
"url": "/files/holygrail-1.0-py3-none-any.whl",
"hashes": {"sha256": "sha256 hash", "blake2b": "blake2b hash"},
"requires-python": ">=3.7",
"core-metadata": {"sha512": "aabdd41"},
"dist-info-metadata": {"sha512": "this_is_wrong"},
},
],
}
Expand Down Expand Up @@ -527,7 +545,23 @@ def test_parse_links_json() -> None:
requires_python=">=3.7",
yanked_reason=None,
hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"},
dist_info_metadata="sha512=aabdd41",
metadata_file_data=MetadataFile({"sha512": "aabdd41"}),
),
Link(
"https://example.com/files/holygrail-1.0-py3-none-any.whl",
comes_from=page.url,
requires_python=">=3.7",
yanked_reason=None,
hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"},
metadata_file_data=MetadataFile({"sha512": "aabdd41"}),
),
Link(
"https://example.com/files/holygrail-1.0-py3-none-any.whl",
comes_from=page.url,
requires_python=">=3.7",
yanked_reason=None,
hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"},
metadata_file_data=MetadataFile({"sha512": "aabdd41"}),
),
]

Expand Down Expand Up @@ -585,30 +619,42 @@ def test_parse_links__yanked_reason(anchor_html: str, expected: Optional[str]) -
),
# Test with value "true".
(
'<a href="/pkg1-1.0.tar.gz" data-dist-info-metadata="true"></a>',
"true",
'<a href="/pkg1-1.0.tar.gz" data-core-metadata="true"></a>',
MetadataFile(None),
{},
),
# Test with a provided hash value.
(
'<a href="/pkg1-1.0.tar.gz" data-dist-info-metadata="sha256=aa113592bbe"></a>', # noqa: E501
"sha256=aa113592bbe",
'<a href="/pkg1-1.0.tar.gz" data-core-metadata="sha256=aa113592bbe"></a>', # noqa: E501
MetadataFile({"sha256": "aa113592bbe"}),
{},
),
# Test with a provided hash value for both the requirement as well as metadata.
(
'<a href="/pkg1-1.0.tar.gz#sha512=abc132409cb" data-dist-info-metadata="sha256=aa113592bbe"></a>', # noqa: E501
"sha256=aa113592bbe",
'<a href="/pkg1-1.0.tar.gz#sha512=abc132409cb" data-core-metadata="sha256=aa113592bbe"></a>', # noqa: E501
MetadataFile({"sha256": "aa113592bbe"}),
{"sha512": "abc132409cb"},
),
# Ensure the fallback to the old name works.
(
'<a href="/pkg1-1.0.tar.gz" data-dist-info-metadata="sha256=aa113592bbe"></a>', # noqa: E501
MetadataFile({"sha256": "aa113592bbe"}),
{},
),
# Ensure that the data-core-metadata name gets priority.
(
'<a href="/pkg1-1.0.tar.gz" data-core-metadata="sha256=aa113592bbe" data-dist-info-metadata="sha256=invalid_value"></a>', # noqa: E501
MetadataFile({"sha256": "aa113592bbe"}),
{},
),
],
)
def test_parse_links__dist_info_metadata(
def test_parse_links__metadata_file_data(
anchor_html: str,
expected: Optional[str],
hashes: Dict[str, str],
) -> None:
link = _test_parse_links_data_attribute(anchor_html, "dist_info_metadata", expected)
link = _test_parse_links_data_attribute(anchor_html, "metadata_file_data", expected)
assert link._hashes == hashes


Expand Down Expand Up @@ -1080,17 +1126,26 @@ def test_link_hash_parsing(url: str, result: Optional[LinkHash]) -> None:


@pytest.mark.parametrize(
"dist_info_metadata, result",
"metadata_attrib, expected",
[
("sha256=aa113592bbe", LinkHash("sha256", "aa113592bbe")),
("sha256=", LinkHash("sha256", "")),
("sha500=aa113592bbe", None),
("true", None),
("", None),
("aa113592bbe", None),
("sha256=aa113592bbe", MetadataFile({"sha256": "aa113592bbe"})),
("sha256=", MetadataFile({"sha256": ""})),
("sha500=aa113592bbe", MetadataFile(None)),
("true", MetadataFile(None)),
(None, None),
# Attribute is present but invalid
("", MetadataFile(None)),
("aa113592bbe", MetadataFile(None)),
],
)
def test_pep658_hash_parsing(
dist_info_metadata: str, result: Optional[LinkHash]
def test_metadata_file_info_parsing_html(
metadata_attrib: str, expected: Optional[MetadataFile]
) -> None:
assert LinkHash.parse_pep658_hash(dist_info_metadata) == result
attribs: Dict[str, Optional[str]] = {
"href": "something",
"data-dist-info-metadata": metadata_attrib,
}
page_url = "dummy_for_comes_from"
base_url = "https://index.url/simple"
link = Link.from_element(attribs, page_url, base_url)
assert link is not None and link.metadata_file_data == expected

0 comments on commit 778b42f

Please sign in to comment.