-
Notifications
You must be signed in to change notification settings - Fork 3.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add shallow download network utilities
make types pass; add --shallow-wheels cli arg; add news; rename news; make the metadata test pass on windows; use --shallow-wheels unconditionally and remove the cli arg; download all wheels at the end of the run; add a hack to avoid signal() erroring in a background thread; avoid using shallow wheels for non-remote file paths; add --unstable-feature=shallow_wheels!
- Loading branch information
1 parent
92c7eeb
commit 9df5c8f
Showing
22 changed files
with
868 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Add a set of utilities in ``pip._internal.network.shallow`` for fetching metadata from remote wheel files without downloading the entire file. Link these utilities into the v2 resolver by adding a new ShallowWheelDistribution AbstractDistribution subclass. Expose this behavior via a --unstable-feature=shallow_wheels command-line option to ``pip download``. This produces a marked performance improvement. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
import os | ||
|
||
from pip._vendor.pkg_resources import DistInfoDistribution | ||
|
||
from pip._internal.distributions.base import AbstractDistribution | ||
from pip._internal.network.shallow.httpfile import Context as HttpContext | ||
from pip._internal.network.shallow.httpfile import Url | ||
from pip._internal.network.shallow.wheel import Context as WheelContext | ||
from pip._internal.network.shallow.wheel import ( | ||
ProjectName, | ||
WheelMetadataRequest, | ||
) | ||
from pip._internal.network.shallow.zipfile import Context as ZipContext | ||
from pip._internal.utils.typing import MYPY_CHECK_RUNNING | ||
from pip._internal.utils.wheel import WheelMetadata | ||
|
||
if MYPY_CHECK_RUNNING: | ||
from typing import Any | ||
from pip._vendor.pkg_resources import Distribution | ||
from pip._internal.index.package_finder import PackageFinder | ||
from pip._internal.models.link import Link | ||
from pip._internal.network.download import Downloader | ||
from pip._internal.req import InstallRequirement | ||
|
||
|
||
class DistributionNeedingFinalHydration(DistInfoDistribution):
    """A ``DistInfoDistribution`` that remembers how to fetch its own file.

    The link, downloader and target directory given to the constructor are
    kept on the instance so that ``finally_hydrate()`` can write the file
    behind the link into the download directory at a later point.
    """

    def __init__(self, link, downloader, download_dir, *args, **kwargs):
        # type: (Link, Downloader, str, Any, Any) -> None
        """Store the download parameters; forward the rest to the base class.

        :param link: passed to ``downloader`` by ``finally_hydrate()``.
        :param downloader: callable invoked with ``link``; its result must
            expose ``filename`` and ``chunks``.
        :param download_dir: directory the downloaded file is written into.
        :param args: positional arguments for ``DistInfoDistribution``.
        :param kwargs: keyword arguments for ``DistInfoDistribution``.
        """
        super(DistributionNeedingFinalHydration, self).__init__(
            *args, **kwargs
        )
        self.download_dir = download_dir
        self.downloader = downloader
        self.final_link = link

    def finally_hydrate(self):
        # type: () -> None
        """Download ``final_link`` and write its chunks into ``download_dir``.

        The output file is named after the ``filename`` reported by the
        download and is opened in binary write mode, so an existing file of
        the same name is overwritten.
        """
        fetched = self.downloader(self.final_link)
        destination = os.path.join(self.download_dir, fetched.filename)
        with open(destination, 'wb') as out_file:
            out_file.writelines(fetched.chunks)
|
||
|
||
class ShallowWheelDistribution(AbstractDistribution):
    """A remote wheel whose metadata is read through the shallow contexts.

    No preparation step is performed: ``prepare_distribution_metadata()`` is
    a no-op, and ``get_pkg_resources_distribution()`` asks the wheel context
    for the ``METADATA`` contents of the wheel at the requirement's link.
    """

    def __init__(self, req, downloader, download_dir):
        # type: (InstallRequirement, Downloader, str) -> None
        """
        :param req: requirement describing the wheel; handed to the base
            class.
        :param downloader: supplies the session for metadata requests and is
            passed on to the returned distribution.
        :param download_dir: directory passed on to the returned
            distribution.
        """
        super(ShallowWheelDistribution, self).__init__(req)
        self._download_dir = download_dir
        self._downloader = downloader

    @property
    def _wheel_context(self):
        # type: () -> WheelContext
        """Build a new http -> zip -> wheel context chain on each access,
        using the downloader's session.
        """
        session = self._downloader.get_session()
        return WheelContext(ZipContext(HttpContext(session)))

    def get_pkg_resources_distribution(self):
        # type: () -> Distribution
        """Fetch the wheel's ``METADATA`` via the wheel context and return a
        Distribution backed by that in-memory metadata.

        The returned ``DistributionNeedingFinalHydration`` also receives the
        link, downloader and download directory.
        """
        requirement = self.req
        # Wheels are never unnamed.
        assert requirement.name
        assert requirement.link

        name = ProjectName(requirement.name)
        metadata_request = WheelMetadataRequest(
            url=Url(requirement.link.url),
            project_name=name,
        )
        extracted = self._wheel_context.extract_wheel_metadata(
            metadata_request)
        contents = extracted.contents

        filename = requirement.link.filename
        in_memory_metadata = WheelMetadata({'METADATA': contents}, filename)

        return DistributionNeedingFinalHydration(
            link=requirement.link,
            downloader=self._downloader,
            download_dir=self._download_dir,
            location=filename,
            metadata=in_memory_metadata,
            project_name=name.name,
        )

    def prepare_distribution_metadata(self, finder, build_isolation):
        # type: (PackageFinder, bool) -> None
        """Do nothing; both arguments are ignored."""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
""" | ||
Download ranges of files over remote http. | ||
""" | ||
|
||
from collections import namedtuple | ||
|
||
from pip._vendor import requests | ||
|
||
from pip._internal.utils.typing import MYPY_CHECK_RUNNING | ||
from pip._internal.utils.urls import get_url_scheme | ||
|
||
if MYPY_CHECK_RUNNING: | ||
from typing import Any, Optional | ||
|
||
|
||
def url_is_remote(url):
    # type: (str) -> bool
    """Return True when the scheme of ``url`` is ``http`` or ``https``."""
    scheme = get_url_scheme(url)
    return scheme == 'http' or scheme == 'https'
|
||
|
||
class Url(namedtuple('Url', ['url'])):
    """One-field tuple holding a URL string that must be remote.

    Construction asserts ``url_is_remote(url)``, so only ``http`` and
    ``https`` URLs are accepted.
    """

    def __new__(cls, url):
        # type: (str) -> Url
        assert url_is_remote(url)
        instance = super(Url, cls).__new__(cls, url)
        return instance
|
||
|
||
class HttpFileRequest(namedtuple('HttpFileRequest', ['url'])):
    """Request describing a remote file by its single ``url`` field."""
|
||
|
||
class Size(namedtuple('Size', ['size'])):
    """Non-negative integer quantity wrapped in a one-field tuple.

    ``+`` and ``-`` operate on the wrapped value (rather than concatenating
    tuples), and the ordering operators compare the wrapped values. Every
    binary operator asserts that the other operand is an instance of this
    object's type. Results of ``+`` and ``-`` are built with ``Size(...)``,
    so a negative difference trips the non-negativity assertion.
    """

    def __new__(cls, size=0):
        # type: (int) -> Size
        assert size >= 0
        instance = super(Size, cls).__new__(cls, size)
        return instance

    def _peer_value(self, other):
        # type: (Any) -> int
        """Return ``other.size`` after asserting ``other`` is our type."""
        assert isinstance(other, type(self))
        return other.size

    def __add__(self, other):
        # type: (Any) -> Size
        total = self.size + self._peer_value(other)
        return Size(total)

    def __sub__(self, other):
        # type: (Any) -> Size
        remainder = self.size - self._peer_value(other)
        return Size(remainder)

    def __lt__(self, other):
        # type: (Any) -> bool
        return self.size < self._peer_value(other)

    def __le__(self, other):
        # type: (Any) -> bool
        return self.size <= self._peer_value(other)

    def __gt__(self, other):
        # type: (Any) -> bool
        return self.size > self._peer_value(other)

    def __ge__(self, other):
        # type: (Any) -> bool
        return self.size >= self._peer_value(other)
|
||
|
||
class ByteRange(namedtuple('ByteRange', ['start', 'end'])):
    """Span of bytes whose ``end`` is exclusive: ``[start, end)``.

    Construction asserts ``end >= start``.
    """

    def __new__(cls, start, end):
        # type: (Size, Size) -> ByteRange
        assert end >= start
        instance = super(ByteRange, cls).__new__(cls, start, end)
        return instance

    def as_bytes_range_header(self):
        # type: () -> str
        """Format this span as the value of an HTTP ``Range`` header."""
        first_byte = self.start.size
        # NB: The byte ranges accepted by the header are inclusive, while
        # ``end`` is exclusive, so the last position is one below ``end``.
        last_byte = self.end.size - 1
        return "bytes={start}-{end}".format(start=first_byte, end=last_byte)

    def size_diff(self):
        # type: () -> Size
        """Return ``end - start``, i.e. the length of the span."""
        lower, upper = self
        return upper - lower
|
||
|
||
class BytesRangeRequest(namedtuple('BytesRangeRequest', ['start', 'end'])):
    """Requested byte span in which either bound may be left as ``None``.

    When both bounds are given, construction asserts ``end >= start``.
    """

    def __new__(cls, start, end):
        # type: (Optional[Size], Optional[Size]) -> BytesRangeRequest
        if not (start is None or end is None):
            assert end >= start
        instance = super(BytesRangeRequest, cls).__new__(cls, start, end)
        return instance

    def get_byte_range(self, size):
        # type: (Size) -> ByteRange
        """Resolve this request against a file of the given total ``size``.

        A missing ``start`` becomes 0 and a missing ``end`` becomes
        ``size``; bounds that are present are asserted not to exceed
        ``size``.
        """
        if self.start is not None:
            assert self.start <= size, "???/start={start},size={size}".format(
                start=self.start, size=size)
            first = self.start.size
        else:
            first = 0

        if self.end is not None:
            assert self.end <= size
            last = self.end.size
        else:
            last = size.size

        return ByteRange(start=Size(first), end=Size(last))
|
||
|
||
class HttpFile(namedtuple('HttpFile', ['url', 'size'])):
    """A remote file described by its ``url`` and its total ``size``."""
|
||
|
||
class Context(object):
    """Issue HEAD and ranged GET requests for remote files via a session."""

    def __init__(self, session=None):
        # type: (Optional[requests.Session]) -> None
        """
        :param session: session used for every request. A new
            ``requests.Session()`` is created when it is omitted or falsy.
        """
        self.session = session or requests.Session()

    def head(self, request):
        # type: (HttpFileRequest) -> HttpFile
        """Send a HEAD request and return the file's URL and total size.

        :param request: its ``url.url`` is the address that is queried.
        :returns: an ``HttpFile`` whose size is taken from the
            ``Content-Length`` response header.
        :raises AssertionError: if the response does not advertise ``bytes``
            in ``Accept-Ranges`` (including when the header is absent).
        :raises KeyError: if the response has no ``Content-Length`` header.

        ``raise_for_status()`` is called on the response, so HTTP error
        statuses propagate as whatever exception it raises.
        """
        resp = self.session.head(request.url.url)
        resp.raise_for_status()
        # Look the header up with a default so that a server which omits
        # ``Accept-Ranges`` fails the descriptive assertion below instead of
        # surfacing as a bare KeyError.
        accept_ranges = resp.headers.get("Accept-Ranges", "")
        assert (
            "bytes" in accept_ranges
        ), "???/bytes was not found in range headers"
        content_length = int(resp.headers["Content-Length"])
        return HttpFile(url=request.url, size=Size(content_length))

    def range_request(self, http_file, request):
        # type: (HttpFile, BytesRangeRequest) -> bytes
        """Return the bytes of ``http_file`` selected by ``request``.

        :param http_file: the remote file; its ``size`` is used to resolve
            open-ended bounds in ``request``.
        :param request: the byte span to fetch.
        :returns: exactly the requested bytes.
        :raises AssertionError: if the number of bytes obtained differs from
            the length of the resolved range.
        """
        byte_range = request.get_byte_range(http_file.size)

        if byte_range.start == byte_range.end:
            # An empty range cannot be written as a ``Range`` header: the
            # inclusive last position would be one below the first. Nothing
            # needs to be fetched, so skip the request altogether.
            return b""

        resp = self.session.get(
            http_file.url.url,
            headers={"Range": byte_range.as_bytes_range_header()})
        resp.raise_for_status()

        if Size(len(resp.content)) == http_file.size:
            # This request for the full URL contents is cached, and we should
            # return just the requested byte range.
            start = byte_range.start.size
            end = byte_range.end.size
            response_bytes = resp.content[start:end]
        else:
            response_bytes = resp.content

        size_diff = byte_range.size_diff()
        assert (
            Size(len(response_bytes)) == size_diff
        ), ("???/response should have been length {}, but got (size {}):\n{!r}"
            .format(size_diff, len(response_bytes), response_bytes))
        return response_bytes
Oops, something went wrong.