From 9df5c8fe3b04b80b01acb3448322432aa9425d36 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 16 Jun 2020 03:00:05 -0700 Subject: [PATCH] add shallow download network utilities make types pass add --shallow-wheels cli arg add news rename news make the metadata test pass on windows use --shallow-wheels unconditionally and remove the cli arg download all wheels at the end of the run add a hack to avoid signal() erroring in a background thread avoid using shallow wheels for non-remote file paths add --unstable-feature=shallow_wheels! --- news/8448.feature | 1 + src/pip/_internal/cli/cmdoptions.py | 2 +- src/pip/_internal/cli/progress_bars.py | 19 +- src/pip/_internal/cli/req_command.py | 1 + src/pip/_internal/commands/download.py | 1 + src/pip/_internal/commands/install.py | 1 + src/pip/_internal/commands/wheel.py | 1 + .../_internal/distributions/shallow_wheel.py | 100 +++++++++ src/pip/_internal/network/download.py | 4 + src/pip/_internal/network/shallow/__init__.py | 0 src/pip/_internal/network/shallow/httpfile.py | 156 +++++++++++++ src/pip/_internal/network/shallow/wheel.py | 75 +++++++ src/pip/_internal/network/shallow/zipfile.py | 149 +++++++++++++ src/pip/_internal/operations/prepare.py | 9 + src/pip/_internal/req/req_set.py | 38 ++++ .../resolution/resolvelib/resolver.py | 7 + tests/unit/shallow/__init__.py | 0 tests/unit/shallow/test_httpfile.py | 29 +++ tests/unit/shallow/test_wheel.py | 39 ++++ tests/unit/shallow/test_zipfile.py | 31 +++ tests/unit/shallow/util.py | 208 ++++++++++++++++++ tests/unit/test_req.py | 1 + 22 files changed, 868 insertions(+), 4 deletions(-) create mode 100644 news/8448.feature create mode 100644 src/pip/_internal/distributions/shallow_wheel.py create mode 100644 src/pip/_internal/network/shallow/__init__.py create mode 100644 src/pip/_internal/network/shallow/httpfile.py create mode 100644 src/pip/_internal/network/shallow/wheel.py create mode 100644 
src/pip/_internal/network/shallow/zipfile.py create mode 100644 tests/unit/shallow/__init__.py create mode 100644 tests/unit/shallow/test_httpfile.py create mode 100644 tests/unit/shallow/test_wheel.py create mode 100644 tests/unit/shallow/test_zipfile.py create mode 100644 tests/unit/shallow/util.py diff --git a/news/8448.feature b/news/8448.feature new file mode 100644 index 00000000000..4ca54441fed --- /dev/null +++ b/news/8448.feature @@ -0,0 +1 @@ +Add a set of utilities in ``pip._internal.network.shallow`` for fetching metadata from remote wheel files without downloading the entire file. Link these utilities into the v2 resolver by adding a new ShallowWheelDistribution AbstractDistribution subclass. Expose this behavior via a --unstable-feature=shallow_wheels command-line option to ``pip download``. This produces a marked performance improvement. diff --git a/src/pip/_internal/cli/cmdoptions.py b/src/pip/_internal/cli/cmdoptions.py index 4c557efa80f..2af2decc784 100644 --- a/src/pip/_internal/cli/cmdoptions.py +++ b/src/pip/_internal/cli/cmdoptions.py @@ -919,7 +919,7 @@ def check_list_path_option(options): metavar='feature', action='append', default=[], - choices=['resolver'], + choices=['resolver', 'shallow_wheels'], help=SUPPRESS_HELP, # TODO: Enable this when the resolver actually works. 
# help='Enable unstable feature(s) that may be backward incompatible.', ) # type: Callable[..., Option] diff --git a/src/pip/_internal/cli/progress_bars.py b/src/pip/_internal/cli/progress_bars.py index 69338552f13..f357b89fef4 100644 --- a/src/pip/_internal/cli/progress_bars.py +++ b/src/pip/_internal/cli/progress_bars.py @@ -14,7 +14,7 @@ from pip._internal.utils.typing import MYPY_CHECK_RUNNING if MYPY_CHECK_RUNNING: - from typing import Any, Dict, List + from typing import Any, Dict, List, Optional try: from pip._vendor import colorama @@ -24,6 +24,18 @@ colorama = None +def _signal_unless_backgrounded(signum, handler): + # type: (int, Any) -> Optional[Any] + try: + return signal(signum, handler) + except ValueError: + # FIXME: this otherwise doesn't work when called from a non-main + # thread. This therefore fails if we try to download more than one + # wheel at once via threading, which calls back to Downloader, which + # uses this progress bar. + return None + + def _select_progress_class(preferred, fallback): # type: (Bar, Bar) -> Bar encoding = getattr(preferred.file, "encoding", None) @@ -84,7 +96,8 @@ def __init__(self, *args, **kwargs): **kwargs ) - self.original_handler = signal(SIGINT, self.handle_sigint) + self.original_handler = _signal_unless_backgrounded( + SIGINT, self.handle_sigint) # If signal() returns None, the previous handler was not installed from # Python, and we cannot restore it. This probably should not happen, @@ -103,7 +116,7 @@ def finish(self): normally, or gets interrupted. 
""" super(InterruptibleMixin, self).finish() # type: ignore - signal(SIGINT, self.original_handler) + _signal_unless_backgrounded(SIGINT, self.original_handler) def handle_sigint(self, signum, frame): # type: ignore """ diff --git a/src/pip/_internal/cli/req_command.py b/src/pip/_internal/cli/req_command.py index 1bc59c175c7..8b1ca24eacb 100644 --- a/src/pip/_internal/cli/req_command.py +++ b/src/pip/_internal/cli/req_command.py @@ -231,6 +231,7 @@ def make_requirement_preparer( finder=finder, require_hashes=options.require_hashes, use_user_site=use_user_site, + use_shallow_wheels=('shallow_wheels' in options.unstable_features), ) @staticmethod diff --git a/src/pip/_internal/commands/download.py b/src/pip/_internal/commands/download.py index 46e8371261e..db209e9d76e 100644 --- a/src/pip/_internal/commands/download.py +++ b/src/pip/_internal/commands/download.py @@ -133,6 +133,7 @@ def run(self, options, args): requirement_set = resolver.resolve( reqs, check_supported_wheels=True ) + requirement_set.perform_all_final_hydration() downloaded = ' '.join([req.name # type: ignore for req in requirement_set.requirements.values() diff --git a/src/pip/_internal/commands/install.py b/src/pip/_internal/commands/install.py index df21e7ceca2..2e416faeca8 100644 --- a/src/pip/_internal/commands/install.py +++ b/src/pip/_internal/commands/install.py @@ -326,6 +326,7 @@ def run(self, options, args): requirement_set = resolver.resolve( reqs, check_supported_wheels=not options.target_dir ) + requirement_set.perform_all_final_hydration() try: pip_req = requirement_set.get_requirement("pip") diff --git a/src/pip/_internal/commands/wheel.py b/src/pip/_internal/commands/wheel.py index 0f718566bd0..6aaa4f01b70 100644 --- a/src/pip/_internal/commands/wheel.py +++ b/src/pip/_internal/commands/wheel.py @@ -155,6 +155,7 @@ def run(self, options, args): requirement_set = resolver.resolve( reqs, check_supported_wheels=True ) + requirement_set.perform_all_final_hydration() reqs_to_build = [ r 
for r in requirement_set.requirements.values() diff --git a/src/pip/_internal/distributions/shallow_wheel.py b/src/pip/_internal/distributions/shallow_wheel.py new file mode 100644 index 00000000000..5ddc55a16ac --- /dev/null +++ b/src/pip/_internal/distributions/shallow_wheel.py @@ -0,0 +1,100 @@ +import os + +from pip._vendor.pkg_resources import DistInfoDistribution + +from pip._internal.distributions.base import AbstractDistribution +from pip._internal.network.shallow.httpfile import Context as HttpContext +from pip._internal.network.shallow.httpfile import Url +from pip._internal.network.shallow.wheel import Context as WheelContext +from pip._internal.network.shallow.wheel import ( + ProjectName, + WheelMetadataRequest, +) +from pip._internal.network.shallow.zipfile import Context as ZipContext +from pip._internal.utils.typing import MYPY_CHECK_RUNNING +from pip._internal.utils.wheel import WheelMetadata + +if MYPY_CHECK_RUNNING: + from typing import Any + from pip._vendor.pkg_resources import Distribution + from pip._internal.index.package_finder import PackageFinder + from pip._internal.models.link import Link + from pip._internal.network.download import Downloader + from pip._internal.req import InstallRequirement + + +class DistributionNeedingFinalHydration(DistInfoDistribution): + def __init__(self, link, downloader, download_dir, *args, **kwargs): + # type: (Link, Downloader, str, Any, Any) -> None + super(DistributionNeedingFinalHydration, self).__init__( + *args, **kwargs) + self.final_link = link + self.downloader = downloader + self.download_dir = download_dir + + def finally_hydrate(self): + # type: () -> None + download = self.downloader(self.final_link) + output_filename = os.path.join(self.download_dir, download.filename) + with open(output_filename, 'wb') as f: + for chunk in download.chunks: + f.write(chunk) + + +class ShallowWheelDistribution(AbstractDistribution): + """Represents a wheel distribution. 
+ + This does not need any preparation as wheels can be directly unpacked. + """ + + def __init__(self, req, downloader, download_dir): + # type: (InstallRequirement, Downloader, str) -> None + super(ShallowWheelDistribution, self).__init__(req) + self._downloader = downloader + self._download_dir = download_dir + + @property + def _wheel_context(self): + # type: () -> WheelContext + http_ctx = HttpContext(self._downloader.get_session()) + zip_ctx = ZipContext(http_ctx) + wheel_ctx = WheelContext(zip_ctx) + return wheel_ctx + + def get_pkg_resources_distribution(self): + # type: () -> Distribution + """Loads the metadata from the shallow wheel file into memory and + returns a Distribution that uses it, not relying on the wheel file or + requirement. + """ + # Wheels are never unnamed. + assert self.req.name + assert self.req.link + + project_name = ProjectName(self.req.name) + remote_location = Url(self.req.link.url) + + wheel_req = WheelMetadataRequest( + url=remote_location, + project_name=project_name, + ) + metadata = (self + ._wheel_context + .extract_wheel_metadata(wheel_req) + .contents) + + wheel_filename = self.req.link.filename + wheel_metadata = WheelMetadata({'METADATA': metadata}, wheel_filename) + + return DistributionNeedingFinalHydration( + link=self.req.link, + downloader=self._downloader, + download_dir=self._download_dir, + location=wheel_filename, + metadata=wheel_metadata, + project_name=project_name.name, + ) + + def prepare_distribution_metadata(self, finder, build_isolation): + # type: (PackageFinder, bool) -> None + pass diff --git a/src/pip/_internal/network/download.py b/src/pip/_internal/network/download.py index 2f3e08ae62e..a134f9596f8 100644 --- a/src/pip/_internal/network/download.py +++ b/src/pip/_internal/network/download.py @@ -183,6 +183,10 @@ def __init__( self._session = session self._progress_bar = progress_bar + def get_session(self): + # type: () -> PipSession + return self._session + def __call__(self, link): # type: (Link) 
-> Download try: diff --git a/src/pip/_internal/network/shallow/__init__.py b/src/pip/_internal/network/shallow/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/pip/_internal/network/shallow/httpfile.py b/src/pip/_internal/network/shallow/httpfile.py new file mode 100644 index 00000000000..26407cece18 --- /dev/null +++ b/src/pip/_internal/network/shallow/httpfile.py @@ -0,0 +1,156 @@ +""" +Download ranges of files over remote http. +""" + +from collections import namedtuple + +from pip._vendor import requests + +from pip._internal.utils.typing import MYPY_CHECK_RUNNING +from pip._internal.utils.urls import get_url_scheme + +if MYPY_CHECK_RUNNING: + from typing import Any, Optional + + +def url_is_remote(url): + # type: (str) -> bool + return get_url_scheme(url) in ['http', 'https'] + + +class Url(namedtuple('Url', ['url'])): + + def __new__(cls, url): + # type: (str) -> Url + assert url_is_remote(url) + return super(Url, cls).__new__(cls, url) + + +class HttpFileRequest(namedtuple('HttpFileRequest', ['url'])): + pass + + +class Size(namedtuple('Size', ['size'])): + def __new__(cls, size=0): + # type: (int) -> Size + assert size >= 0 + return super(Size, cls).__new__(cls, size) + + def __add__(self, other): + # type: (Any) -> Size + assert isinstance(other, type(self)) + return Size(self.size + other.size) + + def __sub__(self, other): + # type: (Any) -> Size + assert isinstance(other, type(self)) + return Size(self.size - other.size) + + def __lt__(self, other): + # type: (Any) -> bool + assert isinstance(other, type(self)) + return self.size < other.size + + def __le__(self, other): + # type: (Any) -> bool + assert isinstance(other, type(self)) + return self.size <= other.size + + def __gt__(self, other): + # type: (Any) -> bool + assert isinstance(other, type(self)) + return self.size > other.size + + def __ge__(self, other): + # type: (Any) -> bool + assert isinstance(other, type(self)) + return self.size >= other.size + + +class 
ByteRange(namedtuple('ByteRange', ['start', 'end'])): + def __new__(cls, start, end): + # type: (Size, Size) -> ByteRange + assert end >= start + return super(ByteRange, cls).__new__(cls, start, end) + + def as_bytes_range_header(self): + # type: () -> str + return "bytes={start}-{end}".format( + start=self.start.size, + # NB: The byte ranges accepted here are inclusive, so remove one + # from the end. + end=(self.end.size - 1)) + + def size_diff(self): + # type: () -> Size + return self.end - self.start + + +class BytesRangeRequest(namedtuple('BytesRangeRequest', ['start', 'end'])): + def __new__(cls, start, end): + # type: (Optional[Size], Optional[Size]) -> BytesRangeRequest + if (start is not None) and (end is not None): + assert end >= start + return super(BytesRangeRequest, cls).__new__(cls, start, end) + + def get_byte_range(self, size): + # type: (Size) -> ByteRange + if self.start is None: + start = 0 + else: + assert self.start <= size, "???/start={start},size={size}".format( + start=self.start, size=size) + start = self.start.size + + if self.end is None: + end = size.size + else: + assert self.end <= size + end = self.end.size + + return ByteRange(start=Size(start), end=Size(end)) + + +class HttpFile(namedtuple('HttpFile', ['url', 'size'])): + pass + + +class Context(object): + + def __init__(self, session=None): + # type: (Optional[requests.Session]) -> None + self.session = session or requests.Session() + + def head(self, request): + # type: (HttpFileRequest) -> HttpFile + resp = self.session.head(request.url.url) + resp.raise_for_status() + assert ( + "bytes" in resp.headers["Accept-Ranges"] + ), "???/bytes was not found in range headers" + content_length = int(resp.headers["Content-Length"]) + return HttpFile(url=request.url, size=Size(content_length)) + + def range_request(self, http_file, request): + # type: (HttpFile, BytesRangeRequest) -> bytes + byte_range = request.get_byte_range(http_file.size) + resp = self.session.get( + http_file.url.url, 
+ headers={"Range": byte_range.as_bytes_range_header()}) + resp.raise_for_status() + + if Size(len(resp.content)) == http_file.size: + # This request for the full URL contents is cached, and we should + # return just the requested byte range. + start = byte_range.start.size + end = byte_range.end.size + response_bytes = resp.content[start:end] + else: + response_bytes = resp.content + + size_diff = byte_range.size_diff() + assert ( + Size(len(response_bytes)) == size_diff + ), ("???/response should have been length {}, but got (size {}):\n{!r}" + .format(size_diff, len(response_bytes), response_bytes)) + return response_bytes diff --git a/src/pip/_internal/network/shallow/wheel.py b/src/pip/_internal/network/shallow/wheel.py new file mode 100644 index 00000000000..1c0a6f70d40 --- /dev/null +++ b/src/pip/_internal/network/shallow/wheel.py @@ -0,0 +1,75 @@ +""" +Download a wheel's METADATA file over http without downloading the rest of the +wheel file. +""" + +import re +from collections import namedtuple + +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +from .httpfile import HttpFileRequest +from .zipfile import Context as ZipFileContext +from .zipfile import ZipFileExtractionRequest, ZipMemberNameMatcher + +if MYPY_CHECK_RUNNING: + from typing import Optional + + +class ProjectName(namedtuple('ProjectName', ['name'])): + + def __new__(cls, name): + # type: (str) -> ProjectName + assert isinstance(name, str) + return super(ProjectName, cls).__new__(cls, name) + + +class WheelMetadataRequest(namedtuple('WheelMetadataRequest', [ + 'url', + 'project_name', +])): + pass + + +class WheelMetadataContents(namedtuple('WheelMetadataContents', ['contents'])): + + def __new__(cls, contents): + # type: (bytes) -> WheelMetadataContents + return super(WheelMetadataContents, cls).__new__(cls, contents) + + +class Context(object): + + def __init__(self, zip_context=None): + # type: (Optional[ZipFileContext]) -> None + self.zip_context = zip_context or ZipFileContext() 
+ + @classmethod + def _create_metadata_pattern(cls, project_name): + # type: (ProjectName) -> ZipMemberNameMatcher + sanitized_requirement_name = ( + project_name + .name + .lower() + .replace("-", "_")) + return ZipMemberNameMatcher( + re.compile( + ("{sanitized_requirement_name}[^/]+?.dist-info/METADATAPK" + .format(sanitized_requirement_name=sanitized_requirement_name) + .encode()), + flags=re.IGNORECASE, + ) + ) + + def extract_wheel_metadata(self, request): + # type: (WheelMetadataRequest) -> WheelMetadataContents + url = request.url + http_file = self.zip_context.http_context.head(HttpFileRequest(url)) + + metadata_pattern = self._create_metadata_pattern(request.project_name) + contents = self.zip_context.extract_zip_member_shallow( + ZipFileExtractionRequest( + http_file=http_file, member_pattern=metadata_pattern, + ) + ) + return WheelMetadataContents(contents) diff --git a/src/pip/_internal/network/shallow/zipfile.py b/src/pip/_internal/network/shallow/zipfile.py new file mode 100644 index 00000000000..aa65f6b4e96 --- /dev/null +++ b/src/pip/_internal/network/shallow/zipfile.py @@ -0,0 +1,149 @@ +""" +Extract files from remote zip archives without downloading more than a few +extra KB. 
+""" + +import re +import struct +import zlib +from collections import namedtuple + +from pip._vendor.six import PY3 + +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +from .httpfile import BytesRangeRequest +from .httpfile import Context as HttpContext +from .httpfile import Size + +if MYPY_CHECK_RUNNING: + from typing import Any, Optional + + if PY3: + ZipMemberPattern = re.Pattern[bytes] + else: + ZipMemberPattern = Any + + +# From https://stackoverflow.com/a/1089787/2518889: +def _inflate(data): + # type: (bytes) -> bytes + decompress = zlib.decompressobj(-zlib.MAX_WBITS) + inflated = decompress.decompress(data) + inflated += decompress.flush() + return inflated + + +def _decode_4_byte_unsigned(byte_string): + # type: (bytes) -> int + """Unpack as a little-endian unsigned long.""" + assert isinstance(byte_string, bytes) and len(byte_string) == 4 + return struct.unpack("<L", byte_string)[0] + + +def _decode_2_byte_unsigned(byte_string): + # type: (bytes) -> int + """Unpack as a little-endian unsigned short.""" + assert isinstance(byte_string, bytes) and len(byte_string) == 2 + return struct.unpack("<H", byte_string)[0] + + +class ZipMemberNameMatcher(namedtuple('ZipMemberNameMatcher', ['pattern'])): + + def __new__(cls, pattern): + # type: (ZipMemberPattern) -> ZipMemberNameMatcher + # Matching file names in zip files without the zipfile library requires + # a binary regex, not "text".
+ assert isinstance(pattern.pattern, bytes) # type: ignore + return super(ZipMemberNameMatcher, cls).__new__(cls, pattern) + + +class ZipFileExtractionRequest(namedtuple('ZipFileExtractionRequest', [ + 'http_file', + 'member_pattern', +])): + pass + + +class Context(object): + + def __init__(self, http_context=None): + # type: (Optional[HttpContext]) -> None + self.http_context = http_context or HttpContext() + + _ABSOLUTE_MINIMUM_CENTRAL_DIRECTORY_SIZE = 2000 + _CENTRAL_DIRECTORY_MAX_SIZE_FACTOR = 0.01 + + @classmethod + def _estimate_minimum_central_directory_record_size(cls, size): + # type: (Size) -> Size + lower_bound = int( + max( + cls._ABSOLUTE_MINIMUM_CENTRAL_DIRECTORY_SIZE, + size.size * cls._CENTRAL_DIRECTORY_MAX_SIZE_FACTOR, + ) + ) + actual_record_size = min(lower_bound, size.size) + return Size(actual_record_size) + + def extract_zip_member_shallow(self, request): + # type: (ZipFileExtractionRequest) -> bytes + http_file = request.http_file + full_size = http_file.size + + estimated_directory_record_size = ( + self._estimate_minimum_central_directory_record_size(full_size)) + central_directory_range_request = BytesRangeRequest( + start=(full_size - estimated_directory_record_size), end=full_size, + ) + + zip_tail = self.http_context.range_request( + http_file, central_directory_range_request + ) + + filename_in_central_dir_header = request.member_pattern.pattern.search( + zip_tail) + + assert filename_in_central_dir_header is not None + matched_filename = filename_in_central_dir_header.group(0) + + filename_start = filename_in_central_dir_header.start() + offset_start = filename_start - 4 + encoded_offset_for_local_file = zip_tail[offset_start:filename_start] + local_file_offset = _decode_4_byte_unsigned( + encoded_offset_for_local_file) + + local_file_header_range_request = BytesRangeRequest( + start=Size(local_file_offset + 18), + end=Size(local_file_offset + 30), + ) + file_header_no_filename = self.http_context.range_request( + http_file, 
local_file_header_range_request + ) + + compressed_size = _decode_4_byte_unsigned( + file_header_no_filename[:4]) + uncompressed_size = _decode_4_byte_unsigned( + file_header_no_filename[4:8]) + file_name_length = _decode_2_byte_unsigned( + file_header_no_filename[8:10]) + assert file_name_length == (len(matched_filename) - 2) + extra_field_length = _decode_2_byte_unsigned( + file_header_no_filename[10:12]) + + compressed_start = ( + local_file_offset + 30 + file_name_length + extra_field_length + ) + compressed_end = compressed_start + compressed_size + + compressed_file_range_request = BytesRangeRequest( + start=Size(compressed_start), end=Size(compressed_end), + ) + compressed_file = self.http_context.range_request( + http_file, compressed_file_range_request + ) + + uncompressed_file_contents = _inflate(compressed_file) + assert len(uncompressed_file_contents) == uncompressed_size + + return uncompressed_file_contents diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index d8a4bde5ca0..063b7c3c982 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -16,6 +16,7 @@ make_distribution_for_install_requirement, ) from pip._internal.distributions.installed import InstalledDistribution +from pip._internal.distributions.shallow_wheel import ShallowWheelDistribution from pip._internal.exceptions import ( DirectoryUrlHashUnsupported, HashMismatch, @@ -24,6 +25,7 @@ PreviousBuildDirError, VcsHashUnsupported, ) +from pip._internal.network.shallow.httpfile import url_is_remote from pip._internal.utils.filesystem import copy2_fixed from pip._internal.utils.hashes import MissingHashes from pip._internal.utils.logging import indent_log @@ -329,6 +331,7 @@ def __init__( finder, # type: PackageFinder require_hashes, # type: bool use_user_site, # type: bool + use_shallow_wheels, # type: bool ): # type: (...) 
-> None super(RequirementPreparer, self).__init__() @@ -362,6 +365,9 @@ def __init__( # Should install in user site-packages? self.use_user_site = use_user_site + # Can wheels be partially downloaded to improve resolve performance? + self.use_shallow_wheels = use_shallow_wheels + @property def _download_should_save(self): # type: () -> bool @@ -401,6 +407,9 @@ def prepare_linked_requirement( download_dir = self.wheel_download_dir if link.is_wheel: + if self.use_shallow_wheels and url_is_remote(link.url): + return ShallowWheelDistribution( + req, self.downloader, download_dir) if download_dir: # When downloading, we only unpack wheels to get # metadata. diff --git a/src/pip/_internal/req/req_set.py b/src/pip/_internal/req/req_set.py index d64bb78a327..b24f9663e0d 100644 --- a/src/pip/_internal/req/req_set.py +++ b/src/pip/_internal/req/req_set.py @@ -1,10 +1,14 @@ from __future__ import absolute_import import logging +import threading from collections import OrderedDict from pip._vendor.packaging.utils import canonicalize_name +from pip._internal.distributions.shallow_wheel import ( + DistributionNeedingFinalHydration, +) from pip._internal.exceptions import InstallationError from pip._internal.models.wheel import Wheel from pip._internal.utils import compatibility_tags @@ -29,6 +33,7 @@ def __init__(self, check_supported_wheels=True): self.check_supported_wheels = check_supported_wheels self.unnamed_requirements = [] # type: List[InstallRequirement] + self.dists_needing_final_hydration = [] # type: List[DistributionNeedingFinalHydration] # noqa: E501 def __str__(self): # type: () -> str @@ -52,6 +57,39 @@ def __repr__(self): reqs=', '.join(str(req.req) for req in requirements), ) + def add_dist_needing_final_hydration(self, dist): + # type: (DistributionNeedingFinalHydration) -> None + assert isinstance(dist, DistributionNeedingFinalHydration) + self.dists_needing_final_hydration.append(dist) + + def perform_all_final_hydration(self): + # type: () -> None + if not 
self.dists_needing_final_hydration: + return + + exceptions = [] + + def do_hydrate(dist): + # type: (DistributionNeedingFinalHydration) -> None + try: + dist.finally_hydrate() + except Exception as e: + exceptions.append(e) + + all_threads = [ + threading.Thread( + target=do_hydrate, name='download dist {}'.format(dist), + args=(dist,), + ) for dist in self.dists_needing_final_hydration + ] + for t in all_threads: + t.start() + for t in all_threads: + t.join() + if exceptions: + raise ValueError('at least one thread failed (errors below):\n{}' + .format('\n'.join(str(e) for e in exceptions))) + def add_unnamed_requirement(self, install_req): # type: (InstallRequirement) -> None assert not install_req.name diff --git a/src/pip/_internal/resolution/resolvelib/resolver.py b/src/pip/_internal/resolution/resolvelib/resolver.py index a8c76816790..35cb6dda42d 100644 --- a/src/pip/_internal/resolution/resolvelib/resolver.py +++ b/src/pip/_internal/resolution/resolvelib/resolver.py @@ -6,6 +6,9 @@ from pip._vendor.resolvelib import BaseReporter, ResolutionImpossible from pip._vendor.resolvelib import Resolver as RLResolver +from pip._internal.distributions.shallow_wheel import ( + DistributionNeedingFinalHydration, +) from pip._internal.exceptions import InstallationError from pip._internal.req.req_set import RequirementSet from pip._internal.resolution.base import BaseResolver @@ -167,6 +170,10 @@ def resolve(self, root_reqs, check_supported_wheels): ireq.should_reinstall = self.factory.should_reinstall(candidate) req_set.add_named_requirement(ireq) + dist = candidate.dist + if isinstance(dist, DistributionNeedingFinalHydration): + req_set.add_dist_needing_final_hydration(dist) + return req_set def get_installation_order(self, req_set): diff --git a/tests/unit/shallow/__init__.py b/tests/unit/shallow/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/unit/shallow/test_httpfile.py b/tests/unit/shallow/test_httpfile.py new file mode 100644 
index 00000000000..51ff4004e00 --- /dev/null +++ b/tests/unit/shallow/test_httpfile.py @@ -0,0 +1,29 @@ +from pip._internal.network.shallow.httpfile import ( + BytesRangeRequest, + Context, + HttpFile, + HttpFileRequest, + Size, +) + +from .util import serve_file + +_test_contents = b"this is the file contents" + +context = Context() + + +def test_http_range(): + with serve_file(_test_contents) as url: + req = HttpFileRequest(url) + expected = HttpFile(url=url, size=Size(len(_test_contents))) + assert context.head(req) == expected + + get_whole_file = BytesRangeRequest(start=None, end=None) + contents = context.range_request(expected, get_whole_file) + assert contents == _test_contents + + half_extent = len(_test_contents) // 2 + get_half_file = BytesRangeRequest(start=None, end=Size(half_extent)) + half_contents = context.range_request(expected, get_half_file) + assert half_contents == _test_contents[:half_extent] diff --git a/tests/unit/shallow/test_wheel.py b/tests/unit/shallow/test_wheel.py new file mode 100644 index 00000000000..e828d0efff1 --- /dev/null +++ b/tests/unit/shallow/test_wheel.py @@ -0,0 +1,39 @@ +from textwrap import dedent + +from pip._internal.network.shallow.wheel import ( + Context, + ProjectName, + WheelMetadataRequest, +) + +from .util import serve_wheel + +context = Context() + + +def _strip_carriage_returns(s): + # type: (bytes) -> str + return s.decode().strip().replace('\r', '') + + +def test_extract_metadata_from_wheel(): + name = ProjectName("asdf") + with serve_wheel(name, version="0.0.1") as url: + wheel_req = WheelMetadataRequest(url, project_name=name,) + + metadata_contents = context.extract_wheel_metadata(wheel_req) + assert _strip_carriage_returns(metadata_contents.contents) == dedent( + """\ + Metadata-Version: 2.1 + Name: asdf + Version: 0.0.1 + Summary: UNKNOWN + Home-page: UNKNOWN + Author: UNKNOWN + Author-email: UNKNOWN + License: UNKNOWN + Platform: UNKNOWN + Requires-Dist: requests + + UNKNOWN + """).strip() diff --git 
a/tests/unit/shallow/test_zipfile.py b/tests/unit/shallow/test_zipfile.py new file mode 100644 index 00000000000..3b7ab92b78e --- /dev/null +++ b/tests/unit/shallow/test_zipfile.py @@ -0,0 +1,31 @@ +import re +import zipfile + +from pip._internal.network.shallow.httpfile import HttpFileRequest +from pip._internal.network.shallow.zipfile import ( + Context, + ZipFileExtractionRequest, + ZipMemberNameMatcher, +) +from tests.lib.path import Path + +from .util import serve_zip + +context = Context() + +_asdf_contents = b"asdf\n" + + +def test_extract_file_from_deflated_zip(): + with serve_zip( + Path("asdf.txt"), _asdf_contents, compression=zipfile.ZIP_DEFLATED + ) as url: + req = HttpFileRequest(url) + http_file = context.http_context.head(req) + + zip_req = ZipFileExtractionRequest( + http_file=http_file, + member_pattern=ZipMemberNameMatcher(re.compile(b"asdf.txtPK")), + ) + zip_member = context.extract_zip_member_shallow(zip_req) + assert zip_member == _asdf_contents diff --git a/tests/unit/shallow/util.py b/tests/unit/shallow/util.py new file mode 100644 index 00000000000..d319e116f36 --- /dev/null +++ b/tests/unit/shallow/util.py @@ -0,0 +1,208 @@ +import shutil +import subprocess +import sys +import tempfile +import threading +import zipfile +from contextlib import contextmanager +from textwrap import dedent + +from pip._vendor.six import PY3 + +from pip._internal.network.shallow.httpfile import Url +from pip._internal.network.shallow.wheel import ProjectName +from pip._internal.utils.typing import MYPY_CHECK_RUNNING +from tests.lib.path import Path + +if PY3: + from http.server import BaseHTTPRequestHandler + from queue import Queue + from socketserver import TCPServer +else: + from SimpleHTTPServer import ( + SimpleHTTPRequestHandler as BaseHTTPRequestHandler) + from Queue import Queue + from SocketServer import TCPServer + + +if MYPY_CHECK_RUNNING: + from typing import Iterator, List, Type + + +@contextmanager +def _http_port(handler_class): + # type: (Type) 
-> Iterator[int] + def serve(port_queue, shutdown_queue): + # type: (Queue[int], Queue[bool]) -> None + httpd = TCPServer(("", 0), handler_class) + httpd.timeout = 0.1 + port_queue.put(httpd.server_address[1]) + while shutdown_queue.empty(): + httpd.handle_request() + + port_queue = Queue() # type: Queue[int] + shutdown_queue = Queue() # type: Queue[bool] + t = threading.Thread(target=lambda: serve(port_queue, shutdown_queue)) + t.daemon = True + t.start() + + try: + yield port_queue.get(block=True) + finally: + shutdown_queue.put(True) + t.join() + + +class _StubHandler(BaseHTTPRequestHandler): + _response_text = b"" + _response_path = "/" + + def do_HEAD(self): + self.send_headers() + + def do_GET(self): + self.send_headers() + assert self._response_path.startswith("/") + self.wfile.write(self._response_text) + + def send_headers(self): + code = 200 if self.path == self._response_path else 404 + self.send_response(code) + self.send_header("Accept-Ranges", "bytes") + self.send_header("Content-Type", "text/utf-8") + self.send_header("Content-Length", str(len(self._response_text))) + self.end_headers() + + +@contextmanager +def _serve_http(handler_class): + # type: (Type[_StubHandler]) -> Iterator[Url] + with _http_port(handler_class) as port: + yield Url("http://localhost:{port}{response_path}" + .format(port=port, + response_path=handler_class._response_path)) + + +@contextmanager +def serve_file(file_contents): + # type: (bytes) -> Iterator[Url] + class FileHandler(_StubHandler): + _response_text = file_contents + + with _serve_http(FileHandler) as url: + yield url + + +@contextmanager +def mock_zip(single_file_path, single_file_contents, compression): + # type: (Path, bytes, int) -> Iterator[Path] + with temporary_file_path(Path("test.zip")) as zip_path: + with zipfile.ZipFile( + zip_path, + mode="w", + compression=compression, + ) as zf: + zf.writestr(str(single_file_path), single_file_contents) + assert zip_path.exists() + + yield zip_path + + +@contextmanager 
+def serve_zip(single_file_path, single_file_contents, compression): + # type: (Path, bytes, int) -> Iterator[Url] + with mock_zip( + single_file_path=single_file_path, + single_file_contents=single_file_contents, + compression=compression, + ) as zip_path: + zip_contents = _read_file(zip_path) + + class ZipHandler(_StubHandler): + _response_text = zip_contents + + with _serve_http(ZipHandler) as url: + yield url + + +@contextmanager +def temporary_dir(): + # type: () -> Iterator[Path] + """A with-context that creates a temporary directory.""" + path = tempfile.mkdtemp() + + try: + yield Path(path) + finally: + shutil.rmtree(path, ignore_errors=True) + + +@contextmanager +def temporary_file_path(filename): + # type: (Path) -> Iterator[Path] + with temporary_dir() as td: + yield td / filename + + +def _dump_file(path, contents): + # type: (Path, bytes) -> None + with open(path, "wb") as f: + f.write(contents) + + +def _read_file(path): + # type: (Path) -> bytes + with open(path, "rb") as f: + return f.read() + + +def _run_python(argv, cwd): + # type: (List[str], Path) -> None + subprocess.check_call([sys.executable] + argv, cwd=cwd) + + +@contextmanager +def mock_wheel(name, version): + # type: (ProjectName, str) -> Iterator[Path] + with temporary_dir() as td: + _dump_file( + td / "setup.py", + dedent( + """\ + from setuptools import setup + setup() + """ + ).encode(), + ) + + _dump_file( + td / "setup.cfg", + dedent( + """\ + [metadata] + name = {name} + version = {version} + + [options] + install_requires = + requests + """.format(name=name.name, version=version)).encode(), + ) + + _run_python(["setup.py", "bdist_wheel"], cwd=td) + globbed_wheel = list(td.glob("dist/*.whl")) + assert len(globbed_wheel) == 1 + yield globbed_wheel[0] + + +@contextmanager +def serve_wheel(name, version): + # type: (ProjectName, str) -> Iterator[Url] + with mock_wheel(name, version=version) as wheel_path: + wheel_contents = _read_file(wheel_path) + + class WheelHandler(_StubHandler): 
+ _response_text = wheel_contents + + with _serve_http(WheelHandler) as url: + yield url diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index 2da0b62dbfc..fb324336fe0 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -89,6 +89,7 @@ def _basic_resolver(self, finder, require_hashes=False): finder=finder, require_hashes=require_hashes, use_user_site=False, + use_shallow_wheels=False, ) yield Resolver( preparer=preparer,