Skip to content

Commit

Permalink
Extract URLFetcher for reuse with lock files.
Browse files Browse the repository at this point in the history
Lock file generation will need to download and hash dists when their
hashes are not present in PEP 503 link fragments, and we'll also need
to download dists directly if we optimize the resolve-from-a-lock-file
case when the lock file contains URLs à la PEP 665.

Work towards pex-tool#1401.
  • Loading branch information
jsirois committed Aug 19, 2021
1 parent c9ff7d9 commit 8a18b8a
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 73 deletions.
2 changes: 2 additions & 0 deletions pex/compatibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,15 @@ def exec_function(ast, globals_map):

from urllib.error import HTTPError as HTTPError
from urllib.request import build_opener as build_opener
from urllib.request import FileHandler as FileHandler
from urllib.request import HTTPSHandler as HTTPSHandler
from urllib.request import ProxyHandler as ProxyHandler
from urllib.request import Request as Request
else:
import urlparse as urlparse

from urllib2 import build_opener as build_opener
from urllib2 import FileHandler as FileHandler
from urllib2 import HTTPError as HTTPError
from urllib2 import HTTPSHandler as HTTPSHandler
from urllib2 import ProxyHandler as ProxyHandler
Expand Down
90 changes: 90 additions & 0 deletions pex/fetcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import absolute_import

import ssl
import time
from contextlib import closing, contextmanager

from pex.compatibility import FileHandler, HTTPError, HTTPSHandler, ProxyHandler, build_opener
from pex.network_configuration import NetworkConfiguration
from pex.typing import TYPE_CHECKING, cast

if TYPE_CHECKING:
from typing import BinaryIO, Dict, Iterator, Optional, Text
else:
BinaryIO = None


class URLFetcher(object):
    """Fetches URL content, retrying transient HTTP and socket-level errors.

    TLS trust, client certs, proxies, timeout and retry count are taken from the
    supplied `NetworkConfiguration` (or its defaults).
    """

    def __init__(
        self,
        network_configuration=None,  # type: Optional[NetworkConfiguration]
        handle_file_urls=False,  # type: bool
    ):
        # type: (...) -> None
        network_configuration = network_configuration or NetworkConfiguration()

        # Per-request socket timeout and the maximum number of retries of retryable errors.
        self._timeout = network_configuration.timeout
        self._max_retries = network_configuration.retries

        # Establish TLS trust (and an optional client identity) from the configuration.
        ssl_context = ssl.create_default_context(cafile=network_configuration.cert)
        if network_configuration.client_cert:
            ssl_context.load_cert_chain(network_configuration.client_cert)

        proxies = None  # type: Optional[Dict[str, str]]
        if network_configuration.proxy:
            # The same proxy is used for both protocols we fetch over.
            proxies = {protocol: network_configuration.proxy for protocol in ("http", "https")}

        handlers = [ProxyHandler(proxies), HTTPSHandler(context=ssl_context)]
        if handle_file_urls:
            handlers.append(FileHandler())
        self._handlers = tuple(handlers)

    @contextmanager
    def get_body_stream(self, url):
        # type: (Text) -> Iterator[BinaryIO]
        """Yield a binary stream over the body of `url`.

        Retryable HTTP errors (408, 500, 503, 504) and socket-level errors raised while
        opening the URL are retried with exponential back-off up to the configured retry
        limit; the last such error is re-raised when all attempts fail. Non-retryable
        HTTP errors are raised immediately.
        """
        retries = 0
        retry_delay_secs = 0.1
        last_error = None  # type: Optional[Exception]
        while retries <= self._max_retries:
            if retries > 0:
                # Exponential back-off between attempts, starting at 0.1s.
                time.sleep(retry_delay_secs)
                retry_delay_secs *= 2
            retries += 1

            opener = build_opener(*self._handlers)
            try:
                # N.B.: The open call must be inside the `try` so that the retryable
                # errors it raises actually drive the retry loop.
                fp = opener.open(url, timeout=self._timeout)
            except HTTPError as e:
                # See: https://tools.ietf.org/html/rfc2616#page-39
                if e.code not in (
                    408,  # Request Time-out
                    500,  # Internal Server Error
                    503,  # Service Unavailable
                    504,  # Gateway Time-out
                ):
                    raise e
                last_error = e
            except (IOError, OSError) as e:
                # Unfortunately errors are overly broad at this point. We can get either OSError
                # or URLError (a subclass of OSError) which at times indicates retryable socket
                # level errors. Since retrying a non-retryable socket level error just wastes
                # local machine resources we err towards always retrying.
                last_error = e
            else:
                # The fp is typed as Optional[...] for Python 2 only in the typeshed. A `None`
                # can only be returned if a faulty custom handler is installed and we only
                # install stdlib handlers.
                with closing(cast("BinaryIO", fp)) as body_stream:
                    # N.B.: We yield outside any `except` clause - errors raised by the
                    # consumer while reading must propagate and not re-enter the retry
                    # loop: a @contextmanager generator may only yield once.
                    yield body_stream
                return

        raise cast(Exception, last_error)

    @contextmanager
    def get_body_iter(self, url):
        # type: (Text) -> Iterator[Iterator[Text]]
        """Yield an iterator over the utf-8 decoded lines of the body of `url`."""
        with self.get_body_stream(url) as body_stream:
            yield (line.decode("utf-8") for line in body_stream.readlines())
74 changes: 4 additions & 70 deletions pex/requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,21 @@

import os
import re
import ssl
import time
from contextlib import closing, contextmanager
from contextlib import contextmanager

from pex import attrs, dist_metadata
from pex.compatibility import HTTPError, HTTPSHandler, ProxyHandler, build_opener, urlparse
from pex.compatibility import urlparse
from pex.dist_metadata import MetadataError, ProjectNameAndVersion
from pex.network_configuration import NetworkConfiguration
from pex.fetcher import URLFetcher
from pex.third_party.packaging.markers import Marker
from pex.third_party.packaging.specifiers import SpecifierSet
from pex.third_party.packaging.version import InvalidVersion, Version
from pex.third_party.pkg_resources import Requirement, RequirementParseError
from pex.typing import TYPE_CHECKING, cast
from pex.typing import TYPE_CHECKING

if TYPE_CHECKING:
import attr # vendor:skip
from typing import (
BinaryIO,
Dict,
Iterable,
Iterator,
Match,
Expand Down Expand Up @@ -51,68 +47,6 @@ def render_location(self):
return "{} lines {}-{}".format(self.source, self.start_line, self.end_line)


class URLFetcher(object):
    """Fetches the content of URLs, retrying transient HTTP and socket-level errors.

    Timeout, retry count, TLS trust/client certs and proxy settings come from the
    supplied `NetworkConfiguration` (or its defaults).
    """

    def __init__(self, network_configuration=None):
        # type: (Optional[NetworkConfiguration]) -> None
        network_configuration = network_configuration or NetworkConfiguration()

        # Per-request socket timeout and the maximum number of retries of retryable errors.
        self._timeout = network_configuration.timeout
        self._max_retries = network_configuration.retries

        # Establish TLS trust (and an optional client identity) from the configuration.
        ssl_context = ssl.create_default_context(cafile=network_configuration.cert)
        if network_configuration.client_cert:
            ssl_context.load_cert_chain(network_configuration.client_cert)

        proxies = None  # type: Optional[Dict[str, str]]
        if network_configuration.proxy:
            # The same proxy is used for every protocol we fetch over.
            proxies = {
                protocol: network_configuration.proxy for protocol in ("ftp", "http", "https")
            }

        self._handlers = (ProxyHandler(proxies), HTTPSHandler(context=ssl_context))

    @contextmanager
    def get_body_iter(self, url):
        # type: (Text) -> Iterator[Iterator[Text]]
        """Yield an iterator over the utf-8 decoded lines of the body of `url`.

        Retryable HTTP errors (408, 500, 503, 504) and socket-level errors are retried
        with exponential back-off up to the configured retry limit; the last such error
        is re-raised when all attempts fail. Non-retryable HTTP errors raise immediately.
        """
        retries = 0
        retry_delay_secs = 0.1
        last_error = None  # type: Optional[Exception]
        while retries <= self._max_retries:
            if retries > 0:
                # Exponential back-off between attempts, starting at 0.1s.
                time.sleep(retry_delay_secs)
                retry_delay_secs *= 2

            opener = build_opener(*self._handlers)
            try:
                with closing(opener.open(url, timeout=self._timeout)) as fp:
                    # The fp is typed as Optional[...] for Python 2 only in the typeshed. A `None`
                    # can only be returned if a faulty custom handler is installed and we only
                    # install stdlib handlers.
                    body_stream = cast("BinaryIO", fp)
                    yield (line.decode("utf-8") for line in body_stream.readlines())
                return
            except HTTPError as e:
                # See: https://tools.ietf.org/html/rfc2616#page-39
                if e.code not in (
                    408,  # Request Time-out
                    500,  # Internal Server Error
                    503,  # Service Unavailable
                    504,  # Gateway Time-out
                ):
                    raise e
                last_error = e
            except (IOError, OSError) as e:
                # Unfortunately errors are overly broad at this point. We can get either OSError or
                # URLError (a subclass of OSError) which at times indicates retryable socket level
                # errors. Since retrying a non-retryable socket level error just wastes local
                # machine resources we err towards always retrying.
                last_error = e
            finally:
                retries += 1

        raise cast(Exception, last_error)


@attr.s(frozen=True)
class Source(object):
@classmethod
Expand Down
2 changes: 1 addition & 1 deletion pex/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from pex.common import AtomicDirectory, atomic_directory, safe_mkdtemp
from pex.distribution_target import DistributionTarget
from pex.environment import PEXEnvironment, ResolveError
from pex.fetcher import URLFetcher
from pex.interpreter import PythonInterpreter
from pex.jobs import Raise, SpawnedJob, execute_parallel
from pex.network_configuration import NetworkConfiguration
Expand All @@ -24,7 +25,6 @@
from pex.requirements import (
Constraint,
LocalProjectRequirement,
URLFetcher,
parse_requirement_file,
parse_requirement_strings,
)
Expand Down
3 changes: 2 additions & 1 deletion tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,13 @@
)
from pex.compatibility import WINDOWS, to_bytes
from pex.executor import Executor
from pex.fetcher import URLFetcher
from pex.interpreter import PythonInterpreter
from pex.network_configuration import NetworkConfiguration
from pex.orderedset import OrderedSet
from pex.pex_info import PexInfo
from pex.pip import get_pip
from pex.requirements import LogicalLine, PyPIRequirement, URLFetcher, parse_requirement_file
from pex.requirements import LogicalLine, PyPIRequirement, parse_requirement_file
from pex.testing import (
IS_MAC,
IS_PYPY,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@
import pytest

from pex.common import safe_open, temporary_dir, touch
from pex.fetcher import URLFetcher
from pex.requirements import (
Constraint,
LocalProjectRequirement,
LogicalLine,
ParseError,
PyPIRequirement,
Source,
URLFetcher,
URLRequirement,
parse_requirement_file,
parse_requirement_from_project_name_and_specifier,
Expand Down

0 comments on commit 8a18b8a

Please sign in to comment.