diff --git a/pex/fetcher.py b/pex/fetcher.py index 6e94da241..15e98cb8b 100644 --- a/pex/fetcher.py +++ b/pex/fetcher.py @@ -7,9 +7,17 @@ import time from contextlib import closing, contextmanager -from pex.compatibility import FileHandler, HTTPError, HTTPSHandler, ProxyHandler, build_opener +from pex.compatibility import ( + FileHandler, + HTTPError, + HTTPSHandler, + ProxyHandler, + Request, + build_opener, +) from pex.network_configuration import NetworkConfiguration from pex.typing import TYPE_CHECKING, cast +from pex.version import __version__ if TYPE_CHECKING: from typing import BinaryIO, Dict, Iterator, Optional, Text @@ -18,6 +26,8 @@ class URLFetcher(object): + USER_AGENT = "pex/{version}".format(version=__version__) + def __init__( self, network_configuration=None, # type: Optional[NetworkConfiguration] @@ -54,10 +64,16 @@ def get_body_stream(self, url): retry_delay_secs *= 2 opener = build_opener(*self._handlers) + request = Request( + # N.B.: MyPy incorrectly thinks url must be a str in Python 2 where a unicode url + # actually works fine. + url, # type: ignore[arg-type] + headers={"User-Agent": self.USER_AGENT}, + ) # The fp is typed as Optional[...] for Python 2 only in the typeshed. A `None` # can only be returned if a faulty custom handler is installed and we only # install stdlib handlers. - fp = cast(BinaryIO, opener.open(url, timeout=self._timeout)) + fp = cast(BinaryIO, opener.open(request, timeout=self._timeout)) try: with closing(fp) as body_stream: yield body_stream diff --git a/tests/test_fetcher.py b/tests/test_fetcher.py new file mode 100644 index 000000000..f73fefc1f --- /dev/null +++ b/tests/test_fetcher.py @@ -0,0 +1,57 @@ +# Copyright 2022 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +from __future__ import print_function + +from threading import Thread + +import pytest + +from pex.compatibility import PY2 +from pex.fetcher import URLFetcher +from pex.typing import TYPE_CHECKING +from pex.version import __version__ + +if PY2: + from BaseHTTPServer import BaseHTTPRequestHandler + from SocketServer import TCPServer +else: + from http.server import BaseHTTPRequestHandler + from socketserver import TCPServer + +if TYPE_CHECKING: + from typing import Tuple + + +@pytest.fixture +def server_address(): + class GETRequestHandler(BaseHTTPRequestHandler): + def do_GET(self): + body = self.headers["User-Agent"].encode("utf-8") + self.send_response(200) + self.send_header("Content-Type", "application/octet-stream") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + server = TCPServer(("127.0.0.1", 0), GETRequestHandler) + host, port = server.server_address + + server_thread = Thread(target=server.serve_forever) + server_thread.daemon = True + server_thread.start() + try: + yield host, port + finally: + server.shutdown() + server_thread.join() + + +def test_user_agent(server_address): + # type: (Tuple[str, int]) -> None + + host, port = server_address + url = "http://{host}:{port}".format(host=host, port=port) + url_fetcher = URLFetcher() + with url_fetcher.get_body_stream(url) as fp: + assert "pex/{version}".format(version=__version__) == fp.read().decode("utf-8")