Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
allow specifying https:// proxy (#10411)
Browse files Browse the repository at this point in the history
  • Loading branch information
dklimpel authored Jul 27, 2021
1 parent e16eab2 commit 076dead
Show file tree
Hide file tree
Showing 3 changed files with 450 additions and 133 deletions.
1 change: 1 addition & 0 deletions changelog.d/10411.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add support for https connections to a proxy server. Contributed by @Bubu and @dklimpel.
184 changes: 109 additions & 75 deletions synapse/http/proxyagent.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,32 @@
import base64
import logging
import re
from typing import Optional, Tuple
from urllib.request import getproxies_environment, proxy_bypass_environment
from typing import Any, Dict, Optional, Tuple
from urllib.parse import urlparse
from urllib.request import ( # type: ignore[attr-defined]
getproxies_environment,
proxy_bypass_environment,
)

import attr
from zope.interface import implementer

from twisted.internet import defer
from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
from twisted.internet.interfaces import IReactorCore, IStreamClientEndpoint
from twisted.python.failure import Failure
from twisted.web.client import URI, BrowserLikePolicyForHTTPS, _AgentBase
from twisted.web.client import (
URI,
BrowserLikePolicyForHTTPS,
HTTPConnectionPool,
_AgentBase,
)
from twisted.web.error import SchemeNotSupported
from twisted.web.http_headers import Headers
from twisted.web.iweb import IAgent, IPolicyForHTTPS
from twisted.web.iweb import IAgent, IBodyProducer, IPolicyForHTTPS

from synapse.http.connectproxyclient import HTTPConnectProxyEndpoint
from synapse.types import ISynapseReactor

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -63,35 +74,38 @@ class ProxyAgent(_AgentBase):
reactor might have some blacklisting applied (i.e. for DNS queries),
but we need unblocked access to the proxy.
contextFactory (IPolicyForHTTPS): A factory for TLS contexts, to control the
contextFactory: A factory for TLS contexts, to control the
verification parameters of OpenSSL. The default is to use a
`BrowserLikePolicyForHTTPS`, so unless you have special
requirements you can leave this as-is.
connectTimeout (Optional[float]): The amount of time that this Agent will wait
connectTimeout: The amount of time that this Agent will wait
for the peer to accept a connection, in seconds. If 'None',
HostnameEndpoint's default (30s) will be used.
This is used for connections to both proxies and destination servers.
bindAddress (bytes): The local address for client sockets to bind to.
bindAddress: The local address for client sockets to bind to.
pool (HTTPConnectionPool|None): connection pool to be used. If None, a
pool: connection pool to be used. If None, a
non-persistent pool instance will be created.
use_proxy (bool): Whether proxy settings should be discovered and used
use_proxy: Whether proxy settings should be discovered and used
from conventional environment variables.
Raises:
ValueError if use_proxy is set and the environment variables
contain an invalid proxy specification.
"""

def __init__(
self,
reactor,
proxy_reactor=None,
reactor: IReactorCore,
proxy_reactor: Optional[ISynapseReactor] = None,
contextFactory: Optional[IPolicyForHTTPS] = None,
connectTimeout=None,
bindAddress=None,
pool=None,
use_proxy=False,
connectTimeout: Optional[float] = None,
bindAddress: Optional[bytes] = None,
pool: Optional[HTTPConnectionPool] = None,
use_proxy: bool = False,
):
contextFactory = contextFactory or BrowserLikePolicyForHTTPS()

Expand All @@ -102,7 +116,7 @@ def __init__(
else:
self.proxy_reactor = proxy_reactor

self._endpoint_kwargs = {}
self._endpoint_kwargs: Dict[str, Any] = {}
if connectTimeout is not None:
self._endpoint_kwargs["timeout"] = connectTimeout
if bindAddress is not None:
Expand All @@ -117,24 +131,26 @@ def __init__(
https_proxy = proxies["https"].encode() if "https" in proxies else None
no_proxy = proxies["no"] if "no" in proxies else None

# Parse credentials from http and https proxy connection string if present
self.http_proxy_creds, http_proxy = parse_username_password(http_proxy)
self.https_proxy_creds, https_proxy = parse_username_password(https_proxy)

self.http_proxy_endpoint = _http_proxy_endpoint(
http_proxy, self.proxy_reactor, **self._endpoint_kwargs
self.http_proxy_endpoint, self.http_proxy_creds = _http_proxy_endpoint(
http_proxy, self.proxy_reactor, contextFactory, **self._endpoint_kwargs
)

self.https_proxy_endpoint = _http_proxy_endpoint(
https_proxy, self.proxy_reactor, **self._endpoint_kwargs
self.https_proxy_endpoint, self.https_proxy_creds = _http_proxy_endpoint(
https_proxy, self.proxy_reactor, contextFactory, **self._endpoint_kwargs
)

self.no_proxy = no_proxy

self._policy_for_https = contextFactory
self._reactor = reactor

def request(self, method, uri, headers=None, bodyProducer=None):
def request(
self,
method: bytes,
uri: bytes,
headers: Optional[Headers] = None,
bodyProducer: Optional[IBodyProducer] = None,
) -> defer.Deferred:
"""
Issue a request to the server indicated by the given uri.
Expand All @@ -146,16 +162,15 @@ def request(self, method, uri, headers=None, bodyProducer=None):
See also: twisted.web.iweb.IAgent.request
Args:
method (bytes): The request method to use, such as `GET`, `POST`, etc
method: The request method to use, such as `GET`, `POST`, etc
uri (bytes): The location of the resource to request.
uri: The location of the resource to request.
headers (Headers|None): Extra headers to send with the request
headers: Extra headers to send with the request
bodyProducer (IBodyProducer|None): An object which can generate bytes to
make up the body of this request (for example, the properly encoded
contents of a file for a file upload). Or, None if the request is to
have no body.
bodyProducer: An object which can generate bytes to make up the body of
this request (for example, the properly encoded contents of a file for
a file upload). Or, None if the request is to have no body.
Returns:
Deferred[IResponse]: completes when the header of the response has
Expand Down Expand Up @@ -253,70 +268,89 @@ def request(self, method, uri, headers=None, bodyProducer=None):
)


def _http_proxy_endpoint(proxy: Optional[bytes], reactor, **kwargs):
def _http_proxy_endpoint(
proxy: Optional[bytes],
reactor: IReactorCore,
tls_options_factory: IPolicyForHTTPS,
**kwargs,
) -> Tuple[Optional[IStreamClientEndpoint], Optional[ProxyCredentials]]:
"""Parses an http proxy setting and returns an endpoint for the proxy
Args:
proxy: the proxy setting in the form: [<username>:<password>@]<host>[:<port>]
Note that compared to other apps, this function currently lacks support
for specifying a protocol schema (i.e. protocol://...).
proxy: the proxy setting in the form: [scheme://][<username>:<password>@]<host>[:<port>]
This currently supports http:// and https:// proxies.
A hostname without scheme is assumed to be http.
reactor: reactor to be used to connect to the proxy
tls_options_factory: the TLS options to use when connecting through a https proxy
kwargs: other args to be passed to HostnameEndpoint
Returns:
interfaces.IStreamClientEndpoint|None: endpoint to use to connect to the proxy,
or None
a tuple of
endpoint to use to connect to the proxy, or None
ProxyCredentials or if no credentials were found, or None
Raise:
ValueError if proxy has no hostname or unsupported scheme.
"""
if proxy is None:
return None
return None, None

# Parse the connection string
host, port = parse_host_port(proxy, default_port=1080)
return HostnameEndpoint(reactor, host, port, **kwargs)
# Note: urlsplit/urlparse cannot be used here as that does not work (for Python
# 3.9+) on scheme-less proxies, e.g. host:port.
scheme, host, port, credentials = parse_proxy(proxy)

proxy_endpoint = HostnameEndpoint(reactor, host, port, **kwargs)

def parse_username_password(proxy: bytes) -> Tuple[Optional[ProxyCredentials], bytes]:
"""
Parses the username and password from a proxy declaration e.g
username:password@hostname:port.
if scheme == b"https":
tls_options = tls_options_factory.creatorForNetloc(host, port)
proxy_endpoint = wrapClientTLS(tls_options, proxy_endpoint)

Args:
proxy: The proxy connection string.
return proxy_endpoint, credentials

Returns
An instance of ProxyCredentials and the proxy connection string with any credentials
stripped, i.e u:p@host:port -> host:port. If no credentials were found, the
ProxyCredentials instance is replaced with None.
"""
if proxy and b"@" in proxy:
# We use rsplit here as the password could contain an @ character
credentials, proxy_without_credentials = proxy.rsplit(b"@", 1)
return ProxyCredentials(credentials), proxy_without_credentials

return None, proxy
def parse_proxy(
proxy: bytes, default_scheme: bytes = b"http", default_port: int = 1080
) -> Tuple[bytes, bytes, int, Optional[ProxyCredentials]]:
"""
Parse a proxy connection string.
Given a HTTP proxy URL, breaks it down into components and checks that it
has a hostname (otherwise it is not useful to us when trying to find a
proxy) and asserts that the URL has a scheme we support.
def parse_host_port(hostport: bytes, default_port: int = None) -> Tuple[bytes, int]:
"""
Parse the hostname and port from a proxy connection byte string.
Args:
hostport: The proxy connection string. Must be in the form 'host[:port]'.
default_port: The default port to return if one is not found in `hostport`.
proxy: The proxy connection string. Must be in the form '[scheme://][<username>:<password>@]host[:port]'.
default_scheme: The default scheme to return if one is not found in `proxy`. Defaults to http
default_port: The default port to return if one is not found in `proxy`. Defaults to 1080
Returns:
A tuple containing the hostname and port. Uses `default_port` if one was not found.
A tuple containing the scheme, hostname, port and ProxyCredentials.
If no credentials were found, the ProxyCredentials instance is replaced with None.
Raise:
ValueError if proxy has no hostname or unsupported scheme.
"""
if b":" in hostport:
host, port = hostport.rsplit(b":", 1)
try:
port = int(port)
return host, port
except ValueError:
# the thing after the : wasn't a valid port; presumably this is an
# IPv6 address.
pass
# First check if we have a scheme present
# Note: urlsplit/urlparse cannot be used (for Python # 3.9+) on scheme-less proxies, e.g. host:port.
if b"://" not in proxy:
proxy = b"".join([default_scheme, b"://", proxy])

url = urlparse(proxy)

if not url.hostname:
raise ValueError("Proxy URL did not contain a hostname! Please specify one.")

if url.scheme not in (b"http", b"https"):
raise ValueError(
f"Unknown proxy scheme {url.scheme!s}; only 'http' and 'https' is supported."
)

credentials = None
if url.username and url.password:
credentials = ProxyCredentials(b"".join([url.username, b":", url.password]))

return hostport, default_port
return url.scheme, url.hostname, url.port or default_port, credentials
Loading

0 comments on commit 076dead

Please sign in to comment.