Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Refactor SimpleHttpClient to pull out reusable methods (#15427)
Browse files Browse the repository at this point in the history
Pulls out some methods to `BaseHttpClient` to eventually be
reused in other contexts.
  • Loading branch information
realtyem authored Apr 14, 2023
1 parent 8a47d6e commit c932614
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 56 deletions.
1 change: 1 addition & 0 deletions changelog.d/15427.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Refactor `SimpleHttpClient` to pull out a base class.
132 changes: 76 additions & 56 deletions synapse/http/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,35 +312,27 @@ def request(
)


class SimpleHttpClient:
class BaseHttpClient:
"""
A simple, no-frills HTTP client with methods that wrap up common ways of
using HTTP in Matrix
using HTTP in Matrix. Does not come with a default Agent, subclasses will need to
define their own.
Args:
hs: The HomeServer instance to pass in
treq_args: Extra keyword arguments to be given to treq.request.
"""

agent: IAgent

def __init__(
self,
hs: "HomeServer",
treq_args: Optional[Dict[str, Any]] = None,
ip_whitelist: Optional[IPSet] = None,
ip_blacklist: Optional[IPSet] = None,
use_proxy: bool = False,
):
"""
Args:
hs
treq_args: Extra keyword arguments to be given to treq.request.
ip_blacklist: The IP addresses that are blacklisted that
we may not request.
ip_whitelist: The whitelisted IP addresses, that we can
request if it were otherwise caught in a blacklist.
use_proxy: Whether proxy settings should be discovered and used
from conventional environment variables.
"""
self.hs = hs
self.reactor = hs.get_reactor()

self._ip_whitelist = ip_whitelist
self._ip_blacklist = ip_blacklist
self._extra_treq_args = treq_args or {}
self.clock = hs.get_clock()

Expand All @@ -356,44 +348,6 @@ def __init__(
# reactor.
self._cooperator = Cooperator(scheduler=_make_scheduler(hs.get_reactor()))

if self._ip_blacklist:
# If we have an IP blacklist, we need to use a DNS resolver which
# filters out blacklisted IP addresses, to prevent DNS rebinding.
self.reactor: ISynapseReactor = BlacklistingReactorWrapper(
hs.get_reactor(), self._ip_whitelist, self._ip_blacklist
)
else:
self.reactor = hs.get_reactor()

# the pusher makes lots of concurrent SSL connections to sygnal, and
# tends to do so in batches, so we need to allow the pool to keep
# lots of idle connections around.
pool = HTTPConnectionPool(self.reactor)
# XXX: The justification for using the cache factor here is that larger instances
# will need both more cache and more connections.
# Still, this should probably be a separate dial
pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5)
pool.cachedConnectionTimeout = 2 * 60

self.agent: IAgent = ProxyAgent(
self.reactor,
hs.get_reactor(),
connectTimeout=15,
contextFactory=self.hs.get_http_client_context_factory(),
pool=pool,
use_proxy=use_proxy,
)

if self._ip_blacklist:
# If we have an IP blacklist, we then install the blacklisting Agent
# which prevents direct access to IP addresses, that are not caught
# by the DNS resolution.
self.agent = BlacklistingAgentWrapper(
self.agent,
ip_blacklist=self._ip_blacklist,
ip_whitelist=self._ip_whitelist,
)

async def request(
self,
method: str,
Expand Down Expand Up @@ -799,6 +753,72 @@ async def get_file(
)


class SimpleHttpClient(BaseHttpClient):
"""
An HTTP client capable of crossing a proxy and respecting a block/allow list.
This also configures a larger / longer lasting HTTP connection pool.
Args:
hs: The HomeServer instance to pass in
treq_args: Extra keyword arguments to be given to treq.request.
ip_blacklist: The IP addresses that are blacklisted that
we may not request.
ip_whitelist: The whitelisted IP addresses, that we can
request if it were otherwise caught in a blacklist.
use_proxy: Whether proxy settings should be discovered and used
from conventional environment variables.
"""

def __init__(
self,
hs: "HomeServer",
treq_args: Optional[Dict[str, Any]] = None,
ip_whitelist: Optional[IPSet] = None,
ip_blacklist: Optional[IPSet] = None,
use_proxy: bool = False,
):
super().__init__(hs, treq_args=treq_args)
self._ip_whitelist = ip_whitelist
self._ip_blacklist = ip_blacklist

if self._ip_blacklist:
# If we have an IP blacklist, we need to use a DNS resolver which
# filters out blacklisted IP addresses, to prevent DNS rebinding.
self.reactor: ISynapseReactor = BlacklistingReactorWrapper(
self.reactor, self._ip_whitelist, self._ip_blacklist
)

# the pusher makes lots of concurrent SSL connections to Sygnal, and tends to
# do so in batches, so we need to allow the pool to keep lots of idle
# connections around.
pool = HTTPConnectionPool(self.reactor)
# XXX: The justification for using the cache factor here is that larger
# instances will need both more cache and more connections.
# Still, this should probably be a separate dial
pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5)
pool.cachedConnectionTimeout = 2 * 60

self.agent: IAgent = ProxyAgent(
self.reactor,
hs.get_reactor(),
connectTimeout=15,
contextFactory=self.hs.get_http_client_context_factory(),
pool=pool,
use_proxy=use_proxy,
)

if self._ip_blacklist:
# If we have an IP blacklist, we then install the blacklisting Agent
# which prevents direct access to IP addresses, that are not caught
# by the DNS resolution.
self.agent = BlacklistingAgentWrapper(
self.agent,
ip_blacklist=self._ip_blacklist,
ip_whitelist=self._ip_whitelist,
)


def _timeout_to_request_timed_out_error(f: Failure) -> Failure:
if f.check(twisted_error.TimeoutError, twisted_error.ConnectingCancelledError):
# The TCP connection has its own timeout (set by the 'connectTimeout' param
Expand Down

0 comments on commit c932614

Please sign in to comment.