Skip to content

Commit

Permalink
Introduction of the proxy settings.
Browse files Browse the repository at this point in the history
This patch fixes #132.

Indeed, before this patch, one could use the HTTP_PROXY and
HTTPS_PROXY settings. But, PyFunceble wouldn't actually
handle some special rules.

This patch introduces the `--http-proxy` and `--https-proxy`
arguments.

This patch introduces the ability to define some extra rules
within the configuration file. Indeed, this is something that
was missing.
Therefore, from now on, you can define some extra rules around
your proxies.

For example, if you want to use `example.com` as main proxy but
want all `.com` and `.org` subjects to be requested through
`example.dev`, this is now possible through the configuration:

```yaml
proxy:
  global:
    http: http://example.com:8080
    https: http://example.com:8080
  rules:
    - http: http://example.dev:8080
      https: http://example.dev:8080
      tld:
        - com
        - org
```

Contributors:
  * @spirillen
  • Loading branch information
funilrys committed Mar 5, 2022
1 parent 152dc4e commit c30c466
Show file tree
Hide file tree
Showing 14 changed files with 527 additions and 9 deletions.
32 changes: 32 additions & 0 deletions PyFunceble/cli/entry_points/pyfunceble/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,36 @@ def get_dns_control_group_data() -> List[Tuple[List[str], dict]]:
),
]

def get_proxy_control_group_data() -> List[Tuple[List[str], dict]]:
    """
    Provides the arguments of the proxy control group.

    Each entry pairs the list of command-line flags with the options
    (destination, type and help text) describing that argument.
    """

    # The help texts embed the currently configured value, so they are
    # computed in the same order as the arguments are listed.
    http_proxy_options = {
        "dest": "proxy.global.http",
        "type": str,
        "help": "Sets the proxy to use when testing subjects over HTTP. %s"
        % get_configured_value("proxy.global.http"),
    }
    https_proxy_options = {
        "dest": "proxy.global.https",
        "type": str,
        "help": "Sets the proxy to use when testing subjects over HTTPS. %s"
        % get_configured_value("proxy.global.https"),
    }

    return [
        (["--http-proxy"], http_proxy_options),
        (["--https-proxy"], https_proxy_options),
    ]


def get_database_control_group_data() -> List[Tuple[List[str], dict]]:
"""
Expand Down Expand Up @@ -1241,6 +1271,7 @@ def tool() -> None:
)
test_control_group = parser.add_argument_group("Test control")
dns_control_group = parser.add_argument_group("DNS control")
proxy_control_group = parser.add_argument_group("Proxy control")
database_control_group = parser.add_argument_group("Databases")
output_control_group = parser.add_argument_group("Output control")
multiprocessing_group = parser.add_argument_group("Multiprocessing")
Expand All @@ -1251,6 +1282,7 @@ def tool() -> None:
get_filtering_group_data,
get_test_control_group_data,
get_dns_control_group_data,
get_proxy_control_group_data,
get_database_control_group_data,
get_output_control_group_data,
get_multiprocessing_group_data,
Expand Down
1 change: 1 addition & 0 deletions PyFunceble/config/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ def is_local_identical(self) -> bool:
or "not_found_default" in self.local_config["http_codes"]
or "self_managed" not in self.local_config["http_codes"]
or "dns" not in self.local_config
or "proxy" not in self.local_config
or "follow_server_order" not in self.local_config["dns"]
or "trust_server" not in self.local_config["dns"]
or "collection" not in self.local_config
Expand Down
6 changes: 5 additions & 1 deletion PyFunceble/config/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,10 +398,13 @@ def start(self) -> "ConfigLoader":
PyFunceble.storage.HTTP_CODES = Box(
config["http_codes"],
)
if "collection" in config:
if "collection" in config and config["collection"]:
PyFunceble.storage.COLLECTION = Box(config["collection"])
PyFunceble.storage.LINKS = Box(config["links"])

if "proxy" in config and config["proxy"]:
PyFunceble.storage.PROXY = Box(config["proxy"])

return self

def destroy(self) -> "ConfigLoader":
Expand All @@ -417,6 +420,7 @@ def destroy(self) -> "ConfigLoader":
PyFunceble.storage.HTTP_CODES = Box({})
PyFunceble.storage.COLLECTION = Box({})
PyFunceble.storage.LINKS = Box({})
PyFunceble.storage.PROXY = Box({})
except (AttributeError, TypeError): # pragma: no cover ## Safety.
pass

Expand Down
43 changes: 43 additions & 0 deletions PyFunceble/data/infrastructure/.PyFunceble_production.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,49 @@ user_agent:
# WARNING: If given, this will be used systematically.
custom: null

proxy:
# Provides everything related to the proxy.
#
# The idea:
# We have two main keys, "global" and "rules".
# The system will always follow the global keys unless a rule explicitly
# lists the TLD of the tested subject.
#
# Example:
#
# Let's say we want all HTTP requests to go through example.org but we want
# all HTTP requests for domains ending with `.com`, `.org` and `.dev` to go
# through example.com. And, we want all domains ending with `.onion` to go
# through example.dev.
#
# This is what it will look like.
#
# global:
# http: http://example.org:8080
# https: http://example.org:8080
#
# rules:
# - http: http://example.com:8080
# https: http://example.com:8080
# tld:
# - com
# - org
# - dev
# - http: socks5://example.dev:8080
# https: socks5://example.dev:8080
# tld:
# - onion
#

global:
# Global HTTP proxy to use when no rule is given or matched.
http: null
# Global HTTPS proxy to use when no rule is given or matched.
https: null

rules: []


# Activates the verification of the certificate.
verify_ssl_certificate: False

Expand Down
101 changes: 101 additions & 0 deletions PyFunceble/query/requests/adapter/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ class RequestAdapterBase(requests.adapters.HTTPAdapter):
resolving_cache: dict = {}
resolving_use_cache: bool = False
timeout: float = 5.0
proxy_pattern: dict = {}

def __init__(self, *args, **kwargs):
if "timeout" in kwargs:
Expand All @@ -86,6 +87,12 @@ def __init__(self, *args, **kwargs):
else:
self.dns_query_tool = DNSQueryTool()

if "proxy_pattern" in kwargs:
self.proxy_pattern = kwargs["proxy_pattern"]
del kwargs["proxy_pattern"]
else:
self.proxy_pattern = {}

super().__init__(*args, **kwargs)

@staticmethod
Expand All @@ -99,6 +106,100 @@ def fake_response() -> requests.models.Response:
"Could not resolve."
)

@staticmethod
def extract_extension(subject: str) -> str:
"""
Provides the extension of the given subject.
:param str subject:
The subject to get extract the extension from.
:raise TypeError:
When the given :code:`subject` is not a :py:class:`str`.
:raise ValueError:
When the given :code:`subject` is an empty :py:class:`str`.
"""

if not isinstance(subject, str):
raise TypeError(f"<subject> should be {str}, type(subject) given.")

if not subject:
raise ValueError("<subject> should not be empty.")

if subject.endswith("."):
# Absolute needs a little correction.
last_point = subject[:-1].rfind(".")
else:
last_point = subject.rindex(".")

extension = subject[last_point + 1 :]

if extension.endswith("."):
return extension[:-1]
return extension

def fetch_proxy_from_pattern(self, subject: str) -> dict:
    """
    Provides the proxy settings to use for the given subject.

    The first rule of :code:`self.proxy_pattern["rules"]` that lists the
    extension of the subject in its :code:`tld` key and that defines at
    least one proxy wins. When no rule matches, the :code:`global`
    settings are used.

    :param str subject:
        The subject to work with.

    :return:
        A dict with the :code:`http` and :code:`https` keys, or an empty
        dict when no proxy is defined. When only one of the two schemes
        is defined, its value is reused for the other one.

    :raise TypeError:
        When the given :code:`subject` is not a :py:class:`str`.
    :raise ValueError:
        When the given :code:`subject` is an empty :py:class:`str`.
    """

    def correct_input(pattern_input: dict) -> dict:
        # Keep only the schemes that have a non-empty value.
        result = {
            scheme: pattern_input[scheme]
            for scheme in ("http", "https")
            if scheme in pattern_input and pattern_input[scheme]
        }

        # A single defined scheme is mirrored onto the missing one.
        if "http" in result:
            result.setdefault("https", result["http"])
        elif "https" in result:
            result["http"] = result["https"]

        return result

    if not isinstance(subject, str):
        raise TypeError(f"<subject> should be {str}, {type(subject)} given.")

    if not subject:
        raise ValueError("<subject> should not be empty.")

    extension = self.extract_extension(subject)

    # "rules", "tld" and "global" may be present but empty (None) when
    # they come from a configuration file; treat that as "nothing set".
    for rule in self.proxy_pattern.get("rules") or []:
        if extension not in (rule.get("tld") or []):
            continue

        proxies = correct_input(rule)

        # A matching rule without any proxy is ignored.
        if proxies:
            return proxies

    global_pattern = self.proxy_pattern.get("global")

    if global_pattern:
        return correct_input(global_pattern)

    return {}

def resolve_with_cache(self, hostname: str) -> Optional[str]:
"""
Try to resolve using an internal cache.
Expand Down
3 changes: 3 additions & 0 deletions PyFunceble/query/requests/adapter/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@ def send(self, request, **kwargs) -> requests.Response:
# Ensure that the Hosts header is present. Otherwise, connection might
# not work.
request.headers["Host"] = parsed_url.hostname
# Select the proxies matching the requested hostname (rule or global).
# NOTE: the leftover debug `raise Exception(kwargs)` was removed here; it
# aborted every request that reached this branch.
kwargs["proxies"] = self.fetch_proxy_from_pattern(parsed_url.hostname)
else:
self.poolmanager.connection_pool_kw.pop(
"server_hostname", PyFunceble.storage.NOT_RESOLVED_STD_HOSTNAME
Expand Down
1 change: 1 addition & 0 deletions PyFunceble/query/requests/adapter/https.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def send(self, request, **kwargs) -> requests.Response:
# Ensure that the Hosts header is present. Otherwise, connection might
# not work.
request.headers["Host"] = parsed_url.hostname
kwargs["proxies"] = self.fetch_proxy_from_pattern(parsed_url.hostname)
else:
self.poolmanager.connection_pool_kw.pop(
"server_hostname", PyFunceble.storage.NOT_RESOLVED_STD_HOSTNAME
Expand Down
Loading

0 comments on commit c30c466

Please sign in to comment.