From af91ab848a4e03f99e3695c79cce3c5355f5b14c Mon Sep 17 00:00:00 2001 From: funilrys Date: Fri, 31 Dec 2021 18:21:33 +0100 Subject: [PATCH] Do DNS Lookup before querying URL. This patch fixes #164. Indeed, before this patch, we were trying to query URL even if the domain didn't have an IP address. This patch fixes the issue by doing a classic (NS, CNAME, A, AAA) query before trying to query any URL. Note: The DNS Lookup is the same as the one we do for a standard domain/ip test. Contributors: * @mitchellkrogza * @spirillen --- PyFunceble/checker/availability/base.py | 9 +++-- PyFunceble/checker/availability/url.py | 45 ++++++++++++++++++++++++- PyFunceble/checker/syntax/url.py | 35 ++++++++++++++----- tests/checker/availability/test_url.py | 41 ++++++++++++++++++++++ tests/checker/syntax/test_url.py | 25 ++++++++++++++ 5 files changed, 143 insertions(+), 12 deletions(-) diff --git a/PyFunceble/checker/availability/base.py b/PyFunceble/checker/availability/base.py index 3d16bc04..479fe1de 100644 --- a/PyFunceble/checker/availability/base.py +++ b/PyFunceble/checker/availability/base.py @@ -800,11 +800,14 @@ def try_to_query_status_from_whois( def try_to_query_status_from_dns(self) -> "AvailabilityCheckerBase": """ Tries to query the status from the DNS lookup. + + .. versionchanged:: 4.1.0b7.dev + Logging return the correct subject. """ PyFunceble.facility.Logger.info( "Started to try to query the status of %r from: DNS Lookup", - self.status.idna_subject, + self.dns_query_tool.subject, ) lookup_result = self.query_dns_record() @@ -816,12 +819,12 @@ def try_to_query_status_from_dns(self) -> "AvailabilityCheckerBase": PyFunceble.facility.Logger.info( "Could define the status of %r from: DNS Lookup", - self.status.idna_subject, + self.dns_query_tool.subject, ) PyFunceble.facility.Logger.info( "Finished to try to query the status of %r from: DNS Lookup", - self.status.idna_subject, + self.dns_query_tool.subject, ) return self diff --git a/PyFunceble/checker/availability/url.py b/PyFunceble/checker/availability/url.py index 1f0b02cc..36da3198 100644 --- a/PyFunceble/checker/availability/url.py +++ b/PyFunceble/checker/availability/url.py @@ -57,6 +57,7 @@ from PyFunceble.checker.availability.base import AvailabilityCheckerBase from PyFunceble.checker.availability.status import AvailabilityCheckerStatus from PyFunceble.checker.reputation.url import URLReputationChecker +from PyFunceble.checker.syntax.url import URLSyntaxChecker class URLAvailabilityChecker(AvailabilityCheckerBase): @@ -96,9 +97,16 @@ def subject_propagator(self) -> "URLAvailabilityChecker": .. warning:: You are not invited to run this method directly. + + .. versionchanged:: 4.1.0b7.dev + DNS Lookup capability. """ self.http_status_code_query_tool.set_subject(self.idna_subject) + self.dns_query_tool.set_subject( + URLSyntaxChecker.get_hostname_from_url(self.idna_subject) + or self.idna_subject + ) self.domain_syntax_checker.subject = self.idna_subject self.ip_syntax_checker.subject = self.idna_subject @@ -106,7 +114,7 @@ def subject_propagator(self) -> "URLAvailabilityChecker": self.status = AvailabilityCheckerStatus() self.status.params = self.params - self.status.dns_lookup_record = None + self.status.dns_lookup_record = self.dns_query_tool.lookup_record self.status.whois_lookup_record = None self.status.subject = self.subject @@ -193,6 +201,35 @@ def try_to_query_status_from_reputation(self) -> "URLAvailabilityChecker": return self + def try_to_query_status_from_dns(self) -> "AvailabilityCheckerBase": + """ + Tries to query the status from the DNS lookup after switching the + idna subject to the url base. + + .. warning:: + This method does not answer as you may expect. + + Indeed, if a DNS lookup failed, this method will overwrite the + standard response by setting the status to :code:`INACTIVE` and the + status source to :code:`DNSLOOKUP`. + + .. versionadded:: 4.1.0b7 + DNS Lookup as a "down" switcher. + """ + + result = super().try_to_query_status_from_dns() + + if self.status.status == PyFunceble.storage.STATUS.up: + # DNS should only be use to take subject down. + # Therefore, switching back as if nothing happened. + self.status.status = None + self.status.status_source = None + else: + self.status.status = PyFunceble.storage.STATUS.down + self.status.status_source = "DNSLOOKUP" + + return result + @AvailabilityCheckerBase.ensure_subject_is_given @AvailabilityCheckerBase.update_status_date_after_query def query_status( @@ -200,6 +237,9 @@ def query_status( ) -> "URLAvailabilityChecker": # pragma: no cover """ Queries the result without anything more. + + .. versionchanged:: 4.1.0b7.dev + DNS Query - first. """ ## Test Methods are more important. @@ -217,6 +257,9 @@ def query_status( ): self.try_to_query_status_from_reputation() + if self.should_we_continue_test(status_post_syntax_checker): + self.try_to_query_status_from_dns() + if self.should_we_continue_test(status_post_syntax_checker): self.try_to_query_status_from_http_status_code() diff --git a/PyFunceble/checker/syntax/url.py b/PyFunceble/checker/syntax/url.py index 53295a51..624aeeeb 100644 --- a/PyFunceble/checker/syntax/url.py +++ b/PyFunceble/checker/syntax/url.py @@ -52,6 +52,7 @@ import urllib.parse +from typing import Optional from PyFunceble.checker.base import CheckerBase from PyFunceble.checker.syntax.base import SyntaxCheckerBase @@ -67,28 +68,46 @@ class URLSyntaxChecker(SyntaxCheckerBase): Optional, The subject to work with. """ - @CheckerBase.ensure_subject_is_given - def is_valid(self) -> bool: + @staticmethod + def get_hostname_from_url(url: str) -> Optional[str]: """ - Validate the given subject. + Extract the hostname part of the given URL. - .. versionchanged:: 4.1.0b5.dev - URL with scheme and port are no longer :code:`INVALID`. + .. versionadded:: 4.1.0b7 """ - parsed = urllib.parse.urlparse(self.idna_subject) + parsed = urllib.parse.urlparse(url) if not parsed.scheme or not parsed.netloc: - return False + return None if parsed.hostname: if parsed.hostname != parsed.netloc: hostname = parsed.hostname else: hostname = parsed.netloc - else: + else: ## pragma: no cover ## Safety check. hostname = parsed.netloc + return hostname + + @CheckerBase.ensure_subject_is_given + def is_valid(self) -> bool: + """ + Validate the given subject. + + .. versionchanged:: 4.1.0b5.dev + URL with scheme and port are no longer :code:`INVALID`. + + .. versionchanged:: 4.1.0b7.dev + Hostname taken from :code:`get_hostname_from_url` + """ + + hostname = self.get_hostname_from_url(self.idna_subject) + + if not hostname: + return False + if ( DomainSyntaxChecker(hostname).is_valid() or IPSyntaxChecker(hostname).is_valid() diff --git a/tests/checker/availability/test_url.py b/tests/checker/availability/test_url.py index a9bba71b..e8e63ed3 100644 --- a/tests/checker/availability/test_url.py +++ b/tests/checker/availability/test_url.py @@ -82,11 +82,15 @@ def test_subject_propagator(self) -> None: """ Tests that the subjects and its IDNA counterpart are correctly propagated. + + .. versionchanged:: 4.1.0b7.dev + URL base propagation to allow DNS lookup. """ given = "http://äxample.org" expected_subject = "http://äxample.org" expected_idna_subject = "http://xn--xample-9ta.org" + expected_base_url = "xn--xample-9ta.org" self.checker.subject = given @@ -98,6 +102,7 @@ def test_subject_propagator(self) -> None: ] self.assertEqual(expected_subject, actual_subject) + self.assertEqual(expected_base_url, self.checker.dns_query_tool.subject) for actual in actual_idna_propagated: self.assertEqual(expected_idna_subject, actual) @@ -201,6 +206,42 @@ def test_try_to_query_status_from_reputation( self.assertEqual(expected_source, actual_source) + def test_try_to_query_status_from_dns(self) -> None: + """ + Tests the method that tries to define the status from the DNS lookup. + + .. versionadded:: 4.1.0b7.dev + """ + + # Let's test the case that no answer is given back. + # pylint: disable=unnecessary-lambda + self.checker.subject = "http://example.org" + self.checker.query_dns_record = ( + lambda: dict() # pylint: disable=use-dict-literal + ) + + self.checker.try_to_query_status_from_dns() + + expected_status = "INACTIVE" + actual_status = self.checker.status.status + + self.assertEqual(expected_status, actual_status) + + # Let's test the case that an answer is given back. + self.checker.query_dns_record = lambda: {"NS": ["ns1.example.org"]} + + self.checker.try_to_query_status_from_dns() + + expected_status = None + actual_status = self.checker.status.status + + self.assertEqual(expected_status, actual_status) + + expected_source = None + actual_source = self.checker.status.status_source + + self.assertEqual(expected_source, actual_source) + if __name__ == "__main__": unittest.main() diff --git a/tests/checker/syntax/test_url.py b/tests/checker/syntax/test_url.py index d396b2da..8cfac2b8 100644 --- a/tests/checker/syntax/test_url.py +++ b/tests/checker/syntax/test_url.py @@ -178,6 +178,31 @@ def test_is_not_valid_no_scheme(self) -> None: self.assertEqual(expected, actual, subject) + def test_get_hostname_from_url(self) -> None: + """ + Tests the method that let us get the hostname (or url base if you + prefer) of a given URL. + + .. versionadded:: 4.1.0b7. + """ + + url_checker = URLSyntaxChecker() + + given2expected = { + "https://example.org/hello-world": "example.org", + "example.org": None, + "://example.org/hello-world": None, + "https://example.org:8888/hello-world": "example.org", + "example.org:8080": None, + "http:///hello-world": None, + "http://10.3.0.1/hello-world": "10.3.0.1" + } + + for given, expected in given2expected.items(): + actual = url_checker.get_hostname_from_url(given) + + self.assertEqual(expected, actual, given) + if __name__ == "__main__": unittest.main()