diff --git a/publicsuffixlist/__init__.py b/publicsuffixlist/__init__.py index 290f0de..563264b 100644 --- a/publicsuffixlist/__init__.py +++ b/publicsuffixlist/__init__.py @@ -167,32 +167,25 @@ def _countpublic(self, labels, accept_unknown=None) -> int: if ll == 1 and accept_unknown: return 1 - # There is the PSL algorithm definition, - # https://github.com/publicsuffix/list/wiki/Format + # There is confusion in rule evaluation. # - # A domain is said to match a rule if and only if all of the following - # conditions are met: - # 1. When the domain and rule are split into corresponding labels, that - # the domain contains as many or more labels than the rule. - # 2. Beginning with the right-most labels of both the domain and the - # rule, and continuing for all labels in the rule, one finds that for - # every pair, either they are identical, or that the label from the - # rule is "*". - # - # Bacause of rule 1, `foo.com` does not match `*.foo.com`. - # - # However, there is some confusion in rule evaluation. - # test_psl.txt states that city.kobe.jp -> city.kobe.jp + # The test data, test_psl.txt states that + # city.kobe.jp -> city.kobe.jp # so kobe.jp is public, although kobe.jp is not listed. That means # test_psl.txt assumes !city.example.com or *.example.com implicitly # declares example.com as also public. # - # This module dropped support for the conflicting test case. + # This implicit declaration of wildcard is required and checked by + # the linter. + # https://github.com/publicsuffix/list/blame/de747b657fb0f479667015423c12f98fd47ebf1d/linter/pslint.py#L230 + # + # The PSL wiki had listed a wrong example regarding the wildcard. 
+ # This should be resolved by issue: + # https://github.com/publicsuffix/list/issues/1989 # We start from longest to shortcircuit startfrom = max(0, ll - (self._maxlabel + 1)) - excluded = True for i in range(startfrom, ll): depth = ll - i s = ".".join(labels[-depth:]) @@ -200,29 +193,26 @@ def _countpublic(self, labels, accept_unknown=None) -> int: # the check order must be wild > exact > exception # this is required to backtrack subdomain wildcard + # exception rule + if ("!" + s) in self._publicsuffix: + # exception rule has wildcard sibling. + # and the wildcard has an implicit root. + return depth - 1 + # wildcard match if ("*." + s) in self._publicsuffix: # if we have subdomain, that must be checked against exception - # rule. - if i > startfrom and not excluded: + # rule. The backtrack check was performed in the previous loop. + if i > 0: return depth + 1 - # If this is entire match, it is not public from the PSL example. - # ignore it. + # If this is the entire match, it is the implicit root of the wildcard. + return depth # exact match if s in self._publicsuffix: return depth - # exception rule - if ("!" + s) in self._publicsuffix: - # exception rule has wildcard sibiling. - # Although the test case assumes it has implicit public domain on the root, - # in the PSL definition, the next is not always public. 
- excluded = True - else: - excluded = False - if accept_unknown: return 1 return 0 diff --git a/publicsuffixlist/test.py b/publicsuffixlist/test.py index a8b96c4..e446d95 100644 --- a/publicsuffixlist/test.py +++ b/publicsuffixlist/test.py @@ -96,7 +96,8 @@ def test_wiki_example(self): """ psl = PublicSuffixList(source.splitlines()) - self.assertEqual(psl.is_private("foo.com"), True) + # According to the linter, this rule is incorrect + # self.assertEqual(psl.is_private("foo.com"), True) self.assertEqual(psl.is_private("bar.foo.com"), False) self.assertEqual(psl.is_private("example.bar.foo.com"), True) self.assertEqual(psl.is_private("foo.bar.jp"), True) @@ -326,6 +327,22 @@ def test_subdomain_keep_case(self): bytestuple(b"Www.Example.Co.Jp")) + def test_wildcardonlytld(self): + source = """ +*.bd +""" + psl = PublicSuffixList(source.splitlines(), accept_unknown=False) + + self.assertEqual(psl.publicsuffix("bd"), "bd") + self.assertEqual(psl.privatesuffix("bd"), None) + + self.assertEqual(psl.publicsuffix("example.bd"), "example.bd") + self.assertEqual(psl.privatesuffix("example.bd"), None) + + self.assertEqual(psl.publicsuffix("example.example.bd"), "example.bd") + self.assertEqual(psl.privatesuffix("example.example.bd"), "example.example.bd") + + def test_longwildcard(self): source = """ com @@ -339,8 +356,9 @@ def test_longwildcard(self): self.assertEqual(psl.publicsuffix("example.com"), "com") self.assertEqual(psl.privatesuffix("example.com"), "example.com") - self.assertEqual(psl.publicsuffix("compute.example.com"), "com") - self.assertEqual(psl.privatesuffix("compute.example.com"), "example.com") + # wildcard implies the root is also public suffix + self.assertEqual(psl.publicsuffix("compute.example.com"), "compute.example.com") + self.assertEqual(psl.privatesuffix("compute.example.com"), None) self.assertEqual(psl.publicsuffix("region.compute.example.com"), "region.compute.example.com") self.assertEqual(psl.privatesuffix("region.compute.example.com"), None) 
diff --git a/publicsuffixlist/test_psl.txt b/publicsuffixlist/test_psl.txt index 0d9559e..012e5fe 100644 --- a/publicsuffixlist/test_psl.txt +++ b/publicsuffixlist/test_psl.txt @@ -57,10 +57,8 @@ checkPublicSuffix('a.b.ide.kyoto.jp', 'b.ide.kyoto.jp'); checkPublicSuffix('c.kobe.jp', null); checkPublicSuffix('b.c.kobe.jp', 'b.c.kobe.jp'); checkPublicSuffix('a.b.c.kobe.jp', 'b.c.kobe.jp'); -// This are not valid anymore -// https://github.com/publicsuffix/list/issues/1890 -// checkPublicSuffix('city.kobe.jp', 'city.kobe.jp'); -// checkPublicSuffix('www.city.kobe.jp', 'city.kobe.jp'); +checkPublicSuffix('city.kobe.jp', 'city.kobe.jp'); +checkPublicSuffix('www.city.kobe.jp', 'city.kobe.jp'); // TLD with a wildcard rule and exceptions. checkPublicSuffix('ck', null); checkPublicSuffix('test.ck', null);