Skip to content

Commit

Permalink
logic: follow the PSL linter evaluation rules
Browse files Browse the repository at this point in the history
This commit reverts that internal logic change.

In the previous commit 72111bd this module changed the evaluation
logic to follow the PSL definition described in the wiki.

Now, it follows the rule described in the linter of the PSL code
repository, where a wildcard declaration implies that its zone root is
also a public suffix.

Signed-off-by: ko-zu <[email protected]>
ko-zu committed Jun 2, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent 61668fd commit a6b950b
Showing 3 changed files with 43 additions and 37 deletions.
50 changes: 20 additions & 30 deletions publicsuffixlist/__init__.py
Original file line number Diff line number Diff line change
@@ -167,62 +167,52 @@ def _countpublic(self, labels, accept_unknown=None) -> int:
if ll == 1 and accept_unknown:
return 1

# There is the PSL algorithm definition,
# https://github.com/publicsuffix/list/wiki/Format
# There is confusion in rule evaluation.
#
# A domain is said to match a rule if and only if all of the following
# conditions are met:
# 1. When the domain and rule are split into corresponding labels, that
# the domain contains as many or more labels than the rule.
# 2. Beginning with the right-most labels of both the domain and the
# rule, and continuing for all labels in the rule, one finds that for
# every pair, either they are identical, or that the label from the
# rule is "*".
#
# Because of rule 1, `foo.com` does not match `*.foo.com`.
#
# However, there is some confusion in rule evaluation.
# test_psl.txt states that city.kobe.jp -> city.kobe.jp
# The test data, test_psl.txt states that
# city.kobe.jp -> city.kobe.jp
# so kobe.jp is public, although kobe.jp is not listed. That means
# test_psl.txt assumes !city.example.com or *.example.com implicitly
# declares example.com as also public.
#
# This module dropped support for the conflicting test case.
# This implicit declaration of wildcard is required and checked by
# the linter.
# https://github.com/publicsuffix/list/blame/de747b657fb0f479667015423c12f98fd47ebf1d/linter/pslint.py#L230
#
# The PSL wiki had listed a wrong example regarding the wildcard.
# This should be resolved by issue:
# https://github.com/publicsuffix/list/issues/1989

# We start from the longest candidate so we can short-circuit
startfrom = max(0, ll - (self._maxlabel + 1))

excluded = True
for i in range(startfrom, ll):
depth = ll - i
s = ".".join(labels[-depth:])

# the check order must be wild > exact > exception
# this is required to backtrack subdomain wildcard

# exception rule
if ("!" + s) in self._publicsuffix:
# An exception rule has a wildcard sibling,
# and that wildcard has an implicit root.
return depth - 1

# wildcard match
if ("*." + s) in self._publicsuffix:
# if we have subdomain, that must be checked against exception
# rule.
if i > startfrom and not excluded:
# rule. The backtrack check was performed in the previous loop.
if i > 0:
return depth + 1

# If this is entire match, it is not public from the PSL example.
# ignore it.
# If this is entire match, it is implicit root of wildcard.
return depth

# exact match
if s in self._publicsuffix:
return depth

# exception rule
if ("!" + s) in self._publicsuffix:
# exception rule has wildcard sibling.
# Although the test case assumes it has implicit public domain on the root,
# in the PSL definition, the next is not always public.
excluded = True
else:
excluded = False

if accept_unknown:
return 1
return 0
24 changes: 21 additions & 3 deletions publicsuffixlist/test.py
Original file line number Diff line number Diff line change
@@ -96,7 +96,8 @@ def test_wiki_example(self):
"""
psl = PublicSuffixList(source.splitlines())

self.assertEqual(psl.is_private("foo.com"), True)
# According to the linter, this rule is incorrect
# self.assertEqual(psl.is_private("foo.com"), True)
self.assertEqual(psl.is_private("bar.foo.com"), False)
self.assertEqual(psl.is_private("example.bar.foo.com"), True)
self.assertEqual(psl.is_private("foo.bar.jp"), True)
@@ -326,6 +327,22 @@ def test_subdomain_keep_case(self):
bytestuple(b"Www.Example.Co.Jp"))


def test_wildcardonlytld(self):
    # Only a wildcard rule is listed and unknown TLDs are rejected; the
    # assertions below expect the wildcard's zone root ("bd") to be treated
    # as a public suffix as well.
    rules = """
*.bd
"""
    psl = PublicSuffixList(rules.splitlines(), accept_unknown=False)

    # (domain, expected public suffix, expected private suffix)
    expectations = (
        ("bd", "bd", None),
        ("example.bd", "example.bd", None),
        ("example.example.bd", "example.bd", "example.example.bd"),
    )
    for domain, public, private in expectations:
        self.assertEqual(psl.publicsuffix(domain), public)
        self.assertEqual(psl.privatesuffix(domain), private)


def test_longwildcard(self):
source = """
com
@@ -339,8 +356,9 @@ def test_longwildcard(self):
self.assertEqual(psl.publicsuffix("example.com"), "com")
self.assertEqual(psl.privatesuffix("example.com"), "example.com")

self.assertEqual(psl.publicsuffix("compute.example.com"), "com")
self.assertEqual(psl.privatesuffix("compute.example.com"), "example.com")
# wildcard implies the root is also public suffix
self.assertEqual(psl.publicsuffix("compute.example.com"), "compute.example.com")
self.assertEqual(psl.privatesuffix("compute.example.com"), None)

self.assertEqual(psl.publicsuffix("region.compute.example.com"), "region.compute.example.com")
self.assertEqual(psl.privatesuffix("region.compute.example.com"), None)
6 changes: 2 additions & 4 deletions publicsuffixlist/test_psl.txt
Original file line number Diff line number Diff line change
@@ -57,10 +57,8 @@ checkPublicSuffix('a.b.ide.kyoto.jp', 'b.ide.kyoto.jp');
checkPublicSuffix('c.kobe.jp', null);
checkPublicSuffix('b.c.kobe.jp', 'b.c.kobe.jp');
checkPublicSuffix('a.b.c.kobe.jp', 'b.c.kobe.jp');
// These are not valid anymore
// https://github.com/publicsuffix/list/issues/1890
// checkPublicSuffix('city.kobe.jp', 'city.kobe.jp');
// checkPublicSuffix('www.city.kobe.jp', 'city.kobe.jp');
checkPublicSuffix('city.kobe.jp', 'city.kobe.jp');
checkPublicSuffix('www.city.kobe.jp', 'city.kobe.jp');
// TLD with a wildcard rule and exceptions.
checkPublicSuffix('ck', null);
checkPublicSuffix('test.ck', null);

0 comments on commit a6b950b

Please sign in to comment.