Skip to content

Commit

Permalink
feat: optimize parsing invalid chars
Browse files Browse the repository at this point in the history
Image refs with many and long subdomains that have invalid characters
in the image reference cause the regex engine to get bogged down
and could become a regular expression denial of service attack.

For example `docker.artifactory.us.foo.mycompany.com/bar/node?18` takes >4mins to parse and return as an invalid reference. With the updates in this change set the whole test set runs in 2ms.
  • Loading branch information
cronik committed May 1, 2024
1 parent 7a9bd8c commit ab3d36f
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 1 deletion.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,6 @@ ENV/

# Rope project settings
.ropeproject

# Idea project settings
.idea
4 changes: 3 additions & 1 deletion docker_image/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
DEFAULT_DOMAIN = 'docker.io'
LEGACY_DEFAULT_DOMAIN = 'index.docker.io'
OFFICIAL_REPO_NAME = 'library'

INVALID_REF_CHARS_TABLE = {ord(i): None for i in "?[]{}~!#$%^&*()+|<>,'\""}

class InvalidReference(Exception):
@classmethod
Expand Down Expand Up @@ -122,6 +122,8 @@ def try_validate(cls, s):
hostname, _ = s.split('/', 1)
if '.' not in hostname:
return
if s.translate(INVALID_REF_CHARS_TABLE) != s:
raise ReferenceInvalidFormat.default()
matched = ImageRegexps.ANCHORED_HOSTNAME_REGEXP.match(hostname)
if not matched:
raise ReferenceInvalidFormat.default()
Expand Down
2 changes: 2 additions & 0 deletions tests/test_reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def create_test_case(input_, err=None, repository=None, hostname=None, tag=None,
create_test_case(input_='foo_bar.com:8080', repository='foo_bar.com', tag='8080'),
create_test_case(input_='foo/foo_bar.com:8080', repository='foo/foo_bar.com', hostname='foo', tag='8080'),
create_test_case(input_='123.dkr.ecr.eu-west-1.amazonaws.com:lol/abc:d', err=reference.ReferenceInvalidFormat),
create_test_case(input_='docker.artifactory.us.foo.mycompany.com/bar/node?18', err=reference.ReferenceInvalidFormat),
]

for tc in test_cases:
Expand Down Expand Up @@ -159,6 +160,7 @@ def test_validate_reference_name(self):
"docker.io/docker/Docker",
"docker.io/docker///docker",
"1a3f5e7d9c1b3a5f7e9d1c3b5a7f9e1d3c5b7a9f1e3d5d7c9b1a3f5e7d9c1b3a",
"docker.artifactory.us.foo.mycompany.com/bar/node?18"
]
for name in valid_repo_names:
ref = reference.Reference.parse_normalized_named(name)
Expand Down

0 comments on commit ab3d36f

Please sign in to comment.