Skip to content

Commit

Permalink
[ZDU] Finalize Collection Version sha256
Browse files Browse the repository at this point in the history
This allows uploads of different files claiming to be the same
collections into different repositories.

fixes #1052
  • Loading branch information
mdellweg committed Nov 20, 2024
1 parent a946077 commit 58719fc
Show file tree
Hide file tree
Showing 10 changed files with 83 additions and 90 deletions.
2 changes: 2 additions & 0 deletions CHANGES/1052.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
CollectionVersion global uniqueness constraint is now its sha256 digest. Repository level uniqueness
is still (namespace, name, version).
7 changes: 7 additions & 0 deletions CHANGES/1052.removal
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Added the final migration to make the sha256 of the collection version artifact the uniqueness
constraint. This allows users to serve their own interpretation of the content in their private
repositories.
The migration will only succeed if all the content has been adjusted. To account for content that
was not migrated by the migration shipped with 0.22.0, you can run the content repair command
``datarepair-ansible-collection-sha256`` prior to upgrading.
This version removed the content repair command.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Generated by Django 3.2.14 on 2022-07-21 23:05

from django.db import migrations, models
from pulpcore.plugin.migrations import RequireVersion


class Migration(migrations.Migration):

dependencies = [
('ansible', '0057_collectionversion_sha256_migrate'),
('core', '0110_apiappstatus'),
]

operations = [
RequireVersion("ansible", "0.23.0"),
migrations.AlterField(
model_name='collectionversion',
name='sha256',
field=models.CharField(db_index=True, max_length=64, null=False),
),
migrations.AlterUniqueTogether(
name='collectionversion',
unique_together={('sha256',)},
),
]
9 changes: 2 additions & 7 deletions pulp_ansible/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ class CollectionVersion(Content):
namespace = models.CharField(max_length=64, editable=False)
repository = models.CharField(default="", blank=True, max_length=2000, editable=False)
requires_ansible = models.CharField(null=True, max_length=255)
sha256 = models.CharField(max_length=64, null=True, blank=False)
sha256 = models.CharField(max_length=64, db_index=True, null=False, blank=False)

version = models.CharField(max_length=128, db_collation="pulp_ansible_semver")
version_major = models.IntegerField()
Expand All @@ -207,11 +207,6 @@ class CollectionVersion(Content):
# search_vector is built.
search_vector = psql_search.SearchVectorField(default="")

@classmethod
def natural_key_fields(cls):
# This method should be removed when the uniqueness is properly switched to sha256.
return cls._meta.unique_together[0]

@hook(BEFORE_SAVE)
def calculate_version_parts(self):
v = Version(self.version)
Expand All @@ -236,7 +231,7 @@ def __str__(self):

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"
unique_together = (("namespace", "name", "version"), ("sha256",))
unique_together = ("sha256",)
constraints = [
UniqueConstraint(
fields=("collection", "is_highest"),
Expand Down
24 changes: 3 additions & 21 deletions pulp_ansible/app/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,10 +505,9 @@ def deferred_validate(self, data):
# Call super to ensure that data contains artifact
data = super().deferred_validate(data)
artifact = data.get("artifact")

if (sha256 := data.get("sha256")) and sha256 != artifact.sha256:
raise ValidationError(_("Expected sha256 did not match uploaded artifact's sha256"))
else:
data["sha256"] = artifact.sha256

collection_info = process_collection_artifact(
artifact=artifact,
Expand All @@ -519,35 +518,18 @@ def deferred_validate(self, data):
# repository field clashes
collection_info["origin_repository"] = collection_info.pop("repository", None)
data.update(collection_info)
# `retrieve` needs artifact, but it won't be in validated_data because of `get_artifacts`
self.context["artifact"] = artifact

return data

def retrieve(self, validated_data):
"""Reuse existing CollectionVersion if provided artifact matches."""
namespace = validated_data["namespace"]
name = validated_data["name"]
version = validated_data["version"]
artifact = self.context["artifact"]
# TODO switch this check to use digest when ColVersion uniqueness constraint is changed
col = CollectionVersion.objects.filter(
namespace=namespace, name=name, version=version
).first()
if col:
if col._artifacts.get() != artifact:
raise ValidationError(
_("Collection {}.{}-{} already exists with a different artifact").format(
namespace, name, version
)
)

return col
return CollectionVersion.objects.filter(sha256=validated_data["sha256"]).first()

def create(self, validated_data):
"""Final step in creating the CollectionVersion."""
tags = validated_data.pop("tags")
origin_repository = validated_data.pop("origin_repository")

# Create CollectionVersion from its metadata and adds to repository if specified
content = super().create(validated_data)

Expand Down
8 changes: 3 additions & 5 deletions pulp_ansible/app/tasks/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ async def declarative_content_from_git_repo(remote, url, git_ref=None, metadata_
version = metadata["metadata"]["version"]
else:
raise e
collection_version = await sync_to_async(CollectionVersion.objects.get)(
collection_version = await CollectionVersion.objects.aget(
namespace=namespace, name=name, version=version
)
if artifact is None:
Expand Down Expand Up @@ -275,7 +275,7 @@ def import_collection(
temp_file = PulpTemporaryFile.objects.get(pk=temp_file_pk)
filename = CollectionFilename(expected_namespace, expected_name, expected_version)
log.info(f"Processing collection from {temp_file.file.name}")
user_facing_logger = logging.getLogger("pulp_ansible.app.tasks.collection.import_collection")
user_facing_logger = logging.getLogger("pulp_ansible.app.tasks.collections.import_collection")

try:
with temp_file.file.open() as artifact_file:
Expand Down Expand Up @@ -673,9 +673,7 @@ async def _add_namespace(self, name, namespace_sha):
"""Adds A Namespace metadata content to the pipeline."""

try:
ns = await sync_to_async(AnsibleNamespaceMetadata.objects.get)(
metadata_sha256=namespace_sha
)
ns = await AnsibleNamespaceMetadata.objects.aget(metadata_sha256=namespace_sha)
await self.put(DeclarativeContent(ns))
return True
except AnsibleNamespaceMetadata.DoesNotExist:
Expand Down
4 changes: 3 additions & 1 deletion pulp_ansible/app/tasks/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ def process_collection_artifact(artifact, namespace, name, version):

# Set up logging for CollectionImport object
CollectionImport.objects.get_or_create(task_id=Task.current().pulp_id)
user_facing_logger = logging.getLogger("pulp_ansible.app.tasks.collection.import_collection")
user_facing_logger = logging.getLogger(
"pulp_ansible.app.tasks.upload.process_collection_artifact"
)

artifact_url = reverse("artifacts-detail", args=[artifact.pk])
filename = CollectionFilename(namespace, name, version)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
"""Tests related to sync ansible plugin collection content type."""

import hashlib
from pathlib import Path

import pytest

from pulp_ansible.tests.functional.utils import randstr
Expand Down Expand Up @@ -99,3 +102,32 @@ def test_content_unit_lifecycle(ansible_bindings, build_and_upload_collection, m
ansible_bindings.ContentCollectionVersionsApi.create(file=collection_artifact.filename).task
)
assert content_unit_href in create_task.created_resources


@pytest.mark.parallel
def test_duplicate_collection_key(
ansible_bindings,
ansible_repo_factory,
build_and_upload_collection,
monitor_task,
):
"""Create two content units with the same name but different artifacts."""
repository1 = ansible_repo_factory()
repository2 = ansible_repo_factory()

attrs = {"namespace": randstr(), "name": "squeezer", "version": "0.0.9"}
collection_artifact1, content_unit_href1 = build_and_upload_collection(
repository1, config=attrs
)
collection_artifact2, content_unit_href2 = build_and_upload_collection(
repository2, config=attrs
)

sha256_1 = hashlib.sha256(Path(collection_artifact1.filename).read_bytes()).hexdigest()
sha256_2 = hashlib.sha256(Path(collection_artifact2.filename).read_bytes()).hexdigest()
assert sha256_1 != sha256_2

content_unit_1 = ansible_bindings.ContentCollectionVersionsApi.read(content_unit_href1)
assert content_unit_1.sha256 == sha256_1
content_unit_2 = ansible_bindings.ContentCollectionVersionsApi.read(content_unit_href2)
assert content_unit_2.sha256 == sha256_2
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,9 @@ def test_collection_version_sha256_migrate(migrate):

cv = CollectionVersion.objects.get(pk=cv.pk)
assert cv.sha256 == "SENTINEL"

apps = migrate([("ansible", "0058_collectionversion_unique_sha256")])
CollectionVersion = apps.get_model("ansible", "CollectionVersion")

cv = CollectionVersion.objects.get(pk=cv.pk)
assert cv.sha256 == "SENTINEL"

0 comments on commit 58719fc

Please sign in to comment.