Skip to content

Commit

Permalink
Add sha256 field to CollectionVersion
Browse files Browse the repository at this point in the history
This should be ZDU compatible. The transition will be completed with the
next y-release. This commit only adds the field an starts to populate it.
It comes with a data repair command to help transition to an actual
uniqueness constraint in the next release.

relates: #1052

Co-authored-by: Matthias Dellweg <[email protected]>
  • Loading branch information
gerrod3 and mdellweg committed Nov 6, 2024
1 parent 5a4c32a commit 58f25f4
Show file tree
Hide file tree
Showing 12 changed files with 327 additions and 85 deletions.
3 changes: 3 additions & 0 deletions CHANGES/+sha256_uniqueness.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Added sha256 to collection versions.
This is the first part of a change to make this field the uniqueness constraint in the database.
The `datarepair-ansible-collection-sha256` management command is provided to prepare for the next release bringing the second and final step.
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from gettext import gettext as _

from django.core.management import BaseCommand
from django.db import transaction

from pulp_ansible.app.models import CollectionVersion


class Command(BaseCommand):
"""
Django management command to repair ansible collection versions without sha256.
"""

help = (
"This script repairs ansible collection versions without sha256 if artifacts are available."
)

def add_arguments(self, parser):
"""Set up arguments."""
parser.add_argument(
"--dry-run",
action="store_true",
help=_("Don't modify anything, just collect results."),
)

def handle(self, *args, **options):
dry_run = options["dry_run"]
failed_units = 0
repaired_units = 0

unit_qs = CollectionVersion.objects.filter(sha256__isnull=True)
count = unit_qs.count()
print(f"CollectionVersions to repair: {count}")
if count == 0:
return

for unit in unit_qs.iterator():
try:
content_artifact = unit.contentartifact_set.get()
artifact = content_artifact.artifact
unit.sha256 = artifact.sha256

if not dry_run:
with transaction.atomic():
unit.save(update_fields=["sha256"])
except Exception as e:
failed_units += 1
print(
f"Failed to migrate collection version '{unit.namespace}.{unit.name}' "
f"'{unit.version}': {e}"
)
else:
repaired_units += 1

print(f"Successfully repaired collection versions: {repaired_units}")
print(f"Collection versions failed to repair: {failed_units}")
23 changes: 23 additions & 0 deletions pulp_ansible/app/migrations/0056_collectionversion_sha256.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 3.2.14 on 2022-07-15 22:51

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('ansible', '0055_alter_collectionversion_version_alter_role_version'),
]

operations = [
migrations.AddField(
model_name='collectionversion',
name='sha256',
field=models.CharField(default='', max_length=64, null=True),
preserve_default=False,
),
migrations.AlterUniqueTogether(
name="collectionversion",
unique_together={("sha256",), ("namespace", "name", "version")},
),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Generated by Django 3.2.14 on 2022-07-21 22:35
from django.db import migrations


# This migration is supposed to be zero downtime safe.
# It will attempt to populate the sha256 field wherever possible. But it is expected that wherever
# older versions pulp_ansible are still running, CollectionVersions without this digest will be
# created.


def add_sha256_to_current_models(apps, schema_editor):
"""Adds the sha256 to current CollectionVersion models."""
CollectionVersion = apps.get_model("ansible", "CollectionVersion")
collection_versions_to_update = []
collection_versions_on_demand = []

for collection_version in (
CollectionVersion.objects.prefetch_related(
"content_artifacts", "content_artifacts__artifact"
)
.filter(sha256="")
.only("pk", "sha256")
.iterator()
):
content_artifact = collection_version.contentartifact_set.get()
if content_artifact.artifact:
collection_version.sha256 = content_artifact.artifact.sha256
collection_versions_to_update.append(collection_version)
else:
collection_versions_on_demand.append(collection_version)
if len(collection_versions_to_update) >= 1024:
CollectionVersion.objects.bulk_update(
collection_versions_to_update,
[
"sha256",
],
)
collection_versions_to_update.clear()
# Update remaining collection versions
if len(collection_versions_to_update) > 0:
CollectionVersion.objects.bulk_update(
collection_versions_to_update,
[
"sha256",
],
)

# If there are on-demand collections then the next migration will fail, so error here with
# helpful message on how to fix. No work will be performed by this migration on a second-run.
if len(collection_versions_on_demand) > 0:
raise Exception(
f"On demand collections found. Please remove or upload/sync their data: "
f"{[c.pk for c in collection_versions_on_demand]}"
)


class Migration(migrations.Migration):

dependencies = [
("ansible", "0056_collectionversion_sha256"),
]

operations = [migrations.RunPython(add_sha256_to_current_models, migrations.RunPython.noop, elidable=True)]
10 changes: 9 additions & 1 deletion pulp_ansible/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ class CollectionVersion(Content):
"""

TYPE = "collection_version"
repo_key_fields = ("name", "namespace", "version")

# Data Fields
authors = psql_fields.ArrayField(models.CharField(max_length=64), default=list, editable=False)
Expand All @@ -179,6 +180,7 @@ class CollectionVersion(Content):
namespace = models.CharField(max_length=64, editable=False)
repository = models.CharField(default="", blank=True, max_length=2000, editable=False)
requires_ansible = models.CharField(null=True, max_length=255)
sha256 = models.CharField(max_length=64, null=True, blank=False)

version = models.CharField(max_length=128, db_collation="pulp_ansible_semver")
version_major = models.IntegerField()
Expand All @@ -205,6 +207,11 @@ class CollectionVersion(Content):
# search_vector is built.
search_vector = psql_search.SearchVectorField(default="")

@classmethod
def natural_key_fields(cls):
# This method should be removed when the uniqueness is properly switched to sha256.
return cls._meta.unique_together[0]

@hook(BEFORE_SAVE)
def calculate_version_parts(self):
v = Version(self.version)
Expand All @@ -229,7 +236,7 @@ def __str__(self):

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"
unique_together = ("namespace", "name", "version")
unique_together = (("namespace", "name", "version"), ("sha256",))
constraints = [
UniqueConstraint(
fields=("collection", "is_highest"),
Expand Down Expand Up @@ -535,6 +542,7 @@ class Meta:

def finalize_new_version(self, new_version):
"""Finalize repo version."""
remove_duplicates(new_version)
removed_collection_versions = new_version.removed(
base_version=new_version.base_version
).filter(pulp_type=CollectionVersion.get_pulp_type())
Expand Down
7 changes: 6 additions & 1 deletion pulp_ansible/app/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,8 +505,10 @@ def deferred_validate(self, data):
# Call super to ensure that data contains artifact
data = super().deferred_validate(data)
artifact = data.get("artifact")
if (sha256 := data.pop("sha256", None)) and sha256 != artifact.sha256:
if (sha256 := data.get("sha256")) and sha256 != artifact.sha256:
raise ValidationError(_("Expected sha256 did not match uploaded artifact's sha256"))
else:
data["sha256"] = artifact.sha256

collection_info = process_collection_artifact(
artifact=artifact,
Expand Down Expand Up @@ -564,6 +566,8 @@ class Meta:
"expected_version",
)
model = CollectionVersion
# There was an autogenerated validator rendering sha256 required.
validators = []


class CollectionVersionSerializer(ContentChecksumSerializer, CollectionVersionUploadSerializer):
Expand Down Expand Up @@ -688,6 +692,7 @@ def is_valid(self, raise_exception=False):
write_fields = set(CollectionVersionUploadSerializer.Meta.fields) - {
"pulp_created",
"pulp_last_updated",
"sha256",
}
if hasattr(self, "initial_data"):
if any((x in self.initial_data for x in self.Meta.read_fields)):
Expand Down
Loading

0 comments on commit 58f25f4

Please sign in to comment.