Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sha256 uniqueness to CollectionVersion #1815

Merged
merged 2 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES/+sha256_uniqueness.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Added sha256 to collection versions.
This is the first part of a change to make this field the uniqueness constraint in the database.
The `datarepair-ansible-collection-sha256` management command is provided to prepare for the next release bringing the second and final step.
27 changes: 10 additions & 17 deletions pulp_ansible/app/downloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,6 @@ class AnsibleFileDownloader(FileDownloader):
"""

def __init__(self, *args, **kwargs):
"""
Initialize the downloader.
"""
kwargs.pop("silence_errors_for_response_status_codes", None)
super().__init__(*args, **kwargs)

Expand All @@ -44,9 +41,6 @@ class TokenAuthHttpDownloader(HttpDownloader):
def __init__(
self, url, auth_url, token, silence_errors_for_response_status_codes=None, **kwargs
):
"""
Initialize the downloader.
"""
self.ansible_auth_url = auth_url
self.token = token
if silence_errors_for_response_status_codes is None:
Expand Down Expand Up @@ -174,19 +168,18 @@ async def get_or_update_token(self):


class AnsibleDownloaderFactory(DownloaderFactory):
"""A factory for creating downloader objects that are configured from with remote settings."""
"""
    A factory for creating downloader objects that are configured with remote settings.

Args:
remote (:class:`~pulpcore.plugin.models.Remote`): The remote used to populate
downloader settings.
downloader_overrides (dict): Keyed on a scheme name, e.g. 'https' or 'ftp' and the value
is the downloader class to be used for that scheme, e.g.
{'https': MyCustomDownloader}. These override the default values.
"""

def __init__(self, remote, downloader_overrides=None):
"""
Initialize AnsibleDownloaderFactory.

Args:
remote (:class:`~pulpcore.plugin.models.Remote`): The remote used to populate
downloader settings.
downloader_overrides (dict): Keyed on a scheme name, e.g. 'https' or 'ftp' and the value
is the downloader class to be used for that scheme, e.g.
{'https': MyCustomDownloader}. These override the default values.
"""
if not downloader_overrides:
downloader_overrides = {
"http": TokenAuthHttpDownloader,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from gettext import gettext as _

from django.core.management import BaseCommand
from django.db import transaction

from pulp_ansible.app.models import CollectionVersion


class Command(BaseCommand):
    """
    Django management command to repair ansible collection versions without sha256.

    Backfills CollectionVersion.sha256 from the associated artifact's sha256 for
    every unit where the field is still NULL. On-demand units (no downloaded
    artifact) are counted as failures rather than repaired.
    """

    help = (
        "This script repairs ansible collection versions without sha256 if artifacts are available."
    )

    def add_arguments(self, parser):
        """Set up arguments."""
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help=_("Don't modify anything, just collect results."),
        )

    def handle(self, *args, **options):
        """Locate CollectionVersions missing sha256 and copy it from their artifact."""
        dry_run = options["dry_run"]
        failed_units = 0
        repaired_units = 0

        unit_qs = CollectionVersion.objects.filter(sha256__isnull=True)
        count = unit_qs.count()
        print(f"CollectionVersions to repair: {count}")
        if count == 0:
            return

        # BUG FIX: the related name was misspelled ("contenartifact_set"), so the
        # prefetch could never match the accessor used below; corrected spelling.
        for unit in unit_qs.prefetch_related("contentartifact_set").iterator():
            try:
                content_artifact = unit.contentartifact_set.get()
                # artifact is None for on-demand content; the AttributeError below is
                # caught and counted as a failure for that unit.
                artifact = content_artifact.artifact
                unit.sha256 = artifact.sha256

                if not dry_run:
                    with transaction.atomic():
                        unit.save(update_fields=["sha256"])
            except Exception as e:
                failed_units += 1
                print(
                    f"Failed to migrate collection version '{unit.namespace}.{unit.name}' "
                    f"'{unit.version}': {e}"
                )
            else:
                # Dry-run still counts the unit as repairable.
                repaired_units += 1

        print(f"Successfully repaired collection versions: {repaired_units}")
        print(f"Collection versions failed to repair: {failed_units}")
23 changes: 23 additions & 0 deletions pulp_ansible/app/migrations/0056_collectionversion_sha256.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 3.2.14 on 2022-07-15 22:51

from django.db import migrations, models


class Migration(migrations.Migration):
    # Schema step of the sha256 rollout: add the column and the new uniqueness
    # constraint. The data backfill happens in the follow-up migration (0057).

    dependencies = [
        ('ansible', '0055_alter_collectionversion_version_alter_role_version'),
    ]

    operations = [
        migrations.AddField(
            model_name='collectionversion',
            name='sha256',
            # default='' fills pre-existing rows; preserve_default=False drops the
            # default again afterwards. null=True keeps inserts from not-yet-upgraded
            # pulp_ansible instances working during a zero-downtime upgrade.
            field=models.CharField(default='', max_length=64, null=True),
            preserve_default=False,
        ),
        # NULL values never collide under a unique constraint, so rows whose sha256
        # is still unset are allowed to coexist while ("sha256",) is unique.
        migrations.AlterUniqueTogether(
            name="collectionversion",
            unique_together={("sha256",), ("namespace", "name", "version")},
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Generated by Django 3.2.14 on 2022-07-21 22:35
from django.db import migrations


# This migration is supposed to be zero downtime safe.
# It will attempt to populate the sha256 field wherever possible. But it is expected that wherever
# older versions pulp_ansible are still running, CollectionVersions without this digest will be
# created.


def add_sha256_to_current_models(apps, schema_editor):
    """Adds the sha256 to current CollectionVersion models."""
    # Use the historical model so this data migration stays valid as the schema evolves.
    CollectionVersion = apps.get_model("ansible", "CollectionVersion")
    collection_versions_to_update = []
    collection_versions_on_demand = []

    # Migration 0056 added sha256 with default="", so rows that existed before it
    # carry "" (not NULL) — those are the rows selected here.
    for collection_version in (
        CollectionVersion.objects.prefetch_related(
            "content_artifacts", "content_artifacts__artifact"
        )
        .filter(sha256="")
        .only("pk", "sha256")
        .iterator()
    ):
        # NOTE(review): the prefetch above names "content_artifacts" but the accessor
        # below is "contentartifact_set" — if these are not the same relation the
        # prefetch is ineffective (result still correct, just one query per row);
        # verify the related name on the historical model.
        content_artifact = collection_version.contentartifact_set.get()
        if content_artifact.artifact:
            collection_version.sha256 = content_artifact.artifact.sha256
            collection_versions_to_update.append(collection_version)
        else:
            # On-demand content: no downloaded artifact, so no digest can be derived.
            collection_versions_on_demand.append(collection_version)
        # Flush in batches of 1024 to bound memory usage on large installations.
        if len(collection_versions_to_update) >= 1024:
            CollectionVersion.objects.bulk_update(
                collection_versions_to_update,
                [
                    "sha256",
                ],
            )
            collection_versions_to_update.clear()
    # Update remaining collection versions
    if len(collection_versions_to_update) > 0:
        CollectionVersion.objects.bulk_update(
            collection_versions_to_update,
            [
                "sha256",
            ],
        )

    # If there are on-demand collections then the next migration will fail, so error here with
    # helpful message on how to fix. No work will be performed by this migration on a second-run.
    if len(collection_versions_on_demand) > 0:
        raise Exception(
            f"On demand collections found. Please remove or upload/sync their data: "
            f"{[c.pk for c in collection_versions_on_demand]}"
        )


class Migration(migrations.Migration):
    """Data step of the sha256 rollout: backfill sha256 on existing rows."""

    dependencies = [
        ("ansible", "0056_collectionversion_sha256"),
    ]

    operations = [
        # elidable=True: this migration may be dropped when migrations are squashed.
        migrations.RunPython(
            add_sha256_to_current_models,
            migrations.RunPython.noop,
            elidable=True,
        ),
    ]
10 changes: 9 additions & 1 deletion pulp_ansible/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ class CollectionVersion(Content):
"""

TYPE = "collection_version"
repo_key_fields = ("name", "namespace", "version")

# Data Fields
authors = psql_fields.ArrayField(models.CharField(max_length=64), default=list, editable=False)
Expand All @@ -179,6 +180,7 @@ class CollectionVersion(Content):
namespace = models.CharField(max_length=64, editable=False)
repository = models.CharField(default="", blank=True, max_length=2000, editable=False)
requires_ansible = models.CharField(null=True, max_length=255)
sha256 = models.CharField(max_length=64, null=True, blank=False)

version = models.CharField(max_length=128, db_collation="pulp_ansible_semver")
version_major = models.IntegerField()
Expand All @@ -205,6 +207,11 @@ class CollectionVersion(Content):
# search_vector is built.
search_vector = psql_search.SearchVectorField(default="")

@classmethod
def natural_key_fields(cls):
# This method should be removed when the uniqueness is properly switched to sha256.
return cls._meta.unique_together[0]

@hook(BEFORE_SAVE)
def calculate_version_parts(self):
v = Version(self.version)
Expand All @@ -229,7 +236,7 @@ def __str__(self):

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"
unique_together = ("namespace", "name", "version")
unique_together = (("namespace", "name", "version"), ("sha256",))
constraints = [
UniqueConstraint(
fields=("collection", "is_highest"),
Expand Down Expand Up @@ -535,6 +542,7 @@ class Meta:

def finalize_new_version(self, new_version):
"""Finalize repo version."""
remove_duplicates(new_version)
removed_collection_versions = new_version.removed(
base_version=new_version.base_version
).filter(pulp_type=CollectionVersion.get_pulp_type())
Expand Down
7 changes: 6 additions & 1 deletion pulp_ansible/app/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,8 +505,10 @@ def deferred_validate(self, data):
# Call super to ensure that data contains artifact
data = super().deferred_validate(data)
artifact = data.get("artifact")
if (sha256 := data.pop("sha256", None)) and sha256 != artifact.sha256:
if (sha256 := data.get("sha256")) and sha256 != artifact.sha256:
raise ValidationError(_("Expected sha256 did not match uploaded artifact's sha256"))
else:
data["sha256"] = artifact.sha256
gerrod3 marked this conversation as resolved.
Show resolved Hide resolved

collection_info = process_collection_artifact(
artifact=artifact,
Expand Down Expand Up @@ -564,6 +566,8 @@ class Meta:
"expected_version",
)
model = CollectionVersion
# There was an autogenerated validator rendering sha256 required.
validators = []
Comment on lines +569 to +570
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is actually a really annoying feature of DRF. It tripped me up a lot when I was originally adding domains.



class CollectionVersionSerializer(ContentChecksumSerializer, CollectionVersionUploadSerializer):
Expand Down Expand Up @@ -688,6 +692,7 @@ def is_valid(self, raise_exception=False):
write_fields = set(CollectionVersionUploadSerializer.Meta.fields) - {
"pulp_created",
"pulp_last_updated",
"sha256",
}
if hasattr(self, "initial_data"):
if any((x in self.initial_data for x in self.Meta.read_fields)):
Expand Down
Loading