From 83a0bed9dd661ae277cc37007525249ff42f2411 Mon Sep 17 00:00:00 2001 From: Allen Lee Date: Fri, 25 Oct 2024 19:03:34 -0700 Subject: [PATCH] refactor: prefix doi management commands - prefix all one-off destructive DOI commands with `doi_` - add reset_staging to mint new DOIs on staging using the datacite sandbox, doi_reset_staging -> step 3, doi_mint_parent_codebase_dois - bump deps for datacite schema 4.5 and django cve --- django/core/pagination.py | 2 +- .../commands/curator_register_dois.py | 16 - .../commands/doi_mint_parent_codebases.py} | 47 +- .../commands/doi_mint_pending_releases.py | 44 ++ .../commands/doi_reset_production.py | 113 ++++ .../management/commands/doi_reset_staging.py | 92 ++++ .../management/commands/doi_sync_metadata.py | 120 +++++ .../commands/sync_user_contributors.py | 30 +- django/library/doi.py | 484 ++++++++++-------- .../commands/clean_peer_reviewed_dois_02.py | 64 --- .../delete_all_existing_codebase_dois_01.py | 62 --- .../management/commands/sync_doi_metadata.py | 135 ----- ...pdate_metadata_for_all_existing_dois_04.py | 122 ----- .../0031_dataciteregistrationlog_and_more.py | 96 ++++ django/library/models.py | 224 ++++---- django/library/serializers.py | 2 +- django/library/tests/test_datacite_api.py | 236 +++++---- django/requirements.txt | 4 +- frontend/package.json | 2 +- frontend/src/components/UserSearch.vue | 4 +- .../releaseEditor/ContributorSearch.vue | 4 +- frontend/src/types.ts | 2 - 22 files changed, 1054 insertions(+), 851 deletions(-) delete mode 100644 django/curator/management/commands/curator_register_dois.py rename django/{library/management/commands/fix_existing_dois_03.py => curator/management/commands/doi_mint_parent_codebases.py} (85%) create mode 100644 django/curator/management/commands/doi_mint_pending_releases.py create mode 100644 django/curator/management/commands/doi_reset_production.py create mode 100644 django/curator/management/commands/doi_reset_staging.py create mode 100644 
django/curator/management/commands/doi_sync_metadata.py delete mode 100644 django/library/management/commands/clean_peer_reviewed_dois_02.py delete mode 100644 django/library/management/commands/delete_all_existing_codebase_dois_01.py delete mode 100644 django/library/management/commands/sync_doi_metadata.py delete mode 100644 django/library/management/commands/update_metadata_for_all_existing_dois_04.py create mode 100644 django/library/migrations/0031_dataciteregistrationlog_and_more.py diff --git a/django/core/pagination.py b/django/core/pagination.py index c1cbbb2c2..768b7c196 100644 --- a/django/core/pagination.py +++ b/django/core/pagination.py @@ -31,7 +31,7 @@ def _to_search_terms(query_params): @staticmethod def _to_filter_display_terms(query_params): """ - Convert query parameters into a list of displayable filter terms (replaces underscores withs paces, etc) + Convert query parameters into a list of displayable filter terms (replaces underscores with spaces, etc) Args: query_params (QueryDict): The query parameters. 
Returns: diff --git a/django/curator/management/commands/curator_register_dois.py b/django/curator/management/commands/curator_register_dois.py deleted file mode 100644 index f1d7c2e3e..000000000 --- a/django/curator/management/commands/curator_register_dois.py +++ /dev/null @@ -1,16 +0,0 @@ -from django.core.management.base import BaseCommand - -from library.doi import mint_dois_for_peer_reviewed_releases_without_dois - -import logging - -logger = logging.getLogger(__name__) - - -class Command(BaseCommand): - help = "Mint DOIs for all peer reviewed codebase releases without a DOI" - - def handle(self, *args, **options): - logger.debug("Registering all peer reviewed codebases") - codebases = mint_dois_for_peer_reviewed_releases_without_dois() - logger.debug("DOIs minted for %s", codebases) diff --git a/django/library/management/commands/fix_existing_dois_03.py b/django/curator/management/commands/doi_mint_parent_codebases.py similarity index 85% rename from django/library/management/commands/fix_existing_dois_03.py rename to django/curator/management/commands/doi_mint_parent_codebases.py index 8a4563ce4..fee7caa65 100644 --- a/django/library/management/commands/fix_existing_dois_03.py +++ b/django/curator/management/commands/doi_mint_parent_codebases.py @@ -1,12 +1,13 @@ +import argparse +import logging from django.core.management.base import BaseCommand from django.conf import settings -import logging from library.models import CodebaseRelease from library.doi import ( DataCiteApi, VERIFICATION_MESSAGE, - doi_matches_pattern, + is_valid_doi, get_welcome_message, ) @@ -29,7 +30,7 @@ def update_existing_dois(interactive=True, dry_run=True): total_peer_reviewed_releases_count = peer_reviewed_releases.count() logger.info( - "Updating DOIs for %s peer reviewed CodebaseReleases with DOIs", + "Updating DOIs for parent Codebases of %s peer reviewed CodebaseReleases with DOIs", total_peer_reviewed_releases_count, ) @@ -56,13 +57,14 @@ def 
update_existing_dois(interactive=True, dry_run=True): if not codebase_doi: # request to DataCite API logger.debug("Minting DOI for parent codebase: %s", codebase.pk) - codebase_doi, success = datacite_api.mint_new_doi_for_codebase(codebase) + log, ok = datacite_api.mint_public_doi(codebase) - if not success: + if not ok: logger.error( - "Could not mint DOI for parent codebase %s. Skipping release %s.", + "Unable to mint DOI for parent codebase %s of release %s: %s", codebase.pk, release.pk, + log.status_code, ) if interactive: input("Press Enter to continue or CTRL+C to quit...") @@ -70,7 +72,7 @@ def update_existing_dois(interactive=True, dry_run=True): logger.debug("New codebase DOI: %s. Saving codebase...", codebase_doi) if not dry_run: - codebase.doi = codebase_doi + codebase.doi = log.doi codebase.save() else: logger.debug( @@ -108,12 +110,12 @@ def update_existing_dois(interactive=True, dry_run=True): release_doi, ) # set up DataCite API request to mint new DOI - release_doi, success = datacite_api.mint_new_doi_for_release(release) - if not success: + log, ok = datacite_api.mint_public_doi(release) + if not ok: logger.error( - "Could not mint DOI for release %s. DOI: %s. Skipping.", + "Could not mint DOI for release %s - status code: %s.", release.pk, - release_doi, + log.status_code, ) if interactive: input("Press Enter to continue or CTRL+C to quit...") @@ -126,7 +128,7 @@ def update_existing_dois(interactive=True, dry_run=True): release.doi, ) if not dry_run: - release.doi = release_doi + release.doi = log.doi release.save() if interactive: @@ -139,12 +141,13 @@ def update_existing_dois(interactive=True, dry_run=True): release_doi, ) # request to DataCite API: mint new DOI! - release_doi, success = datacite_api.mint_new_doi_for_release(release) - if not success: + log, ok = datacite_api.mint_public_doi(release) + release_doi = log.doi + if not ok: logger.error( - "Could not mint DOI for release %s. DOI: %s. 
Skipping.", + "Could not mint DOI for release %s - status code: %s.", release.pk, - release_doi, + log.status_code, ) if interactive: input("Press Enter to continue or CTRL+C to quit...") @@ -190,13 +193,13 @@ def update_existing_dois(interactive=True, dry_run=True): if release.doi is None: logger.error("DOI should not be None for release %s", release.pk) - if not doi_matches_pattern(release.codebase.doi): + if not is_valid_doi(release.codebase.doi): logger.error( "%s Codebase DOI doesn't match DataCite pattern!", release.codebase.doi, ) - if not doi_matches_pattern(release.doi): + if not is_valid_doi(release.doi): logger.error( "%s CodebaseRelease DOI doesn't match DataCite pattern!", release.doi, @@ -212,12 +215,14 @@ class Command(BaseCommand): def add_arguments(self, parser): parser.add_argument( "--interactive", - action="store_true", + action=argparse.BooleanOptionalAction, help="Wait for user to press enter to continue.", - default=True, + default=False, ) parser.add_argument( - "--dry-run", action="store_true", help="Output what would have happened." + "--dry-run", + action=argparse.BooleanOptionalAction, + help="Output what would have happened.", ) def handle(self, *args, **options): diff --git a/django/curator/management/commands/doi_mint_pending_releases.py b/django/curator/management/commands/doi_mint_pending_releases.py new file mode 100644 index 000000000..4aa786f7d --- /dev/null +++ b/django/curator/management/commands/doi_mint_pending_releases.py @@ -0,0 +1,44 @@ +from django.core.management.base import BaseCommand +from library.doi import DataCiteApi, VERIFICATION_MESSAGE, get_welcome_message + +import argparse +import logging + +logger = logging.getLogger(__name__) + + +def mint_pending_dois(interactive=True, dry_run=True): + print(get_welcome_message(dry_run)) + if interactive: + input( + "Minting new DOIs for all reviewed releases and parent codebases without DOIs. Press Enter to continue or CTRL+C to quit..." 
+ ) + api = DataCiteApi(dry_run) + api.mint_pending_dois() + print(VERIFICATION_MESSAGE) + + +class Command(BaseCommand): + """ + Syncs metadata for all codebases and releases with Datacite metadata service. + """ + + def add_arguments(self, parser): + parser.add_argument( + "--interactive", + action=argparse.BooleanOptionalAction, + help="Wait for explicit user confirmation to continue.", + default=True, + ) + parser.add_argument( + "--dry-run", + action=argparse.BooleanOptionalAction, + help="Emit what would have happened.", + default=True, + ) + + def handle(self, *args, **options): + interactive = options["interactive"] + dry_run = options["dry_run"] + logger.info("minting new DOIs for reviewed releases without DOIs") + mint_pending_dois(interactive, dry_run) diff --git a/django/curator/management/commands/doi_reset_production.py b/django/curator/management/commands/doi_reset_production.py new file mode 100644 index 000000000..61864cf18 --- /dev/null +++ b/django/curator/management/commands/doi_reset_production.py @@ -0,0 +1,113 @@ +import argparse +import csv +import logging +import sys +from django.core.management.base import BaseCommand +from library.doi import VERIFICATION_MESSAGE, get_welcome_message, DataCiteApi +from library.models import Codebase, CodebaseRelease + +logger = logging.getLogger(__name__) + + +def cleanup_existing_dois(interactive=True, dry_run=True): + print(get_welcome_message(dry_run)) + + api = DataCiteApi(dry_run=dry_run) + + # clean up all Codebases with existing DOIs + codebases_with_dois = Codebase.objects.with_doi() + logger.info("Removing all Codebase DOIs") + if interactive and codebases_with_dois.exists(): + confirm = input( + "WARNING: this will remove all existing codebase DOIs and is unrecoverable. 
Type 'DELETE' to continue or Ctrl+C to quit: " + ) + if not confirm.lower() == "delete": + logger.info("Aborting.") + sys.exit() + + """ + assert that all Codebase DOIs have been reset + """ + if not dry_run: + print(VERIFICATION_MESSAGE) + with open("codebase_with_doi.csv", "w") as f: + writer = csv.writer(f) + writer.writerow(["Codebase ID", "Codebase DOI"]) + for codebase in codebases_with_dois: + writer.writerow([codebase.pk, codebase.doi]) + codebases_with_dois.update(doi=None) + assert not Codebase.objects.with_doi().exists() + logger.info("Success. All existing codebase DOIs deleted.") + + # clean up unreviewed release DOIs + + unreviewed_releases_with_dois = CodebaseRelease.objects.unreviewed().with_doi() + total_unreviewed_releases_with_dois = unreviewed_releases_with_dois.count() + logger.info( + "Removing %s unreviewed CodebaseRelease DOIs", + total_unreviewed_releases_with_dois, + ) + if interactive: + confirm = input( + f"Deleting all DOIs for {total_unreviewed_releases_with_dois} unreviewed CodebaseReleases. 
Enter 'DELETE' to continue or CTRL+C to quit: " + ) + if not confirm.lower() == "delete": + logger.debug("Aborting...") + sys.exit() + + if not dry_run: + with open("unreviewed_releases_with_dois.csv", "w") as f: + writer = csv.writer(f) + writer.writerow(["CodebaseRelease ID", "CodebaseRelease DOI"]) + for release in unreviewed_releases_with_dois: + writer.writerow([release.pk, release.doi]) + unreviewed_releases_with_dois.update(doi=None) + + # mint DOIs for all public peer reviewed CodebaseReleases without a DOI + reviewed_releases_without_dois = ( + CodebaseRelease.objects.reviewed().public().without_doi() + ) + invalid_releases = [] + for release in reviewed_releases_without_dois: + try: + log, ok = api.mint_public_doi(release) + if not ok: + invalid_releases.append((release, log)) + except Exception as e: + logger.error("Error minting DOI for release %s", release) + invalid_releases.append((release, e)) + + for release, log in invalid_releases: + with open("invalid_releases.csv", "w") as f: + writer = csv.writer(f) + writer.writerow( + ["CodebaseRelease ID", "Status Code", "Reason", "Datacite Metadata"] + ) + writer.writerow( + [release.pk, log.status_code, log.message, release.datacite.to_dict()] + ) + + +class Command(BaseCommand): + """ + Removes all existing parent Codebase DOIs and mints new DOIs for all Peer Reviewed CodebaseReleases + """ + + def add_arguments(self, parser): + parser.add_argument( + "--interactive", + action=argparse.BooleanOptionalAction, + help="Wait for user to press enter to continue.", + default=True, + ) + parser.add_argument( + "--dry-run", + action=argparse.BooleanOptionalAction, + help="Output what would have happened.", + default=False, + ) + + def handle(self, *args, **options): + interactive = options["interactive"] + dry_run = options["dry_run"] + cleanup_existing_dois(interactive, dry_run) diff --git a/django/curator/management/commands/doi_reset_staging.py b/django/curator/management/commands/doi_reset_staging.py new 
file mode 100644 index 000000000..5de6b0e72 --- /dev/null +++ b/django/curator/management/commands/doi_reset_staging.py @@ -0,0 +1,92 @@ +import argparse +import csv +import logging +import sys +from django.conf import settings +from django.core.management.base import BaseCommand +from library.doi import VERIFICATION_MESSAGE, get_welcome_message, DataCiteApi +from library.models import Codebase, CodebaseRelease + +logger = logging.getLogger(__name__) + + +def reset_all_dois(interactive=True, dry_run=True): + print(get_welcome_message(dry_run)) + if settings.DEPLOY_ENVIRONMENT.is_production: + logger.error("This command is not allowed in production.") + sys.exit() + logger.info("(ENV: %s) Removing all DOIs", settings.DEPLOY_ENVIRONMENT) + releases_with_dois = CodebaseRelease.objects.with_doi() + codebases_with_dois = Codebase.objects.with_doi() + confirm = input( + "WARNING: this will remove ALL existing DOIs and is unrecoverable. Type 'DELETE' to continue or Ctrl+C to quit: " + ) + if confirm.lower() == "delete": + with open("deleted_codebase_dois.csv", "w") as f: + writer = csv.writer(f) + writer.writerow(["Codebase ID", "Codebase DOI"]) + for codebase in codebases_with_dois: + writer.writerow([codebase.pk, codebase.doi]) + Codebase.objects.update(doi=None) + with open("deleted_release_dois.csv", "w") as f: + writer = csv.writer(f) + writer.writerow(["CodebaseRelease ID", "CodebaseRelease DOI"]) + for release in releases_with_dois: + writer.writerow([release.pk, release.doi]) + CodebaseRelease.objects.update(doi=None) + else: + logger.info("Aborting.") + sys.exit() + + """ + assert correctness + """ + if not dry_run: + print(VERIFICATION_MESSAGE) + assert Codebase.objects.with_doi().count() == 0 + assert CodebaseRelease.objects.with_doi().count() == 0 + logger.info("Success. 
All existing codebase DOIs deleted.") + + """ Mint DOIs for all new Peer Reviewed Releases""" + peer_reviewed_releases = CodebaseRelease.objects.reviewed().public() + datacite_api = DataCiteApi(dry_run=dry_run) + invalid_releases = [] + for release in peer_reviewed_releases: + try: + log, ok = datacite_api.mint_public_doi(release) + if not ok: + invalid_releases.append((release, log)) + except Exception as e: + logger.error("Error minting DOI for release %s", release) + invalid_releases.append((release, e)) + + for release, log in invalid_releases: + with open("invalid_releases.csv", "w") as f: + writer = csv.writer(f) + writer.writerow( + ["CodebaseRelease ID", "Status Code", "Reason", "Datacite Metadata"] + ) + writer.writerow( + [release.pk, log.status_code, log.message, release.datacite.to_dict()] + ) + + +class Command(BaseCommand): + + def add_arguments(self, parser): + parser.add_argument( + "--interactive", + action=argparse.BooleanOptionalAction, + help="Wait for user to press enter to continue.", + default=True, + ) + parser.add_argument( + "--dry-run", + action=argparse.BooleanOptionalAction, + help="Output what would have happened.", + ) + + def handle(self, *args, **options): + interactive = options["interactive"] + dry_run = options["dry_run"] + reset_all_dois(interactive, dry_run) diff --git a/django/curator/management/commands/doi_sync_metadata.py b/django/curator/management/commands/doi_sync_metadata.py new file mode 100644 index 000000000..601469d40 --- /dev/null +++ b/django/curator/management/commands/doi_sync_metadata.py @@ -0,0 +1,120 @@ +import argparse +import csv +import logging +from django.core.management.base import BaseCommand + +from library.models import CodebaseRelease, Codebase +from library.doi import DataCiteApi, VERIFICATION_MESSAGE, get_welcome_message + +logger = logging.getLogger(__name__) + + +def sync_all_doi_metadata(interactive=True, dry_run=True): + print(get_welcome_message(dry_run)) + + datacite_api = 
DataCiteApi(dry_run=dry_run) + all_codebases_with_dois = Codebase.objects.with_doi() + total_number_of_codebases_with_dois = all_codebases_with_dois.count() + invalid_codebases = [] + invalid_releases = [] + + logger.info( + "Updating metadata for all codebases (%s) with DOIs and their releases with DOIs. ...", + total_number_of_codebases_with_dois, + ) + + for i, codebase in enumerate(all_codebases_with_dois): + logger.debug( + "Processing codebase %s - %s/%s", + codebase.pk, + i + 1, + total_number_of_codebases_with_dois, + ) + if interactive: + input("Press Enter to continue or CTRL+C to quit...") + + # first ensure parent codebase metadata is properly synced + log, ok = datacite_api.update_doi_metadata(codebase) + if not ok: + logger.error("Failed to update metadata for codebase {codebase.pk}") + invalid_codebases.append((codebase, log)) + + # next check all parent codebase release metadata + for j, release in enumerate(codebase.releases.with_doi()): + logger.debug( + "Processing release #%s (%s/%s)", + release.pk, + j + 1, + codebase.releases.count(), + ) + if interactive: + input("Press Enter to continue or CTRL+C to quit...") + + if release.peer_reviewed and release.doi: + log, ok = datacite_api.update_doi_metadata(release) + if not ok: + logger.error("Failed to update metadata for release %s", release.pk) + invalid_releases.append((release, log)) + else: + logger.debug("Skipping unreviewed / no DOI release %s", release.pk) + + if invalid_codebases: + with open("doi_sync_metadata_invalid_codebases.csv", "w") as f: + writer = csv.writer(f) + writer.writerow(["Codebase ID", "HTTP Status Code", "Message"]) + for codebase, log in invalid_codebases: + writer.writerow([codebase.pk, log.status_code, log.message]) + if invalid_releases: + with open("doi_sync_metadata_invalid_releases.csv", "w") as f: + writer = csv.writer(f) + writer.writerow(["CodebaseRelease ID", "HTTP Status Code", "Message"]) + for release, log in invalid_releases: + writer.writerow([release.pk, 
log.status_code, log.message]) + logger.info("Metadata updated for all existing Codebase + CodebaseRelease DOIs.") + """ + FIXME: verify_metadata currently does not work with metadata responses from DataCite + if not dry_run: + print(VERIFICATION_MESSAGE) + logger.info("Checking that local metadata is in sync with DataCite...") + with open("doi_update_metadata_invalid_codebases.csv", "w") as f: + writer = csv.writer(f) + writer.writerow(["Codebase ID", "DataCite metadata"]) + for codebase, valid_metadata in datacite_api.validate_metadata( + all_codebases_with_dois + ): + if not valid_metadata: + logger.warning("inconsistent metadata for codebase %s", codebase.pk) + writer.writerow([codebase.pk, codebase.datacite.to_json()]) + + all_releases_with_dois = CodebaseRelease.objects.with_doi() + with open("doi_update_metadata_invalid_releases.csv", "w") as f: + writer = csv.writer(f) + writer.writerow(["CodebaseRelease ID", "DataCite metadata"]) + for release, valid_metadata in datacite_api.validate_metadata( + all_releases_with_dois + ): + if not valid_metadata: + logger.warning("inconsistent metadata for release %s", release.pk) + writer.writerow([release.pk, release.datacite.to_json()]) + """ + + +class Command(BaseCommand): + + def add_arguments(self, parser): + parser.add_argument( + "--interactive", + action=argparse.BooleanOptionalAction, + help="Wait for user to press enter to continue.", + default=False, + ) + parser.add_argument( + "--dry-run", + action=argparse.BooleanOptionalAction, + help="Output what would have happened.", + ) + + def handle(self, *args, **options): + interactive = options["interactive"] + dry_run = options["dry_run"] + sync_all_doi_metadata(interactive, dry_run) diff --git a/django/curator/management/commands/sync_user_contributors.py b/django/curator/management/commands/sync_user_contributors.py index 627b9c1e8..709a68991 100644 --- a/django/curator/management/commands/sync_user_contributors.py +++ 
b/django/curator/management/commands/sync_user_contributors.py @@ -1,3 +1,4 @@ +import argparse import logging from django.core.management.base import BaseCommand @@ -11,18 +12,39 @@ class Command(BaseCommand): help = """Synchronize user metadata with contributor metadata for testing / development purposes.""" + def add_arguments(self, parser): + parser.add_argument( + "--force", + action=argparse.BooleanOptionalAction, + help="Force the update of all contributors metadata to current User metadata (first_name, last_name, email, json_affiliations).", + default=False, + ) + + def should_update(self, existing, candidate): + if not existing: + return True + return candidate and existing != candidate + def handle(self, *args, **options): # cannot update local model attributes to a join field attribute; this doesn't work: # Contributor.objects.filter(user__isnull=False).update(given_name=F('user__first_name'), ...) # see # https://docs.djangoproject.com/en/dev/topics/db/queries/#updating-multiple-objects-at-once # for more details + force = options["force"] for contributor in Contributor.objects.select_related("user").filter( user__isnull=False ): user = contributor.user - contributor.given_name = user.first_name - contributor.family_name = user.last_name - contributor.email = user.email - contributor.json_affiliations = user.member_profile.affiliations + if force or self.should_update(contributor.given_name, user.first_name): + contributor.given_name = user.first_name + if force or self.should_update(contributor.family_name, user.last_name): + contributor.family_name = user.last_name + if force or self.should_update(contributor.email, user.email): + contributor.email = user.email + member_profile_affiliations = user.member_profile.affiliations + if force or self.should_update( + contributor.json_affiliations, member_profile_affiliations + ): + contributor.json_affiliations = member_profile_affiliations contributor.save() diff --git a/django/library/doi.py 
b/django/library/doi.py index 1ede754a0..967a83993 100644 --- a/django/library/doi.py +++ b/django/library/doi.py @@ -1,12 +1,9 @@ +import csv import logging import re -import time -import threading -import queue import requests from collections import defaultdict -from concurrent.futures import ThreadPoolExecutor from django.conf import settings @@ -18,7 +15,7 @@ DataCiteRegistrationLog, ) -from datacite import DataCiteRESTClient, schema43 +from datacite import DataCiteRESTClient, schema45 from datacite.errors import ( DataCiteError, DataCiteNoContentError, @@ -37,7 +34,7 @@ IS_STAGING = settings.DEPLOY_ENVIRONMENT.is_staging IS_PRODUCTION = settings.DEPLOY_ENVIRONMENT.is_production -# prefix is different for (dev & staging) and production environments +# prefix differs across (dev + staging) and production DATACITE_PREFIX = settings.DATACITE_PREFIX MAX_DATACITE_API_WORKERS = 25 @@ -61,7 +58,7 @@ def get_welcome_message(dry_run: bool): +-+-+-+-+-+-+-+-+-+-+-+ |D|E|V|E|L|O|P|M|E|N|T| +-+-+-+-+-+-+-+-+-+-+-+ - Development Mode is On + Development Mode """ if IS_STAGING: ENV_MESSAGE = """ @@ -74,7 +71,7 @@ def get_welcome_message(dry_run: bool): \__ \ | | / _ \ | (_ | | | | .` | | (_ | |___/ |_| /_/ \_\ \___||___| |_|\_| \___| - Staging Mode is On + Staging Mode """ if IS_PRODUCTION: ENV_MESSAGE = """ @@ -87,20 +84,20 @@ def get_welcome_message(dry_run: bool): | _/ /| (_) | |) | |_| || (__ | | | | | (_) | .` | |_| |_|_\ \___/|___/ \___/ \___| |_| |___| \___/|_|\_| - Production Mode is On + Production Mode """ - if dry_run: - DRY_RUN_MESSAGE = """ - Dry Run Mode is On\n - """ - else: - DRY_RUN_MESSAGE = """ - Dry Run Mode is Off - """ - return ENV_MESSAGE + DRY_RUN_MESSAGE + return f"""{ENV_MESSAGE}\n\n + Dry Run Mode: {'On' if dry_run else 'Off'}\n + """ + + +def print_console_message(dry_run: bool, interactive: bool): + print(get_welcome_message(dry_run)) + if interactive: + input("Press Enter to continue or CTRL+C to quit...") -def doi_matches_pattern(doi: str) -> 
bool: +def is_valid_doi(doi: str) -> bool: # checks if DOI is formatted like this "00.12345/q2xt-rj46" pattern = re.compile(f"{DATACITE_PREFIX}/[-._;()/:a-zA-Z0-9]+") return re.match(pattern, doi) @@ -183,8 +180,12 @@ def _datacite_heartbeat_url(self): def _validate_metadata(self, datacite_metadata: DataCiteSchema): metadata_dict = datacite_metadata.to_dict() - if not schema43.validate(metadata_dict): - logger.error("Invalid DataCite metadata: %s", metadata_dict) + try: + schema45.validator.validate(metadata_dict) + except Exception as e: + logger.error( + "Invalid DataCite metadata: %s", schema45.tostring(metadata_dict), e + ) raise DataCiteError(f"Invalid DataCite metadata: {metadata_dict}") return datacite_metadata, metadata_dict @@ -202,17 +203,22 @@ def mint_public_doi(self, codebase_or_release: Codebase | CodebaseRelease): return "XX.DRYXX/XXXX-XRUN", True if hasattr(codebase_or_release, "datacite"): del codebase_or_release.datacite - datacite_metadata, metadata_dict = self._validate_metadata( - codebase_or_release.datacite - ) + doi = "Unassigned" http_status = 200 message = "Minted new DOI successfully." 
+ datacite_metadata = codebase_or_release.datacite + try: + datacite_metadata, metadata_dict = self._validate_metadata( + datacite_metadata + ) doi = self.datacite_client.public_doi( metadata_dict, url=codebase_or_release.permanent_url ) + codebase_or_release.doi = doi + codebase_or_release.save() except DataCiteError as e: logger.error(e) message = str(e) @@ -232,10 +238,36 @@ def mint_public_doi(self, codebase_or_release: Codebase | CodebaseRelease): log_record_dict.update( release=codebase_or_release, action=DataCiteAction.CREATE_RELEASE_DOI ) - self._save_log_record(**log_record_dict) - return doi, http_status == 200 + return self._save_log_record(**log_record_dict), http_status == 200 + + @classmethod + def is_metadata_stale(cls, codebase_or_release: Codebase | CodebaseRelease): + """ + Returns true if the metadata for the given codebase or release (based on the metadata hash) is out of date with + its latest log entry + """ + try: + newest_log_entry = DataCiteRegistrationLog.objects.latest_entry( + codebase_or_release + ) + # always force refresh cached datacite property in case it was computed earlier + if hasattr(codebase_or_release, "datacite"): + del codebase_or_release.datacite + return newest_log_entry.metadata_hash != codebase_or_release.datacite.hash() + + except DataCiteRegistrationLog.DoesNotExist: + # no logs for this item, metadata is stale + logger.info("No registration logs available for %s", codebase_or_release) + + return True def update_doi_metadata(self, codebase_or_release: Codebase | CodebaseRelease): + """ + Returns a (DataCiteRegistrationLog, bool) tuple where the boolean indicates if the metadata was successfully updated. 
+ """ + if not self.is_metadata_stale(codebase_or_release): + logger.info("No need to update DOI metadata for %s", codebase_or_release) + return DataCiteRegistrationLog(), True doi = codebase_or_release.doi if self.dry_run: logger.debug("DRY RUN") @@ -243,7 +275,7 @@ def update_doi_metadata(self, codebase_or_release: Codebase | CodebaseRelease): "Updating DOI metadata for codebase_or_release: %s", codebase_or_release ) logger.debug("Metadata: %s", codebase_or_release.datacite) - return doi, True + return DataCiteRegistrationLog(), True if hasattr(codebase_or_release, "datacite"): del codebase_or_release.datacite datacite_metadata, metadata_dict = self._validate_metadata( @@ -254,7 +286,7 @@ def update_doi_metadata(self, codebase_or_release: Codebase | CodebaseRelease): updated_metadata_dict = {"attributes": {**metadata_dict}} try: self.datacite_client.put_doi(doi, updated_metadata_dict) - logger.debug("Successfully updated metadta for DOI: %s", doi) + logger.debug("Successfully updated metadata for DOI: %s", doi) except DataCiteError as e: logger.error(e) message = f"Unable to update metadata for {doi}: {e}" @@ -265,6 +297,7 @@ def update_doi_metadata(self, codebase_or_release: Codebase | CodebaseRelease): "message": message, "metadata_hash": datacite_metadata.hash(), } + # FIXME: figure out how to better tie parameters to the requested action if isinstance(codebase_or_release, Codebase): log_record_dict.update( codebase=codebase_or_release, @@ -275,20 +308,8 @@ def update_doi_metadata(self, codebase_or_release: Codebase | CodebaseRelease): release=codebase_or_release, action=DataCiteAction.UPDATE_RELEASE_METADATA, ) - self._save_log_record(**log_record_dict) - return http_status == 200 - - def mint_new_doi_for_codebase(self, codebase: Codebase) -> str: - return self.mint_public_doi(codebase) - - def mint_new_doi_for_release(self, release: CodebaseRelease) -> str: - return self.mint_public_doi(release) - - def update_metadata_for_codebase(self, codebase: Codebase) 
-> bool: - return self.update_doi_metadata(codebase) - - def update_metadata_for_release(self, release: CodebaseRelease) -> bool: - return self.update_doi_metadata(release) + log = self._save_log_record(**log_record_dict) + return log, http_status == 200 @staticmethod def _is_deep_inclusive(elem1, elem2): @@ -332,21 +353,21 @@ def _is_deep_inclusive(elem1, elem2): return True @staticmethod - def _is_same_metadata(sent_data, received_data): + def is_metadata_equivalent(comses_metadata, datacite_metadata): """ Checks if the metadata attributes in the sent_data dictionary are the same as the corresponding attributes in the received_data dictionary. Args: - sent_data (dict): The dictionary containing the sent metadata attributes. - received_data (dict): The dictionary containing the received metadata attributes. + comses_metadata (dict): A DataCite-compatible dictionary drawn from CoMSES metadata for a given Codebase or CodebaseRelease. + datacite_metadata (dict): A DataCite delivered dictionary pulled for a given DOI Returns: bool: True if all attributes are the same, False otherwise. 
""" # Extract keys (attributes) from both dictionaries - sent_keys = set(sent_data.keys()) - received_keys = set(received_data.keys()) + sent_keys = set(comses_metadata.keys()) + received_keys = set(datacite_metadata.keys()) # Initialize array to store different attributes different_attributes = [] @@ -360,19 +381,19 @@ def _is_same_metadata(sent_data, received_data): # FIXME: this accounts for publicationYear: None or "" sent to DataCite EMPTY_VALS = [None, 0, "None", "0"] - if sent_data[key] and received_data[key]: - if str(sent_data[key]) != str(received_data[key]): + if comses_metadata[key] and datacite_metadata[key]: + if str(comses_metadata[key]) != str(datacite_metadata[key]): different_attributes.append(key) elif not ( - sent_data[key] in EMPTY_VALS - and received_data[key] in EMPTY_VALS + comses_metadata[key] in EMPTY_VALS + and datacite_metadata[key] in EMPTY_VALS ): different_attributes.append(key) else: continue if not DataCiteApi._is_deep_inclusive( - sent_data[key], received_data[key] + comses_metadata[key], datacite_metadata[key] ): # if sent_data[key] != received_data[key]: @@ -386,8 +407,8 @@ def _is_same_metadata(sent_data, received_data): logger.debug("Some attributes have different values:") for attr in different_attributes: logger.debug( - f"Attribute '{attr}':\nSent value - {sent_data[attr]}\n" - f"Received value - {received_data[attr]}\n\n" + f"Attribute '{attr}':\nSent value - {comses_metadata[attr]}\n" + f"Received value - {datacite_metadata[attr]}\n\n" ) return False else: @@ -398,59 +419,55 @@ def _is_same_metadata(sent_data, received_data): logger.debug("Missing attributes:", missing_attributes) return False - def check_metadata(self, item) -> bool: + def get_datacite_metadata(self, doi: str): + """ + Get the metadata for the given DOI. + + Args: + doi (str): The DOI for which to get the metadata. + + Returns: + dict: The metadata for the given DOI. 
+ """ + return self.datacite_client.get_metadata(doi) + + def check_metadata(self, codebase_or_release: Codebase | CodebaseRelease) -> bool: """ 1. get metadata for item.doi 2. compare if the values match codebase.datacite.metadata - item: Codebase | CodebaseRelease """ - if not item.doi: + if self.dry_run: + logger.debug( + "Dry run metadata check for %s", codebase_or_release.datacite.to_dict() + ) + return True + if not codebase_or_release.doi: + logger.warning( + "Unnecessary metadata check for non-DOI codebase or release %s", + codebase_or_release, + ) return False try: - if not self.dry_run: - comses_metadata = item.datacite.to_dict() - datacite_metadata = self.datacite_client.get_metadata(item.doi) - return DataCiteApi._is_same_metadata(comses_metadata, datacite_metadata) - else: - logger.debug( - f"{'Codebase' if isinstance(item, Codebase) else 'CodebaseRelease'} metadata is in sync!" - ) - return True + comses_metadata = codebase_or_release.datacite.to_dict() + datacite_metadata = self.get_datacite_metadata(codebase_or_release.doi) + logger.debug( + "comparing datacite metadata\n\n%s\n\nwith comses metadata\n\n%s", + datacite_metadata, + comses_metadata, + ) + return DataCiteApi.is_metadata_equivalent( + comses_metadata, datacite_metadata + ) except Exception as e: logger.error(e) return False - def threaded_metadata_check(self, items): - def loading_animation(thread): - while thread.is_alive(): - print(".", end="", flush=True) - time.sleep(0.1) - print("\n") - - def _check_metadata(q: queue.Queue): - with ThreadPoolExecutor(max_workers=MAX_DATACITE_API_WORKERS) as executor: - results = executor.map( - lambda item: (item.pk, self.check_metadata(item)), items - ) - - q.put(results) - - # Create a queue to pass data between threads - result_queue = queue.Queue() - - # Start the long-running function in a separate thread - thread = threading.Thread(target=_check_metadata, args=(result_queue,)) - thread.start() - - # Display the loading animation in the main 
thread - loading_animation(thread) - - # Wait for the long-running function to finish - thread.join() - # Get the results from the queue - results = result_queue.get() - return results + def validate_metadata(self, items): + for item in items: + if item.doi: + yield (item, self.check_metadata(item)) def _save_log_record( self, @@ -464,14 +481,14 @@ def _save_log_record( ): item = release or codebase logger.debug( - "logging DOI action %s for item=%s, http_status=%s", + "DOI action: %s for item=%s, http_status=%s", action, item, http_status, ) if not self.dry_run: - DataCiteRegistrationLog.objects.create( + return DataCiteRegistrationLog.objects.create( release=release, codebase=codebase, doi=doi, @@ -480,159 +497,176 @@ def _save_log_record( message=message, metadata_hash=metadata_hash, ) + return None + def mint_pending_dois(self): + """ + for ALL published peer_reviewed releases without DOIs: + 1. Mint DOI for parent codebase, if codebase.doi doesn’t exist. + 2. Mint DOI for release. + 3. Update metadata for parent codebase and sibling releases + """ -def mint_dois_for_peer_reviewed_releases_without_dois(interactive=True, dry_run=True): - """ - for ALL peer_reviewed releases without DOIs: - 1. Mints DOI for parent codebase, if codebase.doi doesn’t exist. - 2. Mints DOI for release. - 3. 
Updates metadata for parent codebase and sibling releases - """ - - print(get_welcome_message(dry_run)) - datacite_api = DataCiteApi() - - # CodebaseRelease.objects.filter(peer_reviewed=True).filter(Q(doi__isnull=True) | Q(doi="")) - peer_reviewed_releases_without_dois = ( - CodebaseRelease.objects.reviewed().without_doi() - ) - - total_peer_reviewed_releases_without_dois = ( - peer_reviewed_releases_without_dois.count() - ) - logger.info( - "Minting DOIs for %s peer reviewed releases without DOIs", - total_peer_reviewed_releases_without_dois, - ) + peer_reviewed_releases_without_dois = ( + CodebaseRelease.objects.public().reviewed().without_doi() + ) - for i, release in enumerate(peer_reviewed_releases_without_dois): - logger.debug( - "Processing release %s/%s - %s", - i + 1, + total_peer_reviewed_releases_without_dois = ( + peer_reviewed_releases_without_dois.count() + ) + logger.info( + "Minting DOIs for %s peer reviewed releases without DOIs", total_peer_reviewed_releases_without_dois, - release.pk, ) - if interactive: - input("Press Enter to continue or CTRL+C to quit...") - codebase = release.codebase - codebase_doi = codebase.doi + invalid_releases = [] + for i, release in enumerate(peer_reviewed_releases_without_dois): + logger.debug( + "Processing release %s/%s - %s", + i + 1, + total_peer_reviewed_releases_without_dois, + release.pk, + ) + if self.dry_run: + logger.debug("DRY RUN - SKIPPING RELEASE %s", release.pk) + continue - """ - Mint DOI for codebase(parent) if it doesn't exist. - """ - if not codebase_doi: - # request to DataCite API - codebase_doi = datacite_api.mint_new_doi_for_codebase(codebase) + codebase = release.codebase + codebase_doi = codebase.doi - if not codebase_doi: + """ + Mint DOI for codebase(parent) if it doesn't exist. + """ + if codebase_doi: + continue + # request to DataCite API + log, ok = self.mint_public_doi(codebase) + if not ok: logger.error( "Could not mint DOI for parent codebase %s. 
Skipping release %s.", codebase.pk, release.pk, ) - if interactive: - input("Press Enter to continue or CTRL+C to quit...") + invalid_releases.append( + (release, log, "Unable to mint DOI for parent codebase") + ) continue + codebase.doi = codebase_doi + codebase.save() - if not dry_run: - codebase.doi = codebase_doi - codebase.save() - - """ - Mint DOI for release - """ - # request to DataCite API - release_doi = datacite_api.mint_new_doi_for_release(release) - if not release_doi: - logger.error("Could not mint DOI for release %s. Skipping.", release.pk) - if interactive: - input("Press Enter to continue or CTRL+C to quit...") - continue - - if not dry_run: - release.doi = release_doi - release.save() - - logger.debug("Updating metadata for parent codebase of release %s", release.pk) - """ - Since a new DOI has been minted for the release, we need to update it's parent's metadata (HasVersion) - """ - ok = datacite_api.update_metadata_for_codebase(codebase) - if not ok: - logger.error("Failed to update metadata for codebase %s", codebase.pk) - - """ - Since a new DOI has been minted for the release, we need to update its siblings' metadata (isNewVersionOf, isPreviousVersionOf) - """ - logger.debug("Updating metadata for sibling releases of release %s", release.pk) - - previous_release = release.get_previous_release() - next_release = release.get_next_release() - - if previous_release and previous_release.doi: - ok = datacite_api.update_metadata_for_release(previous_release) + """ + Mint DOI for release + """ + log, ok = self.mint_public_doi(release) if not ok: - logger.error( - "Failed to update metadata for previous_release %s", - previous_release.pk, + logger.error("Could not mint DOI for release %s. 
Skipping.", release.pk) + invalid_releases.append( + (release, log, "Unable to mint DOI for release") ) + continue + release.doi = log.doi + release.save() - if next_release and next_release.doi: - ok = datacite_api.update_metadata_for_release(next_release) + logger.debug( + "Updating metadata for parent codebase of release %s", release.pk + ) + """ + Update parent Codebase metadata for new release DOI + """ + log, ok = self.update_doi_metadata(codebase) if not ok: - logger.error( - "Failed to update metadata for next_release %s", next_release.pk + logger.error("Failed to update metadata for codebase %s", codebase.pk) + invalid_releases.append( + (release, log, "Failed to update codebase metadata") ) - logger.info( - "Minted %s DOIs for peer reviewed releases without DOIs.", - total_peer_reviewed_releases_without_dois, - ) + """ + Update sibling metadata for new release DOI + """ + logger.debug("Updating metadata for siblings of release %s", release.pk) + + previous_release = release.get_previous_release() + next_release = release.get_next_release() + + if previous_release and previous_release.doi: + log, ok = self.update_doi_metadata(previous_release) + if not ok: + logger.error( + "Failed to update metadata for previous_release %s", + previous_release.pk, + ) + invalid_releases.append( + ( + release, + log.status_code, + f"Unable to update previous release id {previous_release.pk} metadata {log.message}", + ) + ) + + if next_release and next_release.doi: + log, ok = self.update_doi_metadata(next_release) + if not ok: + logger.error( + "Failed to update metadata for next_release %s", next_release.pk + ) + invalid_releases.append( + ( + release, + log.status_code, + f"Unable to update next release id {next_release.pk} metadata {log.message}", + ) + ) - """ - assert correctness - """ - if not dry_run: - print(VERIFICATION_MESSAGE) logger.info( - "Verifying: all peer reviewed releases without DOIs and their parent codebases have valid DOIs" + "Minted %s DOIs for peer 
reviewed releases without DOIs.", + total_peer_reviewed_releases_without_dois, ) - invalid_codebases = [] - invalid_releases = [] + with open("mint_pending_dois__invalid_pending_releases.csv", "w") as f: + writer = csv.writer(f) + writer.writerow(["release_id", "status_code", "message"]) + for release, status_code, message in invalid_releases: + writer.writerow([release.pk, status_code, message]) - for i, release in enumerate(peer_reviewed_releases_without_dois): - logger.debug( - "Verifying release: %s / %s", - i + 1, - total_peer_reviewed_releases_without_dois, + """ + assert correctness + """ + if not self.dry_run: + print(VERIFICATION_MESSAGE) + logger.info( + "Verifying: all peer reviewed releases without DOIs and their parent codebases have valid DOIs" ) + invalid_codebases = [] + invalid_releases = [] - if not release.doi or not doi_matches_pattern(release.doi): - invalid_releases.append(release.pk) - if not release.codebase.doi or not doi_matches_pattern( - release.codebase.doi - ): - invalid_codebases.append(release.codebase.pk) + for i, release in enumerate(peer_reviewed_releases_without_dois): + logger.debug( + "Verifying release: %s / %s", + i + 1, + total_peer_reviewed_releases_without_dois, + ) - if invalid_codebases: - logger.error( - "FAILURE: %s Codebases with invalid or missing DOIs: %s", - invalid_codebases.count(), - invalid_codebases, - ) - else: - logger.info( - "Success. All parent codebases for peer reviewed releases previously without DOIs have valid DOIs now." - ) - if invalid_releases: - logger.error( - "Failure. %s CodebaseReleases with invalid or missing DOIs: %s", - invalid_releases.count(), - invalid_releases, - ) - else: - logger.info( - "Success. All peer reviewed releases previously without DOIs have valid DOIs now." 
- ) + if not release.doi or not is_valid_doi(release.doi): + invalid_releases.append(release.pk) + if not release.codebase.doi or not is_valid_doi(release.codebase.doi): + invalid_codebases.append(release.codebase.pk) + + if invalid_codebases: + logger.error( + "FAILURE: %s Codebases with invalid or missing DOIs: %s", + len(invalid_codebases), + invalid_codebases, + ) + else: + logger.info( + "SUCCESS: All parent codebases of peer reviewed releases without DOIs have valid DOIs." + ) + if invalid_releases: + logger.error( + "FAILURE: %s CodebaseReleases with invalid or missing DOIs: %s", + len(invalid_releases), + invalid_releases, + ) + else: + logger.info( + "SUCCESS: All peer reviewed releases without DOIs have valid DOIs." + ) diff --git a/django/library/management/commands/clean_peer_reviewed_dois_02.py b/django/library/management/commands/clean_peer_reviewed_dois_02.py deleted file mode 100644 index 44ca82a29..000000000 --- a/django/library/management/commands/clean_peer_reviewed_dois_02.py +++ /dev/null @@ -1,64 +0,0 @@ -import logging -from django.core.management.base import BaseCommand -from library.doi import VERIFICATION_MESSAGE, get_welcome_message -from library.models import CodebaseRelease - -logger = logging.getLogger(__name__) - - -def remove_dois_from_unreviewed_releases(interactive=True, dry_run=True): - print(get_welcome_message(dry_run)) - - unreviewed_releases_with_dois = CodebaseRelease.objects.filter( - peer_reviewed=False, doi__isnull=False - ) - total_unreviewed_releases_with_dois = unreviewed_releases_with_dois.count() - - logger.info( - "Cleaning up DOIs for %s unreviewed CodebaseReleases with DOIs", - total_unreviewed_releases_with_dois, - ) - if interactive: - confirm = input( - "Deleting all DOIs for unreviewed CodebaseReleases. 
Enter 'DELETE' to continue or CTRL+C to quit: " - ) - if confirm.lower() == "delete": - unreviewed_releases_with_dois.update(doi=None) - - """ - assert correctness - """ - if not dry_run: - print(VERIFICATION_MESSAGE) - logger.info( - "Checking that DOIs for all not peer reviewed releases have been deleted..." - ) - assert ( - CodebaseRelease.objects.filter( - peer_reviewed=False, doi__isnull=False - ).count() - == 0 - ) - logger.info( - "All DOIs from not peer_reviewed CodebaseReleases %s with DOIs deleted successfully.", - total_unreviewed_releases_with_dois, - ) - - -class Command(BaseCommand): - - def add_arguments(self, parser): - parser.add_argument( - "--interactive", - action="store_true", - help="Wait for user to press enter to continue.", - default=True, - ) - parser.add_argument( - "--dry-run", action="store_true", help="Output what would have happened." - ) - - def handle(self, *args, **options): - interactive = options["interactive"] - dry_run = options["dry_run"] - remove_dois_from_unreviewed_releases(interactive, dry_run) diff --git a/django/library/management/commands/delete_all_existing_codebase_dois_01.py b/django/library/management/commands/delete_all_existing_codebase_dois_01.py deleted file mode 100644 index 613bc02e1..000000000 --- a/django/library/management/commands/delete_all_existing_codebase_dois_01.py +++ /dev/null @@ -1,62 +0,0 @@ -import csv -import logging -import sys -from django.core.management.base import BaseCommand -from library.doi import VERIFICATION_MESSAGE, get_welcome_message -from library.models import Codebase - -logger = logging.getLogger(__name__) - - -def remove_existing_codebase_dois(interactive=True, dry_run=True): - print(get_welcome_message(dry_run)) - codebases_with_dois = Codebase.objects.exclude(doi__isnull=True) - - logger.info( - f"Removing DOIs for {len(codebases_with_dois)} Codebases. Query: Codebase.objects.exclude(doi__isnull=True) ..." 
- ) - if interactive and codebases_with_dois.exists(): - confirm = input( - "WARNING: this will remove all existing codebase DOIs and is unrecoverable. Type 'DELETE' to continue or Ctrl+C to quit: " - ) - if confirm.lower() == "delete": - with open("codebases_with_dois.csv", "w") as f: - writer = csv.writer(f) - writer.writerow(["Codebase ID", "Codebase DOI"]) - for codebase in codebases_with_dois: - writer.writerow([codebase.pk, codebase.doi]) - codebases_with_dois.update(doi=None) - else: - logger.info("Aborting.") - sys.exit() - - logger.info( - "All DOIs from {len(codebases_with_dois)} codebases deleted successfully." - ) - - """ - assert correctness - """ - if not dry_run: - print(VERIFICATION_MESSAGE) - assert Codebase.objects.filter(doi__isnull=False).count() == 0 - logger.info("Success. All existing codebase DOIs deleted.") - - -class Command(BaseCommand): - - def add_arguments(self, parser): - parser.add_argument( - "--interactive", - action="store_true", - help="Wait for user to press enter to continue.", - default=True, - ) - parser.add_argument( - "--dry-run", action="store_true", help="Output what would have happened." 
- ) - - def handle(self, *args, **options): - interactive = options["interactive"] - dry_run = options["dry_run"] - remove_existing_codebase_dois(interactive, dry_run) diff --git a/django/library/management/commands/sync_doi_metadata.py b/django/library/management/commands/sync_doi_metadata.py deleted file mode 100644 index 432d2740d..000000000 --- a/django/library/management/commands/sync_doi_metadata.py +++ /dev/null @@ -1,135 +0,0 @@ -from django.core.management.base import BaseCommand -from library.doi import DataCiteApi, VERIFICATION_MESSAGE, get_welcome_message -from library.models import CodebaseRelease, Codebase, DataciteRegistrationLog - -import logging - -logger = logging.getLogger(__name__) - - -def update_stale_metadata_for_all_codebases_with_dois(interactive=True, dry_run=True): - print(get_welcome_message(dry_run)) - - datacite_api = DataCiteApi(dry_run=dry_run) - all_codebases_with_dois = Codebase.objects.exclude(doi__isnull=True) - total_codebases_count = all_codebases_with_dois.count() - - logger.info( - "Updating stale metadata for %s codebases with DOIs", - total_codebases_count, - ) - - for i, codebase in enumerate(all_codebases_with_dois): - logger.debug( - "Processing Codebase with pk %s (%s/%s)...", - codebase.pk, - i + 1, - total_codebases_count, - ) - if interactive: - input("Press Enter to continue or CTRL+C to quit...") - - if DataciteRegistrationLog.is_metadata_stale(codebase): - logger.debug("Metadata is stale. Updating metadata in DataCite...") - success = datacite_api.update_metadata_for_codebase(codebase) - if not success: - logger.error("Failed to update metadata for codebase %s", codebase.pk) - else: - logger.debug("Metadata successfully updated.") - else: - logger.debug("Metadata is in sync. 
Skipping...") - - logger.info("Updated all codebases with stale metadata.") - """ - assert correctness - """ - if not dry_run: - print(VERIFICATION_MESSAGE) - logger.info( - "Checking that metadata for all codebases with DOIs is in sync with DataCite..." - ) - - results = datacite_api.threaded_metadata_check(all_codebases_with_dois) - - if all([is_meta_valid for pk, is_meta_valid in results]): - logger.info( - "Success. Metadata for all codebases with DOIs is in sync with DataCite." - ) - - -def update_stale_metadata_for_all_releases_with_dois(interactive=True, dry_run=True): - print(get_welcome_message(dry_run)) - datacite_api = DataCiteApi() - all_releases_with_dois = CodebaseRelease.objects.exclude(doi__isnull=True) - total_releases_count = all_releases_with_dois.count() - - logger.debug( - "Updating stale metadata for %s releases with DOIs", - total_releases_count, - ) - - for i, release in enumerate(all_releases_with_dois): - logger.debug( - "Processing Release id %s (%s / %s)", - release.pk, - i + 1, - total_releases_count, - ) - - if interactive: - input("Press Enter to continue or CTRL+C to quit...") - - if DataciteRegistrationLog.is_metadata_stale(release): - logger.debug("Metadata is stale. Updating metadata in DataCite...") - ok = datacite_api.update_metadata_for_release(release) - if not ok: - logger.error("Failed to update metadata for release %s", release.pk) - else: - logger.debug("Metadata successfully updated.") - else: - logger.debug("Metadata is up-to-date. Skipping...") - - if interactive: - input("Press Enter to continue or CTRL+C to quit...") - continue - - logger.info("Updated all releases with stale metadata.") - - """ - assert correctness - """ - if not dry_run: - print(VERIFICATION_MESSAGE) - logger.info( - "Checking metadata for all releases with DOIs is in sync with DataCite..." 
- ) - - results = datacite_api.threaded_metadata_check(all_releases_with_dois) - - if all([is_meta_valid for pk, is_meta_valid in results]): - logger.info( - "Success. Metadata for all releases with DOIs is in sync with DataCite." - ) - - -class Command(BaseCommand): - """ - Syncs metadata for all codebases and releases with Datacite metadata service. - """ - - def add_arguments(self, parser): - parser.add_argument( - "--interactive", - action="store_true", - help="Wait for user to press enter to continue.", - default=True, - ) - parser.add_argument( - "--dry-run", action="store_true", help="Output what would have happened." - ) - - def handle(self, *args, **options): - interactive = options["interactive"] - dry_run = options["dry_run"] - update_stale_metadata_for_all_codebases_with_dois(interactive, dry_run) - update_stale_metadata_for_all_releases_with_dois(interactive, dry_run) diff --git a/django/library/management/commands/update_metadata_for_all_existing_dois_04.py b/django/library/management/commands/update_metadata_for_all_existing_dois_04.py deleted file mode 100644 index 1a691ca24..000000000 --- a/django/library/management/commands/update_metadata_for_all_existing_dois_04.py +++ /dev/null @@ -1,122 +0,0 @@ -import logging -from django.core.management.base import BaseCommand - -from library.models import CodebaseRelease, Codebase, DataciteRegistrationLog -from library.doi import DataCiteApi, VERIFICATION_MESSAGE, get_welcome_message - -logger = logging.getLogger(__name__) - - -def update_doi_metadata(interactive=True, dry_run=True): - print(get_welcome_message(dry_run)) - - datacite_api = DataCiteApi(dry_run=dry_run) - all_codebases_with_dois = Codebase.objects.with_doi() - - logger.info( - "Updating metadata for all codebases (%s) with DOIs and their releases with DOIs. 
...", - all_codebases_with_dois.count(), - ) - - for i, codebase in enumerate(all_codebases_with_dois): - logger.debug( - "Processing codebase %s - %s/%s", - codebase.pk, - i + 1, - all_codebases_with_dois.count(), - ) - if interactive: - input("Press Enter to continue or CTRL+C to quit...") - - if DataciteRegistrationLog.is_metadata_stale(codebase): - logger.debug("Metadata is stale. Updating metadata in DataCite...") - ok = datacite_api.update_metadata_for_codebase(codebase) - if not ok: - logger.error("Failed to update metadata for codebase {codebase.pk}") - else: - logger.debug("Metadata for codebase {codebase.pk} is in sync!") - - for j, release in enumerate(codebase.releases.all()): - logger.debug( - "Processing release #%s (%s/%s)", - release.pk, - j + 1, - codebase.releases.count(), - ) - if interactive: - input("Press Enter to continue or CTRL+C to quit...") - - if release.peer_reviewed and release.doi: - if DataciteRegistrationLog.is_metadata_stale(release): - logger.debug("Metadata is stale. Updating metadata in DataCite...") - ok = datacite_api.update_metadata_for_release(release) - if not ok: - logger.error( - "Failed to update metadata for release %s", release.pk - ) - else: - logger.debug("Metadata for release %s is synced", release.pk) - else: - if not release.doi: - logger.warning("Release has no DOI") - if not release.peer_reviewed: - logger.warning("Release is not peer reviewed") - - logger.info("Metadata updated for all existing (Codebase & CodebaseRelease) DOIs.") - """ - assert correctness - """ - if not dry_run: - print(VERIFICATION_MESSAGE) - logger.info("Checking that Comses metadata is in sync with DataCite...") - invalid_codebases = [] - invalid_releases = [] - - results = datacite_api.threaded_metadata_check(all_codebases_with_dois) - for pk, is_meta_valid in results: - if not is_meta_valid: - invalid_codebases.append(pk) - - if invalid_codebases: - logger.error( - "Failure. 
Metadata not in sync with DataCite for %s codebases: %s", - invalid_codebases.count(), - invalid_codebases, - ) - else: - logger.info( - "Success. Metadata in sync with DataCite for all codebases with DOI." - ) - - all_releases_with_dois = CodebaseRelease.objects.with_doi() - results = datacite_api.threaded_metadata_check(all_releases_with_dois) - for pk, is_meta_valid in results: - if not is_meta_valid: - invalid_releases.append(pk) - - if invalid_releases: - logger.error( - f"Failure. Metadata not in sync with DataCite for {len(invalid_releases)} releases: {invalid_releases}" - ) - else: - logger.info( - f"Success. Metadata in sync with DataCite for all releases with DOI." - ) - - -class Command(BaseCommand): - - def add_arguments(self, parser): - parser.add_argument( - "--interactive", - action="store_true", - help="Wait for user to press enter to continue.", - ) - parser.add_argument( - "--dry-run", action="store_true", help="Output what would have happened." - ) - - def handle(self, *args, **options): - interactive = options["interactive"] - dry_run = options["dry_run"] - update_doi_metadata(interactive, dry_run) diff --git a/django/library/migrations/0031_dataciteregistrationlog_and_more.py b/django/library/migrations/0031_dataciteregistrationlog_and_more.py new file mode 100644 index 000000000..ef45abe60 --- /dev/null +++ b/django/library/migrations/0031_dataciteregistrationlog_and_more.py @@ -0,0 +1,96 @@ +# Generated by Django 4.2.16 on 2024-10-29 21:28 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("library", "0030_peerreviewinvitation"), + ] + + operations = [ + migrations.CreateModel( + name="DataCiteRegistrationLog", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "action", + models.CharField( + choices=[ + ("CREATE_RELEASE_DOI", "create release DOI"), + 
("CREATE_CODEBASE_DOI", "create codebase DOI"), + ("UPDATE_RELEASE_METADATA", "update release metadata"), + ("UPDATE_CODEBASE_METADATA", "update codebase metadata"), + ], + max_length=50, + ), + ), + ("timestamp", models.DateTimeField(auto_now_add=True)), + ("http_status", models.IntegerField(default=None, null=True)), + ("message", models.TextField(default=None, null=True)), + ("metadata_hash", models.CharField(max_length=255)), + ("doi", models.CharField(blank=True, max_length=255, null=True)), + ], + ), + migrations.AlterModelOptions( + name="peerreviewinvitation", + options={"ordering": ["-date_sent"]}, + ), + migrations.RemoveField( + model_name="contributor", + name="affiliations", + ), + migrations.AlterField( + model_name="codebase", + name="date_created", + field=models.DateTimeField(auto_now_add=True), + ), + migrations.AlterField( + model_name="codebaserelease", + name="date_created", + field=models.DateTimeField(auto_now_add=True), + ), + migrations.AlterField( + model_name="codebasereleasedownload", + name="date_created", + field=models.DateTimeField(auto_now_add=True), + ), + migrations.AlterField( + model_name="peerreviewinvitation", + name="date_sent", + field=models.DateTimeField(auto_now=True), + ), + migrations.DeleteModel( + name="ContributorAffiliation", + ), + migrations.AddField( + model_name="dataciteregistrationlog", + name="codebase", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="datacite_logs", + to="library.codebase", + ), + ), + migrations.AddField( + model_name="dataciteregistrationlog", + name="release", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="datacite_logs", + to="library.codebaserelease", + ), + ), + ] diff --git a/django/library/models.py b/django/library/models.py index da4d784b7..76244b5ab 100644 --- a/django/library/models.py +++ b/django/library/models.py @@ -7,8 +7,8 @@ import uuid from abc import ABC -from 
collections import OrderedDict -from datetime import date, datetime, timedelta +from collections import OrderedDict, defaultdict +from datetime import date, timedelta from typing import List from django.conf import settings @@ -167,11 +167,18 @@ class Contributor(index.Indexed, ClusterableModel): @property def affiliations(self): - return self.json_affiliations + if self.json_affiliations: + return self.json_affiliations + if self.user: + return self.user.member_profile.affiliations + return [] @property def affiliation_ror_ids(self): - return [affiliation.get("ror_id") for affiliation in self.json_affiliations] + return [ + {"name": affiliation.get("name"), "ror_id": affiliation.get("ror_id")} + for affiliation in self.affiliations + ] @cached_property def json_affiliations_string(self): @@ -187,25 +194,13 @@ def to_affiliation_string(cls, afl): # e.g., "Arizona State University https://www.asu.edu ASU" return f"{afl.get('name')} {afl.get('url')} {afl.get('acronym')}" - @property - def codemeta_affiliation(self): - """ - For now codemeta affiliations appear to be a single https://schema.org/Organization - """ - if self.json_affiliations: - return CodeMetaSchema.convert_affiliation(self.json_affiliations[0]) - @property def primary_affiliation(self): - return self.json_affiliations[0] if self.json_affiliations else {} + return self.affiliations[0] if self.affiliations else {} @property def primary_affiliation_name(self): - return self.primary_json_affiliation_name - - @property - def primary_json_affiliation_name(self): - return self.json_affiliations[0]["name"] if self.json_affiliations else "" + return self.affiliations[0]["name"] if self.affiliations else "" @staticmethod def from_user(user): @@ -1132,6 +1127,9 @@ def accessible(self, user): def reviewed(self, **kwargs): return self.filter(peer_reviewed=True, **kwargs) + def unreviewed(self, **kwargs): + return self.exclude(peer_reviewed=True).filter(**kwargs) + def with_doi(self, **kwargs): return 
self.exclude(Q(doi__isnull=True) | Q(doi="")).filter(**kwargs) @@ -2594,15 +2592,16 @@ def __init__(self, release: CodebaseRelease): ] if release.live: - # should not generate CodeMeta or DataCite for non-published releases self.first_published = release.first_published_at.date() self.last_published = release.last_published_on.date() - self.copyright_year = self.last_published.year else: - # FIXME: default values? - self.first_published = self.last_published = self.copyright_year = ( - date.today() + # FIXME: default to today for unpublished releases + # should not generate CodeMeta or DataCite for non-published releases but CodeMeta is generated even for unpublished + logger.warning( + "Generating CommonMetadata for an unpublished release: %s", release ) + self.first_published = self.last_published = date.today() + self.copyright_year = self.last_published.year if release.license: self.license = release.license else: @@ -2642,10 +2641,12 @@ def descriptions(self): { "description": self.description, "descriptionType": "Abstract", + "lang": "en", # FIXME: this might not always be the case }, { "description": self.release_notes, "descriptionType": "TechnicalInfo", + "lang": "en", # FIXME: this might not always be the case }, ] @@ -2759,18 +2760,24 @@ def convert_authors(cls, common_metadata: CommonMetadata): ] @classmethod - def convert_ror_affiliation(cls, affiliation: dict): + def convert_affiliation(cls, affiliation: dict): + codemeta_affiliation = {} if affiliation: - return { + codemeta_affiliation = { # FIXME: may switch to https://schema.org/ResearchOrganization at some point "@type": "Organization", - "@id": affiliation.get("ror_id"), "name": affiliation.get("name"), "url": affiliation.get("url"), - "identifier": affiliation.get("ror_id"), - "sameAs": affiliation.get("ror_id"), } - return {} + if affiliation.get("ror_id"): + codemeta_affiliation.update( + { + "@id": affiliation.get("ror_id"), + "identifier": affiliation.get("ror_id"), + "sameAs": 
affiliation.get("ror_id"), + } + ) + return codemeta_affiliation @classmethod def convert_contributor(cls, contributor: Contributor): @@ -2782,8 +2789,8 @@ def convert_contributor(cls, contributor: Contributor): } if contributor.orcid_url: codemeta["@id"] = contributor.orcid_url - if contributor.json_affiliations: - codemeta["affiliation"] = cls.convert_ror_affiliation( + if contributor.affiliations: + codemeta["affiliation"] = cls.convert_affiliation( contributor.primary_affiliation ) if contributor.email: @@ -2837,8 +2844,9 @@ class DataCiteSchema(ABC): COMSES_PUBLISHER = { "publisherIdentifier": CommonMetadata.COMSES_ORGANIZATION["ror_id"], "publisherIdentifierScheme": "ROR", - "schemeURI": "https://ror.org", + "schemeUri": "https://ror.org", "name": CommonMetadata.COMSES_ORGANIZATION["name"], + "lang": "en", } INITIAL_DATA = { @@ -2848,6 +2856,7 @@ class DataCiteSchema(ABC): "resourceType": "Computational Model", "resourceTypeGeneral": "Software", }, + "formats": ["text/plain"], } def __init__(self, metadata: dict): @@ -2884,39 +2893,53 @@ def convert_release_contributor(cls, release_contributor: ReleaseContributor): nameType="Personal", givenName=contributor.given_name, familyName=contributor.family_name, - creatorName=f"{contributor.family_name}, {contributor.given_name}", + name=f"{contributor.family_name}, {contributor.given_name}", ) if contributor.orcid_url: creator.update( - nameIdentifier=contributor.orcid_url, - nameIdentifierScheme="ORCID", - schemeURI="https://orcid.org", + nameIdentifiers=[ + { + "nameIdentifier": contributor.orcid_url, + "nameIdentifierScheme": "ORCID", + "schemeUri": "https://orcid.org", + } + ] ) else: creator.update(nameType="Organizational", creatorName=contributor.name) # check for ROR affiliations or freetext: https://datacite-metadata-schema.readthedocs.io/en/4.5/properties/creator/#affiliation - affiliations = contributor.json_affiliations + affiliations = contributor.affiliations if affiliations: - ror_ids = 
contributor.affiliation_ror_ids - if ror_ids: - # set affiliationIdentifier to first ROR ID - creator.update( - affiliationIdentifier=ror_ids[0], + creator_affiliations = [ + cls.convert_affiliation(a) for a in affiliations if a + ] + creator.update(affiliation=creator_affiliations) + return creator + + @classmethod + def convert_affiliation(cls, affiliation: dict): + """ + Converts a CoMSES affiliation dict into a DataCite affiliation dict + """ + datacite_affiliation = {} + if affiliation.get("name"): + datacite_affiliation = { + "name": affiliation.get("name"), + } + # FIXME: should we validate the ror id + if affiliation.get("ror_id"): + datacite_affiliation.update( + affiliationIdentifier=affiliation.get("ror_id"), affiliationIdentifierScheme="ROR", - schemeURI="https://ror.org", ) - else: - # otherwise set to the first affiliation freetext name - creator.update(affiliation=contributor.primary_affiliation_name) - return creator + return datacite_affiliation @classmethod def to_citable_authors(cls, release_contributors): """ - Maps a Set of ReleaseContributors to a list dictionaries representing DataCite creators + Maps a set of ReleaseContributors to a list of dictionaries representing DataCite creators - (we're using DataCite schema 4.3 but this is still pretty much the same) https://datacite-metadata-schema.readthedocs.io/en/4.5/properties/creator/ """ return [cls.convert_release_contributor(rc) for rc in release_contributors] @@ -2929,46 +2952,49 @@ def to_publication_year(cls, common_metadata: CommonMetadata): @classmethod def to_contributors(cls, common_metadata: CommonMetadata): + """ + Sets up non-author contributors in the DataCite metadata + + https://datacite-metadata-schema.readthedocs.io/en/4.5/properties/contributor/ + """ nonauthor_contributors = common_metadata.release_contributor_nonauthors - contributors = [ - # FIXME: probably not the right way to bootstrap non author contributors - # perhaps this should be the provider institution, e.g., CML ROR - { 
- "contributorName": common_metadata.code_repository, - "contributorType": "hostingInstitution", - } - ] + contributors = [] if nonauthor_contributors: - role_mapping = { - "copyrightHolder": "RightsHolder", - "editor": "Editor", - "funder": "Sponsor", - "pointOfContact": "ContactPerson", - "resourceProvider": "Distributor", - } - + role_mapping = defaultdict( + lambda: "Other", + { + "copyrightHolder": "RightsHolder", + "editor": "Editor", + "funder": "Sponsor", + "pointOfContact": "ContactPerson", + "resourceProvider": "Distributor", + }, + ) for release_contributor in nonauthor_contributors: - # FIXME: what is other_role_added for? - other_role_added = False + has_other_role_already = False for role in release_contributor.roles: contributor_type = role_mapping.get(role, "Other") - if contributor_type == "Other" and not other_role_added: - contributors.append( - { - "contributorName": release_contributor.contributor.name, - "contributorType": "Other", - } - ) - other_role_added = True - elif contributor_type != "Other": - contributors.append( - { - "contributorName": release_contributor.contributor.name, - "contributorType": contributor_type, - } - ) + # only allow a single Other role per contributor + if contributor_type == "Other": + if has_other_role_already: + continue + else: + has_other_role_already = True + contributors.append( + { + "name": release_contributor.contributor.name, + "contributorType": contributor_type, + } + ) + if common_metadata.code_repository: + contributors.append( + { + "name": common_metadata.code_repository, + "contributorType": "HostingInstitution", + } + ) return contributors @@ -3012,17 +3038,16 @@ def convert(cls, common_metadata: CommonMetadata): common_metadata.release_contributor_authors ), "descriptions": common_metadata.descriptions, - "publicationYear": common_metadata.copyright_year, + "publicationYear": str(cls.to_publication_year(common_metadata)), "titles": [{"title": common_metadata.name}], "version": 
common_metadata.version, - "codeRepository": common_metadata.code_repository, "contributors": cls.to_contributors(common_metadata), "subjects": cls.convert_keywords(common_metadata), "rightsList": [ { "rights": common_metadata.license.name, "rightsIdentifier": common_metadata.license.name, - "rightsURI": common_metadata.license.url, + "rightsUri": common_metadata.license.url, } ], } @@ -3107,12 +3132,21 @@ def convert(cls, codebase: Codebase): { "description": codebase.description.raw, "descriptionType": "Abstract", - } + }, + { + "description": """The DOI pointing to this resource is a `concept version` representing all + versions of this computational model and will always redirect to the latest version of this + computational model. See https://zenodo.org/help/versioning for more details on the rationale + behind a concept version DOI that rolls up all versions of a given computational model or any + other digital research object. + """, + "descriptionType": "Other", + }, ], - "publicationYear": codebase.publication_year, + "publicationYear": str(codebase.publication_year), } - """ + """ Set codebase relatedIdentifiers """ @@ -3207,20 +3241,6 @@ class DataCiteRegistrationLog(models.Model): objects = DataCiteRegistrationLogQuerySet.as_manager() - @classmethod - def is_metadata_stale(cls, item): - try: - newest_log_entry = DataCiteRegistrationLog.objects.latest_entry(item) - # make sure item does not have stale datacite metadata - del item.datacite - return newest_log_entry.metadata_hash != item.datacite.hash() - - except DataCiteRegistrationLog.DoesNotExist: - # no logs for this item, metadata is stale - logger.info("No registration logs available for this item %s", item) - - return True - @property def codebase_or_release_id(self): if self.codebase: diff --git a/django/library/serializers.py b/django/library/serializers.py index 510cce1aa..0707e1517 100644 --- a/django/library/serializers.py +++ b/django/library/serializers.py @@ -193,7 +193,7 @@ class Meta: 
"user", "type", "json_affiliations", - "primary_json_affiliation_name", + "primary_affiliation_name", "profile_url", ) diff --git a/django/library/tests/test_datacite_api.py b/django/library/tests/test_datacite_api.py index 3d45e20de..5b26d3054 100644 --- a/django/library/tests/test_datacite_api.py +++ b/django/library/tests/test_datacite_api.py @@ -1,122 +1,180 @@ import logging -from django.conf import settings -from django.utils.crypto import get_random_string -from .base import ReleaseSetup from core.tests.base import BaseModelTestCase +from .base import ReleaseSetup +from ..doi import DataCiteApi from ..models import Codebase -from library.doi import DataCiteApi logger = logging.getLogger(__name__) -""" -The skelton python tests here need much more work but should pass successfully. -See -https://docs.google.com/document/d/19-FdJcLdNYpMW4rgc85EzBq1xuI3HqZ3PVagJ-1nXlM/edit#heading=h.73fadsz9a3mr -for the overall goal. -""" - class DataCiteApiTest(BaseModelTestCase): """ - Create a codebase with an initial release + Should exercise the DataCiteApi but currently can't test full integration with datacite sandbox. + Would also continue to "pollute" our sandbox repository on every test run. 
+ + FIXME: add better tests over is_metadata_equivalent with actual datacite metadata + responses """ def setUp(self): super().setUp() - self.dc = DataCiteApi(dry_run=False) - - # determine current server's datacite prefix - env = settings.DEPLOY_ENVIRONMENT - self.DATACITE_PREFIX = ( - "10.82853" if env.is_development or env.is_staging else "10.25937" - ) + self.api = DataCiteApi(dry_run=False) # create a codebase (which do NOT automatically create a required release) - self.cb = Codebase.objects.create( - title="Test codebase and releases to Datacite", + self.codebase = Codebase.objects.create( + title="Test codebase with releases for DataCite", description="Test codebase description", - identifier="cb", + identifier="test.cb.101", submitter=self.user, ) - - """ - allowed_chars = string.ascii_lowercase + string.digits - # create 3 releases with fake DOIs to test the parent/child/sibling - for i in [1, 2, 3]: - r = self.cb.create_release() - r.doi = ( - self.DATACITE_PREFIX + "/test-" + get_random_string(4, allowed_chars) - ) - r.save() - """ - self.create_release_ready_to_mint_doi() + self.release = ReleaseSetup.setUpPublishableDraftRelease(self.codebase) + self.release.publish() """ - FIXME: when the issue with the creators metadata is solved, then this - function needs to be completed. 
See - https://docs.google.com/document/d/19-FdJcLdNYpMW4rgc85EzBq1xuI3HqZ3PVagJ-1nXlM/edit#heading=h.qu5a82ym9f0r - for description of the problem with creators metadata - """ + FIXME: DataCite test repository does not allow localhost urls so minting DOIs won't work + unless we mock out the permanent_url to a non-localhost value in test settings def test_mint_new_doi_for_codebase(self): - self.assertTrue(self.dc.is_datacite_available()) - self.assertEqual(self.DATACITE_PREFIX, settings.DATACITE_PREFIX) - """ - reply = self.dc.mint_public_doi(self.cb) - self.assertContains(reply, self.DATACITE_PREFIX + "/") - self.assertTrue(doi_matches_pattern(reply)) - self.cb.doi = reply - self.cb.save() - """ - - """ - Test by updating codebase title - """ + self.assertTrue(self.api.is_datacite_available()) + # verify datacite prefix logic ? + # self.assertEqual(self.DATACITE_PREFIX, settings.DATACITE_PREFIX) def test_update_metadata_for_codebase(self): - self.assertTrue(self.dc.is_datacite_available()) - self.assertEqual(self.DATACITE_PREFIX, settings.DATACITE_PREFIX) - self.cb.title = self.cb.title + " (updated)" + self.assertTrue(self.api.is_datacite_available()) + self.codebase.title = self.codebase.title + " (updated)" # self.assertTrue(self.dc.update_doi_metadata(self.cb)) def test_mint_new_doi_for_release(self): - self.assertTrue(self.dc.is_datacite_available()) - self.assertEqual(self.DATACITE_PREFIX, settings.DATACITE_PREFIX) - release = self.cb.releases.first() - """ - reply = self.dc.mint_public_doi(release) - self.assertContains(reply, self.DATACITE_PREFIX + "/") - self.assertTrue(doi_matches_pattern(reply)) - release.doi = reply - release.save() - """ - - """ - Note updating the title will update title for codebase and all releases; - so instead we'll update the release note field for the first release - """ + self.assertTrue(self.api.is_datacite_available()) + release = self.codebase.releases.first() + doi, status_code = self.api.mint_public_doi(release) + 
self.assertEquals(status_code, 200, "should have successfully minted a DOI") + self.assertTrue(self.api.doi_matches_pattern(doi)) def test_update_metadata_for_release(self): - self.assertTrue(self.dc.is_datacite_available()) - self.assertEqual(self.DATACITE_PREFIX, settings.DATACITE_PREFIX) - release = self.cb.releases.first() + self.assertTrue(self.api.is_datacite_available()) + release = self.codebase.releases.first() release.release_notes.raw += " (updated)" - # self.assertTrue(self.dc.update_doi_metadata(release)) - - def create_release_ready_to_mint_doi(self): - """ - following copied from test_models.py but doesn't seem to work here... - the creators metadata ends up either being empty or 2 test_users... - not sure if that's the cause of the problem but _validate_metadata() - fails... - """ - release = ReleaseSetup.setUpPublishableDraftRelease(self.cb) - - # publish() will call validate_publishable() so don't need to call here - # release.validate_publishable() - release.publish() - # release.save() - return release + # FIXME: won't work until we mock out the permanent_url to a non-localhost value in test settings + # self.assertTrue(self.api.update_doi_metadata(release)) + """ + + def test_is_metadata_equivalent_publication_year(self): + comses_metadata = { + "title": "Sample Title", + "publicationYear": "2023", + "creators": [{"name": "John Doe"}], + } + datacite_metadata = { + "title": "Sample Title", + "publicationYear": 2023, + "creators": [{"name": "John Doe"}], + } + self.assertTrue( + DataCiteApi.is_metadata_equivalent(comses_metadata, datacite_metadata) + ) + + def test_is_metadata_equivalent_different_title(self): + comses_metadata = { + "title": "Sample Title", + "publicationYear": "2023", + "creators": [{"name": "John Doe"}], + } + datacite_metadata = { + "title": "Different Title", + "publicationYear": 2023, + "creators": [{"name": "John Doe"}], + } + self.assertFalse( + DataCiteApi.is_metadata_equivalent(comses_metadata, datacite_metadata) + ) + 
+ def test_is_metadata_equivalent_missing_keys(self): + comses_metadata = { + "title": "Sample Title", + "publicationYear": "2023", + "creators": [{"name": "John Doe"}], + } + datacite_metadata = { + "title": "Sample Title", + "creators": [{"name": "John Doe"}], + } + self.assertFalse( + DataCiteApi.is_metadata_equivalent(comses_metadata, datacite_metadata) + ) + + def test_is_same_metadata_empty_values(self): + comses_metadata = { + "title": "Sample Title", + "publicationYear": None, + "creators": [{"name": "John Doe"}], + } + dc_metadata = { + "title": "Sample Title", + "publicationYear": 0, + "creators": [{"name": "John Doe"}], + } + self.assertTrue( + DataCiteApi.is_metadata_equivalent(comses_metadata, dc_metadata) + ) + + def test_is_metadata_equivalent_nested_dict(self): + sent_data = { + "title": "Sample Title", + "publicationYear": "2023", + "creators": [ + { + "name": "John Doe", + "affiliation": { + "ror_id": "https://ror.org/12345", + "name": "University of Earth", + }, + } + ], + } + received_data = { + "title": "Sample Title", + "publicationYear": 2023, + "creators": [ + { + "name": "John Doe", + "affiliation": { + "ror_id": "https://ror.org/12345", + "name": "University of Earth", + }, + } + ], + } + self.assertTrue(DataCiteApi.is_metadata_equivalent(sent_data, received_data)) + + def test_is_metadata_equivalent_list_ordering(self): + comses_metadata = { + "title": "Sample Title", + "publicationYear": "2023", + "creators": [{"name": "John Doe"}, {"name": "Jane Doe"}], + } + dc_metadata = { + "title": "Sample Title", + "publicationYear": 2023, + "creators": [{"name": "Jane Doe"}, {"name": "John Doe"}], + } + self.assertTrue( + DataCiteApi.is_metadata_equivalent(comses_metadata, dc_metadata) + ) + + def test_is_same_metadata_different_nested_list(self): + comses_metadata = { + "title": "Sample Title", + "publicationYear": "2023", + "creators": [{"name": "John Doe"}, {"name": "Jane Doe"}], + } + dc_metadata = { + "title": "Sample Title", + 
"publicationYear": 2023, + "creators": [{"name": "John Doe"}, {"name": "Jane Smith"}], + } + self.assertFalse( + DataCiteApi.is_metadata_equivalent(comses_metadata, dc_metadata) + ) diff --git a/django/requirements.txt b/django/requirements.txt index b5a7e1356..d67dbd045 100644 --- a/django/requirements.txt +++ b/django/requirements.txt @@ -1,6 +1,6 @@ bagit==1.8.1 bleach==6.1.0 -datacite==1.1.4 +datacite==1.2.0 dedupe==3.0.2 django-allauth==0.63.6 django-anymail[mailgun]==10.3 @@ -24,7 +24,7 @@ django-vite==2.1.3 # latest is 3.0.4 django-waffle==4.1.0 djangorestframework==3.15.2 djangorestframework-camel-case==1.4.2 -Django==4.2.15 +Django==4.2.16 elasticsearch-dsl>=7.0.0,<8.0.0 elasticsearch>=7.0.0,<8.0.0 html2text>=2016.9.19 diff --git a/frontend/package.json b/frontend/package.json index 2b5576516..6d59fdcbc 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -29,7 +29,7 @@ "@vorms/resolvers": "^1.1.0", "@vuepic/vue-datepicker": "^4.5.1", "@vueuse/core": "^9.13.0", - "axios": "^1.7.4", + "axios": "^1.7.8", "bootstrap": "5.2.3", "highcharts": "^11.3.0", "highcharts-vue": "^1.4.3", diff --git a/frontend/src/components/UserSearch.vue b/frontend/src/components/UserSearch.vue index 28e106453..439400e98 100644 --- a/frontend/src/components/UserSearch.vue +++ b/frontend/src/components/UserSearch.vue @@ -51,8 +51,8 @@ ({{ option.email }}) - , {{ option.primaryJsonAffiliationName }}, {{ option.primaryAffiliationName }}
diff --git a/frontend/src/components/releaseEditor/ContributorSearch.vue b/frontend/src/components/releaseEditor/ContributorSearch.vue index 1211c44e4..354799507 100644 --- a/frontend/src/components/releaseEditor/ContributorSearch.vue +++ b/frontend/src/components/releaseEditor/ContributorSearch.vue @@ -98,8 +98,8 @@ function contributorEmail(contributor: Contributor) { function contributorAffiliation(contributor: Contributor) { let affiliation = ""; - if (contributor.primaryJsonAffiliationName) { - affiliation = contributor.primaryJsonAffiliationName || ""; + if (contributor.primaryAffiliationName) { + affiliation = contributor.primaryAffiliationName || ""; } else if (contributor.user) { affiliation = contributor.user.memberProfile.primaryAffiliationName || ""; } diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 1142ea668..1df9db0bb 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -149,7 +149,6 @@ export interface Contributor { affiliations: any[]; jsonAffiliations: any[]; primaryAffiliationName: string; - primaryJsonAffiliationName: string; email: string; id: number; name: string; @@ -172,7 +171,6 @@ export interface RelatedMemberProfile { email: string; profileUrl: string; primaryAffiliationName?: string; - primaryJsonAffiliationName?: string; tags: Tag[]; username: string; }