Skip to content

Commit

Permalink
refactor: renames to CodeMetaSchema/DataCiteSchema
Browse files Browse the repository at this point in the history
- replace "Metadata" with "Schema" for something slightly shorter that
  reads better than CodeMetaMetadata. Could also go with plain
  DataCite and CodeMeta and have them serve as the front end to both
  metadata worlds
- add two subtype classes to handle creating a DataCiteSchema from a
  Codebase or a CodebaseRelease. Should consider @sgfost's decorator-based
  design in the future
- quiet down exceptions for degenerate codebases without licenses and return
  partially consistent proxy objects if the codebase is not yet published
  • Loading branch information
alee committed Aug 16, 2024
1 parent fb008b8 commit d9f7c5d
Show file tree
Hide file tree
Showing 15 changed files with 484 additions and 335 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from django.core.management.base import BaseCommand



class Command(BaseCommand):
help = "Copy CoMSES users to discourse"

Expand Down
1 change: 0 additions & 1 deletion django/curator/admin.py
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@

# Register your models here.
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

from django.core.management.base import BaseCommand

from curator.tag_deduplication import TagClusterManager
Expand Down
3 changes: 1 addition & 2 deletions django/curator/tag_deduplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
class AbstractTagDeduper(abc.ABC):
# TRAINING_FILE = "curator/clustering_training.json"
TRAINING_FILE = pathlib.Path("curator", "clustering_training.json")
FIELDS = [{"field": "name", "type": "String"}]
FIELDS = [dedupe.variables.String("name")]

def uncertain_pairs(self):
return self.deduper.uncertain_pairs()
Expand Down Expand Up @@ -222,7 +222,6 @@ def remove_training_file(self):
class TagGazetteer(AbstractTagDeduper):
def __init__(self, search_threshold):
self.search_threshold = search_threshold

self.deduper = dedupe.Gazetteer(AbstractTagDeduper.FIELDS)
self.prepare_training()
self.deduper.train()
Expand Down
1 change: 0 additions & 1 deletion django/library/admin.py
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@

# Register your models here.
4 changes: 2 additions & 2 deletions django/library/doi.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
Codebase,
CodebaseRelease,
DataCiteAction,
DataCiteMetadata,
DataCiteSchema,
DataCiteRegistrationLog,
)

Expand Down Expand Up @@ -182,7 +182,7 @@ def _datacite_heartbeat_url(self):
else "https://api.test.datacite.org/heartbeat"
)

def _validate_metadata(self, datacite_metadata: DataCiteMetadata):
def _validate_metadata(self, datacite_metadata: DataCiteSchema):
metadata_dict = datacite_metadata.to_dict()
if not schema43.validate(metadata_dict):
logger.error("Invalid DataCite metadata: %s", metadata_dict)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

logger = logging.getLogger(__name__)


def delete_all_existing_codebase_dois_01(interactive=True, dry_run=True):
print(get_welcome_message(dry_run))
codebases_with_dois = Codebase.objects.exclude(doi__isnull=True)
Expand Down Expand Up @@ -35,18 +36,28 @@ def delete_all_existing_codebase_dois_01(interactive=True, dry_run=True):
print(VERIFICATION_MESSAGE)
logger.info("Checking that all existing codebase DOIs have been deleted...")
for i, codebase in enumerate(codebases_with_dois):
print(f"Processing Codebase {i}/{len(codebases_with_dois)} {'' if (i+1)%8 == 0 else '.'*((i+1)%8)}", end=" \r")
print(
f"Processing Codebase {i}/{len(codebases_with_dois)} {'' if (i+1)%8 == 0 else '.'*((i+1)%8)}",
end=" \r",
)
if codebase.doi is not None:
logger.error(f"DOI for codebase {codebase.pk} should be None!")
logger.info("Success. All existing codebase DOIs deleted.")


class Command(BaseCommand):

def add_arguments(self, parser):
parser.add_argument('--interactive', action='store_true', help='Wait for user to press enter to continue.')
parser.add_argument('--dry-run', action='store_true', help='Output what would have happened.')
parser.add_argument(
"--interactive",
action="store_true",
help="Wait for user to press enter to continue.",
)
parser.add_argument(
"--dry-run", action="store_true", help="Output what would have happened."
)

def handle(self, *args, **options):
interactive = options['interactive']
dry_run = options['dry_run']
delete_all_existing_codebase_dois_01(interactive, dry_run)
interactive = options["interactive"]
dry_run = options["dry_run"]
delete_all_existing_codebase_dois_01(interactive, dry_run)
52 changes: 39 additions & 13 deletions django/library/management/commands/fix_existing_dois_03.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,16 @@
from django.conf import settings

from library.models import CodebaseRelease
from library.doi import DataCiteApi, VERIFICATION_MESSAGE, doi_matches_pattern, get_welcome_message
from library.doi import (
DataCiteApi,
VERIFICATION_MESSAGE,
doi_matches_pattern,
get_welcome_message,
)

logger = logging.getLogger(__name__)


def fix_existing_dois_03(interactive=True, dry_run=True):
print(get_welcome_message(dry_run))

Expand Down Expand Up @@ -123,38 +129,58 @@ def fix_existing_dois_03(interactive=True, dry_run=True):
continue

logger.info(
f'Successfully fixed DOIs for existing {len(peer_reviewed_releases_with_dois)} peer reviewed CodebaseReleases with DOIs and their Codebases.'
f"Successfully fixed DOIs for existing {len(peer_reviewed_releases_with_dois)} peer reviewed CodebaseReleases with DOIs and their Codebases."
)

"""
assert correctness
"""
if not dry_run:
print(VERIFICATION_MESSAGE)
logger.info("Checking that all existing peer reviewed releases with DOIs (and their parent codebases) have valid DOIs...")
logger.info(
"Checking that all existing peer reviewed releases with DOIs (and their parent codebases) have valid DOIs..."
)
for i, release in enumerate(peer_reviewed_releases_with_dois):
print(f"Processing Codebase {i}/{len(peer_reviewed_releases_with_dois)} {'' if (i+1)%8 == 0 else '.'*((i+1)%8)}", end=" \r")
print(
f"Processing Codebase {i}/{len(peer_reviewed_releases_with_dois)} {'' if (i+1)%8 == 0 else '.'*((i+1)%8)}",
end=" \r",
)
if release.codebase.doi is None:
logger.error(f"Codebase DOI should not be None for codebase {release.codebase.pk}")
logger.error(
f"Codebase DOI should not be None for codebase {release.codebase.pk}"
)

if release.doi is None:
logger.error(f"DOI should not be None for release {release.pk}")

if not doi_matches_pattern(release.codebase.doi):
logger.error(f"{release.codebase.doi} Codebase DOI doesn't match DataCite pattern!")
logger.error(
f"{release.codebase.doi} Codebase DOI doesn't match DataCite pattern!"
)

if not doi_matches_pattern(release.doi):
logger.error(f"{release.doi} CodebaseRelease DOI doesn't match DataCite pattern!")
logger.error(
f"{release.doi} CodebaseRelease DOI doesn't match DataCite pattern!"
)

logger.info(
"Success. All existing peer reviewed releases with DOIs (and their parent codebases) have valid DOIs now."
)

logger.info("Success. All existing peer reviewed releases with DOIs (and their parent codebases) have valid DOIs now.")

class Command(BaseCommand):

def add_arguments(self, parser):
parser.add_argument('--interactive', action='store_true', help='Wait for user to press enter to continue.')
parser.add_argument('--dry-run', action='store_true', help='Output what would have happened.')
parser.add_argument(
"--interactive",
action="store_true",
help="Wait for user to press enter to continue.",
)
parser.add_argument(
"--dry-run", action="store_true", help="Output what would have happened."
)

def handle(self, *args, **options):
interactive = options['interactive']
dry_run = options['dry_run']
fix_existing_dois_03(interactive, dry_run)
interactive = options["interactive"]
dry_run = options["dry_run"]
fix_existing_dois_03(interactive, dry_run)
60 changes: 45 additions & 15 deletions django/library/management/commands/mint_dois.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import logging
from django.core.management.base import BaseCommand
from library.models import CodebaseRelease
from library.doi import DataCiteApi, VERIFICATION_MESSAGE, doi_matches_pattern, get_welcome_message
from library.doi import (
DataCiteApi,
VERIFICATION_MESSAGE,
doi_matches_pattern,
get_welcome_message,
)

logger = logging.getLogger(__name__)


def mint_dois_for_peer_reviewed_releases_without_dois(interactive=True, dry_run=True):
"""
for ALL peer_reviewed releases without DOIs:
Expand Down Expand Up @@ -98,44 +104,68 @@ def mint_dois_for_peer_reviewed_releases_without_dois(interactive=True, dry_run=
f"Failed to update metadata for next_release {next_release.pk}"
)

logger.info(f"Minted {len(peer_reviewed_releases_without_dois)} DOIs for peer reviewed releases without DOIs.")
logger.info(
f"Minted {len(peer_reviewed_releases_without_dois)} DOIs for peer reviewed releases without DOIs."
)

"""
assert correctness
"""
if not dry_run:
print(VERIFICATION_MESSAGE)
logger.info(f"Checking that: all peer reviewed releases (previously) without DOIs (and their parent codebases) have valid DOIs now...")
logger.info(
f"Checking that: all peer reviewed releases (previously) without DOIs (and their parent codebases) have valid DOIs now..."
)
invalid_codebases = []
invalid_releases = []

for i, release in enumerate(peer_reviewed_releases_without_dois):
print(f"Verifying release: {i}/{len(peer_reviewed_releases_without_dois)} {'' if (i+1)%8 == 0 else '.'*((i+1)%8)}", end=" \r")
print(
f"Verifying release: {i}/{len(peer_reviewed_releases_without_dois)} {'' if (i+1)%8 == 0 else '.'*((i+1)%8)}",
end=" \r",
)

if not release.doi or not doi_matches_pattern(release.doi):
invalid_releases.append(release.pk)
if not release.codebase.doi or not doi_matches_pattern(release.codebase.doi):
if not release.codebase.doi or not doi_matches_pattern(
release.codebase.doi
):
invalid_codebases.append(release.codebase.pk)

if invalid_codebases:
logger.error(f"Failure. Codebases with invalid or missing DOIs ({len(invalid_codebases)}): {invalid_codebases}")
logger.error(
f"Failure. Codebases with invalid or missing DOIs ({len(invalid_codebases)}): {invalid_codebases}"
)
else:
logger.info("Success. All parent codebases for peer reviewed releases previously without DOIs have valid DOIs now.")
logger.info(
"Success. All parent codebases for peer reviewed releases previously without DOIs have valid DOIs now."
)
if invalid_releases:
logger.error(f"Failure. CodebaseReleases with invalid or missing DOIs ({len(invalid_releases)}): {invalid_releases}")
logger.error(
f"Failure. CodebaseReleases with invalid or missing DOIs ({len(invalid_releases)}): {invalid_releases}"
)
else:
logger.info("Success. All peer reviewed releases previously without DOIs have valid DOIs now.")
class Command(BaseCommand):
logger.info(
"Success. All peer reviewed releases previously without DOIs have valid DOIs now."
)


class Command(BaseCommand):
"""
Mints DOIs for all peer reviewed CodebaseReleases.
"""

def add_arguments(self, parser):
parser.add_argument('--interactive', action='store_true', help='Wait for user to press enter to continue.')
parser.add_argument('--dry-run', action='store_true', help='Output what would have happened.')
parser.add_argument(
"--interactive",
action="store_true",
help="Wait for user to press enter to continue.",
)
parser.add_argument(
"--dry-run", action="store_true", help="Output what would have happened."
)

def handle(self, *args, **options):
interactive = options['interactive']
dry_run = options['dry_run']
mint_dois_for_peer_reviewed_releases_without_dois(interactive, dry_run)
interactive = options["interactive"]
dry_run = options["dry_run"]
mint_dois_for_peer_reviewed_releases_without_dois(interactive, dry_run)
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

logger = logging.getLogger(__name__)


def remove_dois_from_not_peer_reviewed_releases_02(interactive=True, dry_run=True):
print(get_welcome_message(dry_run))

Expand Down Expand Up @@ -36,20 +37,35 @@ def remove_dois_from_not_peer_reviewed_releases_02(interactive=True, dry_run=Tru
if not dry_run:
print(VERIFICATION_MESSAGE)
logger.info(
"Checking that DOIs for all not peer reviewed releases have been deleted...")
"Checking that DOIs for all not peer reviewed releases have been deleted..."
)
for i, release in enumerate(not_peer_reviewed_releases_with_dois):
print(f"Processing Codebase {i}/{len(not_peer_reviewed_releases_with_dois)} {'' if (i+1)%8 == 0 else '.'*((i+1)%8)}", end=" \r")
print(
f"Processing Codebase {i}/{len(not_peer_reviewed_releases_with_dois)} {'' if (i+1)%8 == 0 else '.'*((i+1)%8)}",
end=" \r",
)
if release.doi is not None:
logger.error(f"DOI for not peer reviewed release {release.pk} should be None!")
logger.info("Success. All existing DOIs for non peer reviewed releases have been deleted.")
logger.error(
f"DOI for not peer reviewed release {release.pk} should be None!"
)
logger.info(
"Success. All existing DOIs for non peer reviewed releases have been deleted."
)


class Command(BaseCommand):

def add_arguments(self, parser):
parser.add_argument('--interactive', action='store_true', help='Wait for user to press enter to continue.')
parser.add_argument('--dry-run', action='store_true', help='Output what would have happened.')
parser.add_argument(
"--interactive",
action="store_true",
help="Wait for user to press enter to continue.",
)
parser.add_argument(
"--dry-run", action="store_true", help="Output what would have happened."
)

def handle(self, *args, **options):
interactive = options['interactive']
dry_run = options['dry_run']
remove_dois_from_not_peer_reviewed_releases_02(interactive, dry_run)
interactive = options["interactive"]
dry_run = options["dry_run"]
remove_dois_from_not_peer_reviewed_releases_02(interactive, dry_run)
Loading

0 comments on commit d9f7c5d

Please sign in to comment.