Skip to content

Commit

Permalink
github: added invenio github integration
Browse files Browse the repository at this point in the history
  • Loading branch information
alejandromumo committed Jun 30, 2023
1 parent 6686b98 commit c5ce68c
Show file tree
Hide file tree
Showing 6 changed files with 346 additions and 0 deletions.
4 changes: 4 additions & 0 deletions invenio_rdm_records/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from .services.permissions import RDMRecordPermissionPolicy
from .services.pids import providers

from .services.github.release import RDMGithubRelease

# Invenio-RDM-Records
# ===================

Expand Down Expand Up @@ -479,3 +481,5 @@ def make_doi(prefix, record):
...
}]
"""

# Release API class used for the GitHub integration; RDMGithubRelease publishes
# a GitHub release as an RDM record.
GITHUB_RELEASE_CLASS = RDMGithubRelease
7 changes: 7 additions & 0 deletions invenio_rdm_records/services/github/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 CERN.
#
# Invenio-RDM-Records is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.
"""RDM records implementation of Github."""
167 changes: 167 additions & 0 deletions invenio_rdm_records/services/github/metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 CERN.
#
# Invenio-RDM-Records is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.
"""RDM github release metadata."""
import yaml
from flask import current_app
from invenio_github.errors import CustomGitHubMetadataError
from marshmallow import Schema, ValidationError
from mistune import markdown
from nameparser import HumanName


class RDMReleaseMetadata(object):
    """Wraps a release object to extract its data to meet RDM specific needs.

    The wrapped release must expose ``repository_payload``, ``release_payload``,
    ``contributors`` and ``retrieve_remote_file``; these are the only members
    read by this class.
    """

    def __init__(self, rdm_github_release):
        """Constructor.

        :param rdm_github_release: release API object the metadata is read from.
        """
        self.rdm_release = rdm_github_release

    @property
    def related_identifiers(self):
        """Return the related identifier pointing at the release tree on GitHub.

        Despite the plural name a single identifier dict is returned; callers
        wrap it in a list (see ``default_metadata``).
        """
        repo_name = self.rdm_release.repository_payload["full_name"]
        release_tag_name = self.rdm_release.release_payload["tag_name"]
        return {
            "identifier": f"https://github.com/{repo_name}/tree/{release_tag_name}",
            "scheme": "url",
            "relation_type": {"id": "issupplementto"},
            "resource_type": {"id": "software"},
        }

    @property
    def title(self):
        """Generate a title from a release and its repository name.

        The release name is used when set, otherwise the tag name.
        """
        repo_name = self.rdm_release.repository_payload["full_name"]
        release_name = (
            self.rdm_release.release_payload.get("name")
            or self.rdm_release.release_payload["tag_name"]
        )
        return f"{repo_name}: {release_name}"

    @property
    def description(self):
        """Extract description from a release.

        If the release does not have any body, the repository description is
        used. Falls back to "No description provided.".
        """
        release_body = self.rdm_release.release_payload.get("body")
        if release_body:
            # The release body is markdown; it is rendered through mistune.
            return markdown(release_body)

        repo_description = self.rdm_release.repository_payload.get("description")
        if repo_description:
            return repo_description

        return "No description provided."

    @property
    def default_metadata(self):
        """Return default metadata for a release.

        :returns: dict with description, rights, publication date, related
            identifiers, version, title, resource type and creators.
        """
        # TODO use the default software license (e.g. read it from the app
        # config); the right is hard-coded to cc-by-4.0 for now.
        default_right = "cc-by-4.0"
        version = self.rdm_release.release_payload.get("tag_name", "")

        return dict(
            description=self.description,
            rights={"id": default_right},
            # Keep the first 10 characters of the timestamp
            # (presumably ISO 8601, i.e. the YYYY-MM-DD part -- TODO confirm).
            publication_date=self.rdm_release.release_payload["published_at"][:10],
            related_identifiers=[self.related_identifiers],
            version=version,
            title=self.title,
            resource_type={"id": "software"},
            creators=self.contributors,
        )

    @property
    def contributors(self):
        """Serializes contributors retrieved from github.

        :returns: list of RDM creator dicts, one per release contributor.
        """

        def serialize_author(gh_data):
            """Serializes github contributor data into RDM author."""
            login = gh_data["login"]
            # Profile fields may be present with an explicit null value, in
            # which case ``dict.get`` defaults do not apply: fall back with
            # ``or`` so that ``HumanName`` never receives ``None``.
            name = gh_data.get("name") or login
            company = gh_data.get("company") or ""

            human_name = HumanName(name)

            rdm_contributor = {
                "person_or_org": {
                    "type": "personal",
                    "given_name": human_name.first,
                    "family_name": human_name.surnames,
                },
            }
            # Only emit an affiliation when there is a name for it; an
            # affiliation with an empty/null name carries no information.
            if company:
                rdm_contributor["affiliations"] = [{"name": company}]
            return rdm_contributor

        # Contributors come from the wrapped release (fetched from the API).
        return [serialize_author(c) for c in self.rdm_release.contributors]

    @property
    def citation_metadata(self):
        """Get citation metadata for file in repository.

        The file path is read from the ``GITHUB_CITATION_FILE`` config. An
        empty dict is returned when no citation file is configured.

        :raises CustomGitHubMetadataError: if the citation file content does
            not validate against the citation schema.
        """
        citation_file_path = current_app.config.get("GITHUB_CITATION_FILE")

        if not citation_file_path:
            return {}

        try:
            # Read raw data from file
            data = self.load_citation_file(citation_file_path)

            # Load metadata from citation file and serialize it
            metadata = self.load_citation_metadata(data)
            return self.serialize_citation_metadata(metadata)
        except ValidationError as e:
            # Wrap the error into CustomGitHubMetadataError() so it can be
            # handled upstream; keep the original error as the cause.
            raise CustomGitHubMetadataError(
                file=citation_file_path, message=e.messages
            ) from e

    def serialize_citation_metadata(self, data):
        """Serializes citation data to RDM.

        :param data: loaded citation metadata.
        :returns: ``{}`` for empty input, otherwise ``data`` unchanged.
        """
        if not data:
            return {}
        # TODO to be implemented
        return data

    def load_citation_file(self, citation_file_name):
        """Returns the citation file data.

        :param citation_file_name: path of the citation file in the repository.
        :returns: ``{}`` if no file name is given, ``None`` if the remote file
            could not be retrieved, otherwise the parsed YAML content.
        """
        if not citation_file_name:
            return {}

        # The remote file is fetched through the wrapped release; this class
        # does not define ``retrieve_remote_file`` itself.
        content = self.rdm_release.retrieve_remote_file(citation_file_name)
        if content is None:
            return None

        # ``content.decoded`` is decoded as UTF-8 text and parsed with the
        # safe YAML loader (the file comes from a user repository).
        return yaml.safe_load(content.decoded.decode("utf-8"))

    def load_citation_metadata(self, citation_data):
        """Load (validate) citation data with the configured schema.

        The schema is read from the ``GITHUB_CITATION_METADATA_SCHEMA`` config
        and may be either a marshmallow ``Schema`` subclass or an instance.

        :param citation_data: raw citation data, as parsed from the file.
        :returns: ``{}`` for empty input, otherwise the loaded data.
        :raises TypeError: if no valid citation schema is configured.
        :raises ValidationError: if the data does not match the schema.
        """
        if not citation_data:
            return {}

        citation_schema = current_app.config.get("GITHUB_CITATION_METADATA_SCHEMA")

        # Accept a schema class (instantiated here) as well as a ready-made
        # instance; anything else is a configuration error.
        if isinstance(citation_schema, type) and issubclass(citation_schema, Schema):
            citation_schema = citation_schema()
        elif not isinstance(citation_schema, Schema):
            raise TypeError("Citation schema is needed to load citation metadata.")

        return citation_schema.load(citation_data)
142 changes: 142 additions & 0 deletions invenio_rdm_records/services/github/release.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 CERN.
#
# Invenio-RDM-Records is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.
"""Github release API implementation."""

from flask import current_app
from invenio_access.permissions import system_identity
from invenio_db import db
from invenio_github.api import GitHubRelease
from invenio_github.models import ReleaseStatus
from invenio_records_resources.services.uow import UnitOfWork

from invenio_rdm_records.proxies import current_rdm_records_service
from invenio_rdm_records.services.github.metadata import RDMReleaseMetadata
from invenio_rdm_records.services.github.utils import retrieve_recid_by_uuid


class RDMGithubRelease(GitHubRelease):
    """Release API implementation that turns GitHub releases into RDM records."""

    @property
    def metadata(self):
        """Build the metadata used to create an RDM draft.

        Default release metadata is computed first and then updated with the
        citation metadata, so citation values take precedence.
        """
        extractor = RDMReleaseMetadata(self)
        draft_metadata = extractor.default_metadata
        draft_metadata.update(extractor.citation_metadata)
        return draft_metadata

    def resolve_record(self):
        """Read the RDM record referenced by this release."""
        pid = retrieve_recid_by_uuid(self.release_object.record_id)
        return current_rdm_records_service.read(system_identity, pid.pid_value)

    def publish(self):
        """Publish GitHub release as record.

        Drafts and records are created using the current records service.
        The release is first flagged as processing (committed right away),
        then the following steps run inside a single unit of work:

        - Create a draft (or a new version draft of the latest published
          release's record, updated with the new data).
        - Check that the release zipball can be fetched.
        - Upload the zipball to the draft.
        - Publish the draft and link the release to the record.

        On failure the release status is set to 'FAILED', that state is
        committed, and the exception is re-raised.

        :returns: the published record item.
        :raises Exception: any exception generated along the way (e.g. by the
            records service on invalid metadata).
        """
        try:
            self.release_processing()
            # Persist the status change now, in case publishing gets stuck.
            db.session.commit()

            with UnitOfWork(db.session) as uow:
                payload = {
                    "metadata": self.metadata,
                    "access": {"record": "public", "files": "public"},
                    "files": {"enabled": True},
                }

                if not self.is_first_release():
                    # Find the record of the latest published release ...
                    previous_uuid = self.repository_object.latest_release(
                        ReleaseStatus.PUBLISHED
                    ).record_id
                    previous_pid = retrieve_recid_by_uuid(previous_uuid)

                    # ... and derive a new version from it, filled with the
                    # data of this release.
                    next_version = current_rdm_records_service.new_version(
                        self.user_identity, previous_pid.pid_value, uow=uow
                    )
                    draft = current_rdm_records_service.update_draft(
                        self.user_identity, next_version.id, payload, uow=uow
                    )
                else:
                    draft = current_rdm_records_service.create(
                        self.user_identity, payload, uow=uow
                    )

                # Make sure the release files can be fetched before uploading.
                self.test_zipball()

                files_service = current_rdm_records_service.draft_files

                # Register, upload and commit the release zipball.
                files_service.init_files(
                    self.user_identity,
                    draft.id,
                    data=[{"key": self.release_file_name}],
                    uow=uow,
                )
                with self.fetch_zipball_file() as zipball:
                    files_service.set_file_content(
                        self.user_identity,
                        draft.id,
                        self.release_file_name,
                        zipball,
                        uow=uow,
                    )
                files_service.commit_file(
                    self.user_identity,
                    draft.id,
                    self.release_file_name,
                    uow=uow,
                )

                published = current_rdm_records_service.publish(
                    self.user_identity, draft.id, uow=uow
                )

                # Point the release at the new record and mark it PUBLISHED.
                self.release_object.record_id = published._record.model.id
                self.release_published()

                # No unit-of-work decorator here: commit explicitly.
                uow.commit()
                return published
        except Exception:
            # Mark the release as FAILED and persist that state before
            # propagating the error.
            self.release_failed()
            db.session.commit()
            raise

    def process_release(self):
        """Process a github release by publishing it.

        Errors are logged (with traceback) through the application logger and
        then re-raised.

        :returns: the published record item.
        :raises Exception: any exception raised while publishing the release.
        """
        try:
            return self.publish()
        except Exception as ex:
            current_app.logger.exception(
                f"Error while processing GitHub release {self.release_object.id}: {str(ex)}"
            )
            raise
24 changes: 24 additions & 0 deletions invenio_rdm_records/services/github/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 CERN.
#
# Invenio-RDM-Records is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.
"""Utility functions."""

from flask import current_app
from invenio_pidstore.models import PersistentIdentifier
from marshmallow import Schema


def retrieve_recid_by_uuid(rec_uuid):
    """Return the ``recid`` persistent identifier of a record object.

    Helper function wrapping ``PersistentIdentifier.get_by_object``.

    :param rec_uuid: UUID of the record object the identifier is assigned to.
    :returns: the result of the ``get_by_object`` lookup.
    """
    lookup = {
        "pid_type": "recid",
        "object_type": "rec",
        "object_uuid": rec_uuid,
    }
    return PersistentIdentifier.get_by_object(**lookup)
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,11 @@ install_requires =
invenio-administration>=1.2.0,<2.0.0
invenio-communities>=7.0.0,<8.0.0
invenio-drafts-resources>=1.4.1,<2.0.0
invenio-github>=1.0.0a8 # TODO actually bump it
invenio-i18n>=2.0.0,<3.0.0
invenio-oaiserver>=2.0.0,<3.0.0
invenio-vocabularies>=1.2.0,<2.0.0
nameparser>=1.1.1
pytz>=2020.4
pyyaml>=5.4.0
python-slugify>=8.0.1
Expand Down

0 comments on commit c5ce68c

Please sign in to comment.