-
Notifications
You must be signed in to change notification settings - Fork 89
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
github: added invenio github integration
- Loading branch information
1 parent
6686b98
commit 95a03cf
Showing
5 changed files
with
347 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Copyright (C) 2023 CERN. | ||
# | ||
# Invenio-RDM-Records is free software; you can redistribute it and/or modify | ||
# it under the terms of the MIT License; see LICENSE file for more details. | ||
"""RDM records implementation of Github.""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,290 @@ | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Copyright (C) 2023 CERN. | ||
# | ||
# Invenio-RDM-Records is free software; you can redistribute it and/or modify | ||
# it under the terms of the MIT License; see LICENSE file for more details. | ||
"""Github release API implementation.""" | ||
|
||
import requests | ||
import yaml | ||
from flask import current_app | ||
from invenio_github.api import GitHubRelease | ||
from invenio_github.models import ReleaseStatus | ||
from marshmallow import ValidationError | ||
from mistune import markdown | ||
|
||
from invenio_access.permissions import system_identity | ||
from invenio_db import db | ||
from invenio_pidstore.models import PersistentIdentifier | ||
from invenio_rdm_records.github.utils import get_extra_metadata, load_citation_metadata | ||
from invenio_rdm_records.proxies import current_rdm_records_service | ||
from invenio_records_resources.services.uow import UnitOfWork | ||
|
||
|
||
class RDMGithubRelease(GitHubRelease):
    """Implement release API instance for RDM.

    Translates a GitHub release payload (``self.release`` /
    ``self.repository``) into RDM record metadata and publishes it through
    ``current_rdm_records_service``, either as a brand new record or as a new
    version of the repository's latest published record.
    """

    def _related_identifiers(self):
        """Return the related identifier linking to the release's source tree.

        :returns: a dict describing the GitHub tree URL of the release tag,
            flagged as software this record is a supplement to.
        """
        repo_name = self.repository["full_name"]
        release_tag_name = self.release["tag_name"]
        return {
            "identifier": f"https://github.com/{repo_name}/tree/{release_tag_name}",
            "scheme": "url",
            "relation_type": {"id": "issupplementto"},
            "resource_type": {"id": "software"},
        }

    def _title(self):
        """Generate a title from a release and its repository name.

        The release name is preferred; when it is missing or empty the tag
        name from the payload is used, and finally the tag stored on the
        release object.
        """
        repo_name = self.repository_object.name
        release_name = self.release.get("name") or self.release.get(
            "tag_name", self.release_object.tag
        )
        return f"{repo_name}: {release_name}"

    def _description(self):
        """Extract description from a release.

        If the release does not have any body, the repository description is
        used. Falls back to "No description provided.".
        """
        if self.release.get("body"):
            # The release body is rendered with mistune's ``markdown``.
            return markdown(self.release["body"])
        if self.repository.get("description"):
            return self.repository["description"]
        return "No description provided."

    def _default_metadata(self):
        """Return default metadata for a release.

        :returns: a dict with the metadata fields derived from the release
            payload alone (no citation file involved).
        """
        # The licence is currently hard-coded; it is NOT read from the app config.
        # TODO use the default software license
        default_right = "cc-by-4.0"
        version = self.release.get("tag_name", "")

        return dict(
            description=self._description(),
            rights={"id": default_right},
            # Keep only the first 10 characters of the timestamp
            # (presumably the YYYY-MM-DD part of an ISO 8601 value - confirm).
            publication_date=self.release["published_at"][:10],
            related_identifiers=[self._related_identifiers()],
            version=version,
            title=self._title(),
            resource_type={"id": "software"},
            creators=[
                {
                    "person_or_org": {
                        "type": "personal",
                        # TODO get self.user.given_name (self.repository_object.user)
                        "given_name": "TODO GIVEN NAME",
                        # TODO get self.user.family_name (self.repository_object.user)
                        "family_name": "TODO FAMILY NAME",
                    }
                }
            ],
        )

    @property
    def metadata(self):
        """Extracts metadata to create an RDM draft.

        Default metadata is computed first and then overridden by whatever
        the citation file provides.
        """
        output = dict(self._default_metadata())
        # TODO: extra metadata (see ``_extra_metadata``) is intentionally not
        # merged in yet.
        output.update(self._citation_metadata())
        return output

    # TODO what to do with this one?
    # TODO this is very instance specific, e.g. .zenodo.json
    # TODO it can be added later
    def _extra_metadata(self):
        """Get extra metadata for file in repository.

        Delegates to ``get_extra_metadata`` for the release's tag.
        """
        return get_extra_metadata(
            self.gh.api,
            self.repository["owner"]["login"],
            self.repository["name"],
            self.release["tag_name"],
        )

    def _citation_metadata(self):
        """Get citation metadata for file in repository.

        :returns: the metadata loaded from the citation file, or an empty
            dict when no citation file is configured or when its content
            fails validation. Validation messages are recorded on
            ``self.release_object.errors`` under the citation file path.
        """
        citation_file_path = current_app.config.get("GITHUB_CITATION_FILE")

        if not citation_file_path:
            return {}

        try:
            # Read raw data from file, then load it through the schema
            data = self.load_citation_file()
            return load_citation_metadata(data)
        except ValidationError as e:
            self.release_object.errors.update({citation_file_path: e.messages})
            # Always hand back a mapping: the ``metadata`` property feeds this
            # value to ``dict.update``, which raises TypeError on ``None``.
            return {}

    def _test_zipball(self):
        """Check that the release archive can be retrieved from GitHub.

        :raises AssertionError: if the archive URL does not eventually answer
            with a 200 status code.
        """
        zipball_url = self.release["zipball_url"]
        session = self.gh.api.session

        # Execute a HEAD request to the zipball url to test the url.
        response = session.head(zipball_url, allow_redirects=True)

        # In case where there is a tag and branch with the same name, we might
        # get back a "300 Multiple Choices" response, which requires fetching
        # an "alternate" link.
        if response.status_code == 300:
            alternate_url = response.links.get("alternate", {}).get("url")
            if alternate_url:
                # Only replace the URL when an alternate exists, so the error
                # message below never reports "None".
                zipball_url = alternate_url
                response = session.head(zipball_url, allow_redirects=True)
                # Another edge-case, is when the access token we have does not
                # have the scopes/permissions to access public links. In that
                # rare case we fallback to a non-authenticated request.
                if response.status_code == 404:
                    response = requests.head(zipball_url, allow_redirects=True)
                    # If this response is successful we want to use the finally
                    # resolved URL to fetch the ZIP from.
                    if response.status_code == 200:
                        zipball_url = response.url

        # Raised explicitly (instead of using ``assert``) so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        if response.status_code != 200:
            raise AssertionError(
                f"Could not retrieve archive from GitHub: {zipball_url}"
            )

    def load_citation_file(self):
        """Returns the citation file data.

        :returns: ``{}`` when no citation file is configured, ``None`` when
            the file could not be retrieved, otherwise the parsed YAML
            content of the file.
        """
        citation_file_name = current_app.config.get("GITHUB_CITATION_FILE")
        if not citation_file_name:
            return {}

        # Fetch the citation file and load it
        content = self.retrieve_remote_file(citation_file_name)
        if content is None:
            return None

        return yaml.safe_load(content.decoded.decode("utf-8"))

    def resolve_record(self):
        """Resolves an RDM record from a release.

        :returns: the result of reading the record, as the system identity,
            by the ``recid`` attached to the release's record uuid.
        """
        recid = self._retrieve_recid_by_uuid(self.release_object.record_id)
        return current_rdm_records_service.read(system_identity, recid.pid_value)

    def _is_first_release(self):
        """Checks whether the current release is the first release of the repository."""
        return self.repository_object.releases.count() == 0

    def _retrieve_recid_by_uuid(self, rec_uuid):
        """Retrieves a persistent identifier given its objects uuid.

        Helper function.

        :param rec_uuid: uuid of the record object the ``recid`` points to.
        :returns: the matching ``PersistentIdentifier``.
        """
        return PersistentIdentifier.get_by_object(
            pid_type="recid",
            object_uuid=rec_uuid,
            object_type="rec",
        )

    def publish(self):
        """Publish GitHub release as record.

        Drafts and records are created using the current records service.
        The following steps are run inside a single transaction:

        - Create a draft (or a new version draft of the latest published
          record when this is not the first release).
        - Check that the release archive is fetchable.
        - Upload files to the draft.
        - Publish the draft.

        In case of failure, the release status is set to 'FAILED' and the
        exception is re-raised.

        :returns: the published record.
        :raises Exception: any exception generated along the way, e.g. by the
            records service (invalid metadata).
        """
        try:
            self.release_object.status = ReleaseStatus.PROCESSING
            data = {
                "metadata": self.metadata,
                "access": {"record": "public", "files": "public"},
                "files": {"enabled": True},
            }

            with UnitOfWork(db.session) as uow:
                if self._is_first_release():
                    draft = current_rdm_records_service.create(
                        self.user_identity, data, uow=uow
                    )
                else:
                    # Retrieve latest record id and its recid
                    latest_record_uuid = self.repository_object.latest_release(
                        ReleaseStatus.PUBLISHED
                    ).record_id

                    recid = self._retrieve_recid_by_uuid(latest_record_uuid)

                    # Create a new version and update its contents
                    new_version_draft = current_rdm_records_service.new_version(
                        self.user_identity, recid.pid_value, uow=uow
                    )
                    draft = current_rdm_records_service.update_draft(
                        self.user_identity, new_version_draft.id, data, uow=uow
                    )

                # Validate the release files are fetchable
                self._test_zipball()

                # Upload files to draft
                draft_file_service = current_rdm_records_service.draft_files

                draft_file_service.init_files(
                    self.user_identity,
                    draft.id,
                    data=[{"key": self.release_file_name}],
                    uow=uow,
                )

                with self.fetch_zipball_file() as file_stream:
                    draft_file_service.set_file_content(
                        self.user_identity,
                        draft.id,
                        self.release_file_name,
                        file_stream,
                        uow=uow,
                    )

                draft_file_service.commit_file(
                    self.user_identity, draft.id, self.release_file_name, uow=uow
                )

                record = current_rdm_records_service.publish(
                    self.user_identity, draft.id, uow=uow
                )

                # Update release weak reference and set status to PUBLISHED
                self.release_object.record_id = record._record.model.id
                self.release_object.status = ReleaseStatus.PUBLISHED

                # UOW must be committed manually since we're not using the decorator
                uow.commit()
                return record
        except Exception:
            # Flag release as FAILED and re-raise the exception untouched
            self.release_object.status = ReleaseStatus.FAILED
            raise

    def process_release(self):
        """Processes a github release.

        Publishes the release and logs any failure before propagating it.

        :returns: the published record.
        :raises Exception: any exception generated by the records service
            when creating a draft or publishing the release record.
        """
        try:
            return self.publish()
        except Exception as ex:
            current_app.logger.exception(
                f"Error while processing GitHub release {self.release_object.id}: {str(ex)}"
            )
            raise
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Copyright (C) 2023 CERN. | ||
# | ||
# Invenio-RDM-Records is free software; you can redistribute it and/or modify | ||
# it under the terms of the MIT License; see LICENSE file for more details. | ||
"""Utility functions.""" | ||
|
||
import json | ||
|
||
import yaml | ||
from flask import current_app | ||
from invenio_github.errors import CustomGitHubMetadataError | ||
from marshmallow import Schema | ||
|
||
|
||
def get_extra_metadata(gh, owner, repo_name, ref):
    """Get the metadata file.

    Reads the file named by the ``GITHUB_METADATA_FILE`` config entry from
    the given repository at ``ref`` and parses it as JSON.

    :param gh: GitHub API client exposing ``repository(owner, repo_name)``.
    :param owner: login of the repository owner.
    :param repo_name: name of the repository.
    :param ref: git reference (e.g. the release tag) to read the file from.
    :returns: the parsed JSON content, or ``{}`` if the file does not exist
        in the given ref.
    :raises CustomGitHubMetadataError: if the file content cannot be decoded
        or parsed (any ``ValueError``).
    """
    # TODO probably deprecated. On Zenodo we kept it for backwards compatibility
    metadata_file = current_app.config["GITHUB_METADATA_FILE"]
    try:
        content = gh.repository(owner, repo_name).file_contents(
            path=metadata_file, ref=ref
        )
        if not content:
            # File does not exist in the given ref
            return {}
        return json.loads(content.decoded.decode("utf-8"))
    except ValueError as err:
        # Chain the original error so the root cause stays in the traceback.
        raise CustomGitHubMetadataError(file=metadata_file) from err
|
||
|
||
def load_citation_metadata(citation_data):
    """Load citation data through the configured citation schema.

    :param citation_data: raw (already parsed) content of the citation file.
    :returns: ``{}`` when ``citation_data`` is empty, otherwise the result of
        loading it with the schema class configured under
        ``GITHUB_CITATION_METADATA_SCHEMA``.
    :raises AssertionError: if the configured value is not a marshmallow
        ``Schema`` subclass.
    :raises marshmallow.ValidationError: if the data does not satisfy the
        schema.
    """
    if not citation_data:
        return {}

    citation_schema = current_app.config.get("GITHUB_CITATION_METADATA_SCHEMA")

    # The configured value is instantiated below, so it has to be a Schema
    # *class*, not an instance. Raised explicitly (instead of ``assert``) so
    # the check survives running Python with -O.
    if not (isinstance(citation_schema, type) and issubclass(citation_schema, Schema)):
        raise AssertionError("Citation schema is needed to load citation metadata.")

    return citation_schema().load(citation_data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters