Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
alejandromumo committed Jun 21, 2023
1 parent 668c1c7 commit 5b01170
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 74 deletions.
50 changes: 14 additions & 36 deletions invenio_github/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@

import github3
import humanize
import yaml
from flask import current_app
from invenio_access.utils import get_identity
from invenio_db import db
Expand Down Expand Up @@ -474,33 +473,25 @@ def user_identity(self):
"""Generates release owner's user identity."""
return get_identity(self.repository_object.user)

@cached_property
def citation_file(self):
"""Returns the citation file data."""

def _load_citation_file(raw_content):
"""Loads a citation file."""
# Supports YAML files
# TODO maybe we can add handlers by config (e.g. 'citation.cff : yaml.safe_load)
data = yaml.safe_load(raw_content.decoded.decode("utf-8"))
return data

citation_file_path = current_app.config.get("GITHUB_CITATION_FILE")
if not citation_file_path:
return {}
def retrieve_remote_file(self, file_name):
"""Retrieves a file from the repository, for the current release, using the github client.
:param file_name: the name of the file to be retrieved from the repository.
:returns: the file contents or None, if the file if not fetched.
"""
gh_repo_owner = self.repository["owner"]["login"]
gh_repo_name = self.repository["name"]
gh_tag_name = self.release["tag_name"]

# Fetch the citation file and load it
content = self.gh.api.repository(gh_repo_owner, gh_repo_name).file_contents(
path=citation_file_path, ref=gh_tag_name
path=file_name, ref=gh_tag_name
)
return content

data = _load_citation_file(content)

return data
def download_release_file(self):
"""Download release file using the current github session."""
session = self.gh.api.session
with session.get(self.release_zipball_url, stream=True) as s:
yield s.raw

@cached_property
def author(self):
Expand All @@ -512,23 +503,10 @@ def status(self):
"""Get the release status."""
return self.release_object.status

def verify_sender(self):
"""Check if the sender is valid.
Valid means that the release repository is listed under the github account repos.
"""
return (
self.payload["repository"]["full_name"]
in self.gh.account.extra_data["repos"]
)

def publish(self):
"""Publish a GitHub release."""
raise NotImplementedError

def process_release(self, verify_sender):
"""Processes a github release.
:param verify_sender: whether to validate the sender identity.
"""
def process_release(self):
"""Processes a github release."""
raise NotImplementedError
2 changes: 2 additions & 0 deletions invenio_github/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ def disconnect(remote):
except NoResultFound:
# If the repository doesn't exist, no action is necessary
pass

# Commit any changes before running the ascynhronous task
db.session.commit()

# Send Celery task for webhooks removal and token revocation
Expand Down
1 change: 0 additions & 1 deletion invenio_github/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
from sqlalchemy_utils.models import Timestamp
from sqlalchemy_utils.types import ChoiceType, JSONType, UUIDType


RELEASE_STATUS_TITLES = {
"RECEIVED": _("Received"),
"PROCESSING": _("Processing"),
Expand Down
43 changes: 31 additions & 12 deletions invenio_github/receivers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,21 @@

from __future__ import absolute_import

from datetime import datetime

from flask import current_app
from invenio_db import db
from invenio_webhooks.models import Receiver

from invenio_github.models import ReleaseStatus
from invenio_github.api import GitHubAPI
from invenio_github.models import Release, ReleaseStatus, Repository
from invenio_github.proxies import current_github
from invenio_github.tasks import process_release

from .errors import (
InvalidSenderError,
ReleaseAlreadyReceivedError,
RepositoryAccessError,
RepositoryDisabledError,
)
from .models import Release, Repository
from .tasks import process_release


class GitHubReceiver(Receiver):
Expand All @@ -64,18 +63,24 @@ def run(self, event):
# Event failed to be processed and error was not handled yet (response code is still not an error code)
if event.response_code < 400:
pass
# TODO code 418 I'm a teapot
# TODO code 501 Not implemented
event.response_code = 500

def _handle_event(self, event):
"""Handles an incoming github event."""
action = event.payload.get("action")
is_draft_release = event.payload.get("release", {}).get("draft")
is_release_event = (

# Draft releases do not create releases on invenio
is_create_release_event = (
action in ("published", "released", "created") and not is_draft_release
)

if is_release_event:
if is_create_release_event:
self._handle_create_release(event)
else:
# TODO other events (e.g. ping, draft release) are discarded
pass

def _handle_create_release(self, event):
Expand Down Expand Up @@ -110,25 +115,39 @@ def _create_release(event):
else:
raise RepositoryDisabledError(repo=repo)

def _verify_sender(event):
"""Validates the sender of the release."""
api = GitHubAPI(user_id=self.event.user_id)

return (
event.payload["repository"]["full_name"]
in api.account.extra_data["repos"]
)

try:
# Create a release
release = _create_release(event)

# TODO verify_sender is always set to 'False', even in legacy zenodo.
if self.verify_sender and not _verify_sender(event):
release.release_object.status = ReleaseStatus.FAILED
raise InvalidSenderError(
event=release.event.id, user=release.event.user_id
)

# Process the release
async_mode = current_app.config.get("GITHUB_ASYNC_MODE", True)
if async_mode:
# Since 'process_release' is executed asynchronously, we commit the current state of session
db.session.commit()
process_release.delay(
release.release_id, verify_sender=self.verify_sender
)
process_release.delay(release.release_id)
else:
release_api = current_github.release_api_class(release)
release_api.process_release(verify_sender=self.verify_sender)
release_api.process_release()
except (ReleaseAlreadyReceivedError, RepositoryDisabledError) as e:
event.response_code = 409
event.response = dict(message=str(e), status=409)
except RepositoryAccessError as e:
except (RepositoryAccessError, InvalidSenderError) as e:
event.response_code = 403
event.response = dict(message=str(e), status=403)
except Exception as e:
Expand Down
51 changes: 27 additions & 24 deletions invenio_github/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,17 @@
import datetime
import json

import github3
from celery import shared_task
from flask import current_app, g
from invenio_db import db
from invenio_oauthclient.models import RemoteAccount
from invenio_oauthclient.proxies import current_oauthclient
from invenio_rest.errors import RESTException
from sqlalchemy.orm.exc import NoResultFound

from .errors import CustomGitHubMetadataError, RepositoryAccessError
from .models import Release, ReleaseStatus
from .proxies import current_github
from invenio_github.errors import CustomGitHubMetadataError, RepositoryAccessError
from invenio_github.models import Release, ReleaseStatus
from invenio_github.proxies import current_github


def _get_err_obj(msg):
Expand Down Expand Up @@ -76,23 +76,25 @@ def disconnect_github(access_token, repo_hooks):
# Note at this point the remote account and all associated data have
# already been deleted. The celery task is passed the access_token to make
# some last cleanup and afterwards delete itself remotely.
import github3

# Local import to avoid circular imports
from .api import GitHubAPI

try:
gh = github3.login(token=access_token)
for repo_id, repo_hook in repo_hooks:
ghrepo = gh.repository_with_id(repo_id)
if ghrepo:
hook = ghrepo.hook(repo_hook)
if hook and hook.delete():
current_app.logger.info(
"Deleted hook from github repository.",
extra={"hook": hook.id, "repo": ghrepo.full_name},
)
# If we finished our clean-up successfully, we can revoke the token
GitHubAPI.revoke_token(access_token)
# Create a nested transaction to make sure that hook deletion + token revoke is atomic
with db.session.begin_nested():
gh = github3.login(token=access_token)
for repo_id, repo_hook in repo_hooks:
ghrepo = gh.repository_with_id(repo_id)
if ghrepo:
hook = ghrepo.hook(repo_hook)
if hook and hook.delete():
current_app.logger.info(
"Deleted hook from github repository.",
extra={"hook": hook.id, "repo": ghrepo.full_name},
)
# If we finished our clean-up successfully, we can revoke the token
GitHubAPI.revoke_token(access_token)
except Exception as exc:
# Retry in case GitHub may be down...
disconnect_github.retry(exc=exc)
Expand All @@ -101,6 +103,7 @@ def disconnect_github(access_token, repo_hooks):
@shared_task(max_retries=6, default_retry_delay=10 * 60, rate_limit="100/m")
def sync_hooks(user_id, repositories):
"""Sync repository hooks for a user."""
# Local import to avoid circular imports
from .api import GitHubAPI

try:
Expand All @@ -111,18 +114,16 @@ def sync_hooks(user_id, repositories):
with db.session.begin_nested():
gh.sync_repo_hook(repo_id)
# We commit per repository, because while the task is running
# the user might enable/disable a hook.
db.session.commit()
except RepositoryAccessError as e:
current_app.logger.warning(str(e), exc_info=True)
except NoResultFound:
pass # Repository not in DB yet
except Exception as exc:
sync_hooks.retry(exc=exc)


@shared_task(ignore_result=True, max_retries=5, default_retry_delay=10 * 60)
def process_release(release_id, verify_sender=False):
def process_release(release_id):
"""Process a received Release."""
with db.session.begin_nested():
release_model = Release.query.filter(
Expand All @@ -135,7 +136,7 @@ def process_release(release_id, verify_sender=False):
matched_ex = None

try:
release.process_release(verify_sender=verify_sender)
release.process_release()
except Exception as ex:
error_handlers = current_github.release_error_handlers
matched_ex = None
Expand Down Expand Up @@ -173,8 +174,10 @@ def refresh_accounts(expiration_threshold=None):
@shared_task(ignore_result=True)
def sync_account(user_id):
"""Sync a user account."""
# Local import to avoid circular imports
from .api import GitHubAPI

gh = GitHubAPI(user_id=user_id)
gh.sync(hooks=False, async_hooks=False)
db.session.commit()
# Start a nested transaction so every data writing inside sync is executed atomically
with db.session.begin_nested():
gh = GitHubAPI(user_id=user_id)
gh.sync(hooks=False, async_hooks=False)
2 changes: 1 addition & 1 deletion tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def publish(self):
self.release_object.status = ReleaseStatus.PUBLISHED
return {}

def process_release(self, verify_sender):
def process_release(self):
"""Processes a release."""
self.publish()
return {}
Expand Down

0 comments on commit 5b01170

Please sign in to comment.