Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

services: zenodo upload integration #1941

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions cap/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,10 +720,7 @@ def _(x):

# Zenodo
# ======
ZENODO_SERVER_URL = os.environ.get('APP_ZENODO_SERVER_URL',
'https://zenodo.org/api')

ZENODO_ACCESS_TOKEN = os.environ.get('APP_ZENODO_ACCESS_TOKEN', 'CHANGE_ME')
ZENODO_SERVER_URL = os.environ.get('APP_ZENODO_SERVER_URL', 'https://zenodo.org/api') # noqa

# Endpoints
# =========
Expand Down
127 changes: 79 additions & 48 deletions cap/modules/deposit/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
from sqlalchemy.orm.exc import NoResultFound
from werkzeug.local import LocalProxy

from cap.modules.auth.ext import _fetch_token
from cap.modules.deposit.errors import DisconnectWebhookError, FileUploadError
from cap.modules.deposit.validators import NoRequiredValidator
from cap.modules.experiments.permissions import exp_need_factory
Expand All @@ -75,6 +76,8 @@
UpdateDepositPermission)

from .review import Reviewable
from .tasks import upload_to_zenodo
from .utils import create_zenodo_deposit

_datastore = LocalProxy(lambda: current_app.extensions['security'].datastore)

Expand Down Expand Up @@ -254,53 +257,82 @@ def upload(self, pid, *args, **kwargs):
_, rec = request.view_args.get('pid_value').data
record_uuid = str(rec.id)
data = request.get_json()
webhook = data.get('webhook', False)
event_type = data.get('event_type', 'release')

try:
url = data['url']
except KeyError:
raise FileUploadError('Missing url parameter.')

try:
host, owner, repo, branch, filepath = parse_git_url(url)
api = create_git_api(host, owner, repo, branch,
current_user.id)

if filepath:
if webhook:
raise FileUploadError(
'You cannot create a webhook on a file')

download_repo_file(
record_uuid,
f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}/{filepath}', # noqa
*api.get_file_download(filepath),
api.auth_headers,
)
elif webhook:
if event_type == 'release':
if branch:
raise FileUploadError(
'You cannot create a release webhook'
' for a specific branch or sha.')
target = data.get('target')

if target == 'zenodo':
# check for token
token = _fetch_token('zenodo')
if not token:
raise FileUploadError(
'Please connect your Zenodo account '
'before creating a deposit.')

files = data.get('files')
Lilykos marked this conversation as resolved.
Show resolved Hide resolved
bucket = data.get('bucket')
zenodo_data = data.get('zenodo_data', {})

if files and bucket:
zenodo_deposit = create_zenodo_deposit(token, zenodo_data) # noqa
self.setdefault('_zenodo', []).append(zenodo_deposit)
self.commit()

# upload files to zenodo deposit
upload_to_zenodo.delay(
files, bucket, token,
zenodo_deposit['id'],
zenodo_deposit['links']['bucket'])
else:
raise FileUploadError(
'You cannot create an empty Zenodo deposit. '
'Please add some files.')
else:
webhook = data.get('webhook', False)
event_type = data.get('event_type', 'release')

if event_type == 'push' and \
api.branch is None and api.sha:
raise FileUploadError(
'You cannot create a push webhook'
' for a specific sha.')
try:
url = data['url']
except KeyError:
raise FileUploadError('Missing url parameter.')

create_webhook(record_uuid, api, event_type)
else:
download_repo.delay(
record_uuid,
f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}.tar.gz', # noqa
api.get_repo_download(),
api.auth_headers)
try:
host, owner, repo, branch, filepath = parse_git_url(url) # noqa
api = create_git_api(host, owner, repo, branch,
current_user.id)

if filepath:
if webhook:
raise FileUploadError(
'You cannot create a webhook on a file')

download_repo_file(
record_uuid,
f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}/{filepath}', # noqa
*api.get_file_download(filepath),
api.auth_headers,
)
elif webhook:
if event_type == 'release':
if branch:
raise FileUploadError(
'You cannot create a release webhook'
' for a specific branch or sha.')

if event_type == 'push' and \
api.branch is None and api.sha:
raise FileUploadError(
'You cannot create a push webhook'
' for a specific sha.')

create_webhook(record_uuid, api, event_type)
else:
download_repo.delay(
record_uuid,
f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}.tar.gz', # noqa
api.get_repo_download(),
api.auth_headers)

except GitError as e:
raise FileUploadError(str(e))
except GitError as e:
raise FileUploadError(str(e))

return self

Expand Down Expand Up @@ -584,16 +616,15 @@ def validate(self, **kwargs):

validator = NoRequiredValidator(schema, resolver=resolver)

result = {}
result['errors'] = [
errors = [
FieldError(
list(error.path)+error.validator_value,
str(error.message))
for error in validator.iter_errors(self)
]

if result['errors']:
raise DepositValidationError(None, errors=result['errors'])
if errors:
raise DepositValidationError(None, errors=errors)
except RefResolutionError:
raise DepositValidationError('Schema {} not found.'.format(
self['$schema']))
Expand Down
27 changes: 27 additions & 0 deletions cap/modules/deposit/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,18 @@ def __init__(self, description, **kwargs):
self.description = description or self.description


class AuthorizationError(RESTException):
"""Exception during authorization."""

code = 401

def __init__(self, description, **kwargs):
"""Initialize exception."""
super(AuthorizationError, self).__init__(**kwargs)

self.description = description or self.description


class DisconnectWebhookError(RESTException):
"""Exception during disconnecting webhook for analysis."""

Expand Down Expand Up @@ -124,3 +136,18 @@ def __init__(self, description, errors=None, **kwargs):

self.description = description or self.description
self.errors = [FieldError(e[0], e[1]) for e in errors.items()]


class DataValidationError(RESTValidationError):
"""Review validation error exception."""

code = 400

description = "Validation error. Try again with valid data"

def __init__(self, description, errors=None, **kwargs):
"""Initialize exception."""
super(DataValidationError, self).__init__(**kwargs)

self.description = description or self.description
self.errors = [FieldError(e['field'], e['message']) for e in errors]
56 changes: 56 additions & 0 deletions cap/modules/deposit/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Analysis Preservation Framework.
# Copyright (C) 2018 CERN.
#
# CERN Analysis Preservation Framework is free software; you can redistribute
# it and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# CERN Analysis Preservation Framework is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with CERN Analysis Preservation Framework; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.
"""Tasks."""

from __future__ import absolute_import, print_function

import requests
from flask import current_app
from celery import shared_task
from invenio_db import db
from invenio_files_rest.models import FileInstance, ObjectVersion


@shared_task(autoretry_for=(Exception, ),
retry_kwargs={
'max_retries': 5,
'countdown': 10
})
def upload_to_zenodo(files, bucket, token, zenodo_depid, zenodo_bucket_url):
"""Upload to Zenodo the files the user selected."""
for filename in files:
file_obj = ObjectVersion.get(bucket, filename)
file_ins = FileInstance.get(file_obj.file_id)

with open(file_ins.uri, 'rb') as fp:
file = requests.put(
url=f'{zenodo_bucket_url}/{filename}',
data=fp,
params=dict(access_token=token),
)

if not file.ok:
current_app.logger.error(
f'Uploading file {filename} to deposit {zenodo_depid} '
f'failed with {file.status_code}.')
45 changes: 45 additions & 0 deletions cap/modules/deposit/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,14 @@

from __future__ import absolute_import, print_function

import requests
from flask import current_app
from flask_login import current_user
from invenio_access.models import Role
from invenio_db import db

from cap.modules.deposit.errors import AuthorizationError, \
DataValidationError, FileUploadError
from cap.modules.records.utils import url_to_api_url


Expand Down Expand Up @@ -75,3 +80,43 @@ def add_api_to_links(links):
item['links'] = add_api_to_links(item.get('links'))

return response


def create_zenodo_deposit(token, data):
"""Create a Zenodo deposit using the logged in user's credentials."""
zenodo_url = current_app.config.get("ZENODO_SERVER_URL")
deposit = requests.post(
url=f'{zenodo_url}/deposit/depositions',
params=dict(access_token=token),
json={'metadata': data},
headers={'Content-Type': 'application/json'}
)

if not deposit.ok:
if deposit.status_code == 401:
raise AuthorizationError(
'Authorization to Zenodo failed. Please reconnect.')
if deposit.status_code == 400:
data = deposit.json()
if data.get('message') == 'Validation error.':
raise DataValidationError(
'Validation error on creating the Zenodo deposit.',
errors=data.get('errors'))
raise FileUploadError(
'Something went wrong, Zenodo deposit not created.')

# TODO: fix with serializers
data = deposit.json()
zenodo_deposit = {
'id': data['id'],
'title': data.get('metadata', {}).get('title'),
'creator': current_user.id,
'created': data['created'],
'links': {
'self': data['links']['self'],
'bucket': data['links']['bucket'],
'html': data['links']['html'],
'publish': data['links']['publish'],
}
}
return zenodo_deposit
27 changes: 0 additions & 27 deletions cap/modules/services/views/zenodo.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@

import requests
from flask import current_app, jsonify
from invenio_files_rest.models import FileInstance, ObjectVersion

from . import blueprint

Expand All @@ -48,29 +47,3 @@ def get_zenodo_record(zenodo_id):
"""Get record from zenodo (route)."""
resp, status = _get_zenodo_record(zenodo_id)
return jsonify(resp), status


@blueprint.route('/zenodo/<bucket_id>/<filename>')
def upload_to_zenodo(bucket_id, filename):
"""Upload code to zenodo."""
zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL')
params = {"access_token": current_app.config.get(
'ZENODO_ACCESS_TOKEN')}
filename = filename + '.tar.gz'

r = requests.post(zenodo_server_url,
params=params, json={},
)

file_obj = ObjectVersion.get(bucket_id, filename)
file = FileInstance.get(file_obj.file_id)

bucket_url = r.json()['links']['bucket']
with open(file.uri, 'rb') as fp:
response = requests.put(
bucket_url + '/{}'.format(filename),
data=fp,
params=params,
)

return jsonify({"status": response.status_code})
2 changes: 2 additions & 0 deletions docker-services.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ services:
- "INVENIO_RATELIMIT_STORAGE_URL=redis://cache:6379/3"
- "INVENIO_CERN_APP_CREDENTIALS_KEY=CHANGE_ME"
- "INVENIO_CERN_APP_CREDENTIALS_SECRET=CHANGE_ME"
- "INVENIO_ZENODO_CLIENT_ID=CHANGE_ME"
- "INVENIO_ZENODO_CLIENT_SECRET=CHANGE_ME"
- "DEV_HOST=CHANGE_ME"
lb:
build: ./docker/haproxy/
Expand Down
Loading