Skip to content

Commit

Permalink
services: create zenodo deposit through CAP
Browse files Browse the repository at this point in the history
* updates the config, each user has access to their Zenod ccount/token
* creates deposit, with metadata
* uploads files to deposit
* integration tests
* closes cernanalysispreservation#1938
* closes cernanalysispreservation#1934

Signed-off-by: Ilias Koutsakis <[email protected]>
  • Loading branch information
Lilykos committed Nov 20, 2020
1 parent 41cff42 commit ffdb4b7
Show file tree
Hide file tree
Showing 9 changed files with 626 additions and 80 deletions.
5 changes: 1 addition & 4 deletions cap/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,10 +720,7 @@ def _(x):

# Zenodo
# ======
ZENODO_SERVER_URL = os.environ.get('APP_ZENODO_SERVER_URL',
'https://zenodo.org/api')

ZENODO_ACCESS_TOKEN = os.environ.get('APP_ZENODO_ACCESS_TOKEN', 'CHANGE_ME')
ZENODO_SERVER_URL = os.environ.get('APP_ZENODO_SERVER_URL', 'https://zenodo.org/api') # noqa

# Endpoints
# =========
Expand Down
127 changes: 79 additions & 48 deletions cap/modules/deposit/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
from sqlalchemy.orm.exc import NoResultFound
from werkzeug.local import LocalProxy

from cap.modules.auth.ext import _fetch_token
from cap.modules.deposit.errors import DisconnectWebhookError, FileUploadError
from cap.modules.deposit.validators import NoRequiredValidator
from cap.modules.experiments.permissions import exp_need_factory
Expand All @@ -75,6 +76,8 @@
UpdateDepositPermission)

from .review import Reviewable
from .tasks import upload_to_zenodo
from .utils import create_zenodo_deposit

_datastore = LocalProxy(lambda: current_app.extensions['security'].datastore)

Expand Down Expand Up @@ -254,53 +257,82 @@ def upload(self, pid, *args, **kwargs):
_, rec = request.view_args.get('pid_value').data
record_uuid = str(rec.id)
data = request.get_json()
webhook = data.get('webhook', False)
event_type = data.get('event_type', 'release')

try:
url = data['url']
except KeyError:
raise FileUploadError('Missing url parameter.')

try:
host, owner, repo, branch, filepath = parse_git_url(url)
api = create_git_api(host, owner, repo, branch,
current_user.id)

if filepath:
if webhook:
raise FileUploadError(
'You cannot create a webhook on a file')

download_repo_file(
record_uuid,
f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}/{filepath}', # noqa
*api.get_file_download(filepath),
api.auth_headers,
)
elif webhook:
if event_type == 'release':
if branch:
raise FileUploadError(
'You cannot create a release webhook'
' for a specific branch or sha.')
target = data.get('target')

if target == 'zenodo':
# check for token
token = _fetch_token('zenodo')
if not token:
raise FileUploadError(
'Please connect your Zenodo account '
'before creating a deposit.')

files = data.get('files')
bucket = data.get('bucket')
zenodo_data = data.get('zenodo_data', {})

if files and bucket:
zenodo_deposit = create_zenodo_deposit(token, zenodo_data) # noqa
self.setdefault('_zenodo', []).append(zenodo_deposit)
self.commit()

# upload files to zenodo deposit
upload_to_zenodo.delay(
files, bucket, token,
zenodo_deposit['id'],
zenodo_deposit['links']['bucket'])
else:
raise FileUploadError(
'You cannot create an empty Zenodo deposit. '
'Please add some files.')
else:
webhook = data.get('webhook', False)
event_type = data.get('event_type', 'release')

if event_type == 'push' and \
api.branch is None and api.sha:
raise FileUploadError(
'You cannot create a push webhook'
' for a specific sha.')
try:
url = data['url']
except KeyError:
raise FileUploadError('Missing url parameter.')

create_webhook(record_uuid, api, event_type)
else:
download_repo.delay(
record_uuid,
f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}.tar.gz', # noqa
api.get_repo_download(),
api.auth_headers)
try:
host, owner, repo, branch, filepath = parse_git_url(url) # noqa
api = create_git_api(host, owner, repo, branch,
current_user.id)

if filepath:
if webhook:
raise FileUploadError(
'You cannot create a webhook on a file')

download_repo_file(
record_uuid,
f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}/{filepath}', # noqa
*api.get_file_download(filepath),
api.auth_headers,
)
elif webhook:
if event_type == 'release':
if branch:
raise FileUploadError(
'You cannot create a release webhook'
' for a specific branch or sha.')

if event_type == 'push' and \
api.branch is None and api.sha:
raise FileUploadError(
'You cannot create a push webhook'
' for a specific sha.')

create_webhook(record_uuid, api, event_type)
else:
download_repo.delay(
record_uuid,
f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}.tar.gz', # noqa
api.get_repo_download(),
api.auth_headers)

except GitError as e:
raise FileUploadError(str(e))
except GitError as e:
raise FileUploadError(str(e))

return self

Expand Down Expand Up @@ -584,16 +616,15 @@ def validate(self, **kwargs):

validator = NoRequiredValidator(schema, resolver=resolver)

result = {}
result['errors'] = [
errors = [
FieldError(
list(error.path)+error.validator_value,
str(error.message))
for error in validator.iter_errors(self)
]

if result['errors']:
raise DepositValidationError(None, errors=result['errors'])
if errors:
raise DepositValidationError(None, errors=errors)
except RefResolutionError:
raise DepositValidationError('Schema {} not found.'.format(
self['$schema']))
Expand Down
27 changes: 27 additions & 0 deletions cap/modules/deposit/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,18 @@ def __init__(self, description, **kwargs):
self.description = description or self.description


class AuthorizationError(RESTException):
"""Exception during authorization."""

code = 401

def __init__(self, description, **kwargs):
"""Initialize exception."""
super(AuthorizationError, self).__init__(**kwargs)

self.description = description or self.description


class DisconnectWebhookError(RESTException):
"""Exception during disconnecting webhook for analysis."""

Expand Down Expand Up @@ -124,3 +136,18 @@ def __init__(self, description, errors=None, **kwargs):

self.description = description or self.description
self.errors = [FieldError(e[0], e[1]) for e in errors.items()]


class DataValidationError(RESTValidationError):
"""Review validation error exception."""

code = 400

description = "Validation error. Try again with valid data"

def __init__(self, description, errors=None, **kwargs):
"""Initialize exception."""
super(DataValidationError, self).__init__(**kwargs)

self.description = description or self.description
self.errors = [FieldError(e['field'], e['message']) for e in errors]
56 changes: 56 additions & 0 deletions cap/modules/deposit/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Analysis Preservation Framework.
# Copyright (C) 2018 CERN.
#
# CERN Analysis Preservation Framework is free software; you can redistribute
# it and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# CERN Analysis Preservation Framework is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with CERN Analysis Preservation Framework; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.
"""Tasks."""

from __future__ import absolute_import, print_function

import requests
from flask import current_app
from celery import shared_task
from invenio_db import db
from invenio_files_rest.models import FileInstance, ObjectVersion


@shared_task(autoretry_for=(Exception, ),
retry_kwargs={
'max_retries': 5,
'countdown': 10
})
def upload_to_zenodo(files, bucket, token, zenodo_depid, zenodo_bucket_url):
"""Upload to Zenodo the files the user selected."""
for filename in files:
file_obj = ObjectVersion.get(bucket, filename)
file_ins = FileInstance.get(file_obj.file_id)

with open(file_ins.uri, 'rb') as fp:
file = requests.put(
url=f'{zenodo_bucket_url}/{filename}',
data=fp,
params=dict(access_token=token),
)

if not file.ok:
current_app.logger.error(
f'Uploading file {filename} to deposit {zenodo_depid} '
f'failed with {file.status_code}.')
45 changes: 45 additions & 0 deletions cap/modules/deposit/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,14 @@

from __future__ import absolute_import, print_function

import requests
from flask import current_app
from flask_login import current_user
from invenio_access.models import Role
from invenio_db import db

from cap.modules.deposit.errors import AuthorizationError, \
DataValidationError, FileUploadError
from cap.modules.records.utils import url_to_api_url


Expand Down Expand Up @@ -75,3 +80,43 @@ def add_api_to_links(links):
item['links'] = add_api_to_links(item.get('links'))

return response


def create_zenodo_deposit(token, data):
"""Create a Zenodo deposit using the logged in user's credentials."""
zenodo_url = current_app.config.get("ZENODO_SERVER_URL")
deposit = requests.post(
url=f'{zenodo_url}/deposit/depositions',
params=dict(access_token=token),
json={'metadata': data},
headers={'Content-Type': 'application/json'}
)

if not deposit.ok:
if deposit.status_code == 401:
raise AuthorizationError(
'Authorization to Zenodo failed. Please reconnect.')
if deposit.status_code == 400:
data = deposit.json()
if data.get('message') == 'Validation error.':
raise DataValidationError(
'Validation error on creating the Zenodo deposit.',
errors=data.get('errors'))
raise FileUploadError(
'Something went wrong, Zenodo deposit not created.')

# TODO: fix with serializers
data = deposit.json()
zenodo_deposit = {
'id': data['id'],
'title': data.get('metadata', {}).get('title'),
'creator': current_user.id,
'created': data['created'],
'links': {
'self': data['links']['self'],
'bucket': data['links']['bucket'],
'html': data['links']['html'],
'publish': data['links']['publish'],
}
}
return zenodo_deposit
27 changes: 0 additions & 27 deletions cap/modules/services/views/zenodo.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@

import requests
from flask import current_app, jsonify
from invenio_files_rest.models import FileInstance, ObjectVersion

from . import blueprint

Expand All @@ -48,29 +47,3 @@ def get_zenodo_record(zenodo_id):
"""Get record from zenodo (route)."""
resp, status = _get_zenodo_record(zenodo_id)
return jsonify(resp), status


@blueprint.route('/zenodo/<bucket_id>/<filename>')
def upload_to_zenodo(bucket_id, filename):
"""Upload code to zenodo."""
zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL')
params = {"access_token": current_app.config.get(
'ZENODO_ACCESS_TOKEN')}
filename = filename + '.tar.gz'

r = requests.post(zenodo_server_url,
params=params, json={},
)

file_obj = ObjectVersion.get(bucket_id, filename)
file = FileInstance.get(file_obj.file_id)

bucket_url = r.json()['links']['bucket']
with open(file.uri, 'rb') as fp:
response = requests.put(
bucket_url + '/{}'.format(filename),
data=fp,
params=params,
)

return jsonify({"status": response.status_code})
2 changes: 2 additions & 0 deletions docker-services.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ services:
- "INVENIO_RATELIMIT_STORAGE_URL=redis://cache:6379/3"
- "INVENIO_CERN_APP_CREDENTIALS_KEY=CHANGE_ME"
- "INVENIO_CERN_APP_CREDENTIALS_SECRET=CHANGE_ME"
- "INVENIO_ZENODO_CLIENT_ID=CHANGE_ME"
- "INVENIO_ZENODO_CLIENT_SECRET=CHANGE_ME"
- "DEV_HOST=CHANGE_ME"
lb:
build: ./docker/haproxy/
Expand Down
Loading

0 comments on commit ffdb4b7

Please sign in to comment.