Skip to content

Commit

Permalink
services: create zenodo deposit through CAP
Browse files Browse the repository at this point in the history
* creates a Zenodo deposit, with files from CAP
* saves metadata about the Zenodo deposit, and attaches it to a CAP
deposit
* integration tests
* closes cernanalysispreservation#1938
* closes cernanalysispreservation#1934

Signed-off-by: Ilias Koutsakis <[email protected]>
  • Loading branch information
Lilykos committed Nov 19, 2020
1 parent 4ff923c commit 66f761a
Show file tree
Hide file tree
Showing 7 changed files with 503 additions and 51 deletions.
5 changes: 1 addition & 4 deletions cap/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,10 +720,7 @@ def _(x):

# Zenodo
# ======
ZENODO_SERVER_URL = os.environ.get('APP_ZENODO_SERVER_URL',
'https://zenodo.org/api')

ZENODO_ACCESS_TOKEN = os.environ.get('APP_ZENODO_ACCESS_TOKEN', 'CHANGE_ME')
ZENODO_SERVER_URL = os.environ.get('APP_ZENODO_SERVER_URL', 'https://zenodo.org/api') # noqa

# Endpoints
# =========
Expand Down
151 changes: 105 additions & 46 deletions cap/modules/deposit/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import uuid
from functools import wraps

import requests
from flask import current_app, request
from flask_login import current_user
from invenio_access.models import ActionRoles, ActionUsers
Expand All @@ -49,7 +50,9 @@
from sqlalchemy.orm.exc import NoResultFound
from werkzeug.local import LocalProxy

from cap.modules.deposit.errors import DisconnectWebhookError, FileUploadError
from cap.modules.auth.ext import _fetch_token
from cap.modules.deposit.errors import AuthorizationError, \
DisconnectWebhookError, FileUploadError
from cap.modules.deposit.validators import NoRequiredValidator
from cap.modules.experiments.permissions import exp_need_factory
from cap.modules.mail.utils import post_action_notifications
Expand All @@ -75,6 +78,7 @@
UpdateDepositPermission)

from .review import Reviewable
from .tasks import upload_to_zenodo

_datastore = LocalProxy(lambda: current_app.extensions['security'].datastore)

Expand Down Expand Up @@ -254,53 +258,109 @@ def upload(self, pid, *args, **kwargs):
_, rec = request.view_args.get('pid_value').data
record_uuid = str(rec.id)
data = request.get_json()
webhook = data.get('webhook', False)
event_type = data.get('event_type', 'release')

try:
url = data['url']
except KeyError:
raise FileUploadError('Missing url parameter.')
target = data.get('target')

if target == 'zenodo':
# check for token
token = _fetch_token('zenodo')
if not token:
raise FileUploadError(
'Token not found, please connect your Zenodo '
'account before creating a deposit.')

files = data.get('files')
bucket = data.get('bucket')
if files and bucket:
# first create a deposit, and check if token is expired
deposit = requests.post(
url=f'{current_app.config.get("ZENODO_SERVER_URL")}'
f'/deposit/depositions',
params=dict(access_token=token),
json={},
headers={'Content-Type': 'application/json'}
)
if not deposit.ok:
if deposit.status_code == 401:
raise AuthorizationError(
'Authorization to Zenodo failed. '
'Please reconnect.')
raise FileUploadError(
'Something went wrong, '
'Zenodo deposit not created.')

dep_data = deposit.json()
zenodo_bucket_url = dep_data['links']['bucket']
zenodo_depid = dep_data['id']

# TODO: fix with serializers
zenodo_deposit = {
'id': zenodo_depid,
'links': {
'self': dep_data['links']['self'],
'bucket': zenodo_bucket_url,
'html': dep_data['links']['html'],
'publish': dep_data['links']['publish'],
},
'files': []
}
self.setdefault('_zenodo', []).append(zenodo_deposit)
self.commit()

# upload files to zenodo deposit
upload_to_zenodo.delay(
record_uuid, files, bucket, token, zenodo_depid, zenodo_bucket_url) # noqa
else:
raise FileUploadError(
'You cannot create an empty Zenodo deposit. '
'Please add some files.')
else:
webhook = data.get('webhook', False)
event_type = data.get('event_type', 'release')

try:
host, owner, repo, branch, filepath = parse_git_url(url)
api = create_git_api(host, owner, repo, branch,
current_user.id)
try:
url = data['url']
except KeyError:
raise FileUploadError('Missing url parameter.')

if filepath:
if webhook:
raise FileUploadError(
'You cannot create a webhook on a file')
try:
host, owner, repo, branch, filepath = parse_git_url(url) # noqa
api = create_git_api(host, owner, repo, branch,
current_user.id)

download_repo_file(
record_uuid,
f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}/{filepath}', # noqa
*api.get_file_download(filepath),
api.auth_headers,
)
elif webhook:
if event_type == 'release':
if branch:
if filepath:
if webhook:
raise FileUploadError(
'You cannot create a release webhook'
' for a specific branch or sha.')

if event_type == 'push' and \
api.branch is None and api.sha:
raise FileUploadError(
'You cannot create a push webhook'
' for a specific sha.')
'You cannot create a webhook on a file')

download_repo_file(
record_uuid,
f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}/{filepath}', # noqa
*api.get_file_download(filepath),
api.auth_headers,
)
elif webhook:
if event_type == 'release':
if branch:
raise FileUploadError(
'You cannot create a release webhook'
' for a specific branch or sha.')

if event_type == 'push' and \
api.branch is None and api.sha:
raise FileUploadError(
'You cannot create a push webhook'
' for a specific sha.')

create_webhook(record_uuid, api, event_type)
else:
download_repo.delay(
record_uuid,
f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}.tar.gz', # noqa
api.get_repo_download(),
api.auth_headers)
create_webhook(record_uuid, api, event_type)
else:
download_repo.delay(
record_uuid,
f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}.tar.gz', # noqa
api.get_repo_download(),
api.auth_headers)

except GitError as e:
raise FileUploadError(str(e))
except GitError as e:
raise FileUploadError(str(e))

return self

Expand Down Expand Up @@ -584,16 +644,15 @@ def validate(self, **kwargs):

validator = NoRequiredValidator(schema, resolver=resolver)

result = {}
result['errors'] = [
errors = [
FieldError(
list(error.path)+error.validator_value,
str(error.message))
for error in validator.iter_errors(self)
]

if result['errors']:
raise DepositValidationError(None, errors=result['errors'])
if errors:
raise DepositValidationError(None, errors=errors)
except RefResolutionError:
raise DepositValidationError('Schema {} not found.'.format(
self['$schema']))
Expand Down
12 changes: 12 additions & 0 deletions cap/modules/deposit/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,18 @@ def __init__(self, description, **kwargs):
self.description = description or self.description


class AuthorizationError(RESTException):
"""Exception during authorization."""

code = 401

def __init__(self, description, **kwargs):
"""Initialize exception."""
super(AuthorizationError, self).__init__(**kwargs)

self.description = description or self.description


class DisconnectWebhookError(RESTException):
"""Exception during disconnecting webhook for analysis."""

Expand Down
86 changes: 86 additions & 0 deletions cap/modules/deposit/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Analysis Preservation Framework.
# Copyright (C) 2018 CERN.
#
# CERN Analysis Preservation Framework is free software; you can redistribute
# it and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# CERN Analysis Preservation Framework is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with CERN Analysis Preservation Framework; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.
"""Tasks."""

from __future__ import absolute_import, print_function

import requests
from flask import current_app
from celery import shared_task
from invenio_db import db
from invenio_files_rest.models import FileInstance, ObjectVersion


@shared_task(autoretry_for=(Exception, ),
retry_kwargs={
'max_retries': 5,
'countdown': 10
})
def upload_to_zenodo(record_uuid, files, bucket, token,
zenodo_depid, zenodo_bucket_url):
"""Upload code to zenodo."""
from cap.modules.deposit.api import CAPDeposit
record = CAPDeposit.get_record(record_uuid)

file_list = []
for filename in files:
file_obj = ObjectVersion.get(bucket, filename)
file_ins = FileInstance.get(file_obj.file_id)

# upload each file in the deposit
with open(file_ins.uri, 'rb') as fp:
file = requests.put(
url=f'{zenodo_bucket_url}/{filename}',
data=fp,
params=dict(access_token=token),
)

if file.ok:
data = file.json()
file_list.append({
'self': data['links']['self'],
'key': data['key'],
'size': data['size']
})
else:
current_app.logger.error(
f'Uploading file {filename} to deposit {zenodo_depid} '
f'failed with {file.status_code}.')

# optionally add metadata
# resp = requests.put(
# url=f'{zenodo_server_url}/deposit/depositions/{depid}',
# params=dict(access_token=token),
# data=json.dumps({}),
# headers={'Content-Type': 'application/json'}
# )

if file_list:
# get the specific deposit we wish to update with files
deposit = list(
filter(lambda d: d['id'] == zenodo_depid, record['_zenodo']))

deposit[0]['files'] += file_list
record.commit()
db.session.commit()
2 changes: 2 additions & 0 deletions docker-services.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ services:
- "INVENIO_RATELIMIT_STORAGE_URL=redis://cache:6379/3"
- "INVENIO_CERN_APP_CREDENTIALS_KEY=CHANGE_ME"
- "INVENIO_CERN_APP_CREDENTIALS_SECRET=CHANGE_ME"
- "INVENIO_ZENODO_CLIENT_ID=CHANGE_ME"
- "INVENIO_ZENODO_CLIENT_SECRET=CHANGE_ME"
- "DEV_HOST=CHANGE_ME"
lb:
build: ./docker/haproxy/
Expand Down
19 changes: 18 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import tempfile
from datetime import datetime, timedelta
from uuid import uuid4
from six import BytesIO

import pytest
from flask import current_app
Expand Down Expand Up @@ -108,7 +109,8 @@ def default_config():
DEBUG=False,
TESTING=True,
APP_GITLAB_OAUTH_ACCESS_TOKEN='testtoken',
MAIL_DEFAULT_SENDER="[email protected]")
MAIL_DEFAULT_SENDER="[email protected]",
ZENODO_SERVER_URL='https://zenodo-test.org')


@pytest.fixture(scope='session')
Expand Down Expand Up @@ -401,6 +403,21 @@ def deposit(example_user, create_deposit):
)


@pytest.fixture
def deposit_with_file(example_user, create_schema, create_deposit):
"""New deposit with files."""
create_schema('test-schema', experiment='CMS')
return create_deposit(
example_user,
'test-schema',
{
'$ana_type': 'test-schema',
'title': 'test title'
},
files={'test-file.txt': BytesIO(b'Hello world!')},
experiment='CMS')


@pytest.fixture
def record(example_user, create_deposit):
"""Example record."""
Expand Down
Loading

0 comments on commit 66f761a

Please sign in to comment.