From 569b6fb6de520663a78fcd76e6c21c6d8fbb7a48 Mon Sep 17 00:00:00 2001 From: Ilias Koutsakis Date: Tue, 17 Nov 2020 20:03:24 +0100 Subject: [PATCH] services: create zenodo deposit through CAP * creates a Zenodo deposit, with files from CAP * saves metadata about the Zenodo deposit, and attaches it to a CAP deposit * integration tests * addresses #1938 Signed-off-by: Ilias Koutsakis --- cap/config.py | 5 +- cap/modules/deposit/api.py | 151 +++++++++---- cap/modules/deposit/errors.py | 12 + cap/modules/deposit/tasks.py | 86 ++++++++ docker-services.yml | 2 + tests/conftest.py | 19 +- tests/integration/test_zenodo_upload.py | 279 ++++++++++++++++++++++++ 7 files changed, 503 insertions(+), 51 deletions(-) create mode 100644 cap/modules/deposit/tasks.py create mode 100644 tests/integration/test_zenodo_upload.py diff --git a/cap/config.py b/cap/config.py index aa306492a1..f15b8090c0 100644 --- a/cap/config.py +++ b/cap/config.py @@ -720,10 +720,7 @@ def _(x): # Zenodo # ====== -ZENODO_SERVER_URL = os.environ.get('APP_ZENODO_SERVER_URL', - 'https://zenodo.org/api') - -ZENODO_ACCESS_TOKEN = os.environ.get('APP_ZENODO_ACCESS_TOKEN', 'CHANGE_ME') +ZENODO_SERVER_URL = os.environ.get('APP_ZENODO_SERVER_URL', 'https://zenodo.org/api') # noqa # Endpoints # ========= diff --git a/cap/modules/deposit/api.py b/cap/modules/deposit/api.py index 50ed9832dd..3b7d9a978a 100644 --- a/cap/modules/deposit/api.py +++ b/cap/modules/deposit/api.py @@ -27,6 +27,7 @@ import uuid from functools import wraps +import requests from flask import current_app, request from flask_login import current_user from invenio_access.models import ActionRoles, ActionUsers @@ -49,7 +50,9 @@ from sqlalchemy.orm.exc import NoResultFound from werkzeug.local import LocalProxy -from cap.modules.deposit.errors import DisconnectWebhookError, FileUploadError +from cap.modules.auth.ext import _fetch_token +from cap.modules.deposit.errors import AuthorizationError, \ + DisconnectWebhookError, FileUploadError from cap.modules.deposit.validators import NoRequiredValidator from cap.modules.experiments.permissions import exp_need_factory from cap.modules.mail.utils import post_action_notifications @@ -75,6 +78,7 @@ UpdateDepositPermission) from .review import Reviewable +from .tasks import upload_to_zenodo _datastore = LocalProxy(lambda: current_app.extensions['security'].datastore) @@ -254,53 +258,109 @@ def upload(self, pid, *args, **kwargs): _, rec = request.view_args.get('pid_value').data record_uuid = str(rec.id) data = request.get_json() - webhook = data.get('webhook', False) - event_type = data.get('event_type', 'release') - - try: - url = data['url'] - except KeyError: - raise FileUploadError('Missing url parameter.') + target = data.get('target') + + if target == 'zenodo': + # check for token + token = _fetch_token('zenodo') + if not token: + raise FileUploadError( + 'Token not found, please connect your Zenodo ' + 'account before creating a deposit.') + + files = data.get('files') + bucket = data.get('bucket') + if files and bucket: + # first create a deposit, and check if token is expired + deposit = requests.post( + url=f'{current_app.config.get("ZENODO_SERVER_URL")}' + f'/deposit/depositions', + params=dict(access_token=token), + json={}, + headers={'Content-Type': 'application/json'} + ) + if not deposit.ok: + if deposit.status_code == 401: + raise AuthorizationError( + 'Authorization to Zenodo failed. ' + 'Please reconnect.') + raise FileUploadError( + 'Something went wrong, ' + 'Zenodo deposit not created.') + + dep_data = deposit.json() + zenodo_bucket_url = dep_data['links']['bucket'] + zenodo_depid = dep_data['id'] + + # TODO: fix with serializers + zenodo_deposit = { + 'id': zenodo_depid, + 'links': { + 'self': dep_data['links']['self'], + 'bucket': zenodo_bucket_url, + 'html': dep_data['links']['html'], + 'publish': dep_data['links']['publish'], + }, + 'files': [] + } + self.setdefault('_zenodo', []).append(zenodo_deposit) + self.commit() + + # upload files to zenodo deposit + upload_to_zenodo.delay( + record_uuid, files, bucket, token, zenodo_depid, zenodo_bucket_url) # noqa + else: + raise FileUploadError( + 'You cannot create an empty Zenodo deposit. ' + 'Please add some files.') + else: + webhook = data.get('webhook', False) + event_type = data.get('event_type', 'release') - try: - host, owner, repo, branch, filepath = parse_git_url(url) - api = create_git_api(host, owner, repo, branch, - current_user.id) + try: + url = data['url'] + except KeyError: + raise FileUploadError('Missing url parameter.') - if filepath: - if webhook: - raise FileUploadError( - 'You cannot create a webhook on a file') + try: + host, owner, repo, branch, filepath = parse_git_url(url) # noqa + api = create_git_api(host, owner, repo, branch, + current_user.id) - download_repo_file( - record_uuid, - f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}/{filepath}', # noqa - *api.get_file_download(filepath), - api.auth_headers, - ) - elif webhook: - if event_type == 'release': - if branch: + if filepath: + if webhook: raise FileUploadError( - 'You cannot create a release webhook' - ' for a specific branch or sha.') - - if event_type == 'push' and \ - api.branch is None and api.sha: - raise FileUploadError( - 'You cannot create a push webhook' - ' for a specific sha.') + 'You cannot create a webhook on a file') + + download_repo_file( + record_uuid, + f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}/{filepath}', # noqa + *api.get_file_download(filepath), + api.auth_headers, + ) + elif webhook: + if event_type == 'release': + if branch: + raise FileUploadError( + 'You cannot create a release webhook' + ' for a specific branch or sha.') + + if event_type == 'push' and \ + api.branch is None and api.sha: + raise FileUploadError( + 'You cannot create a push webhook' + ' for a specific sha.') - create_webhook(record_uuid, api, event_type) - else: - download_repo.delay( - record_uuid, - f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}.tar.gz', # noqa - api.get_repo_download(), - api.auth_headers) + create_webhook(record_uuid, api, event_type) + else: + download_repo.delay( + record_uuid, + f'repositories/{host}/{owner}/{repo}/{api.branch or api.sha}.tar.gz', # noqa + api.get_repo_download(), + api.auth_headers) - except GitError as e: - raise FileUploadError(str(e)) + except GitError as e: + raise FileUploadError(str(e)) return self @@ -584,16 +644,15 @@ def validate(self, **kwargs): validator = NoRequiredValidator(schema, resolver=resolver) - result = {} - result['errors'] = [ + errors = [ FieldError( list(error.path)+error.validator_value, str(error.message)) for error in validator.iter_errors(self) ] - if result['errors']: - raise DepositValidationError(None, errors=result['errors']) + if errors: + raise DepositValidationError(None, errors=errors) except RefResolutionError: raise DepositValidationError('Schema {} not found.'.format( self['$schema'])) diff --git a/cap/modules/deposit/errors.py b/cap/modules/deposit/errors.py index bd48431dcc..972e002e7e 100644 --- a/cap/modules/deposit/errors.py +++ b/cap/modules/deposit/errors.py @@ -87,6 +87,18 @@ def __init__(self, description, **kwargs): self.description = description or self.description +class AuthorizationError(RESTException): + """Exception during authorization.""" + + code = 401 + + def __init__(self, description, **kwargs): + """Initialize exception.""" + super(AuthorizationError, self).__init__(**kwargs) + + self.description = description or self.description + + class DisconnectWebhookError(RESTException): """Exception during disconnecting webhook for analysis.""" diff --git a/cap/modules/deposit/tasks.py b/cap/modules/deposit/tasks.py new file mode 100644 index 0000000000..109fe63eb7 --- /dev/null +++ b/cap/modules/deposit/tasks.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Analysis Preservation Framework. +# Copyright (C) 2018 CERN. +# +# CERN Analysis Preservation Framework is free software; you can redistribute +# it and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CERN Analysis Preservation Framework is distributed in the hope that it will +# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CERN Analysis Preservation Framework; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. +# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. +"""Tasks.""" + +from __future__ import absolute_import, print_function + +import requests +from flask import current_app +from celery import shared_task +from invenio_db import db +from invenio_files_rest.models import FileInstance, ObjectVersion + + +@shared_task(autoretry_for=(Exception, ), + retry_kwargs={ + 'max_retries': 5, + 'countdown': 10 + }) +def upload_to_zenodo(record_uuid, files, bucket, token, + zenodo_depid, zenodo_bucket_url): + """Upload code to zenodo.""" + from cap.modules.deposit.api import CAPDeposit + record = CAPDeposit.get_record(record_uuid) + + file_list = [] + for filename in files: + file_obj = ObjectVersion.get(bucket, filename) + file_ins = FileInstance.get(file_obj.file_id) + + # upload each file in the deposit + with open(file_ins.uri, 'rb') as fp: + file = requests.put( + url=f'{zenodo_bucket_url}/{filename}', + data=fp, + params=dict(access_token=token), + ) + + if file.ok: + data = file.json() + file_list.append({ + 'self': data['links']['self'], + 'key': data['key'], + 'size': data['size'] + }) + else: + current_app.logger.error( + f'Uploading file {filename} to deposit {zenodo_depid} ' + f'failed with {file.status_code}.') + + # optionally add metadata + # resp = requests.put( + # url=f'{zenodo_server_url}/deposit/depositions/{depid}', + # params=dict(access_token=token), + # data=json.dumps({}), + # headers={'Content-Type': 'application/json'} + # ) + + if file_list: + # get the specific deposit we wish to update with files + deposit = list( + filter(lambda d: d['id'] == zenodo_depid, record['_zenodo'])) + + deposit[0]['files'] += file_list + record.commit() + db.session.commit() diff --git a/docker-services.yml b/docker-services.yml index 670af14627..729dae948e 100644 --- a/docker-services.yml +++ b/docker-services.yml @@ -27,6 +27,8 @@ services: - "INVENIO_RATELIMIT_STORAGE_URL=redis://cache:6379/3" - "INVENIO_CERN_APP_CREDENTIALS_KEY=CHANGE_ME" - "INVENIO_CERN_APP_CREDENTIALS_SECRET=CHANGE_ME" + - "INVENIO_ZENODO_CLIENT_ID=CHANGE_ME" + - "INVENIO_ZENODO_CLIENT_SECRET=CHANGE_ME" - "DEV_HOST=CHANGE_ME" lb: build: ./docker/haproxy/ diff --git a/tests/conftest.py b/tests/conftest.py index eeff7d463d..e54282a475 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -28,6 +28,7 @@ import tempfile from datetime import datetime, timedelta from uuid import uuid4 +from six import BytesIO import pytest from flask import current_app @@ -108,7 +109,8 @@ def default_config(): DEBUG=False, TESTING=True, APP_GITLAB_OAUTH_ACCESS_TOKEN='testtoken', - MAIL_DEFAULT_SENDER="analysis-preservation-support@cern.ch") + MAIL_DEFAULT_SENDER="analysis-preservation-support@cern.ch", + ZENODO_SERVER_URL='https://zenodo-test.org') @pytest.fixture(scope='session') @@ -401,6 +403,21 @@ def deposit(example_user, create_deposit): ) +@pytest.fixture +def deposit_with_file(example_user, create_schema, create_deposit): + """New deposit with files.""" + create_schema('test-schema', experiment='CMS') + return create_deposit( + example_user, + 'test-schema', + { + '$ana_type': 'test-schema', + 'title': 'test title' + }, + files={'test-file.txt': BytesIO(b'Hello world!')}, + experiment='CMS') + + @pytest.fixture def record(example_user, create_deposit): """Example record.""" diff --git a/tests/integration/test_zenodo_upload.py b/tests/integration/test_zenodo_upload.py new file mode 100644 index 0000000000..338db7d910 --- /dev/null +++ b/tests/integration/test_zenodo_upload.py @@ -0,0 +1,279 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Analysis Preservation Framework. +# Copyright (C) 2020 CERN. +# +# CERN Analysis Preservation Framework is free software; you can redistribute +# it and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CERN Analysis Preservation Framework is distributed in the hope that it will +# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CERN Analysis Preservation Framework; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. +# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. +# or submit itself to any jurisdiction. + +"""Integration tests for Zenodo Upload API.""" +import json +import re +from flask import current_app +from invenio_pidstore.resolver import Resolver + +import responses +from mock import patch + +from cap.modules.deposit.api import CAPDeposit + + +@patch('cap.modules.deposit.api._fetch_token', return_value='test-token') +@responses.activate +def test_upload_to_zenodo(mock_token, app, users, deposit_with_file, auth_headers_for_user, json_headers): + user = users['cms_user'] + headers = auth_headers_for_user(user) + json_headers + zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL') + pid = deposit_with_file['_deposit']['id'] + bucket = deposit_with_file.files.bucket + + # MOCK RESPONSES FROM ZENODO SERVER + # first the deposit creation + responses.add(responses.POST, + f'{zenodo_server_url}/deposit/depositions', + json={ + 'id': 111, + 'record_id': 111, + 'submitted': False, + 'title': '', + 'links': { + 'bucket': 'http://zenodo-test.com/test-bucket', + 'html': 'https://sandbox.zenodo.org/deposit/111', + 'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/111/actions/publish', + 'self': 'https://sandbox.zenodo.org/api/deposit/depositions/111' + }, + 'files': [] + }, + status=200) + + # then the file upload + responses.add(responses.PUT, + 'http://zenodo-test.com/test-bucket/test-file.txt', + json={ + 'mimetype': 'application/octet-stream', + 'links': { + 'self': 'https://sandbox.zenodo.org/api/files/test-bucket/test-file.txt', + 'uploads': 'https://sandbox.zenodo.org/api/files/test-bucket/test-file.txt?uploads' + }, + 'key': 'test-file.txt', + 'size': 100 + }, + status=200) + + # fix because responses makes request to ES, and deposit.commit() won't work without it + responses.add_callback( + responses.PUT, + re.compile(r'http://localhost:9200/deposits-records-test-schema-v1\.0\.0/' + r'test-schema-v1\.0\.0/(.*)?version=(.*)&version_type=external_gte'), + callback=lambda req: (200, {}, json.dumps({})), + content_type='application/json', + ) + + # create the zenodo deposit + with app.test_client() as client: + resp = client.post(f'/deposits/{pid}/actions/upload', + data=json.dumps(dict(target='zenodo', + bucket=str(bucket), + files=['test-file.txt'])), + headers=headers) + assert resp.status_code == 201 + + resolver = Resolver(pid_type='depid', + object_type='rec', + getter=lambda x: x) + _, uuid = resolver.resolve(pid) + record = CAPDeposit.get_record(uuid) + + assert len(record['_zenodo']) == 1 + assert record['_zenodo'][0]['id'] == 111 + assert record['_zenodo'][0]['files'][0]['key'] == 'test-file.txt' + + +@patch('cap.modules.deposit.api._fetch_token', return_value='test-token') +@responses.activate +def test_zenodo_upload_authorization_failure(mock_token, app, users, deposit_with_file, + auth_headers_for_user, json_headers): + user = users['cms_user'] + headers = auth_headers_for_user(user) + json_headers + zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL') + pid = deposit_with_file['_deposit']['id'] + bucket = deposit_with_file.files.bucket + + responses.add(responses.POST, + f'{zenodo_server_url}/deposit/depositions', + json={}, + status=401) + + with app.test_client() as client: + resp = client.post(f'/deposits/{pid}/actions/upload', + data=json.dumps(dict(target='zenodo', + bucket=str(bucket), + files=['test-file.txt'])), + headers=headers) + assert resp.status_code == 401 + assert resp.json['message'] == 'Authorization to Zenodo failed. Please reconnect.' + + +@patch('cap.modules.deposit.api._fetch_token', return_value='test-token') +@responses.activate +def test_zenodo_upload_deposit_not_created_error(mock_token, app, users, deposit_with_file, + auth_headers_for_user, json_headers): + user = users['cms_user'] + headers = auth_headers_for_user(user) + json_headers + zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL') + pid = deposit_with_file['_deposit']['id'] + bucket = deposit_with_file.files.bucket + + responses.add(responses.POST, + f'{zenodo_server_url}/deposit/depositions', + json={}, + status=500) + + with app.test_client() as client: + resp = client.post(f'/deposits/{pid}/actions/upload', + data=json.dumps(dict(target='zenodo', + bucket=str(bucket), + files=['test-file.txt'])), + headers=headers) + assert resp.status_code == 400 + assert resp.json['message'] == 'Something went wrong, Zenodo deposit not created.' + + +@patch('cap.modules.deposit.api._fetch_token', return_value='test-token') +@responses.activate +def test_zenodo_upload_file_not_uploaded_error(mock_token, app, users, deposit_with_file, + auth_headers_for_user, json_headers, capsys): + user = users['cms_user'] + headers = auth_headers_for_user(user) + json_headers + zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL') + pid = deposit_with_file['_deposit']['id'] + bucket = deposit_with_file.files.bucket + + responses.add(responses.POST, + f'{zenodo_server_url}/deposit/depositions', + json={ + 'id': 111, + 'record_id': 111, + 'submitted': False, + 'title': '', + 'links': { + 'bucket': 'http://zenodo-test.com/test-bucket', + 'html': 'https://sandbox.zenodo.org/deposit/111', + 'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/111/actions/publish', + 'self': 'https://sandbox.zenodo.org/api/deposit/depositions/111' + }, + 'files': [] + }, + status=200) + + responses.add(responses.PUT, + 'http://zenodo-test.com/test-bucket/test-file.txt', + json={}, + status=500) + + responses.add_callback( + responses.PUT, + re.compile(r'http://localhost:9200/deposits-records-test-schema-v1\.0\.0/' + r'test-schema-v1\.0\.0/(.*)?version=(.*)&version_type=external_gte'), + callback=lambda req: (200, {}, json.dumps({})), + content_type='application/json', + ) + + with app.test_client() as client: + resp = client.post(f'/deposits/{pid}/actions/upload', + data=json.dumps(dict(target='zenodo', + bucket=str(bucket), + files=['test-file.txt'])), + headers=headers) + assert resp.status_code == 201 + + captured = capsys.readouterr() + assert 'Uploading file test-file.txt to deposit 111 failed with 500' \ + in captured.err + + +@patch('cap.modules.deposit.api._fetch_token', return_value=None) +def test_zenodo_upload_no_token(mock_token, app, users, deposit_with_file, auth_headers_for_user, json_headers): + user = users['cms_user'] + headers = auth_headers_for_user(user) + json_headers + pid = deposit_with_file['_deposit']['id'] + bucket = deposit_with_file.files.bucket + + with app.test_client() as client: + resp = client.post(f'/deposits/{pid}/actions/upload', + data=json.dumps(dict(target='zenodo', + bucket=str(bucket), + files=['test-file.txt'])), + headers=headers) + assert resp.status_code == 400 + assert resp.json['message'] == 'Token not found, please connect your ' \ + 'Zenodo account before creating a deposit.' + + +@patch('cap.modules.deposit.api._fetch_token', return_value='test-token') +@responses.activate +def test_zenodo_upload_empty_files(mock_token, app, users, deposit_with_file, auth_headers_for_user, json_headers): + user = users['cms_user'] + zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL') + headers = auth_headers_for_user(user) + json_headers + pid = deposit_with_file['_deposit']['id'] + bucket = deposit_with_file.files.bucket + + responses.add(responses.POST, + f'{zenodo_server_url}/deposit/depositions', + json={ + 'id': 111, + 'record_id': 111, + 'submitted': False, + 'title': '', + 'links': { + 'bucket': 'http://zenodo-test.com/test-bucket', + 'html': 'https://sandbox.zenodo.org/deposit/111', + 'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/111/actions/publish', + 'self': 'https://sandbox.zenodo.org/api/deposit/depositions/111' + }, + 'files': [] + }, + status=200) + + with app.test_client() as client: + resp = client.post(f'/deposits/{pid}/actions/upload', + data=json.dumps(dict(target='zenodo', + bucket=str(bucket), + files=[])), + headers=headers) + assert resp.status_code == 400 + assert resp.json['message'] == 'You cannot create an empty Zenodo deposit. Please add some files.' + + +@patch('cap.modules.deposit.api._fetch_token', return_value='test-token') +def test_zenodo_upload_no_access(mock_token, app, users, deposit_with_file, auth_headers_for_user, json_headers): + user = users['lhcb_user'] + headers = auth_headers_for_user(user) + json_headers + pid = deposit_with_file['_deposit']['id'] + bucket = deposit_with_file.files.bucket + + with app.test_client() as client: + resp = client.post(f'/deposits/{pid}/actions/upload', + data=json.dumps(dict(target='zenodo', + bucket=str(bucket), + files=['test-file.txt'])), + headers=headers) + assert resp.status_code == 403