From 1d868b232252dffdeb9cb21ffd321917cd31c1a9 Mon Sep 17 00:00:00 2001 From: Ilias Koutsakis Date: Fri, 20 Nov 2020 20:51:29 +0100 Subject: [PATCH] services: metadata input on zenodo deposit creation * adds serializers/validation for metadata input * closes #1952 Signed-off-by: Ilias Koutsakis --- cap/modules/deposit/api.py | 19 ++- cap/modules/deposit/errors.py | 15 +++ cap/modules/deposit/tasks.py | 1 - cap/modules/deposit/utils.py | 4 +- cap/modules/services/serializers/zenodo.py | 137 +++++++++++++++++++++ tests/integration/test_zenodo_upload.py | 22 +++- 6 files changed, 188 insertions(+), 10 deletions(-) create mode 100644 cap/modules/services/serializers/zenodo.py diff --git a/cap/modules/deposit/api.py b/cap/modules/deposit/api.py index b366f5a76d..f148cafc83 100644 --- a/cap/modules/deposit/api.py +++ b/cap/modules/deposit/api.py @@ -61,6 +61,7 @@ from cap.modules.repos.tasks import download_repo, download_repo_file from cap.modules.repos.utils import (create_webhook, disconnect_subscriber, parse_git_url) +from cap.modules.services.serializers.zenodo import ZenodoUploadSchema from cap.modules.schemas.resolvers import (resolve_schema_by_url, schema_name_to_url) from cap.modules.user.errors import DoesNotExistInLDAP @@ -68,7 +69,7 @@ get_existing_or_register_user) from .errors import (DepositValidationError, UpdateDepositPermissionsError, - ReviewError) + ReviewError, InputValidationError) from .fetchers import cap_deposit_fetcher from .minters import cap_deposit_minter from .permissions import (AdminDepositPermission, CloneDepositPermission, @@ -269,12 +270,22 @@ def upload(self, pid, *args, **kwargs): 'Please connect your Zenodo account ' 'before creating a deposit.') - files = data.get('files') + files = data.get('files', []) bucket = data.get('bucket') - zenodo_data = data.get('zenodo_data', {}) + zenodo_data = data.get('zenodo_data') + + input = {'files': files, 'bucket': bucket} + if zenodo_data: + input['data'] = zenodo_data if files and bucket: - 
zenodo_deposit = create_zenodo_deposit(token, zenodo_data) # noqa + payload, errors = ZenodoUploadSchema().load(input) + if errors: + raise InputValidationError( + 'Validation error in Zenodo input data.', + errors=errors) + + zenodo_deposit = create_zenodo_deposit(token, payload) self.setdefault('_zenodo', []).append(zenodo_deposit) self.commit() diff --git a/cap/modules/deposit/errors.py b/cap/modules/deposit/errors.py index b1bdcc2e22..4dadefecf3 100644 --- a/cap/modules/deposit/errors.py +++ b/cap/modules/deposit/errors.py @@ -138,6 +138,21 @@ def __init__(self, description, errors=None, **kwargs): self.errors = [FieldError(e[0], e[1]) for e in errors.items()] +class InputValidationError(RESTValidationError): + """Input validation error exception.""" + + code = 400 + + description = "Validation error. Try again with valid data" + + def __init__(self, description, errors=None, **kwargs): + """Initialize exception.""" + super(InputValidationError, self).__init__(**kwargs) + + self.description = description or self.description + self.errors = [FieldError(e[0], e[1]) for e in errors.items()] + + class DataValidationError(RESTValidationError): """Review validation error exception.""" diff --git a/cap/modules/deposit/tasks.py b/cap/modules/deposit/tasks.py index 8c3f1e5201..21a052cc27 100644 --- a/cap/modules/deposit/tasks.py +++ b/cap/modules/deposit/tasks.py @@ -28,7 +28,6 @@ import requests from flask import current_app from celery import shared_task -from invenio_db import db from invenio_files_rest.models import FileInstance, ObjectVersion diff --git a/cap/modules/deposit/utils.py b/cap/modules/deposit/utils.py index 5e5aa88c49..5afbb95a7f 100644 --- a/cap/modules/deposit/utils.py +++ b/cap/modules/deposit/utils.py @@ -82,13 +82,13 @@ def add_api_to_links(links): return response -def create_zenodo_deposit(token, data): +def create_zenodo_deposit(token, data=None): """Create a Zenodo deposit using the logged in user's credentials.""" zenodo_url = 
current_app.config.get("ZENODO_SERVER_URL") deposit = requests.post( url=f'{zenodo_url}/deposit/depositions', params=dict(access_token=token), - json={'metadata': data}, + json={'metadata': data} if data else {}, headers={'Content-Type': 'application/json'} ) diff --git a/cap/modules/services/serializers/zenodo.py b/cap/modules/services/serializers/zenodo.py new file mode 100644 index 0000000000..20d3f28406 --- /dev/null +++ b/cap/modules/services/serializers/zenodo.py @@ -0,0 +1,137 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CERN Analysis Preservation Framework. +# Copyright (C) 2020 CERN. +# +# CERN Analysis Preservation Framework is free software; you can redistribute +# it and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CERN Analysis Preservation Framework is distributed in the hope that it will +# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CERN Analysis Preservation Framework; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. +# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. +# or submit itself to any jurisdiction. + +"""Zenodo Serializer/Validator.""" + +import arrow +from marshmallow import Schema, fields, ValidationError, validate, validates, \ + validates_schema + +from invenio_files_rest.models import ObjectVersion + +DATE_REGEX = r'\d{4}-\d{2}-\d{2}' +DATE_ERROR = 'The date should follow the pattern YYYY-mm-dd.' 
+ +UPLOAD_TYPES = [ + 'publication', + 'poster', + 'presentation', + 'dataset', + 'image', + 'video', + 'software', + 'lesson', + 'physicalobject', + 'other' +] +LICENSES = [ + 'CC-BY-4.0', + 'CC-BY-1.0', + 'CC-BY-2.0', + 'CC-BY-3.0' +] +ACCESS_RIGHTS = [ + 'open', + 'embargoed', + 'restricted', + 'closed' +] + + +class ZenodoCreatorsSchema(Schema): + name = fields.String(required=True) + affiliation = fields.String() + orcid = fields.String() + + +class ZenodoDepositMetadataSchema(Schema): + title = fields.String(required=True) + description = fields.String(required=True) + version = fields.String() + + keywords = fields.List(fields.String()) + creators = fields.List( + fields.Nested(ZenodoCreatorsSchema), required=True) + + upload_type = fields.String( + required=True, validate=validate.OneOf(UPLOAD_TYPES)) + license = fields.String( + required=True, validate=validate.OneOf(LICENSES)) + access_right = fields.String( + required=True, validate=validate.OneOf(ACCESS_RIGHTS)) + + publication_date = fields.String( + required=True, validate=validate.Regexp(DATE_REGEX, error=DATE_ERROR)) + embargo_date = fields.String( + validate=validate.Regexp(DATE_REGEX, error=DATE_ERROR)) + access_conditions = fields.String() + + @validates('embargo_date') + def validate_embargo_date(self, value): + """Validate that embargo date is in the future.""" + if arrow.get(value).date() <= arrow.utcnow().date(): + raise ValidationError( + 'Embargo date must be in the future.', + field_names=['embargo_date'] + ) + + @validates_schema() + def validate_license(self, data, **kwargs): + """Validate license.""" + access = data.get('access_right') + if access in ['open', 'embargoed'] and 'license' not in data: + raise ValidationError( + 'Required when access right is open or embargoed.', + field_names=['license'] + ) + if access == 'embargoed' and 'embargo_date' not in data: + raise ValidationError( + 'Required when access right is embargoed.', + field_names=['embargo_date'] + ) + if access == 
'restricted' and 'access_conditions' not in data: + raise ValidationError( + 'Required when access right is restricted.', + field_names=['access_conditions'] + ) + + +class ZenodoUploadSchema(Schema): + files = fields.List(fields.String(), required=True) + data = fields.Nested(ZenodoDepositMetadataSchema, default=dict()) + bucket = fields.String(required=True) + + @validates_schema() + def validate_files(self, data, **kwargs): + bucket = data['bucket'] + files = data['files'] + + for _file in files: + obj = ObjectVersion.get(bucket, _file) + if not obj: + raise ValidationError( + f'File {_file} not found in bucket.', + field_names=['files'] + ) diff --git a/tests/integration/test_zenodo_upload.py b/tests/integration/test_zenodo_upload.py index 2591c38ca6..eaeaef2583 100644 --- a/tests/integration/test_zenodo_upload.py +++ b/tests/integration/test_zenodo_upload.py @@ -173,7 +173,15 @@ def test_create_and_upload_to_zenodo_with_data(mock_token, app, users, deposit_w files=['test-file.txt'], zenodo_data={ 'title': 'test-title', - 'description': 'This is my first upload' + 'description': 'This is my first upload', + 'upload_type': 'poster', + 'creators': [ + {'name': 'User Tester', 'affiliation': 'Zenodo CAP'} + ], + 'access_right': 'open', + 'license': 'CC-BY-4.0', + 'publication_date': '2020-11-20', + 'embargo_date': '2050-09-09' })), headers=headers) assert resp.status_code == 201 @@ -218,8 +226,16 @@ def test_create_deposit_with_wrong_data(mock_token, app, users, deposit_with_fil zenodo_data={'test': 'test'})), headers=headers) assert resp.status_code == 400 - assert resp.json['message'] == 'Validation error on creating the Zenodo deposit.' - assert resp.json['errors'] == [{'field': 'test', 'message': 'Unknown field name.'}] + assert resp.json['message'] == 'Validation error in Zenodo input data.' 
+ assert resp.json['errors'][0]['message'] == { + 'license': ['Missing data for required field.'], + 'publication_date': ['Missing data for required field.'], + 'upload_type': ['Missing data for required field.'], + 'title': ['Missing data for required field.'], + 'access_right': ['Missing data for required field.'], + 'creators': ['Missing data for required field.'], + 'description': ['Missing data for required field.'] + } @patch('cap.modules.deposit.api._fetch_token', return_value='test-token')