Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
services: metadata input on zenodo deposit creation
Browse files Browse the repository at this point in the history
* adds serializers/validation for metadata input
* adds unit tests for zenodo serializer
* closes cernanalysispreservation#1952

Signed-off-by: Ilias Koutsakis <[email protected]>
Lilykos committed Nov 23, 2020
1 parent 32f7798 commit 20af39f
Showing 7 changed files with 482 additions and 34 deletions.
19 changes: 15 additions & 4 deletions cap/modules/deposit/api.py
Original file line number Diff line number Diff line change
@@ -61,14 +61,15 @@
from cap.modules.repos.tasks import download_repo, download_repo_file
from cap.modules.repos.utils import (create_webhook, disconnect_subscriber,
parse_git_url)
from cap.modules.services.serializers.zenodo import ZenodoUploadSchema
from cap.modules.schemas.resolvers import (resolve_schema_by_url,
schema_name_to_url)
from cap.modules.user.errors import DoesNotExistInLDAP
from cap.modules.user.utils import (get_existing_or_register_role,
get_existing_or_register_user)

from .errors import (DepositValidationError, UpdateDepositPermissionsError,
ReviewError)
ReviewError, InputValidationError)
from .fetchers import cap_deposit_fetcher
from .minters import cap_deposit_minter
from .permissions import (AdminDepositPermission, CloneDepositPermission,
@@ -269,12 +270,22 @@ def upload(self, pid, *args, **kwargs):
'Please connect your Zenodo account '
'before creating a deposit.')

files = data.get('files')
files = data.get('files', [])
bucket = data.get('bucket')
zenodo_data = data.get('zenodo_data', {})
zenodo_data = data.get('zenodo_data')

input = {'files': files, 'bucket': bucket}
if zenodo_data:
input['data'] = zenodo_data

if files and bucket:
zenodo_deposit = create_zenodo_deposit(token, zenodo_data) # noqa
payload, errors = ZenodoUploadSchema().load(input)
if errors:
raise InputValidationError(
'Validation error in Zenodo input data.',
errors=errors)

zenodo_deposit = create_zenodo_deposit(token, payload)
self.setdefault('_zenodo', []).append(zenodo_deposit)
self.commit()

15 changes: 15 additions & 0 deletions cap/modules/deposit/errors.py
Original file line number Diff line number Diff line change
@@ -138,6 +138,21 @@ def __init__(self, description, errors=None, **kwargs):
self.errors = [FieldError(e[0], e[1]) for e in errors.items()]


class InputValidationError(RESTValidationError):
    """Error raised when user-supplied input fails validation.

    Uses status code 400 and carries one ``FieldError`` per invalid field.
    """

    code = 400

    description = "Validation error. Try again with valid data"

    def __init__(self, description, errors=None, **kwargs):
        """Initialize exception.

        :param description: Message for the error; the class-level
            ``description`` is used when this is empty.
        :param errors: Optional mapping of field name to error message(s).
        """
        super(InputValidationError, self).__init__(**kwargs)

        self.description = description or self.description
        # ``errors`` defaults to None, so guard before calling ``.items()``.
        self.errors = [
            FieldError(field, message)
            for field, message in (errors or {}).items()
        ]


class DataValidationError(RESTValidationError):
"""Review validation error exception."""

1 change: 0 additions & 1 deletion cap/modules/deposit/tasks.py
Original file line number Diff line number Diff line change
@@ -28,7 +28,6 @@
import requests
from flask import current_app
from celery import shared_task
from invenio_db import db
from invenio_files_rest.models import FileInstance, ObjectVersion


22 changes: 5 additions & 17 deletions cap/modules/deposit/utils.py
Original file line number Diff line number Diff line change
@@ -34,6 +34,7 @@
from cap.modules.deposit.errors import AuthorizationError, \
DataValidationError, FileUploadError
from cap.modules.records.utils import url_to_api_url
from cap.modules.services.serializers.zenodo import ZenodoDepositSchema


def clean_empty_values(data):
@@ -82,13 +83,13 @@ def add_api_to_links(links):
return response


def create_zenodo_deposit(token, data):
def create_zenodo_deposit(token, data=None):
"""Create a Zenodo deposit using the logged in user's credentials."""
zenodo_url = current_app.config.get("ZENODO_SERVER_URL")
deposit = requests.post(
url=f'{zenodo_url}/deposit/depositions',
params=dict(access_token=token),
json={'metadata': data},
json={'metadata': data} if data else {},
headers={'Content-Type': 'application/json'}
)

@@ -105,18 +106,5 @@ def create_zenodo_deposit(token, data):
raise FileUploadError(
'Something went wrong, Zenodo deposit not created.')

# TODO: fix with serializers
data = deposit.json()
zenodo_deposit = {
'id': data['id'],
'title': data.get('metadata', {}).get('title'),
'creator': current_user.id,
'created': data['created'],
'links': {
'self': data['links']['self'],
'bucket': data['links']['bucket'],
'html': data['links']['html'],
'publish': data['links']['publish'],
}
}
return zenodo_deposit
data = ZenodoDepositSchema().dump(deposit.json()).data
return data
162 changes: 162 additions & 0 deletions cap/modules/services/serializers/zenodo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Analysis Preservation Framework.
# Copyright (C) 2020 CERN.
#
# CERN Analysis Preservation Framework is free software; you can redistribute
# it and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# CERN Analysis Preservation Framework is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with CERN Analysis Preservation Framework; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.
# or submit itself to any jurisdiction.

"""Zenodo Serializer/Validator."""

import arrow
from flask_login import current_user
from marshmallow import Schema, fields, ValidationError, validate, validates, \
validates_schema

from invenio_files_rest.models import ObjectVersion

# The pattern is applied with a match from the start of the value only, so it
# is anchored at the end to reject trailing characters ('2020-11-20junk').
DATE_REGEX = r'\d{4}-\d{2}-\d{2}\Z'
DATE_ERROR = 'The date should follow the pattern YYYY-mm-dd.'
# Builds the error message shown when a value is not one of ``choices``.
CHOICE_ERROR = lambda choices: f'Not a valid choice. Select one of: {choices}'  # noqa

# Accepted values for the ``upload_type`` metadata field.
UPLOAD_TYPES = [
    'publication',
    'poster',
    'presentation',
    'dataset',
    'image',
    'video',
    'software',
    'lesson',
    'physicalobject',
    'other'
]
# Accepted values for the ``license`` metadata field.
LICENSES = [
    'CC-BY-4.0',
    'CC-BY-1.0',
    'CC-BY-2.0',
    'CC-BY-3.0'
]
# Accepted values for the ``access_right`` metadata field.
ACCESS_RIGHTS = [
    'open',
    'embargoed',
    'restricted',
    'closed'
]


class ZenodoCreatorsSchema(Schema):
    """Schema for a single creator entry; only ``name`` is mandatory."""

    name = fields.String(required=True)
    affiliation = fields.String()
    orcid = fields.String()


class ZenodoDepositMetadataSchema(Schema):
    """Validate the metadata supplied for a new Zenodo deposit."""

    title = fields.String(required=True)
    description = fields.String(required=True)
    version = fields.String()

    keywords = fields.List(fields.String())
    creators = fields.List(
        fields.Nested(ZenodoCreatorsSchema), required=True)

    upload_type = fields.String(required=True, validate=validate.OneOf(
        UPLOAD_TYPES, error=CHOICE_ERROR(UPLOAD_TYPES)))
    # NOTE(review): ``license`` is always required here, while
    # ``validate_license`` treats it as required only for open/embargoed
    # access -- confirm which of the two is intended.
    license = fields.String(required=True, validate=validate.OneOf(
        LICENSES, error=CHOICE_ERROR(LICENSES)))
    access_right = fields.String(required=True, validate=validate.OneOf(
        ACCESS_RIGHTS, error=CHOICE_ERROR(ACCESS_RIGHTS)))

    publication_date = fields.String(
        required=True, validate=validate.Regexp(DATE_REGEX, error=DATE_ERROR))
    embargo_date = fields.String(
        validate=validate.Regexp(DATE_REGEX, error=DATE_ERROR))
    access_conditions = fields.String()

    @validates('embargo_date')
    def validate_embargo_date(self, value):
        """Validate that embargo date is a real date in the future.

        :param value: The ``embargo_date`` string being loaded.
        :raises ValidationError: If the value cannot be parsed as a date or
            the date is not after today (UTC).
        """
        try:
            embargo = arrow.get(value).date()
        except (ValueError, arrow.parser.ParserError):
            # The regex only checks the shape of the value, so something
            # like '2020-13-45' still gets here and cannot be parsed.
            raise ValidationError(DATE_ERROR, field_names=['embargo_date'])

        if embargo <= arrow.utcnow().date():
            raise ValidationError(
                'Embargo date must be in the future.',
                field_names=['embargo_date']
            )

    @validates_schema()
    def validate_license(self, data, **kwargs):
        """Validate the fields that become required by ``access_right``.

        :param data: The loaded metadata.
        :raises ValidationError: For the first conditionally required field
            that is missing.
        """
        access = data.get('access_right')
        if access in ['open', 'embargoed'] and 'license' not in data:
            raise ValidationError(
                'Required when access right is open or embargoed.',
                field_names=['license']
            )
        if access == 'embargoed' and 'embargo_date' not in data:
            raise ValidationError(
                'Required when access right is embargoed.',
                field_names=['embargo_date']
            )
        if access == 'restricted' and 'access_conditions' not in data:
            raise ValidationError(
                'Required when access right is restricted.',
                field_names=['access_conditions']
            )


class ZenodoUploadSchema(Schema):
    """Validate the input of an upload request to Zenodo."""

    files = fields.List(fields.String(), required=True)
    # NOTE(review): ``default`` presumably applies on dump only, so loading
    # input without ``data`` would yield no ``data`` key -- confirm intended.
    data = fields.Nested(ZenodoDepositMetadataSchema, default=dict())
    bucket = fields.String(required=True)

    @validates_schema()
    def validate_files(self, data, **kwargs):
        """Validate that every requested file exists in the bucket.

        :param data: The loaded input.
        :raises ValidationError: For the first file not found in the bucket.
        """
        # Either key can be absent when the field itself was missing or
        # invalid; that error is reported by the field, so do not fail with
        # a KeyError here.
        if 'bucket' not in data or 'files' not in data:
            return

        bucket = data['bucket']
        for _file in data['files']:
            if not ObjectVersion.get(bucket, _file):
                raise ValidationError(
                    f'File {_file} not found in bucket.',
                    field_names=['files']
                )


class ZenodoDepositSchema(Schema):
    """Serialize the JSON of a Zenodo deposit into a smaller dict."""

    id = fields.Int(dump_only=True)
    created = fields.String(dump_only=True)

    title = fields.Method('get_title', dump_only=True, allow_none=True)
    creator = fields.Method('get_creator', dump_only=True, allow_none=True)
    links = fields.Method('get_links', dump_only=True)

    def get_creator(self, data):
        """Return the id of the current user, or None when there is none."""
        if not current_user:
            return None
        return current_user.id

    def get_title(self, data):
        """Return the title stored under ``metadata``, if any."""
        metadata = data.get('metadata', {})
        return metadata.get('title')

    def get_links(self, data):
        """Return the self, bucket, html and publish links of the deposit."""
        links = data['links']
        return {
            name: links[name]
            for name in ('self', 'bucket', 'html', 'publish')
        }
84 changes: 72 additions & 12 deletions tests/integration/test_zenodo_upload.py
Original file line number Diff line number Diff line change
@@ -173,7 +173,15 @@ def test_create_and_upload_to_zenodo_with_data(mock_token, app, users, deposit_w
files=['test-file.txt'],
zenodo_data={
'title': 'test-title',
'description': 'This is my first upload'
'description': 'This is my first upload',
'upload_type': 'poster',
'creators': [
{'name': 'User Tester', 'affiliation': 'Zenodo CAP'}
],
'access_right': 'open',
'license': 'CC-BY-4.0',
'publication_date': '2020-11-20',
'embargo_date': '2050-09-09'
})),
headers=headers)
assert resp.status_code == 201
@@ -191,7 +199,6 @@ def test_create_and_upload_to_zenodo_with_data(mock_token, app, users, deposit_w


@patch('cap.modules.deposit.api._fetch_token', return_value='test-token')
@responses.activate
def test_create_deposit_with_wrong_data(mock_token, app, users, deposit_with_file,
auth_headers_for_user, json_headers):
user = users['cms_user']
@@ -200,26 +207,79 @@ def test_create_deposit_with_wrong_data(mock_token, app, users, deposit_with_fil
pid = deposit_with_file['_deposit']['id']
bucket = deposit_with_file.files.bucket

with app.test_client() as client:
resp = client.post(f'/deposits/{pid}/actions/upload',
data=json.dumps(dict(target='zenodo',
bucket=str(bucket),
files=['test-file.txt', 'not-found.txt'],
zenodo_data={
'description': 'This is my first upload',
'creators': [
{'name': 'User Tester', 'affiliation': 'Zenodo CAP'}
],
'access_right': 'open',
'license': 'CC-BY-4.0',
'publication_date': '2020-11-20'
})),
headers=headers)
assert resp.status_code == 400
assert resp.json['message'] == 'Validation error in Zenodo input data.'
assert resp.json['errors'] == [{
'field': 'data',
'message': {
'upload_type': ['Missing data for required field.'],
'title': ['Missing data for required field.']}
}, {
'field': 'files',
'message': ['File not-found.txt not found in bucket.']
}]


@patch('cap.modules.deposit.api._fetch_token', return_value='test-token')
@responses.activate
def test_create_and_upload_to_zenodo_with_wrong_files(mock_token, app, users, deposit_with_file,
auth_headers_for_user, json_headers):
user = users['cms_user']
headers = auth_headers_for_user(user) + json_headers
zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL')
pid = deposit_with_file['_deposit']['id']
bucket = deposit_with_file.files.bucket

# MOCK RESPONSES FROM ZENODO SERVER
# first the deposit creation
responses.add(responses.POST,
f'{zenodo_server_url}/deposit/depositions',
json={
'status': 400,
'message': 'Validation error.',
'errors': [
{'field': 'test', 'message': 'Unknown field name.'}
]},
status=400)
'id': 111,
'record_id': 111,
'title': '',
'links': {
'bucket': 'http://zenodo-test.com/test-bucket',
'html': 'https://sandbox.zenodo.org/deposit/111',
'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/111/actions/publish',
'self': 'https://sandbox.zenodo.org/api/deposit/depositions/111'
},
'files': [],
'created': '2020-11-20T11:49:39.147767+00:00'
},
status=200)

# create the zenodo deposit
with app.test_client() as client:
resp = client.post(f'/deposits/{pid}/actions/upload',
data=json.dumps(dict(target='zenodo',
bucket=str(bucket),
files=['test-file.txt'],
zenodo_data={'test': 'test'})),
files=['test-file.txt', 'not-exists.txt'])),
headers=headers)

assert resp.status_code == 400
assert resp.json['message'] == 'Validation error on creating the Zenodo deposit.'
assert resp.json['errors'] == [{'field': 'test', 'message': 'Unknown field name.'}]
assert resp.json['message'] == 'Validation error in Zenodo input data.'
assert resp.json['errors'] == [{
'field': 'files',
'message': ['File not-exists.txt not found in bucket.']
}]




@patch('cap.modules.deposit.api._fetch_token', return_value='test-token')
213 changes: 213 additions & 0 deletions tests/unit/schemas/test_zenodo_serializers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Analysis Preservation Framework.
# Copyright (C) 2020 CERN.
#
# CERN Analysis Preservation Framework is free software; you can redistribute
# it and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# CERN Analysis Preservation Framework is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with CERN Analysis Preservation Framework; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.
# or submit itself to any jurisdiction.

"""Zenodo upload serializers."""

from cap.modules.services.serializers.zenodo import ZenodoUploadSchema, ZenodoDepositSchema


def _upload_input(bucket, files=('test-file.txt',), omit=(), **overrides):
    """Build an input dict for ``ZenodoUploadSchema``.

    Starts from a complete metadata dict, applies ``overrides`` on top of it
    and finally removes the metadata keys listed in ``omit``.
    """
    # Imported locally so the helper does not depend on module-level imports.
    from datetime import date, timedelta

    metadata = {
        'title': 'My first upload yoohoo',
        'upload_type': 'poster',
        'description': 'This is my first upload',
        'creators': [
            {'name': 'Ilias KoKoKo', 'affiliation': 'Zenodo CAP'}
        ],
        'access_right': 'open',
        'license': 'CC-BY-4.0',
        'publication_date': '2020-11-20',
        # Computed relative to today so the test never expires.
        'embargo_date': (date.today() + timedelta(days=365)).isoformat(),
    }
    metadata.update(overrides)
    for key in omit:
        metadata.pop(key, None)

    return {'data': metadata, 'files': list(files), 'bucket': str(bucket)}


def test_zenodo_upload_serializer(app, deposit_with_file):
    """Check the upload schema against valid and invalid inputs."""
    bucket = deposit_with_file.files.bucket

    # valid input is returned unchanged
    data = _upload_input(bucket)
    payload, errors = ZenodoUploadSchema().load(data)
    assert errors == {}
    assert payload == data

    # not existing files
    data = _upload_input(bucket, files=['test-file.txt', 'no-file.txt'])
    payload, errors = ZenodoUploadSchema().load(data)
    assert errors == {'files': ['File no-file.txt not found in bucket.']}

    # missing required fields
    data = _upload_input(bucket, omit=['upload_type', 'description'])
    payload, errors = ZenodoUploadSchema().load(data)
    assert errors == {
        'data': {
            'upload_type': ['Missing data for required field.'],
            'description': ['Missing data for required field.']}
    }

    # embargo date in the past
    data = _upload_input(bucket, embargo_date='2015-09-09')
    payload, errors = ZenodoUploadSchema().load(data)
    assert errors == {
        'data': {
            'embargo_date': ['Embargo date must be in the future.']
        }}

    # malformed dates
    data = _upload_input(
        bucket, publication_date='2020-11', embargo_date='2015-01')
    payload, errors = ZenodoUploadSchema().load(data)
    assert errors == {
        'data': {
            'publication_date': ['The date should follow the pattern YYYY-mm-dd.'],
            'embargo_date': ['The date should follow the pattern YYYY-mm-dd.']
        }}

    # wrong enum in license/upload/access
    data = _upload_input(
        bucket, upload_type='test', access_right='test', license='test')
    payload, errors = ZenodoUploadSchema().load(data)
    assert errors == {
        'data': {
            'license': ["Not a valid choice. Select one of: ['CC-BY-4.0', 'CC-BY-1.0', 'CC-BY-2.0', 'CC-BY-3.0']"],
            'access_right': ["Not a valid choice. Select one of: ['open', 'embargoed', 'restricted', 'closed']"],
            'upload_type': ["Not a valid choice. Select one of: ['publication', 'poster', "
                            "'presentation', 'dataset', 'image', 'video', 'software', "
                            "'lesson', 'physicalobject', 'other']"]
        }
    }

    # access conditional
    data = _upload_input(bucket, access_right='restricted')
    payload, errors = ZenodoUploadSchema().load(data)
    assert errors == {
        'data': {
            'access_conditions': ['Required when access right is restricted.']
        }}


def test_zenodo_deposit_serializer():
    """Check that a Zenodo deposit response is reduced to the kept fields."""
    payload = {
        'id': 111,
        'record_id': 111,
        'title': 'test',
        # The serializer reads the title from ``metadata``, not from the
        # top-level key, so it has to be present here as well.
        'metadata': {'title': 'test'},
        'links': {
            'bucket': 'http://zenodo-test.com/test-bucket',
            'html': 'https://sandbox.zenodo.org/deposit/111',
            'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/111/actions/publish',
            'self': 'https://sandbox.zenodo.org/api/deposit/depositions/111'
        },
        'files': [],
        'created': '2020-11-20T11:49:39.147767+00:00'
    }

    data = ZenodoDepositSchema().dump(payload).data
    assert data['id'] == 111
    assert data['title'] == 'test'
    assert data['creator'] is None
    assert data['links'] == payload['links']

0 comments on commit 20af39f

Please sign in to comment.