-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
services: metadata input on zenodo deposit creation
* adds serializers/validation for metadata input * adds unit tests for zenodo serializer * closes cernanalysispreservation#1952 Signed-off-by: Ilias Koutsakis <[email protected]>
Showing
7 changed files
with
482 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
# -*- coding: utf-8 -*- | ||
# | ||
# This file is part of CERN Analysis Preservation Framework. | ||
# Copyright (C) 2020 CERN. | ||
# | ||
# CERN Analysis Preservation Framework is free software; you can redistribute | ||
# it and/or modify it under the terms of the GNU General Public License as | ||
# published by the Free Software Foundation; either version 2 of the | ||
# License, or (at your option) any later version. | ||
# | ||
# CERN Analysis Preservation Framework is distributed in the hope that it will | ||
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
# General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with CERN Analysis Preservation Framework; if not, write to the | ||
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
# MA 02111-1307, USA. | ||
# | ||
# In applying this license, CERN does not | ||
# waive the privileges and immunities granted to it by virtue of its status | ||
# as an Intergovernmental Organization or submit itself to any jurisdiction. | ||
|
||
"""Zenodo Serializer/Validator.""" | ||
|
||
import arrow | ||
from flask_login import current_user | ||
from marshmallow import Schema, fields, ValidationError, validate, validates, \ | ||
validates_schema | ||
|
||
from invenio_files_rest.models import ObjectVersion | ||
|
||
# ``marshmallow.validate.Regexp`` applies its pattern with ``re.match``, which
# only anchors at the beginning of the string. The explicit ``^``/``\Z``
# anchors make sure trailing garbage (e.g. ``2020-11-20foo`` or a trailing
# newline) is rejected instead of silently accepted.
DATE_REGEX = r'^\d{4}-\d{2}-\d{2}\Z'
DATE_ERROR = 'The date should follow the pattern YYYY-mm-dd.'


def CHOICE_ERROR(choices):  # noqa: N802
    """Return the error message used when a value is not among ``choices``.

    The upper-case name is kept so existing ``CHOICE_ERROR(...)`` call sites
    keep working.
    """
    return f'Not a valid choice. Select one of: {choices}'


# Allowed values for the ``upload_type`` metadata field.
# NOTE(review): presumably mirrors the Zenodo deposit API - confirm.
UPLOAD_TYPES = [
    'publication',
    'poster',
    'presentation',
    'dataset',
    'image',
    'video',
    'software',
    'lesson',
    'physicalobject',
    'other',
]

# Allowed values for the ``license`` metadata field.
LICENSES = [
    'CC-BY-4.0',
    'CC-BY-1.0',
    'CC-BY-2.0',
    'CC-BY-3.0',
]

# Allowed values for the ``access_right`` metadata field.
ACCESS_RIGHTS = [
    'open',
    'embargoed',
    'restricted',
    'closed',
]
|
||
|
||
class ZenodoCreatorsSchema(Schema):
    """Schema describing a single creator entry of a deposit."""

    # Only the creator's name is mandatory; the other fields are optional.
    name = fields.Str(required=True)
    affiliation = fields.Str()
    orcid = fields.Str()
|
||
|
||
class ZenodoDepositMetadataSchema(Schema):
    """Validate the metadata supplied when creating a Zenodo deposit.

    Per-field validators check types, allowed choices and date formats. The
    schema-level validator then enforces the fields that only become
    mandatory for specific ``access_right`` values.
    """

    title = fields.String(required=True)
    description = fields.String(required=True)
    version = fields.String()

    keywords = fields.List(fields.String())
    creators = fields.List(
        fields.Nested(ZenodoCreatorsSchema), required=True)

    upload_type = fields.String(required=True, validate=validate.OneOf(
        UPLOAD_TYPES, error=CHOICE_ERROR(UPLOAD_TYPES)))
    license = fields.String(required=True, validate=validate.OneOf(
        LICENSES, error=CHOICE_ERROR(LICENSES)))
    access_right = fields.String(required=True, validate=validate.OneOf(
        ACCESS_RIGHTS, error=CHOICE_ERROR(ACCESS_RIGHTS)))

    publication_date = fields.String(
        required=True, validate=validate.Regexp(DATE_REGEX, error=DATE_ERROR))
    embargo_date = fields.String(
        validate=validate.Regexp(DATE_REGEX, error=DATE_ERROR))
    access_conditions = fields.String()

    @validates('embargo_date')
    def validate_embargo_date(self, value):
        """Validate that the embargo date is a real date in the future.

        :param value: the ``embargo_date`` string being loaded.
        :raises ValidationError: if ``value`` cannot be parsed as a date, or
            if it is not strictly after today's UTC date.
        """
        try:
            embargo = arrow.get(value).date()
        except (ValueError, arrow.parser.ParserError):
            # The regex only checks the shape of the string, so values such
            # as ``2030-02-31`` still reach this point. Report them as a
            # validation error instead of letting the parser error escape.
            raise ValidationError(
                DATE_ERROR,
                field_names=['embargo_date']
            ) from None

        if embargo <= arrow.utcnow().date():
            raise ValidationError(
                'Embargo date must be in the future.',
                field_names=['embargo_date']
            )

    @validates_schema()
    def validate_license(self, data, **kwargs):
        """Validate the fields that depend on the chosen access right.

        Despite its name, this checks three conditional requirements, and
        reports only the first one that fails:

        * ``license`` when access is ``open`` or ``embargoed``
        * ``embargo_date`` when access is ``embargoed``
        * ``access_conditions`` when access is ``restricted``

        :raises ValidationError: for the first missing conditional field.
        """
        access = data.get('access_right')

        if access in ('open', 'embargoed') and 'license' not in data:
            raise ValidationError(
                'Required when access right is open or embargoed.',
                field_names=['license']
            )
        if access == 'embargoed' and 'embargo_date' not in data:
            raise ValidationError(
                'Required when access right is embargoed.',
                field_names=['embargo_date']
            )
        if access == 'restricted' and 'access_conditions' not in data:
            raise ValidationError(
                'Required when access right is restricted.',
                field_names=['access_conditions']
            )
|
||
|
||
class ZenodoUploadSchema(Schema):
    """Validate an upload request: files, bucket and deposit metadata."""

    files = fields.List(fields.String(), required=True)
    # Pass the ``dict`` callable rather than a ``dict()`` instance so the
    # default is not one mutable object shared by every use of the schema.
    data = fields.Nested(ZenodoDepositMetadataSchema, default=dict)
    bucket = fields.String(required=True)

    @validates_schema()
    def validate_files(self, data, **kwargs):
        """Check that every requested file exists in the given bucket.

        :param data: the deserialized payload.
        :raises ValidationError: for the first file key for which
            ``ObjectVersion.get`` returns nothing.
        """
        bucket = data.get('bucket')
        files = data.get('files')

        # Schema-level validators may run even when ``bucket`` or ``files``
        # failed their own validation and are therefore absent from ``data``.
        # The field-level error is already reported in that case, so skip the
        # lookup instead of raising ``KeyError``.
        if bucket is None or files is None:
            return

        for _file in files:
            obj = ObjectVersion.get(bucket, _file)
            if not obj:
                raise ValidationError(
                    f'File {_file} not found in bucket.',
                    field_names=['files']
                )
|
||
|
||
class ZenodoDepositSchema(Schema):
    """Serialize a deposit payload into a compact dict of selected fields."""

    id = fields.Int(dump_only=True)
    created = fields.String(dump_only=True)

    title = fields.Method('get_title', dump_only=True, allow_none=True)
    creator = fields.Method('get_creator', dump_only=True, allow_none=True)
    links = fields.Method('get_links', dump_only=True)

    def get_creator(self, data):
        """Return the id of the logged-in user, or ``None`` if there is none.

        An anonymous user object is truthy but is not guaranteed to expose an
        ``id``, so authentication is checked explicitly before reading it.
        """
        if current_user and current_user.is_authenticated:
            return current_user.id
        return None

    def get_title(self, data):
        """Return the deposit title.

        The title is read from ``data['metadata']['title']``. If that is
        missing (or ``metadata`` is ``None``), the top-level ``title`` key is
        used instead.
        """
        metadata = data.get('metadata') or {}
        title = metadata.get('title')
        if title is None:
            title = data.get('title')
        return title

    def get_links(self, data):
        """Return the subset of ``data['links']`` exposed to clients.

        :raises KeyError: if ``links`` or one of the expected link names is
            missing from ``data``.
        """
        links = data['links']
        return {
            name: links[name]
            for name in ('self', 'bucket', 'html', 'publish')
        }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,213 @@ | ||
# -*- coding: utf-8 -*- | ||
# | ||
# This file is part of CERN Analysis Preservation Framework. | ||
# Copyright (C) 2020 CERN. | ||
# | ||
# CERN Analysis Preservation Framework is free software; you can redistribute | ||
# it and/or modify it under the terms of the GNU General Public License as | ||
# published by the Free Software Foundation; either version 2 of the | ||
# License, or (at your option) any later version. | ||
# | ||
# CERN Analysis Preservation Framework is distributed in the hope that it will | ||
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
# General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with CERN Analysis Preservation Framework; if not, write to the | ||
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
# MA 02111-1307, USA. | ||
# | ||
# In applying this license, CERN does not | ||
# waive the privileges and immunities granted to it by virtue of its status | ||
# as an Intergovernmental Organization or submit itself to any jurisdiction. | ||
|
||
"""Zenodo upload serializers.""" | ||
|
||
from cap.modules.services.serializers.zenodo import ZenodoUploadSchema, ZenodoDepositSchema | ||
|
||
|
||
def _upload_payload(bucket, files=('test-file.txt',), **metadata_overrides):
    """Build an upload request; keyword arguments override metadata fields."""
    metadata = {
        'title': 'My first upload yoohoo',
        'upload_type': 'poster',
        'description': 'This is my first upload',
        'creators': [
            {'name': 'Ilias KoKoKo', 'affiliation': 'Zenodo CAP'}
        ],
        'access_right': 'open',
        'license': 'CC-BY-4.0',
        'publication_date': '2020-11-20',
        'embargo_date': '2030-09-09',
    }
    metadata.update(metadata_overrides)
    return {
        'data': metadata,
        'files': list(files),
        'bucket': str(bucket),
    }


def test_zenodo_upload_serializer(app, deposit_with_file):
    """Exercise ZenodoUploadSchema with valid and invalid upload requests."""
    bucket = deposit_with_file.files.bucket
    schema = ZenodoUploadSchema()

    # valid request: loads cleanly and round-trips unchanged
    request = _upload_payload(bucket)
    payload, errors = schema.load(request)
    assert errors == {}
    assert payload == request

    # not existing files
    request = _upload_payload(bucket, files=['test-file.txt', 'no-file.txt'])
    payload, errors = schema.load(request)
    assert errors == {'files': ['File no-file.txt not found in bucket.']}

    # missing required fields
    request = _upload_payload(bucket)
    for missing in ('upload_type', 'description'):
        del request['data'][missing]
    payload, errors = schema.load(request)
    assert errors == {
        'data': {
            'upload_type': ['Missing data for required field.'],
            'description': ['Missing data for required field.'],
        }
    }

    # embargo date in the past
    request = _upload_payload(bucket, embargo_date='2015-09-09')
    payload, errors = schema.load(request)
    assert errors == {
        'data': {
            'embargo_date': ['Embargo date must be in the future.'],
        }
    }

    # malformed dates
    request = _upload_payload(
        bucket, publication_date='2020-11', embargo_date='2015-01')
    payload, errors = schema.load(request)
    date_error = 'The date should follow the pattern YYYY-mm-dd.'
    assert errors == {
        'data': {
            'publication_date': [date_error],
            'embargo_date': [date_error],
        }
    }

    # wrong enum in license/upload/access
    request = _upload_payload(
        bucket, upload_type='test', access_right='test', license='test')
    payload, errors = schema.load(request)
    assert errors == {
        'data': {
            'license': [
                "Not a valid choice. Select one of: "
                "['CC-BY-4.0', 'CC-BY-1.0', 'CC-BY-2.0', 'CC-BY-3.0']"
            ],
            'access_right': [
                "Not a valid choice. Select one of: "
                "['open', 'embargoed', 'restricted', 'closed']"
            ],
            'upload_type': [
                "Not a valid choice. Select one of: ['publication', 'poster', "
                "'presentation', 'dataset', 'image', 'video', 'software', "
                "'lesson', 'physicalobject', 'other']"
            ],
        }
    }

    # access conditional
    request = _upload_payload(bucket, access_right='restricted')
    payload, errors = schema.load(request)
    assert errors == {
        'data': {
            'access_conditions': ['Required when access right is restricted.'],
        }
    }
|
||
|
||
def test_zenodo_deposit_serializer():
    """Check the fields ZenodoDepositSchema exposes for a deposit payload."""
    payload = {
        'id': 111,
        'record_id': 111,
        'title': 'test',
        # The serializer reads the title from ``metadata``, so the fixture
        # has to carry it there for the ``title`` assertion below to hold.
        'metadata': {
            'title': 'test'
        },
        'links': {
            'bucket': 'http://zenodo-test.com/test-bucket',
            'html': 'https://sandbox.zenodo.org/deposit/111',
            'publish': 'https://sandbox.zenodo.org/api/deposit/depositions/111/actions/publish',
            'self': 'https://sandbox.zenodo.org/api/deposit/depositions/111'
        },
        'files': [],
        'created': '2020-11-20T11:49:39.147767+00:00'
    }

    data = ZenodoDepositSchema().dump(payload).data
    assert data['id'] == 111
    assert data['title'] == 'test'
    # No user is logged in here, so no creator is reported.
    assert data['creator'] is None