From 9d01a4b0e7dc52ad958a26ee193bcd7c12345872 Mon Sep 17 00:00:00 2001 From: Ilias Koutsakis Date: Fri, 5 Feb 2021 15:24:00 +0100 Subject: [PATCH] deposits: schema validation for `create-deposit` * adds schema validation, doesn't stop for failed records * saves errors in another file * closes #2074 * closes #2073 Signed-off-by: Ilias Koutsakis --- cap/modules/deposit/cli.py | 107 ++++++++++++------ .../test_deposits_create_with_permissions.py | 55 ++++++++- 2 files changed, 122 insertions(+), 40 deletions(-) diff --git a/cap/modules/deposit/cli.py b/cap/modules/deposit/cli.py index cb33b942ea..ec75bfd730 100644 --- a/cap/modules/deposit/cli.py +++ b/cap/modules/deposit/cli.py @@ -25,8 +25,11 @@ from __future__ import absolute_import, print_function +import os +import copy import json import uuid +from datetime import datetime import click from flask_cli import with_appcontext @@ -34,6 +37,7 @@ from invenio_jsonschemas.errors import JSONSchemaNotFound from invenio_pidstore.errors import PIDDoesNotExistError from invenio_pidstore.models import PersistentIdentifier +from jsonschema.exceptions import ValidationError from cap.modules.deposit.api import CAPDeposit from cap.modules.deposit.fetchers import cap_deposit_fetcher @@ -108,8 +112,10 @@ def add(file_path, schema, version, egroup, usermail, limit): help='User with access to the record') @click.option('--owner', '-o', help='Owner of the record') +@click.option('--save-errors-to', '-e', 'save_errors', + help="Provide a filename, that wrong records will be saved to.") @with_appcontext -def create_deposit(file, ana, roles, users, owner): +def create_deposit(file, ana, roles, users, owner, save_errors): """ Create a new deposit through the CLI. Usage: @@ -121,59 +127,92 @@ def create_deposit(file, ana, roles, users, owner): except (KeyError, ValueError): raise click.BadParameter('Not a valid JSON file.') - # create a list of all the records provided (even if it is just 1 + # create a list of all the records provided (even if it is just 1) records = [data] if isinstance(data, dict) else data + click.secho(f"{len(records)} record(s) found in file.\n", fg='green') + + errors = [] for rec in records: - update_data_with_schema(rec, ana) - create_deposit_with_permissions(rec, roles, users, owner) + create_deposit_with_permissions(rec, roles, users, owner, ana, errors) + + # save errors to be fixed, in different json file + if save_errors and errors: + save_errors_to_json(save_errors, errors) + + click.secho(f"\nProcess finished and record(s) added.", fg='green') + + +def save_errors_to_json(save_errors, errors): + """Saves the wrong records to a specified file.""" + timestamp = datetime.now().strftime("%d-%b-%Y-%H:%M:%S") + wrong_records_path = os.path.join( + os.getcwd(), + f'{save_errors}_errors_{timestamp}.json' + ) + + with open(wrong_records_path, 'w') as _json: + json.dump(errors, _json, indent=4) - click.secho(f"Record(s) added.", fg='green') + click.secho(f"\nErrors saved in {wrong_records_path}.", fg='green') -def update_data_with_schema(data, ana): +def check_and_update_data_with_schema(data, ana): """ Checks if the analysis type is included in the schema, or adds it. It also - checks if the schema provided is valid + checks if the schema provided is valid. """ schema = data.get('$schema') + if not schema and not ana: + click.secho( + 'You need to provide the --ana/-a parameter OR ' + 'add the $schema field in your JSON', fg='red') + return False + try: if schema: - resolve_schema_by_url(schema) if ana: click.secho("Your data already provide a $schema, --ana will not be used.") # noqa + resolve_schema_by_url(schema) elif ana: data['$schema'] = schema_name_to_url(ana) - else: - raise click.UsageError( - 'You need to provide the --ana/-a parameter ' - 'OR add the $schema field in your JSON') + return True except JSONSchemaNotFound: - click.secho(f'Provided schema is not valid.', fg='red') + click.secho('Provided schema is not a valid option.', fg='red') + return False -def create_deposit_with_permissions(data, roles, users, owner): +def create_deposit_with_permissions(data, roles, users, owner, ana, errors): """Create a deposit and add privileges and owner information.""" from cap.modules.deposit.api import CAPDeposit + # make sure the schema is valid first + if not check_and_update_data_with_schema(data, ana): + return + with db.session.begin_nested(): - owner = get_existing_or_register_user(owner) if owner else None - deposit = CAPDeposit.create(data=data, owner=owner) - - # add roles and users - if roles: - for role in roles: - _role = get_existing_or_register_role(role.strip()) - deposit._add_egroup_permissions(_role, - ['deposit-read'], - db.session) - if users: - for user in users: - _user = get_existing_or_register_user(user.strip()) - deposit._add_user_permissions(_user, - ['deposit-read'], - db.session) - deposit.commit() - db.session.commit() + try: + # saving original to return to user if wrong + _data = copy.deepcopy(data) + owner = get_existing_or_register_user(owner) if owner else None + deposit = CAPDeposit.create(data=data, owner=owner) + + # add roles and users + if roles: + for role in roles: + _role = get_existing_or_register_role(role.strip()) + deposit._add_egroup_permissions( + _role, ['deposit-read'], db.session) + if users: + for user in users: + _user = get_existing_or_register_user(user.strip()) + deposit._add_user_permissions( + _user, ['deposit-read'], db.session) + + deposit.commit() + except ValidationError as err: + click.secho(f'Validation Error: {err.message}', fg='red') + errors.append(_data) + return - click.secho(f"Created deposit with id: {deposit['_deposit']['id']}", - fg='green') + db.session.commit() + click.secho(f"Created deposit with id: {deposit['_deposit']['id']}", fg='green') # noqa diff --git a/tests/integration/cli/test_deposits_create_with_permissions.py b/tests/integration/cli/test_deposits_create_with_permissions.py index 1af2bd3743..5a3f998b0b 100644 --- a/tests/integration/cli/test_deposits_create_with_permissions.py +++ b/tests/integration/cli/test_deposits_create_with_permissions.py @@ -42,7 +42,7 @@ def test_no_ana_or_schema_given_fails(app, cli_runner): tmp.seek(0) res = cli_runner(f'fixtures create-deposit --file {tmp.name}') - assert res.exit_code == 2 + assert res.exit_code == 0 assert 'You need to provide the --ana/-a parameter ' \ 'OR add the $schema field in your JSON' in res.output @@ -53,8 +53,8 @@ def test_wrong_ana_fails(app, create_schema, cli_runner): tmp.seek(0) res = cli_runner(f'fixtures create-deposit --file {tmp.name} --ana test') - assert res.exit_code == 1 - assert 'Provided schema is not valid.' in res.output + assert res.exit_code == 0 + assert 'Provided schema is not a valid option.' in res.output def test_wrong_schema_fails(app, create_schema, cli_runner): @@ -68,8 +68,29 @@ def test_wrong_schema_fails(app, create_schema, cli_runner): tmp.seek(0) res = cli_runner(f'fixtures create-deposit --file {tmp.name}') - assert res.exit_code == 1 - assert 'Provided schema is not valid.' in res.output + assert res.exit_code == 0 + assert 'Provided schema is not a valid option.' in res.output + + +def test_schema_validation_fails(app, create_schema, cli_runner): + create_schema( + 'test', experiment='CMS', + deposit_schema={ + 'type': 'object', + 'properties': { + 'title': {'type': 'integer'} + } + } + ) + data = [{"title": "test"}] + + with NamedTemporaryFile('w+t') as tmp: + tmp.write(json.dumps(data)) + tmp.seek(0) + res = cli_runner(f'fixtures create-deposit --file {tmp.name} --ana test') + + assert res.exit_code == 0 + assert "'test' is not of type 'integer'" in res.output def test_create_record_success(app, db, location, cli_runner, create_schema, auth_headers_for_superuser): @@ -102,8 +123,30 @@ def test_create_record_success_with_multiple_records( assert res.output.count('Created deposit') == 2 +def test_create_with_multiple_records_both_fails_and_succeeds(app, db, location, cli_runner, create_schema): + create_schema( + 'test', experiment='CMS', + deposit_schema={ + 'type': 'object', + 'properties': { + 'title': {'type': 'integer'} + }, + } + ) + data = [{"title": "test1"}, {"title": 10}] + + with NamedTemporaryFile('w+t') as tmp: + tmp.write(json.dumps(data)) + tmp.seek(0) + res = cli_runner(f'fixtures create-deposit --file {tmp.name} --ana test') + + assert res.exit_code == 0 + assert "'test1' is not of type 'integer'" in res.output + assert res.output.count('Created deposit') == 1 + + def test_create_record_success_with_multiple_records_schema_and_ana( - app, db, location, cli_runner, create_schema, auth_headers_for_superuser): + app, db, location, cli_runner, create_schema): create_schema('test', experiment='CMS') create_schema('test2', experiment='CMS', version='0.0.1')