Skip to content

Commit

Permalink
deposits: schema validation for create-deposit
Browse files Browse the repository at this point in the history
* adds schema validation, doesn't stop for failed records
* saves errors in another file
* closes #2074
* closes #2073

Signed-off-by: Ilias Koutsakis <[email protected]>
  • Loading branch information
Lilykos authored and pamfilos committed Feb 25, 2021
1 parent 38a9168 commit 9d01a4b
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 40 deletions.
107 changes: 73 additions & 34 deletions cap/modules/deposit/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,19 @@

from __future__ import absolute_import, print_function

import os
import copy
import json
import uuid
from datetime import datetime

import click
from flask_cli import with_appcontext
from invenio_db import db
from invenio_jsonschemas.errors import JSONSchemaNotFound
from invenio_pidstore.errors import PIDDoesNotExistError
from invenio_pidstore.models import PersistentIdentifier
from jsonschema.exceptions import ValidationError

from cap.modules.deposit.api import CAPDeposit
from cap.modules.deposit.fetchers import cap_deposit_fetcher
Expand Down Expand Up @@ -108,8 +112,10 @@ def add(file_path, schema, version, egroup, usermail, limit):
help='User with access to the record')
@click.option('--owner', '-o',
help='Owner of the record')
@click.option('--save-errors-to', '-e', 'save_errors',
help="Provide a filename, that wrong records will be saved to.")
@with_appcontext
def create_deposit(file, ana, roles, users, owner):
def create_deposit(file, ana, roles, users, owner, save_errors):
"""
Create a new deposit through the CLI.
Usage:
Expand All @@ -121,59 +127,92 @@ def create_deposit(file, ana, roles, users, owner):
except (KeyError, ValueError):
raise click.BadParameter('Not a valid JSON file.')

# create a list of all the records provided (even if it is just 1
# create a list of all the records provided (even if it is just 1)
records = [data] if isinstance(data, dict) else data
click.secho(f"{len(records)} record(s) found in file.\n", fg='green')

errors = []
for rec in records:
update_data_with_schema(rec, ana)
create_deposit_with_permissions(rec, roles, users, owner)
create_deposit_with_permissions(rec, roles, users, owner, ana, errors)

# save errors to be fixed, in different json file
if save_errors and errors:
save_errors_to_json(save_errors, errors)

click.secho(f"\nProcess finished and record(s) added.", fg='green')


def save_errors_to_json(save_errors, errors):
"""Saves the wrong records to a specified file."""
timestamp = datetime.now().strftime("%d-%b-%Y-%H:%M:%S")
wrong_records_path = os.path.join(
os.getcwd(),
f'{save_errors}_errors_{timestamp}.json'
)

with open(wrong_records_path, 'w') as _json:
json.dump(errors, _json, indent=4)

click.secho(f"Record(s) added.", fg='green')
click.secho(f"\nErrors saved in {wrong_records_path}.", fg='green')


def update_data_with_schema(data, ana):
def check_and_update_data_with_schema(data, ana):
"""
Checks if the analysis type is included in the schema, or adds it. It also
checks if the schema provided is valid
checks if the schema provided is valid.
"""
schema = data.get('$schema')
if not schema and not ana:
click.secho(
'You need to provide the --ana/-a parameter OR '
'add the $schema field in your JSON', fg='red')
return False

try:
if schema:
resolve_schema_by_url(schema)
if ana:
click.secho("Your data already provide a $schema, --ana will not be used.") # noqa
resolve_schema_by_url(schema)
elif ana:
data['$schema'] = schema_name_to_url(ana)
else:
raise click.UsageError(
'You need to provide the --ana/-a parameter '
'OR add the $schema field in your JSON')
return True
except JSONSchemaNotFound:
click.secho(f'Provided schema is not valid.', fg='red')
click.secho('Provided schema is not a valid option.', fg='red')
return False


def create_deposit_with_permissions(data, roles, users, owner):
def create_deposit_with_permissions(data, roles, users, owner, ana, errors):
"""Create a deposit and add privileges and owner information."""
from cap.modules.deposit.api import CAPDeposit

# make sure the schema is valid first
if not check_and_update_data_with_schema(data, ana):
return

with db.session.begin_nested():
owner = get_existing_or_register_user(owner) if owner else None
deposit = CAPDeposit.create(data=data, owner=owner)

# add roles and users
if roles:
for role in roles:
_role = get_existing_or_register_role(role.strip())
deposit._add_egroup_permissions(_role,
['deposit-read'],
db.session)
if users:
for user in users:
_user = get_existing_or_register_user(user.strip())
deposit._add_user_permissions(_user,
['deposit-read'],
db.session)
deposit.commit()
db.session.commit()
try:
# saving original to return to user if wrong
_data = copy.deepcopy(data)
owner = get_existing_or_register_user(owner) if owner else None
deposit = CAPDeposit.create(data=data, owner=owner)

# add roles and users
if roles:
for role in roles:
_role = get_existing_or_register_role(role.strip())
deposit._add_egroup_permissions(
_role, ['deposit-read'], db.session)
if users:
for user in users:
_user = get_existing_or_register_user(user.strip())
deposit._add_user_permissions(
_user, ['deposit-read'], db.session)

deposit.commit()
except ValidationError as err:
click.secho(f'Validation Error: {err.message}', fg='red')
errors.append(_data)
return

click.secho(f"Created deposit with id: {deposit['_deposit']['id']}",
fg='green')
db.session.commit()
click.secho(f"Created deposit with id: {deposit['_deposit']['id']}", fg='green') # noqa
55 changes: 49 additions & 6 deletions tests/integration/cli/test_deposits_create_with_permissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def test_no_ana_or_schema_given_fails(app, cli_runner):
tmp.seek(0)
res = cli_runner(f'fixtures create-deposit --file {tmp.name}')

assert res.exit_code == 2
assert res.exit_code == 0
assert 'You need to provide the --ana/-a parameter ' \
'OR add the $schema field in your JSON' in res.output

Expand All @@ -53,8 +53,8 @@ def test_wrong_ana_fails(app, create_schema, cli_runner):
tmp.seek(0)
res = cli_runner(f'fixtures create-deposit --file {tmp.name} --ana test')

assert res.exit_code == 1
assert 'Provided schema is not valid.' in res.output
assert res.exit_code == 0
assert 'Provided schema is not a valid option.' in res.output


def test_wrong_schema_fails(app, create_schema, cli_runner):
Expand All @@ -68,8 +68,29 @@ def test_wrong_schema_fails(app, create_schema, cli_runner):
tmp.seek(0)
res = cli_runner(f'fixtures create-deposit --file {tmp.name}')

assert res.exit_code == 1
assert 'Provided schema is not valid.' in res.output
assert res.exit_code == 0
assert 'Provided schema is not a valid option.' in res.output


def test_schema_validation_fails(app, create_schema, cli_runner):
create_schema(
'test', experiment='CMS',
deposit_schema={
'type': 'object',
'properties': {
'title': {'type': 'integer'}
}
}
)
data = [{"title": "test"}]

with NamedTemporaryFile('w+t') as tmp:
tmp.write(json.dumps(data))
tmp.seek(0)
res = cli_runner(f'fixtures create-deposit --file {tmp.name} --ana test')

assert res.exit_code == 0
assert "'test' is not of type 'integer'" in res.output


def test_create_record_success(app, db, location, cli_runner, create_schema, auth_headers_for_superuser):
Expand Down Expand Up @@ -102,8 +123,30 @@ def test_create_record_success_with_multiple_records(
assert res.output.count('Created deposit') == 2


def test_create_with_multiple_records_both_fails_and_succeeds(app, db, location, cli_runner, create_schema):
create_schema(
'test', experiment='CMS',
deposit_schema={
'type': 'object',
'properties': {
'title': {'type': 'integer'}
},
}
)
data = [{"title": "test1"}, {"title": 10}]

with NamedTemporaryFile('w+t') as tmp:
tmp.write(json.dumps(data))
tmp.seek(0)
res = cli_runner(f'fixtures create-deposit --file {tmp.name} --ana test')

assert res.exit_code == 0
assert "'test1' is not of type 'integer'" in res.output
assert res.output.count('Created deposit') == 1


def test_create_record_success_with_multiple_records_schema_and_ana(
app, db, location, cli_runner, create_schema, auth_headers_for_superuser):
app, db, location, cli_runner, create_schema):
create_schema('test', experiment='CMS')
create_schema('test2', experiment='CMS', version='0.0.1')

Expand Down

0 comments on commit 9d01a4b

Please sign in to comment.