From f3df6177e6e0a6ceb0131cd3f0e0788b63500f0b Mon Sep 17 00:00:00 2001 From: Peter Weber Date: Wed, 29 Jul 2020 22:28:29 +0200 Subject: [PATCH] cli: marc21json cli function to use split json schemas * Fixes the marc21json cli function to work properly with json schema files with $refs. Co-Authored-by: Peter Weber --- rero_ils/modules/cli.py | 63 +++++------- tests/data/documents.json | 198 +++++++++++++++++++++++--------------- tests/unit/test_cli.py | 41 ++++++++ 3 files changed, 185 insertions(+), 117 deletions(-) create mode 100644 tests/unit/test_cli.py diff --git a/rero_ils/modules/cli.py b/rero_ils/modules/cli.py index b574d3cee0..dfe768c201 100644 --- a/rero_ils/modules/cli.py +++ b/rero_ils/modules/cli.py @@ -34,7 +34,6 @@ from glob import glob import click -import jsonref import polib import pycountry import requests @@ -48,6 +47,7 @@ from invenio_accounts.cli import commit, users from invenio_app.factory import static_folder from invenio_db import db +from invenio_jsonschemas.proxies import current_jsonschemas from invenio_pidstore.models import PersistentIdentifier, PIDStatus from invenio_records.api import Record from invenio_records_rest.utils import obj_or_import_string @@ -56,7 +56,6 @@ from jsonschema import validate from jsonschema.exceptions import ValidationError from lxml import etree -from pkg_resources import resource_string from werkzeug.local import LocalProxy from .api import IlsRecordsIndexer @@ -68,8 +67,10 @@ from .tasks import process_bulk_queue from .utils import read_json_record from ..modules.providers import append_fixtures_new_identifiers +from ..modules.utils import get_schema_for_resource _datastore = LocalProxy(lambda: current_app.extensions['security'].datastore) +_records_state = LocalProxy(lambda: current_app.extensions['invenio-records']) def abort_if_false(ctx, param, value): @@ -555,23 +556,20 @@ def test_license(file, extension, license_lines, verbose): @utils.command('validate') @click.argument('jsonfile', 
type=click.File('r')) -@click.argument('type', default='documents') -@click.argument('schema', default='document-v0.0.1.json') +@click.argument('type', default='doc') @click.option('-v', '--verbose', 'verbose', is_flag=True, default=False) @click.option('-e', '--error_file', 'error_file', type=click.File('w'), default=None) @click.option('-o', '--ok_file', 'ok_file', type=click.File('w'), default=None) -def check_validate(jsonfile, type, schema, verbose, error_file, ok_file): +@with_appcontext +def check_validate(jsonfile, type, verbose, error_file, ok_file): """Check record validation.""" click.secho('Testing json schema for file', fg='green') - schema_in_bytes = resource_string( - 'rero_ils.modules.{type}.jsonschemas'.format(type=type), - '{type}/{schema}'.format( - type=type, - schema=schema - ) - ) - schema = jsonref.loads(schema_in_bytes.decode('utf8')) + + path = current_jsonschemas.url_to_path(get_schema_for_resource(type)) + schema = current_jsonschemas.get_schema(path=path) + schema = _records_state.replace_refs(schema) + datas = json.load(jsonfile) count = 0 for data in datas: @@ -599,27 +597,8 @@ def check_validate(jsonfile, type, schema, verbose, error_file, ok_file): click.secho(str(err)) -@utils.command('compile_json') -@click.argument('src_jsonfile', type=click.File('r')) -@click.option('-o', '--output', 'output', type=click.File('w'), default=None) -@click.option('-v', '--verbose', 'verbose', is_flag=True, default=False) -def compile_json(src_jsonfile, output, verbose): - """Compile source json file (resolve $ref).""" - click.secho('Compile json file (resolve $ref): ', fg='green', nl=False) - click.secho(src_jsonfile.name) - data = jsonref.load(src_jsonfile) - if not output: - output = sys.stdout - json.dump(data, fp=output, indent=2) - - -def do_worker(marc21records, results, pid_required, debug): +def do_worker(marc21records, results, pid_required, debug, schema=None): """Worker for marc21 to json transformation.""" - schema_in_bytes = 
resource_string( - 'rero_ils.modules.documents.jsonschemas', - 'documents/document-v0.0.1.json' - ) - schema = jsonref.loads(schema_in_bytes.decode('utf8')) for data in marc21records: data_json = data['json'] pid = data_json.get('001', '???') @@ -632,7 +611,8 @@ def do_worker(marc21records, results, pid_required, debug): if not record.get("pid"): # create dummy pid in data record["pid"] = 'dummy' - validate(record, schema) + if schema: + validate(record, schema) if record["$schema"] == 'dummy': del record["$schema"] if not pid_required: @@ -660,11 +640,12 @@ class Marc21toJson(): __slots__ = ['xml_file', 'json_file_ok', 'xml_file_error', 'parallel', 'chunk', 'verbose', 'debug', 'pid_required', 'count', 'count_ok', 'count_ko', 'ctx', - 'results', 'active_buffer', 'buffer', 'first_result'] + 'results', 'active_buffer', 'buffer', 'first_result', + 'schema'] def __init__(self, xml_file, json_file_ok, xml_file_error, parallel=8, chunk=5000, - verbose=False, debug=False, pid_required=False): + verbose=False, debug=False, pid_required=False, schema=None): """Constructor.""" self.count = 0 self.count_ok = 0 @@ -675,6 +656,7 @@ def __init__(self, xml_file, json_file_ok, xml_file_error, self.parallel = parallel self.chunk = chunk self.verbose = verbose + self.schema = schema self.first_result = True if verbose: click.echo('Main process pid: {pid}'.format( @@ -746,7 +728,7 @@ def start_new_process(self): new_process = self.ctx.Process( target=do_worker, args=(self.active_records, self.results, self.pid_required, - self.debug) + self.debug, self.schema) ) self.wait_free_process() new_process.start() @@ -829,6 +811,7 @@ def active_records(self): @click.option('-d', '--debug', 'debug', is_flag=True, default=False) @click.option('-r', '--pidrequired', 'pid_required', is_flag=True, default=False) +@with_appcontext def marc21json(xml_file, json_file_ok, xml_file_error, parallel, chunk, verbose, debug, pid_required): """Convert xml file to json with dojson.""" @@ -837,8 +820,12 @@ 
def marc21json(xml_file, json_file_ok, xml_file_error, parallel, chunk, click.secho(' (validation tests pid) ', nl=False) click.secho(xml_file.name) + path = current_jsonschemas.url_to_path(get_schema_for_resource('doc')) + schema = current_jsonschemas.get_schema(path=path) + schema = _records_state.replace_refs(schema) transform = Marc21toJson(xml_file, json_file_ok, xml_file_error, - parallel, chunk, verbose, debug, pid_required) + parallel, chunk, verbose, debug, pid_required, + schema) count, count_ok, count_ko = transform.counts() diff --git a/tests/data/documents.json b/tests/data/documents.json index c3c342c4eb..7205c6ef8f 100644 --- a/tests/data/documents.json +++ b/tests/data/documents.json @@ -1,55 +1,60 @@ [ { "type": "book", - "pid": "1", + "issuance": { + "main_type": "rdami:1001", + "subtype": "materialUnit" + }, + "pid": "43", "language": [ { - "value": "ita", + "value": "fre", "type": "bf:Language" } ], "identifiedBy": [ { - "value": "9788898983056", - "type": "bf:Isbn" - }, - { - "value": "R008400428", + "value": "0812781", "type": "bf:Local", "source": "RERO" - }, - { - "source": "OCoLC", - "value": "ocn945401320", - "type": "bf:Local" } ], - "authors": [ - { - "type": "person", - "$ref": "https://mef.rero.ch/api/idref/20109313" - }, + "responsibilityStatement": [ + [ + { + "value": "[\u00e9d.] Hans E. Bachmann" + } + ], + [ + { + "value": "trad. Henri Perrin" + } + ] + ], + "title": [ { - "type": "person", - "$ref": "https://mef.rero.ch/api/gnd/25552024" + "mainTitle": [ + { + "value": "La norme S.I.A. 
118 et l'actualit\u00e9 juridique en mati\u00e8re de construction" + } + ], + "subtitle": [ + { + "value": "un ouvrage pratique pour tous les entrepreneurs en rapport avec la construction, avec \u00e9tudes de cas, check-lists, exemples de contrats et de lettres relatifs au contrat d'entreprise" + } + ], + "type": "bf:Title" } ], - "title": "Le due tensioni : appunti per una ideologia della letteratura", "provisionActivity": [ { "type": "bf:Publication", - "place": [ - { - "type": "bf:Place", - "country": "it" - } - ], "statement": [ { "type": "bf:Place", "label": [ { - "value": "Matelica (MC)" + "value": "Z\u00fcrich" } ] }, @@ -57,67 +62,83 @@ "type": "bf:Agent", "label": [ { - "value": "Hacca" + "value": "Ed. Weka" } ] }, { - "type": "Date", "label": [ { - "value": "2016" + "value": "1987->" } - ] + ], + "type": "Date" } ], - "startDate": 2016 + "startDate": 1987, + "place": [ + { + "country": "sz", + "type": "bf:Place" + } + ] } ], - "extent": "380 pages", - "formats": [ - "21 cm" - ], - "series": [ + "extent": "8 classeurs", + "note": [ { - "name": "Novecento.0", - "number": "68" + "noteType": "otherPhysicalDetails", + "label": "ill." 
+ }, + { + "noteType": "general", + "label": "Publication \u00e0 feuillets mobiles avec mises \u00e0 jour p\u00e9riodiques" } ], - "notes": [ - "Collected writings", - "Includes preface (pages 9-22) and postface (pages 347-357)", - "Includes writings, published for the first time" + "illustrativeContent": [ + "illustrations" + ], + "dimensions": [ + "23 cm" ], "subjects": [ - "Litt\u00e9rature", - "Culture", - "[Notes, esquisses, etc.]" + "contrat de construction", + "Suisse" ], - "partOf": [ + "authors": [ { - "document": { - "$ref": "https://ils.rero.ch/api/documents/12" - }, - "numbering": [ - { - "volume": 25 - } - ] + "type": "person", + "$ref": "https://mef.rero.ch/api/idref/074755978" + }, + { + "type": "person", + "$ref": "https://mef.rero.ch/api/rero/A003683610" } + ], + "titlesProper": [ + "La norme SIA 118 et l'actualit\u00e9 juridique en mati\u00e8re de construction" ] }, { "type": "book", - "pid": "2", + "issuance": { + "main_type": "rdami:1001", + "subtype": "materialUnit" + }, + "pid": "44", "language": [ { - "value": "fre", + "value": "ger", "type": "bf:Language" } ], "identifiedBy": [ { - "value": "R006039425", + "value": "9783503057221", + "type": "bf:Isbn" + }, + { + "value": "R270072860", "type": "bf:Local", "source": "RERO" } @@ -125,25 +146,40 @@ "authors": [ { "type": "person", - "$ref": "https://mef.rero.ch/api/mef/19985648" + "$ref": "https://mef.rero.ch/api/rero/A006010680" } ], - "title": "Sukkwan island : roman", - "provisionActivity": [ + "responsibilityStatement": [ + [ + { + "value": "von Erwin Zacharias" + } + ] + ], + "title": [ { - "type": "bf:Publication", - "place": [ + "mainTitle": [ { - "type": "bf:Place", - "country": "fr" + "value": "Going Public einer Fussball-Kapitalgesellschaft" } ], + "subtitle": [ + { + "value": "rechtliche, betriebswirtschaftliche und strategische Konzepte bei der Vorbereitung der B\u00f6rseneinf\u00fchrung eines Fussball-Bundesligavereins" + } + ], + "type": "bf:Title" + } + ], + "provisionActivity": [ 
+ { + "type": "bf:Publication", "statement": [ { "type": "bf:Place", "label": [ { - "value": "Paris" + "value": "Bielefeld" } ] }, @@ -151,33 +187,37 @@ "type": "bf:Agent", "label": [ { - "value": "Gallmeister" + "value": "Erich Schmidt" } ] }, { - "type": "Date", "label": [ { - "value": "2009" + "value": "1999" } - ] + ], + "type": "Date" } ], - "startDate": 2009 + "startDate": 1999, + "place": [ + { + "country": "gw", + "type": "bf:Place" + } + ] } ], - "extent": "191 p.", - "formats": [ - "21 cm" - ], - "series": [ + "extent": "617 S.", + "note": [ { - "name": "Nature writing" + "noteType": "otherPhysicalDetails", + "label": "Taf." } ], - "abstracts": [ - "Une \u00eele sauvage du Sud de l'Alaska, accessible uniquement par bateau ou par hydravion, tout en for\u00eats humides et montagnes escarp\u00e9es. C'est dans ce d\u00e9cor que Jim d\u00e9cide d'emmener son fils de treize ans pour y vivre dans une cabane isol\u00e9e, une ann\u00e9e durant. Apr\u00e8s une succession d'\u00e9checs personnels, il voit l\u00e0 l'occasion de prendre un nouveau d\u00e9part et de renouer avec ce gar\u00e7on qu'il conna\u00eet si mal. La rigueur de cette vie et les d\u00e9faillances du p\u00e8re ne tardent pas \u00e0 transformer ce s\u00e9jour en cauchemar, et la situation devient vite incontr\u00f4lable. Jusqu'au drame violent et impr\u00e9visible qui scellera leur destin. Sukkwan Island est une histoire au suspense insoutenable. Avec ce roman qui nous entra\u00eene au coeur des t\u00e9n\u00e8bres de l'\u00e2me humaine, David Vann s'installe d'embl\u00e9e parmi les jeunes auteurs am\u00e9ricains de tout premier plan." 
+ "dimensions": [ + "21 cm" ] } ] diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py new file mode 100644 index 0000000000..badf9b6b7c --- /dev/null +++ b/tests/unit/test_cli.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +# +# RERO ILS +# Copyright (C) 2019 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +"""Test cli.""" + +from os.path import dirname, join + +from click.testing import CliRunner + +from rero_ils.modules.cli import check_validate + + +def test_cli_validate(app, script_info): + """Test validate cli.""" + runner = CliRunner() + file_name = join(dirname(__file__), '../data/documents.json') + + res = runner.invoke( + check_validate, + [file_name, 'doc', '-v'], + obj=script_info + ) + assert res.output.strip().split('\n') == [ + 'Testing json schema for file', + '\tTest record: 1', + '\tTest record: 2' + ]