Skip to content

Commit

Permalink
api: support for new-style record dumping
Browse files Browse the repository at this point in the history
* Adds support for new-style record dumping that relies solely on
  Record.dumps() to produce the Elasticsearch source document. This is
  in order to allow the Record class to harmonize record dumping/loading
  from multiple different sources.

* Fixes tests to work with latest Invenio-Records.
  • Loading branch information
lnielsen committed Sep 16, 2020
1 parent 09285fb commit 312af3f
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 13 deletions.
43 changes: 39 additions & 4 deletions invenio_indexer/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,18 @@ class RecordIndexer(object):
"""

record_cls = Record
"""Record class used for retriving and dumping records.
You can either subclass and overwrite this attribute, or provide the record
class to the constructor.
"""

record_dumper = None
"""Dumper instance to use with this record indexer."""

def __init__(self, search_client=None, exchange=None, queue=None,
routing_key=None, version_type=None, record_to_index=None,
record_cls=None):
record_cls=None, record_dumper=None):
"""Initialize indexer.
:param search_client: Elasticsearch client.
Expand All @@ -66,6 +74,12 @@ def __init__(self, search_client=None, exchange=None, queue=None,
(Default: ``external_gte``)
:param record_to_index: Function to extract the index and doc_type
from the record.
:param record_cls: Record class used for retriving and dumping records.
If the ``Record.enable_jsonref`` flag is False, new-style record
dumping will be used for creating the Elasticsearch source
document.
:param record_dumper: Dumper instance to use for dumping the record.
Only has an effect for new-style record dumping.
"""
self.client = search_client or current_search_client
self._exchange = exchange
Expand All @@ -76,6 +90,8 @@ def __init__(self, search_client=None, exchange=None, queue=None,

if record_cls:
self.record_cls = record_cls
if record_dumper:
self.record_dumper = record_dumper

def record_to_index(self, record):
"""Get index/doc_type given a record.
Expand Down Expand Up @@ -323,17 +339,36 @@ def _prepare_index(self, index, doc_type):
"""Prepare the index/doc_type before an operation."""
return build_alias_name(index), doc_type

@staticmethod
def _prepare_record(record, index, doc_type, arguments=None, **kwargs):
def _prepare_record(self, record, index, doc_type, arguments=None,
**kwargs):
"""Prepare record data for indexing.
Invenio-Records is evolving and preparing an Elasticsearch source
document is now a responsibility of the Record class. For backward
compatibility, we use the ``Record.enable_jsonref`` flag to control
if we use the new record dumpers feature from Invenio-Records. Set the
flag to ``False`` (disabling JSONRef replacement) to use the new
style record dumping.
:param record: The record to prepare.
:param index: The Elasticsearch index.
:param doc_type: The Elasticsearch document type.
:param arguments: The arguments to send to Elasticsearch upon indexing.
:param **kwargs: Extra parameters.
:returns: The record metadata.
:returns: The Elasticsearch source document.
"""
# New-style record dumping - we use the Record.enable_jsonref flag on
# the Record to control if we use the new simplified dumping.
if not getattr(record, 'enable_jsonref', True):
# If dumper is None, dumps() will use the default configured dumper
# on the Record class.
return record.dumps(dumper=self.record_dumper)

# Old-style dumping - the old style will still if INDEXER_REPLACE_REFS
# is False use the Record.dumps(), however the default implementation
# is backward compatible for new-style records. Also, we're adding
# extra information into the record like _created and _updated
# afterwards, which the Record.dumps() have no control over.
if current_app.config['INDEXER_REPLACE_REFS']:
data = copy.deepcopy(record.replace_refs())
else:
Expand Down
8 changes: 4 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ def base_app(request):
instance_path = tempfile.mkdtemp()
app = Flask('testapp', instance_path=instance_path)
app.config.update(
BROKER_URL=os.environ.get('BROKER_URL',
'amqp://guest:guest@localhost:5672//'),
CELERY_ALWAYS_EAGER=True,
CELERY_BROKER_URL=os.environ.get(
'BROKER_URL', 'amqp://guest:guest@localhost:5672//'),
CELERY_TASK_ALWAYS_EAGER=True,
CELERY_CACHE_BACKEND='memory',
CELERY_EAGER_PROPAGATES_EXCEPTIONS=True,
CELERY_TASK_EAGER_PROPAGATES=True,
CELERY_RESULT_BACKEND='cache',
INDEXER_DEFAULT_INDEX='records-default-v1.0.0',
INDEXER_DEFAULT_DOC_TYPE='default-v1.0.0',
Expand Down
8 changes: 4 additions & 4 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def test_delete_action(app):
assert action['_id'] == testid

# Skip JSONSchema validation
with patch('invenio_records.api.Record.validate'):
with patch('invenio_records.api._records_state.validate'):
record = Record.create({
'$schema': {
'$ref': '/records/authorities/authority-v1.0.0.json'},
Expand Down Expand Up @@ -236,13 +236,13 @@ def test_replace_refs(app):

with app.app_context():
record = Record({'$ref': 'http://dx.doi.org/10.1234/foo'})
data = RecordIndexer._prepare_record(record, 'records', 'record')
data = RecordIndexer()._prepare_record(record, 'records', 'record')
assert '$ref' in data

app.config['INDEXER_REPLACE_REFS'] = True
with app.app_context():
record = Record({'$ref': 'http://dx.doi.org/10.1234/foo'})
data = RecordIndexer._prepare_record(record, 'records', 'record')
data = RecordIndexer()._prepare_record(record, 'records', 'record')
assert '$ref' not in data
assert json.dumps(data)

Expand Down Expand Up @@ -306,7 +306,7 @@ def test_bulkrecordindexer_index_delete_by_record(app, queue):
def test_before_record_index_dynamic_connect(app):
"""Test before_record_index.dynamic_connect."""
with app.app_context():
with patch('invenio_records.api.Record.validate'):
with patch('invenio_records.api._records_state.validate'):
auth_record = Record.create({
'$schema': '/records/authorities/authority-v1.0.0.json',
'title': 'Test'})
Expand Down
2 changes: 1 addition & 1 deletion tests/test_invenio_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def test_index_prefixing(base_app):
default_doc_type = app.config['INDEXER_DEFAULT_DOC_TYPE']

with app.app_context():
with patch('invenio_records.api.Record.validate'):
with patch('invenio_records.api._records_state.validate'):
record = Record.create({'title': 'Test'})
record2 = Record.create({
'$schema': '/records/authorities/authority-v1.0.0.json',
Expand Down

0 comments on commit 312af3f

Please sign in to comment.