Skip to content

Commit

Permalink
OAIHARVESTING: bulk indexing of oai records
Browse files Browse the repository at this point in the history
* Improves indexing of harvested oai records.

Co-Authored-by: Peter Weber <[email protected]>
  • Loading branch information
rerowep and rerowep committed Aug 23, 2019
1 parent e894a57 commit 2cf1c24
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 4 deletions.
1 change: 1 addition & 0 deletions rero_ils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ def _(x):

}
# Celery broker heartbeat setting.
# NOTE(review): 0 presumably disables broker heartbeats - confirm against
# the Celery configuration documentation.
CELERY_BROKER_HEARTBEAT = 0
# Timeout applied to bulk indexing requests.
# NOTE(review): presumably expressed in seconds and read by invenio-indexer -
# confirm against its configuration documentation.
INDEXER_BULK_REQUEST_TIMEOUT = 60

# Database
# ========
Expand Down
12 changes: 9 additions & 3 deletions rero_ils/modules/ebooks/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@
from flask import current_app

from ..documents.api import Document, DocumentsSearch
from ..utils import bulk_index


@shared_task(ignore_result=True)
def create_records(records):
"""Records creation and indexing."""
n_updated = 0
n_created = 0
uuids = []
for record in records:
record['$schema'] = \
'https://ils.rero.ch/schema/documents/document-minimal-v0.0.1.json'
Expand All @@ -55,15 +57,19 @@ def create_records(records):
existing_record.update(
record,
dbcommit=True,
reindex=True)
reindex=False
)
n_updated += 1
uuids.append(existing_record.id)
else:
# create a new record
Document.create(
new_record = Document.create(
record,
dbcommit=True,
reindex=True
reindex=False
)
n_created += 1
uuids.append(new_record.id)
bulk_index(uuids, process=True)
current_app.logger.info('create_records: {} updated, {} new'
.format(n_updated, n_created))
30 changes: 29 additions & 1 deletion rero_ils/modules/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
"""Utilities for rero-ils editor."""

from datetime import time
from time import sleep

from flask import url_for
import click
from flask import current_app
from invenio_indexer.api import RecordIndexer


Expand All @@ -30,3 +32,29 @@ def strtotime(strtime):
hour=int(splittime[0]),
minute=int(splittime[1])
)


def bulk_index(uuids, process=False, verbose=False):
    """Send records to the bulk indexer, retrying until it is accepted.

    Any exception raised by ``RecordIndexer.bulk_index`` is logged and the
    call is attempted again after a pause. The pause starts at one minute
    and doubles after every failure; there is no upper limit on retries.

    :param uuids: record identifiers passed to ``RecordIndexer.bulk_index``.
    :param process: if true, ``process_bulk_queue`` is called once the
        identifiers have been handed over.
    :param verbose: if true, progress and error messages are also written
        to the console through click.
    """
    if verbose:
        click.echo(' add to index: {count}'.format(count=len(uuids)))

    record_indexer = RecordIndexer()
    wait_minutes = 1
    while True:
        try:
            record_indexer.bulk_index(uuids)
        except Exception as error:
            # Report the failure, wait, then try again with a longer pause.
            message = 'Bulk Index Error: retry in {minutes} min {exc}'.format(
                exc=error,
                minutes=wait_minutes
            )
            current_app.logger.error(message)
            if verbose:
                click.secho(message, fg='red')
            sleep(wait_minutes * 60)
            wait_minutes *= 2
        else:
            # The identifiers were accepted: stop retrying.
            break

    if process:
        record_indexer.process_bulk_queue()

0 comments on commit 2cf1c24

Please sign in to comment.