Skip to content

Commit

Permalink
Use bulk create for missing genres; handle category or categories
Browse files Browse the repository at this point in the history
ref #791
  • Loading branch information
rlskoeser committed Apr 30, 2024
1 parent da16ea5 commit d197efe
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 18 deletions.
38 changes: 22 additions & 16 deletions mep/books/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,21 +422,32 @@ def before_import(self, dataset, using_transactions, dry_run, **kwargs):
# run parent method
super().before_import(dataset, using_transactions, dry_run, **kwargs)

# ensure nationalities
# for historical reasons, support both category and categories
if "category" in dataset.headers:
# if category is present, rename to categories
dataset.headers[dataset.headers.index("category")] = "categories"

# preprocess categories; create any new ones not in the database
genre_categories = {
nat.strip()
category.strip()
for row in dataset.dict
for nat in row["categories"].split(";")
if nat.strip()
for category in row.get("categories", "").split(";")
if category.strip()
}

known_categories = (
Genre.objects.filter(name__in=genre_categories)
.distinct("name")
.values_list("name", flat=True)
)
unknown_categories = genre_categories - set(known_categories)
try:
added = 0
for genre in genre_categories:
if not Genre.objects.filter(name=genre).exists():
logger.debug(f'Genre "{genre}" does not exist in db, creating now')
Genre.objects.create(name=genre)
added += 1
logger.debug(f"Successfully created {added} new genres")
genres = Genre.objects.bulk_create(
[Genre(name=category) for category in unknown_categories]
)
logger.debug(
f"Successfully created records for {len(genres)} new genre/categories"
)
except IntegrityError as e:
logger.debug(
f"Database integrity error occurred in creating new genres: {e}"
Expand All @@ -451,11 +462,6 @@ def before_import_row(self, row, **kwargs):
# alter slug if a previous version of present one
self.validate_row_by_slug(row)

# # make sure we have a genre category for each listed
# category_names = row["categories"]
# for cat in category_names.split(";"):
# Genre.objects.get_or_create(name=cat.strip())


class NamedListWidget(Widget):
sep = ";"
Expand Down
11 changes: 10 additions & 1 deletion mep/books/tests/test_books_admin.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import datetime
import time
from unittest.mock import Mock, patch
from unittest.mock import Mock, patch, MagicMock
from io import StringIO
import csv
import os
Expand All @@ -21,6 +21,7 @@
EditionForm,
WorkAdmin,
WorkAdminImportExport,
WorkResource,
WORK_IMPORT_COLUMNS,
WORK_IMPORT_EXPORT_COLUMNS,
)
Expand Down Expand Up @@ -409,6 +410,14 @@ def test_djangoimportexport_import(self):
assert response.content.count(b'<tr class="grp-row') == len(rows)


class TestWorkResouce:  # NOTE(review): class name looks like a typo for "TestWorkResource" — confirm before renaming
    def test_before_import(self):
        """A legacy "category" header is renamed in place to "categories"."""
        mock_dataset = MagicMock(headers=["slug", "category"])
        WorkResource().before_import(
            mock_dataset, using_transactions=False, dry_run=True
        )
        # the old header must be gone, replaced by the plural form
        assert "category" not in mock_dataset.headers
        assert "categories" in mock_dataset.headers


class TestEditionForm(TestCase):
# tests adapted from PartialDateFormMixin tests in account admin tests

Expand Down
1 change: 0 additions & 1 deletion mep/common/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ def before_import(self, dataset, using_transactions, dry_run, **kwargs):
# turn off indexing temporarily; track whether indexing was enabled
# (as of parasolr v0.9.2, disconnect returns # of handlers disconnected)
self.indexing_enabled = IndexableSignalHandler.disconnect()
print(f"indexing enabled {self.indexing_enabled}")

# turn off viaf lookups
settings.SKIP_VIAF_LOOKUP = True
Expand Down

0 comments on commit d197efe

Please sign in to comment.