Merge pull request #816 from Princeton-CDH/feature/revise-import-export
Revise import/export updates
rlskoeser authored Apr 17, 2024
2 parents b46021a + 6565239 commit 0651e05
Showing 5 changed files with 143 additions and 154 deletions.
27 changes: 15 additions & 12 deletions mep/books/admin.py
@@ -1,28 +1,31 @@
import logging

from dal import autocomplete
from django.db import IntegrityError
from django import forms
from django.db import IntegrityError
from django.db.models import Count
from django.conf import settings
from django.contrib import admin
from django.core.validators import ValidationError
from django.urls import path, reverse
from django.db.models import Count
from django.utils.html import format_html
from django.utils.timezone import now

from import_export.resources import ModelResource
from import_export.widgets import ManyToManyWidget, ForeignKeyWidget, Widget
from import_export.fields import Field
from tabular_export.admin import export_to_csv_response
from mep.common.admin import ImportExportModelResource, ImportExportAdmin
from parasolr.django.signals import IndexableSignalHandler

from mep.accounts.admin import AUTOCOMPLETE
from mep.accounts.partial_date import PartialDateFormMixin
from mep.books.models import Creator, CreatorType, Work, Subject, Format, Genre, Edition
from mep.people.models import Person
from mep.books.queryset import WorkSolrQuerySet
from mep.common.admin import CollapsibleTabularInline
from import_export.resources import ModelResource
from import_export.widgets import ManyToManyWidget, ForeignKeyWidget, Widget
from import_export.fields import Field
from parasolr.django.signals import IndexableSignalHandler
from django.conf import settings
import logging
from mep.common.admin import (
CollapsibleTabularInline,
ImportExportModelResource,
LocalImportExportModelAdmin,
)

logger = logging.getLogger()

@@ -487,7 +490,7 @@ class Meta:
export_order = WORK_IMPORT_EXPORT_COLUMNS


class WorkAdminImportExport(WorkAdmin, ImportExportAdmin):
class WorkAdminImportExport(WorkAdmin, LocalImportExportModelAdmin):
resource_classes = [WorkResource]

def get_export_resource_classes(self):
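A minimal sketch of what the collapsed get_export_resource_classes override might look like, assuming a separate resource with export-only fields (WorkExportResource is a hypothetical name, not shown in this diff):

def get_export_resource_classes(self):
    # hypothetical: export with a resource that defines export-only fields,
    # separate from the WorkResource used for import
    return [WorkExportResource]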
123 changes: 52 additions & 71 deletions mep/common/admin.py
@@ -55,39 +55,36 @@ def group_names(self, obj):
class ImportExportModelResource(ModelResource):
max_objects_to_index = 1000
use_transactions = False
# store updated objects for bulk indexing after import completes
store_instance = True

def __init__(self, *args, **kwargs):
self.request = kwargs.pop("request", None)
# NOTE: request is not passed in by default;
# extend get_resource_kwargs or use LocalImportExportModelAdmin
self.request = kwargs.get("request", None)
super().__init__(*args, **kwargs)
# list to contain updated objects for batch indexing at end
self.objects_to_index = []

def before_import(self, dataset, using_transactions, dry_run, **kwargs):
# lower and camel_case headers
# lower and snake_case headers
dataset.headers = [x.lower().replace(" ", "_") for x in dataset.headers]

# log
# log summary of what will be done
logger.debug(
f"importing dataset of {len(dataset.headers)} columns, using_transactions = {using_transactions}, dry_run = {dry_run}"
f"importing dataset with {len(dataset.headers)} columns "
+ f"(using_transactions={using_transactions}, dry_run={dry_run})"
)

# turn off indexing temporarily
IndexableSignalHandler.disconnect()
# turn off indexing temporarily; track whether indexing was enabled
# (as of parasolr v0.9.2, disconnect returns # of handlers disconnected)
self.indexing_enabled = IndexableSignalHandler.disconnect()
print(f"indexing enabled {self.indexing_enabled}")

# turn off viaf lookups
settings.SKIP_VIAF_LOOKUP = True
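
As a quick illustration of the header normalization done in before_import (sample headers are made up):

headers = ["Slug", "Work Format", "Notes"]
normalized = [h.lower().replace(" ", "_") for h in headers]
print(normalized)  # ['slug', 'work_format', 'notes']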

def before_import_row(self, row, **kwargs):
"""
Called on an OrderedDictionary of row attributes.
Opportunity to do quick string formatting as a
principle of charity to annotators before passing
values into django-import-export lookup logic.
"""
pass

def validate_row_by_slug(self, row):
"""Make sure the record to update can be found by slug or past slug; if the slug is a past slug, row data is updated to use the current slug."""
"""Make sure the record to update can be found by slug or past slug;
if the slug is a past slug, row data is updated to use the current slug."""
if not row.get("slug"):
return False
if not self.Meta.model.objects.filter(slug=row["slug"]).exists():
@@ -111,47 +108,40 @@ def skip_row(self, instance, original, row, import_validation_errors=None):
return True
return super().skip_row(instance, original, row, import_validation_errors)
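
A rough sketch of the slug lookup that validate_row_by_slug performs (its body is partly collapsed above); the past_slugs relation name here is an assumption for illustration only:

def resolve_slug(model, slug):
    # return the current slug for a record, following past slugs if needed;
    # past_slugs is a hypothetical related name, not confirmed by this diff
    if model.objects.filter(slug=slug).exists():
        return slug
    match = model.objects.filter(past_slugs__slug=slug).first()
    return match.slug if match else None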

def after_save_instance(self, instance, using_transactions, dry_run):
"""
Called when an instance either was or would be saved (depending on dry_run)
"""
self.objects_to_index.append(instance)
return super().after_save_instance(instance, using_transactions, dry_run)

def after_import(self, dataset, result, using_transactions, dry_run, **kwargs):
"""
Called after importing, twice: once with dry_run==True (preview),
once dry_run==False. We report how many objects were updated and need to be indexed.
We only do so when dry_run is False.
After import completes, report how many objects were updated and
need to be indexed. When `dry_run` is true, this is called to display the
import preview; indexing is only done when `dry_run` is false.
"""
# run parent method
super().after_import(dataset, result, using_transactions, dry_run, **kwargs)

# report how many need indexing
logger.debug(
f"requesting index of {len(self.objects_to_index)} objects, dry_run = {dry_run}"
)

# only continue if not a dry run
if not dry_run:
# re-enable indexing
IndexableSignalHandler.connect()

# index objects
if self.objects_to_index:
# get objects to index
items2index = self.objects_to_index[: self.max_objects_to_index]
logger.debug(f"indexing {len(items2index):,} items now")
# default implementation does nothing, no need to call parent method
# result is a list of row result objects; we only care
# about updates since we don't support creation or deletion
updated_objects = [
row_result.instance
for row_result in result
if row_result.import_type == "update"
]

# if this is a dry run, report how many would be indexed
if dry_run:
# report how many need indexing
logger.debug(f"{len(updated_objects):,} records to index")

# if this is not a dry run, index the updated objects
else:
if updated_objects:
# get objects to index, up to configured maximum
items2index = updated_objects[: self.max_objects_to_index]

# do the actual indexing
start = time.time()

# do indexing
self.Meta.model.index_items(items2index)
logger.debug(
f"finished indexing {len(items2index):,} items in {time.time() - start:.1f} seconds"
f"Indexing {len(items2index):,} records in {time.time() - start:.1f} seconds"
)

# warn if only so many indexed
n_indexed, n_updated = len(items2index), len(self.objects_to_index)
# warn if there are updated records that were not indexed
n_indexed, n_updated = len(items2index), len(updated_objects)
n_remaining = n_updated - n_indexed
if n_remaining:
msg = (
Expand All @@ -164,32 +154,23 @@ def after_import(self, dataset, result, using_transactions, dry_run, **kwargs):
# turn viaf lookups back on
settings.SKIP_VIAF_LOOKUP = False

# make sure indexing disconnected afterward
IndexableSignalHandler.disconnect()

def ensure_nulls(self, row):
for k, v in row.items():
row[k] = v if v or v == 0 else None
# re-enable indexing signal handlers if any were disconnected
# (i.e., don't enable in unit tests when they're already disabled)
if self.indexing_enabled:
IndexableSignalHandler.connect()

class Meta:
skip_unchanged = True
report_skipped = True
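
Taken together, the resource wraps the import in a disconnect / bulk index / reconnect cycle; a condensed sketch of that flow, using only calls that appear in this diff (the queryset stands in for the rows updated by an import):

from parasolr.django.signals import IndexableSignalHandler
from mep.books.models import Work

indexing_enabled = IndexableSignalHandler.disconnect()  # number of handlers disconnected
try:
    updated_objects = list(Work.objects.all()[:10])  # stand-in for imported rows
    Work.index_items(updated_objects[:1000])  # batch index, up to max_objects_to_index
finally:
    if indexing_enabled:
        IndexableSignalHandler.connect()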


class ImportExportAdmin(ImportExportModelAdmin):
resource_classes = []

def get_export_resource_classes(self):
"""
Specifies the resource class to use for exporting,
so that separate fields can be exported than those imported
"""
# Subclass this function
return super().get_export_resource_classes()

class LocalImportExportModelAdmin(ImportExportModelAdmin):
def get_resource_kwargs(self, request, *args, **kwargs):
"""Passing request to resource obj for use in django messages"""
return {"request": request}
"""Pass request object to resource for use in django messages"""
kwargs = super().get_resource_kwargs(request, *args, **kwargs)
# pass request object in so we can use messages to warn
kwargs["request"] = request
return kwargs
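
With the request available on the resource, warnings can be surfaced in the admin UI via django messages; a minimal sketch of a hypothetical helper using it (the method name and message text are illustrative):

from django.contrib import messages

def warn_unindexed(self, n_remaining):
    # hypothetical helper on the resource; self.request is supplied through
    # LocalImportExportModelAdmin.get_resource_kwargs
    if self.request and n_remaining:
        messages.warning(
            self.request,
            f"{n_remaining:,} updated records were not indexed and should be reindexed separately.",
        )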


admin.site.unregister(User)
