Skip to content

Commit

Permalink
[#2871] Refactor ZGW import logic
Browse files Browse the repository at this point in the history
    - overwrite only editable fields when importing ZGW entities,
      skip read-only fields (url, domein, rsin)
  • Loading branch information
pi-sigma committed Nov 22, 2024
1 parent f50ba69 commit 5d89630
Show file tree
Hide file tree
Showing 2 changed files with 399 additions and 254 deletions.
277 changes: 206 additions & 71 deletions src/open_inwoner/openzaak/import_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@
from typing import IO, Any, Generator, Self
from urllib.parse import urlparse

from django.apps import apps
from django.core import serializers
from django.core.exceptions import MultipleObjectsReturned, ObjectDoesNotExist
from django.core.files.storage import Storage
from django.core.serializers.base import DeserializationError
from django.db import transaction
from django.db.models import QuerySet

Expand All @@ -22,6 +25,159 @@
logger = logging.getLogger(__name__)


class ZGWImportError(Exception):
    """Error describing a single row of a ZGW export that could not be imported.

    Instances are normally built with :meth:`from_exception_and_jsonl`, which
    combines the exception raised while processing a row with the raw JSONL
    row to produce a human-readable message.
    """

    @classmethod
    def extract_error_data(cls, exc: Exception, jsonl: str):
        """Collect the data needed to render an import error message.

        Args:
            exc: The exception raised while deserializing or looking up the row.
            jsonl: The raw JSONL row being imported; it must contain a
                ``model`` key naming the model (``app_label.model_name``).

        Returns:
            A dict with the keys ``error_type`` (``ObjectDoesNotExist`` or
            ``MultipleObjectsReturned``), ``source_config_name`` (class name of
            the model named in the row) and ``info`` (comma-separated
            identifying fields taken from the row).
        """
        data = json.loads(jsonl) if jsonl else {}
        source_config = apps.get_model(data["model"])

        # error type
        # The lookup failure is either ``exc`` itself (raised directly by a
        # natural-key lookup) or its ``__context__`` (when another exception
        # was raised while the lookup failure was being handled).
        # Model-specific ``MultipleObjectsReturned`` classes subclass the
        # generic one, so ``isinstance`` covers every model. Everything else
        # is reported as "not found".
        if any(
            isinstance(candidate, MultipleObjectsReturned)
            for candidate in (exc, exc.__context__)
        ):
            error_type = MultipleObjectsReturned
        else:
            error_type = ObjectDoesNotExist

        # metadata about source_config
        items = []
        fields = data.get("fields", None)
        if source_config is CatalogusConfig:
            items = [
                f"Domein = {fields['domein']}",
                f"Rsin = {fields['rsin']}",
            ]
        elif source_config is ZaakTypeConfig:
            # ``catalogus`` is a natural key: [domein, rsin]
            items = [
                f"Identificatie = {fields['identificatie']}",
                f"Catalogus domein = {fields['catalogus'][0]}",
                f"Catalogus rsin = {fields['catalogus'][1]}",
            ]
        elif source_config in (
            ZaakTypeStatusTypeConfig,
            ZaakTypeResultaatTypeConfig,
            ZaakTypeInformatieObjectTypeConfig,
        ):
            # ``zaaktype_config`` is a natural key: [identificatie, domein, rsin]
            items = [
                f"omschrijving = {fields['omschrijving']}",
                f"ZaakTypeConfig identificatie = {fields['zaaktype_config'][0]}",
                f"Catalogus domein = {fields['zaaktype_config'][1]}",
                f"Catalogus rsin = {fields['zaaktype_config'][2]}",
            ]

        return {
            "error_type": error_type,
            "source_config_name": source_config.__name__,
            "info": ", ".join(items),
        }

    @classmethod
    def from_exception_and_jsonl(cls, exception: Exception, jsonl: str) -> Self:
        """Build an import error from an exception and the row that caused it.

        Args:
            exception: The exception raised while processing the row.
            jsonl: The raw JSONL row being imported.

        Returns:
            A new instance whose message says either that the object was not
            found in the target environment or that multiple results were
            found, followed by the row's identifying fields.
        """
        error_data = cls.extract_error_data(exception, jsonl)

        error_template = (
            "%(source_config_name)s not found in target environment: %(info)s"
        )
        if error_data["error_type"] is MultipleObjectsReturned:
            error_template = "Got multiple results for %(source_config_name)s: %(info)s"

        return cls(error_template % error_data)


def check_catalogus_config_exists(source_config, jsonl: str):
    """Ensure exactly one CatalogusConfig matches the imported one.

    The lookup uses the natural key (``domein``, ``rsin``) of ``source_config``.
    Nothing is written; the function only validates.

    Args:
        source_config: The deserialized CatalogusConfig from the import file.
        jsonl: The raw JSONL row, used to build the error message.

    Raises:
        ZGWImportError: If no match or more than one match is found.
    """
    lookup_failures = (
        CatalogusConfig.MultipleObjectsReturned,
        CatalogusConfig.DoesNotExist,
    )
    try:
        CatalogusConfig.objects.get_by_natural_key(
            domein=source_config.domein,
            rsin=source_config.rsin,
        )
    except lookup_failures as lookup_error:
        raise ZGWImportError.from_exception_and_jsonl(lookup_error, jsonl)


def update_zaaktype_config(source_config, jsonl: str):
    """Copy the editable fields of an imported ZaakTypeConfig onto its target.

    The target is found by natural key (``identificatie`` plus the catalogus
    ``domein`` and ``rsin``). Only the fields listed below are overwritten,
    after which the target is saved.

    Args:
        source_config: The deserialized ZaakTypeConfig from the import file.
        jsonl: The raw JSONL row, used to build the error message.

    Raises:
        ZGWImportError: If the target (or its catalogus) is not found, or if
            the lookup returns more than one object.
    """
    lookup_failures = (
        ZaakTypeConfig.MultipleObjectsReturned,
        CatalogusConfig.DoesNotExist,
        ZaakTypeConfig.DoesNotExist,
    )
    try:
        target = ZaakTypeConfig.objects.get_by_natural_key(
            identificatie=source_config.identificatie,
            catalogus_domein=source_config.catalogus.domein,
            catalogus_rsin=source_config.catalogus.rsin,
        )
    except lookup_failures as lookup_error:
        raise ZGWImportError.from_exception_and_jsonl(lookup_error, jsonl)

    editable_fields = (
        "notify_status_changes",
        "external_document_upload_url",
        "document_upload_enabled",
        "contact_form_enabled",
        "contact_subject_code",
        "relevante_zaakperiode",
    )
    for name in editable_fields:
        # An attribute missing on the source is written as None
        setattr(target, name, getattr(source_config, name, None))
    target.save()


def _update_nested_zgw_config(source_config: type, update_fields: list[str], jsonl):
    """Copy selected fields from an imported config onto its stored counterpart.

    Shared helper for configs that hang off a ZaakTypeConfig. The target is
    looked up on the model class of ``source_config`` by natural key
    (``omschrijving`` plus the parent's identificatie and catalogus
    domein/rsin), then each name in ``update_fields`` is copied and the
    target is saved.

    Args:
        source_config: The deserialized object from the import file (the code
            uses it as a model instance: it reads ``__class__.objects`` and
            field values from it).
        update_fields: Names of the attributes to overwrite on the target.
        jsonl: The raw JSONL row, used to build the error message.

    Raises:
        ZGWImportError: If no target or more than one target is found.
    """
    parent = source_config.zaaktype_config
    parent_identificatie = parent.identificatie
    domein = parent.catalogus.domein
    rsin = parent.catalogus.rsin

    try:
        manager = source_config.__class__.objects
        target = manager.get_by_natural_key(
            omschrijving=source_config.omschrijving,
            zaak_type_config_identificatie=parent_identificatie,
            catalogus_domein=domein,
            catalogus_rsin=rsin,
        )
    except (
        source_config.MultipleObjectsReturned,
        source_config.DoesNotExist,
    ) as lookup_error:
        raise ZGWImportError.from_exception_and_jsonl(lookup_error, jsonl)

    for name in update_fields:
        # An attribute missing on the source is written as None
        setattr(target, name, getattr(source_config, name, None))
    target.save()


def update_zaaktype_informatie_objecttype_config(source_config, jsonl):
    """Overwrite the editable fields of a ZaakTypeInformatieObjectTypeConfig.

    Delegates the lookup, copy and save to ``_update_nested_zgw_config``.

    Args:
        source_config: The deserialized config from the import file.
        jsonl: The raw JSONL row, used for error reporting.
    """
    # "zaaktype_uuids" is deliberately left out: ignore, not stable
    editable_fields = ["document_upload_enabled", "document_notification_enabled"]
    _update_nested_zgw_config(source_config, editable_fields, jsonl)


def update_zaaktype_statustype_config(source_config, jsonl):
    """Overwrite the editable fields of a ZaakTypeStatusTypeConfig.

    Delegates the lookup, copy and save to ``_update_nested_zgw_config``.

    Args:
        source_config: The deserialized config from the import file.
        jsonl: The raw JSONL row, used for error reporting.
    """
    update_fields = [
        "statustekst",
        # "zaaktype_uuids", not stable
        "status_indicator",
        "status_indicator_text",
        "document_upload_description",
        # Was misspelled "desciption", which silently skipped the real field
        # because the helper falls back to None for unknown attributes.
        "description",
        "notify_status_change",
        "action_required",
        "document_upload_enabled",
        "call_to_action_url",
        "call_to_action_text",
        "case_link_text",
    ]
    _update_nested_zgw_config(source_config, update_fields, jsonl)


def update_zaaktype_resultaattype_config(source_config, jsonl):
    """Overwrite the editable fields of a ZaakTypeResultaatTypeConfig.

    Delegates the lookup, copy and save to ``_update_nested_zgw_config``.

    Args:
        source_config: The deserialized config from the import file.
        jsonl: The raw JSONL row, used for error reporting.
    """
    # "zaaktype_uuids" is deliberately left out: not stable
    editable_fields = ["description"]
    _update_nested_zgw_config(source_config, editable_fields, jsonl)


@dataclasses.dataclass(frozen=True)
class CatalogusConfigExport:
"""Gather and export CatalogusConfig(s) and all associated relations."""
Expand Down Expand Up @@ -113,9 +269,10 @@ class CatalogusConfigImport:
total_rows_processed: int = 0
catalogus_configs_imported: int = 0
zaaktype_configs_imported: int = 0
zaak_inormatie_object_type_configs_imported: int = 0
zaak_informatie_object_type_configs_imported: int = 0
zaak_status_type_configs_imported: int = 0
zaak_resultaat_type_configs_imported: int = 0
import_errors: list | None = None

@staticmethod
def _get_url_root(url: str) -> str:
Expand Down Expand Up @@ -149,90 +306,68 @@ def _lines_iter_from_jsonl_stream_or_string(
# Reset the stream in case it gets re-used
lines.seek(0)

@classmethod
def _rewrite_jsonl_url_references(
    cls, stream_or_string: IO | str
) -> Generator[str, Any, None]:
    """Yield the JSONL lines with source base URLs replaced by target base URLs.

    The input is read twice: first to build a mapping from the URL root of
    each CatalogusConfig's ``url`` to the URL root of its service's
    ``api_root``, then to apply every mapping entry to every line.

    Raises:
        ValueError: While iterating, if the mapping is empty (no
            CatalogusConfig row produced an entry).
    """
    # The assumption is that the exporting and importing instance both have
    # a `Service` with the same slug as the `Service` referenced in the
    # `configured_from` attribute of the imported CatalogusConfig. The
    # assumption is further that all URLs in the imported objects are
    # prefixed by an URL that matches the API root in the service. Because
    # of this, the import file will contain URLs with a base URL pointing to
    # the `api_root` of the `configured_from` Service on the _source_
    # instance, and has to be re-written to match the `api_root` of the
    # `configured_from` Service on the _target_ instance. Put differently,
    # we assume that we are migrating ZGW objects that _do not differ_ as
    # far as the ZGW objects themselves are concerned (apart from the URL,
    # they essentially point to the same ZGW backend), but that they _do_
    # differ in terms of additional model fields that do not have their
    # source of truth in the ZGW backends.
    #
    # This expectation is also encoded in our API clients: you can only
    # fetch ZGW objects using the ApePie clients if the root of those
    # objects matches the configured API root.

    # First pass: deserialize only the catalogus config rows (selected with a
    # plain substring match on the raw line) to learn the URL roots.
    base_url_mapping = {}
    for deserialized_object in serializers.deserialize(
        "jsonl",
        filter(
            lambda row: ('"model": "openzaak.catalogusconfig"' in row),
            cls._lines_iter_from_jsonl_stream_or_string(stream_or_string),
        ),
        use_natural_foreign_keys=True,
        use_natural_primary_keys=True,
    ):
        object_type: str = deserialized_object.object.__class__.__name__

        if object_type == "CatalogusConfig":
            target_base_url = cls._get_url_root(
                deserialized_object.object.service.api_root
            )
            source_base_url = cls._get_url_root(deserialized_object.object.url)
            base_url_mapping[source_base_url] = target_base_url
        else:
            # https://www.xkcd.com/2200/
            logger.error(
                "Tried to filter for catalogus config objects, but also got: %s",
                object_type,
            )

    # Second pass: rewrite every line using all known mappings.
    for line in cls._lines_iter_from_jsonl_stream_or_string(stream_or_string):
        source_url_found = False
        for source, target in base_url_mapping.items():
            line = line.replace(source, target)
            # NOTE(review): set for every mapping entry, whether or not
            # `source` occurred in the line, so the check below only fires
            # when the mapping is empty. Confirm this is intended.
            source_url_found = True

        if not source_url_found:
            raise ValueError("Unable to rewrite ZGW urls")

        yield line

@classmethod
@transaction.atomic()
def from_jsonl_stream_or_string(cls, stream_or_string: IO | str) -> Self:
model_to_counter_mapping = {
"CatalogusConfig": "catalogus_configs_imported",
"ZaakTypeConfig": "zaaktype_configs_imported",
"ZaakTypeInformatieObjectTypeConfig": "zaak_inormatie_object_type_configs_imported",
"ZaakTypeInformatieObjectTypeConfig": "zaak_informatie_object_type_configs_imported",
"ZaakTypeStatusTypeConfig": "zaak_status_type_configs_imported",
"ZaakTypeResultaatTypeConfig": "zaak_resultaat_type_configs_imported",
}

object_type_counts = defaultdict(int)

for deserialized_object in serializers.deserialize(
"jsonl",
cls._rewrite_jsonl_url_references(stream_or_string),
use_natural_foreign_keys=True,
use_natural_primary_keys=True,
import_errors = []
for rows_processed, line in enumerate(
cls._lines_iter_from_jsonl_stream_or_string(stream_or_string)
):
deserialized_object.save()
object_type = deserialized_object.object.__class__.__name__
object_type_counts[object_type] += 1
try:
(deserialized_object,) = serializers.deserialize(
"jsonl",
line,
use_natural_primary_keys=True,
use_natural_foreign_keys=True,
)
except DeserializationError as exc:
error = ZGWImportError.from_exception_and_jsonl(exc, line)
logger.error(error)
import_errors.append(error)
else:
source_config = deserialized_object.object
try:
match source_config:
case CatalogusConfig():
check_catalogus_config_exists(
source_config=source_config, jsonl=line
)
case ZaakTypeConfig():
update_zaaktype_config(
source_config=source_config, jsonl=line
)
case ZaakTypeInformatieObjectTypeConfig():
update_zaaktype_informatie_objecttype_config(
source_config=source_config, jsonl=line
)
case ZaakTypeStatusTypeConfig():
update_zaaktype_statustype_config(
source_config=source_config,
jsonl=line,
)
case ZaakTypeResultaatTypeConfig():
update_zaaktype_resultaattype_config(
source_config=source_config, jsonl=line
)
except ZGWImportError as exc:
logger.error(exc)
import_errors.append(exc)
else:
object_type = source_config.__class__.__name__
object_type_counts[object_type] += 1

creation_kwargs = {
"total_rows_processed": sum(object_type_counts.values()),
"total_rows_processed": rows_processed + 1 if rows_processed else 0,
"import_errors": import_errors,
}

for model_name, counter_field in model_to_counter_mapping.items():
Expand Down
Loading

0 comments on commit 5d89630

Please sign in to comment.