From 2f9f71fa5a79d1966e8743327fd0735f0ef94ec4 Mon Sep 17 00:00:00 2001 From: Sergey Fursov Date: Tue, 12 Jan 2021 10:46:14 +0300 Subject: [PATCH 1/2] cast object_id instead of primary keys when fetching deleted objects `WHERE model.id = CAST(version.object_id AS int)` works faster than `WHERE CAST(model.id AS varchar) = version.object_id` --- reversion/models.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/reversion/models.py b/reversion/models.py index 0c8a0a97..c5309816 100644 --- a/reversion/models.py +++ b/reversion/models.py @@ -133,12 +133,32 @@ def get_deleted(self, model, model_db=None): model_db = model_db or router.db_for_write(model) connection = connections[self.db] if self.db == model_db and connection.vendor in ("sqlite", "postgresql", "oracle"): - model_qs = ( - model._default_manager - .using(model_db) - .annotate(_pk_to_object_id=Cast("pk", Version._meta.get_field("object_id"))) - .filter(_pk_to_object_id=models.OuterRef("object_id")) - ) + pk_field_name = model._meta.pk.name + object_id_cast_target = model._meta.get_field(pk_field_name) + if django.VERSION >= (2, 1): + # django 2.0 contains a critical bug that doesn't allow the code below to work, + # fallback to casting primary keys then + # see https://code.djangoproject.com/ticket/29142 + if django.VERSION < (2, 2): + # properly cast autofields for django before 2.2 as it was fixed in django itself later + # see https://github.com/django/django/commit/ac25dd1f8d48accc765c05aebb47c427e51f3255 + object_id_cast_target = { + "AutoField": models.IntegerField(), + "BigAutoField": models.BigIntegerField(), + }.get(object_id_cast_target.__class__.__name__, object_id_cast_target) + casted_object_id = Cast(models.OuterRef("object_id"), object_id_cast_target) + model_qs = ( + model._default_manager + .using(model_db) + .filter(**{pk_field_name: casted_object_id}) + ) + else: + model_qs = ( + model._default_manager + .using(model_db) + .annotate(_pk_to_object_id=Cast("pk", Version._meta.get_field("object_id"))) + .filter(_pk_to_object_id=models.OuterRef("object_id")) + ) subquery = ( self.get_for_model(model, model_db=model_db) .annotate(pk_not_exists=~models.Exists(model_qs)) From 84972f0a23ade72d07fec0153b477ce1689de775 Mon Sep 17 00:00:00 2001 From: Sergey Fursov Date: Tue, 12 Jan 2021 10:56:45 +0300 Subject: [PATCH 2/2] speedup deleted query by using conditional expressions and DISTINCT ON --- reversion/models.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/reversion/models.py b/reversion/models.py index c5309816..1cdbef8c 100644 --- a/reversion/models.py +++ b/reversion/models.py @@ -1,6 +1,7 @@ from collections import defaultdict from itertools import chain, groupby +import django from django.apps import apps from django.conf import settings from django.contrib.contenttypes.fields import GenericForeignKey @@ -159,14 +160,25 @@ def get_deleted(self, model, model_db=None): .annotate(_pk_to_object_id=Cast("pk", Version._meta.get_field("object_id"))) .filter(_pk_to_object_id=models.OuterRef("object_id")) ) - subquery = ( - self.get_for_model(model, model_db=model_db) - .annotate(pk_not_exists=~models.Exists(model_qs)) - .filter(pk_not_exists=True) - .values("object_id") - .annotate(latest_pk=models.Max("pk")) - .values("latest_pk") - ) + # conditional expressions are being supported since django 3.0 + # DISTINCT ON works only for Postgres DB + if connection.vendor == "postgresql" and django.VERSION >= (3, 0): + subquery = ( + self.get_for_model(model, model_db=model_db) + .filter(~models.Exists(model_qs)) + .order_by("object_id", "-pk") + .distinct("object_id") + .values("pk") + ) + else: + subquery = ( + self.get_for_model(model, model_db=model_db) + .annotate(pk_not_exists=~models.Exists(model_qs)) + .filter(pk_not_exists=True) + .values("object_id") + .annotate(latest_pk=models.Max("pk")) + .values("latest_pk") + ) else: # We have to use a slow subquery. subquery = self.get_for_model(model, model_db=model_db).exclude(