From 63ac79f045c744f4e9556a57f7c524120ea504d2 Mon Sep 17 00:00:00 2001 From: Dave Lawrence Date: Thu, 8 Aug 2024 14:14:43 +0930 Subject: [PATCH] SACGF/variantgrid#831 - remove citext fields - be able to do like query --- genes/models.py | 15 ++++++++++++--- genes/views/views_autocomplete.py | 7 ++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/genes/models.py b/genes/models.py index 2d4669d36..18f5b1170 100644 --- a/genes/models.py +++ b/genes/models.py @@ -24,7 +24,7 @@ from django.db import models, IntegrityError, transaction from django.db.models import QuerySet, TextField from django.db.models.deletion import CASCADE, SET_NULL, PROTECT -from django.db.models.functions import Upper +from django.db.models.functions import Upper, Collate from django.db.models.query_utils import Q from django.db.models.signals import post_save, pre_delete from django.dispatch import receiver @@ -168,8 +168,11 @@ class UniProt(models.Model): def __str__(self): return self.accession - class GeneSymbol(models.Model, PreviewModelMixin): + """ + If you need to perform a 'like' query on the 'symbol' field, use get_deterministic_queryset() and filter on 'symbol_deterministic' + + """ symbol = TextField(primary_key=True, db_collation='case_insensitive') objects = ObjectManagerCachingRequest() @@ -177,6 +180,12 @@ class GeneSymbol(models.Model, PreviewModelMixin): class Meta: base_manager_name = 'objects' + @classmethod + def get_deterministic_queryset(cls) -> QuerySet['GeneSymbol']: + """ Adds 'symbol_deterministic' you can do like queries on """ + qs = cls.objects.all() + return qs.annotate(symbol_deterministic=Collate("symbol", "und-x-icu")) + @staticmethod def cast(symbol: Union[str, 'GeneSymbol']) -> Optional['GeneSymbol']: if isinstance(symbol, str): @@ -227,7 +236,7 @@ def preview(self): def has_different_genes(self, other: 'GeneSymbol') -> bool: """ - Tries to work out if genes are equivilant, not that sometimes refseq or ensembl assign gene ids to both the + Tries to work out if genes are equivalent, note that sometimes
RefSeq or Ensembl assign gene ids to both the symbol and the alias, but the other consortium only assigns to one. In that case we'd still like to treat them as the "same" """ diff --git a/genes/views/views_autocomplete.py b/genes/views/views_autocomplete.py index a6255d96b..c7c68d414 100644 --- a/genes/views/views_autocomplete.py +++ b/genes/views/views_autocomplete.py @@ -101,7 +101,7 @@ def get_user_queryset(self, user): @method_decorator(cache_page(WEEK_SECS), name='dispatch') class GeneSymbolAutocompleteView(AutocompleteView): - fields = ['symbol'] + fields = ['symbol_deterministic'] def sort_queryset(self, qs): return qs.order_by(Length("symbol").asc(), 'symbol') @@ -109,11 +109,12 @@ def sort_queryset(self, qs): def get_user_queryset(self, _user): """ Doesn't actually use user for genes """ annotation_consortium = self.forwarded.get('annotation_consortium', None) - qs = GeneSymbol.objects.all() + qs = GeneSymbol.get_deterministic_queryset() if annotation_consortium: qs = qs.filter(geneversion__gene__annotation_consortium=annotation_consortium) if self.q: - qs = qs.filter(symbol__istartswith=self.q) + # Case-insensitive prefix match (deterministic collation is case-sensitive): must start with the query, not just contain it + qs = qs.filter(symbol_deterministic__istartswith=self.q) return qs.distinct()