From 8b9596151af94c2ded456d2ecde388d50b855957 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matthias=20Schl=C3=B6gl?=
Date: Wed, 13 Nov 2024 10:34:57 +0100
Subject: [PATCH] fix: normalize special characters from search

resolves #362
---
 apis_ontology/filtersets.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/apis_ontology/filtersets.py b/apis_ontology/filtersets.py
index 9f21a41..d00868e 100644
--- a/apis_ontology/filtersets.py
+++ b/apis_ontology/filtersets.py
@@ -3,6 +3,7 @@
 from django.contrib.postgres.search import TrigramWordSimilarity
 from django.db.models.functions import Greatest
 from django.db import models
+import unicodedata
 
 from apis_core.apis_entities.filtersets import AbstractEntityFilterSet
 from apis_core.collections.models import SkosCollection, SkosCollectionContentObject
@@ -23,6 +24,17 @@ def remove_quotes(token):
     return token.strip('"')
 
 
+def remove_accents(input_str):
+    """Return ``input_str`` with diacritical marks removed.
+
+    Only combining marks are dropped after NFKD decomposition, so characters
+    without a decomposition (e.g. non-Latin letters) are kept instead of being
+    deleted, which would turn such a search token into an empty string.
+    """
+    nfkd_form = unicodedata.normalize("NFKD", input_str)
+    return "".join(ch for ch in nfkd_form if not unicodedata.combining(ch))
+
+
 ################
 # filter methods
 ################
@@ -49,7 +61,7 @@ def trigram_search_filter(queryset, fields, value):
     trig_vector_list = []
     for token in tokens:
         for field in fields:
-            trig_vector_list.append(TrigramWordSimilarity(token, field))
+            trig_vector_list.append(TrigramWordSimilarity(remove_accents(token), field))
     trig_vector = Greatest(*trig_vector_list, None)
     return (
         queryset.annotate(similarity=trig_vector)