From de57a0794e830b59b82b5e899f250e4ee7dd36fd Mon Sep 17 00:00:00 2001 From: Alexander Parrill Date: Wed, 4 Dec 2024 10:47:05 -0500 Subject: [PATCH] Fix oplog search breaking with very long entries Postgres aborts queries that try to make a `tsvector` out of a string that is too large, causing oplog search to break. This patch reworks the FTS oplog query to truncate each individual field to its first 1,000,000 characters before building a `tsvector` from it, to avoid hitting that size limit. A drawback is that text after this cutoff point will not be searchable. Fixes #557 --- ghostwriter/oplog/consumers.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ghostwriter/oplog/consumers.py b/ghostwriter/oplog/consumers.py index 56c6ed29a..483ae80f7 100644 --- a/ghostwriter/oplog/consumers.py +++ b/ghostwriter/oplog/consumers.py @@ -9,7 +9,7 @@ # Django Imports from django.db.models import TextField, Func, Subquery, OuterRef, Value, F -from django.db.models.functions import Cast +from django.db.models.functions import Cast, Left from django.db.models.expressions import CombinedExpression from django.utils.timezone import make_aware from django.contrib.postgres.search import SearchVector, SearchQuery, SearchRank, SearchVectorField @@ -164,20 +164,20 @@ def get_log_entries(self, oplog_id: int, offset: int, user: User, filter: str | if spec.type == "json": continue - field = Cast(CombinedExpression( + field = CombinedExpression( F("extra_fields"), "->>", Value(spec.internal_name), - ), TextField()) + ) simple_vector_args.append(field) if spec.type == "rich_text": english_vector_args.append(field) - # Combine search vector - vector = TsVectorConcat( - SearchVector(*english_vector_args, config="english"), - SearchVector(*simple_vector_args, config="simple"), - ) + # Create and combine search vectors. 
+ # Limit inputs since PostgreSQL will abort the query if attempting to make a tsvector out of a huge string + vectors = [SearchVector(Left(Cast(va, TextField()), 1000000), config="english") for va in english_vector_args] + \ + [SearchVector(Left(Cast(va, TextField()), 1000000), config="simple") for va in simple_vector_args] + vector = TsVectorConcat(*vectors) # Build filter. # Search using both english and simple configs, to help match both types of vectors. Also use prefix