From dbfffdfd35a0e8ec102bcf03a86c8193df1cc645 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Behmo?= Date: Thu, 17 Oct 2024 19:10:07 +0200 Subject: [PATCH] chore: refactor views for better mysql/es separation Instead of checking a boolean flag in multiple different places, we use class inheritance. This makes it possible to later override the view and implement our own using a different search backend, such as Meilisearch. --- notesapi/v1/urls.py | 8 +- notesapi/v1/views/__init__.py | 19 +++ notesapi/v1/{views.py => views/common.py} | 155 ++++++---------------- notesapi/v1/views/elasticsearch.py | 138 +++++++++++++++++++ notesapi/v1/views/exceptions.py | 2 + notesapi/v1/views/mysql.py | 0 notesserver/test_views.py | 6 +- notesserver/views.py | 41 +++--- 8 files changed, 226 insertions(+), 143 deletions(-) create mode 100644 notesapi/v1/views/__init__.py rename notesapi/v1/{views.py => views/common.py} (76%) create mode 100644 notesapi/v1/views/elasticsearch.py create mode 100644 notesapi/v1/views/exceptions.py create mode 100644 notesapi/v1/views/mysql.py diff --git a/notesapi/v1/urls.py b/notesapi/v1/urls.py index 543ba7a7..93131162 100644 --- a/notesapi/v1/urls.py +++ b/notesapi/v1/urls.py @@ -1,7 +1,7 @@ from django.urls import path, re_path from notesapi.v1.views import (AnnotationDetailView, AnnotationListView, - AnnotationRetireView, AnnotationSearchView) + AnnotationRetireView, get_views_module) app_name = "notesapi.v1" urlpatterns = [ path('annotations/', AnnotationListView.as_view(), name='annotations'), @@ -11,5 +11,9 @@ AnnotationDetailView.as_view(), name='annotations_detail' ), - path('search/', AnnotationSearchView.as_view(), name='annotations_search'), + path( + 'search/', + get_views_module().AnnotationSearchView.as_view(), + name='annotations_search' + ), ] diff --git a/notesapi/v1/views/__init__.py b/notesapi/v1/views/__init__.py new file mode 100644 index 00000000..eef7e9b4 --- /dev/null +++ b/notesapi/v1/views/__init__.py @@ -0,0 +1,19 @@ +from django.conf import settings + +from .common import ( + AnnotationDetailView, + AnnotationListView, + AnnotationRetireView, +) + +from .exceptions import SearchViewRuntimeError + +def get_views_module(): + """ + Import views from either mysql or elasticsearch backend + """ + if settings.ES_DISABLED: + from . import common as backend_module + else: + from . import elasticsearch as backend_module + return backend_module diff --git a/notesapi/v1/views.py b/notesapi/v1/views/common.py similarity index 76% rename from notesapi/v1/views.py rename to notesapi/v1/views/common.py index 7f479f18..bd917fb5 100644 --- a/notesapi/v1/views.py +++ b/notesapi/v1/views/common.py @@ -13,21 +13,8 @@ from rest_framework.views import APIView from notesapi.v1.models import Note -from notesapi.v1.search_indexes.documents import NoteDocument from notesapi.v1.serializers import NoteSerializer -if not settings.ES_DISABLED: - from elasticsearch_dsl import Search - from elasticsearch_dsl.connections import connections - from django_elasticsearch_dsl_drf.filter_backends import DefaultOrderingFilterBackend, HighlightBackend - from django_elasticsearch_dsl_drf.constants import ( - LOOKUP_FILTER_TERM, - LOOKUP_QUERY_IN, - SEPARATOR_LOOKUP_COMPLEX_VALUE, - ) - from notesapi.v1.search_indexes.paginators import NotesPagination as ESNotesPagination - from notesapi.v1.search_indexes.backends import CompoundSearchFilterBackend, FilteringFilterBackend - from notesapi.v1.search_indexes.serializers import NoteDocumentSerializer as NotesElasticSearchSerializer log = logging.getLogger(__name__) @@ -127,104 +114,43 @@ class AnnotationSearchView(ListAPIView): params = {} query_params = {} search_with_usage_id = False - document = NoteDocument search_fields = ('text', 'tags') - filter_fields = { - 'course_id': 'course_id', - 'user': 'user', - 'usage_id': { - 'field': 'usage_id', - 'lookups': [ - LOOKUP_QUERY_IN, - LOOKUP_FILTER_TERM, - ], - }, - - } - highlight_fields = { - 'text': { - 'enabled': True, - 'options': { - 'pre_tags': ['{elasticsearch_highlight_start}'], - 'post_tags': ['{elasticsearch_highlight_end}'], - 'number_of_fragments': 0, - }, - }, - 'tags': { - 'enabled': True, - 'options': { - 'pre_tags': ['{elasticsearch_highlight_start}'], - 'post_tags': ['{elasticsearch_highlight_end}'], - 'number_of_fragments': 0, - }, - }, - } ordering = ('-updated',) - def __init__(self, *args, **kwargs): - self.initiate_es_specific_state_if_is_enabled() - super().__init__(*args, **kwargs) - - def initiate_es_specific_state_if_is_enabled(self): - """ - Initiates elasticsearch specific state if elasticsearch is enabled. - - Should be called in the class `__init__` method. - """ - if not settings.ES_DISABLED: - self.client = connections.get_connection(self.document._get_using()) # pylint: disable=protected-access - self.index = self.document._index._name # pylint: disable=protected-access - self.mapping = self.document._doc_type.mapping.properties.name # pylint: disable=protected-access - # pylint: disable=protected-access - self.search = Search(using=self.client, index=self.index, doc_type=self.document._doc_type.name) @property - def is_es_disabled(self): + def is_text_search(self): """ - Predicate instance method. - - Search in DB when ES is not available or there is no need to bother it + We identify text search by the presence of a "text" parameter. Subclasses may + want to have a different behaviour in such cases. """ - - return settings.ES_DISABLED or 'text' not in self.params + return "text" in self.params def get_queryset(self): - if self.is_es_disabled: - queryset = Note.objects.filter(**self.query_params).order_by('-updated') - if 'text' in self.params: - queryset = queryset.filter( - Q(text__icontains=self.params['text']) | Q(tags__icontains=self.params['text']) - ) - else: - queryset = self.search.query() - queryset.model = self.document.Django.model - + queryset = Note.objects.filter(**self.query_params).order_by("-updated") + if "text" in self.params: + queryset = queryset.filter( + Q(text__icontains=self.params["text"]) + | Q(tags__icontains=self.params["text"]) + ) return queryset def get_serializer_class(self): """ Return the class to use for the serializer. - Defaults to using `NoteSerializer` if elasticsearch is disabled - or `NotesElasticSearchSerializer` if elasticsearch is enabled + Defaults to `NoteSerializer`. """ - - return NoteSerializer if self.is_es_disabled else NotesElasticSearchSerializer + return NoteSerializer @property def paginator(self): """ The paginator instance associated with the view and used data source, or `None`. """ - if not hasattr(self, '_paginator'): - if self.pagination_class is None: - self._paginator = None # pylint: disable=attribute-defined-outside-init - else: - self._paginator = ( # pylint: disable=attribute-defined-outside-init - self.pagination_class() - if self.is_es_disabled - else ESNotesPagination() - ) + if not hasattr(self, "_paginator"): + # pylint: disable=attribute-defined-outside-init + self._paginator = self.pagination_class() if self.pagination_class else None return self._paginator @@ -235,20 +161,17 @@ def filter_queryset(self, queryset): Do not filter additionally if mysql db used or use `CompoundSearchFilterBackend` and `HighlightBackend` if elasticsearch is the data source. """ - filter_backends = [] - if not self.is_es_disabled: - filter_backends = [ - FilteringFilterBackend, - CompoundSearchFilterBackend, - DefaultOrderingFilterBackend, - ] - if self.params.get('highlight'): - filter_backends.append(HighlightBackend) - + filter_backends = self.get_filter_backends() for backend in filter_backends: queryset = backend().filter_queryset(self.request, queryset, view=self) return queryset + def get_filter_backends(self): + """ + List of filter backends, each with a `filter_queryset` method. + """ + return [] + def list(self, *args, **kwargs): """ Returns list of students notes. @@ -268,22 +191,16 @@ def build_query_params_state(self): """ self.query_params = {} self.params = self.request.query_params.dict() - usage_ids = self.request.query_params.getlist('usage_id') + usage_ids = self.request.query_params.getlist("usage_id") if usage_ids: self.search_with_usage_id = True - if not self.is_es_disabled: - usage_ids = SEPARATOR_LOOKUP_COMPLEX_VALUE.join(usage_ids) - - self.query_params['usage_id__in'] = usage_ids + self.query_params["usage_id__in"] = usage_ids - if 'course_id' in self.params: - self.query_params['course_id'] = self.params['course_id'] + if "course_id" in self.params: + self.query_params["course_id"] = self.params["course_id"] - if 'user' in self.params: - if self.is_es_disabled: - self.query_params['user_id'] = self.params['user'] - else: - self.query_params['user'] = self.params['user'] + if "user" in self.params: + self.query_params["user_id"] = self.params["user"] def get(self, *args, **kwargs): """ @@ -305,10 +222,10 @@ def post(self, *args, **kwargs): Delete all annotations for a user. """ params = self.request.data - if 'user' not in params: + if "user" not in params: return Response(status=status.HTTP_400_BAD_REQUEST) - Note.objects.filter(user_id=params['user']).delete() + Note.objects.filter(user_id=params["user"]).delete() return Response(status=status.HTTP_204_NO_CONTENT) @@ -608,3 +525,15 @@ def delete(self, *args, **kwargs): # Annotation deleted successfully. return Response(status=status.HTTP_204_NO_CONTENT) + +def selftest(): + """ + No-op. + """ + return {} + +def heartbeat(): + """ + No-op + """ + return diff --git a/notesapi/v1/views/elasticsearch.py b/notesapi/v1/views/elasticsearch.py new file mode 100644 index 00000000..0d91b74d --- /dev/null +++ b/notesapi/v1/views/elasticsearch.py @@ -0,0 +1,138 @@ +import logging +import traceback + +from django_elasticsearch_dsl_drf.constants import ( + LOOKUP_FILTER_TERM, + LOOKUP_QUERY_IN, + SEPARATOR_LOOKUP_COMPLEX_VALUE, +) +from django_elasticsearch_dsl_drf.filter_backends import ( + DefaultOrderingFilterBackend, + HighlightBackend, +) +from elasticsearch.exceptions import TransportError +from elasticsearch_dsl import Search +from elasticsearch_dsl.connections import connections + +from notesapi.v1.search_indexes.backends import ( + CompoundSearchFilterBackend, + FilteringFilterBackend, +) +from notesapi.v1.search_indexes.documents import NoteDocument +from notesapi.v1.search_indexes.paginators import NotesPagination as ESNotesPagination +from notesapi.v1.search_indexes.serializers import ( + NoteDocumentSerializer as NotesElasticSearchSerializer, +) + +from .common import AnnotationSearchView as BaseAnnotationSearchView +from .exceptions import SearchViewRuntimeError + +logger = logging.getLogger(__name__) + + +class AnnotationSearchView(BaseAnnotationSearchView): + + # https://django-elasticsearch-dsl-drf.readthedocs.io/en/latest/advanced_usage_examples.html + filter_fields = { + "course_id": "course_id", + "user": "user", + "usage_id": { + "field": "usage_id", + "lookups": [ + LOOKUP_QUERY_IN, + LOOKUP_FILTER_TERM, + ], + }, + } + highlight_fields = { + "text": { + "enabled": True, + "options": { + "pre_tags": ["{elasticsearch_highlight_start}"], + "post_tags": ["{elasticsearch_highlight_end}"], + "number_of_fragments": 0, + }, + }, + "tags": { + "enabled": True, + "options": { + "pre_tags": ["{elasticsearch_highlight_start}"], + "post_tags": ["{elasticsearch_highlight_end}"], + "number_of_fragments": 0, + }, + }, + } + + def __init__(self, *args, **kwargs): + self.client = connections.get_connection(NoteDocument._get_using()) # pylint: disable=protected-access + self.index = NoteDocument._index._name # pylint: disable=protected-access + self.mapping = NoteDocument._doc_type.mapping.properties.name # pylint: disable=protected-access + # pylint: disable=protected-access + self.search = Search( + using=self.client, index=self.index, doc_type=NoteDocument._doc_type.name + ) + super().__init__(*args, **kwargs) + + def get_serializer_class(self): + """ + Use Elasticsearch-specific serializer. + """ + if not self.is_text_search: + return super().get_serializer_class() + return NotesElasticSearchSerializer + + def get_queryset(self): + """ + Hackish method that doesn't quite return a Django queryset. + """ + if not self.is_text_search: + return super().get_queryset() + queryset = self.search.query() + queryset.model = NoteDocument.Django.model + return queryset + + def get_filter_backends(self): + if not self.is_text_search: + return super().get_filter_backends() + filter_backends = [ + FilteringFilterBackend, + CompoundSearchFilterBackend, + DefaultOrderingFilterBackend, + ] + if self.params.get("highlight"): + filter_backends.append(HighlightBackend) + return filter_backends + + @property + def pagination_class(self): + if not self.is_text_search: + return super().pagination_class + return ESNotesPagination + + def build_query_params_state(self): + super().build_query_params_state() + if not self.is_text_search: + return + if "usage_id__in" in self.query_params: + usage_ids = self.query_params["usage_id__in"] + usage_ids = SEPARATOR_LOOKUP_COMPLEX_VALUE.join(usage_ids) + self.query_params["usage_id__in"] = usage_ids + + if "user" in self.params: + self.query_params["user"] = self.query_params.pop("user_id") + + +def heartbeat(): + if not get_es().ping(): + raise SearchViewRuntimeError("es") + + +def selftest(): + try: + return {"es": get_es().info()} + except TransportError as e: + raise SearchViewRuntimeError({"es_error": traceback.format_exc()}) from e + + +def get_es(): + return connections.get_connection() diff --git a/notesapi/v1/views/exceptions.py b/notesapi/v1/views/exceptions.py new file mode 100644 index 00000000..835bd7f6 --- /dev/null +++ b/notesapi/v1/views/exceptions.py @@ -0,0 +1,2 @@ +class SearchViewRuntimeError(RuntimeError): + pass diff --git a/notesapi/v1/views/mysql.py b/notesapi/v1/views/mysql.py new file mode 100644 index 00000000..e69de29b diff --git a/notesserver/test_views.py b/notesserver/test_views.py index 23a8dee9..301724e7 100644 --- a/notesserver/test_views.py +++ b/notesserver/test_views.py @@ -22,7 +22,7 @@ def test_heartbeat(self): self.assertEqual(json.loads(bytes.decode(response.content, 'utf-8')), {"OK": True}) @skipIf(settings.ES_DISABLED, "Do not test if Elasticsearch service is disabled.") - @patch('notesserver.views.get_es') + @patch("notesapi.v1.views.elasticsearch.get_es") def test_heartbeat_failure_es(self, mocked_get_es): """ Elasticsearch is not reachable. @@ -65,7 +65,7 @@ def test_selftest_status(self): @skipIf(settings.ES_DISABLED, "Do not test if Elasticsearch service is disabled.") @patch('notesserver.views.datetime', datetime=Mock(wraps=datetime.datetime)) - @patch('notesserver.views.get_es') + @patch("notesapi.v1.views.elasticsearch.get_es") def test_selftest_data(self, mocked_get_es, mocked_datetime): """ Test returned data on success. @@ -101,7 +101,7 @@ def test_selftest_data_es_disabled(self, mocked_datetime): ) @skipIf(settings.ES_DISABLED, "Do not test if Elasticsearch service is disabled.") - @patch('notesserver.views.get_es') + @patch("notesapi.v1.views.elasticsearch.get_es") def test_selftest_failure_es(self, mocked_get_es): """ Elasticsearch is not reachable on selftest. diff --git a/notesserver/views.py b/notesserver/views.py index 994cd8fc..a90b48c3 100644 --- a/notesserver/views.py +++ b/notesserver/views.py @@ -1,11 +1,9 @@ import datetime import traceback -from django.conf import settings from django.db import connection from django.http import JsonResponse from django.http import HttpResponse -from elasticsearch.exceptions import TransportError from rest_framework import status from rest_framework.decorators import api_view, permission_classes from rest_framework.permissions import AllowAny @@ -15,11 +13,9 @@ import newrelic.agent except ImportError: # pragma: no cover newrelic = None # pylint: disable=invalid-name -if not settings.ES_DISABLED: - from elasticsearch_dsl.connections import connections - def get_es(): - return connections.get_connection() +from notesapi.v1.views import get_views_module +from notesapi.v1.views import SearchViewRuntimeError @api_view(['GET']) @@ -56,8 +52,10 @@ def heartbeat(request): except Exception: # pylint: disable=broad-exception-caught return JsonResponse({"OK": False, "check": "db"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - if not settings.ES_DISABLED and not get_es().ping(): - return JsonResponse({"OK": False, "check": "es"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + try: + get_views_module().heartbeat() + except SearchViewRuntimeError as e: + return JsonResponse({"OK": False, "check": e.args[0]}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) return JsonResponse({"OK": True}, status=status.HTTP_200_OK) @@ -70,18 +68,18 @@ def selftest(request): """ start = datetime.datetime.now() - if not settings.ES_DISABLED: - try: - es_status = get_es().info() - except TransportError: - return Response( - {"es_error": traceback.format_exc()}, - status=status.HTTP_500_INTERNAL_SERVER_ERROR - ) + response = {} + try: + response.update(get_views_module().selftest()) + except SearchViewRuntimeError as e: + return Response( + e.args[0], + status=status.HTTP_500_INTERNAL_SERVER_ERROR + ) try: db_status() - database = "OK" + response["db"] = "OK" except Exception: # pylint: disable=broad-exception-caught return Response( {"db_error": traceback.format_exc()}, @@ -90,14 +88,7 @@ def selftest(request): end = datetime.datetime.now() delta = end - start - - response = { - "db": database, - "time_elapsed": int(delta.total_seconds() * 1000) # In milliseconds. - } - - if not settings.ES_DISABLED: - response['es'] = es_status + response["time_elapsed"] = int(delta.total_seconds() * 1000) # In milliseconds. return Response(response)