From f6184846f93ae5e8a5055c755b06fbe58cbd2383 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Behmo?= Date: Thu, 17 Oct 2024 23:12:07 +0200 Subject: [PATCH 1/5] feat: introduce "make compile-requirement" target This is convenient to compile dependencies without upgrading them. --- Makefile | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index b3d9a6ad..2a20a103 100644 --- a/Makefile +++ b/Makefile @@ -59,18 +59,21 @@ develop: requirements test.requirements piptools: ## install pinned version of pip-compile and pip-sync pip install -r requirements/pip-tools.txt -upgrade: export CUSTOM_COMPILE_COMMAND=make upgrade -upgrade: piptools ## update the requirements/*.txt files with the latest packages satisfying requirements/*.in +compile-requirements: export CUSTOM_COMPILE_COMMAND=make upgrade +compile-requirements: piptools ## Re-compile *.in requirements to *.txt (without upgrading) # Make sure to compile files after any other files they include! - pip-compile --upgrade --rebuild --allow-unsafe -o requirements/pip.txt requirements/pip.in - pip-compile --upgrade -o requirements/pip-tools.txt requirements/pip-tools.in + pip-compile ${COMPILE_OPTS} --rebuild --allow-unsafe -o requirements/pip.txt requirements/pip.in + pip-compile ${COMPILE_OPTS} -o requirements/pip-tools.txt requirements/pip-tools.in pip install -qr requirements/pip.txt pip install -qr requirements/pip-tools.txt - pip-compile --upgrade --allow-unsafe -o requirements/base.txt requirements/base.in - pip-compile --upgrade --allow-unsafe -o requirements/test.txt requirements/test.in - pip-compile --upgrade --allow-unsafe -o requirements/ci.txt requirements/ci.in - pip-compile --upgrade --allow-unsafe -o requirements/quality.txt requirements/quality.in + pip-compile ${COMPILE_OPTS} --allow-unsafe -o requirements/base.txt requirements/base.in + pip-compile ${COMPILE_OPTS} --allow-unsafe -o requirements/test.txt requirements/test.in + pip-compile ${COMPILE_OPTS} --allow-unsafe -o requirements/ci.txt requirements/ci.in + pip-compile ${COMPILE_OPTS} --allow-unsafe -o requirements/quality.txt requirements/quality.in # Let tox control the Django version for tests grep -e "^django==" requirements/base.txt > requirements/django.txt sed '/^[dD]jango==/d' requirements/test.txt > requirements/test.tmp mv requirements/test.tmp requirements/test.txt + +upgrade: ## update the requirements/*.txt files with the latest packages satisfying requirements/*.in + $(MAKE) compile-requirements COMPILE_OPTS="--upgrade" From 7a9aa32f92670bc897d084a9288041dde99f054a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Behmo?= Date: Tue, 12 Nov 2024 09:08:58 +0100 Subject: [PATCH 2/5] chore: simplify tox/make test commands This makes it possible to run the make commands directly without going through tox (though tox targets keep working, of course). --- Makefile | 14 +++++++++----- tox.ini | 4 ---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 2a20a103..0c689738 100644 --- a/Makefile +++ b/Makefile @@ -7,11 +7,11 @@ test: clean python -Wd -m pytest pii_check: test.requirements pii_clean - code_annotations django_find_annotations --config_file .pii_annotations.yml \ + DJANGO_SETTINGS_MODULE=notesserver.settings.test code_annotations django_find_annotations --config_file .pii_annotations.yml \ --lint --report --coverage check_keywords: ## Scan the Django models in all installed apps in this project for restricted field names - python manage.py check_reserved_keywords --override_file db_keyword_overrides.yml + DJANGO_SETTINGS_MODULE=notesserver.settings.test python manage.py check_reserved_keywords --override_file db_keyword_overrides.yml run: ./manage.py runserver 0.0.0.0:8120 @@ -26,9 +26,13 @@ pii_clean: rm -rf pii_report mkdir -p pii_report -quality: - pycodestyle --config=.pycodestyle $(PACKAGES) - pylint $(PACKAGES) +quality: pycodestyle pylint + +pycodestyle: + pycodestyle --config=.pycodestyle $(PACKAGES) + +pylint: + DJANGO_SETTINGS_MODULE=notesserver.settings.test pylint $(PACKAGES) diff-coverage: diff-cover build/coverage/coverage.xml --html-report build/coverage/diff_cover.html diff --git a/tox.ini b/tox.ini index a9688cfb..2c64d0f9 100644 --- a/tox.ini +++ b/tox.ini @@ -24,8 +24,6 @@ commands = [testenv:quality] envdir = {toxworkdir}/{envname} -setenv = - DJANGO_SETTINGS_MODULE = notesserver.settings.test allowlist_externals = make deps = @@ -35,8 +33,6 @@ commands = [testenv:pii_check] envdir = {toxworkdir}/{envname} -setenv = - DJANGO_SETTINGS_MODULE = notesserver.settings.test allowlist_externals = make deps = From 3ff7cbde55ac01034eda6963c8c8d5e37d5ea445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Behmo?= Date: Tue, 12 Nov 2024 09:50:11 +0100 Subject: [PATCH 3/5] chore: more convenient unit test running Previously, it was not possible to run unit tests locally without manually creating mysql & elasticsearch containers. Here, we create a `make pytest` target that automatically starts the required containers. --- Makefile | 8 ++++++++ README.rst | 17 +++++++++++++++-- notesserver/docker-compose.test.yml | 18 ++++++++++++++++++ 3 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 notesserver/docker-compose.test.yml diff --git a/Makefile b/Makefile index 0c689738..2082bb82 100644 --- a/Makefile +++ b/Makefile @@ -3,9 +3,17 @@ PACKAGES = notesserver notesapi validate: test.requirements test +pytest: test-start-services test test-stop-services + test: clean python -Wd -m pytest +test-start-services: + docker compose -f notesserver/docker-compose.test.yml --project-name=edxnotesapi_test up -d --remove-orphans + +test-stop-services: + docker compose -f notesserver/docker-compose.test.yml --project-name=edxnotesapi_test stop + pii_check: test.requirements pii_clean DJANGO_SETTINGS_MODULE=notesserver.settings.test code_annotations django_find_annotations --config_file .pii_annotations.yml \ --lint --report --coverage diff --git a/README.rst b/README.rst index fbaaec7e..321452c0 100644 --- a/README.rst +++ b/README.rst @@ -47,8 +47,21 @@ Configuration Running Tests ************* -Run ``make validate`` install the requirements, run the tests, and run -lint. +Install requirements:: + + make test.requirements + +Start mysql/elasticsearch services:: + + make test-start-services + +Run unit tests:: + + make test + +Run quality checks:: + + make quality How To Resync The Index *********************** diff --git a/notesserver/docker-compose.test.yml b/notesserver/docker-compose.test.yml new file mode 100644 index 00000000..0dc73b6c --- /dev/null +++ b/notesserver/docker-compose.test.yml @@ -0,0 +1,18 @@ +services: + mysql: + image: mysql:8.0 + environment: + MYSQL_ROOT_PASSWORD: + MYSQL_ALLOW_EMPTY_PASSWORD: "yes" + MYSQL_DATABASE: "edx_notes_api" + ports: + - 127.0.0.1:3306:3306 + + elasticsearch: + image: elasticsearch:7.13.4 + environment: + discovery.type: single-node + bootstrap.memory_lock: "true" + ES_JAVA_OPTS: "-Xms512m -Xmx512m" + ports: + - 127.0.0.1:9200:9200 From dbfffdfd35a0e8ec102bcf03a86c8193df1cc645 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Behmo?= Date: Thu, 17 Oct 2024 19:10:07 +0200 Subject: [PATCH 4/5] chore: refactor views for better mysql/es separation Instead of checking a boolean flag in multiple different places, we use class inheritance. This makes it possible to later override the view and implement our own using a different search backend, such as Meilisearch. --- notesapi/v1/urls.py | 8 +- notesapi/v1/views/__init__.py | 19 +++ notesapi/v1/{views.py => views/common.py} | 155 ++++++---------------- notesapi/v1/views/elasticsearch.py | 138 +++++++++++++++++++ notesapi/v1/views/exceptions.py | 2 + notesapi/v1/views/mysql.py | 0 notesserver/test_views.py | 6 +- notesserver/views.py | 41 +++--- 8 files changed, 226 insertions(+), 143 deletions(-) create mode 100644 notesapi/v1/views/__init__.py rename notesapi/v1/{views.py => views/common.py} (76%) create mode 100644 notesapi/v1/views/elasticsearch.py create mode 100644 notesapi/v1/views/exceptions.py create mode 100644 notesapi/v1/views/mysql.py diff --git a/notesapi/v1/urls.py b/notesapi/v1/urls.py index 543ba7a7..93131162 100644 --- a/notesapi/v1/urls.py +++ b/notesapi/v1/urls.py @@ -1,7 +1,7 @@ from django.urls import path, re_path from notesapi.v1.views import (AnnotationDetailView, AnnotationListView, - AnnotationRetireView, AnnotationSearchView) + AnnotationRetireView, get_views_module) app_name = "notesapi.v1" urlpatterns = [ path('annotations/', AnnotationListView.as_view(), name='annotations'), @@ -11,5 +11,9 @@ AnnotationDetailView.as_view(), name='annotations_detail' ), - path('search/', AnnotationSearchView.as_view(), name='annotations_search'), + path( + 'search/', + get_views_module().AnnotationSearchView.as_view(), + name='annotations_search' + ), ] diff --git a/notesapi/v1/views/__init__.py b/notesapi/v1/views/__init__.py new file mode 100644 index 00000000..eef7e9b4 --- /dev/null +++ b/notesapi/v1/views/__init__.py @@ -0,0 +1,19 @@ +from django.conf import settings + +from .common import ( + AnnotationDetailView, + AnnotationListView, + AnnotationRetireView, +) + +from .exceptions import SearchViewRuntimeError + +def get_views_module(): + """ + Import views from either mysql or elasticsearch backend + """ + if settings.ES_DISABLED: + from . import common as backend_module + else: + from . import elasticsearch as backend_module + return backend_module diff --git a/notesapi/v1/views.py b/notesapi/v1/views/common.py similarity index 76% rename from notesapi/v1/views.py rename to notesapi/v1/views/common.py index 7f479f18..bd917fb5 100644 --- a/notesapi/v1/views.py +++ b/notesapi/v1/views/common.py @@ -13,21 +13,8 @@ from rest_framework.views import APIView from notesapi.v1.models import Note -from notesapi.v1.search_indexes.documents import NoteDocument from notesapi.v1.serializers import NoteSerializer -if not settings.ES_DISABLED: - from elasticsearch_dsl import Search - from elasticsearch_dsl.connections import connections - from django_elasticsearch_dsl_drf.filter_backends import DefaultOrderingFilterBackend, HighlightBackend - from django_elasticsearch_dsl_drf.constants import ( - LOOKUP_FILTER_TERM, - LOOKUP_QUERY_IN, - SEPARATOR_LOOKUP_COMPLEX_VALUE, - ) - from notesapi.v1.search_indexes.paginators import NotesPagination as ESNotesPagination - from notesapi.v1.search_indexes.backends import CompoundSearchFilterBackend, FilteringFilterBackend - from notesapi.v1.search_indexes.serializers import NoteDocumentSerializer as NotesElasticSearchSerializer log = logging.getLogger(__name__) @@ -127,104 +114,43 @@ class AnnotationSearchView(ListAPIView): params = {} query_params = {} search_with_usage_id = False - document = NoteDocument search_fields = ('text', 'tags') - filter_fields = { - 'course_id': 'course_id', - 'user': 'user', - 'usage_id': { - 'field': 'usage_id', - 'lookups': [ - LOOKUP_QUERY_IN, - LOOKUP_FILTER_TERM, - ], - }, - - } - highlight_fields = { - 'text': { - 'enabled': True, - 'options': { - 'pre_tags': ['{elasticsearch_highlight_start}'], - 'post_tags': ['{elasticsearch_highlight_end}'], - 'number_of_fragments': 0, - }, - }, - 'tags': { - 'enabled': True, - 'options': { - 'pre_tags': ['{elasticsearch_highlight_start}'], - 'post_tags': ['{elasticsearch_highlight_end}'], - 'number_of_fragments': 0, - }, - }, - } ordering = ('-updated',) - def __init__(self, *args, **kwargs): - self.initiate_es_specific_state_if_is_enabled() - super().__init__(*args, **kwargs) - - def initiate_es_specific_state_if_is_enabled(self): - """ - Initiates elasticsearch specific state if elasticsearch is enabled. - - Should be called in the class `__init__` method. - """ - if not settings.ES_DISABLED: - self.client = connections.get_connection(self.document._get_using()) # pylint: disable=protected-access - self.index = self.document._index._name # pylint: disable=protected-access - self.mapping = self.document._doc_type.mapping.properties.name # pylint: disable=protected-access - # pylint: disable=protected-access - self.search = Search(using=self.client, index=self.index, doc_type=self.document._doc_type.name) @property - def is_es_disabled(self): + def is_text_search(self): """ - Predicate instance method. - - Search in DB when ES is not available or there is no need to bother it + We identify text search by the presence of a "text" parameter. Subclasses may + want to have a different behaviour in such cases. """ - - return settings.ES_DISABLED or 'text' not in self.params + return "text" in self.params def get_queryset(self): - if self.is_es_disabled: - queryset = Note.objects.filter(**self.query_params).order_by('-updated') - if 'text' in self.params: - queryset = queryset.filter( - Q(text__icontains=self.params['text']) | Q(tags__icontains=self.params['text']) - ) - else: - queryset = self.search.query() - queryset.model = self.document.Django.model - + queryset = Note.objects.filter(**self.query_params).order_by("-updated") + if "text" in self.params: + queryset = queryset.filter( + Q(text__icontains=self.params["text"]) + | Q(tags__icontains=self.params["text"]) + ) return queryset def get_serializer_class(self): """ Return the class to use for the serializer. - Defaults to using `NoteSerializer` if elasticsearch is disabled - or `NotesElasticSearchSerializer` if elasticsearch is enabled + Defaults to `NoteSerializer`. """ - - return NoteSerializer if self.is_es_disabled else NotesElasticSearchSerializer + return NoteSerializer @property def paginator(self): """ The paginator instance associated with the view and used data source, or `None`. """ - if not hasattr(self, '_paginator'): - if self.pagination_class is None: - self._paginator = None # pylint: disable=attribute-defined-outside-init - else: - self._paginator = ( # pylint: disable=attribute-defined-outside-init - self.pagination_class() - if self.is_es_disabled - else ESNotesPagination() - ) + if not hasattr(self, "_paginator"): + # pylint: disable=attribute-defined-outside-init + self._paginator = self.pagination_class() if self.pagination_class else None return self._paginator @@ -235,20 +161,17 @@ def filter_queryset(self, queryset): Do not filter additionally if mysql db used or use `CompoundSearchFilterBackend` and `HighlightBackend` if elasticsearch is the data source. """ - filter_backends = [] - if not self.is_es_disabled: - filter_backends = [ - FilteringFilterBackend, - CompoundSearchFilterBackend, - DefaultOrderingFilterBackend, - ] - if self.params.get('highlight'): - filter_backends.append(HighlightBackend) - + filter_backends = self.get_filter_backends() for backend in filter_backends: queryset = backend().filter_queryset(self.request, queryset, view=self) return queryset + def get_filter_backends(self): + """ + List of filter backends, each with a `filter_queryset` method. + """ + return [] + def list(self, *args, **kwargs): """ Returns list of students notes. @@ -268,22 +191,16 @@ def build_query_params_state(self): """ self.query_params = {} self.params = self.request.query_params.dict() - usage_ids = self.request.query_params.getlist('usage_id') + usage_ids = self.request.query_params.getlist("usage_id") if usage_ids: self.search_with_usage_id = True - if not self.is_es_disabled: - usage_ids = SEPARATOR_LOOKUP_COMPLEX_VALUE.join(usage_ids) - - self.query_params['usage_id__in'] = usage_ids + self.query_params["usage_id__in"] = usage_ids - if 'course_id' in self.params: - self.query_params['course_id'] = self.params['course_id'] + if "course_id" in self.params: + self.query_params["course_id"] = self.params["course_id"] - if 'user' in self.params: - if self.is_es_disabled: - self.query_params['user_id'] = self.params['user'] - else: - self.query_params['user'] = self.params['user'] + if "user" in self.params: + self.query_params["user_id"] = self.params["user"] def get(self, *args, **kwargs): """ @@ -305,10 +222,10 @@ def post(self, *args, **kwargs): Delete all annotations for a user. """ params = self.request.data - if 'user' not in params: + if "user" not in params: return Response(status=status.HTTP_400_BAD_REQUEST) - Note.objects.filter(user_id=params['user']).delete() + Note.objects.filter(user_id=params["user"]).delete() return Response(status=status.HTTP_204_NO_CONTENT) @@ -608,3 +525,15 @@ def delete(self, *args, **kwargs): # Annotation deleted successfully. return Response(status=status.HTTP_204_NO_CONTENT) + +def selftest(): + """ + No-op. + """ + return {} + +def heartbeat(): + """ + No-op + """ + return diff --git a/notesapi/v1/views/elasticsearch.py b/notesapi/v1/views/elasticsearch.py new file mode 100644 index 00000000..0d91b74d --- /dev/null +++ b/notesapi/v1/views/elasticsearch.py @@ -0,0 +1,138 @@ +import logging +import traceback + +from django_elasticsearch_dsl_drf.constants import ( + LOOKUP_FILTER_TERM, + LOOKUP_QUERY_IN, + SEPARATOR_LOOKUP_COMPLEX_VALUE, +) +from django_elasticsearch_dsl_drf.filter_backends import ( + DefaultOrderingFilterBackend, + HighlightBackend, +) +from elasticsearch.exceptions import TransportError +from elasticsearch_dsl import Search +from elasticsearch_dsl.connections import connections + +from notesapi.v1.search_indexes.backends import ( + CompoundSearchFilterBackend, + FilteringFilterBackend, +) +from notesapi.v1.search_indexes.documents import NoteDocument +from notesapi.v1.search_indexes.paginators import NotesPagination as ESNotesPagination +from notesapi.v1.search_indexes.serializers import ( + NoteDocumentSerializer as NotesElasticSearchSerializer, +) + +from .common import AnnotationSearchView as BaseAnnotationSearchView +from .exceptions import SearchViewRuntimeError + +logger = logging.getLogger(__name__) + + +class AnnotationSearchView(BaseAnnotationSearchView): + + # https://django-elasticsearch-dsl-drf.readthedocs.io/en/latest/advanced_usage_examples.html + filter_fields = { + "course_id": "course_id", + "user": "user", + "usage_id": { + "field": "usage_id", + "lookups": [ + LOOKUP_QUERY_IN, + LOOKUP_FILTER_TERM, + ], + }, + } + highlight_fields = { + "text": { + "enabled": True, + "options": { + "pre_tags": ["{elasticsearch_highlight_start}"], + "post_tags": ["{elasticsearch_highlight_end}"], + "number_of_fragments": 0, + }, + }, + "tags": { + "enabled": True, + "options": { + "pre_tags": ["{elasticsearch_highlight_start}"], + "post_tags": ["{elasticsearch_highlight_end}"], + "number_of_fragments": 0, + }, + }, + } + + def __init__(self, *args, **kwargs): + self.client = connections.get_connection(NoteDocument._get_using()) # pylint: disable=protected-access + self.index = NoteDocument._index._name # pylint: disable=protected-access + self.mapping = NoteDocument._doc_type.mapping.properties.name # pylint: disable=protected-access + # pylint: disable=protected-access + self.search = Search( + using=self.client, index=self.index, doc_type=NoteDocument._doc_type.name + ) + super().__init__(*args, **kwargs) + + def get_serializer_class(self): + """ + Use Elasticsearch-specific serializer. + """ + if not self.is_text_search: + return super().get_serializer_class() + return NotesElasticSearchSerializer + + def get_queryset(self): + """ + Hackish method that doesn't quite return a Django queryset. + """ + if not self.is_text_search: + return super().get_queryset() + queryset = self.search.query() + queryset.model = NoteDocument.Django.model + return queryset + + def get_filter_backends(self): + if not self.is_text_search: + return super().get_filter_backends() + filter_backends = [ + FilteringFilterBackend, + CompoundSearchFilterBackend, + DefaultOrderingFilterBackend, + ] + if self.params.get("highlight"): + filter_backends.append(HighlightBackend) + return filter_backends + + @property + def pagination_class(self): + if not self.is_text_search: + return super().pagination_class + return ESNotesPagination + + def build_query_params_state(self): + super().build_query_params_state() + if not self.is_text_search: + return + if "usage_id__in" in self.query_params: + usage_ids = self.query_params["usage_id__in"] + usage_ids = SEPARATOR_LOOKUP_COMPLEX_VALUE.join(usage_ids) + self.query_params["usage_id__in"] = usage_ids + + if "user" in self.params: + self.query_params["user"] = self.query_params.pop("user_id") + + +def heartbeat(): + if not get_es().ping(): + raise SearchViewRuntimeError("es") + + +def selftest(): + try: + return {"es": get_es().info()} + except TransportError as e: + raise SearchViewRuntimeError({"es_error": traceback.format_exc()}) from e + + +def get_es(): + return connections.get_connection() diff --git a/notesapi/v1/views/exceptions.py b/notesapi/v1/views/exceptions.py new file mode 100644 index 00000000..835bd7f6 --- /dev/null +++ b/notesapi/v1/views/exceptions.py @@ -0,0 +1,2 @@ +class SearchViewRuntimeError(RuntimeError): + pass diff --git a/notesapi/v1/views/mysql.py b/notesapi/v1/views/mysql.py new file mode 100644 index 00000000..e69de29b diff --git a/notesserver/test_views.py b/notesserver/test_views.py index 23a8dee9..301724e7 100644 --- a/notesserver/test_views.py +++ b/notesserver/test_views.py @@ -22,7 +22,7 @@ def test_heartbeat(self): self.assertEqual(json.loads(bytes.decode(response.content, 'utf-8')), {"OK": True}) @skipIf(settings.ES_DISABLED, "Do not test if Elasticsearch service is disabled.") - @patch('notesserver.views.get_es') + @patch("notesapi.v1.views.elasticsearch.get_es") def test_heartbeat_failure_es(self, mocked_get_es): """ Elasticsearch is not reachable. @@ -65,7 +65,7 @@ def test_selftest_status(self): @skipIf(settings.ES_DISABLED, "Do not test if Elasticsearch service is disabled.") @patch('notesserver.views.datetime', datetime=Mock(wraps=datetime.datetime)) - @patch('notesserver.views.get_es') + @patch("notesapi.v1.views.elasticsearch.get_es") def test_selftest_data(self, mocked_get_es, mocked_datetime): """ Test returned data on success. @@ -101,7 +101,7 @@ def test_selftest_data_es_disabled(self, mocked_datetime): ) @skipIf(settings.ES_DISABLED, "Do not test if Elasticsearch service is disabled.") - @patch('notesserver.views.get_es') + @patch("notesapi.v1.views.elasticsearch.get_es") def test_selftest_failure_es(self, mocked_get_es): """ Elasticsearch is not reachable on selftest. diff --git a/notesserver/views.py b/notesserver/views.py index 994cd8fc..a90b48c3 100644 --- a/notesserver/views.py +++ b/notesserver/views.py @@ -1,11 +1,9 @@ import datetime import traceback -from django.conf import settings from django.db import connection from django.http import JsonResponse from django.http import HttpResponse -from elasticsearch.exceptions import TransportError from rest_framework import status from rest_framework.decorators import api_view, permission_classes from rest_framework.permissions import AllowAny @@ -15,11 +13,9 @@ import newrelic.agent except ImportError: # pragma: no cover newrelic = None # pylint: disable=invalid-name -if not settings.ES_DISABLED: - from elasticsearch_dsl.connections import connections - def get_es(): - return connections.get_connection() +from notesapi.v1.views import get_views_module +from notesapi.v1.views import SearchViewRuntimeError @api_view(['GET']) @@ -56,8 +52,10 @@ def heartbeat(request): except Exception: # pylint: disable=broad-exception-caught return JsonResponse({"OK": False, "check": "db"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - if not settings.ES_DISABLED and not get_es().ping(): - return JsonResponse({"OK": False, "check": "es"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + try: + get_views_module().heartbeat() + except SearchViewRuntimeError as e: + return JsonResponse({"OK": False, "check": e.args[0]}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) return JsonResponse({"OK": True}, status=status.HTTP_200_OK) @@ -70,18 +68,18 @@ def selftest(request): """ start = datetime.datetime.now() - if not settings.ES_DISABLED: - try: - es_status = get_es().info() - except TransportError: - return Response( - {"es_error": traceback.format_exc()}, - status=status.HTTP_500_INTERNAL_SERVER_ERROR - ) + response = {} + try: + response.update(get_views_module().selftest()) + except SearchViewRuntimeError as e: + return Response( + e.args[0], + status=status.HTTP_500_INTERNAL_SERVER_ERROR + ) try: db_status() - database = "OK" + response["db"] = "OK" except Exception: # pylint: disable=broad-exception-caught return Response( {"db_error": traceback.format_exc()}, @@ -90,14 +88,7 @@ def selftest(request): end = datetime.datetime.now() delta = end - start - - response = { - "db": database, - "time_elapsed": int(delta.total_seconds() * 1000) # In milliseconds. - } - - if not settings.ES_DISABLED: - response['es'] = es_status + response["time_elapsed"] = int(delta.total_seconds() * 1000) # In milliseconds. return Response(response) From 3e4c8cd4fcae678b965d500bde6f4b222ef70390 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Behmo?= Date: Thu, 17 Oct 2024 23:03:41 +0200 Subject: [PATCH 5/5] feat: meilisearch backend for notes search This is a very simple and basic backend. It is based on Django signals, just like the Elasticsearch backend. But it is much simpler, in the sense that there are just two signals: one for saving documents and one for deletion. This backend is limited, in the sense that it does not support highlighting -- but that's probably not such a big deal. To start using this backend, define the following settings: ES_DISABLED = True MEILISEARCH_ENABLED = True MEILISEARCH_URL = "http://meilisearch:7700" MEILISEARCH_API_KEY = "s3cr3t" MEILISEARCH_INDEX = "tutor_student_notes" --- notesapi/v1/tests/test_meilisearch.py | 96 +++++++++ notesapi/v1/urls.py | 4 +- notesapi/v1/views/__init__.py | 19 +- notesapi/v1/views/common.py | 283 ++++++++++++++------------ notesapi/v1/views/elasticsearch.py | 32 +-- notesapi/v1/views/meilisearch.py | 193 ++++++++++++++++++ notesserver/views.py | 6 +- requirements/base.in | 1 + requirements/base.txt | 14 +- requirements/quality.txt | 27 +++ requirements/test.txt | 21 ++ 11 files changed, 539 insertions(+), 157 deletions(-) create mode 100644 notesapi/v1/tests/test_meilisearch.py create mode 100644 notesapi/v1/views/meilisearch.py diff --git a/notesapi/v1/tests/test_meilisearch.py b/notesapi/v1/tests/test_meilisearch.py new file mode 100644 index 00000000..abcc2d1f --- /dev/null +++ b/notesapi/v1/tests/test_meilisearch.py @@ -0,0 +1,96 @@ +from unittest.mock import Mock, patch + +from django.test import TestCase + +from notesapi.v1.models import Note +from notesapi.v1.views import meilisearch + + +class MeilisearchTest(TestCase): + + @classmethod + def setUpClass(cls): + super().setUpClass() + meilisearch.connect_signals() + + @classmethod + def tearDownClass(cls): + super().tearDownClass() + meilisearch.disconnect_signals() + + def setUp(self): + self.enterContext( + patch.object(meilisearch.Client, "meilisearch_client", Mock()) + ) + self.enterContext(patch.object(meilisearch.Client, "meilisearch_index", Mock())) + + @property + def note_dict(self): + return { + "user": "test_user_id", + "usage_id": "i4x://org/course/html/52aa9816425a4ce98a07625b8cb70811", + "course_id": "org/course/run", + "text": "test note text", + "quote": "test note quote", + "ranges": [ + { + "start": "/p[1]", + "end": "/p[1]", + "startOffset": 0, + "endOffset": 10, + } + ], + "tags": ["apple", "pear"], + } + + def test_save_delete_note(self): + note = Note.create(self.note_dict) + note.save() + note_id = note.id + + meilisearch.Client.meilisearch_index.add_documents.assert_called_with( + [ + { + "id": note_id, + "user_id": "test_user_id", + "course_id": "org/course/run", + "text": "test note text", + } + ] + ) + + note.delete() + meilisearch.Client.meilisearch_index.delete_document.assert_called_with(note_id) + + def test_get_queryset_no_result(self): + queryset = meilisearch.AnnotationSearchView().get_queryset() + assert not queryset.all() + + def test_get_queryset_one_match(self): + note1 = Note.create(self.note_dict) + note2 = Note.create(self.note_dict) + note1.save() + note2.save() + view = meilisearch.AnnotationSearchView() + view.params = { + "text": "dummy text", + "user": "someuser", + "course_id": "course/id", + "page_size": 10, + "page": 2, + } + with patch.object( + meilisearch.Client.meilisearch_index, + "search", + Mock(return_value={"hits": [{"id": note2.id}]}), + ) as mock_search: + queryset = view.get_queryset() + mock_search.assert_called_once_with( + "dummy text", + { + "offset": 10, + "limit": 10, + "filter": ["user_id = 'someuser'", "course_id = 'course/id'"], + }, + ) + assert [note2.id] == [note.id for note in queryset] diff --git a/notesapi/v1/urls.py b/notesapi/v1/urls.py index 93131162..cacdb9dd 100644 --- a/notesapi/v1/urls.py +++ b/notesapi/v1/urls.py @@ -1,7 +1,7 @@ from django.urls import path, re_path from notesapi.v1.views import (AnnotationDetailView, AnnotationListView, - AnnotationRetireView, get_views_module) + AnnotationRetireView, get_annotation_search_view_class) app_name = "notesapi.v1" urlpatterns = [ path('annotations/', AnnotationListView.as_view(), name='annotations'), @@ -13,7 +13,7 @@ ), path( 'search/', - get_views_module().AnnotationSearchView.as_view(), + get_annotation_search_view_class().as_view(), name='annotations_search' ), ] diff --git a/notesapi/v1/views/__init__.py b/notesapi/v1/views/__init__.py index eef7e9b4..297cb9d5 100644 --- a/notesapi/v1/views/__init__.py +++ b/notesapi/v1/views/__init__.py @@ -1,19 +1,26 @@ +import typing as t from django.conf import settings from .common import ( AnnotationDetailView, AnnotationListView, AnnotationRetireView, + AnnotationSearchView ) from .exceptions import SearchViewRuntimeError -def get_views_module(): + +# pylint: disable=import-outside-toplevel +def get_annotation_search_view_class() -> t.Type[AnnotationSearchView]: """ - Import views from either mysql or elasticsearch backend + Import views from either mysql, elasticsearch or meilisearch backend """ if settings.ES_DISABLED: - from . import common as backend_module - else: - from . import elasticsearch as backend_module - return backend_module + if getattr(settings, "MEILISEARCH_ENABLED", False): + from . import meilisearch + return meilisearch.AnnotationSearchView + else: + return AnnotationSearchView + from . import elasticsearch + return elasticsearch.AnnotationSearchView diff --git a/notesapi/v1/views/common.py b/notesapi/v1/views/common.py index bd917fb5..0e4e308a 100644 --- a/notesapi/v1/views/common.py +++ b/notesapi/v1/views/common.py @@ -110,13 +110,12 @@ class AnnotationSearchView(ListAPIView): * updated: DateTime. When was the last time annotation was updated. """ - action = '' + action = "" params = {} query_params = {} search_with_usage_id = False - search_fields = ('text', 'tags') - ordering = ('-updated',) - + search_fields = ("text", "tags") + ordering = ("-updated",) @property def is_text_search(self): @@ -129,10 +128,10 @@ def is_text_search(self): def get_queryset(self): queryset = Note.objects.filter(**self.query_params).order_by("-updated") if "text" in self.params: - queryset = queryset.filter( - Q(text__icontains=self.params["text"]) - | Q(tags__icontains=self.params["text"]) + qs_filter = Q(text__icontains=self.params["text"]) | Q( + tags__icontains=self.params["text"] ) + queryset = queryset.filter(qs_filter) return queryset def get_serializer_class(self): @@ -149,7 +148,7 @@ def paginator(self): The paginator instance associated with the view and used data source, or `None`. """ if not hasattr(self, "_paginator"): - # pylint: disable=attribute-defined-outside-init + # pylint: disable=attribute-defined-outside-init self._paginator = self.pagination_class() if self.pagination_class else None return self._paginator @@ -211,6 +210,20 @@ def get(self, *args, **kwargs): return super().get(*args, **kwargs) + @classmethod + def selftest(cls): + """ + No-op. + """ + return {} + + @classmethod + def heartbeat(cls): + """ + No-op + """ + return + class AnnotationRetireView(GenericAPIView): """ @@ -231,115 +244,115 @@ def post(self, *args, **kwargs): class AnnotationListView(GenericAPIView): """ - **Use Case** + **Use Case** - * Get a paginated list of annotations for a user. + * Get a paginated list of annotations for a user. - The annotations are always sorted in descending order by updated date. + The annotations are always sorted in descending order by updated date. - Each page in the list contains 25 annotations by default. The page - size can be altered by passing parameter "page_size=". + Each page in the list contains 25 annotations by default. The page + size can be altered by passing parameter "page_size=". - HTTP 400 Bad Request: The format of the request is not correct. + HTTP 400 Bad Request: The format of the request is not correct. - * Create a new annotation for a user. + * Create a new annotation for a user. - HTTP 400 Bad Request: The format of the request is not correct, or the maximum number of notes for a - user has been reached. + HTTP 400 Bad Request: The format of the request is not correct, or the maximum number of notes for a + user has been reached. - HTTP 201 Created: Success. + HTTP 201 Created: Success. - * Delete all annotations for a user. + * Delete all annotations for a user. - HTTP 400 Bad Request: The format of the request is not correct. + HTTP 400 Bad Request: The format of the request is not correct. - HTTP 200 OK: Either annotations from the user were deleted, or no annotations for the user were found. + HTTP 200 OK: Either annotations from the user were deleted, or no annotations for the user were found. - **Example Requests** + **Example Requests** - GET /api/v1/annotations/?course_id={course_id}&user={user_id} + GET /api/v1/annotations/?course_id={course_id}&user={user_id} - POST /api/v1/annotations/ - user={user_id}&course_id={course_id}&usage_id={usage_id}&ranges={ranges}"e={quote} + POST /api/v1/annotations/ + user={user_id}&course_id={course_id}&usage_id={usage_id}&ranges={ranges}"e={quote} - DELETE /api/v1/annotations/ - user={user_id} + DELETE /api/v1/annotations/ + user={user_id} - **Query Parameters for GET** + **Query Parameters for GET** - Both the course_id and user must be provided. + Both the course_id and user must be provided. - * course_id: Id of the course. + * course_id: Id of the course. - * user: Anonymized user id. + * user: Anonymized user id. - **Response Values for GET** + **Response Values for GET** - * count: The number of annotations in a course. + * count: The number of annotations in a course. - * next: The URI to the next page of annotations. + * next: The URI to the next page of annotations. - * previous: The URI to the previous page of annotations. + * previous: The URI to the previous page of annotations. - * current: Current page number. + * current: Current page number. - * num_pages: The number of pages listing annotations. + * num_pages: The number of pages listing annotations. - * results: A list of annotations returned. Each collection in the list contains these fields. + * results: A list of annotations returned. Each collection in the list contains these fields. - * id: String. The primary key of the note. + * id: String. The primary key of the note. - * user: String. Anonymized id of the user. + * user: String. Anonymized id of the user. - * course_id: String. The identifier string of the annotations course. + * course_id: String. The identifier string of the annotations course. - * usage_id: String. The identifier string of the annotations XBlock. + * usage_id: String. The identifier string of the annotations XBlock. - * quote: String. Quoted text. + * quote: String. Quoted text. - * text: String. Student's thoughts on the quote. + * text: String. Student's thoughts on the quote. - * ranges: List. Describes position of quote. + * ranges: List. Describes position of quote. - * tags: List. Comma separated tags. + * tags: List. Comma separated tags. - * created: DateTime. Creation datetime of annotation. + * created: DateTime. Creation datetime of annotation. - * updated: DateTime. When was the last time annotation was updated. + * updated: DateTime. When was the last time annotation was updated. - **Form-encoded data for POST** + **Form-encoded data for POST** - user, course_id, usage_id, ranges and quote fields must be provided. + user, course_id, usage_id, ranges and quote fields must be provided. - **Response Values for POST** + **Response Values for POST** - * id: String. The primary key of the note. + * id: String. The primary key of the note. - * user: String. Anonymized id of the user. + * user: String. Anonymized id of the user. - * course_id: String. The identifier string of the annotations course. + * course_id: String. The identifier string of the annotations course. - * usage_id: String. The identifier string of the annotations XBlock. + * usage_id: String. The identifier string of the annotations XBlock. - * quote: String. Quoted text. + * quote: String. Quoted text. - * text: String. Student's thoughts on the quote. + * text: String. Student's thoughts on the quote. - * ranges: List. Describes position of quote in the source text. + * ranges: List. Describes position of quote in the source text. - * tags: List. Comma separated tags. + * tags: List. Comma separated tags. - * created: DateTime. Creation datetime of annotation. + * created: DateTime. Creation datetime of annotation. - * updated: DateTime. When was the last time annotation was updated. + * updated: DateTime. When was the last time annotation was updated. - **Form-encoded data for DELETE** + **Form-encoded data for DELETE** - * user: Anonymized user id. + * user: Anonymized user id. - **Response Values for DELETE** + **Response Values for DELETE** - * no content. + * no content. """ @@ -351,13 +364,15 @@ def get(self, *args, **kwargs): """ params = self.request.query_params.dict() - if 'course_id' not in params: + if "course_id" not in params: return Response(status=status.HTTP_400_BAD_REQUEST) - if 'user' not in params: + if "user" not in params: return Response(status=status.HTTP_400_BAD_REQUEST) - notes = Note.objects.filter(course_id=params['course_id'], user_id=params['user']).order_by('-updated') + notes = Note.objects.filter( + course_id=params["course_id"], user_id=params["user"] + ).order_by("-updated") page = self.paginate_queryset(notes) serializer = self.get_serializer(page, many=True) response = self.get_paginated_response(serializer.data) @@ -369,12 +384,13 @@ def post(self, *args, **kwargs): Returns 400 request if bad payload is sent or it was empty object. """ - if not self.request.data or 'id' in self.request.data: + if not self.request.data or "id" in self.request.data: return Response(status=status.HTTP_400_BAD_REQUEST) try: total_notes = Note.objects.filter( - user_id=self.request.data['user'], course_id=self.request.data['course_id'] + user_id=self.request.data["user"], + course_id=self.request.data["course_id"], ).count() if total_notes >= settings.MAX_NOTES_PER_COURSE: raise AnnotationsLimitReachedError @@ -383,105 +399,116 @@ def post(self, *args, **kwargs): note.full_clean() # Gather metrics for New Relic so we can slice data in New Relic Insights - newrelic.agent.add_custom_parameter('notes.count', total_notes) + newrelic.agent.add_custom_parameter("notes.count", total_notes) except ValidationError as error: log.debug(error, exc_info=True) return Response(status=status.HTTP_400_BAD_REQUEST) except AnnotationsLimitReachedError: error_message = _( - 'You can create up to {max_num_annotations_per_course} notes.' - ' You must remove some notes before you can add new ones.' + "You can create up to {max_num_annotations_per_course} notes." + " You must remove some notes before you can add new ones." ).format(max_num_annotations_per_course=settings.MAX_NOTES_PER_COURSE) - log.info('Attempted to create more than %s annotations', settings.MAX_NOTES_PER_COURSE) + log.info( + "Attempted to create more than %s annotations", + settings.MAX_NOTES_PER_COURSE, + ) - return Response({'error_msg': error_message}, status=status.HTTP_400_BAD_REQUEST) + return Response( + {"error_msg": error_message}, status=status.HTTP_400_BAD_REQUEST + ) note.save() - location = reverse('api:v1:annotations_detail', kwargs={'annotation_id': note.id}) + location = reverse( + "api:v1:annotations_detail", kwargs={"annotation_id": note.id} + ) serializer = NoteSerializer(note) - return Response(serializer.data, status=status.HTTP_201_CREATED, headers={'Location': location}) + return Response( + serializer.data, + status=status.HTTP_201_CREATED, + headers={"Location": location}, + ) class AnnotationDetailView(APIView): """ - **Use Case** + **Use Case** - * Get a single annotation. + * Get a single annotation. - * Update an annotation. + * Update an annotation. - * Delete an annotation. + * Delete an annotation. - **Example Requests** + **Example Requests** - GET /api/v1/annotations/ - PUT /api/v1/annotations/ - DELETE /api/v1/annotations/ + GET /api/v1/annotations/ + PUT /api/v1/annotations/ + DELETE /api/v1/annotations/ - **Query Parameters for GET** + **Query Parameters for GET** - HTTP404 is returned if annotation_id is missing. + HTTP404 is returned if annotation_id is missing. - * annotation_id: Annotation id + * annotation_id: Annotation id - **Query Parameters for PUT** + **Query Parameters for PUT** - HTTP404 is returned if annotation_id is missing and HTTP400 is returned if text and tags are missing. + HTTP404 is returned if annotation_id is missing and HTTP400 is returned if text and tags are missing. - * annotation_id: String. Annotation id + * annotation_id: String. Annotation id - * text: String. Text to be updated + * text: String. Text to be updated - * tags: List. Tags to be updated + * tags: List. Tags to be updated - **Query Parameters for DELETE** + **Query Parameters for DELETE** - HTTP404 is returned if annotation_id is missing. + HTTP404 is returned if annotation_id is missing. - * annotation_id: Annotation id + * annotation_id: Annotation id - **Response Values for GET** + **Response Values for GET** - * id: String. The primary key of the note. + * id: String. The primary key of the note. - * user: String. Anonymized id of the user. + * user: String. Anonymized id of the user. - * course_id: String. The identifier string of the annotations course. + * course_id: String. The identifier string of the annotations course. - * usage_id: String. The identifier string of the annotations XBlock. + * usage_id: String. The identifier string of the annotations XBlock. - * quote: String. Quoted text. + * quote: String. Quoted text. - * text: String. Student's thoughts on the quote. + * text: String. Student's thoughts on the quote. - * ranges: List. Describes position of quote. + * ranges: List. Describes position of quote. - * tags: List. Comma separated tags. + * tags: List. Comma separated tags. - * created: DateTime. Creation datetime of annotation. + * created: DateTime. Creation datetime of annotation. - * updated: DateTime. When was the last time annotation was updated. + * updated: DateTime. When was the last time annotation was updated. - **Response Values for PUT** + **Response Values for PUT** - * same as GET with updated values + * same as GET with updated values - **Response Values for DELETE** + **Response Values for DELETE** - * HTTP_204_NO_CONTENT is returned + * HTTP_204_NO_CONTENT is returned """ def get(self, *args, **kwargs): """ Get an existing annotation. """ - note_id = self.kwargs.get('annotation_id') + note_id = self.kwargs.get("annotation_id") try: note = Note.objects.get(id=note_id) except Note.DoesNotExist: - return Response('Annotation not found!', status=status.HTTP_404_NOT_FOUND) + return Response("Annotation not found!", status=status.HTTP_404_NOT_FOUND) serializer = NoteSerializer(note) return Response(serializer.data) @@ -490,16 +517,19 @@ def put(self, *args, **kwargs): """ Update an existing annotation. """ - note_id = self.kwargs.get('annotation_id') + note_id = self.kwargs.get("annotation_id") try: note = Note.objects.get(id=note_id) except Note.DoesNotExist: - return Response('Annotation not found! No update performed.', status=status.HTTP_404_NOT_FOUND) + return Response( + "Annotation not found! No update performed.", + status=status.HTTP_404_NOT_FOUND, + ) try: - note.text = self.request.data['text'] - note.tags = json.dumps(self.request.data['tags']) + note.text = self.request.data["text"] + note.tags = json.dumps(self.request.data["tags"]) note.full_clean() except KeyError as error: log.debug(error, exc_info=True) @@ -514,26 +544,17 @@ def delete(self, *args, **kwargs): """ Delete an annotation. """ - note_id = self.kwargs.get('annotation_id') + note_id = self.kwargs.get("annotation_id") try: note = Note.objects.get(id=note_id) except Note.DoesNotExist: - return Response('Annotation not found! No update performed.', status=status.HTTP_404_NOT_FOUND) + return Response( + "Annotation not found! No update performed.", + status=status.HTTP_404_NOT_FOUND, + ) note.delete() # Annotation deleted successfully. return Response(status=status.HTTP_204_NO_CONTENT) - -def selftest(): - """ - No-op. - """ - return {} - -def heartbeat(): - """ - No-op - """ - return diff --git a/notesapi/v1/views/elasticsearch.py b/notesapi/v1/views/elasticsearch.py index 0d91b74d..32c7207c 100644 --- a/notesapi/v1/views/elasticsearch.py +++ b/notesapi/v1/views/elasticsearch.py @@ -64,10 +64,14 @@ class AnnotationSearchView(BaseAnnotationSearchView): } def __init__(self, *args, **kwargs): - self.client = connections.get_connection(NoteDocument._get_using()) # pylint: disable=protected-access + self.client = connections.get_connection( + NoteDocument._get_using() + ) # pylint: disable=protected-access self.index = NoteDocument._index._name # pylint: disable=protected-access - self.mapping = NoteDocument._doc_type.mapping.properties.name # pylint: disable=protected-access - # pylint: disable=protected-access + self.mapping = ( + NoteDocument._doc_type.mapping.properties.name + ) # pylint: disable=protected-access + # pylint: disable=protected-access self.search = Search( using=self.client, index=self.index, doc_type=NoteDocument._doc_type.name ) @@ -121,17 +125,17 @@ def build_query_params_state(self): if "user" in self.params: self.query_params["user"] = self.query_params.pop("user_id") - -def heartbeat(): - if not get_es().ping(): - raise SearchViewRuntimeError("es") - - -def selftest(): - try: - return {"es": get_es().info()} - except TransportError as e: - raise SearchViewRuntimeError({"es_error": traceback.format_exc()}) from e + @classmethod + def heartbeat(cls): + if not get_es().ping(): + raise SearchViewRuntimeError("es") + + @classmethod + def selftest(cls): + try: + return {"es": get_es().info()} + except TransportError as e: + raise SearchViewRuntimeError({"es_error": traceback.format_exc()}) from e def get_es(): diff --git a/notesapi/v1/views/meilisearch.py b/notesapi/v1/views/meilisearch.py new file mode 100644 index 00000000..504d327c --- /dev/null +++ b/notesapi/v1/views/meilisearch.py @@ -0,0 +1,193 @@ +""" +Meilisearch views to search for annotations. + +To enable this backend, define the following settings: + +ES_DISABLED = True +MEILISEARCH_ENABLED = True + +Then check the Client class for more information about Meilisearch credential settings. + +When you start using this backend, you might want to re-index all your content. To do that, run: + + ./manage.py shell -c "from notesapi.v1.views.meilisearch import reindex; reindex()" +""" + +import traceback + +import meilisearch +from django.conf import settings +from django.core.paginator import Paginator +from django.db.models import signals + +from notesapi.v1.models import Note + +from .common import AnnotationSearchView as BaseAnnotationSearchView +from .exceptions import SearchViewRuntimeError + + +class Client: + """ + Simple Meilisearch client class + + It depends on the following Django settings: + + - MEILISEARCH_URL + - MEILISEARCH_API_KEY + - MEILISEARCH_INDEX + """ + + _CLIENT = None + _INDEX = None + FILTERABLES = ["user_id", "course_id"] + + @property + def meilisearch_client(self) -> meilisearch.Client: + """ + Return a meilisearch client. + """ + if self._CLIENT is None: + self._CLIENT = meilisearch.Client( + getattr(settings, "MEILISEARCH_URL", "http://meilisearch:7700"), + getattr(settings, "MEILISEARCH_API_KEY", ""), + ) + return self._CLIENT + + @property + def meilisearch_index(self) -> meilisearch.index.Index: + """ + Return the meilisearch index used to store annotations. + + If the index does not exist, it is created. And if it does not have the right + filterable fields, then it is updated. + """ + if self._INDEX is None: + index_name = getattr(settings, "MEILISEARCH_INDEX", "student_notes") + try: + self._INDEX = self.meilisearch_client.get_index(index_name) + except meilisearch.errors.MeilisearchApiError: + task = self.meilisearch_client.create_index( + index_name, {"primaryKey": "id"} + ) + self.meilisearch_client.wait_for_task(task.task_uid, timeout_in_ms=2000) + self._INDEX = self.meilisearch_client.get_index(index_name) + + # Checking filterable attributes + existing_filterables = set(self._INDEX.get_filterable_attributes()) + if not set(self.FILTERABLES).issubset(existing_filterables): + all_filterables = list(existing_filterables.union(self.FILTERABLES)) + self._INDEX.update_filterable_attributes(all_filterables) + + return self._INDEX + + +class AnnotationSearchView(BaseAnnotationSearchView): + def get_queryset(self): + """ + Simple result filtering method based on test search. + + We simply include in the query only those that match the text search query. Note + that this backend does not support highlighting (yet). + """ + if not self.is_text_search: + return super().get_queryset() + + queryset = Note.objects.filter(**self.query_params).order_by("-updated") + + # Define meilisearch params + filters = [ + f"user_id = '{self.params['user']}'", + f"course_id = '{self.params['course_id']}'", + ] + page_size = int(self.params["page_size"]) + offset = (int(self.params["page"]) - 1) * page_size + + # Perform search + search_results = Client().meilisearch_index.search( + self.params["text"], + {"offset": offset, "limit": page_size, "filter": filters}, + ) + + # Limit to these ID + queryset = queryset.filter(id__in=[r["id"] for r in search_results["hits"]]) + return queryset + + @classmethod + def heartbeat(cls): + """ + Check that the meilisearch client is healthy. + """ + if not Client().meilisearch_client.is_healthy(): + raise SearchViewRuntimeError("meilisearch") + + @classmethod + def selftest(cls): + """ + Check that we can access the meilisearch index. + """ + try: + return {"meilisearch": Client().meilisearch_index.created_at} + except meilisearch.errors.MeilisearchError as e: + raise SearchViewRuntimeError( + {"meilisearch_error": traceback.format_exc()} + ) from e + + +def on_note_save(sender, instance, **kwargs): # pylint: disable=unused-argument + """ + Create or update a document. + """ + add_documents([instance]) + + +def on_note_delete(sender, instance, **kwargs): # pylint: disable=unused-argument + """ + Delete a document. + """ + Client().meilisearch_index.delete_document(instance.id) + + +def connect_signals() -> None: + """ + Connect Django signal to meilisearch indexing. + """ + signals.post_save.connect(on_note_save, sender=Note) + signals.post_delete.connect(on_note_delete, sender=Note) + + +def disconnect_signals() -> None: + """ + Disconnect Django signals: this is necessary in unit tests. + """ + signals.post_save.disconnect(on_note_save, sender=Note) + signals.post_delete.disconnect(on_note_delete, sender=Note) + + +connect_signals() + + +def reindex(): + """ + Re-index all notes, in batches of 100. + """ + paginator = Paginator(Note.objects.all(), 100) + for page_number in paginator.page_range: + page = paginator.page(page_number) + add_documents(page.object_list) + + +def add_documents(notes): + """ + Convert some Note objects and insert them in the index. + """ + documents_to_add = [ + { + "id": note.id, + "user_id": note.user_id, + "course_id": note.course_id, + "text": note.text, + } + for note in notes + ] + if documents_to_add: + Client().meilisearch_index.add_documents(documents_to_add) diff --git a/notesserver/views.py b/notesserver/views.py index a90b48c3..bf2026fb 100644 --- a/notesserver/views.py +++ b/notesserver/views.py @@ -14,7 +14,7 @@ except ImportError: # pragma: no cover newrelic = None # pylint: disable=invalid-name -from notesapi.v1.views import get_views_module +from notesapi.v1.views import get_annotation_search_view_class from notesapi.v1.views import SearchViewRuntimeError @@ -53,7 +53,7 @@ def heartbeat(request): return JsonResponse({"OK": False, "check": "db"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) try: - get_views_module().heartbeat() + get_annotation_search_view_class().heartbeat() except SearchViewRuntimeError as e: return JsonResponse({"OK": False, "check": e.args[0]}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) @@ -70,7 +70,7 @@ def selftest(request): response = {} try: - response.update(get_views_module().selftest()) + response.update(get_annotation_search_view_class().selftest()) except SearchViewRuntimeError as e: return Response( e.args[0], diff --git a/requirements/base.in b/requirements/base.in index dc2ab72b..0a9b7105 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -10,6 +10,7 @@ elasticsearch-dsl django-elasticsearch-dsl django-elasticsearch-dsl-drf django-cors-headers +meilisearch mysqlclient PyJWT gunicorn # MIT diff --git a/requirements/base.txt b/requirements/base.txt index 30bc5a40..f6470daa 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -4,6 +4,8 @@ # # make upgrade # +annotated-types==0.7.0 + # via pydantic asgiref==3.8.1 # via # django @@ -12,6 +14,8 @@ attrs==24.2.0 # via # jsonschema # referencing +camel-converter[pydantic]==4.0.1 + # via meilisearch certifi==2024.8.30 # via # elasticsearch @@ -100,6 +104,8 @@ jsonschema==4.23.0 # via drf-spectacular jsonschema-specifications==2024.10.1 # via jsonschema +meilisearch==0.31.5 + # via -r requirements/base.in mysqlclient==2.2.5 # via -r requirements/base.in newrelic==10.2.0 @@ -120,6 +126,10 @@ psutil==6.1.0 # via edx-django-utils pycparser==2.22 # via cffi +pydantic==2.9.2 + # via camel-converter +pydantic-core==2.23.4 + # via pydantic pyjwt[crypto]==2.9.0 # via # -r requirements/base.in @@ -147,6 +157,7 @@ requests==2.32.3 # via # -r requirements/base.in # edx-drf-extensions + # meilisearch rpds-py==0.21.0 # via # jsonschema @@ -169,7 +180,8 @@ stevedore==5.3.0 typing-extensions==4.12.2 # via # edx-opaque-keys - # elasticsearch-dsl + # pydantic + # pydantic-core uritemplate==4.1.1 # via drf-spectacular urllib3==1.26.20 diff --git a/requirements/quality.txt b/requirements/quality.txt index 7f6ba23f..be859b79 100644 --- a/requirements/quality.txt +++ b/requirements/quality.txt @@ -4,6 +4,11 @@ # # make upgrade # +annotated-types==0.7.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # pydantic asgiref==3.8.1 # via # -r requirements/base.txt @@ -25,6 +30,11 @@ cachetools==5.5.0 # via # -r requirements/test.txt # tox +camel-converter[pydantic]==4.0.1 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # meilisearch certifi==2024.8.30 # via # -r requirements/base.txt @@ -242,6 +252,10 @@ mccabe==0.7.0 # via # -r requirements/test.txt # pylint +meilisearch==0.31.5 + # via + # -r requirements/base.txt + # -r requirements/test.txt more-itertools==10.5.0 # via -r requirements/test.txt mysqlclient==2.2.5 @@ -302,6 +316,16 @@ pycparser==2.22 # -r requirements/base.txt # -r requirements/test.txt # cffi +pydantic==2.9.2 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # camel-converter +pydantic-core==2.23.4 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # pydantic pygments==2.18.0 # via # -r requirements/test.txt @@ -383,6 +407,7 @@ requests==2.32.3 # -r requirements/base.txt # -r requirements/test.txt # edx-drf-extensions + # meilisearch rpds-py==0.21.0 # via # -r requirements/base.txt @@ -432,6 +457,8 @@ typing-extensions==4.12.2 # -r requirements/test.txt # edx-opaque-keys # faker + # pydantic + # pydantic-core uritemplate==4.1.1 # via # -r requirements/base.txt diff --git a/requirements/test.txt b/requirements/test.txt index d8412d3f..6251f94c 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -4,6 +4,10 @@ # # make upgrade # +annotated-types==0.7.0 + # via + # -r requirements/base.txt + # pydantic asgiref==3.8.1 # via # -r requirements/base.txt @@ -20,6 +24,10 @@ attrs==24.2.0 # referencing cachetools==5.5.0 # via tox +camel-converter[pydantic]==4.0.1 + # via + # -r requirements/base.txt + # meilisearch certifi==2024.8.30 # via # -r requirements/base.txt @@ -175,6 +183,8 @@ markupsafe==3.0.2 # via jinja2 mccabe==0.7.0 # via pylint +meilisearch==0.31.5 + # via -r requirements/base.txt more-itertools==10.5.0 # via -r requirements/test.in mysqlclient==2.2.5 @@ -221,6 +231,14 @@ pycparser==2.22 # via # -r requirements/base.txt # cffi +pydantic==2.9.2 + # via + # -r requirements/base.txt + # camel-converter +pydantic-core==2.23.4 + # via + # -r requirements/base.txt + # pydantic pygments==2.18.0 # via diff-cover pyjwt[crypto]==2.9.0 @@ -273,6 +291,7 @@ requests==2.32.3 # via # -r requirements/base.txt # edx-drf-extensions + # meilisearch rpds-py==0.21.0 # via # -r requirements/base.txt @@ -311,6 +330,8 @@ typing-extensions==4.12.2 # -r requirements/base.txt # edx-opaque-keys # faker + # pydantic + # pydantic-core uritemplate==4.1.1 # via # -r requirements/base.txt