From d893502a38c52c066d59053031c86870d7da56d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Behmo?= Date: Thu, 17 Oct 2024 23:03:41 +0200 Subject: [PATCH] feat: meilisearch backend for notes search This is a very simple and basic backend. It is based on Django signals, just like the Elasticsearch backend. But it is much simpler, in the sense that there are just two signals: one for saving documents and one for deletion. This backend is limited, in the sense that it does not support highlighting -- but that's probably not such a big deal. To start using this backend, define the following settings: ES_DISABLED = True MEILISEARCH_ENABLED = True MEILISEARCH_URL = "http://meilisearch:7700" MEILISEARCH_API_KEY = "s3cr3t" MEILISEARCH_INDEX = "tutor_student_notes" --- notesapi/v1/tests/test_meilisearch.py | 96 +++++++++ notesapi/v1/urls.py | 4 +- notesapi/v1/views/__init__.py | 19 +- notesapi/v1/views/common.py | 283 ++++++++++++++------------ notesapi/v1/views/elasticsearch.py | 32 +-- notesapi/v1/views/meilisearch.py | 193 ++++++++++++++++++ notesserver/views.py | 6 +- requirements/base.in | 1 + requirements/base.txt | 14 +- requirements/quality.txt | 27 +++ requirements/test.txt | 21 ++ 11 files changed, 539 insertions(+), 157 deletions(-) create mode 100644 notesapi/v1/tests/test_meilisearch.py create mode 100644 notesapi/v1/views/meilisearch.py diff --git a/notesapi/v1/tests/test_meilisearch.py b/notesapi/v1/tests/test_meilisearch.py new file mode 100644 index 00000000..abcc2d1f --- /dev/null +++ b/notesapi/v1/tests/test_meilisearch.py @@ -0,0 +1,96 @@ +from unittest.mock import Mock, patch + +from django.test import TestCase + +from notesapi.v1.models import Note +from notesapi.v1.views import meilisearch + + +class MeilisearchTest(TestCase): + + @classmethod + def setUpClass(cls): + super().setUpClass() + meilisearch.connect_signals() + + @classmethod + def tearDownClass(cls): + super().tearDownClass() + meilisearch.disconnect_signals() + + def setUp(self): + self.enterContext( + patch.object(meilisearch.Client, "meilisearch_client", Mock()) + ) + self.enterContext(patch.object(meilisearch.Client, "meilisearch_index", Mock())) + + @property + def note_dict(self): + return { + "user": "test_user_id", + "usage_id": "i4x://org/course/html/52aa9816425a4ce98a07625b8cb70811", + "course_id": "org/course/run", + "text": "test note text", + "quote": "test note quote", + "ranges": [ + { + "start": "/p[1]", + "end": "/p[1]", + "startOffset": 0, + "endOffset": 10, + } + ], + "tags": ["apple", "pear"], + } + + def test_save_delete_note(self): + note = Note.create(self.note_dict) + note.save() + note_id = note.id + + meilisearch.Client.meilisearch_index.add_documents.assert_called_with( + [ + { + "id": note_id, + "user_id": "test_user_id", + "course_id": "org/course/run", + "text": "test note text", + } + ] + ) + + note.delete() + meilisearch.Client.meilisearch_index.delete_document.assert_called_with(note_id) + + def test_get_queryset_no_result(self): + queryset = meilisearch.AnnotationSearchView().get_queryset() + assert not queryset.all() + + def test_get_queryset_one_match(self): + note1 = Note.create(self.note_dict) + note2 = Note.create(self.note_dict) + note1.save() + note2.save() + view = meilisearch.AnnotationSearchView() + view.params = { + "text": "dummy text", + "user": "someuser", + "course_id": "course/id", + "page_size": 10, + "page": 2, + } + with patch.object( + meilisearch.Client.meilisearch_index, + "search", + Mock(return_value={"hits": [{"id": note2.id}]}), + ) as mock_search: + queryset = view.get_queryset() + mock_search.assert_called_once_with( + "dummy text", + { + "offset": 10, + "limit": 10, + "filter": ["user_id = 'someuser'", "course_id = 'course/id'"], + }, + ) + assert [note2.id] == [note.id for note in queryset] diff --git a/notesapi/v1/urls.py b/notesapi/v1/urls.py index 93131162..cacdb9dd 100644 --- a/notesapi/v1/urls.py +++ b/notesapi/v1/urls.py @@ -1,7 +1,7 @@ from django.urls import path, re_path from notesapi.v1.views import (AnnotationDetailView, AnnotationListView, - AnnotationRetireView, get_views_module) + AnnotationRetireView, get_annotation_search_view_class) app_name = "notesapi.v1" urlpatterns = [ path('annotations/', AnnotationListView.as_view(), name='annotations'), @@ -13,7 +13,7 @@ ), path( 'search/', - get_views_module().AnnotationSearchView.as_view(), + get_annotation_search_view_class().as_view(), name='annotations_search' ), ] diff --git a/notesapi/v1/views/__init__.py b/notesapi/v1/views/__init__.py index eef7e9b4..297cb9d5 100644 --- a/notesapi/v1/views/__init__.py +++ b/notesapi/v1/views/__init__.py @@ -1,19 +1,26 @@ +import typing as t from django.conf import settings from .common import ( AnnotationDetailView, AnnotationListView, AnnotationRetireView, + AnnotationSearchView ) from .exceptions import SearchViewRuntimeError -def get_views_module(): + +# pylint: disable=import-outside-toplevel +def get_annotation_search_view_class() -> t.Type[AnnotationSearchView]: """ - Import views from either mysql or elasticsearch backend + Import views from either mysql, elasticsearch or meilisearch backend """ if settings.ES_DISABLED: - from . import common as backend_module - else: - from . import elasticsearch as backend_module - return backend_module + if getattr(settings, "MEILISEARCH_ENABLED", False): + from . import meilisearch + return meilisearch.AnnotationSearchView + else: + return AnnotationSearchView + from . import elasticsearch + return elasticsearch.AnnotationSearchView diff --git a/notesapi/v1/views/common.py b/notesapi/v1/views/common.py index bd917fb5..0e4e308a 100644 --- a/notesapi/v1/views/common.py +++ b/notesapi/v1/views/common.py @@ -110,13 +110,12 @@ class AnnotationSearchView(ListAPIView): * updated: DateTime. When was the last time annotation was updated. """ - action = '' + action = "" params = {} query_params = {} search_with_usage_id = False - search_fields = ('text', 'tags') - ordering = ('-updated',) - + search_fields = ("text", "tags") + ordering = ("-updated",) @property def is_text_search(self): @@ -129,10 +128,10 @@ def is_text_search(self): def get_queryset(self): queryset = Note.objects.filter(**self.query_params).order_by("-updated") if "text" in self.params: - queryset = queryset.filter( - Q(text__icontains=self.params["text"]) - | Q(tags__icontains=self.params["text"]) + qs_filter = Q(text__icontains=self.params["text"]) | Q( + tags__icontains=self.params["text"] ) + queryset = queryset.filter(qs_filter) return queryset def get_serializer_class(self): @@ -149,7 +148,7 @@ def paginator(self): The paginator instance associated with the view and used data source, or `None`. """ if not hasattr(self, "_paginator"): - # pylint: disable=attribute-defined-outside-init + # pylint: disable=attribute-defined-outside-init self._paginator = self.pagination_class() if self.pagination_class else None return self._paginator @@ -211,6 +210,20 @@ def get(self, *args, **kwargs): return super().get(*args, **kwargs) + @classmethod + def selftest(cls): + """ + No-op. + """ + return {} + + @classmethod + def heartbeat(cls): + """ + No-op + """ + return + class AnnotationRetireView(GenericAPIView): """ @@ -231,115 +244,115 @@ def post(self, *args, **kwargs): class AnnotationListView(GenericAPIView): """ - **Use Case** + **Use Case** - * Get a paginated list of annotations for a user. + * Get a paginated list of annotations for a user. - The annotations are always sorted in descending order by updated date. + The annotations are always sorted in descending order by updated date. - Each page in the list contains 25 annotations by default. The page - size can be altered by passing parameter "page_size=". + Each page in the list contains 25 annotations by default. The page + size can be altered by passing parameter "page_size=". - HTTP 400 Bad Request: The format of the request is not correct. + HTTP 400 Bad Request: The format of the request is not correct. - * Create a new annotation for a user. + * Create a new annotation for a user. - HTTP 400 Bad Request: The format of the request is not correct, or the maximum number of notes for a - user has been reached. + HTTP 400 Bad Request: The format of the request is not correct, or the maximum number of notes for a + user has been reached. - HTTP 201 Created: Success. + HTTP 201 Created: Success. - * Delete all annotations for a user. + * Delete all annotations for a user. - HTTP 400 Bad Request: The format of the request is not correct. + HTTP 400 Bad Request: The format of the request is not correct. - HTTP 200 OK: Either annotations from the user were deleted, or no annotations for the user were found. + HTTP 200 OK: Either annotations from the user were deleted, or no annotations for the user were found. - **Example Requests** + **Example Requests** - GET /api/v1/annotations/?course_id={course_id}&user={user_id} + GET /api/v1/annotations/?course_id={course_id}&user={user_id} - POST /api/v1/annotations/ - user={user_id}&course_id={course_id}&usage_id={usage_id}&ranges={ranges}"e={quote} + POST /api/v1/annotations/ + user={user_id}&course_id={course_id}&usage_id={usage_id}&ranges={ranges}"e={quote} - DELETE /api/v1/annotations/ - user={user_id} + DELETE /api/v1/annotations/ + user={user_id} - **Query Parameters for GET** + **Query Parameters for GET** - Both the course_id and user must be provided. + Both the course_id and user must be provided. - * course_id: Id of the course. + * course_id: Id of the course. - * user: Anonymized user id. + * user: Anonymized user id. - **Response Values for GET** + **Response Values for GET** - * count: The number of annotations in a course. + * count: The number of annotations in a course. - * next: The URI to the next page of annotations. + * next: The URI to the next page of annotations. - * previous: The URI to the previous page of annotations. + * previous: The URI to the previous page of annotations. - * current: Current page number. + * current: Current page number. - * num_pages: The number of pages listing annotations. + * num_pages: The number of pages listing annotations. - * results: A list of annotations returned. Each collection in the list contains these fields. + * results: A list of annotations returned. Each collection in the list contains these fields. - * id: String. The primary key of the note. + * id: String. The primary key of the note. - * user: String. Anonymized id of the user. + * user: String. Anonymized id of the user. - * course_id: String. The identifier string of the annotations course. + * course_id: String. The identifier string of the annotations course. - * usage_id: String. The identifier string of the annotations XBlock. + * usage_id: String. The identifier string of the annotations XBlock. - * quote: String. Quoted text. + * quote: String. Quoted text. - * text: String. Student's thoughts on the quote. + * text: String. Student's thoughts on the quote. - * ranges: List. Describes position of quote. + * ranges: List. Describes position of quote. - * tags: List. Comma separated tags. + * tags: List. Comma separated tags. - * created: DateTime. Creation datetime of annotation. + * created: DateTime. Creation datetime of annotation. - * updated: DateTime. When was the last time annotation was updated. + * updated: DateTime. When was the last time annotation was updated. - **Form-encoded data for POST** + **Form-encoded data for POST** - user, course_id, usage_id, ranges and quote fields must be provided. + user, course_id, usage_id, ranges and quote fields must be provided. - **Response Values for POST** + **Response Values for POST** - * id: String. The primary key of the note. + * id: String. The primary key of the note. - * user: String. Anonymized id of the user. + * user: String. Anonymized id of the user. - * course_id: String. The identifier string of the annotations course. + * course_id: String. The identifier string of the annotations course. - * usage_id: String. The identifier string of the annotations XBlock. + * usage_id: String. The identifier string of the annotations XBlock. - * quote: String. Quoted text. + * quote: String. Quoted text. - * text: String. Student's thoughts on the quote. + * text: String. Student's thoughts on the quote. - * ranges: List. Describes position of quote in the source text. + * ranges: List. Describes position of quote in the source text. - * tags: List. Comma separated tags. + * tags: List. Comma separated tags. - * created: DateTime. Creation datetime of annotation. + * created: DateTime. Creation datetime of annotation. - * updated: DateTime. When was the last time annotation was updated. + * updated: DateTime. When was the last time annotation was updated. - **Form-encoded data for DELETE** + **Form-encoded data for DELETE** - * user: Anonymized user id. + * user: Anonymized user id. - **Response Values for DELETE** + **Response Values for DELETE** - * no content. + * no content. """ @@ -351,13 +364,15 @@ def get(self, *args, **kwargs): """ params = self.request.query_params.dict() - if 'course_id' not in params: + if "course_id" not in params: return Response(status=status.HTTP_400_BAD_REQUEST) - if 'user' not in params: + if "user" not in params: return Response(status=status.HTTP_400_BAD_REQUEST) - notes = Note.objects.filter(course_id=params['course_id'], user_id=params['user']).order_by('-updated') + notes = Note.objects.filter( + course_id=params["course_id"], user_id=params["user"] + ).order_by("-updated") page = self.paginate_queryset(notes) serializer = self.get_serializer(page, many=True) response = self.get_paginated_response(serializer.data) @@ -369,12 +384,13 @@ def post(self, *args, **kwargs): Returns 400 request if bad payload is sent or it was empty object. """ - if not self.request.data or 'id' in self.request.data: + if not self.request.data or "id" in self.request.data: return Response(status=status.HTTP_400_BAD_REQUEST) try: total_notes = Note.objects.filter( - user_id=self.request.data['user'], course_id=self.request.data['course_id'] + user_id=self.request.data["user"], + course_id=self.request.data["course_id"], ).count() if total_notes >= settings.MAX_NOTES_PER_COURSE: raise AnnotationsLimitReachedError @@ -383,105 +399,116 @@ def post(self, *args, **kwargs): note.full_clean() # Gather metrics for New Relic so we can slice data in New Relic Insights - newrelic.agent.add_custom_parameter('notes.count', total_notes) + newrelic.agent.add_custom_parameter("notes.count", total_notes) except ValidationError as error: log.debug(error, exc_info=True) return Response(status=status.HTTP_400_BAD_REQUEST) except AnnotationsLimitReachedError: error_message = _( - 'You can create up to {max_num_annotations_per_course} notes.' - ' You must remove some notes before you can add new ones.' + "You can create up to {max_num_annotations_per_course} notes." + " You must remove some notes before you can add new ones." ).format(max_num_annotations_per_course=settings.MAX_NOTES_PER_COURSE) - log.info('Attempted to create more than %s annotations', settings.MAX_NOTES_PER_COURSE) + log.info( + "Attempted to create more than %s annotations", + settings.MAX_NOTES_PER_COURSE, + ) - return Response({'error_msg': error_message}, status=status.HTTP_400_BAD_REQUEST) + return Response( + {"error_msg": error_message}, status=status.HTTP_400_BAD_REQUEST + ) note.save() - location = reverse('api:v1:annotations_detail', kwargs={'annotation_id': note.id}) + location = reverse( + "api:v1:annotations_detail", kwargs={"annotation_id": note.id} + ) serializer = NoteSerializer(note) - return Response(serializer.data, status=status.HTTP_201_CREATED, headers={'Location': location}) + return Response( + serializer.data, + status=status.HTTP_201_CREATED, + headers={"Location": location}, + ) class AnnotationDetailView(APIView): """ - **Use Case** + **Use Case** - * Get a single annotation. + * Get a single annotation. - * Update an annotation. + * Update an annotation. - * Delete an annotation. + * Delete an annotation. - **Example Requests** + **Example Requests** - GET /api/v1/annotations/ - PUT /api/v1/annotations/ - DELETE /api/v1/annotations/ + GET /api/v1/annotations/ + PUT /api/v1/annotations/ + DELETE /api/v1/annotations/ - **Query Parameters for GET** + **Query Parameters for GET** - HTTP404 is returned if annotation_id is missing. + HTTP404 is returned if annotation_id is missing. - * annotation_id: Annotation id + * annotation_id: Annotation id - **Query Parameters for PUT** + **Query Parameters for PUT** - HTTP404 is returned if annotation_id is missing and HTTP400 is returned if text and tags are missing. + HTTP404 is returned if annotation_id is missing and HTTP400 is returned if text and tags are missing. - * annotation_id: String. Annotation id + * annotation_id: String. Annotation id - * text: String. Text to be updated + * text: String. Text to be updated - * tags: List. Tags to be updated + * tags: List. Tags to be updated - **Query Parameters for DELETE** + **Query Parameters for DELETE** - HTTP404 is returned if annotation_id is missing. + HTTP404 is returned if annotation_id is missing. - * annotation_id: Annotation id + * annotation_id: Annotation id - **Response Values for GET** + **Response Values for GET** - * id: String. The primary key of the note. + * id: String. The primary key of the note. - * user: String. Anonymized id of the user. + * user: String. Anonymized id of the user. - * course_id: String. The identifier string of the annotations course. + * course_id: String. The identifier string of the annotations course. - * usage_id: String. The identifier string of the annotations XBlock. + * usage_id: String. The identifier string of the annotations XBlock. - * quote: String. Quoted text. + * quote: String. Quoted text. - * text: String. Student's thoughts on the quote. + * text: String. Student's thoughts on the quote. - * ranges: List. Describes position of quote. + * ranges: List. Describes position of quote. - * tags: List. Comma separated tags. + * tags: List. Comma separated tags. - * created: DateTime. Creation datetime of annotation. + * created: DateTime. Creation datetime of annotation. - * updated: DateTime. When was the last time annotation was updated. + * updated: DateTime. When was the last time annotation was updated. - **Response Values for PUT** + **Response Values for PUT** - * same as GET with updated values + * same as GET with updated values - **Response Values for DELETE** + **Response Values for DELETE** - * HTTP_204_NO_CONTENT is returned + * HTTP_204_NO_CONTENT is returned """ def get(self, *args, **kwargs): """ Get an existing annotation. """ - note_id = self.kwargs.get('annotation_id') + note_id = self.kwargs.get("annotation_id") try: note = Note.objects.get(id=note_id) except Note.DoesNotExist: - return Response('Annotation not found!', status=status.HTTP_404_NOT_FOUND) + return Response("Annotation not found!", status=status.HTTP_404_NOT_FOUND) serializer = NoteSerializer(note) return Response(serializer.data) @@ -490,16 +517,19 @@ def put(self, *args, **kwargs): """ Update an existing annotation. """ - note_id = self.kwargs.get('annotation_id') + note_id = self.kwargs.get("annotation_id") try: note = Note.objects.get(id=note_id) except Note.DoesNotExist: - return Response('Annotation not found! No update performed.', status=status.HTTP_404_NOT_FOUND) + return Response( + "Annotation not found! No update performed.", + status=status.HTTP_404_NOT_FOUND, + ) try: - note.text = self.request.data['text'] - note.tags = json.dumps(self.request.data['tags']) + note.text = self.request.data["text"] + note.tags = json.dumps(self.request.data["tags"]) note.full_clean() except KeyError as error: log.debug(error, exc_info=True) @@ -514,26 +544,17 @@ def delete(self, *args, **kwargs): """ Delete an annotation. """ - note_id = self.kwargs.get('annotation_id') + note_id = self.kwargs.get("annotation_id") try: note = Note.objects.get(id=note_id) except Note.DoesNotExist: - return Response('Annotation not found! No update performed.', status=status.HTTP_404_NOT_FOUND) + return Response( + "Annotation not found! No update performed.", + status=status.HTTP_404_NOT_FOUND, + ) note.delete() # Annotation deleted successfully. return Response(status=status.HTTP_204_NO_CONTENT) - -def selftest(): - """ - No-op. - """ - return {} - -def heartbeat(): - """ - No-op - """ - return diff --git a/notesapi/v1/views/elasticsearch.py b/notesapi/v1/views/elasticsearch.py index 0d91b74d..32c7207c 100644 --- a/notesapi/v1/views/elasticsearch.py +++ b/notesapi/v1/views/elasticsearch.py @@ -64,10 +64,14 @@ class AnnotationSearchView(BaseAnnotationSearchView): } def __init__(self, *args, **kwargs): - self.client = connections.get_connection(NoteDocument._get_using()) # pylint: disable=protected-access + self.client = connections.get_connection( + NoteDocument._get_using() + ) # pylint: disable=protected-access self.index = NoteDocument._index._name # pylint: disable=protected-access - self.mapping = NoteDocument._doc_type.mapping.properties.name # pylint: disable=protected-access - # pylint: disable=protected-access + self.mapping = ( + NoteDocument._doc_type.mapping.properties.name + ) # pylint: disable=protected-access + # pylint: disable=protected-access self.search = Search( using=self.client, index=self.index, doc_type=NoteDocument._doc_type.name ) @@ -121,17 +125,17 @@ def build_query_params_state(self): if "user" in self.params: self.query_params["user"] = self.query_params.pop("user_id") - -def heartbeat(): - if not get_es().ping(): - raise SearchViewRuntimeError("es") - - -def selftest(): - try: - return {"es": get_es().info()} - except TransportError as e: - raise SearchViewRuntimeError({"es_error": traceback.format_exc()}) from e + @classmethod + def heartbeat(cls): + if not get_es().ping(): + raise SearchViewRuntimeError("es") + + @classmethod + def selftest(cls): + try: + return {"es": get_es().info()} + except TransportError as e: + raise SearchViewRuntimeError({"es_error": traceback.format_exc()}) from e def get_es(): diff --git a/notesapi/v1/views/meilisearch.py b/notesapi/v1/views/meilisearch.py new file mode 100644 index 00000000..a6e7f9d1 --- /dev/null +++ b/notesapi/v1/views/meilisearch.py @@ -0,0 +1,193 @@ +""" +Meilisearch views to search for annotations. + +To enable this backend, define the following settings: + +ES_DISABLED = True +MEILISEARCH_ENABLED = True + +Then check the Client class for more information about Meilisearch credential settings. + +When you start using this backend, you might want to re-index all your content. To do that, run: + + ./manage.py shell -c "from notesapi.v1.views.meilisearch import reindex; reindex()" +""" + +import traceback + +import meilisearch +from django.conf import settings +from django.core.paginator import Paginator +from django.db.models import signals +from django.dispatch import receiver + +from notesapi.v1.models import Note + +from .common import AnnotationSearchView as BaseAnnotationSearchView +from .exceptions import SearchViewRuntimeError + + +class Client: + """ + Simple Meilisearch client class + + It depends on the following Django settings: + + - MEILISEARCH_URL + - MEILISEARCH_API_KEY + - MEILISEARCH_INDEX + """ + + _CLIENT = None + _INDEX = None + FILTERABLES = ["user_id", "course_id"] + + @property + def meilisearch_client(self) -> meilisearch.Client: + """ + Return a meilisearch client. + """ + if self._CLIENT is None: + self._CLIENT = meilisearch.Client( + getattr(settings, "MEILISEARCH_URL", "http://meilisearch:7700"), + getattr(settings, "MEILISEARCH_API_KEY", ""), + ) + return self._CLIENT + + @property + def meilisearch_index(self) -> meilisearch.index.Index: + """ + Return the meilisearch index used to store annotations. + + If the index does not exist, it is created. And if it does not have the right + filterable fields, then it is updated. + """ + if self._INDEX is None: + index_name = getattr(settings, "MEILISEARCH_INDEX", "student_notes") + try: + self._INDEX = self.meilisearch_client.get_index(index_name) + except meilisearch.errors.MeilisearchApiError: + task = self.meilisearch_client.create_index( + index_name, {"primaryKey": "id"} + ) + self.meilisearch_client.wait_for_task(task.task_uid, timeout_in_ms=2000) + self._INDEX = self.meilisearch_client.get_index(index_name) + + # Checking filterable attributes + existing_filterables = set(self._INDEX.get_filterable_attributes()) + if not set(self.FILTERABLES).issubset(existing_filterables): + all_filterables = list(existing_filterables.union(self.FILTERABLES)) + self._INDEX.update_filterable_attributes(all_filterables) + + return self._INDEX + + +class AnnotationSearchView(BaseAnnotationSearchView): + def get_queryset(self): + """ + Simple result filtering method based on test search. + + We simply include in the query only those that match the text search query. Note + that this backend does not support highlighting (yet). + """ + if not self.is_text_search: + return super().get_queryset() + + queryset = Note.objects.filter(**self.query_params).order_by("-updated") + + # Define meilisearch params + filters = [ + f"user_id = '{self.params['user']}'", + f"course_id = '{self.params['course_id']}'", + ] + page_size = int(self.params["page_size"]) + offset = (int(self.params["page"]) - 1) * page_size + + # Perform search + search_results = Client().meilisearch_index.search( + self.params["text"], + {"offset": offset, "limit": page_size, "filter": filters}, + ) + + # Limit to these ID + queryset = queryset.filter(id__in=[r["id"] for r in search_results["hits"]]) + return queryset + + @classmethod + def heartbeat(cls): + """ + Check that the meilisearch client is healthy. + """ + if not Client().meilisearch_client.is_healthy(): + raise SearchViewRuntimeError("meilisearch") + + @classmethod + def selftest(cls): + """ + Check that we can access the meilisearch index. + """ + try: + return {"meilisearch": Client().meilisearch_index.created_at} + except meilisearch.errors.MeilisearchError as e: + raise SearchViewRuntimeError( + {"meilisearch_error": traceback.format_exc()} + ) from e + + +def on_note_save(sender, instance, **kwargs): # pylint: disable=unused-argument + """ + Create or update a document. + """ + add_documents([instance]) + + +def on_note_delete(sender, instance, **kwargs): # pylint: disable=unused-argument + """ + Delete a document. + """ + Client().meilisearch_index.delete_document(instance.id) + + +def connect_signals() -> None: + """ + Connect Django signal to meilisearch indexing. + """ + signals.post_save.connect(on_note_save, sender=Note) + signals.post_delete.connect(on_note_delete, sender=Note) + +def disconnect_signals() -> None: + """ + Disconnect Django signals: this is necessary in unit tests. + """ + signals.post_save.disconnect(on_note_save, sender=Note) + signals.post_delete.disconnect(on_note_delete, sender=Note) + + +connect_signals() + + +def reindex(): + """ + Re-index all notes, in batches of 100. + """ + paginator = Paginator(Note.objects.all(), 100) + for page_number in paginator.page_range: + page = paginator.page(page_number) + add_documents(page.object_list) + + +def add_documents(notes): + """ + Convert some Note objects and insert them in the index. + """ + documents_to_add = [ + { + "id": note.id, + "user_id": note.user_id, + "course_id": note.course_id, + "text": note.text, + } + for note in notes + ] + if documents_to_add: + Client().meilisearch_index.add_documents(documents_to_add) diff --git a/notesserver/views.py b/notesserver/views.py index a90b48c3..bf2026fb 100644 --- a/notesserver/views.py +++ b/notesserver/views.py @@ -14,7 +14,7 @@ except ImportError: # pragma: no cover newrelic = None # pylint: disable=invalid-name -from notesapi.v1.views import get_views_module +from notesapi.v1.views import get_annotation_search_view_class from notesapi.v1.views import SearchViewRuntimeError @@ -53,7 +53,7 @@ def heartbeat(request): return JsonResponse({"OK": False, "check": "db"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) try: - get_views_module().heartbeat() + get_annotation_search_view_class().heartbeat() except SearchViewRuntimeError as e: return JsonResponse({"OK": False, "check": e.args[0]}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) @@ -70,7 +70,7 @@ def selftest(request): response = {} try: - response.update(get_views_module().selftest()) + response.update(get_annotation_search_view_class().selftest()) except SearchViewRuntimeError as e: return Response( e.args[0], diff --git a/requirements/base.in b/requirements/base.in index dc2ab72b..0a9b7105 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -10,6 +10,7 @@ elasticsearch-dsl django-elasticsearch-dsl django-elasticsearch-dsl-drf django-cors-headers +meilisearch mysqlclient PyJWT gunicorn # MIT diff --git a/requirements/base.txt b/requirements/base.txt index 30bc5a40..f6470daa 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -4,6 +4,8 @@ # # make upgrade # +annotated-types==0.7.0 + # via pydantic asgiref==3.8.1 # via # django @@ -12,6 +14,8 @@ attrs==24.2.0 # via # jsonschema # referencing +camel-converter[pydantic]==4.0.1 + # via meilisearch certifi==2024.8.30 # via # elasticsearch @@ -100,6 +104,8 @@ jsonschema==4.23.0 # via drf-spectacular jsonschema-specifications==2024.10.1 # via jsonschema +meilisearch==0.31.5 + # via -r requirements/base.in mysqlclient==2.2.5 # via -r requirements/base.in newrelic==10.2.0 @@ -120,6 +126,10 @@ psutil==6.1.0 # via edx-django-utils pycparser==2.22 # via cffi +pydantic==2.9.2 + # via camel-converter +pydantic-core==2.23.4 + # via pydantic pyjwt[crypto]==2.9.0 # via # -r requirements/base.in @@ -147,6 +157,7 @@ requests==2.32.3 # via # -r requirements/base.in # edx-drf-extensions + # meilisearch rpds-py==0.21.0 # via # jsonschema @@ -169,7 +180,8 @@ stevedore==5.3.0 typing-extensions==4.12.2 # via # edx-opaque-keys - # elasticsearch-dsl + # pydantic + # pydantic-core uritemplate==4.1.1 # via drf-spectacular urllib3==1.26.20 diff --git a/requirements/quality.txt b/requirements/quality.txt index 7f6ba23f..be859b79 100644 --- a/requirements/quality.txt +++ b/requirements/quality.txt @@ -4,6 +4,11 @@ # # make upgrade # +annotated-types==0.7.0 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # pydantic asgiref==3.8.1 # via # -r requirements/base.txt @@ -25,6 +30,11 @@ cachetools==5.5.0 # via # -r requirements/test.txt # tox +camel-converter[pydantic]==4.0.1 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # meilisearch certifi==2024.8.30 # via # -r requirements/base.txt @@ -242,6 +252,10 @@ mccabe==0.7.0 # via # -r requirements/test.txt # pylint +meilisearch==0.31.5 + # via + # -r requirements/base.txt + # -r requirements/test.txt more-itertools==10.5.0 # via -r requirements/test.txt mysqlclient==2.2.5 @@ -302,6 +316,16 @@ pycparser==2.22 # -r requirements/base.txt # -r requirements/test.txt # cffi +pydantic==2.9.2 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # camel-converter +pydantic-core==2.23.4 + # via + # -r requirements/base.txt + # -r requirements/test.txt + # pydantic pygments==2.18.0 # via # -r requirements/test.txt @@ -383,6 +407,7 @@ requests==2.32.3 # -r requirements/base.txt # -r requirements/test.txt # edx-drf-extensions + # meilisearch rpds-py==0.21.0 # via # -r requirements/base.txt @@ -432,6 +457,8 @@ typing-extensions==4.12.2 # -r requirements/test.txt # edx-opaque-keys # faker + # pydantic + # pydantic-core uritemplate==4.1.1 # via # -r requirements/base.txt diff --git a/requirements/test.txt b/requirements/test.txt index d8412d3f..6251f94c 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -4,6 +4,10 @@ # # make upgrade # +annotated-types==0.7.0 + # via + # -r requirements/base.txt + # pydantic asgiref==3.8.1 # via # -r requirements/base.txt @@ -20,6 +24,10 @@ attrs==24.2.0 # referencing cachetools==5.5.0 # via tox +camel-converter[pydantic]==4.0.1 + # via + # -r requirements/base.txt + # meilisearch certifi==2024.8.30 # via # -r requirements/base.txt @@ -175,6 +183,8 @@ markupsafe==3.0.2 # via jinja2 mccabe==0.7.0 # via pylint +meilisearch==0.31.5 + # via -r requirements/base.txt more-itertools==10.5.0 # via -r requirements/test.in mysqlclient==2.2.5 @@ -221,6 +231,14 @@ pycparser==2.22 # via # -r requirements/base.txt # cffi +pydantic==2.9.2 + # via + # -r requirements/base.txt + # camel-converter +pydantic-core==2.23.4 + # via + # -r requirements/base.txt + # pydantic pygments==2.18.0 # via diff-cover pyjwt[crypto]==2.9.0 @@ -273,6 +291,7 @@ requests==2.32.3 # via # -r requirements/base.txt # edx-drf-extensions + # meilisearch rpds-py==0.21.0 # via # -r requirements/base.txt @@ -311,6 +330,8 @@ typing-extensions==4.12.2 # -r requirements/base.txt # edx-opaque-keys # faker + # pydantic + # pydantic-core uritemplate==4.1.1 # via # -r requirements/base.txt