From 9c457b64104c309524f7a069777c1990da9f1511 Mon Sep 17 00:00:00 2001
From: Daniel Valenzuela <daniel.valenzuela@opencraft.com>
Date: Fri, 15 Nov 2024 09:56:07 -0300
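Subject: [PATCH 1/7] feat: incremental reindex_studio management command

Add --reset, --init and --incremental options to reindex_studio.

* --reset: recreate an empty, configured index (reset_index).
* --init: create and configure the index only when it does not exist yet;
  if it already exists, warn that a rebuild is required (init_index).
* --incremental: index straight into the existing Studio index instead of a
  temporary one, recording each finished course/library in the new
  IncrementalIndexCompleted model so that an interrupted run skips the
  contexts it already indexed when it is started again.

The index settings are moved into _configure_index() so that reset_index()
and rebuild_index() share them.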
---
 openedx/core/djangoapps/content/search/api.py | 207 +++++++++++-------
 .../management/commands/reindex_studio.py     |  14 +-
 .../0002_incrementalindexcompleted.py         |  21 ++
 .../core/djangoapps/content/search/models.py  |   9 +
 .../content/search/tests/test_api.py          |  70 ++++++
 5 files changed, 241 insertions(+), 80 deletions(-)
 create mode 100644 openedx/core/djangoapps/content/search/migrations/0002_incrementalindexcompleted.py

diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py
index b1d224b411e4..9d0a2379a0bb 100644
--- a/openedx/core/djangoapps/content/search/api.py
+++ b/openedx/core/djangoapps/content/search/api.py
@@ -5,7 +5,7 @@
 
 import logging
 import time
-from contextlib import contextmanager
+from contextlib import contextmanager, nullcontext
 from datetime import datetime, timedelta, timezone
 from functools import wraps
 from typing import Callable, Generator
@@ -24,7 +24,7 @@
 from rest_framework.request import Request
 from common.djangoapps.student.role_helpers import get_course_roles
 from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
-from openedx.core.djangoapps.content.search.models import get_access_ids_for_request
+from openedx.core.djangoapps.content.search.models import get_access_ids_for_request, IncrementalIndexCompleted
 from openedx.core.djangoapps.content_libraries import api as lib_api
 from xmodule.modulestore.django import modulestore
 
@@ -217,6 +217,83 @@ def _using_temp_index(status_cb: Callable[[str], None] | None = None) -> Generat
         _wait_for_meili_task(client.delete_index(temp_index_name))
 
 
+def _configure_index(index_name):
+    """
+    Configure the index. The following index settings are best changed on an empty index.
+    Changing them on a populated index will "re-index all documents in the index", which can take some time.
+
+    Args:
+        index_name (str): The name of the index to configure
+    """
+    client = _get_meilisearch_client()
+
+    # Mark usage_key as unique (it's not the primary key for the index, but nevertheless must be unique):
+    client.index(index_name).update_distinct_attribute(Fields.usage_key)
+    # Mark which attributes can be used for filtering/faceted search:
+    client.index(index_name).update_filterable_attributes([
+        # Get specific block/collection using combination of block_id and context_key
+        Fields.block_id,
+        Fields.block_type,
+        Fields.context_key,
+        Fields.usage_key,
+        Fields.org,
+        Fields.tags,
+        Fields.tags + "." + Fields.tags_taxonomy,
+        Fields.tags + "." + Fields.tags_level0,
+        Fields.tags + "." + Fields.tags_level1,
+        Fields.tags + "." + Fields.tags_level2,
+        Fields.tags + "." + Fields.tags_level3,
+        Fields.collections,
+        Fields.collections + "." + Fields.collections_display_name,
+        Fields.collections + "." + Fields.collections_key,
+        Fields.type,
+        Fields.access_id,
+        Fields.last_published,
+        Fields.content + "." + Fields.problem_types,
+    ])
+    # Mark which attributes are used for keyword search, in order of importance:
+    client.index(index_name).update_searchable_attributes([
+        # Keyword search does _not_ search the course name, course ID, breadcrumbs, block type, or other fields.
+        Fields.display_name,
+        Fields.block_id,
+        Fields.content,
+        Fields.description,
+        Fields.tags,
+        Fields.collections,
+        # If we don't list the following sub-fields _explicitly_, they're only sometimes searchable - that is, they
+        # are searchable only if at least one document in the index has a value. If we didn't list them here and,
+        # say, there were no tags.level3 tags in the index, the client would get an error if trying to search for
+        # these sub-fields: "Attribute `tags.level3` is not searchable."
+        Fields.tags + "." + Fields.tags_taxonomy,
+        Fields.tags + "." + Fields.tags_level0,
+        Fields.tags + "." + Fields.tags_level1,
+        Fields.tags + "." + Fields.tags_level2,
+        Fields.tags + "." + Fields.tags_level3,
+        Fields.collections + "." + Fields.collections_display_name,
+        Fields.collections + "." + Fields.collections_key,
+        Fields.published + "." + Fields.display_name,
+        Fields.published + "." + Fields.published_description,
+    ])
+    # Mark which attributes can be used for sorting search results:
+    client.index(index_name).update_sortable_attributes([
+        Fields.display_name,
+        Fields.created,
+        Fields.modified,
+        Fields.last_published,
+    ])
+
+    # Update the search ranking rules to let the (optional) "sort" parameter take precedence over keyword relevance.
+    # cf https://www.meilisearch.com/docs/learn/core_concepts/relevancy
+    client.index(index_name).update_ranking_rules([
+        "sort",
+        "words",
+        "typo",
+        "proximity",
+        "attribute",
+        "exactness",
+    ])
+
+
 def _recurse_children(block, fn, status_cb: Callable[[str], None] | None = None) -> None:
     """
     Recurse the children of an XBlock and call the given function for each
@@ -279,8 +356,31 @@ def is_meilisearch_enabled() -> bool:
     return False
 
 
-# pylint: disable=too-many-statements
-def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
+def reset_index(status_cb: Callable[[str], None] | None = None) -> None:
+    if status_cb is None:
+        status_cb = log.info
+
+    status_cb("Creating new empty index...")
+    with _using_temp_index(status_cb) as temp_index_name:
+        _configure_index(temp_index_name)
+        status_cb("Index recreated!")
+    status_cb("Index reset complete.")
+
+
+def init_index(status_cb: Callable[[str], None] | None = None, warn_cb: Callable[[str], None] | None = None) -> None:
+    if status_cb is None:
+        status_cb = log.info
+    if warn_cb is None:
+        warn_cb = log.warning
+
+    if _index_exists(STUDIO_INDEX_NAME):
+        warn_cb("A rebuild of the index is required. Please run ./manage.py cms reindex_studio --experimental [--incremental]")
+        return
+
+    reset_index(status_cb)
+
+
+def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=False) -> None:
     """
     Rebuild the Meilisearch index from scratch
     """
@@ -292,7 +392,14 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
 
     # Get the lists of libraries
     status_cb("Counting libraries...")
-    lib_keys = [lib.library_key for lib in lib_api.ContentLibrary.objects.select_related('org').only('org', 'slug')]
+    keys_indexed = []
+    if incremental:
+        keys_indexed = list(IncrementalIndexCompleted.objects.values_list('context_key', flat=True))
+    lib_keys = [
+        lib.library_key
+        for lib in lib_api.ContentLibrary.objects.select_related('org').only('org', 'slug').order_by('-id')
+        if lib.library_key not in keys_indexed
+    ]
     num_libraries = len(lib_keys)
 
     # Get the list of courses
@@ -305,83 +412,19 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
     num_blocks_done = 0  # How many individual components/XBlocks we've indexed
 
     status_cb(f"Found {num_courses} courses, {num_libraries} libraries.")
-    with _using_temp_index(status_cb) as temp_index_name:
+    with _using_temp_index(status_cb) if not incremental else nullcontext(STUDIO_INDEX_NAME) as index_name:
         ############## Configure the index ##############
 
-        # The following index settings are best changed on an empty index.
-        # Changing them on a populated index will "re-index all documents in the index, which can take some time"
+        # The index settings are best changed on an empty index.
+        # Changing them on a populated index will "re-index all documents in the index", which can take some time
         # and use more RAM. Instead, we configure an empty index then populate it one course/library at a time.
-
-        # Mark usage_key as unique (it's not the primary key for the index, but nevertheless must be unique):
-        client.index(temp_index_name).update_distinct_attribute(Fields.usage_key)
-        # Mark which attributes can be used for filtering/faceted search:
-        client.index(temp_index_name).update_filterable_attributes([
-            # Get specific block/collection using combination of block_id and context_key
-            Fields.block_id,
-            Fields.block_type,
-            Fields.context_key,
-            Fields.usage_key,
-            Fields.org,
-            Fields.tags,
-            Fields.tags + "." + Fields.tags_taxonomy,
-            Fields.tags + "." + Fields.tags_level0,
-            Fields.tags + "." + Fields.tags_level1,
-            Fields.tags + "." + Fields.tags_level2,
-            Fields.tags + "." + Fields.tags_level3,
-            Fields.collections,
-            Fields.collections + "." + Fields.collections_display_name,
-            Fields.collections + "." + Fields.collections_key,
-            Fields.type,
-            Fields.access_id,
-            Fields.last_published,
-            Fields.content + "." + Fields.problem_types,
-        ])
-        # Mark which attributes are used for keyword search, in order of importance:
-        client.index(temp_index_name).update_searchable_attributes([
-            # Keyword search does _not_ search the course name, course ID, breadcrumbs, block type, or other fields.
-            Fields.display_name,
-            Fields.block_id,
-            Fields.content,
-            Fields.description,
-            Fields.tags,
-            Fields.collections,
-            # If we don't list the following sub-fields _explicitly_, they're only sometimes searchable - that is, they
-            # are searchable only if at least one document in the index has a value. If we didn't list them here and,
-            # say, there were no tags.level3 tags in the index, the client would get an error if trying to search for
-            # these sub-fields: "Attribute `tags.level3` is not searchable."
-            Fields.tags + "." + Fields.tags_taxonomy,
-            Fields.tags + "." + Fields.tags_level0,
-            Fields.tags + "." + Fields.tags_level1,
-            Fields.tags + "." + Fields.tags_level2,
-            Fields.tags + "." + Fields.tags_level3,
-            Fields.collections + "." + Fields.collections_display_name,
-            Fields.collections + "." + Fields.collections_key,
-            Fields.published + "." + Fields.display_name,
-            Fields.published + "." + Fields.published_description,
-        ])
-        # Mark which attributes can be used for sorting search results:
-        client.index(temp_index_name).update_sortable_attributes([
-            Fields.display_name,
-            Fields.created,
-            Fields.modified,
-            Fields.last_published,
-        ])
-
-        # Update the search ranking rules to let the (optional) "sort" parameter take precedence over keyword relevance.
-        # cf https://www.meilisearch.com/docs/learn/core_concepts/relevancy
-        client.index(temp_index_name).update_ranking_rules([
-            "sort",
-            "words",
-            "typo",
-            "proximity",
-            "attribute",
-            "exactness",
-        ])
+        if not incremental:
+            _configure_index(index_name)
 
         ############## Libraries ##############
         status_cb("Indexing libraries...")
 
-        def index_library(lib_key: str) -> list:
+        def index_library(lib_key: LibraryLocatorV2) -> list:
             docs = []
             for component in lib_api.get_library_components(lib_key):
                 try:
@@ -396,7 +439,7 @@ def index_library(lib_key: str) -> list:
             if docs:
                 try:
                     # Add all the docs in this library at once (usually faster than adding one at a time):
-                    _wait_for_meili_task(client.index(temp_index_name).add_documents(docs))
+                    _wait_for_meili_task(client.index(index_name).add_documents(docs))
                 except (TypeError, KeyError, MeilisearchError) as err:
                     status_cb(f"Error indexing library {lib_key}: {err}")
             return docs
@@ -416,7 +459,7 @@ def index_collection_batch(batch, num_done, library_key) -> int:
             if docs:
                 try:
                     # Add docs in batch of 100 at once (usually faster than adding one at a time):
-                    _wait_for_meili_task(client.index(temp_index_name).add_documents(docs))
+                    _wait_for_meili_task(client.index(index_name).add_documents(docs))
                 except (TypeError, KeyError, MeilisearchError) as err:
                     status_cb(f"Error indexing collection batch {p}: {err}")
             return num_done
@@ -439,6 +482,8 @@ def index_collection_batch(batch, num_done, library_key) -> int:
                     num_collections_done,
                     lib_key,
                 )
+            if incremental:
+                IncrementalIndexCompleted.objects.get_or_create(context_key=lib_key)
             status_cb(f"{num_collections_done}/{num_collections} collections indexed for library {lib_key}")
 
             num_contexts_done += 1
@@ -464,7 +509,7 @@ def add_with_children(block):
 
             if docs:
                 # Add all the docs in this course at once (usually faster than adding one at a time):
-                _wait_for_meili_task(client.index(temp_index_name).add_documents(docs))
+                _wait_for_meili_task(client.index(index_name).add_documents(docs))
             return docs
 
         paginator = Paginator(CourseOverview.objects.only('id', 'display_name'), 1000)
@@ -473,10 +518,16 @@ def add_with_children(block):
                 status_cb(
                     f"{num_contexts_done + 1}/{num_contexts}. Now indexing course {course.display_name} ({course.id})"
                 )
+                if course.id in keys_indexed:
+                    num_contexts_done += 1
+                    continue
                 course_docs = index_course(course)
+                if incremental:
+                    IncrementalIndexCompleted.objects.get_or_create(context_key=course.id)
                 num_contexts_done += 1
                 num_blocks_done += len(course_docs)
 
+    IncrementalIndexCompleted.objects.all().delete()
     status_cb(f"Done! {num_blocks_done} blocks indexed across {num_contexts_done} courses, collections and libraries.")
 
 
diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py
index 3767ebcba6c9..9fa2b30ea87a 100644
--- a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py
+++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py
@@ -19,7 +19,10 @@ class Command(BaseCommand):
 
     def add_arguments(self, parser):
         parser.add_argument('--experimental', action='store_true')
-        parser.set_defaults(experimental=False)
+        parser.add_argument('--reset', action='store_true')
+        parser.add_argument('--init', action='store_true')
+        parser.add_argument('--incremental', action='store_true')
+        parser.set_defaults(experimental=False, reset=False, init=False, incremental=False)
 
     def handle(self, *args, **options):
         """
@@ -34,4 +37,11 @@ def handle(self, *args, **options):
                 "Use the --experimental argument to acknowledge and run it."
             )
 
-        api.rebuild_index(self.stdout.write)
+        if options["reset"]:
+            api.reset_index(self.stdout.write)
+        elif options["init"]:
+            api.init_index(self.stdout.write)
+        elif options["incremental"]:
+            api.rebuild_index(self.stdout.write, incremental=True)
+        else:
+            api.rebuild_index(self.stdout.write)
diff --git a/openedx/core/djangoapps/content/search/migrations/0002_incrementalindexcompleted.py b/openedx/core/djangoapps/content/search/migrations/0002_incrementalindexcompleted.py
new file mode 100644
index 000000000000..a316c35a7dfe
--- /dev/null
+++ b/openedx/core/djangoapps/content/search/migrations/0002_incrementalindexcompleted.py
@@ -0,0 +1,21 @@
+# Generated by Django 4.2.16 on 2024-11-15 12:40
+
+from django.db import migrations, models
+import opaque_keys.edx.django.models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('search', '0001_initial'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='IncrementalIndexCompleted',
+            fields=[
+                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('context_key', opaque_keys.edx.django.models.LearningContextKeyField(max_length=255, unique=True)),
+            ],
+        ),
+    ]
diff --git a/openedx/core/djangoapps/content/search/models.py b/openedx/core/djangoapps/content/search/models.py
index 711c493ff895..91e12affc326 100644
--- a/openedx/core/djangoapps/content/search/models.py
+++ b/openedx/core/djangoapps/content/search/models.py
@@ -65,3 +65,12 @@ def get_access_ids_for_request(request: Request, omit_orgs: list[str] = None) ->
             course_clause | library_clause
         ).order_by('-id').values_list("id", flat=True)
     )
+
+
+class IncrementalIndexCompleted(models.Model):
+    """
+    Stores the contex keys of aleady indexed courses and libraries for incremental indexing.
+    """
+    context_key = LearningContextKeyField(
+        max_length=255, unique=True, null=False,
+    )
diff --git a/openedx/core/djangoapps/content/search/tests/test_api.py b/openedx/core/djangoapps/content/search/tests/test_api.py
index 0aa762fd187f..1c4961ac9838 100644
--- a/openedx/core/djangoapps/content/search/tests/test_api.py
+++ b/openedx/core/djangoapps/content/search/tests/test_api.py
@@ -10,12 +10,15 @@
 from opaque_keys.edx.keys import UsageKey
 
 import ddt
+import pytest
 from django.test import override_settings
 from freezegun import freeze_time
+from meilisearch.errors import MeilisearchApiError
 from openedx_learning.api import authoring as authoring_api
 from organizations.tests.factories import OrganizationFactory
 
 from common.djangoapps.student.tests.factories import UserFactory
+from openedx.core.djangoapps.content.search.models import IncrementalIndexCompleted
 from openedx.core.djangoapps.content_libraries import api as library_api
 from openedx.core.djangoapps.content_tagging import api as tagging_api
 from openedx.core.djangoapps.content.course_overviews.api import CourseOverview
@@ -239,6 +242,73 @@ def test_reindex_meilisearch(self, mock_meilisearch):
             any_order=True,
         )
 
+    @override_settings(MEILISEARCH_ENABLED=True)
+    def test_reindex_meilisearch_incremental(self, mock_meilisearch):
+
+        # Add tags field to doc, since reindex calls includes tags
+        doc_sequential = copy.deepcopy(self.doc_sequential)
+        doc_sequential["tags"] = {}
+        doc_vertical = copy.deepcopy(self.doc_vertical)
+        doc_vertical["tags"] = {}
+        doc_problem1 = copy.deepcopy(self.doc_problem1)
+        doc_problem1["tags"] = {}
+        doc_problem1["collections"] = {'display_name': [], 'key': []}
+        doc_problem2 = copy.deepcopy(self.doc_problem2)
+        doc_problem2["tags"] = {}
+        doc_problem2["collections"] = {'display_name': [], 'key': []}
+        doc_collection = copy.deepcopy(self.collection_dict)
+        doc_collection["tags"] = {}
+
+        api.rebuild_index(incremental=True)
+        assert mock_meilisearch.return_value.index.return_value.add_documents.call_count == 3
+        mock_meilisearch.return_value.index.return_value.add_documents.assert_has_calls(
+            [
+                call([doc_sequential, doc_vertical]),
+                call([doc_problem1, doc_problem2]),
+                call([doc_collection]),
+            ],
+            any_order=True,
+        )
+
+        # Now we simulate interruption by patching _wait_for_meili_task to raise an exception
+        def simulated_interruption():
+            yield
+            yield
+            raise Exception("Simulated interruption")
+        with patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task", side_effect=simulated_interruption()):
+            with pytest.raises(Exception, match="Simulated interruption"):
+                api.rebuild_index(incremental=True)
+        assert IncrementalIndexCompleted.objects.all().count() == 1
+        api.rebuild_index(incremental=True)
+        assert IncrementalIndexCompleted.objects.all().count() == 0
+        assert mock_meilisearch.return_value.index.return_value.add_documents.call_count == 7
+
+    @override_settings(MEILISEARCH_ENABLED=True)
+    def test_reset_meilisearch_index(self, mock_meilisearch):
+        api.reset_index()
+        mock_meilisearch.return_value.swap_indexes.assert_called_once()
+        mock_meilisearch.return_value.create_index.assert_called_once()
+        mock_meilisearch.return_value.delete_index.call_count = 2
+        api.reset_index()
+        mock_meilisearch.return_value.delete_index.call_count = 4
+
+    @override_settings(MEILISEARCH_ENABLED=True)
+    def test_init_meilisearch_index(self, mock_meilisearch):
+        api.init_index()
+        mock_meilisearch.return_value.swap_indexes.assert_not_called()
+        mock_meilisearch.return_value.create_index.assert_not_called()
+        mock_meilisearch.return_value.delete_index.assert_not_called()
+
+        mock_meilisearch.return_value.get_index.side_effect = [
+            MeilisearchApiError("Testing reindex", Mock(code="index_not_found", text=None)),
+            MeilisearchApiError("Testing reindex", Mock(code="index_not_found", text=None)),
+            Mock(),
+        ]
+        api.init_index()
+        mock_meilisearch.return_value.swap_indexes.assert_called_once()
+        mock_meilisearch.return_value.create_index.assert_called_once()
+        mock_meilisearch.return_value.delete_index.call_count = 2
+
     @override_settings(MEILISEARCH_ENABLED=True)
     @patch(
         "openedx.core.djangoapps.content.search.api.searchable_doc_for_collection",

From f54cbb461f3df94deedd8443fd249b32676ec4a0 Mon Sep 17 00:00:00 2001
From: Daniel Valenzuela <daniel.valenzuela@opencraft.com>
Date: Sat, 16 Nov 2024 01:10:48 -0300
Subject: [PATCH 2/7] fix: tests, linting and formatting

---
 openedx/core/djangoapps/content/search/api.py | 139 ++++++++++--------
 .../management/commands/reindex_studio.py     |   8 +-
 .../core/djangoapps/content/search/models.py  |   5 +-
 .../content/search/tests/test_api.py          |  37 +++--
 4 files changed, 107 insertions(+), 82 deletions(-)

diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py
index 9d0a2379a0bb..443bae2b1a65 100644
--- a/openedx/core/djangoapps/content/search/api.py
+++ b/openedx/core/djangoapps/content/search/api.py
@@ -230,68 +230,76 @@ def _configure_index(index_name):
     # Mark usage_key as unique (it's not the primary key for the index, but nevertheless must be unique):
     client.index(index_name).update_distinct_attribute(Fields.usage_key)
     # Mark which attributes can be used for filtering/faceted search:
-    client.index(index_name).update_filterable_attributes([
-        # Get specific block/collection using combination of block_id and context_key
-        Fields.block_id,
-        Fields.block_type,
-        Fields.context_key,
-        Fields.usage_key,
-        Fields.org,
-        Fields.tags,
-        Fields.tags + "." + Fields.tags_taxonomy,
-        Fields.tags + "." + Fields.tags_level0,
-        Fields.tags + "." + Fields.tags_level1,
-        Fields.tags + "." + Fields.tags_level2,
-        Fields.tags + "." + Fields.tags_level3,
-        Fields.collections,
-        Fields.collections + "." + Fields.collections_display_name,
-        Fields.collections + "." + Fields.collections_key,
-        Fields.type,
-        Fields.access_id,
-        Fields.last_published,
-        Fields.content + "." + Fields.problem_types,
-    ])
+    client.index(index_name).update_filterable_attributes(
+        [
+            # Get specific block/collection using combination of block_id and context_key
+            Fields.block_id,
+            Fields.block_type,
+            Fields.context_key,
+            Fields.usage_key,
+            Fields.org,
+            Fields.tags,
+            Fields.tags + "." + Fields.tags_taxonomy,
+            Fields.tags + "." + Fields.tags_level0,
+            Fields.tags + "." + Fields.tags_level1,
+            Fields.tags + "." + Fields.tags_level2,
+            Fields.tags + "." + Fields.tags_level3,
+            Fields.collections,
+            Fields.collections + "." + Fields.collections_display_name,
+            Fields.collections + "." + Fields.collections_key,
+            Fields.type,
+            Fields.access_id,
+            Fields.last_published,
+            Fields.content + "." + Fields.problem_types,
+        ]
+    )
     # Mark which attributes are used for keyword search, in order of importance:
-    client.index(index_name).update_searchable_attributes([
-        # Keyword search does _not_ search the course name, course ID, breadcrumbs, block type, or other fields.
-        Fields.display_name,
-        Fields.block_id,
-        Fields.content,
-        Fields.description,
-        Fields.tags,
-        Fields.collections,
-        # If we don't list the following sub-fields _explicitly_, they're only sometimes searchable - that is, they
-        # are searchable only if at least one document in the index has a value. If we didn't list them here and,
-        # say, there were no tags.level3 tags in the index, the client would get an error if trying to search for
-        # these sub-fields: "Attribute `tags.level3` is not searchable."
-        Fields.tags + "." + Fields.tags_taxonomy,
-        Fields.tags + "." + Fields.tags_level0,
-        Fields.tags + "." + Fields.tags_level1,
-        Fields.tags + "." + Fields.tags_level2,
-        Fields.tags + "." + Fields.tags_level3,
-        Fields.collections + "." + Fields.collections_display_name,
-        Fields.collections + "." + Fields.collections_key,
-        Fields.published + "." + Fields.display_name,
-        Fields.published + "." + Fields.published_description,
-    ])
+    client.index(index_name).update_searchable_attributes(
+        [
+            # Keyword search does _not_ search the course name, course ID, breadcrumbs, block type, or other fields.
+            Fields.display_name,
+            Fields.block_id,
+            Fields.content,
+            Fields.description,
+            Fields.tags,
+            Fields.collections,
+            # If we don't list the following sub-fields _explicitly_, they're only sometimes searchable - that is, they
+            # are searchable only if at least one document in the index has a value. If we didn't list them here and,
+            # say, there were no tags.level3 tags in the index, the client would get an error if trying to search for
+            # these sub-fields: "Attribute `tags.level3` is not searchable."
+            Fields.tags + "." + Fields.tags_taxonomy,
+            Fields.tags + "." + Fields.tags_level0,
+            Fields.tags + "." + Fields.tags_level1,
+            Fields.tags + "." + Fields.tags_level2,
+            Fields.tags + "." + Fields.tags_level3,
+            Fields.collections + "." + Fields.collections_display_name,
+            Fields.collections + "." + Fields.collections_key,
+            Fields.published + "." + Fields.display_name,
+            Fields.published + "." + Fields.published_description,
+        ]
+    )
     # Mark which attributes can be used for sorting search results:
-    client.index(index_name).update_sortable_attributes([
-        Fields.display_name,
-        Fields.created,
-        Fields.modified,
-        Fields.last_published,
-    ])
+    client.index(index_name).update_sortable_attributes(
+        [
+            Fields.display_name,
+            Fields.created,
+            Fields.modified,
+            Fields.last_published,
+        ]
+    )
 
     # Update the search ranking rules to let the (optional) "sort" parameter take precedence over keyword relevance.
     # cf https://www.meilisearch.com/docs/learn/core_concepts/relevancy
-    client.index(index_name).update_ranking_rules([
-        "sort",
-        "words",
-        "typo",
-        "proximity",
-        "attribute",
-        "exactness",
-    ])
+    client.index(index_name).update_ranking_rules(
+        [
+            "sort",
+            "words",
+            "typo",
+            "proximity",
+            "attribute",
+            "exactness",
+        ]
+    )
 
 
 def _recurse_children(block, fn, status_cb: Callable[[str], None] | None = None) -> None:
@@ -357,6 +365,9 @@ def is_meilisearch_enabled() -> bool:
 
 
 def reset_index(status_cb: Callable[[str], None] | None = None) -> None:
+    """
+    Reset the Meilisearch index, deleting all documents and reconfiguring it
+    """
     if status_cb is None:
         status_cb = log.info
 
@@ -368,19 +379,25 @@ def reset_index(status_cb: Callable[[str], None] | None = None) -> None:
 
 
 def init_index(status_cb: Callable[[str], None] | None = None, warn_cb: Callable[[str], None] | None = None) -> None:
+    """
+    Initialize the Meilisearch index, creating it and configuring it if it doesn't exist
+    """
     if status_cb is None:
         status_cb = log.info
     if warn_cb is None:
         warn_cb = log.warning
 
     if _index_exists(STUDIO_INDEX_NAME):
-        warn_cb("A rebuild of the index is required. Please run ./manage.py cms reindex_studio --experimental [--incremental]")
+        warn_cb(
+            "A rebuild of the index is required. Please run ./manage.py cms reindex_studio"
+            " --experimental [--incremental]"
+        )
         return
 
     reset_index(status_cb)
 
 
-def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=False) -> None:
+def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=False) -> None:  # lint-amnesty, pylint: disable=too-many-statements
     """
     Rebuild the Meilisearch index from scratch
     """
@@ -394,10 +411,10 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=Fa
     status_cb("Counting libraries...")
     keys_indexed = []
     if incremental:
-        keys_indexed = list(IncrementalIndexCompleted.objects.values_list('context_key', flat=True))
+        keys_indexed = list(IncrementalIndexCompleted.objects.values_list("context_key", flat=True))
     lib_keys = [
         lib.library_key
-        for lib in lib_api.ContentLibrary.objects.select_related('org').only('org', 'slug').order_by('-id')
+        for lib in lib_api.ContentLibrary.objects.select_related("org").only("org", "slug").order_by("-id")
         if lib.library_key not in keys_indexed
     ]
     num_libraries = len(lib_keys)
diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py
index 9fa2b30ea87a..e06070927b7d 100644
--- a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py
+++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py
@@ -18,10 +18,10 @@ class Command(BaseCommand):
     """
 
     def add_arguments(self, parser):
-        parser.add_argument('--experimental', action='store_true')
-        parser.add_argument('--reset', action='store_true')
-        parser.add_argument('--init', action='store_true')
-        parser.add_argument('--incremental', action='store_true')
+        parser.add_argument("--experimental", action="store_true")
+        parser.add_argument("--reset", action="store_true")
+        parser.add_argument("--init", action="store_true")
+        parser.add_argument("--incremental", action="store_true")
         parser.set_defaults(experimental=False, reset=False, init=False, incremental=False)
 
     def handle(self, *args, **options):
diff --git a/openedx/core/djangoapps/content/search/models.py b/openedx/core/djangoapps/content/search/models.py
index 91e12affc326..6fa53ef17b34 100644
--- a/openedx/core/djangoapps/content/search/models.py
+++ b/openedx/core/djangoapps/content/search/models.py
@@ -71,6 +71,9 @@ class IncrementalIndexCompleted(models.Model):
     """
     Stores the contex keys of aleady indexed courses and libraries for incremental indexing.
     """
+
     context_key = LearningContextKeyField(
-        max_length=255, unique=True, null=False,
+        max_length=255,
+        unique=True,
+        null=False,
     )
diff --git a/openedx/core/djangoapps/content/search/tests/test_api.py b/openedx/core/djangoapps/content/search/tests/test_api.py
index 1c4961ac9838..ac472c510c8b 100644
--- a/openedx/core/djangoapps/content/search/tests/test_api.py
+++ b/openedx/core/djangoapps/content/search/tests/test_api.py
@@ -18,7 +18,6 @@
 from organizations.tests.factories import OrganizationFactory
 
 from common.djangoapps.student.tests.factories import UserFactory
-from openedx.core.djangoapps.content.search.models import IncrementalIndexCompleted
 from openedx.core.djangoapps.content_libraries import api as library_api
 from openedx.core.djangoapps.content_tagging import api as tagging_api
 from openedx.core.djangoapps.content.course_overviews.api import CourseOverview
@@ -29,7 +28,7 @@
 try:
     # This import errors in the lms because content.search is not an installed app there.
     from .. import api
-    from ..models import SearchAccess
+    from ..models import SearchAccess, IncrementalIndexCompleted
 except RuntimeError:
     SearchAccess = {}
 
@@ -252,10 +251,10 @@ def test_reindex_meilisearch_incremental(self, mock_meilisearch):
         doc_vertical["tags"] = {}
         doc_problem1 = copy.deepcopy(self.doc_problem1)
         doc_problem1["tags"] = {}
-        doc_problem1["collections"] = {'display_name': [], 'key': []}
+        doc_problem1["collections"] = {"display_name": [], "key": []}
         doc_problem2 = copy.deepcopy(self.doc_problem2)
         doc_problem2["tags"] = {}
-        doc_problem2["collections"] = {'display_name': [], 'key': []}
+        doc_problem2["collections"] = {"display_name": [], "key": []}
         doc_collection = copy.deepcopy(self.collection_dict)
         doc_collection["tags"] = {}
 
@@ -270,18 +269,22 @@ def test_reindex_meilisearch_incremental(self, mock_meilisearch):
             any_order=True,
         )
 
-        # Now we simulate interruption by patching _wait_for_meili_task to raise an exception
-        def simulated_interruption():
-            yield
-            yield
-            raise Exception("Simulated interruption")
-        with patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task", side_effect=simulated_interruption()):
-            with pytest.raises(Exception, match="Simulated interruption"):
-                api.rebuild_index(incremental=True)
+        # Now we simulate interruption by passing this function to the status_cb argument
+        def simulated_interruption(message):
+            # this exception prevents courses from being indexed
+            if "Indexing courses" in message:
+                raise Exception("Simulated interruption")
+
+        with pytest.raises(Exception, match="Simulated interruption"):
+            api.rebuild_index(simulated_interruption, incremental=True)
+
+        # two more calls due to collections
+        assert mock_meilisearch.return_value.index.return_value.add_documents.call_count == 5
         assert IncrementalIndexCompleted.objects.all().count() == 1
         api.rebuild_index(incremental=True)
         assert IncrementalIndexCompleted.objects.all().count() == 0
-        assert mock_meilisearch.return_value.index.return_value.add_documents.call_count == 7
+        # one missing course indexed
+        assert mock_meilisearch.return_value.index.return_value.add_documents.call_count == 6
 
     @override_settings(MEILISEARCH_ENABLED=True)
     def test_reset_meilisearch_index(self, mock_meilisearch):
@@ -300,9 +303,11 @@ def test_init_meilisearch_index(self, mock_meilisearch):
         mock_meilisearch.return_value.delete_index.assert_not_called()
 
         mock_meilisearch.return_value.get_index.side_effect = [
-            MeilisearchApiError("Testing reindex", Mock(code="index_not_found", text=None)),
-            MeilisearchApiError("Testing reindex", Mock(code="index_not_found", text=None)),
-            Mock(),
+            MeilisearchApiError("Testing reindex", Mock(text='{"code":"index_not_found"}')),
+            MeilisearchApiError("Testing reindex", Mock(text='{"code":"index_not_found"}')),
+            Mock(created_at=1),
+            Mock(created_at=1),
+            Mock(created_at=1),
         ]
         api.init_index()
         mock_meilisearch.return_value.swap_indexes.assert_called_once()

From 0643c9b536a013006ee07367160b4b37c395a282 Mon Sep 17 00:00:00 2001
From: Daniel Valenzuela <daniel.valenzuela@opencraft.com>
Date: Mon, 18 Nov 2024 20:36:41 -0300
Subject: [PATCH 3/7] fix: improve output of init_index

---
 .../content/search/management/commands/reindex_studio.py        | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py
index e06070927b7d..2d8bb29f7a1f 100644
--- a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py
+++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py
@@ -40,7 +40,7 @@ def handle(self, *args, **options):
         if options["reset"]:
             api.reset_index(self.stdout.write)
         elif options["init"]:
-            api.init_index(self.stdout.write)
+            api.init_index(self.stdout.write, self.stderr.write)
         elif options["incremental"]:
             api.rebuild_index(self.stdout.write, incremental=True)
         else:

From 077e426f05b893548529e37310570241ba40f77b Mon Sep 17 00:00:00 2001
From: Daniel Valenzuela <daniel.valenzuela@opencraft.com>
Date: Thu, 28 Nov 2024 01:28:20 -0300
Subject: [PATCH 4/7] fix: address bradens comments

---
 openedx/core/djangoapps/content/search/api.py | 190 +++++++++++-------
 .../content/search/tests/test_api.py          |   8 +
 2 files changed, 126 insertions(+), 72 deletions(-)

diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py
index 443bae2b1a65..297505a21b26 100644
--- a/openedx/core/djangoapps/content/search/api.py
+++ b/openedx/core/djangoapps/content/search/api.py
@@ -62,6 +62,65 @@
 
 EXCLUDED_XBLOCK_TYPES = ['course', 'course_info']
 
+INDEX_DISTINCT_ATTRIBUTE = "usage_key"
+INDEX_FILTRABLE_ATTRIBUTES = [
+    # Get specific block/collection using combination of block_id and context_key
+    Fields.block_id,
+    Fields.block_type,
+    Fields.context_key,
+    Fields.usage_key,
+    Fields.org,
+    Fields.tags,
+    Fields.tags + "." + Fields.tags_taxonomy,
+    Fields.tags + "." + Fields.tags_level0,
+    Fields.tags + "." + Fields.tags_level1,
+    Fields.tags + "." + Fields.tags_level2,
+    Fields.tags + "." + Fields.tags_level3,
+    Fields.collections,
+    Fields.collections + "." + Fields.collections_display_name,
+    Fields.collections + "." + Fields.collections_key,
+    Fields.type,
+    Fields.access_id,
+    Fields.last_published,
+    Fields.content + "." + Fields.problem_types,
+]
+INDEX_SEARCHABLE_ATTRIBUTES = [
+    # Keyword search does _not_ search the course name, course ID, breadcrumbs, block type, or other fields.
+    Fields.display_name,
+    Fields.block_id,
+    Fields.content,
+    Fields.description,
+    Fields.tags,
+    Fields.collections,
+    # If we don't list the following sub-fields _explicitly_, they're only sometimes searchable - that is, they
+    # are searchable only if at least one document in the index has a value. If we didn't list them here and,
+    # say, there were no tags.level3 tags in the index, the client would get an error if trying to search for
+    # these sub-fields: "Attribute `tags.level3` is not searchable."
+    Fields.tags + "." + Fields.tags_taxonomy,
+    Fields.tags + "." + Fields.tags_level0,
+    Fields.tags + "." + Fields.tags_level1,
+    Fields.tags + "." + Fields.tags_level2,
+    Fields.tags + "." + Fields.tags_level3,
+    Fields.collections + "." + Fields.collections_display_name,
+    Fields.collections + "." + Fields.collections_key,
+    Fields.published + "." + Fields.display_name,
+    Fields.published + "." + Fields.published_description,
+]
+INDEX_SORTABLE_ATTRIBUTES = [
+    Fields.display_name,
+    Fields.created,
+    Fields.modified,
+    Fields.last_published,
+]
+INDEX_RANKING_RULES = [
+    "sort",
+    "words",
+    "typo",
+    "proximity",
+    "attribute",
+    "exactness",
+]
+
 
 @contextmanager
 def _index_rebuild_lock() -> Generator[str, None, None]:
@@ -217,6 +276,18 @@ def _using_temp_index(status_cb: Callable[[str], None] | None = None) -> Generat
         _wait_for_meili_task(client.delete_index(temp_index_name))
 
 
+def _index_is_empty(index_name: str) -> bool:
+    """
+    Check if an index is empty
+
+    Args:
+        index_name (str): The name of the index to check
+    """
+    client = _get_meilisearch_client()
+    index = client.get_index(index_name)
+    return index.get_stats().number_of_documents == 0
+
+
 def _configure_index(index_name):
     """
     Configure the index. The following index settings are best changed on an empty index.
@@ -228,78 +299,17 @@ def _configure_index(index_name):
     client = _get_meilisearch_client()
 
     # Mark usage_key as unique (it's not the primary key for the index, but nevertheless must be unique):
-    client.index(index_name).update_distinct_attribute(Fields.usage_key)
+    client.index(index_name).update_distinct_attribute(INDEX_DISTINCT_ATTRIBUTE)
     # Mark which attributes can be used for filtering/faceted search:
-    client.index(index_name).update_filterable_attributes(
-        [
-            # Get specific block/collection using combination of block_id and context_key
-            Fields.block_id,
-            Fields.block_type,
-            Fields.context_key,
-            Fields.usage_key,
-            Fields.org,
-            Fields.tags,
-            Fields.tags + "." + Fields.tags_taxonomy,
-            Fields.tags + "." + Fields.tags_level0,
-            Fields.tags + "." + Fields.tags_level1,
-            Fields.tags + "." + Fields.tags_level2,
-            Fields.tags + "." + Fields.tags_level3,
-            Fields.collections,
-            Fields.collections + "." + Fields.collections_display_name,
-            Fields.collections + "." + Fields.collections_key,
-            Fields.type,
-            Fields.access_id,
-            Fields.last_published,
-            Fields.content + "." + Fields.problem_types,
-        ]
-    )
+    client.index(index_name).update_filterable_attributes(INDEX_FILTRABLE_ATTRIBUTES)
     # Mark which attributes are used for keyword search, in order of importance:
-    client.index(index_name).update_searchable_attributes(
-        [
-            # Keyword search does _not_ search the course name, course ID, breadcrumbs, block type, or other fields.
-            Fields.display_name,
-            Fields.block_id,
-            Fields.content,
-            Fields.description,
-            Fields.tags,
-            Fields.collections,
-            # If we don't list the following sub-fields _explicitly_, they're only sometimes searchable - that is, they
-            # are searchable only if at least one document in the index has a value. If we didn't list them here and,
-            # say, there were no tags.level3 tags in the index, the client would get an error if trying to search for
-            # these sub-fields: "Attribute `tags.level3` is not searchable."
-            Fields.tags + "." + Fields.tags_taxonomy,
-            Fields.tags + "." + Fields.tags_level0,
-            Fields.tags + "." + Fields.tags_level1,
-            Fields.tags + "." + Fields.tags_level2,
-            Fields.tags + "." + Fields.tags_level3,
-            Fields.collections + "." + Fields.collections_display_name,
-            Fields.collections + "." + Fields.collections_key,
-            Fields.published + "." + Fields.display_name,
-            Fields.published + "." + Fields.published_description,
-        ]
-    )
+    client.index(index_name).update_searchable_attributes(INDEX_SEARCHABLE_ATTRIBUTES)
     # Mark which attributes can be used for sorting search results:
-    client.index(index_name).update_sortable_attributes(
-        [
-            Fields.display_name,
-            Fields.created,
-            Fields.modified,
-            Fields.last_published,
-        ]
-    )
+    client.index(index_name).update_sortable_attributes(INDEX_SORTABLE_ATTRIBUTES)
 
     # Update the search ranking rules to let the (optional) "sort" parameter take precedence over keyword relevance.
     # cf https://www.meilisearch.com/docs/learn/core_concepts/relevancy
-    client.index(index_name).update_ranking_rules(
-        [
-            "sort",
-            "words",
-            "typo",
-            "proximity",
-            "attribute",
-            "exactness",
-        ]
-    )
+    client.index(index_name).update_ranking_rules(INDEX_RANKING_RULES)
 
 
 def _recurse_children(block, fn, status_cb: Callable[[str], None] | None = None) -> None:
@@ -378,6 +388,32 @@ def reset_index(status_cb: Callable[[str], None] | None = None) -> None:
     status_cb("Index reset complete.")
 
 
+def _is_index_configured(index_name: str) -> bool:
+    """
+    Check if the index's settings match the expected configuration (list settings are compared ignoring order)
+
+    Args:
+        index_name (str): The name of the index to check
+    """
+    client = _get_meilisearch_client()
+    index = client.get_index(index_name)
+    settings = index.get_settings()
+    for k, v in (
+        ("distinctAttribute", INDEX_DISTINCT_ATTRIBUTE),
+        ("filterableAttributes", INDEX_FILTRABLE_ATTRIBUTES),
+        ("searchableAttributes", INDEX_SEARCHABLE_ATTRIBUTES),
+        ("sortableAttributes", INDEX_SORTABLE_ATTRIBUTES),
+        ("rankingRules", INDEX_RANKING_RULES),
+    ):
+        setting = settings.get(k, [])
+        if isinstance(v, list):
+            v = set(v)
+            setting = set(setting)
+        if setting != v:
+            return False
+    return True
+
+
 def init_index(status_cb: Callable[[str], None] | None = None, warn_cb: Callable[[str], None] | None = None) -> None:
     """
     Initialize the Meilisearch index, creating it and configuring it if it doesn't exist
@@ -388,10 +424,19 @@ def init_index(status_cb: Callable[[str], None] | None = None, warn_cb: Callable
         warn_cb = log.warning
 
     if _index_exists(STUDIO_INDEX_NAME):
-        warn_cb(
-            "A rebuild of the index is required. Please run ./manage.py cms reindex_studio"
-            " --experimental [--incremental]"
-        )
+        if _index_is_empty(STUDIO_INDEX_NAME):
+            warn_cb(
+                "The studio search index is empty. Please run ./manage.py cms reindex_studio"
+                " --experimental [--incremental]"
+            )
+            return
+        if not _is_index_configured(STUDIO_INDEX_NAME):
+            warn_cb(
+                "A rebuild of the index is required. Please run ./manage.py cms reindex_studio"
+                " --experimental [--incremental]"
+            )
+            return
+        status_cb("Index already exists and is configured.")
         return
 
     reset_index(status_cb)
@@ -424,8 +469,9 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=Fa
     num_courses = CourseOverview.objects.count()
 
     # Some counters so we can track our progress as indexing progresses:
-    num_contexts = num_courses + num_libraries
-    num_contexts_done = 0  # How many courses/libraries we've indexed
+    num_libs_skipped = len(keys_indexed)
+    num_contexts = num_courses + num_libraries + num_libs_skipped
+    num_contexts_done = 0 + num_libs_skipped  # How many courses/libraries we've indexed
     num_blocks_done = 0  # How many individual components/XBlocks we've indexed
 
     status_cb(f"Found {num_courses} courses, {num_libraries} libraries.")
diff --git a/openedx/core/djangoapps/content/search/tests/test_api.py b/openedx/core/djangoapps/content/search/tests/test_api.py
index ac472c510c8b..c9c2b2589a31 100644
--- a/openedx/core/djangoapps/content/search/tests/test_api.py
+++ b/openedx/core/djangoapps/content/search/tests/test_api.py
@@ -297,6 +297,14 @@ def test_reset_meilisearch_index(self, mock_meilisearch):
 
     @override_settings(MEILISEARCH_ENABLED=True)
     def test_init_meilisearch_index(self, mock_meilisearch):
+        # Test index already exists
+        api.init_index()
+        mock_meilisearch.return_value.swap_indexes.assert_not_called()
+        mock_meilisearch.return_value.create_index.assert_not_called()
+        mock_meilisearch.return_value.delete_index.assert_not_called()
+
+        # Test index already exists and has no documents
+        mock_meilisearch.return_value.get_index.return_value.get_stats.return_value.number_of_documents = 0
         api.init_index()
         mock_meilisearch.return_value.swap_indexes.assert_not_called()
         mock_meilisearch.return_value.create_index.assert_not_called()

From 4a0e89c7282e6a0f568823421c01cab3324a73ff Mon Sep 17 00:00:00 2001
From: Daniel Valenzuela <daniel.valenzuela@opencraft.com>
Date: Thu, 28 Nov 2024 09:37:59 -0300
Subject: [PATCH 5/7] fix: settings name overshadow

---
 openedx/core/djangoapps/content/search/api.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py
index 297505a21b26..688223ece13b 100644
--- a/openedx/core/djangoapps/content/search/api.py
+++ b/openedx/core/djangoapps/content/search/api.py
@@ -397,7 +397,7 @@ def _is_index_configured(index_name: str) -> bool:
     """
     client = _get_meilisearch_client()
     index = client.get_index(index_name)
-    settings = index.get_settings()
+    index_settings = index.get_settings()
     for k, v in (
         ("distinctAttribute", INDEX_DISTINCT_ATTRIBUTE),
         ("filterableAttributes", INDEX_FILTRABLE_ATTRIBUTES),
@@ -405,7 +405,7 @@ def _is_index_configured(index_name: str) -> bool:
         ("sortableAttributes", INDEX_SORTABLE_ATTRIBUTES),
         ("rankingRules", INDEX_RANKING_RULES),
     ):
-        setting = settings.get(k, [])
+        setting = index_settings.get(k, [])
         if isinstance(v, list):
             v = set(v)
             setting = set(setting)

From 85d280a4a8cfef6bfc0cc908b0435a25c3a70602 Mon Sep 17 00:00:00 2001
From: Daniel Valenzuela <daniel.valenzuela@opencraft.com>
Date: Mon, 2 Dec 2024 21:14:24 -0300
Subject: [PATCH 6/7] refactor: extract constants to index config module

---
 openedx/core/djangoapps/content/search/api.py | 70 +++----------------
 .../djangoapps/content/search/index_config.py | 69 ++++++++++++++++++
 2 files changed, 78 insertions(+), 61 deletions(-)
 create mode 100644 openedx/core/djangoapps/content/search/index_config.py

diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py
index 688223ece13b..a18c55bd3d22 100644
--- a/openedx/core/djangoapps/content/search/api.py
+++ b/openedx/core/djangoapps/content/search/api.py
@@ -25,6 +25,13 @@
 from common.djangoapps.student.role_helpers import get_course_roles
 from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
 from openedx.core.djangoapps.content.search.models import get_access_ids_for_request, IncrementalIndexCompleted
+from openedx.core.djangoapps.content.search.index_config import (
+    INDEX_DISTINCT_ATTRIBUTE,
+    INDEX_FILTERABLE_ATTRIBUTES,
+    INDEX_SEARCHABLE_ATTRIBUTES,
+    INDEX_SORTABLE_ATTRIBUTES,
+    INDEX_RANKING_RULES,
+)
 from openedx.core.djangoapps.content_libraries import api as lib_api
 from xmodule.modulestore.django import modulestore
 
@@ -62,65 +69,6 @@
 
 EXCLUDED_XBLOCK_TYPES = ['course', 'course_info']
 
-INDEX_DISTINCT_ATTRIBUTE = "usage_key"
-INDEX_FILTRABLE_ATTRIBUTES = [
-    # Get specific block/collection using combination of block_id and context_key
-    Fields.block_id,
-    Fields.block_type,
-    Fields.context_key,
-    Fields.usage_key,
-    Fields.org,
-    Fields.tags,
-    Fields.tags + "." + Fields.tags_taxonomy,
-    Fields.tags + "." + Fields.tags_level0,
-    Fields.tags + "." + Fields.tags_level1,
-    Fields.tags + "." + Fields.tags_level2,
-    Fields.tags + "." + Fields.tags_level3,
-    Fields.collections,
-    Fields.collections + "." + Fields.collections_display_name,
-    Fields.collections + "." + Fields.collections_key,
-    Fields.type,
-    Fields.access_id,
-    Fields.last_published,
-    Fields.content + "." + Fields.problem_types,
-]
-INDEX_SEARCHABLE_ATTRIBUTES = [
-    # Keyword search does _not_ search the course name, course ID, breadcrumbs, block type, or other fields.
-    Fields.display_name,
-    Fields.block_id,
-    Fields.content,
-    Fields.description,
-    Fields.tags,
-    Fields.collections,
-    # If we don't list the following sub-fields _explicitly_, they're only sometimes searchable - that is, they
-    # are searchable only if at least one document in the index has a value. If we didn't list them here and,
-    # say, there were no tags.level3 tags in the index, the client would get an error if trying to search for
-    # these sub-fields: "Attribute `tags.level3` is not searchable."
-    Fields.tags + "." + Fields.tags_taxonomy,
-    Fields.tags + "." + Fields.tags_level0,
-    Fields.tags + "." + Fields.tags_level1,
-    Fields.tags + "." + Fields.tags_level2,
-    Fields.tags + "." + Fields.tags_level3,
-    Fields.collections + "." + Fields.collections_display_name,
-    Fields.collections + "." + Fields.collections_key,
-    Fields.published + "." + Fields.display_name,
-    Fields.published + "." + Fields.published_description,
-]
-INDEX_SORTABLE_ATTRIBUTES = [
-    Fields.display_name,
-    Fields.created,
-    Fields.modified,
-    Fields.last_published,
-]
-INDEX_RANKING_RULES = [
-    "sort",
-    "words",
-    "typo",
-    "proximity",
-    "attribute",
-    "exactness",
-]
-
 
 @contextmanager
 def _index_rebuild_lock() -> Generator[str, None, None]:
@@ -301,7 +249,7 @@ def _configure_index(index_name):
     # Mark usage_key as unique (it's not the primary key for the index, but nevertheless must be unique):
     client.index(index_name).update_distinct_attribute(INDEX_DISTINCT_ATTRIBUTE)
     # Mark which attributes can be used for filtering/faceted search:
-    client.index(index_name).update_filterable_attributes(INDEX_FILTRABLE_ATTRIBUTES)
+    client.index(index_name).update_filterable_attributes(INDEX_FILTERABLE_ATTRIBUTES)
     # Mark which attributes are used for keyword search, in order of importance:
     client.index(index_name).update_searchable_attributes(INDEX_SEARCHABLE_ATTRIBUTES)
     # Mark which attributes can be used for sorting search results:
@@ -400,7 +348,7 @@ def _is_index_configured(index_name: str) -> bool:
     index_settings = index.get_settings()
     for k, v in (
         ("distinctAttribute", INDEX_DISTINCT_ATTRIBUTE),
-        ("filterableAttributes", INDEX_FILTRABLE_ATTRIBUTES),
+        ("filterableAttributes", INDEX_FILTERABLE_ATTRIBUTES),
         ("searchableAttributes", INDEX_SEARCHABLE_ATTRIBUTES),
         ("sortableAttributes", INDEX_SORTABLE_ATTRIBUTES),
         ("rankingRules", INDEX_RANKING_RULES),
diff --git a/openedx/core/djangoapps/content/search/index_config.py b/openedx/core/djangoapps/content/search/index_config.py
new file mode 100644
index 000000000000..e60db0e28dc7
--- /dev/null
+++ b/openedx/core/djangoapps/content/search/index_config.py
@@ -0,0 +1,69 @@
+from .documents import Fields
+
+
+INDEX_DISTINCT_ATTRIBUTE = Fields.usage_key
+
+# Mark which attributes can be used for filtering/faceted search:
+INDEX_FILTERABLE_ATTRIBUTES = [
+    # Get specific block/collection using combination of block_id and context_key
+    Fields.block_id,
+    Fields.block_type,
+    Fields.context_key,
+    Fields.usage_key,
+    Fields.org,
+    Fields.tags,
+    Fields.tags + "." + Fields.tags_taxonomy,
+    Fields.tags + "." + Fields.tags_level0,
+    Fields.tags + "." + Fields.tags_level1,
+    Fields.tags + "." + Fields.tags_level2,
+    Fields.tags + "." + Fields.tags_level3,
+    Fields.collections,
+    Fields.collections + "." + Fields.collections_display_name,
+    Fields.collections + "." + Fields.collections_key,
+    Fields.type,
+    Fields.access_id,
+    Fields.last_published,
+    Fields.content + "." + Fields.problem_types,
+]
+
+# Mark which attributes are used for keyword search, in order of importance:
+INDEX_SEARCHABLE_ATTRIBUTES = [
+    # Keyword search does _not_ search the course name, course ID, breadcrumbs, block type, or other fields.
+    Fields.display_name,
+    Fields.block_id,
+    Fields.content,
+    Fields.description,
+    Fields.tags,
+    Fields.collections,
+    # If we don't list the following sub-fields _explicitly_, they're only sometimes searchable - that is, they
+    # are searchable only if at least one document in the index has a value. If we didn't list them here and,
+    # say, there were no tags.level3 tags in the index, the client would get an error if trying to search for
+    # these sub-fields: "Attribute `tags.level3` is not searchable."
+    Fields.tags + "." + Fields.tags_taxonomy,
+    Fields.tags + "." + Fields.tags_level0,
+    Fields.tags + "." + Fields.tags_level1,
+    Fields.tags + "." + Fields.tags_level2,
+    Fields.tags + "." + Fields.tags_level3,
+    Fields.collections + "." + Fields.collections_display_name,
+    Fields.collections + "." + Fields.collections_key,
+    Fields.published + "." + Fields.display_name,
+    Fields.published + "." + Fields.published_description,
+]
+
+# Mark which attributes can be used for sorting search results:
+INDEX_SORTABLE_ATTRIBUTES = [
+    Fields.display_name,
+    Fields.created,
+    Fields.modified,
+    Fields.last_published,
+]
+
+# Ranking rules, in priority order: "sort" comes first so the (optional) "sort" parameter outranks keyword relevance.
+INDEX_RANKING_RULES = [
+    "sort",
+    "words",
+    "typo",
+    "proximity",
+    "attribute",
+    "exactness",
+]

From c41c44d441aaf8b7521c9e3c7dadd4a4057e0af2 Mon Sep 17 00:00:00 2001
From: Daniel Valenzuela <daniel.valenzuela@opencraft.com>
Date: Mon, 2 Dec 2024 22:16:56 -0300
Subject: [PATCH 7/7] fix: add index config docstring

---
 openedx/core/djangoapps/content/search/index_config.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/openedx/core/djangoapps/content/search/index_config.py b/openedx/core/djangoapps/content/search/index_config.py
index e60db0e28dc7..9570956e425e 100644
--- a/openedx/core/djangoapps/content/search/index_config.py
+++ b/openedx/core/djangoapps/content/search/index_config.py
@@ -1,3 +1,4 @@
+"""Configuration for the search index."""
 from .documents import Fields