From 94a35a0350c62d2cbabe21fa9bde2c59e6d328c5 Mon Sep 17 00:00:00 2001 From: XnpioChV Date: Sat, 19 Oct 2024 11:49:11 -0500 Subject: [PATCH 1/5] feat: Store "published display name" and "published description" in studio search index --- .../djangoapps/content/search/documents.py | 111 ++++++++++++++---- 1 file changed, 91 insertions(+), 20 deletions(-) diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index f520cd14e40f..1c2199ff4206 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -11,11 +11,13 @@ from opaque_keys.edx.keys import LearningContextKey, UsageKey from openedx_learning.api import authoring as authoring_api from opaque_keys.edx.locator import LibraryLocatorV2 +from rest_framework.exceptions import NotFound from openedx.core.djangoapps.content.search.models import SearchAccess from openedx.core.djangoapps.content_libraries import api as lib_api from openedx.core.djangoapps.content_tagging import api as tagging_api from openedx.core.djangoapps.xblock import api as xblock_api +from openedx.core.djangoapps.xblock.data import LatestVersion from openedx_learning.api.authoring_models import Collection log = logging.getLogger(__name__) @@ -74,6 +76,11 @@ class Fields: # Structural XBlocks may use this one day to indicate how many child blocks they ocntain. num_children = "num_children" + # Published data (dictionary) of this object + published = "published" + published_display_name = "display_name" + published_description = "description" + # Note: new fields or values can be added at any time, but if they need to be indexed for filtering or keyword # search, the index configuration will need to be changed, which is only done as part of the 'reindex_studio' # command (changing those settings on an large active index is not recommended). @@ -121,6 +128,48 @@ def searchable_doc_for_usage_key(usage_key: UsageKey) -> dict: } +def _get_content_from_block(block) -> dict: + """ + Get the content from block + """ + try: + content_data = block.index_dictionary() + # Will be something like: + # { + # 'content': {'display_name': '...', 'capa_content': '...'}, + # 'content_type': 'CAPA', + # 'problem_types': ['multiplechoiceresponse'] + # } + # Which we need to flatten: + if "content_type" in content_data: + del content_data["content_type"] # Redundant with our standard Fields.block_type field. + if "content" in content_data and isinstance(content_data["content"], dict): + content = content_data["content"] + if "display_name" in content: + del content["display_name"] + del content_data["content"] + content_data.update(content) + + return content_data + + except Exception as err: # pylint: disable=broad-except + log.exception(f"Failed to process index_dictionary for {block.usage_key}: {err}") + + +def _get_description_from_block_content(block_type, block_content) -> str: + """ + Generate description from block content + """ + result = None + + if block_type == 'html' and 'html_content' in block_content: + result = block_content['html_content'] + elif block_type == 'problem' and 'capa_content' in block_content: + result = block_content['capa_content'] + + return result + + def _fields_from_block(block) -> dict: """ Given an XBlock instance, call its index_dictionary() method to load any @@ -130,16 +179,18 @@ def _fields_from_block(block) -> dict: class implementation returns only: {"content": {"display_name": "..."}, "content_type": "..."} """ + block_type = block.scope_ids.block_type block_data = { Fields.usage_key: str(block.usage_key), Fields.block_id: str(block.usage_key.block_id), Fields.display_name: xblock_api.get_block_display_name(block), - Fields.block_type: block.scope_ids.block_type, + Fields.block_type: block_type, # This is called context_key so it's the same for courses and libraries Fields.context_key: str(block.usage_key.context_key), # same as lib_key Fields.org: str(block.usage_key.context_key.org), Fields.access_id: _meili_access_id_from_context_key(block.usage_key.context_key), - Fields.breadcrumbs: [] + Fields.breadcrumbs: [], + Fields.description: None, } # Get the breadcrumbs (course, section, subsection, etc.): if block.usage_key.context_key.is_course: # Getting parent is not yet implemented in Learning Core (for libraries). @@ -160,25 +211,14 @@ class implementation returns only: parent_data, ) try: - content_data = block.index_dictionary() - # Will be something like: - # { - # 'content': {'display_name': '...', 'capa_content': '...'}, - # 'content_type': 'CAPA', - # 'problem_types': ['multiplechoiceresponse'] - # } - # Which we need to flatten: - if "content_type" in content_data: - del content_data["content_type"] # Redundant with our standard Fields.block_type field. - if "content" in content_data and isinstance(content_data["content"], dict): - content = content_data["content"] - if "display_name" in content: - del content["display_name"] - del content_data["content"] - content_data.update(content) - # Now we have something like: - # { 'capa_content': '...', 'problem_types': ['multiplechoiceresponse'] } + content_data = _get_content_from_block(block) block_data[Fields.content] = content_data + + # Generate description from the content + description = _get_description_from_block_content(block_type, content_data) + if description: + block_data[Fields.description] = description + except Exception as err: # pylint: disable=broad-except log.exception(f"Failed to process index_dictionary for {block.usage_key}: {err}") return block_data @@ -295,6 +335,28 @@ def _collections_for_content_object(object_id: UsageKey | LearningContextKey) -> return result +def _published_data_from_block(block_published) -> dict: + """ + Given an library block get the published data. + """ + result = { + Fields.published: { + Fields.published_display_name: xblock_api.get_block_display_name(block_published), + } + } + + try: + content_data = _get_content_from_block(block_published) + result[Fields.published][Fields.published_description] = _get_description_from_block_content( + block_published.scope_ids.block_type, + content_data, + ) + except Exception as err: # pylint: disable=broad-except + log.exception(f"Failed to process index_dictionary for {block_published.usage_key}: {err}") + + return result + + def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetadata) -> dict: """ Generate a dictionary document suitable for ingestion into a search engine @@ -306,6 +368,12 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad """ library_name = lib_api.get_library(xblock_metadata.usage_key.context_key).title block = xblock_api.load_block(xblock_metadata.usage_key, user=None) + + try: + block_published = xblock_api.load_block(xblock_metadata.usage_key, user=None, version=LatestVersion.PUBLISHED) + except NotFound: + # Never published + block_published = None doc = searchable_doc_for_usage_key(xblock_metadata.usage_key) doc.update({ @@ -317,6 +385,9 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad }) doc.update(_fields_from_block(block)) + + if block_published: + doc.update(_published_data_from_block(block_published)) # Add the breadcrumbs. In v2 libraries, the library itself is not a "parent" of the XBlocks so we add it here: doc[Fields.breadcrumbs] = [{"display_name": library_name}] From 3583b50a618dd668d365a25511157dd4d9b9e455 Mon Sep 17 00:00:00 2001 From: XnpioChV Date: Sat, 19 Oct 2024 12:38:54 -0500 Subject: [PATCH 2/5] test: Testing index for published blocks --- .../djangoapps/content/search/documents.py | 7 +- .../content/search/tests/test_documents.py | 125 ++++++++++++++++++ 2 files changed, 130 insertions(+), 2 deletions(-) diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index 1c2199ff4206..90ae2fd1c0be 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -190,7 +190,6 @@ class implementation returns only: Fields.org: str(block.usage_key.context_key.org), Fields.access_id: _meili_access_id_from_context_key(block.usage_key.context_key), Fields.breadcrumbs: [], - Fields.description: None, } # Get the breadcrumbs (course, section, subsection, etc.): if block.usage_key.context_key.is_course: # Getting parent is not yet implemented in Learning Core (for libraries). @@ -347,10 +346,14 @@ def _published_data_from_block(block_published) -> dict: try: content_data = _get_content_from_block(block_published) - result[Fields.published][Fields.published_description] = _get_description_from_block_content( + + description = _get_description_from_block_content( block_published.scope_ids.block_type, content_data, ) + + if description: + result[Fields.published][Fields.published_description] = description except Exception as err: # pylint: disable=broad-except log.exception(f"Failed to process index_dictionary for {block_published.usage_key}: {err}") diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py index 755ab4d19ad3..e0a604c24feb 100644 --- a/openedx/core/djangoapps/content/search/tests/test_documents.py +++ b/openedx/core/djangoapps/content/search/tests/test_documents.py @@ -154,6 +154,7 @@ def test_problem_block(self): "org": "edX", "access_id": self.toy_course_access_id, "display_name": "Test Problem", + "description": "What is a test?", "breadcrumbs": [ { 'display_name': 'Toy Course', @@ -202,6 +203,8 @@ def test_html_block(self): "org": "edX", "access_id": self.toy_course_access_id, "display_name": "Text", + "description": "This is a link to another page and some Chinese 四節比分和七年前 Some " + "more Chinese 四節比分和七年前 ", "breadcrumbs": [ { 'display_name': 'Toy Course', @@ -297,6 +300,128 @@ def test_html_library_block(self): }, } + def test_html_published_library_block(self): + library_api.publish_changes(self.library.key) + + doc = searchable_doc_for_library_block(self.library_block) + doc.update(searchable_doc_tags(self.library_block.usage_key)) + doc.update(searchable_doc_collections(self.library_block.usage_key)) + + assert doc == { + "id": "lbedx2012_fallhtmltext2-4bb47d67", + "type": "library_block", + "block_type": "html", + "usage_key": "lb:edX:2012_Fall:html:text2", + "block_id": "text2", + "context_key": "lib:edX:2012_Fall", + "org": "edX", + "access_id": self.library_access_id, + "display_name": "Text", + "breadcrumbs": [ + { + "display_name": "some content_library", + }, + ], + "last_published": None, + "created": 1680674828.0, + "modified": 1680674828.0, + "content": { + "html_content": "", + }, + "collections": { + "key": ["TOY_COLLECTION"], + "display_name": ["Toy Collection"], + }, + "tags": { + "taxonomy": ["Difficulty"], + "level0": ["Difficulty > Normal"], + }, + 'published': {'display_name': 'Text'}, + } + + # Update library block to create a draft + olx_str = 'This is a Test

]]>' + library_api.set_library_block_olx(self.library_block.usage_key, olx_str) + + doc = searchable_doc_for_library_block(self.library_block) + doc.update(searchable_doc_tags(self.library_block.usage_key)) + doc.update(searchable_doc_collections(self.library_block.usage_key)) + + assert doc == { + "id": "lbedx2012_fallhtmltext2-4bb47d67", + "type": "library_block", + "block_type": "html", + "usage_key": "lb:edX:2012_Fall:html:text2", + "block_id": "text2", + "context_key": "lib:edX:2012_Fall", + "org": "edX", + "access_id": self.library_access_id, + "display_name": "Text 2", + "description": "This is a Test", + "breadcrumbs": [ + { + "display_name": "some content_library", + }, + ], + "last_published": None, + "created": 1680674828.0, + "modified": 1680674828.0, + "content": { + "html_content": "This is a Test", + }, + "collections": { + "key": ["TOY_COLLECTION"], + "display_name": ["Toy Collection"], + }, + "tags": { + "taxonomy": ["Difficulty"], + "level0": ["Difficulty > Normal"], + }, + "published": {"display_name": "Text"}, + } + + # Publish new changes + library_api.publish_changes(self.library.key) + doc = searchable_doc_for_library_block(self.library_block) + doc.update(searchable_doc_tags(self.library_block.usage_key)) + doc.update(searchable_doc_collections(self.library_block.usage_key)) + + assert doc == { + "id": "lbedx2012_fallhtmltext2-4bb47d67", + "type": "library_block", + "block_type": "html", + "usage_key": "lb:edX:2012_Fall:html:text2", + "block_id": "text2", + "context_key": "lib:edX:2012_Fall", + "org": "edX", + "access_id": self.library_access_id, + "display_name": "Text 2", + "description": "This is a Test", + "breadcrumbs": [ + { + "display_name": "some content_library", + }, + ], + "last_published": None, + "created": 1680674828.0, + "modified": 1680674828.0, + "content": { + "html_content": "This is a Test", + }, + "collections": { + "key": ["TOY_COLLECTION"], + "display_name": ["Toy Collection"], + }, + "tags": { + "taxonomy": ["Difficulty"], + "level0": ["Difficulty > Normal"], + }, + "published": { + "display_name": "Text 2", + "description": "This is a Test", + }, + } + def test_collection_with_library(self): doc = searchable_doc_for_collection(self.library.key, self.collection.key) doc.update(searchable_doc_tags_for_collection(self.library.key, self.collection.key)) From 198ee6ee0561cf40ab1bdfa06cfd475e3f07868e Mon Sep 17 00:00:00 2001 From: XnpioChV Date: Sat, 19 Oct 2024 12:42:09 -0500 Subject: [PATCH 3/5] style: Nits on the code --- openedx/core/djangoapps/content/search/documents.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index 90ae2fd1c0be..fb385ccc4503 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -161,12 +161,12 @@ def _get_description_from_block_content(block_type, block_content) -> str: Generate description from block content """ result = None - + if block_type == 'html' and 'html_content' in block_content: result = block_content['html_content'] elif block_type == 'problem' and 'capa_content' in block_content: result = block_content['capa_content'] - + return result @@ -210,7 +210,7 @@ class implementation returns only: parent_data, ) try: - content_data = _get_content_from_block(block) + content_data = _get_content_from_block(block) block_data[Fields.content] = content_data # Generate description from the content @@ -345,7 +345,7 @@ def _published_data_from_block(block_published) -> dict: } try: - content_data = _get_content_from_block(block_published) + content_data = _get_content_from_block(block_published) description = _get_description_from_block_content( block_published.scope_ids.block_type, @@ -371,7 +371,7 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad """ library_name = lib_api.get_library(xblock_metadata.usage_key.context_key).title block = xblock_api.load_block(xblock_metadata.usage_key, user=None) - + try: block_published = xblock_api.load_block(xblock_metadata.usage_key, user=None, version=LatestVersion.PUBLISHED) except NotFound: @@ -388,7 +388,7 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad }) doc.update(_fields_from_block(block)) - + if block_published: doc.update(_published_data_from_block(block_published)) From d6b34ff794782aaf8fa8e6f1510f80d0a10c7384 Mon Sep 17 00:00:00 2001 From: XnpioChV Date: Sat, 19 Oct 2024 13:11:13 -0500 Subject: [PATCH 4/5] fix: Fix tests --- openedx/core/djangoapps/content/search/tests/test_handlers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/openedx/core/djangoapps/content/search/tests/test_handlers.py b/openedx/core/djangoapps/content/search/tests/test_handlers.py index bdc4814d1c8f..3577cbfc5692 100644 --- a/openedx/core/djangoapps/content/search/tests/test_handlers.py +++ b/openedx/core/djangoapps/content/search/tests/test_handlers.py @@ -176,6 +176,7 @@ def test_create_delete_library_block(self, meilisearch_client): with freeze_time(published_date): library_api.publish_changes(library.key) doc_problem["last_published"] = published_date.timestamp() + doc_problem["published"] = {"display_name": "Blank Problem"} meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_problem]) # Delete the Library Block From e26c28648ebaee69dbd0614a7bb8a7106acb20de Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Mon, 21 Oct 2024 11:54:00 -0700 Subject: [PATCH 5/5] feat: add published fields to the keyword searchable attributes list --- openedx/core/djangoapps/content/search/api.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py index 17338f20ab83..b1d224b411e4 100644 --- a/openedx/core/djangoapps/content/search/api.py +++ b/openedx/core/djangoapps/content/search/api.py @@ -356,6 +356,8 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None: Fields.tags + "." + Fields.tags_level3, Fields.collections + "." + Fields.collections_display_name, Fields.collections + "." + Fields.collections_key, + Fields.published + "." + Fields.display_name, + Fields.published + "." + Fields.published_description, ]) # Mark which attributes can be used for sorting search results: client.index(temp_index_name).update_sortable_attributes([