From 94a35a0350c62d2cbabe21fa9bde2c59e6d328c5 Mon Sep 17 00:00:00 2001
From: XnpioChV
Date: Sat, 19 Oct 2024 11:49:11 -0500
Subject: [PATCH 1/5] feat: Store "published display name" and "published
description" in studio search index
---
.../djangoapps/content/search/documents.py | 111 ++++++++++++++----
1 file changed, 91 insertions(+), 20 deletions(-)
diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py
index f520cd14e40f..1c2199ff4206 100644
--- a/openedx/core/djangoapps/content/search/documents.py
+++ b/openedx/core/djangoapps/content/search/documents.py
@@ -11,11 +11,13 @@
from opaque_keys.edx.keys import LearningContextKey, UsageKey
from openedx_learning.api import authoring as authoring_api
from opaque_keys.edx.locator import LibraryLocatorV2
+from rest_framework.exceptions import NotFound
from openedx.core.djangoapps.content.search.models import SearchAccess
from openedx.core.djangoapps.content_libraries import api as lib_api
from openedx.core.djangoapps.content_tagging import api as tagging_api
from openedx.core.djangoapps.xblock import api as xblock_api
+from openedx.core.djangoapps.xblock.data import LatestVersion
from openedx_learning.api.authoring_models import Collection
log = logging.getLogger(__name__)
@@ -74,6 +76,11 @@ class Fields:
# Structural XBlocks may use this one day to indicate how many child blocks they ocntain.
num_children = "num_children"
+ # Published data (dictionary) of this object
+ published = "published"
+ published_display_name = "display_name"
+ published_description = "description"
+
# Note: new fields or values can be added at any time, but if they need to be indexed for filtering or keyword
# search, the index configuration will need to be changed, which is only done as part of the 'reindex_studio'
# command (changing those settings on an large active index is not recommended).
@@ -121,6 +128,48 @@ def searchable_doc_for_usage_key(usage_key: UsageKey) -> dict:
}
+def _get_content_from_block(block) -> dict:
+ """
+ Get the content from block
+ """
+ try:
+ content_data = block.index_dictionary()
+ # Will be something like:
+ # {
+ # 'content': {'display_name': '...', 'capa_content': '...'},
+ # 'content_type': 'CAPA',
+ # 'problem_types': ['multiplechoiceresponse']
+ # }
+ # Which we need to flatten:
+ if "content_type" in content_data:
+ del content_data["content_type"] # Redundant with our standard Fields.block_type field.
+ if "content" in content_data and isinstance(content_data["content"], dict):
+ content = content_data["content"]
+ if "display_name" in content:
+ del content["display_name"]
+ del content_data["content"]
+ content_data.update(content)
+
+ return content_data
+
+ except Exception as err: # pylint: disable=broad-except
+ log.exception(f"Failed to process index_dictionary for {block.usage_key}: {err}")
+
+
+def _get_description_from_block_content(block_type, block_content) -> str:
+ """
+ Generate description from block content
+ """
+ result = None
+
+ if block_type == 'html' and 'html_content' in block_content:
+ result = block_content['html_content']
+ elif block_type == 'problem' and 'capa_content' in block_content:
+ result = block_content['capa_content']
+
+ return result
+
+
def _fields_from_block(block) -> dict:
"""
Given an XBlock instance, call its index_dictionary() method to load any
@@ -130,16 +179,18 @@ def _fields_from_block(block) -> dict:
class implementation returns only:
{"content": {"display_name": "..."}, "content_type": "..."}
"""
+ block_type = block.scope_ids.block_type
block_data = {
Fields.usage_key: str(block.usage_key),
Fields.block_id: str(block.usage_key.block_id),
Fields.display_name: xblock_api.get_block_display_name(block),
- Fields.block_type: block.scope_ids.block_type,
+ Fields.block_type: block_type,
# This is called context_key so it's the same for courses and libraries
Fields.context_key: str(block.usage_key.context_key), # same as lib_key
Fields.org: str(block.usage_key.context_key.org),
Fields.access_id: _meili_access_id_from_context_key(block.usage_key.context_key),
- Fields.breadcrumbs: []
+ Fields.breadcrumbs: [],
+ Fields.description: None,
}
# Get the breadcrumbs (course, section, subsection, etc.):
if block.usage_key.context_key.is_course: # Getting parent is not yet implemented in Learning Core (for libraries).
@@ -160,25 +211,14 @@ class implementation returns only:
parent_data,
)
try:
- content_data = block.index_dictionary()
- # Will be something like:
- # {
- # 'content': {'display_name': '...', 'capa_content': '...'},
- # 'content_type': 'CAPA',
- # 'problem_types': ['multiplechoiceresponse']
- # }
- # Which we need to flatten:
- if "content_type" in content_data:
- del content_data["content_type"] # Redundant with our standard Fields.block_type field.
- if "content" in content_data and isinstance(content_data["content"], dict):
- content = content_data["content"]
- if "display_name" in content:
- del content["display_name"]
- del content_data["content"]
- content_data.update(content)
- # Now we have something like:
- # { 'capa_content': '...', 'problem_types': ['multiplechoiceresponse'] }
+ content_data = _get_content_from_block(block)
block_data[Fields.content] = content_data
+
+ # Generate description from the content
+ description = _get_description_from_block_content(block_type, content_data)
+ if description:
+ block_data[Fields.description] = description
+
except Exception as err: # pylint: disable=broad-except
log.exception(f"Failed to process index_dictionary for {block.usage_key}: {err}")
return block_data
@@ -295,6 +335,28 @@ def _collections_for_content_object(object_id: UsageKey | LearningContextKey) ->
return result
+def _published_data_from_block(block_published) -> dict:
+ """
+ Given an library block get the published data.
+ """
+ result = {
+ Fields.published: {
+ Fields.published_display_name: xblock_api.get_block_display_name(block_published),
+ }
+ }
+
+ try:
+ content_data = _get_content_from_block(block_published)
+ result[Fields.published][Fields.published_description] = _get_description_from_block_content(
+ block_published.scope_ids.block_type,
+ content_data,
+ )
+ except Exception as err: # pylint: disable=broad-except
+ log.exception(f"Failed to process index_dictionary for {block_published.usage_key}: {err}")
+
+ return result
+
+
def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetadata) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
@@ -306,6 +368,12 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad
"""
library_name = lib_api.get_library(xblock_metadata.usage_key.context_key).title
block = xblock_api.load_block(xblock_metadata.usage_key, user=None)
+
+ try:
+ block_published = xblock_api.load_block(xblock_metadata.usage_key, user=None, version=LatestVersion.PUBLISHED)
+ except NotFound:
+ # Never published
+ block_published = None
doc = searchable_doc_for_usage_key(xblock_metadata.usage_key)
doc.update({
@@ -317,6 +385,9 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad
})
doc.update(_fields_from_block(block))
+
+ if block_published:
+ doc.update(_published_data_from_block(block_published))
# Add the breadcrumbs. In v2 libraries, the library itself is not a "parent" of the XBlocks so we add it here:
doc[Fields.breadcrumbs] = [{"display_name": library_name}]
From 3583b50a618dd668d365a25511157dd4d9b9e455 Mon Sep 17 00:00:00 2001
From: XnpioChV
Date: Sat, 19 Oct 2024 12:38:54 -0500
Subject: [PATCH 2/5] test: Testing index for published blocks
---
.../djangoapps/content/search/documents.py | 7 +-
.../content/search/tests/test_documents.py | 125 ++++++++++++++++++
2 files changed, 130 insertions(+), 2 deletions(-)
diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py
index 1c2199ff4206..90ae2fd1c0be 100644
--- a/openedx/core/djangoapps/content/search/documents.py
+++ b/openedx/core/djangoapps/content/search/documents.py
@@ -190,7 +190,6 @@ class implementation returns only:
Fields.org: str(block.usage_key.context_key.org),
Fields.access_id: _meili_access_id_from_context_key(block.usage_key.context_key),
Fields.breadcrumbs: [],
- Fields.description: None,
}
# Get the breadcrumbs (course, section, subsection, etc.):
if block.usage_key.context_key.is_course: # Getting parent is not yet implemented in Learning Core (for libraries).
@@ -347,10 +346,14 @@ def _published_data_from_block(block_published) -> dict:
try:
content_data = _get_content_from_block(block_published)
- result[Fields.published][Fields.published_description] = _get_description_from_block_content(
+
+ description = _get_description_from_block_content(
block_published.scope_ids.block_type,
content_data,
)
+
+ if description:
+ result[Fields.published][Fields.published_description] = description
except Exception as err: # pylint: disable=broad-except
log.exception(f"Failed to process index_dictionary for {block_published.usage_key}: {err}")
diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py
index 755ab4d19ad3..e0a604c24feb 100644
--- a/openedx/core/djangoapps/content/search/tests/test_documents.py
+++ b/openedx/core/djangoapps/content/search/tests/test_documents.py
@@ -154,6 +154,7 @@ def test_problem_block(self):
"org": "edX",
"access_id": self.toy_course_access_id,
"display_name": "Test Problem",
+ "description": "What is a test?",
"breadcrumbs": [
{
'display_name': 'Toy Course',
@@ -202,6 +203,8 @@ def test_html_block(self):
"org": "edX",
"access_id": self.toy_course_access_id,
"display_name": "Text",
+ "description": "This is a link to another page and some Chinese 四節比分和七年前 Some "
+ "more Chinese 四節比分和七年前 ",
"breadcrumbs": [
{
'display_name': 'Toy Course',
@@ -297,6 +300,128 @@ def test_html_library_block(self):
},
}
+ def test_html_published_library_block(self):
+ library_api.publish_changes(self.library.key)
+
+ doc = searchable_doc_for_library_block(self.library_block)
+ doc.update(searchable_doc_tags(self.library_block.usage_key))
+ doc.update(searchable_doc_collections(self.library_block.usage_key))
+
+ assert doc == {
+ "id": "lbedx2012_fallhtmltext2-4bb47d67",
+ "type": "library_block",
+ "block_type": "html",
+ "usage_key": "lb:edX:2012_Fall:html:text2",
+ "block_id": "text2",
+ "context_key": "lib:edX:2012_Fall",
+ "org": "edX",
+ "access_id": self.library_access_id,
+ "display_name": "Text",
+ "breadcrumbs": [
+ {
+ "display_name": "some content_library",
+ },
+ ],
+ "last_published": None,
+ "created": 1680674828.0,
+ "modified": 1680674828.0,
+ "content": {
+ "html_content": "",
+ },
+ "collections": {
+ "key": ["TOY_COLLECTION"],
+ "display_name": ["Toy Collection"],
+ },
+ "tags": {
+ "taxonomy": ["Difficulty"],
+ "level0": ["Difficulty > Normal"],
+ },
+ 'published': {'display_name': 'Text'},
+ }
+
+ # Update library block to create a draft
+ olx_str = 'This is a Test
]]>'
+ library_api.set_library_block_olx(self.library_block.usage_key, olx_str)
+
+ doc = searchable_doc_for_library_block(self.library_block)
+ doc.update(searchable_doc_tags(self.library_block.usage_key))
+ doc.update(searchable_doc_collections(self.library_block.usage_key))
+
+ assert doc == {
+ "id": "lbedx2012_fallhtmltext2-4bb47d67",
+ "type": "library_block",
+ "block_type": "html",
+ "usage_key": "lb:edX:2012_Fall:html:text2",
+ "block_id": "text2",
+ "context_key": "lib:edX:2012_Fall",
+ "org": "edX",
+ "access_id": self.library_access_id,
+ "display_name": "Text 2",
+ "description": "This is a Test",
+ "breadcrumbs": [
+ {
+ "display_name": "some content_library",
+ },
+ ],
+ "last_published": None,
+ "created": 1680674828.0,
+ "modified": 1680674828.0,
+ "content": {
+ "html_content": "This is a Test",
+ },
+ "collections": {
+ "key": ["TOY_COLLECTION"],
+ "display_name": ["Toy Collection"],
+ },
+ "tags": {
+ "taxonomy": ["Difficulty"],
+ "level0": ["Difficulty > Normal"],
+ },
+ "published": {"display_name": "Text"},
+ }
+
+ # Publish new changes
+ library_api.publish_changes(self.library.key)
+ doc = searchable_doc_for_library_block(self.library_block)
+ doc.update(searchable_doc_tags(self.library_block.usage_key))
+ doc.update(searchable_doc_collections(self.library_block.usage_key))
+
+ assert doc == {
+ "id": "lbedx2012_fallhtmltext2-4bb47d67",
+ "type": "library_block",
+ "block_type": "html",
+ "usage_key": "lb:edX:2012_Fall:html:text2",
+ "block_id": "text2",
+ "context_key": "lib:edX:2012_Fall",
+ "org": "edX",
+ "access_id": self.library_access_id,
+ "display_name": "Text 2",
+ "description": "This is a Test",
+ "breadcrumbs": [
+ {
+ "display_name": "some content_library",
+ },
+ ],
+ "last_published": None,
+ "created": 1680674828.0,
+ "modified": 1680674828.0,
+ "content": {
+ "html_content": "This is a Test",
+ },
+ "collections": {
+ "key": ["TOY_COLLECTION"],
+ "display_name": ["Toy Collection"],
+ },
+ "tags": {
+ "taxonomy": ["Difficulty"],
+ "level0": ["Difficulty > Normal"],
+ },
+ "published": {
+ "display_name": "Text 2",
+ "description": "This is a Test",
+ },
+ }
+
def test_collection_with_library(self):
doc = searchable_doc_for_collection(self.library.key, self.collection.key)
doc.update(searchable_doc_tags_for_collection(self.library.key, self.collection.key))
From 198ee6ee0561cf40ab1bdfa06cfd475e3f07868e Mon Sep 17 00:00:00 2001
From: XnpioChV
Date: Sat, 19 Oct 2024 12:42:09 -0500
Subject: [PATCH 3/5] style: Nits on the code
---
openedx/core/djangoapps/content/search/documents.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py
index 90ae2fd1c0be..fb385ccc4503 100644
--- a/openedx/core/djangoapps/content/search/documents.py
+++ b/openedx/core/djangoapps/content/search/documents.py
@@ -161,12 +161,12 @@ def _get_description_from_block_content(block_type, block_content) -> str:
Generate description from block content
"""
result = None
-
+
if block_type == 'html' and 'html_content' in block_content:
result = block_content['html_content']
elif block_type == 'problem' and 'capa_content' in block_content:
result = block_content['capa_content']
-
+
return result
@@ -210,7 +210,7 @@ class implementation returns only:
parent_data,
)
try:
- content_data = _get_content_from_block(block)
+ content_data = _get_content_from_block(block)
block_data[Fields.content] = content_data
# Generate description from the content
@@ -345,7 +345,7 @@ def _published_data_from_block(block_published) -> dict:
}
try:
- content_data = _get_content_from_block(block_published)
+ content_data = _get_content_from_block(block_published)
description = _get_description_from_block_content(
block_published.scope_ids.block_type,
@@ -371,7 +371,7 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad
"""
library_name = lib_api.get_library(xblock_metadata.usage_key.context_key).title
block = xblock_api.load_block(xblock_metadata.usage_key, user=None)
-
+
try:
block_published = xblock_api.load_block(xblock_metadata.usage_key, user=None, version=LatestVersion.PUBLISHED)
except NotFound:
@@ -388,7 +388,7 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad
})
doc.update(_fields_from_block(block))
-
+
if block_published:
doc.update(_published_data_from_block(block_published))
From d6b34ff794782aaf8fa8e6f1510f80d0a10c7384 Mon Sep 17 00:00:00 2001
From: XnpioChV
Date: Sat, 19 Oct 2024 13:11:13 -0500
Subject: [PATCH 4/5] fix: Fix tests
---
openedx/core/djangoapps/content/search/tests/test_handlers.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/openedx/core/djangoapps/content/search/tests/test_handlers.py b/openedx/core/djangoapps/content/search/tests/test_handlers.py
index bdc4814d1c8f..3577cbfc5692 100644
--- a/openedx/core/djangoapps/content/search/tests/test_handlers.py
+++ b/openedx/core/djangoapps/content/search/tests/test_handlers.py
@@ -176,6 +176,7 @@ def test_create_delete_library_block(self, meilisearch_client):
with freeze_time(published_date):
library_api.publish_changes(library.key)
doc_problem["last_published"] = published_date.timestamp()
+ doc_problem["published"] = {"display_name": "Blank Problem"}
meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_problem])
# Delete the Library Block
From e26c28648ebaee69dbd0614a7bb8a7106acb20de Mon Sep 17 00:00:00 2001
From: Braden MacDonald
Date: Mon, 21 Oct 2024 11:54:00 -0700
Subject: [PATCH 5/5] feat: add published fields to the keyword searchable
attributes list
---
openedx/core/djangoapps/content/search/api.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py
index 17338f20ab83..b1d224b411e4 100644
--- a/openedx/core/djangoapps/content/search/api.py
+++ b/openedx/core/djangoapps/content/search/api.py
@@ -356,6 +356,8 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
Fields.tags + "." + Fields.tags_level3,
Fields.collections + "." + Fields.collections_display_name,
Fields.collections + "." + Fields.collections_key,
+ Fields.published + "." + Fields.display_name,
+ Fields.published + "." + Fields.published_description,
])
# Mark which attributes can be used for sorting search results:
client.index(temp_index_name).update_sortable_attributes([