Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Store "published display name" and "published description" in search index [FC-0062] #35678

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions openedx/core/djangoapps/content/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,8 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
Fields.tags + "." + Fields.tags_level3,
Fields.collections + "." + Fields.collections_display_name,
Fields.collections + "." + Fields.collections_key,
Fields.published + "." + Fields.display_name,
Fields.published + "." + Fields.published_description,
])
# Mark which attributes can be used for sorting search results:
client.index(temp_index_name).update_sortable_attributes([
Expand Down
114 changes: 94 additions & 20 deletions openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@
from opaque_keys.edx.keys import LearningContextKey, UsageKey
from openedx_learning.api import authoring as authoring_api
from opaque_keys.edx.locator import LibraryLocatorV2
from rest_framework.exceptions import NotFound

from openedx.core.djangoapps.content.search.models import SearchAccess
from openedx.core.djangoapps.content_libraries import api as lib_api
from openedx.core.djangoapps.content_tagging import api as tagging_api
from openedx.core.djangoapps.xblock import api as xblock_api
from openedx.core.djangoapps.xblock.data import LatestVersion
from openedx_learning.api.authoring_models import Collection

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -74,6 +76,11 @@ class Fields:
# Structural XBlocks may use this one day to indicate how many child blocks they ocntain.
num_children = "num_children"

# Published data (dictionary) of this object
published = "published"
published_display_name = "display_name"
published_description = "description"

# Note: new fields or values can be added at any time, but if they need to be indexed for filtering or keyword
# search, the index configuration will need to be changed, which is only done as part of the 'reindex_studio'
# command (changing those settings on an large active index is not recommended).
Expand Down Expand Up @@ -121,6 +128,48 @@ def searchable_doc_for_usage_key(usage_key: UsageKey) -> dict:
}


def _get_content_from_block(block) -> dict:
"""
Get the content from block
"""
try:
content_data = block.index_dictionary()
# Will be something like:
# {
# 'content': {'display_name': '...', 'capa_content': '...'},
# 'content_type': 'CAPA',
# 'problem_types': ['multiplechoiceresponse']
# }
# Which we need to flatten:
if "content_type" in content_data:
del content_data["content_type"] # Redundant with our standard Fields.block_type field.
if "content" in content_data and isinstance(content_data["content"], dict):
content = content_data["content"]
if "display_name" in content:
del content["display_name"]
del content_data["content"]
content_data.update(content)

return content_data

except Exception as err: # pylint: disable=broad-except
log.exception(f"Failed to process index_dictionary for {block.usage_key}: {err}")


def _get_description_from_block_content(block_type, block_content) -> str:
"""
Generate description from block content
"""
result = None

if block_type == 'html' and 'html_content' in block_content:
result = block_content['html_content']
elif block_type == 'problem' and 'capa_content' in block_content:
result = block_content['capa_content']

return result


def _fields_from_block(block) -> dict:
"""
Given an XBlock instance, call its index_dictionary() method to load any
Expand All @@ -130,16 +179,17 @@ def _fields_from_block(block) -> dict:
class implementation returns only:
{"content": {"display_name": "..."}, "content_type": "..."}
"""
block_type = block.scope_ids.block_type
block_data = {
Fields.usage_key: str(block.usage_key),
Fields.block_id: str(block.usage_key.block_id),
Fields.display_name: xblock_api.get_block_display_name(block),
Fields.block_type: block.scope_ids.block_type,
Fields.block_type: block_type,
# This is called context_key so it's the same for courses and libraries
Fields.context_key: str(block.usage_key.context_key), # same as lib_key
Fields.org: str(block.usage_key.context_key.org),
Fields.access_id: _meili_access_id_from_context_key(block.usage_key.context_key),
Fields.breadcrumbs: []
Fields.breadcrumbs: [],
}
# Get the breadcrumbs (course, section, subsection, etc.):
if block.usage_key.context_key.is_course: # Getting parent is not yet implemented in Learning Core (for libraries).
Expand All @@ -160,25 +210,14 @@ class implementation returns only:
parent_data,
)
try:
content_data = block.index_dictionary()
# Will be something like:
# {
# 'content': {'display_name': '...', 'capa_content': '...'},
# 'content_type': 'CAPA',
# 'problem_types': ['multiplechoiceresponse']
# }
# Which we need to flatten:
if "content_type" in content_data:
del content_data["content_type"] # Redundant with our standard Fields.block_type field.
if "content" in content_data and isinstance(content_data["content"], dict):
content = content_data["content"]
if "display_name" in content:
del content["display_name"]
del content_data["content"]
content_data.update(content)
# Now we have something like:
# { 'capa_content': '...', 'problem_types': ['multiplechoiceresponse'] }
content_data = _get_content_from_block(block)
block_data[Fields.content] = content_data

# Generate description from the content
description = _get_description_from_block_content(block_type, content_data)
if description:
block_data[Fields.description] = description

except Exception as err: # pylint: disable=broad-except
log.exception(f"Failed to process index_dictionary for {block.usage_key}: {err}")
return block_data
Expand Down Expand Up @@ -295,6 +334,32 @@ def _collections_for_content_object(object_id: UsageKey | LearningContextKey) ->
return result


def _published_data_from_block(block_published) -> dict:
"""
Given an library block get the published data.
"""
result = {
Fields.published: {
Fields.published_display_name: xblock_api.get_block_display_name(block_published),
}
}

try:
content_data = _get_content_from_block(block_published)

description = _get_description_from_block_content(
block_published.scope_ids.block_type,
content_data,
)

if description:
result[Fields.published][Fields.published_description] = description
except Exception as err: # pylint: disable=broad-except
log.exception(f"Failed to process index_dictionary for {block_published.usage_key}: {err}")

return result


def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetadata) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
Expand All @@ -307,6 +372,12 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad
library_name = lib_api.get_library(xblock_metadata.usage_key.context_key).title
block = xblock_api.load_block(xblock_metadata.usage_key, user=None)

try:
block_published = xblock_api.load_block(xblock_metadata.usage_key, user=None, version=LatestVersion.PUBLISHED)
except NotFound:
# Never published
block_published = None

doc = searchable_doc_for_usage_key(xblock_metadata.usage_key)
doc.update({
Fields.type: DocType.library_block,
Expand All @@ -318,6 +389,9 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad

doc.update(_fields_from_block(block))

if block_published:
doc.update(_published_data_from_block(block_published))

# Add the breadcrumbs. In v2 libraries, the library itself is not a "parent" of the XBlocks so we add it here:
doc[Fields.breadcrumbs] = [{"display_name": library_name}]

Expand Down
125 changes: 125 additions & 0 deletions openedx/core/djangoapps/content/search/tests/test_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ def test_problem_block(self):
"org": "edX",
"access_id": self.toy_course_access_id,
"display_name": "Test Problem",
"description": "What is a test?",
"breadcrumbs": [
{
'display_name': 'Toy Course',
Expand Down Expand Up @@ -202,6 +203,8 @@ def test_html_block(self):
"org": "edX",
"access_id": self.toy_course_access_id,
"display_name": "Text",
"description": "This is a link to another page and some Chinese 四節比分和七年前 Some "
"more Chinese 四節比分和七年前 ",
"breadcrumbs": [
{
'display_name': 'Toy Course',
Expand Down Expand Up @@ -297,6 +300,128 @@ def test_html_library_block(self):
},
}

def test_html_published_library_block(self):
library_api.publish_changes(self.library.key)

doc = searchable_doc_for_library_block(self.library_block)
doc.update(searchable_doc_tags(self.library_block.usage_key))
doc.update(searchable_doc_collections(self.library_block.usage_key))

assert doc == {
"id": "lbedx2012_fallhtmltext2-4bb47d67",
"type": "library_block",
"block_type": "html",
"usage_key": "lb:edX:2012_Fall:html:text2",
"block_id": "text2",
"context_key": "lib:edX:2012_Fall",
"org": "edX",
"access_id": self.library_access_id,
"display_name": "Text",
"breadcrumbs": [
{
"display_name": "some content_library",
},
],
"last_published": None,
"created": 1680674828.0,
"modified": 1680674828.0,
"content": {
"html_content": "",
},
"collections": {
"key": ["TOY_COLLECTION"],
"display_name": ["Toy Collection"],
},
"tags": {
"taxonomy": ["Difficulty"],
"level0": ["Difficulty > Normal"],
},
'published': {'display_name': 'Text'},
}

# Update library block to create a draft
olx_str = '<html display_name="Text 2"><![CDATA[<p>This is a Test</p>]]></html>'
library_api.set_library_block_olx(self.library_block.usage_key, olx_str)

doc = searchable_doc_for_library_block(self.library_block)
doc.update(searchable_doc_tags(self.library_block.usage_key))
doc.update(searchable_doc_collections(self.library_block.usage_key))

assert doc == {
"id": "lbedx2012_fallhtmltext2-4bb47d67",
"type": "library_block",
"block_type": "html",
"usage_key": "lb:edX:2012_Fall:html:text2",
"block_id": "text2",
"context_key": "lib:edX:2012_Fall",
"org": "edX",
"access_id": self.library_access_id,
"display_name": "Text 2",
"description": "This is a Test",
"breadcrumbs": [
{
"display_name": "some content_library",
},
],
"last_published": None,
"created": 1680674828.0,
"modified": 1680674828.0,
"content": {
"html_content": "This is a Test",
},
"collections": {
"key": ["TOY_COLLECTION"],
"display_name": ["Toy Collection"],
},
"tags": {
"taxonomy": ["Difficulty"],
"level0": ["Difficulty > Normal"],
},
"published": {"display_name": "Text"},
}

# Publish new changes
library_api.publish_changes(self.library.key)
doc = searchable_doc_for_library_block(self.library_block)
doc.update(searchable_doc_tags(self.library_block.usage_key))
doc.update(searchable_doc_collections(self.library_block.usage_key))

assert doc == {
"id": "lbedx2012_fallhtmltext2-4bb47d67",
"type": "library_block",
"block_type": "html",
"usage_key": "lb:edX:2012_Fall:html:text2",
"block_id": "text2",
"context_key": "lib:edX:2012_Fall",
"org": "edX",
"access_id": self.library_access_id,
"display_name": "Text 2",
"description": "This is a Test",
"breadcrumbs": [
{
"display_name": "some content_library",
},
],
"last_published": None,
"created": 1680674828.0,
"modified": 1680674828.0,
"content": {
"html_content": "This is a Test",
},
"collections": {
"key": ["TOY_COLLECTION"],
"display_name": ["Toy Collection"],
},
"tags": {
"taxonomy": ["Difficulty"],
"level0": ["Difficulty > Normal"],
},
"published": {
"display_name": "Text 2",
"description": "This is a Test",
},
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: you don't have to check all the fields of doc again here. You can just check doc["display_name"], doc["description"], and doc["published"]


def test_collection_with_library(self):
doc = searchable_doc_for_collection(self.library.key, self.collection.key)
doc.update(searchable_doc_tags_for_collection(self.library.key, self.collection.key))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ def test_create_delete_library_block(self, meilisearch_client):
with freeze_time(published_date):
library_api.publish_changes(library.key)
doc_problem["last_published"] = published_date.timestamp()
doc_problem["published"] = {"display_name": "Blank Problem"}
meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_problem])

# Delete the Library Block
Expand Down
Loading