Skip to content

Commit

Permalink
Store content object collections in search index [FC-0062] (#35469)
Browse files Browse the repository at this point in the history
* feat: Add Library Collections REST endpoints

* test: Add tests for Collections REST APIs

* chore: Add missing __init__ files

* docs: Add warning about unstable REST APIs

* feat: emit CONTENT_OBJECT_ASSOCIATIONS_CHANGED
whenever a content object's tags or collections have changed,
and handle that event in content/search.

The deprecated CONTENT_OBJECT_TAGS_CHANGED event is still emitted when
tags change; to be removed after Sumac.

* docs: replaces CONTENT_OBJECT_TAGS_CHANGED with CONTENT_OBJECT_ASSOCIATIONS_CHANGED

* chore: updates openedx-events==9.14.0

* chore: updates openedx-learning==0.11.4

Co-authored-by: Yusuf Musleh <[email protected]>
Co-authored-by: Chris Chávez <[email protected]>
Co-authored-by: Rômulo Penido <[email protected]>
  • Loading branch information
4 people authored Sep 13, 2024
1 parent 7d22784 commit c41fe89
Show file tree
Hide file tree
Showing 11 changed files with 491 additions and 53 deletions.
8 changes: 4 additions & 4 deletions docs/hooks/events.rst
Original file line number Diff line number Diff line change
Expand Up @@ -244,10 +244,6 @@ Content Authoring Events
- org.openedx.content_authoring.library_block.deleted.v1
- 2023-07-20

* - `CONTENT_OBJECT_TAGS_CHANGED <https://github.com/openedx/openedx-events/blob/c0eb4ba1a3d7d066d58e5c87920b8ccb0645f769/openedx_events/content_authoring/signals.py#L207>`_
- org.openedx.content_authoring.content.object.tags.changed.v1
- 2024-03-31

* - `LIBRARY_COLLECTION_CREATED <https://github.com/openedx/openedx-events/blob/main/openedx_events/content_authoring/signals.py#L219>`_
- org.openedx.content_authoring.content_library.collection.created.v1
- 2024-08-23
Expand All @@ -259,3 +255,7 @@ Content Authoring Events
* - `LIBRARY_COLLECTION_DELETED <https://github.com/openedx/openedx-events/blob/main/openedx_events/content_authoring/signals.py#L241>`_
- org.openedx.content_authoring.content_library.collection.deleted.v1
- 2024-08-23

* - `CONTENT_OBJECT_ASSOCIATIONS_CHANGED <https://github.com/openedx/openedx-events/blob/eb17e03f075b272ad8a29e8435d6a514f8884131/openedx_events/content_authoring/signals.py#L205-L214>`_
- org.openedx.content_authoring.content.object.associations.changed.v1
- 2024-09-06
35 changes: 34 additions & 1 deletion openedx/core/djangoapps/content/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
searchable_doc_for_course_block,
searchable_doc_for_collection,
searchable_doc_for_library_block,
searchable_doc_collections,
searchable_doc_tags,
)

Expand Down Expand Up @@ -322,6 +323,9 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
Fields.tags + "." + Fields.tags_level1,
Fields.tags + "." + Fields.tags_level2,
Fields.tags + "." + Fields.tags_level3,
Fields.collections,
Fields.collections + "." + Fields.collections_display_name,
Fields.collections + "." + Fields.collections_key,
Fields.type,
Fields.access_id,
Fields.last_published,
Expand All @@ -333,8 +337,9 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
Fields.display_name,
Fields.block_id,
Fields.content,
Fields.tags,
Fields.description,
Fields.tags,
Fields.collections,
# If we don't list the following sub-fields _explicitly_, they're only sometimes searchable - that is, they
# are searchable only if at least one document in the index has a value. If we didn't list them here and,
# say, there were no tags.level3 tags in the index, the client would get an error if trying to search for
Expand All @@ -344,6 +349,8 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
Fields.tags + "." + Fields.tags_level1,
Fields.tags + "." + Fields.tags_level2,
Fields.tags + "." + Fields.tags_level3,
Fields.collections + "." + Fields.collections_display_name,
Fields.collections + "." + Fields.collections_key,
])
# Mark which attributes can be used for sorting search results:
client.index(temp_index_name).update_sortable_attributes([
Expand Down Expand Up @@ -375,6 +382,7 @@ def index_library(lib_key: str) -> list:
doc = {}
doc.update(searchable_doc_for_library_block(metadata))
doc.update(searchable_doc_tags(metadata.usage_key))
doc.update(searchable_doc_collections(metadata.usage_key))
docs.append(doc)
except Exception as err: # pylint: disable=broad-except
status_cb(f"Error indexing library component {component}: {err}")
Expand Down Expand Up @@ -549,6 +557,22 @@ def upsert_library_block_index_doc(usage_key: UsageKey) -> None:
_update_index_docs(docs)


def upsert_library_collection_index_doc(library_key: LibraryLocatorV2, collection_key: str) -> None:
"""
Creates or updates the document for the given Library Collection in the search index
"""
content_library = lib_api.ContentLibrary.objects.get_by_key(library_key)
collection = authoring_api.get_collection(
learning_package_id=content_library.learning_package_id,
collection_key=collection_key,
)
docs = [
searchable_doc_for_collection(collection)
]

_update_index_docs(docs)


def upsert_content_library_index_docs(library_key: LibraryLocatorV2) -> None:
"""
Creates or updates the documents for the given Content Library in the search index
Expand All @@ -571,6 +595,15 @@ def upsert_block_tags_index_docs(usage_key: UsageKey):
_update_index_docs([doc])


def upsert_block_collections_index_docs(usage_key: UsageKey):
"""
Updates the collections data in documents for the given Course/Library block
"""
doc = {Fields.id: meili_id_from_opaque_key(usage_key)}
doc.update(searchable_doc_collections(usage_key))
_update_index_docs([doc])


def _get_user_orgs(request: Request) -> list[str]:
"""
Get the org.short_names for the organizations that the requesting user has OrgStaffRole or OrgInstructorRole.
Expand Down
78 changes: 77 additions & 1 deletion openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
from hashlib import blake2b

from django.utils.text import slugify
from django.core.exceptions import ObjectDoesNotExist
from opaque_keys.edx.keys import LearningContextKey, UsageKey
from openedx_learning.api import authoring as authoring_api

from openedx.core.djangoapps.content.search.models import SearchAccess
from openedx.core.djangoapps.content_libraries import api as lib_api
Expand All @@ -26,7 +28,11 @@ class Fields:
id = "id"
usage_key = "usage_key"
type = "type" # DocType.course_block or DocType.library_block (see below)
block_id = "block_id" # The block_id part of the usage key. Sometimes human-readable, sometimes a random hex ID
# The block_id part of the usage key for course or library blocks.
# If it's a collection, the collection.key is stored here.
# Sometimes human-readable, sometimes a random hex ID
# Is only unique within the given context_key.
block_id = "block_id"
display_name = "display_name"
description = "description"
modified = "modified"
Expand All @@ -52,11 +58,21 @@ class Fields:
tags_level1 = "level1"
tags_level2 = "level2"
tags_level3 = "level3"
# Collections (dictionary) that this object belongs to.
# Similarly to tags above, we collect the collection.titles and collection.keys into hierarchical facets.
collections = "collections"
collections_display_name = "display_name"
collections_key = "key"

# The "content" field is a dictionary of arbitrary data, depending on the block_type.
# It comes from each XBlock's index_dictionary() method (if present) plus some processing.
# Text (html) blocks have an "html_content" key in here, capa has "capa_content" and "problem_types", and so on.
content = "content"

# Collections use this field to communicate how many entities/components they contain.
# Structural XBlocks may use this one day to indicate how many child blocks they ocntain.
num_children = "num_children"

# Note: new fields or values can be added at any time, but if they need to be indexed for filtering or keyword
# search, the index configuration will need to be changed, which is only done as part of the 'reindex_studio'
# command (changing those settings on an large active index is not recommended).
Expand Down Expand Up @@ -223,6 +239,51 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
return {Fields.tags: result}


def _collections_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
"""
Given an XBlock, course, library, etc., get the collections for its index doc.
e.g. for something in Collections "COL_A" and "COL_B", this would return:
{
"collections": {
"display_name": ["Collection A", "Collection B"],
"key": ["COL_A", "COL_B"],
}
}
If the object is in no collections, returns:
{
"collections": {},
}
"""
# Gather the collections associated with this object
collections = None
try:
component = lib_api.get_component_from_usage_key(object_id)
collections = authoring_api.get_entity_collections(
component.learning_package_id,
component.key,
)
except ObjectDoesNotExist:
log.warning(f"No component found for {object_id}")

if not collections:
return {Fields.collections: {}}

result = {
Fields.collections: {
Fields.collections_display_name: [],
Fields.collections_key: [],
}
}
for collection in collections:
result[Fields.collections][Fields.collections_display_name].append(collection.title)
result[Fields.collections][Fields.collections_key].append(collection.key)

return result


def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetadata) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
Expand Down Expand Up @@ -265,6 +326,19 @@ def searchable_doc_tags(usage_key: UsageKey) -> dict:
return doc


def searchable_doc_collections(usage_key: UsageKey) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, with the collections data for the given content object.
"""
doc = {
Fields.id: meili_id_from_opaque_key(usage_key),
}
doc.update(_collections_for_content_object(usage_key))

return doc


def searchable_doc_for_course_block(block) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
Expand All @@ -289,6 +363,7 @@ def searchable_doc_for_collection(collection) -> dict:
"""
doc = {
Fields.id: collection.id,
Fields.block_id: collection.key,
Fields.type: DocType.collection,
Fields.display_name: collection.title,
Fields.description: collection.description,
Expand All @@ -298,6 +373,7 @@ def searchable_doc_for_collection(collection) -> dict:
# If related contentlibrary is found, it will override this value below.
# Mostly contentlibrary.library_key == learning_package.key
Fields.context_key: collection.learning_package.key,
Fields.num_children: collection.entities.count(),
}
# Just in case learning_package is not related to a library
try:
Expand Down
62 changes: 49 additions & 13 deletions openedx/core/djangoapps/content/search/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,30 +6,40 @@

from django.db.models.signals import post_delete
from django.dispatch import receiver
from openedx_events.content_authoring.data import ContentLibraryData, ContentObjectData, LibraryBlockData, XBlockData
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import UsageKey
from openedx_events.content_authoring.data import (
ContentLibraryData,
ContentObjectChangedData,
LibraryBlockData,
LibraryCollectionData,
XBlockData,
)
from openedx_events.content_authoring.signals import (
CONTENT_LIBRARY_DELETED,
CONTENT_LIBRARY_UPDATED,
LIBRARY_BLOCK_CREATED,
LIBRARY_BLOCK_DELETED,
LIBRARY_BLOCK_UPDATED,
LIBRARY_COLLECTION_CREATED,
LIBRARY_COLLECTION_UPDATED,
XBLOCK_CREATED,
XBLOCK_DELETED,
XBLOCK_UPDATED,
CONTENT_OBJECT_TAGS_CHANGED,
CONTENT_OBJECT_ASSOCIATIONS_CHANGED,
)
from openedx.core.djangoapps.content_tagging.utils import get_content_key_from_string

from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
from openedx.core.djangoapps.content.search.models import SearchAccess

from .api import only_if_meilisearch_enabled, upsert_block_tags_index_docs
from .api import only_if_meilisearch_enabled, upsert_block_collections_index_docs, upsert_block_tags_index_docs
from .tasks import (
delete_library_block_index_doc,
delete_xblock_index_doc,
update_content_library_index_docs,
update_library_collection_index_doc,
upsert_library_block_index_doc,
upsert_xblock_index_doc
upsert_xblock_index_doc,
)

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -145,22 +155,48 @@ def content_library_updated_handler(**kwargs) -> None:
update_content_library_index_docs.apply(args=[str(content_library_data.library_key)])


@receiver(CONTENT_OBJECT_TAGS_CHANGED)
@receiver(LIBRARY_COLLECTION_CREATED)
@receiver(LIBRARY_COLLECTION_UPDATED)
@only_if_meilisearch_enabled
def library_collection_updated_handler(**kwargs) -> None:
"""
Create or update the index for the content library collection
"""
library_collection = kwargs.get("library_collection", None)
if not library_collection or not isinstance(library_collection, LibraryCollectionData): # pragma: no cover
log.error("Received null or incorrect data for event")
return

# Update collection index synchronously to make sure that search index is updated before
# the frontend invalidates/refetches index.
# See content_library_updated_handler for more details.
update_library_collection_index_doc.apply(args=[
str(library_collection.library_key),
library_collection.collection_key,
])


@receiver(CONTENT_OBJECT_ASSOCIATIONS_CHANGED)
@only_if_meilisearch_enabled
def content_object_tags_changed_handler(**kwargs) -> None:
def content_object_associations_changed_handler(**kwargs) -> None:
"""
Update the tags data in the index for the Content Object
Update the collections/tags data in the index for the Content Object
"""
content_object_tags = kwargs.get("content_object", None)
if not content_object_tags or not isinstance(content_object_tags, ContentObjectData):
content_object = kwargs.get("content_object", None)
if not content_object or not isinstance(content_object, ContentObjectChangedData):
log.error("Received null or incorrect data for event")
return

try:
# Check if valid if course or library block
get_content_key_from_string(content_object_tags.object_id)
except ValueError:
usage_key = UsageKey.from_string(str(content_object.object_id))
except InvalidKeyError:
log.error("Received invalid content object id")
return

upsert_block_tags_index_docs(content_object_tags.object_id)
# This event's changes may contain both "tags" and "collections", but this will happen rarely, if ever.
# So we allow a potential double "upsert" here.
if not content_object.changes or "tags" in content_object.changes:
upsert_block_tags_index_docs(usage_key)
if not content_object.changes or "collections" in content_object.changes:
upsert_block_collections_index_docs(usage_key)
13 changes: 13 additions & 0 deletions openedx/core/djangoapps/content/search/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,16 @@ def update_content_library_index_docs(library_key_str: str) -> None:
# Delete all documents in this library that were not published by above function
# as this task is also triggered on discard event.
api.delete_all_draft_docs_for_library(library_key)


@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError))
@set_code_owner_attribute
def update_library_collection_index_doc(library_key_str: str, collection_key: str) -> None:
"""
Celery task to update the content index documents for a library collection
"""
library_key = LibraryLocatorV2.from_string(library_key_str)

log.info("Updating content index documents for collection %s in library%s", collection_key, library_key)

api.upsert_library_collection_index_doc(library_key, collection_key)
Loading

0 comments on commit c41fe89

Please sign in to comment.