Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Store content object collections in search index [FC-0062] #680

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs/hooks/events.rst
Original file line number Diff line number Diff line change
Expand Up @@ -244,10 +244,6 @@ Content Authoring Events
- org.openedx.content_authoring.library_block.deleted.v1
- 2023-07-20

* - `CONTENT_OBJECT_TAGS_CHANGED <https://github.com/openedx/openedx-events/blob/c0eb4ba1a3d7d066d58e5c87920b8ccb0645f769/openedx_events/content_authoring/signals.py#L207>`_
- org.openedx.content_authoring.content.object.tags.changed.v1
- 2024-03-31

* - `LIBRARY_COLLECTION_CREATED <https://github.com/openedx/openedx-events/blob/main/openedx_events/content_authoring/signals.py#L219>`_
- org.openedx.content_authoring.content_library.collection.created.v1
- 2024-08-23
Expand All @@ -259,3 +255,7 @@ Content Authoring Events
* - `LIBRARY_COLLECTION_DELETED <https://github.com/openedx/openedx-events/blob/main/openedx_events/content_authoring/signals.py#L241>`_
- org.openedx.content_authoring.content_library.collection.deleted.v1
- 2024-08-23

* - `CONTENT_OBJECT_ASSOCIATIONS_CHANGED <https://github.com/openedx/openedx-events/blob/eb17e03f075b272ad8a29e8435d6a514f8884131/openedx_events/content_authoring/signals.py#L205-L214>`_
- org.openedx.content_authoring.content.object.associations.changed.v1
- 2024-09-06
13 changes: 13 additions & 0 deletions openedx/core/djangoapps/content/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
searchable_doc_for_course_block,
searchable_doc_for_collection,
searchable_doc_for_library_block,
searchable_doc_collections,
searchable_doc_tags,
)

Expand Down Expand Up @@ -322,6 +323,7 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
Fields.tags + "." + Fields.tags_level1,
Fields.tags + "." + Fields.tags_level2,
Fields.tags + "." + Fields.tags_level3,
Fields.collections,
Fields.type,
Fields.access_id,
Fields.last_published,
Expand All @@ -335,6 +337,7 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
Fields.content,
Fields.tags,
Fields.description,
Fields.collections,
# If we don't list the following sub-fields _explicitly_, they're only sometimes searchable - that is, they
# are searchable only if at least one document in the index has a value. If we didn't list them here and,
# say, there were no tags.level3 tags in the index, the client would get an error if trying to search for
Expand Down Expand Up @@ -375,6 +378,7 @@ def index_library(lib_key: str) -> list:
doc = {}
doc.update(searchable_doc_for_library_block(metadata))
doc.update(searchable_doc_tags(metadata.usage_key))
doc.update(searchable_doc_collections(metadata.usage_key))
docs.append(doc)
except Exception as err: # pylint: disable=broad-except
status_cb(f"Error indexing library component {component}: {err}")
Expand Down Expand Up @@ -571,6 +575,15 @@ def upsert_block_tags_index_docs(usage_key: UsageKey):
_update_index_docs([doc])


def upsert_block_collections_index_docs(usage_key: UsageKey):
"""
Updates the collections data in documents for the given Course/Library block
"""
doc = {Fields.id: meili_id_from_opaque_key(usage_key)}
doc.update(searchable_doc_collections(usage_key))
_update_index_docs([doc])


def _get_user_orgs(request: Request) -> list[str]:
"""
Get the org.short_names for the organizations that the requesting user has OrgStaffRole or OrgInstructorRole.
Expand Down
46 changes: 46 additions & 0 deletions openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
from hashlib import blake2b

from django.utils.text import slugify
from django.core.exceptions import ObjectDoesNotExist
from opaque_keys.edx.keys import LearningContextKey, UsageKey
from openedx_learning.api import authoring as authoring_api

from openedx.core.djangoapps.content.search.models import SearchAccess
from openedx.core.djangoapps.content_libraries import api as lib_api
Expand Down Expand Up @@ -52,6 +54,8 @@ class Fields:
tags_level1 = "level1"
tags_level2 = "level2"
tags_level3 = "level3"
# List of collection.key strings this object belongs to.
collections = "collections"
# The "content" field is a dictionary of arbitrary data, depending on the block_type.
# It comes from each XBlock's index_dictionary() method (if present) plus some processing.
# Text (html) blocks have an "html_content" key in here, capa has "capa_content" and "problem_types", and so on.
Expand Down Expand Up @@ -223,6 +227,35 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
return {Fields.tags: result}


def _collections_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
"""
Given an XBlock, course, library, etc., get the collections for its index doc.

e.g. for something in Collections "COL_A" and "COL_B", this would return:
{
"collections": ["COL_A", "COL_B"],
Copy link
Member

@rpenido rpenido Sep 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pomegranited Sorry for the late request (I overlooked this before), but what about changing the structure here?

Suggested change
"collections": ["COL_A", "COL_B"],
"collections": [
{ "display_name": "Collection A", slug: "COL_A" },
{ "display_name": "Collection B", slug: "COL_B" },
],

That way, we can use the display_name as a searchable attribute and the slug/key to a dev action (like a redirect).

Edit: I also added this comment in the upstream PR, so please ignore it here

}

Returns an empty dict if the object is not in any collections.
"""
# Gather the collections associated with this object
result = {}
collections = []
try:
component = lib_api.get_component_from_usage_key(object_id)
collections = authoring_api.get_entity_collections(
component.learning_package_id,
component.key,
).values_list("key", flat=True)
except ObjectDoesNotExist:
log.warning(f"No component found for {object_id}")

if collections:
result[Fields.collections] = list(collections)

return result


def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetadata) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
Expand Down Expand Up @@ -265,6 +298,19 @@ def searchable_doc_tags(usage_key: UsageKey) -> dict:
return doc


def searchable_doc_collections(usage_key: UsageKey) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, with the collections data for the given content object.
"""
doc = {
Fields.id: meili_id_from_opaque_key(usage_key),
}
doc.update(_collections_for_content_object(usage_key))

return doc


def searchable_doc_for_course_block(block) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
Expand Down
37 changes: 24 additions & 13 deletions openedx/core/djangoapps/content/search/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@

from django.db.models.signals import post_delete
from django.dispatch import receiver
from openedx_events.content_authoring.data import ContentLibraryData, ContentObjectData, LibraryBlockData, XBlockData
from openedx_events.content_authoring.data import (
ContentLibraryData,
ContentObjectChangedData,
LibraryBlockData,
XBlockData,
)
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import UsageKey
from openedx_events.content_authoring.signals import (
CONTENT_LIBRARY_DELETED,
CONTENT_LIBRARY_UPDATED,
Expand All @@ -16,20 +23,19 @@
XBLOCK_CREATED,
XBLOCK_DELETED,
XBLOCK_UPDATED,
CONTENT_OBJECT_TAGS_CHANGED,
CONTENT_OBJECT_ASSOCIATIONS_CHANGED,
)
from openedx.core.djangoapps.content_tagging.utils import get_content_key_from_string

from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
from openedx.core.djangoapps.content.search.models import SearchAccess

from .api import only_if_meilisearch_enabled, upsert_block_tags_index_docs
from .api import only_if_meilisearch_enabled, upsert_block_collections_index_docs, upsert_block_tags_index_docs
from .tasks import (
delete_library_block_index_doc,
delete_xblock_index_doc,
update_content_library_index_docs,
upsert_library_block_index_doc,
upsert_xblock_index_doc
upsert_xblock_index_doc,
)

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -145,22 +151,27 @@ def content_library_updated_handler(**kwargs) -> None:
update_content_library_index_docs.apply(args=[str(content_library_data.library_key)])


@receiver(CONTENT_OBJECT_TAGS_CHANGED)
@receiver(CONTENT_OBJECT_ASSOCIATIONS_CHANGED)
@only_if_meilisearch_enabled
def content_object_tags_changed_handler(**kwargs) -> None:
def content_object_associations_changed_handler(**kwargs) -> None:
"""
Update the tags data in the index for the Content Object
Update the collections/tags data in the index for the Content Object
"""
content_object_tags = kwargs.get("content_object", None)
if not content_object_tags or not isinstance(content_object_tags, ContentObjectData):
content_object = kwargs.get("content_object", None)
if not content_object or not isinstance(content_object, ContentObjectChangedData):
log.error("Received null or incorrect data for event")
return

try:
# Check if valid if course or library block
get_content_key_from_string(content_object_tags.object_id)
except ValueError:
usage_key = UsageKey.from_string(str(content_object.object_id))
except InvalidKeyError:
log.error("Received invalid content object id")
return

upsert_block_tags_index_docs(content_object_tags.object_id)
# This event's changes may contain both "tags" and "collections", but this will happen rarely, if ever.
# So we allow a potential double "upsert" here.
if "tags" in content_object.changes:
upsert_block_tags_index_docs(usage_key)
elif "collections" in content_object.changes:
upsert_block_collections_index_docs(usage_key)
50 changes: 50 additions & 0 deletions openedx/core/djangoapps/content/search/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,56 @@ def test_index_library_block_tags(self, mock_meilisearch):
any_order=True,
)

@override_settings(MEILISEARCH_ENABLED=True)
def test_index_library_block_collections(self, mock_meilisearch):
"""
Test indexing an Library Block that is in two collections.
"""
collection1 = authoring_api.create_collection(
learning_package_id=self.library.learning_package.id,
key="COL1",
title="Collection 1",
created_by=None,
description="First Collection",
)

collection2 = authoring_api.create_collection(
learning_package_id=self.library.learning_package.id,
key="COL2",
title="Collection 2",
created_by=None,
description="Second Collection",
)

# Add Problem1 to both Collections (these internally call `upsert_block_collections_index_docs`)
# (adding in reverse order to test sorting of collection tag))
for collection in (collection2, collection1):
library_api.update_library_collection_components(
self.library.key,
collection_key=collection.key,
usage_keys=[
self.problem1.usage_key,
],
)

# Build expected docs with collections at each stage
doc_problem_with_collection2 = {
"id": self.doc_problem1["id"],
"collections": [collection2.key],
}
doc_problem_with_collection1 = {
"id": self.doc_problem1["id"],
"collections": [collection1.key, collection2.key],
}

mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls(
[
call([doc_problem_with_collection2]),
call([doc_problem_with_collection1]),
],
any_order=True,
)

@override_settings(MEILISEARCH_ENABLED=True)
def test_delete_index_library_block(self, mock_meilisearch):
"""
Expand Down
Loading
Loading