Skip to content

Commit

Permalink
refactor: use collection usage_key as search document id
Browse files Browse the repository at this point in the history
This change standardises the search document "id" to be a meilisearch ID
generated from the usage key, for all types of indexed objects.

This is important for collections so we can locate the collection
document in the search index solely from the data provided by the
LIBRARY_COLLECTION_DELETED event (library_key + collection_key), even if
the collection has been deleted from the database.
  • Loading branch information
pomegranited committed Sep 20, 2024
1 parent 21393f1 commit f760c2b
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 42 deletions.
1 change: 1 addition & 0 deletions openedx/core/djangoapps/content/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
searchable_doc_for_course_block,
searchable_doc_for_collection,
searchable_doc_for_library_block,
searchable_doc_for_usage_key,
searchable_doc_collections,
searchable_doc_tags,
searchable_doc_tags_for_collection,
Expand Down
67 changes: 33 additions & 34 deletions openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,15 @@ def _meili_access_id_from_context_key(context_key: LearningContextKey) -> int:
return access.id


def searchable_doc_for_usage_key(usage_key: UsageKey) -> dict:
"""
Generates a base document identified by its usage key.
"""
return {
Fields.id: meili_id_from_opaque_key(usage_key),
}


def _fields_from_block(block) -> dict:
"""
Given an XBlock instance, call its index_dictionary() method to load any
Expand Down Expand Up @@ -297,14 +306,14 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad
library_name = lib_api.get_library(xblock_metadata.usage_key.context_key).title
block = xblock_api.load_block(xblock_metadata.usage_key, user=None)

doc = {
Fields.id: meili_id_from_opaque_key(xblock_metadata.usage_key),
doc = searchable_doc_for_usage_key(xblock_metadata.usage_key)
doc.update({
Fields.type: DocType.library_block,
Fields.breadcrumbs: [],
Fields.created: xblock_metadata.created.timestamp(),
Fields.modified: xblock_metadata.modified.timestamp(),
Fields.last_published: xblock_metadata.last_published.timestamp() if xblock_metadata.last_published else None,
}
})

doc.update(_fields_from_block(block))

Expand All @@ -319,9 +328,7 @@ def searchable_doc_tags(usage_key: UsageKey) -> dict:
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, with the tags data for the given content object.
"""
doc = {
Fields.id: meili_id_from_opaque_key(usage_key),
}
doc = searchable_doc_for_usage_key(usage_key)
doc.update(_tags_for_content_object(usage_key))

return doc
Expand All @@ -332,9 +339,7 @@ def searchable_doc_collections(usage_key: UsageKey) -> dict:
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, with the collections data for the given content object.
"""
doc = {
Fields.id: meili_id_from_opaque_key(usage_key),
}
doc = searchable_doc_for_usage_key(usage_key)
doc.update(_collections_for_content_object(usage_key))

return doc
Expand All @@ -348,15 +353,11 @@ def searchable_doc_tags_for_collection(
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, with the tags data for the given library collection.
"""
doc = {
Fields.id: collection.id,
}

collection_usage_key = lib_api.get_library_collection_usage_key(
library_key,
collection.key,
)

doc = searchable_doc_for_usage_key(collection_usage_key)
doc.update(_tags_for_content_object(collection_usage_key))

return doc
Expand All @@ -368,44 +369,42 @@ def searchable_doc_for_course_block(block) -> dict:
like Meilisearch or Elasticsearch, so that the given course block can be
found using faceted search.
"""
doc = {
Fields.id: meili_id_from_opaque_key(block.usage_key),
doc = searchable_doc_for_usage_key(block.usage_key)
doc.update({
Fields.type: DocType.course_block,
}
})

doc.update(_fields_from_block(block))

return doc


def searchable_doc_for_collection(collection) -> dict:
def searchable_doc_for_collection(
library_key: LibraryLocatorV2,
collection_key: str,
*,
# Optionally provide the collection if we've already fetched one
collection: Collection | None = None,
) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, so that the given collection can be
found using faceted search.
"""
doc = {
Fields.id: collection.id,
Fields.block_id: collection.key,
Fields.type: DocType.collection,
Fields.display_name: collection.title,
Fields.description: collection.description,
Fields.created: collection.created.timestamp(),
Fields.modified: collection.modified.timestamp(),
# Add related learning_package.key as context_key by default.
# If related contentlibrary is found, it will override this value below.
# Mostly contentlibrary.library_key == learning_package.key
Fields.context_key: collection.learning_package.key,
Fields.num_children: collection.entities.count(),
}
# Just in case learning_package is not related to a library
collection_usage_key = lib_api.get_library_collection_usage_key(
library_key,
collection_key,
)

doc = searchable_doc_for_usage_key(collection_usage_key)

try:
context_key = collection.learning_package.contentlibrary.library_key
org = str(context_key.org)
doc.update({
Fields.context_key: str(context_key),
Fields.org: org,
Fields.usage_key: str(lib_api.get_library_collection_usage_key(context_key, collection.key)),
Fields.usage_key: str(collection_usage_key),
})
except LearningPackage.contentlibrary.RelatedObjectDoesNotExist:
log.warning(f"Related library not found for {collection}")
Expand Down
14 changes: 7 additions & 7 deletions openedx/core/djangoapps/content/search/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def setUp(self):
)
self.collection_usage_key = "lib-collection:org1:lib:MYCOL"
self.collection_dict = {
"id": self.collection.id,
"id": "lib-collectionorg1libmycol-5b647617",
"block_id": self.collection.key,
"usage_key": self.collection_usage_key,
"type": "collection",
Expand Down Expand Up @@ -461,7 +461,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch):
# Build expected docs at each stage
lib_access, _ = SearchAccess.objects.get_or_create(context_key=self.library.key)
doc_collection1_created = {
"id": collection1.id,
"id": "lib-collectionorg1libcol1-283a79c9",
"block_id": collection1.key,
"usage_key": f"lib-collection:org1:lib:{collection1.key}",
"type": "collection",
Expand All @@ -476,7 +476,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch):
"breadcrumbs": [{"display_name": "Library"}],
}
doc_collection2_created = {
"id": collection2.id,
"id": "lib-collectionorg1libcol2-46823d4d",
"block_id": collection2.key,
"usage_key": f"lib-collection:org1:lib:{collection2.key}",
"type": "collection",
Expand All @@ -491,7 +491,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch):
"breadcrumbs": [{"display_name": "Library"}],
}
doc_collection2_updated = {
"id": collection2.id,
"id": "lib-collectionorg1libcol2-46823d4d",
"block_id": collection2.key,
"usage_key": f"lib-collection:org1:lib:{collection2.key}",
"type": "collection",
Expand All @@ -506,7 +506,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch):
"breadcrumbs": [{"display_name": "Library"}],
}
doc_collection1_updated = {
"id": collection1.id,
"id": "lib-collectionorg1libcol1-283a79c9",
"block_id": collection1.key,
"usage_key": f"lib-collection:org1:lib:{collection1.key}",
"type": "collection",
Expand Down Expand Up @@ -593,14 +593,14 @@ def test_index_tags_in_collections(self, mock_meilisearch):

# Build expected docs with tags at each stage
doc_collection_with_tags1 = {
"id": self.collection.id,
"id": "lib-collectionorg1libmycol-5b647617",
"tags": {
'taxonomy': ['A'],
'level0': ['A > one', 'A > two']
}
}
doc_collection_with_tags2 = {
"id": self.collection.id,
"id": "lib-collectionorg1libmycol-5b647617",
"tags": {
'taxonomy': ['A', 'B'],
'level0': ['A > one', 'A > two', 'B > four', 'B > three']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ def test_collection_with_library(self):
doc.update(searchable_doc_tags_for_collection(self.library.key, self.collection))

assert doc == {
"id": self.collection.id,
"id": "lib-collectionedx2012_falltoy_collection-d1d907a4",
"block_id": self.collection.key,
"usage_key": self.collection_usage_key,
"type": "collection",
Expand Down

0 comments on commit f760c2b

Please sign in to comment.