From d25e651145547b57655b0f50f5131016006a82d2 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Wed, 23 Oct 2024 09:21:27 -0400 Subject: [PATCH] Support static assets when copy/pasting between courses and libraries (#35668) The biggest challenge is dealing with the mismatch between how Libraries store assets (per-Component) and how Courses store assets (global Files and Uploads space). To bridge this, we're going to kludge a component-local namespace in Files and Uploads by making use of the obscure feature that you can create folders there at an API level, even if no such UI exists. In this commit: * Assets work when copy-pasting between library components. * Assets work when copy-pasting from a library to a course, with the convention being to put that file in a subdirectory of the form: components/{block_type}/{block_id}/file. Note that the Studio course Files page still just shows the filename. * Assets work when copy-pasting from a course to a library. Top level assets are put into a static folder in the Component, per Learning Core conventions. Limitations: * Roundtrips don't work properly. * There's no normalized form, so directories will start nesting if you copy from library and paste into course, then copy the pasted thing and paste back into library, etc. This was deemed acceptable for Sumac. Low level stuff: * XBlockSerializerForLearningCore has been removed, with the url_name stripping functionality added as an optional param to XBlockSerializer (the other stuff was for children and "vertical" -> "unit" conversion, neither of which are relevant now). * url_name is now stripped out of anything added to the clipboard, so that we don't end up writing it in block.xml when it is redundant (and would be stripped out with the next write anyway). For the Libraries Relaunch Beta. This should not affect any site which has kept New Libraries disabled. Issue: https://github.com/openedx/frontend-app-authoring/issues/1170 --- cms/djangoapps/contentstore/helpers.py | 113 ++++++++++++---- .../views/tests/test_clipboard_paste.py | 58 ++++++++ .../core/djangoapps/content_libraries/api.py | 124 ++++++++++++++++-- .../tests/test_content_libraries.py | 14 ++ .../djangoapps/content_libraries/views.py | 2 +- .../core/djangoapps/content_staging/api.py | 7 +- .../djangoapps/olx_rest_api/test_views.py | 22 +--- .../xblock/runtime/learning_core_runtime.py | 12 +- openedx/core/lib/xblock_serializer/api.py | 7 +- .../lib/xblock_serializer/block_serializer.py | 87 ++---------- .../core/lib/xblock_serializer/test_api.py | 27 +--- 11 files changed, 311 insertions(+), 162 deletions(-) diff --git a/cms/djangoapps/contentstore/helpers.py b/cms/djangoapps/contentstore/helpers.py index 5a4f3c652347..e67e337e55fa 100644 --- a/cms/djangoapps/contentstore/helpers.py +++ b/cms/djangoapps/contentstore/helpers.py @@ -3,6 +3,7 @@ """ from __future__ import annotations import logging +import pathlib import urllib from lxml import etree from mimetypes import guess_type @@ -11,7 +12,7 @@ from django.conf import settings from django.contrib.auth import get_user_model from django.utils.translation import gettext as _ -from opaque_keys.edx.keys import AssetKey, CourseKey, UsageKey +from opaque_keys.edx.keys import CourseKey, UsageKey from opaque_keys.edx.locator import DefinitionLocator, LocalId from xblock.core import XBlock from xblock.fields import ScopeIds @@ -280,7 +281,6 @@ def import_staged_content_from_user_clipboard(parent_key: UsageKey, request) -> # Clipboard is empty or expired/error/loading return None, StaticFileNotices() olx_str = content_staging_api.get_staged_content_olx(user_clipboard.content.id) - static_files = content_staging_api.get_staged_content_static_files(user_clipboard.content.id) node = etree.fromstring(olx_str) store = modulestore() with store.bulk_operations(parent_key.course_key): @@ -297,12 +297,29 @@ def import_staged_content_from_user_clipboard(parent_key: UsageKey, request) -> copied_from_version_num=user_clipboard.content.version_num, tags=user_clipboard.content.tags, ) - # Now handle static files that need to go into Files & Uploads: - notices = _import_files_into_course( - course_key=parent_key.context_key, - staged_content_id=user_clipboard.content.id, - static_files=static_files, - ) + + # Now handle static files that need to go into Files & Uploads. + static_files = content_staging_api.get_staged_content_static_files(user_clipboard.content.id) + notices, substitutions = _import_files_into_course( + course_key=parent_key.context_key, + staged_content_id=user_clipboard.content.id, + static_files=static_files, + usage_key=new_xblock.scope_ids.usage_id, + ) + + # Rewrite the OLX's static asset references to point to the new + # locations for those assets. See _import_files_into_course for more + # info on why this is necessary. + if hasattr(new_xblock, 'data') and substitutions: + data_with_substitutions = new_xblock.data + for old_static_ref, new_static_ref in substitutions.items(): + data_with_substitutions = data_with_substitutions.replace( + old_static_ref, + new_static_ref, + ) + new_xblock.data = data_with_substitutions + store.update_item(new_xblock, request.user.id) + return new_xblock, notices @@ -456,11 +473,21 @@ def _import_files_into_course( course_key: CourseKey, staged_content_id: int, static_files: list[content_staging_api.StagedContentFileData], -) -> StaticFileNotices: + usage_key: UsageKey, +) -> tuple[StaticFileNotices, dict[str, str]]: """ - For the given staged static asset files (which are in "Staged Content" such as the user's clipbaord, but which - need to end up in the course's Files & Uploads page), import them into the destination course, unless they already + For the given staged static asset files (which are in "Staged Content" such + as the user's clipbaord, but which need to end up in the course's Files & + Uploads page), import them into the destination course, unless they already exist. + + This function returns a tuple of StaticFileNotices (assets added, errors, + conflicts), and static asset path substitutions that should be made in the + OLX in order to paste this content into this course. The latter is for the + case in which we're brining content in from a v2 library, which stores + static assets locally to a Component and needs to go into a subdirectory + when pasting into a course to avoid overwriting commonly named things, e.g. + "figure1.png". """ # List of files that were newly added to the destination course new_files = [] @@ -468,17 +495,25 @@ def _import_files_into_course( conflicting_files = [] # List of files that had an error (shouldn't happen unless we have some kind of bug) error_files = [] + + # Store a mapping of asset URLs that need to be modified for the destination + # assets. This is necessary when you take something from a library and paste + # it into a course, because we need to translate Component-local static + # assets and shove them into the Course's global Files & Uploads space in a + # nested directory structure. + substitutions = {} for file_data_obj in static_files: - if not isinstance(file_data_obj.source_key, AssetKey): - # This static asset was managed by the XBlock and instead of being added to "Files & Uploads", it is stored - # using some other system. We could make it available via runtime.resources_fs during XML parsing, but it's - # not needed here. - continue # At this point, we know this is a "Files & Uploads" asset that we may need to copy into the course: try: - result = _import_file_into_course(course_key, staged_content_id, file_data_obj) + result, substitution_for_file = _import_file_into_course( + course_key, + staged_content_id, + file_data_obj, + usage_key, + ) if result is True: new_files.append(file_data_obj.filename) + substitutions.update(substitution_for_file) elif result is None: pass # This file already exists; no action needed. else: @@ -486,25 +521,45 @@ def _import_files_into_course( except Exception: # lint-amnesty, pylint: disable=broad-except error_files.append(file_data_obj.filename) log.exception(f"Failed to import Files & Uploads file {file_data_obj.filename}") - return StaticFileNotices( + + notices = StaticFileNotices( new_files=new_files, conflicting_files=conflicting_files, error_files=error_files, ) + return notices, substitutions + def _import_file_into_course( course_key: CourseKey, staged_content_id: int, file_data_obj: content_staging_api.StagedContentFileData, -) -> bool | None: + usage_key: UsageKey, +) -> tuple[bool | None, dict]: """ Import a single staged static asset file into the course, unless it already exists. Returns True if it was imported, False if there's a conflict, or None if the file already existed (no action needed). """ - filename = file_data_obj.filename - new_key = course_key.make_asset_key("asset", filename) + clipboard_file_path = file_data_obj.filename + + # We need to generate an AssetKey to add an asset to a course. The mapping + # of directories '/' -> '_' is a long-existing contentstore convention that + # we're not going to attempt to change. + if clipboard_file_path.startswith('static/'): + # If it's in this form, it came from a library and assumes component-local assets + file_path = clipboard_file_path.lstrip('static/') + import_path = f"components/{usage_key.block_type}/{usage_key.block_id}/{file_path}" + filename = pathlib.Path(file_path).name + new_key = course_key.make_asset_key("asset", import_path.replace("/", "_")) + else: + # Otherwise it came from a course... + file_path = clipboard_file_path + import_path = None + filename = pathlib.Path(file_path).name + new_key = course_key.make_asset_key("asset", file_path.replace("/", "_")) + try: current_file = contentstore().find(new_key) except NotFoundError: @@ -512,22 +567,28 @@ def _import_file_into_course( if not current_file: # This static asset should be imported into the new course: content_type = guess_type(filename)[0] - data = content_staging_api.get_staged_content_static_file_data(staged_content_id, filename) + data = content_staging_api.get_staged_content_static_file_data(staged_content_id, clipboard_file_path) if data is None: raise NotFoundError(file_data_obj.source_key) - content = StaticContent(new_key, name=filename, content_type=content_type, data=data) + content = StaticContent( + new_key, + name=filename, + content_type=content_type, + data=data, + import_path=import_path + ) # If it's an image file, also generate the thumbnail: thumbnail_content, thumbnail_location = contentstore().generate_thumbnail(content) if thumbnail_content is not None: content.thumbnail_location = thumbnail_location contentstore().save(content) - return True + return True, {clipboard_file_path: f"static/{import_path}"} elif current_file.content_digest == file_data_obj.md5_hash: # The file already exists and matches exactly, so no action is needed - return None + return None, {} else: # There is a conflict with some other file that has the same name. - return False + return False, {} def is_item_in_course_tree(item): diff --git a/cms/djangoapps/contentstore/views/tests/test_clipboard_paste.py b/cms/djangoapps/contentstore/views/tests/test_clipboard_paste.py index a864b29025e0..a818da81d10f 100644 --- a/cms/djangoapps/contentstore/views/tests/test_clipboard_paste.py +++ b/cms/djangoapps/contentstore/views/tests/test_clipboard_paste.py @@ -498,6 +498,64 @@ def test_paste_from_library_read_only_tags(self): assert object_tag.value in self.lib_block_tags assert object_tag.is_copied + def test_paste_from_library_copies_asset(self): + """ + Assets from a library component copied into a subdir of Files & Uploads. + """ + # This is the binary for a real, 1px webp file – we need actual image + # data because contentstore will try to make a thumbnail and grab + # metadata. + webp_raw_data = b'RIFF\x16\x00\x00\x00WEBPVP8L\n\x00\x00\x00/\x00\x00\x00\x00E\xff#\xfa\x1f' + + # First add the asset. + library_api.add_library_block_static_asset_file( + self.lib_block_key, + "static/1px.webp", + webp_raw_data, + ) # v==4 + + # Now add the reference to the asset + library_api.set_library_block_olx(self.lib_block_key, """ + +

Including this totally real image:

+ + + + Wrong + Right + + +
+ """) # v==5 + + copy_response = self.client.post( + CLIPBOARD_ENDPOINT, + {"usage_key": str(self.lib_block_key)}, + format="json" + ) + assert copy_response.status_code == 200 + + paste_response = self.client.post(XBLOCK_ENDPOINT, { + "parent_locator": str(self.course.usage_key), + "staged_content": "clipboard", + }, format="json") + assert paste_response.status_code == 200 + + new_block_key = UsageKey.from_string(paste_response.json()["locator"]) + new_block = modulestore().get_item(new_block_key) + + # Check that the substitution worked. + expected_import_path = f"components/{new_block_key.block_type}/{new_block_key.block_id}/1px.webp" + assert f"/static/{expected_import_path}" in new_block.data + + # Check that the asset was copied over properly + image_asset = contentstore().find( + self.course.id.make_asset_key("asset", expected_import_path.replace('/', '_')) + ) + assert image_asset.import_path == expected_import_path + assert image_asset.name == "1px.webp" + assert image_asset.length == len(webp_raw_data) + class ClipboardPasteFromV1LibraryTestCase(ModuleStoreTestCase): """ diff --git a/openedx/core/djangoapps/content_libraries/api.py b/openedx/core/djangoapps/content_libraries/api.py index ede11dc5101a..58b99a390fb2 100644 --- a/openedx/core/djangoapps/content_libraries/api.py +++ b/openedx/core/djangoapps/content_libraries/api.py @@ -93,7 +93,14 @@ LIBRARY_COLLECTION_UPDATED, ) from openedx_learning.api import authoring as authoring_api -from openedx_learning.api.authoring_models import Collection, Component, MediaType, LearningPackage, PublishableEntity +from openedx_learning.api.authoring_models import ( + Collection, + Component, + ComponentVersion, + MediaType, + LearningPackage, + PublishableEntity, +) from organizations.models import Organization from xblock.core import XBlock from xblock.exceptions import XBlockNotFoundError @@ -748,7 +755,7 @@ def get_library_block(usage_key, include_collections=False) -> LibraryXBlockMeta return xblock_metadata -def set_library_block_olx(usage_key, new_olx_str) -> int: +def set_library_block_olx(usage_key, new_olx_str) -> ComponentVersion: """ Replace the OLX source of the given XBlock. @@ -761,11 +768,12 @@ def set_library_block_olx(usage_key, new_olx_str) -> int: # because this old pylint can't understand attr.ib() objects, pylint: disable=no-member assert isinstance(usage_key, LibraryUsageLocatorV2) - # Make sure the block exists: - _block_metadata = get_library_block(usage_key) + # HTMLBlock uses CDATA to preserve HTML inside the XML, so make sure we + # don't strip that out. + parser = etree.XMLParser(strip_cdata=False) # Verify that the OLX parses, at least as generic XML, and the root tag is correct: - node = etree.fromstring(new_olx_str) + node = etree.fromstring(new_olx_str, parser=parser) if node.tag != usage_key.block_type: raise ValueError( f"Tried to set the OLX of a {usage_key.block_type} block to a <{node.tag}> node. " @@ -784,6 +792,14 @@ def set_library_block_olx(usage_key, new_olx_str) -> int: xblock_type_display_name(usage_key.block_type), ) + # Libraries don't use the url_name attribute, because they encode that into + # the Component key. Normally this is stripped out by the XBlockSerializer, + # but we're not actually creating the XBlock when it's coming from the + # clipboard right now. + if "url_name" in node.attrib: + del node.attrib["url_name"] + new_olx_str = etree.tostring(node, encoding='unicode') + now = datetime.now(tz=timezone.utc) with transaction.atomic(): @@ -809,7 +825,7 @@ def set_library_block_olx(usage_key, new_olx_str) -> int: ) ) - return new_component_version.version_num + return new_component_version def library_component_usage_key( @@ -926,9 +942,9 @@ def import_staged_content_from_user_clipboard(library_key: LibraryLocatorV2, use if not user_clipboard: return None - olx_str = content_staging_api.get_staged_content_olx(user_clipboard.content.id) - - # TODO: Handle importing over static assets + staged_content_id = user_clipboard.content.id + olx_str = content_staging_api.get_staged_content_olx(staged_content_id) + staged_content_files = content_staging_api.get_staged_content_static_files(staged_content_id) content_library, usage_key = validate_can_add_block_to_library( library_key, @@ -936,9 +952,91 @@ def import_staged_content_from_user_clipboard(library_key: LibraryLocatorV2, use block_id ) + # content_library.learning_package is technically a nullable field because + # it was added in a later migration, but we can't actually make a Library + # without one at the moment. TODO: fix this at the model level. + learning_package: LearningPackage = content_library.learning_package # type: ignore + + now = datetime.now(tz=timezone.utc) + # Create component for block then populate it with clipboard data - _create_component_for_block(content_library, usage_key, user.id) - set_library_block_olx(usage_key, olx_str) + with transaction.atomic(): + # First create the Component, but do not initialize it to anything (i.e. + # no ComponentVersion). + component_type = authoring_api.get_or_create_component_type( + "xblock.v1", usage_key.block_type + ) + component = authoring_api.create_component( + learning_package.id, + component_type=component_type, + local_key=usage_key.block_id, + created=now, + created_by=user.id, + ) + + # This will create the first component version and set the OLX/title + # appropriately. It will not publish. Once we get the newly created + # ComponentVersion back from this, we can attach all our files to it. + component_version = set_library_block_olx(usage_key, olx_str) + + for staged_content_file_data in staged_content_files: + # The ``data`` attribute is going to be None because the clipboard + # is optimized to not do redundant file copying when copying/pasting + # within the same course (where all the Files and Uploads are + # shared). Learning Core backed content Components will always store + # a Component-local "copy" of the data, and rely on lower-level + # deduplication to happen in the ``contents`` app. + filename = staged_content_file_data.filename + + # Grab our byte data for the file... + file_data = content_staging_api.get_staged_content_static_file_data( + staged_content_id, + filename, + ) + if not file_data: + log.error( + f"Staged content {staged_content_id} included referenced " + f"file {filename}, but no file data was found." + ) + continue + + # Courses don't support having assets that are local to a specific + # component, and instead store all their content together in a + # shared Files and Uploads namespace. If we're pasting that into a + # Learning Core backed data model (v2 Libraries), then we want to + # prepend "static/" to the filename. This will need to get updated + # when we start moving courses over to Learning Core, or if we start + # storing course component assets in sub-directories of Files and + # Uploads. + # + # The reason we don't just search for a "static/" prefix is that + # Learning Core components can store other kinds of files if they + # wish (though none currently do). + source_assumes_global_assets = not isinstance( + user_clipboard.source_context_key, LibraryLocatorV2 + ) + if source_assumes_global_assets: + filename = f"static/{filename}" + + # Now construct the Learning Core data models for it... + # TODO: more of this logic should be pushed down to openedx-learning + media_type_str, _encoding = mimetypes.guess_type(filename) + if not media_type_str: + media_type_str = "application/octet-stream" + + media_type = authoring_api.get_or_create_media_type(media_type_str) + content = authoring_api.get_or_create_file_content( + learning_package.id, + media_type.id, + data=file_data, + created=now, + ) + authoring_api.create_component_version_content( + component_version.pk, + content.id, + key=filename, + learner_downloadable=True, + ) # Emit library block created event LIBRARY_BLOCK_CREATED.send_event( @@ -966,7 +1064,7 @@ def get_or_create_olx_media_type(block_type: str) -> MediaType: def _create_component_for_block(content_lib, usage_key, user_id=None): """ - Create a Component for an XBlock type, and initialize it. + Create a Component for an XBlock type, initialize it, and return the ComponentVersion. This will create a Component, along with its first ComponentVersion. The tag in the OLX will have no attributes, e.g. ``. This first version @@ -1010,6 +1108,8 @@ def _create_component_for_block(content_lib, usage_key, user_id=None): learner_downloadable=False ) + return component_version + def delete_library_block(usage_key, remove_from_parent=True): """ diff --git a/openedx/core/djangoapps/content_libraries/tests/test_content_libraries.py b/openedx/core/djangoapps/content_libraries/tests/test_content_libraries.py index d8fc85f29de2..8977464dd4b6 100644 --- a/openedx/core/djangoapps/content_libraries/tests/test_content_libraries.py +++ b/openedx/core/djangoapps/content_libraries/tests/test_content_libraries.py @@ -1090,6 +1090,9 @@ def test_library_paste_clipboard(self): usage_id="problem1" ) + # Add an asset to the block before copying + self._set_library_block_asset(usage_key, "static/hello.txt", b"Hello World!") + # Get the XBlock created in the previous step block = xblock_api.load_block(usage_key, user=author) @@ -1099,6 +1102,17 @@ def test_library_paste_clipboard(self): # Paste the content of the clipboard into the library pasted_block_id = str(uuid4()) paste_data = self._paste_clipboard_content_in_library(lib_id, pasted_block_id) + pasted_usage_key = LibraryUsageLocatorV2( + lib_key=library_key, + block_type="problem", + usage_id=pasted_block_id + ) + self._get_library_block_asset(pasted_usage_key, "static/hello.txt") + + # Compare the two text files + src_data = self.client.get(f"/library_assets/blocks/{usage_key}/static/hello.txt").content + dest_data = self.client.get(f"/library_assets/blocks/{pasted_usage_key}/static/hello.txt").content + assert src_data == dest_data # Check that the new block was created after the paste and it's content matches # the the block in the clipboard diff --git a/openedx/core/djangoapps/content_libraries/views.py b/openedx/core/djangoapps/content_libraries/views.py index 50b532f25bc2..9357d54ca7ce 100644 --- a/openedx/core/djangoapps/content_libraries/views.py +++ b/openedx/core/djangoapps/content_libraries/views.py @@ -737,7 +737,7 @@ def post(self, request, usage_key_str): serializer.is_valid(raise_exception=True) new_olx_str = serializer.validated_data["olx"] try: - version_num = api.set_library_block_olx(key, new_olx_str) + version_num = api.set_library_block_olx(key, new_olx_str).version_num except ValueError as err: raise ValidationError(detail=str(err)) # lint-amnesty, pylint: disable=raise-missing-from return Response(LibraryXBlockOlxSerializer({"olx": new_olx_str, "version_num": version_num}).data) diff --git a/openedx/core/djangoapps/content_staging/api.py b/openedx/core/djangoapps/content_staging/api.py index 3912cb51c396..f0432922dcb0 100644 --- a/openedx/core/djangoapps/content_staging/api.py +++ b/openedx/core/djangoapps/content_staging/api.py @@ -13,7 +13,7 @@ from opaque_keys.edx.keys import AssetKey, UsageKey from xblock.core import XBlock -from openedx.core.lib.xblock_serializer.api import serialize_xblock_to_olx, StaticFile +from openedx.core.lib.xblock_serializer.api import StaticFile, XBlockSerializer from openedx.core.djangoapps.content.course_overviews.api import get_course_overview_or_none from xmodule import block_metadata_utils from xmodule.contentstore.content import StaticContent @@ -38,7 +38,10 @@ def save_xblock_to_user_clipboard(block: XBlock, user_id: int, version_num: int """ Copy an XBlock's OLX to the user's clipboard. """ - block_data = serialize_xblock_to_olx(block) + block_data = XBlockSerializer( + block, + fetch_asset_data=True, + ) usage_key = block.usage_key expired_ids = [] diff --git a/openedx/core/djangoapps/olx_rest_api/test_views.py b/openedx/core/djangoapps/olx_rest_api/test_views.py index c91cb6ff3ce8..bde877a457ae 100644 --- a/openedx/core/djangoapps/olx_rest_api/test_views.py +++ b/openedx/core/djangoapps/olx_rest_api/test_views.py @@ -26,7 +26,7 @@ def setUpClass(cls): with cls.store.default_store(ModuleStoreEnum.Type.split): cls.course = ToyCourseFactory.create(modulestore=cls.store) assert str(cls.course.id).startswith("course-v1:"), "This test is for split mongo course exports only" - cls.unit_key = cls.course.id.make_usage_key('vertical', 'vertical_test') + cls.video_key = cls.course.id.make_usage_key('video', 'sample_video') def setUp(self): """ @@ -56,7 +56,7 @@ def test_no_permission(self): A regular user enrolled in the course (but not part of the authoring team) should not be able to use the API. """ - response = self.get_olx_response_for_block(self.unit_key) + response = self.get_olx_response_for_block(self.video_key) assert response.status_code == 403 assert response.json()['detail'] ==\ 'You must be a member of the course team in Studio to export OLX using this API.' @@ -67,24 +67,14 @@ def test_export(self): the course. """ CourseStaffRole(self.course.id).add_users(self.user) - - response = self.get_olx_response_for_block(self.unit_key) + response = self.get_olx_response_for_block(self.video_key) assert response.status_code == 200 - assert response.json()['root_block_id'] == str(self.unit_key) + assert response.json()['root_block_id'] == str(self.video_key) blocks = response.json()['blocks'] - # Check the OLX of the root block: - self.assertXmlEqual( - blocks[str(self.unit_key)]['olx'], - '\n' - ' \n' - ' \n' - ' \n' - ' \n' - '\n' - ) + # Check the OLX of a video self.assertXmlEqual( - blocks[str(self.course.id.make_usage_key('video', 'sample_video'))]['olx'], + blocks[str(self.video_key)]['olx'], '