-
Notifications
You must be signed in to change notification settings - Fork 3.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
REST API to export modulestore XBlocks as OLX #23068
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
""" | ||
Helpers required to adapt to differing APIs | ||
""" | ||
from contextlib import contextmanager | ||
import logging | ||
import re | ||
|
||
from opaque_keys import InvalidKeyError | ||
from opaque_keys.edx.keys import AssetKey, CourseKey | ||
from fs.memoryfs import MemoryFS | ||
from fs.wrapfs import WrapFS | ||
|
||
from static_replace import replace_static_urls | ||
from xmodule.contentstore.content import StaticContent | ||
from xmodule.assetstore.assetmgr import AssetManager | ||
from xmodule.modulestore.django import modulestore as store | ||
from xmodule.modulestore.exceptions import ItemNotFoundError | ||
from xmodule.exceptions import NotFoundError | ||
from xmodule.xml_module import XmlParserMixin | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
|
||
def get_block(usage_key): | ||
""" | ||
Return an XBlock from modulestore. | ||
""" | ||
return store().get_item(usage_key) | ||
|
||
|
||
def get_asset_content_from_path(course_key, asset_path): | ||
""" | ||
Locate the given asset content, load it into memory, and return it. | ||
Returns None if the asset is not found. | ||
""" | ||
try: | ||
asset_key = StaticContent.get_asset_key_from_path(course_key, asset_path) | ||
return AssetManager.find(asset_key) | ||
except (ItemNotFoundError, NotFoundError): | ||
return None | ||
|
||
|
||
def rewrite_absolute_static_urls(text, course_id): | ||
""" | ||
Convert absolute URLs like | ||
https://studio-site.opencraft.hosting/asset-v1:LabXchange+101+2019+type@asset+block@SCI_1.2_Image_.png | ||
to the proper | ||
/static/SCI_1.2_Image_.png | ||
format for consistency and portability. | ||
""" | ||
assert isinstance(course_id, CourseKey) | ||
asset_full_url_re = r'https?://[^/]+/(?P<maybe_asset_key>[^\s\'"&]+)' | ||
|
||
def check_asset_key(match_obj): | ||
""" | ||
If this URL's path part is an AssetKey from the same course, rewrite it. | ||
""" | ||
try: | ||
asset_key = AssetKey.from_string(match_obj.group('maybe_asset_key')) | ||
except InvalidKeyError: | ||
return match_obj.group(0) # Not an asset key; do not rewrite | ||
if asset_key.course_key == course_id: | ||
return '/static/' + asset_key.path # Rewrite this to portable form | ||
else: | ||
return match_obj.group(0) # From a different course; do not rewrite | ||
|
||
return re.sub(asset_full_url_re, check_asset_key, text) | ||
|
||
|
||
def collect_assets_from_text(text, course_id, include_content=False): | ||
""" | ||
Yield dicts of asset content and path from static asset paths found in the given text. | ||
Make sure to have replaced the URLs with rewrite_absolute_static_urls first. | ||
If include_content is True, the result will include a contentstore | ||
StaticContent file object which wraps the actual binary content of the file. | ||
""" | ||
# Replace static urls like '/static/foo.png' | ||
static_paths = [] | ||
# Drag-and-drop-v2 has | ||
# "/static/blah.png" | ||
# which must be changed to "/static/blah.png" for replace_static_urls to work: | ||
text2 = text.replace(""", '"') | ||
replace_static_urls(text=text2, course_id=course_id, static_paths_out=static_paths) | ||
for (path, uri) in static_paths: | ||
if path.startswith('/static/'): | ||
path = path[8:] | ||
info = { | ||
'path': path, | ||
'url': '/' + str(course_id.make_asset_key("asset", path)), | ||
} | ||
if include_content: | ||
content = get_asset_content_from_path(course_id, path) | ||
if content is None: | ||
log.error("Static asset not found: (%s, %s)", path, uri) | ||
else: | ||
info['content'] = content | ||
yield info | ||
|
||
|
||
@contextmanager | ||
def override_export_fs(block): | ||
""" | ||
Hack required for some legacy XBlocks which inherit | ||
XModuleDescriptor.add_xml_to_node() instead of the usual | ||
XmlSerializationMixin.add_xml_to_node() method. | ||
This method temporarily replaces a block's runtime's | ||
'export_fs' system with an in-memory filesystem. | ||
This method also abuses the XmlParserMixin.export_to_file() | ||
API to prevent the XModule export code from exporting each | ||
block as two files (one .olx pointing to one .xml file). | ||
The export_to_file was meant to be used only by the | ||
customtag XModule but it makes our lives here much easier. | ||
""" | ||
fs = WrapFS(MemoryFS()) | ||
fs.makedir('course') | ||
fs.makedir('course/static') # Video XBlock requires this directory to exists, to put srt files etc. | ||
|
||
old_export_fs = block.runtime.export_fs | ||
block.runtime.export_fs = fs | ||
if hasattr(block, 'export_to_file'): | ||
old_export_to_file = block.export_to_file | ||
block.export_to_file = lambda: False | ||
old_global_export_to_file = XmlParserMixin.export_to_file | ||
XmlParserMixin.export_to_file = lambda _: False # So this applies to child blocks that get loaded during export | ||
yield fs | ||
block.runtime.export_fs = old_export_fs | ||
if hasattr(block, 'export_to_file'): | ||
block.export_to_file = old_export_to_file | ||
XmlParserMixin.export_to_file = old_global_export_to_file |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
olx_rest_api Django application initialization. | ||
""" | ||
from django.apps import AppConfig | ||
|
||
from openedx.core.djangoapps.plugins.constants import PluginURLs, ProjectType | ||
|
||
|
||
class OlxRestApiAppConfig(AppConfig): | ||
""" | ||
Configuration for the olx_rest_api Django plugin application. | ||
See: https://github.com/edx/edx-platform/blob/master/openedx/core/djangoapps/plugins/README.rst | ||
""" | ||
|
||
name = 'openedx.core.djangoapps.olx_rest_api' | ||
verbose_name = 'Modulestore OLX REST API' | ||
plugin_app = { | ||
PluginURLs.CONFIG: { | ||
ProjectType.CMS: { | ||
# The namespace to provide to django's urls.include. | ||
PluginURLs.NAMESPACE: 'olx_rest_api', | ||
}, | ||
}, | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
""" | ||
Code for serializing a modulestore XBlock to OLX suitable for import into | ||
Blockstore. | ||
""" | ||
import logging | ||
import os | ||
from collections import namedtuple | ||
|
||
from lxml import etree | ||
|
||
from . import adapters | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
# A static file required by an XBlock | ||
StaticFile = namedtuple('StaticFile', ['name', 'url', 'data']) | ||
|
||
|
||
def blockstore_def_key_from_modulestore_usage_key(usage_key): | ||
""" | ||
In modulestore, the "definition key" is a MongoDB ObjectID kept in split's | ||
definitions table, which theoretically allows the same block to be used in | ||
many places (each with a unique usage key). However, that functionality is | ||
not exposed in Studio (other than via content libraries). So when we import | ||
into Blockstore, we assume that each usage is unique, don't generate a usage | ||
key, and create a new "definition key" from the original usage key. | ||
So modulestore usage key | ||
block-v1:A+B+C+type@html+block@introduction | ||
will become Blockstore definition key | ||
html/introduction | ||
""" | ||
block_type = usage_key.block_type | ||
if block_type == 'vertical': | ||
# We transform <vertical> to <unit> | ||
block_type = "unit" | ||
return block_type + "/" + usage_key.block_id | ||
|
||
|
||
class XBlockSerializer(object): | ||
""" | ||
This class will serialize an XBlock, producing: | ||
(1) A new definition ID for use in Blockstore | ||
(2) an XML string defining the XBlock and referencing the IDs of its | ||
children (but not containing the actual XML of its children) | ||
(3) a list of any static files required by the XBlock and their URL | ||
""" | ||
|
||
def __init__(self, block): | ||
""" | ||
Serialize an XBlock to an OLX string + supporting files, and store the | ||
resulting data in this object. | ||
""" | ||
self.orig_block_key = block.scope_ids.usage_id | ||
self.static_files = [] | ||
self.def_id = blockstore_def_key_from_modulestore_usage_key(self.orig_block_key) | ||
|
||
# Special cases: | ||
if self.orig_block_key.block_type == 'html': | ||
self.serialize_html_block(block) | ||
else: | ||
self.serialize_normal_block(block) | ||
|
||
course_key = self.orig_block_key.course_key | ||
# Search the OLX for references to files stored in the course's | ||
# "Files & Uploads" (contentstore): | ||
self.olx_str = adapters.rewrite_absolute_static_urls(self.olx_str, course_key) | ||
for asset in adapters.collect_assets_from_text(self.olx_str, course_key): | ||
path = asset['path'] | ||
if path not in [sf.name for sf in self.static_files]: | ||
self.static_files.append(StaticFile(name=path, url=asset['url'], data=None)) | ||
|
||
def serialize_normal_block(self, block): | ||
""" | ||
Serialize an XBlock to XML. | ||
|
||
This method is used for every block type except HTML, which uses | ||
serialize_html_block() instead. | ||
""" | ||
# Create an XML node to hold the exported data | ||
olx_node = etree.Element("root") # The node name doesn't matter: add_xml_to_node will change it | ||
# ^ Note: We could pass nsmap=xblock.core.XML_NAMESPACES here, but the | ||
# resulting XML namespace attributes don't seem that useful? | ||
with adapters.override_export_fs(block) as filesystem: # Needed for XBlocks that inherit XModuleDescriptor | ||
# Tell the block to serialize itself as XML/OLX: | ||
if not block.has_children: | ||
block.add_xml_to_node(olx_node) | ||
else: | ||
# We don't want the children serialized at this time, because | ||
# otherwise we can't tell which files in 'filesystem' belong to | ||
# this block and which belong to its children. So, temporarily | ||
# disable any children: | ||
children = block.children | ||
block.children = [] | ||
block.add_xml_to_node(olx_node) | ||
block.children = children | ||
|
||
# Now the block/module may have exported addtional data as files in | ||
# 'filesystem'. If so, store them: | ||
for item in filesystem.walk(): # pylint: disable=not-callable | ||
for unit_file in item.files: | ||
file_path = os.path.join(item.path, unit_file.name) | ||
with filesystem.open(file_path, 'rb') as fh: | ||
data = fh.read() | ||
self.static_files.append(StaticFile(name=unit_file.name, data=data, url=None)) | ||
# Apply some transformations to the OLX: | ||
self.transform_olx(olx_node, usage_id=block.scope_ids.usage_id) | ||
# Add <xblock-include /> tags for each child (XBlock XML export | ||
# normally puts children inline as e.g. <html> tags, but we want | ||
# references to them only.) | ||
if block.has_children: | ||
for child_id in block.children: | ||
# In modulestore, the "definition key" is a MongoDB ObjectID | ||
# kept in split's definitions table, which theoretically allows | ||
# the same block to be used in many places (each with a unique | ||
# usage key). However, that functionality is not exposed in | ||
# Studio (other than via content libraries). So when we import | ||
# into Blockstore, we assume that each usage is unique, don't | ||
# generate a usage key, and create a new "definition key" from | ||
# the original usage key. | ||
# So modulestore usage key | ||
# block-v1:A+B+C+type@html+block@introduction | ||
# will become Blockstore definition key | ||
# html+introduction | ||
# | ||
# If we needed the real definition key, we could get it via | ||
# child = block.runtime.get_block(child_id) | ||
# child_def_id = str(child.scope_ids.def_id) | ||
# and then use | ||
# <xblock-include definition={child_def_id} usage={child_id.block_id} /> | ||
def_id = blockstore_def_key_from_modulestore_usage_key(child_id) | ||
olx_node.append(olx_node.makeelement("xblock-include", {"definition": def_id})) | ||
# Store the resulting XML as a string: | ||
self.olx_str = etree.tostring(olx_node, encoding="unicode", pretty_print=True) | ||
|
||
def serialize_html_block(self, block): | ||
""" | ||
Special case handling for HTML blocks | ||
""" | ||
olx_node = etree.Element("html") | ||
if block.display_name: | ||
olx_node.attrib["display_name"] = block.display_name | ||
olx_node.text = etree.CDATA("\n" + block.data + "\n") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @bradenmacdonald Something I stumbled into while unit testing upstream_sync: this line causes Minimal repro: # Boilerplate...
from organizations.api import ensure_organization
from organizations.models import Organization
from openedx.core.djangoapps.content_libraries import api as libs
from openedx.core.djangoapps.xblock import api as xblock
from django.contrib.auth.models import User
user = User.objects.get(username="openedx")
ensure_organization("TestX")
library = libs.create_library(
org=Organization.objects.get(short_name="TestX"), slug="TestLib", title="Test Library"
)
block_key = libs.create_library_block(library.key, "html", "test-newlines").usage_key
# Initial block data: No newlines
block = xblock.load_block(block_key, user)
block.data = "<html><body>Hello</body></html>"
print(repr(block.data)) # '<html><body>Hello</body></html>'
block.save()
# First round-trip: data is wrapped in newlines
block = xblock.load_block(block_key, user)
print(repr(block.data)) # '\n<html><body>Hello</body></html>\n'
block.save()
# Second round-trip: data is stilled wrapped in newlines
# Fortunately, it didn't add another pair of newlines.
block = xblock.load_block(block_key, user)
print(repr(block.data)) # '\n<html><body>Hello</body></html>\n' There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, yeah I don't think we need the newlines. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @bradenmacdonald Actually, it's a little worse than I thought. As long as any block fields are edited, then it always adds a pair of newlines to print(repr(block.data)) # '\n<html><body>Hello</body></html>\n'
block.display_name = "blah"
block.save()
block = xblock.load_block(block_key, user)
print(repr(block.data)) # '\n\n<html><body>Hello</body></html>\n\n' This still wouldn't break anything for end users AFAICT, but it'll make unit tests awkward, and it'll probably bother folks who edit OLX directly. I'll open a bug ticket. (EDIT: #35525) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll remove the newlines and see if any unit tests break. |
||
self.olx_str = etree.tostring(olx_node, encoding="unicode", pretty_print=True) | ||
|
||
def transform_olx(self, olx_node, usage_id): | ||
""" | ||
Apply transformations to the given OLX etree Node. | ||
""" | ||
# Remove 'url_name' - we store the definition key in the folder name | ||
# that holds the OLX and the usage key elsewhere, so specifying it | ||
# within the OLX file is redundant and can lead to issues if the file is | ||
# copied and pasted elsewhere in the bundle with a new definition key. | ||
olx_node.attrib.pop('url_name', None) | ||
# Convert <vertical> to the new <unit> tag/block | ||
if olx_node.tag == 'vertical': | ||
olx_node.tag = 'unit' | ||
for key in olx_node.attrib.keys(): | ||
if key not in ('display_name', 'url_name'): | ||
log.warning( | ||
'<vertical> tag attribute "%s" will be ignored after conversion to <unit> (in %s)', | ||
key, | ||
str(usage_id) | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
""" | ||
Test the OLX REST API adapters code | ||
""" | ||
import unittest | ||
|
||
from opaque_keys.edx.keys import CourseKey | ||
|
||
from openedx.core.djangoapps.olx_rest_api import adapters | ||
|
||
|
||
class TestAdapters(unittest.TestCase): | ||
""" | ||
Test the OLX REST API adapters code | ||
""" | ||
|
||
def test_rewrite_absolute_static_urls(self): | ||
""" | ||
Test that rewrite_absolute_static_urls() can find and replace all uses | ||
of absolute Studio URLs in a course. | ||
|
||
Some criteria: | ||
- Rewriting only happens if the course ID is the same. If the absolute | ||
URL points to a different course, the new /static/foo.png form won't | ||
work. | ||
""" | ||
# Note that this doesn't have to be well-formed OLX | ||
course_id = CourseKey.from_string("course-v1:TestCourse+101+2020") | ||
olx_in = """ | ||
<problem> | ||
<img src="https://studio.example.com/asset-v1:TestCourse+101+2020+type@asset+block@SCI_1.2_Image_.png"> | ||
<a href='https://studio.example.com/asset-v1:TestCourse+101+2020+type@asset+block@Québec.html'> | ||
View a file with accented characters in the filename. | ||
</a> | ||
<a href="https://studio.example.com/xblock/block-v1:foo">Not an asset link</a>. | ||
<img src="https://studio.example.com/asset-v1:OtherCourse+500+2020+type@asset+block@exclude_me.png"> | ||
</problem> | ||
""" | ||
olx_expected = """ | ||
<problem> | ||
<img src="/static/SCI_1.2_Image_.png"> | ||
<a href='/static/Québec.html'> | ||
View a file with accented characters in the filename. | ||
</a> | ||
<a href="https://studio.example.com/xblock/block-v1:foo">Not an asset link</a>. | ||
<img src="https://studio.example.com/asset-v1:OtherCourse+500+2020+type@asset+block@exclude_me.png"> | ||
</problem> | ||
""" | ||
olx_out = adapters.rewrite_absolute_static_urls(olx_in, course_id) | ||
self.assertEqual(olx_out, olx_expected) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(sigh)... totally legit comment... but (sigh)