Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: generate OLX with single-quoted attributes (WIP) #35031

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ files =
openedx/core/djangoapps/xblock,
openedx/core/types,
openedx/core/djangoapps/content_tagging,
xmodule/util/keys.py
xmodule/util/keys.py,
xmodule/util/olx.py

[mypy.plugins.django-stubs]
# content_staging only works with CMS; others work with either, so we run mypy with CMS settings.
Expand Down
17 changes: 9 additions & 8 deletions xmodule/modulestore/xml_exporter.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
"""
Methods for exporting course data to XML
"""

from __future__ import annotations

import logging
import os
import typing as t
from abc import abstractmethod
from json import dumps

Expand All @@ -24,6 +25,7 @@
from xmodule.modulestore.draft_and_published import DIRECT_ONLY_CATEGORIES
from xmodule.modulestore.inheritance import own_metadata
from xmodule.modulestore.store_utilities import draft_node_constructor, get_draft_subtree_roots
from xmodule.util.olx import write_xml_to_file

DRAFT_DIR = "drafts"
PUBLISHED_DIR = "published"
Expand Down Expand Up @@ -202,8 +204,8 @@ def get_courselike(self):
return self.modulestore.get_course(self.courselike_key, depth=None, lazy=False)

def process_root(self, root, export_fs):
with export_fs.open('course.xml', 'wb') as course_xml:
lxml.etree.ElementTree(root).write(course_xml, encoding='utf-8')
with export_fs.open('course.xml', 'w') as course_xml:
write_xml_to_file(root, course_xml)

def process_extra(self, root, courselike, root_courselike_dir, xml_centric_courselike_key, export_fs):
# Export the modulestore's asset metadata.
Expand All @@ -216,8 +218,8 @@ def process_extra(self, root, courselike, root_courselike_dir, xml_centric_cours
# All asset types are exported using the "asset" tag - but their asset type is specified in each asset key.
asset = lxml.etree.SubElement(asset_root, AssetMetadata.ASSET_XML_TAG)
asset_md.to_xml(asset)
with OSFS(asset_dir).open(AssetMetadata.EXPORTED_ASSET_FILENAME, 'wb') as asset_xml_file:
lxml.etree.ElementTree(asset_root).write(asset_xml_file, encoding='utf-8')
with OSFS(asset_dir).open(AssetMetadata.EXPORTED_ASSET_FILENAME, 'w') as asset_xml_file:
write_xml_to_file(asset_root, asset_xml_file)

# export the static assets
policies_dir = export_fs.makedir('policies', recreate=True)
Expand Down Expand Up @@ -344,9 +346,8 @@ def post_process(self, root, export_fs):
called library.xml.
"""
# Create the Library.xml file, which acts as the index of all library contents.
xml_file = export_fs.open(LIBRARY_ROOT, 'wb')
xml_file.write(lxml.etree.tostring(root, pretty_print=True, encoding='utf-8'))
xml_file.close()
with export_fs.open(LIBRARY_ROOT, 'w') as xml_file:
write_xml_to_file(root, xml_file)


def export_course_to_xml(modulestore, contentstore, course_key, root_dir, course_dir):
Expand Down
98 changes: 98 additions & 0 deletions xmodule/util/olx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
"""
Utility methods for parsing/generating OLX.
"""
from __future__ import annotations

import typing as t
from abc import abstractmethod

from lxml import etree


def write_xml_to_file(root_node: etree._Element, target_file: t.IO[str]) -> None:
"""
@@TODO docstring
@@TODO unit tests
"""
xml_str = etree.tostring(root_node, pretty_print=True, encoding='utf-8').decode('utf-8')
convert_quotes = lambda: True # @@TODO: Make this a waffle flag?
if convert_quotes():
xml_chars = iter(xml_str) # @@TODO: try to optimize this by streaming xml rather than rendering xml_str?
target_file.write("".join(_convert_attrs_to_single_quotes(xml_str)))
else:
target_file.write(xml_str)


def _convert_attrs_to_single_quotes(xml_chars: t.Iterator[str]) -> t.Iterable[str]:
"""
Given a stream of chars in an XML string, convert the quoutes around attrs from double to single.

This means exchanging double-quote escape sequences in attributes values (") for literal double quotes (").
It also means exchanging literal single quotes (') for single-quote escape sequences (').
Since OLX heavily uses JSON it our XML attribute values, this conversion generally nets clearner XML.

Example:
Before:
<problem data="{&quot;xyz&quot;: &quot;xyz's value&quot;}">...</problem>
After:
<problem data='{"xyz": "xyz&apos;s value"}'>...</problem>

ASSUMPTIONS:
* String is valid XML based on: <@@TODO link to spec>
* String is rendered by Python's LXML library.
* @@TODO state other syntax assumptions
"""
try:
while c := next(xml_chars):

# We're outside of any tag until we see <

if c == "<":
yield "<"

# We're inside a tag: < ... >

while c := next(xml_chars):
if c == ">":
yield ">"

break # Done with the tag, back to the outside

elif c == "\"":

yield "'" # Convert double-quote close to single-quote close

# We're inside an attribute value: <tagname yada=" ... ">

while c := next(xml_chars):
if c == "\"":
yield "'" # Convert double-quote close to single-quote close

break # Done with the attribute value, back to the tag

elif c == "'":

yield "&apos;" # Single quote in attr --> convert to escape sequence

elif c == "&":

# We're inside an escape sequence: <tagname yada="blah &...; blah">
escape_code = ""
while c := next(xml_chars):
if c == ";":
break # Done with escape sequence
else:
escape_code += c
if escape_code == "quot":
yield "\"" # Escape sequence is &quot; --> convert to literal double-qoute
else:
yield "&{escape_code};" # Any other escape sequence --> leave it as-is

else:
yield c # yield char from attribute value
else:
yield c # yield char from inside a tag
else:
yield c # yield char from outside tags
except StopIteration:
pass
5 changes: 3 additions & 2 deletions xmodule/xml_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from xblock.runtime import KvsFieldData
from xmodule.modulestore import EdxJSONEncoder
from xmodule.modulestore.inheritance import InheritanceKeyValueStore, own_metadata
from xmodule.util.olx import write_xml_to_file

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -472,8 +473,8 @@ def add_xml_to_node(self, node):
self.location.run if self.category == 'course' else url_path,
)
self.runtime.export_fs.makedirs(os.path.dirname(filepath), recreate=True)
with self.runtime.export_fs.open(filepath, 'wb') as fileobj:
ElementTree(xml_object).write(fileobj, pretty_print=True, encoding='utf-8')
with self.runtime.export_fs.open(filepath, 'w') as fileobj:
write_xml_to_file(xml_object, fileobj)
else:
# Write all attributes from xml_object onto node
node.clear()
Expand Down
Loading