diff --git a/common/lib/xmodule/xmodule/open_ended_grading_classes/openendedchild.py b/common/lib/xmodule/xmodule/open_ended_grading_classes/openendedchild.py index 67a058f478af..49c66ed0af78 100644 --- a/common/lib/xmodule/xmodule/open_ended_grading_classes/openendedchild.py +++ b/common/lib/xmodule/xmodule/open_ended_grading_classes/openendedchild.py @@ -1,7 +1,8 @@ import json import logging -from lxml.html.clean import Cleaner, autolink_html import re +import bleach +from html5lib.tokenizer import HTMLTokenizer from xmodule.progress import Progress import capa.xqueue_interface as xqueue_interface from capa.util import * @@ -50,24 +51,14 @@ def upload_to_s3(file_to_upload, keyname, s3_interface): return public_url -class WhiteListCleaner(Cleaner): - """ - By default, lxml cleaner strips out all links that are not in a defined whitelist. - We want to allow all links, and rely on the peer grading flagging mechanic to catch - the "bad" ones. So, don't define a whitelist at all. - """ - def allow_embedded_url(self, el, url): - """ - Override the Cleaner allow_embedded_url method to remove the whitelist url requirement. - Ensure that any tags not in the whitelist are stripped beforehand. - """ - - # Tell cleaner to strip any element with a tag that isn't whitelisted. - if self.whitelist_tags is not None and el.tag not in self.whitelist_tags: - return False - - # Tell cleaner to allow all urls. 
- return True +# Used by sanitize_html +ALLOWED_HTML_ATTRS = { + '*': ['id', 'class', 'height', 'width', 'alt'], + 'a': ['href', 'title', 'rel', 'target'], + 'embed': ['src'], + 'iframe': ['src'], + 'img': ['src'], +} class OpenEndedChild(object): @@ -228,22 +219,23 @@ def sanitize_html(answer): answer - any string return - a cleaned version of the string """ - try: - answer = autolink_html(answer) - cleaner = WhiteListCleaner( - style=True, - links=True, - add_nofollow=False, - page_structure=True, - safe_attrs_only=True, - whitelist_tags=('embed', 'iframe', 'a', 'img', 'br',) - ) - clean_html = cleaner.clean_html(answer) - clean_html = re.sub(r'
$', '', re.sub(r'^', '', clean_html))
- clean_html = re.sub("\n","
", clean_html)
- except Exception:
- clean_html = answer
- return clean_html
+ clean_html = bleach.clean(answer,
+ tags=['embed', 'iframe', 'a', 'img', 'br'],
+ attributes=ALLOWED_HTML_ATTRS,
+ strip=True)
+ autolinked = bleach.linkify(clean_html,
+ callbacks=[bleach.callbacks.target_blank],
+ skip_pre=True,
+ tokenizer=HTMLTokenizer)
+ return OpenEndedChild.replace_newlines(autolinked)
+
+    @staticmethod
+    def replace_newlines(html):
+        """
+        Strip one wrapping <p>...</p> pair and replace newlines with <br/>.
+
+        html - the markup string to post-process
+        return - the string with a leading "<p>" and a trailing "</p>" removed
+                 (each only if present) and every newline turned into "<br/>"
+        """
+        # Inner call drops a leading <p>; outer call drops a trailing </p>.
+        retv = re.sub(r'</p>$', '', re.sub(r'^<p>', '', html))
+        return re.sub("\n", "<br/>", retv)
def new_history_entry(self, answer):
"""
diff --git a/common/lib/xmodule/xmodule/tests/test_combined_open_ended.py b/common/lib/xmodule/xmodule/tests/test_combined_open_ended.py
index 9aa5aeb5ce2c..47e6d9bdb728 100644
--- a/common/lib/xmodule/xmodule/tests/test_combined_open_ended.py
+++ b/common/lib/xmodule/xmodule/tests/test_combined_open_ended.py
@@ -1001,3 +1001,92 @@ def test_link_submission_success(self):
self.assertTrue(response['success'])
self.assertIn(self.answer_link, response['student_response'])
self.assertIn(self.autolink_tag, response['student_response'])
+
+
+class OpenEndedModuleUtilTest(unittest.TestCase):
+ """
+ Tests for the util functions of OpenEndedModule. Currently just for the html_sanitizer and <br/> inserter
+ """
+ script_dirty = u''
+ script_clean = u'alert("xss!")'
+ img_dirty = u''
+ img_clean = u''
+ embed_dirty = u''
+ embed_clean = u'