diff --git a/course/page/code.py b/course/page/code.py index 2eaabd164feefe3e5874d0336c09a4ab223f8086..bd7bbfaaef4f0749810609f55c2a9b0d5357dead 100644 --- a/course/page/code.py +++ b/course/page/code.py @@ -25,7 +25,6 @@ THE SOFTWARE. """ import bleach -import re import six from course.validation import ValidationError @@ -307,7 +306,7 @@ def is_allowed_data_uri(allowed_mimetypes, uri): def filter_from_code_html_attributes(tag, name, value): - from html5lib.filters.sanitizer import attr_val_is_uri + if tag == "audio": if name in ["controls"]: return True @@ -339,34 +338,7 @@ def filter_from_code_html_attributes(tag, name, value): else: return False - else: - # Approach recommended here: https://github.com/mozilla/bleach/issues/348 - - # FIXME: Cannot access 'namespaced name' of attribute, so - # we're unable to match XML's base and xlink. - if (None, name) in attr_val_is_uri: - # Reluctant copy-paste from - # https://github.com/mozilla/bleach/blob/8706b5373b633407e804b1b2975edf8760067ff7/bleach/sanitizer.py#L513-L525 - - from xml.sax.saxutils import unescape - - val_unescaped = re.sub( - "[`\000-\040\177-\240\s]+", - '', - unescape(value)).lower() - - # Remove replacement characters from unescaped characters - val_unescaped = val_unescaped.replace("\ufffd", "") - - # Drop attributes with uri values that have - # protocols that aren't allowed - if (re.match(r'^[a-z0-9][-+.a-z0-9]*:', val_unescaped) - and - (val_unescaped.split(':')[0] - not in bleach.ALLOWED_PROTOCOLS)): - return False - - return name in bleach.ALLOWED_ATTRIBUTES + return False def sanitize_from_code_html(s): @@ -378,6 +350,9 @@ def sanitize_from_code_html(s): "img"], attributes=filter_from_code_html_attributes, + # strip unwanted tags + strip=True, + # Fixed https://github.com/inducer/relate/issues/435 # Ref: https://github.com/mozilla/bleach/issues/348 protocols=bleach.ALLOWED_PROTOCOLS + ["data"]) diff --git a/tests/base_test_mixins.py b/tests/base_test_mixins.py index 74e0a0afe431ba1dd5de19102314bcc3ddbbfeba..1d6e8684546e1fefee4f0bb72458b5305835e91d 100644 --- a/tests/base_test_mixins.py +++ b/tests/base_test_mixins.py @@ -223,29 +223,44 @@ class ResponseContextMixin(object): def get_response_context_answer_feedback(self, response): return self.get_response_context_value_by_name(response, "feedback") + def get_response_context_answer_feedback_string(self, response, + include_bulk_feedback=True): + answer_feedback = self.get_response_context_value_by_name( + response, "feedback") + + self.assertTrue(hasattr(answer_feedback, "feedback")) + if not include_bulk_feedback: + return answer_feedback.feedback + + if answer_feedback.bulk_feedback is None: + return answer_feedback.feedback + else: + if answer_feedback.feedback is None: + return answer_feedback.bulk_feedback + return answer_feedback.feedback + answer_feedback.bulk_feedback + def assertResponseContextAnswerFeedbackContainsFeedback( # noqa self, response, expected_feedback, include_bulk_feedback=True, html=False): - answer_feedback = self.get_response_context_answer_feedback(response) - feedback_str = answer_feedback.feedback - if include_bulk_feedback: - feedback_str += answer_feedback.bulk_feedback + feedback_str = self.get_response_context_answer_feedback_string( + response, include_bulk_feedback) - self.assertTrue(hasattr(answer_feedback, "feedback")) if not html: self.assertIn(expected_feedback, feedback_str) else: self.assertInHTML(expected_feedback, feedback_str) def assertResponseContextAnswerFeedbackNotContainsFeedback( # noqa - self, response, expected_feedback, - html=False): - answer_feedback = self.get_response_context_answer_feedback(response) - self.assertTrue(hasattr(answer_feedback, "feedback")) + self, response, expected_feedback, + include_bulk_feedback=True, + html=False): + feedback_str = self.get_response_context_answer_feedback_string( + response, include_bulk_feedback) + if not html: - self.assertNotIn(expected_feedback, answer_feedback.feedback) + self.assertNotIn(expected_feedback, feedback_str) else: - self.assertInHTML(expected_feedback, answer_feedback.feedback, count=0) + self.assertInHTML(expected_feedback, feedback_str, count=0) def assertResponseContextAnswerFeedbackCorrectnessEquals( # noqa self, response, expected_correctness): diff --git a/tests/test_pages/test_code.py b/tests/test_pages/test_code.py index e566925ba58b3f486170be61226bc6a1fb5ece3d..e597e38a531922a1f53c0caba6ca4abdf85becde 100644 --- a/tests/test_pages/test_code.py +++ b/tests/test_pages/test_code.py @@ -645,7 +645,9 @@ class CodeQuestionTest(SingleCoursePageSandboxTestBaseMixin, not_expected_msgs = [not_expected_msgs] for msg in not_expected_msgs: self.assertResponseContextAnswerFeedbackNotContainsFeedback( - resp, msg, html=in_html) + resp, msg) + self.assertResponseContextAnswerFeedbackNotContainsFeedback( + resp, msg, html=True) self.assertEqual(resp.status_code, 200) self.assertResponseContextAnswerFeedbackCorrectnessEquals(resp, @@ -782,10 +784,14 @@ class CodeQuestionTest(SingleCoursePageSandboxTestBaseMixin, def test_html_audio(self): b64_data = "T2dnUwACAAAAAAAAAAA+HAAAAAAAAGyawCEBQGZpc2h" - audio_valid = ( + audio_valid1 = ( '<audio controls><source src="data:audio/wav;base64,' '%s" type="audio/wav">' '</audio>' % b64_data) + audio_valid2 = ( + '<audio><source src="data:audio/wav;base64,' + '%s" type="audio/wav">' + '</audio>' % b64_data) audio_invalid1 = ( '<audio control><source src="data:audio/wav;base64,' '%s" type="audio/wav">' @@ -795,25 +801,21 @@ class CodeQuestionTest(SingleCoursePageSandboxTestBaseMixin, '%s" type="audio/wav">' '</audio>' % b64_data) audio_invalid3 = ( - '<audio><source src="data:audio/wav;base64,' - '%s" type="audio/wav">' - '</audio>' % b64_data) - audio_invalid4 = ( '<audio controls><source src="data:audio/ogg;base64,' '%s" type="audio/ogg">' '</audio>' % b64_data) - audio_invalid5 = ( + audio_invalid4 = ( '<audio controls><source src="hosse.wav" type="audio/wav">' '</audio>') - html = [audio_valid, audio_invalid1, audio_invalid2, audio_invalid3, - audio_invalid4, audio_invalid5] + html = [audio_valid1, audio_valid2, audio_invalid1, audio_invalid2, + audio_invalid3, audio_invalid4] self.assert_runpy_result_and_response( "user_error", - expected_msgs=[audio_valid], + expected_msgs=[audio_valid1, audio_valid2], not_expected_msgs=[audio_invalid1, audio_invalid2, audio_invalid3, - audio_invalid4, audio_invalid5], + audio_invalid4], html=html, in_html=True ) @@ -860,8 +862,8 @@ class CodeQuestionTest(SingleCoursePageSandboxTestBaseMixin, '<a src="data:text/html;base64,%s"</a>' % evil_b64_data, '<img src="https://Evil.com">', - '<script src="data:text/html,<script>alert("Evil");</script>"', - '<script href="data:text/html,<script>alert("Evil");</script>"', + '<script src="data:text/html,<script>alert("Evil");"</script>', + '<script href="data:text/html,<script>alert("Evil");"</script>', '<script src="data:text/html;base64,%s"</script>' % evil_b64_data, '<script href="data:text/html;base64,%s"</script>' % evil_b64_data, @@ -887,8 +889,7 @@ class CodeQuestionTest(SingleCoursePageSandboxTestBaseMixin, self.assert_runpy_result_and_response( "user_error", - not_expected_msgs=self.evil_data_html_strings + [ - "Evil", self.evil_data_html_strings], + not_expected_msgs=self.evil_data_html_strings + ["Evil"], html=self.evil_data_html_strings, in_html=True, )