From e2d510d2539f034265fae61cd27d2b464bb5792d Mon Sep 17 00:00:00 2001
From: dzhuang <dzhuang.scut@gmail.com>
Date: Fri, 23 Feb 2018 23:08:12 +0800
Subject: [PATCH] Removed lxml dependency for ipynb utility.

---
 course/content.py                               | 17 ++---------------
 .../course/jinja2/nbconvert_template.tpl        |  2 +-
 course/utils.py                                 | 17 +++++++++++++++++
 requirements.txt                                |  3 ---
 setup.py                                        |  1 -
 tests/test_content.py                           |  2 ++
 6 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/course/content.py b/course/content.py
index 2a31b47f..f4b51531 100644
--- a/course/content.py
+++ b/course/content.py
@@ -912,18 +912,6 @@ def expand_markup(
     return text
 
 
-def unwrap_relate_tmp_pre_tag(html_string):
-    # type: (Text) -> (Text)
-
-    from lxml.html import fromstring, tostring
-    tree = fromstring(html_string)
-
-    for node in tree.iterdescendants("pre"):
-        if "relate_tmp_pre" in node.attrib.get("class", ""):
-            node.drop_tag()
-    return tostring(tree, encoding="unicode")
-
-
 def markup_to_html(
         course,  # type: Optional[Course]
         repo,  # type: Repo_ish
@@ -976,11 +964,13 @@ def markup_to_html(
         return ""
 
     from course.mdx_mathjax import MathJaxExtension
+    from course.utils import NBConvertExtension
     import markdown
 
     extensions = [
         LinkFixerExtension(course, commit_sha, reverse_func=reverse_func),
         MathJaxExtension(),
+        NBConvertExtension(),
         "markdown.extensions.extra",
     ]
 
@@ -998,9 +988,6 @@ def markup_to_html(
         extensions=extensions,
         output_format="html5")
 
-    if result.strip():
-        result = unwrap_relate_tmp_pre_tag(result)
-
     assert isinstance(result, six.text_type)
     if cache_key is not None:
         def_cache.add(cache_key, result, None)
diff --git a/course/templates/course/jinja2/nbconvert_template.tpl b/course/templates/course/jinja2/nbconvert_template.tpl
index f040623a..c59f9a8b 100644
--- a/course/templates/course/jinja2/nbconvert_template.tpl
+++ b/course/templates/course/jinja2/nbconvert_template.tpl
@@ -2,7 +2,7 @@
 
 {# This is to prevent code_cell being process by markdown_to_html #}
 
-{% block input %}<pre class="relate_tmp_pre">{{ super() }}</pre>
+{% block input %}<pre><relate_ipynb>{{ super() }}</relate_ipynb></pre>
 {%- endblock input %}
 
 {# This is to remove the empty cells ahead of markdown_cells #}
diff --git a/course/utils.py b/course/utils.py
index 428576ba..b4ca3ea9 100644
--- a/course/utils.py
+++ b/course/utils.py
@@ -28,6 +28,7 @@ from typing import cast
 
 import six
 import datetime  # noqa
+import markdown
 
 from django.shortcuts import (  # noqa
         render, get_object_or_404)
@@ -1322,6 +1323,30 @@ class IpynbJinjaMacro(RelateJinjaMacroBase):
 
         return body
 
+
+# nbconvert_template.tpl wraps each code-cell input in
+# <pre><relate_ipynb>...</relate_ipynb></pre> so that markdown leaves it
+# alone; these patterns match that temporary wrapper so it can be removed.
+NBCONVERT_PRE_OPEN_RE = re.compile(r"<pre\s*>\s*<relate_ipynb\s*>")
+NBCONVERT_PRE_CLOSE_RE = re.compile(r"</relate_ipynb\s*>\s*</pre\s*>")
+
+
+class NBConvertHTMLPostprocessor(markdown.postprocessors.Postprocessor):
+    """Remove the temporary <pre><relate_ipynb> wrapper from rendered HTML."""
+
+    def run(self, text):
+        text = NBCONVERT_PRE_OPEN_RE.sub("", text)
+        text = NBCONVERT_PRE_CLOSE_RE.sub("", text)
+        return text
+
+
+class NBConvertExtension(markdown.Extension):
+    """Markdown extension that installs NBConvertHTMLPostprocessor."""
+
+    def extendMarkdown(self, md, md_globals=None):  # noqa
+        # md_globals is optional: Markdown >= 3.0 calls extendMarkdown(md).
+        md.postprocessors['relate_nbconvert'] = NBConvertHTMLPostprocessor(md)
+
 # }}}
 
 # vim: foldmethod=marker
diff --git a/requirements.txt b/requirements.txt
index 3dc2f8e9..62959600 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -117,7 +117,4 @@ typing
 nbconvert>=5.2.1
 IPython
 
-# For parsing and edit html
-lxml
-
 # vim: foldmethod=marker
diff --git a/setup.py b/setup.py
index 286d45ec..64eee057 100644
--- a/setup.py
+++ b/setup.py
@@ -27,7 +27,6 @@ setup(name="relate-courseware",
           "markdown",
           "dulwich",
           "pyyaml",
-          "lxml",
           "nbconvert>=5.2.1",
           "pymbolic",
           "sympy",
diff --git a/tests/test_content.py b/tests/test_content.py
index 589e9f3c..e8f37b04 100644
--- a/tests/test_content.py
+++ b/tests/test_content.py
@@ -240,6 +240,7 @@ CODE_CELL_HTML_CLASS = "code_cell"
 CODE_CELL_IN_STR_PATTERN = '<div class="prompt input_prompt">In[%s]:</div>'
 CODE_CELL_PRINT_STR1 = "This is function1"
 CODE_CELL_PRINT_STR2 = "This is function2"
+RELATE_IPYNB_CONVERT_PRE_WRAPPER_TAG_NAME = "relate_ipynb"
 
 
 def strip_nbsp(s):
@@ -261,6 +262,7 @@ class NbconvertRenderTestMixin(SingleCoursePageSandboxTestBaseMixin):
         self.assertNotContains(response, "```")
         self.assertNotContains(response, "# First Title of Test NoteBook")
         self.assertNotContains(response, "# Second Title of Test NoteBook")
+        self.assertNotContains(response, RELATE_IPYNB_CONVERT_PRE_WRAPPER_TAG_NAME)
 
     def setUp(self):
         super(NbconvertRenderTestMixin, self).setUp()
-- 
GitLab