From 7eb9f9cce277eb9f46a8dd898e78508bcf0a6490 Mon Sep 17 00:00:00 2001 From: dzhuang Date: Wed, 4 May 2016 08:45:51 -0500 Subject: [PATCH 1/2] add a latex macro to convert Tex code to tag using dataURI. --- course/check.py | 54 +++ course/content.py | 39 ++- course/latex/__init__.py | 118 +++++++ course/latex/converter.py | 688 ++++++++++++++++++++++++++++++++++++++ course/latex/latex.py | 176 ++++++++++ course/latex/utils.py | 392 ++++++++++++++++++++++ course/models.py | 4 +- doc/content.rst | 2 + doc/tex-macro.rst | 225 +++++++++++++ local_settings.py.example | 22 ++ requirements.txt | 3 + 11 files changed, 1721 insertions(+), 2 deletions(-) create mode 100644 course/check.py create mode 100644 course/latex/__init__.py create mode 100644 course/latex/converter.py create mode 100644 course/latex/latex.py create mode 100644 course/latex/utils.py create mode 100644 doc/tex-macro.rst diff --git a/course/check.py b/course/check.py new file mode 100644 index 00000000..d9bdbb6e --- /dev/null +++ b/course/check.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- + +from __future__ import division + +__copyright__ = "Copyright (C) 2016 Dong Zhuang, Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +from django.core.checks import register, Tags as DjangoTags +from django.conf import settings + +from course.latex.utils import get_all_indirect_subclasses +from course.latex.converter import CommandBase + + +class Tags(DjangoTags): + relate_course_tag = 'relate_course_tag' + + +@register(Tags.relate_course_tag, deploy=True) +def latex2image_bin_check(app_configs, **kwargs): + """ + Check if all tex compiler and image converter + are correctly configured, if latex utility is + enabled. + """ + if not getattr(settings, "RELATE_LATEX_TO_IMAGE_ENABLED", False): + return [] + klass = get_all_indirect_subclasses(CommandBase) + instance_list = [cls() for cls in klass] + errors = [] + for instance in instance_list: + error = instance.check() + if error: + errors.append(error) + return errors \ No newline at end of file diff --git a/course/content.py b/course/content.py index 59210f0a..a33b68ae 100644 --- a/course/content.py +++ b/course/content.py @@ -759,8 +759,45 @@ def markup_to_html(course, repo, commit_sha, text, reverse_func=None, env = Environment( loader=GitTemplateLoader(repo, commit_sha), undefined=StrictUndefined) + + # {{{ tex2img + + from course.latex import tex_to_img_tag + + def latex_not_enabled_warning(caller, *args, **kwargs): + return "
%s
" % _( + "RELATE_LATEX_TO_IMAGE_ENABLED is set to False, " + "no image will be generated.") + + def jinja_tex_to_img_tag(caller, *args, **kwargs): + from os.path import join + default_saving_folder = getattr( + settings, "RELATE_LATEX_IMAGE_SAVING_FOLDER_PATH", + join(settings.MEDIA_ROOT, "latex_image")) + kwargs["output_dir"] = default_saving_folder + return tex_to_img_tag(caller(), *args, **kwargs) + template = env.from_string(text) - text = template.render(**jinja_env) + latex2image_enabled = getattr( + settings, "RELATE_LATEX_TO_IMAGE_ENABLED", False) + if latex2image_enabled: + try: + env.globals["latex"] = jinja_tex_to_img_tag + text = template.render(**jinja_env) + except: + if validate_only: + raise + else: + # fail silently + text = template.render(**jinja_env) + else: + if not validate_only: + env.globals["latex"] = latex_not_enabled_warning + else: + raise ImproperlyConfigured(_( + "RELATE_LATEX_TO_IMAGE_ENABLED is set to False, " + "no image will be generated.")) + text = template.render(**jinja_env) # }}} diff --git a/course/latex/__init__.py b/course/latex/__init__.py new file mode 100644 index 00000000..c039c632 --- /dev/null +++ b/course/latex/__init__.py @@ -0,0 +1,118 @@ +# -*- coding: utf-8 -*- + +from __future__ import division + +__copyright__ = "Copyright (C) 2016 Dong Zhuang, Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import six +import re + +from django.utils.translation import ugettext as _ + +from course.latex.converter import get_tex2img_class +from course.latex.latex import TexDoc +from course.latex.utils import ( + replace_latex_space_seperator, strip_spaces) + +TIKZ_PGF_RE = re.compile(r"\\begin\{(?:tikzpicture|pgfpicture)\}") +DEFAULT_IMG_HTML_CLASS = "img-responsive" + + +def tex_to_img_tag(tex_source, *args, **kwargs): + '''Convert LaTex to IMG tag''' + + compiler = kwargs.get("compiler", None) + if not compiler: + raise ValueError(_("'compiler' must be specified.")) + + image_format = kwargs.get("image_format", "") + if not image_format: + raise ValueError(_("'image_format' must be specified.")) + + output_dir = kwargs.get("output_dir") + + tex_filename = kwargs.get("tex_filename", None) + tex_preamble = kwargs.get("tex_preamble", "") + tex_preamble_extra = kwargs.get("tex_preamble_extra", "") + + force_regenerate = kwargs.get("force_regenerate", False) + html_class_extra = kwargs.get("html_class_extra", "") + empty_pagestyle = kwargs.get("empty_pagestyle", True) + alt = kwargs.get("alt", None) + + # remove spaces added to latex code in jinja template. 
+ tex_source = replace_latex_space_seperator( + strip_spaces(tex_source, allow_single_empty_line=True)) + tex_preamble = replace_latex_space_seperator( + strip_spaces(tex_preamble, allow_single_empty_line=True)) + tex_preamble_extra = replace_latex_space_seperator( + strip_spaces(tex_preamble_extra, + allow_single_empty_line=True)) + + if html_class_extra: + if isinstance(html_class_extra, list): + html_class_extra = " ".join (html_class_extra) + elif not isinstance(html_class_extra, six.string_types): + raise ValueError( + _('"html_class_extra" must be a string or a list')) + html_class = "%s %s" %(DEFAULT_IMG_HTML_CLASS, html_class_extra) + else: html_class = DEFAULT_IMG_HTML_CLASS + + texdoc = TexDoc( + tex_source, preamble=tex_preamble, + preamble_extra=tex_preamble_extra, empty_pagestyle=empty_pagestyle) + + # empty document + if not texdoc.document.strip(): + return "" + + if (compiler == "latex" + and image_format == "png" + and re.search(TIKZ_PGF_RE, tex_source)): + image_format = "svg" + + tex2img_class = get_tex2img_class(compiler, image_format) + + if not alt: + alt = texdoc.document + + if alt: + from django.utils.html import escape + alt = "alt='%s'" % alt.strip().replace("\n","") + + latex2img = tex2img_class( + tex_source=texdoc.as_latex(), + tex_filename=tex_filename, + output_dir=output_dir + ) + + return ( + "" + % { + "src": latex2img.get_data_uri_cached(force_regenerate), + "html_class": html_class, + "alt": alt, + }) + +# vim: foldmethod=marker diff --git a/course/latex/converter.py b/course/latex/converter.py new file mode 100644 index 00000000..97f08b7a --- /dev/null +++ b/course/latex/converter.py @@ -0,0 +1,688 @@ +# -*- coding: utf-8 -*- + +from __future__ import division + +__copyright__ = "Copyright (C) 2016 Dong Zhuang, Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software 
without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import six +import os +import platform +import sys +import shutil +import re + +from django.core.checks import Critical +from django.core.management.base import CommandError +from django.core.exceptions import ImproperlyConfigured +from django.utils.encoding import DEFAULT_LOCALE_ENCODING +from django.utils.translation import ugettext as _, string_concat +from django.conf import settings + +from .utils import ( + popen_wrapper, get_basename_or_md5, + _file_read, _file_write, get_abstract_latex_log) + + +# {{{ latex compiler classes and image converter classes + +class CommandBase(object): + @property + def name(self): + """ + The name of the command tool + """ + raise NotImplementedError + + @property + def cmd(self): + """ + The string of the command + """ + raise NotImplementedError + + required_version = "" + bin_path = "" + + def check(self): + error = "" + out = "" + strerror = "" + + try: + out, err, status = popen_wrapper( + [self.bin_path, '--version'], + stdout_encoding=DEFAULT_LOCALE_ENCODING + ) + except CommandError as e: + strerror = e.__str__() + + m = re.search(r'(\d+)\.(\d+)\.?(\d+)?', out) + 
if not m: + error = Critical( + strerror, + hint=("Unable to run '%(cmd)s'. Is " + "%(tool)s installed or has its " + "path correctly configured " + "in local_settings.py?") + % {"cmd": self.cmd, + "tool": self.name, + }, + obj=self.name + ) + elif self.required_version: + version = ".".join(d for d in m.groups() if d) + from distutils.version import LooseVersion as LV + if LV(version) < LV(self.required_version): + error = Critical( + "Version outdated", + hint=("'%(tool)s' with version " + ">=%(required)s is required, " + "current version is %(version)s" + ) + % {"tool": self.name, + "required": self.required_version, + "version": version + }, + obj=self.name + ) + return error + + +class TexCompilerBase(CommandBase): + def __init__(self): + self.bin_path_dir = getattr( + settings, "RELATE_%s_BIN_DIR" % self.name.upper(), + getattr(settings, "RELATE_LATEX_BIN_DIR", "") + ) + self.bin_path = os.path.join( + self.bin_path_dir, self.cmd.lower()) + + +class Latexmk(TexCompilerBase): + name = "latexmk" + cmd = "latexmk" + required_version = "4.39" + + +class LatexCompiler(TexCompilerBase): + latexmk_option = ( + '-latexoption="-no-shell-escape ' + '-interaction=batchmode -halt-on-error "' + ) + + @property + def output_format(self): + raise NotImplementedError() + + def __init__(self): + super(LatexCompiler, self).__init__() + self.latexmk_prog_repl = self._get_latexmk_prog_repl() + + def _get_latexmk_prog_repl(self): + """ + Program replace when using "-pdflatex=" or "-latex=" + arg in latexmk, especially needed when compilers are + not in system's default $PATH. 
+ :return: the latexmk arg "-pdflatex=/path/to/pdflatex" for + # pdflatex or "-pdflatex=/path/to/xelatex" for xelatex + """ + return ( + "-%s=%s" % (self.name.lower(), self.bin_path.lower()) + ) + + def get_latexmk_subpro_cmdline(self, input_path): + latexmk = Latexmk() + return [ + latexmk.bin_path, + "-%s" % self.output_format, + self.latexmk_prog_repl, + self.latexmk_option, + input_path + ] + + +class Latex(LatexCompiler): + name = "latex" + cmd = "latex" + output_format = "dvi" + + +class PdfLatex(LatexCompiler): + name = "PdfLatex" + cmd = "pdflatex" + output_format = "pdf" + + +class LuaLatex(LatexCompiler): + name = "LuaLatex" + cmd = "lualatex" + output_format = "pdf" + def __init__(self): + super(LuaLatex, self).__init__() + self.latexmk_prog_repl = ( + "-%s=%s" % ("pdflatex", self.bin_path) + ) + + +class XeLatex(LatexCompiler): + name = "XeLatex" + cmd = "xelatex" + output_format = "pdf" + def __init__(self): + super(XeLatex, self).__init__() + self.latexmk_prog_repl = ( + "-%s=%s" % ("pdflatex", self.bin_path) + ) + + +class Imageconverter(CommandBase): + + @property + def output_format(self): + raise NotImplementedError + + def __init__(self): + bin_path_dir = getattr( + settings, "RELATE_%s_BIN_DIR" % self.name.upper(), + "" + ) + self.bin_path = os.path.join(bin_path_dir, + self.cmd.lower()) + + def get_converter_cmdline( + self, input_filepath, output_filepath): + raise NotImplementedError + + +class Dvipng(TexCompilerBase, Imageconverter): + # Inheritate TexCompilerBase's bin_path + # since dvipng is usually installed in + # latex compilers' bin dir. + name = "dvipng" + cmd = "dvipng" + output_format = "png" + def get_converter_cmdline( + self, input_filepath, output_filepath): + return [self.bin_path, + '-o', output_filepath, + '-pp', '1', + '-T', 'tight', + '-z9', + input_filepath] + + +class Dvisvg(TexCompilerBase, Imageconverter): + # Inheritate TexCompilerBase's bin_path + # since dvisvgm is usually installed in + # latex compilers' bin dir. 
+ name = "dvisvg" + cmd = "dvisvgm" + output_format = "svg" + def get_converter_cmdline( + self, input_filepath, output_filepath): + return[self.bin_path, + '--no-fonts', + '-o', output_filepath, + input_filepath] + + +class ImageMagick(Imageconverter): + name = "ImageMagick" + cmd = "convert" + output_format = "png" + + def get_converter_cmdline( + self, input_filepath, output_filepath): + return [self.bin_path, + '-density', '96', + '-quality', '85', + '-trim', + input_filepath, + output_filepath + ] + +# }}} + + +# {{{ convert file to data uri + +def get_file_data_uri(file_path): + """ + Convert file to data URI + """ + if not file_path: + return None + + try: + buf = _file_read(file_path) + except OSError: + raise + + from mimetypes import guess_type + mime_type = guess_type(file_path)[0] + + from base64 import b64encode + return "data:%(mime_type)s;base64,%(b64)s" % { + "mime_type": mime_type, + "b64": b64encode(buf).decode(), + } + +# }}} + + +# {{{ Base tex2img class + +class Tex2ImgBase(object): + """The abstract class of converting tex source to images. + """ + + @property + def compiler(self): + """ + :return: an instance of `LatexCompiler` + """ + raise NotImplementedError() + + @property + def converter(self): + """ + :return: an instance of `Imageconverter` + """ + raise NotImplementedError() + + def __init__(self, tex_source, tex_filename, output_dir): + """ + :param tex_source: Required, a string representing the + full tex source code. + :param tex_filename: Optional, a string + :param output_dir: Required, a string of the path where + the images and error logs will be saved. 
+ """ + + if tex_source: + tex_source = tex_source.strip() + if not tex_source: + raise ValueError( + _("Param 'tex_source' can not be an empty string") + ) + assert isinstance(tex_source, unicode) + self.tex_source = tex_source + + if output_dir: + output_dir = output_dir.strip() + if not output_dir: + raise ValueError( + _("Param output_dir must be specified")) + else: + try: + if (not os.path.exists(output_dir) + or not os.path.isdir(output_dir)): + os.makedirs(output_dir) + except Exception: + raise ValueError( + _("Param output_dir '%s' is not a valid path") + % output_dir) + + self.working_dir = None + + self.basename = get_basename_or_md5( + tex_filename, tex_source) + + self.image_format = self.converter.output_format \ + .replace(".", "").lower() + self.image_ext = ".%s" % self.image_format + + self.compiled_ext =".%s" % self.compiler.output_format\ + .replace(".", "").lower() + + # Where the latex compilation error log + # will finally be saved. + self.errlog_saving_path = os.path.join( + output_dir, + "%s_%s.log" % (self.basename, self.compiler.cmd) + ) + + # Where the generated image will finally be saved. + self.image_saving_path = os.path.join( + output_dir, + "%s_%s.%s" % (self.basename, + self.compiler.cmd, + self.image_format) + ) + + def get_compiler_cmdline(self, tex_path): + return self.compiler.get_latexmk_subpro_cmdline(tex_path) + + def get_converter_cmdline(self, input_path, output_path): + return self.converter.get_converter_cmdline( + input_path, output_path) + + def _remove_working_dir(self): + shutil.rmtree(self.working_dir) + + def get_compiled_file(self): + """ + Compile latex source. If failed, error log will copied + to ``output_dir``. + :return: string, the path of the compiled file if succeeded. 
+ """ + from tempfile import mkdtemp + self.working_dir = mkdtemp(prefix="RELATE_LATEX_") + + tex_filename = self.basename + ".tex" + tex_path = os.path.join(self.working_dir, tex_filename) + _file_write(tex_path, self.tex_source.encode('UTF-8')) + + log_path = tex_path.replace(".tex", ".log") + compiled_file_path = tex_path.replace( + ".tex", self.compiled_ext) + + cmdline = self.get_compiler_cmdline(tex_path) + output, error, status = popen_wrapper( + cmdline, cwd=self.working_dir) + + if status != 0: + try: + log = _file_read(log_path) + except OSError: + # no log file is generated + self._remove_working_dir() + raise RuntimeError(error) + + try: + log = get_abstract_latex_log(log) + _file_write(self.errlog_saving_path, log) + except: + raise + finally: + self._remove_working_dir() + from django.utils.html import escape + raise ValueError( + "
%s
" % escape(log).strip()) + + if os.path.isfile(compiled_file_path): + return compiled_file_path + else: + self._remove_working_dir() + raise RuntimeError( + string_concat( + "%s." % error, + _('No %s file was produced.') + % self.compiler.output_format) + ) + + def get_converted_image(self): + """ + Convert compiled file into image. If succeeded, the image + will be copied to ``output_dir``. + :return: string, the path of the generated image + """ + compiled_file_path = self.get_compiled_file() + if not compiled_file_path: + return None + image_path = compiled_file_path.replace( + self.compiled_ext, + self.image_ext) + + cmdline = self.get_converter_cmdline( + compiled_file_path, image_path) + + output, error, status = popen_wrapper( + cmdline, + cwd=self.working_dir + ) + + if status != 0: + self._remove_working_dir() + raise RuntimeError(error) + + n_images = get_number_of_images(image_path, self.image_ext) + if n_images == 0: + raise ValueError( + _("No image was generated.")) + elif n_images > 1: + raise ValueError( + string_concat( + "%s images are generated while expecting 1, " + "possibly due to long pdf file." + % (n_images, ) + )) + + try: + shutil.copyfile(image_path, self.image_saving_path) + except OSError: + raise RuntimeError(error) + finally: + self._remove_working_dir() + + return self.image_saving_path + + def get_compile_err_cached(self): + """ + If the problematic latex source is not modified, check + wheter there is error log both in cache and output_dir. + If it exists, raise the error. + :return: None if no error log find. + """ + err_result = None + + try: + import django.core.cache as cache + except ImproperlyConfigured: + err_cache_key = None + else: + def_cache = cache.caches["default"] + err_cache_key = ("latex_err:%s:%s" + % (self.compiler.cmd, self.basename)) + # Memcache is apparently limited to 250 characters. 
+ if len(err_cache_key) < 240: + err_result = def_cache.get(err_cache_key) + if err_result is not None: + assert isinstance(err_result, six.string_types),\ + err_cache_key + + if err_result is None: + # read the saved err_log if it exists + if os.path.isfile(self.errlog_saving_path): + err_result = _file_read(self.errlog_saving_path) + assert isinstance(err_result, six.string_types) + + if err_result: + if err_cache_key: + if len(err_result) <= getattr( + settings, "RELATE_CACHE_MAX_BYTES", 0): + def_cache.add(err_cache_key, err_result, None) + + from django.utils.html import escape + raise ValueError( + "
%s
" % escape(err_result).strip()) + + return None + + def get_data_uri_cached(self, force_regenerate=False): + """ + :param force_regenerate: :class:`Bool', if True, the tex file + will be recompiled and re-convert the image, regardless of + existing file or cached result. + :return: string, data uri of the coverted image. + """ + uri_result = None + if force_regenerate: + image_path = self.get_converted_image() + uri_result = get_file_data_uri(image_path) + assert isinstance(uri_result, six.string_types) + + if not uri_result: + err_result = self.get_compile_err_cached() + if err_result: + return None + + try: + import django.core.cache as cache + except ImproperlyConfigured: + uri_cache_key = None + else: + def_cache = cache.caches["default"] + + from hashlib import md5 + uri_cache_key = ( + "latex2img:%s:%s" % ( + self.compiler.cmd, + md5( + self.image_saving_path.encode("utf-8") + ).hexdigest() + ) + ) + if not uri_result: + # Memcache is apparently limited to 250 characters. + if len(uri_cache_key) < 240: + uri_result = def_cache.get(uri_cache_key) + if uri_result: + assert isinstance( + uri_result, six.string_types),\ + uri_cache_key + return uri_result + + # Neighter regenerated nor cached, + # then read or generate the image + if not uri_result: + if not os.path.isfile(self.image_saving_path): + self.image_saving_path = self.get_converted_image() + uri_result = get_file_data_uri(self.image_saving_path) + assert isinstance(uri_result, six.string_types) + + assert uri_result + + # no cache configured + if not uri_cache_key: + return uri_result + + # cache configure, but image not cached + allowed_max_bytes = getattr( + settings, "RELATE_IMAGECACHE_MAX_BYTES", + getattr( + settings, "RELATE_CACHE_MAX_BYTES", + ) + ) + if len(uri_result) <= allowed_max_bytes: + # image size larger than allowed_max_bytes + # won't be cached, espeically for svgs. 
+ def_cache.add(uri_cache_key, uri_result, None) + return uri_result + +# }}} + + +# {{{ derived tex2img converter + +class Latex2Svg(Tex2ImgBase): + compiler = Latex() + converter = Dvisvg() + + +class Lualatex2Png(Tex2ImgBase): + compiler = LuaLatex() + converter = ImageMagick() + + +class Latex2Png(Tex2ImgBase): + compiler = Latex() + converter = Dvipng() + + +class Pdflatex2Png(Tex2ImgBase): + compiler = PdfLatex() + converter = ImageMagick() + + +class Xelatex2Png(Tex2ImgBase): + compiler = XeLatex() + converter = ImageMagick() + +# }}} + + +# {{{ get tex2img class + +ALLOWED_COMPILER = ['latex', 'pdflatex', 'xelatex', 'lualatex'] +ALLOWED_LATEX2IMG_FORMAT = ['png', 'svg'] +ALLOWED_COMPILER_FORMAT_COMBINATION = ( + ("latex", "png"), + ("latex", "svg"), + ("lualatex", "png"), + ("pdflatex", "png"), + ("xelatex", "png") +) + + +def get_tex2img_class(compiler, image_format): + image_format = image_format.replace(".", "").lower() + compiler = compiler.lower() + if not image_format in ALLOWED_LATEX2IMG_FORMAT: + raise ValueError( + _("Unsupported image format '%s'") % image_format) + + if not compiler in ALLOWED_COMPILER: + raise ValueError( + _("Unsupported tex compiler '%s'") % compiler) + + if not (compiler, image_format) in ALLOWED_COMPILER_FORMAT_COMBINATION: + raise ValueError( + _("Unsupported combination: " + "('%(compiler)s', '%(format)s'). 
" + "Currently support %(supported)s.") + % {"compiler": compiler, + "format": image_format, + "supported": ", ".join( + str(e) for e in ALLOWED_COMPILER_FORMAT_COMBINATION)} + ) + + class_name = "%s2%s" % (compiler.title(), image_format.title()) + + return getattr(sys.modules[__name__], class_name) + +# }}} + +# {{{ check if multiple images are generated due to long pdf + +def get_number_of_images(image_path, image_ext): + if os.path.isfile(image_path): + return 1 + count = 0 + while True: + try_path = ( + "%(image_path)s-%(number)d%(ext)s" + % {"image_path": image_path.replace(image_ext, ""), + "number": count, + "ext": image_ext + } + ) + if not os.path.isfile(try_path): + break + count += 1 + + return count + +# }}} + +# vim: foldmethod=marker diff --git a/course/latex/latex.py b/course/latex/latex.py new file mode 100644 index 00000000..aef51406 --- /dev/null +++ b/course/latex/latex.py @@ -0,0 +1,176 @@ +# -*- coding: utf-8 -*- + +from __future__ import division + +__copyright__ = "Copyright (C) 2016 Dong Zhuang, Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import re + +from django.utils.translation import ugettext as _ + +from .utils import strip_comments, strip_spaces + +class TexDocParseError(Exception): + pass + +class TexDocMissingElementError(TexDocParseError): + pass + +class TexDocWrongElementOrderError(TexDocParseError): + pass + +class TexDoc(): + """ + Defines a LaTeX document + """ + preamble = "" + document = "" + has_preamble = False + has_begindoc = False + has_enddoc = False + + def is_empty_pagestyle_already(self): + match = re.search(r"\\pagestyle{\s?empty\s?}", self.preamble) + if match: + return True + return False + + def parse(self, latex, test=False): + """ + parse the doc into preamble and document. If test=True, the + method will try to find out which elements of the latex code + is missing. 
+ """ + ele_re_tuple = ( + (r"\documentclass", + r"\\documentclass(\[[\w,= ]*\])?{\w*}"), + (r"\begin{document}", r"\\begin\{document\}"), + (r"\end{document}", r"\\end\{document\}") + ) + ele_position_list = [] + required_ele_list = [] + has_ele = [] + + for ele, pattern in ele_re_tuple: + required_ele_list.append(ele) + iter = re.finditer(pattern, latex) + + matched_indice = [m.start(0) for m in iter] + matched_len = len(matched_indice) + if matched_len == 0: + if not test: + raise TexDocMissingElementError( + _("No %s found in latex source") % ele) + else: + has_ele.append(False) + elif matched_len > 1: + raise TexDocParseError( + _("More than one %s found in latex source") % ele) + else: + if test: + has_ele.append(True) + ele_position_list.append(matched_indice[0]) + + if test: + [self.has_preamble, self.has_begindoc, self.has_enddoc] = has_ele + + if not ele_position_list == sorted(ele_position_list): + raise TexDocWrongElementOrderError( + _("The occurance of %s are not in proper order") + % ",".join(required_ele_list)) + + if not test: + [preamble, document] = latex.split((r"\begin{document}")) + document = document.split((r"\end{document}"))[0] + self.preamble = strip_spaces(preamble) + self.document = strip_spaces(document, allow_single_empty_line=True) + assert self.preamble is not None + assert self.document is not None + + def as_latex(self): + """ + Assemble LaTeX Document + """ + latex = "" + if self.empty_pagestyle: + if not self.is_empty_pagestyle_already(): + self.preamble += "\n\\pagestyle{empty}\n" + + latex += self.preamble + latex += "\\begin{document}\n" + latex += self.document + latex += "\\end{document}\n" + + return latex + + def __str__(self): + return self.document + + def __unicode__(self): + return self.document + + def __init__(self, text=None, preamble="", preamble_extra="", + empty_pagestyle=False): + """ + Parse LaTeX document + :param text: string. Full latex document, or body only if + preamble or preamble_extra are given. 
+ :param preamble: string. If full document is provided in + text, this value will be neglected. + :param preamble_extra: string. Append to existing preamle. + :param empty_pagestyle: bool. If True, the pagestyle will + be set as "empty". We are not using + \documentclass{standalone}. + """ + if not text: + raise ValueError(_("No LaTeX source code is provided.")) + + text = strip_comments(text) + try: + self.parse(text) + except TexDocMissingElementError: + self.parse(text, test=True) + if self.has_preamble: + # begin_document or end_document is missing + raise + elif not preamble and not preamble_extra: + raise + + # in this case, preamble code and document body code + # are seperated, try to assemble them up. + else: + if not self.has_begindoc: + text = "%s\n%s" % ("\\begin{document}", text) + if not self.has_enddoc: + text = "%s\n%s" % (text, "\\end{document}") + + text = "%s\n%s\n%s" % ( + strip_comments(preamble), + strip_comments(preamble_extra), + text) + self.parse(text) + + except: + raise + + self.empty_pagestyle = empty_pagestyle \ No newline at end of file diff --git a/course/latex/utils.py b/course/latex/utils.py new file mode 100644 index 00000000..0d80422c --- /dev/null +++ b/course/latex/utils.py @@ -0,0 +1,392 @@ +# -*- coding: utf-8 -*- + +from __future__ import division + +__copyright__ = "Copyright (C) 2016 Dong Zhuang, Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import six +import os +import sys +import ply.lex +from hashlib import md5 +from subprocess import Popen, PIPE + +from django.utils.translation import ( + ugettext as _, string_concat) +from django.core.management.base import CommandError +from django.utils.encoding import ( + DEFAULT_LOCALE_ENCODING, force_text) + + +# {{{ Constants + +ALLOWED_COMPILER = ['latex', 'pdflatex', 'xelatex'] +ALLOWED_LATEX2IMG_FORMAT = ['png', 'svg'] + +ALLOWED_COMPILER_FORMAT_COMBINATION = ( + ("latex", "png"), + ("latex", "svg"), + ("pdflatex", "png"), + ("xelatex", "png") +) + +# }}} + + +# {{{ subprocess popen wrapper + +def popen_wrapper(args, os_err_exc_type=CommandError, + stdout_encoding='utf-8', **kwargs): + """ + Extended from django.core.management.utils.popen_wrapper. + `**kwargs` is added so that more kwargs can be added. + + This method is especially to solve UnicodeDecodeError + raised on Windows platform where the OS stdout is not utf-8. + + Friendly wrapper around Popen + + Returns stdout output, stderr output and OS status code. 
+ """ + + try: + p = Popen(args, stdout=PIPE, + stderr=PIPE, close_fds=os.name != 'nt', **kwargs) + except OSError as e: + strerror = force_text(e.strerror, DEFAULT_LOCALE_ENCODING, + strings_only=True) + six.reraise(os_err_exc_type, os_err_exc_type( + string_concat(_('Error executing'), ' %s: %s') + % (args[0], strerror)), sys.exc_info()[2]) + + output, errors = p.communicate() + return ( + force_text(output, stdout_encoding, strings_only=True, + errors='strict'), + force_text(errors, DEFAULT_LOCALE_ENCODING, + strings_only=True, errors='replace'), + p.returncode + ) + +# }}} + + +# {{{ file read and write + +def get_basename_or_md5(filename, s): + """ + :return: the basename of `filename` if `filename` is not empty, + else, return the md5 of string `s`. + """ + if filename: + basename, ext = os.path.splitext(filename) + else: + if not s: + return None + basename = md5(s).hexdigest() + return basename + + +def _file_read(filename): + '''Read the content of a file and close it properly.''' + f = file(filename, 'rb') + content = f.read() + f.close() + return content + + +def _file_write(filename, content): + '''Write into a file and close it properly.''' + f = file(filename, 'wb') + f.write(content) + f.close() + +# }}} + + +# {{{ convert file to data uri + +def get_file_data_uri(file_path): + '''Convert file to data URI''' + if not file_path: + return None + + from base64 import b64encode + from mimetypes import guess_type + buf = _file_read(file_path) + mime_type = guess_type(file_path)[0] + + return "data:%(mime_type)s;base64,%(b64)s" % { + "mime_type": mime_type, + "b64": b64encode(buf).decode(), + } + +# }}} + + +# {{{ get error log abstracted + +LATEX_ERR_LOG_BEGIN_LINE_STARTS = "\n! 
" +LATEX_ERR_LOG_END_LINE_STARTS = "\nHere is how much of TeX's memory" +LATEX_LOG_OMIT_LINE_STARTS = ( + "See the LaTeX manual or LaTeX", + "Type H for", + " ...", + # more +) + +def get_abstract_latex_log(log): + '''abstract error msg from latex compilation log''' + msg = log.split(LATEX_ERR_LOG_BEGIN_LINE_STARTS)[1]\ + .split(LATEX_ERR_LOG_END_LINE_STARTS)[0] + + if LATEX_LOG_OMIT_LINE_STARTS: + msg = "\n".join( + line for line in msg.splitlines() + if (not line.startswith(LATEX_LOG_OMIT_LINE_STARTS) + and + line.strip() != "")) + return msg + +# }}} + + +# {{{ strip comments from source + +def strip_comments(source): + # modified from https://gist.github.com/amerberg/a273ca1e579ab573b499 + tokens = ( + 'PERCENT', 'BEGINCOMMENT', 'ENDCOMMENT', + 'BACKSLASH', 'CHAR', 'BEGINVERBATIM', + 'ENDVERBATIM', 'NEWLINE', 'ESCPCT', + 'MAKEATLETTER', 'MAKEATOTHER', + ) + states = ( + ('makeatblock', 'exclusive'), + ('makeatlinecomment', 'exclusive'), + ('linecomment', 'exclusive'), + ('commentenv', 'exclusive'), + ('verbatim', 'exclusive') + ) + + # Deal with escaped backslashes, so we don't + # think they're escaping % + def t_BACKSLASH(t): + r"\\\\" + return t + + # Leaving all % in makeatblock + def t_MAKEATLETTER(t): + r"\\makeatletter" + t.lexer.begin("makeatblock") + return t + + # One-line comments + def t_PERCENT(t): + r"\%" + t.lexer.begin("linecomment") + + # Escaped percent signs + def t_ESCPCT(t): + r"\\\%" + return t + + # Comment environment, as defined by verbatim package + def t_BEGINCOMMENT(t): + r"\\begin\s*{\s*comment\s*}" + t.lexer.begin("commentenv") + + #Verbatim environment (different treatment of comments within) + def t_BEGINVERBATIM(t): + r"\\begin\s*{\s*verbatim\s*}" + t.lexer.begin("verbatim") + return t + + #Any other character in initial state we leave alone + def t_CHAR(t): + r"." 
+ return t + + def t_NEWLINE(t): + r"\n" + return t + + # End comment environment + def t_commentenv_ENDCOMMENT(t): + r"\\end\s*{\s*comment\s*}" + #Anything after \end{comment} on a line is ignored! + t.lexer.begin('linecomment') + + # Ignore comments of comment environment + def t_commentenv_CHAR(t): + r"." + pass + + def t_commentenv_NEWLINE(t): + r"\n" + pass + + #End of verbatim environment + def t_verbatim_ENDVERBATIM(t): + r"\\end\s*{\s*verbatim\s*}" + t.lexer.begin('INITIAL') + return t + + #Leave contents of verbatim environment alone + def t_verbatim_CHAR(t): + r"." + return t + + def t_verbatim_NEWLINE(t): + r"\n" + return t + + #End a % comment when we get to a new line + def t_linecomment_ENDCOMMENT(t): + r"\n" + t.lexer.begin("INITIAL") + + # Newline at the end of a line comment is presevered. + return t + + #Ignore anything after a % on a line + def t_linecomment_CHAR(t): + r"." + pass + + def t_makeatblock_MAKEATOTHER(t): + r"\\makeatother" + t.lexer.begin('INITIAL') + return t + + def t_makeatblock_BACKSLASH(t): + r"\\\\" + return t + + # Escaped percent signs in makeatblock + def t_makeatblock_ESCPCT(t): + r"\\\%" + return t + + # presever % in makeatblock + def t_makeatblock_PERCENT(t): + r"\%" + t.lexer.begin("makeatlinecomment") + return t + + def t_makeatlinecomment_NEWLINE(t): + r"\n" + t.lexer.begin('makeatblock') + return t + + # Leave contents of makeatblock alone + def t_makeatblock_CHAR(t): + r"." + return t + + def t_makeatblock_NEWLINE(t): + r"\n" + return t + + # For bad characters, we just skip over it + def t_ANY_error(t): + t.lexer.skip(1) + + lexer = ply.lex.lex() + lexer.input(source) + return u"".join([tok.value for tok in lexer]) + +# }}} + + +# {{{ remove redundant strings + +def strip_spaces(s, allow_single_empty_line=False): + """ + strip spaces in s, so that the result will be + considered same although new empty lines or + extra spaces are added. Especially for generating + md5 of the string. + :param s: string. 
The source code + :param allow_single_empty_line: bool. If True, + single empty line will be preserved, this is need + for latex document body. If False, all empty line + will be removed. + :return: string. + """ + + # strip all lines + s = "\n".join([l.strip() for l in s.split("\n")]) + + if not allow_single_empty_line: + while "\n\n" in s: + s = s.replace('\n\n', '\n') + else: + while "\n\n\n" in s: + s = s.replace('\n\n\n', '\n\n') + + # remove redundant white spaces and tabs + s = s.replace ("\t", " ") + while " " in s: + s = s.replace(" ", " ") + + return s + +## }}} + + +def get_all_indirect_subclasses(cls): + all_subcls = [] + + for subcls in cls.__subclasses__(): + if not subcls.__subclasses__(): + # has no child + all_subcls.append(subcls) + all_subcls.extend(get_all_indirect_subclasses(subcls)) + + return list(set(all_subcls)) + + +def replace_latex_space_seperator(s): + """ + "{{", "}}", "{%", %}", "{#" and "#}" are used in jinja + template, so we have to put spaces between those + characters in latex source in the latex macro. + To compile the source, we are now removing the spaces. 
+ """ + pattern_list = [ + r'{ {', + r'} }', + r'{ #', + r'# }', + r'{ %', + r'% }' + ] + for pattern in pattern_list: + while pattern in s: + s = s.replace(pattern, pattern.replace(" ", "")) + + return s + + +# vim: foldmethod=marker diff --git a/course/models.py b/course/models.py index 9e448235..dffc65df 100644 --- a/course/models.py +++ b/course/models.py @@ -1182,7 +1182,9 @@ class GradeChange(models.Model): "in the same course")) def percentage(self): - if self.max_points is not None and self.points is not None: + if (self.max_points is not None + and self.points is not None + and self.max_points != 0): return 100*self.points/self.max_points else: return None diff --git a/doc/content.rst b/doc/content.rst index 8c5ae9d2..b0af060e 100644 --- a/doc/content.rst +++ b/doc/content.rst @@ -343,6 +343,8 @@ This could then be used from wherever RELATE markup is allowed:: to embed a YouTube player. (YouTube is a registered trademark.) +.. include:: tex-macro.rst + .. _course_yml: The Main Course Page File diff --git a/doc/tex-macro.rst b/doc/tex-macro.rst new file mode 100644 index 00000000..83636a1b --- /dev/null +++ b/doc/tex-macro.rst @@ -0,0 +1,225 @@ +.. _latex_to_img: + +Built-in LaTeX to image macro +----------------------------- + +.. currentmodule:: course.latex + +Relate provides a built-in +`Jinja `_ macro named +``latex`` which automatically converts the wrapped LaTex code into a cached +image, which is consequently rendered in a html page. +For example: + +.. code-block:: jinja + +

+ {% call latex(compiler="pdflatex", image_format="png", alt="example") %} +.. code-block:: latex + + \documentclass{article} + \usepackage[utf8]{inputenc} + \usepackage[table]{xcolor} + \setlength{\arrayrulewidth}{1mm} + \setlength{\tabcolsep}{18pt} + \renewcommand{\arraystretch}{2.5} + \newcolumntype{s}{>{\columncolor[HTML]{AAACED} } p{3cm} } + \arrayrulecolor[HTML]{DB5800} + \begin{document} + \begin{tabular}{ |s|p{3cm}|p{3cm}| } + \hline + \rowcolor{lightgray} \multicolumn{3}{|c|}{Country List} \\ + \hline + Country Name or Area Name& ISO ALPHA 2 Code &ISO ALPHA 3 \\ + \hline + Afghanistan & AF &AFG \\ + \rowcolor{gray} + Aland Islands & AX & ALA \\ + Albania &AL & ALB \\ + Algeria &DZ & DZA \\ + American Samoa & AS & ASM \\ + Andorra & AD & \cellcolor[HTML]{AA0044} AND \\ + Angola & AO & AGO \\ + \hline + \end{tabular} + + \end{document} + +.. code-block:: jinja + + {% endcall %} +

+ +The above markup creates a horizontally centered image of a colored +LaTeX table:: + +

+ example +

+ +Prerequisites +^^^^^^^^^^^^^ +* **TexLive** + - For installation of TexLive. + See `Instructions on TUG `_. + - For Linux platform + - Vanilla TexLive is preferable if you want to use the latest version + of TexLive and not bother upgrading packages. + - If you don't mind using an outdated version of TexLive, you can also + install the version that comes with your Linux distribution by:: + + sudo apt-get install texlive-full + +* **latexmk**, required, shipped with TexLive full installation, + version >= 4.39 is required. +* **dvisvgm**, optional, shipped with TexLive full installation. +* **dvipng**, required, shipped with TexLive full installation. +* **ImageMagick**, required. + - For Windows platform, install with option + ``install legacy component`` ticked. + +Configurations +^^^^^^^^^^^^^^ +In your :file:`local_settings.py`: + +* Enable latex to image functionality by configuring + "RELATE_LATEX_TO_IMAGE_ENABLED = True". +* Configure "RELATE_LATEX_BIN_PATH" option to + the bin path of the TexLive installation. For example, + "/usr/local/texlive/2015/bin/x86_64-linux". That is also + where ``latexmk``, ``dvisvgm`` and ``dvipng`` should be + found. +* Configure ImageMagick bin path "RELATE_IMAGEMAGICK_BIN_DIR" to + the absolute path of the location of the bin ``convert`` + (``convert.exe`` for Windows) of your ImageMagick installation. + This is required for Windows since Windows itself has another + cmd named ``convert``. +* Configure "RELATE_IMAGE_CACHE_MAX_BYTES" option to set the max cache + bytes for generated images. Note that the value should be + larger than most images converted, or else those images won't be + cached, which will result in more disk reads. + +Usage +^^^^^ + +Generally, it can be done by simply wrapping the source code +of a full latex document by the ``latex`` caller: + +.. 
code-block:: jinja + + {% call latex(compiler="pdflatex", image_format="png") %} + The latex source code + {% endcall %} + +By *full document* we mean it must contain ``\documentclass``, +``\begin{document}`` and ``\end{document}``. + +* Required params: + **compiler** + str, the command line used to compile the tex + file, currently available: ``xelatex``, ``pdflatex``, ``latex`` + and ``lualatex``. + **image_format** + str, the output format of the image, only + ``png`` and ``svg`` are available. + + .. note:: + * Output with ``svg`` format only supports ``latex`` as compiler. + If ``compiler="latex"``. + * If the code contains figures generated by ``tikz/pgf``, the + image_format will be forced to ``svg``. + * ``svg`` images generated are always huge in size, sometimes 10 + times that of the png images. They consume more cache and slow + down the rendering of pages on browsers. + +* Optional params: + **tex_filename** + A string, the base filename of the latex, and image + as well, if not set, use md5 of the full latex code. + **tex_preamble** + A string, which allows the user to move the preamble part of + the latex code out of the ``latex`` macro. For example, we can use + ``{% set foo %}{% endset %}`` block to define ``foo`` as the preamble + and then use ``tex_preamble=foo`` as an argument of the ``latex`` + caller. Those definitions can also be saved as macros in the course + repo. That is a good practice for latex code with common preambles. + However, this should be used with caution, as changing the + preamble definition will force all the originally valid images, + whose preambles have been changed, to be recompiled, which might + result in failure (same for ``tex_preamble_extra``). For latex code + that might be changed in the future, the best practice is to keep + the full latex document in the page markup. + **tex_preamble_extra** + A string, more packages or settings appended to + ``tex_preamble``. 
+ + Using ``tex_preamble``, the above example can be changed to the following + which generate exactly the same output. + + .. code-block:: jinja + + {% set preamble %} + \documentclass{article} + \usepackage[utf8]{inputenc} + \usepackage[table]{xcolor} + \setlength{\arrayrulewidth}{1mm} + \setlength{\tabcolsep}{18pt} + \renewcommand{\arraystretch}{2.5} + \newcolumntype{s}{>{\columncolor[HTML]{AAACED} } p{3cm} } + \arrayrulecolor[HTML]{DB5800} + {% endset %} + +

+ {% call latex(compiler="pdflatex", image_format="png", + tex_preamble=preamble, alt="example") %} + .. code-block:: latex + + \begin{document} + \begin{tabular}{ |s|p{3cm}|p{3cm}| } + \hline + \rowcolor{lightgray} \multicolumn{3}{|c|}{Country List} \\ + \hline + Country Name or Area Name& ISO ALPHA 2 Code &ISO ALPHA 3 \\ + \hline + Afghanistan & AF &AFG \\ + \rowcolor{gray} + Aland Islands & AX & ALA \\ + Albania &AL & ALB \\ + Algeria &DZ & DZA \\ + American Samoa & AS & ASM \\ + Andorra & AD & \cellcolor[HTML]{AA0044} AND \\ + Angola & AO & AGO \\ + \hline + \end{tabular} + \end{document} + .. code-block:: jinja + + {% endcall %} +

+ + **force_regenerate** + A boolean, if True, regenerate the image no matter + it exists or not. Default to ``False``. + **html_class_extra** + A string, extra html class for the ``<img>`` tag, + besides ``img-responsive``. + **alt** + A string, a brief description of the image, Default to the + document part of the tex source. + **empty_pagestyle** + A boolean, if ``True``, ``\pagestyle{empty}`` will + be added to existing preamble and a standalone image will be + generated. Default to ``True``. + +.. note:: + + ``{{``, ``}}``, ``{%``, ``%}``, ``{#`` and ``#}`` are used as marking + strings in jinja template, latex code submitted (including preamble + part and self-defined commands) should avoid containing those strings, + or else jinja will just fail to render. The work around is to manually + insert a space (spaces or tabs) between the two characters (e.g., + ``{{`` --> ``{ {``) for each of those strings appearing in latex code. + +A more sophisticated example can be found at +`relate-example `_. \ No newline at end of file diff --git a/local_settings.py.example b/local_settings.py.example index 00aac329..ba04a068 100644 --- a/local_settings.py.example +++ b/local_settings.py.example @@ -145,6 +145,28 @@ RELATE_EDITABLE_INST_ID_BEFORE_VERIFICATION = True # }}} + +# {{{ convert LaTeX to image settings + +# To enable tex2img functionality, uncomment the following line. +#RELATE_LATEX_TO_IMAGE_ENABLED = True + +# The bin dir of tex compiler and image converter should be +# correctly configured or RELATE will fail to start. +#RELATE_LATEX_BIN_PATH = "/usr/local/texlive/2015/bin/x86_64-linux" +#RELATE_IMAGEMAGICK_BIN_DIR = "/path/to/imagemagic/convert/bin/" + +# The full path of the directory where images converted +# (from latex source) are saved. If not set, the default +# path is a subfolder named "latex_image" in MEDIA_ROOT. 
+# RELATE_LATEX_IMAGE_SAVING_FOLDER_PATH = "" + +# image, especially svg have large file size, files with size +# exceed the following won't be cached. +RELATE_IMAGE_CACHE_MAX_BYTES = 65536 + +# }}} + # {{{ docker # A string containing the image ID of the docker image to be used to run diff --git a/requirements.txt b/requirements.txt index 2e5e0012..931c955a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -84,3 +84,6 @@ django_select2>=5.5.0 # For query lexing pytools + +# For string parse +ply \ No newline at end of file -- GitLab From 5ccf9a5a2630df749de2c7a7b88bd46cb97d5d8b Mon Sep 17 00:00:00 2001 From: dzhuang Date: Mon, 10 Apr 2017 13:03:20 +0800 Subject: [PATCH 2/2] use mongodb to store the results. --- course/check.py | 2 +- course/content.py | 43 ++++- course/latex/__init__.py | 28 +-- course/latex/converter.py | 360 +++++++++++++++++++++++++------------- course/latex/latex.py | 13 +- course/latex/utils.py | 114 ++++++++---- doc/tex-macro.rst | 1 + local_settings.example.py | 20 ++- requirements.txt | 3 + 9 files changed, 404 insertions(+), 180 deletions(-) diff --git a/course/check.py b/course/check.py index d9bdbb6e..c482b22a 100644 --- a/course/check.py +++ b/course/check.py @@ -51,4 +51,4 @@ def latex2image_bin_check(app_configs, **kwargs): error = instance.check() if error: errors.append(error) - return errors \ No newline at end of file + return errors diff --git a/course/content.py b/course/content.py index fb3fcb93..128b783c 100644 --- a/course/content.py +++ b/course/content.py @@ -855,6 +855,7 @@ def expand_markup( repo, # type: Repo_ish commit_sha, # type: bytes text, # type: Text + validate_only=False, # type: bool use_jinja=True, # type: bool jinja_env={}, # type: Dict ): @@ -870,8 +871,45 @@ def expand_markup( env = Environment( loader=GitTemplateLoader(repo, commit_sha), undefined=StrictUndefined) + template = env.from_string(text) - text = template.render(**jinja_env) + kwargs = {} + if jinja_env: + kwargs.update(jinja_env) + + # 
{{{ tex2img + + def latex_not_enabled_warning(caller, *args, **kwargs): + return ( + "
%s
" % + ("RELATE_LATEX_TO_IMAGE_ENABLED is set to False, " + "no image will be generated.")) + + def jinja_tex_to_img_tag(caller, *args, **kwargs): + try: + from course.latex import tex_to_img_tag + return tex_to_img_tag(caller(), *args, **kwargs) + except Exception as e: + raise ValueError( + u"
" + u"Error: %s: %s
" + % (type(e).__name__, str(e))) + + latex2image_enabled = getattr( + settings, "RELATE_LATEX_TO_IMAGE_ENABLED", False) + + if latex2image_enabled: + env.globals["latex"] = jinja_tex_to_img_tag + else: + if not validate_only: + env.globals["latex"] = latex_not_enabled_warning + else: + raise ImproperlyConfigured( + _("RELATE_LATEX_TO_IMAGE_ENABLED is set to False, " + "no image will be generated.")) + # }}} + + text = template.render(**kwargs) # }}} @@ -912,7 +950,8 @@ def markup_to_html( cache_key = None text = expand_markup( - course, repo, commit_sha, text, use_jinja=use_jinja, jinja_env=jinja_env) + course, repo, commit_sha, text, validate_only=validate_only, + use_jinja=use_jinja, jinja_env=jinja_env) if reverse_func is None: from django.urls import reverse diff --git a/course/latex/__init__.py b/course/latex/__init__.py index c039c632..a2adff5c 100644 --- a/course/latex/__init__.py +++ b/course/latex/__init__.py @@ -37,8 +37,16 @@ from course.latex.utils import ( TIKZ_PGF_RE = re.compile(r"\\begin\{(?:tikzpicture|pgfpicture)\}") DEFAULT_IMG_HTML_CLASS = "img-responsive" +# {{{ mypy + +if False: + from typing import Text, Any, Optional # noqa + +# }}} + def tex_to_img_tag(tex_source, *args, **kwargs): + # type: (Text, *Any, **Any) -> Optional[Text] '''Convert LaTex to IMG tag''' compiler = kwargs.get("compiler", None) @@ -49,8 +57,6 @@ def tex_to_img_tag(tex_source, *args, **kwargs): if not image_format: raise ValueError(_("'image_format' must be specified.")) - output_dir = kwargs.get("output_dir") - tex_filename = kwargs.get("tex_filename", None) tex_preamble = kwargs.get("tex_preamble", "") tex_preamble_extra = kwargs.get("tex_preamble_extra", "") @@ -71,12 +77,13 @@ def tex_to_img_tag(tex_source, *args, **kwargs): if html_class_extra: if isinstance(html_class_extra, list): - html_class_extra = " ".join (html_class_extra) + html_class_extra = " ".join(html_class_extra) elif not isinstance(html_class_extra, six.string_types): raise ValueError( 
_('"html_class_extra" must be a string or a list')) - html_class = "%s %s" %(DEFAULT_IMG_HTML_CLASS, html_class_extra) - else: html_class = DEFAULT_IMG_HTML_CLASS + html_class = "%s %s" % (DEFAULT_IMG_HTML_CLASS, html_class_extra) + else: + html_class = DEFAULT_IMG_HTML_CLASS texdoc = TexDoc( tex_source, preamble=tex_preamble, @@ -88,22 +95,23 @@ def tex_to_img_tag(tex_source, *args, **kwargs): if (compiler == "latex" and image_format == "png" - and re.search(TIKZ_PGF_RE, tex_source)): + and + re.search(TIKZ_PGF_RE, tex_source)): image_format = "svg" - tex2img_class = get_tex2img_class(compiler, image_format) + assert isinstance(compiler, six.text_type) + + tex2img_class = get_tex2img_class(compiler, image_format) # type: ignore if not alt: alt = texdoc.document if alt: - from django.utils.html import escape - alt = "alt='%s'" % alt.strip().replace("\n","") + alt = "alt='%s'" % alt.strip().replace("\n", "") latex2img = tex2img_class( tex_source=texdoc.as_latex(), tex_filename=tex_filename, - output_dir=output_dir ) return ( diff --git a/course/latex/converter.py b/course/latex/converter.py index 97f08b7a..59ea1d87 100644 --- a/course/latex/converter.py +++ b/course/latex/converter.py @@ -26,28 +26,67 @@ THE SOFTWARE. 
import six import os -import platform import sys import shutil import re +from hashlib import md5 from django.core.checks import Critical from django.core.management.base import CommandError from django.core.exceptions import ImproperlyConfigured +from django.utils.html import escape from django.utils.encoding import DEFAULT_LOCALE_ENCODING from django.utils.translation import ugettext as _, string_concat from django.conf import settings +from relate.utils import local_now + +from course.latex.utils import get_mongo_db + from .utils import ( popen_wrapper, get_basename_or_md5, - _file_read, _file_write, get_abstract_latex_log) + file_read, file_write, get_abstract_latex_log) + +# mypy +if False: + from typing import Text, Optional, Any, List # noqa + from pymongo import MongoClient # noqa + from pymongo.collection import Collection # noqa + +DB = get_mongo_db() + + +def get_latex_datauri_mongo_collection(name=None, db=DB, index_name="key"): + # type: (Optional[Text], Optional[MongoClient], Optional[Text]) -> Collection + if not name: + name = getattr( + settings, "RELATE_LATEX_DATAURI_MONGO_COLLECTION_NAME", + "relate_latex_datauri") + collection = db[name] + if index_name: + collection.ensure_index(index_name, unique=True) + return collection + + +def get_latex_error_mongo_collection(name=None, db=DB, index_name="key"): + # type: (Optional[Text], Optional[MongoClient], Optional[Text]) -> Collection + if not name: + name = getattr( + settings, "RELATE_LATEX_ERROR_MONGO_COLLECTION_NAME", + "relate_latex_error") + collection = db[name] + if index_name: + collection.ensure_index(index_name, unique=True) + return collection # {{{ latex compiler classes and image converter classes + class CommandBase(object): @property def name(self): + # type: () -> Text """ The name of the command tool """ @@ -55,15 +94,17 @@ class CommandBase(object): @property def cmd(self): + # type: () -> Text """ The string of the command """ raise NotImplementedError - required_version = "" - 
bin_path = "" + required_version = "" # type: Text + bin_path = "" # type: Text def check(self): + # type: () -> Text error = "" out = "" strerror = "" @@ -83,26 +124,25 @@ class CommandBase(object): hint=("Unable to run '%(cmd)s'. Is " "%(tool)s installed or has its " "path correctly configured " - "in local_settings.py?") - % {"cmd": self.cmd, - "tool": self.name, - }, + "in local_settings.py?") % { + "cmd": self.cmd, + "tool": self.name, + }, obj=self.name ) elif self.required_version: version = ".".join(d for d in m.groups() if d) - from distutils.version import LooseVersion as LV + import distutils.version.LooseVersion as LV if LV(version) < LV(self.required_version): error = Critical( "Version outdated", hint=("'%(tool)s' with version " ">=%(required)s is required, " "current version is %(version)s" - ) - % {"tool": self.name, - "required": self.required_version, - "version": version - }, + ) % { + "tool": self.name, + "required": self.required_version, + "version": version}, obj=self.name ) return error @@ -110,6 +150,7 @@ class CommandBase(object): class TexCompilerBase(CommandBase): def __init__(self): + # type: () -> None self.bin_path_dir = getattr( settings, "RELATE_%s_BIN_DIR" % self.name.upper(), getattr(settings, "RELATE_LATEX_BIN_DIR", "") @@ -132,13 +173,16 @@ class LatexCompiler(TexCompilerBase): @property def output_format(self): + # type: () -> Text raise NotImplementedError() def __init__(self): + # type: () -> None super(LatexCompiler, self).__init__() self.latexmk_prog_repl = self._get_latexmk_prog_repl() def _get_latexmk_prog_repl(self): + # type: () -> Text """ Program replace when using "-pdflatex=" or "-latex=" arg in latexmk, especially needed when compilers are @@ -151,6 +195,7 @@ class LatexCompiler(TexCompilerBase): ) def get_latexmk_subpro_cmdline(self, input_path): + # type: (Text) -> List[Text] latexmk = Latexmk() return [ latexmk.bin_path, @@ -177,31 +222,33 @@ class LuaLatex(LatexCompiler): name = "LuaLatex" cmd = "lualatex" 
output_format = "pdf" + def __init__(self): + # type: () -> None super(LuaLatex, self).__init__() - self.latexmk_prog_repl = ( - "-%s=%s" % ("pdflatex", self.bin_path) - ) + self.latexmk_prog_repl = "-%s=%s" % ("pdflatex", self.bin_path) class XeLatex(LatexCompiler): name = "XeLatex" cmd = "xelatex" output_format = "pdf" + def __init__(self): + # type: () -> None super(XeLatex, self).__init__() - self.latexmk_prog_repl = ( - "-%s=%s" % ("pdflatex", self.bin_path) - ) + self.latexmk_prog_repl = "-%s=%s" % ("pdflatex", self.bin_path) class Imageconverter(CommandBase): @property def output_format(self): + # type: () -> Text raise NotImplementedError def __init__(self): + # type: () -> None bin_path_dir = getattr( settings, "RELATE_%s_BIN_DIR" % self.name.upper(), "" @@ -211,6 +258,7 @@ class Imageconverter(CommandBase): def get_converter_cmdline( self, input_filepath, output_filepath): + # type: (Text, Text) -> List[Text] raise NotImplementedError @@ -221,8 +269,10 @@ class Dvipng(TexCompilerBase, Imageconverter): name = "dvipng" cmd = "dvipng" output_format = "png" + def get_converter_cmdline( self, input_filepath, output_filepath): + # type: (Text, Text) -> List[Text] return [self.bin_path, '-o', output_filepath, '-pp', '1', @@ -238,8 +288,10 @@ class Dvisvg(TexCompilerBase, Imageconverter): name = "dvisvg" cmd = "dvisvgm" output_format = "svg" + def get_converter_cmdline( self, input_filepath, output_filepath): + # type: (Text, Text) -> List[Text] return[self.bin_path, '--no-fonts', '-o', output_filepath, @@ -253,6 +305,7 @@ class ImageMagick(Imageconverter): def get_converter_cmdline( self, input_filepath, output_filepath): + # type: (Text, Text) -> List[Text] return [self.bin_path, '-density', '96', '-quality', '85', @@ -266,7 +319,8 @@ class ImageMagick(Imageconverter): # {{{ convert file to data uri -def get_file_data_uri(file_path): +def get_image_datauri(file_path): + # type: (Text) -> Optional[Text] """ Convert file to data URI """ @@ -274,7 +328,7 @@ def 
get_file_data_uri(file_path): return None try: - buf = _file_read(file_path) + buf = file_read(file_path) except OSError: raise @@ -298,6 +352,7 @@ class Tex2ImgBase(object): @property def compiler(self): + # type: () -> LatexCompiler """ :return: an instance of `LatexCompiler` """ @@ -305,18 +360,18 @@ class Tex2ImgBase(object): @property def converter(self): + # type: () -> Imageconverter """ :return: an instance of `Imageconverter` """ raise NotImplementedError() - def __init__(self, tex_source, tex_filename, output_dir): + def __init__(self, tex_source, tex_filename): + # type: (...) -> None """ :param tex_source: Required, a string representing the full tex source code. :param tex_filename: Optional, a string - :param output_dir: Required, a string of the path where - the images and error logs will be saved. """ if tex_source: @@ -325,24 +380,9 @@ class Tex2ImgBase(object): raise ValueError( _("Param 'tex_source' can not be an empty string") ) - assert isinstance(tex_source, unicode) + assert isinstance(tex_source, six.text_type) self.tex_source = tex_source - if output_dir: - output_dir = output_dir.strip() - if not output_dir: - raise ValueError( - _("Param output_dir must be specified")) - else: - try: - if (not os.path.exists(output_dir) - or not os.path.isdir(output_dir)): - os.makedirs(output_dir) - except Exception: - raise ValueError( - _("Param output_dir '%s' is not a valid path") - % output_dir) - self.working_dir = None self.basename = get_basename_or_md5( @@ -352,47 +392,47 @@ class Tex2ImgBase(object): .replace(".", "").lower() self.image_ext = ".%s" % self.image_format - self.compiled_ext =".%s" % self.compiler.output_format\ + self.compiled_ext = ".%s" % self.compiler.output_format\ .replace(".", "").lower() - # Where the latex compilation error log - # will finally be saved. - self.errlog_saving_path = os.path.join( - output_dir, - "%s_%s.log" % (self.basename, self.compiler.cmd) - ) - - # Where the generated image will finally be saved. 
- self.image_saving_path = os.path.join( - output_dir, - "%s_%s.%s" % (self.basename, + self.datauri_basename = ( + "%s_%s_%s_datauri" % (self.basename, self.compiler.cmd, self.image_format) ) def get_compiler_cmdline(self, tex_path): + # type: (Text) -> List[Text] return self.compiler.get_latexmk_subpro_cmdline(tex_path) def get_converter_cmdline(self, input_path, output_path): + # type: (Text, Text) -> List[Text] return self.converter.get_converter_cmdline( input_path, output_path) def _remove_working_dir(self): - shutil.rmtree(self.working_dir) + # type: () -> None + if self.working_dir: + shutil.rmtree(self.working_dir) def get_compiled_file(self): + # type: () -> Optional[Text] """ - Compile latex source. If failed, error log will copied - to ``output_dir``. + Compile latex source. :return: string, the path of the compiled file if succeeded. """ from tempfile import mkdtemp - self.working_dir = mkdtemp(prefix="RELATE_LATEX_") + # https://github.com/python/mypy/issues/1833 + self.working_dir = mkdtemp(prefix="RELATE_LATEX_") # type: ignore + + assert self.basename is not None + assert self.working_dir is not None tex_filename = self.basename + ".tex" tex_path = os.path.join(self.working_dir, tex_filename) - _file_write(tex_path, self.tex_source.encode('UTF-8')) + file_write(tex_path, self.tex_source.encode('UTF-8')) + assert tex_path is not None log_path = tex_path.replace(".tex", ".log") compiled_file_path = tex_path.replace( ".tex", self.compiled_ext) @@ -403,7 +443,7 @@ class Tex2ImgBase(object): if status != 0: try: - log = _file_read(log_path) + log = file_read(log_path).decode("utf-8") except OSError: # no log file is generated self._remove_working_dir() @@ -411,12 +451,42 @@ class Tex2ImgBase(object): try: log = get_abstract_latex_log(log) - _file_write(self.errlog_saving_path, log) + + err_key = ("latex_err:%s:%s" + % (self.compiler.cmd, self.basename)) + + try: + import django.core.cache as cache + except ImproperlyConfigured: + err_cache_key = None + 
else: + def_cache = cache.caches["latex"] + err_cache_key = err_key + + if not isinstance(log, six.text_type): + log = six.text_type(log) + + get_latex_error_mongo_collection().update_one( + {"key": err_key}, + {"$setOnInsert": + {"key": err_key, + "errorlog": log.encode('utf-8'), + "source": self.tex_source.encode('utf-8'), + "creation_time": local_now() + }}, + upsert=True, + ) + + if err_cache_key: + assert isinstance(log, six.text_type) + if len(log) <= getattr( + settings, "RELATE_CACHE_MAX_BYTES", 0): + def_cache.add(err_cache_key, log) + except: raise finally: self._remove_working_dir() - from django.utils.html import escape raise ValueError( "
%s
" % escape(log).strip()) @@ -431,11 +501,11 @@ class Tex2ImgBase(object): % self.compiler.output_format) ) - def get_converted_image(self): + def get_converted_image_datauri(self): + # type: () -> Optional[Text] """ - Convert compiled file into image. If succeeded, the image - will be copied to ``output_dir``. - :return: string, the path of the generated image + Convert compiled file into image. + :return: string, the datauri """ compiled_file_path = self.get_compiled_file() if not compiled_file_path: @@ -469,113 +539,149 @@ class Tex2ImgBase(object): )) try: - shutil.copyfile(image_path, self.image_saving_path) + datauri = get_image_datauri(image_path) + except OSError: raise RuntimeError(error) finally: self._remove_working_dir() - return self.image_saving_path + return datauri - def get_compile_err_cached(self): + def get_compile_err_cached(self, force_regenerate=False): + # type: (Optional[bool]) -> Optional[Text] """ If the problematic latex source is not modified, check - wheter there is error log both in cache and output_dir. + whether there is error log both in cache or mongo. If it exists, raise the error. :return: None if no error log find. """ err_result = None + err_key = ("latex_err:%s:%s" + % (self.compiler.cmd, self.basename)) try: import django.core.cache as cache except ImproperlyConfigured: err_cache_key = None else: - def_cache = cache.caches["default"] - err_cache_key = ("latex_err:%s:%s" - % (self.compiler.cmd, self.basename)) + def_cache = cache.caches["latex"] + err_cache_key = err_key # Memcache is apparently limited to 250 characters. if len(err_cache_key) < 240: - err_result = def_cache.get(err_cache_key) + if not force_regenerate: + err_result = def_cache.get(err_cache_key) + else: + def_cache.delete(err_cache_key) + get_latex_error_mongo_collection().delete_one({"key": err_key}) if err_result is not None: - assert isinstance(err_result, six.string_types),\ - err_cache_key + raise ValueError( + "
%s
" % escape(err_result).strip()) if err_result is None: # read the saved err_log if it exists - if os.path.isfile(self.errlog_saving_path): - err_result = _file_read(self.errlog_saving_path) - assert isinstance(err_result, six.string_types) + mongo_result = get_latex_error_mongo_collection().find_one( + {"key": err_key} + ) + if mongo_result: + err_result = mongo_result["errorlog"].decode("utf-8") if err_result: if err_cache_key: + assert isinstance(err_result, six.text_type) if len(err_result) <= getattr( settings, "RELATE_CACHE_MAX_BYTES", 0): - def_cache.add(err_cache_key, err_result, None) + def_cache.add(err_cache_key, err_result) - from django.utils.html import escape raise ValueError( "
%s
" % escape(err_result).strip()) return None def get_data_uri_cached(self, force_regenerate=False): + # type: (Optional[bool]) -> Text """ :param force_regenerate: :class:`Bool', if True, the tex file will be recompiled and re-convert the image, regardless of existing file or cached result. :return: string, data uri of the coverted image. """ - uri_result = None - if force_regenerate: - image_path = self.get_converted_image() - uri_result = get_file_data_uri(image_path) - assert isinstance(uri_result, six.string_types) + result = None - if not uri_result: - err_result = self.get_compile_err_cached() + if force_regenerate: + # first remove cached error results and files + self.get_compile_err_cached(force_regenerate) + result = self.get_converted_image_datauri() + if not isinstance(result, six.text_type): + result = six.text_type(result) + + if not result: + err_result = self.get_compile_err_cached(force_regenerate) if err_result: - return None - + raise ValueError( + "
%s
" % escape(err_result).strip()) + + # we make the key so that it can be used when cache is not configured + # and it can be used by mongo + uri_key = ( + "latex2img:%s:%s" % ( + self.compiler.cmd, + md5( + self.datauri_basename.encode("utf-8") + ).hexdigest() + ) + ) try: import django.core.cache as cache except ImproperlyConfigured: uri_cache_key = None else: - def_cache = cache.caches["default"] - - from hashlib import md5 - uri_cache_key = ( - "latex2img:%s:%s" % ( - self.compiler.cmd, - md5( - self.image_saving_path.encode("utf-8") - ).hexdigest() - ) - ) - if not uri_result: + def_cache = cache.caches["latex"] + uri_cache_key = uri_key + + if force_regenerate: + def_cache.delete(uri_cache_key) + get_latex_datauri_mongo_collection().delete_one({"key": uri_key}) + elif not result: # Memcache is apparently limited to 250 characters. if len(uri_cache_key) < 240: - uri_result = def_cache.get(uri_cache_key) - if uri_result: - assert isinstance( - uri_result, six.string_types),\ - uri_cache_key - return uri_result + result = def_cache.get(uri_cache_key) + if result: + if not isinstance(result, six.text_type): + result = six.text_type(result) + return result # Neighter regenerated nor cached, - # then read or generate the image - if not uri_result: - if not os.path.isfile(self.image_saving_path): - self.image_saving_path = self.get_converted_image() - uri_result = get_file_data_uri(self.image_saving_path) - assert isinstance(uri_result, six.string_types) + # then read from mongo + if not result: + mongo_result = get_latex_datauri_mongo_collection().find_one( + {"key": uri_key} + ) + if mongo_result: + result = mongo_result["datauri"].decode("utf-8") + if not isinstance(result, six.text_type): + result = six.text_type(result) + + # Not found in mongo, regenerate it + if not result: + result = self.get_converted_image_datauri() + if not isinstance(result, six.text_type): + result = six.text_type(result) + get_latex_datauri_mongo_collection().update_one( + {"key": 
uri_key}, + {"$setOnInsert": + {"key": uri_key, + "datauri": result.encode('utf-8'), + "creation_time": local_now() + }}, + upsert=True, + ) - assert uri_result + assert result # no cache configured if not uri_cache_key: - return uri_result + return result # cache configure, but image not cached allowed_max_bytes = getattr( @@ -584,11 +690,14 @@ class Tex2ImgBase(object): settings, "RELATE_CACHE_MAX_BYTES", ) ) - if len(uri_result) <= allowed_max_bytes: + + if len(result) <= allowed_max_bytes: # image size larger than allowed_max_bytes # won't be cached, espeically for svgs. - def_cache.add(uri_cache_key, uri_result, None) - return uri_result + assert isinstance(result, six.text_type), \ + uri_cache_key + def_cache.add(uri_cache_key, result) + return result # }}} @@ -636,13 +745,14 @@ ALLOWED_COMPILER_FORMAT_COMBINATION = ( def get_tex2img_class(compiler, image_format): + # type: (Text, Text) -> Any image_format = image_format.replace(".", "").lower() compiler = compiler.lower() - if not image_format in ALLOWED_LATEX2IMG_FORMAT: + if image_format not in ALLOWED_LATEX2IMG_FORMAT: raise ValueError( _("Unsupported image format '%s'") % image_format) - if not compiler in ALLOWED_COMPILER: + if compiler not in ALLOWED_COMPILER: raise ValueError( _("Unsupported tex compiler '%s'") % compiler) @@ -651,10 +761,10 @@ def get_tex2img_class(compiler, image_format): _("Unsupported combination: " "('%(compiler)s', '%(format)s'). 
" "Currently support %(supported)s.") - % {"compiler": compiler, - "format": image_format, - "supported": ", ".join( - str(e) for e in ALLOWED_COMPILER_FORMAT_COMBINATION)} + % {"compiler": compiler, + "format": image_format, + "supported": ", ".join( + str(e) for e in ALLOWED_COMPILER_FORMAT_COMBINATION)} ) class_name = "%s2%s" % (compiler.title(), image_format.title()) @@ -663,9 +773,11 @@ def get_tex2img_class(compiler, image_format): # }}} + # {{{ check if multiple images are generated due to long pdf def get_number_of_images(image_path, image_ext): + # type: (Text, Text) -> int if os.path.isfile(image_path): return 1 count = 0 diff --git a/course/latex/latex.py b/course/latex/latex.py index aef51406..ab2cab3d 100644 --- a/course/latex/latex.py +++ b/course/latex/latex.py @@ -30,15 +30,22 @@ from django.utils.translation import ugettext as _ from .utils import strip_comments, strip_spaces +if False: + from typing import Text, Any, Optional # noqa + + class TexDocParseError(Exception): pass + class TexDocMissingElementError(TexDocParseError): pass + class TexDocWrongElementOrderError(TexDocParseError): pass + class TexDoc(): """ Defines a LaTeX document @@ -50,12 +57,14 @@ class TexDoc(): has_enddoc = False def is_empty_pagestyle_already(self): + # type: () -> bool match = re.search(r"\\pagestyle{\s?empty\s?}", self.preamble) if match: return True return False def parse(self, latex, test=False): + # type: (Text, Optional[bool]) -> None """ parse the doc into preamble and document. If test=True, the method will try to find out which elements of the latex code @@ -108,6 +117,7 @@ class TexDoc(): assert self.document is not None def as_latex(self): + # type: () -> Text """ Assemble LaTeX Document """ @@ -131,6 +141,7 @@ class TexDoc(): def __init__(self, text=None, preamble="", preamble_extra="", empty_pagestyle=False): + # type: (...) -> None """ Parse LaTeX document :param text: string. 
Full latex document, or body only if @@ -173,4 +184,4 @@ class TexDoc(): except: raise - self.empty_pagestyle = empty_pagestyle \ No newline at end of file + self.empty_pagestyle = empty_pagestyle diff --git a/course/latex/utils.py b/course/latex/utils.py index 0d80422c..eb949299 100644 --- a/course/latex/utils.py +++ b/course/latex/utils.py @@ -33,9 +33,22 @@ from subprocess import Popen, PIPE from django.utils.translation import ( ugettext as _, string_concat) +from django.core.files import File from django.core.management.base import CommandError from django.utils.encoding import ( DEFAULT_LOCALE_ENCODING, force_text) +from django.conf import settings + +from pymongo import MongoClient + + +# {{{ mypy + +if False: + from typing import Any, Text, List, Tuple, Optional # noqa + from course.latex.converter import CommandBase # noqa + +# }}} # {{{ Constants @@ -57,6 +70,7 @@ ALLOWED_COMPILER_FORMAT_COMBINATION = ( def popen_wrapper(args, os_err_exc_type=CommandError, stdout_encoding='utf-8', **kwargs): + # type: (...) -> Tuple[Text, Text, int] """ Extended from django.core.management.utils.popen_wrapper. `**kwargs` is added so that more kwargs can be added. @@ -94,6 +108,7 @@ def popen_wrapper(args, os_err_exc_type=CommandError, # {{{ file read and write def get_basename_or_md5(filename, s): + # type: (Text, Text) -> Optional[Text] """ :return: the basename of `filename` if `filename` is not empty, else, return the md5 of string `s`. 
@@ -103,23 +118,25 @@ def get_basename_or_md5(filename, s): else: if not s: return None - basename = md5(s).hexdigest() + basename = md5(s.encode("utf-8")).hexdigest() return basename -def _file_read(filename): +def file_read(filename): + # type: (Text) -> bytes '''Read the content of a file and close it properly.''' - f = file(filename, 'rb') - content = f.read() - f.close() + with open(filename, 'rb') as f: + ff = File(f) + content = ff.read() return content -def _file_write(filename, content): +def file_write(filename, content): + # type: (Text, bytes) -> None '''Write into a file and close it properly.''' - f = file(filename, 'wb') - f.write(content) - f.close() + with open(filename, 'wb') as f: + ff = File(f) + ff.write(content) # }}} @@ -127,13 +144,14 @@ def _file_write(filename, content): # {{{ convert file to data uri def get_file_data_uri(file_path): + # type: (Text) -> Optional[Text] '''Convert file to data URI''' if not file_path: return None from base64 import b64encode from mimetypes import guess_type - buf = _file_read(file_path) + buf = file_read(file_path) mime_type = guess_type(file_path)[0] return "data:%(mime_type)s;base64,%(b64)s" % { @@ -155,7 +173,9 @@ LATEX_LOG_OMIT_LINE_STARTS = ( # more ) + def get_abstract_latex_log(log): + # type: (Text) -> Text '''abstract error msg from latex compilation log''' msg = log.split(LATEX_ERR_LOG_BEGIN_LINE_STARTS)[1]\ .split(LATEX_ERR_LOG_END_LINE_STARTS)[0] @@ -174,14 +194,15 @@ def get_abstract_latex_log(log): # {{{ strip comments from source def strip_comments(source): + # type: (Text) -> Text # modified from https://gist.github.com/amerberg/a273ca1e579ab573b499 - tokens = ( + tokens = ( # noqa 'PERCENT', 'BEGINCOMMENT', 'ENDCOMMENT', 'BACKSLASH', 'CHAR', 'BEGINVERBATIM', 'ENDVERBATIM', 'NEWLINE', 'ESCPCT', 'MAKEATLETTER', 'MAKEATOTHER', ) - states = ( + states = ( # noqa ('makeatblock', 'exclusive'), ('makeatlinecomment', 'exclusive'), ('linecomment', 'exclusive'), @@ -191,78 +212,78 @@ def 
strip_comments(source): # Deal with escaped backslashes, so we don't # think they're escaping % - def t_BACKSLASH(t): + def t_BACKSLASH(t): # noqa r"\\\\" return t # Leaving all % in makeatblock - def t_MAKEATLETTER(t): + def t_MAKEATLETTER(t): # noqa r"\\makeatletter" t.lexer.begin("makeatblock") return t # One-line comments - def t_PERCENT(t): + def t_PERCENT(t): # noqa r"\%" t.lexer.begin("linecomment") # Escaped percent signs - def t_ESCPCT(t): + def t_ESCPCT(t): # noqa r"\\\%" return t # Comment environment, as defined by verbatim package - def t_BEGINCOMMENT(t): + def t_BEGINCOMMENT(t): # noqa r"\\begin\s*{\s*comment\s*}" t.lexer.begin("commentenv") #Verbatim environment (different treatment of comments within) - def t_BEGINVERBATIM(t): + def t_BEGINVERBATIM(t): # noqa r"\\begin\s*{\s*verbatim\s*}" t.lexer.begin("verbatim") return t #Any other character in initial state we leave alone - def t_CHAR(t): + def t_CHAR(t): # noqa r"." return t - def t_NEWLINE(t): + def t_NEWLINE(t): # noqa r"\n" return t # End comment environment - def t_commentenv_ENDCOMMENT(t): + def t_commentenv_ENDCOMMENT(t): # noqa r"\\end\s*{\s*comment\s*}" #Anything after \end{comment} on a line is ignored! t.lexer.begin('linecomment') # Ignore comments of comment environment - def t_commentenv_CHAR(t): + def t_commentenv_CHAR(t): # noqa r"." pass - def t_commentenv_NEWLINE(t): + def t_commentenv_NEWLINE(t): # noqa r"\n" pass #End of verbatim environment - def t_verbatim_ENDVERBATIM(t): + def t_verbatim_ENDVERBATIM(t): # noqa r"\\end\s*{\s*verbatim\s*}" t.lexer.begin('INITIAL') return t #Leave contents of verbatim environment alone - def t_verbatim_CHAR(t): + def t_verbatim_CHAR(t): # noqa r"." 
return t - def t_verbatim_NEWLINE(t): + def t_verbatim_NEWLINE(t): # noqa r"\n" return t #End a % comment when we get to a new line - def t_linecomment_ENDCOMMENT(t): + def t_linecomment_ENDCOMMENT(t): # noqa r"\n" t.lexer.begin("INITIAL") @@ -270,46 +291,46 @@ def strip_comments(source): return t #Ignore anything after a % on a line - def t_linecomment_CHAR(t): + def t_linecomment_CHAR(t): # noqa r"." pass - def t_makeatblock_MAKEATOTHER(t): + def t_makeatblock_MAKEATOTHER(t): # noqa r"\\makeatother" t.lexer.begin('INITIAL') return t - def t_makeatblock_BACKSLASH(t): + def t_makeatblock_BACKSLASH(t): # noqa r"\\\\" return t # Escaped percent signs in makeatblock - def t_makeatblock_ESCPCT(t): + def t_makeatblock_ESCPCT(t): # noqa r"\\\%" return t # presever % in makeatblock - def t_makeatblock_PERCENT(t): + def t_makeatblock_PERCENT(t): # noqa r"\%" t.lexer.begin("makeatlinecomment") return t - def t_makeatlinecomment_NEWLINE(t): + def t_makeatlinecomment_NEWLINE(t): # noqa r"\n" t.lexer.begin('makeatblock') return t # Leave contents of makeatblock alone - def t_makeatblock_CHAR(t): + def t_makeatblock_CHAR(t): # noqa r"." 
return t - def t_makeatblock_NEWLINE(t): + def t_makeatblock_NEWLINE(t): # noqa r"\n" return t # For bad characters, we just skip over it - def t_ANY_error(t): + def t_ANY_error(t): # noqa t.lexer.skip(1) lexer = ply.lex.lex() @@ -322,6 +343,7 @@ def strip_comments(source): # {{{ remove redundant strings def strip_spaces(s, allow_single_empty_line=False): + # type: (Text, Optional[bool]) -> Text """ strip spaces in s, so that the result will be considered same although new empty lines or @@ -346,16 +368,17 @@ def strip_spaces(s, allow_single_empty_line=False): s = s.replace('\n\n\n', '\n\n') # remove redundant white spaces and tabs - s = s.replace ("\t", " ") + s = s.replace("\t", " ") while " " in s: s = s.replace(" ", " ") return s -## }}} +# }}} def get_all_indirect_subclasses(cls): + # type: (Any) -> List[Any] all_subcls = [] for subcls in cls.__subclasses__(): @@ -368,6 +391,7 @@ def get_all_indirect_subclasses(cls): def replace_latex_space_seperator(s): + # type: (Text) -> Text """ "{{", "}}", "{%", %}", "{#" and "#}" are used in jinja template, so we have to put spaces between those @@ -389,4 +413,18 @@ def replace_latex_space_seperator(s): return s +def get_mongo_db(database=None): + # type: (Optional[Text]) -> MongoClient + if not database: + database = getattr( + settings, "RELATE_MONGODB_NAME", + "relate-mongodb") + args = [] + uri = getattr(settings, "RELATE_MONGO_URI", None) + if uri: + args.append(uri) + client = MongoClient(*args, connect=False) + db = client[database] + return db + # vim: foldmethod=marker diff --git a/doc/tex-macro.rst b/doc/tex-macro.rst index 83636a1b..fe40d269 100644 --- a/doc/tex-macro.rst +++ b/doc/tex-macro.rst @@ -78,6 +78,7 @@ Prerequisites * **ImageMagick**, required. - For Windows platform, install with option ``install legacy component`` ticked. +* **MongoDB**, required. 
Configurations ^^^^^^^^^^^^^^ diff --git a/local_settings.example.py b/local_settings.example.py index b6d8d13f..14607617 100644 --- a/local_settings.example.py +++ b/local_settings.example.py @@ -11,6 +11,9 @@ ALLOWED_HOSTS = [ # Configure the following as url as above. RELATE_BASE_URL = "http://YOUR/RELATE/SITE/DOMAIN" +RELATE_LATEX_TO_IMAGE_ENABLED = True +RELATE_LATEX_BIN_DIR = \ + "/usr/local/texlive/2015/bin/x86_64-linux" # Uncomment this to use a real database. If left commented out, a local SQLite3 # database will be used, which is not recommended for production use. # @@ -229,6 +232,15 @@ RELATE_EDITABLE_INST_ID_BEFORE_VERIFICATION = True # {{{ convert LaTeX to image settings +# The MongoDB database name for RELATE +RELATE_MONGODB_NAME = "my_relate-mongodb" + +# The collection name in RELATE_MONGODB_NAME where the datauris are stored +RELATE_LATEX_DATAURI_MONGO_COLLECTION_NAME = "my_relate_latex_datauri" + +# The collection name in RELATE_MONGODB_NAME where the compile error are stored +RELATE_LATEX_ERROR_MONGO_COLLECTION_NAME = "my_relate_latex_error" + # To enable tex2img functionality, uncomment the following line. #RELATE_LATEX_TO_IMAGE_ENABLED = True @@ -237,10 +249,10 @@ RELATE_EDITABLE_INST_ID_BEFORE_VERIFICATION = True #RELATE_LATEX_BIN_PATH = "/usr/local/texlive/2015/bin/x86_64-linux" #RELATE_IMAGEMAGICK_BIN_DIR = "/path/to/imagemagic/convert/bin/" -# The full path of the directory where images converted -# (from latex source) are saved. If not set, the default -# path is a subfolder named "latex_image" in MEDIA_ROOT. -# RELATE_LATEX_IMAGE_SAVING_FOLDER_PATH = "" +# configure the following only if dvisvgm or dvipng can't be found +# in sys evn.. +#RELATE_DVISVGM_BIN_DIR = "/path/to/dvisvgm/bin/" +#RELATE_DVIPNG_BIN_DIR = "/path/to/dvipng/bin/" # image, especially svg have large file size, files with size # exceed the following won't be cached. 
diff --git a/requirements.txt b/requirements.txt index d5e856b3..8996fdfd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -109,6 +109,9 @@ pytools # For mypy (static type checking) support typing +# For mongodb +pymongo + # For string parse ply # vim: foldmethod=marker -- GitLab