from django.core.checks import register, Tags as DjangoTags
from django.conf import settings

from course.latex.utils import get_all_indirect_subclasses
from course.latex.converter import CommandBase


class Tags(DjangoTags):
    # Tag under which the checks defined in this module are registered.
    relate_course_tag = 'relate_course_tag'


@register(Tags.relate_course_tag, deploy=True)
def latex2image_bin_check(app_configs, **kwargs):
    """
    System check for the external tools behind the latex-to-image
    utility.

    Does nothing (returns an empty list) unless
    ``settings.RELATE_LATEX_TO_IMAGE_ENABLED`` is truthy. Otherwise every
    class returned by ``get_all_indirect_subclasses(CommandBase)`` is
    instantiated and asked to ``check()`` itself.

    :return: a list with the truthy results of the individual
        ``check()`` calls.
    """
    latex_enabled = getattr(
        settings, "RELATE_LATEX_TO_IMAGE_ENABLED", False)
    if not latex_enabled:
        return []

    # All tools are constructed before the first check is run.
    tools = [
        tool_cls()
        for tool_cls in get_all_indirect_subclasses(CommandBase)]

    outcomes = [tool.check() for tool in tools]
    return [problem for problem in outcomes if problem]
" + % (type(e).__name__, str(e))) + + latex2image_enabled = getattr( + settings, "RELATE_LATEX_TO_IMAGE_ENABLED", False) + + if latex2image_enabled: + env.globals["latex"] = jinja_tex_to_img_tag + else: + if not validate_only: + env.globals["latex"] = latex_not_enabled_warning + else: + raise ImproperlyConfigured( + _("RELATE_LATEX_TO_IMAGE_ENABLED is set to False, " + "no image will be generated.")) + # }}} + + text = template.render(**kwargs) # }}} @@ -912,7 +950,8 @@ def markup_to_html( cache_key = None text = expand_markup( - course, repo, commit_sha, text, use_jinja=use_jinja, jinja_env=jinja_env) + course, repo, commit_sha, text, validate_only=validate_only, + use_jinja=use_jinja, jinja_env=jinja_env) if reverse_func is None: from django.urls import reverse diff --git a/course/latex/__init__.py b/course/latex/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a2adff5c12ce826c94070582d8a80a4c628bbcf5 --- /dev/null +++ b/course/latex/__init__.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- + +from __future__ import division + +__copyright__ = "Copyright (C) 2016 Dong Zhuang, Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
import six
import re

from django.utils.html import escape
from django.utils.translation import ugettext as _

from course.latex.converter import get_tex2img_class
from course.latex.latex import TexDoc
from course.latex.utils import (
    replace_latex_space_seperator, strip_spaces)

TIKZ_PGF_RE = re.compile(r"\\begin\{(?:tikzpicture|pgfpicture)\}")
DEFAULT_IMG_HTML_CLASS = "img-responsive"

# {{{ mypy

if False:
    from typing import Text, Any, Optional  # noqa

# }}}


def tex_to_img_tag(tex_source, *args, **kwargs):
    # type: (Text, *Any, **Any) -> Optional[Text]
    """Convert LaTeX source to an HTML ``<img>`` tag with an inline data URI.

    :param tex_source: the LaTeX code (a full document, or a body only when
        a preamble is passed through *kwargs*).

    Recognized keyword arguments:

    * ``compiler`` (required): name of the tex compiler.
    * ``image_format`` (required): target image format.
    * ``tex_filename``: passed on to the converter class.
    * ``tex_preamble``, ``tex_preamble_extra``: preamble code handed to
      :class:`TexDoc`.
    * ``force_regenerate``: passed to ``get_data_uri_cached``.
    * ``html_class_extra``: a string, or a list/tuple of strings, appended
      to the default CSS class.
    * ``empty_pagestyle``: handed to :class:`TexDoc` (default ``True``).
    * ``alt``: alt text; defaults to the document body.

    :return: the ``<img>`` tag, or ``""`` if the document body is empty.
    :raises ValueError: if ``compiler`` or ``image_format`` is missing, or
        ``html_class_extra`` has an unsupported type.
    """

    compiler = kwargs.get("compiler", None)
    if not compiler:
        raise ValueError(_("'compiler' must be specified."))

    image_format = kwargs.get("image_format", "")
    if not image_format:
        raise ValueError(_("'image_format' must be specified."))

    tex_filename = kwargs.get("tex_filename", None)
    tex_preamble = kwargs.get("tex_preamble", "")
    tex_preamble_extra = kwargs.get("tex_preamble_extra", "")

    force_regenerate = kwargs.get("force_regenerate", False)
    html_class_extra = kwargs.get("html_class_extra", "")
    empty_pagestyle = kwargs.get("empty_pagestyle", True)
    alt = kwargs.get("alt", None)

    # remove spaces added to latex code in jinja template.
    tex_source = replace_latex_space_seperator(
        strip_spaces(tex_source, allow_single_empty_line=True))
    tex_preamble = replace_latex_space_seperator(
        strip_spaces(tex_preamble, allow_single_empty_line=True))
    tex_preamble_extra = replace_latex_space_seperator(
        strip_spaces(tex_preamble_extra,
                     allow_single_empty_line=True))

    if html_class_extra:
        if isinstance(html_class_extra, (list, tuple)):
            html_class_extra = " ".join(html_class_extra)
        elif not isinstance(html_class_extra, six.string_types):
            raise ValueError(
                _('"html_class_extra" must be a string or a list'))
        html_class = "%s %s" % (DEFAULT_IMG_HTML_CLASS, html_class_extra)
    else:
        html_class = DEFAULT_IMG_HTML_CLASS

    texdoc = TexDoc(
        tex_source, preamble=tex_preamble,
        preamble_extra=tex_preamble_extra, empty_pagestyle=empty_pagestyle)

    # empty document
    if not texdoc.document.strip():
        return ""

    # dvipng is replaced by the svg route for tikz/pgf pictures.
    if (compiler == "latex"
            and image_format == "png"
            and TIKZ_PGF_RE.search(tex_source)):
        image_format = "svg"

    assert isinstance(compiler, six.text_type)

    tex2img_class = get_tex2img_class(compiler, image_format)  # type: ignore

    if not alt:
        alt = texdoc.document

    if alt:
        # The alt text is LaTeX source by default and routinely contains
        # quotes, '<' and '&'; it must be escaped before it is placed in
        # a single-quoted HTML attribute.
        alt = "alt='%s'" % escape(alt.strip().replace("\n", ""))

    latex2img = tex2img_class(
        tex_source=texdoc.as_latex(),
        tex_filename=tex_filename,
    )

    # NOTE(review): the tag template was reconstructed; confirm the exact
    # attribute layout against the upstream file.
    return (
        "<img src='%(src)s' class='%(html_class)s' %(alt)s>"
        % {
            "src": latex2img.get_data_uri_cached(force_regenerate),
            "html_class": escape(html_class),
            "alt": alt,
        })

# vim: foldmethod=marker
import six
import os
import sys
import shutil
import re
from hashlib import md5

from django.core.checks import Critical
from django.core.management.base import CommandError
from django.core.exceptions import ImproperlyConfigured
from django.utils.html import escape
from django.utils.encoding import DEFAULT_LOCALE_ENCODING
from django.utils.translation import ugettext as _, string_concat
from django.conf import settings

from relate.utils import local_now

from course.latex.utils import get_mongo_db

from .utils import (
    popen_wrapper, get_basename_or_md5,
    file_read, file_write, get_abstract_latex_log)

# mypy
if False:
    from typing import Text, Optional, Any, List  # noqa
    from pymongo import MongoClient  # noqa
    from pymongo.collection import Collection  # noqa

DB = get_mongo_db()

# Memcache is apparently limited to 250 characters per key.
_MAX_CACHE_KEY_LENGTH = 240


# {{{ storage helpers

def _get_indexed_collection(name, db, index_name):
    # type: (Text, Any, Optional[Text]) -> Collection
    """Return ``db[name]``, with a unique index on *index_name* if given."""
    collection = db[name]
    if index_name:
        # create_index() replaces the deprecated ensure_index().
        collection.create_index(index_name, unique=True)
    return collection


def get_latex_datauri_mongo_collection(name=None, db=DB, index_name="key"):
    # type: (Optional[Text], Optional[MongoClient], Optional[Text]) -> Collection
    """
    Return the collection holding generated data URIs.

    :param name: collection name; defaults to
        ``settings.RELATE_LATEX_DATAURI_MONGO_COLLECTION_NAME`` or
        ``"relate_latex_datauri"``.
    :param index_name: field to index uniquely; falsy to skip indexing.
    """
    if not name:
        name = getattr(
            settings, "RELATE_LATEX_DATAURI_MONGO_COLLECTION_NAME",
            "relate_latex_datauri")
    return _get_indexed_collection(name, db, index_name)


def get_latex_error_mongo_collection(name=None, db=DB, index_name="key"):
    # type: (Optional[Text], Optional[MongoClient], Optional[Text]) -> Collection
    """
    Return the collection holding compile error logs.

    :param name: collection name; defaults to
        ``settings.RELATE_LATEX_ERROR_MONGO_COLLECTION_NAME`` or
        ``"relate_latex_error"``.
    :param index_name: field to index uniquely; falsy to skip indexing.
    """
    if not name:
        name = getattr(
            settings, "RELATE_LATEX_ERROR_MONGO_COLLECTION_NAME",
            "relate_latex_error")
    return _get_indexed_collection(name, db, index_name)


def _get_latex_cache(key):
    # type: (Text) -> Any
    """
    Return the Django cache registered as ``"latex"`` if it can be used
    for *key*, else ``None`` (caching unavailable, alias not configured,
    or key too long for the backend).
    """
    if len(key) >= _MAX_CACHE_KEY_LENGTH:
        return None
    try:
        from django.core.cache import caches, InvalidCacheBackendError
    except ImproperlyConfigured:
        return None
    try:
        return caches["latex"]
    except (ImproperlyConfigured, InvalidCacheBackendError):
        return None


def _cache_add_if_small(cache, key, value, max_bytes):
    # type: (Any, Text, Text, int) -> None
    """Add *value* to *cache* unless there is no cache or it is too big."""
    if cache is not None and len(value) <= max_bytes:
        cache.add(key, value)


def _compile_error(log):
    # type: (Text) -> ValueError
    """Build the exception used to report a latex compile log."""
    # NOTE(review): the wrapping markup was reconstructed; confirm it
    # against the upstream file.
    return ValueError("<pre>%s</pre>" % escape(log).strip())

# }}}


# {{{ latex compiler classes and image converter classes


class CommandBase(object):
    @property
    def name(self):
        # type: () -> Text
        """
        The name of the command tool
        """
        raise NotImplementedError

    @property
    def cmd(self):
        # type: () -> Text
        """
        The string of the command
        """
        raise NotImplementedError

    required_version = ""  # type: Text
    bin_path = ""  # type: Text

    def check(self):
        # type: () -> Any
        """
        Run ``<bin_path> --version`` and validate the result.

        :return: a :class:`django.core.checks.Critical` if the tool cannot
            be run or is older than :attr:`required_version`, otherwise
            an empty string.
        """
        out = ""
        strerror = ""

        try:
            out, err, status = popen_wrapper(
                [self.bin_path, '--version'],
                stdout_encoding=DEFAULT_LOCALE_ENCODING
            )
        except CommandError as e:
            strerror = str(e)

        m = re.search(r'(\d+)\.(\d+)\.?(\d+)?', out)
        if not m:
            return Critical(
                strerror,
                hint=("Unable to run '%(cmd)s'. Is "
                      "%(tool)s installed or has its "
                      "path correctly configured "
                      "in local_settings.py?") % {
                    "cmd": self.cmd,
                    "tool": self.name,
                },
                obj=self.name
            )

        if self.required_version:
            # LooseVersion is a class, so it has to be imported with
            # "from ... import"; "import a.b.Class" raises ImportError.
            from distutils.version import LooseVersion as LV
            version = ".".join(d for d in m.groups() if d)
            if LV(version) < LV(self.required_version):
                return Critical(
                    "Version outdated",
                    hint=("'%(tool)s' with version "
                          ">=%(required)s is required, "
                          "current version is %(version)s"
                          ) % {
                        "tool": self.name,
                        "required": self.required_version,
                        "version": version},
                    obj=self.name
                )

        return ""


class TexCompilerBase(CommandBase):
    def __init__(self):
        # type: () -> None
        # A tool-specific RELATE_<NAME>_BIN_DIR setting wins over the
        # shared RELATE_LATEX_BIN_DIR.
        self.bin_path_dir = getattr(
            settings, "RELATE_%s_BIN_DIR" % self.name.upper(),
            getattr(settings, "RELATE_LATEX_BIN_DIR", "")
        )
        self.bin_path = os.path.join(
            self.bin_path_dir, self.cmd.lower())


class Latexmk(TexCompilerBase):
    name = "latexmk"
    cmd = "latexmk"
    required_version = "4.39"


class LatexCompiler(TexCompilerBase):
    latexmk_option = (
        '-latexoption="-no-shell-escape '
        '-interaction=batchmode -halt-on-error "'
    )

    @property
    def output_format(self):
        # type: () -> Text
        raise NotImplementedError()

    def __init__(self):
        # type: () -> None
        super(LatexCompiler, self).__init__()
        self.latexmk_prog_repl = self._get_latexmk_prog_repl()

    def _get_latexmk_prog_repl(self):
        # type: () -> Text
        """
        Program replacement used with the "-pdflatex=" or "-latex="
        argument of latexmk, especially needed when compilers are
        not in system's default $PATH.

        :return: the latexmk argument ``-<name>=<bin_path>``, with the
            tool name lower-cased and the path left untouched (paths
            are case-sensitive on most systems).
        """
        return "-%s=%s" % (self.name.lower(), self.bin_path)

    def get_latexmk_subpro_cmdline(self, input_path):
        # type: (Text) -> List[Text]
        """Return the latexmk argument list that compiles *input_path*."""
        latexmk = Latexmk()
        return [
            latexmk.bin_path,
            "-%s" % self.output_format,
            self.latexmk_prog_repl,
            self.latexmk_option,
            input_path
        ]


class Latex(LatexCompiler):
    name = "latex"
    cmd = "latex"
    output_format = "dvi"


class PdfLatex(LatexCompiler):
    name = "PdfLatex"
    cmd = "pdflatex"
    output_format = "pdf"


class LuaLatex(LatexCompiler):
    name = "LuaLatex"
    cmd = "lualatex"
    output_format = "pdf"

    def __init__(self):
        # type: () -> None
        super(LuaLatex, self).__init__()
        # lualatex is passed to latexmk through its "-pdflatex=" argument.
        self.latexmk_prog_repl = "-%s=%s" % ("pdflatex", self.bin_path)


class XeLatex(LatexCompiler):
    name = "XeLatex"
    cmd = "xelatex"
    output_format = "pdf"

    def __init__(self):
        # type: () -> None
        super(XeLatex, self).__init__()
        # xelatex is passed to latexmk through its "-pdflatex=" argument.
        self.latexmk_prog_repl = "-%s=%s" % ("pdflatex", self.bin_path)


class Imageconverter(CommandBase):

    @property
    def output_format(self):
        # type: () -> Text
        raise NotImplementedError

    def __init__(self):
        # type: () -> None
        bin_path_dir = getattr(
            settings, "RELATE_%s_BIN_DIR" % self.name.upper(),
            ""
        )
        self.bin_path = os.path.join(bin_path_dir,
                                     self.cmd.lower())

    def get_converter_cmdline(
            self, input_filepath, output_filepath):
        # type: (Text, Text) -> List[Text]
        raise NotImplementedError


class Dvipng(TexCompilerBase, Imageconverter):
    # Inherit TexCompilerBase's bin_path
    # since dvipng is usually installed in
    # latex compilers' bin dir.
    name = "dvipng"
    cmd = "dvipng"
    output_format = "png"

    def get_converter_cmdline(
            self, input_filepath, output_filepath):
        # type: (Text, Text) -> List[Text]
        return [self.bin_path,
                '-o', output_filepath,
                '-pp', '1',
                '-T', 'tight',
                '-z9',
                input_filepath]


class Dvisvg(TexCompilerBase, Imageconverter):
    # Inherit TexCompilerBase's bin_path
    # since dvisvgm is usually installed in
    # latex compilers' bin dir.
    name = "dvisvg"
    cmd = "dvisvgm"
    output_format = "svg"

    def get_converter_cmdline(
            self, input_filepath, output_filepath):
        # type: (Text, Text) -> List[Text]
        return [self.bin_path,
                '--no-fonts',
                '-o', output_filepath,
                input_filepath]


class ImageMagick(Imageconverter):
    name = "ImageMagick"
    cmd = "convert"
    output_format = "png"

    def get_converter_cmdline(
            self, input_filepath, output_filepath):
        # type: (Text, Text) -> List[Text]
        return [self.bin_path,
                '-density', '96',
                '-quality', '85',
                '-trim',
                input_filepath,
                output_filepath
                ]

# }}}


# {{{ convert file to data uri

def get_image_datauri(file_path):
    # type: (Text) -> Optional[Text]
    """
    Convert file to data URI

    :return: ``None`` for a falsy *file_path*, else a
        ``data:<mime>;base64,<payload>`` string. Errors raised while
        reading the file propagate to the caller.
    """
    if not file_path:
        return None

    buf = file_read(file_path)

    from mimetypes import guess_type
    mime_type = guess_type(file_path)[0]

    from base64 import b64encode
    return "data:%(mime_type)s;base64,%(b64)s" % {
        "mime_type": mime_type,
        "b64": b64encode(buf).decode(),
    }

# }}}


# {{{ Base tex2img class

class Tex2ImgBase(object):
    """The abstract class of converting tex source to images.
    """

    @property
    def compiler(self):
        # type: () -> LatexCompiler
        """
        :return: an instance of `LatexCompiler`
        """
        raise NotImplementedError()

    @property
    def converter(self):
        # type: () -> Imageconverter
        """
        :return: an instance of `Imageconverter`
        """
        raise NotImplementedError()

    def __init__(self, tex_source, tex_filename):
        # type: (...) -> None
        """
        :param tex_source: Required, a string representing the
            full tex source code.
        :param tex_filename: Optional, a string
        :raises ValueError: if *tex_source* is empty after stripping.
        """

        if tex_source:
            tex_source = tex_source.strip()
        if not tex_source:
            raise ValueError(
                _("Param 'tex_source' can not be an empty string")
            )
        assert isinstance(tex_source, six.text_type)
        self.tex_source = tex_source

        self.working_dir = None

        self.basename = get_basename_or_md5(
            tex_filename, tex_source)

        self.image_format = self.converter.output_format \
            .replace(".", "").lower()
        self.image_ext = ".%s" % self.image_format

        self.compiled_ext = ".%s" % self.compiler.output_format\
            .replace(".", "").lower()

        self.datauri_basename = (
            "%s_%s_%s_datauri" % (self.basename,
                                  self.compiler.cmd,
                                  self.image_format)
        )

    def get_compiler_cmdline(self, tex_path):
        # type: (Text) -> List[Text]
        return self.compiler.get_latexmk_subpro_cmdline(tex_path)

    def get_converter_cmdline(self, input_path, output_path):
        # type: (Text, Text) -> List[Text]
        return self.converter.get_converter_cmdline(
            input_path, output_path)

    def _remove_working_dir(self):
        # type: () -> None
        if self.working_dir:
            shutil.rmtree(self.working_dir)
            # Forget the directory so a second call is a no-op.
            self.working_dir = None

    def _error_key(self):
        # type: () -> Text
        """Key under which this source's compile error log is stored."""
        return "latex_err:%s:%s" % (self.compiler.cmd, self.basename)

    def _save_compile_error(self, log):
        # type: (Text) -> None
        """Persist *log* in mongo and, if small enough, in the cache."""
        err_key = self._error_key()
        get_latex_error_mongo_collection().update_one(
            {"key": err_key},
            {"$setOnInsert":
                 {"key": err_key,
                  "errorlog": log.encode('utf-8'),
                  "source": self.tex_source.encode('utf-8'),
                  "creation_time": local_now()
                  }},
            upsert=True,
        )
        _cache_add_if_small(
            _get_latex_cache(err_key), err_key, log,
            getattr(settings, "RELATE_CACHE_MAX_BYTES", 0))

    def get_compiled_file(self):
        # type: () -> Optional[Text]
        """
        Compile latex source in a fresh temporary working directory.

        :return: string, the path of the compiled file if succeeded.
        :raises ValueError: carrying the abstracted log if compilation
            failed and a log file exists.
        :raises RuntimeError: if compilation failed without a log, or no
            output file was produced.
        """
        from tempfile import mkdtemp

        # https://github.com/python/mypy/issues/1833
        self.working_dir = mkdtemp(prefix="RELATE_LATEX_")  # type: ignore

        assert self.basename is not None
        assert self.working_dir is not None
        path_stem = os.path.join(self.working_dir, self.basename)
        tex_path = path_stem + ".tex"
        log_path = path_stem + ".log"
        compiled_file_path = path_stem + self.compiled_ext

        file_write(tex_path, self.tex_source.encode('UTF-8'))

        output, error, status = popen_wrapper(
            self.get_compiler_cmdline(tex_path), cwd=self.working_dir)

        if status != 0:
            try:
                log = file_read(log_path).decode("utf-8")
            except (IOError, OSError):
                # no log file is generated
                self._remove_working_dir()
                raise RuntimeError(error)

            try:
                log = get_abstract_latex_log(log)
                if not isinstance(log, six.text_type):
                    log = six.text_type(log)
                self._save_compile_error(log)
            finally:
                self._remove_working_dir()

            # Raised after (not inside) the finally clause, so a failure
            # while saving the log is not silently replaced.
            raise _compile_error(log)

        if os.path.isfile(compiled_file_path):
            return compiled_file_path

        self._remove_working_dir()
        raise RuntimeError(
            string_concat(
                "%s." % error,
                _('No %s file was produced.')
                % self.compiler.output_format)
        )

    def get_converted_image_datauri(self):
        # type: () -> Optional[Text]
        """
        Convert compiled file into image.

        The working directory is removed on every exit path.

        :return: string, the datauri
        :raises RuntimeError: if the converter fails or the image cannot
            be read.
        :raises ValueError: if no image, or more than one, was generated.
        """
        compiled_file_path = self.get_compiled_file()
        if not compiled_file_path:
            return None

        # Swap only the trailing extension, not every occurrence of it.
        image_path = (
            compiled_file_path[:-len(self.compiled_ext)] + self.image_ext)

        cmdline = self.get_converter_cmdline(
            compiled_file_path, image_path)

        try:
            output, error, status = popen_wrapper(
                cmdline,
                cwd=self.working_dir
            )

            if status != 0:
                raise RuntimeError(error)

            n_images = get_number_of_images(image_path, self.image_ext)
            if n_images == 0:
                raise ValueError(
                    _("No image was generated."))
            if n_images > 1:
                raise ValueError(
                    string_concat(
                        "%s images are generated while expecting 1, "
                        "possibly due to long pdf file."
                        % (n_images, )
                    ))

            try:
                return get_image_datauri(image_path)
            except (IOError, OSError):
                raise RuntimeError(error)
        finally:
            self._remove_working_dir()

    def get_compile_err_cached(self, force_regenerate=False):
        # type: (Optional[bool]) -> Optional[Text]
        """
        If the problematic latex source is not modified, check
        whether there is an error log in the cache or in mongo.
        If it exists, raise the error.

        :param force_regenerate: if True, stored error logs for this
            source are deleted instead (from both stores) and nothing is
            raised.
        :return: None if no error log is found.
        :raises ValueError: carrying the stored error log.
        """
        err_key = self._error_key()
        cache = _get_latex_cache(err_key)

        if force_regenerate:
            if cache is not None:
                cache.delete(err_key)
            # Mongo is cleared whether or not a cache is configured.
            get_latex_error_mongo_collection().delete_one({"key": err_key})
            return None

        if cache is not None:
            cached_log = cache.get(err_key)
            if cached_log is not None:
                raise _compile_error(cached_log)

        # read the saved err_log if it exists
        mongo_result = get_latex_error_mongo_collection().find_one(
            {"key": err_key}
        )
        if mongo_result:
            stored_log = mongo_result["errorlog"].decode("utf-8")
            if stored_log:
                _cache_add_if_small(
                    cache, err_key, stored_log,
                    getattr(settings, "RELATE_CACHE_MAX_BYTES", 0))
                raise _compile_error(stored_log)

        return None

    def get_data_uri_cached(self, force_regenerate=False):
        # type: (Optional[bool]) -> Text
        """
        :param force_regenerate: :class:`Bool`, if True, the tex file
            will be recompiled and the image re-converted, regardless of
            cached or stored results (which are discarded).
        :return: string, data uri of the converted image.
        :raises ValueError: if a compile error is stored for this source
            or compilation fails.
        :raises RuntimeError: if no image could be produced.
        """
        # we make the key so that it can be used when cache is not
        # configured and it can be used by mongo
        uri_key = (
            "latex2img:%s:%s" % (
                self.compiler.cmd,
                md5(
                    self.datauri_basename.encode("utf-8")
                ).hexdigest()
            )
        )
        cache = _get_latex_cache(uri_key)
        result = None

        if force_regenerate:
            # drop every stale record, with or without a cache
            self.get_compile_err_cached(force_regenerate)
            if cache is not None:
                cache.delete(uri_key)
            get_latex_datauri_mongo_collection().delete_one({"key": uri_key})
        else:
            # raises if this source is known not to compile
            self.get_compile_err_cached(force_regenerate)

            if cache is not None:
                result = cache.get(uri_key)
                if result:
                    if not isinstance(result, six.text_type):
                        result = six.text_type(result)
                    return result

            # Not cached, then read from mongo
            mongo_result = get_latex_datauri_mongo_collection().find_one(
                {"key": uri_key}
            )
            if mongo_result:
                result = mongo_result["datauri"].decode("utf-8")

        # Not stored anywhere (or regeneration forced): generate and store
        if not result:
            result = self.get_converted_image_datauri()
            if not result:
                raise RuntimeError(_("No image was generated."))
            if not isinstance(result, six.text_type):
                result = six.text_type(result)
            get_latex_datauri_mongo_collection().update_one(
                {"key": uri_key},
                {"$setOnInsert":
                     {"key": uri_key,
                      "datauri": result.encode('utf-8'),
                      "creation_time": local_now()
                      }},
                upsert=True,
            )

        # image size larger than allowed_max_bytes
        # won't be cached, especially for svgs.
        allowed_max_bytes = getattr(
            settings, "RELATE_IMAGECACHE_MAX_BYTES",
            getattr(settings, "RELATE_CACHE_MAX_BYTES", 0)
        )
        _cache_add_if_small(cache, uri_key, result, allowed_max_bytes)
        return result

# }}}


# {{{ derived tex2img converter

class Latex2Svg(Tex2ImgBase):
    compiler = Latex()
    converter = Dvisvg()


class Lualatex2Png(Tex2ImgBase):
    compiler = LuaLatex()
    converter = ImageMagick()


class Latex2Png(Tex2ImgBase):
    compiler = Latex()
    converter = Dvipng()


class Pdflatex2Png(Tex2ImgBase):
    compiler = PdfLatex()
    converter = ImageMagick()


class Xelatex2Png(Tex2ImgBase):
    compiler = XeLatex()
    converter = ImageMagick()

# }}}


# {{{ get tex2img class

ALLOWED_COMPILER = ['latex', 'pdflatex', 'xelatex', 'lualatex']
ALLOWED_LATEX2IMG_FORMAT = ['png', 'svg']
ALLOWED_COMPILER_FORMAT_COMBINATION = (
    ("latex", "png"),
    ("latex", "svg"),
    ("lualatex", "png"),
    ("pdflatex", "png"),
    ("xelatex", "png")
)


def get_tex2img_class(compiler, image_format):
    # type: (Text, Text) -> Any
    """
    Look up the :class:`Tex2ImgBase` subclass for a compiler/format pair.

    Both arguments are matched case-insensitively; dots in
    *image_format* are ignored.

    :raises ValueError: for an unknown compiler, an unknown format or an
        unsupported combination of the two.
    """
    image_format = image_format.replace(".", "").lower()
    compiler = compiler.lower()
    if image_format not in ALLOWED_LATEX2IMG_FORMAT:
        raise ValueError(
            _("Unsupported image format '%s'") % image_format)

    if compiler not in ALLOWED_COMPILER:
        raise ValueError(
            _("Unsupported tex compiler '%s'") % compiler)

    if (compiler, image_format) not in ALLOWED_COMPILER_FORMAT_COMBINATION:
        raise ValueError(
            _("Unsupported combination: "
              "('%(compiler)s', '%(format)s'). "
              "Currently support %(supported)s.")
            % {"compiler": compiler,
               "format": image_format,
               "supported": ", ".join(
                   str(e) for e in ALLOWED_COMPILER_FORMAT_COMBINATION)}
        )

    # e.g. ("pdflatex", "png") -> "Pdflatex2Png"
    class_name = "%s2%s" % (compiler.title(), image_format.title())

    return getattr(sys.modules[__name__], class_name)

# }}}


# {{{ check if multiple images are generated due to long pdf

def get_number_of_images(image_path, image_ext):
    # type: (Text, Text) -> int
    """
    Count the images produced for *image_path*.

    :return: 1 if *image_path* itself exists; otherwise the number of
        consecutive files named ``<stem>-0<ext>``, ``<stem>-1<ext>``, ...
        that exist, where ``<stem>`` is *image_path* without its trailing
        *image_ext*.
    """
    if os.path.isfile(image_path):
        return 1

    # Strip the extension only where it is a suffix.
    if image_ext and image_path.endswith(image_ext):
        stem = image_path[:-len(image_ext)]
    else:
        stem = image_path

    count = 0
    while os.path.isfile("%s-%d%s" % (stem, count, image_ext)):
        count += 1

    return count

# }}}

# vim: foldmethod=marker
import re

from django.utils.translation import ugettext as _

from .utils import strip_comments, strip_spaces

if False:
    from typing import Text, Any, Optional  # noqa


class TexDocParseError(Exception):
    """Base class of the errors raised while parsing a LaTeX document."""


class TexDocMissingElementError(TexDocParseError):
    """A required element of the document was not found."""


class TexDocWrongElementOrderError(TexDocParseError):
    """The required elements do not appear in the expected order."""


class TexDoc(object):
    """
    Defines a LaTeX document
    """
    preamble = ""
    document = ""
    has_preamble = False
    has_begindoc = False
    has_enddoc = False

    def __init__(self, text=None, preamble="", preamble_extra="",
                 empty_pagestyle=False):
        # type: (...) -> None
        r"""
        Parse LaTeX document

        :param text: string. Full latex document, or body only if
            preamble or preamble_extra are given.
        :param preamble: string. If full document is provided in
            text, this value will be neglected.
        :param preamble_extra: string. Append to existing preamble.
        :param empty_pagestyle: bool. If True, the pagestyle will
            be set as "empty". We are not using
            \documentclass{standalone}.
        :raises ValueError: if *text* is empty.
        :raises TexDocParseError: if the document cannot be split into
            preamble and body.
        """
        if not text:
            raise ValueError(_("No LaTeX source code is provided."))

        text = strip_comments(text)
        try:
            self.parse(text)
        except TexDocMissingElementError:
            # Find out which element is missing.
            self.parse(text, test=True)
            if self.has_preamble:
                # begin_document or end_document is missing
                raise
            if not preamble and not preamble_extra:
                raise

            # in this case, preamble code and document body code
            # are separated, try to assemble them up.
            if not self.has_begindoc:
                text = "%s\n%s" % ("\\begin{document}", text)
            if not self.has_enddoc:
                text = "%s\n%s" % (text, "\\end{document}")

            text = "%s\n%s\n%s" % (
                strip_comments(preamble),
                strip_comments(preamble_extra),
                text)
            self.parse(text)

        self.empty_pagestyle = empty_pagestyle

    def is_empty_pagestyle_already(self):
        # type: () -> bool
        """Tell whether the preamble already has ``\\pagestyle{empty}``."""
        return bool(
            re.search(r"\\pagestyle\{\s*empty\s*\}", self.preamble))

    def parse(self, latex, test=False):
        # type: (Text, Optional[bool]) -> None
        """
        parse the doc into preamble and document. If test=True, the
        method only records which elements of the latex code are
        present (in ``has_preamble``, ``has_begindoc``, ``has_enddoc``)
        and leaves ``preamble`` and ``document`` untouched.

        :raises TexDocMissingElementError: if an element is missing and
            *test* is false.
        :raises TexDocParseError: if an element occurs more than once.
        :raises TexDocWrongElementOrderError: if the elements found are
            out of order.
        """
        ele_re_tuple = (
            (r"\documentclass",
             r"\\documentclass(\[[\w,= ]*\])?{\w*}"),
            (r"\begin{document}", r"\\begin\{document\}"),
            (r"\end{document}", r"\\end\{document\}")
        )
        positions = []
        found = []

        for ele, pattern in ele_re_tuple:
            starts = [m.start(0) for m in re.finditer(pattern, latex)]
            if not starts:
                if not test:
                    raise TexDocMissingElementError(
                        _("No %s found in latex source") % ele)
                found.append(False)
            elif len(starts) > 1:
                raise TexDocParseError(
                    _("More than one %s found in latex source") % ele)
            else:
                found.append(True)
                positions.append(starts[0])

        if test:
            self.has_preamble, self.has_begindoc, self.has_enddoc = found

        if positions != sorted(positions):
            raise TexDocWrongElementOrderError(
                _("The occurance of %s are not in proper order")
                % ",".join(ele for ele, pattern in ele_re_tuple))

        if not test:
            # Both markers occur exactly once at this point.
            preamble, document = latex.split(r"\begin{document}")
            document = document.split(r"\end{document}")[0]
            self.preamble = strip_spaces(preamble)
            self.document = strip_spaces(
                document, allow_single_empty_line=True)
            assert self.preamble is not None
            assert self.document is not None

    def as_latex(self):
        # type: () -> Text
        """
        Assemble LaTeX Document

        ``\\pagestyle{empty}`` is appended to the preamble of the
        returned text when ``empty_pagestyle`` is set and the preamble
        does not have it yet; the instance itself is not modified.
        """
        preamble = self.preamble
        if self.empty_pagestyle and not self.is_empty_pagestyle_already():
            preamble += "\n\\pagestyle{empty}\n"

        return "".join([
            preamble,
            "\\begin{document}\n",
            self.document,
            "\\end{document}\n",
        ])

    def __str__(self):
        return self.document

    def __unicode__(self):
        return self.document
# {{{ Constants

ALLOWED_COMPILER = ['latex', 'pdflatex', 'xelatex']
ALLOWED_LATEX2IMG_FORMAT = ['png', 'svg']

# (compiler, image format) pairs listed as allowed combinations.
ALLOWED_COMPILER_FORMAT_COMBINATION = (
    ("latex", "png"),
    ("latex", "svg"),
    ("pdflatex", "png"),
    ("xelatex", "png")
)

# }}}


# {{{ subprocess popen wrapper

def popen_wrapper(args, os_err_exc_type=CommandError,
                  stdout_encoding='utf-8', **kwargs):
    # type: (...) -> Tuple[Text, Text, int]
    """
    Extended from django.core.management.utils.popen_wrapper.
    `**kwargs` is added so that more kwargs can be added.

    This method is especially to solve UnicodeDecodeError
    raised on Windows platform where the OS stdout is not utf-8.

    Friendly wrapper around Popen

    :param args: the command and its arguments, passed to ``Popen``.
    :param os_err_exc_type: exception class raised in place of an
        ``OSError`` coming from ``Popen``.
    :param stdout_encoding: encoding used to decode stdout (strictly).
    :param kwargs: extra keyword arguments forwarded to ``Popen``.
    :return: stdout output, stderr output and OS status code.
    """

    try:
        p = Popen(args, stdout=PIPE,
                  stderr=PIPE, close_fds=os.name != 'nt', **kwargs)
    except OSError as e:
        strerror = force_text(e.strerror, DEFAULT_LOCALE_ENCODING,
                              strings_only=True)
        # Re-raise as the requested type while keeping the traceback.
        six.reraise(os_err_exc_type, os_err_exc_type(
            string_concat(_('Error executing'), ' %s: %s')
            % (args[0], strerror)), sys.exc_info()[2])

    output, errors = p.communicate()
    return (
        force_text(output, stdout_encoding, strings_only=True,
                   errors='strict'),
        # stderr is decoded leniently with the locale encoding.
        force_text(errors, DEFAULT_LOCALE_ENCODING,
                   strings_only=True, errors='replace'),
        p.returncode
    )

# }}}


# {{{ file read and write

def get_basename_or_md5(filename, s):
    # type: (Text, Text) -> Optional[Text]
    """
    :return: `filename` with its extension removed if `filename` is
        not empty; otherwise the md5 hex digest of the utf-8 encoded
        string `s`, or None if `s` is empty as well.
    """
    if filename:
        return os.path.splitext(filename)[0]
    if not s:
        return None
    return md5(s.encode("utf-8")).hexdigest()


def file_read(filename):
    # type: (Text) -> bytes
    '''Read the content of a file and close it properly.'''
    with open(filename, 'rb') as f:
        ff = File(f)
        content = ff.read()
    return content


def file_write(filename, content):
    # type: (Text, bytes) -> None
    '''Write into a file and close it properly.'''
    with open(filename, 'wb') as f:
        ff = File(f)
        ff.write(content)

# }}}


# {{{ convert file to data uri

def get_file_data_uri(file_path):
    # type: (Text) -> Optional[Text]
    '''
    Convert file to data URI

    :return: a base64 ``data:`` URI with the file content, or None if
        `file_path` is empty.
    '''
    if not file_path:
        return None

    from base64 import b64encode
    from mimetypes import guess_type
    buf = file_read(file_path)

    # guess_type() yields None for unknown extensions; fall back to the
    # generic binary type rather than emitting "data:None;base64,...".
    mime_type = guess_type(file_path)[0] or "application/octet-stream"

    return "data:%(mime_type)s;base64,%(b64)s" % {
        "mime_type": mime_type,
        "b64": b64encode(buf).decode(),
        }

# }}}
" +LATEX_ERR_LOG_END_LINE_STARTS = "\nHere is how much of TeX's memory" +LATEX_LOG_OMIT_LINE_STARTS = ( + "See the LaTeX manual or LaTeX", + "Type H
+ {% call latex(compiler="pdflatex", image_format="png", alt="example") %} +.. code-block:: latex + + \documentclass{article} + \usepackage[utf8]{inputenc} + \usepackage[table]{xcolor} + \setlength{\arrayrulewidth}{1mm} + \setlength{\tabcolsep}{18pt} + \renewcommand{\arraystretch}{2.5} + \newcolumntype{s}{>{\columncolor[HTML]{AAACED} } p{3cm} } + \arrayrulecolor[HTML]{DB5800} + \begin{document} + \begin{tabular}{ |s|p{3cm}|p{3cm}| } + \hline + \rowcolor{lightgray} \multicolumn{3}{|c|}{Country List} \\ + \hline + Country Name or Area Name& ISO ALPHA 2 Code &ISO ALPHA 3 \\ + \hline + Afghanistan & AF &AFG \\ + \rowcolor{gray} + Aland Islands & AX & ALA \\ + Albania &AL & ALB \\ + Algeria &DZ & DZA \\ + American Samoa & AS & ASM \\ + Andorra & AD & \cellcolor[HTML]{AA0044} AND \\ + Angola & AO & AGO \\ + \hline + \end{tabular} + + \end{document} + +.. code-block:: jinja + + {% endcall %} +
+ +The above markup creates a horizontally centered image of a colored +LaTeX table:: + ++ + 
+ +Prerequisites +^^^^^^^^^^^^^ +* **TexLive** + - For installation of TexLive. + See `Instructions on TUG+ {% call latex(compiler="pdflatex", image_format="png", + tex_preamble=preamble, alt="example") %} + .. code-block:: latex + + \begin{document} + \begin{tabular}{ |s|p{3cm}|p{3cm}| } + \hline + \rowcolor{lightgray} \multicolumn{3}{|c|}{Country List} \\ + \hline + Country Name or Area Name& ISO ALPHA 2 Code &ISO ALPHA 3 \\ + \hline + Afghanistan & AF &AFG \\ + \rowcolor{gray} + Aland Islands & AX & ALA \\ + Albania &AL & ALB \\ + Algeria &DZ & DZA \\ + American Samoa & AS & ASM \\ + Andorra & AD & \cellcolor[HTML]{AA0044} AND \\ + Angola & AO & AGO \\ + \hline + \end{tabular} + \end{document} + .. code-block:: jinja + + {% endcall %} +
+ + **force_regenerate** + A boolean, if True, regenerate the image whether or not + it already exists. Defaults to ``False``. + **html_class_extra** + A string, extra html class for the ``<img>`` tag, + besides ``img-responsive``. + **alt** + A string, a brief description of the image. Defaults to the + document part of the tex source. + **empty_pagestyle** + A boolean, if ``True``, ``\pagestyle{empty}`` will + be added to existing preamble and a standalone image will be + generated. Defaults to ``True``. + +.. note:: + + ``{{``, ``}}``, ``{%``, ``%}``, ``{#`` and ``#}`` are used as marking + strings in jinja templates, so latex code submitted (including the preamble + part and self-defined commands) should avoid containing those strings, + or else jinja will just fail to render. The workaround is to manually + insert a space (spaces or tabs) between the two characters (e.g., + ``{{`` --> ``{ {``) for each of those strings that appear in the latex code. + +A more sophisticated example can be found at +`relate-example