diff --git a/loopy/cli.py b/loopy/cli.py
index 060340d59e4414dd06c714bcd423a97029ca281b..15d2336bfee7cd81559a8f2a99553e42a4bd7a2d 100644
--- a/loopy/cli.py
+++ b/loopy/cli.py
@@ -63,7 +63,8 @@ def main():
             help="Defaults to stdout ('-').", nargs='?')
     parser.add_argument("--lang", metavar="LANGUAGE", help="loopy|fortran")
     parser.add_argument("--target", choices=(
-        "opencl", "ispc", "ispc-occa", "c", "c-fortran", "cuda"),
+        "opencl", "ispc", "ispc-occa", "c", "c-fortran", "cuda",
+        "lua-opencl-occa"),
         default="opencl")
     parser.add_argument("--name")
     parser.add_argument("--transform")
@@ -91,6 +92,9 @@ def main():
     elif args.target == "cuda":
         from loopy.target.cuda import CudaTarget
         target = CudaTarget()
+    elif args.target == "lua-opencl-occa":
+        from loopy.target.lua import LuaOccaOpenCLTarget
+        target = LuaOccaOpenCLTarget()
     else:
         raise ValueError("unknown target: %s" % target)
 
@@ -212,19 +216,19 @@ def main():
         kernels = new_kernels
         del new_kernels
 
-    codes = []
-    from loopy.codegen import generate_code
-    for kernel in kernels:
-        kernel = lp.preprocess_kernel(kernel)
-        code, impl_arg_info = generate_code(kernel)
-        codes.append(code)
+    if args.target == "lua-opencl-occa":
+        from loopy.target.lua import emit_occa_lua
+        code = "\n\n".join(emit_occa_lua(knl) for knl in kernels)
 
-    if args.outfile is not None:
-        outfile = args.outfile
     else:
-        outfile = "-"
+        codes = []
+        from loopy.codegen import generate_code
+        for kernel in kernels:
+            kernel = lp.preprocess_kernel(kernel)
+            code, impl_arg_info = generate_code(kernel)
+            codes.append(code)
 
-    code = "\n\n".join(codes)
+        code = "\n\n".join(codes)
 
     # {{{ edit code if requested
 
@@ -245,6 +249,11 @@ def main():
 
     # }}}
 
+    if args.outfile is not None:
+        outfile = args.outfile
+    else:
+        outfile = "-"
+
     if outfile == "-":
         sys.stdout.write(code)
     else:
diff --git a/loopy/target/lua.py b/loopy/target/lua.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ebd2d85405932d2ede9b32144c2b7ad1dbe99c5
--- /dev/null
+++ b/loopy/target/lua.py
@@ -0,0 +1,539 @@
+"""Lua host AST builder for integration with OCCA."""
+
+from __future__ import division, absolute_import
+
+__copyright__ = "Copyright (C) 2017 Andreas Kloeckner"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import six  # noqa: F401
+import numpy as np  # noqa: F401
+
+from loopy.diagnostic import LoopyError  # noqa
+
+from pymbolic.mapper import Mapper
+from pymbolic.mapper.stringifier import StringifyMapper
+from loopy.type_inference import TypeInferenceMapper
+from loopy.target import ASTBuilderBase
+from loopy.target.opencl import OpenCLTarget, OpenCLCASTBuilder
+from loopy.kernel.data import ValueArg
+
+
+# {{{ Lua AST
+
+class Generable(object):
+    """Base class of all Lua AST nodes; subclasses yield their source lines."""
+
+    def __str__(self):
+        """Return a single string (possibly containing newlines) representing
+        this code construct."""
+        return "\n".join(l.rstrip() for l in self.generate())
+
+    def generate(self, with_semicolon=True):
+        """Generate (i.e. yield) the lines making up this code construct."""
+
+        raise NotImplementedError
+
+
+class Line(Generable):
+    """A single verbatim line of Lua source."""
+
+    def __init__(self, text=""):
+        self.text = text
+
+    def generate(self):
+        yield self.text
+
+    mapper_method = "map_line"
+
+
+class Statement(Line):
+    pass
+
+
+class LineComment(Generable):
+    """A one-line ``--`` comment."""
+
+    def __init__(self, text):
+        assert "\n" not in text
+        self.text = text
+
+    def generate(self):
+        yield "-- %s" % self.text
+
+    mapper_method = "map_line_comment"
+
+
+class Assign(Generable):
+    """An assignment ``lvalue = rvalue``; operands are formatted via ``%s``."""
+
+    def __init__(self, lvalue, rvalue):
+        self.lvalue = lvalue
+        self.rvalue = rvalue
+
+    def generate(self):
+        yield "%s = %s" % (self.lvalue, self.rvalue)
+
+
+class If(Generable):
+    """An ``if ... then ... [else ...] end`` construct.
+
+    *condition* is a string (it may span several lines); *then_* and
+    *else_* are :class:`Generable` instances.
+    """
+
+    def __init__(self, condition, then_, else_=None):
+        self.condition = condition
+
+        assert isinstance(then_, Generable)
+        if else_ is not None:
+            assert isinstance(else_, Generable)
+
+        self.then_ = then_
+        self.else_ = else_
+
+    def generate(self):
+        condition_lines = self.condition.split("\n")
+        if len(condition_lines) > 1:
+            yield "if"
+            for l in condition_lines:
+                yield "    "+l
+            yield "then"
+        else:
+            yield "if %s then" % self.condition
+
+        for line in self.then_.generate():
+            yield "    " + line
+
+        if self.else_ is not None:
+            yield "else"
+            for line in self.else_.generate():
+                yield "    " + line
+
+        yield "end"
+
+
+class Block(Generable):
+    """A sequence of statements wrapped in ``<leader line> ... end``.
+
+    Subclasses supply the opening line via :meth:`generate_leader_line`.
+    """
+
+    def __init__(self, contents=None):
+        if contents is None:
+            contents = []
+        elif isinstance(contents, Block):
+            contents = contents.contents
+        elif isinstance(contents, Generable):
+            # A lone statement (e.g. a single Assign used as a body).
+            contents = [contents]
+
+        # Always keep a private, mutable list so append()/extend()/insert()
+        # work and never touch the caller's sequence.
+        self.contents = list(contents)
+
+        for item in self.contents:
+            assert isinstance(item, Generable)
+
+    def generate_leader_line(self):
+        raise NotImplementedError()
+
+    def generate(self):
+        yield self.generate_leader_line()
+        for item in self.contents:
+            for item_line in item.generate():
+                yield "    " + item_line
+        yield "end"
+
+    def append(self, data):
+        self.contents.append(data)
+
+    def extend(self, data):
+        self.contents.extend(data)
+
+    def insert(self, i, data):
+        self.contents.insert(i, data)
+
+    mapper_method = "map_block"
+
+
+class ScopeBlock(Block):
+    """A plain ``do ... end`` scope."""
+
+    def generate_leader_line(self):
+        return "do"
+
+
+class Function(Block):
+    """A ``function name(args) ... end`` definition."""
+
+    def __init__(self, name, args, body):
+        assert isinstance(body, Generable)
+        self.name = name
+        self.args = args
+
+        super(Function, self).__init__(body)
+
+    def generate_leader_line(self):
+        return "function %s(%s)" % (self.name, ", ".join(self.args))
+
+
+class For(Block):
+    """A numeric ``for var = start, end do ... end`` loop.
+
+    Derives from :class:`Block` (not bare :class:`Generable`) so that the
+    body is stored by ``Block.__init__`` and ``generate()`` is inherited.
+    """
+
+    def __init__(self, var, start, end, body):
+        self.var = var
+        self.start = start
+        self.end = end
+
+        super(For, self).__init__(body)
+
+    def generate_leader_line(self):
+        return "for %s = %s, %s do" % (self.var, self.start, self.end)
+
+# }}}
+
+
+# {{{ expression to code
+
+class ExpressionToLuaMapper(StringifyMapper):
+    """Stringify pymbolic expressions as Lua source."""
+
+    def __init__(self, codegen_state, type_inf_mapper=None):
+        self.kernel = codegen_state.kernel
+        self.codegen_state = codegen_state
+
+        if type_inf_mapper is None:
+            type_inf_mapper = TypeInferenceMapper(self.kernel)
+        self.type_inf_mapper = type_inf_mapper
+
+    def handle_unsupported_expression(self, victim, enclosing_prec):
+        return Mapper.handle_unsupported_expression(self, victim, enclosing_prec)
+
+    def rec(self, expr, prec, type_context=None, needed_dtype=None):
+        # type_context and needed_dtype are accepted but not used here.
+        return super(ExpressionToLuaMapper, self).rec(expr, prec)
+
+    __call__ = rec
+
+    def map_constant(self, expr, enclosing_prec):
+        return repr(expr)
+
+    def map_variable(self, expr, enclosing_prec):
+        if expr.name in self.codegen_state.var_subst_map:
+            # Unimplemented: annotate_inames
+            return str(self.rec(
+                self.codegen_state.var_subst_map[expr.name],
+                enclosing_prec))
+
+        if expr.name in self.kernel.all_inames():
+            return super(ExpressionToLuaMapper, self).map_variable(
+                    expr, enclosing_prec)
+
+        var_descr = self.kernel.get_var_descriptor(expr.name)
+        if isinstance(var_descr, ValueArg):
+            return super(ExpressionToLuaMapper, self).map_variable(
+                    expr, enclosing_prec)
+
+        return super(ExpressionToLuaMapper, self).map_variable(
+                expr, enclosing_prec)
+
+    def map_subscript(self, expr, enclosing_prec):
+        return super(ExpressionToLuaMapper, self).map_subscript(
+                expr, enclosing_prec)
+
+    def map_call(self, expr, enclosing_prec):
+        from pymbolic.primitives import Variable
+        from pymbolic.mapper.stringifier import PREC_NONE
+
+        identifier = expr.function
+
+        # Unwrap first: only Variable is known to carry a .name attribute.
+        if isinstance(identifier, Variable):
+            identifier = identifier.name
+
+        if identifier in ["indexof", "indexof_vec"]:
+            raise LoopyError(
+                    "indexof, indexof_vec not yet supported in Lua")
+
+        par_dtypes = tuple(self.type_inf_mapper(par) for par in expr.parameters)
+
+        mangle_result = self.kernel.mangle_function(
+                identifier, par_dtypes,
+                ast_builder=self.codegen_state.ast_builder)
+
+        if mangle_result is None:
+            raise RuntimeError("function '%s' unknown--"
+                    "maybe you need to register a function mangler?"
+                    % identifier)
+
+        if len(mangle_result.result_dtypes) != 1:
+            raise LoopyError("functions with more or fewer than one return value "
+                    "may not be used in an expression")
+
+        # Iterate over the parameters themselves: zipping against
+        # mangle_result.arg_dtypes would fail when that is None (the
+        # 'or par_dtypes' fallback below allows for that case).
+        str_parameters = [
+                self.rec(par, PREC_NONE)
+                for par in expr.parameters]
+
+        from loopy.codegen import SeenFunction
+        self.codegen_state.seen_functions.add(
+                SeenFunction(identifier,
+                    mangle_result.target_name,
+                    mangle_result.arg_dtypes or par_dtypes))
+
+        return "%s(%s)" % (mangle_result.target_name, ", ".join(str_parameters))
+
+    def map_group_hw_index(self, expr, enclosing_prec):
+        raise LoopyError("plain Lua does not have group hw axes")
+
+    def map_local_hw_index(self, expr, enclosing_prec):
+        raise LoopyError("plain Lua does not have local hw axes")
+
+    def map_if(self, expr, enclosing_prec):
+        # Synthesize PREC_IFTHENELSE, make sure it is in the right place in the
+        # operator precedence hierarchy (right above "or").
+        from pymbolic.mapper.stringifier import PREC_LOGICAL_OR, PREC_NONE
+        PREC_IFTHENELSE = PREC_LOGICAL_OR - 1  # noqa
+
+        # NOTE(review): Lua's "a and b or c" idiom yields c whenever b is
+        # false or nil -- confirm the 'then' branch can never be either.
+        return self.parenthesize_if_needed(
+            "{cond} and {then} or {else_}".format(
+                then=self.rec(expr.then, PREC_IFTHENELSE),
+                cond=self.rec(expr.condition, PREC_IFTHENELSE),
+                else_=self.rec(expr.else_, PREC_IFTHENELSE)),
+            enclosing_prec, PREC_NONE)
+
+# }}}
+
+
+# {{{ ast builder
+
+class LuaASTBuilder(ASTBuilderBase):
+    """A Lua host AST builder for integration with OCCA.
+    """
+
+    # {{{ code generation guts
+
+    def get_function_declaration(self, codegen_state, codegen_result,
+            schedule_index):
+        return None
+
+    def get_function_definition(self, codegen_state, codegen_result,
+            schedule_index,
+            function_decl, function_body):
+
+        assert function_decl is None
+
+        return Function(
+                codegen_result.current_program(codegen_state).name,
+                [idi.name for idi in codegen_state.implemented_data_info],
+                function_body)
+
+    def get_temporary_decls(self, codegen_state, schedule_index):
+        kernel = codegen_state.kernel
+
+        result = []
+
+        for tv in sorted(
+                six.itervalues(kernel.temporary_variables),
+                key=lambda tv: tv.name):
+            if tv.shape:
+                raise NotImplementedError("array temporaries in Lua")
+
+        return result
+
+    def get_expression_to_code_mapper(self, codegen_state):
+        return ExpressionToLuaMapper(codegen_state)
+
+    @property
+    def ast_block_class(self):
+        return Block
+
+    @property
+    def ast_block_scope_class(self):
+        return ScopeBlock
+
+    def emit_sequential_loop(self, codegen_state, iname, iname_dtype,
+            lbound, ubound, inner):
+        ecm = codegen_state.expression_to_code_mapper
+
+        from pymbolic.mapper.stringifier import PREC_NONE
+
+        return For(
+                iname,
+                ecm(lbound, PREC_NONE, "i"),
+                ecm(ubound, PREC_NONE, "i"),
+                inner)
+
+    def emit_initializer(self, codegen_state, dtype, name, val_str, is_const):
+        return Assign(name, val_str)
+
+    def emit_blank_line(self):
+        return Line()
+
+    def emit_comment(self, s):
+        return LineComment(s)
+
+    def emit_if(self, condition_str, ast):
+        return If(condition_str, ast)
+
+    def emit_assignment(self, codegen_state, insn):
+        ecm = codegen_state.expression_to_code_mapper
+
+        if insn.atomicity:
+            raise NotImplementedError("atomic ops in Lua")
+
+        from pymbolic.mapper.stringifier import PREC_NONE
+
+        return Assign(
+                ecm(insn.assignee, prec=PREC_NONE, type_context=None),
+                ecm(insn.expression, prec=PREC_NONE, type_context=None))
+
+    # }}}
+
+# }}}
+
+
+# {{{ lua-occa host AST builder
+
+class LuaOccaASTBuilder(LuaASTBuilder):
+    def get_kernel_call(self, codegen_state, name, gsize, lsize, extra_args):
+        ecm = self.get_expression_to_code_mapper(codegen_state)
+
+        if not gsize:
+            gsize = (1,)
+        if not lsize:
+            lsize = (1,)
+
+        from pymbolic.mapper.stringifier import PREC_NONE
+
+        def emit_arg(arg):
+            from loopy.kernel.data import ValueArg
+            if isinstance(arg, ValueArg):
+                return '{"%s", %s}' % (
+                        self.target.get_dtype_registry().dtype_to_ctype(arg.dtype),
+                        arg.name)
+            else:
+                return arg.name
+
+        return Statement('occa_call_kernel('
+            'occa_kernels["%(kernel_name)s"]["%(device_program_name)s"], '
+            "{%(gsize)s}, {%(lsize)s}, {%(args)s})"
+            % dict(
+                kernel_name=codegen_state.kernel.name,
+                device_program_name=name,
+                gsize=", ".join(
+                    ecm(gs, prec=PREC_NONE, type_context="i")
+                    for gs in gsize),
+                lsize=", ".join(
+                    ecm(ls, prec=PREC_NONE, type_context="i")
+                    for ls in lsize),
+                args=", ".join(
+                    [
+                        emit_arg(arg)
+                        for arg in codegen_state.kernel.args]
+                    + [
+                        ecm(ea, prec=PREC_NONE)
+                        for ea in extra_args]
+                    )))
+
+# }}}
+
+
+# {{{ opencl - occa - lua target
+
+class LuaOccaOpenCLTarget(OpenCLTarget):
+    host_program_name_prefix = ""
+    host_program_name_suffix = ""
+
+    def preprocess(self, kernel):
+        return kernel
+
+    def get_host_ast_builder(self):
+        return LuaOccaASTBuilder(self)
+
+    def get_device_ast_builder(self):
+        return OpenCLCASTBuilder(self)
+
+    def get_kernel_executor_cache_key(self, queue, **kwargs):
+        raise NotImplementedError()
+
+    def get_kernel_executor(self, kernel, queue, **kwargs):
+        raise NotImplementedError()
+
+# }}}
+
+
+def emit_occa_lua(kernel):
+    """Return Lua source text for *kernel*, targeting OCCA.
+
+    The text assigns each device program's source to ``_src``, registers
+    it through ``occa_build_kernel`` in the ``occa_kernels`` table, and
+    ends with the generated host code.
+    """
+
+    kernel = kernel.copy(target=LuaOccaOpenCLTarget())
+
+    import loopy as lp
+    cgr = lp.generate_code_v2(kernel)
+
+    def multiline_str_to_lua(s):
+        s = str(s)
+
+        # Backslashes and double quotes would otherwise end or corrupt the
+        # Lua string literal; escape them before encoding the newlines.
+        s = s.replace("\\", "\\\\").replace('"', '\\"')
+
+        return '"' + s.replace("\n", "\\n\\z\n") + '"'
+
+    from mako.template import Template
+    from loopy.tools import remove_common_indentation
+    tpl = Template(remove_common_indentation("""
+        if occa_kernels == nil then
+            occa_kernels = {}
+        end
+
+        occa_kernels["${kernel.name}"] = {}
+        % for dp in cgr.device_programs:
+        _src = ${multiline_str_to_lua(dp.ast)}
+        occa_kernels["${kernel.name}"]["${dp.name}"] = occa_build_kernel(
+            occa_device, _src, "${dp.name}", occa_info)
+
+        % endfor
+
+        ${cgr.host_code()}
+        """), strict_undefined=True)
+
+    return tpl.render(
+            kernel=kernel,
+            cgr=cgr,
+            multiline_str_to_lua=multiline_str_to_lua,
+            )
+
+# vim: foldmethod=marker
diff --git a/test/test_target.py b/test/test_target.py
index b656383e7bbe008892f45159faadd2d195d67a3b..e70272f1290861f1622e6e029d9ac88c23c8904b 100644
--- a/test/test_target.py
+++ b/test/test_target.py
@@ -224,6 +224,21 @@ def test_numba_cuda_target():
     print(lp.generate_code_v2(knl).all_code())
 
 
+def test_lua_occa_target():
+    knl = lp.make_kernel(
+        "{ [i]: 0<=i 1: exec(sys.argv[1])