def python_dtype_str(self, dtype):
    """
    Return the source text that names *dtype* inside the generated wrapper.

    :arg dtype: an object exposing ``isbuiltin`` and ``name`` (as
        :class:`numpy.dtype` does)
    :returns: ``"_lpy_np."`` followed by the dtype's name
    :raises Exception: if ``dtype.isbuiltin`` is false
    """
    # Guard first: only builtin dtypes can be spelled as "_lpy_np.<name>".
    if not dtype.isbuiltin:
        raise Exception('dtype: {} not recognized'.format(dtype))

    return "_lpy_np.{}".format(dtype.name)
def generate_integer_arg_finding_from_offsets(self, gen, kernel,
        implemented_data_info):
    """
    Emit wrapper code that fills in offset arguments the caller left as None.

    For every entry of *implemented_data_info* whose ``offset_for_name`` is
    set, the generated code assigns that entry's variable: zero if the
    array it refers to is itself None, otherwise the array's ``offset``
    divided by the itemsize of the corresponding kernel argument.

    :arg gen: code generator; called with one line of source text at a time
    :arg kernel: kernel providing ``options`` and ``impl_arg_to_arg``
    :arg implemented_data_info: iterable of implemented argument descriptions
    """
    options = kernel.options

    gen("# {{{ find integer arguments from offsets")
    gen("")

    for arg in implemented_data_info:
        impl_array_name = arg.offset_for_name
        if impl_array_name is not None:
            # Only compute the offset if the caller did not supply it.
            gen("if %s is None:" % arg.name)
            with Indentation(gen):
                gen("if %s is None:" % impl_array_name)
                with Indentation(gen):
                    gen("# Output variable, we'll be allocating "
                            "it, with zero offset.")
                    gen("%s = 0" % arg.name)
                gen("else:")
                with Indentation(gen):
                    if not options.no_numpy:
                        # Tolerate objects without an 'offset' attribute
                        # by falling back to 0.
                        gen("_lpy_offset = getattr(%s, \"offset\", 0)"
                                % impl_array_name)
                    else:
                        gen("_lpy_offset = %s.offset" % impl_array_name)

                    base_arg = kernel.impl_arg_to_arg[impl_array_name]

                    if not options.skip_arg_checks:
                        # Checked variant: divide and verify the remainder.
                        gen("%s, _lpy_remdr = divmod(_lpy_offset, %d)"
                                % (arg.name, base_arg.dtype.itemsize))

                        gen("assert _lpy_remdr == 0, \"Offset of array '%s' is "
                                "not divisible by its dtype itemsize\""
                                % impl_array_name)
                        gen("del _lpy_remdr")
                    else:
                        gen("%s = _lpy_offset // %d"
                                % (arg.name, base_arg.dtype.itemsize))

                    # NOTE(review): the temporary is only deleted in the
                    # checked variant -- confirm that leaving it behind
                    # otherwise is intended.
                    if not options.skip_arg_checks:
                        gen("del _lpy_offset")

    gen("# }}}")
    gen("")
def handle_alloc(self, gen, arg, kernel_arg, strify, skip_arg_checks):
    """
    Emit wrapper code allocating an output array the caller did not pass.

    The generated code stores each axis length of ``arg.unvec_shape`` in a
    temporary ``_lpy_shape_<i>``, stores the memory order in ``_lpy_order``
    and binds ``arg.name`` to a new ``_lpy_np.empty`` array.

    :arg gen: code generator; called with one line of source text at a time
    :arg arg: implemented argument description; ``name``, ``unvec_shape``
        and ``order`` are read
    :arg kernel_arg: kernel argument; ``dtype.numpy_dtype`` selects the
        element type of the allocated array
    :arg strify: callable turning a shape expression into Python source
    :arg skip_arg_checks: if true, neither the per-axis assertions nor the
        deletion of the temporaries is emitted
    """
    num_axes = len(arg.unvec_shape)
    shape_names = ["_lpy_shape_%d" % i for i in range(num_axes)]

    for shape_name, axis_len in zip(shape_names, arg.unvec_shape):
        gen("%s = %s" % (shape_name, strify(axis_len)))

    # The order must appear as a string *literal* in the generated source.
    # Interpolating it bare produced e.g. "_lpy_order = C", which raises a
    # NameError as soon as the wrapper runs.
    gen("_lpy_order = %r" % str(arg.order))

    if not skip_arg_checks:
        for i in range(num_axes):
            gen("assert _lpy_shape_%d > 0, "
                    "\"'%s' has negative shape in axis %d\""
                    % (i, arg.name, i))

    # Spell the shape tuple out by hand; a one-element tuple needs its
    # trailing comma to remain a tuple in the generated code.
    if num_axes == 1:
        shape_src = "(%s,)" % shape_names[0]
    else:
        shape_src = "(%s)" % ", ".join(shape_names)

    gen("%(name)s = _lpy_np.empty(%(shape)s, "
            "%(dtype)s, order=%(order)s)"
            % dict(
                name=arg.name,
                shape=shape_src,
                order="_lpy_order",
                dtype=self.python_dtype_str(
                    kernel_arg.dtype.numpy_dtype)))

    if not skip_arg_checks:
        for shape_name in shape_names:
            gen("del %s" % shape_name)
        gen("del _lpy_order")

    gen("")
kernel, implemented_data_info, options): + import loopy as lp + + from loopy.kernel.data import KernelArgument + from loopy.kernel.array import ArrayBase + from loopy.symbolic import StringifyMapper + from loopy.types import NumpyType + + gen("# {{{ set up array arguments") + gen("") + + if not options.no_numpy: + gen("_lpy_encountered_numpy = False") + gen("_lpy_encountered_dev = False") + gen("") + + args = [] + + strify = StringifyMapper() + + expect_no_more_arguments = False + + for arg_idx, arg in enumerate(implemented_data_info): + is_written = arg.base_name in kernel.get_written_variables() + kernel_arg = kernel.impl_arg_to_arg.get(arg.name) + + if not issubclass(arg.arg_class, KernelArgument): + expect_no_more_arguments = True + continue + + if expect_no_more_arguments: + raise LoopyError("Further arguments encountered after arg info " + "describing a global temporary variable") + + if not issubclass(arg.arg_class, ArrayBase): + args.append(arg.name) + continue + + gen("# {{{ process %s" % arg.name) + gen("") + + if not options.no_numpy: + self.handle_non_numpy_arg(gen, arg) + + if not options.skip_arg_checks and not is_written: + gen("if %s is None:" % arg.name) + with Indentation(gen): + gen("raise RuntimeError(\"input argument '%s' must " + "be supplied\")" % arg.name) + gen("") + + if (is_written + and arg.arg_class is lp.ImageArg + and not options.skip_arg_checks): + gen("if %s is None:" % arg.name) + with Indentation(gen): + gen("raise RuntimeError(\"written image '%s' must " + "be supplied\")" % arg.name) + gen("") + + if is_written and arg.shape is None and not options.skip_arg_checks: + gen("if %s is None:" % arg.name) + with Indentation(gen): + gen("raise RuntimeError(\"written argument '%s' has " + "unknown shape and must be supplied\")" % arg.name) + gen("") + + possibly_made_by_loopy = False + + # {{{ allocate written arrays, if needed + + if is_written and arg.arg_class in [lp.GlobalArg, lp.ConstantArg] \ + and arg.shape is not None: + + if 
not isinstance(arg.dtype, NumpyType): + raise LoopyError("do not know how to pass arg of type '%s'" + % arg.dtype) + + possibly_made_by_loopy = True + gen("_lpy_made_by_loopy = False") + gen("") + + gen("if %s is None:" % arg.name) + with Indentation(gen): + self.handle_alloc( + gen, arg, kernel_arg, strify, options.skip_arg_checks) + gen("_lpy_made_by_loopy = True") + gen("") + + # }}} + + # {{{ argument checking + + if arg.arg_class in [lp.GlobalArg, lp.ConstantArg] \ + and not options.skip_arg_checks: + if possibly_made_by_loopy: + gen("if not _lpy_made_by_loopy:") + else: + gen("if True:") + + with Indentation(gen): + gen("if %s.dtype != %s:" + % (arg.name, self.python_dtype_str( + kernel_arg.dtype.numpy_dtype))) + with Indentation(gen): + gen("raise TypeError(\"dtype mismatch on argument '%s' " + "(got: %%s, expected: %s)\" %% %s.dtype)" + % (arg.name, arg.dtype, arg.name)) + + # {{{ generate shape checking code + + def strify_allowing_none(shape_axis): + if shape_axis is None: + return "None" + else: + return strify(shape_axis) + + def strify_tuple(t): + if len(t) == 0: + return "()" + else: + return "(%s,)" % ", ".join( + strify_allowing_none(sa) + for sa in t) + + shape_mismatch_msg = ( + "raise TypeError(\"shape mismatch on argument '%s' " + "(got: %%s, expected: %%s)\" " + "%% (%s.shape, %s))" + % (arg.name, arg.name, strify_tuple(arg.unvec_shape))) + + if kernel_arg.shape is None: + pass + + elif any(shape_axis is None for shape_axis in kernel_arg.shape): + gen("if len(%s.shape) != %s:" + % (arg.name, len(arg.unvec_shape))) + with Indentation(gen): + gen(shape_mismatch_msg) + + for i, shape_axis in enumerate(arg.unvec_shape): + if shape_axis is None: + continue + + gen("if %s.shape[%d] != %s:" + % (arg.name, i, strify(shape_axis))) + with Indentation(gen): + gen(shape_mismatch_msg) + + else: # not None, no Nones in tuple + gen("if %s.shape != %s:" + % (arg.name, strify(arg.unvec_shape))) + with Indentation(gen): + gen(shape_mismatch_msg) + + # }}} + + if 
def target_specific_preamble(self, gen):
    """
    Add the default imports for C execution to the wrapper's preamble.

    :arg gen: generator object; its ``add_to_preamble`` is called once per
        import line
    """
    gen.add_to_preamble("import numpy as _lpy_np")
    # The execution module lives inside the ``loopy.target.c`` package
    # (loopy/target/c/c_execution.py); ``loopy.target.c_execution`` does
    # not exist and made the generated wrapper fail on import.
    gen.add_to_preamble("import loopy.target.c.c_execution as _lpy_c")
def __call__(self, kernel, codegen_result):
    """
    Generate the Python invoker wrapping this execution target.

    :arg kernel: the loopy :class:`LoopKernel` to be executed
    :arg codegen_result: the loopy :class:`CodeGenerationResult` created
        by code generation

    :returns: py_func, a Python function that handles execution of this
        kernel
    """
    options = kernel.options
    implemented_data_info = codegen_result.implemented_data_info
    host_code = codegen_result.host_code()

    from loopy.kernel.data import KernelArgument

    # Every kernel argument becomes an optional keyword of the invoker,
    # following the target's system arguments.
    kernel_arg_defaults = [
            "%s=None" % idi.name
            for idi in implemented_data_info
            if issubclass(idi.arg_class, KernelArgument)]

    gen = PythonFunctionGenerator(
            "invoke_%s_loopy_kernel" % kernel.name,
            self.system_args + kernel_arg_defaults)

    gen.add_to_preamble("from __future__ import division")
    gen.add_to_preamble("")
    self.target_specific_preamble(gen)
    gen.add_to_preamble("")
    gen.add_to_preamble(host_code)
    gen.add_to_preamble("")

    self.intialize_system_args(gen)

    self.generate_integer_arg_finding_from_shapes(
        gen, kernel, implemented_data_info)
    self.generate_integer_arg_finding_from_offsets(
        gen, kernel, implemented_data_info)
    self.generate_integer_arg_finding_from_strides(
        gen, kernel, implemented_data_info)
    self.generate_value_arg_check(
        gen, kernel, implemented_data_info)

    args = self.generate_arg_setup(
        gen, kernel, implemented_data_info, options)

    # generate_invocation takes the generator as its first argument;
    # omitting it raised a TypeError on every wrapper generation.
    self.generate_invocation(gen, codegen_result.host_program.name, args)

    self.generate_output_handler(gen, options, kernel, implemented_data_info)

    if options.write_wrapper:
        output = gen.get()
        if options.highlight_wrapper:
            output = get_highlighted_python_code(output)

        # write_wrapper is either True (print) or a file name to write to.
        if options.write_wrapper is True:
            print(output)
        else:
            with open(options.write_wrapper, "w") as outf:
                outf.write(output)

    return gen.get_function()
def get_highlighted_python_code(text):
    """
    Return *text* highlighted as Python source for terminal output.

    If :mod:`pygments` cannot be imported, *text* is returned unchanged.
    """
    try:
        from pygments import highlight
    except ImportError:
        # Highlighting is optional: fall back to the plain text.
        return text

    from pygments.formatters import TerminalFormatter
    from pygments.lexers import PythonLexer

    lexer = PythonLexer()
    formatter = TerminalFormatter()
    return highlight(text, lexer, formatter)
def get_kernel_executor(self, knl, *args, **kwargs):
    """
    Return a :class:`CKernelExecutor` for *knl* using this target's compiler.

    Extra positional and keyword arguments are accepted and ignored.
    """
    # CKernelExecutor is defined in the c_execution submodule, not in this
    # package's __init__, so importing it from ``loopy.target.c`` fails.
    # The import stays local because c_execution itself imports from
    # ``loopy.target.c``.
    from loopy.target.c.c_execution import CKernelExecutor
    return CKernelExecutor(knl, self.compiler)
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import tempfile +import cgen +import os +import subprocess + +from loopy.target.c import CTarget, generate_header +from loopy.codegen import generate_code +from loopy.execution import (KernelExecutorBase, _Kernels, + _KernelInfo, ExecutionWrapperGeneratorBase) +from pytools import memoize_method +import weakref + +import ctypes + +import logging +logger = logging.getLogger(__name__) + +""" +The compiler module handles invocation of compilers to generate a shared lib +which can be loaded via ctypes. +""" + + +class CCompiler(object): + """ + Wraps a C compiler to build and load shared libraries. + Defaults to gcc + """ + + source_suffix = 'c' + default_exe = 'gcc' + default_compile_flags = '-std=c99 -g -O3 -fPIC'.split() + default_link_flags = '-shared'.split() + + def __init__(self, cc=None, + cflags=None, + ldflags=None): + self.exe = cc if cc else self.default_exe + self.cflags = cflags or self.default_compile_flags[:] + self.ldflags = ldflags or self.default_link_flags[:] + self.tempdir = tempfile.TemporaryDirectory() + + def _tempname(self, name): + """Build temporary filename path in tempdir.""" + return os.path.join(self.tempdir.name, name) + + def _call(self, args, **kwargs): + """Invoke compiler with arguments.""" + cwd = self.tempdir.name + args_ = [self.exe] + args + logger.debug(args_) + subprocess.check_call(args_, cwd=cwd, **kwargs) + + def build(self, code): + """Compile code, build and load shared library.""" + logger.debug(code) + c_fname = self._tempname('code.' 
class CompiledKernel(object):
    """
    A CompiledKernel wraps a loopy kernel, compiling it and loading the
    result as a shared library, and provides access to the kernel as a
    ctypes function object, wrapped by the __call__ method, which attempts
    to automatically map argument types.
    """

    # C type spellings whose ctypes counterpart is not simply
    # "c_" + spelling. Spellings missing here are looked up unchanged.
    _ctypes_suffix_for_ctype = {
        'unsigned': 'uint',
        'unsigned int': 'uint',
        'signed int': 'int',
        'signed char': 'byte',
        'unsigned char': 'ubyte',
        'signed short': 'short',
        'unsigned short': 'ushort',
        'signed long': 'long',
        'unsigned long': 'ulong',
        'long long': 'longlong',
        'unsigned long long': 'ulonglong',
    }

    def __init__(self, knl, comp=None):
        """
        Generate code for *knl*, build it and look up the kernel function.

        :arg knl: kernel whose ``target`` is a :class:`CTarget`
        :arg comp: compiler object providing ``build(code)``; a default
            :class:`CCompiler` is created if omitted
        :raises ValueError: if the generated function does not return void
        """
        assert isinstance(knl.target, CTarget)
        self.knl = knl
        self.code, _ = generate_code(knl)
        self.comp = comp or CCompiler()
        self.dll = self.comp.build(self.code)
        self.func_decl, = generate_header(knl)
        self._arg_info = []
        # TODO knl.args[:].dtype is sufficient
        self._visit_func_decl(self.func_decl)
        self.name = self.knl.name
        restype = self.func_decl.subdecl.typename
        if restype == 'void':
            self.restype = None
        else:
            raise ValueError('Unhandled restype %r' % (restype, ))
        self._fn = getattr(self.dll, self.name)
        self._fn.restype = self.restype
        self._fn.argtypes = [ctype for _, ctype in self._arg_info]
        self._prepared_call_cache = weakref.WeakKeyDictionary()

    def __call__(self, **kwargs):
        """
        Execute kernel with given args mapped to ctypes equivalents.

        Arguments are looked up in *kwargs* by kernel argument name.

        :raises KeyError: if a kernel argument is missing from *kwargs*
        """
        args_ = []
        for knl_arg, arg_t in zip(self.knl.args, self._fn.argtypes):
            arg = kwargs[knl_arg.name]
            if hasattr(arg, 'ctypes'):
                if arg.size == 0:
                    # TODO eliminate unused arguments from kernel
                    # An empty array has no data to point at. A
                    # default-constructed ctypes value is a NULL pointer
                    # for pointer types (zero for scalars); constructing a
                    # pointer type from 0.0 raises a TypeError.
                    arg_ = arg_t()
                else:
                    arg_ = arg.ctypes.data_as(arg_t)
            else:
                arg_ = arg_t(arg)
            args_.append(arg_)
        self._fn(*args_)

    def _append_arg(self, name, dtype, pointer=False):
        """Append arg info to current argument list."""
        self._arg_info.append((
            name,
            self._dtype_to_ctype(dtype, pointer=pointer)
        ))

    def _visit_const(self, node):
        """Visit const arg of kernel."""
        if isinstance(node.subdecl, cgen.RestrictPointer):
            self._visit_pointer(node.subdecl)
        else:
            pod = node.subdecl  # type: cgen.POD
            self._append_arg(pod.name, pod.dtype)

    def _visit_pointer(self, node):
        """Visit pointer argument of kernel."""
        pod = node.subdecl  # type: cgen.POD
        self._append_arg(pod.name, pod.dtype, pointer=True)

    def _visit_func_decl(self, func_decl):
        """Visit nodes of function declaration of kernel."""
        for arg in func_decl.arg_decls:
            if isinstance(arg, cgen.Const):
                self._visit_const(arg)
            elif isinstance(arg, cgen.RestrictPointer):
                self._visit_pointer(arg)
            else:
                raise ValueError('unhandled type for arg %r' % (arg, ))

    def _dtype_to_ctype(self, dtype, pointer=False):
        """
        Map NumPy dtype to equivalent ctypes type.

        :arg dtype: dtype understood by the target's dtype registry
        :arg pointer: if true, return the pointer-to-element type
        :raises AttributeError: if :mod:`ctypes` has no matching type
        """
        target = self.knl.target  # type: CTarget
        registry = target.get_dtype_registry().wrapped_registry
        typename = registry.dtype_to_ctype(dtype)
        # Multi-word C spellings (e.g. "unsigned long") have no ctypes
        # attribute of the form "c_unsigned long"; translate them first.
        suffix = self._ctypes_suffix_for_ctype.get(typename, typename)
        basetype = getattr(ctypes, 'c_' + suffix)
        if pointer:
            return ctypes.POINTER(basetype)
        return basetype
@memoize_method
def kernel_info(self, arg_to_dtype_set=frozenset(), all_kwargs=None):
    """
    Generate code for the kernel and bundle everything needed to run it.

    :arg arg_to_dtype_set: passed to ``get_typed_and_scheduled_kernel``
    :arg all_kwargs: accepted but not used by this method
    :returns: a :class:`_KernelInfo` holding the typed kernel, the compiled
        device programs, the implemented data info and the invoker
    """
    kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype_set)

    from loopy.codegen import generate_code_v2
    codegen_result = generate_code_v2(kernel)

    dev_code = codegen_result.device_code()

    if self.kernel.options.write_cl:
        output = dev_code
        if self.kernel.options.highlight_cl:
            # Use the module-level helper, which takes source text. The
            # inherited method of the same name expects an arg_to_dtype
            # mapping and regenerates the code itself, so passing it the
            # code text fails.
            from loopy.execution import get_highlighted_code
            output = get_highlighted_code(output)

        # write_cl is either True (print) or a file name to write to.
        if self.kernel.options.write_cl is True:
            print(output)
        else:
            with open(self.kernel.options.write_cl, "w") as outf:
                outf.write(output)

    if self.kernel.options.edit_cl:
        from pytools import invoke_editor
        # NOTE(review): the edited text is not consumed below; confirm how
        # it is meant to reach the compiler.
        dev_code = invoke_editor(dev_code, "code.cl")

    # One compiled kernel per device program, stored under the program name.
    c_kernels = _Kernels()
    for dp in codegen_result.device_programs:
        setattr(c_kernels, dp.name, CompiledKernel(dp, self.compiler))

    return _KernelInfo(
            kernel=kernel,
            c_kernels=c_kernels,
            implemented_data_info=codegen_result.implemented_data_info,
            invoker=self.invoker(kernel, codegen_result))
+ """ + + kwargs = self.packing_controller.unpack(kwargs) + + kernel_info = self.kernel_info(self.arg_to_dtype_set(kwargs)) + + return kernel_info.invoker( + kernel_info.c_kernels, **kwargs) diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index a8f47adb9..a2574bf8a 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -22,17 +22,12 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -import six from six.moves import range, zip from pytools import ImmutableRecord, memoize_method -from loopy.diagnostic import ParameterFinderWarning -from pytools.py_codegen import ( - Indentation, PythonFunctionGenerator) -from loopy.diagnostic import LoopyError -from loopy.types import NumpyType -from loopy.execution import KernelExecutorBase - +from pytools.py_codegen import Indentation +from loopy.execution import (KernelExecutorBase, ExecutionWrapperGeneratorBase, + _KernelInfo, _Kernels) import logging logger = logging.getLogger(__name__) @@ -43,507 +38,123 @@ logger = logging.getLogger(__name__) # Prefix all auxiliary variables with "_lpy". -def python_dtype_str(dtype): - import pyopencl.tools as cl_tools - if dtype.isbuiltin: - return "_lpy_np."+dtype.name - else: - return ("_lpy_cl_tools.get_or_register_dtype(\"%s\")" - % cl_tools.dtype_to_ctype(dtype)) - - -# {{{ integer arg finding from shapes - -def generate_integer_arg_finding_from_shapes(gen, kernel, implemented_data_info): - # a mapping from integer argument names to a list of tuples - # (arg_name, expression), where expression is a - # unary function of kernel.arg_dict[arg_name] - # returning the desired integer argument. 
- iarg_to_sources = {} - - from loopy.kernel.data import GlobalArg - from loopy.symbolic import DependencyMapper, StringifyMapper - dep_map = DependencyMapper() - - from pymbolic import var - for arg in implemented_data_info: - if arg.arg_class is GlobalArg: - sym_shape = var(arg.name).attr("shape") - for axis_nr, shape_i in enumerate(arg.shape): - if shape_i is None: - continue - - deps = dep_map(shape_i) - - if len(deps) == 1: - integer_arg_var, = deps - - if kernel.arg_dict[integer_arg_var.name].dtype.is_integral(): - from pymbolic.algorithm import solve_affine_equations_for - try: - # friggin' overkill :) - iarg_expr = solve_affine_equations_for( - [integer_arg_var.name], - [(shape_i, sym_shape.index(axis_nr))] - )[integer_arg_var] - except Exception as e: - #from traceback import print_exc - #print_exc() - - # went wrong? oh well - from warnings import warn - warn("Unable to generate code to automatically " - "find '%s' from the shape of '%s':\n%s" - % (integer_arg_var.name, arg.name, str(e)), - ParameterFinderWarning) - else: - iarg_to_sources.setdefault(integer_arg_var.name, []) \ - .append((arg.name, iarg_expr)) - - gen("# {{{ find integer arguments from shapes") - gen("") - - for iarg_name, sources in six.iteritems(iarg_to_sources): - gen("if %s is None:" % iarg_name) - with Indentation(gen): - if_stmt = "if" - for arg_name, value_expr in sources: - gen("%s %s is not None:" % (if_stmt, arg_name)) - with Indentation(gen): - gen("%s = %s" - % (iarg_name, StringifyMapper()(value_expr))) - - if_stmt = "elif" - - gen("") - - gen("# }}}") - gen("") - -# }}} - - -# {{{ integer arg finding from offsets - -def generate_integer_arg_finding_from_offsets(gen, kernel, implemented_data_info): - options = kernel.options - - gen("# {{{ find integer arguments from offsets") - gen("") - - for arg in implemented_data_info: - impl_array_name = arg.offset_for_name - if impl_array_name is not None: - gen("if %s is None:" % arg.name) - with Indentation(gen): - gen("if %s is 
None:" % impl_array_name) - with Indentation(gen): - gen("# Output variable, we'll be allocating " - "it, with zero offset.") - gen("%s = 0" % arg.name) - gen("else:") - with Indentation(gen): - if not options.no_numpy: - gen("_lpy_offset = getattr(%s, \"offset\", 0)" - % impl_array_name) - else: - gen("_lpy_offset = %s.offset" % impl_array_name) - - base_arg = kernel.impl_arg_to_arg[impl_array_name] - - if not options.skip_arg_checks: - gen("%s, _lpy_remdr = divmod(_lpy_offset, %d)" - % (arg.name, base_arg.dtype.itemsize)) - - gen("assert _lpy_remdr == 0, \"Offset of array '%s' is " - "not divisible by its dtype itemsize\"" - % impl_array_name) - gen("del _lpy_remdr") - else: - gen("%s = _lpy_offset // %d" - % (arg.name, base_arg.dtype.itemsize)) - - if not options.skip_arg_checks: - gen("del _lpy_offset") - - gen("# }}}") - gen("") - -# }}} - - -# {{{ integer arg finding from strides - -def generate_integer_arg_finding_from_strides(gen, kernel, implemented_data_info): - options = kernel.options - - gen("# {{{ find integer arguments from strides") - gen("") - - for arg in implemented_data_info: - if arg.stride_for_name_and_axis is not None: - impl_array_name, stride_impl_axis = arg.stride_for_name_and_axis - - gen("if %s is None:" % arg.name) - with Indentation(gen): - if not options.skip_arg_checks: - gen("if %s is None:" % impl_array_name) - with Indentation(gen): - gen("raise RuntimeError(\"required stride '%s' for " - "argument '%s' not given or deducible from " - "passed array\")" - % (arg.name, impl_array_name)) - - base_arg = kernel.impl_arg_to_arg[impl_array_name] - - if not options.skip_arg_checks: - gen("%s, _lpy_remdr = divmod(%s.strides[%d], %d)" - % (arg.name, impl_array_name, stride_impl_axis, - base_arg.dtype.dtype.itemsize)) - - gen("assert _lpy_remdr == 0, \"Stride %d of array '%s' is " - "not divisible by its dtype itemsize\"" - % (stride_impl_axis, impl_array_name)) - gen("del _lpy_remdr") - else: - gen("%s = _lpy_offset // %d" - % (arg.name, 
base_arg.dtype.itemsize)) - - gen("# }}}") - gen("") - -# }}} - - -# {{{ check that value args are present - -def generate_value_arg_check(gen, kernel, implemented_data_info): - if kernel.options.skip_arg_checks: - return +class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): + """ + Specialized form of the :class:`ExecutionWrapperGeneratorBase` for + pyopencl execution + """ - from loopy.kernel.data import ValueArg + def __init__(self): + system_args = [ + "_lpy_cl_kernels", "queue", "allocator=None", "wait_for=None", + # ignored if options.no_numpy + "out_host=None" + ] + super(PyOpenCLExecutionWrapperGenerator, self).__init__(system_args) - gen("# {{{ check that value args are present") - gen("") + def python_dtype_str(self, dtype): + import pyopencl.tools as cl_tools + if dtype.isbuiltin: + return "_lpy_np."+dtype.name + else: + return ("_lpy_cl_tools.get_or_register_dtype(\"%s\")" + % cl_tools.dtype_to_ctype(dtype)) - for arg in implemented_data_info: - if not issubclass(arg.arg_class, ValueArg): - continue + # {{{ handle non-numpy args - gen("if %s is None:" % arg.name) + def handle_non_numpy_arg(self, gen, arg): + gen("if isinstance(%s, _lpy_np.ndarray):" % arg.name) with Indentation(gen): - gen("raise TypeError(\"value argument '%s' " - "was not given and could not be automatically " - "determined\")" % arg.name) - - gen("# }}}") - gen("") - -# }}} - - -# {{{ arg setup - -def generate_arg_setup(gen, kernel, implemented_data_info, options): - import loopy as lp - - from loopy.kernel.data import KernelArgument - from loopy.kernel.array import ArrayBase - from loopy.symbolic import StringifyMapper - from pymbolic import var - - gen("# {{{ set up array arguments") - gen("") - - if not options.no_numpy: - gen("_lpy_encountered_numpy = False") - gen("_lpy_encountered_dev = False") - gen("") - - args = [] - - strify = StringifyMapper() - - expect_no_more_arguments = False - - for arg_idx, arg in enumerate(implemented_data_info): - is_written = 
arg.base_name in kernel.get_written_variables() - kernel_arg = kernel.impl_arg_to_arg.get(arg.name) - - if not issubclass(arg.arg_class, KernelArgument): - expect_no_more_arguments = True - continue - - if expect_no_more_arguments: - raise LoopyError("Further arguments encountered after arg info " - "describing a global temporary variable") - - if not issubclass(arg.arg_class, ArrayBase): - args.append(arg.name) - continue + gen("# synchronous, nothing to worry about") + gen("%s = _lpy_cl_array.to_device(" + "queue, %s, allocator=allocator)" + % (arg.name, arg.name)) + gen("_lpy_encountered_numpy = True") + gen("elif %s is not None:" % arg.name) + with Indentation(gen): + gen("_lpy_encountered_dev = True") - gen("# {{{ process %s" % arg.name) gen("") - if not options.no_numpy: - gen("if isinstance(%s, _lpy_np.ndarray):" % arg.name) - with Indentation(gen): - gen("# synchronous, nothing to worry about") - gen("%s = _lpy_cl_array.to_device(" - "queue, %s, allocator=allocator)" - % (arg.name, arg.name)) - gen("_lpy_encountered_numpy = True") - gen("elif %s is not None:" % arg.name) - with Indentation(gen): - gen("_lpy_encountered_dev = True") - - gen("") - - if not options.skip_arg_checks and not is_written: - gen("if %s is None:" % arg.name) - with Indentation(gen): - gen("raise RuntimeError(\"input argument '%s' must " - "be supplied\")" % arg.name) - gen("") - - if (is_written - and arg.arg_class is lp.ImageArg - and not options.skip_arg_checks): - gen("if %s is None:" % arg.name) - with Indentation(gen): - gen("raise RuntimeError(\"written image '%s' must " - "be supplied\")" % arg.name) - gen("") - - if is_written and arg.shape is None and not options.skip_arg_checks: - gen("if %s is None:" % arg.name) - with Indentation(gen): - gen("raise RuntimeError(\"written argument '%s' has " - "unknown shape and must be supplied\")" % arg.name) - gen("") - - possibly_made_by_loopy = False - - # {{{ allocate written arrays, if needed + # {{{ handle allocation of unspecified 
arguments - if is_written and arg.arg_class in [lp.GlobalArg, lp.ConstantArg] \ - and arg.shape is not None: - - if not isinstance(arg.dtype, NumpyType): - raise LoopyError("do not know how to pass arg of type '%s'" - % arg.dtype) - - possibly_made_by_loopy = True - gen("_lpy_made_by_loopy = False") - gen("") - - gen("if %s is None:" % arg.name) - with Indentation(gen): - num_axes = len(arg.strides) - for i in range(num_axes): - gen("_lpy_shape_%d = %s" % (i, strify(arg.unvec_shape[i]))) - - itemsize = kernel_arg.dtype.numpy_dtype.itemsize - for i in range(num_axes): - gen("_lpy_strides_%d = %s" % (i, strify( - itemsize*arg.unvec_strides[i]))) - - if not options.skip_arg_checks: - for i in range(num_axes): - gen("assert _lpy_strides_%d > 0, " - "\"'%s' has negative stride in axis %d\"" - % (i, arg.name, i)) - - sym_strides = tuple( - var("_lpy_strides_%d" % i) - for i in range(num_axes)) - sym_shape = tuple( - var("_lpy_shape_%d" % i) - for i in range(num_axes)) - - alloc_size_expr = (sum(astrd*(alen-1) - for alen, astrd in zip(sym_shape, sym_strides)) - + itemsize) - - gen("_lpy_alloc_size = %s" % strify(alloc_size_expr)) - gen("%(name)s = _lpy_cl_array.Array(queue, %(shape)s, " - "%(dtype)s, strides=%(strides)s, " - "data=allocator(_lpy_alloc_size), allocator=allocator)" - % dict( - name=arg.name, - shape=strify(sym_shape), - strides=strify(sym_strides), - dtype=python_dtype_str(kernel_arg.dtype.numpy_dtype))) - - if not options.skip_arg_checks: - for i in range(num_axes): - gen("del _lpy_shape_%d" % i) - gen("del _lpy_strides_%d" % i) - gen("del _lpy_alloc_size") - gen("") - - gen("_lpy_made_by_loopy = True") - gen("") - - # }}} - - # {{{ argument checking - - if arg.arg_class in [lp.GlobalArg, lp.ConstantArg] \ - and not options.skip_arg_checks: - if possibly_made_by_loopy: - gen("if not _lpy_made_by_loopy:") - else: - gen("if True:") - - with Indentation(gen): - gen("if %s.dtype != %s:" - % (arg.name, python_dtype_str(kernel_arg.dtype.numpy_dtype))) - with 
Indentation(gen): - gen("raise TypeError(\"dtype mismatch on argument '%s' " - "(got: %%s, expected: %s)\" %% %s.dtype)" - % (arg.name, arg.dtype, arg.name)) - - # {{{ generate shape checking code - - def strify_allowing_none(shape_axis): - if shape_axis is None: - return "None" - else: - return strify(shape_axis) - - def strify_tuple(t): - if len(t) == 0: - return "()" - else: - return "(%s,)" % ", ".join( - strify_allowing_none(sa) - for sa in t) - - shape_mismatch_msg = ( - "raise TypeError(\"shape mismatch on argument '%s' " - "(got: %%s, expected: %%s)\" " - "%% (%s.shape, %s))" - % (arg.name, arg.name, strify_tuple(arg.unvec_shape))) - - if kernel_arg.shape is None: - pass - - elif any(shape_axis is None for shape_axis in kernel_arg.shape): - gen("if len(%s.shape) != %s:" - % (arg.name, len(arg.unvec_shape))) - with Indentation(gen): - gen(shape_mismatch_msg) - - for i, shape_axis in enumerate(arg.unvec_shape): - if shape_axis is None: - continue - - gen("if %s.shape[%d] != %s:" - % (arg.name, i, strify(shape_axis))) - with Indentation(gen): - gen(shape_mismatch_msg) - - else: # not None, no Nones in tuple - gen("if %s.shape != %s:" - % (arg.name, strify(arg.unvec_shape))) - with Indentation(gen): - gen(shape_mismatch_msg) - - # }}} - - if arg.unvec_strides and kernel_arg.dim_tags: - itemsize = kernel_arg.dtype.numpy_dtype.itemsize - sym_strides = tuple( - itemsize*s_i for s_i in arg.unvec_strides) - gen("if %s.strides != %s:" - % (arg.name, strify(sym_strides))) - with Indentation(gen): - gen("raise TypeError(\"strides mismatch on " - "argument '%s' (got: %%s, expected: %%s)\" " - "%% (%s.strides, %s))" - % (arg.name, arg.name, strify(sym_strides))) - - if not arg.allows_offset: - gen("if %s.offset:" % arg.name) - with Indentation(gen): - gen("raise ValueError(\"Argument '%s' does not " - "allow arrays with offsets. Try passing " - "default_offset=loopy.auto to make_kernel()." 
- "\")" % arg.name) - gen("") - - # }}} - - if possibly_made_by_loopy and not options.skip_arg_checks: - gen("del _lpy_made_by_loopy") + def handle_alloc(self, gen, arg, kernel_arg, strify, skip_arg_checks): + """ + Handle allocation of non-specified arguments for pyopencl execution + """ + from pymbolic import var + + num_axes = len(arg.strides) + for i in range(num_axes): + gen("_lpy_shape_%d = %s" % (i, strify(arg.unvec_shape[i]))) + + itemsize = kernel_arg.dtype.numpy_dtype.itemsize + for i in range(num_axes): + gen("_lpy_strides_%d = %s" % (i, strify( + itemsize*arg.unvec_strides[i]))) + + if not skip_arg_checks: + for i in range(num_axes): + gen("assert _lpy_strides_%d > 0, " + "\"'%s' has negative stride in axis %d\"" + % (i, arg.name, i)) + + sym_strides = tuple( + var("_lpy_strides_%d" % i) + for i in range(num_axes)) + sym_shape = tuple( + var("_lpy_shape_%d" % i) + for i in range(num_axes)) + + alloc_size_expr = (sum(astrd*(alen-1) + for alen, astrd in zip(sym_shape, sym_strides)) + + itemsize) + + gen("_lpy_alloc_size = %s" % strify(alloc_size_expr)) + gen("%(name)s = _lpy_cl_array.Array(queue, %(shape)s, " + "%(dtype)s, strides=%(strides)s, " + "data=allocator(_lpy_alloc_size), allocator=allocator)" + % dict( + name=arg.name, + shape=strify(sym_shape), + strides=strify(sym_strides), + dtype=self.python_dtype_str(kernel_arg.dtype.numpy_dtype))) + + if not skip_arg_checks: + for i in range(num_axes): + gen("del _lpy_shape_%d" % i) + gen("del _lpy_strides_%d" % i) + gen("del _lpy_alloc_size") gen("") - if arg.arg_class in [lp.GlobalArg, lp.ConstantArg]: - args.append("%s.base_data" % arg.name) - else: - args.append("%s" % arg.name) + # }}} - gen("") + def target_specific_preamble(self, gen): + """ + Add default pyopencl imports to preamble + """ + gen.add_to_preamble("import numpy as _lpy_np") + gen.add_to_preamble("import pyopencl as _lpy_cl") + gen.add_to_preamble("import pyopencl.array as _lpy_cl_array") + gen.add_to_preamble("import pyopencl.tools as 
_lpy_cl_tools") + + def initialize_system_args(self, gen): + """ + Initializes possibly empty system arguments + """ + gen("if allocator is None:") + with Indentation(gen): + gen("allocator = _lpy_cl_tools.DeferredAllocator(queue.context)") gen("") - gen("# }}}") - gen("") - - return args - -# }}} - - -def generate_invoker(kernel, codegen_result): - options = kernel.options - implemented_data_info = codegen_result.implemented_data_info - host_code = codegen_result.host_code() - - system_args = [ - "_lpy_cl_kernels", "queue", "allocator=None", "wait_for=None", - # ignored if options.no_numpy - "out_host=None" - ] - - from loopy.kernel.data import KernelArgument - gen = PythonFunctionGenerator( - "invoke_%s_loopy_kernel" % kernel.name, - system_args + [ - "%s=None" % idi.name - for idi in implemented_data_info - if issubclass(idi.arg_class, KernelArgument) - ]) - - gen.add_to_preamble("from __future__ import division") - gen.add_to_preamble("") - gen.add_to_preamble("import pyopencl as _lpy_cl") - gen.add_to_preamble("import pyopencl.array as _lpy_cl_array") - gen.add_to_preamble("import pyopencl.tools as _lpy_cl_tools") - gen.add_to_preamble("import numpy as _lpy_np") - gen.add_to_preamble("") - gen.add_to_preamble(host_code) - gen.add_to_preamble("") - - gen("if allocator is None:") - with Indentation(gen): - gen("allocator = _lpy_cl_tools.DeferredAllocator(queue.context)") - gen("") - - generate_integer_arg_finding_from_shapes(gen, kernel, implemented_data_info) - generate_integer_arg_finding_from_offsets(gen, kernel, implemented_data_info) - generate_integer_arg_finding_from_strides(gen, kernel, implemented_data_info) - generate_value_arg_check(gen, kernel, implemented_data_info) - - args = generate_arg_setup(gen, kernel, implemented_data_info, options) - # {{{ generate invocation - gen("_lpy_evt = {kernel_name}({args})" + def generate_invocation(self, gen, kernel_name, args): + gen("for knl in _lpy_cl_kernels:") + with Indentation(gen): + 
gen("_lpy_evt = {kernel_name}({args})" .format( - kernel_name=codegen_result.host_program.name, + kernel_name=kernel_name, args=", ".join( ["_lpy_cl_kernels", "queue"] + args @@ -551,72 +162,52 @@ def generate_invoker(kernel, codegen_result): # }}} - # {{{ output + # {{{ - if not options.no_numpy: - gen("if out_host is None and (_lpy_encountered_numpy " - "and not _lpy_encountered_dev):") - with Indentation(gen): - gen("out_host = True") + def generate_output_handler( + self, gen, options, kernel, implemented_data_info): - gen("if out_host:") - with Indentation(gen): - gen("pass") # if no outputs (?!) - for arg in implemented_data_info: - if not issubclass(arg.arg_class, KernelArgument): - continue + from loopy.kernel.data import KernelArgument - is_written = arg.base_name in kernel.get_written_variables() - if is_written: - gen("%s = %s.get(queue=queue)" % (arg.name, arg.name)) - - gen("") - - if options.return_dict: - gen("return _lpy_evt, {%s}" - % ", ".join("\"%s\": %s" % (arg.name, arg.name) - for arg in implemented_data_info - if issubclass(arg.arg_class, KernelArgument) - if arg.base_name in kernel.get_written_variables())) - else: - out_args = [arg - for arg in implemented_data_info - if issubclass(arg.arg_class, KernelArgument) - if arg.base_name in kernel.get_written_variables()] - if out_args: - gen("return _lpy_evt, (%s,)" - % ", ".join(arg.name for arg in out_args)) - else: - gen("return _lpy_evt, ()") + if not options.no_numpy: + gen("if out_host is None and (_lpy_encountered_numpy " + "and not _lpy_encountered_dev):") + with Indentation(gen): + gen("out_host = True") - # }}} + gen("if out_host:") + with Indentation(gen): + gen("pass") # if no outputs (?!) 
+ for arg in implemented_data_info: + if not issubclass(arg.arg_class, KernelArgument): + continue - if options.write_wrapper: - output = gen.get() - if options.highlight_wrapper: - output = get_highlighted_python_code(output) + gen("") - if options.write_wrapper is True: - print(output) + if options.return_dict: + gen("return None, {%s}" + % ", ".join("\"%s\": %s" % (arg.name, arg.name) + for arg in implemented_data_info + if issubclass(arg.arg_class, KernelArgument) + if arg.base_name in kernel.get_written_variables())) else: - with open(options.write_wrapper, "w") as outf: - outf.write(output) - - return gen.get_function() + out_args = [arg + for arg in implemented_data_info + if issubclass(arg.arg_class, KernelArgument) + if arg.base_name in kernel.get_written_variables()] + if out_args: + gen("return None, (%s,)" + % ", ".join(arg.name for arg in out_args)) + else: + gen("return None, ()") + # }}} # }}} # {{{ kernel executor -class _CLKernelInfo(ImmutableRecord): - pass - - -class _CLKernels(object): - pass - class PyOpenCLKernelExecutor(KernelExecutorBase): """An object connecting a kernel to a :class:`pyopencl.Context` @@ -635,7 +226,8 @@ class PyOpenCLKernelExecutor(KernelExecutorBase): specific arguments. 
""" - super(PyOpenCLKernelExecutor, self).__init__(kernel) + super(PyOpenCLKernelExecutor, self).__init__( + kernel, invoker=PyOpenCLExecutionWrapperGenerator()) self.context = context @@ -644,10 +236,11 @@ class PyOpenCLKernelExecutor(KernelExecutorBase): self.kernel = kernel.copy(target=PyOpenCLTarget(context.devices[0])) @memoize_method - def cl_kernel_info(self, arg_to_dtype_set=frozenset(), all_kwargs=None): + def kernel_info(self, arg_to_dtype_set=frozenset(), all_kwargs=None): kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype_set) from loopy.codegen import generate_code_v2 + from loopy.execution import get_highlighted_code codegen_result = generate_code_v2(kernel) dev_code = codegen_result.device_code() @@ -655,7 +248,7 @@ class PyOpenCLKernelExecutor(KernelExecutorBase): if self.kernel.options.write_cl: output = dev_code if self.kernel.options.highlight_cl: - output = get_highlighted_cl_code(output) + output = get_highlighted_code(output) if self.kernel.options.write_cl is True: print(output) @@ -673,33 +266,15 @@ class PyOpenCLKernelExecutor(KernelExecutorBase): cl.Program(self.context, dev_code) .build(options=kernel.options.cl_build_options)) - cl_kernels = _CLKernels() + cl_kernels = _Kernels() for dp in codegen_result.device_programs: setattr(cl_kernels, dp.name, getattr(cl_program, dp.name)) - return _CLKernelInfo( + return _KernelInfo( kernel=kernel, cl_kernels=cl_kernels, implemented_data_info=codegen_result.implemented_data_info, - invoker=generate_invoker(kernel, codegen_result)) - - # {{{ debugging aids - - def get_code(self, arg_to_dtype=None): - if arg_to_dtype is not None: - arg_to_dtype = frozenset(six.iteritems(arg_to_dtype)) - - kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype) - - from loopy.codegen import generate_code_v2 - code = generate_code_v2(kernel) - return code.device_code() - - def get_highlighted_code(self, arg_to_dtype=None): - return get_highlighted_cl_code( - self.get_code(arg_to_dtype)) - - # }}} + 
invoker=self.invoker(kernel, codegen_result)) def __call__(self, queue, **kwargs): """ @@ -742,29 +317,4 @@ class PyOpenCLKernelExecutor(KernelExecutorBase): # }}} - -def get_highlighted_python_code(text): - try: - from pygments import highlight - except ImportError: - return text - else: - from pygments.lexers import PythonLexer - from pygments.formatters import TerminalFormatter - - return highlight(text, PythonLexer(), TerminalFormatter()) - - -def get_highlighted_cl_code(text): - try: - from pygments import highlight - except ImportError: - return text - else: - from pygments.lexers import CLexer - from pygments.formatters import TerminalFormatter - - return highlight(text, CLexer(), TerminalFormatter()) - - # vim: foldmethod=marker -- GitLab