diff --git a/loopy/auto_test.py b/loopy/auto_test.py index 56ed87176f891d362ac0555024ef0d8098cd843e..a91eb51a0a90e987ec05a8ebf71a6759e047f5d3 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -403,7 +403,8 @@ def auto_test_vs_ref( raise LoopyError("ref_knl and test_knl argument lists disagree at index " "%d (1-based)" % (i+1)) - from loopy.compiled import CompiledKernel, get_highlighted_cl_code + from loopy.compiled import CompiledKernel + from loopy.target.execution import get_highlighted_code if isinstance(op_count, (int, float)): warn("op_count should be a list", stacklevel=2) @@ -448,15 +449,15 @@ def auto_test_vs_ref( print(75*"-") print("Reference Code:") print(75*"-") - print(get_highlighted_cl_code(ref_compiled.get_code())) + print(get_highlighted_code(ref_compiled.get_code())) print(75*"-") - ref_cl_kernel_info = ref_compiled.cl_kernel_info(frozenset()) + ref_kernel_info = ref_compiled.kernel_info(frozenset()) try: ref_args, ref_arg_data = \ make_ref_args(ref_sched_kernel, - ref_cl_kernel_info.implemented_data_info, + ref_kernel_info.implemented_data_info, ref_queue, parameters) ref_args["out_host"] = False except cl.RuntimeError as e: @@ -545,10 +546,10 @@ def auto_test_vs_ref( compiled = CompiledKernel(ctx, kernel) if args is None: - cl_kernel_info = compiled.cl_kernel_info(frozenset()) + kernel_info = compiled.kernel_info(frozenset()) args = make_args(kernel, - cl_kernel_info.implemented_data_info, + kernel_info.implemented_data_info, queue, ref_arg_data, parameters) args["out_host"] = False diff --git a/loopy/check.py b/loopy/check.py index 6bac368381c708b72b2b7f235792df97d0bcd15e..7e661b566b15c47ec99e03ffdeb035057602da76 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -779,8 +779,8 @@ def check_implemented_domains(kernel, implemented_domains, code=None): print(79*"-") print("CODE:") print(79*"-") - from loopy.compiled import get_highlighted_cl_code - print(get_highlighted_cl_code(code)) + from loopy.target.execution import 
get_highlighted_code + print(get_highlighted_code(code)) print(79*"-") raise LoopyError("sanity check failed--implemented and desired " diff --git a/loopy/compiled.py b/loopy/compiled.py index b3e4fe0589dc9a62d7bdefd7152784560be0ca8a..613bca56fc1de23a66d45d8f990f91f9d3f9b949 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -24,8 +24,7 @@ THE SOFTWARE. from loopy.target.pyopencl_execution import ( # noqa - PyOpenCLKernelExecutor, - get_highlighted_cl_code) + PyOpenCLKernelExecutor) # {{{ compatibility diff --git a/loopy/execution.py b/loopy/execution.py deleted file mode 100644 index a1228f8f3bb3493e83936ee0b3998bbd5b8cdcc2..0000000000000000000000000000000000000000 --- a/loopy/execution.py +++ /dev/null @@ -1,239 +0,0 @@ -from __future__ import division, with_statement, absolute_import - -__copyright__ = "Copyright (C) 2012-16 Andreas Kloeckner" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-""" - - -import six -import numpy as np -from pytools import ImmutableRecord, memoize_method -from loopy.diagnostic import LoopyError - -import logging -logger = logging.getLogger(__name__) - -from pytools.persistent_dict import WriteOncePersistentDict -from loopy.tools import LoopyKeyBuilder -from loopy.version import DATA_MODEL_VERSION - - -# {{{ object array argument packing - -class _PackingInfo(ImmutableRecord): - """ - .. attribute:: name - .. attribute:: sep_shape - - .. attribute:: subscripts_and_names - - A list of type ``[(index, unpacked_name), ...]``. - """ - - -class SeparateArrayPackingController(object): - """For argument arrays with axes tagged to be implemented as separate - arrays, this class provides preprocessing of the incoming arguments so that - all sub-arrays may be passed in one object array (under the original, - un-split argument name) and are unpacked into separate arrays before being - passed to the kernel. - - It also repacks outgoing arrays of this type back into an object array. 
- """ - - def __init__(self, kernel): - # map from arg name - self.packing_info = {} - - from loopy.kernel.array import ArrayBase - for arg in kernel.args: - if not isinstance(arg, ArrayBase): - continue - - if arg.shape is None or arg.dim_tags is None: - continue - - subscripts_and_names = arg.subscripts_and_names() - - if subscripts_and_names is None: - continue - - self.packing_info[arg.name] = _PackingInfo( - name=arg.name, - sep_shape=arg.sep_shape(), - subscripts_and_names=subscripts_and_names, - is_written=arg.name in kernel.get_written_variables()) - - def unpack(self, kernel_kwargs): - if not self.packing_info: - return kernel_kwargs - - kernel_kwargs = kernel_kwargs.copy() - - for packing_info in six.itervalues(self.packing_info): - arg_name = packing_info.name - if packing_info.name in kernel_kwargs: - arg = kernel_kwargs[arg_name] - for index, unpacked_name in packing_info.subscripts_and_names: - assert unpacked_name not in kernel_kwargs - kernel_kwargs[unpacked_name] = arg[index] - del kernel_kwargs[arg_name] - - return kernel_kwargs - - def pack(self, outputs): - if not self.packing_info: - return outputs - - for packing_info in six.itervalues(self.packing_info): - if not packing_info.is_written: - continue - - result = outputs[packing_info.name] = \ - np.zeros(packing_info.sep_shape, dtype=np.object) - - for index, unpacked_name in packing_info.subscripts_and_names: - result[index] = outputs.pop(unpacked_name) - - return outputs - -# }}} - - -# {{{ KernelExecutorBase - -typed_and_scheduled_cache = WriteOncePersistentDict( - "loopy-typed-and-scheduled-cache-v1-"+DATA_MODEL_VERSION, - key_builder=LoopyKeyBuilder()) - - -class KernelExecutorBase(object): - """An object connecting a kernel to a :class:`pyopencl.Context` - for execution. - - .. automethod:: __init__ - .. 
automethod:: __call__ - """ - - def __init__(self, kernel): - """ - :arg kernel: a loopy.LoopKernel - """ - - self.kernel = kernel - - self.packing_controller = SeparateArrayPackingController(kernel) - - self.output_names = tuple(arg.name for arg in self.kernel.args - if arg.name in self.kernel.get_written_variables()) - - self.has_runtime_typed_args = any( - arg.dtype is None - for arg in kernel.args) - - def get_typed_and_scheduled_kernel_uncached(self, arg_to_dtype_set): - from loopy.kernel.tools import add_dtypes - - kernel = self.kernel - - if arg_to_dtype_set: - var_to_dtype = {} - for var, dtype in arg_to_dtype_set: - try: - dest_name = kernel.impl_arg_to_arg[var].name - except KeyError: - dest_name = var - - try: - var_to_dtype[dest_name] = dtype - except KeyError: - raise LoopyError("cannot set type for '%s': " - "no known variable/argument with that name" - % var) - - kernel = add_dtypes(kernel, var_to_dtype) - - from loopy.type_inference import infer_unknown_types - kernel = infer_unknown_types(kernel, expect_completion=True) - - if kernel.schedule is None: - from loopy.preprocess import preprocess_kernel - kernel = preprocess_kernel(kernel) - - from loopy.schedule import get_one_scheduled_kernel - kernel = get_one_scheduled_kernel(kernel) - - return kernel - - @memoize_method - def get_typed_and_scheduled_kernel(self, arg_to_dtype_set): - from loopy import CACHING_ENABLED - - from loopy.preprocess import prepare_for_caching - # prepare_for_caching() gets run by preprocess, but the kernel at this - # stage is not guaranteed to be preprocessed. 
- cacheable_kernel = prepare_for_caching(self.kernel) - cache_key = (type(self).__name__, cacheable_kernel, arg_to_dtype_set) - - if CACHING_ENABLED: - try: - return typed_and_scheduled_cache[cache_key] - except KeyError: - pass - - logger.debug("%s: typed-and-scheduled cache miss" % self.kernel.name) - - kernel = self.get_typed_and_scheduled_kernel_uncached(arg_to_dtype_set) - - if CACHING_ENABLED: - typed_and_scheduled_cache.store_if_not_present(cache_key, kernel) - - return kernel - - def arg_to_dtype_set(self, kwargs): - if not self.has_runtime_typed_args: - return None - - from loopy.types import NumpyType - target = self.kernel.target - - impl_arg_to_arg = self.kernel.impl_arg_to_arg - arg_to_dtype = {} - for arg_name, val in six.iteritems(kwargs): - arg = impl_arg_to_arg.get(arg_name, None) - - if arg is None: - # offsets, strides and such - continue - - if arg.dtype is None and val is not None: - try: - dtype = val.dtype - except AttributeError: - pass - else: - arg_to_dtype[arg_name] = NumpyType(dtype, target) - - return frozenset(six.iteritems(arg_to_dtype)) - -# }}} - -# vim: foldmethod=marker diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index e870f46e60ebf9c817cc29db529562031b693bb5..a2cfbb3600fe73cea3b1dae4e0d203b68aeaabe1 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -28,7 +28,7 @@ import six import numpy as np # noqa from loopy.kernel.data import CallMangleInfo -from loopy.target import TargetBase, ASTBuilderBase, DummyHostASTBuilder +from loopy.target import TargetBase, ASTBuilderBase from loopy.diagnostic import LoopyError, LoopyTypeError from cgen import Pointer, NestedDeclarator, Block from cgen.mapper import IdentityMapper as CASTIdentityMapperBase @@ -270,7 +270,7 @@ class CTarget(TargetBase): return False def get_host_ast_builder(self): - return DummyHostASTBuilder(self) + return CASTBuilder(self) def get_device_ast_builder(self): return CASTBuilder(self) @@ -300,9 +300,34 @@ class 
CTarget(TargetBase): # These kind of shouldn't be here. return self.get_dtype_registry().dtype_to_ctype(dtype) + def get_kernel_executor_cache_key(self, *args, **kwargs): + return None # TODO: ??? + + def get_kernel_executor(self, knl, *args, **kwargs): + raise NotImplementedError() + # }}} +# {{{ + +class ExecutableCTarget(CTarget): + """ + An executable CTarget that uses (by default) JIT compilation of C-code + """ + from .c_execution import CCompiler + + def __init__(self, compiler=CCompiler(), fortran_abi=False): + super(ExecutableCTarget, self).__init__(fortran_abi=fortran_abi) + self.compiler = compiler + + def get_kernel_executor(self, knl, *args, **kwargs): + from loopy.target.c.c_execution import CKernelExecutor + return CKernelExecutor(knl, compiler=self.compiler) + +# }}} + + class _ConstRestrictPointer(Pointer): def get_decl_pair(self): sub_tp, sub_decl = self.subdecl.get_decl_pair() @@ -508,6 +533,9 @@ class CASTBuilder(ASTBuilderBase): [self.idi_to_cgen_declarator(codegen_state.kernel, idi) for idi in codegen_state.implemented_data_info])) + def get_kernel_call(self, codegen_state, name, gsize, lsize, extra_args): + return None + def get_temporary_decls(self, codegen_state, schedule_index): from loopy.kernel.data import temp_var_scope diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py new file mode 100644 index 0000000000000000000000000000000000000000..164d59b9e20a78668bb21debc60121e7c47542b5 --- /dev/null +++ b/loopy/target/c/c_execution.py @@ -0,0 +1,419 @@ +from __future__ import division, with_statement, absolute_import + +__copyright__ = "Copyright (C) 2017 Nick Curtis" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and 
to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import tempfile +import os + +from loopy.target.execution import (KernelExecutorBase, _KernelInfo, + ExecutionWrapperGeneratorBase, get_highlighted_code) +from pytools import memoize_method +from pytools.py_codegen import (Indentation) +from codepy.toolchain import guess_toolchain +from codepy.jit import compile_from_string +import six +import ctypes + +import numpy as np + +import logging +logger = logging.getLogger(__name__) + + +class CExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): + """ + Specialized form of the :class:`ExecutionWrapperGeneratorBase` for + pyopencl execution + """ + + def __init__(self): + system_args = ["_lpy_c_kernels"] + super(CExecutionWrapperGenerator, self).__init__(system_args) + + def python_dtype_str(self, dtype): + if np.dtype(str(dtype)).isbuiltin: + return "_lpy_np."+dtype.name + raise Exception('dtype: {} not recognized'.format(dtype)) + + # {{{ handle non numpy arguements + + def handle_non_numpy_arg(self, gen, arg): + pass + + # }}} + + # {{{ handle allocation of unspecified arguements + + def handle_alloc(self, gen, arg, kernel_arg, strify, skip_arg_checks): + """ + Handle allocation of non-specified arguements for C-execution + """ + from pymbolic import var + + num_axes = len(arg.unvec_shape) + for i in range(num_axes): 
+ gen("_lpy_shape_%d = %s" % (i, strify(arg.unvec_shape[i]))) + + itemsize = kernel_arg.dtype.numpy_dtype.itemsize + for i in range(num_axes): + gen("_lpy_strides_%d = %s" % (i, strify( + itemsize*arg.unvec_strides[i]))) + + if not skip_arg_checks: + for i in range(num_axes): + gen("assert _lpy_strides_%d > 0, " + "\"'%s' has negative stride in axis %d\"" + % (i, arg.name, i)) + + sym_strides = tuple( + var("_lpy_strides_%d" % i) + for i in range(num_axes)) + + sym_shape = tuple( + var("_lpy_shape_%d" % i) + for i in range(num_axes)) + + # find order of array + order = "'C'" if arg.unvec_strides[-1] == 1 else "'F'" + + gen("%(name)s = _lpy_np.empty(%(shape)s, " + "%(dtype)s, order=%(order)s)" + % dict( + name=arg.name, + shape=strify(sym_shape), + dtype=self.python_dtype_str( + kernel_arg.dtype.numpy_dtype), + order=order)) + + #check strides + if not skip_arg_checks: + gen("assert %(strides)s == %(name)s.strides, " + "'Strides of loopy created array %(name)s, " + "do not match expected.'" % + dict(name=arg.name, + strides=strify(sym_strides))) + for i in range(num_axes): + gen("del _lpy_shape_%d" % i) + gen("del _lpy_strides_%d" % i) + gen("") + + # }}} + + def target_specific_preamble(self, gen): + """ + Add default C-imports to preamble + """ + gen.add_to_preamble("import numpy as _lpy_np") + + def initialize_system_args(self, gen): + """ + Initializes possibly empty system arguements + """ + pass + + # {{{ generate invocation + + def generate_invocation(self, gen, kernel_name, args): + gen("for knl in _lpy_c_kernels:") + with Indentation(gen): + gen('knl({args})'.format( + args=", ".join(args))) + # }}} + + # {{{ + + def generate_output_handler( + self, gen, options, kernel, implemented_data_info): + + from loopy.kernel.data import KernelArgument + + if options.return_dict: + gen("return None, {%s}" + % ", ".join("\"%s\": %s" % (arg.name, arg.name) + for arg in implemented_data_info + if issubclass(arg.arg_class, KernelArgument) + if arg.base_name in 
kernel.get_written_variables())) + else: + out_args = [arg + for arg in implemented_data_info + if issubclass(arg.arg_class, KernelArgument) + if arg.base_name in kernel.get_written_variables()] + if out_args: + gen("return None, (%s,)" + % ", ".join(arg.name for arg in out_args)) + else: + gen("return None, ()") + + # }}} + + def generate_host_code(self, gen, codegen_result): + # "host" code for C is embedded in the same file as the "device" code + # this will enable a logical jumping off point for global barriers for + # OpenMP, etc. + pass + + def get_arg_pass(self, arg): + return arg.name + + +class CCompiler(object): + """ + The compiler module handles invocation of compilers to generate a shared lib + using codepy, which can subsequently be loaded via ctypes. + + The general strategy here is as follows: + + 1. A :class:`codepy.Toolchain` is guessed from distutils. + The user may override any flags obtained therein by passing in arguements + to cc, cflags, etc. + + 2. The kernel source is built into and object first, then made into a shared + library using :meth:`codepy.jit.compile_from_string`, which additionally + handles caching + + 3. 
The resulting shared library is turned into a :class:`ctypes.CDLL` + to enable calling by the invoker generated by, e.g., + :class:`CExecutionWrapperGenerator` + """ + + def __init__(self, toolchain=None, + cc='gcc', cflags='-std=c99 -O3 -fPIC'.split(), + ldflags='-shared'.split(), libraries=[], + include_dirs=[], library_dirs=[], defines=[], + source_suffix='c'): + # try to get a default toolchain + # or subclass supplied version if available + self.toolchain = guess_toolchain() if toolchain is None else toolchain + self.source_suffix = source_suffix + if toolchain is None: + # copy in all differing values + diff = {'cc': cc, + 'cflags': cflags, + 'ldflags': ldflags, + 'libraries': libraries, + 'include_dirs': include_dirs, + 'library_dirs': library_dirs, + 'defines': defines} + # filter empty and those equal to toolchain defaults + diff = dict((k, v) for k, v in six.iteritems(diff) + if v and + not hasattr(self.toolchain, k) or + getattr(self.toolchain, k) != v) + self.toolchain = self.toolchain.copy(**diff) + self.tempdir = tempfile.mkdtemp(prefix="tmp_loopy") + self.source_suffix = source_suffix + + def _tempname(self, name): + """Build temporary filename path in tempdir.""" + return os.path.join(self.tempdir, name) + + @memoize_method + def build(self, name, code, debug=False, wait_on_error=None, + debug_recompile=True): + """Compile code, build and load shared library.""" + logger.debug(code) + c_fname = self._tempname('code.' 
+ self.source_suffix) + + # build object + checksum, mod_name, ext_file, recompiled = \ + compile_from_string(self.toolchain, name, code, c_fname, + self.tempdir, debug, wait_on_error, + debug_recompile, False) + + if not recompiled: + logger.debug('Kernel {} compiled from source'.format(name)) + + # and return compiled + return checksum, ctypes.CDLL(ext_file) + + +class CPlusPlusCompiler(CCompiler): + """Subclass of CCompiler to invoke a C++ compiler.""" + + def __init__(self, cc='g++', cflags='-std=c++98 -O3 -fPIC'.split(), + ldflags=[], libraries=[], + include_dirs=[], library_dirs=[], defines=[], + source_suffix='cpp'): + + super(CPlusPlusCompiler, self).__init__( + cc=cc, cflags=cflags, ldflags=ldflags, libraries=libraries, + include_dirs=include_dirs, library_dirs=library_dirs, + defines=defines, source_suffix=source_suffix) + + +class IDIToCDLL(object): + """ + A utility class that extracts arguement and return type info from a + :class:`ImplementedDataInfo` in order to create a :class:`ctype.CDLL` + """ + def __init__(self, target): + self.target = target + self.registry = target.get_dtype_registry().wrapped_registry + + def __call__(self, knl, idi): + # next loop through the implemented data info to get the arg data + arg_info = [] + for arg in idi: + # check if pointer + pointer = arg.shape + arg_info.append(self._dtype_to_ctype(arg.dtype, pointer)) + + return arg_info + + def _append_arg(self, name, dtype, pointer=False): + """Append arg info to current argument list.""" + self._arg_info.append(( + name, + self._dtype_to_ctype(dtype, pointer=pointer) + )) + + def _dtype_to_ctype(self, dtype, pointer=False): + """Map NumPy dtype to equivalent ctypes type.""" + typename = self.registry.dtype_to_ctype(dtype) + typename = {'unsigned': 'uint'}.get(typename, typename) + basetype = getattr(ctypes, 'c_' + typename) + if pointer: + return ctypes.POINTER(basetype) + return basetype + + +class CompiledCKernel(object): + """ + A CompiledCKernel wraps a loopy kernel, 
compiling it and loading the + result as a shared library, and provides access to the kernel as a + ctypes function object, wrapped by the __call__ method, which attempts + to automatically map argument types. + """ + + def __init__(self, knl, idi, dev_code, target, comp=CCompiler()): + from loopy.target.c import CTarget + assert isinstance(target, CTarget) + self.target = target + self.name = knl.name + # get code and build + self.code = dev_code + self.comp = comp + self.checksum, self.dll = self.comp.build( + self.name, self.code) + + # get the function declaration for interface with ctypes + func_decl = IDIToCDLL(self.target) + arg_info = func_decl(knl, idi) + self._fn = getattr(self.dll, self.name) + # kernels are void by defn. + self._fn.restype = None + self._fn.argtypes = [ctype for ctype in arg_info] + + def __call__(self, *args): + """Execute kernel with given args mapped to ctypes equivalents.""" + args_ = [] + for arg, arg_t in zip(args, self._fn.argtypes): + if hasattr(arg, 'ctypes'): + if arg.size == 0: + # TODO eliminate unused arguments from kernel + arg_ = arg_t(0.0) + else: + arg_ = arg.ctypes.data_as(arg_t) + else: + arg_ = arg_t(arg) + args_.append(arg_) + self._fn(*args_) + + +class CKernelExecutor(KernelExecutorBase): + """An object connecting a kernel to a :class:`CompiledKernel` + for execution. + + .. automethod:: __init__ + .. automethod:: __call__ + """ + + def __init__(self, kernel, compiler=None): + """ + :arg kernel: may be a loopy.LoopKernel, a generator returning kernels + (a warning will be issued if more than one is returned). If the + kernel has not yet been loop-scheduled, that is done, too, with no + specific arguments. 
+ """ + + self.compiler = compiler if compiler else CCompiler() + super(CKernelExecutor, self).__init__(kernel, + CExecutionWrapperGenerator()) + + @memoize_method + def kernel_info(self, arg_to_dtype_set=frozenset(), all_kwargs=None): + kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype_set) + + from loopy.codegen import generate_code_v2 + codegen_result = generate_code_v2(kernel) + + dev_code = codegen_result.device_code() + host_code = codegen_result.host_code() + all_code = '\n'.join([dev_code, '', host_code]) + + if self.kernel.options.write_cl: + output = all_code + if self.kernel.options.highlight_cl: + output = get_highlighted_code(code=output) + + if self.kernel.options.write_cl is True: + print(output) + else: + with open(self.kernel.options.write_cl, "w") as outf: + outf.write(output) + + if self.kernel.options.edit_cl: + from pytools import invoke_editor + dev_code = invoke_editor(dev_code, "code.c") + + c_kernels = [] + for dp in codegen_result.device_programs: + c_kernels.append(CompiledCKernel(dp, + codegen_result.implemented_data_info, all_code, self.kernel.target, + self.compiler)) + + return _KernelInfo( + kernel=kernel, + c_kernels=c_kernels, + implemented_data_info=codegen_result.implemented_data_info, + invoker=self.invoker(kernel, codegen_result)) + + # }}} + + def __call__(self, *args, **kwargs): + """ + :returns: ``(None, output)`` the output is a tuple of output arguments + (arguments that are written as part of the kernel). The order is given + by the order of kernel arguments. If this order is unspecified + (such as when kernel arguments are inferred automatically), + enable :attr:`loopy.Options.return_dict` to make *output* a + :class:`dict` instead, with keys of argument names and values + of the returned arrays. 
+ """ + + kwargs = self.packing_controller.unpack(kwargs) + + kernel_info = self.kernel_info(self.arg_to_dtype_set(kwargs)) + + return kernel_info.invoker( + kernel_info.c_kernels, *args, **kwargs) diff --git a/loopy/target/execution.py b/loopy/target/execution.py new file mode 100644 index 0000000000000000000000000000000000000000..4d6c0378cd6322e21909514a1d3c7d9d4cbe96de --- /dev/null +++ b/loopy/target/execution.py @@ -0,0 +1,833 @@ +from __future__ import division, with_statement, absolute_import + +__copyright__ = "Copyright (C) 2012-17 Andreas Kloeckner, Nick Curtis" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + + +import six +import numpy as np +from pytools import ImmutableRecord, memoize_method +from loopy.diagnostic import LoopyError +from pytools.py_codegen import ( + Indentation, PythonFunctionGenerator) + +import logging +logger = logging.getLogger(__name__) + +from pytools.persistent_dict import WriteOncePersistentDict +from loopy.tools import LoopyKeyBuilder +from loopy.version import DATA_MODEL_VERSION + + +# {{{ object array argument packing + +class _PackingInfo(ImmutableRecord): + """ + .. attribute:: name + .. attribute:: sep_shape + + .. attribute:: subscripts_and_names + + A list of type ``[(index, unpacked_name), ...]``. + """ + + +class SeparateArrayPackingController(object): + """For argument arrays with axes tagged to be implemented as separate + arrays, this class provides preprocessing of the incoming arguments so that + all sub-arrays may be passed in one object array (under the original, + un-split argument name) and are unpacked into separate arrays before being + passed to the kernel. + + It also repacks outgoing arrays of this type back into an object array. 
+ """ + + def __init__(self, kernel): + # map from arg name + self.packing_info = {} + + from loopy.kernel.array import ArrayBase + for arg in kernel.args: + if not isinstance(arg, ArrayBase): + continue + + if arg.shape is None or arg.dim_tags is None: + continue + + subscripts_and_names = arg.subscripts_and_names() + + if subscripts_and_names is None: + continue + + self.packing_info[arg.name] = _PackingInfo( + name=arg.name, + sep_shape=arg.sep_shape(), + subscripts_and_names=subscripts_and_names, + is_written=arg.name in kernel.get_written_variables()) + + def unpack(self, kernel_kwargs): + if not self.packing_info: + return kernel_kwargs + + kernel_kwargs = kernel_kwargs.copy() + + for packing_info in six.itervalues(self.packing_info): + arg_name = packing_info.name + if packing_info.name in kernel_kwargs: + arg = kernel_kwargs[arg_name] + for index, unpacked_name in packing_info.subscripts_and_names: + assert unpacked_name not in kernel_kwargs + kernel_kwargs[unpacked_name] = arg[index] + del kernel_kwargs[arg_name] + + return kernel_kwargs + + def pack(self, outputs): + if not self.packing_info: + return outputs + + for packing_info in six.itervalues(self.packing_info): + if not packing_info.is_written: + continue + + result = outputs[packing_info.name] = \ + np.zeros(packing_info.sep_shape, dtype=np.object) + + for index, unpacked_name in packing_info.subscripts_and_names: + result[index] = outputs.pop(unpacked_name) + + return outputs + +# }}} + + +# {{{ ExecutionWrapperGeneratorBase + +class ExecutionWrapperGeneratorBase(object): + """ + A set of common methods for generating a wrapper + for execution + + """ + + def __init__(self, system_args): + self.system_args = system_args[:] + + def python_dtype_str(self, dtype): + raise NotImplementedError() + + # {{{ invoker generation + + # /!\ This code runs in a namespace controlled by the user. + # Prefix all auxiliary variables with "_lpy". 
+ + # {{{ integer arg finding from shapes + + def generate_integer_arg_finding_from_shapes( + self, gen, kernel, implemented_data_info): + # a mapping from integer argument names to a list of tuples + # (arg_name, expression), where expression is a + # unary function of kernel.arg_dict[arg_name] + # returning the desired integer argument. + iarg_to_sources = {} + + from loopy.kernel.data import GlobalArg + from loopy.symbolic import DependencyMapper, StringifyMapper + from loopy.diagnostic import ParameterFinderWarning + dep_map = DependencyMapper() + + from pymbolic import var + for arg in implemented_data_info: + if arg.arg_class is GlobalArg: + sym_shape = var(arg.name).attr("shape") + for axis_nr, shape_i in enumerate(arg.shape): + if shape_i is None: + continue + + deps = dep_map(shape_i) + + if len(deps) == 1: + integer_arg_var, = deps + + if kernel.arg_dict[integer_arg_var.name].dtype.is_integral(): + from pymbolic.algorithm import solve_affine_equations_for + try: + # friggin' overkill :) + iarg_expr = solve_affine_equations_for( + [integer_arg_var.name], + [(shape_i, sym_shape.index(axis_nr))] + )[integer_arg_var] + except Exception as e: + #from traceback import print_exc + #print_exc() + + # went wrong? 
oh well + from warnings import warn + warn("Unable to generate code to automatically " + "find '%s' from the shape of '%s':\n%s" + % (integer_arg_var.name, arg.name, str(e)), + ParameterFinderWarning) + else: + iarg_to_sources.setdefault(integer_arg_var.name, [])\ + .append((arg.name, iarg_expr)) + + gen("# {{{ find integer arguments from shapes") + gen("") + + for iarg_name, sources in six.iteritems(iarg_to_sources): + gen("if %s is None:" % iarg_name) + with Indentation(gen): + if_stmt = "if" + for arg_name, value_expr in sources: + gen("%s %s is not None:" % (if_stmt, arg_name)) + with Indentation(gen): + gen("%s = %s" + % (iarg_name, StringifyMapper()(value_expr))) + + if_stmt = "elif" + + gen("") + + gen("# }}}") + gen("") + + # }}} + + # {{{ integer arg finding from offsets + + def generate_integer_arg_finding_from_offsets(self, gen, kernel, + implemented_data_info): + options = kernel.options + + gen("# {{{ find integer arguments from offsets") + gen("") + + for arg in implemented_data_info: + impl_array_name = arg.offset_for_name + if impl_array_name is not None: + gen("if %s is None:" % arg.name) + with Indentation(gen): + gen("if %s is None:" % impl_array_name) + with Indentation(gen): + gen("# Output variable, we'll be allocating " + "it, with zero offset.") + gen("%s = 0" % arg.name) + gen("else:") + with Indentation(gen): + if not options.no_numpy: + gen("_lpy_offset = getattr(%s, \"offset\", 0)" + % impl_array_name) + else: + gen("_lpy_offset = %s.offset" % impl_array_name) + + base_arg = kernel.impl_arg_to_arg[impl_array_name] + + if not options.skip_arg_checks: + gen("%s, _lpy_remdr = divmod(_lpy_offset, %d)" + % (arg.name, base_arg.dtype.itemsize)) + + gen("assert _lpy_remdr == 0, \"Offset of array '%s' is " + "not divisible by its dtype itemsize\"" + % impl_array_name) + gen("del _lpy_remdr") + else: + gen("%s = _lpy_offset // %d" + % (arg.name, base_arg.dtype.itemsize)) + + if not options.skip_arg_checks: + gen("del _lpy_offset") + + gen("# }}}") 
+ gen("") + + # }}} + + # {{{ integer arg finding from strides + + def generate_integer_arg_finding_from_strides( + self, gen, kernel, implemented_data_info): + options = kernel.options + + gen("# {{{ find integer arguments from strides") + gen("") + + for arg in implemented_data_info: + if arg.stride_for_name_and_axis is not None: + impl_array_name, stride_impl_axis = arg.stride_for_name_and_axis + + gen("if %s is None:" % arg.name) + with Indentation(gen): + if not options.skip_arg_checks: + gen("if %s is None:" % impl_array_name) + with Indentation(gen): + gen("raise RuntimeError(\"required stride '%s' for " + "argument '%s' not given or deducible from " + "passed array\")" + % (arg.name, impl_array_name)) + + base_arg = kernel.impl_arg_to_arg[impl_array_name] + + if not options.skip_arg_checks: + gen("%s, _lpy_remdr = divmod(%s.strides[%d], %d)" + % (arg.name, impl_array_name, stride_impl_axis, + base_arg.dtype.dtype.itemsize)) + + gen("assert _lpy_remdr == 0, \"Stride %d of array '%s' " + "is not divisible by its dtype itemsize\"" + % (stride_impl_axis, impl_array_name)) + gen("del _lpy_remdr") + else: + gen("%s = %s.strides[%d] // %d" + % (arg.name, impl_array_name, stride_impl_axis, base_arg.dtype.itemsize)) + + gen("# }}}") + gen("") + + # }}} + + # {{{ check that value args are present + + def generate_value_arg_check( + self, gen, kernel, implemented_data_info): + if kernel.options.skip_arg_checks: + return + + from loopy.kernel.data import ValueArg + + gen("# {{{ check that value args are present") + gen("") + + for arg in implemented_data_info: + if not issubclass(arg.arg_class, ValueArg): + continue + + gen("if %s is None:" % arg.name) + with Indentation(gen): + gen("raise TypeError(\"value argument '%s' " + "was not given and could not be automatically " + "determined\")" % arg.name) + + gen("# }}}") + gen("") + + # }}} + + # {{{ handle non numpy arguments + + def handle_non_numpy_arg(self, gen, arg): + raise NotImplementedError() + + # }}} + + # {{{ handle allocation of unspecified
arguments + + def handle_alloc(self, gen, arg, kernel_arg, strify, skip_arg_checks): + """ + Handle allocation of non-specified arguments for C-execution + """ + raise NotImplementedError() + + # }}} + + def get_arg_pass(self, arg): + raise NotImplementedError() + + # {{{ arg setup + + def generate_arg_setup( + self, gen, kernel, implemented_data_info, options): + import loopy as lp + + from loopy.kernel.data import KernelArgument + from loopy.kernel.array import ArrayBase + from loopy.symbolic import StringifyMapper + from loopy.types import NumpyType + + gen("# {{{ set up array arguments") + gen("") + + if not options.no_numpy: + gen("_lpy_encountered_numpy = False") + gen("_lpy_encountered_dev = False") + gen("") + + args = [] + + strify = StringifyMapper() + + expect_no_more_arguments = False + + for arg_idx, arg in enumerate(implemented_data_info): + is_written = arg.base_name in kernel.get_written_variables() + kernel_arg = kernel.impl_arg_to_arg.get(arg.name) + + if not issubclass(arg.arg_class, KernelArgument): + expect_no_more_arguments = True + continue + + if expect_no_more_arguments: + raise LoopyError("Further arguments encountered after arg info " + "describing a global temporary variable") + + if not issubclass(arg.arg_class, ArrayBase): + args.append(arg.name) + continue + + gen("# {{{ process %s" % arg.name) + gen("") + + if not options.no_numpy: + self.handle_non_numpy_arg(gen, arg) + + if not options.skip_arg_checks and not is_written: + gen("if %s is None:" % arg.name) + with Indentation(gen): + gen("raise RuntimeError(\"input argument '%s' must " + "be supplied\")" % arg.name) + gen("") + + if (is_written + and arg.arg_class is lp.ImageArg + and not options.skip_arg_checks): + gen("if %s is None:" % arg.name) + with Indentation(gen): + gen("raise RuntimeError(\"written image '%s' must " + "be supplied\")" % arg.name) + gen("") + + if is_written and arg.shape is None and not options.skip_arg_checks: + gen("if %s is None:" % arg.name) + with
Indentation(gen): + gen("raise RuntimeError(\"written argument '%s' has " + "unknown shape and must be supplied\")" % arg.name) + gen("") + + possibly_made_by_loopy = False + + # {{{ allocate written arrays, if needed + + if is_written and arg.arg_class in [lp.GlobalArg, lp.ConstantArg] \ + and arg.shape is not None \ + and all(si is not None for si in arg.shape): + + if not isinstance(arg.dtype, NumpyType): + raise LoopyError("do not know how to pass arg of type '%s'" + % arg.dtype) + + possibly_made_by_loopy = True + gen("_lpy_made_by_loopy = False") + gen("") + + gen("if %s is None:" % arg.name) + with Indentation(gen): + self.handle_alloc( + gen, arg, kernel_arg, strify, options.skip_arg_checks) + gen("_lpy_made_by_loopy = True") + gen("") + + # }}} + + # {{{ argument checking + + if arg.arg_class in [lp.GlobalArg, lp.ConstantArg] \ + and not options.skip_arg_checks: + if possibly_made_by_loopy: + gen("if not _lpy_made_by_loopy:") + else: + gen("if True:") + + with Indentation(gen): + gen("if %s.dtype != %s:" + % (arg.name, self.python_dtype_str( + kernel_arg.dtype.numpy_dtype))) + with Indentation(gen): + gen("raise TypeError(\"dtype mismatch on argument '%s' " + "(got: %%s, expected: %s)\" %% %s.dtype)" + % (arg.name, arg.dtype, arg.name)) + + # {{{ generate shape checking code + + def strify_allowing_none(shape_axis): + if shape_axis is None: + return "None" + else: + return strify(shape_axis) + + def strify_tuple(t): + if len(t) == 0: + return "()" + else: + return "(%s,)" % ", ".join( + strify_allowing_none(sa) + for sa in t) + + shape_mismatch_msg = ( + "raise TypeError(\"shape mismatch on argument '%s' " + "(got: %%s, expected: %%s)\" " + "%% (%s.shape, %s))" + % (arg.name, arg.name, strify_tuple(arg.unvec_shape))) + + if kernel_arg.shape is None: + pass + + elif any(shape_axis is None for shape_axis in kernel_arg.shape): + gen("if len(%s.shape) != %s:" + % (arg.name, len(arg.unvec_shape))) + with Indentation(gen): + gen(shape_mismatch_msg) + + for i, 
shape_axis in enumerate(arg.unvec_shape): + if shape_axis is None: + continue + + gen("if %s.shape[%d] != %s:" + % (arg.name, i, strify(shape_axis))) + with Indentation(gen): + gen(shape_mismatch_msg) + + else: # not None, no Nones in tuple + gen("if %s.shape != %s:" + % (arg.name, strify(arg.unvec_shape))) + with Indentation(gen): + gen(shape_mismatch_msg) + + # }}} + + if arg.unvec_strides and kernel_arg.dim_tags: + itemsize = kernel_arg.dtype.numpy_dtype.itemsize + sym_strides = tuple( + itemsize*s_i for s_i in arg.unvec_strides) + gen("if %s.strides != %s:" + % (arg.name, strify(sym_strides))) + with Indentation(gen): + gen("raise TypeError(\"strides mismatch on " + "argument '%s' (got: %%s, expected: %%s)\" " + "%% (%s.strides, %s))" + % (arg.name, arg.name, strify(sym_strides))) + + if not arg.allows_offset: + gen("if hasattr(%s, 'offset') and %s.offset:" % ( + arg.name, arg.name)) + with Indentation(gen): + gen("raise ValueError(\"Argument '%s' does not " + "allow arrays with offsets. Try passing " + "default_offset=loopy.auto to make_kernel()." 
+ "\")" % arg.name) + gen("") + + # }}} + + if possibly_made_by_loopy and not options.skip_arg_checks: + gen("del _lpy_made_by_loopy") + gen("") + + if arg.arg_class in [lp.GlobalArg, lp.ConstantArg]: + args.append(self.get_arg_pass(arg)) + else: + args.append("%s" % arg.name) + + gen("") + + gen("# }}}") + gen("") + + gen("# }}}") + gen("") + + return args + + # }}} + + def target_specific_preamble(self, gen): + """ + Add target specific imports to preamble + """ + raise NotImplementedError() + + def initialize_system_args(self, gen): + """ + Override to initialize any default system args + """ + raise NotImplementedError() + + # {{{ generate invocation + + def generate_invocation(self, gen, kernel_name, args): + raise NotImplementedError() + + # }}} + + # {{{ output + + def generate_output_handler( + self, gen, options, kernel, implemented_data_info): + + raise NotImplementedError() + + # }}} + + def generate_host_code(self, gen, codegen_result): + raise NotImplementedError + + def __call__(self, kernel, codegen_result): + """ + Generates the wrapping python invoker for this execution target + + :arg kernel: the loopy :class:`LoopKernel`(s) to be executed + :arg codegen_result: the loopy :class:`CodeGenerationResult` created + by code generation + + :returns: py_func, a python function that handles execution of this + kernel + """ + options = kernel.options + implemented_data_info = codegen_result.implemented_data_info + + from loopy.kernel.data import KernelArgument + gen = PythonFunctionGenerator( + "invoke_%s_loopy_kernel" % kernel.name, + self.system_args + [ + "%s=None" % idi.name + for idi in implemented_data_info + if issubclass(idi.arg_class, KernelArgument) + ]) + + gen.add_to_preamble("from __future__ import division") + gen.add_to_preamble("") + self.target_specific_preamble(gen) + gen.add_to_preamble("") + self.generate_host_code(gen, codegen_result) + gen.add_to_preamble("") + + self.initialize_system_args(gen) + +
self.generate_integer_arg_finding_from_shapes( + gen, kernel, implemented_data_info) + self.generate_integer_arg_finding_from_offsets( + gen, kernel, implemented_data_info) + self.generate_integer_arg_finding_from_strides( + gen, kernel, implemented_data_info) + self.generate_value_arg_check( + gen, kernel, implemented_data_info) + + args = self.generate_arg_setup( + gen, kernel, implemented_data_info, options) + + self.generate_invocation(gen, codegen_result.host_program.name, args) + + self.generate_output_handler(gen, options, kernel, implemented_data_info) + + if options.write_wrapper: + output = gen.get() + if options.highlight_wrapper: + output = get_highlighted_python_code(output) + + if options.write_wrapper is True: + print(output) + else: + with open(options.write_wrapper, "w") as outf: + outf.write(output) + + return gen.get_function() + + +# }}} + + +class _KernelInfo(ImmutableRecord): + pass + + +class _Kernels(object): + pass + + +typed_and_scheduled_cache = WriteOncePersistentDict( + "loopy-typed-and-scheduled-cache-v1-"+DATA_MODEL_VERSION, + key_builder=LoopyKeyBuilder()) + + +# {{{ kernel executor + +class KernelExecutorBase(object): + """An object connecting a kernel to a :class:`pyopencl.Context` + for execution. + + .. automethod:: __init__ + .. 
automethod:: __call__ + """ + + def __init__(self, kernel, invoker): + """ + :arg kernel: a loopy.LoopKernel + """ + + self.kernel = kernel + + self.packing_controller = SeparateArrayPackingController(kernel) + + self.output_names = tuple(arg.name for arg in self.kernel.args + if arg.name in self.kernel.get_written_variables()) + + self.has_runtime_typed_args = any( + arg.dtype is None + for arg in kernel.args) + + self.invoker = invoker + + def get_typed_and_scheduled_kernel_uncached(self, arg_to_dtype_set): + from loopy.kernel.tools import add_dtypes + + kernel = self.kernel + + if arg_to_dtype_set: + var_to_dtype = {} + for var, dtype in arg_to_dtype_set: + try: + dest_name = kernel.impl_arg_to_arg[var].name + except KeyError: + dest_name = var + + try: + var_to_dtype[dest_name] = dtype + except KeyError: + raise LoopyError("cannot set type for '%s': " + "no known variable/argument with that name" + % var) + + kernel = add_dtypes(kernel, var_to_dtype) + + from loopy.type_inference import infer_unknown_types + kernel = infer_unknown_types(kernel, expect_completion=True) + + if kernel.schedule is None: + from loopy.preprocess import preprocess_kernel + kernel = preprocess_kernel(kernel) + + from loopy.schedule import get_one_scheduled_kernel + kernel = get_one_scheduled_kernel(kernel) + + return kernel + + @memoize_method + def get_typed_and_scheduled_kernel(self, arg_to_dtype_set): + from loopy import CACHING_ENABLED + + from loopy.preprocess import prepare_for_caching + # prepare_for_caching() gets run by preprocess, but the kernel at this + # stage is not guaranteed to be preprocessed. 
+ cacheable_kernel = prepare_for_caching(self.kernel) + cache_key = (type(self).__name__, cacheable_kernel, arg_to_dtype_set) + + if CACHING_ENABLED: + try: + return typed_and_scheduled_cache[cache_key] + except KeyError: + pass + + logger.debug("%s: typed-and-scheduled cache miss" % self.kernel.name) + + kernel = self.get_typed_and_scheduled_kernel_uncached(arg_to_dtype_set) + + if CACHING_ENABLED: + typed_and_scheduled_cache.store_if_not_present(cache_key, kernel) + + return kernel + + def arg_to_dtype_set(self, kwargs): + if not self.has_runtime_typed_args: + return None + + impl_arg_to_arg = self.kernel.impl_arg_to_arg + arg_to_dtype = {} + for arg_name, val in six.iteritems(kwargs): + arg = impl_arg_to_arg.get(arg_name, None) + + if arg is None: + # offsets, strides and such + continue + + if arg.dtype is None and val is not None: + try: + dtype = val.dtype + except AttributeError: + pass + else: + arg_to_dtype[arg_name] = dtype + + return frozenset(six.iteritems(arg_to_dtype)) + + # {{{ debugging aids + + def get_highlighted_code(self, arg_to_dtype=None, code=None): + if code is None: + code = self.get_code(arg_to_dtype) + return get_highlighted_code(code) + + def get_code(self, arg_to_dtype=None): + if arg_to_dtype is not None: + arg_to_dtype = frozenset(six.iteritems(arg_to_dtype)) + + kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype) + + from loopy.codegen import generate_code_v2 + code = generate_code_v2(kernel) + return code.device_code() + + # }}} + + # {{{ call and info generator + + @memoize_method + def kernel_info(self, arg_to_dtype_set=frozenset(), all_kwargs=None): + raise NotImplementedError() + + def __call__(self, queue, **kwargs): + raise NotImplementedError() + + # }}} + +# }}} + +# {{{ code highlighers + + +def get_highlighted_code(text, python=False): + try: + from pygments import highlight + except ImportError: + return text + else: + from pygments.lexers import CLexer, PythonLexer + from pygments.formatters import 
TerminalFormatter + + return highlight(text, CLexer() if not python else PythonLexer(), + TerminalFormatter()) + + +def get_highlighted_python_code(text): + return get_highlighted_code(text, True) + + +# }}} + +# vim: foldmethod=marker diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index 975c691a74d0d17bdca39243f515c5d04284893d..0da502fbab8aa45ed58a0491e0f43323ecf9aeff 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -22,18 +22,12 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -import six from six.moves import range, zip -import numpy as np -from pytools import ImmutableRecord, memoize_method -from loopy.diagnostic import ParameterFinderWarning -from pytools.py_codegen import ( - Indentation, PythonFunctionGenerator) -from loopy.diagnostic import LoopyError -from loopy.types import NumpyType -from loopy.execution import KernelExecutorBase - +from pytools import memoize_method +from pytools.py_codegen import Indentation +from loopy.target.execution import ( + KernelExecutorBase, ExecutionWrapperGeneratorBase, _KernelInfo, _Kernels) import logging logger = logging.getLogger(__name__) @@ -44,581 +38,184 @@ logger = logging.getLogger(__name__) # Prefix all auxiliary variables with "_lpy". -def python_dtype_str(dtype): - import pyopencl.tools as cl_tools - if dtype.isbuiltin: - return "_lpy_np."+dtype.name - else: - return ("_lpy_cl_tools.get_or_register_dtype(\"%s\")" - % cl_tools.dtype_to_ctype(dtype)) - - -# {{{ integer arg finding from shapes - -def generate_integer_arg_finding_from_shapes(gen, kernel, implemented_data_info): - # a mapping from integer argument names to a list of tuples - # (arg_name, expression), where expression is a - # unary function of kernel.arg_dict[arg_name] - # returning the desired integer argument. 
- iarg_to_sources = {} - - from loopy.kernel.data import GlobalArg - from loopy.symbolic import DependencyMapper, StringifyMapper - dep_map = DependencyMapper() - - from pymbolic import var - for arg in implemented_data_info: - if arg.arg_class is GlobalArg: - sym_shape = var(arg.name).attr("shape") - for axis_nr, shape_i in enumerate(arg.shape): - if shape_i is None: - continue - - deps = dep_map(shape_i) - - if len(deps) == 1: - integer_arg_var, = deps - - if kernel.arg_dict[integer_arg_var.name].dtype.is_integral(): - from pymbolic.algorithm import solve_affine_equations_for - try: - # friggin' overkill :) - iarg_expr = solve_affine_equations_for( - [integer_arg_var.name], - [(shape_i, sym_shape.index(axis_nr))] - )[integer_arg_var] - except Exception as e: - #from traceback import print_exc - #print_exc() - - # went wrong? oh well - from warnings import warn - warn("Unable to generate code to automatically " - "find '%s' from the shape of '%s':\n%s" - % (integer_arg_var.name, arg.name, str(e)), - ParameterFinderWarning) - else: - iarg_to_sources.setdefault(integer_arg_var.name, []) \ - .append((arg.name, iarg_expr)) - - gen("# {{{ find integer arguments from shapes") - gen("") - - for iarg_name, sources in six.iteritems(iarg_to_sources): - gen("if %s is None:" % iarg_name) - with Indentation(gen): - if_stmt = "if" - for arg_name, value_expr in sources: - gen("%s %s is not None:" % (if_stmt, arg_name)) - with Indentation(gen): - gen("%s = %s" - % (iarg_name, StringifyMapper()(value_expr))) - - if_stmt = "elif" - - gen("") - - gen("# }}}") - gen("") - -# }}} - - -# {{{ integer arg finding from offsets - -def generate_integer_arg_finding_from_offsets(gen, kernel, implemented_data_info): - options = kernel.options - - gen("# {{{ find integer arguments from offsets") - gen("") - - for arg in implemented_data_info: - impl_array_name = arg.offset_for_name - if impl_array_name is not None: - gen("if %s is None:" % arg.name) - with Indentation(gen): - gen("if %s is 
None:" % impl_array_name) - with Indentation(gen): - gen("# Output variable, we'll be allocating " - "it, with zero offset.") - gen("%s = 0" % arg.name) - gen("else:") - with Indentation(gen): - if not options.no_numpy: - gen("_lpy_offset = getattr(%s, \"offset\", 0)" - % impl_array_name) - else: - gen("_lpy_offset = %s.offset" % impl_array_name) - - base_arg = kernel.impl_arg_to_arg[impl_array_name] - - if not options.skip_arg_checks: - gen("%s, _lpy_remdr = divmod(_lpy_offset, %d)" - % (arg.name, base_arg.dtype.itemsize)) - - gen("assert _lpy_remdr == 0, \"Offset of array '%s' is " - "not divisible by its dtype itemsize\"" - % impl_array_name) - gen("del _lpy_remdr") - else: - gen("%s = _lpy_offset // %d" - % (arg.name, base_arg.dtype.itemsize)) - - if not options.skip_arg_checks: - gen("del _lpy_offset") - - gen("# }}}") - gen("") - -# }}} - - -# {{{ integer arg finding from strides - -def generate_integer_arg_finding_from_strides(gen, kernel, implemented_data_info): - options = kernel.options - - gen("# {{{ find integer arguments from strides") - gen("") - - for arg in implemented_data_info: - if arg.stride_for_name_and_axis is not None: - impl_array_name, stride_impl_axis = arg.stride_for_name_and_axis - - gen("if %s is None:" % arg.name) - with Indentation(gen): - if not options.skip_arg_checks: - gen("if %s is None:" % impl_array_name) - with Indentation(gen): - gen("raise RuntimeError(\"required stride '%s' for " - "argument '%s' not given or deducible from " - "passed array\")" - % (arg.name, impl_array_name)) - - base_arg = kernel.impl_arg_to_arg[impl_array_name] - - if not options.skip_arg_checks: - gen("%s, _lpy_remdr = divmod(%s.strides[%d], %d)" - % (arg.name, impl_array_name, stride_impl_axis, - base_arg.dtype.dtype.itemsize)) - - gen("assert _lpy_remdr == 0, \"Stride %d of array '%s' is " - "not divisible by its dtype itemsize\"" - % (stride_impl_axis, impl_array_name)) - gen("del _lpy_remdr") - else: - gen("%s = _lpy_offset // %d" - % (arg.name, 
base_arg.dtype.itemsize)) - - gen("# }}}") - gen("") - -# }}} - - -# {{{ check that value args are present - -def generate_value_arg_check(gen, kernel, implemented_data_info): - if kernel.options.skip_arg_checks: - return +class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): + """ + Specialized form of the :class:`ExecutionWrapperGeneratorBase` for + pyopencl execution + """ - from loopy.kernel.data import ValueArg + def __init__(self): + system_args = [ + "_lpy_cl_kernels", "queue", "allocator=None", "wait_for=None", + # ignored if options.no_numpy + "out_host=None" + ] + super(PyOpenCLExecutionWrapperGenerator, self).__init__(system_args) - gen("# {{{ check that value args are present") - gen("") + def python_dtype_str(self, dtype): + import pyopencl.tools as cl_tools + if dtype.isbuiltin: + return "_lpy_np."+dtype.name + else: + return ("_lpy_cl_tools.get_or_register_dtype(\"%s\")" + % cl_tools.dtype_to_ctype(dtype)) - for arg in implemented_data_info: - if not issubclass(arg.arg_class, ValueArg): - continue + # {{{ handle non-numpy args - gen("if %s is None:" % arg.name) + def handle_non_numpy_arg(self, gen, arg): + gen("if isinstance(%s, _lpy_np.ndarray):" % arg.name) with Indentation(gen): - gen("raise TypeError(\"value argument '%s' " - "was not given and could not be automatically " - "determined\")" % arg.name) - - gen("# }}}") - gen("") - -# }}} + gen("# synchronous, nothing to worry about") + gen("%s = _lpy_cl_array.to_device(" + "queue, %s, allocator=allocator)" + % (arg.name, arg.name)) + gen("_lpy_encountered_numpy = True") + gen("elif %s is not None:" % arg.name) + with Indentation(gen): + gen("_lpy_encountered_dev = True") + gen("") -# {{{ arg setup + # {{{ handle allocation of unspecified arguements -def generate_arg_setup(gen, kernel, implemented_data_info, options): - import loopy as lp + def handle_alloc(self, gen, arg, kernel_arg, strify, skip_arg_checks): + """ + Handle allocation of non-specified arguements for pyopencl 
execution + """ + from pymbolic import var + + num_axes = len(arg.strides) + for i in range(num_axes): + gen("_lpy_shape_%d = %s" % (i, strify(arg.unvec_shape[i]))) + + itemsize = kernel_arg.dtype.numpy_dtype.itemsize + for i in range(num_axes): + gen("_lpy_strides_%d = %s" % (i, strify( + itemsize*arg.unvec_strides[i]))) + + if not skip_arg_checks: + for i in range(num_axes): + gen("assert _lpy_strides_%d > 0, " + "\"'%s' has negative stride in axis %d\"" + % (i, arg.name, i)) + + sym_strides = tuple( + var("_lpy_strides_%d" % i) + for i in range(num_axes)) + sym_shape = tuple( + var("_lpy_shape_%d" % i) + for i in range(num_axes)) + + alloc_size_expr = (sum(astrd*(alen-1) + for alen, astrd in zip(sym_shape, sym_strides)) + + itemsize) + + gen("_lpy_alloc_size = %s" % strify(alloc_size_expr)) + gen("%(name)s = _lpy_cl_array.Array(queue, %(shape)s, " + "%(dtype)s, strides=%(strides)s, " + "data=allocator(_lpy_alloc_size), allocator=allocator)" + % dict( + name=arg.name, + shape=strify(sym_shape), + strides=strify(sym_strides), + dtype=self.python_dtype_str(kernel_arg.dtype.numpy_dtype))) + + if not skip_arg_checks: + for i in range(num_axes): + gen("del _lpy_shape_%d" % i) + gen("del _lpy_strides_%d" % i) + gen("del _lpy_alloc_size") + gen("") - from loopy.kernel.data import KernelArgument - from loopy.kernel.array import ArrayBase - from loopy.symbolic import StringifyMapper - from pymbolic import var + # }}} - gen("# {{{ set up array arguments") - gen("") + def target_specific_preamble(self, gen): + """ + Add default pyopencl imports to preamble + """ + gen.add_to_preamble("import numpy as _lpy_np") + gen.add_to_preamble("import pyopencl as _lpy_cl") + gen.add_to_preamble("import pyopencl.array as _lpy_cl_array") + gen.add_to_preamble("import pyopencl.tools as _lpy_cl_tools") - if not options.no_numpy: - gen("_lpy_encountered_numpy = False") - gen("_lpy_encountered_dev = False") + def initialize_system_args(self, gen): + """ + Initializes possibly empty system 
arguements + """ + gen("if allocator is None:") + with Indentation(gen): + gen("allocator = _lpy_cl_tools.DeferredAllocator(queue.context)") gen("") - args = [] - - strify = StringifyMapper() - - expect_no_more_arguments = False + # {{{ generate invocation - for arg_idx, arg in enumerate(implemented_data_info): - is_written = arg.base_name in kernel.get_written_variables() - kernel_arg = kernel.impl_arg_to_arg.get(arg.name) + def generate_invocation(self, gen, kernel_name, args): + gen("_lpy_evt = {kernel_name}({args})" + .format( + kernel_name=kernel_name, + args=", ".join( + ["_lpy_cl_kernels", "queue"] + + args + + ["wait_for=wait_for"]))) - if not issubclass(arg.arg_class, KernelArgument): - expect_no_more_arguments = True - continue + # }}} - if expect_no_more_arguments: - raise LoopyError("Further arguments encountered after arg info " - "describing a global temporary variable") + # {{{ - if not issubclass(arg.arg_class, ArrayBase): - args.append(arg.name) - continue + def generate_output_handler( + self, gen, options, kernel, implemented_data_info): - gen("# {{{ process %s" % arg.name) - gen("") + from loopy.kernel.data import KernelArgument if not options.no_numpy: - gen("if isinstance(%s, _lpy_np.ndarray):" % arg.name) - with Indentation(gen): - gen("# synchronous, nothing to worry about") - gen("%s = _lpy_cl_array.to_device(" - "queue, %s, allocator=allocator)" - % (arg.name, arg.name)) - gen("_lpy_encountered_numpy = True") - gen("elif %s is not None:" % arg.name) + gen("if out_host is None and (_lpy_encountered_numpy " + "and not _lpy_encountered_dev):") with Indentation(gen): - gen("_lpy_encountered_dev = True") + gen("out_host = True") - gen("") - - if not options.skip_arg_checks and not is_written: - gen("if %s is None:" % arg.name) + gen("if out_host:") with Indentation(gen): - gen("raise RuntimeError(\"input argument '%s' must " - "be supplied\")" % arg.name) - gen("") - - if (is_written - and arg.arg_class is lp.ImageArg - and not 
options.skip_arg_checks): - gen("if %s is None:" % arg.name) - with Indentation(gen): - gen("raise RuntimeError(\"written image '%s' must " - "be supplied\")" % arg.name) - gen("") - - if is_written and arg.shape is None and not options.skip_arg_checks: - gen("if %s is None:" % arg.name) - with Indentation(gen): - gen("raise RuntimeError(\"written argument '%s' has " - "unknown shape and must be supplied\")" % arg.name) - gen("") - - possibly_made_by_loopy = False - - # {{{ allocate written arrays, if needed + gen("pass") # if no outputs (?!) + for arg in implemented_data_info: + if not issubclass(arg.arg_class, KernelArgument): + continue - if is_written and arg.arg_class in [lp.GlobalArg, lp.ConstantArg] \ - and arg.shape is not None \ - and all(si is not None for si in arg.shape): + is_written = arg.base_name in kernel.get_written_variables() + if is_written: + gen("%s = %s.get(queue=queue)" % (arg.name, arg.name)) - if not isinstance(arg.dtype, NumpyType): - raise LoopyError("do not know how to pass arg of type '%s'" - % arg.dtype) - - possibly_made_by_loopy = True - gen("_lpy_made_by_loopy = False") - gen("") - - gen("if %s is None:" % arg.name) - with Indentation(gen): - num_axes = len(arg.strides) - for i in range(num_axes): - gen("_lpy_shape_%d = %s" % (i, strify(arg.unvec_shape[i]))) - - itemsize = kernel_arg.dtype.numpy_dtype.itemsize - for i in range(num_axes): - gen("_lpy_strides_%d = %s" % (i, strify( - itemsize*arg.unvec_strides[i]))) - - if not options.skip_arg_checks: - for i in range(num_axes): - gen("assert _lpy_strides_%d > 0, " - "\"'%s' has negative stride in axis %d\"" - % (i, arg.name, i)) - - sym_strides = tuple( - var("_lpy_strides_%d" % i) - for i in range(num_axes)) - sym_shape = tuple( - var("_lpy_shape_%d" % i) - for i in range(num_axes)) - - alloc_size_expr = (sum(astrd*(alen-1) - for alen, astrd in zip(sym_shape, sym_strides)) - + itemsize) - - gen("_lpy_alloc_size = %s" % strify(alloc_size_expr)) - gen("%(name)s = 
_lpy_cl_array.Array(queue, %(shape)s, " - "%(dtype)s, strides=%(strides)s, " - "data=allocator(_lpy_alloc_size), allocator=allocator)" - % dict( - name=arg.name, - shape=strify(sym_shape), - strides=strify(sym_strides), - dtype=python_dtype_str(kernel_arg.dtype.numpy_dtype))) - - if not options.skip_arg_checks: - for i in range(num_axes): - gen("del _lpy_shape_%d" % i) - gen("del _lpy_strides_%d" % i) - gen("del _lpy_alloc_size") - gen("") - - gen("_lpy_made_by_loopy = True") - gen("") - - # }}} - - # {{{ argument checking - - if arg.arg_class in [lp.GlobalArg, lp.ConstantArg] \ - and not options.skip_arg_checks: - if possibly_made_by_loopy: - gen("if not _lpy_made_by_loopy:") - else: - gen("if True:") - - with Indentation(gen): - gen("if %s.dtype != %s:" - % (arg.name, python_dtype_str(kernel_arg.dtype.numpy_dtype))) - with Indentation(gen): - gen("raise TypeError(\"dtype mismatch on argument '%s' " - "(got: %%s, expected: %s)\" %% %s.dtype)" - % (arg.name, arg.dtype, arg.name)) - - # {{{ generate shape checking code - - def strify_allowing_none(shape_axis): - if shape_axis is None: - return "None" - else: - return strify(shape_axis) - - def strify_tuple(t): - if len(t) == 0: - return "()" - else: - return "(%s,)" % ", ".join( - strify_allowing_none(sa) - for sa in t) - - shape_mismatch_msg = ( - "raise TypeError(\"shape mismatch on argument '%s' " - "(got: %%s, expected: %%s)\" " - "%% (%s.shape, %s))" - % (arg.name, arg.name, strify_tuple(arg.unvec_shape))) - - if kernel_arg.shape is None: - pass - - elif any(shape_axis is None for shape_axis in kernel_arg.shape): - gen("if len(%s.shape) != %s:" - % (arg.name, len(arg.unvec_shape))) - with Indentation(gen): - gen(shape_mismatch_msg) - - for i, shape_axis in enumerate(arg.unvec_shape): - if shape_axis is None: - continue - - gen("if %s.shape[%d] != %s:" - % (arg.name, i, strify(shape_axis))) - with Indentation(gen): - gen(shape_mismatch_msg) - - else: # not None, no Nones in tuple - gen("if %s.shape != %s:" - % 
(arg.name, strify(arg.unvec_shape))) - with Indentation(gen): - gen(shape_mismatch_msg) - - # }}} - - if arg.unvec_strides and kernel_arg.dim_tags: - itemsize = kernel_arg.dtype.numpy_dtype.itemsize - sym_strides = tuple( - itemsize*s_i for s_i in arg.unvec_strides) - gen("if %s.strides != %s:" - % (arg.name, strify(sym_strides))) - with Indentation(gen): - gen("raise TypeError(\"strides mismatch on " - "argument '%s' (got: %%s, expected: %%s)\" " - "%% (%s.strides, %s))" - % (arg.name, arg.name, strify(sym_strides))) - - if not arg.allows_offset: - gen("if %s.offset:" % arg.name) - with Indentation(gen): - gen("raise ValueError(\"Argument '%s' does not " - "allow arrays with offsets. Try passing " - "default_offset=loopy.auto to make_kernel()." - "\")" % arg.name) - gen("") - - # }}} - - if possibly_made_by_loopy and not options.skip_arg_checks: - gen("del _lpy_made_by_loopy") gen("") - if arg.arg_class in [lp.GlobalArg, lp.ConstantArg]: - args.append("%s.base_data" % arg.name) + if options.return_dict: + gen("return _lpy_evt, {%s}" + % ", ".join("\"%s\": %s" % (arg.name, arg.name) + for arg in implemented_data_info + if issubclass(arg.arg_class, KernelArgument) + if arg.base_name in kernel.get_written_variables())) else: - args.append("%s" % arg.name) - - gen("") - - gen("# }}}") - gen("") - - gen("# }}}") - gen("") - - return args - -# }}} - - -def generate_invoker(kernel, codegen_result): - options = kernel.options - implemented_data_info = codegen_result.implemented_data_info - host_code = codegen_result.host_code() - - system_args = [ - "_lpy_cl_kernels", "queue", "allocator=None", "wait_for=None", - # ignored if options.no_numpy - "out_host=None" - ] - - from loopy.kernel.data import KernelArgument - gen = PythonFunctionGenerator( - "invoke_%s_loopy_kernel" % kernel.name, - system_args + [ - "%s=None" % idi.name - for idi in implemented_data_info - if issubclass(idi.arg_class, KernelArgument) - ]) - - gen.add_to_preamble("from __future__ import division") - 
gen.add_to_preamble("") - gen.add_to_preamble("import pyopencl as _lpy_cl") - gen.add_to_preamble("import pyopencl.array as _lpy_cl_array") - gen.add_to_preamble("import pyopencl.tools as _lpy_cl_tools") - gen.add_to_preamble("import numpy as _lpy_np") - gen.add_to_preamble("") - gen.add_to_preamble(host_code) - gen.add_to_preamble("") - - gen("if allocator is None:") - with Indentation(gen): - gen("allocator = _lpy_cl_tools.DeferredAllocator(queue.context)") - gen("") - - generate_integer_arg_finding_from_shapes(gen, kernel, implemented_data_info) - generate_integer_arg_finding_from_offsets(gen, kernel, implemented_data_info) - generate_integer_arg_finding_from_strides(gen, kernel, implemented_data_info) - generate_value_arg_check(gen, kernel, implemented_data_info) - - args = generate_arg_setup(gen, kernel, implemented_data_info, options) - - # {{{ generate invocation - - gen("_lpy_evt = {kernel_name}({args})" - .format( - kernel_name=codegen_result.host_program.name, - args=", ".join( - ["_lpy_cl_kernels", "queue"] - + args - + ["wait_for=wait_for"]))) - - # }}} - - # {{{ output - - if not options.no_numpy: - gen("if out_host is None and (_lpy_encountered_numpy " - "and not _lpy_encountered_dev):") - with Indentation(gen): - gen("out_host = True") - - gen("if out_host:") - with Indentation(gen): - gen("pass") # if no outputs (?!) 
- for arg in implemented_data_info: - if not issubclass(arg.arg_class, KernelArgument): - continue - - is_written = arg.base_name in kernel.get_written_variables() - if is_written: - gen("%s = %s.get(queue=queue)" % (arg.name, arg.name)) - - gen("") - - if options.return_dict: - gen("return _lpy_evt, {%s}" - % ", ".join("\"%s\": %s" % (arg.name, arg.name) + out_args = [arg for arg in implemented_data_info - if issubclass(arg.arg_class, KernelArgument) - if arg.base_name in kernel.get_written_variables())) - else: - out_args = [arg - for arg in implemented_data_info - if issubclass(arg.arg_class, KernelArgument) - if arg.base_name in kernel.get_written_variables()] - if out_args: - gen("return _lpy_evt, (%s,)" - % ", ".join(arg.name for arg in out_args)) - else: - gen("return _lpy_evt, ()") + if issubclass(arg.arg_class, KernelArgument) + if arg.base_name in kernel.get_written_variables()] + if out_args: + gen("return _lpy_evt, (%s,)" + % ", ".join(arg.name for arg in out_args)) + else: + gen("return _lpy_evt, ()") # }}} - if options.write_wrapper: - output = gen.get() - if options.highlight_wrapper: - output = get_highlighted_python_code(output) - - if options.write_wrapper is True: - print(output) - else: - with open(options.write_wrapper, "w") as outf: - outf.write(output) - - return gen.get_function() + def generate_host_code(self, gen, codegen_result): + gen.add_to_preamble(codegen_result.host_code()) + def get_arg_pass(self, arg): + return "%s.base_data" % arg.name # }}} # {{{ kernel executor -class _CLKernelInfo(ImmutableRecord): - pass - - -class _CLKernels(object): - pass - class PyOpenCLKernelExecutor(KernelExecutorBase): """An object connecting a kernel to a :class:`pyopencl.Context` @@ -637,7 +234,8 @@ class PyOpenCLKernelExecutor(KernelExecutorBase): specific arguments. 
""" - super(PyOpenCLKernelExecutor, self).__init__(kernel) + super(PyOpenCLKernelExecutor, self).__init__( + kernel, invoker=PyOpenCLExecutionWrapperGenerator()) self.context = context @@ -646,10 +244,11 @@ class PyOpenCLKernelExecutor(KernelExecutorBase): self.kernel = kernel.copy(target=PyOpenCLTarget(context.devices[0])) @memoize_method - def cl_kernel_info(self, arg_to_dtype_set=frozenset(), all_kwargs=None): + def kernel_info(self, arg_to_dtype_set=frozenset(), all_kwargs=None): kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype_set) from loopy.codegen import generate_code_v2 + from loopy.target.execution import get_highlighted_code codegen_result = generate_code_v2(kernel) dev_code = codegen_result.device_code() @@ -657,7 +256,7 @@ class PyOpenCLKernelExecutor(KernelExecutorBase): if self.kernel.options.write_cl: output = dev_code if self.kernel.options.highlight_cl: - output = get_highlighted_cl_code(output) + output = get_highlighted_code(output) if self.kernel.options.write_cl is True: print(output) @@ -675,42 +274,15 @@ class PyOpenCLKernelExecutor(KernelExecutorBase): cl.Program(self.context, dev_code) .build(options=kernel.options.cl_build_options)) - cl_kernels = _CLKernels() + cl_kernels = _Kernels() for dp in codegen_result.device_programs: setattr(cl_kernels, dp.name, getattr(cl_program, dp.name)) - return _CLKernelInfo( + return _KernelInfo( kernel=kernel, cl_kernels=cl_kernels, implemented_data_info=codegen_result.implemented_data_info, - invoker=generate_invoker(kernel, codegen_result)) - - # {{{ debugging aids - - def get_code(self, arg_to_dtype=None): - def process_dtype(dtype): - if isinstance(dtype, type) and issubclass(dtype, np.generic): - dtype = np.dtype(dtype) - if isinstance(dtype, np.dtype): - dtype = NumpyType(dtype, self.kernel.target) - - return dtype - - if arg_to_dtype is not None: - arg_to_dtype = frozenset( - (k, process_dtype(v)) for k, v in six.iteritems(arg_to_dtype)) - - kernel = 
self.get_typed_and_scheduled_kernel(arg_to_dtype) - - from loopy.codegen import generate_code_v2 - code = generate_code_v2(kernel) - return code.device_code() - - def get_highlighted_code(self, arg_to_dtype=None): - return get_highlighted_cl_code( - self.get_code(arg_to_dtype)) - - # }}} + invoker=self.invoker(kernel, codegen_result)) def __call__(self, queue, **kwargs): """ @@ -745,7 +317,7 @@ class PyOpenCLKernelExecutor(KernelExecutorBase): kwargs = self.packing_controller.unpack(kwargs) - kernel_info = self.cl_kernel_info(self.arg_to_dtype_set(kwargs)) + kernel_info = self.kernel_info(self.arg_to_dtype_set(kwargs)) return kernel_info.invoker( kernel_info.cl_kernels, queue, allocator, wait_for, @@ -753,29 +325,4 @@ class PyOpenCLKernelExecutor(KernelExecutorBase): # }}} - -def get_highlighted_python_code(text): - try: - from pygments import highlight - except ImportError: - return text - else: - from pygments.lexers import PythonLexer - from pygments.formatters import TerminalFormatter - - return highlight(text, PythonLexer(), TerminalFormatter()) - - -def get_highlighted_cl_code(text): - try: - from pygments import highlight - except ImportError: - return text - else: - from pygments.lexers import CLexer - from pygments.formatters import TerminalFormatter - - return highlight(text, CLexer(), TerminalFormatter()) - - # vim: foldmethod=marker diff --git a/requirements.txt b/requirements.txt index 881b2b9bbe57e6dbcf3e3297e17b95393bda4253..1a23022821116aea068b76eab72f9a5596694eea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ git+https://github.com/inducer/cgen.git git+https://github.com/inducer/pyopencl.git git+https://github.com/inducer/pymbolic.git git+https://github.com/inducer/genpy.git +git+https://github.com/inducer/codepy.git hg+https://bitbucket.org/inducer/f2py diff --git a/setup.py b/setup.py index 94843bf69e4e25677ccc0713e5f598e9dcfd55e2..b8f36d12559f05a47ef57dd06efd4761e3b3ad9a 100644 --- a/setup.py +++ b/setup.py @@ -43,6 +43,7 
@@ setup(name="loo.py", "cgen>=2016.1", "islpy>=2016.2", "six>=1.8.0", + "codepy>=2017.1", "colorama", "Mako", ], diff --git a/test/test_c_execution.py b/test/test_c_execution.py new file mode 100644 index 0000000000000000000000000000000000000000..5cd1e44f6d0a85ef9b6fbe0e74de0945d22e36b6 --- /dev/null +++ b/test/test_c_execution.py @@ -0,0 +1,191 @@ +from __future__ import division, absolute_import, print_function + +__copyright__ = "Copyright (C) 2017 Nick Curtis" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +import numpy as np +import loopy as lp + +import logging +logger = logging.getLogger(__name__) + +try: + import faulthandler +except ImportError: + pass +else: + faulthandler.enable() + + +def test_c_target(): + from loopy.target.c import ExecutableCTarget + + knl = lp.make_kernel( + "{ [i]: 0<=i 0 - <>b = sum(i, a[i]) + if l > 0 + b[l] = sum(i, l*a[i]) end """, - [lp.GlobalArg("a", dtype=np.float32, shape=(42,)), - lp.GlobalArg("n", dtype=np.float32, shape=())], - target=lp.CTarget()) - code = lp.generate_body(knl) + [lp.ValueArg("n", dtype=np.int32), "..."]) + + knl = lp.tag_inames(knl, "l:g.0") + knl = lp.add_and_infer_dtypes(knl, {"a": np.float32}) + code = lp.generate_code_v2(knl).device_code() + print(code) # Check that the if appears before the loop that realizes the reduction. assert code.index("if") < code.index("for")