diff --git a/pyopencl/cffi_cl.py b/pyopencl/cffi_cl.py
index d45d867453c34977d8f6a49ce8fca513836059df..2d76f916a4fb5bec35fc87f159dfaa2392087860 100644
--- a/pyopencl/cffi_cl.py
+++ b/pyopencl/cffi_cl.py
@@ -30,7 +30,6 @@ import six
 from six.moves import map, range, zip, intern
 
 import warnings
-from warnings import warn
 import numpy as np
 import sys
 import re
@@ -93,7 +92,6 @@ _lib.set_py_funcs(_py_gc, _py_ref, _py_deref, _py_call)
 # are we running on pypy?
 _PYPY = '__pypy__' in sys.builtin_module_names
 _CPY2 = not _PYPY and sys.version_info < (3,)
-_CPY26 = _CPY2 and sys.version_info < (2, 7)
 
 try:
     _unicode = eval('unicode')
@@ -1661,23 +1659,6 @@ class LocalMemory(_CLKernelArg):
 
 # {{{ Kernel
 
-# {{{ arg packing helpers
-
-_size_t_char = ({
-    8: 'Q',
-    4: 'L',
-    2: 'H',
-    1: 'B',
-})[_ffi.sizeof('size_t')]
-_type_char_map = {
-    'n': _size_t_char.lower(),
-    'N': _size_t_char
-}
-del _size_t_char
-
-# }}}
-
-
 class Kernel(_Common):
     _id = 'kernel'
 
@@ -1695,160 +1676,18 @@ class Kernel(_Common):
 
     def _setup(self, prg):
         self._source = getattr(prg, "_source", None)
-        self._generate_naive_call()
+        from pyopencl.invoker import generate_enqueue_and_set_args
+        self._enqueue, self._set_args = generate_enqueue_and_set_args(
+                self.function_name, self.num_args, self.num_args,
+                None,
+                warn_about_arg_count_bug=None,
+                work_around_arg_count_bug=None)
+
         self._wg_info_cache = {}
         return self
 
-    # {{{ code generation for __call__, set_args
-
-    def _set_set_args_body(self, body, num_passed_args):
-        from pytools.py_codegen import (
-                PythonFunctionGenerator,
-                PythonCodeGenerator,
-                Indentation)
-
-        arg_names = ["arg%d" % i for i in range(num_passed_args)]
-
-        # {{{ wrap in error handler
-
-        err_gen = PythonCodeGenerator()
-
-        def gen_error_handler():
-            err_gen("""
-                if current_arg is not None:
-                    args = [{args}]
-                    advice = ""
-                    from pyopencl.array import Array
-                    if isinstance(args[current_arg], Array):
-                        advice = " (perhaps you meant to pass 'array.data' " \
-                            "instead of the array itself?)"
-
-                    raise _cl.LogicError(
-                            "when processing argument #%d (1-based): %s%s"
-                            % (current_arg+1, str(e), advice))
-                else:
-                    raise
-                """
-                .format(args=", ".join(arg_names)))
-            err_gen("")
-
-        err_gen("try:")
-        with Indentation(err_gen):
-            err_gen.extend(body)
-        err_gen("except TypeError as e:")
-        with Indentation(err_gen):
-            gen_error_handler()
-        err_gen("except _cl.LogicError as e:")
-        with Indentation(err_gen):
-            gen_error_handler()
-
-        # }}}
-
-        def add_preamble(gen):
-            gen.add_to_preamble(
-                "import numpy as np")
-            gen.add_to_preamble(
-                "import pyopencl.cffi_cl as _cl")
-            gen.add_to_preamble(
-                "from pyopencl.cffi_cl import _lib, "
-                "_ffi, _handle_error, _CLKernelArg")
-            gen.add_to_preamble("from pyopencl import status_code")
-            gen.add_to_preamble("from struct import pack")
-            gen.add_to_preamble("")
-
-        # {{{ generate _enqueue
-
-        gen = PythonFunctionGenerator("enqueue_knl_%s" % self.function_name,
-                ["self", "queue", "global_size", "local_size"]
-                + arg_names
-                + ["global_offset=None", "g_times_l=None", "wait_for=None"])
-
-        add_preamble(gen)
-        gen.extend(err_gen)
-
-        gen("""
-            return _cl.enqueue_nd_range_kernel(queue, self, global_size, local_size,
-                    global_offset, wait_for, g_times_l=g_times_l)
-            """)
-
-        self._enqueue = gen.get_function()
-
-        # }}}
-
-        # {{{ generate set_args
-
-        gen = PythonFunctionGenerator("_set_args", ["self"] + arg_names)
-
-        add_preamble(gen)
-        gen.extend(err_gen)
-
-        self._set_args = gen.get_function()
-
-        # }}}
-
-    def _generate_buffer_arg_setter(self, gen, arg_idx, buf_var):
-        from pytools.py_codegen import Indentation
-
-        if _CPY2:
-            # https://github.com/numpy/numpy/issues/5381
-            gen("if isinstance({buf_var}, np.generic):".format(buf_var=buf_var))
-            with Indentation(gen):
-                gen("{buf_var} = np.getbuffer({buf_var})".format(buf_var=buf_var))
-
-        gen("""
-            c_buf, sz, _ = _cl._c_buffer_from_obj({buf_var})
-            status = _lib.kernel__set_arg_buf(self.ptr, {arg_idx}, c_buf, sz)
-            if status != _ffi.NULL:
-                _handle_error(status)
-            """
-            .format(arg_idx=arg_idx, buf_var=buf_var))
-
-    def _generate_bytes_arg_setter(self, gen, arg_idx, buf_var):
-        gen("""
-            status = _lib.kernel__set_arg_buf(self.ptr, {arg_idx},
-                {buf_var}, len({buf_var}))
-            if status != _ffi.NULL:
-                _handle_error(status)
-            """
-            .format(arg_idx=arg_idx, buf_var=buf_var))
-
-    def _generate_generic_arg_handler(self, gen, arg_idx, arg_var):
-        from pytools.py_codegen import Indentation
-
-        gen("""
-            if {arg_var} is None:
-                status = _lib.kernel__set_arg_null(self.ptr, {arg_idx})
-                if status != _ffi.NULL:
-                    _handle_error(status)
-            elif isinstance({arg_var}, _cl._CLKernelArg):
-                self._set_arg_clkernelarg({arg_idx}, {arg_var})
-            """
-            .format(arg_idx=arg_idx, arg_var=arg_var))
-
-        gen("else:")
-        with Indentation(gen):
-            self._generate_buffer_arg_setter(gen, arg_idx, arg_var)
-
-    def _generate_naive_call(self):
-        num_args = self.num_args
-
-        from pytools.py_codegen import PythonCodeGenerator
-        gen = PythonCodeGenerator()
-
-        if num_args == 0:
-            gen("pass")
-
-        for i in range(num_args):
-            gen("# process argument {arg_idx}".format(arg_idx=i))
-            gen("")
-            gen("current_arg = {arg_idx}".format(arg_idx=i))
-            self._generate_generic_arg_handler(gen, i, "arg%d" % i)
-            gen("")
-
-        self._set_set_args_body(gen, num_args)
-
     def set_scalar_arg_dtypes(self, scalar_arg_dtypes):
-        self._scalar_arg_dtypes = scalar_arg_dtypes
+        self._scalar_arg_dtypes = tuple(scalar_arg_dtypes)
 
         # {{{ arg counting bug handling
 
@@ -1872,119 +1711,15 @@ class Kernel(_Common):
             else:
                 warn_about_arg_count_bug = True
 
-        fp_arg_count = 0
-
         # }}}
 
-        cl_arg_idx = 0
-
-        from pytools.py_codegen import PythonCodeGenerator
-        gen = PythonCodeGenerator()
-
-        if not scalar_arg_dtypes:
-            gen("pass")
-
-        for arg_idx, arg_dtype in enumerate(scalar_arg_dtypes):
-            gen("# process argument {arg_idx}".format(arg_idx=arg_idx))
-            gen("")
-            gen("current_arg = {arg_idx}".format(arg_idx=arg_idx))
-            arg_var = "arg%d" % arg_idx
-
-            if arg_dtype is None:
-                self._generate_generic_arg_handler(gen, cl_arg_idx, arg_var)
-                cl_arg_idx += 1
-                gen("")
-                continue
-
-            arg_dtype = np.dtype(arg_dtype)
-
-            if arg_dtype.char == "V":
-                self._generate_generic_arg_handler(gen, cl_arg_idx, arg_var)
-                cl_arg_idx += 1
-
-            elif arg_dtype.kind == "c":
-                if warn_about_arg_count_bug:
-                    warn("{knl_name}: arguments include complex numbers, and "
-                            "some (but not all) of the target devices mishandle "
-                            "struct kernel arguments (hence the workaround is "
-                            "disabled".format(
-                                knl_name=self.function_name, stacklevel=2))
-
-                if arg_dtype == np.complex64:
-                    arg_char = "f"
-                elif arg_dtype == np.complex128:
-                    arg_char = "d"
-                else:
-                    raise TypeError("unexpected complex type: %s" % arg_dtype)
-
-                if (work_around_arg_count_bug == "pocl"
-                        and arg_dtype == np.complex128
-                        and fp_arg_count + 2 <= 8):
-                    gen(
-                            "buf = pack('{arg_char}', {arg_var}.real)"
-                            .format(arg_char=arg_char, arg_var=arg_var))
-                    self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
-                    cl_arg_idx += 1
-                    gen("current_arg = current_arg + 1000")
-                    gen(
-                            "buf = pack('{arg_char}', {arg_var}.imag)"
-                            .format(arg_char=arg_char, arg_var=arg_var))
-                    self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
-                    cl_arg_idx += 1
-
-                elif (work_around_arg_count_bug == "apple"
-                        and arg_dtype == np.complex128
-                        and fp_arg_count + 2 <= 8):
-                    raise NotImplementedError("No work-around to "
-                            "Apple's broken structs-as-kernel arg "
-                            "handling has been found. "
-                            "Cannot pass complex numbers to kernels.")
-
-                else:
-                    gen(
-                            "buf = pack('{arg_char}{arg_char}', "
-                            "{arg_var}.real, {arg_var}.imag)"
-                            .format(arg_char=arg_char, arg_var=arg_var))
-                    self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
-                    cl_arg_idx += 1
-
-                fp_arg_count += 2
-
-            elif arg_dtype.char in "IL" and _CPY26:
-                # Prevent SystemError: ../Objects/longobject.c:336: bad
-                # argument to internal function
-
-                gen(
-                        "buf = pack('{arg_char}', long({arg_var}))"
-                        .format(arg_char=arg_dtype.char, arg_var=arg_var))
-                self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
-                cl_arg_idx += 1
-
-            else:
-                if arg_dtype.kind == "f":
-                    fp_arg_count += 1
-
-                arg_char = arg_dtype.char
-                arg_char = _type_char_map.get(arg_char, arg_char)
-                gen(
-                        "buf = pack('{arg_char}', {arg_var})"
-                        .format(
-                            arg_char=arg_char,
-                            arg_var=arg_var))
-                self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
-                cl_arg_idx += 1
-
-            gen("")
-
-        if cl_arg_idx != self.num_args:
-            raise TypeError(
-                "length of argument list (%d) and "
-                "CL-generated number of arguments (%d) do not agree"
-                % (cl_arg_idx, self.num_args))
-
-        self._set_set_args_body(gen, len(scalar_arg_dtypes))
-
-        # }}}
+        from pyopencl.invoker import generate_enqueue_and_set_args
+        self._enqueue, self._set_args = generate_enqueue_and_set_args(
+                self.function_name,
+                len(scalar_arg_dtypes), self.num_args,
+                self._scalar_arg_dtypes,
+                warn_about_arg_count_bug=warn_about_arg_count_bug,
+                work_around_arg_count_bug=work_around_arg_count_bug)
 
     def set_args(self, *args, **kwargs):
         # Need to duplicate the 'self' argument for dynamically generated method
diff --git a/pyopencl/invoker.py b/pyopencl/invoker.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e79efc9b0906318709de14c07cec68c06198e1a
--- /dev/null
+++ b/pyopencl/invoker.py
@@ -0,0 +1,416 @@
+from __future__ import division, absolute_import
+
+__copyright__ = """
+Copyright (C) 2017 Andreas Kloeckner
+"""
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import sys
+import numpy as np
+
+from warnings import warn
+from pyopencl._cffi import ffi as _ffi
+from pytools.persistent_dict import (
+        PersistentDict,
+        KeyBuilder as KeyBuilderBase)
+
+_PYPY = '__pypy__' in sys.builtin_module_names
+_CPY2 = not _PYPY and sys.version_info < (3,)
+_CPY26 = _CPY2 and sys.version_info < (2, 7)
+
+
+# {{{ arg packing helpers
+
+_size_t_char = ({
+    8: 'Q',
+    4: 'L',
+    2: 'H',
+    1: 'B',
+})[_ffi.sizeof('size_t')]
+_type_char_map = {
+    'n': _size_t_char.lower(),
+    'N': _size_t_char
+}
+del _size_t_char
+
+# }}}
+
+
+# {{{ individual arg handling
+
+def generate_buffer_arg_setter(gen, arg_idx, buf_var):
+    """Emit code setting kernel arg *arg_idx* from buffer object *buf_var*."""
+    from pytools.py_codegen import Indentation
+
+    if _CPY2:
+        # https://github.com/numpy/numpy/issues/5381
+        gen("if isinstance({buf_var}, np.generic):".format(buf_var=buf_var))
+        with Indentation(gen):
+            gen("{buf_var} = np.getbuffer({buf_var})".format(buf_var=buf_var))
+
+    gen("""
+        c_buf, sz, _ = _cl._c_buffer_from_obj({buf_var})
+        status = _lib.kernel__set_arg_buf(self.ptr, {arg_idx}, c_buf, sz)
+        if status != _ffi.NULL:
+            _handle_error(status)
+        """
+        .format(arg_idx=arg_idx, buf_var=buf_var))
+
+
+def generate_bytes_arg_setter(gen, arg_idx, buf_var):
+    """Emit code setting kernel arg *arg_idx* from *buf_var* and its length."""
+    gen("""
+        status = _lib.kernel__set_arg_buf(self.ptr, {arg_idx},
+            {buf_var}, len({buf_var}))
+        if status != _ffi.NULL:
+            _handle_error(status)
+        """
+        .format(arg_idx=arg_idx, buf_var=buf_var))
+
+
+def generate_generic_arg_handler(gen, arg_idx, arg_var):
+    """Emit code handling a ``None``, ``_CLKernelArg`` or buffer argument."""
+    from pytools.py_codegen import Indentation
+
+    gen("""
+        if {arg_var} is None:
+            status = _lib.kernel__set_arg_null(self.ptr, {arg_idx})
+            if status != _ffi.NULL:
+                _handle_error(status)
+        elif isinstance({arg_var}, _cl._CLKernelArg):
+            self._set_arg_clkernelarg({arg_idx}, {arg_var})
+        """
+        .format(arg_idx=arg_idx, arg_var=arg_var))
+
+    gen("else:")
+    with Indentation(gen):
+        generate_buffer_arg_setter(gen, arg_idx, arg_var)
+
+# }}}
+
+
+# {{{ generic arg handling body
+
+def generate_generic_arg_handling_body(num_args):
+    """Return a code generator that handles *num_args* untyped arguments."""
+    from pytools.py_codegen import PythonCodeGenerator
+    gen = PythonCodeGenerator()
+
+    if num_args == 0:
+        gen("pass")
+
+    for i in range(num_args):
+        gen("# process argument {arg_idx}".format(arg_idx=i))
+        gen("")
+        gen("current_arg = {arg_idx}".format(arg_idx=i))
+        generate_generic_arg_handler(gen, i, "arg%d" % i)
+        gen("")
+
+    return gen
+
+# }}}
+
+
+# {{{ specific arg handling body
+
+def generate_specific_arg_handling_body(function_name,
+        num_cl_args, scalar_arg_dtypes,
+        work_around_arg_count_bug, warn_about_arg_count_bug):
+    """Return a code generator packing arguments per *scalar_arg_dtypes*."""
+
+    assert work_around_arg_count_bug is not None
+    assert warn_about_arg_count_bug is not None
+
+    fp_arg_count = 0
+    cl_arg_idx = 0
+
+    from pytools.py_codegen import PythonCodeGenerator
+    gen = PythonCodeGenerator()
+
+    if not scalar_arg_dtypes:
+        gen("pass")
+
+    for arg_idx, arg_dtype in enumerate(scalar_arg_dtypes):
+        gen("# process argument {arg_idx}".format(arg_idx=arg_idx))
+        gen("")
+        gen("current_arg = {arg_idx}".format(arg_idx=arg_idx))
+        arg_var = "arg%d" % arg_idx
+
+        if arg_dtype is None:
+            generate_generic_arg_handler(gen, cl_arg_idx, arg_var)
+            cl_arg_idx += 1
+            gen("")
+            continue
+
+        arg_dtype = np.dtype(arg_dtype)
+
+        if arg_dtype.char == "V":
+            generate_generic_arg_handler(gen, cl_arg_idx, arg_var)
+            cl_arg_idx += 1
+
+        elif arg_dtype.kind == "c":
+            if warn_about_arg_count_bug:
+                warn("{knl_name}: arguments include complex numbers, and "
+                        "some (but not all) of the target devices mishandle "
+                        "struct kernel arguments (hence the workaround is "
+                        "disabled)".format(
+                            knl_name=function_name), stacklevel=2)
+
+            if arg_dtype == np.complex64:
+                arg_char = "f"
+            elif arg_dtype == np.complex128:
+                arg_char = "d"
+            else:
+                raise TypeError("unexpected complex type: %s" % arg_dtype)
+
+            if (work_around_arg_count_bug == "pocl"
+                    and arg_dtype == np.complex128
+                    and fp_arg_count + 2 <= 8):
+                gen(
+                        "buf = pack('{arg_char}', {arg_var}.real)"
+                        .format(arg_char=arg_char, arg_var=arg_var))
+                generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
+                cl_arg_idx += 1
+                gen("current_arg = current_arg + 1000")
+                gen(
+                        "buf = pack('{arg_char}', {arg_var}.imag)"
+                        .format(arg_char=arg_char, arg_var=arg_var))
+                generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
+                cl_arg_idx += 1
+
+            elif (work_around_arg_count_bug == "apple"
+                    and arg_dtype == np.complex128
+                    and fp_arg_count + 2 <= 8):
+                raise NotImplementedError("No work-around to "
+                        "Apple's broken structs-as-kernel arg "
+                        "handling has been found. "
+                        "Cannot pass complex numbers to kernels.")
+
+            else:
+                gen(
+                        "buf = pack('{arg_char}{arg_char}', "
+                        "{arg_var}.real, {arg_var}.imag)"
+                        .format(arg_char=arg_char, arg_var=arg_var))
+                generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
+                cl_arg_idx += 1
+
+            fp_arg_count += 2
+
+        elif arg_dtype.char in "IL" and _CPY26:
+            # Prevent SystemError: ../Objects/longobject.c:336: bad
+            # argument to internal function
+
+            gen(
+                    "buf = pack('{arg_char}', long({arg_var}))"
+                    .format(arg_char=arg_dtype.char, arg_var=arg_var))
+            generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
+            cl_arg_idx += 1
+
+        else:
+            if arg_dtype.kind == "f":
+                fp_arg_count += 1
+
+            arg_char = arg_dtype.char
+            arg_char = _type_char_map.get(arg_char, arg_char)
+            gen(
+                    "buf = pack('{arg_char}', {arg_var})"
+                    .format(
+                        arg_char=arg_char,
+                        arg_var=arg_var))
+            generate_bytes_arg_setter(gen, cl_arg_idx, "buf")
+            cl_arg_idx += 1
+
+        gen("")
+
+    if cl_arg_idx != num_cl_args:
+        raise TypeError(
+            "length of argument list (%d) and "
+            "CL-generated number of arguments (%d) do not agree"
+            % (cl_arg_idx, num_cl_args))
+
+    return gen
+
+# }}}
+
+
+# {{{ error handler
+
+def wrap_in_error_handler(body, arg_names):
+    """Wrap *body* in try/except blocks that report the failing argument."""
+    from pytools.py_codegen import PythonCodeGenerator, Indentation
+
+    err_gen = PythonCodeGenerator()
+
+    def gen_error_handler():
+        err_gen("""
+            if current_arg is not None:
+                args = [{args}]
+                advice = ""
+                from pyopencl.array import Array
+                if isinstance(args[current_arg], Array):
+                    advice = " (perhaps you meant to pass 'array.data' " \
+                        "instead of the array itself?)"
+
+                raise _cl.LogicError(
+                        "when processing argument #%d (1-based): %s%s"
+                        % (current_arg+1, str(e), advice))
+            else:
+                raise
+            """
+            .format(args=", ".join(arg_names)))
+        err_gen("")
+
+    err_gen("try:")
+    with Indentation(err_gen):
+        err_gen.extend(body)
+    err_gen("except TypeError as e:")
+    with Indentation(err_gen):
+        gen_error_handler()
+    err_gen("except _cl.LogicError as e:")
+    with Indentation(err_gen):
+        gen_error_handler()
+
+    return err_gen
+
+# }}}
+
+
+def add_local_imports(gen):
+    """Emit the imports that the generated function bodies rely on."""
+    gen("import numpy as np")
+    gen("import pyopencl.cffi_cl as _cl")
+    gen(
+        "from pyopencl.cffi_cl import _lib, "
+        "_ffi, _handle_error, _CLKernelArg")
+    gen("")
+
+
+def _generate_enqueue_and_set_args_module(function_name,
+        num_passed_args, num_cl_args,
+        scalar_arg_dtypes,
+        work_around_arg_count_bug, warn_about_arg_count_bug):
+    """Return ``(picklable module, enqueue function name)`` of generated code."""
+
+    from pytools.py_codegen import PythonCodeGenerator, Indentation
+
+    arg_names = ["arg%d" % i for i in range(num_passed_args)]
+
+    if scalar_arg_dtypes is None:
+        body = generate_generic_arg_handling_body(num_passed_args)
+    else:
+        body = generate_specific_arg_handling_body(
+                function_name, num_cl_args, scalar_arg_dtypes,
+                warn_about_arg_count_bug=warn_about_arg_count_bug,
+                work_around_arg_count_bug=work_around_arg_count_bug)
+
+    err_handler = wrap_in_error_handler(body, arg_names)
+
+    gen = PythonCodeGenerator()
+
+    gen("from struct import pack")
+    gen("from pyopencl import status_code")
+    gen("")
+
+    # {{{ generate _enqueue
+
+    enqueue_name = "enqueue_knl_%s" % function_name
+    gen("def %s(%s):"
+            % (enqueue_name,
+                ", ".join(
+                    ["self", "queue", "global_size", "local_size"]
+                    + arg_names
+                    + ["global_offset=None", "g_times_l=None",
+                        "wait_for=None"])))
+
+    with Indentation(gen):
+        add_local_imports(gen)
+        gen.extend(err_handler)
+
+        gen("""
+            return _cl.enqueue_nd_range_kernel(queue, self, global_size, local_size,
+                    global_offset, wait_for, g_times_l=g_times_l)
+            """)
+
+    # }}}
+
+    # {{{ generate set_args
+
+    gen("")
+    gen("def set_args(%s):"
+            % (", ".join(["self"] + arg_names)))
+
+    with Indentation(gen):
+        add_local_imports(gen)
+        gen.extend(err_handler)
+
+    # }}}
+
+    return gen.get_picklable_module(), enqueue_name
+
+
+class NumpyTypesKeyBuilder(KeyBuilderBase):
+    """Key builder that hashes numpy scalar types by their ``__name__``."""
+    def update_for_type(self, key_hash, key):
+        if issubclass(key, np.generic):
+            self.update_for_str(key_hash, key.__name__)
+            return
+
+        raise TypeError("unsupported type for persistent hash keying: %s"
+                % (key,))
+
+
+invoker_cache = PersistentDict("pyopencl-invoker-cache-v1",
+        key_builder=NumpyTypesKeyBuilder())
+
+
+def generate_enqueue_and_set_args(function_name,
+        num_passed_args, num_cl_args,
+        scalar_arg_dtypes,
+        work_around_arg_count_bug, warn_about_arg_count_bug):
+    """Return ``(enqueue, set_args)`` functions, consulting *invoker_cache*."""
+
+    cache_key = (function_name, num_passed_args, num_cl_args,
+            scalar_arg_dtypes,
+            work_around_arg_count_bug, warn_about_arg_count_bug)
+
+    from_cache = False
+
+    try:
+        result = invoker_cache[cache_key]
+        from_cache = True
+    except KeyError:
+        pass
+
+    if not from_cache:
+        result = _generate_enqueue_and_set_args_module(*cache_key)
+        invoker_cache[cache_key] = result
+
+    pmod, enqueue_name = result
+
+    return (
+            pmod.mod_globals[enqueue_name],
+            pmod.mod_globals["set_args"])
+
+# }}}
+
+
+# vim: foldmethod=marker
diff --git a/setup.py b/setup.py
index 3d2ddd0e9d359374abebb2ee2330381000dfa738..cdd356ae9077145346627c271d32a12661c08d09 100644
--- a/setup.py
+++ b/setup.py
@@ -219,7 +219,7 @@ def main():
 
             install_requires=[
                 "numpy",
-                "pytools>=2015.1.2",
+                "pytools>=2017.2",
                 "pytest>=2",
                 "decorator>=3.2.0",
                 "cffi>=1.1.0",