diff --git a/pyopencl/invoker.py b/pyopencl/invoker.py
index fec2a2ca29fe4df98dd412b35828ce1f1b38a611..5254882059888888f1a6ea170641c60f75ec1269 100644
--- a/pyopencl/invoker.py
+++ b/pyopencl/invoker.py
@@ -22,7 +22,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 OTHER DEALINGS IN THE SOFTWARE.
 """
 
-import sys
 import numpy as np
 
 from warnings import warn
@@ -31,9 +30,6 @@ from pytools.persistent_dict import WriteOncePersistentDict
 from pytools.py_codegen import Indentation, PythonCodeGenerator
 from pyopencl.tools import _NumpyTypesKeyBuilder
 
-_PYPY = "__pypy__" in sys.builtin_module_names
-
-
 # {{{ arg packing helpers
 
 _size_t_char = ({
@@ -75,6 +71,11 @@ def generate_generic_arg_handling_body(num_args):
 
 # {{{ specific arg handling body
 
+# Type characters that have a matching case in Kernel.set_arg_buf_pack
+# (src/wrap_cl.hpp); keep the two lists in sync.
+BUF_PACK_TYPECHARS = ["c", "b", "B", "h", "H", "i", "I", "l", "L", "f", "d"]
+
+
 def generate_specific_arg_handling_body(function_name,
         num_cl_args, scalar_arg_dtypes,
         work_around_arg_count_bug, warn_about_arg_count_bug):
@@ -91,7 +92,19 @@ def generate_specific_arg_handling_body(function_name,
         gen("pass")
 
     gen_indices_and_args = []
-    buf_args_indices = []
+    buf_indices_and_args = []
+    buf_pack_indices_and_args = []
+
+    def add_buf_arg(arg_idx, typechar, expr_str):
+        # Type chars in BUF_PACK_TYPECHARS go to the "_buf_pack" list as
+        # (index, typechar bytes literal, expression); others get pack()ed.
+        if typechar in BUF_PACK_TYPECHARS:
+            buf_pack_indices_and_args.append(arg_idx)
+            buf_pack_indices_and_args.append(repr(typechar.encode()))
+            buf_pack_indices_and_args.append(expr_str)
+        else:
+            buf_indices_and_args.append(arg_idx)
+            buf_indices_and_args.append(f"pack('{typechar}', {expr_str})")
 
     for arg_idx, arg_dtype in enumerate(scalar_arg_dtypes):
         arg_var = "arg%d" % arg_idx
@@ -106,8 +119,8 @@ def generate_specific_arg_handling_body(function_name,
         arg_dtype = np.dtype(arg_dtype)
 
         if arg_dtype.char == "V":
-            gen_indices_and_args.append(cl_arg_idx)
-            gen_indices_and_args.append(arg_var)
+            buf_indices_and_args.append(cl_arg_idx)
+            buf_indices_and_args.append(arg_var)
             cl_arg_idx += 1
 
         elif arg_dtype.kind == "c":
@@ -128,11 +141,9 @@ def generate_specific_arg_handling_body(function_name,
             if (work_around_arg_count_bug == "pocl"
                     and arg_dtype == np.complex128
                     and fp_arg_count + 2 <= 8):
-                buf_args_indices.append(cl_arg_idx)
-                buf_args_indices.append(f"pack('{arg_char}', {arg_var}.real)")
+                add_buf_arg(cl_arg_idx, arg_char, f"{arg_var}.real")
                 cl_arg_idx += 1
-                buf_args_indices.append(cl_arg_idx)
-                buf_args_indices.append(f"pack('{arg_char}', {arg_var}.imag)")
+                add_buf_arg(cl_arg_idx, arg_char, f"{arg_var}.imag")
                 cl_arg_idx += 1
 
             elif (work_around_arg_count_bug == "apple"
@@ -144,8 +155,8 @@ def generate_specific_arg_handling_body(function_name,
                         "Cannot pass complex numbers to kernels.")
 
             else:
-                buf_args_indices.append(cl_arg_idx)
-                buf_args_indices.append(
+                buf_indices_and_args.append(cl_arg_idx)
+                buf_indices_and_args.append(
                     f"pack('{arg_char}{arg_char}', {arg_var}.real, {arg_var}.imag)")
                 cl_arg_idx += 1
 
@@ -157,15 +168,15 @@ def generate_specific_arg_handling_body(function_name,
             arg_char = arg_dtype.char
             arg_char = _type_char_map.get(arg_char, arg_char)
 
-            buf_args_indices.append(cl_arg_idx)
-            buf_args_indices.append(f"pack('{arg_char}', {arg_var})")
+            add_buf_arg(cl_arg_idx, arg_char, arg_var)
             cl_arg_idx += 1
 
         gen("")
 
-    for arg_kind, args_and_indices in [
-            ("", gen_indices_and_args),
-            ("_buf", buf_args_indices)
+    for arg_kind, args_and_indices, entry_length in [
+            ("", gen_indices_and_args, 2),
+            ("_buf", buf_indices_and_args, 2),
+            ("_buf_pack", buf_pack_indices_and_args, 3),
             ]:
-        assert len(args_and_indices) % 2 == 0
+        assert len(args_and_indices) % entry_length == 0
         if args_and_indices:
@@ -247,7 +258,7 @@ def _generate_enqueue_and_set_args_module(function_name,
 
 
 invoker_cache = WriteOncePersistentDict(
-        "pyopencl-invoker-cache-v30",
+        "pyopencl-invoker-cache-v34",
         key_builder=_NumpyTypesKeyBuilder())
 
 
diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp
index 89afb8d697e648b940598785fbb08cdd5b6887e9..58663e0c26f638f6a37723b0d0c60a6fae02e11a 100644
--- a/src/wrap_cl.hpp
+++ b/src/wrap_cl.hpp
@@ -4372,6 +4372,42 @@ namespace pyopencl
             (m_kernel, arg_index, sizeof(cl_command_queue), &q));
       }
 
+      // Convert obj to the C type selected by the single type character in
+      // py_typechar (a bytes object) and pass the value to clSetKernelArg.
+      void set_arg_buf_pack(cl_uint arg_index, py::handle py_typechar, py::handle obj)
+      {
+#define PYOPENCL_KERNEL_PACK_AND_SET_ARG(TYPECH_VAL, TYPE) \
+        case TYPECH_VAL: \
+          { \
+            TYPE val = py::cast<TYPE>(obj); \
+            PYOPENCL_CALL_GUARDED(clSetKernelArg, (m_kernel, arg_index, sizeof(val), &val)); \
+            break; \
+          }
+
+        /* This is an internal interface that assumes it gets fed well-formed
+         * data.  No meaningful error checking is being performed on
+         * py_typechar, on purpose.
+         */
+        switch (*PyBytes_AS_STRING(py_typechar.ptr()))
+        {
+          PYOPENCL_KERNEL_PACK_AND_SET_ARG('c', char)
+          PYOPENCL_KERNEL_PACK_AND_SET_ARG('b', signed char)
+          PYOPENCL_KERNEL_PACK_AND_SET_ARG('B', unsigned char)
+          PYOPENCL_KERNEL_PACK_AND_SET_ARG('h', short)
+          PYOPENCL_KERNEL_PACK_AND_SET_ARG('H', unsigned short)
+          PYOPENCL_KERNEL_PACK_AND_SET_ARG('i', int)
+          PYOPENCL_KERNEL_PACK_AND_SET_ARG('I', unsigned int)
+          PYOPENCL_KERNEL_PACK_AND_SET_ARG('l', long)
+          PYOPENCL_KERNEL_PACK_AND_SET_ARG('L', unsigned long)
+          PYOPENCL_KERNEL_PACK_AND_SET_ARG('f', float)
+          PYOPENCL_KERNEL_PACK_AND_SET_ARG('d', double)
+          default:
+            throw error("Kernel.set_arg_buf_pack", CL_INVALID_VALUE,
+                "invalid type char");
+        }
+#undef PYOPENCL_KERNEL_PACK_AND_SET_ARG
+      }
+
       void set_arg_buf(cl_uint arg_index, py::handle py_buffer)
       {
         const void *buf;
@@ -4453,57 +4489,6 @@ namespace pyopencl
         set_arg_buf(arg_index, arg);
       }
 
-      static
-      void set_arg_multi(
-          std::function<void(cl_uint, py::handle)> set_arg_func,
-          py::tuple args_and_indices)
-      {
-        // This is an internal interface used by generated invokers.
-        // We can save a tiny bit of time by not checking their work.
-        /*
-        if (indices.size() != args.size())
-          throw error("Kernel.set_arg_multi", CL_INVALID_VALUE,
-              "indices and args arguments do not have the same length");
-        */
-
-        cl_uint arg_index;
-        py::handle arg_value;
-
-        auto it = args_and_indices.begin(), end = args_and_indices.end();
-        try
-        {
-          while (it != end)
-          {
-            arg_index = py::cast<cl_uint>(*it++);
-            arg_value = *it++;
-            set_arg_func(arg_index, arg_value);
-          }
-        }
-        catch (error &err)
-        {
-          std::string msg(
-              std::string("when processing arg#") + std::to_string(arg_index+1)
-              + std::string(" (1-based): ") + std::string(err.what()));
-
-          auto mod_cl_ary(py::module::import("pyopencl.array"));
-          auto cls_array(mod_cl_ary.attr("Array"));
-          if (arg_value.ptr() && py::isinstance(arg_value, cls_array))
-            msg.append(
-                " (perhaps you meant to pass 'array.data' instead of the array itself?)");
-
-          throw error(err.routine().c_str(), err.code(), msg.c_str());
-        }
-        catch (std::exception &err)
-        {
-          std::string msg(
-              std::string("when processing arg#") + std::to_string(arg_index+1)
-              + std::string(" (1-based): ") + std::string(err.what()));
-
-          throw std::runtime_error(msg.c_str());
-        }
-      }
-
-
       py::object get_info(cl_kernel_info param_name) const
       {
         switch (param_name)
@@ -4668,6 +4653,91 @@ namespace pyopencl
 #endif
   };
 
+  // Shared catch clauses for the set_arg_multi overloads: prefix the message
+  // with the 1-based argument number; expects arg_index/arg_value in scope.
+#define PYOPENCL_KERNEL_SET_ARG_MULTI_ERROR_HANDLER \
+    catch (error &err) \
+    { \
+      std::string msg( \
+          std::string("when processing arg#") + std::to_string(arg_index+1) \
+          + std::string(" (1-based): ") + std::string(err.what())); \
+      auto mod_cl_ary(py::module::import("pyopencl.array")); \
+      auto cls_array(mod_cl_ary.attr("Array")); \
+      if (arg_value.ptr() && py::isinstance(arg_value, cls_array)) \
+        msg.append( \
+            " (perhaps you meant to pass 'array.data' instead of the array itself?)"); \
+      throw error(err.routine().c_str(), err.code(), msg.c_str()); \
+    } \
+    catch (std::exception &err) \
+    { \
+      std::string msg( \
+          std::string("when processing arg#") + std::to_string(arg_index+1) \
+          + std::string(" (1-based): ") + std::string(err.what())); \
+      throw std::runtime_error(msg.c_str()); \
+    }
+
+  // Walk a flat (index, value, index, value, ...) tuple and hand each pair
+  // to set_arg_func.
+  inline
+  void set_arg_multi(
+      std::function<void(cl_uint, py::handle)> set_arg_func,
+      py::tuple args_and_indices)
+  {
+    cl_uint arg_index;
+    py::handle arg_value;
+
+    auto it = args_and_indices.begin(), end = args_and_indices.end();
+    try
+    {
+      /* This is an internal interface that assumes it gets fed well-formed
+       * data.  No meaningful error checking is being performed on
+       * the iterator running out in the middle of an entry, on purpose.
+       */
+      while (it != end)
+      {
+        // special value in case integer cast fails
+        arg_index = 9999 - 1;
+
+        arg_index = py::cast<cl_uint>(*it++);
+        arg_value = *it++;
+        set_arg_func(arg_index, arg_value);
+      }
+    }
+    PYOPENCL_KERNEL_SET_ARG_MULTI_ERROR_HANDLER
+  }
+
+
+  // Walk a flat (index, descriptor, value, ...) tuple and hand each triple
+  // to set_arg_func.
+  inline
+  void set_arg_multi(
+      std::function<void(cl_uint, py::handle, py::handle)> set_arg_func,
+      py::tuple args_and_indices)
+  {
+    cl_uint arg_index;
+    py::handle arg_descr, arg_value;
+
+    auto it = args_and_indices.begin(), end = args_and_indices.end();
+    try
+    {
+      /* This is an internal interface that assumes it gets fed well-formed
+       * data.  No meaningful error checking is being performed on
+       * the iterator running out in the middle of an entry, on purpose.
+       */
+      while (it != end)
+      {
+        // special value in case integer cast fails
+        arg_index = 9999 - 1;
+
+        arg_index = py::cast<cl_uint>(*it++);
+        arg_descr = *it++;
+        arg_value = *it++;
+        set_arg_func(arg_index, arg_descr, arg_value);
+      }
+    }
+    PYOPENCL_KERNEL_SET_ARG_MULTI_ERROR_HANDLER
+  }
+
 
   inline
   py::list create_kernels_in_program(program &pgm)
diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp
index d4b60f64106a2c4f9c34f7741ae6d18c1feaee5f..205b31ec452b388fe1b32f3443e63762b33a10c0 100644
--- a/src/wrap_cl_part_2.cpp
+++ b/src/wrap_cl_part_2.cpp
@@ -473,17 +473,25 @@ void pyopencl_expose_part_2(py::module &m)
       .def("_set_arg_multi",
           [](cls &knl, py::tuple indices_and_args)
           {
-            cls::set_arg_multi(
+            set_arg_multi(
                 [&](cl_uint i, py::handle arg) { knl.set_arg(i, arg); },
                 indices_and_args);
           })
       .def("_set_arg_buf_multi",
           [](cls &knl, py::tuple indices_and_args)
           {
-            cls::set_arg_multi(
+            set_arg_multi(
                 [&](cl_uint i, py::handle arg) { knl.set_arg_buf(i, arg); },
                 indices_and_args);
           })
+      .def("_set_arg_buf_pack_multi",
+          [](cls &knl, py::tuple indices_chars_and_args)
+          {
+            set_arg_multi(
+                [&](cl_uint i, py::handle typechar, py::handle arg)
+                { knl.set_arg_buf_pack(i, typechar, arg); },
+                indices_chars_and_args);
+          })
       .DEF_SIMPLE_METHOD(set_arg)
 #if PYOPENCL_CL_VERSION >= 0x1020
       .DEF_SIMPLE_METHOD(get_arg_info)