diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 58805c03f35a56dfebe92634fe6aaa02c4edf702..823489e560d6fa44de36ad079e30132da5c9d5b9 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -194,6 +194,7 @@ def _add_functionality(): global_offset = kwargs.pop("global_offset", None) had_local_size = "local_size" in kwargs local_size = kwargs.pop("local_size", None) + g_times_l = kwargs.pop("g_times_l", False) wait_for = kwargs.pop("wait_for", None) if kwargs: @@ -226,7 +227,7 @@ def _add_functionality(): self.set_args(*args) return enqueue_nd_range_kernel(queue, self, global_size, local_size, - global_offset, wait_for) + global_offset, wait_for, g_times_l=g_times_l) def kernel_set_scalar_arg_dtypes(self, arg_dtypes): arg_type_chars = [] diff --git a/src/wrapper/wrap_cl.hpp b/src/wrapper/wrap_cl.hpp index c20211f92e81bb879dc24b7dde78cd9f8e3e04a9..5be798539bf7b7c7076e80f84f28e2cb9554be21 100644 --- a/src/wrapper/wrap_cl.hpp +++ b/src/wrapper/wrap_cl.hpp @@ -2782,7 +2782,8 @@ namespace pyopencl py::object py_global_work_size, py::object py_local_work_size, py::object py_global_work_offset, - py::object py_wait_for) + py::object py_wait_for, + bool g_times_l) { PYOPENCL_PARSE_WAIT_FOR; @@ -2804,6 +2805,12 @@ namespace pyopencl local_work_size_ptr = local_work_size.empty( ) ? NULL : &local_work_size.front(); } + if (g_times_l && local_work_size_ptr) + { + for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis) + global_work_size[work_axis] *= local_work_size[work_axis]; + } + size_t *global_work_offset_ptr = 0; std::vector<size_t> global_work_offset; if (py_global_work_offset.ptr() != Py_None) @@ -2814,6 +2821,12 @@ namespace pyopencl COPY_PY_LIST(size_t, global_work_offset); + if (g_times_l && local_work_size_ptr) + { + for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis) + global_work_offset[work_axis] *= local_work_size[work_axis]; + } + global_work_offset_ptr = global_work_offset.empty( ) ? NULL : &global_work_offset.front(); } diff --git a/src/wrapper/wrap_cl_part_2.cpp b/src/wrapper/wrap_cl_part_2.cpp index fc3736a7a4c0bddaced3a8dde830502712f01119..95f298830f94afb15e11f5a5c5ade3494f1ca86d 100644 --- a/src/wrapper/wrap_cl_part_2.cpp +++ b/src/wrapper/wrap_cl_part_2.cpp @@ -166,7 +166,8 @@ void pyopencl_expose_part_2() py::arg("global_work_size"), py::arg("local_work_size"), py::arg("global_work_offset")=py::object(), - py::arg("wait_for")=py::object() + py::arg("wait_for")=py::object(), + py::arg("g_times_l")=false ), py::return_value_policy<py::manage_new_object>()); py::def("enqueue_task", enqueue_task,