From f872301a37977d336d5bcc4a55a099fdff55a1b5 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner
Date: Wed, 16 Sep 2020 22:10:31 -0500
Subject: [PATCH] Add clGetKernelSubGroupInfo

---
 doc/runtime_program.rst | 11 +++++++
 src/wrap_cl.hpp         | 72 ++++++++++++++++++++++++++++++++++++++++-
 src/wrap_cl_part_2.cpp  |  7 +++++
 3 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/doc/runtime_program.rst b/doc/runtime_program.rst
index 1c28a78c..8cb2077c 100644
--- a/doc/runtime_program.rst
+++ b/doc/runtime_program.rst
@@ -170,6 +170,17 @@ Kernel
 
         Only available in OpenCL 1.2 and newer.
 
+    .. method:: get_sub_group_info(self, device, param, input_value=None)
+
+        When the OpenCL spec requests *input_value* to be of type ``size_t``,
+        it may be passed directly as a number. When it requests
+        *input_value* to be of type ``size_t *``, a tuple of integers
+        may be passed.
+
+        Only available in OpenCL 2.1 and newer.
+
+        .. versionadded:: 2020.3
+
     .. method:: set_arg(self, index, arg)
 
         *arg* may be
diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp
index 436317fe..599bb36a 100644
--- a/src/wrap_cl.hpp
+++ b/src/wrap_cl.hpp
@@ -33,7 +33,6 @@
 // CL 2.0 complete
 
 // CL 2.1 missing:
-// clGetKernelSubGroupInfo
 // clEnqueueSVMMigrateMem
 
 // CL 2.2 complete
@@ -4511,6 +4510,77 @@ namespace pyopencl
         }
       }
 #endif
+
+#if PYOPENCL_CL_VERSION >= 0x2010
+      // Wraps clGetKernelSubGroupInfo for this kernel on device `dev`.
+      // How py_input_value is read depends on param_name: it is copied
+      // into a size_t array for the *_FOR_NDRANGE queries, cast to a single
+      // size_t for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, and not read
+      // otherwise. Throws error(CL_INVALID_VALUE) for any other param_name.
+      py::object get_sub_group_info(
+          device const &dev,
+          cl_kernel_sub_group_info param_name,
+          py::object py_input_value)
+      {
+        switch (param_name)
+        {
+          // size_t * -> size_t
+          case CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE:
+          case CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE:
+            {
+              std::vector<size_t> input_value;
+              COPY_PY_LIST(size_t, input_value);
+
+              size_t param_value;
+              PYOPENCL_CALL_GUARDED(clGetKernelSubGroupInfo,
+                  (m_kernel, dev.data(), param_name,
+                   input_value.size()*sizeof(input_value.front()),
+                   input_value.empty() ? nullptr : &input_value.front(),
+                   sizeof(param_value), &param_value, 0));
+
+              return py::cast(param_value);
+            }
+
+          // size_t -> size_t[]
+          case CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT:
+            {
+              size_t input_value = py::cast<size_t>(py_input_value);
+
+              std::vector<size_t> result;
+              size_t size;
+              // First call passes no output buffer and only asks for the result size in bytes.
+              PYOPENCL_CALL_GUARDED(clGetKernelSubGroupInfo,
+                  (m_kernel, dev.data(), param_name,
+                   sizeof(input_value), &input_value,
+                   0, nullptr, &size));
+              result.resize(size / sizeof(result.front()));
+              // Second call fetches the values into the buffer sized above.
+              PYOPENCL_CALL_GUARDED(clGetKernelSubGroupInfo,
+                  (m_kernel, dev.data(), param_name,
+                   sizeof(input_value), &input_value,
+                   size, result.empty() ? nullptr : &result.front(), 0));
+
+              PYOPENCL_RETURN_VECTOR(size_t, result);
+            }
+
+          // () -> size_t
+          case CL_KERNEL_MAX_NUM_SUB_GROUPS:
+          case CL_KERNEL_COMPILE_NUM_SUB_GROUPS:
+            {
+              size_t param_value;
+              PYOPENCL_CALL_GUARDED(clGetKernelSubGroupInfo,
+                  (m_kernel, dev.data(), param_name,
+                   0, nullptr,
+                   sizeof(param_value), &param_value, 0));
+
+              return py::cast(param_value);
+            }
+
+          default:
+            throw error("Kernel.get_sub_group_info", CL_INVALID_VALUE);
+        }
+      }
+#endif
   };
 
 
diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp
index a69c9299..9644eea6 100644
--- a/src/wrap_cl_part_2.cpp
+++ b/src/wrap_cl_part_2.cpp
@@ -478,6 +478,13 @@ void pyopencl_expose_part_2(py::module &m)
       .def(py::self != py::self)
       .def("__hash__", &cls::hash)
       PYOPENCL_EXPOSE_TO_FROM_INT_PTR(cl_kernel)
+#if PYOPENCL_CL_VERSION >= 0x2010
+      .def("get_sub_group_info", &cls::get_sub_group_info,
+          py::arg("device"),
+          py::arg("param"),
+          py::arg("input_value")=py::none()
+          )
+#endif
       ;
   }
 
-- 
GitLab