From c8c235601fea20ea9de43efdf3d2593f22a65b36 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 22 Jun 2011 01:02:44 -0400
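Subject: [PATCH] Add g_times_l launch config modifier.

When g_times_l is true and a local work size is given,
enqueue_nd_range_kernel multiplies the global work size, and the global
work offset if one is passed, by the local work size along each axis.
The flag defaults to false, and the kernel call wrapper in
pyopencl/__init__.py accepts it as a keyword argument and forwards it.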

---
 pyopencl/__init__.py           |  3 ++-
 src/wrapper/wrap_cl.hpp        | 15 ++++++++++++++-
 src/wrapper/wrap_cl_part_2.cpp |  3 ++-
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index 58805c03..823489e5 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -194,6 +194,7 @@ def _add_functionality():
         global_offset = kwargs.pop("global_offset", None)
         had_local_size = "local_size" in kwargs
         local_size = kwargs.pop("local_size", None)
+        g_times_l = kwargs.pop("g_times_l", False)
         wait_for = kwargs.pop("wait_for", None)
 
         if kwargs:
@@ -226,7 +227,7 @@ def _add_functionality():
         self.set_args(*args)
 
         return enqueue_nd_range_kernel(queue, self, global_size, local_size,
-                global_offset, wait_for)
+                global_offset, wait_for, g_times_l=g_times_l)
 
     def kernel_set_scalar_arg_dtypes(self, arg_dtypes):
         arg_type_chars = []
diff --git a/src/wrapper/wrap_cl.hpp b/src/wrapper/wrap_cl.hpp
index c20211f9..5be79853 100644
--- a/src/wrapper/wrap_cl.hpp
+++ b/src/wrapper/wrap_cl.hpp
@@ -2782,7 +2782,8 @@ namespace pyopencl
       py::object py_global_work_size,
       py::object py_local_work_size,
       py::object py_global_work_offset,
-      py::object py_wait_for)
+      py::object py_wait_for,
+      bool g_times_l)
   {
     PYOPENCL_PARSE_WAIT_FOR;
 
@@ -2804,6 +2805,12 @@ namespace pyopencl
       local_work_size_ptr = local_work_size.empty( ) ? NULL : &local_work_size.front();
     }
 
+    if (g_times_l && local_work_size_ptr)
+    {
+      for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
+        global_work_size[work_axis] *= local_work_size[work_axis];
+    }
+
     size_t *global_work_offset_ptr = 0;
     std::vector<size_t> global_work_offset;
     if (py_global_work_offset.ptr() != Py_None)
@@ -2814,6 +2821,12 @@ namespace pyopencl
 
       COPY_PY_LIST(size_t, global_work_offset);
 
+      if (g_times_l && local_work_size_ptr)
+      {
+        for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
+          global_work_offset[work_axis] *= local_work_size[work_axis];
+      }
+
       global_work_offset_ptr = global_work_offset.empty( ) ? NULL :  &global_work_offset.front();
     }
 
diff --git a/src/wrapper/wrap_cl_part_2.cpp b/src/wrapper/wrap_cl_part_2.cpp
index fc3736a7..95f29883 100644
--- a/src/wrapper/wrap_cl_part_2.cpp
+++ b/src/wrapper/wrap_cl_part_2.cpp
@@ -166,7 +166,8 @@ void pyopencl_expose_part_2()
       py::arg("global_work_size"),
       py::arg("local_work_size"),
       py::arg("global_work_offset")=py::object(),
-      py::arg("wait_for")=py::object()
+      py::arg("wait_for")=py::object(),
+      py::arg("g_times_l")=false
       ),
       py::return_value_policy<py::manage_new_object>());
   py::def("enqueue_task", enqueue_task,
-- 
GitLab