From f50dcc16e63cc8984da8194d54abc89921e5071c Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 4 Jul 2011 09:38:41 -0400
Subject: [PATCH] Document g_times_l.

---
 doc/source/misc.rst    |  6 ++++++
 doc/source/runtime.rst | 21 +++++++++++++++++++--
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/doc/source/misc.rst b/doc/source/misc.rst
index 326ecc0b..954da015 100644
--- a/doc/source/misc.rst
+++ b/doc/source/misc.rst
@@ -73,6 +73,11 @@ Version 2011.2
     This version is currently under development. You can get snapshots from
     PyOpenCL's git version control.
 
+Version 2011.1.2
+----------------
+
+* More bug fixes.
+
 Version 2011.1.1
 ----------------
 
@@ -108,6 +113,7 @@ Version 2011.1
 * Add :func:`pyopencl.enqueue_copy`. Deprecate all other transfer functions.
 * Add support for numerous extensions, among them device fission.
 * Add a compiler cache.
+* Add the 'g_times_l' keyword arg to kernel execution.
 
 Version 0.92
 ------------
diff --git a/doc/source/runtime.rst b/doc/source/runtime.rst
index 538ae09d..88e72140 100644
--- a/doc/source/runtime.rst
+++ b/doc/source/runtime.rst
@@ -782,7 +782,7 @@ Programs and Kernels
                prg.kernel(queue, n_globals, None, args)
 
 
-    .. method:: __call__(queue, global_size, local_size, *args, global_offset=None, wait_for=None)
+    .. method:: __call__(queue, global_size, local_size, *args, global_offset=None, wait_for=None, g_times_l=False)
 
         Use :func:`enqueue_nd_range_kernel` to enqueue a kernel execution, after using
         :meth:`set_args` to set each argument in turn. See the documentation for
@@ -791,6 +791,11 @@ Programs and Kernels
 
         *None* may be passed for local_size.
 
+        If *g_times_l* is true, the global size will be multiplied by the
+        local size (which makes the behavior more like Nvidia CUDA). In this case,
+        *global_size* and *local_size* also do not have to have the same number
+        of dimensions.
+
         .. versionchanged:: 0.92
             *local_size* was promoted to third positional argument from being a
             keyword argument. The old keyword argument usage will continue to
@@ -802,6 +807,9 @@ Programs and Kernels
             its treatment in the wrapper had a bug (now fixed) that prevented
             it from working.
 
+        .. versionchanged:: 2011.1
+            Added the *g_times_l* keyword arg.
+
     |comparable|
 
 .. class:: LocalMemory(size)
@@ -814,9 +822,18 @@ Programs and Kernels
 
         The size of local buffer in bytes to be provided.
 
-.. function:: enqueue_nd_range_kernel(queue, kernel, global_work_size, local_work_size, global_work_offset=None, wait_for=None)
+.. function:: enqueue_nd_range_kernel(queue, kernel, global_work_size, local_work_size, global_work_offset=None, wait_for=None, g_times_l=False)
 
     |std-enqueue-blurb|
+    
+    If *g_times_l* is true, the global size will be multiplied by the
+    local size (which makes the behavior more like Nvidia CUDA). In this case,
+    *global_work_size* and *local_work_size* also do not have to have the same
+    number of dimensions.
+
+    .. versionchanged:: 2011.1
+        Added the *g_times_l* keyword arg.
+
 
 .. function:: enqueue_task(queue, kernel, wait_for=None)
 
-- 
GitLab