From b9e8260d2a56475c7f71d43fa8b4d1b2c459a0fb Mon Sep 17 00:00:00 2001
From: "[6~" <inform@tiker.net>
Date: Wed, 6 Nov 2019 17:56:40 -0600
Subject: [PATCH] Document {global,local}_size (Closes gh-310)

---
 doc/runtime_program.rst | 12 ++----------
 doc/subst.rst           | 12 ++++++++++++
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/doc/runtime_program.rst b/doc/runtime_program.rst
index e9546878..71ee84b4 100644
--- a/doc/runtime_program.rst
+++ b/doc/runtime_program.rst
@@ -212,12 +212,7 @@ Kernel
         :meth:`set_arg` to see what argument types are allowed.
         |std-enqueue-blurb|
 
-        *None* may be passed for local_size.
-
-        If *g_times_l* is specified, the global size will be multiplied by the
-        local size. (which makes the behavior more like Nvidia CUDA) In this case,
-        *global_size* and *local_size* also do not have to have the same number
-        of dimensions.
+        |glsize|
 
         .. note::
 
@@ -287,10 +282,7 @@ Kernel
 
     |std-enqueue-blurb|
 
-    If *g_times_l* is specified, the global size will be multiplied by the
-    local size. (which makes the behavior more like Nvidia CUDA) In this case,
-    *global_size* and *local_size* also do not have to have the same number
-    of dimensions.
+    |glsize|
 
     .. versionchanged:: 2011.1
         Added the *g_times_l* keyword arg.
diff --git a/doc/subst.rst b/doc/subst.rst
index 4210ab24..5e7b524b 100644
--- a/doc/subst.rst
+++ b/doc/subst.rst
@@ -13,3 +13,15 @@
 
 .. |copy-depr| replace:: **Note:** This function is deprecated as of PyOpenCL 2011.1.
         Use :func:`enqueue_copy` instead.
+
+.. |glsize| replace:: *global_size* and *local_size* are tuples of identical length, with
+        between one and three entries. *global_size* specifies the overall size
+        of the computational grid: one work item will be launched for every
+        integer point in the grid. *local_size* specifies the workgroup size,
+        which must evenly divide the *global_size* in a dimension-by-dimension
+        manner.  *None* may be passed for *local_size*, in which case the
+        implementation will use an implementation-defined workgroup size.
+        If *g_times_l* is *True*, the global size will be multiplied by the
+        local size (which makes the behavior more like Nvidia CUDA). In this case,
+        *global_size* and *local_size* also do not have to have the same number
+        of entries.
-- 
GitLab