From d61e5cf08b003392523344d7a612d5a3d06622e9 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Sat, 26 Jun 2010 14:06:13 -0400
Subject: [PATCH] Promote local_size to positional argument in
 Kernel.__call__().

---
 doc/source/index.rst           |   2 +-
 doc/source/misc.rst            |   2 +
 doc/source/runtime.rst         |  17 +++++-
 examples/benchmark-all.py      |   2 +-
 examples/demo.py               |   2 +-
 examples/demo_meta_codepy.py   |   5 +-
 examples/demo_meta_template.py |   5 +-
 examples/gl_interop_demo.py    |   6 +-
 examples/matrix-multiply.py    |  10 ++--
 examples/narray.py             |   2 +-
 examples/transpose.py          |  12 ++--
 pyopencl/__init__.py           | 103 +++++++++++++++++++++++++++------
 test/test_wrapper.py           |  34 ++++++-----
 13 files changed, 141 insertions(+), 61 deletions(-)

diff --git a/doc/source/index.rst b/doc/source/index.rst
index 9fd85142..38456e96 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -53,7 +53,7 @@ Here's an example, to give you an impression::
         }
         """).build()
 
-    prg.sum(queue, a.shape, a_buf, b_buf, dest_buf)
+    prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf)
 
     a_plus_b = numpy.empty_like(a)
     cl.enqueue_read_buffer(queue, dest_buf, a_plus_b).wait()
diff --git a/doc/source/misc.rst b/doc/source/misc.rst
index 7504bf8f..8deba3b0 100644
--- a/doc/source/misc.rst
+++ b/doc/source/misc.rst
@@ -78,6 +78,8 @@ Version 0.92
   `cl_khr_gl_sharing <ghttp://www.khronos.org/registry/cl/extensions/khr/cl_khr_gl_sharing.txt>`_
   extension, leading to working GL interoperability.
 * Add :meth:`pyopencl.Kernel.set_args`.
+* The call signature of :meth:`pyopencl.Kernel.__call__` changed to
+  emphasize the importance of *local_size*.
 
 Version 0.91.5
 --------------
diff --git a/doc/source/runtime.rst b/doc/source/runtime.rst
index 11ddf72c..7fa2b130 100644
--- a/doc/source/runtime.rst
+++ b/doc/source/runtime.rst
@@ -592,13 +592,26 @@ Programs and Kernels
 
         Invoke :meth:`set_arg` on each element of *args* in turn.
 
-    .. method:: __call__(queue, global_size, *args, global_offset=None, local_size=None, wait_for=None)
+    .. method:: __call__(queue, global_size, local_size, *args, global_offset=None, wait_for=None)
 
         Use :func:`enqueue_nd_range_kernel` to enqueue a kernel execution, after using
-        :meth:`set_arg` to set each argument in turn. See the documentation for 
+        :meth:`set_args` to set each argument in turn. See the documentation for 
         :meth:`set_arg` to see what argument types are allowed.
         |std-enqueue-blurb|
 
+        *None* may be passed for *local_size*.
+
+        .. versionchanged:: 0.92
+            *local_size* was promoted to third positional argument from being a
+            keyword argument. The old keyword argument usage will continue to
+            be accepted with a warning throughout the 0.92 release cycle. 
+            This is a backward-compatible change (just barely!) because
+            *local_size* as third positional argument can only be a
+            :class:`tuple` or *None*.  :class:`tuple` instances are never valid
+            :class:`Kernel` arguments, and *None* is valid as an argument, but
+            its treatment in the wrapper had a bug (now fixed) that prevented
+            it from working.
+
     |comparable|
 
 .. class:: LocalMemory(size)
diff --git a/examples/benchmark-all.py b/examples/benchmark-all.py
index 03553f94..0d08979b 100644
--- a/examples/benchmark-all.py
+++ b/examples/benchmark-all.py
@@ -60,7 +60,7 @@ for platform in cl.get_platforms():
                 }
                 """).build()
 
-        exec_evt = prg.sum(queue, a.shape, a_buf, b_buf, dest_buf)
+        exec_evt = prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf)
         exec_evt.wait()
         elapsed = 1e-9*(exec_evt.profile.end - exec_evt.profile.start)
 
diff --git a/examples/demo.py b/examples/demo.py
index 71f146eb..44bc0a58 100644
--- a/examples/demo.py
+++ b/examples/demo.py
@@ -22,7 +22,7 @@ prg = cl.Program(ctx, """
     }
     """).build()
 
-prg.sum(queue, a.shape, a_buf, b_buf, dest_buf)
+prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf)
 
 a_plus_b = numpy.empty_like(a)
 cl.enqueue_read_buffer(queue, dest_buf, a_plus_b).wait()
diff --git a/examples/demo_meta_codepy.py b/examples/demo_meta_codepy.py
index 8c2a8275..c080109b 100644
--- a/examples/demo_meta_codepy.py
+++ b/examples/demo_meta_codepy.py
@@ -46,9 +46,8 @@ mod = Module([
 
 knl = cl.Program(ctx, str(mod)).build().add
 
-knl(queue, (local_size*macroblock_count,), 
-        c_buf, a_buf, b_buf, 
-        local_size=(local_size,))
+knl(queue, (local_size*macroblock_count,), (local_size,),
+        c_buf, a_buf, b_buf)
 
 c = numpy.empty_like(a)
 cl.enqueue_read_buffer(queue, c_buf, c).wait()
diff --git a/examples/demo_meta_template.py b/examples/demo_meta_template.py
index fe05b1ee..e6d9315d 100644
--- a/examples/demo_meta_template.py
+++ b/examples/demo_meta_template.py
@@ -44,9 +44,8 @@ rendered_tpl = tpl.render(type_name="float",
 
 knl = cl.Program(ctx, str(rendered_tpl)).build().add
 
-knl(queue, (local_size*macroblock_count,), 
-        c_buf, a_buf, b_buf, 
-        local_size=(local_size,))
+knl(queue, (local_size*macroblock_count,), (local_size,),
+        c_buf, a_buf, b_buf)
 
 c = numpy.empty_like(a)
 cl.enqueue_read_buffer(queue, c_buf, c).wait()
diff --git a/examples/gl_interop_demo.py b/examples/gl_interop_demo.py
index e773db83..dd4bf3e3 100644
--- a/examples/gl_interop_demo.py
+++ b/examples/gl_interop_demo.py
@@ -32,7 +32,7 @@ def initialize():
     plats = cl.get_platforms()
     ctx_props = cl.context_properties
     props = [(ctx_props.PLATFORM, plats[0]), (ctx_props.GL_CONTEXT_KHR,
-	GetCurrentContext()), (ctx_props.GLX_DISPLAY_KHR, GetCurrentDisplay())]
+        GetCurrentContext()), (ctx_props.GLX_DISPLAY_KHR, GetCurrentDisplay())]
     ctx = cl.Context(properties=props)
     glClearColor(1, 1, 1, 1)
     glColor(0, 0, 1)
@@ -45,7 +45,7 @@ def initialize():
     prog = cl.Program(ctx, src).build()
     queue = cl.CommandQueue(ctx)
     cl.enqueue_acquire_gl_objects(queue, [coords_dev])
-    prog.generate_sin(queue, (n_vertices,), coords_dev)
+    prog.generate_sin(queue, (n_vertices,), None, coords_dev)
     cl.enqueue_release_gl_objects(queue, [coords_dev])
     queue.finish()
     glFlush()
@@ -65,7 +65,7 @@ if __name__ == '__main__':
     import sys
     glutInit(sys.argv)
     if len(sys.argv) > 1:
-	n_vertices = int(sys.argv[1])
+        n_vertices = int(sys.argv[1])
     glutInitWindowSize(800, 160)
     glutInitWindowPosition(0, 0)
     glutCreateWindow('OpenCL/OpenGL Interop Tutorial: Sin Generator')
diff --git a/examples/matrix-multiply.py b/examples/matrix-multiply.py
index 7e26f8c2..142591f1 100644
--- a/examples/matrix-multiply.py
+++ b/examples/matrix-multiply.py
@@ -183,9 +183,9 @@ push_time = time()-t1
 
 # warmup ----------------------------------------------------------------------
 for i in range(5):
-    event = kernel(queue, h_c.shape, d_c_buf, d_a_buf, d_b_buf, 
-            local_size=(block_size, block_size))
-event.wait()
+    event = kernel(queue, h_c.shape, (block_size, block_size), 
+            d_c_buf, d_a_buf, d_b_buf)
+    event.wait()
 
 queue.finish()
 
@@ -194,8 +194,8 @@ t1 = time()
 
 count = 20
 for i in range(count):
-    event = kernel(queue, h_c.shape, d_c_buf, d_a_buf, d_b_buf, 
-            local_size=(block_size, block_size))
+    event = kernel(queue, h_c.shape, (block_size, block_size),
+            d_c_buf, d_a_buf, d_b_buf)
 
 event.wait()
 
diff --git a/examples/narray.py b/examples/narray.py
index 667f6717..17450d73 100644
--- a/examples/narray.py
+++ b/examples/narray.py
@@ -28,7 +28,7 @@ except:
     print prg.get_build_info(ctx.devices[0], cl.program_build_info.LOG)
     raise
 
-prg.demo(queue, (500,), demo_buf)
+prg.demo(queue, (500,), None, demo_buf)
 cl.enqueue_read_buffer(queue, demo_buf, demo_r).wait()
 
 for res in demo_r:
diff --git a/examples/transpose.py b/examples/transpose.py
index ad86de3b..7c571fdf 100644
--- a/examples/transpose.py
+++ b/examples/transpose.py
@@ -35,9 +35,8 @@ class NaiveTranspose:
         assert w % block_size == 0
         assert h % block_size == 0
 
-        return self.kernel(queue, (w, h),
-            tgt, src, numpy.uint32(w), numpy.uint32(h),
-            local_size=(block_size, block_size))
+        return self.kernel(queue, (w, h), (block_size, block_size),
+            tgt, src, numpy.uint32(w), numpy.uint32(h))
 
 
 
@@ -48,7 +47,7 @@ class SillyTranspose(NaiveTranspose):
         assert w % block_size == 0
         assert h % block_size == 0
 
-        return self.kernel(queue, (w, h),
+        return self.kernel(queue, (w, h), None,
             tgt, src, numpy.uint32(w), numpy.uint32(h))
 
 
@@ -90,10 +89,9 @@ class TransposeWithLocal:
         assert w % block_size == 0
         assert h % block_size == 0
 
-        return self.kernel(queue, (w, h),
+        return self.kernel(queue, (w, h), (block_size, block_size),
             tgt, src, numpy.uint32(w), numpy.uint32(h),
-            cl.LocalMemory(4*block_size*(block_size+1)),
-            local_size=(block_size, block_size))
+            cl.LocalMemory(4*block_size*(block_size+1)))
 
 
 
diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index f8083bc4..e9c44745 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -52,7 +52,7 @@ def _add_functionality():
     for cls in CONSTANT_CLASSES:
         cls.to_string = classmethod(to_string)
 
-    # get_info attributes -----------------------------------------------------
+    # {{{ get_info attributes -------------------------------------------------
     def make_getattr(info_classes):
         name_to_info = dict(
                 (intern(info_name.lower()), (info_method, info_value))
@@ -76,26 +76,34 @@ def _add_functionality():
     for cls, info_classes in cls_to_info_cls.iteritems():
         cls.__getattr__ = make_getattr(info_classes)
 
-    # Platform ----------------------------------------------------------------
+    # }}}
+
+    # {{{ Platform
     def platform_repr(self):
         return "<pyopencl.Platform '%s' at 0x%x>" % (self.name, self.obj_ptr)
 
     Platform.__repr__ = platform_repr
 
-    # Device ------------------------------------------------------------------
+    # }}}
+
+    # {{{ Device
     def device_repr(self):
         return "<pyopencl.Device '%s' at 0x%x>" % (self.name, self.obj_ptr)
 
     Device.__repr__ = device_repr
 
-    # Context -----------------------------------------------------------------
+    # }}}
+
+    # {{{ Context
     def context_repr(self):
         return "<pyopencl.Context at 0x%x on %s>" % (self.obj_ptr,
                 ", ".join(repr(dev) for dev in self.devices))
 
     Context.__repr__ = context_repr
 
-    # Program -----------------------------------------------------------------
+    # }}}
+
+    # {{{ Program
     def program_getattr(self, attr):
         try:
             pi_attr = getattr(_cl.program_info, attr.upper())
@@ -134,7 +142,9 @@ def _add_functionality():
     Program.__getattr__ = program_getattr
     Program.build = program_build
 
-    # Event -------------------------------------------------------------------
+    # }}}
+
+    # {{{ Event
     class ProfilingInfoGetter:
         def __init__(self, event):
             self.event = event
@@ -152,12 +162,12 @@ def _add_functionality():
 
     _cl.Event.profile = property(ProfilingInfoGetter)
 
-    # Kernel ------------------------------------------------------------------
-    def kernel_call(self, queue, global_size, *args, **kwargs):
-        for i, arg in enumerate(args):
-            self.set_arg(i, arg)
+    # }}}
 
+    # {{{ Kernel
+    def kernel_call(self, queue, global_size, *args, **kwargs):
         global_offset = kwargs.pop("global_offset", None)
+        had_local_size = "local_size" in kwargs
         local_size = kwargs.pop("local_size", None)
         wait_for = kwargs.pop("wait_for", None)
 
@@ -166,25 +176,77 @@ def _add_functionality():
                     "Kernel.__call__ recived unexpected keyword arguments: %s"
                     % ", ".join(kwargs.keys()))
 
+        if had_local_size:
+            from warnings import warn
+            warn("The local_size keyword argument is deprecated and will be "
+                    "removed in pyopencl 0.94. Pass the local "
+                    "size as the third positional argument instead.",
+                    DeprecationWarning, stacklevel=2)
+
+        from types import NoneType
+        if isinstance(args[0], (NoneType, tuple)) and not had_local_size:
+            local_size = args[0]
+            args = args[1:]
+        elif not had_local_size:
+            from warnings import warn
+            warn("PyOpenCL Warning: There was an API change "
+                    "in Kernel.__call__() in pyopencl 0.92. "
+                    "local_size was moved from keyword argument to third "
+                    "positional argument in pyopencl 0.92. "
+                    "You didn't pass local_size, but you still need to insert "
+                    "'None' as a third argument. "
+                    "Your present usage is deprecated and will stop "
+                    "working in pyopencl 0.94.",
+                    DeprecationWarning, stacklevel=2)
+
+        self.set_args(*args)
+
         return enqueue_nd_range_kernel(queue, self, global_size, local_size,
                 global_offset, wait_for)
 
+    def kernel_set_scalar_arg_dtypes(self, arg_dtypes):
+        arg_type_chars = []
+
+        for arg_dtype in arg_dtypes:
+            if arg_dtype is None:
+                arg_type_chars.append(None)
+            else:
+                import numpy
+                arg_type_chars.append(numpy.dtype(arg_dtype).char)
+
+        self._arg_type_chars = arg_type_chars
+
     def kernel_set_args(self, *args):
-        for i, arg in enumerate(args):
-            self.set_arg(i, arg)
+        try:
+            arg_type_chars = self.__dict__["_arg_type_chars"]
+        except KeyError:
+            for i, arg in enumerate(args):
+                self.set_arg(i, arg)
+        else:
+            from struct import pack
+            for i, (arg, arg_type_char) in enumerate(
+                    zip(args, arg_type_chars)):
+                if arg_type_char:
+                    self.set_arg(i, pack(arg_type_char, arg))
+                else:
+                    self.set_arg(i, arg)
 
     Kernel.__call__ = kernel_call
+    Kernel.set_scalar_arg_dtypes = kernel_set_scalar_arg_dtypes
     Kernel.set_args = kernel_set_args
 
-    # ImageFormat -------------------------------------------------------------
+    # }}}
+
+    # {{{ ImageFormat
     def image_format_repr(self):
         return "ImageFormat(%s, %s)" % (
                 channel_order.to_string(self.channel_order),
                 channel_type.to_string(self.channel_data_type))
 
     ImageFormat.__repr__ = image_format_repr
+    # }}}
 
-    # Image -------------------------------------------------------------------
+    # {{{ Image
     class ImageInfoGetter:
         def __init__(self, event):
             from warnings import warn
@@ -213,13 +275,17 @@ def _add_functionality():
     _cl.Image.image = property(ImageInfoGetter)
     _cl.Image.shape = property(image_shape)
 
-    # Event -------------------------------------------------------------------
+    # }}}
+
+    # {{{ Event
     def event_wait(self):
         wait_for_events([self])
         return self
 
     Event.wait = event_wait
 
+    # }}}
+
     if _cl.have_gl():
         def gl_object_get_gl_object(self):
             return self.get_gl_object_info()[1]
@@ -232,7 +298,7 @@ _add_functionality()
 
 
 
-# convenience -----------------------------------------------------------------
+# {{{ convenience -------------------------------------------------------------
 def create_some_context(interactive=True):
     try:
         import sys
@@ -248,7 +314,7 @@ def create_some_context(interactive=True):
     elif len(platforms) == 1 or not interactive:
         platform = platforms[0]
     else:
-        print "Choose platform from these choices:"
+        print "Choose platform:"
         for i, pf in enumerate(platforms):
             print "[%d] %s" % (i, pf)
 
@@ -267,7 +333,7 @@ def create_some_context(interactive=True):
     elif len(devices) == 1 or not interactive:
         pass
     else:
-        print "Choose device(s) from these choices:"
+        print "Choose device(s):"
         for i, dev in enumerate(devices):
             print "[%d] %s" % (i, dev)
 
@@ -279,6 +345,7 @@ def create_some_context(interactive=True):
 
     return Context(devices)
 
+# }}}
 
 
 # vim: foldmethod=marker
diff --git a/test/test_wrapper.py b/test/test_wrapper.py
index ed683120..7bd9e268 100644
--- a/test/test_wrapper.py
+++ b/test/test_wrapper.py
@@ -26,7 +26,7 @@ class TestCL:
 
     @pytools.test.mark_test.opencl
     def test_get_info(self, platform, device):
-        had_failures = [False]
+        failure_count = [0]
 
         CRASH_QUIRKS = [
                 (("NVIDIA Corporation", "NVIDIA CUDA", 
@@ -69,13 +69,12 @@ class TestCL:
                     try:
                         func(info)
                     except:
-                        print "failed get_info", type(cl_obj), info_name
+                        msg = "failed get_info", type(cl_obj), info_name
 
                         if find_quirk(QUIRKS, cl_obj, info):
-                            print "(known quirk for %s)" % platform.name
+                            msg += ("(known quirk for %s)" % platform.name)
                         else:
-                            had_failures[0] = True
-                            raise
+                            failure_count[0] += 1
 
                     if try_attr_form:
                         try:
@@ -86,8 +85,7 @@ class TestCL:
                             if find_quirk(QUIRKS, cl_obj, info):
                                 print "(known quirk for %s)" % platform.name
                             else:
-                                had_failures[0] = True
-                                raise
+                                failure_count[0] += 1
 
         do_test(platform, cl.platform_info)
 
@@ -125,7 +123,7 @@ class TestCL:
         kernel = prg.sum
         do_test(kernel, cl.kernel_info)
 
-        evt = kernel(queue, (n,), a_buf)
+        evt = kernel(queue, (n,), None, a_buf)
         do_test(evt, cl.event_info)
 
         if profiling:
@@ -151,8 +149,12 @@ class TestCL:
             do_test(img, cl.image_info,
                     lambda info: img.get_image_info(info))
 
-        if had_failures[0]:
-            raise RuntimeError("get_info testing had errors")
+        if failure_count[0]:
+            raise RuntimeError(
+                    "get_info testing had %d errors "
+                    "(If you compiled against OpenCL 1.1 but are testing a 1.0 "
+                    "implementation, you can safely ignore this.)"
+                    % failure_count[0])
 
     @pytools.test.mark_test.opencl
     def test_invalid_kernel_names_cause_failures(self):
@@ -201,18 +203,18 @@ class TestCL:
         a_buf = cl.Buffer(context, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=a)
 
         try:
-            prg.mult(queue, a.shape, a_buf, 2, 3)
+            prg.mult(queue, a.shape, None, a_buf, 2, 3)
             assert False, "PyOpenCL should not accept bare Python types as arguments"
-        except TypeError:
+        except cl.LogicError:
             pass
 
         try:
-            prg.mult(queue, a.shape, a_buf, float(2), 3)
+            prg.mult(queue, a.shape, None, a_buf, float(2), 3)
             assert False, "PyOpenCL should not accept bare Python types as arguments"
-        except TypeError:
+        except cl.LogicError:
             pass
 
-        prg.mult(queue, a.shape, a_buf, numpy.float32(2), numpy.int32(3))
+        prg.mult(queue, a.shape, None, a_buf, numpy.float32(2), numpy.int32(3))
 
         a_result = numpy.empty_like(a)
         cl.enqueue_read_buffer(queue, a_buf, a_result).wait()
@@ -254,7 +256,7 @@ class TestCL:
         samp = cl.Sampler(context, False,
                 cl.addressing_mode.CLAMP,
                 cl.filter_mode.NEAREST)
-        prg.copy_image(queue, a.shape, a_dest, a_img, samp, numpy.int32(a.shape[0]))
+        prg.copy_image(queue, a.shape, None, a_dest, a_img, samp, numpy.int32(a.shape[0]))
 
         a_result = numpy.empty_like(a)
         cl.enqueue_read_buffer(queue, a_dest, a_result, is_blocking=True)
-- 
GitLab