From d61e5cf08b003392523344d7a612d5a3d06622e9 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 26 Jun 2010 14:06:13 -0400 Subject: [PATCH] Promote local_size to positional argument in Kernel.__call__(). --- doc/source/index.rst | 2 +- doc/source/misc.rst | 2 + doc/source/runtime.rst | 17 +++++- examples/benchmark-all.py | 2 +- examples/demo.py | 2 +- examples/demo_meta_codepy.py | 5 +- examples/demo_meta_template.py | 5 +- examples/gl_interop_demo.py | 6 +- examples/matrix-multiply.py | 10 ++-- examples/narray.py | 2 +- examples/transpose.py | 12 ++-- pyopencl/__init__.py | 103 +++++++++++++++++++++++++++------ test/test_wrapper.py | 34 ++++++----- 13 files changed, 141 insertions(+), 61 deletions(-) diff --git a/doc/source/index.rst b/doc/source/index.rst index 9fd85142..38456e96 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -53,7 +53,7 @@ Here's an example, to give you an impression:: } """).build() - prg.sum(queue, a.shape, a_buf, b_buf, dest_buf) + prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf) a_plus_b = numpy.empty_like(a) cl.enqueue_read_buffer(queue, dest_buf, a_plus_b).wait() diff --git a/doc/source/misc.rst b/doc/source/misc.rst index 7504bf8f..8deba3b0 100644 --- a/doc/source/misc.rst +++ b/doc/source/misc.rst @@ -78,6 +78,8 @@ Version 0.92 `cl_khr_gl_sharing `_ extension, leading to working GL interoperability. * Add :meth:`pyopencl.Kernel.set_args`. +* The call signature of :meth:`pyopencl.Kernel.__call__` changed to + emphasize the importance of *local_size*. Version 0.91.5 -------------- diff --git a/doc/source/runtime.rst b/doc/source/runtime.rst index 11ddf72c..7fa2b130 100644 --- a/doc/source/runtime.rst +++ b/doc/source/runtime.rst @@ -592,13 +592,26 @@ Programs and Kernels Invoke :meth:`set_arg` on each element of *args* in turn. - .. method:: __call__(queue, global_size, *args, global_offset=None, local_size=None, wait_for=None) + .. 
method:: __call__(queue, global_size, local_size, *args, global_offset=None, wait_for=None) Use :func:`enqueue_nd_range_kernel` to enqueue a kernel execution, after using - :meth:`set_arg` to set each argument in turn. See the documentation for + :meth:`set_args` to set each argument in turn. See the documentation for :meth:`set_arg` to see what argument types are allowed. |std-enqueue-blurb| + *None* may be passed for *local_size*. + + .. versionchanged:: 0.92 + *local_size* was promoted to the third positional argument from being a + keyword argument. The old keyword argument usage will continue to + be accepted with a warning throughout the 0.92 release cycle. + This is a backward-compatible change (just barely!) because + *local_size* as the third positional argument can only be a + :class:`tuple` or *None*. :class:`tuple` instances are never valid + :class:`Kernel` arguments, and *None* is valid as an argument, but + its treatment in the wrapper had a bug (now fixed) that prevented + it from working. + |comparable| .. 
class:: LocalMemory(size) diff --git a/examples/benchmark-all.py b/examples/benchmark-all.py index 03553f94..0d08979b 100644 --- a/examples/benchmark-all.py +++ b/examples/benchmark-all.py @@ -60,7 +60,7 @@ for platform in cl.get_platforms(): } """).build() - exec_evt = prg.sum(queue, a.shape, a_buf, b_buf, dest_buf) + exec_evt = prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf) exec_evt.wait() elapsed = 1e-9*(exec_evt.profile.end - exec_evt.profile.start) diff --git a/examples/demo.py b/examples/demo.py index 71f146eb..44bc0a58 100644 --- a/examples/demo.py +++ b/examples/demo.py @@ -22,7 +22,7 @@ prg = cl.Program(ctx, """ } """).build() -prg.sum(queue, a.shape, a_buf, b_buf, dest_buf) +prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf) a_plus_b = numpy.empty_like(a) cl.enqueue_read_buffer(queue, dest_buf, a_plus_b).wait() diff --git a/examples/demo_meta_codepy.py b/examples/demo_meta_codepy.py index 8c2a8275..c080109b 100644 --- a/examples/demo_meta_codepy.py +++ b/examples/demo_meta_codepy.py @@ -46,9 +46,8 @@ mod = Module([ knl = cl.Program(ctx, str(mod)).build().add -knl(queue, (local_size*macroblock_count,), - c_buf, a_buf, b_buf, - local_size=(local_size,)) +knl(queue, (local_size*macroblock_count,), (local_size,), + c_buf, a_buf, b_buf) c = numpy.empty_like(a) cl.enqueue_read_buffer(queue, c_buf, c).wait() diff --git a/examples/demo_meta_template.py b/examples/demo_meta_template.py index fe05b1ee..e6d9315d 100644 --- a/examples/demo_meta_template.py +++ b/examples/demo_meta_template.py @@ -44,9 +44,8 @@ rendered_tpl = tpl.render(type_name="float", knl = cl.Program(ctx, str(rendered_tpl)).build().add -knl(queue, (local_size*macroblock_count,), - c_buf, a_buf, b_buf, - local_size=(local_size,)) +knl(queue, (local_size*macroblock_count,), (local_size,), + c_buf, a_buf, b_buf) c = numpy.empty_like(a) cl.enqueue_read_buffer(queue, c_buf, c).wait() diff --git a/examples/gl_interop_demo.py b/examples/gl_interop_demo.py index e773db83..dd4bf3e3 100644 --- 
a/examples/gl_interop_demo.py +++ b/examples/gl_interop_demo.py @@ -32,7 +32,7 @@ def initialize(): plats = cl.get_platforms() ctx_props = cl.context_properties props = [(ctx_props.PLATFORM, plats[0]), (ctx_props.GL_CONTEXT_KHR, - GetCurrentContext()), (ctx_props.GLX_DISPLAY_KHR, GetCurrentDisplay())] + GetCurrentContext()), (ctx_props.GLX_DISPLAY_KHR, GetCurrentDisplay())] ctx = cl.Context(properties=props) glClearColor(1, 1, 1, 1) glColor(0, 0, 1) @@ -45,7 +45,7 @@ def initialize(): prog = cl.Program(ctx, src).build() queue = cl.CommandQueue(ctx) cl.enqueue_acquire_gl_objects(queue, [coords_dev]) - prog.generate_sin(queue, (n_vertices,), coords_dev) + prog.generate_sin(queue, (n_vertices,), None, coords_dev) cl.enqueue_release_gl_objects(queue, [coords_dev]) queue.finish() glFlush() @@ -65,7 +65,7 @@ if __name__ == '__main__': import sys glutInit(sys.argv) if len(sys.argv) > 1: - n_vertices = int(sys.argv[1]) + n_vertices = int(sys.argv[1]) glutInitWindowSize(800, 160) glutInitWindowPosition(0, 0) glutCreateWindow('OpenCL/OpenGL Interop Tutorial: Sin Generator') diff --git a/examples/matrix-multiply.py b/examples/matrix-multiply.py index 7e26f8c2..142591f1 100644 --- a/examples/matrix-multiply.py +++ b/examples/matrix-multiply.py @@ -183,9 +183,9 @@ push_time = time()-t1 # warmup ---------------------------------------------------------------------- for i in range(5): - event = kernel(queue, h_c.shape, d_c_buf, d_a_buf, d_b_buf, - local_size=(block_size, block_size)) -event.wait() + event = kernel(queue, h_c.shape, (block_size, block_size), + d_c_buf, d_a_buf, d_b_buf) + event.wait() queue.finish() @@ -194,8 +194,8 @@ t1 = time() count = 20 for i in range(count): - event = kernel(queue, h_c.shape, d_c_buf, d_a_buf, d_b_buf, - local_size=(block_size, block_size)) + event = kernel(queue, h_c.shape, (block_size, block_size), + d_c_buf, d_a_buf, d_b_buf) event.wait() diff --git a/examples/narray.py b/examples/narray.py index 667f6717..17450d73 100644 --- 
a/examples/narray.py +++ b/examples/narray.py @@ -28,7 +28,7 @@ except: print prg.get_build_info(ctx.devices[0], cl.program_build_info.LOG) raise -prg.demo(queue, (500,), demo_buf) +prg.demo(queue, (500,), None, demo_buf) cl.enqueue_read_buffer(queue, demo_buf, demo_r).wait() for res in demo_r: diff --git a/examples/transpose.py b/examples/transpose.py index ad86de3b..7c571fdf 100644 --- a/examples/transpose.py +++ b/examples/transpose.py @@ -35,9 +35,8 @@ class NaiveTranspose: assert w % block_size == 0 assert h % block_size == 0 - return self.kernel(queue, (w, h), - tgt, src, numpy.uint32(w), numpy.uint32(h), - local_size=(block_size, block_size)) + return self.kernel(queue, (w, h), (block_size, block_size), + tgt, src, numpy.uint32(w), numpy.uint32(h)) @@ -48,7 +47,7 @@ class SillyTranspose(NaiveTranspose): assert w % block_size == 0 assert h % block_size == 0 - return self.kernel(queue, (w, h), + return self.kernel(queue, (w, h), None, tgt, src, numpy.uint32(w), numpy.uint32(h)) @@ -90,10 +89,9 @@ class TransposeWithLocal: assert w % block_size == 0 assert h % block_size == 0 - return self.kernel(queue, (w, h), + return self.kernel(queue, (w, h), (block_size, block_size), tgt, src, numpy.uint32(w), numpy.uint32(h), - cl.LocalMemory(4*block_size*(block_size+1)), - local_size=(block_size, block_size)) + cl.LocalMemory(4*block_size*(block_size+1))) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index f8083bc4..e9c44745 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -52,7 +52,7 @@ def _add_functionality(): for cls in CONSTANT_CLASSES: cls.to_string = classmethod(to_string) - # get_info attributes ----------------------------------------------------- + # {{{ get_info attributes ------------------------------------------------- def make_getattr(info_classes): name_to_info = dict( (intern(info_name.lower()), (info_method, info_value)) @@ -76,26 +76,34 @@ def _add_functionality(): for cls, info_classes in cls_to_info_cls.iteritems(): 
cls.__getattr__ = make_getattr(info_classes) - # Platform ---------------------------------------------------------------- + # }}} + + # {{{ Platform def platform_repr(self): return "" % (self.name, self.obj_ptr) Platform.__repr__ = platform_repr - # Device ------------------------------------------------------------------ + # }}} + + # {{{ Device def device_repr(self): return "" % (self.name, self.obj_ptr) Device.__repr__ = device_repr - # Context ----------------------------------------------------------------- + # }}} + + # {{{ Context def context_repr(self): return "" % (self.obj_ptr, ", ".join(repr(dev) for dev in self.devices)) Context.__repr__ = context_repr - # Program ----------------------------------------------------------------- + # }}} + + # {{{ Program def program_getattr(self, attr): try: pi_attr = getattr(_cl.program_info, attr.upper()) @@ -134,7 +142,9 @@ def _add_functionality(): Program.__getattr__ = program_getattr Program.build = program_build - # Event ------------------------------------------------------------------- + # }}} + + # {{{ Event class ProfilingInfoGetter: def __init__(self, event): self.event = event @@ -152,12 +162,12 @@ def _add_functionality(): _cl.Event.profile = property(ProfilingInfoGetter) - # Kernel ------------------------------------------------------------------ - def kernel_call(self, queue, global_size, *args, **kwargs): - for i, arg in enumerate(args): - self.set_arg(i, arg) + # }}} + # {{{ Kernel + def kernel_call(self, queue, global_size, *args, **kwargs): global_offset = kwargs.pop("global_offset", None) + had_local_size = "local_size" in kwargs local_size = kwargs.pop("local_size", None) wait_for = kwargs.pop("wait_for", None) @@ -166,25 +176,77 @@ def _add_functionality(): "Kernel.__call__ recived unexpected keyword arguments: %s" % ", ".join(kwargs.keys())) + if had_local_size: + from warnings import warn + warn("The local_size keyword argument is deprecated and will be " + "removed in pyopencl 0.94. 
Pass the local " + "size as the third positional argument instead.", + DeprecationWarning, stacklevel=2) + + from types import NoneType + if isinstance(args[0], (NoneType, tuple)) and not had_local_size: + local_size = args[0] + args = args[1:] + elif not had_local_size: + from warnings import warn + warn("PyOpenCL Warning: There was an API change " + "in Kernel.__call__() in pyopencl 0.92. " + "local_size was moved from keyword argument to third " + "positional argument in pyopencl 0.92. " + "You didn't pass local_size, but you still need to insert " + "'None' as a third argument. " + "Your present usage is deprecated and will stop " + "working in pyopencl 0.94.", + DeprecationWarning, stacklevel=2) + + self.set_args(*args) + return enqueue_nd_range_kernel(queue, self, global_size, local_size, global_offset, wait_for) + def kernel_set_scalar_arg_dtypes(self, arg_dtypes): + arg_type_chars = [] + + for arg_dtype in arg_dtypes: + if arg_dtype is None: + arg_type_chars.append(None) + else: + import numpy + arg_type_chars.append(numpy.dtype(arg_dtype).char) + + self._arg_type_chars = arg_type_chars + def kernel_set_args(self, *args): - for i, arg in enumerate(args): - self.set_arg(i, arg) + try: + arg_type_chars = self.__dict__["_arg_type_chars"] + except KeyError: + for i, arg in enumerate(args): + self.set_arg(i, arg) + else: + from struct import pack + for i, (arg, arg_type_char) in enumerate( + zip(args, arg_type_chars)): + if arg_type_char: + self.set_arg(i, pack(arg_type_char, arg)) + else: + self.set_arg(i, arg) Kernel.__call__ = kernel_call + Kernel.set_scalar_arg_dtypes = kernel_set_scalar_arg_dtypes Kernel.set_args = kernel_set_args - # ImageFormat ------------------------------------------------------------- + # }}} + + # {{{ ImageFormat def image_format_repr(self): return "ImageFormat(%s, %s)" % ( channel_order.to_string(self.channel_order), channel_type.to_string(self.channel_data_type)) ImageFormat.__repr__ = image_format_repr + # }}} - # Image 
------------------------------------------------------------------- + # {{{ Image class ImageInfoGetter: def __init__(self, event): from warnings import warn @@ -213,13 +275,17 @@ def _add_functionality(): _cl.Image.image = property(ImageInfoGetter) _cl.Image.shape = property(image_shape) - # Event ------------------------------------------------------------------- + # }}} + + # {{{ Event def event_wait(self): wait_for_events([self]) return self Event.wait = event_wait + # }}} + if _cl.have_gl(): def gl_object_get_gl_object(self): return self.get_gl_object_info()[1] @@ -232,7 +298,7 @@ _add_functionality() -# convenience ----------------------------------------------------------------- +# {{{ convenience ------------------------------------------------------------- def create_some_context(interactive=True): try: import sys @@ -248,7 +314,7 @@ def create_some_context(interactive=True): elif len(platforms) == 1 or not interactive: platform = platforms[0] else: - print "Choose platform from these choices:" + print "Choose platform:" for i, pf in enumerate(platforms): print "[%d] %s" % (i, pf) @@ -267,7 +333,7 @@ def create_some_context(interactive=True): elif len(devices) == 1 or not interactive: pass else: - print "Choose device(s) from these choices:" + print "Choose device(s):" for i, dev in enumerate(devices): print "[%d] %s" % (i, dev) @@ -279,6 +345,7 @@ def create_some_context(interactive=True): return Context(devices) +# }}} # vim: foldmethod=marker diff --git a/test/test_wrapper.py b/test/test_wrapper.py index ed683120..7bd9e268 100644 --- a/test/test_wrapper.py +++ b/test/test_wrapper.py @@ -26,7 +26,7 @@ class TestCL: @pytools.test.mark_test.opencl def test_get_info(self, platform, device): - had_failures = [False] + failure_count = [0] CRASH_QUIRKS = [ (("NVIDIA Corporation", "NVIDIA CUDA", @@ -69,13 +69,12 @@ class TestCL: try: func(info) except: - print "failed get_info", type(cl_obj), info_name + msg = "failed get_info", type(cl_obj), info_name if 
find_quirk(QUIRKS, cl_obj, info): - print "(known quirk for %s)" % platform.name + msg += ("(known quirk for %s)" % platform.name) else: - had_failures[0] = True - raise + failure_count[0] += 1 if try_attr_form: try: @@ -86,8 +85,7 @@ class TestCL: if find_quirk(QUIRKS, cl_obj, info): print "(known quirk for %s)" % platform.name else: - had_failures[0] = True - raise + failure_count[0] += 1 do_test(platform, cl.platform_info) @@ -125,7 +123,7 @@ class TestCL: kernel = prg.sum do_test(kernel, cl.kernel_info) - evt = kernel(queue, (n,), a_buf) + evt = kernel(queue, (n,), None, a_buf) do_test(evt, cl.event_info) if profiling: @@ -151,8 +149,12 @@ class TestCL: do_test(img, cl.image_info, lambda info: img.get_image_info(info)) - if had_failures[0]: - raise RuntimeError("get_info testing had errors") + if failure_count[0]: + raise RuntimeError( + "get_info testing had %d errors " + "(If you compiled against OpenCL 1.1 but are testing a 1.0 " + "implementation, you can safely ignore this.)" + % failure_count[0]) @pytools.test.mark_test.opencl def test_invalid_kernel_names_cause_failures(self): @@ -201,18 +203,18 @@ class TestCL: a_buf = cl.Buffer(context, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=a) try: - prg.mult(queue, a.shape, a_buf, 2, 3) + prg.mult(queue, a.shape, None, a_buf, 2, 3) assert False, "PyOpenCL should not accept bare Python types as arguments" - except TypeError: + except cl.LogicError: pass try: - prg.mult(queue, a.shape, a_buf, float(2), 3) + prg.mult(queue, a.shape, None, a_buf, float(2), 3) assert False, "PyOpenCL should not accept bare Python types as arguments" - except TypeError: + except cl.LogicError: pass - prg.mult(queue, a.shape, a_buf, numpy.float32(2), numpy.int32(3)) + prg.mult(queue, a.shape, None, a_buf, numpy.float32(2), numpy.int32(3)) a_result = numpy.empty_like(a) cl.enqueue_read_buffer(queue, a_buf, a_result).wait() @@ -254,7 +256,7 @@ class TestCL: samp = cl.Sampler(context, False, cl.addressing_mode.CLAMP, cl.filter_mode.NEAREST) 
- prg.copy_image(queue, a.shape, a_dest, a_img, samp, numpy.int32(a.shape[0])) + prg.copy_image(queue, a.shape, None, a_dest, a_img, samp, numpy.int32(a.shape[0])) a_result = numpy.empty_like(a) cl.enqueue_read_buffer(queue, a_dest, a_result, is_blocking=True) -- GitLab