diff --git a/doc/source/index.rst b/doc/source/index.rst index 9fd85142d25562240029b1354b6de8a53ed91efc..38456e96a14eabd00176b328c123d2a72df1aa51 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -53,7 +53,7 @@ Here's an example, to give you an impression:: } """).build() - prg.sum(queue, a.shape, a_buf, b_buf, dest_buf) + prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf) a_plus_b = numpy.empty_like(a) cl.enqueue_read_buffer(queue, dest_buf, a_plus_b).wait() diff --git a/doc/source/misc.rst b/doc/source/misc.rst index 7504bf8f326acbb8b384c5b4ce87df5bbbed76b3..8deba3b084ec46c024b50bc393ce5637ac565cd5 100644 --- a/doc/source/misc.rst +++ b/doc/source/misc.rst @@ -78,6 +78,8 @@ Version 0.92 `cl_khr_gl_sharing `_ extension, leading to working GL interoperability. * Add :meth:`pyopencl.Kernel.set_args`. +* The call signature of :meth:`pyopencl.Kernel.__call__` changed to + emphasize the importance of *local_size*. Version 0.91.5 -------------- diff --git a/doc/source/runtime.rst b/doc/source/runtime.rst index 11ddf72c08c15d3ef5d379aa01af6b6e3bd25b84..7fa2b130ed861818322825228a7893a025b979bf 100644 --- a/doc/source/runtime.rst +++ b/doc/source/runtime.rst @@ -592,13 +592,26 @@ Programs and Kernels Invoke :meth:`set_arg` on each element of *args* in turn. - .. method:: __call__(queue, global_size, *args, global_offset=None, local_size=None, wait_for=None) + .. method:: __call__(queue, global_size, local_size, *args, global_offset=None, wait_for=None) Use :func:`enqueue_nd_range_kernel` to enqueue a kernel execution, after using - :meth:`set_arg` to set each argument in turn. See the documentation for + :meth:`set_args` to set each argument in turn. See the documentation for :meth:`set_arg` to see what argument types are allowed. |std-enqueue-blurb| + *None* may be passed for *local_size*. + + .. versionchanged:: 0.92 + *local_size* was promoted to third positional argument from being a + keyword argument. 
The old keyword argument usage will continue to + be accepted with a warning throughout the 0.92 release cycle. + This is a backward-compatible change (just barely!) because + *local_size* as third positional argument can only be a + :class:`tuple` or *None*. :class:`tuple` instances are never valid + :class:`Kernel` arguments, and *None* is valid as an argument, but + its treatment in the wrapper had a bug (now fixed) that prevented + it from working. + |comparable| .. class:: LocalMemory(size) diff --git a/examples/benchmark-all.py b/examples/benchmark-all.py index 03553f94af4bbbdae217b0b8b4c9de4fa1aa56d0..0d08979b76e89ee1b75e8b3a07d93e86beab23f0 100644 --- a/examples/benchmark-all.py +++ b/examples/benchmark-all.py @@ -60,7 +60,7 @@ for platform in cl.get_platforms(): } """).build() - exec_evt = prg.sum(queue, a.shape, a_buf, b_buf, dest_buf) + exec_evt = prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf) exec_evt.wait() elapsed = 1e-9*(exec_evt.profile.end - exec_evt.profile.start) diff --git a/examples/demo.py b/examples/demo.py index 71f146eb87581e145f909f872e5d9c07a02b46b6..44bc0a58e24142f6263c70c5e77849960d9c4da2 100644 --- a/examples/demo.py +++ b/examples/demo.py @@ -22,7 +22,7 @@ prg = cl.Program(ctx, """ } """).build() -prg.sum(queue, a.shape, a_buf, b_buf, dest_buf) +prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf) a_plus_b = numpy.empty_like(a) cl.enqueue_read_buffer(queue, dest_buf, a_plus_b).wait() diff --git a/examples/demo_meta_codepy.py b/examples/demo_meta_codepy.py index 8c2a827560e77e72d0dad71eb4d406cb1107fe29..c080109b9dcfe45c16525db2eaa7709f9250b3a9 100644 --- a/examples/demo_meta_codepy.py +++ b/examples/demo_meta_codepy.py @@ -46,9 +46,8 @@ mod = Module([ knl = cl.Program(ctx, str(mod)).build().add -knl(queue, (local_size*macroblock_count,), - c_buf, a_buf, b_buf, - local_size=(local_size,)) +knl(queue, (local_size*macroblock_count,), (local_size,), + c_buf, a_buf, b_buf) c = numpy.empty_like(a) cl.enqueue_read_buffer(queue, c_buf, 
c).wait() diff --git a/examples/demo_meta_template.py b/examples/demo_meta_template.py index fe05b1eeabb2fac27b1716e34daf3d1ee44d3930..e6d9315dc308936e8f9bed888e108c4bf609e200 100644 --- a/examples/demo_meta_template.py +++ b/examples/demo_meta_template.py @@ -44,9 +44,8 @@ rendered_tpl = tpl.render(type_name="float", knl = cl.Program(ctx, str(rendered_tpl)).build().add -knl(queue, (local_size*macroblock_count,), - c_buf, a_buf, b_buf, - local_size=(local_size,)) +knl(queue, (local_size*macroblock_count,), (local_size,), + c_buf, a_buf, b_buf) c = numpy.empty_like(a) cl.enqueue_read_buffer(queue, c_buf, c).wait() diff --git a/examples/gl_interop_demo.py b/examples/gl_interop_demo.py index e773db83a8dd587d72fbc40437a59fef2cd5e7be..dd4bf3e3abe962d37947a6ebfd88e1d40a6aefeb 100644 --- a/examples/gl_interop_demo.py +++ b/examples/gl_interop_demo.py @@ -32,7 +32,7 @@ def initialize(): plats = cl.get_platforms() ctx_props = cl.context_properties props = [(ctx_props.PLATFORM, plats[0]), (ctx_props.GL_CONTEXT_KHR, - GetCurrentContext()), (ctx_props.GLX_DISPLAY_KHR, GetCurrentDisplay())] + GetCurrentContext()), (ctx_props.GLX_DISPLAY_KHR, GetCurrentDisplay())] ctx = cl.Context(properties=props) glClearColor(1, 1, 1, 1) glColor(0, 0, 1) @@ -45,7 +45,7 @@ def initialize(): prog = cl.Program(ctx, src).build() queue = cl.CommandQueue(ctx) cl.enqueue_acquire_gl_objects(queue, [coords_dev]) - prog.generate_sin(queue, (n_vertices,), coords_dev) + prog.generate_sin(queue, (n_vertices,), None, coords_dev) cl.enqueue_release_gl_objects(queue, [coords_dev]) queue.finish() glFlush() @@ -65,7 +65,7 @@ if __name__ == '__main__': import sys glutInit(sys.argv) if len(sys.argv) > 1: - n_vertices = int(sys.argv[1]) + n_vertices = int(sys.argv[1]) glutInitWindowSize(800, 160) glutInitWindowPosition(0, 0) glutCreateWindow('OpenCL/OpenGL Interop Tutorial: Sin Generator') diff --git a/examples/matrix-multiply.py b/examples/matrix-multiply.py index 
7e26f8c2aa50b4bc0842d16b400f5a635da9786c..142591f14e7bbeb568c9558386c87a64fd7bf10a 100644 --- a/examples/matrix-multiply.py +++ b/examples/matrix-multiply.py @@ -183,9 +183,9 @@ push_time = time()-t1 # warmup ---------------------------------------------------------------------- for i in range(5): - event = kernel(queue, h_c.shape, d_c_buf, d_a_buf, d_b_buf, - local_size=(block_size, block_size)) -event.wait() + event = kernel(queue, h_c.shape, (block_size, block_size), + d_c_buf, d_a_buf, d_b_buf) + event.wait() queue.finish() @@ -194,8 +194,8 @@ t1 = time() count = 20 for i in range(count): - event = kernel(queue, h_c.shape, d_c_buf, d_a_buf, d_b_buf, - local_size=(block_size, block_size)) + event = kernel(queue, h_c.shape, (block_size, block_size), + d_c_buf, d_a_buf, d_b_buf) event.wait() diff --git a/examples/narray.py b/examples/narray.py index 667f6717411e80e76670a951af55298b70b1ad47..17450d73c0977f597c59947ccfb70c9f5b3a79ff 100644 --- a/examples/narray.py +++ b/examples/narray.py @@ -28,7 +28,7 @@ except: print prg.get_build_info(ctx.devices[0], cl.program_build_info.LOG) raise -prg.demo(queue, (500,), demo_buf) +prg.demo(queue, (500,), None, demo_buf) cl.enqueue_read_buffer(queue, demo_buf, demo_r).wait() for res in demo_r: diff --git a/examples/transpose.py b/examples/transpose.py index ad86de3b7112d107173d78e6d52222b70e38e469..7c571fdf07d6d6a27cc4d49afdaef81b5b5f2c85 100644 --- a/examples/transpose.py +++ b/examples/transpose.py @@ -35,9 +35,8 @@ class NaiveTranspose: assert w % block_size == 0 assert h % block_size == 0 - return self.kernel(queue, (w, h), - tgt, src, numpy.uint32(w), numpy.uint32(h), - local_size=(block_size, block_size)) + return self.kernel(queue, (w, h), (block_size, block_size), + tgt, src, numpy.uint32(w), numpy.uint32(h)) @@ -48,7 +47,7 @@ class SillyTranspose(NaiveTranspose): assert w % block_size == 0 assert h % block_size == 0 - return self.kernel(queue, (w, h), + return self.kernel(queue, (w, h), None, tgt, src, 
numpy.uint32(w), numpy.uint32(h)) @@ -90,10 +89,9 @@ class TransposeWithLocal: assert w % block_size == 0 assert h % block_size == 0 - return self.kernel(queue, (w, h), + return self.kernel(queue, (w, h), (block_size, block_size), tgt, src, numpy.uint32(w), numpy.uint32(h), - cl.LocalMemory(4*block_size*(block_size+1)), - local_size=(block_size, block_size)) + cl.LocalMemory(4*block_size*(block_size+1))) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index f8083bc4ec68016f626eb66cfefc879e898f3156..e9c4474502922b81699422f02768bace3aa3d1b5 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -52,7 +52,7 @@ def _add_functionality(): for cls in CONSTANT_CLASSES: cls.to_string = classmethod(to_string) - # get_info attributes ----------------------------------------------------- + # {{{ get_info attributes ------------------------------------------------- def make_getattr(info_classes): name_to_info = dict( (intern(info_name.lower()), (info_method, info_value)) @@ -76,26 +76,34 @@ def _add_functionality(): for cls, info_classes in cls_to_info_cls.iteritems(): cls.__getattr__ = make_getattr(info_classes) - # Platform ---------------------------------------------------------------- + # }}} + + # {{{ Platform def platform_repr(self): return "" % (self.name, self.obj_ptr) Platform.__repr__ = platform_repr - # Device ------------------------------------------------------------------ + # }}} + + # {{{ Device def device_repr(self): return "" % (self.name, self.obj_ptr) Device.__repr__ = device_repr - # Context ----------------------------------------------------------------- + # }}} + + # {{{ Context def context_repr(self): return "" % (self.obj_ptr, ", ".join(repr(dev) for dev in self.devices)) Context.__repr__ = context_repr - # Program ----------------------------------------------------------------- + # }}} + + # {{{ Program def program_getattr(self, attr): try: pi_attr = getattr(_cl.program_info, attr.upper()) @@ -134,7 +142,9 @@ def 
_add_functionality(): Program.__getattr__ = program_getattr Program.build = program_build - # Event ------------------------------------------------------------------- + # }}} + + # {{{ Event class ProfilingInfoGetter: def __init__(self, event): self.event = event @@ -152,12 +162,12 @@ def _add_functionality(): _cl.Event.profile = property(ProfilingInfoGetter) - # Kernel ------------------------------------------------------------------ - def kernel_call(self, queue, global_size, *args, **kwargs): - for i, arg in enumerate(args): - self.set_arg(i, arg) + # }}} + # {{{ Kernel + def kernel_call(self, queue, global_size, *args, **kwargs): global_offset = kwargs.pop("global_offset", None) + had_local_size = "local_size" in kwargs local_size = kwargs.pop("local_size", None) wait_for = kwargs.pop("wait_for", None) @@ -166,25 +176,77 @@ def _add_functionality(): "Kernel.__call__ recived unexpected keyword arguments: %s" % ", ".join(kwargs.keys())) + if had_local_size: + from warnings import warn + warn("The local_size keyword argument is deprecated and will be " + "removed in pyopencl 0.94. Pass the local " + "size as the third positional argument instead.", + DeprecationWarning, stacklevel=2) + + from types import NoneType + if isinstance(args[0], (NoneType, tuple)) and not had_local_size: + local_size = args[0] + args = args[1:] + elif not had_local_size: + from warnings import warn + warn("PyOpenCL Warning: There was an API change " + "in Kernel.__call__() in pyopencl 0.92. " + "local_size was moved from keyword argument to third " + "positional argument in pyopencl 0.92. " + "You didn't pass local_size, but you still need to insert " + "'None' as a third argument. 
" + "Your present usage is deprecated and will stop " + "working in pyopencl 0.94.", + DeprecationWarning, stacklevel=2) + + self.set_args(*args) + return enqueue_nd_range_kernel(queue, self, global_size, local_size, global_offset, wait_for) + def kernel_set_scalar_arg_dtypes(self, arg_dtypes): + arg_type_chars = [] + + for arg_dtype in arg_dtypes: + if arg_dtype is None: + arg_type_chars.append(None) + else: + import numpy + arg_type_chars.append(numpy.dtype(arg_dtype).char) + + self._arg_type_chars = arg_type_chars + def kernel_set_args(self, *args): - for i, arg in enumerate(args): - self.set_arg(i, arg) + try: + arg_type_chars = self.__dict__["_arg_type_chars"] + except KeyError: + for i, arg in enumerate(args): + self.set_arg(i, arg) + else: + from struct import pack + for i, (arg, arg_type_char) in enumerate( + zip(args, arg_type_chars)): + if arg_type_char: + self.set_arg(i, pack(arg_type_char, arg)) + else: + self.set_arg(i, arg) Kernel.__call__ = kernel_call + Kernel.set_scalar_arg_dtypes = kernel_set_scalar_arg_dtypes Kernel.set_args = kernel_set_args - # ImageFormat ------------------------------------------------------------- + # }}} + + # {{{ ImageFormat def image_format_repr(self): return "ImageFormat(%s, %s)" % ( channel_order.to_string(self.channel_order), channel_type.to_string(self.channel_data_type)) ImageFormat.__repr__ = image_format_repr + # }}} - # Image ------------------------------------------------------------------- + # {{{ Image class ImageInfoGetter: def __init__(self, event): from warnings import warn @@ -213,13 +275,17 @@ def _add_functionality(): _cl.Image.image = property(ImageInfoGetter) _cl.Image.shape = property(image_shape) - # Event ------------------------------------------------------------------- + # }}} + + # {{{ Event def event_wait(self): wait_for_events([self]) return self Event.wait = event_wait + # }}} + if _cl.have_gl(): def gl_object_get_gl_object(self): return self.get_gl_object_info()[1] @@ -232,7 +298,7 @@ 
_add_functionality() -# convenience ----------------------------------------------------------------- +# {{{ convenience ------------------------------------------------------------- def create_some_context(interactive=True): try: import sys @@ -248,7 +314,7 @@ def create_some_context(interactive=True): elif len(platforms) == 1 or not interactive: platform = platforms[0] else: - print "Choose platform from these choices:" + print "Choose platform:" for i, pf in enumerate(platforms): print "[%d] %s" % (i, pf) @@ -267,7 +333,7 @@ def create_some_context(interactive=True): elif len(devices) == 1 or not interactive: pass else: - print "Choose device(s) from these choices:" + print "Choose device(s):" for i, dev in enumerate(devices): print "[%d] %s" % (i, dev) @@ -279,6 +345,7 @@ def create_some_context(interactive=True): return Context(devices) +# }}} # vim: foldmethod=marker diff --git a/test/test_wrapper.py b/test/test_wrapper.py index ed683120f87f6be784c0c882e69a2d675224e32d..7bd9e26801f6f40016e4632a98df79087f7f3309 100644 --- a/test/test_wrapper.py +++ b/test/test_wrapper.py @@ -26,7 +26,7 @@ class TestCL: @pytools.test.mark_test.opencl def test_get_info(self, platform, device): - had_failures = [False] + failure_count = [0] CRASH_QUIRKS = [ (("NVIDIA Corporation", "NVIDIA CUDA", @@ -69,13 +69,12 @@ class TestCL: try: func(info) except: - print "failed get_info", type(cl_obj), info_name + msg = "failed get_info", type(cl_obj), info_name if find_quirk(QUIRKS, cl_obj, info): - print "(known quirk for %s)" % platform.name + msg += ("(known quirk for %s)" % platform.name) else: - had_failures[0] = True - raise + failure_count[0] += 1 if try_attr_form: try: @@ -86,8 +85,7 @@ class TestCL: if find_quirk(QUIRKS, cl_obj, info): print "(known quirk for %s)" % platform.name else: - had_failures[0] = True - raise + failure_count[0] += 1 do_test(platform, cl.platform_info) @@ -125,7 +123,7 @@ class TestCL: kernel = prg.sum do_test(kernel, cl.kernel_info) - evt = kernel(queue, 
(n,), a_buf) + evt = kernel(queue, (n,), None, a_buf) do_test(evt, cl.event_info) if profiling: @@ -151,8 +149,12 @@ class TestCL: do_test(img, cl.image_info, lambda info: img.get_image_info(info)) - if had_failures[0]: - raise RuntimeError("get_info testing had errors") + if failure_count[0]: + raise RuntimeError( + "get_info testing had %d errors " + "(If you compiled against OpenCL 1.1 but are testing a 1.0 " + "implementation, you can safely ignore this.)" + % failure_count[0]) @pytools.test.mark_test.opencl def test_invalid_kernel_names_cause_failures(self): @@ -201,18 +203,18 @@ class TestCL: a_buf = cl.Buffer(context, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=a) try: - prg.mult(queue, a.shape, a_buf, 2, 3) + prg.mult(queue, a.shape, None, a_buf, 2, 3) assert False, "PyOpenCL should not accept bare Python types as arguments" - except TypeError: + except cl.LogicError: pass try: - prg.mult(queue, a.shape, a_buf, float(2), 3) + prg.mult(queue, a.shape, None, a_buf, float(2), 3) assert False, "PyOpenCL should not accept bare Python types as arguments" - except TypeError: + except cl.LogicError: pass - prg.mult(queue, a.shape, a_buf, numpy.float32(2), numpy.int32(3)) + prg.mult(queue, a.shape, None, a_buf, numpy.float32(2), numpy.int32(3)) a_result = numpy.empty_like(a) cl.enqueue_read_buffer(queue, a_buf, a_result).wait() @@ -254,7 +256,7 @@ class TestCL: samp = cl.Sampler(context, False, cl.addressing_mode.CLAMP, cl.filter_mode.NEAREST) - prg.copy_image(queue, a.shape, a_dest, a_img, samp, numpy.int32(a.shape[0])) + prg.copy_image(queue, a.shape, None, a_dest, a_img, samp, numpy.int32(a.shape[0])) a_result = numpy.empty_like(a) cl.enqueue_read_buffer(queue, a_dest, a_result, is_blocking=True)