diff --git a/doc/source/index.rst b/doc/source/index.rst index 9ec0a735d3e8834e291054525db33fbb9be064a3..387bd7cd0be922e4b07a8548a56cce1abe133db1 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -56,7 +56,7 @@ Here's an example, to give you an impression:: prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf) a_plus_b = numpy.empty_like(a) - cl.enqueue_read_buffer(queue, dest_buf, a_plus_b).wait() + cl.enqueue_copy(queue, a_plus_b, dest_buf) print la.norm(a_plus_b - (a+b)) diff --git a/doc/source/misc.rst b/doc/source/misc.rst index 1a2b031307de2a0ce2cb6dedb277f7f951ffa4c7..4fd88e65b1e4b52752dade85c09bdec37eb1b4c6 100644 --- a/doc/source/misc.rst +++ b/doc/source/misc.rst @@ -97,6 +97,7 @@ Version 2011.1 * Add vector types, see :class:`pyopencl.array.vec`. * Add :attr:`pyopencl.array.Array.strides`, :attr:`pyopencl.array.Array.flags`. Allow the creation of arrys in C and Fortran order. +* Add :func:`pyopencl.enqueue_copy`. Deprecate all other transfer functions. Version 0.92 ------------ diff --git a/doc/source/runtime.rst b/doc/source/runtime.rst index aab26ef2108c2cb6ce2060143a67a4a91acae1de..189323fbb0856fc415aaa7f77631a670e4ed789e 100644 --- a/doc/source/runtime.rst +++ b/doc/source/runtime.rst @@ -64,6 +64,9 @@ Platforms, Devices and Contexts whose completion this command waits before starting exeuction. .. |std-enqueue-blurb| replace:: Returns a new :class:`Event`. |explain-waitfor| +.. |copy-depr| replace:: **Note:** This function is deprecated as of PyOpenCL 2011.1. + Use :func:`enqueue_copy` instead. + .. function:: get_platforms() Return a list of :class:`Platform` instances. @@ -300,6 +303,8 @@ Buffers *hostbuf* |buf-iface| + |copy-depr| + .. versionchanged:: 2011.1 *is_blocking* now defaults to True. @@ -309,6 +314,8 @@ Buffers *hostbuf* |buf-iface| + |copy-depr| + .. versionchanged:: 2011.1 *is_blocking* now defaults to True. @@ -319,6 +326,8 @@ Buffers |std-enqueue-blurb| + |copy-depr| + .. versionadded:: 0.91.5 .. 
function:: enqueue_read_buffer_rect(queue, mem, hostbuf, buffer_origin, host_origin, region, buffer_pitches=None, host_pitches=None, wait_for=None, is_blocking=True) @@ -331,6 +340,8 @@ Buffers *hostbuf* |buf-iface| + |copy-depr| + Only available in OpenCL 1.1 and newer. .. versionadded:: 0.92 @@ -348,6 +359,8 @@ Buffers *hostbuf* |buf-iface| + |copy-depr| + Only available in OpenCL 1.1 and newer. .. versionadded:: 0.92 @@ -363,6 +376,8 @@ Buffers |std-enqueue-blurb| + |copy-depr| + Only available in OpenCL 1.1 and newer. .. versionadded:: 0.92 @@ -458,6 +473,8 @@ Images |std-enqueue-blurb| + |copy-depr| + .. versionchanged:: 0.91 *pitch* arguments defaults to zero, moved. @@ -478,16 +495,90 @@ Images .. function:: enqueue_copy_image(queue, src, dest, src_origin, dest_origin, region, wait_for=None) + |copy-depr| + |std-enqueue-blurb| .. function:: enqueue_copy_image_to_buffer(queue, src, dest, origin, region, offset, wait_for=None) + |copy-depr| + |std-enqueue-blurb| .. function:: enqueue_copy_buffer_to_image(queue, src, dest, offset, origin, region, wait_for=None) + |copy-depr| + |std-enqueue-blurb| +Transfers +^^^^^^^^^ + +.. function:: enqueue_copy(queue, dest, src, **kwargs) + + Copy from :class:`Image`, :class:`Buffer` or the host to + :class:`Image`, :class:`Buffer` or the host. (Note: host-to-host + copies are unsupported.) + + The following keyword arguments are available: + + :arg wait_for: (optional, default empty) + :arg is_blocking: Wait for completion. Defaults to *True*. + (Available on any copy involving host memory) + + :class:`Buffer` ↔ host transfers: + + :arg device_offset: offset in bytes (optional) + + :class:`Buffer` ↔ :class:`Buffer` transfers: + + :arg byte_count: (optional) + :arg src_offset: (optional) + :arg dest_offset: (optional) + + Rectangular :class:`Buffer` ↔ host transfers (CL 1.1 and newer): + + :arg buffer_origin: :class:`tuple` of :class:`int` of length + three or shorter. 
(mandatory) + :arg host_origin: :class:`tuple` of :class:`int` of length + three or shorter. (mandatory) + :arg region: :class:`tuple` of :class:`int` of length + three or shorter. (mandatory) + :arg buffer_pitches: :class:`tuple` of :class:`int` of length + two or shorter. (optional, "tightly-packed" if unspecified) + :arg host_pitches: :class:`tuple` of :class:`int` of length + two or shorter. (optional, "tightly-packed" if unspecified) + + :class:`Image` ↔ host transfers: + + :arg origin: :class:`tuple` of :class:`int` of length + three or shorter. (mandatory) + :arg region: :class:`tuple` of :class:`int` of length + three or shorter. (mandatory) + :arg pitches: :class:`tuple` of :class:`int` of length + two or shorter. (optional) + + :class:`Buffer` ↔ :class:`Image` transfers: + + :arg offset: offset in buffer (mandatory) + :arg origin: :class:`tuple` of :class:`int` of length + three or shorter. (mandatory) + :arg region: :class:`tuple` of :class:`int` of length + three or shorter. (mandatory) + + :class:`Image` ↔ :class:`Image` transfers: + + :arg src_origin: :class:`tuple` of :class:`int` of length + three or shorter. (mandatory) + :arg dest_origin: :class:`tuple` of :class:`int` of length + three or shorter. (mandatory) + :arg region: :class:`tuple` of :class:`int` of length + three or shorter. (mandatory) + + |std-enqueue-blurb| + + .. 
versionadded:: 2011.1 + Mapping Memory into Host Address Space ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/examples/demo.py b/examples/demo.py index 98fb46f8b49e6ebca02c9481e186127045e25b9a..ba948d6716b84c338f3a28b64d0b3e6c9425a1bc 100644 --- a/examples/demo.py +++ b/examples/demo.py @@ -25,6 +25,6 @@ prg = cl.Program(ctx, """ prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf) a_plus_b = numpy.empty_like(a) -cl.enqueue_read_buffer(queue, dest_buf, a_plus_b).wait() +cl.enqueue_copy(queue, a_plus_b, dest_buf) print(la.norm(a_plus_b - (a+b)), la.norm(a_plus_b)) diff --git a/examples/demo_elementwise_complex.py b/examples/demo_elementwise_complex.py index c845c35badf7f7d2a682b355cdce7f2cfd63c736..7845530b13c152db9e35479ba7ea3bcbcd56064d 100644 --- a/examples/demo_elementwise_complex.py +++ b/examples/demo_elementwise_complex.py @@ -7,10 +7,10 @@ ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) n = 10 -a_gpu = cl_array.to_device(ctx, queue, +a_gpu = cl_array.to_device(queue, ( numpy.random.randn(n) + 1j*numpy.random.randn(n) ).astype(numpy.complex64)) -b_gpu = cl_array.to_device(ctx, queue, +b_gpu = cl_array.to_device(queue, ( numpy.random.randn(n) + 1j*numpy.random.randn(n) ).astype(numpy.complex64)) @@ -46,7 +46,7 @@ real_part = ElementwiseKernel(ctx, c_gpu = cl_array.empty_like(a_gpu) complex_prod(5, a_gpu, b_gpu, c_gpu) -c_gpu_real = cl_array.empty(ctx, len(a_gpu), dtype=numpy.float32, queue=queue) +c_gpu_real = cl_array.empty(queue, len(a_gpu), dtype=numpy.float32) real_part(c_gpu, c_gpu_real) print c_gpu.get().real - c_gpu_real.get() diff --git a/examples/matrix-multiply.py b/examples/matrix-multiply.py index 91560ffbf6fb9f39f01e83d32df587a60aafbf45..9de9cf1ea7b419700911584bab9c64961231f338 100644 --- a/examples/matrix-multiply.py +++ b/examples/matrix-multiply.py @@ -207,7 +207,7 @@ gpu_time = (time()-t1)/count # transfer device -> host ----------------------------------------------------- t1 = time() -cl.enqueue_read_buffer(queue, d_c_buf, 
h_c).wait()
+cl.enqueue_copy(queue, h_c, d_c_buf)
 pull_time = time()-t1
 
 # timing output ---------------------------------------------------------------
diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index feab73477196958a5e7a9f63d3c19590a693f4a0..8154caf5b901b72b27066b443a1656ae54808831 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -7,7 +7,7 @@ except ImportError:
     from os.path import dirname, join, realpath
     if realpath(join(os.getcwd(), "pyopencl")) == realpath(dirname(__file__)):
         from warnings import warn
-        warn ("It looks like you are importing PyOpenCL from "
+        warn("It looks like you are importing PyOpenCL from "
                 "its source directory. This likely won't work.")
     raise
 
@@ -16,6 +16,7 @@ except ImportError:
 import numpy as np
 from pyopencl._cl import *
 import inspect as _inspect
+from decorator import decorator as _decorator
 
 CONSTANT_CLASSES = [
         getattr(_cl, name) for name in dir(_cl)
@@ -377,6 +378,134 @@ def create_some_context(interactive=True):
 
     return Context(devices)
 
+
+
+
+def _mark_copy_deprecated(func):
+    """Return a wrapper around the transfer function *func* that issues a
+    :exc:`DeprecationWarning` recommending :func:`enqueue_copy` each time
+    it is called, then forwards all arguments to *func*.
+
+    The first character of ``func.__name__`` (the leading underscore of
+    the ``_cl._enqueue_*`` names passed in below) is dropped from the name
+    shown in the warning.
+    """
+    def new_func(*args, **kwargs):
+        from warnings import warn
+        # stacklevel=2 attributes the warning to the caller of the
+        # deprecated function rather than to this wrapper.
+        warn("'%s' has been deprecated in version 2011.1. Please use "
+                "enqueue_copy() instead." % func.__name__[1:], DeprecationWarning,
+                stacklevel=2)
+        return func(*args, **kwargs)
+
+    # If functools.update_wrapper cannot be imported, the wrapper is
+    # returned without the metadata of *func* copied onto it.
+    try:
+        from functools import update_wrapper
+    except ImportError:
+        pass
+    else:
+        update_wrapper(new_func, func)
+
+    return new_func
+
+
+enqueue_read_image = _mark_copy_deprecated(_cl._enqueue_read_image)
+enqueue_write_image = _mark_copy_deprecated(_cl._enqueue_write_image)
+enqueue_copy_image = _mark_copy_deprecated(_cl._enqueue_copy_image)
+enqueue_copy_image_to_buffer = _mark_copy_deprecated(_cl._enqueue_copy_image_to_buffer)
+enqueue_copy_buffer_to_image = _mark_copy_deprecated(_cl._enqueue_copy_buffer_to_image)
+enqueue_read_buffer = _mark_copy_deprecated(_cl._enqueue_read_buffer)
+enqueue_write_buffer = _mark_copy_deprecated(_cl._enqueue_write_buffer)
+enqueue_copy_buffer = _mark_copy_deprecated(_cl._enqueue_copy_buffer)
+
+if _cl.get_cl_header_version() >= (1,1):
+    enqueue_read_buffer_rect = _mark_copy_deprecated(_cl._enqueue_read_buffer_rect)
+    enqueue_write_buffer_rect = _mark_copy_deprecated(_cl._enqueue_write_buffer_rect)
+    enqueue_copy_buffer_rect = _mark_copy_deprecated(_cl._enqueue_copy_buffer_rect)
+
+def _translate_pitches(kwargs):
+    """Replace a ``pitches`` entry in *kwargs* (a sequence of one or two
+    values) by the ``row_pitch``/``slice_pitch`` keyword arguments taken
+    by the image transfer functions. *kwargs* is modified in place and is
+    left untouched if it has no ``pitches`` entry.
+    """
+    if "pitches" not in kwargs:
+        return
+
+    pitches = kwargs.pop("pitches")
+    if len(pitches) == 1:
+        kwargs["row_pitch"], = pitches
+    else:
+        kwargs["row_pitch"], kwargs["slice_pitch"] = pitches
+
+def enqueue_copy(queue, dest, src, **kwargs):
+    """Enqueue a copy from *src* to *dest* on *queue* and return the result
+    of the underlying transfer function.
+
+    The transfer function is chosen by whether *dest* and *src* are
+    :class:`Buffer` or :class:`Image` instances; any other object is
+    treated as host memory. *kwargs* is forwarded to that function, after
+    the translations noted in the comments below.
+
+    :raises TypeError: if neither *dest* nor *src* is a :class:`Buffer`
+        or an :class:`Image` (host-to-host copy).
+    """
+    if isinstance(dest, Buffer):
+        if isinstance(src, Buffer):
+            if "src_origin" in kwargs:
+                # Accept 'dest_origin' as a spelling of the wrapper's
+                # 'dst_origin', matching the 'dest_offset' handling below.
+                if "dest_origin" in kwargs:
+                    kwargs["dst_origin"] = kwargs.pop("dest_origin")
+                return _cl._enqueue_copy_buffer_rect(queue, src, dest, **kwargs)
+            else:
+                # 'dest_offset' is optional: only rename it when given,
+                # instead of failing with a KeyError when it is absent.
+                if "dest_offset" in kwargs:
+                    kwargs["dst_offset"] = kwargs.pop("dest_offset")
+                return _cl._enqueue_copy_buffer(queue, src, dest, **kwargs)
+        elif isinstance(src, Image):
+            return _cl._enqueue_copy_image_to_buffer(queue, src, dest, **kwargs)
+        else:
+            # assume from-host
+            if "buffer_origin" in kwargs:
+                return _cl._enqueue_write_buffer_rect(queue, dest, src, **kwargs)
+            else:
+                return _cl._enqueue_write_buffer(queue, dest, src, **kwargs)
+
+    elif isinstance(dest, Image):
+        if isinstance(src, Buffer):
+            return _cl._enqueue_copy_buffer_to_image(queue, src, dest, **kwargs)
+        elif isinstance(src, Image):
+            return _cl._enqueue_copy_image(queue, src, dest, **kwargs)
+        else:
+            # assume from-host
+            origin = kwargs.pop("origin")
+            region = kwargs.pop("region")
+            _translate_pitches(kwargs)
+            return _cl._enqueue_write_image(queue, dest, origin, region, src, **kwargs)
+
+    else:
+        # assume to-host
+
+        if isinstance(src, Buffer):
+            if "buffer_origin" in kwargs:
+                return _cl._enqueue_read_buffer_rect(queue, src, dest, **kwargs)
+            else:
+                return _cl._enqueue_read_buffer(queue, src, dest, **kwargs)
+        elif isinstance(src, Image):
+            # _enqueue_read_image takes (queue, mem, origin, region, hostbuf),
+            # so origin and region have to precede the host buffer.
+            origin = kwargs.pop("origin")
+            region = kwargs.pop("region")
+            _translate_pitches(kwargs)
+            return _cl._enqueue_read_image(queue, src, origin, region, dest, **kwargs)
+        else:
+            # neither side is a Buffer or an Image: host-to-host
+            raise TypeError("enqueue_copy cannot perform host-to-host transfers")
+
 # }}}
 
 
diff --git a/pyopencl/array.py b/pyopencl/array.py
index f78cab4bb8db7a993311cf79c48d438e72fc383d..38eabdb31d129f6f681cbdd3b4c9477d85f8ac25 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -349,7 +349,7 @@ class Array(object):
             ary = ary.copy()
 
         if self.size:
-            cl.enqueue_write_buffer(queue or self.queue, self.data, ary,
+            cl.enqueue_copy(queue or self.queue, self.data, ary,
                     is_blocking=not async)
 
     def get(self, queue=None, ary=None, async=False):
@@ -367,7 +367,7 @@ class Array(object):
         assert self.flags.forc, "Array in get() must be contiguous"
 
         if self.size:
-            cl.enqueue_read_buffer(queue or self.queue, self.data, ary,
+            cl.enqueue_copy(queue or self.queue, ary, self.data,
                     is_blocking=not async)
 
         return ary
diff --git a/src/wrapper/wrap_cl_part_1.cpp b/src/wrapper/wrap_cl_part_1.cpp
index 58fda5d8cf0decdbd965322097d275837cd6675f..f7a7dddbf9a4d0f0191bca9f3f6dc9d6d7011d1c 100644
--- a/src/wrapper/wrap_cl_part_1.cpp
+++ b/src/wrapper/wrap_cl_part_1.cpp
@@ -164,7 +164,7 @@ void pyopencl_expose_part_1()
   // {{{ transfers
 
   // {{{ byte-for-byte
-  py::def("enqueue_read_buffer", enqueue_read_buffer,
+  py::def("_enqueue_read_buffer", enqueue_read_buffer,
       (py::args("queue", "mem", "hostbuf"),
        py::arg("device_offset")=0,
py::arg("wait_for")=py::object(), @@ -172,7 +172,7 @@ void pyopencl_expose_part_1() py::arg("host_buffer")=py::object() ), py::return_value_policy<py::manage_new_object>()); - py::def("enqueue_write_buffer", enqueue_write_buffer, + py::def("_enqueue_write_buffer", enqueue_write_buffer, (py::args("queue", "mem", "hostbuf"), py::arg("device_offset")=0, py::arg("wait_for")=py::object(), @@ -180,7 +180,7 @@ void pyopencl_expose_part_1() py::arg("host_buffer")=py::object() ), py::return_value_policy<py::manage_new_object>()); - py::def("enqueue_copy_buffer", enqueue_copy_buffer, + py::def("_enqueue_copy_buffer", enqueue_copy_buffer, (py::args("queue", "src", "dst"), py::arg("byte_count")=0, py::arg("src_offset")=0, @@ -194,7 +194,7 @@ void pyopencl_expose_part_1() // {{{ rectangular #ifdef CL_VERSION_1_1 - py::def("enqueue_read_buffer_rect", enqueue_read_buffer_rect, + py::def("_enqueue_read_buffer_rect", enqueue_read_buffer_rect, (py::args("queue", "mem", "hostbuf", "buffer_origin", "host_origin", "region"), py::arg("buffer_pitches")=py::object(), @@ -203,7 +203,7 @@ void pyopencl_expose_part_1() py::arg("is_blocking")=true ), py::return_value_policy<py::manage_new_object>()); - py::def("enqueue_write_buffer_rect", enqueue_write_buffer_rect, + py::def("_enqueue_write_buffer_rect", enqueue_write_buffer_rect, (py::args("queue", "mem", "hostbuf", "buffer_origin", "host_origin", "region"), py::arg("buffer_pitches")=py::object(), @@ -212,7 +212,7 @@ void pyopencl_expose_part_1() py::arg("is_blocking")=true ), py::return_value_policy<py::manage_new_object>()); - py::def("enqueue_copy_buffer_rect", enqueue_copy_buffer_rect, + py::def("_enqueue_copy_buffer_rect", enqueue_copy_buffer_rect, (py::args("queue", "src", "dst", "src_origin", "dst_origin", "region"), py::arg("src_pitches")=py::object(), diff --git a/src/wrapper/wrap_cl_part_2.cpp b/src/wrapper/wrap_cl_part_2.cpp index 899db5b893af39e7f7ea3779e4934dd366b48dca..6e8807fd9adde53ceea30b9a1f97be6971925520 100644 --- 
a/src/wrapper/wrap_cl_part_2.cpp
+++ b/src/wrapper/wrap_cl_part_2.cpp
@@ -41,7 +41,7 @@ void pyopencl_expose_part_2()
 
   DEF_SIMPLE_FUNCTION(get_supported_image_formats);
 
-  py::def("enqueue_read_image", enqueue_read_image,
+  py::def("_enqueue_read_image", enqueue_read_image,
       (py::args("queue", "mem", "origin", "region", "hostbuf"),
        py::arg("row_pitch")=0,
        py::arg("slice_pitch")=0,
@@ -50,7 +50,7 @@ void pyopencl_expose_part_2()
        py::arg("host_buffer")=py::object()
        ),
       py::return_value_policy<py::manage_new_object>());
-  py::def("enqueue_write_image", enqueue_write_image,
+  py::def("_enqueue_write_image", enqueue_write_image,
       (py::args("queue", "mem", "origin", "region", "hostbuf"),
        py::arg("row_pitch")=0,
        py::arg("slice_pitch")=0,
@@ -60,15 +60,15 @@ void pyopencl_expose_part_2()
        ),
       py::return_value_policy<py::manage_new_object>());
 
-  py::def("enqueue_copy_image", enqueue_copy_image,
+  py::def("_enqueue_copy_image", enqueue_copy_image,
       (py::args("queue", "src", "dest", "src_origin", "dest_origin", "region"),
        py::arg("wait_for")=py::object()),
       py::return_value_policy<py::manage_new_object>());
-  py::def("enqueue_copy_image_to_buffer", enqueue_copy_image_to_buffer,
+  py::def("_enqueue_copy_image_to_buffer", enqueue_copy_image_to_buffer,
       (py::args("queue", "src", "dest", "origin", "region", "offset"),
        py::arg("wait_for")=py::object()),
       py::return_value_policy<py::manage_new_object>());
-  py::def("enqueue_copy_buffer_to_image", enqueue_copy_image_to_buffer,
+  py::def("_enqueue_copy_buffer_to_image", enqueue_copy_buffer_to_image,
       (py::args("queue", "src", "dest", "offset", "origin", "region"),
        py::arg("wait_for")=py::object()),
       py::return_value_policy<py::manage_new_object>());