From 0a730d5e5be3df3229ea14c83dd2b6483bca843d Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Sun, 17 Apr 2011 14:44:52 -0400
Subject: [PATCH] Introduce enqueue_copy().

---
 doc/source/index.rst                 |  2 +-
 doc/source/misc.rst                  |  1 +
 doc/source/runtime.rst               | 91 ++++++++++++++++++++++++++++
 examples/demo.py                     |  2 +-
 examples/demo_elementwise_complex.py |  6 +-
 examples/matrix-multiply.py          |  2 +-
 pyopencl/__init__.py                 | 86 +++++++++++++++++++++++++-
 pyopencl/array.py                    |  4 +-
 src/wrapper/wrap_cl_part_1.cpp       | 12 ++--
 src/wrapper/wrap_cl_part_2.cpp       | 10 +--
 10 files changed, 196 insertions(+), 20 deletions(-)

diff --git a/doc/source/index.rst b/doc/source/index.rst
index 9ec0a735..387bd7cd 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -56,7 +56,7 @@ Here's an example, to give you an impression::
     prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf)
 
     a_plus_b = numpy.empty_like(a)
-    cl.enqueue_read_buffer(queue, dest_buf, a_plus_b).wait()
+    cl.enqueue_copy(queue, a_plus_b, dest_buf)
 
     print la.norm(a_plus_b - (a+b))
 
diff --git a/doc/source/misc.rst b/doc/source/misc.rst
index 1a2b0313..4fd88e65 100644
--- a/doc/source/misc.rst
+++ b/doc/source/misc.rst
@@ -97,6 +97,7 @@ Version 2011.1
 * Add vector types, see :class:`pyopencl.array.vec`.
 * Add :attr:`pyopencl.array.Array.strides`, :attr:`pyopencl.array.Array.flags`.
   Allow the creation of arrys in C and Fortran order.
+* Add :func:`pyopencl.enqueue_copy`. Deprecate all other transfer functions.
 
 Version 0.92
 ------------
diff --git a/doc/source/runtime.rst b/doc/source/runtime.rst
index aab26ef2..189323fb 100644
--- a/doc/source/runtime.rst
+++ b/doc/source/runtime.rst
@@ -64,6 +64,9 @@ Platforms, Devices and Contexts
     whose completion this command waits before starting exeuction.
 .. |std-enqueue-blurb| replace:: Returns a new :class:`Event`. |explain-waitfor|
 
+.. |copy-depr| replace:: **Note:** This function is deprecated as of PyOpenCL 2011.1.
+        Use :func:`enqueue_copy` instead.
+
 .. function:: get_platforms()
 
     Return a list of :class:`Platform` instances.
@@ -300,6 +303,8 @@ Buffers
 
     *hostbuf* |buf-iface|
 
+    |copy-depr|
+
     .. versionchanged:: 2011.1
         *is_blocking* now defaults to True.
 
@@ -309,6 +314,8 @@ Buffers
 
     *hostbuf* |buf-iface|
 
+    |copy-depr|
+
     .. versionchanged:: 2011.1
         *is_blocking* now defaults to True.
 
@@ -319,6 +326,8 @@ Buffers
 
     |std-enqueue-blurb|
 
+    |copy-depr|
+
     .. versionadded:: 0.91.5
 
 .. function:: enqueue_read_buffer_rect(queue, mem, hostbuf, buffer_origin, host_origin, region, buffer_pitches=None, host_pitches=None, wait_for=None, is_blocking=True)
@@ -331,6 +340,8 @@ Buffers
 
     *hostbuf* |buf-iface|
 
+    |copy-depr|
+
     Only available in OpenCL 1.1 and newer.
 
     .. versionadded:: 0.92
@@ -348,6 +359,8 @@ Buffers
 
     *hostbuf* |buf-iface|
 
+    |copy-depr|
+
     Only available in OpenCL 1.1 and newer.
 
     .. versionadded:: 0.92
@@ -363,6 +376,8 @@ Buffers
 
     |std-enqueue-blurb|
 
+    |copy-depr|
+
     Only available in OpenCL 1.1 and newer.
 
     .. versionadded:: 0.92
@@ -458,6 +473,8 @@ Images
 
     |std-enqueue-blurb|
 
+    |copy-depr|
+
     .. versionchanged:: 0.91
         *pitch* arguments defaults to zero, moved.
 
@@ -478,16 +495,90 @@ Images
 
 .. function:: enqueue_copy_image(queue, src, dest, src_origin, dest_origin, region, wait_for=None)
 
+    |copy-depr|
+
     |std-enqueue-blurb|
 
 .. function:: enqueue_copy_image_to_buffer(queue, src, dest, origin, region, offset, wait_for=None)
 
+    |copy-depr|
+
     |std-enqueue-blurb|
 
 .. function:: enqueue_copy_buffer_to_image(queue, src, dest, offset, origin, region, wait_for=None)
 
+    |copy-depr|
+
     |std-enqueue-blurb|
 
+Transfers
+^^^^^^^^^
+
+.. function:: enqueue_copy(queue, dest, src, **kwargs)
+
+    Copy from :class:`Image`, :class:`Buffer` or the host to 
+    :class:`Image`, :class:`Buffer` or the host. (Note: host-to-host
+    copies are unsupported.)
+
+    The following keyword arguments are available:
+
+    :arg wait_for: (optional, default empty)
+    :arg is_blocking: Wait for completion. Defaults to *True*. 
+      (Available on any copy involving host memory)
+
+    :class:`Buffer` ↔ host transfers:
+
+    :arg device_offset: offset in bytes (optional)
+
+    :class:`Buffer` ↔ :class:`Buffer` transfers:
+
+    :arg byte_count: (optional)
+    :arg src_offset: (optional)
+    :arg dest_offset: (optional)
+
+    Rectangular :class:`Buffer` ↔ host transfers (CL 1.1 and newer):
+
+    :arg buffer_origin: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg host_origin: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg region: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg buffer_pitches: :class:`tuple` of :class:`int` of length
+        two or shorter. (optional, "tightly-packed" if unspecified)
+    :arg host_pitches: :class:`tuple` of :class:`int` of length
+        two or shorter. (optional, "tightly-packed" if unspecified)
+
+    :class:`Image` ↔ host transfers:
+
+    :arg origin: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg region: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg pitches: :class:`tuple` of :class:`int` of length
+        two or shorter. (optional)
+
+    :class:`Buffer` ↔ :class:`Image` transfers:
+
+    :arg offset: offset in buffer (mandatory)
+    :arg origin: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg region: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+
+    :class:`Image` ↔ :class:`Image` transfers:
+
+    :arg src_origin: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg dest_origin: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg region: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+
+    |std-enqueue-blurb|
+
+    .. versionadded:: 2011.1
+
 Mapping Memory into Host Address Space
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/examples/demo.py b/examples/demo.py
index 98fb46f8..ba948d67 100644
--- a/examples/demo.py
+++ b/examples/demo.py
@@ -25,6 +25,6 @@ prg = cl.Program(ctx, """
 prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf)
 
 a_plus_b = numpy.empty_like(a)
-cl.enqueue_read_buffer(queue, dest_buf, a_plus_b).wait()
+cl.enqueue_copy(queue, a_plus_b, dest_buf)
 
 print(la.norm(a_plus_b - (a+b)), la.norm(a_plus_b))
diff --git a/examples/demo_elementwise_complex.py b/examples/demo_elementwise_complex.py
index c845c35b..7845530b 100644
--- a/examples/demo_elementwise_complex.py
+++ b/examples/demo_elementwise_complex.py
@@ -7,10 +7,10 @@ ctx = cl.create_some_context()
 queue = cl.CommandQueue(ctx)
 
 n = 10
-a_gpu = cl_array.to_device(ctx, queue,
+a_gpu = cl_array.to_device(queue,
         ( numpy.random.randn(n) + 1j*numpy.random.randn(n)
             ).astype(numpy.complex64))
-b_gpu = cl_array.to_device(ctx, queue,
+b_gpu = cl_array.to_device(queue,
         ( numpy.random.randn(n) + 1j*numpy.random.randn(n)
             ).astype(numpy.complex64))
 
@@ -46,7 +46,7 @@ real_part = ElementwiseKernel(ctx,
 c_gpu = cl_array.empty_like(a_gpu)
 complex_prod(5, a_gpu, b_gpu, c_gpu)
 
-c_gpu_real = cl_array.empty(ctx, len(a_gpu), dtype=numpy.float32, queue=queue)
+c_gpu_real = cl_array.empty(queue, len(a_gpu), dtype=numpy.float32)
 real_part(c_gpu, c_gpu_real)
 print c_gpu.get().real - c_gpu_real.get()
 
diff --git a/examples/matrix-multiply.py b/examples/matrix-multiply.py
index 91560ffb..9de9cf1e 100644
--- a/examples/matrix-multiply.py
+++ b/examples/matrix-multiply.py
@@ -207,7 +207,7 @@ gpu_time = (time()-t1)/count
 
 # transfer device -> host -----------------------------------------------------
 t1 = time()
-cl.enqueue_read_buffer(queue, d_c_buf, h_c).wait()
+cl.enqueue_copy(queue, h_c, d_c_buf)
 pull_time = time()-t1
 
 # timing output ---------------------------------------------------------------
diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index feab7347..8154caf5 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -7,7 +7,7 @@ except ImportError:
     from os.path import dirname, join, realpath
     if realpath(join(os.getcwd(), "pyopencl")) == realpath(dirname(__file__)):
         from warnings import warn
-        warn ("It looks like you are importing PyOpenCL from "
+        warn("It looks like you are importing PyOpenCL from "
                 "its source directory. This likely won't work.")
     raise
 
@@ -16,6 +16,7 @@ except ImportError:
 import numpy as np
 from pyopencl._cl import *
 import inspect as _inspect
+from decorator import decorator as _decorator
 
 CONSTANT_CLASSES = [
         getattr(_cl, name) for name in dir(_cl)
@@ -377,6 +378,132 @@ def create_some_context(interactive=True):
 
     return Context(devices)
 
+
+
+
+def _mark_copy_deprecated(func):
+    """Return a wrapper around *func* that warns about its deprecation.
+
+    The first character of ``func.__name__`` (the leading underscore of the
+    functions wrapped below) is dropped to form the name quoted in the
+    warning. Each call to the wrapper emits a :class:`DeprecationWarning`
+    attributed to the caller (``stacklevel=2``) and then forwards all
+    arguments to *func*, returning its result.
+    """
+    def new_func(*args, **kwargs):
+        from warnings import warn
+        warn("'%s' has been deprecated in version 2011.1. Please use "
+                "enqueue_copy() instead." % func.__name__[1:], DeprecationWarning,
+                stacklevel=2)
+        return func(*args, **kwargs)
+
+    try:
+        from functools import update_wrapper
+    except ImportError:
+        # functools cannot be imported: leave the wrapper's metadata as it is.
+        pass
+    else:
+        update_wrapper(new_func, func)
+
+    return new_func
+
+
+enqueue_read_image = _mark_copy_deprecated(_cl._enqueue_read_image)
+enqueue_write_image = _mark_copy_deprecated(_cl._enqueue_write_image)
+enqueue_copy_image = _mark_copy_deprecated(_cl._enqueue_copy_image)
+enqueue_copy_image_to_buffer = _mark_copy_deprecated(_cl._enqueue_copy_image_to_buffer)
+enqueue_copy_buffer_to_image = _mark_copy_deprecated(_cl._enqueue_copy_buffer_to_image)
+enqueue_read_buffer = _mark_copy_deprecated(_cl._enqueue_read_buffer)
+enqueue_write_buffer = _mark_copy_deprecated(_cl._enqueue_write_buffer)
+enqueue_copy_buffer = _mark_copy_deprecated(_cl._enqueue_copy_buffer)
+
+if _cl.get_cl_header_version() >= (1,1):
+    enqueue_read_buffer_rect = _mark_copy_deprecated(_cl._enqueue_read_buffer_rect)
+    enqueue_write_buffer_rect = _mark_copy_deprecated(_cl._enqueue_write_buffer_rect)
+    enqueue_copy_buffer_rect = _mark_copy_deprecated(_cl._enqueue_copy_buffer_rect)
+
+def _pop_image_host_args(kwargs):
+    """Prepare *kwargs* for a transfer between an :class:`Image` and the host.
+
+    Removes the mandatory *origin* and *region* entries from *kwargs* and
+    returns them as a tuple, because the wrapped image functions take them
+    positionally ahead of the host buffer. The optional *pitches* tuple is
+    replaced in-place by *row_pitch* (and, if given, *slice_pitch*) entries.
+    """
+    origin = kwargs.pop("origin")
+    region = kwargs.pop("region")
+
+    pitches = kwargs.pop("pitches", (0,0))
+    if len(pitches) == 1:
+        kwargs["row_pitch"], = pitches
+    else:
+        kwargs["row_pitch"], kwargs["slice_pitch"] = pitches
+
+    return origin, region
+
+
+def enqueue_copy(queue, dest, src, **kwargs):
+    """Copy *src* to *dest*, dispatching on the types of both.
+
+    Each of *dest* and *src* may be a :class:`Buffer`, an :class:`Image`
+    or, if it is neither, an object taken to be host memory. Host-to-host
+    copies are not supported.
+
+    The remaining keyword arguments are passed on to the transfer function
+    selected for the combination; see the reference documentation for the
+    arguments each kind of transfer accepts.
+
+    :returns: the return value of the selected transfer function.
+    :raises KeyError: if *origin* or *region* is missing for a transfer
+        between an :class:`Image` and the host.
+    :raises TypeError: if neither *dest* nor *src* is a :class:`Buffer` or
+        an :class:`Image`.
+    """
+    if isinstance(dest, Buffer):
+        if isinstance(src, Buffer):
+            if "src_origin" in kwargs:
+                return _cl._enqueue_copy_buffer_rect(queue, src, dest, **kwargs)
+            else:
+                # dest_offset is optional: only rename it to the dst_offset
+                # spelling used below when the caller actually supplied it.
+                if "dest_offset" in kwargs:
+                    kwargs["dst_offset"] = kwargs.pop("dest_offset")
+                return _cl._enqueue_copy_buffer(queue, src, dest, **kwargs)
+        elif isinstance(src, Image):
+            return _cl._enqueue_copy_image_to_buffer(queue, src, dest, **kwargs)
+        else:
+            # assume from-host
+            if "buffer_origin" in kwargs:
+                return _cl._enqueue_write_buffer_rect(queue, dest, src, **kwargs)
+            else:
+                return _cl._enqueue_write_buffer(queue, dest, src, **kwargs)
+
+    elif isinstance(dest, Image):
+        if isinstance(src, Buffer):
+            return _cl._enqueue_copy_buffer_to_image(queue, src, dest, **kwargs)
+        elif isinstance(src, Image):
+            return _cl._enqueue_copy_image(queue, src, dest, **kwargs)
+        else:
+            # assume from-host
+            origin, region = _pop_image_host_args(kwargs)
+            return _cl._enqueue_write_image(queue, dest, origin, region, src, **kwargs)
+
+    else:
+        # assume to-host
+
+        if isinstance(src, Buffer):
+            if "buffer_origin" in kwargs:
+                return _cl._enqueue_read_buffer_rect(queue, src, dest, **kwargs)
+            else:
+                return _cl._enqueue_read_buffer(queue, src, dest, **kwargs)
+        elif isinstance(src, Image):
+            # origin and region must precede the host buffer positionally.
+            origin, region = _pop_image_host_args(kwargs)
+            return _cl._enqueue_read_image(queue, src, origin, region, dest, **kwargs)
+        else:
+            # assume from-host
+            raise TypeError("enqueue_copy cannot perform host-to-host transfers")
+
 # }}}
 
 
diff --git a/pyopencl/array.py b/pyopencl/array.py
index f78cab4b..38eabdb3 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -349,7 +349,7 @@ class Array(object):
             ary = ary.copy()
 
         if self.size:
-            cl.enqueue_write_buffer(queue or self.queue, self.data, ary, 
+            cl.enqueue_copy(queue or self.queue, self.data, ary,
                     is_blocking=not async)
 
     def get(self, queue=None, ary=None, async=False):
@@ -367,7 +367,7 @@ class Array(object):
         assert self.flags.forc, "Array in get() must be contiguous"
 
         if self.size:
-            cl.enqueue_read_buffer(queue or self.queue, self.data, ary,
+            cl.enqueue_copy(queue or self.queue, ary, self.data,
                     is_blocking=not async)
 
         return ary
diff --git a/src/wrapper/wrap_cl_part_1.cpp b/src/wrapper/wrap_cl_part_1.cpp
index 58fda5d8..f7a7dddb 100644
--- a/src/wrapper/wrap_cl_part_1.cpp
+++ b/src/wrapper/wrap_cl_part_1.cpp
@@ -164,7 +164,7 @@ void pyopencl_expose_part_1()
   // {{{ transfers
 
   // {{{ byte-for-byte
-  py::def("enqueue_read_buffer", enqueue_read_buffer,
+  py::def("_enqueue_read_buffer", enqueue_read_buffer,
       (py::args("queue", "mem", "hostbuf"),
        py::arg("device_offset")=0,
        py::arg("wait_for")=py::object(),
@@ -172,7 +172,7 @@ void pyopencl_expose_part_1()
        py::arg("host_buffer")=py::object()
        ),
       py::return_value_policy<py::manage_new_object>());
-  py::def("enqueue_write_buffer", enqueue_write_buffer,
+  py::def("_enqueue_write_buffer", enqueue_write_buffer,
       (py::args("queue", "mem", "hostbuf"),
        py::arg("device_offset")=0,
        py::arg("wait_for")=py::object(),
@@ -180,7 +180,7 @@ void pyopencl_expose_part_1()
        py::arg("host_buffer")=py::object()
        ),
       py::return_value_policy<py::manage_new_object>());
-  py::def("enqueue_copy_buffer", enqueue_copy_buffer,
+  py::def("_enqueue_copy_buffer", enqueue_copy_buffer,
       (py::args("queue", "src", "dst"),
        py::arg("byte_count")=0,
        py::arg("src_offset")=0,
@@ -194,7 +194,7 @@ void pyopencl_expose_part_1()
   // {{{ rectangular
 
 #ifdef CL_VERSION_1_1
-  py::def("enqueue_read_buffer_rect", enqueue_read_buffer_rect,
+  py::def("_enqueue_read_buffer_rect", enqueue_read_buffer_rect,
       (py::args("queue", "mem", "hostbuf",
                 "buffer_origin", "host_origin", "region"),
        py::arg("buffer_pitches")=py::object(),
@@ -203,7 +203,7 @@ void pyopencl_expose_part_1()
        py::arg("is_blocking")=true
        ),
       py::return_value_policy<py::manage_new_object>());
-  py::def("enqueue_write_buffer_rect", enqueue_write_buffer_rect,
+  py::def("_enqueue_write_buffer_rect", enqueue_write_buffer_rect,
       (py::args("queue", "mem", "hostbuf",
                 "buffer_origin", "host_origin", "region"),
        py::arg("buffer_pitches")=py::object(),
@@ -212,7 +212,7 @@ void pyopencl_expose_part_1()
        py::arg("is_blocking")=true
        ),
       py::return_value_policy<py::manage_new_object>());
-  py::def("enqueue_copy_buffer_rect", enqueue_copy_buffer_rect,
+  py::def("_enqueue_copy_buffer_rect", enqueue_copy_buffer_rect,
       (py::args("queue", "src", "dst",
                 "src_origin", "dst_origin", "region"),
        py::arg("src_pitches")=py::object(),
diff --git a/src/wrapper/wrap_cl_part_2.cpp b/src/wrapper/wrap_cl_part_2.cpp
index 899db5b8..6e8807fd 100644
--- a/src/wrapper/wrap_cl_part_2.cpp
+++ b/src/wrapper/wrap_cl_part_2.cpp
@@ -41,7 +41,7 @@ void pyopencl_expose_part_2()
 
   DEF_SIMPLE_FUNCTION(get_supported_image_formats);
 
-  py::def("enqueue_read_image", enqueue_read_image,
+  py::def("_enqueue_read_image", enqueue_read_image,
       (py::args("queue", "mem", "origin", "region", "hostbuf"),
        py::arg("row_pitch")=0,
        py::arg("slice_pitch")=0,
@@ -50,7 +50,7 @@ void pyopencl_expose_part_2()
        py::arg("host_buffer")=py::object()
        ),
       py::return_value_policy<py::manage_new_object>());
-  py::def("enqueue_write_image", enqueue_write_image,
+  py::def("_enqueue_write_image", enqueue_write_image,
       (py::args("queue", "mem", "origin", "region", "hostbuf"),
        py::arg("row_pitch")=0,
        py::arg("slice_pitch")=0,
@@ -60,15 +60,15 @@ void pyopencl_expose_part_2()
        ),
       py::return_value_policy<py::manage_new_object>());
 
-  py::def("enqueue_copy_image", enqueue_copy_image,
+  py::def("_enqueue_copy_image", enqueue_copy_image,
       (py::args("queue", "src", "dest", "src_origin", "dest_origin", "region"),
        py::arg("wait_for")=py::object()),
       py::return_value_policy<py::manage_new_object>());
-  py::def("enqueue_copy_image_to_buffer", enqueue_copy_image_to_buffer,
+  py::def("_enqueue_copy_image_to_buffer", enqueue_copy_image_to_buffer,
       (py::args("queue", "src", "dest", "origin", "region", "offset"),
        py::arg("wait_for")=py::object()),
       py::return_value_policy<py::manage_new_object>());
-  py::def("enqueue_copy_buffer_to_image", enqueue_copy_image_to_buffer,
+  py::def("_enqueue_copy_buffer_to_image", enqueue_copy_image_to_buffer,
       (py::args("queue", "src", "dest", "offset", "origin", "region"),
        py::arg("wait_for")=py::object()),
       py::return_value_policy<py::manage_new_object>());
-- 
GitLab