From 0a1691f7a2adb09433c5cc2958d2cb7fc9d7729e Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 5 Sep 2022 20:31:39 -0500
Subject: [PATCH] Make enqueue_copy offsets interface more uniform

---
 pyopencl/__init__.py   | 87 ++++++++++++++++++++++++++++++++++++++----
 pyopencl/array.py      | 22 ++++-------
 src/wrap_cl.hpp        |  8 ++--
 src/wrap_cl_part_1.cpp |  5 ++-
 4 files changed, 94 insertions(+), 28 deletions(-)

diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index 94c35cc5..ef96cb9b 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -1770,7 +1770,13 @@ def enqueue_copy(queue, dest, src, **kwargs):
     .. rubric :: Transfer :class:`Buffer` ↔ host
     .. ------------------------------------------------------------------------
 
-    :arg device_offset: offset in bytes (optional)
+    :arg src_offset: offset in bytes (optional)
+
+        May only be nonzero if applied on the device side.
+
+    :arg dst_offset: offset in bytes (optional)
+
+        May only be nonzero if applied on the device side.
 
     .. note::
 
@@ -1790,7 +1796,7 @@ def enqueue_copy(queue, dest, src, **kwargs):
         and to the minimum of the size of the source and target
         from 2013.1 on.
     :arg src_offset: (optional)
-    :arg dest_offset: (optional)
+    :arg dst_offset: (optional)
 
     .. ------------------------------------------------------------------------
     .. rubric :: Rectangular :class:`Buffer` ↔  host transfers (CL 1.1 and newer)
@@ -1873,25 +1879,61 @@ def enqueue_copy(queue, dest, src, **kwargs):
         if dest.type == mem_object_type.BUFFER:
             if isinstance(src, MemoryObjectHolder):
                 if src.type == mem_object_type.BUFFER:
+                    # {{{ buffer -> buffer
+
                     if "src_origin" in kwargs:
+                        # rectangular
                         return _cl._enqueue_copy_buffer_rect(
                                 queue, src, dest, **kwargs)
                     else:
-                        kwargs["dst_offset"] = kwargs.pop("dest_offset", 0)
+                        # linear
+                        dest_offset = kwargs.pop("dest_offset", None)
+                        if dest_offset is not None:
+                            if "dst_offset" in kwargs:
+                                raise TypeError("may not specify both 'dst_offset' "
+                                                "and 'dest_offset'")
+
+                            warn("The 'dest_offset' argument of enqueue_copy "
+                                 "is deprecated. Use 'dst_offset' instead. "
+                                 "'dest_offset' will stop working in 2023.x.",
+                                 DeprecationWarning, stacklevel=2)
+
+                            kwargs["dst_offset"] = dest_offset
+
                         return _cl._enqueue_copy_buffer(queue, src, dest, **kwargs)
+
+                    # }}}
                 elif src.type in [mem_object_type.IMAGE2D, mem_object_type.IMAGE3D]:
                     return _cl._enqueue_copy_image_to_buffer(
                             queue, src, dest, **kwargs)
                 else:
                     raise ValueError("invalid src mem object type")
             else:
-                # assume from-host
+                # {{{ host -> buffer
+
                 if "buffer_origin" in kwargs:
                     return _cl._enqueue_write_buffer_rect(queue, dest, src, **kwargs)
                 else:
+                    device_offset = kwargs.pop("device_offset", None)
+                    if device_offset is not None:
+                        if "dst_offset" in kwargs:
+                            raise TypeError("may not specify both 'device_offset' "
+                                            "and 'dst_offset'")
+
+                        warn("The 'device_offset' argument of enqueue_copy "
+                                "is deprecated. Use 'dst_offset' instead. "
+                                "'dst_offset' will stop working in 2023.x.",
+                                DeprecationWarning, stacklevel=2)
+
+                        kwargs["dst_offset"] = device_offset
+
                     return _cl._enqueue_write_buffer(queue, dest, src, **kwargs)
 
+                # }}}
+
         elif dest.type in [mem_object_type.IMAGE2D, mem_object_type.IMAGE3D]:
+            # {{{ ... -> image
+
             if isinstance(src, MemoryObjectHolder):
                 if src.type == mem_object_type.BUFFER:
                     return _cl._enqueue_copy_buffer_to_image(
@@ -1913,19 +1955,28 @@ def enqueue_copy(queue, dest, src, **kwargs):
 
                 return _cl._enqueue_write_image(
                         queue, dest, origin, region, src, **kwargs)
+
+            # }}}
         else:
             raise ValueError("invalid dest mem object type")
 
     elif get_cl_header_version() >= (2, 0) and isinstance(dest, SVMPointer):
-        # to SVM
+        # {{{ ... ->  SVM
+
         if not isinstance(src, SVMPointer):
             src = SVM(src)
 
         is_blocking = kwargs.pop("is_blocking", True)
+
+        # These are NOT documented. They only support consistency with the
+        # Buffer-based API for the sake of the Array.
         assert kwargs.pop("src_offset", 0) == 0
-        assert kwargs.pop("dest_offset", 0) == 0
+        assert kwargs.pop("dst_offset", 0) == 0
+
         return _cl._enqueue_svm_memcpy(queue, is_blocking, dest, src, **kwargs)
 
+        # }}}
+
     else:
         # assume to-host
 
@@ -1934,7 +1985,21 @@ def enqueue_copy(queue, dest, src, **kwargs):
                 if "buffer_origin" in kwargs:
                     return _cl._enqueue_read_buffer_rect(queue, src, dest, **kwargs)
                 else:
+                    device_offset = kwargs.pop("device_offset", None)
+                    if device_offset is not None:
+                        if "src_offset" in kwargs:
+                            raise TypeError("may not specify both 'device_offset' "
+                                            "and 'src_offset'")
+
+                        warn("The 'device_offset' argument of enqueue_copy "
+                                "is deprecated. Use 'src_offset' instead. "
+                                "'dst_offset' will stop working in 2023.x.",
+                                DeprecationWarning, stacklevel=2)
+
+                        kwargs["src_offset"] = device_offset
+
                     return _cl._enqueue_read_buffer(queue, src, dest, **kwargs)
+
             elif src.type in [mem_object_type.IMAGE2D, mem_object_type.IMAGE3D]:
                 origin = kwargs.pop("origin")
                 region = kwargs.pop("region")
@@ -1950,11 +2015,19 @@ def enqueue_copy(queue, dest, src, **kwargs):
             else:
                 raise ValueError("invalid src mem object type")
         elif isinstance(src, SVMPointer):
-            # from svm
+            # {{{ svm -> host
+
             # dest is not a SVM instance, otherwise we'd be in the branch above
+
+            # This is NOT documented. They only support consistency with the
+            # Buffer-based API for the sake of the Array.
+            assert kwargs.pop("src_offset", 0) == 0
+
             is_blocking = kwargs.pop("is_blocking", True)
             return _cl._enqueue_svm_memcpy(
                     queue, is_blocking, SVM(dest), src, **kwargs)
+
+            # }}}
         else:
             # assume from-host
             raise TypeError("enqueue_copy cannot perform host-to-host transfers")
diff --git a/pyopencl/array.py b/pyopencl/array.py
index 851a0bb4..699466aa 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -735,13 +735,9 @@ class Array:
                     stacklevel=2)
 
         if self.size:
-            if self.offset:
-                event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary,
-                        device_offset=self.offset,
-                        is_blocking=not async_)
-            else:
-                event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary,
-                        is_blocking=not async_)
+            event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary,
+                    dst_offset=self.offset,
+                    is_blocking=not async_)
 
             self.add_event(event1)
 
@@ -789,13 +785,9 @@ class Array:
                     "to associate one.")
 
         if self.size:
-            if self.offset:
-                event1 = cl.enqueue_copy(queue, ary, self.base_data,
-                        device_offset=self.offset,
-                        wait_for=self.events, is_blocking=not async_)
-            else:
-                event1 = cl.enqueue_copy(queue, ary, self.base_data,
-                        wait_for=self.events, is_blocking=not async_)
+            event1 = cl.enqueue_copy(queue, ary, self.base_data,
+                    src_offset=self.offset,
+                    wait_for=self.events, is_blocking=not async_)
 
             self.add_event(event1)
         else:
@@ -2142,7 +2134,7 @@ class Array:
             if subarray.shape == value.shape and subarray.strides == value.strides:
                 self.add_event(
                         cl.enqueue_copy(queue, subarray.base_data,
-                            value, device_offset=subarray.offset, wait_for=wait_for))
+                            value, dst_offset=subarray.offset, wait_for=wait_for))
                 return
             else:
                 value = to_device(queue, value, self.allocator)
diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp
index dc6a3684..15aa882f 100644
--- a/src/wrap_cl.hpp
+++ b/src/wrap_cl.hpp
@@ -2460,7 +2460,7 @@ namespace pyopencl
       command_queue &cq,
       memory_object_holder &mem,
       py::object buffer,
-      size_t device_offset,
+      size_t src_offset,
       py::object py_wait_for,
       bool is_blocking)
   {
@@ -2484,7 +2484,7 @@ namespace pyopencl
             queue,
             mem.data(),
             PYOPENCL_CAST_BOOL(is_blocking),
-            device_offset, len, buf,
+            src_offset, len, buf,
             PYOPENCL_WAITLIST_ARGS, &evt
             ))
       );
@@ -2499,7 +2499,7 @@ namespace pyopencl
       command_queue &cq,
       memory_object_holder &mem,
       py::object buffer,
-      size_t device_offset,
+      size_t dst_offset,
       py::object py_wait_for,
       bool is_blocking)
   {
@@ -2523,7 +2523,7 @@ namespace pyopencl
             queue,
             mem.data(),
             PYOPENCL_CAST_BOOL(is_blocking),
-            device_offset, len, buf,
+            dst_offset, len, buf,
             PYOPENCL_WAITLIST_ARGS, &evt
             ))
       );
diff --git a/src/wrap_cl_part_1.cpp b/src/wrap_cl_part_1.cpp
index 8d62ef5d..3b9f79ed 100644
--- a/src/wrap_cl_part_1.cpp
+++ b/src/wrap_cl_part_1.cpp
@@ -282,11 +282,12 @@ void pyopencl_expose_part_1(py::module &m)
   // {{{ transfers
 
   // {{{ byte-for-byte
+
   m.def("_enqueue_read_buffer", enqueue_read_buffer,
       py::arg("queue"),
       py::arg("mem"),
       py::arg("hostbuf"),
-      py::arg("device_offset")=0,
+      py::arg("src_offset")=0,
       py::arg("wait_for")=py::none(),
       py::arg("is_blocking")=true
       );
@@ -294,7 +295,7 @@ void pyopencl_expose_part_1(py::module &m)
       py::arg("queue"),
       py::arg("mem"),
       py::arg("hostbuf"),
-      py::arg("device_offset")=0,
+      py::arg("dst_offset")=0,
       py::arg("wait_for")=py::none(),
       py::arg("is_blocking")=true
       );
-- 
GitLab