From 81b6b09983ab21cb07ad443b8d9b08debe81c599 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Thu, 3 Jan 2013 14:30:04 +0100
Subject: [PATCH] Make buffer-to-buffer enqueue_copy safe by default if no
 byte_count specified.

---
 doc/source/runtime.rst         | 68 +----------------------------
 pyopencl/__init__.py           | 78 ++++++++++++++++++++++++++++++++--
 src/wrapper/wrap_cl.hpp        | 11 +++--
 src/wrapper/wrap_cl_part_1.cpp |  2 +-
 4 files changed, 85 insertions(+), 74 deletions(-)

diff --git a/doc/source/runtime.rst b/doc/source/runtime.rst
index 547ce018..ccfa1172 100644
--- a/doc/source/runtime.rst
+++ b/doc/source/runtime.rst
@@ -526,73 +526,7 @@ Images
 Transfers
 ^^^^^^^^^
 
-.. function:: enqueue_copy(queue, dest, src, **kwargs)
-
-    Copy from :class:`Image`, :class:`Buffer` or the host to 
-    :class:`Image`, :class:`Buffer` or the host. (Note: host-to-host
-    copies are unsupported.)
-
-    The following keyword arguments are available:
-
-    :arg wait_for: (optional, default empty)
-    :arg is_blocking: Wait for completion. Defaults to *True*. 
-      (Available on any copy involving host memory)
-
-    :return: A :class:`NannyEvent` if the transfer involved a
-        host-side buffer, otherwise an :class:`Event`.
-
-    :class:`Buffer` ↔ host transfers:
-
-    :arg device_offset: offset in bytes (optional)
-
-    :class:`Buffer` ↔ :class:`Buffer` transfers:
-
-    :arg byte_count: (optional)
-    :arg src_offset: (optional)
-    :arg dest_offset: (optional)
-
-    Rectangular :class:`Buffer` ↔  host transfers (CL 1.1 and newer):
-
-    :arg buffer_origin: :class:`tuple` of :class:`int` of length
-        three or shorter. (mandatory)
-    :arg host_origin: :class:`tuple` of :class:`int` of length
-        three or shorter. (mandatory)
-    :arg region: :class:`tuple` of :class:`int` of length
-        three or shorter. (mandatory)
-    :arg buffer_pitches: :class:`tuple` of :class:`int` of length
-        two or shorter. (optional, "tightly-packed" if unspecified)
-    :arg host_pitches: :class:`tuple` of :class:`int` of length
-        two or shorter. (optional, "tightly-packed" if unspecified)
-
-    :class:`Image` ↔ host transfers:
-
-    :arg origin: :class:`tuple` of :class:`int` of length
-        three or shorter. (mandatory)
-    :arg region: :class:`tuple` of :class:`int` of length
-        three or shorter. (mandatory)
-    :arg pitches: :class:`tuple` of :class:`int` of length
-        two or shorter. (optional)
-
-    :class:`Buffer` ↔ :class:`Image` transfers:
-
-    :arg offset: offset in buffer (mandatory)
-    :arg origin: :class:`tuple` of :class:`int` of length
-        three or shorter. (mandatory)
-    :arg region: :class:`tuple` of :class:`int` of length
-        three or shorter. (mandatory)
-
-    :class:`Image` ↔ :class:`Image` transfers:
-
-    :arg src_origin: :class:`tuple` of :class:`int` of length
-        three or shorter. (mandatory)
-    :arg dest_origin: :class:`tuple` of :class:`int` of length
-        three or shorter. (mandatory)
-    :arg region: :class:`tuple` of :class:`int` of length
-        three or shorter. (mandatory)
-
-    |std-enqueue-blurb|
-
-    .. versionadded:: 2011.1
+.. autofunction:: enqueue_copy(queue, dest, src, **kwargs)
 
 Mapping Memory into Host Address Space
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index 59628fca..f7a4a535 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
 
 __license__ = """
@@ -420,7 +422,7 @@ def _add_functionality():
                 pass
 
             what = e.what + "\n\n" + (75*"="+"\n").join(
-                    "Build on %s:\n\n%s" % (dev, log) 
+                    "Build on %s:\n\n%s" % (dev, log)
                     for dev, log in self._get_build_logs())
             code = e.code
             routine = e.routine
@@ -580,7 +582,7 @@ def _add_functionality():
 
     def image_format_repr(self):
         return "ImageFormat(%s, %s)" % (
-                channel_order.to_string(self.channel_order, 
+                channel_order.to_string(self.channel_order,
                     "<unknown channel order 0x%x>"),
                 channel_type.to_string(self.channel_data_type,
                     "<unknown channel data type 0x%x>"))
@@ -598,7 +600,7 @@ def _add_functionality():
         except AttributeError:
             return str(val)
         else:
-            result = "%s failed: %s" % (val.routine(), 
+            result = "%s failed: %s" % (val.routine(),
                     status_code.to_string(val.code(), "<unknown error %d>")
                     .lower().replace("_", " "))
             if val.what():
@@ -809,6 +811,76 @@ if _cl.get_cl_header_version() >= (1,1):
     enqueue_copy_buffer_rect = _mark_copy_deprecated(_cl._enqueue_copy_buffer_rect)
 
 def enqueue_copy(queue, dest, src, **kwargs):
+    """Copy from :class:`Image`, :class:`Buffer` or the host to
+    :class:`Image`, :class:`Buffer` or the host. (Note: host-to-host
+    copies are unsupported.)
+
+    The following keyword arguments are available:
+
+    :arg wait_for: (optional, default empty)
+    :arg is_blocking: Wait for completion. Defaults to *True*.
+      (Available on any copy involving host memory)
+
+    :return: A :class:`NannyEvent` if the transfer involved a
+        host-side buffer, otherwise an :class:`Event`.
+
+    :class:`Buffer` ↔ host transfers:
+
+    :arg device_offset: offset in bytes (optional)
+
+    :class:`Buffer` ↔ :class:`Buffer` transfers:
+
+    :arg byte_count: (optional) If not specified, defaults to the
+        size of the source in versions 2012.x and earlier,
+        and to the minimum of the size of the source and target
+        from 2013.1 on.
+    :arg src_offset: (optional)
+    :arg dest_offset: (optional)
+
+    Rectangular :class:`Buffer` ↔  host transfers (CL 1.1 and newer):
+
+    :arg buffer_origin: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg host_origin: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg region: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg buffer_pitches: :class:`tuple` of :class:`int` of length
+        two or shorter. (optional, "tightly-packed" if unspecified)
+    :arg host_pitches: :class:`tuple` of :class:`int` of length
+        two or shorter. (optional, "tightly-packed" if unspecified)
+
+    :class:`Image` ↔ host transfers:
+
+    :arg origin: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg region: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg pitches: :class:`tuple` of :class:`int` of length
+        two or shorter. (optional)
+
+    :class:`Buffer` ↔ :class:`Image` transfers:
+
+    :arg offset: offset in buffer (mandatory)
+    :arg origin: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg region: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+
+    :class:`Image` ↔ :class:`Image` transfers:
+
+    :arg src_origin: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg dest_origin: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg region: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+
+    |std-enqueue-blurb|
+
+    .. versionadded:: 2011.1
+    """
+
     if isinstance(dest, MemoryObjectHolder):
         if dest.type == mem_object_type.BUFFER:
             if isinstance(src, MemoryObjectHolder):
diff --git a/src/wrapper/wrap_cl.hpp b/src/wrapper/wrap_cl.hpp
index 6c64ac97..c71e33f6 100644
--- a/src/wrapper/wrap_cl.hpp
+++ b/src/wrapper/wrap_cl.hpp
@@ -1845,17 +1845,22 @@ namespace pyopencl
       command_queue &cq,
       memory_object_holder &src,
       memory_object_holder &dst,
-      size_t byte_count,
+      ptrdiff_t byte_count,
       size_t src_offset,
       size_t dst_offset,
       py::object py_wait_for)
   {
     PYOPENCL_PARSE_WAIT_FOR;
 
-    if (byte_count == 0)
+    if (byte_count < 0) // negative: not specified, use min(src size, dst size)
     {
+      size_t byte_count_src = 0;
+      size_t byte_count_dst = 0;
       PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
-          (src.data(), CL_MEM_SIZE, sizeof(byte_count), &byte_count, 0));
+          (src.data(), CL_MEM_SIZE, sizeof(byte_count_src), &byte_count_src, 0));
+      PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
+          (dst.data(), CL_MEM_SIZE, sizeof(byte_count_dst), &byte_count_dst, 0));
+      byte_count = std::min(byte_count_src, byte_count_dst);
     }
 
     cl_event evt;
diff --git a/src/wrapper/wrap_cl_part_1.cpp b/src/wrapper/wrap_cl_part_1.cpp
index d441718f..b9f55a06 100644
--- a/src/wrapper/wrap_cl_part_1.cpp
+++ b/src/wrapper/wrap_cl_part_1.cpp
@@ -228,7 +228,7 @@ void pyopencl_expose_part_1()
       py::return_value_policy<py::manage_new_object>());
   py::def("_enqueue_copy_buffer", enqueue_copy_buffer,
       (py::args("queue", "src", "dst"),
-       py::arg("byte_count")=0,
+       py::arg("byte_count")=-1,
        py::arg("src_offset")=0,
        py::arg("dst_offset")=0,
        py::arg("wait_for")=py::object()
-- 
GitLab