From 7eaf1041c4d7b053e3ebdcb80d2a4dce7dffa756 Mon Sep 17 00:00:00 2001
From: "Rebecca N. Palmer" <rebecca_palmer@zoho.com>
Date: Sat, 4 Aug 2018 10:14:02 +0100
Subject: [PATCH] Make Array.get/set/copy wait for / append to self.events

---
Review notes (this section is ignored by git-am):

* Array.set() records the copy event on the array only when the transfer
  is asynchronous.  The copy is enqueued with is_blocking=not async_, so a
  synchronous set() has already completed by the time enqueue_copy returns
  and there is nothing left to wait for.  The guard was previously
  "if not async_", which contradicted its own comment.
* Array.get() and Array.copy() pass wait_for=self.events so that, on an
  out-of-order queue, the transfer starts only after pending operations on
  the source array have finished.  copy() also attaches the copy event to
  the result array.
* Leading whitespace and blank context lines were reconstructed from the
  hunk line counts; confirm against the target tree before applying.

 pyopencl/array.py  | 12 ++++++++----
 test/test_array.py | 28 ++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/pyopencl/array.py b/pyopencl/array.py
index 2d032079..d7ef138b 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -621,9 +621,11 @@ class Array(object):
                     stacklevel=2)
 
         if self.size:
-            cl.enqueue_copy(queue or self.queue, self.base_data, ary,
+            event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary,
                     device_offset=self.offset,
                     is_blocking=not async_)
+            if async_:  # not already waited for
+                self.add_event(event1)
 
     def get(self, queue=None, ary=None, async_=None, **kwargs):
         """Transfer the contents of *self* into *ary* or a newly allocated
@@ -687,7 +689,7 @@ class Array(object):
         if self.size:
             cl.enqueue_copy(queue, ary, self.base_data,
                     device_offset=self.offset,
-                    is_blocking=not async_)
+                    wait_for=self.events, is_blocking=not async_)
 
         return ary
 
@@ -712,9 +714,11 @@ class Array(object):
             result = result.with_queue(queue)
 
         if self.nbytes:
-            cl.enqueue_copy(queue or self.queue,
+            event1 = cl.enqueue_copy(queue or self.queue,
                     result.base_data, self.base_data,
-                    src_offset=self.offset, byte_count=self.nbytes)
+                    src_offset=self.offset, byte_count=self.nbytes,
+                    wait_for=self.events)
+            result.add_event(event1)
 
         return result
 
diff --git a/test/test_array.py b/test/test_array.py
index bca78f5c..fdfcfce3 100644
--- a/test/test_array.py
+++ b/test/test_array.py
@@ -1212,6 +1212,34 @@ def test_multi_put(ctx_factory):
 
     assert np.all(np.all(out_compare[i] == out_arrays[i].get()) for i in range(9))
 
+def test_outoforderqueue_get(ctx_factory):
+    context = ctx_factory()
+    try:
+        queue = cl.CommandQueue(context, properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
+    except Exception:
+        pytest.skip("out-of-order queue not available")
+    a = np.random.rand(10**6).astype(np.dtype('float32'))
+    a_gpu = cl_array.to_device(queue, a)
+    b_gpu = a_gpu + a_gpu**5 + 1
+    b1 = b_gpu.get() # testing that this waits for events
+    b = a + a**5 + 1
+    assert np.abs(b1 - b).mean() < 1e-5
+
+def test_outoforderqueue_copy(ctx_factory):
+    context = ctx_factory()
+    try:
+        queue = cl.CommandQueue(context, properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
+    except Exception:
+        pytest.skip("out-of-order queue not available")
+    a = np.random.rand(10**6).astype(np.dtype('float32'))
+    a_gpu = cl_array.to_device(queue, a)
+    c_gpu = a_gpu**2 - 7
+    b_gpu = c_gpu.copy() # testing that this waits for and creates events
+    b_gpu *= 10
+    queue.finish()
+    b1 = b_gpu.get()
+    b = 10 * (a**2 - 7)
+    assert np.abs(b1 - b).mean() < 1e-5
 
 if __name__ == "__main__":
     # make sure that import failures get reported, instead of skipping the
-- 
GitLab