From 4302d21e8b49cf1883205deff4849fc48ebc749a Mon Sep 17 00:00:00 2001 From: zachjweiner Date: Sun, 1 Nov 2020 13:50:47 -0600 Subject: [PATCH 1/5] Skip enqueue_fill_buffer for large arrays on NVIDIA --- pyopencl/array.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pyopencl/array.py b/pyopencl/array.py index bf58c965..87910fb8 100644 --- a/pyopencl/array.py +++ b/pyopencl/array.py @@ -1279,10 +1279,14 @@ class Array: if not self.size: return - if ( - queue._get_cl_version() >= (1, 2) - and cl.get_cl_header_version() >= (1, 2)): - + cl_version_gtr_1_2 = ( + queue._get_cl_version() >= (1, 2) + and cl.get_cl_header_version() >= (1, 2) + ) + on_nvidia = queue.device.vendor.startswith("NVIDIA") + + # circumvent bug with large buffers on NVIDIA (gh-395) + if cl_version_gtr_1_2 and not (on_nvidia and self.nbytes >= 2**31): self.add_event( cl.enqueue_fill_buffer(queue, self.base_data, np.int8(0), self.offset, self.nbytes, wait_for=wait_for)) -- GitLab From af14ee81f5c1da84431568d850de1279c1cd2272 Mon Sep 17 00:00:00 2001 From: zachjweiner Date: Sun, 1 Nov 2020 15:12:20 -0500 Subject: [PATCH 2/5] Directly link issue 395 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Andreas Klöckner --- pyopencl/array.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyopencl/array.py b/pyopencl/array.py index 87910fb8..f3b84c63 100644 --- a/pyopencl/array.py +++ b/pyopencl/array.py @@ -1285,7 +1285,8 @@ class Array: ) on_nvidia = queue.device.vendor.startswith("NVIDIA") - # circumvent bug with large buffers on NVIDIA (gh-395) + # circumvent bug with large buffers on NVIDIA + # https://github.com/inducer/pyopencl/issues/395 if cl_version_gtr_1_2 and not (on_nvidia and self.nbytes >= 2**31): self.add_event( cl.enqueue_fill_buffer(queue, self.base_data, np.int8(0), -- GitLab From 32907a84db7432bafce9db3b34a1b85252512ed5 Mon Sep 17 00:00:00 2001 From: zachjweiner Date: Sun, 1 Nov 2020 15:32:55 -0600 Subject: [PATCH 3/5] add large array test --- test/test_array.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/test_array.py b/test/test_array.py index 613d2ff7..65b98038 100644 --- a/test/test_array.py +++ b/test/test_array.py @@ -205,6 +205,21 @@ def test_vector_fill(ctx_factory): a_gpu = cl_array.zeros(queue, 100, dtype=cltypes.float4) +def test_zeros_large_array(ctx_factory): + context = ctx_factory() + queue = cl.CommandQueue(context) + + if queue.device.address_bits == 64: + # this shouldn't hang/cause errors + # see https://github.com/inducer/pyopencl/issues/395 + a_gpu = cl_array.zeros(queue, (2**28 + 1,), dtype='float64') + # run a couple kernels to ensure no propagated runtime errors + a_gpu[...] = 1. + a_gpu = 2 * a_gpu - 3 + else: + pass + + def test_absrealimag(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) -- GitLab From 84b9b7be7b4343c26bce340f9ee18303362bf35c Mon Sep 17 00:00:00 2001 From: zachjweiner Date: Sun, 1 Nov 2020 15:34:24 -0600 Subject: [PATCH 4/5] fix quotes --- test/test_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_array.py b/test/test_array.py index 65b98038..381105bf 100644 --- a/test/test_array.py +++ b/test/test_array.py @@ -212,7 +212,7 @@ def test_zeros_large_array(ctx_factory): if queue.device.address_bits == 64: # this shouldn't hang/cause errors # see https://github.com/inducer/pyopencl/issues/395 - a_gpu = cl_array.zeros(queue, (2**28 + 1,), dtype='float64') + a_gpu = cl_array.zeros(queue, (2**28 + 1,), dtype="float64") # run a couple kernels to ensure no propagated runtime errors a_gpu[...] = 1. a_gpu = 2 * a_gpu - 3 -- GitLab From 0aebd7bc94a2cebcbf33e7a8191560b750d755e4 Mon Sep 17 00:00:00 2001 From: zachjweiner Date: Sun, 1 Nov 2020 16:06:50 -0600 Subject: [PATCH 5/5] Don't exceed max_mem_alloc_size in test --- test/test_array.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_array.py b/test/test_array.py index 381105bf..b5e99273 100644 --- a/test/test_array.py +++ b/test/test_array.py @@ -208,11 +208,13 @@ def test_vector_fill(ctx_factory): def test_zeros_large_array(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) + dev = queue.device - if queue.device.address_bits == 64: + size = 2**28 + 1 + if dev.address_bits == 64 and dev.max_mem_alloc_size >= 8 * size: # this shouldn't hang/cause errors # see https://github.com/inducer/pyopencl/issues/395 - a_gpu = cl_array.zeros(queue, (2**28 + 1,), dtype="float64") + a_gpu = cl_array.zeros(queue, (size,), dtype="float64") # run a couple kernels to ensure no propagated runtime errors a_gpu[...] = 1. a_gpu = 2 * a_gpu - 3 -- GitLab