From 4302d21e8b49cf1883205deff4849fc48ebc749a Mon Sep 17 00:00:00 2001
From: zachjweiner <zachjweiner@gmail.com>
Date: Sun, 1 Nov 2020 13:50:47 -0600
Subject: [PATCH 1/5] Skip enqueue_fill_buffer for large arrays on NVIDIA

---
 pyopencl/array.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/pyopencl/array.py b/pyopencl/array.py
index bf58c965..87910fb8 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -1279,10 +1279,14 @@ class Array:
         if not self.size:
             return
 
-        if (
-                queue._get_cl_version() >= (1, 2)
-                and cl.get_cl_header_version() >= (1, 2)):
-
+        cl_version_gtr_1_2 = (
+            queue._get_cl_version() >= (1, 2)
+            and cl.get_cl_header_version() >= (1, 2)
+        )
+        on_nvidia = queue.device.vendor.startswith("NVIDIA")
+
+        # circumvent bug with large buffers on NVIDIA (gh-395)
+        if cl_version_gtr_1_2 and not (on_nvidia and self.nbytes >= 2**31):
             self.add_event(
                     cl.enqueue_fill_buffer(queue, self.base_data, np.int8(0),
                         self.offset, self.nbytes, wait_for=wait_for))
-- 
GitLab


From af14ee81f5c1da84431568d850de1279c1cd2272 Mon Sep 17 00:00:00 2001
From: zachjweiner <zachjweiner@users.noreply.github.com>
Date: Sun, 1 Nov 2020 15:12:20 -0500
Subject: [PATCH 2/5] Directly link issue 395
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Andreas Klöckner <inform@tiker.net>
---
 pyopencl/array.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyopencl/array.py b/pyopencl/array.py
index 87910fb8..f3b84c63 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -1285,7 +1285,8 @@ class Array:
         )
         on_nvidia = queue.device.vendor.startswith("NVIDIA")
 
-        # circumvent bug with large buffers on NVIDIA (gh-395)
+        # avoid enqueue_fill_buffer for buffers >= 2**31 bytes on NVIDIA (buggy)
+        # https://github.com/inducer/pyopencl/issues/395
         if cl_version_gtr_1_2 and not (on_nvidia and self.nbytes >= 2**31):
             self.add_event(
                     cl.enqueue_fill_buffer(queue, self.base_data, np.int8(0),
-- 
GitLab


From 32907a84db7432bafce9db3b34a1b85252512ed5 Mon Sep 17 00:00:00 2001
From: zachjweiner <zachjweiner@gmail.com>
Date: Sun, 1 Nov 2020 15:32:55 -0600
Subject: [PATCH 3/5] add large array test

---
 test/test_array.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/test/test_array.py b/test/test_array.py
index 613d2ff7..65b98038 100644
--- a/test/test_array.py
+++ b/test/test_array.py
@@ -205,6 +205,21 @@ def test_vector_fill(ctx_factory):
     a_gpu = cl_array.zeros(queue, 100, dtype=cltypes.float4)
 
 
+def test_zeros_large_array(ctx_factory):
+    context = ctx_factory()
+    queue = cl.CommandQueue(context)
+
+    if queue.device.address_bits == 64:
+        # this shouldn't hang/cause errors
+        # see https://github.com/inducer/pyopencl/issues/395
+        a_gpu = cl_array.zeros(queue, (2**28 + 1,), dtype='float64')
+        # run a couple kernels to ensure no propagated runtime errors
+        a_gpu[...] = 1.
+        a_gpu = 2 * a_gpu - 3
+    else:
+        pass
+
+
 def test_absrealimag(ctx_factory):
     context = ctx_factory()
     queue = cl.CommandQueue(context)
-- 
GitLab


From 84b9b7be7b4343c26bce340f9ee18303362bf35c Mon Sep 17 00:00:00 2001
From: zachjweiner <zachjweiner@gmail.com>
Date: Sun, 1 Nov 2020 15:34:24 -0600
Subject: [PATCH 4/5] fix quotes

---
 test/test_array.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_array.py b/test/test_array.py
index 65b98038..381105bf 100644
--- a/test/test_array.py
+++ b/test/test_array.py
@@ -212,7 +212,7 @@ def test_zeros_large_array(ctx_factory):
     if queue.device.address_bits == 64:
         # this shouldn't hang/cause errors
         # see https://github.com/inducer/pyopencl/issues/395
-        a_gpu = cl_array.zeros(queue, (2**28 + 1,), dtype='float64')
+        a_gpu = cl_array.zeros(queue, (2**28 + 1,), dtype="float64")
         # run a couple kernels to ensure no propagated runtime errors
         a_gpu[...] = 1.
         a_gpu = 2 * a_gpu - 3
-- 
GitLab


From 0aebd7bc94a2cebcbf33e7a8191560b750d755e4 Mon Sep 17 00:00:00 2001
From: zachjweiner <zachjweiner@gmail.com>
Date: Sun, 1 Nov 2020 16:06:50 -0600
Subject: [PATCH 5/5] Don't exceed max_mem_alloc_size in test

---
 test/test_array.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/test/test_array.py b/test/test_array.py
index 381105bf..b5e99273 100644
--- a/test/test_array.py
+++ b/test/test_array.py
@@ -208,11 +208,13 @@ def test_vector_fill(ctx_factory):
 def test_zeros_large_array(ctx_factory):
     context = ctx_factory()
     queue = cl.CommandQueue(context)
+    dev = queue.device
 
-    if queue.device.address_bits == 64:
+    size = 2**28 + 1
+    if dev.address_bits == 64 and dev.max_mem_alloc_size >= 8 * size:
         # this shouldn't hang/cause errors
         # see https://github.com/inducer/pyopencl/issues/395
-        a_gpu = cl_array.zeros(queue, (2**28 + 1,), dtype="float64")
+        a_gpu = cl_array.zeros(queue, (size,), dtype="float64")
         # run a couple kernels to ensure no propagated runtime errors
         a_gpu[...] = 1.
         a_gpu = 2 * a_gpu - 3
-- 
GitLab