From 4302d21e8b49cf1883205deff4849fc48ebc749a Mon Sep 17 00:00:00 2001
From: zachjweiner <zachjweiner@gmail.com>
Date: Sun, 1 Nov 2020 13:50:47 -0600
Subject: [PATCH] Skip enqueue_fill_buffer for large arrays on NVIDIA

---
 pyopencl/array.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/pyopencl/array.py b/pyopencl/array.py
index bf58c965..87910fb8 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -1279,10 +1279,14 @@ class Array:
         if not self.size:
             return
 
-        if (
-                queue._get_cl_version() >= (1, 2)
-                and cl.get_cl_header_version() >= (1, 2)):
-
+        cl_version_gtr_1_2 = (
+            queue._get_cl_version() >= (1, 2)
+            and cl.get_cl_header_version() >= (1, 2)
+        )
+        on_nvidia = queue.device.vendor.startswith("NVIDIA")
+
+        # circumvent NVIDIA bug with buffers of 2**31 bytes or more (gh-395)
+        if cl_version_gtr_1_2 and not (on_nvidia and self.nbytes >= 2**31):
             self.add_event(
                     cl.enqueue_fill_buffer(queue, self.base_data, np.int8(0),
                         self.offset, self.nbytes, wait_for=wait_for))
-- 
GitLab