diff --git a/pyopencl/array.py b/pyopencl/array.py
index bf58c965c69d69820a405de7a90eb7cd1e74bc87..87910fb8d452fe6f29c83cb0d8890860e6c5fe10 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -1279,10 +1279,14 @@ class Array:
         if not self.size:
             return
 
-        if (
-                queue._get_cl_version() >= (1, 2)
-                and cl.get_cl_header_version() >= (1, 2)):
-
+        cl_version_gtr_1_2 = (
+            queue._get_cl_version() >= (1, 2)
+            and cl.get_cl_header_version() >= (1, 2)
+        )
+        on_nvidia = queue.device.vendor.startswith("NVIDIA")
+
+        # NVIDIA bug (gh-395): skip enqueue_fill_buffer if nbytes >= 2**31
+        if cl_version_gtr_1_2 and not (on_nvidia and self.nbytes >= 2**31):
             self.add_event(
                     cl.enqueue_fill_buffer(queue, self.base_data, np.int8(0),
                         self.offset, self.nbytes, wait_for=wait_for))