From 8b02d61c819f9217fc3ee8ee9b63351f2b1e7b1d Mon Sep 17 00:00:00 2001
From: Matthias Diener <mdiener@illinois.edu>
Date: Mon, 26 Sep 2022 00:20:39 -0500
Subject: [PATCH] PytatoPyOpenCLArrayContext: don't trust the arg limit
 reported by the GPU (#198)

* PytatoPyOpenCLArrayContext: don't trust the arg limit reported by the
GPU

Apparently, CUDA doesn't like it when argument sizes get close
to the reported limit.
Lowering the limit avoids PTX/JIT errors such as:
CUDA_ERROR_INVALID_IMAGE: device kernel image is invalid
CUDA_ERROR_FILE_NOT_FOUND: file not found

* restructure and clarify

* add warning, clarify comment

* add comment about device naming
---
 arraycontext/impl/pytato/__init__.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py
index 98a310d..6b9ac6b 100644
--- a/arraycontext/impl/pytato/__init__.py
+++ b/arraycontext/impl/pytato/__init__.py
@@ -387,7 +387,27 @@ class PytatoPyOpenCLArrayContext(_BasePytatoArrayContext):
                     self.using_svm and dev.type & cl.device_type.GPU
                     and cl_char.has_coarse_grain_buffer_svm(dev))):
 
-            limit = dev.max_parameter_size
+            if dev.max_parameter_size == 4352:
+                # Nvidia devices and PTXAS declare a limit of 4352 bytes,
+                # which is incorrect. The CUDA documentation at
+                # https://docs.nvidia.com/cuda/cuda-c-programming-guide/#function-parameters
+                # mentions a limit of 4KB, which is also incorrect.
+                # As far as I can tell, the actual limit is around 4080
+                # bytes, at least on a K40. Reducing the limit further
+                # in order to be on the safe side.
+
+                # Note that the naming convention isn't super consistent
+                # for Nvidia GPUs, so we rely only on the maximum
+                # parameter size to determine if it is an Nvidia GPU.
+
+                limit = 4096-200
+
+                from warnings import warn
+                warn("Running on an Nvidia GPU, reducing the argument "
+                    f"size limit from 4352 to {limit}.")
+            else:
+                limit = dev.max_parameter_size
+
             if self._force_svm_arg_limit is not None:
                 limit = self._force_svm_arg_limit
 
-- 
GitLab