From 8b02d61c819f9217fc3ee8ee9b63351f2b1e7b1d Mon Sep 17 00:00:00 2001 From: Matthias Diener <mdiener@illinois.edu> Date: Mon, 26 Sep 2022 00:20:39 -0500 Subject: [PATCH] PytatoPyOpenCLArrayContext: don't trust the arg limit reported by the GPU (#198) * PytatoPyOpenCLArrayContext: don't trust the arg limit reported by the GPU Apparently, CUDA doesn't like it when argument sizes get close to the reported limit. Avoids PTX/JIT errors of the type CUDA_ERROR_INVALID_IMAGE: device kernel image is invalid CUDA_ERROR_FILE_NOT_FOUND: file not found * restructure and clarify * add warning, clarify comment * add comment about device naming --- arraycontext/impl/pytato/__init__.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py index 98a310d..6b9ac6b 100644 --- a/arraycontext/impl/pytato/__init__.py +++ b/arraycontext/impl/pytato/__init__.py @@ -387,7 +387,27 @@ class PytatoPyOpenCLArrayContext(_BasePytatoArrayContext): self.using_svm and dev.type & cl.device_type.GPU and cl_char.has_coarse_grain_buffer_svm(dev))): - limit = dev.max_parameter_size + if dev.max_parameter_size == 4352: + # Nvidia devices and PTXAS declare a limit of 4352 bytes, + # which is incorrect. The CUDA documentation at + # https://docs.nvidia.com/cuda/cuda-c-programming-guide/#function-parameters + # mentions a limit of 4KB, which is also incorrect. + # As far as I can tell, the actual limit is around 4080 + # bytes, at least on a K40. Reducing the limit further + # in order to be on the safe side. + + # Note that the naming convention isn't super consistent + # for Nvidia GPUs, so that we only use the maximum + # parameter size to determine if it is an Nvidia GPU. + + limit = 4096-200 + + from warnings import warn + warn("Running on an Nvidia GPU, reducing the argument " + f"size limit from 4352 to {limit}.") + else: + limit = dev.max_parameter_size + if self._force_svm_arg_limit is not None: limit = self._force_svm_arg_limit -- GitLab