diff --git a/arraycontext/impl/pytato/__init__.py b/arraycontext/impl/pytato/__init__.py
index 98a310d086b45bbc87db4cb4539a0c23149c291e..6b9ac6b7ee2f0a8a83506ddbd02c0030552494cd 100644
--- a/arraycontext/impl/pytato/__init__.py
+++ b/arraycontext/impl/pytato/__init__.py
@@ -387,7 +387,27 @@ class PytatoPyOpenCLArrayContext(_BasePytatoArrayContext):
                     self.using_svm and dev.type & cl.device_type.GPU
                     and cl_char.has_coarse_grain_buffer_svm(dev))):
 
-            limit = dev.max_parameter_size
+            if dev.max_parameter_size == 4352:
+                # Nvidia devices and PTXAS declare a limit of 4352 bytes,
+                # which is incorrect. The CUDA documentation at
+                # https://docs.nvidia.com/cuda/cuda-c-programming-guide/#function-parameters
+                # mentions a limit of 4KB, which is also incorrect.
+                # As far as I can tell, the actual limit is around 4080
+                # bytes, at least on a K40. Reducing the limit further
+                # in order to be on the safe side.
+
+                # Note that the naming of Nvidia GPUs is not very consistent,
+                # so the maximum parameter size is the only criterion used
+                # to decide whether the device is an Nvidia GPU.
+
+                limit = 4096-200
+
+                from warnings import warn
+                warn("Running on an Nvidia GPU, reducing the argument "
+                    f"size limit from 4352 to {limit}.")
+            else:
+                limit = dev.max_parameter_size
+
             if self._force_svm_arg_limit is not None:
                 limit = self._force_svm_arg_limit