diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index 0a965564c4f95d52714eb1b6e8279d7d2dfe69c8..a72f66b9f5a339482544e513f33204429f2988a4 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -819,7 +819,7 @@ def _add_functionality():
                 self.function_name, self.num_args, self.num_args,
                 None,
                 warn_about_arg_count_bug=None,
-                work_around_arg_count_bug=None)
+                work_around_arg_count_bug=None, devs=self.context.devices)
 
         self._wg_info_cache = {}
         return self
@@ -858,7 +858,8 @@ def _add_functionality():
                         len(arg_types), self.num_args,
                         arg_types,
                         warn_about_arg_count_bug=warn_about_arg_count_bug,
-                        work_around_arg_count_bug=work_around_arg_count_bug)
+                        work_around_arg_count_bug=work_around_arg_count_bug,
+                        devs=self.context.devices)
 
         # Make ourselves a kernel-specific class, so that we're able to override
         # __call__. Inspired by https://stackoverflow.com/a/38541437
@@ -880,7 +881,7 @@ def _add_functionality():
         return result
 
     def kernel_set_args(self, *args, **kwargs):
-        # Need to dupicate the 'self' argument for dynamically generated  method
+        # Need to duplicate the 'self' argument for the dynamically generated method
         return self._set_args(self, *args, **kwargs)
 
     def kernel_call(self, queue, global_size, local_size, *args, **kwargs):
diff --git a/pyopencl/invoker.py b/pyopencl/invoker.py
index 9383afdd59e3c9b0e587d9ea6594269c46e50a4a..bd1a402f71c7a97c2feb339be049b1bfde2ff796 100644
--- a/pyopencl/invoker.py
+++ b/pyopencl/invoker.py
@@ -29,6 +29,7 @@ import pyopencl._cl as _cl
 from pytools.persistent_dict import WriteOncePersistentDict
 from pytools.py_codegen import Indentation, PythonCodeGenerator
 from pyopencl.tools import _NumpyTypesKeyBuilder, VectorArg
+import pyopencl as cl
 
 
 # {{{ arg packing helpers
@@ -305,6 +306,71 @@ def _generate_enqueue_and_set_args_module(function_name,
             enqueue_name)
 
 
+# {{{ Helper functions related to argument sizes and device limits
+
+def _get_max_parameter_size(dev):
+    """Return ``dev.max_parameter_size``, corrected for pocl's bogus 1024."""
+    from pyopencl.characterize import get_pocl_version
+
+    reported = dev.max_parameter_size
+    on_pocl = get_pocl_version(dev.platform) is not None
+
+    # pocl releases (as of 04/2022) report an incorrect parameter size
+    # limit of 1024; see e.g. https://github.com/pocl/pocl/pull/1046
+    if not (on_pocl and reported == 1024):
+        return reported
+
+    if dev.type & cl.device_type.CPU:
+        return 1024*1024
+    if dev.type & cl.device_type.GPU:
+        # Limit of all modern Nvidia GPUs (Compute Capability 2 and up).
+        return 4352
+    return reported
+
+
+def _check_arg_size(function_name, num_cl_args, arg_types, devs):
+    """Warn if the kernel's argument size nears or exceeds a device's limit.
+
+    For each device in *devs*, the total argument size is summed from
+    *arg_types* (pointer size for ``None`` and :class:`VectorArg` entries,
+    the dtype's item size otherwise) or, if *arg_types* is empty or ``None``,
+    estimated as one pointer per argument. Estimates already warn at 75%.
+    """
+    from warnings import warn
+
+    for dev in devs:
+        ptr_size = dev.address_bits // 8
+        limit = _get_max_parameter_size(dev)
+
+        if arg_types:
+            # A None entry has unknown size: count it as a pointer and treat
+            # the resulting sum as an estimate only.
+            estimated = any(tp is None for tp in arg_types)
+            total = sum(
+                ptr_size if tp is None or isinstance(tp, VectorArg)
+                else np.dtype(tp).itemsize
+                for tp in arg_types)
+        else:
+            # Estimate that each argument has the size of a pointer on average
+            estimated = True
+            total = ptr_size * num_cl_args
+
+        if total > limit:
+            relation = "is higher than"
+        elif estimated and total >= limit * 0.75:
+            # An estimate may undershoot, so also warn just below the limit.
+            relation = "approaches"
+        else:
+            continue
+
+        warn(f"Kernel '{function_name}' has {num_cl_args} arguments with "
+            f"a total size of {total} bytes, which {relation} "
+            f"the limit of {limit} bytes on {dev}. This might "
+            "lead to compilation errors, especially on GPU devices.")
+
+# }}}
+
+
 invoker_cache = WriteOncePersistentDict(
         "pyopencl-invoker-cache-v41",
         key_builder=_NumpyTypesKeyBuilder())
@@ -313,7 +379,9 @@ invoker_cache = WriteOncePersistentDict(
 def generate_enqueue_and_set_args(function_name,
         num_passed_args, num_cl_args,
         arg_types,
-        work_around_arg_count_bug, warn_about_arg_count_bug):
+        work_around_arg_count_bug, warn_about_arg_count_bug, devs):
+
+    _check_arg_size(function_name, num_cl_args, arg_types, devs)
 
     cache_key = (function_name, num_passed_args, num_cl_args,
             arg_types, __debug__,