diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index bc5c51eb0453a34ad902d58903997b75d6c54f34..816cbfe7b9babdadd10266b947f48d03a9b45a95 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -2159,8 +2159,8 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): creation_plog.done() - from loopy.kernel.tools import infer_arg_is_output_only - knl = infer_arg_is_output_only(knl) + from loopy.kernel.tools import infer_args_are_input_output + knl = infer_args_are_input_output(knl) return knl diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index e6544b34a55af97a1a15e86f7d74855e08e53116..c528e3eebc8951fbc8dde7088f6392fe6c245d12 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -338,6 +338,8 @@ class KernelArgument(ImmutableRecord): dtype = None kwargs["dtype"] = dtype + kwargs["is_output"] = kwargs.pop("is_output", None) + kwargs["is_input"] = kwargs.pop("is_input", None) ImmutableRecord.__init__(self, **kwargs) @@ -354,16 +356,38 @@ class ArrayArg(ArrayBase, KernelArgument): An instance of :class:`bool`. If set to *True*, recorded to be returned from the kernel. + + .. attribute:: is_output + An instance of :class:`bool`. If set to *True*, the argument is used + to return information to the caller. If set to *False*, then the + callee should not write the array during execution. + + .. attribute:: is_input + An instance of :class:`bool`. If set to *True*, expected to be + provided by the caller. If *False* then the callee should not depend + on the state of the array on entry to a function. + """) allowed_extra_kwargs = [ "address_space", - "is_output_only"] + "is_output_only", + "is_output", + "is_input"] def __init__(self, *args, **kwargs): if "address_space" not in kwargs: raise TypeError("'address_space' must be specified") - kwargs["is_output_only"] = kwargs.pop("is_output_only", False) + + is_output_only = kwargs.pop("is_output_only", None) + if is_output_only is not None: + warn("'is_output_only' is deprecated. Use 'is_output', 'is_input'" + " instead.", DeprecationWarning, stacklevel=2) + kwargs["is_output"] = is_output_only + kwargs["is_input"] = not is_output_only + else: + kwargs["is_output"] = kwargs.pop("is_output", None) + kwargs["is_input"] = kwargs.pop("is_input", None) super(ArrayArg, self).__init__(*args, **kwargs) @@ -391,7 +415,8 @@ class ArrayArg(ArrayBase, KernelArgument): """ super(ArrayArg, self).update_persistent_hash(key_hash, key_builder) key_builder.rec(key_hash, self.address_space) - key_builder.rec(key_hash, self.is_output_only) + key_builder.rec(key_hash, self.is_output) + key_builder.rec(key_hash, self.is_input) # Making this a function prevents incorrect use in isinstance. @@ -411,6 +436,9 @@ class ConstantArg(ArrayBase, KernelArgument): min_target_axes = 0 max_target_axes = 1 + is_output = False + is_input = True + def get_arg_decl(self, ast_builder, name_suffix, shape, dtype, is_written): return ast_builder.get_constant_arg_decl(self.name + name_suffix, shape, dtype, is_written) @@ -432,13 +460,15 @@ class ImageArg(ArrayBase, KernelArgument): class ValueArg(KernelArgument): def __init__(self, name, dtype=None, approximately=1000, target=None, - is_output_only=False): + is_output_only=None, is_output=False, is_input=True): KernelArgument.__init__(self, name=name, dtype=dtype, approximately=approximately, target=target, - is_output_only=is_output_only) + is_output_only=is_output_only, + is_output=is_output, + is_input=is_input) def __str__(self): import loopy as lp diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index e33d260fba4f3f4122f35e033ecc573b41999d5d..5b00950e7b5f0144d53e8cef6d7fbeebf75fe02c 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1864,6 +1864,56 @@ def infer_arg_is_output_only(kernel): return kernel.copy(args=new_args) + +def infer_args_are_input_output(kernel): + """ + Returns a copy of *kernel* with the attributes ``is_input`` and + ``is_output`` of the arguments set. + .. note:: + If the :attr:`~loopy.ArrayArg.is_output` is not supplied from a user, + then the array is inferred as an output argument if it is written at + some point in the kernel. + If the :attr:`~loopy.ArrayArg.is_input` is not supplied from a user, + then the array is inferred as an input argument if it is either read at + some point in the kernel or it is neither read nor written. + """ + from loopy.kernel.data import ArrayArg, ValueArg, ConstantArg, ImageArg + new_args = [] + + for arg in kernel.args: + if isinstance(arg, ArrayArg): + if arg.is_output is not None: + assert isinstance(arg.is_output, bool) + else: + if arg.name in kernel.get_written_variables(): + arg = arg.copy(is_output=True) + else: + arg = arg.copy(is_output=False) + + if arg.is_input is not None: + assert isinstance(arg.is_input, bool) + else: + if arg.name in kernel.get_read_variables() or ( + (arg.name not in kernel.get_read_variables()) and ( + arg.name not in kernel.get_written_variables())): + arg = arg.copy(is_input=True) + else: + arg = arg.copy(is_input=False) + elif isinstance(arg, (ConstantArg, ImageArg, ValueArg)): + pass + else: + raise NotImplementedError("Unkonwn argument type %s." % type(arg)) + + if not (arg.is_input or arg.is_output): + raise LoopyError("Kernel argument must be either input or output." + " '{}' in '{}' does not follow it.".format(arg.name, + kernel.name)) + + new_args.append(arg) + + return kernel.copy(args=new_args) + + # }}} # vim: foldmethod=marker diff --git a/loopy/target/execution.py b/loopy/target/execution.py index c8f0d40903b1638e853caf459c0c0393d754c993..84cb616ab6fbfd53795cb6cb782b26bef2aee7a6 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -385,6 +385,7 @@ class ExecutionWrapperGeneratorBase(object): for arg_idx, arg in enumerate(implemented_data_info): is_written = arg.base_name in kernel.get_written_variables() + is_read = arg.base_name in kernel.get_read_variables() kernel_arg = kernel.impl_arg_to_arg.get(arg.name) if not issubclass(arg.arg_class, KernelArgument): @@ -432,7 +433,8 @@ class ExecutionWrapperGeneratorBase(object): # {{{ allocate written arrays, if needed - if is_written and arg.arg_class in [lp.ArrayArg, lp.ConstantArg] \ + if is_written and not is_read \ + and arg.arg_class in [lp.ArrayArg, lp.ConstantArg] \ and arg.shape is not None \ and all(si is not None for si in arg.shape): @@ -723,12 +725,26 @@ class KernelExecutorBase(object): self.packing_controller = SeparateArrayPackingController(kernel) self.output_names = tuple(arg.name for arg in self.kernel.args - if arg.name in self.kernel.get_written_variables()) + if arg.is_output) + + self.input_names = tuple(arg.name for arg in self.kernel.args + if arg.is_input) + + from loopy import ArrayArg + self.input_array_names = tuple(arg.name for arg in self.kernel.args + if arg.is_input and isinstance(arg, ArrayArg)) self.has_runtime_typed_args = any( arg.dtype is None for arg in kernel.args) + def check_for_required_arguments(self, input_args): + missing_args = set(self.input_array_names) - set(input_args) + if missing_args != set(): + raise LoopyError( + "Kernel %s() missing required array arguments: '%s'." + % (self.kernel.name, ', '.join(missing_args))) + def get_typed_and_scheduled_kernel_uncached(self, arg_to_dtype_set): from loopy.kernel.tools import add_dtypes diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index 822d7df991e2a2e6ccc0a1bd3fa026fd1b8cc0f7..c0f8f07b796da4cc9bfb7b579328949d729ab269 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -353,6 +353,7 @@ class PyOpenCLKernelExecutor(KernelExecutorBase): wait_for = kwargs.pop("wait_for", None) out_host = kwargs.pop("out_host", None) + self.check_for_required_arguments(kwargs.keys()) kwargs = self.packing_controller.unpack(kwargs) kernel_info = self.kernel_info(self.arg_to_dtype_set(kwargs)) diff --git a/test/test_target.py b/test/test_target.py index e27f6a32a3e84ce29ac9b6d0c817c989ee75058e..5fc2fe2a32effb9330450e33fa680d10b2ed0e65 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -410,6 +410,42 @@ def test_pyopencl_execution_numpy_handling(ctx_factory): assert x[0] == 5. +def test_input_arguments_are_required(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + import pyopencl.array as cla + + n = 2 + x = cla.zeros(queue, (n,), 'float64') + 1.5 + y = cla.zeros(queue, (n,), 'float64') + 2. + + # make sure y is required even when y.is_output == True + knl = lp.make_kernel( + "{ [i]: 0<=i 1: exec(sys.argv[1])