From 8877a116026a3074a66f08bf42dd49c90679ce50 Mon Sep 17 00:00:00 2001 From: zachjweiner Date: Sat, 8 Aug 2020 12:17:02 -0500 Subject: [PATCH 1/3] PyOpenCLExecutionWrapperGenerator: retain and write to numpy inputs --- loopy/target/pyopencl_execution.py | 31 ++++++++++++++++++----------- test/test_target.py | 32 ++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 12 deletions(-) diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index 05fdd21f0..3eb266166 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -65,6 +65,8 @@ class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): def handle_non_numpy_arg(self, gen, arg): gen("if isinstance(%s, _lpy_np.ndarray):" % arg.name) with Indentation(gen): + gen("# retain originally passed array") + gen("_lpy_%s_np_input = %s" % (arg.name, arg.name)) gen("# synchronous, nothing to worry about") gen("%s = _lpy_cl_array.to_device(" "queue, %s, allocator=allocator)" @@ -73,16 +75,20 @@ class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): gen("elif %s is not None:" % arg.name) with Indentation(gen): gen("_lpy_encountered_dev = True") + gen("_lpy_%s_np_input = None" % arg.name) + gen("else:") + with Indentation(gen): + gen("_lpy_%s_np_input = None" % arg.name) gen("") # }}} - # {{{ handle allocation of unspecified arguements + # {{{ handle allocation of unspecified arguments def handle_alloc(self, gen, arg, kernel_arg, strify, skip_arg_checks): """ - Handle allocation of non-specified arguements for pyopencl execution + Handle allocation of non-specified arguments for pyopencl execution """ from pymbolic import var @@ -200,16 +206,17 @@ class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): with Indentation(gen): gen("out_host = True") - gen("if out_host:") - with Indentation(gen): - gen("pass") # if no outputs (?!) - for arg in implemented_data_info: - if not issubclass(arg.arg_class, KernelArgument): - continue - - is_written = arg.base_name in kernel.get_written_variables() - if is_written: - gen("%s = %s.get(queue=queue)" % (arg.name, arg.name)) + for arg in implemented_data_info: + if not issubclass(arg.arg_class, KernelArgument): + continue + + is_written = arg.base_name in kernel.get_written_variables() + if is_written: + np_name = "_lpy_%s_np_input" % arg.name + gen("if out_host or %s is not None:" % np_name) + with Indentation(gen): + gen("%s = %s.get(queue=queue, ary=%s)" + % (arg.name, arg.name, np_name)) gen("") diff --git a/test/test_target.py b/test/test_target.py index afad1b676..e27f6a32a 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -378,6 +378,38 @@ def test_cuda_short_vector(): print(lp.generate_code_v2(knl).device_code()) +def test_pyopencl_execution_numpy_handling(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + # test numpy input for x is written to and returned + knl = lp.make_kernel('{:}', ['x[0] = y[0] + x[0]']) + + y = np.array([3.]) + x = np.array([4.]) + evt, out = knl(queue, y=y, x=x) + assert out[0] is x + assert x[0] == 7. + + # test numpy input for x is written to and returned, even when a pyopencl array + # is passed for y + import pyopencl.array as cla + y = cla.zeros(queue, shape=(1), dtype='float64') + 3. + x = np.array([4.]) + evt, out = knl(queue, y=y, x=x) + assert out[0] is x + assert x[0] == 7. + + # test numpy input for x is written to and returned, even when output-only + knl = lp.make_kernel('{:}', ['x[0] = y[0] + 2']) + + y = np.array([3.]) + x = np.array([4.]) + evt, out = knl(queue, y=y, x=x) + assert out[0] is x + assert x[0] == 5. + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From cf63c2620efc2511cedd10dff945611060a4ec66 Mon Sep 17 00:00:00 2001 From: zachjweiner Date: Sat, 8 Aug 2020 12:19:39 -0500 Subject: [PATCH 2/3] make KernelExecutors demand all input arguments are passed; indexof tests will fail --- loopy/target/execution.py | 14 +++++++++++- loopy/target/pyopencl_execution.py | 2 ++ test/test_target.py | 36 ++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/loopy/target/execution.py b/loopy/target/execution.py index c8f0d4090..afc3341cf 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -385,6 +385,7 @@ class ExecutionWrapperGeneratorBase(object): for arg_idx, arg in enumerate(implemented_data_info): is_written = arg.base_name in kernel.get_written_variables() + is_read = arg.base_name in kernel.get_read_variables() kernel_arg = kernel.impl_arg_to_arg.get(arg.name) if not issubclass(arg.arg_class, KernelArgument): @@ -432,7 +433,8 @@ class ExecutionWrapperGeneratorBase(object): # {{{ allocate written arrays, if needed - if is_written and arg.arg_class in [lp.ArrayArg, lp.ConstantArg] \ + if is_written and not is_read \ + and arg.arg_class in [lp.ArrayArg, lp.ConstantArg] \ and arg.shape is not None \ and all(si is not None for si in arg.shape): @@ -725,10 +727,20 @@ class KernelExecutorBase(object): self.output_names = tuple(arg.name for arg in self.kernel.args if arg.name in self.kernel.get_written_variables()) + self.input_names = tuple(arg.name for arg in self.kernel.args + if arg.name in self.kernel.get_read_variables()) + self.has_runtime_typed_args = any( arg.dtype is None for arg in kernel.args) + def check_for_required_arguments(self, input_args): + missing_args = set(self.input_names) - set(input_args) + if missing_args != set(): + raise LoopyError( + "Kernel arguments '%s' are required but were not passed." + % missing_args) + def get_typed_and_scheduled_kernel_uncached(self, arg_to_dtype_set): from loopy.kernel.tools import add_dtypes diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index 3eb266166..b57169bac 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -355,6 +355,8 @@ class PyOpenCLKernelExecutor(KernelExecutorBase): kwargs = self.packing_controller.unpack(kwargs) + self.check_for_required_arguments(kwargs.keys()) + kernel_info = self.kernel_info(self.arg_to_dtype_set(kwargs)) return kernel_info.invoker( diff --git a/test/test_target.py b/test/test_target.py index e27f6a32a..5fc2fe2a3 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -410,6 +410,42 @@ def test_pyopencl_execution_numpy_handling(ctx_factory): assert x[0] == 5. +def test_input_arguments_are_required(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + import pyopencl.array as cla + + n = 2 + x = cla.zeros(queue, (n,), 'float64') + 1.5 + y = cla.zeros(queue, (n,), 'float64') + 2. + + # make sure y is required even when y.is_output == True + knl = lp.make_kernel( + "{ [i]: 0<=i 1: exec(sys.argv[1]) -- GitLab From 7ee6a3b0669ad0af7d2c5c7e14cb9bf9c1aa26af Mon Sep 17 00:00:00 2001 From: zachjweiner Date: Sun, 9 Aug 2020 09:18:16 -0500 Subject: [PATCH 3/3] Add is_input/is_output; restrict passed arg check to ArrayArgs --- loopy/kernel/creation.py | 4 +-- loopy/kernel/data.py | 40 +++++++++++++++++++++--- loopy/kernel/tools.py | 50 ++++++++++++++++++++++++++++++ loopy/target/execution.py | 14 ++++++--- loopy/target/pyopencl_execution.py | 3 +- 5 files changed, 97 insertions(+), 14 deletions(-) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index bc5c51eb0..816cbfe7b 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -2159,8 +2159,8 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): creation_plog.done() - from loopy.kernel.tools import infer_arg_is_output_only - knl = infer_arg_is_output_only(knl) + from loopy.kernel.tools import infer_args_are_input_output + knl = infer_args_are_input_output(knl) return knl diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index e6544b34a..c528e3eeb 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -338,6 +338,8 @@ class KernelArgument(ImmutableRecord): dtype = None kwargs["dtype"] = dtype + kwargs["is_output"] = kwargs.pop("is_output", None) + kwargs["is_input"] = kwargs.pop("is_input", None) ImmutableRecord.__init__(self, **kwargs) @@ -354,16 +356,38 @@ class ArrayArg(ArrayBase, KernelArgument): An instance of :class:`bool`. If set to *True*, recorded to be returned from the kernel. + + .. attribute:: is_output + An instance of :class:`bool`. If set to *True*, the argument is used + to return information to the caller. If set to *False*, then the + callee should not write the array during execution. + + .. attribute:: is_input + An instance of :class:`bool`. If set to *True*, expected to be + provided by the caller. If *False* then the callee should not depend + on the state of the array on entry to a function. + """) allowed_extra_kwargs = [ "address_space", - "is_output_only"] + "is_output_only", + "is_output", + "is_input"] def __init__(self, *args, **kwargs): if "address_space" not in kwargs: raise TypeError("'address_space' must be specified") - kwargs["is_output_only"] = kwargs.pop("is_output_only", False) + + is_output_only = kwargs.pop("is_output_only", None) + if is_output_only is not None: + warn("'is_output_only' is deprecated. Use 'is_output', 'is_input'" + " instead.", DeprecationWarning, stacklevel=2) + kwargs["is_output"] = is_output_only + kwargs["is_input"] = not is_output_only + else: + kwargs["is_output"] = kwargs.pop("is_output", None) + kwargs["is_input"] = kwargs.pop("is_input", None) super(ArrayArg, self).__init__(*args, **kwargs) @@ -391,7 +415,8 @@ class ArrayArg(ArrayBase, KernelArgument): """ super(ArrayArg, self).update_persistent_hash(key_hash, key_builder) key_builder.rec(key_hash, self.address_space) - key_builder.rec(key_hash, self.is_output_only) + key_builder.rec(key_hash, self.is_output) + key_builder.rec(key_hash, self.is_input) # Making this a function prevents incorrect use in isinstance. @@ -411,6 +436,9 @@ class ConstantArg(ArrayBase, KernelArgument): min_target_axes = 0 max_target_axes = 1 + is_output = False + is_input = True + def get_arg_decl(self, ast_builder, name_suffix, shape, dtype, is_written): return ast_builder.get_constant_arg_decl(self.name + name_suffix, shape, dtype, is_written) @@ -432,13 +460,15 @@ class ImageArg(ArrayBase, KernelArgument): class ValueArg(KernelArgument): def __init__(self, name, dtype=None, approximately=1000, target=None, - is_output_only=False): + is_output_only=None, is_output=False, is_input=True): KernelArgument.__init__(self, name=name, dtype=dtype, approximately=approximately, target=target, - is_output_only=is_output_only) + is_output_only=is_output_only, + is_output=is_output, + is_input=is_input) def __str__(self): import loopy as lp diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index e33d260fb..5b00950e7 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1864,6 +1864,56 @@ def infer_arg_is_output_only(kernel): return kernel.copy(args=new_args) + +def infer_args_are_input_output(kernel): + """ + Returns a copy of *kernel* with the attributes ``is_input`` and + ``is_output`` of the arguments set. + .. note:: + If the :attr:`~loopy.ArrayArg.is_output` is not supplied from a user, + then the array is inferred as an output argument if it is written at + some point in the kernel. + If the :attr:`~loopy.ArrayArg.is_input` is not supplied from a user, + then the array is inferred as an input argument if it is either read at + some point in the kernel or it is neither read nor written. + """ + from loopy.kernel.data import ArrayArg, ValueArg, ConstantArg, ImageArg + new_args = [] + + for arg in kernel.args: + if isinstance(arg, ArrayArg): + if arg.is_output is not None: + assert isinstance(arg.is_output, bool) + else: + if arg.name in kernel.get_written_variables(): + arg = arg.copy(is_output=True) + else: + arg = arg.copy(is_output=False) + + if arg.is_input is not None: + assert isinstance(arg.is_input, bool) + else: + if arg.name in kernel.get_read_variables() or ( + (arg.name not in kernel.get_read_variables()) and ( + arg.name not in kernel.get_written_variables())): + arg = arg.copy(is_input=True) + else: + arg = arg.copy(is_input=False) + elif isinstance(arg, (ConstantArg, ImageArg, ValueArg)): + pass + else: + raise NotImplementedError("Unkonwn argument type %s." % type(arg)) + + if not (arg.is_input or arg.is_output): + raise LoopyError("Kernel argument must be either input or output." + " '{}' in '{}' does not follow it.".format(arg.name, + kernel.name)) + + new_args.append(arg) + + return kernel.copy(args=new_args) + + # }}} # vim: foldmethod=marker diff --git a/loopy/target/execution.py b/loopy/target/execution.py index afc3341cf..84cb616ab 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -725,21 +725,25 @@ class KernelExecutorBase(object): self.packing_controller = SeparateArrayPackingController(kernel) self.output_names = tuple(arg.name for arg in self.kernel.args - if arg.name in self.kernel.get_written_variables()) + if arg.is_output) self.input_names = tuple(arg.name for arg in self.kernel.args - if arg.name in self.kernel.get_read_variables()) + if arg.is_input) + + from loopy import ArrayArg + self.input_array_names = tuple(arg.name for arg in self.kernel.args + if arg.is_input and isinstance(arg, ArrayArg)) self.has_runtime_typed_args = any( arg.dtype is None for arg in kernel.args) def check_for_required_arguments(self, input_args): - missing_args = set(self.input_names) - set(input_args) + missing_args = set(self.input_array_names) - set(input_args) if missing_args != set(): raise LoopyError( - "Kernel arguments '%s' are required but were not passed." - % missing_args) + "Kernel %s() missing required array arguments: '%s'." + % (self.kernel.name, ', '.join(missing_args))) def get_typed_and_scheduled_kernel_uncached(self, arg_to_dtype_set): from loopy.kernel.tools import add_dtypes diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index b57169bac..f443518a5 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -353,9 +353,8 @@ class PyOpenCLKernelExecutor(KernelExecutorBase): wait_for = kwargs.pop("wait_for", None) out_host = kwargs.pop("out_host", None) - kwargs = self.packing_controller.unpack(kwargs) - self.check_for_required_arguments(kwargs.keys()) + kwargs = self.packing_controller.unpack(kwargs) kernel_info = self.kernel_info(self.arg_to_dtype_set(kwargs)) -- GitLab