Add the cl_exec_manage_array_events option.

When the option is enabled (the default), the invocation code generated for
the PyOpenCL executor waits on the events of every global array argument and
adds the kernel's event to each global array argument the kernel writes.
To make that possible, generate_invocation() receives 'kernel' and
'implemented_data_info' in every execution wrapper generator.

NOTE(review): the post-image blob ids on the 'index' lines for options.py and
pyopencl_execution.py predate the review fixes in this patch.

diff --git a/loopy/options.py b/loopy/options.py
index 25bb7014ce07a30c49f7f78d5a6325eaba36291d..13d0b752dfcfa0f0da233880f27f09a963ab4c81 100644
--- a/loopy/options.py
+++ b/loopy/options.py
@@ -112,6 +112,15 @@ class Options(ImmutableRecord):
         Do not check for or accept :mod:`numpy` arrays as
         arguments.
 
+        Defaults to *False*.
+
+    .. attribute:: cl_exec_manage_array_events
+
+        Within the PyOpenCL executor, respect and update
+        :attr:`pyopencl.array.Array.events`.
+
+        Defaults to *True*.
+
     .. attribute:: return_dict
 
         Have kernels return a :class:`dict` instead of a tuple as
@@ -196,6 +205,8 @@ class Options(ImmutableRecord):
 
                 skip_arg_checks=kwargs.get("skip_arg_checks", False),
                 no_numpy=kwargs.get("no_numpy", False),
+                cl_exec_manage_array_events=kwargs.get(
+                    "cl_exec_manage_array_events", True),
                 return_dict=kwargs.get("return_dict", False),
                 write_wrapper=kwargs.get("write_wrapper", False),
                 write_code=kwargs.get("write_code", False),
diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py
index 5efc58bb7cd8692594018a5f7a9bcf75278a3b9b..c136a9f36f8dd7b797aa9b6875a41e3ea185c0ca 100644
--- a/loopy/target/c/c_execution.py
+++ b/loopy/target/c/c_execution.py
@@ -133,11 +133,13 @@ class CExecutionWrapperGenerator(ExecutionWrapperGeneratorBase):
 
     # {{{ generate invocation
 
-    def generate_invocation(self, gen, kernel_name, args):
+    def generate_invocation(self, gen, kernel_name, args,
+            kernel, implemented_data_info):
         gen("for knl in _lpy_c_kernels:")
         with Indentation(gen):
             gen('knl({args})'.format(
                 args=", ".join(args)))
+
     # }}}
 
     # {{{
diff --git a/loopy/target/execution.py b/loopy/target/execution.py
index 0304ec6f09eb2b014bb01a7b30889e24910e0dd9..2aa76e099d8e50a2949c616736b30f725fb10bb4 100644
--- a/loopy/target/execution.py
+++ b/loopy/target/execution.py
@@ -571,7 +571,8 @@ class ExecutionWrapperGeneratorBase(object):
 
     # {{{ generate invocation
 
-    def generate_invocation(self, gen, kernel_name, args):
+    def generate_invocation(self, gen, kernel_name, args,
+            kernel, implemented_data_info):
         raise NotImplementedError()
 
     # }}}
@@ -632,7 +633,8 @@ class ExecutionWrapperGeneratorBase(object):
         args = self.generate_arg_setup(
             gen, kernel, implemented_data_info, options)
 
-        self.generate_invocation(gen, codegen_result.host_program.name, args)
+        self.generate_invocation(gen, codegen_result.host_program.name, args,
+                kernel, implemented_data_info)
 
         self.generate_output_handler(gen, options, kernel, implemented_data_info)
 
diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py
index cc0b48a6ac17e23f318c5489d45fca6710bb3392..bef3152d03c193c14b11ce6f9ba3f20fdfcff6ad 100644
--- a/loopy/target/pyopencl_execution.py
+++ b/loopy/target/pyopencl_execution.py
@@ -151,7 +151,35 @@ class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase):
 
     # {{{ generate invocation
 
-    def generate_invocation(self, gen, kernel_name, args):
+    def generate_invocation(self, gen, kernel_name, args,
+            kernel, implemented_data_info):
+        """Emit the wrapper code that calls *kernel_name* with *args*.
+
+        If the kernel's ``cl_exec_manage_array_events`` option is set, the
+        emitted code additionally waits on the events of every global array
+        argument and adds the resulting event to each global array argument
+        that the kernel writes.
+        """
+        if kernel.options.cl_exec_manage_array_events:
+            # Copy: events get appended below, and a list passed in by the
+            # caller must not be modified behind its back.
+            gen("""
+                if wait_for is None:
+                    wait_for = []
+                else:
+                    wait_for = list(wait_for)
+                """)
+
+            gen("")
+            from loopy.kernel.data import GlobalArg
+            for arg in implemented_data_info:
+                if issubclass(arg.arg_class, GlobalArg):
+                    gen(
+                            "wait_for.extend({arg_name}.events)"
+                            .format(arg_name=arg.name))
+
+            gen("")
+
         gen("_lpy_evt = {kernel_name}({args})"
             .format(
                 kernel_name=kernel_name,
@@ -160,6 +188,17 @@ class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase):
                     + args
                     + ["wait_for=wait_for"])))
 
+        if kernel.options.cl_exec_manage_array_events:
+            gen("")
+            from loopy.kernel.data import GlobalArg
+            # Loop-invariant, so look it up once rather than per argument.
+            written_vars = kernel.get_written_variables()
+            for arg in implemented_data_info:
+                if (issubclass(arg.arg_class, GlobalArg)
+                        and arg.base_name in written_vars):
+                    # Record the kernel's event on every array it writes.
+                    gen("{arg_name}.add_event(_lpy_evt)".format(arg_name=arg.name))
+
     # }}}
 
     # {{{
diff --git a/loopy/version.py b/loopy/version.py
index d5d50a0fe89c4001cadef5e94d2552b645408518..888fb95f9d28b04692a9e86865a23610e7bd9f5c 100644
--- a/loopy/version.py
+++ b/loopy/version.py
@@ -21,7 +21,7 @@ THE SOFTWARE.
 """
 
 
-VERSION = (2017, 2)
+VERSION = (2017, 2, 1)
 VERSION_STATUS = ""
 VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS
 
@@ -32,4 +32,4 @@ except ImportError:
 else:
     _islpy_version = islpy.version.VERSION_TEXT
 
-DATA_MODEL_VERSION = "v74-islpy%s" % _islpy_version
+DATA_MODEL_VERSION = "v75-islpy%s" % _islpy_version