From 73f15141c6e5d1d5b5dbd3709d0d170982524ba2 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 10 Jan 2018 16:10:04 -0600
Subject: [PATCH] PyOpenCL executor: manage pyopencl array events

---
 loopy/options.py                   | 10 ++++++++++
 loopy/target/c/c_execution.py      |  3 ++-
 loopy/target/execution.py          |  5 +++--
 loopy/target/pyopencl_execution.py | 24 +++++++++++++++++++++++-
 loopy/version.py                   |  4 ++--
 5 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/loopy/options.py b/loopy/options.py
index 25bb7014c..13d0b752d 100644
--- a/loopy/options.py
+++ b/loopy/options.py
@@ -112,6 +112,15 @@ class Options(ImmutableRecord):
         Do not check for or accept :mod:`numpy` arrays as
         arguments.
 
+        Defaults to *False*.
+
+    .. attribute:: cl_exec_manage_array_events
+
+        Within the PyOpenCL executor, respect and update
+        :attr:`pyopencl.array.Array.events`.
+
+        Defaults to *True*.
+
     .. attribute:: return_dict
 
         Have kernels return a :class:`dict` instead of a tuple as
@@ -196,6 +205,7 @@ class Options(ImmutableRecord):
 
                 skip_arg_checks=kwargs.get("skip_arg_checks", False),
                 no_numpy=kwargs.get("no_numpy", False),
+                cl_exec_manage_array_events=kwargs.get("cl_exec_manage_array_events", True),
                 return_dict=kwargs.get("return_dict", False),
                 write_wrapper=kwargs.get("write_wrapper", False),
                 write_code=kwargs.get("write_code", False),
diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py
index 5efc58bb7..64dd3f041 100644
--- a/loopy/target/c/c_execution.py
+++ b/loopy/target/c/c_execution.py
@@ -133,11 +133,12 @@ class CExecutionWrapperGenerator(ExecutionWrapperGeneratorBase):
 
     # {{{ generate invocation
 
-    def generate_invocation(self, gen, kernel_name, args):
+    def generate_invocation(self, gen, kernel_name, args, kernel, implemented_data_info):
         gen("for knl in _lpy_c_kernels:")
         with Indentation(gen):
             gen('knl({args})'.format(
                 args=", ".join(args)))
+
     # }}}
 
     # {{{
diff --git a/loopy/target/execution.py b/loopy/target/execution.py
index 0304ec6f0..2509be122 100644
--- a/loopy/target/execution.py
+++ b/loopy/target/execution.py
@@ -571,7 +571,7 @@ class ExecutionWrapperGeneratorBase(object):
 
     # {{{ generate invocation
 
-    def generate_invocation(self, gen, kernel_name, args):
+    def generate_invocation(self, gen, kernel_name, args, kernel, implemented_data_info):
         raise NotImplementedError()
 
     # }}}
@@ -632,7 +632,8 @@ class ExecutionWrapperGeneratorBase(object):
         args = self.generate_arg_setup(
             gen, kernel, implemented_data_info, options)
 
-        self.generate_invocation(gen, codegen_result.host_program.name, args)
+        self.generate_invocation(gen, codegen_result.host_program.name, args,
+                kernel, implemented_data_info)
 
         self.generate_output_handler(gen, options, kernel, implemented_data_info)
 
diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py
index cc0b48a6a..389f88fdc 100644
--- a/loopy/target/pyopencl_execution.py
+++ b/loopy/target/pyopencl_execution.py
@@ -151,7 +151,21 @@ class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase):
 
     # {{{ generate invocation
 
-    def generate_invocation(self, gen, kernel_name, args):
+    def generate_invocation(self, gen, kernel_name, args, kernel, implemented_data_info):
+        if kernel.options.cl_exec_manage_array_events:
+            gen("""
+                if wait_for is None:
+                    wait_for = []
+                """)
+
+            gen("")
+            from loopy.kernel.data import GlobalArg
+            for arg in implemented_data_info:
+                if issubclass(arg.arg_class, GlobalArg):
+                    gen("wait_for = list(wait_for) + {arg_name}.events".format(arg_name=arg.name))
+
+            gen("")
+
         gen("_lpy_evt = {kernel_name}({args})"
         .format(
             kernel_name=kernel_name,
@@ -160,6 +174,14 @@ class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase):
                 + args
                 + ["wait_for=wait_for"])))
 
+        if kernel.options.cl_exec_manage_array_events:
+            gen("")
+            from loopy.kernel.data import GlobalArg
+            for arg in implemented_data_info:
+                if (issubclass(arg.arg_class, GlobalArg)
+                        and arg.base_name in kernel.get_written_variables()):
+                    gen("{arg_name}.add_event(_lpy_evt)".format(arg_name=arg.name))
+
     # }}}
 
     # {{{
diff --git a/loopy/version.py b/loopy/version.py
index d5d50a0fe..888fb95f9 100644
--- a/loopy/version.py
+++ b/loopy/version.py
@@ -21,7 +21,7 @@ THE SOFTWARE.
 """
 
 
-VERSION = (2017, 2)
+VERSION = (2017, 2, 1)
 VERSION_STATUS = ""
 VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS
 
@@ -32,4 +32,4 @@ except ImportError:
 else:
     _islpy_version = islpy.version.VERSION_TEXT
 
-DATA_MODEL_VERSION = "v74-islpy%s" % _islpy_version
+DATA_MODEL_VERSION = "v75-islpy%s" % _islpy_version
-- 
GitLab