From 98303ded5eb205e1d3fac68851b8ab357c72d2e4 Mon Sep 17 00:00:00 2001
From: arghdos <arghdos@gmail.com>
Date: Mon, 1 May 2017 15:28:42 -0400
Subject: [PATCH] runs simple C kernels

---
 loopy/execution.py                 | 72 +++++-------------------------
 loopy/target/c/c_execution.py      | 11 ++---
 loopy/target/pyopencl_execution.py | 28 +++++++-----
 3 files changed, 32 insertions(+), 79 deletions(-)

diff --git a/loopy/execution.py b/loopy/execution.py
index ad33ae3e7..ee2666745 100644
--- a/loopy/execution.py
+++ b/loopy/execution.py
@@ -391,6 +391,9 @@ class ExecutionWrapperGeneratorBase(object):
 
     # }}}
 
+    def get_arg_pass(self, arg):
+        return arg.name
+
     # {{{ arg setup
 
     def generate_arg_setup(
@@ -580,7 +583,7 @@ class ExecutionWrapperGeneratorBase(object):
                 gen("")
 
             if arg.arg_class in [lp.GlobalArg, lp.ConstantArg]:
-                args.append("%s.base_data" % arg.name)
+                args.append(self.get_arg_pass(arg))
             else:
                 args.append("%s" % arg.name)
 
@@ -604,7 +607,7 @@ class ExecutionWrapperGeneratorBase(object):
         gen.add_to_preamble("from loopy.target.c.compyte.array"
                             " import as_strided as _lpy_strided")
 
-    def intialize_system_args(self, gen):
+    def initialize_system_args(self, gen):
         """
-        Override to intialize any default system args
+        Override to initialize any default system args
         """
@@ -615,10 +618,8 @@ class ExecutionWrapperGeneratorBase(object):
     def generate_invocation(self, gen, kernel_name, args):
         gen("for knl in _lpy_c_kernels:")
         with Indentation(gen):
-            gen("{kernel_name}({args})"
-                    .format(
-                        kernel_name='knl.name',
-                        args=", ".join(args)))
+            joined_args = ", ".join(args)
+            gen('knl({args})'.format(args=joined_args))
 
     # }}}
 
@@ -629,21 +630,6 @@ class ExecutionWrapperGeneratorBase(object):
 
         from loopy.kernel.data import KernelArgument
 
-        if not options.no_numpy:
-            gen("if out_host is None and (_lpy_encountered_numpy "
-                    "and not _lpy_encountered_dev):")
-            with Indentation(gen):
-                gen("out_host = True")
-
-            gen("if out_host:")
-            with Indentation(gen):
-                gen("pass")  # if no outputs (?!)
-                for arg in implemented_data_info:
-                    if not issubclass(arg.arg_class, KernelArgument):
-                        continue
-
-            gen("")
-
         if options.return_dict:
             gen("return None, {%s}"
                     % ", ".join("\"%s\": %s" % (arg.name, arg.name)
@@ -663,6 +649,9 @@ class ExecutionWrapperGeneratorBase(object):
 
     # }}}
 
+    def generate_host_code(self, gen, codegen_result):
+        pass
+
     def __call__(self, kernel, codegen_result):
         """
         Generates the wrapping python invoker for this execution target
@@ -694,7 +683,7 @@ class ExecutionWrapperGeneratorBase(object):
         gen.add_to_preamble(host_code)
         gen.add_to_preamble("")
 
-        self.intialize_system_args(gen)
+        self.initialize_system_args(gen)
 
         self.generate_integer_arg_finding_from_shapes(
             gen, kernel, implemented_data_info)
@@ -712,7 +701,6 @@ class ExecutionWrapperGeneratorBase(object):
 
         self.generate_output_handler(gen, options, kernel, implemented_data_info)
 
-        import pdb; pdb.set_trace()
         if options.write_wrapper:
             output = gen.get()
             if options.highlight_wrapper:
@@ -849,43 +837,7 @@ class KernelExecutorBase(object):
         raise NotImplementedError()
 
     def __call__(self, queue, **kwargs):
-        """
-        :arg allocator: a callable passed a byte count and returning
-            a :class:`pyopencl.Buffer`. A :class:`pyopencl` allocator
-            maybe.
-        :arg wait_for: A list of :class:`pyopencl.Event` instances
-            for which to wait.
-        :arg out_host: :class:`bool`
-            Decides whether output arguments (i.e. arguments
-            written by the kernel) are to be returned as
-            :mod:`numpy` arrays. *True* for yes, *False* for no.
-
-            For the default value of *None*, if all (input) array
-            arguments are :mod:`numpy` arrays, defaults to
-            returning :mod:`numpy` arrays as well.
-
-        :returns: ``(evt, output)`` where *evt* is a :class:`pyopencl.Event`
-            associated with the execution of the kernel, and
-            output is a tuple of output arguments (arguments that
-            are written as part of the kernel). The order is given
-            by the order of kernel arguments. If this order is unspecified
-            (such as when kernel arguments are inferred automatically),
-            enable :attr:`loopy.Options.return_dict` to make *output* a
-            :class:`dict` instead, with keys of argument names and values
-            of the returned arrays.
-        """
-
-        allocator = kwargs.pop("allocator", None)
-        wait_for = kwargs.pop("wait_for", None)
-        out_host = kwargs.pop("out_host", None)
-
-        kwargs = self.packing_controller.unpack(kwargs)
-
-        kernel_info = self.cl_kernel_info(self.arg_to_dtype_set(kwargs))
-
-        return kernel_info.invoker(
-                kernel_info.cl_kernels, queue, allocator, wait_for,
-                out_host, **kwargs)
+        raise NotImplementedError()
 
     # }}}
 
diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py
index 618da0226..d819f347a 100644
--- a/loopy/target/c/c_execution.py
+++ b/loopy/target/c/c_execution.py
@@ -138,11 +138,10 @@ class CompiledCKernel(object):
         self._fn.argtypes = [ctype for name, ctype in self._arg_info]
         self._prepared_call_cache = weakref.WeakKeyDictionary()
 
-    def __call__(self, **kwargs):
+    def __call__(self, *args):
         """Execute kernel with given args mapped to ctypes equivalents."""
         args_ = []
-        for knl_arg, arg_t in zip(self.knl.args, self._fn.argtypes):
-            arg = kwargs[knl_arg.name]
+        for arg, arg_t in zip(args, self._fn.argtypes):
             if hasattr(arg, 'ctypes'):
                 if arg.size == 0:
                     # TODO eliminate unused arguments from kernel
@@ -239,11 +238,9 @@ class CKernelExecutor(KernelExecutorBase):
             from pytools import invoke_editor
             dev_code = invoke_editor(dev_code, "code.cl")
 
-        c_kernels = _Kernels()
+        c_kernels = []
         for dp in codegen_result.device_programs:
-            setattr(c_kernels, dp.name, CompiledCKernel(dp,
-                                                       self.kernel.target,
-                                                       self.compiler))
+            c_kernels.append(CompiledCKernel(dp, self.kernel.target, self.compiler))
 
         return _KernelInfo(
                 kernel=kernel,
diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py
index 021bc786f..e67c49c59 100644
--- a/loopy/target/pyopencl_execution.py
+++ b/loopy/target/pyopencl_execution.py
@@ -150,15 +150,13 @@ class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase):
     # {{{ generate invocation
 
     def generate_invocation(self, gen, kernel_name, args):
-        gen("for knl in _lpy_cl_kernels:")
-        with Indentation(gen):
-            gen("_lpy_evt = {kernel_name}({args})"
-            .format(
-                kernel_name=kernel_name,
-                args=", ".join(
-                    ["_lpy_cl_kernels", "queue"]
-                    + args
-                    + ["wait_for=wait_for"])))
+        """Emit the kernel call, binding its event to ``_lpy_evt``."""
+
+        call_args = ["_lpy_cl_kernels", "queue"] + args + ["wait_for=wait_for"]
+        invocation = "_lpy_evt = {kernel_name}({args})".format(
+            kernel_name=kernel_name,
+            args=", ".join(call_args))
+        gen(invocation)
 
     # }}}
 
@@ -185,7 +183,7 @@ class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase):
             gen("")
 
         if options.return_dict:
-            gen("return None, {%s}"
+            gen("return _lpy_evt, {%s}"
                     % ", ".join("\"%s\": %s" % (arg.name, arg.name)
                         for arg in implemented_data_info
                         if issubclass(arg.arg_class, KernelArgument)
@@ -196,13 +194,19 @@ class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase):
                         if issubclass(arg.arg_class, KernelArgument)
                     if arg.base_name in kernel.get_written_variables()]
             if out_args:
-                gen("return None, (%s,)"
+                gen("return _lpy_evt, (%s,)"
                         % ", ".join(arg.name for arg in out_args))
             else:
-                gen("return None, ()")
+                gen("return _lpy_evt, ()")
 
     # }}}
 
+    def generate_host_code(self, gen, codegen_result):
+        gen.add_to_preamble(codegen_result.host_code())
+
+    def get_arg_pass(self, arg):
+        return "%s.base_data" % arg.name
+
 # }}}
 
 
-- 
GitLab