From 6b391673d12b15a63b72ad8eb099b14704d2933d Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Thu, 7 Jan 2016 23:51:56 -0600
Subject: [PATCH] Add OCCA codegen mode to ISPC target

---
 bin/loopy            |  8 ++++++--
 loopy/target/ispc.py | 41 +++++++++++++++++++++++++++++++++++++++--
 test/test_loopy.py   |  4 ++--
 3 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/bin/loopy b/bin/loopy
index 31551c16d..8291bdd3b 100644
--- a/bin/loopy
+++ b/bin/loopy
@@ -63,8 +63,9 @@ def main():
     parser.add_argument("outfile", default="-", metavar="OUTPUT_FILE",
             help="Defaults to stdout ('-').", nargs='?')
     parser.add_argument("--lang", metavar="LANGUAGE", help="loopy|fortran")
-    parser.add_argument("--target", choices=("opencl", "ispc", "c", "cuda"),
-            default="opencl")
+    parser.add_argument("--target", choices=(
+        "opencl", "ispc", "ispc-occa", "c", "cuda"),
+        default="opencl")
     parser.add_argument("--name")
     parser.add_argument("--transform")
     parser.add_argument("--edit-code", action="store_true")
@@ -79,6 +80,9 @@ def main():
     elif args.target == "ispc":
         from loopy.target.ispc import ISPCTarget
         target = ISPCTarget()
+    elif args.target == "ispc-occa":
+        from loopy.target.ispc import ISPCTarget
+        target = ISPCTarget(occa_mode=True)
     elif args.target == "c":
         from loopy.target.c import CTarget
         target = CTarget()
diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py
index 2d146e82a..b9e654c9f 100644
--- a/loopy/target/ispc.py
+++ b/loopy/target/ispc.py
@@ -49,6 +49,13 @@ class LoopyISPCCodeMapper(LoopyCCodeMapper):
 
 
 class ISPCTarget(CTarget):
+    def __init__(self, occa_mode=False):
+        """
+        :arg occa_mode: If true, make the call signature OCCA-compatible:
+            prepend ``loopy_dims``, ``o1``-``o3``; value args become references.
+        """
+        self.occa_mode = occa_mode
+
     # {{{ top-level codegen
 
     def generate_code(self, kernel, codegen_state, impl_arg_info):
@@ -61,6 +68,24 @@ class ISPCTarget(CTarget):
 
         inner_name = "lp_ispc_inner_"+kernel.name
         arg_decls = [iai.cgen_declarator for iai in impl_arg_info]
+        arg_names = [iai.name for iai in impl_arg_info]
+
+        # {{{ occa compatibility hackery
+
+        if self.occa_mode:
+            from cgen import ArrayOf, Const
+            from cgen.ispc import ISPCUniform
+
+            arg_decls = [
+                    Const(ISPCUniform(ArrayOf(Value("int", "loopy_dims")))),
+                    Const(ISPCUniform(Value("int", "o1"))),
+                    Const(ISPCUniform(Value("int", "o2"))),
+                    Const(ISPCUniform(Value("int", "o3"))),
+                    ] + arg_decls
+            arg_names = ["loopy_dims", "o1", "o2", "o3"] + arg_names
+
+        # }}}
+
         knl_fbody = FunctionBody(
                 ISPCTask(
                     FunctionDeclaration(
@@ -92,7 +117,7 @@ class ISPCTarget(CTarget):
                             ccm(gs_i, PREC_NONE)
                             for gs_i in gsize),
                         inner_name,
-                        ", ".join(iai.name for iai in impl_arg_info)
+                        ", ".join(arg_names)
                         ))
                 ])
 
@@ -100,7 +125,7 @@ class ISPCTarget(CTarget):
                 ISPCExport(
                     FunctionDeclaration(
                         Value("void", kernel.name),
-                        [iai.cgen_declarator for iai in impl_arg_info])),
+                        arg_decls)),
                 wrapper_body)
 
         # }}}
@@ -167,6 +192,18 @@ class ISPCTarget(CTarget):
         result = super(ISPCTarget, self).get_value_arg_decl(
                 name, shape, dtype, is_written)
 
+        from cgen import Reference, Const
+        was_const = isinstance(result, Const)
+
+        if was_const:
+            result = result.subdecl
+
+        if self.occa_mode:
+            result = Reference(result)
+
+        if was_const:
+            result = Const(result)
+
         from cgen.ispc import ISPCUniform
         return ISPCUniform(result)
 
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 09b218c1e..aa1f7b09a 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2313,7 +2313,7 @@ def test_collect_common_factors(ctx_factory):
     lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=13))
 
 
-def test_ispc_target():
+def test_ispc_target(occa_mode=False):
     from loopy.target.ispc import ISPCTarget
 
     knl = lp.make_kernel(
@@ -2323,7 +2323,7 @@ def test_ispc_target():
                 lp.GlobalArg("out,a", np.float32, shape=lp.auto),
                 "..."
                 ],
-            target=ISPCTarget())
+            target=ISPCTarget(occa_mode=occa_mode))
 
     knl = lp.split_iname(knl, "i", 8, inner_tag="l.0")
     knl = lp.split_iname(knl, "i_outer", 4, outer_tag="g.0", inner_tag="ilp")
-- 
GitLab