diff --git a/bin/loopy b/bin/loopy index 31551c16debff1f8a73153a5796af57b7494a931..8291bdd3bfe5c157fca5bc4ad60ced2839ae529f 100644 --- a/bin/loopy +++ b/bin/loopy @@ -63,8 +63,9 @@ def main(): parser.add_argument("outfile", default="-", metavar="OUTPUT_FILE", help="Defaults to stdout ('-').", nargs='?') parser.add_argument("--lang", metavar="LANGUAGE", help="loopy|fortran") - parser.add_argument("--target", choices=("opencl", "ispc", "c", "cuda"), - default="opencl") + parser.add_argument("--target", choices=( + "opencl", "ispc", "ispc-occa", "c", "cuda"), + default="opencl") parser.add_argument("--name") parser.add_argument("--transform") parser.add_argument("--edit-code", action="store_true") @@ -79,6 +80,9 @@ def main(): elif args.target == "ispc": from loopy.target.ispc import ISPCTarget target = ISPCTarget() + elif args.target == "ispc-occa": + from loopy.target.ispc import ISPCTarget + target = ISPCTarget(occa_mode=True) elif args.target == "c": from loopy.target.c import CTarget target = CTarget() diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 2d146e82a80a668582156df4232ee0f430a3f546..b9e654c9f173ce0028f9a793dd7f3cdc4f3b4fdc 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -49,6 +49,13 @@ class LoopyISPCCodeMapper(LoopyCCodeMapper): class ISPCTarget(CTarget): + def __init__(self, occa_mode=False): + """ + :arg occa_mode: Whether to modify the generated call signature to + be compatible with OCCA + """ + self.occa_mode = occa_mode + # {{{ top-level codegen def generate_code(self, kernel, codegen_state, impl_arg_info): @@ -61,6 +68,24 @@ class ISPCTarget(CTarget): inner_name = "lp_ispc_inner_"+kernel.name arg_decls = [iai.cgen_declarator for iai in impl_arg_info] + arg_names = [iai.name for iai in impl_arg_info] + + # {{{ occa compatibility hackery + + if self.occa_mode: + from cgen import ArrayOf, Const + from cgen.ispc import ISPCUniform + + arg_decls = [ + Const(ISPCUniform(ArrayOf(Value("int", "loopy_dims")))), + Const(ISPCUniform(Value("int", "o1"))), + Const(ISPCUniform(Value("int", "o2"))), + Const(ISPCUniform(Value("int", "o3"))), + ] + arg_decls + arg_names = ["loopy_dims", "o1", "o2", "o3"] + arg_names + + # }}} + knl_fbody = FunctionBody( ISPCTask( FunctionDeclaration( @@ -92,7 +117,7 @@ class ISPCTarget(CTarget): ccm(gs_i, PREC_NONE) for gs_i in gsize), inner_name, - ", ".join(iai.name for iai in impl_arg_info) + ", ".join(arg_names) )) ]) @@ -100,7 +125,7 @@ class ISPCTarget(CTarget): ISPCExport( FunctionDeclaration( Value("void", kernel.name), - [iai.cgen_declarator for iai in impl_arg_info])), + arg_decls)), wrapper_body) # }}} @@ -167,6 +192,18 @@ class ISPCTarget(CTarget): result = super(ISPCTarget, self).get_value_arg_decl( name, shape, dtype, is_written) + from cgen import Reference, Const + was_const = isinstance(result, Const) + + if was_const: + result = result.subdecl + + if self.occa_mode: + result = Reference(result) + + if was_const: + result = Const(result) + from cgen.ispc import ISPCUniform return ISPCUniform(result) diff --git a/test/test_loopy.py b/test/test_loopy.py index 09b218c1e8bcddcc32ae21c72ebfd17c776204ed..aa1f7b09a8d1882d41a6b94df2296377465db457 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2313,7 +2313,7 @@ def test_collect_common_factors(ctx_factory): lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=13)) -def test_ispc_target(): +def test_ispc_target(occa_mode=False): from loopy.target.ispc import ISPCTarget knl = lp.make_kernel( @@ -2323,7 +2323,7 @@ def test_ispc_target(): lp.GlobalArg("out,a", np.float32, shape=lp.auto), "..." ], - target=ISPCTarget()) + target=ISPCTarget(occa_mode=occa_mode)) knl = lp.split_iname(knl, "i", 8, inner_tag="l.0") knl = lp.split_iname(knl, "i_outer", 4, outer_tag="g.0", inner_tag="ilp")