diff --git a/loopy/__init__.py b/loopy/__init__.py
index db6953a6d2ce80e5bbba4d741c7123c9f390b45b..12329bbf6472867c873958f1a18f20ba1016b5f7 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -97,7 +97,7 @@ __all__ = [
 
         "preprocess_kernel", "realize_reduction", "infer_unknown_types",
         "generate_loop_schedules", "get_one_scheduled_kernel",
-        "generate_code",
+        "generate_code", "generate_body",
 
         "CompiledKernel",
 
diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py
index ecbb552032b5d34df32aa7c2637d210fb5593ffb..26f225c6293e62ef43adc921bfef20a3a2a534b1 100644
--- a/loopy/codegen/__init__.py
+++ b/loopy/codegen/__init__.py
@@ -488,4 +488,66 @@ def generate_code(kernel, device=None):
 
 # }}}
 
+# {{{ generate function body
+
+def generate_body(kernel):
+    """Generate the function body for *kernel* using its target.
+
+    A kernel without a schedule is first scheduled with
+    :func:`loopy.schedule.get_one_scheduled_kernel`. Unknown types are then
+    inferred and the pre-code-generation checks are run before the body is
+    requested from ``kernel.target.generate_body``.
+
+    :arg kernel: the kernel to generate the body for
+    :returns: the code returned by the target's ``generate_body``
+    :raises LoopyError: if *kernel* is not in the ``SCHEDULED`` state, or if
+        the check of the implemented domains reports a failure
+    """
+
+    if kernel.schedule is None:
+        from loopy.schedule import get_one_scheduled_kernel
+        kernel = get_one_scheduled_kernel(kernel)
+    from loopy.kernel import kernel_state
+    if kernel.state != kernel_state.SCHEDULED:
+        raise LoopyError("cannot generate code for a kernel that has not been "
+                "scheduled")
+
+    from loopy.preprocess import infer_unknown_types
+    kernel = infer_unknown_types(kernel, expect_completion=True)
+
+    from loopy.check import pre_codegen_checks
+    pre_codegen_checks(kernel)
+
+    logger.info("%s: generate code: start", kernel.name)
+
+    # allow_complex is enabled if any argument or temporary has dtype kind "c".
+    all_vars = kernel.args + list(six.itervalues(kernel.temporary_variables))
+    allow_complex = any(var.dtype.kind == "c" for var in all_vars)
+
+    seen_dtypes = set()
+    seen_functions = set()
+
+    initial_implemented_domain = isl.BasicSet.from_params(kernel.assumptions)
+    codegen_state = CodeGenerationState(
+            implemented_domain=initial_implemented_domain,
+            implemented_predicates=frozenset(),
+            expression_to_code_mapper=kernel.target.get_expression_to_code_mapper(
+                kernel, seen_dtypes, seen_functions, allow_complex))
+
+    code_str, implemented_domains = kernel.target.generate_body(
+            kernel, codegen_state)
+
+    # Call the check outside of an ``assert`` so that it still runs when
+    # assertions are stripped (``python -O``).
+    from loopy.check import check_implemented_domains
+    if not check_implemented_domains(kernel, implemented_domains, code_str):
+        raise LoopyError("%s: check of implemented domains failed"
+                % kernel.name)
+
+    logger.info("%s: generate code: done", kernel.name)
+
+    return code_str
+
+# }}}
+
 # vim: foldmethod=marker
diff --git a/test/test_loopy.py b/test/test_loopy.py
index f99c0b3b3638d024530c339e4340e27d71043c4e..5dce1292d9966deff3d145c00610945c39932429 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -1958,6 +1958,52 @@ def test_auto_test_can_detect_problems(ctx_factory):
                 parameters=dict(n=123))
 
 
+def test_generate_c_snippet():
+    """Generate and print the body of a two-reduction kernel for CTarget."""
+    from loopy.target.c import CTarget
+
+    from pymbolic import var
+    I = var("I")  # noqa
+    f = var("f")
+    df = var("df")
+    q_v = var("q_v")
+    eN = var("eN")  # noqa
+    k = var("k")
+    u = var("u")
+
+    from functools import partial
+    l_sum = partial(lp.Reduction, "sum")
+
+    Instr = lp.ExpressionInstruction  # noqa
+
+    knl = lp.make_kernel(
+        "{[I, k]: 0<=I<nSpace and 0<=k<nQuad}",
+        [
+            Instr(f[I], l_sum(k, q_v[k, I]*u)),
+            Instr(df[I], l_sum(k, q_v[k, I])),
+            ],
+        [
+            lp.GlobalArg("q_v", np.float64, shape="nQuad, nSpace"),
+            lp.GlobalArg("f,df", np.float64, shape="nSpace"),
+            lp.ValueArg("u", np.float64),
+            "...",
+            ],
+        target=CTarget(),
+        assumptions="nQuad>=1")
+
+    if 0:  # enable to play with prefetching
+        # (prefetch currently requires constant sizes)
+        knl = lp.fix_parameters(knl, nQuad=5, nSpace=3)
+        knl = lp.add_prefetch(knl, "q_v", "k,I", default_tag=None)
+
+    knl = lp.split_iname(knl, "k", 4, inner_tag="unr", slabs=(0, 1))
+    knl = lp.set_loop_priority(knl, "I,k_outer,k_inner")
+
+    knl = lp.preprocess_kernel(knl)
+    knl = lp.get_one_scheduled_kernel(knl)
+    print(lp.generate_body(knl))
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])