# {{{ nest_and_predicate_instructions

def nest_and_predicate_instructions(kernel, iname, within):
    """
    Nests instructions inside of a loop while only executing them on one
    iteration. Useful for instructions which don't require all local
    hardware axes.

    Every instruction matched by *within* gets *iname* added to its
    ``within_inames`` and the predicate ``iname == 0`` added to its
    ``predicates``. Instructions not matched by *within* are left untouched.

    .. note::

        The predicate is hard-coded to compare against 0. If 0 is not a
        value taken by *iname*, the predicated instructions never execute.

    :arg kernel: kernel whose instructions are rewritten
    :arg iname: loop iname to nest instruction inside
    :arg within: instruction(s) to nest and predicate, as a match
        expression understood by :func:`loopy.match.parse_match`
    :returns: a copy of *kernel* with the matching instructions rewritten
    :raises LoopyError: if *iname* is not an iname of *kernel*, or if a
        matching instruction is already nested within *iname*
    """
    # Fail early on a misspelled/unknown iname instead of silently producing
    # instructions that claim to live in a loop the kernel does not have.
    if iname not in kernel.all_inames():
        raise LoopyError("iname '%s' does not exist in kernel" % iname)

    from loopy.match import parse_match
    within = parse_match(within)

    from pymbolic import var
    from pymbolic.primitives import Comparison

    # FIXME Do a better job finding the lower bound
    only_first_iteration = Comparison(var(iname), "==", 0)

    def rewrite_insn(insn):
        # Instructions that do not match are passed through unchanged.
        if not within(kernel, insn):
            return insn

        if iname in insn.within_inames:
            raise LoopyError("instruction '%s' is already nested "
                    "within iname '%s'" % (insn.id, iname))

        return insn.copy(
                within_inames=insn.within_inames | frozenset([iname]),
                predicates=insn.predicates | frozenset([only_first_iteration]))

    return kernel.copy(
            instructions=[rewrite_insn(insn) for insn in kernel.instructions])

# }}}

# vim: foldmethod=marker