diff --git a/loopy/preprocess.py b/loopy/preprocess.py index e30d3bcb3f441036bab6558bc2d8691031a1c2e4..71cddd9d25521a133cf989bd7d6975d606e67542 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -480,6 +480,7 @@ def realize_reduction(kernel, insn_id_filter=None): id=init_id, assignee=target_var, forced_iname_deps=outer_insn_inames - frozenset(expr.inames), + forced_iname_deps_is_final=insn.forced_iname_deps_is_final, depends_on=frozenset(), expression=expr.operation.neutral_element(arg_dtype, expr.inames)) @@ -489,13 +490,18 @@ def realize_reduction(kernel, insn_id_filter=None): based_on="%s_%s_update" % (insn.id, "_".join(expr.inames)), extra_used_ids=set(i.id for i in generated_insns)) + update_insn_iname_deps = temp_kernel.insn_inames(insn) | set(expr.inames) + if insn.forced_iname_deps_is_final: + update_insn_iname_deps = insn.forced_iname_deps | set(expr.inames) + reduction_insn = Assignment( id=update_id, assignee=target_var, expression=expr.operation( arg_dtype, target_var, expr.expr, expr.inames), depends_on=frozenset([init_insn.id]) | insn.depends_on, - forced_iname_deps=temp_kernel.insn_inames(insn) | set(expr.inames)) + forced_iname_deps=update_insn_iname_deps, + forced_iname_deps_is_final=insn.forced_iname_deps_is_final) generated_insns.append(reduction_insn)