From 7241bd636afe82566aa0e80b7c7b2dbb9e49312a Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni <kaushikcfd@gmail.com> Date: Wed, 16 Feb 2022 17:21:28 -0600 Subject: [PATCH] [bugfix]: precompute over insns after a gbarrier --- loopy/transform/precompute.py | 14 ++++++++++++++ test/test_transform.py | 26 ++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 7c20d7a01..201abd470 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -29,6 +29,8 @@ from loopy.diagnostic import LoopyError from pymbolic.mapper.substitutor import make_subst_func from loopy.translation_unit import TranslationUnit from loopy.kernel.function_interface import CallableKernel, ScalarCallable +from loopy.kernel.tools import (kernel_has_global_barriers, + find_most_recent_global_barrier) import numpy as np from pymbolic import var @@ -217,6 +219,18 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): self.replaced_something = True + # {{{ add gbarriers that the replaced insn depends-on to compute insn's deps + + if (kernel_has_global_barriers(expn_state.kernel) + and (find_most_recent_global_barrier(expn_state.kernel, + expn_state.instruction.id + ) is not None)): + self.compute_insn_depends_on.add( + find_most_recent_global_barrier(expn_state.kernel, + expn_state.instruction.id)) + + # }}} + return new_outer_expr def map_kernel(self, kernel): diff --git a/test/test_transform.py b/test/test_transform.py index e42eeb498..2043b127e 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -1366,6 +1366,32 @@ def test_rename_inames_existing_ok(ctx_factory): lp.auto_test_vs_ref(knl, ctx, ref_knl) +def test_precompute_with_gbarrier(ctx_factory): + # See https://github.com/inducer/loopy/issues/543 + ctx = ctx_factory() + + t_unit = lp.make_kernel( + ["{[i0, j0]: 0<=i0<100 and 0<=j0<10}", + "{[i1, j1]: 0<=i1<100 and 0<=j1<10}"], + """ + out0[i0] = sum(j0, A[i0] * x[j0]) + ... gbarrier {id=gbarrier} + out1[i1] = sum(j1, A[i1] * x[j1]) + """, seq_dependencies=True) + t_unit = lp.add_dtypes(t_unit, {"A": np.float64, + "x": np.float64}) + ref_t_unit = t_unit + + t_unit = lp.add_prefetch(t_unit, + "x", + sweep_inames=["j1"], + within="writes:out1", + prefetch_insn_id="x_fetch") + assert "gbarrier" in t_unit.default_entrypoint.id_to_insn["x_fetch"].depends_on + + lp.auto_test_vs_ref(ref_t_unit, ctx, t_unit) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab