From d2cd0d89c68b03cde169c9351a17aa376c8ef427 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni <kaushikcfd@gmail.com>
Date: Wed, 16 Feb 2022 19:01:45 -0600
Subject: [PATCH] preserve rev. depends for buffer array's store instructions

---
 loopy/transform/buffer.py | 13 +++++++++++--
 test/test_transform.py    | 24 ++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/loopy/transform/buffer.py b/loopy/transform/buffer.py
index a6e25457d..e3dbeeb51 100644
--- a/loopy/transform/buffer.py
+++ b/loopy/transform/buffer.py
@@ -499,14 +499,23 @@ def buffer_array_for_single_kernel(kernel, callables_table, var_name,
 
     new_insns.append(init_instruction)
     if did_write:
-        new_insns.append(store_instruction)
+        # Any insn that depends on a modified insn must also depend on the
+        # store insn, so that it stays ordered after the buffer is stored.
+        new_insns_with_redirected_deps = [
+            insn.copy(depends_on=(insn.depends_on | {store_instruction.id}))
+            if insn.depends_on & aar.modified_insn_ids
+            else insn
+            for insn in new_insns
+        ] + [store_instruction]
     else:
         for iname in store_inames:
             del new_iname_to_tag[iname]
 
+        new_insns_with_redirected_deps = new_insns
+
     kernel = kernel.copy(
             domains=new_kernel_domains,
-            instructions=new_insns,
+            instructions=new_insns_with_redirected_deps,
             temporary_variables=new_temporary_variables)
 
     from loopy import tag_inames
diff --git a/test/test_transform.py b/test/test_transform.py
index 2043b127e..2aa07dabb 100644
--- a/test/test_transform.py
+++ b/test/test_transform.py
@@ -1351,6 +1351,30 @@ def test_rename_inames(ctx_factory):
     lp.auto_test_vs_ref(knl, ctx, ref_knl)
 
 
+def test_buffer_array_preserves_rev_deps(ctx_factory):
+    # Regression test for https://github.com/inducer/loopy/issues/546
+    cl_ctx = ctx_factory()
+    domains = ["{[i0, j0]: 0<=i0<100 and 0<=j0<10}",
+               "{[i1, j1]: 0<=i1<100 and 0<=j1<10}"]
+    ref_knl = lp.make_kernel(
+        domains,
+        """
+        out0[i0] = sum(j0, A[i0] * x[j0])
+        ... gbarrier {id=gbarrier}
+        out1[i1] = sum(j1, A[i1] * x[j1])
+        """, seq_dependencies=True)
+    ref_knl = lp.add_dtypes(ref_knl, {"A": np.float64, "x": np.float64})
+
+    buffered = lp.split_iname(ref_knl, "j0", 2)
+    buffered = lp.split_iname(buffered, "i0", 2, outer_tag="g.0")
+    buffered = lp.buffer_array(
+        buffered, "out0", buffer_inames=["i0_inner"], init_expression="0")
+    # The barrier must now also wait for the store of the buffered array.
+    barrier = buffered.default_entrypoint.id_to_insn["gbarrier"]
+    assert "store_out0" in barrier.depends_on
+    lp.auto_test_vs_ref(ref_knl, cl_ctx, buffered)
+
+
 def test_rename_inames_existing_ok(ctx_factory):
     ctx = ctx_factory()
 
-- 
GitLab