From 91f1b35c3fa3590d77fe445fe710d418ade7d7bc Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 21 Dec 2016 17:52:48 +0100
Subject: [PATCH] Fix barrier insertion above top of loop (Fixes #17 on gitlab)

---
 loopy/schedule/__init__.py |  2 +-
 loopy/version.py           |  2 +-
 test/test_loopy.py         | 46 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py
index e23e5f350..545c53344 100644
--- a/loopy/schedule/__init__.py
+++ b/loopy/schedule/__init__.py
@@ -1640,7 +1640,6 @@ def insert_barriers(kernel, schedule, reverse, kind, verify_only, level=0):
                         barrier_kind_more_or_equally_global(
                             sub_sched_item.kind, kind)):
 
-                    seen_barrier()
                     last_barrier_index = j
                     if first_barrier_index is None:
                         first_barrier_index = j
@@ -1683,6 +1682,7 @@ def insert_barriers(kernel, schedule, reverse, kind, verify_only, level=0):
             if last_barrier_index is None:
                 candidates.update(insn_ids_from_schedule(subresult))
             else:
+                seen_barrier()
                 candidates.update(
                         insn_ids_from_schedule(
                             subresult[last_barrier_index+1:]))
diff --git a/loopy/version.py b/loopy/version.py
index b3505973b..503b6c54e 100644
--- a/loopy/version.py
+++ b/loopy/version.py
@@ -32,4 +32,4 @@ except ImportError:
 else:
     _islpy_version = islpy.version.VERSION_TEXT
 
-DATA_MODEL_VERSION = "v51-islpy%s" % _islpy_version
+DATA_MODEL_VERSION = "v52-islpy%s" % _islpy_version
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 6b6071096..48ccd8ee0 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -1995,6 +1995,52 @@ def test_integer_reduction(ctx_factory):
             assert function(out)
 
 
+def assert_barrier_between(knl, id1, id2):
+    """Assert that a barrier is scheduled after *id1* and before *id2*."""
+    from loopy.schedule import Barrier, RunInstruction
+
+    past_id1 = barrier_after_id1 = False
+    for item in knl.schedule:
+        if isinstance(item, RunInstruction):
+            if item.insn_id == id1:
+                past_id1 = True
+            elif item.insn_id == id2:
+                assert past_id1
+                assert barrier_after_id1
+                return
+        # Only a barrier that comes after id1 counts.
+        if past_id1 and isinstance(item, Barrier):
+            barrier_after_id1 = True
+
+    raise RuntimeError("id2 was not seen")
+
+
+def test_barrier_insertion_near_top_of_loop():
+    """Check that a barrier is scheduled between consecutive instructions."""
+    knl = lp.make_kernel(
+        "{[i,j]: 0 <= i,j < 10 }",
+        """
+        for i
+         <>a[i] = i  {id=ainit}
+         for j
+          <>t = a[(i + 1) % 10]  {id=tcomp}
+          <>b[i,j] = a[i] + t   {id=bcomp1}
+          b[i,j] = b[i,j] + 1  {id=bcomp2}
+         end
+        end
+        """,
+        seq_dependencies=True)
+    knl = lp.tag_inames(knl, {"i": "l.0"})
+    for tmp_name in ("a", "b"):
+        knl = lp.set_temporary_scope(knl, tmp_name, "local")
+    knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl))
+    print(knl)
+
+    insn_order = ("ainit", "tcomp", "bcomp1", "bcomp2")
+    for earlier, later in zip(insn_order, insn_order[1:]):
+        assert_barrier_between(knl, earlier, later)
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
-- 
GitLab