diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py
index e23e5f350864a623d2ff05551fc2d559c361d734..545c5334451623ba291158d64635fc6bf9904d0d 100644
--- a/loopy/schedule/__init__.py
+++ b/loopy/schedule/__init__.py
@@ -1640,7 +1640,6 @@ def insert_barriers(kernel, schedule, reverse, kind, verify_only, level=0):
                         barrier_kind_more_or_equally_global(
                             sub_sched_item.kind, kind)):
 
-                    seen_barrier()
                     last_barrier_index = j
                     if first_barrier_index is None:
                         first_barrier_index = j
@@ -1683,6 +1682,7 @@ def insert_barriers(kernel, schedule, reverse, kind, verify_only, level=0):
             if last_barrier_index is None:
                 candidates.update(insn_ids_from_schedule(subresult))
             else:
+                seen_barrier()
                 candidates.update(
                         insn_ids_from_schedule(
                             subresult[last_barrier_index+1:]))
diff --git a/loopy/version.py b/loopy/version.py
index b3505973bbfecf229e5675565eca9a609a23b49f..503b6c54eca544e1ca0ef67571a4469ad9c4c5fe 100644
--- a/loopy/version.py
+++ b/loopy/version.py
@@ -32,4 +32,4 @@ except ImportError:
 else:
     _islpy_version = islpy.version.VERSION_TEXT
 
-DATA_MODEL_VERSION = "v51-islpy%s" % _islpy_version
+DATA_MODEL_VERSION = "v52-islpy%s" % _islpy_version
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 6b607109678c0b280113707ee77c0ede7df8f72d..48ccd8ee024325150f8686185678eeb64a7395dd 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -1995,6 +1995,52 @@ def test_integer_reduction(ctx_factory):
             assert function(out)
 
 
+def assert_barrier_between(knl, id1, id2):
+    """Assert that a barrier is scheduled after *id1* and before *id2*."""
+    from loopy.schedule import Barrier, RunInstruction
+    armed = have_barrier = False
+    for item in knl.schedule:
+        if isinstance(item, RunInstruction):
+            insn_id = item.insn_id
+            if insn_id == id1:
+                armed = True
+            elif insn_id == id2:
+                # id1 and a later barrier must both precede id2.
+                assert armed
+                assert have_barrier
+                return
+        # A barrier only counts once id1 has been encountered.
+        if armed and isinstance(item, Barrier):
+            have_barrier = True
+    raise RuntimeError("id2 was not seen")
+
+
+def test_barrier_insertion_near_top_of_loop():
+    """Expect a barrier between each consecutive pair of instructions."""
+    knl = lp.make_kernel(
+        "{[i,j]: 0 <= i,j < 10 }",
+        """
+        for i
+         <>a[i] = i  {id=ainit}
+         for j
+          <>t = a[(i + 1) % 10]  {id=tcomp}
+          <>b[i,j] = a[i] + t   {id=bcomp1}
+          b[i,j] = b[i,j] + 1  {id=bcomp2}
+         end
+        end
+        """,
+        seq_dependencies=True)
+    knl = lp.tag_inames(knl, {"i": "l.0"})
+    # Both temporaries are given the "local" scope before scheduling.
+    for temporary in ("a", "b"):
+        knl = lp.set_temporary_scope(knl, temporary, "local")
+    scheduled = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl))
+    print(scheduled)
+    for earlier, later in [
+            ("ainit", "tcomp"), ("tcomp", "bcomp1"), ("bcomp1", "bcomp2")]:
+        assert_barrier_between(scheduled, earlier, later)
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])