NOTE(review): line structure restored from a whitespace-collapsed copy of this
patch. Hunk line counts were checked against the @@ headers; the leading
indentation of context lines, the whitespace inside the kernel string, and the
positions of blank lines are reconstructed -- confirm against upstream, or
apply with `git apply --ignore-whitespace`.

Summary, as far as the hunks show:
  * loopy/schedule/__init__.py (insert_barriers): seen_barrier() is no longer
    called in the branch that records first_barrier_index/last_barrier_index;
    it is now called in the else-branch of `if last_barrier_index is None`,
    right before candidates.update(...) for the items after the last barrier.
  * loopy/version.py: DATA_MODEL_VERSION prefix goes from v51 to v52.
  * test/test_loopy.py: adds the helper assert_barrier_between() and the test
    test_barrier_insertion_near_top_of_loop().

diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py
index e23e5f350864a623d2ff05551fc2d559c361d734..545c5334451623ba291158d64635fc6bf9904d0d 100644
--- a/loopy/schedule/__init__.py
+++ b/loopy/schedule/__init__.py
@@ -1640,7 +1640,6 @@ def insert_barriers(kernel, schedule, reverse, kind, verify_only, level=0):
                         barrier_kind_more_or_equally_global(
                             sub_sched_item.kind, kind)):
 
-                    seen_barrier()
                     last_barrier_index = j
                     if first_barrier_index is None:
                         first_barrier_index = j
@@ -1683,6 +1682,7 @@ def insert_barriers(kernel, schedule, reverse, kind, verify_only, level=0):
             if last_barrier_index is None:
                 candidates.update(insn_ids_from_schedule(subresult))
             else:
+                seen_barrier()
                 candidates.update(
                         insn_ids_from_schedule(
                             subresult[last_barrier_index+1:]))
diff --git a/loopy/version.py b/loopy/version.py
index b3505973bbfecf229e5675565eca9a609a23b49f..503b6c54eca544e1ca0ef67571a4469ad9c4c5fe 100644
--- a/loopy/version.py
+++ b/loopy/version.py
@@ -32,4 +32,4 @@ except ImportError:
 else:
     _islpy_version = islpy.version.VERSION_TEXT
 
-DATA_MODEL_VERSION = "v51-islpy%s" % _islpy_version
+DATA_MODEL_VERSION = "v52-islpy%s" % _islpy_version
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 6b607109678c0b280113707ee77c0ede7df8f72d..48ccd8ee024325150f8686185678eeb64a7395dd 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -1995,6 +1995,52 @@ def test_integer_reduction(ctx_factory):
     assert function(out)
 
 
+def assert_barrier_between(knl, id1, id2):
+    from loopy.schedule import RunInstruction, Barrier
+    watch_for_barrier = False
+    seen_barrier = False
+
+    for sched_item in knl.schedule:
+        if isinstance(sched_item, RunInstruction):
+            if sched_item.insn_id == id1:
+                watch_for_barrier = True
+            elif sched_item.insn_id == id2:
+                assert watch_for_barrier
+                assert seen_barrier
+                return
+        if isinstance(sched_item, Barrier):
+            if watch_for_barrier:
+                seen_barrier = True
+
+    raise RuntimeError("id2 was not seen")
+
+
+def test_barrier_insertion_near_top_of_loop():
+    knl = lp.make_kernel(
+        "{[i,j]: 0 <= i,j < 10 }",
+        """
+        for i
+            <>a[i] = i {id=ainit}
+            for j
+                <>t = a[(i + 1) % 10] {id=tcomp}
+                <>b[i,j] = a[i] + t {id=bcomp1}
+                b[i,j] = b[i,j] + 1 {id=bcomp2}
+            end
+        end
+        """,
+        seq_dependencies=True)
+    knl = lp.tag_inames(knl, dict(i="l.0"))
+    knl = lp.set_temporary_scope(knl, "a", "local")
+    knl = lp.set_temporary_scope(knl, "b", "local")
+    knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl))
+
+    print(knl)
+
+    assert_barrier_between(knl, "ainit", "tcomp")
+    assert_barrier_between(knl, "tcomp", "bcomp1")
+    assert_barrier_between(knl, "bcomp1", "bcomp2")
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])