diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py
index 7cc381f11d1239cba5656a9dc7a04cddaa14a368..61f4b3a9b8c38dfc25ebc81243812aa963423f8a 100644
--- a/loopy/codegen/bounds.py
+++ b/loopy/codegen/bounds.py
@@ -63,13 +63,23 @@ def get_usable_inames_for_conditional(kernel, sched_index):
     result = find_active_inames_at(kernel, sched_index)
     crosses_barrier = has_barrier_within(kernel, sched_index)
 
-    # Find our containing subkernel, grab inames for all insns from there.
-
-    subkernel_index = sched_index
-    from loopy.schedule import CallKernel
-
-    while not isinstance(kernel.schedule[subkernel_index], CallKernel):
-        subkernel_index -= 1
+    # Find our containing subkernel. Grab inames for all insns from there.
+    from loopy.schedule import CallKernel, ReturnFromKernel
+
+    # Walk the schedule from its start up to and including sched_index,
+    # remembering the most recent CallKernel and whether a ReturnFromKernel
+    # has closed it again before we reach sched_index.
+    within_subkernel = False
+    for sched_item_index, sched_item in enumerate(kernel.schedule[:sched_index+1]):
+        if isinstance(sched_item, CallKernel):
+            within_subkernel = True
+            subkernel_index = sched_item_index
+        elif isinstance(sched_item, ReturnFromKernel):
+            within_subkernel = False
+
+    if not within_subkernel:
+        # Outside all subkernels - use only inames available to host.
+        return frozenset(result)
 
     insn_ids_for_subkernel = get_insn_ids_for_block_at(
         kernel.schedule, subkernel_index)
diff --git a/loopy/version.py b/loopy/version.py
index 77d0e21bdd2ef5383c5f874656c25fe1ede21a70..4c6dfbcc22c0ea38668c663eb858161471a3999a 100644
--- a/loopy/version.py
+++ b/loopy/version.py
@@ -32,4 +32,4 @@ except ImportError:
 else:
     _islpy_version = islpy.version.VERSION_TEXT
 
-DATA_MODEL_VERSION = "v60-islpy%s" % _islpy_version
+DATA_MODEL_VERSION = "v62-islpy%s" % _islpy_version
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 77fd49e07b8d884fd12324e735770b8d5a488b48..4042cc92082ef21cfa9a823d81e0f8be21a9bccc 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2231,6 +2231,29 @@ def test_struct_assignment(ctx_factory):
     knl(queue, N=200)
 
 
+def test_inames_conditional_generation(ctx_factory):
+    ctx = ctx_factory()
+    knl = lp.make_kernel(
+            "{[i,j,k]: 0 < k < i and 0 < j < 10 and 0 < i < 10}",
+            """
+            for k
+                ... gbarrier
+                <>tmp1 = 0
+            end
+            for j
+                ... gbarrier
+                <>tmp2 = i
+            end
+            """,
+            "...",
+            seq_dependencies=True)
+
+    knl = lp.tag_inames(knl, dict(i="g.0"))
+
+    with cl.CommandQueue(ctx) as queue:
+        knl(queue)
+
+
 def test_kernel_var_name_generator():
     knl = lp.make_kernel(
             "{[i]: 0 <= i <= 10}",