diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 7cc381f11d1239cba5656a9dc7a04cddaa14a368..ae91abd56633c823923daac34bde84b84bffee58 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -65,11 +65,20 @@ def get_usable_inames_for_conditional(kernel, sched_index): # Find our containing subkernel, grab inames for all insns from there. - subkernel_index = sched_index - from loopy.schedule import CallKernel - - while not isinstance(kernel.schedule[subkernel_index], CallKernel): - subkernel_index -= 1 + within_subkernel = False + + for prev_sched_index, sched_item in enumerate(kernel.schedule): + if prev_sched_index == sched_index: + if not within_subkernel: + # Outside all subkernels - use only inames available to device. + return frozenset(result) + + from loopy.schedule import CallKernel, ReturnFromKernel + if isinstance(sched_item, CallKernel): + within_subkernel = True + subkernel_index = prev_sched_index + elif isinstance(sched_item, ReturnFromKernel): + within_subkernel = False insn_ids_for_subkernel = get_insn_ids_for_block_at( kernel.schedule, subkernel_index)