From 94b65d29ee2868abc1068adee0dc5273dcf0e5ac Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 20 May 2017 19:06:57 -0500 Subject: [PATCH 1/6] get_usable_inames_for_conditional(): Fix inames finding in the case that we are outside a subkernel (closes #65). --- loopy/codegen/bounds.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 7cc381f11..ae91abd56 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -65,11 +65,20 @@ def get_usable_inames_for_conditional(kernel, sched_index): # Find our containing subkernel, grab inames for all insns from there. - subkernel_index = sched_index - from loopy.schedule import CallKernel - - while not isinstance(kernel.schedule[subkernel_index], CallKernel): - subkernel_index -= 1 + within_subkernel = False + + for prev_sched_index, sched_item in enumerate(kernel.schedule): + if prev_sched_index == sched_index: + if not within_subkernel: + # Outside all subkernels - use only inames available to device. + return frozenset(result) + + from loopy.schedule import CallKernel, ReturnFromKernel + if isinstance(sched_item, CallKernel): + within_subkernel = True + subkernel_index = prev_sched_index + elif isinstance(sched_item, ReturnFromKernel): + within_subkernel = False insn_ids_for_subkernel = get_insn_ids_for_block_at( kernel.schedule, subkernel_index) -- GitLab From 41d8dd365dc4180141278fcbdebb8536f5a3ed77 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 20 May 2017 19:13:27 -0500 Subject: [PATCH 2/6] Fix comment. --- loopy/codegen/bounds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index ae91abd56..442985e6a 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -70,7 +70,7 @@ def get_usable_inames_for_conditional(kernel, sched_index): for prev_sched_index, sched_item in enumerate(kernel.schedule): if prev_sched_index == sched_index: if not within_subkernel: - # Outside all subkernels - use only inames available to device. + # Outside all subkernels - use only inames available to host. return frozenset(result) from loopy.schedule import CallKernel, ReturnFromKernel -- GitLab From b4783e9c9aaa2baa269d4016b83573643a979d78 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 20 May 2017 19:13:40 -0500 Subject: [PATCH 3/6] Bump kernel version. --- loopy/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/version.py b/loopy/version.py index 77d0e21bd..18f4aafdc 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -32,4 +32,4 @@ except ImportError: else: _islpy_version = islpy.version.VERSION_TEXT -DATA_MODEL_VERSION = "v60-islpy%s" % _islpy_version +DATA_MODEL_VERSION = "v61-islpy%s" % _islpy_version -- GitLab From b1eacb5e7d139c0267f2d321152b741f3841e6e8 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sun, 21 May 2017 15:00:33 -0500 Subject: [PATCH 4/6] Add test. --- test/test_loopy.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/test/test_loopy.py b/test/test_loopy.py index 4bb6a2726..f00477f6b 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2231,6 +2231,29 @@ def test_struct_assignment(ctx_factory): knl(queue, N=200) +def test_inames_conditional_generation(ctx_factory): + ctx = ctx_factory() + knl = lp.make_kernel( + "{[i,j,k]: 0 < k < i and 0 < j < 10 and 0 < i < 10}", + """ + for k + ... gbarrier + <>tmp1 = 0 + end + for j + ... gbarrier + <>tmp2 = i + end + """, + "...", + seq_dependencies=True) + + knl = lp.tag_inames(knl, dict(i="g.0")) + + with cl.CommandQueue(ctx) as queue: + knl(queue) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From 1bce09a2b4a85c3bb03c58a952ea169c5b776fb0 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 25 May 2017 12:22:42 -0500 Subject: [PATCH 5/6] get_usable_inames_for_conditional(): Simplify loop for finding the containing subkernel. Rename prev_sched_index to sched_item_index. Fix a case where the loop would exit too late. --- loopy/codegen/bounds.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 442985e6a..61f4b3a9b 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -63,23 +63,21 @@ def get_usable_inames_for_conditional(kernel, sched_index): result = find_active_inames_at(kernel, sched_index) crosses_barrier = has_barrier_within(kernel, sched_index) - # Find our containing subkernel, grab inames for all insns from there. - + # Find our containing subkernel. Grab inames for all insns from there. within_subkernel = False - for prev_sched_index, sched_item in enumerate(kernel.schedule): - if prev_sched_index == sched_index: - if not within_subkernel: - # Outside all subkernels - use only inames available to host. - return frozenset(result) - + for sched_item_index, sched_item in enumerate(kernel.schedule[:sched_index+1]): from loopy.schedule import CallKernel, ReturnFromKernel if isinstance(sched_item, CallKernel): within_subkernel = True - subkernel_index = prev_sched_index + subkernel_index = sched_item_index elif isinstance(sched_item, ReturnFromKernel): within_subkernel = False + if not within_subkernel: + # Outside all subkernels - use only inames available to host. + return frozenset(result) + insn_ids_for_subkernel = get_insn_ids_for_block_at( kernel.schedule, subkernel_index) -- GitLab From 875fabdcbe45503e759e1e8e06e8e814a3f92d8c Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Thu, 25 May 2017 12:26:16 -0500 Subject: [PATCH 6/6] Bump kernel version for 1bce09a2b4a85c3bb03c58a952ea169c5b776fb0 --- loopy/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/version.py b/loopy/version.py index 18f4aafdc..4c6dfbcc2 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -32,4 +32,4 @@ except ImportError: else: _islpy_version = islpy.version.VERSION_TEXT -DATA_MODEL_VERSION = "v61-islpy%s" % _islpy_version +DATA_MODEL_VERSION = "v62-islpy%s" % _islpy_version -- GitLab