From 8eb5aa1890248485b160794b47656ebcc8148b31 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 9 Jun 2017 23:08:03 -0500 Subject: [PATCH 1/2] get_usable_inames_for_conditional(): Only pull in inames that are common to all instructions in the block. Closes #71 on gitlab --- loopy/codegen/bounds.py | 39 +++++++++++++++++++++++++++++---------- test/test_loopy.py | 26 ++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 10 deletions(-) diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py index 61f4b3a9b..93a34d233 100644 --- a/loopy/codegen/bounds.py +++ b/loopy/codegen/bounds.py @@ -58,7 +58,6 @@ def get_approximate_convex_bounds_checks(domain, check_inames, implemented_domai def get_usable_inames_for_conditional(kernel, sched_index): from loopy.schedule import ( find_active_inames_at, get_insn_ids_for_block_at, has_barrier_within) - from loopy.kernel.data import ParallelTag, LocalIndexTagBase, IlpBaseTag result = find_active_inames_at(kernel, sched_index) crosses_barrier = has_barrier_within(kernel, sched_index) @@ -78,15 +77,35 @@ def get_usable_inames_for_conditional(kernel, sched_index): # Outside all subkernels - use only inames available to host. return frozenset(result) - insn_ids_for_subkernel = get_insn_ids_for_block_at( - kernel.schedule, subkernel_index) + insn_ids_for_block = list( + get_insn_ids_for_block_at(kernel.schedule, subkernel_index)) + + # Iterate through the block, and pick out the common set of admissible inames. + # + # The admissible inames must be common to all instructions in the block to + # ensure that constraints from irrelevant inames don't get pulled into the + # conditional. + + if len(insn_ids_for_block) == 0: + return frozenset(result) + + common_admissible_inames = _pick_out_admissible_inames( + kernel, insn_ids_for_block[0], crosses_barrier) - inames_for_subkernel = ( - iname - for insn in insn_ids_for_subkernel - for iname in kernel.insn_inames(insn)) + from itertools import islice + for insn_id in islice(insn_ids_for_block, 1, None): + common_admissible_inames &= kernel.insn_inames(insn_id) + + return frozenset(result | common_admissible_inames) + + +def _pick_out_admissible_inames(kernel, insn_id, crosses_barrier): + # Given an instruction, pick out the set of (parallel) inames on + # which a conditional may depend. + from loopy.kernel.data import ParallelTag, LocalIndexTagBase, IlpBaseTag + admissible_inames = [] - for iname in inames_for_subkernel: + for iname in kernel.insn_inames(insn_id): tag = kernel.iname_to_tag.get(iname) # Parallel inames are defined within a subkernel, BUT: @@ -101,9 +120,9 @@ def get_usable_inames_for_conditional(kernel, sched_index): and not (isinstance(tag, LocalIndexTagBase) and crosses_barrier) and not isinstance(tag, IlpBaseTag) ): - result.add(iname) + admissible_inames.append(iname) - return frozenset(result) + return set(admissible_inames) # }}} diff --git a/test/test_loopy.py b/test/test_loopy.py index 21db62610..921fe8a4a 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2321,6 +2321,32 @@ def test_inames_conditional_generation(ctx_factory): knl(queue) +def test_inames_conditional_generation_avoids_irrelevant_constraints(ctx_factory): + ctx = ctx_factory() + knl = lp.make_kernel( + "{[i, loc1, loc2]: 0 <= loc1 <= 1 and 0 <= loc2 <= 2" + " and 0 <= i <= loc1 and 0 <= i <= loc2}", + """ + <>tmp[loc2] = 0 + + for i + tmp[i] = 1 {inames=i:loc2} + end + + out[loc2] = tmp[loc2] + """, + "...", + seq_dependencies=True) + + knl = lp.tag_inames(knl, dict(loc1="l.0", loc2="l.0")) + knl = lp.set_temporary_scope(knl, "tmp", "local") + + with cl.CommandQueue(ctx) as queue: + evt, (out,) = knl(queue) + + assert all(out.get() == 1) + + def test_kernel_var_name_generator(): knl = lp.make_kernel( "{[i]: 0 <= i <= 10}", -- GitLab From 442149e693961275471f07331ec1ab4ee6994520 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 9 Jun 2017 23:37:17 -0500 Subject: [PATCH 2/2] Bump kernel version for 8eb5aa1890248485b160794b47656ebcc8148b31 --- loopy/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/version.py b/loopy/version.py index 8516ce006..02244f55d 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -32,4 +32,4 @@ except ImportError: else: _islpy_version = islpy.version.VERSION_TEXT -DATA_MODEL_VERSION = "v63-islpy%s" % _islpy_version +DATA_MODEL_VERSION = "v64-islpy%s" % _islpy_version -- GitLab