From b6c095e8166dfeaa3e16abbfa5e77d9533310e26 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sun, 4 Dec 2016 21:04:41 -0600 Subject: [PATCH 1/3] codegen.loop: Use PwAff.is_equal() to test for lower and upper bound equality. --- loopy/codegen/loop.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 8ac963835..e77465f09 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -431,13 +431,9 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index): astb = codegen_state.ast_builder - zero = isl.PwAff.zero_on_domain( - isl.LocalSpace.from_space( - lbound.get_space()).domain()) - from loopy.symbolic import pw_aff_to_expr - if (ubound - lbound).plain_is_equal(zero): + if ubound.is_equal(lbound): # single-trip, generate just a variable assignment, not a loop inner = merge_codegen_results(codegen_state, [ astb.emit_initializer( -- GitLab From 35981b68e46dfdbe47204659c975b678c62204ce Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sun, 4 Dec 2016 21:31:48 -0600 Subject: [PATCH 2/3] Loop bounds finder: gist against implemented domain. --- loopy/codegen/loop.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index e77465f09..81686878d 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -386,17 +386,24 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index): _, loop_iname_idx = dom_and_slab.get_var_dict()[loop_iname] + impl_domain = isl.align_spaces( + codegen_state.implemented_domain, + dom_and_slab, + obj_bigger_ok=True, + across_dim_types=True + ).params() + lbound = ( kernel.cache_manager.dim_min( dom_and_slab, loop_iname_idx) .gist(kernel.assumptions) - .gist(dom_and_slab.params()) + .gist(impl_domain) .coalesce()) ubound = ( kernel.cache_manager.dim_max( dom_and_slab, loop_iname_idx) .gist(kernel.assumptions) - .gist(dom_and_slab.params()) + .gist(impl_domain) .coalesce()) # }}} -- GitLab From d4740920cdc35ddee7b62b8f32fb8e806f76db23 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Mon, 5 Dec 2016 01:12:52 -0600 Subject: [PATCH 3/3] Address a subtle mismatch involving what loop bounds the code thought were implemented. The issue comes from the fact that a loop bound predicated on a parameter such as [n] -> { [(-1 + n - floor((3n)/4))] : n > 0 } (which may result from slabbing) will be converted by pw_aff_to_expr() to an unconditional expression, even though the original pw aff is conditional. As a result the previous code might not actually ensure that n > 0. (In prior versions the issue was masked by taking the gist against dom_and_slab, which would remove the constraint that n > 0.) The fix is to teach the code about the actual bounds that get implemented. --- loopy/codegen/loop.py | 9 +++++++-- loopy/symbolic.py | 17 +++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 81686878d..ad80475c1 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -410,12 +410,17 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index): # {{{ find implemented loop, build inner code - from loopy.isl_helpers import make_loop_bounds_from_pwaffs + from loopy.symbolic import pw_aff_to_pw_aff_implemented_by_expr + impl_lbound = pw_aff_to_pw_aff_implemented_by_expr(lbound) + impl_ubound = pw_aff_to_pw_aff_implemented_by_expr(ubound) # impl_loop may be overapproximated + from loopy.isl_helpers import make_loop_bounds_from_pwaffs impl_loop = make_loop_bounds_from_pwaffs( dom_and_slab.space, - loop_iname, lbound, ubound) + loop_iname, + impl_lbound, + impl_ubound) for iname in moved_inames: dt, idx = impl_loop.get_var_dict()[iname] diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 430c65158..52fd6e57f 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1150,6 +1150,23 @@ def pw_aff_to_expr(pw_aff, int_ok=False): return expr + +def pw_aff_to_pw_aff_implemented_by_expr(pw_aff): + pieces = pw_aff.get_pieces() + + rest = isl.Set.universe(pw_aff.space.params()) + aff_set, aff = pieces[0] + impl_pw_aff = isl.PwAff.alloc(aff_set, aff) + rest = rest.intersect_params(aff_set.complement()) + + for aff_set, aff in pieces[1:-1]: + impl_pw_aff = impl_pw_aff.union_max( + isl.PwAff.alloc(aff_set, aff)) + rest = rest.intersect_params(aff_set.complement()) + + _, aff = pieces[-1] + return impl_pw_aff.union_max(isl.PwAff.alloc(rest, aff)).coalesce() + # }}} -- GitLab