From 624cef0619736598116c8ab84be8a91683750beb Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 12 Oct 2011 21:36:09 -0400 Subject: [PATCH] Restrict condition hoisting to actually used inames. --- loopy/codegen/dispatch.py | 8 +++++++- loopy/schedule.py | 23 +++++++++++++++++++++++ test/test_matmul.py | 2 +- 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/loopy/codegen/dispatch.py b/loopy/codegen/dispatch.py index 9d1f0261a..f9c29a08a 100644 --- a/loopy/codegen/dispatch.py +++ b/loopy/codegen/dispatch.py @@ -146,8 +146,14 @@ def build_loop_nest(kernel, sched_index, codegen_state): # Success: found a big enough group of inames for a conditional. # See if there are bounds checks available for that set. + from loopy.schedule import find_used_inames_within + used_inames = set() + for subsched_index, _ in sched_indices_and_cond_inames[0:idx]: + used_inames |= find_used_inames_within(kernel, subsched_index) + from loopy.codegen.bounds import generate_bounds_checks - bounds_checks = generate_bounds_checks(kernel.domain, current_iname_set, + bounds_checks = generate_bounds_checks(kernel.domain, + current_iname_set & used_inames, codegen_state.implemented_domain) else: bounds_checks = [] diff --git a/loopy/schedule.py b/loopy/schedule.py index 55137d1d8..5cbf88471 100644 --- a/loopy/schedule.py +++ b/loopy/schedule.py @@ -759,6 +759,29 @@ def has_barrier_within(kernel, sched_index): else: return False + + + +def find_used_inames_within(kernel, sched_index): + sched_item = kernel.schedule[sched_index] + + if isinstance(sched_item, EnterLoop): + loop_contents, _ = gather_schedule_subloop( + kernel.schedule, sched_index) + run_insns = [subsched_item + for subsched_item in loop_contents + if isinstance(subsched_item, RunInstruction)] + elif isinstance(sched_item, RunInstruction): + run_insns = [sched_item] + else: + return set() + + result = set() + for sched_item in run_insns: + result.update(kernel.id_to_insn[sched_item.insn_id].all_inames()) + + return result + # }}} diff --git a/test/test_matmul.py b/test/test_matmul.py index 42ebf7e9e..71b1d409e 100644 --- a/test/test_matmul.py +++ b/test/test_matmul.py @@ -211,7 +211,7 @@ def test_plain_matrix_mul_new_ui(ctx_factory): lp.ArrayArg("c", dtype, shape=(n, n), order=order), lp.ScalarArg("n", np.int32, approximately=n), ], - name="matmul", assumptions="n >= 1") + name="matmul", assumptions="n >= 16") knl = lp.split_dimension(knl, "i", 16, outer_tag="g.0", inner_tag="l.1", no_slabs=True) -- GitLab