From e4fe5396cbe8d3fb8b18587f01edb37766d431fc Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Fri, 22 Jul 2011 10:09:10 -0500 Subject: [PATCH] Fix, better messages from conflict detector. --- examples/matrix-ops.py | 9 +++------ loopy/__init__.py | 13 +++++++++---- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/examples/matrix-ops.py b/examples/matrix-ops.py index eb1ffb7bf..e1f81b878 100644 --- a/examples/matrix-ops.py +++ b/examples/matrix-ops.py @@ -136,12 +136,9 @@ def image_matrix_mul(ctx_factory=cl.create_some_context): knl = lp.split_dimension(knl, "i", 16, outer_tag="g.0", inner_tag="l.1") knl = lp.split_dimension(knl, "j", 16, outer_tag="g.1", inner_tag="l.0") knl = lp.split_dimension(knl, "k", 32) - # slow, but conflict-free - #knl = lp.add_prefetch(knl, 'a', ["i_inner", "k_inner"]) - #knl = lp.add_prefetch(knl, 'b', ["j_inner", "k_inner"]) - # fast - knl = lp.add_prefetch(knl, 'a', ["k_inner", "i_inner"]) - knl = lp.add_prefetch(knl, 'b', ["k_inner", "j_inner", ]) + # conflict-free + knl = lp.add_prefetch(knl, 'a', ["i_inner", "k_inner"]) + knl = lp.add_prefetch(knl, 'b', ["j_inner", "k_inner"]) assert knl.get_invalid_reason() is None kernel_gen = (lp.insert_register_prefetches(knl) diff --git a/loopy/__init__.py b/loopy/__init__.py index 78ee835a2..07c2450a6 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -38,6 +38,9 @@ register_mpz_with_pymbolic() # TODO: Custom reductions per red. axis +# TODO Tim: implement efficient div_ceil? +# TODO Tim: why are corner cases inefficient? + @@ -1873,6 +1876,7 @@ def preprocess_prefetch(kernel): min_mult = cl_char.local_memory_bank_count(kernel.device) good_incr = None new_dsl = dim_storage_lengths + min_why_not = None for increment in range(dim_storage_lengths[-1]//2): @@ -1884,21 +1888,22 @@ def preprocess_prefetch(kernel): # will choose smallest increment 'automatically' if new_mult < min_mult: - new_lmem_use = other_pf_sizes+pf.itemsize*product(new_dsl) + new_lmem_use = other_pf_sizes + pf.itemsize*product(new_dsl) if new_lmem_use < lmem_size: new_dsl = test_dsl min_mult = new_mult + min_why_not = why_not good_incr = increment if min_mult != 1: from warnings import warn warn("could not find a conflict-free mem layout " "for prefetch of '%s' " - "(currently: %dx conflict, increment: %d)" - % (pf.input_vector, min_mult, good_incr), + "(currently: %dx conflict, increment: %d, reason: %s)" + % (pf.input_vector, min_mult, good_incr, min_why_not), LoopyAdvisory) - new_pf = pf.copy(dim_storage_lengths=dim_storage_lengths, + new_pf = pf.copy(dim_storage_lengths=new_dsl, name="prefetch_%s_%d" % (pf.input_vector, i_pf)) new_prefetch_dict[pf.input_vector, pf.index_expr] = new_pf all_pf_list[i_pf] = new_pf -- GitLab