diff --git a/MEMO b/MEMO index 511306dbab09ec61559443629b1b5ab904b88cae..4d1acf21bc21bc9189152406048a1b217f6f78fc 100644 --- a/MEMO +++ b/MEMO @@ -45,8 +45,6 @@ To-do - Kernel splitting (via what variables get computed in a kernel) -- Test join_inames - - Make tests run on GPUs - Fuse: store/fetch elimination? @@ -111,6 +109,8 @@ Future ideas Dealt with ^^^^^^^^^^ +- Test join_inames + - Divisibility, modulo, strides? -> Tested, gives correct (but suboptimal) code. diff --git a/loopy/__init__.py b/loopy/__init__.py index ca443db752f54498ca17b82a530d4e511cc095e6..330de2da16f576fe4959302ac29cf8eef5f3e867 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -193,12 +193,19 @@ def join_inames(kernel, inames, new_iname=None, tag=AutoFitLocalIndexTag()): if new_iname is None: new_iname = kernel.make_unique_var_name("_and_".join(inames)) - new_domain = kernel.domain + from loopy.kernel import DomainChanger + domch = DomainChanger(kernel, frozenset(inames)) + for iname in inames: + if kernel.get_home_domain_index(iname) != domch.leaf_domain_index: + raise RuntimeError("iname '%s' is not 'at home' in the " + "join's leaf domain" % iname) + + new_domain = domch.domain new_dim_idx = new_domain.dim(dim_type.set) new_domain = new_domain.add_dims(dim_type.set, 1) new_domain = new_domain.set_dim_name(dim_type.set, new_dim_idx, new_iname) - joint_aff = zero = isl.Aff.zero_on_domain(kernel.space) + joint_aff = zero = isl.Aff.zero_on_domain(new_domain.space) subst_dict = {} base_divisor = 1 @@ -253,7 +260,7 @@ def join_inames(kernel, inames, new_iname=None, tag=AutoFitLocalIndexTag()): else: result.add(iname) - return result + return frozenset(result) new_insns = [ insn.copy( @@ -265,7 +272,8 @@ def join_inames(kernel, inames, new_iname=None, tag=AutoFitLocalIndexTag()): result = (kernel .map_expressions(subst_map, exclude_instructions=True) .copy( - instructions=new_insns, domain=new_domain, + instructions=new_insns, + domains=domch.get_domains_with(new_domain), applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_map] )) diff --git a/loopy/kernel.py b/loopy/kernel.py index 5add79c782e12ca14ca223903c5f759f2398e4c9..01ba6e236daeb3019d1842010725582303f100e6 100644 --- a/loopy/kernel.py +++ b/loopy/kernel.py @@ -1329,20 +1329,6 @@ class LoopKernel(Record): return result - # }}} - - # {{{ examine domains - - for i_dom, dom in enumerate(self.domains): - for iname in dom.get_var_names(dim_type.set): - for par_iname in dom.get_var_names(dim_type.param): - if par_iname in all_inames: - result[iname].add(par_iname) - - # }}} - - return result - # }}} # {{{ read and written variables diff --git a/test/test_loopy.py b/test/test_loopy.py index c3029091538ed94ff2558e1080fab8c3b0467792..3849318bae0234081646ed1fb3717d9a7ec12436 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -61,6 +61,34 @@ def test_wg_too_small(ctx_factory): +def test_join_inames(ctx_factory): + ctx = ctx_factory() + + knl = lp.make_kernel(ctx.devices[0], + "{[i,j]: 0<=i,j<16}", + [ + "b[i,j] = 2*a[i,j]" + ], + [ + lp.GlobalArg("a", np.float32, shape=(16, 16,)), + lp.GlobalArg("b", np.float32, shape=(16, 16,)) + ], + ) + + ref_knl = knl + + knl = lp.add_prefetch(knl, "a", sweep_inames=["i", "j"]) + knl = lp.join_inames(knl, ["a_dim_0", "a_dim_1"]) + + kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.check_kernels(kernel_gen) + + lp.auto_test_vs_ref(ref_knl, ctx, kernel_gen) + + + + + def test_multi_cse(ctx_factory): ctx = ctx_factory()