From b39551edad12d6497b9e7d0831453c57edf7bc55 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 24 Aug 2018 22:39:32 -0400 Subject: [PATCH 1/3] Implement first version of work removal transform --- loopy/transform/instruction.py | 129 +++++++++++++++++++++++++++++++++ test/test_transform.py | 32 ++++++++ 2 files changed, 161 insertions(+) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index e6ecb4093..e251ef42e 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -25,6 +25,7 @@ THE SOFTWARE. import six # noqa from loopy.diagnostic import LoopyError +from loopy.symbolic import CombineMapper # {{{ find_instructions @@ -357,4 +358,132 @@ def uniquify_instruction_ids(kernel): # }}} +# {{{ remove_work + +class _MemAccessGatherer(CombineMapper): + def __init__(self, kernel, address_space): + self.kernel = kernel + self.address_space = address_space + + def combine(self, values): + from pytools import flatten + return set(flatten(values)) + + def map_constant(self, expr): + return set() + + def map_algebraic_leaf(self, expr): + return set() + + def _map_access(self, expr, name, index): + if name in self.kernel.all_inames(): + return set() + + descr = self.kernel.get_var_descriptor(name) + if descr.address_space == self.address_space: + result = set([expr]) + else: + result = set() + + return result | self.rec(index) + + def map_variable(self, expr): + return self._map_access(expr, expr.name, ()) + + def map_subscript(self, expr): + import pymbolic.primitives as p + assert isinstance(expr.aggregate, p.Variable) + return self._map_access(expr, expr.aggregate.name, expr.index) + + +def remove_work(kernel): + """This transform removes operations in a kernel, leaving only + accesses to global memory. + + .. note:: + + This routine will currently not work correctly in the presence of + data-dependent flow control or memory access. + """ + import loopy as lp + import pymbolic.primitives as p + + kernel = lp.preprocess_kernel(kernel) + + gatherer = _MemAccessGatherer(kernel, lp.AddressSpace.GLOBAL) + + from loopy.kernel.instruction import MultiAssignmentBase, make_assignment + + # maps each old ID to a frozenset of new IDs + old_to_new_ids = {} + new_instructions = [] + insn_id_gen = kernel.get_instruction_id_generator() + + var_name_gen = kernel.get_var_name_generator() + private_var_name = var_name_gen() + new_temporary_variables = kernel.temporary_variables.copy() + new_temporary_variables[private_var_name] = lp.TemporaryVariable( + private_var_name, address_space=lp.AddressSpace.PRIVATE) + + # {{{ rewrite instructions + + for insn in kernel.instructions: + if not isinstance(insn, MultiAssignmentBase): + new_instructions.append(insn) + old_to_new_ids[insn.id] = frozenset([insn.id]) + continue + + writer_accesses = set.union(*[ + gatherer(lhs) for lhs in insn.assignees]) + + reader_accesses = gatherer(insn.expression) + + new_insn_ids = set() + for read_expr in reader_accesses: + new_id = insn_id_gen(insn.id) + new_instructions.append( + make_assignment( + (p.Variable(private_var_name),), + p.Variable(private_var_name) + read_expr, + id=new_id, + within_inames=insn.within_inames, + depends_on=insn.depends_on)) + new_insn_ids.add(new_id) + + for write_expr in writer_accesses: + new_id = insn_id_gen(insn.id) + new_instructions.append( + make_assignment( + (write_expr,), + 17, + id=new_id, + within_inames=insn.within_inames, + depends_on=insn.depends_on)) + new_insn_ids.add(new_id) + + old_to_new_ids[insn.id] = frozenset(new_insn_ids) + + # }}} + + # {{{ rewrite dependencies for new IDs + + new_instructions_2 = [] + + for insn in new_instructions: + new_instructions_2.append( + insn.copy( + depends_on=frozenset( + subdep + for dep in insn.depends_on + for subdep in old_to_new_ids[dep]))) + + # }}} + + return kernel.copy( + state=lp.KernelState.INITIAL, + instructions=new_instructions_2, + temporary_variables=new_temporary_variables) + +# }}} + # vim: foldmethod=marker diff --git a/test/test_transform.py b/test/test_transform.py index ed184fb50..ed00ebd1b 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -533,6 +533,38 @@ def test_uniquify_instruction_ids(): assert all(isinstance(id, str) for id in insn_ids) +def test_remove_work(ctx_factory): + ctx = ctx_factory() + + knl = lp.make_kernel( + "{[i,j]: 0<= i,j < n}", + [ + "a_offset(ii, jj) := a[ii+2, jj+2]", + "z[i,j] = -2*a_offset(i,j)" + " + a_offset(i,j-1)" + " + a_offset(i,j+1)" + " + a_offset(i-1,j)" + " + a_offset(i+1,j)" + + " + a_offset(i,j-2)" + " + a_offset(i,j+2)" + " + a_offset(i-2,j)" + " + a_offset(i+2,j)" + ], + assumptions="n>=1") + + knl = lp.split_iname(knl, "i", 16, outer_tag="g.1", inner_tag="l.1") + knl = lp.split_iname(knl, "j", 16, outer_tag="g.0", inner_tag="l.0") + knl = lp.add_prefetch(knl, "a", ["i_inner", "j_inner"], + fetch_bounding_box=True, default_tag="l.auto") + knl = lp.prioritize_loops(knl, ["a_dim_0_outer", "a_dim_1_outer"]) + + from loopy.transform.instruction import remove_work + knl = remove_work(knl) + + lp.auto_test_vs_ref(None, ctx, knl, print_ref_code=False) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From 0de948d37beca09eaacfe6506d64a5bcce4d3826 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 24 Aug 2018 23:26:33 -0400 Subject: [PATCH 2/3] Fix work remover test --- test/test_transform.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/test_transform.py b/test/test_transform.py index ed00ebd1b..ae0a577c1 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -553,6 +553,8 @@ def test_remove_work(ctx_factory): ], assumptions="n>=1") + knl = lp.add_and_infer_dtypes(knl, dict(a=np.float32)) + knl = lp.split_iname(knl, "i", 16, outer_tag="g.1", inner_tag="l.1") knl = lp.split_iname(knl, "j", 16, outer_tag="g.0", inner_tag="l.0") knl = lp.add_prefetch(knl, "a", ["i_inner", "j_inner"], @@ -562,7 +564,8 @@ def test_remove_work(ctx_factory): from loopy.transform.instruction import remove_work knl = remove_work(knl) - lp.auto_test_vs_ref(None, ctx, knl, print_ref_code=False) + lp.auto_test_vs_ref(knl, ctx, None, print_ref_code=False, + parameters=dict(n=512)) if __name__ == "__main__": -- GitLab From d6b78f58326c00c181707feb82189a202871617d Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 27 Aug 2018 20:45:45 -0500 Subject: [PATCH 3/3] Improve work remover: Initialize, rename, and write out the read target --- loopy/transform/instruction.py | 93 +++++++++++++++++++++++++++++++--- 1 file changed, 85 insertions(+), 8 deletions(-) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index e251ef42e..3dd7009ea 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -23,6 +23,7 @@ THE SOFTWARE. """ import six # noqa +import islpy as isl from loopy.diagnostic import LoopyError from loopy.symbolic import CombineMapper @@ -396,6 +397,29 @@ class _MemAccessGatherer(CombineMapper): return self._map_access(expr, expr.aggregate.name, expr.index) +def _make_grid_size_domain(kernel, var_name_gen=None): + if var_name_gen is None: + var_name_gen = kernel.get_var_name_generator() + + ggrid, lgrid = kernel.get_grid_size_upper_bounds() + ggrid_var_names = [var_name_gen("gid%d" % axis) for axis in range(len(ggrid))] + lgrid_var_names = [var_name_gen("lid%d" % axis) for axis in range(len(lgrid))] + grid_var_pwaffs = isl.make_zero_and_vars( + ggrid_var_names + lgrid_var_names, kernel.all_params()) + + grid_range_dom = grid_var_pwaffs[0].le_set(grid_var_pwaffs[0]) + for var, ubound in zip(ggrid_var_names + lgrid_var_names, ggrid + lgrid): + ubound = isl.align_spaces(ubound, grid_var_pwaffs[0]) + grid_range_dom = grid_range_dom & ( + grid_var_pwaffs[0].le_set(grid_var_pwaffs[var]) + & + grid_var_pwaffs[var].lt_set(ubound)) + + grid_range_dom, = grid_range_dom.get_basic_sets() + + return ggrid_var_names, lgrid_var_names, grid_range_dom + + def remove_work(kernel): """This transform removes operations in a kernel, leaving only accesses to global memory. @@ -416,17 +440,36 @@ def remove_work(kernel): # maps each old ID to a frozenset of new IDs old_to_new_ids = {} - new_instructions = [] insn_id_gen = kernel.get_instruction_id_generator() var_name_gen = kernel.get_var_name_generator() - private_var_name = var_name_gen() + read_tgt_var_name = var_name_gen("read_tgt") new_temporary_variables = kernel.temporary_variables.copy() - new_temporary_variables[private_var_name] = lp.TemporaryVariable( - private_var_name, address_space=lp.AddressSpace.PRIVATE) + new_temporary_variables[read_tgt_var_name] = lp.TemporaryVariable( + read_tgt_var_name, address_space=lp.AddressSpace.PRIVATE) + + new_instructions = [] + + # {{{ create init insn for read target + + ggrid_var_names, lgrid_var_names, grid_range_dom = _make_grid_size_domain(kernel) + grid_inames = frozenset(ggrid_var_names + lgrid_var_names) + + read_tgt_init_id = insn_id_gen("init_read_tgt") + old_to_new_ids[read_tgt_init_id] = [read_tgt_init_id] + new_instructions.append( + make_assignment( + (p.Variable(read_tgt_var_name),), + 0, + id=read_tgt_init_id, + within_inames=grid_inames)) + + # }}} # {{{ rewrite instructions + read_insn_ids = [] + for insn in kernel.instructions: if not isinstance(insn, MultiAssignmentBase): new_instructions.append(insn) @@ -441,13 +484,14 @@ def remove_work(kernel): new_insn_ids = set() for read_expr in reader_accesses: new_id = insn_id_gen(insn.id) + read_insn_ids.append(insn.id) new_instructions.append( make_assignment( - (p.Variable(private_var_name),), - p.Variable(private_var_name) + read_expr, + (p.Variable(read_tgt_var_name),), + p.Variable(read_tgt_var_name) + read_expr, id=new_id, within_inames=insn.within_inames, - depends_on=insn.depends_on)) + depends_on=insn.depends_on | frozenset([read_tgt_init_id]))) new_insn_ids.add(new_id) for write_expr in writer_accesses: @@ -465,6 +509,28 @@ def remove_work(kernel): # }}} + # {{{ create write-out insn for read target + + _, lgrid = kernel.get_grid_size_upper_bounds_as_exprs() + read_tgt_local_dest_name = var_name_gen("read_tgt_dest") + new_temporary_variables[read_tgt_local_dest_name] = lp.TemporaryVariable( + name=read_tgt_local_dest_name, + address_space=lp.AddressSpace.LOCAL, + shape=lgrid) + + write_read_tgt_id = insn_id_gen("write_read_tgt") + old_to_new_ids[write_read_tgt_id] = [write_read_tgt_id] + new_instructions.append( + make_assignment( + (p.Variable(read_tgt_local_dest_name)[ + tuple(p.Variable(lgn) for lgn in lgrid_var_names)],), + p.Variable(read_tgt_var_name), + id=write_read_tgt_id, + depends_on=frozenset(read_insn_ids), + within_inames=grid_inames)) + + # }}} + # {{{ rewrite dependencies for new IDs new_instructions_2 = [] @@ -479,11 +545,22 @@ def remove_work(kernel): # }}} - return kernel.copy( + kernel = kernel.copy( + domains=kernel.domains + [grid_range_dom], state=lp.KernelState.INITIAL, instructions=new_instructions_2, temporary_variables=new_temporary_variables) + from loopy.kernel.data import GroupIndexTag, LocalIndexTag + kernel = lp.tag_inames(kernel, dict( + (ggrid_var_names[i], GroupIndexTag(i)) + for i in range(len(ggrid_var_names)))) + kernel = lp.tag_inames(kernel, dict( + (lgrid_var_names[i], LocalIndexTag(i)) + for i in range(len(lgrid_var_names)))) + + return kernel + # }}} # vim: foldmethod=marker -- GitLab