From 127bdd97817d27936f42d0d4973f084cf2599aaa Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 20 Dec 2019 19:03:34 -0600 Subject: [PATCH 1/9] defines remove_dependency --- loopy/transform/instruction.py | 66 +++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index e6ecb4093..e0bafd205 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -73,28 +73,16 @@ def set_instruction_priority(kernel, insn_match, priority): # }}} -# {{{ add_dependency - -def add_dependency(kernel, insn_match, depends_on): - """Add the instruction dependency *dependency* to the instructions matched - by *insn_match*. - - *insn_match* and *depends_on* may be any instruction id match understood by - :func:`loopy.match.parse_match`. - - .. versionchanged:: 2016.3 - - Third argument renamed to *depends_on* for clarity, allowed to - be not just ID but also match expression. 
- """ +# {{{ add/remove_dependency +def _add_or_remove_dependency(kernel, insn_match, depends_on, adds): if isinstance(depends_on, str) and depends_on in kernel.id_to_insn: - added_deps = frozenset([depends_on]) + depends_on = frozenset([depends_on]) else: - added_deps = frozenset( + depends_on = frozenset( dep.id for dep in find_instructions(kernel, depends_on)) - if not added_deps: + if not depends_on: raise LoopyError("no instructions found matching '%s' " "(to add as dependencies)" % depends_on) @@ -104,13 +92,26 @@ def add_dependency(kernel, insn_match, depends_on): new_deps = insn.depends_on matched[0] = True if new_deps is None: - new_deps = added_deps + new_deps = depends_on + else: + new_deps = new_deps | depends_on + + return insn.copy(depends_on=new_deps) + + def remove_dep(insn): + new_deps = insn.depends_on + matched[0] = True + if new_deps is None: + new_deps = None else: - new_deps = new_deps | added_deps + new_deps = new_deps - depends_on return insn.copy(depends_on=new_deps) - result = map_instructions(kernel, insn_match, add_dep) + if adds: + result = map_instructions(kernel, insn_match, add_dep) + else: + result = map_instructions(kernel, insn_match, remove_dep) if not matched[0]: raise LoopyError("no instructions found matching '%s' " @@ -118,6 +119,31 @@ def add_dependency(kernel, insn_match, depends_on): return result + +def add_dependency(kernel, insn_match, depends_on): + """Add the instruction dependency *dependency* to the instructions matched + by *insn_match*. + + *insn_match* and *depends_on* may be any instruction id match understood by + :func:`loopy.match.parse_match`. + + .. versionchanged:: 2016.3 + + Third argument renamed to *depends_on* for clarity, allowed to + be not just ID but also match expression. 
+ """ + return _add_or_remove_dependency(kernel, insn_match, depends_on, adds=True) + + +def remove_dependency(kernel, insn_match, depends_on): + """Remove the instruction dependency *depends_on* from the instructions matched + by *insn_match*. + + *insn_match* and *depends_on* may be any instruction id match understood by + :func:`loopy.match.parse_match`. + """ + return _add_or_remove_dependency(kernel, insn_match, depends_on, adds=False) + # }}} -- GitLab From fe4ff2a88a50e1293f91991180d512c41d865138 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 22 Dec 2019 00:59:44 -0600 Subject: [PATCH 2/9] adds transformation to remove unnecessary deps --- loopy/__init__.py | 4 +++- loopy/transform/instruction.py | 29 +++++++++++++++++++++++++++++ test/test_transform.py | 13 +++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index b60de6e2d..6f8772fe1 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -86,7 +86,8 @@ from loopy.transform.instruction import ( remove_instructions, replace_instruction_ids, tag_instructions, - add_nosync) + add_nosync, + impose_only_read_after_write_deps) from loopy.transform.data import ( add_prefetch, change_arg_to_image, @@ -210,6 +211,7 @@ __all__ = [ "replace_instruction_ids", "tag_instructions", "add_nosync", + "impose_only_read_after_write_deps", "extract_subst", "expand_subst", "assignment_to_subst", "find_rules_matching", "find_one_rule_matching", diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index e0bafd205..cb5c903a6 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -383,4 +383,33 @@ def uniquify_instruction_ids(kernel): # }}} +# {{{ impose_only_read_after_write_deps + +def impose_only_read_after_write_deps(kernel): + """ + Returns a kernel with every instruction depending only on instructions + which write to the variables that it reads. 
+ """ + from loopy.kernel.tools import find_recursive_dependencies + + # insn_to_all_deps: stores all direct or indirect dependencies of an insn + insn_to_all_deps = dict((insn.id, set()) for insn in kernel.instructions) + + for insn in kernel.instructions: + insn_to_all_deps[insn.id] = find_recursive_dependencies(kernel, + [insn.id]) - set([insn.id]) + + new_insns = [] + + for insn in kernel.instructions: + depends_on = frozenset([dep_id for dep_id in insn_to_all_deps[insn.id] if + insn.read_dependency_names() & ( + kernel.id_to_insn[dep_id].write_dependency_names() + - kernel.all_inames())]) + new_insns.append(insn.copy(depends_on=depends_on)) + + return kernel.copy(instructions=new_insns) + +# }}} + # vim: foldmethod=marker diff --git a/test/test_transform.py b/test/test_transform.py index cdc0c14b8..98cb50562 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -570,6 +570,19 @@ def test_nested_substs_in_insns(ctx_factory): lp.auto_test_vs_ref(ref_knl, ctx, knl) +def test_impose_only_raw_deps(): + knl = lp.make_kernel( + "{[i]: 0<=i<10}", + """ + a[i] = 2*b[i] {id=insn_0} + c[i] = 2*d[i] {id=insn_1} + e[i] = 2*a[i] {id=insn_2} + """, seq_dependencies=True) + knl = lp.impose_only_read_after_write_deps(knl) + assert knl.id_to_insn['insn_2'].depends_on == frozenset(['insn_0']) + assert knl.id_to_insn['insn_1'].depends_on == frozenset() + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From 70e46ac3688a7dc8533221bb0068d7c7730b6cb8 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 22 Dec 2019 10:32:16 -0600 Subject: [PATCH 3/9] Defines 2 new transformations - remove_unused_axes_in_temporaries - flatten_variable --- loopy/__init__.py | 5 +- loopy/transform/data.py | 112 ++++++++++++++++++++++++++++++++++++++++ test/test_transform.py | 37 +++++++++++++ 3 files changed, 153 insertions(+), 1 deletion(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 6f8772fe1..8b79bdcc4 100644 --- a/loopy/__init__.py +++ 
b/loopy/__init__.py @@ -96,7 +96,8 @@ from loopy.transform.data import ( remove_unused_arguments, alias_temporaries, set_argument_order, rename_argument, - set_temporary_scope) + set_temporary_scope, remove_unused_axes_in_temporaries, + flatten_variable) from loopy.transform.subst import (extract_subst, assignment_to_subst, expand_subst, find_rules_matching, @@ -204,6 +205,8 @@ __all__ = [ "remove_unused_arguments", "alias_temporaries", "set_argument_order", "rename_argument", "set_temporary_scope", + "remove_unused_axes_in_temporaries", + "flatten_variable", "find_instructions", "map_instructions", "set_instruction_priority", "add_dependency", diff --git a/loopy/transform/data.py b/loopy/transform/data.py index a6a2d7b4f..a0dd15e07 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -30,6 +30,7 @@ from islpy import dim_type from loopy.kernel.data import ImageArg from pytools import MovedFunctionDeprecationWrapper +from loopy.symbolic import IdentityMapper # {{{ convenience: add_prefetch @@ -767,4 +768,115 @@ def reduction_arg_to_subst_rule(knl, inames, insn_match=None, subst_rule_name=No # }}} +# {{{ remove_unused_axes_in_temporaries + +class AxesRemovingMapper(IdentityMapper): + def __init__(self, tv_to_removable_axes): + self.tv_to_removable_axes = tv_to_removable_axes + + def map_subscript(self, expr): + removable_indices = self.tv_to_removable_axes.get(expr.aggregate.name, + None) + + if removable_indices: + assert all(expr.index_tuple[idx] == 0 for idx in removable_indices) + new_expr = type(expr)(expr.aggregate, tuple(self.rec(idx) for i, idx + in enumerate(expr.index_tuple) if i not in + removable_indices)) + + return new_expr + + return super(AxesRemovingMapper, self).map_subscript(expr) + + +def remove_unused_axes_in_temporaries(kernel): + """ + Returns a kernel with all unused axes in a temporary variable removed. 
This + is helpful if some temporaries are intended to be run through + :func:`loopy.assignment_to_subst`, but all references to the variable are + of the form ``var_name[0, i, j]``. + + .. note:: + + If the shape of ``A`` is ``(1, 1, 3, 6)`` and all references to ``A`` + are of the form ``A[0, 0, i0, i1]`` then axes 0 and 1 are unused axes + and the references to ``A`` will be updated to ``A[i0, i1]``. + """ + new_temps = {} + tv_x_removable_axes = {} + for tv in kernel.temporary_variables.values(): + removable_axes = tuple(i for i, axis_len in enumerate(tv.shape) if + axis_len == 1) + if removable_axes: + tv_x_removable_axes[tv.name] = removable_axes + new_temps[tv.name] = tv.copy(shape=tuple(axis_len for axis_len in + tv.shape if axis_len != 1), + dim_tags=None) + else: + new_temps[tv.name] = tv + + new_insns = [] + axes_removing_mapper = AxesRemovingMapper(tv_x_removable_axes) + + for insn in kernel.instructions: + new_insns.append(insn.with_transformed_expressions(axes_removing_mapper)) + + return kernel.copy(instructions=new_insns, temporary_variables=new_temps) + +# }}} + + +# {{{ flatten variable + +class FlattenMapper(IdentityMapper): + def __init__(self, var_name, strides): + self.var_name = var_name + self.strides = strides + + def map_subscript(self, expr): + if expr.aggregate.name == self.var_name: + new_idx = sum(stride*idx for stride, idx in zip(self.strides, + expr.index_tuple)) + return type(expr)(expr.aggregate, (new_idx, )) + return super(FlattenMapper, self).map_subscript(expr) + + +def flatten_variable(kernel, var_name): + """ + Returns a kernel with the temporary variable *var_name* flattened. + + :arg var_name: an instance of :class:`str`. + + .. note:: + + Use case: Can be used in conjunction with + :func:`loopy.absorb_temporary_into` to reduce memory usage. 
+ """ + import numpy as np + old_tv = kernel.temporary_variables[var_name] + from loopy import auto + from loopy.kernel.array import FixedStrideArrayDimTag + if not all(isinstance(dim_tag, FixedStrideArrayDimTag) and + dim_tag.stride != auto for dim_tag in + old_tv.dim_tags): + raise LoopyError("Strides of '%s' must be deterministic in order to" + " flatten it." % var_name) + strides = tuple(dim_tag.stride for dim_tag in old_tv.dim_tags) + flattener = FlattenMapper(var_name, strides) + + new_temps = kernel.temporary_variables.copy() + new_temps[var_name] = old_tv.copy( + shape=np.prod(old_tv.shape), dim_tags=None, dim_names=None, + strides=None) + + kernel = kernel.copy( + instructions=[insn.with_transformed_expressions(flattener) for + insn in kernel.instructions], + temporary_variables=new_temps) + + return kernel + +# }}} + + # vim: foldmethod=marker diff --git a/test/test_transform.py b/test/test_transform.py index 98cb50562..7c7d38ca3 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -583,6 +583,43 @@ def test_impose_only_raw_deps(): assert knl.id_to_insn['insn_1'].depends_on == frozenset() +def test_remove_unused_axes_in_temps(ctx_factory): + knl = lp.make_kernel( + "{[n, i, j]: 0<=i, j<32 and 0<=n<100}", + """ + # unnecessary temps which might exacerbate register pressure + <> temp_1[0, i] = 2*x[n, i]**2 + <> temp_2[0, i] = x[n, i]**2 + <> temp_3[0, i] = 12*x[n, i]**2 + <> temp_4[0, i] = 0.2*x[n, i]**2 + y[n, j] = temp_1[0, j]+2*temp_2[0, j]+11*temp_3[0, j]+2*temp_4[0, j] + """, [lp.GlobalArg('x, y', shape=(100, 32), dtype=float), '...'], + seq_dependencies=True) + + ref_knl = knl.copy() + knl = lp.remove_unused_axes_in_temporaries(knl) + knl = lp.assignment_to_subst(knl, 'temp_1') + knl = lp.assignment_to_subst(knl, 'temp_2') + knl = lp.assignment_to_subst(knl, 'temp_3') + knl = lp.assignment_to_subst(knl, 'temp_4') + lp.auto_test_vs_ref(ref_knl, ctx_factory(), knl) + + +def test_flatten_variable(ctx_factory): + knl = lp.make_kernel( + 
"{[n, i1, i2, j1, j2]: 0<=i1, i2<3 and 0<=j1, j2<2 and 0<=n<100}", + """ + temp[i1, j1] = 2*x[n, i1, j1] + y[n, i2, j2] = 2*temp[i2, j2] + """, [lp.TemporaryVariable('temp', shape=(3, 2), dtype=float, + order='C'), lp.GlobalArg(name='x,y', shape=lp.auto, + dtype=float)]) + ref_knl = knl.copy() + knl = lp.flatten_variable(knl, 'temp') + assert knl.temporary_variables['temp'].shape == (6,) + lp.auto_test_vs_ref(ref_knl, ctx_factory(), knl) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From 919ba2bba7f0108cccbe98768c20726e18fe7cca Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 22 Dec 2019 16:37:36 -0600 Subject: [PATCH 4/9] defined absorb_temporary_into --- loopy/__init__.py | 4 +-- loopy/transform/data.py | 58 ++++++++++++++++++++++++++++++++++++++++- test/test_transform.py | 30 +++++++++++++++++++++ 3 files changed, 89 insertions(+), 3 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 8b79bdcc4..2f86debcd 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -97,7 +97,7 @@ from loopy.transform.data import ( alias_temporaries, set_argument_order, rename_argument, set_temporary_scope, remove_unused_axes_in_temporaries, - flatten_variable) + flatten_variable, absorb_temporary_into) from loopy.transform.subst import (extract_subst, assignment_to_subst, expand_subst, find_rules_matching, @@ -206,7 +206,7 @@ __all__ = [ "alias_temporaries", "set_argument_order", "rename_argument", "set_temporary_scope", "remove_unused_axes_in_temporaries", - "flatten_variable", + "flatten_variable", "absorb_temporary_into", "find_instructions", "map_instructions", "set_instruction_priority", "add_dependency", diff --git a/loopy/transform/data.py b/loopy/transform/data.py index a0dd15e07..f9f32c79e 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -768,7 +768,7 @@ def reduction_arg_to_subst_rule(knl, inames, insn_match=None, subst_rule_name=No # }}} -# {{{ remove_unused_axes_in_temporaries +# {{{ 
remove unused axes in temporaries class AxesRemovingMapper(IdentityMapper): def __init__(self, tv_to_removable_axes): @@ -879,4 +879,60 @@ def flatten_variable(kernel, var_name): # }}} +# {{{ absorb temporary + +class NameChangingMapper(IdentityMapper): + def __init__(self, absorber_name, absorbee_name): + self.absorber_name = absorber_name + self.absorbee_name = absorbee_name + + def map_subscript(self, expr): + from pymbolic.primitives import Variable, Subscript + if expr.aggregate.name == self.absorbee_name: + return Subscript(Variable(self.absorber_name), expr.index_tuple) + return super(NameChangingMapper, self).map_subscript(expr) + + +def absorb_temporary_into(kernel, absorber, absorbee): + """ + Returns a kernel with all uses of variable *absorbee* replaced with + *absorber*. + + ..note:: + + * Could be used to reduce memory usage. + + ..warning:: + + The caller must make sure that *absorber* and *absorbee* have + disjoint live intervals for correctness. + """ + + absorber_tv = kernel.temporary_variables[absorber] + absorbee_tv = kernel.temporary_variables[absorbee] + + if len(absorber_tv.shape) != len(absorbee_tv.shape): + raise LoopyError("The number of axes of '%s' and '%s' do not match." 
% + (absorber, absorbee)) + + if not all(absorber_len >= absorbee_len for (absorber_len, absorbee_len) in + zip(absorber_tv.shape, absorbee_tv.shape)): + raise LoopyError("Shape of '%s' ('%s') not >= Shape of '%s'" + " ('%s').".format(absorber, absorber_tv.shape, absorbee, + absorbee_tv.shape)) + + new_temps = kernel.temporary_variables.copy() + del new_temps[absorbee] + + name_changer = NameChangingMapper(absorber_tv.name, absorbee_tv.name) + + kernel = kernel.copy( + instructions=[insn.with_transformed_expressions(name_changer) for + insn in kernel.instructions], + temporary_variables=new_temps) + + return kernel + +# }}} + # vim: foldmethod=marker diff --git a/test/test_transform.py b/test/test_transform.py index 7c7d38ca3..a2935960b 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -620,6 +620,36 @@ def test_flatten_variable(ctx_factory): lp.auto_test_vs_ref(ref_knl, ctx_factory(), knl) +def test_absorb_temporary_into(ctx_factory): + knl = lp.make_kernel( + "{[i]: 0<=i<10}", + """ + tmp_0[i] = 2*x[i] + y[i] = y[i] + tmp_0[i] + tmp_1[i] = 3*x[i] {id=insn} + z[i] = z[i] + tmp_1[i] + """, + [ + lp.GlobalArg('x, y, z', shape=(10,), dtype=float), + lp.TemporaryVariable('tmp_0', shape=(10,), dtype=float, + address_space=lp.AddressSpace.LOCAL), + lp.TemporaryVariable('tmp_1', shape=(10,), dtype=float, + address_space=lp.AddressSpace.LOCAL) + ], + seq_dependencies=True) + + # forcing one thread per workgroup to avoid write contention to shared mem. 
vars + knl = lp.split_iname(knl, 'i', 1, outer_tag="g.0", inner_tag="l.0") + ref_knl = knl.copy(name="ref_loopy_knl") + + knl = lp.absorb_temporary_into(knl, 'tmp_0', 'tmp_1') + assert 'tmp_1' not in knl.temporary_variables + assert ref_knl.id_to_insn['insn'].assignee.aggregate.name == 'tmp_1' + assert knl.id_to_insn['insn'].assignee.aggregate.name == 'tmp_0' + + lp.auto_test_vs_ref(ref_knl, ctx_factory(), knl) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From 285889a77a9c005ee44e1e27549970fc525c8360 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 22 Dec 2019 17:36:58 -0600 Subject: [PATCH 5/9] new transformation: remove_axis --- loopy/__init__.py | 3 ++- loopy/transform/data.py | 52 ++++++++++++++++++++++++++++++++++++++++- test/test_transform.py | 31 ++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 2 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 2f86debcd..27482e682 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -97,7 +97,7 @@ from loopy.transform.data import ( alias_temporaries, set_argument_order, rename_argument, set_temporary_scope, remove_unused_axes_in_temporaries, - flatten_variable, absorb_temporary_into) + flatten_variable, absorb_temporary_into, remove_axis) from loopy.transform.subst import (extract_subst, assignment_to_subst, expand_subst, find_rules_matching, @@ -207,6 +207,7 @@ __all__ = [ "rename_argument", "set_temporary_scope", "remove_unused_axes_in_temporaries", "flatten_variable", "absorb_temporary_into", + "remove_axis", "find_instructions", "map_instructions", "set_instruction_priority", "add_dependency", diff --git a/loopy/transform/data.py b/loopy/transform/data.py index f9f32c79e..d57980f54 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -30,7 +30,8 @@ from islpy import dim_type from loopy.kernel.data import ImageArg from pytools import MovedFunctionDeprecationWrapper -from loopy.symbolic import IdentityMapper +from 
loopy.symbolic import (IdentityMapper, RuleAwareIdentityMapper, + SubstitutionRuleMappingContext) # {{{ convenience: add_prefetch @@ -935,4 +936,53 @@ def absorb_temporary_into(kernel, absorber, absorbee): # }}} + +# {{{ remove axis + +class AxisRemover(RuleAwareIdentityMapper): + def __init__(self, rule_mapping_context, var_name, axis_num): + self.var_name = var_name + self.axis_num = axis_num + super(AxisRemover, self).__init__(rule_mapping_context) + + def map_subscript(self, expr, expn_state): + from pymbolic.primitives import Variable, Subscript + if expr.aggregate.name == self.var_name: + if len(expr.index_tuple) == 1: + return Variable(self.var_name) + else: + return Subscript(expr.aggregate, + expr.index_tuple[:self.axis_num] + + expr.index_tuple[self.axis_num+1:]) + + return super(AxisRemover, self).map_subscript(expr, expn_state) + + +def remove_axis(kernel, var_name, axis_num): + assert var_name in kernel.temporary_variables + + assert axis_num < len(kernel.temporary_variables[var_name].shape) + + rule_mapping_context = SubstitutionRuleMappingContext(kernel.substitutions, + kernel.get_var_name_generator()) + + kernel = AxisRemover(rule_mapping_context, var_name, axis_num).map_kernel(kernel) + + if len(kernel.temporary_variables[var_name].shape) == 1: + new_temps = dict((tv.name, tv.copy(shape=(), dim_tags=None)) + if tv.name == var_name else (tv.name, tv) for tv in + kernel.temporary_variables.values()) + else: + from loopy import auto + new_temps = dict((tv.name, + tv.copy(shape=tv.shape[:axis_num]+tv.shape[axis_num+1:], + strides=auto, dim_tags=None)) + if tv.name == var_name else (tv.name, tv) for tv in + kernel.temporary_variables.values()) + + return kernel.copy(temporary_variables=new_temps) + +# }}} + + # vim: foldmethod=marker diff --git a/test/test_transform.py b/test/test_transform.py index a2935960b..a5f0cf2e9 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -650,6 +650,37 @@ def test_absorb_temporary_into(ctx_factory): 
lp.auto_test_vs_ref(ref_knl, ctx_factory(), knl) +def test_remove_axis(ctx_factory): + knl = lp.make_kernel( + "{[n, i1, i2, j, k]: 0<=n<100 and 0<=i1, i2<10 and 0<=j, k<6}", + """ + # gather + tmp[i1, j] = x[n, i1, j] + # scatter + y[n, i2, k] = tmp[i2, k] + """, + [ + lp.GlobalArg('x, y', shape=(100, 10, 6), dtype=float), + lp.TemporaryVariable('tmp', shape=(10, 6,), dtype=float, + address_space=lp.AddressSpace.PRIVATE) + ], + seq_dependencies=True + ) + + knl = lp.tag_inames(knl, "n:g.0, j:l.0, k:l.0") + + ref_knl = knl.copy() + ref_knl = lp.set_options(ref_knl, 'write_cl') + + # get rid of unnecessary usage of private memory + knl = lp.remove_axis(knl, 'tmp', 1) + + assert ref_knl.temporary_variables['tmp'].shape == (10, 6) + assert knl.temporary_variables['tmp'].shape == (10,) + + lp.auto_test_vs_ref(ref_knl, ctx_factory(), knl) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From ab7f9843b46779707d377c9da5b17beb99cae536 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 22 Dec 2019 17:46:19 -0600 Subject: [PATCH 6/9] adds docs for remove_axis --- loopy/transform/data.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/loopy/transform/data.py b/loopy/transform/data.py index d57980f54..4366d0e03 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -959,8 +959,14 @@ class AxisRemover(RuleAwareIdentityMapper): def remove_axis(kernel, var_name, axis_num): - assert var_name in kernel.temporary_variables + """ + Returns a kernel after removing *axis_num* axis of the temporary variable + *var_name*. + One might interpret this operation as the inverse of privatization. 
+ """ + + assert var_name in kernel.temporary_variables assert axis_num < len(kernel.temporary_variables[var_name].shape) rule_mapping_context = SubstitutionRuleMappingContext(kernel.substitutions, -- GitLab From 4f85ee9e494d40698092990bdcfe41461c5b637c Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 22 Dec 2019 19:19:43 -0600 Subject: [PATCH 7/9] pylint caught an error in the error message --- loopy/transform/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/transform/data.py b/loopy/transform/data.py index 4366d0e03..3397b6432 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -919,7 +919,7 @@ def absorb_temporary_into(kernel, absorber, absorbee): if not all(absorber_len >= absorbee_len for (absorber_len, absorbee_len) in zip(absorber_tv.shape, absorbee_tv.shape)): raise LoopyError("Shape of '%s' ('%s') not >= Shape of '%s'" - " ('%s').".format(absorber, absorber_tv.shape, absorbee, + " ('%s')." % (absorber, absorber_tv.shape, absorbee, absorbee_tv.shape)) new_temps = kernel.temporary_variables.copy() -- GitLab From 6821d2b58e77f8b9eefa4c39d6e84db7a2e5229f Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 23 Dec 2019 14:40:04 -0600 Subject: [PATCH 8/9] name change: remove_unused_axes_in_temporaries->squeeze_axes_in_temporaries --- loopy/__init__.py | 4 ++-- loopy/transform/data.py | 12 ++++++------ test/test_transform.py | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 27482e682..8360dd3e7 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -96,7 +96,7 @@ from loopy.transform.data import ( remove_unused_arguments, alias_temporaries, set_argument_order, rename_argument, - set_temporary_scope, remove_unused_axes_in_temporaries, + set_temporary_scope, squeeze_axes_in_temporaries, flatten_variable, absorb_temporary_into, remove_axis) from loopy.transform.subst import (extract_subst, @@ -205,7 +205,7 @@ __all__ = [ 
"remove_unused_arguments", "alias_temporaries", "set_argument_order", "rename_argument", "set_temporary_scope", - "remove_unused_axes_in_temporaries", + "squeeze_axes_in_temporaries", "flatten_variable", "absorb_temporary_into", "remove_axis", diff --git a/loopy/transform/data.py b/loopy/transform/data.py index 3397b6432..70d028b8e 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -771,7 +771,7 @@ def reduction_arg_to_subst_rule(knl, inames, insn_match=None, subst_rule_name=No # {{{ remove unused axes in temporaries -class AxesRemovingMapper(IdentityMapper): +class AxesSqueezer(IdentityMapper): def __init__(self, tv_to_removable_axes): self.tv_to_removable_axes = tv_to_removable_axes @@ -787,12 +787,12 @@ class AxesRemovingMapper(IdentityMapper): return new_expr - return super(AxesRemovingMapper, self).map_subscript(expr) + return super(AxesSqueezer, self).map_subscript(expr) -def remove_unused_axes_in_temporaries(kernel): +def squeeze_axes_in_temporaries(kernel): """ - Returns a kernel with all unused axes in a temporary variable removed. This + Returns a kernel with all 1-length axes in a temporary variable removed. This is helpful if some temporaries are intended to be run through :func:`loopy.assignment_to_subst`, but all references to the variable are of the form ``var_name[0, i, j]``. 
@@ -817,10 +817,10 @@ def remove_unused_axes_in_temporaries(kernel): new_temps[tv.name] = tv new_insns = [] - axes_removing_mapper = AxesRemovingMapper(tv_x_removable_axes) + axes_squeezer = AxesSqueezer(tv_x_removable_axes) for insn in kernel.instructions: - new_insns.append(insn.with_transformed_expressions(axes_removing_mapper)) + new_insns.append(insn.with_transformed_expressions(axes_squeezer)) return kernel.copy(instructions=new_insns, temporary_variables=new_temps) diff --git a/test/test_transform.py b/test/test_transform.py index a5f0cf2e9..4f80682a1 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -583,7 +583,7 @@ def test_impose_only_raw_deps(): assert knl.id_to_insn['insn_1'].depends_on == frozenset() -def test_remove_unused_axes_in_temps(ctx_factory): +def test_squeeze_axes_in_temps(ctx_factory): knl = lp.make_kernel( "{[n, i, j]: 0<=i, j<32 and 0<=n<100}", """ @@ -597,7 +597,7 @@ def test_remove_unused_axes_in_temps(ctx_factory): seq_dependencies=True) ref_knl = knl.copy() - knl = lp.remove_unused_axes_in_temporaries(knl) + knl = lp.squeeze_axes_in_temporaries(knl) knl = lp.assignment_to_subst(knl, 'temp_1') knl = lp.assignment_to_subst(knl, 'temp_2') knl = lp.assignment_to_subst(knl, 'temp_3') -- GitLab From b382f58a19de146afa5a5815a0b297dc3482480b Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 17 Feb 2020 08:35:50 -0600 Subject: [PATCH 9/9] reverts duplication of alias_temporaries --- loopy/__init__.py | 3 +- loopy/transform/data.py | 110 ---------------------------------------- test/test_transform.py | 45 ---------------- 3 files changed, 1 insertion(+), 157 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 8360dd3e7..c74c56768 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -97,7 +97,7 @@ from loopy.transform.data import ( alias_temporaries, set_argument_order, rename_argument, set_temporary_scope, squeeze_axes_in_temporaries, - flatten_variable, absorb_temporary_into, remove_axis) + 
remove_axis) from loopy.transform.subst import (extract_subst, assignment_to_subst, expand_subst, find_rules_matching, @@ -206,7 +206,6 @@ __all__ = [ "alias_temporaries", "set_argument_order", "rename_argument", "set_temporary_scope", "squeeze_axes_in_temporaries", - "flatten_variable", "absorb_temporary_into", "remove_axis", "find_instructions", "map_instructions", diff --git a/loopy/transform/data.py b/loopy/transform/data.py index 70d028b8e..02456d64d 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -827,116 +827,6 @@ def squeeze_axes_in_temporaries(kernel): # }}} -# {{{ flatten variable - -class FlattenMapper(IdentityMapper): - def __init__(self, var_name, strides): - self.var_name = var_name - self.strides = strides - - def map_subscript(self, expr): - if expr.aggregate.name == self.var_name: - new_idx = sum(stride*idx for stride, idx in zip(self.strides, - expr.index_tuple)) - return type(expr)(expr.aggregate, (new_idx, )) - return super(FlattenMapper, self).map_subscript(expr) - - -def flatten_variable(kernel, var_name): - """ - Returns a kernel with the temporary variable *var_name* flattened. - - :arg var_name: an instance of :class:`str`. - - .. note:: - - Use case: Can be used in conjunction with - :func:`loopy.absorb_temporary_into` to reduce memory usage. - """ - import numpy as np - old_tv = kernel.temporary_variables[var_name] - from loopy import auto - from loopy.kernel.array import FixedStrideArrayDimTag - if not all(isinstance(dim_tag, FixedStrideArrayDimTag) and - dim_tag.stride != auto for dim_tag in - old_tv.dim_tags): - raise LoopyError("Strides of '%s' must be deterministic in order to" - " flatten it." 
% var_name) - strides = tuple(dim_tag.stride for dim_tag in old_tv.dim_tags) - flattener = FlattenMapper(var_name, strides) - - new_temps = kernel.temporary_variables.copy() - new_temps[var_name] = old_tv.copy( - shape=np.prod(old_tv.shape), dim_tags=None, dim_names=None, - strides=None) - - kernel = kernel.copy( - instructions=[insn.with_transformed_expressions(flattener) for - insn in kernel.instructions], - temporary_variables=new_temps) - - return kernel - -# }}} - - -# {{{ absorb temporary - -class NameChangingMapper(IdentityMapper): - def __init__(self, absorber_name, absorbee_name): - self.absorber_name = absorber_name - self.absorbee_name = absorbee_name - - def map_subscript(self, expr): - from pymbolic.primitives import Variable, Subscript - if expr.aggregate.name == self.absorbee_name: - return Subscript(Variable(self.absorber_name), expr.index_tuple) - return super(NameChangingMapper, self).map_subscript(expr) - - -def absorb_temporary_into(kernel, absorber, absorbee): - """ - Returns a kernel with all uses of variable *absorbee* replaced with - *absorber*. - - ..note:: - - * Could be used to reduce memory usage. - - ..warning:: - - The caller must make sure that *absorber* and *absorbee* have - disjoint live intervals for correctness. - """ - - absorber_tv = kernel.temporary_variables[absorber] - absorbee_tv = kernel.temporary_variables[absorbee] - - if len(absorber_tv.shape) != len(absorbee_tv.shape): - raise LoopyError("The number of axes of '%s' and '%s' do not match." % - (absorber, absorbee)) - - if not all(absorber_len >= absorbee_len for (absorber_len, absorbee_len) in - zip(absorber_tv.shape, absorbee_tv.shape)): - raise LoopyError("Shape of '%s' ('%s') not >= Shape of '%s'" - " ('%s')." 
% (absorber, absorber_tv.shape, absorbee, - absorbee_tv.shape)) - - new_temps = kernel.temporary_variables.copy() - del new_temps[absorbee] - - name_changer = NameChangingMapper(absorber_tv.name, absorbee_tv.name) - - kernel = kernel.copy( - instructions=[insn.with_transformed_expressions(name_changer) for - insn in kernel.instructions], - temporary_variables=new_temps) - - return kernel - -# }}} - - # {{{ remove axis class AxisRemover(RuleAwareIdentityMapper): diff --git a/test/test_transform.py b/test/test_transform.py index 4f80682a1..da0630ca6 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -605,51 +605,6 @@ def test_squeeze_axes_in_temps(ctx_factory): lp.auto_test_vs_ref(ref_knl, ctx_factory(), knl) -def test_flatten_variable(ctx_factory): - knl = lp.make_kernel( - "{[n, i1, i2, j1, j2]: 0<=i1, i2<3 and 0<=j1, j2<2 and 0<=n<100}", - """ - temp[i1, j1] = 2*x[n, i1, j1] - y[n, i2, j2] = 2*temp[i2, j2] - """, [lp.TemporaryVariable('temp', shape=(3, 2), dtype=float, - order='C'), lp.GlobalArg(name='x,y', shape=lp.auto, - dtype=float)]) - ref_knl = knl.copy() - knl = lp.flatten_variable(knl, 'temp') - assert knl.temporary_variables['temp'].shape == (6,) - lp.auto_test_vs_ref(ref_knl, ctx_factory(), knl) - - -def test_absorb_temporary_into(ctx_factory): - knl = lp.make_kernel( - "{[i]: 0<=i<10}", - """ - tmp_0[i] = 2*x[i] - y[i] = y[i] + tmp_0[i] - tmp_1[i] = 3*x[i] {id=insn} - z[i] = z[i] + tmp_1[i] - """, - [ - lp.GlobalArg('x, y, z', shape=(10,), dtype=float), - lp.TemporaryVariable('tmp_0', shape=(10,), dtype=float, - address_space=lp.AddressSpace.LOCAL), - lp.TemporaryVariable('tmp_1', shape=(10,), dtype=float, - address_space=lp.AddressSpace.LOCAL) - ], - seq_dependencies=True) - - # forcing one thread per workgroup to avoid write contention to shared mem. 
vars - knl = lp.split_iname(knl, 'i', 1, outer_tag="g.0", inner_tag="l.0") - ref_knl = knl.copy(name="ref_loopy_knl") - - knl = lp.absorb_temporary_into(knl, 'tmp_0', 'tmp_1') - assert 'tmp_1' not in knl.temporary_variables - assert ref_knl.id_to_insn['insn'].assignee.aggregate.name == 'tmp_1' - assert knl.id_to_insn['insn'].assignee.aggregate.name == 'tmp_0' - - lp.auto_test_vs_ref(ref_knl, ctx_factory(), knl) - - def test_remove_axis(ctx_factory): knl = lp.make_kernel( "{[n, i1, i2, j, k]: 0<=n<100 and 0<=i1, i2<10 and 0<=j, k<6}", -- GitLab