From 099e17dced273eb2a95446526eab4a6c534acf79 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 14 Jan 2020 11:37:17 -0600 Subject: [PATCH 01/21] imports tests; minor corrections in invocations of simplify_using_aff --- loopy/isl_helpers.py | 4 +-- test/test_apps.py | 24 ++++++------- test/test_transform.py | 76 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 14 deletions(-) diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py index 1b2a83aad..446302197 100644 --- a/loopy/isl_helpers.py +++ b/loopy/isl_helpers.py @@ -430,9 +430,9 @@ def boxify(cache_manager, domain, box_inames, context): def simplify_via_aff(expr): - from loopy.symbolic import aff_from_expr, aff_to_expr, get_dependencies + from loopy.symbolic import guarded_aff_from_expr, aff_to_expr, get_dependencies deps = get_dependencies(expr) - return aff_to_expr(aff_from_expr( + return aff_to_expr(guarded_aff_from_expr( isl.Space.create_from_names(isl.DEFAULT_CONTEXT, list(deps)), expr)) diff --git a/test/test_apps.py b/test/test_apps.py index e07262dbd..37f1fcf9b 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -449,23 +449,23 @@ def test_lbm(ctx_factory): # Example by Loic Gouarin knl = lp.make_kernel( "{[ii,jj]:0<=ii m[0] = + f[i-1, j, 0] + f[i, j-1, 1] + f[i+1, j, 2] + f[i, j+1, 3] - m[1] = + 4.*f[i-1, j, 0] - 4.*f[i+1, j, 2] - m[2] = + 4.*f[i, j-1, 1] - 4.*f[i, j+1, 3] - m[3] = + f[i-1, j, 0] - f[i, j-1, 1] + f[i+1, j, 2] - f[i, j+1, 3] - m[4] = + f[i-1, j, 4] + f[i, j-1, 5] + f[i+1, j, 6] + f[i, j+1, 7] - m[5] = + 4.*f[i-1, j, 4] - 4.*f[i+1, j, 6] - m[6] = + 4.*f[i, j-1, 5] - 4.*f[i, j+1, 7] - m[7] = + f[i-1, j, 4] - f[i, j-1, 5] + f[i+1, j, 6] - f[i, j+1, 7] - m[8] = + f[i-1, j, 8] + f[i, j-1, 9] + f[i+1, j, 10] + f[i, j+1, 11] - m[9] = + 4.*f[i-1, j, 8] - 4.*f[i+1, j, 10] - m[10] = + 4.*f[i, j-1, 9] - 4.*f[i, j+1, 11] - m[11] = + f[i-1, j, 8] - f[i, j-1, 9] + f[i+1, j, 10] - f[i, j+1, 11] + m[1] = + 4.*f[i-1, j, 0] - 4.*f[i+1, j, 2] + m[2] = + 4.*f[i, j-1, 1] - 4.*f[i, j+1, 3] + m[3] = + f[i-1, j, 0] - f[i, j-1, 1] + f[i+1, j, 2] - f[i, j+1, 3] + m[4] = + f[i-1, j, 4] + f[i, j-1, 5] + f[i+1, j, 6] + f[i, j+1, 7] + m[5] = + 4.*f[i-1, j, 4] - 4.*f[i+1, j, 6] + m[6] = + 4.*f[i, j-1, 5] - 4.*f[i, j+1, 7] + m[7] = + f[i-1, j, 4] - f[i, j-1, 5] + f[i+1, j, 6] - f[i, j+1, 7] + m[8] = + f[i-1, j, 8] + f[i, j-1, 9] + f[i+1, j, 10] + f[i, j+1, 11] + m[9] = + 4.*f[i-1, j, 8] - 4.*f[i+1, j, 10] + m[10] = + 4.*f[i, j-1, 9] - 4.*f[i, j+1, 11] + m[11] = + f[i-1, j, 8] - f[i, j-1, 9] + f[i+1, j, 10] - f[i, j+1, 11] end with {id_prefix=update_m,dep=init_m*} diff --git a/test/test_transform.py b/test/test_transform.py index 6eb6697b5..918db291f 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -585,6 +585,82 @@ def test_extract_subst_with_iname_deps_in_templ(ctx_factory): lp.auto_test_vs_ref(knl, ctx_factory(), knl) +def test_precompute_with_aff_and_constant_wrt_sweep_terms(ctx_factory): + ctx = ctx_factory() + knl = lp.make_kernel( + "{[i,j,k]: 0<=i<100 and 0<=j<32 and 0<=k<8}", + """ + for i + for j + x[j] = 3.14 * (i + j) + y[j] = 0 + for k + y[j] = y[j] + L[map[i], k, j] *x[j] + end + out[i, j] = y[j] + end + end + """, + [ + lp.GlobalArg('L', dtype=np.float64, shape=(1, 8, 32)), + lp.GlobalArg('map', dtype=np.int32, shape=lp.auto), + lp.TemporaryVariable('x', dtype=np.float64, shape=(32,)), + lp.TemporaryVariable('y', dtype=np.float64, shape=(32,)), + '...'], + seq_dependencies=True, + lang_version=(2018, 2) + ) + + ref_knl = knl + + knl = lp.add_prefetch(knl, "L", sweep_inames=["j", "k"], + temporary_scope=lp.AddressSpace.LOCAL, fetch_outer_inames="i", + default_tag=None, dim_arg_names=["iprftch0", "iprftch1", "iprftch2"]) + + knl = lp.tag_inames(knl, "i:g.0, j:l.0, iprftch2:l.0") + + lp.auto_test_vs_ref(ref_knl, ctx, ref_knl, parameters={"map": + np.zeros(100, dtype=np.int32)}) + + +def test_precompute_with_nonaff_and_constant_wrt_sweep_terms(ctx_factory): + ctx = ctx_factory() + knl = lp.make_kernel( + "{[i,j,k]: 0<=i<100 and 0<=j<32 and 0<=k<8}", + """ + for i + for j + x[j] = 3.14 * (i + j) + y[j] = 0 + for k + y[j] = y[j] + L[map[i]*map[i], k, j] *x[j] + end + out[i, j] = y[j] + end + end + """, + [ + lp.GlobalArg('L', dtype=np.float64, shape=(1, 8, 32)), + lp.GlobalArg('map', dtype=np.int32, shape=lp.auto), + lp.TemporaryVariable('x', dtype=np.float64, shape=(32,)), + lp.TemporaryVariable('y', dtype=np.float64, shape=(32,)), + '...'], + seq_dependencies=True, + lang_version=(2018, 2) + ) + + ref_knl = knl + + knl = lp.add_prefetch(knl, "L", sweep_inames=["j", "k"], + temporary_scope=lp.AddressSpace.LOCAL, fetch_outer_inames="i", + default_tag=None, dim_arg_names=["iprftch0", "iprftch1", "iprftch2"]) + + knl = lp.tag_inames(knl, "i:g.0, j:l.0, iprftch2:l.0") + + lp.auto_test_vs_ref(ref_knl, ctx, ref_knl, parameters={"map": + np.zeros(100, dtype=np.int32)}) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From a4436c3ea2da9c273fb45ed5dd032015d277c6e9 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 14 Jan 2020 12:03:46 -0600 Subject: [PATCH 02/21] less stricter rule invocation gathering --- loopy/transform/precompute.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 9f426f76b..f93b690e3 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -31,6 +31,7 @@ from loopy.symbolic import (get_dependencies, SubstitutionRuleMappingContext) from loopy.diagnostic import LoopyError from pymbolic.mapper.substitutor import make_subst_func +from pytools import memoize_method import numpy as np from pymbolic import var @@ -62,7 +63,10 @@ def storage_axis_exprs(storage_axis_sources, args): # {{{ gather rule invocations class RuleInvocationGatherer(RuleAwareIdentityMapper): - def __init__(self, rule_mapping_context, kernel, subst_name, subst_tag, within): + def __init__(self, rule_mapping_context, kernel, subst_name, subst_tag, + within, sweep_inames): + assert isinstance(sweep_inames, frozenset) + super(RuleInvocationGatherer, self).__init__(rule_mapping_context) from loopy.symbolic import SubstitutionRuleExpander @@ -73,9 +77,22 @@ class RuleInvocationGatherer(RuleAwareIdentityMapper): self.subst_name = subst_name self.subst_tag = subst_tag self.within = within + self.sweep_inames = sweep_inames self.access_descriptors = [] + @memoize_method + def vars_constant_during_sweeping(self): + """ + Returns a :class:`frozenset` of variables which are invariant wrt + the sweep. + """ + from loopy.kernel.instruction import MultiAssignmentBase + return self.kernel.all_variable_names() - frozenset().union(*( + frozenset(insn.assignees) for insn in self.kernel.instructions if + isinstance(insn, MultiAssignmentBase) and insn.within_inames & + self.sweep_inames)) + def map_substitution(self, name, tag, arguments, expn_state): process_me = name == self.subst_name @@ -100,10 +117,7 @@ class RuleInvocationGatherer(RuleAwareIdentityMapper): arg_deps = (arg_deps | get_dependencies(self.subst_expander(arg_val))) - # FIXME: This is too strict--and the footprint machinery - # needs to be taught how to deal with locally constant - # variables. - if not arg_deps <= self.kernel.all_inames(): + if not arg_deps <= self.vars_constant_during_sweeping(): from warnings import warn warn("Precompute arguments in '%s(%s)' do not consist exclusively " "of inames and constants--specifically, these are " @@ -516,7 +530,8 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, rule_mapping_context = SubstitutionRuleMappingContext( kernel.substitutions, kernel.get_var_name_generator()) invg = RuleInvocationGatherer( - rule_mapping_context, kernel, subst_name, subst_tag, within) + rule_mapping_context, kernel, subst_name, subst_tag, within, + sweep_inames_set) del rule_mapping_context import loopy as lp -- GitLab From 780882f8443df32420fe2237e73b05ccdc894713 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 14 Jan 2020 12:04:10 -0600 Subject: [PATCH 03/21] adds precompute with offset test --- test/test_transform.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/test_transform.py b/test/test_transform.py index 918db291f..d4cf4cff4 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -661,6 +661,20 @@ def test_precompute_with_nonaff_and_constant_wrt_sweep_terms(ctx_factory): np.zeros(100, dtype=np.int32)}) +def test_precompute_with_offsets(ctx_factory): + knl = lp.make_kernel( + "{[i, j, n]:0<=i<10 and 0<=j<9 and 0<=n<100}", + """ + subst(x) := x + y[n] = sum((i, j), subst(map[n]+i)*subst(map[n]+1+j)) + """, [lp.GlobalArg('map', dtype=np.int32, shape=(100,)), '...']) + + knl = lp.precompute(knl, 'subst', sweep_inames='i') + assert 'subst' not in knl.substitutions + + print(knl) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From 3987e4d348a91c1ee0f0a376a9ecb4bd65ca7cea Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 14 Jan 2020 16:09:55 -0600 Subject: [PATCH 04/21] some more tests for the non-affine sweep precompute work --- test/test_transform.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/test/test_transform.py b/test/test_transform.py index d4cf4cff4..0f9a410a2 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -661,7 +661,7 @@ def test_precompute_with_nonaff_and_constant_wrt_sweep_terms(ctx_factory): np.zeros(100, dtype=np.int32)}) -def test_precompute_with_offsets(ctx_factory): +def test_precompute_with_same_offsets(ctx_factory): knl = lp.make_kernel( "{[i, j, n]:0<=i<10 and 0<=j<9 and 0<=n<100}", """ @@ -675,6 +675,37 @@ def test_precompute_with_offsets(ctx_factory): print(knl) +def test_precompute_with_variable_offsets(ctx_factory): + knl = lp.make_kernel( + "{[i, j, n]:0<=i<10 and 0<=j<9 and 0<=n<100}", + """ + subst(x) := x + y[n] = sum((i, j), subst(map1[n]+i)*subst(map2[n]+1+j)) + """, [lp.GlobalArg('map1, map2', dtype=np.int32, shape=(100,)), '...']) + + knl = lp.precompute(knl, 'subst', sweep_inames='i') + assert 'subst' in knl.substitutions + + print(knl) + + +def test_precompute_with_variable_offsets_included_in_sweep(ctx_factory): + knl = lp.make_kernel( + "{[i, j, n]:0<=i<10 and 0<=j<9 and 0<=n<100}", + """ + subst(x) := x + y[n] = sum((i, j), subst(n**2+i)*subst(n**3+1+j)) + """, [lp.GlobalArg('map1, map2', dtype=np.int32, shape=(100,)), '...']) + + raise NotImplementedError("Must check that precomputing is not possible," + " and the following invocation of precomputing fails") + + knl = lp.precompute(knl, 'subst', sweep_inames='i,j') + assert 'subst' in knl.substitutions + + print(knl) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From dd8d637ec75dc4148078804c07e1dfbdaf5a5ff8 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 14 Jan 2020 16:12:28 -0600 Subject: [PATCH 05/21] wip:identifying offsets --- loopy/transform/precompute.py | 68 +++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 3 deletions(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index f93b690e3..ded9d91c5 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -28,7 +28,7 @@ from six.moves import range, zip import islpy as isl from loopy.symbolic import (get_dependencies, RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, - SubstitutionRuleMappingContext) + SubstitutionRuleMappingContext, CoefficientCollector) from loopy.diagnostic import LoopyError from pymbolic.mapper.substitutor import make_subst_func from pytools import memoize_method @@ -268,6 +268,15 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): # }}} +class SweepInamesCoefficientCollector(CoefficientCollector): + def map_constant(self, expr): + return super(SweepInamesCoefficientCollector, self).map_constant(expr) + + map_power = map_constant + map_subscript = map_constant + map_call = map_constant + + class _not_provided(object): # noqa: N801 pass @@ -667,6 +676,58 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, # }}} + # {{{ figure out the offsets + + sweep_dependency = [False for _ in subst.arguments] + sweep_inames_as_vars = [Variable(s) for s in sweep_inames_set] + for accdesc in access_descriptors: + for i, sax in enumerate(accdesc.storage_axis_exprs): + if get_dependencies(sax) & sweep_inames_set: + sweep_dependency[i] = True + + offsets = [None for _ in subst.arguments] + sweep_inames_as_vars = [Variable(s) for s in sweep_inames_set] + from numbers import Integral + from pymbolic.primitives import Sum + from pymbolic.mapper.distributor import distribute + + for accdesc in access_descriptors: + for i, sax in enumerate(accdesc.storage_axis_exprs): + coeffs = SweepInamesCoefficientCollector(sweep_inames_set)(sax) + offst = coeffs.get(1, 0) + if isinstance(offst, Sum): + try: + cnst, = [child for child in offst.children if + isinstance(child, Integral)] + except ValueError: + cnst = 0 + elif isinstance(offst, Integral): + cnst = offst + else: + cnst = 0 + + offst = distribute(offst-cnst) + + if (sweep_dependency[i] and (sweep_inames_as_vars & + six.viewkeys(coeffs))) or not sweep_dependency[i]: + if offsets[i] is None: + offsets[i] = offst + else: + if offsets[i] != offst: + raise LoopyError("Offsets in invocations of '%s' do not" + " match." % subst_name) + + offsets = [offst if offst is not None else 0 for offst in offsets] + + def substract_offsets(accdesc): + saxes = [distribute(saxis-offst) for saxis, offst in + zip(accdesc.storage_axis_exprs, offsets)] + return accdesc.copy(storage_axis_exprs=saxes) + + access_descriptors = map(substract_offsets, access_descriptors) + + # }}} + expanding_inames = sweep_inames_set | frozenset(expanding_usage_arg_deps) assert expanding_inames <= kernel.all_inames() @@ -831,14 +892,15 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, storage_axis_subst_dict = {} - for arg_name, bi in zip(storage_axis_names, abm.storage_base_indices): + for arg_name, bi, off in zip(storage_axis_names, abm.storage_base_indices, + offsets): if arg_name in non1_storage_axis_names: arg = var(arg_name) else: arg = 0 storage_axis_subst_dict[ - prior_storage_axis_name_dict.get(arg_name, arg_name)] = arg+bi + prior_storage_axis_name_dict.get(arg_name, arg_name)] = arg+bi+off rule_mapping_context = SubstitutionRuleMappingContext( kernel.substitutions, kernel.get_var_name_generator()) -- GitLab From da6d80aff3f09336681a512d470a3a2b2fec1602 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 14 Jan 2020 16:16:39 -0600 Subject: [PATCH 06/21] eww.. subtract not substract --- loopy/transform/precompute.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index ded9d91c5..8c76bc1ed 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -719,12 +719,12 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, offsets = [offst if offst is not None else 0 for offst in offsets] - def substract_offsets(accdesc): + def subtract_offsets(accdesc): saxes = [distribute(saxis-offst) for saxis, offst in zip(accdesc.storage_axis_exprs, offsets)] return accdesc.copy(storage_axis_exprs=saxes) - access_descriptors = map(substract_offsets, access_descriptors) + access_descriptors = map(subtract_offsets, access_descriptors) # }}} -- GitLab From 85917d7077b0d19538fc1938742cd050eb1beeaa Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 14 Jan 2020 17:30:37 -0600 Subject: [PATCH 07/21] implements offsets for non-affine parts of a sweep --- loopy/transform/precompute.py | 15 +++++++++++---- test/test_transform.py | 11 ++++------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 8c76bc1ed..ed34e4f6b 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -151,7 +151,7 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): storage_axis_names, storage_axis_sources, non1_storage_axis_names, temporary_name, compute_insn_id, compute_dep_id, - compute_read_variables): + compute_read_variables, offsets): super(RuleInvocationReplacer, self).__init__(rule_mapping_context) self.subst_name = subst_name @@ -171,8 +171,12 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): self.compute_read_variables = compute_read_variables self.compute_insn_depends_on = set() + self.offsets = offsets def map_substitution(self, name, tag, arguments, expn_state): + from pymbolic.mapper.distributor import distribute + arguments = [distribute(arg - off) for arg, off in zip(arguments, + self.offsets)] if not ( name == self.subst_name and self.within( @@ -722,9 +726,11 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, def subtract_offsets(accdesc): saxes = [distribute(saxis-offst) for saxis, offst in zip(accdesc.storage_axis_exprs, offsets)] - return accdesc.copy(storage_axis_exprs=saxes) + args = [distribute(arg-offst) for arg, offst in + zip(accdesc.args, offsets)] + return accdesc.copy(args=args, storage_axis_exprs=saxes) - access_descriptors = map(subtract_offsets, access_descriptors) + access_descriptors = list(map(subtract_offsets, access_descriptors)) # }}} @@ -954,7 +960,8 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, storage_axis_names, storage_axis_sources, non1_storage_axis_names, temporary_name, compute_insn_id, compute_dep_id, - compute_read_variables=get_dependencies(expander(compute_expression))) + compute_read_variables=get_dependencies(expander(compute_expression)), + offsets=offsets) kernel = invr.map_kernel(kernel) kernel = kernel.copy( diff --git a/test/test_transform.py b/test/test_transform.py index 0f9a410a2..3d6851d02 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -676,6 +676,8 @@ def test_precompute_with_same_offsets(ctx_factory): def test_precompute_with_variable_offsets(ctx_factory): + # subst(map1[n]+1+j) does not lie in the sweep footprint + # and hence should not be substituted. knl = lp.make_kernel( "{[i, j, n]:0<=i<10 and 0<=j<9 and 0<=n<100}", """ @@ -697,13 +699,8 @@ def test_precompute_with_variable_offsets_included_in_sweep(ctx_factory): y[n] = sum((i, j), subst(n**2+i)*subst(n**3+1+j)) """, [lp.GlobalArg('map1, map2', dtype=np.int32, shape=(100,)), '...']) - raise NotImplementedError("Must check that precomputing is not possible," - " and the following invocation of precomputing fails") - - knl = lp.precompute(knl, 'subst', sweep_inames='i,j') - assert 'subst' in knl.substitutions - - print(knl) + with pytest.raises(lp.LoopyError): + knl = lp.precompute(knl, 'subst', sweep_inames='i,j') if __name__ == "__main__": -- GitLab From aeb57172dcf43b5a0b13d83f1897d05a1777b41e Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 14 Jan 2020 18:13:46 -0600 Subject: [PATCH 08/21] enable non-affine expression for substitutions which are not within the footprint --- loopy/transform/precompute.py | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index ed34e4f6b..54a7c8163 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -198,7 +198,15 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): storage_axis_exprs=storage_axis_exprs( self.storage_axis_sources, args)) - if not self.array_base_map.is_access_descriptor_in_footprint(accdesc): + from loopy.diagnostic import ExpressionToAffineConversionError + + try: + process_me = self.array_base_map.is_access_descriptor_in_footprint( + accdesc) + except ExpressionToAffineConversionError: + process_me = False + + if not process_me: return super(RuleInvocationReplacer, self).map_substitution( name, tag, arguments, expn_state) @@ -734,6 +742,31 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, # }}} + # {{{ after getting the offsets get rid of accesss descriptors which aren't + # affine + + from loopy.symbolic import guarded_aff_from_expr + from loopy.diagnostic import ExpressionToAffineConversionError + aff_access_descs = [] + for accdesc in access_descriptors: + for saxis in accdesc.storage_axis_exprs: + try: + guarded_aff_from_expr(isl.Space.create_from_names(kernel.isl_context, + set=sweep_inames_set), saxis) + aff_access_descs.append(accdesc) + except ExpressionToAffineConversionError: + pass + + if aff_access_descs == []: + raise ExpressionToAffineConversionError("None of the invocations of" + " '{}' are affine wrt the sweep inames".format(subst_name)) + + access_descriptors = aff_access_descs[:] + + del aff_access_descs + + # }}} + expanding_inames = sweep_inames_set | frozenset(expanding_usage_arg_deps) assert expanding_inames <= kernel.all_inames() -- GitLab From 1abf2b0321d6ffbd68bc42d114099a8713b57e5d Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 14 Jan 2020 19:42:38 -0600 Subject: [PATCH 09/21] fixes to minor errors in non-affine sweep implementation --- loopy/transform/precompute.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 54a7c8163..67f6c3f38 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -174,9 +174,6 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): self.offsets = offsets def map_substitution(self, name, tag, arguments, expn_state): - from pymbolic.mapper.distributor import distribute - arguments = [distribute(arg - off) for arg, off in zip(arguments, - self.offsets)] if not ( name == self.subst_name and self.within( @@ -187,6 +184,10 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): return super(RuleInvocationReplacer, self).map_substitution( name, tag, arguments, expn_state) + from pymbolic.mapper.distributor import distribute + arguments = [distribute(arg - off) for arg, off in zip(arguments, + self.offsets)] + # {{{ check if in footprint rule = self.rule_mapping_context.old_subst_rules[name] @@ -207,6 +208,10 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): process_me = False if not process_me: + # subst will remain as a "call" node => re-add the offsets + from pymbolic.mapper.distributor import distribute + arguments = [distribute(arg + off) for arg, off in zip(arguments, + self.offsets)] return super(RuleInvocationReplacer, self).map_substitution( name, tag, arguments, expn_state) @@ -725,7 +730,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, if offsets[i] is None: offsets[i] = offst else: - if offsets[i] != offst: + if distribute(offsets[i] - offst) != 0: raise LoopyError("Offsets in invocations of '%s' do not" " match." % subst_name) -- GitLab From a135f0be8c7e6617a08e8f3cc25fe5badecc49cd Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 14 Jan 2020 20:02:09 -0600 Subject: [PATCH 10/21] flake8: ignore N815; fix over-indentation --- loopy/transform/precompute.py | 12 ++++++------ setup.cfg | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 67f6c3f38..0599f5305 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -727,12 +727,12 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, if (sweep_dependency[i] and (sweep_inames_as_vars & six.viewkeys(coeffs))) or not sweep_dependency[i]: - if offsets[i] is None: - offsets[i] = offst - else: - if distribute(offsets[i] - offst) != 0: - raise LoopyError("Offsets in invocations of '%s' do not" - " match." % subst_name) + if offsets[i] is None: + offsets[i] = offst + else: + if distribute(offsets[i] - offst) != 0: + raise LoopyError("Offsets in invocations of '%s' do not" + " match." % subst_name) offsets = [offst if offst is not None else 0 for offst in offsets] diff --git a/setup.cfg b/setup.cfg index eec3dfd1f..ab5bd8916 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [flake8] -ignore = E126,E127,E128,E123,E226,E241,E242,E265,N802,W503,E402,N814,W504 +ignore = E126,E127,E128,E123,E226,E241,E242,E265,N802,W503,E402,N814,N815,W504 max-line-length=85 exclude= loopy/target/c/compyte/ndarray, -- GitLab From e6914e0dc6c1fcfac7468876eb3f28ae4ce67ce3 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 14 Jan 2020 20:13:35 -0600 Subject: [PATCH 11/21] precompute: take care of the "extra" arguments that are added --- loopy/transform/precompute.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 0599f5305..9db562d89 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -695,14 +695,14 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, # {{{ figure out the offsets - sweep_dependency = [False for _ in subst.arguments] + sweep_dependency = [False for _ in access_descriptors[0].storage_axis_exprs] sweep_inames_as_vars = [Variable(s) for s in sweep_inames_set] for accdesc in access_descriptors: for i, sax in enumerate(accdesc.storage_axis_exprs): if get_dependencies(sax) & sweep_inames_set: sweep_dependency[i] = True - offsets = [None for _ in subst.arguments] + offsets = [None for _ in access_descriptors[0].storage_axis_exprs] sweep_inames_as_vars = [Variable(s) for s in sweep_inames_set] from numbers import Integral from pymbolic.primitives import Sum -- GitLab From 767502bb0955068700795fad4fb1bb88c765c99e Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 14 Jan 2020 22:57:09 -0600 Subject: [PATCH 12/21] test_prefetch_through_indirect_addressing is a feature unit test, so test_transform is a better place --- test/test_apps.py | 22 ---------------------- test/test_transform.py | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/test/test_apps.py b/test/test_apps.py index 37f1fcf9b..0ce953e93 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -43,8 +43,6 @@ else: from pyopencl.tools import pytest_generate_tests_for_pyopencl \ as pytest_generate_tests -from loopy.diagnostic import LoopyError - __all__ = [ "pytest_generate_tests", "cl" # 'cl.create_some_context' @@ -674,26 +672,6 @@ def test_domain_tree_nesting(): assert depth(i) < 2 -def test_prefetch_through_indirect_access(): - knl = lp.make_kernel("{[i, j, k]: 0 <= i,k < 10 and 0<=j<2}", - """ - for i, j, k - a[map1[indirect[i], j], k] = 2 - end - """, - [ - lp.GlobalArg("a", strides=(2, 1), dtype=int), - lp.GlobalArg("map1", shape=(10, 10), dtype=int), - "..." - ], - target=lp.CTarget()) - - knl = lp.prioritize_loops(knl, "i,j,k") - - with pytest.raises(LoopyError): - knl = lp.add_prefetch(knl, "map1[:, j]") - - if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) diff --git a/test/test_transform.py b/test/test_transform.py index 3d6851d02..5bac662c9 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -570,6 +570,26 @@ def test_nested_substs_in_insns(ctx_factory): lp.auto_test_vs_ref(ref_knl, ctx, knl) +def test_prefetch_through_indirect_access(): + knl = lp.make_kernel("{[i, j, k]: 0 <= i,k < 10 and 0<=j<2}", + """ + for i, j, k + a[map1[indirect[i], j], k] = 2 + end + """, + [ + lp.GlobalArg("a", strides=(2, 1), dtype=int), + lp.GlobalArg("map1", shape=(10, 10), dtype=int), + "..." + ], + target=lp.CTarget()) + + knl = lp.prioritize_loops(knl, "i,j,k") + + with pytest.raises(lp.LoopyError): + knl = lp.add_prefetch(knl, "map1[:, j]", default_tag='l.auto') + + def test_extract_subst_with_iname_deps_in_templ(ctx_factory): knl = lp.make_kernel( "{[i, j, k]: 0<=i<100 and 0<=j,k<5}", -- GitLab From a845c5f15e23c05bed343fd4fbb350d6458970b0 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 14 Jan 2020 23:06:31 -0600 Subject: [PATCH 13/21] assert atleast one substitution replaced with precompute temp --- loopy/transform/precompute.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 9db562d89..4218da074 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -172,6 +172,7 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): self.compute_read_variables = compute_read_variables self.compute_insn_depends_on = set() self.offsets = offsets + self.substituted_atleast_one = False def map_substitution(self, name, tag, arguments, expn_state): if not ( @@ -217,6 +218,10 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): # }}} + # at this point an invocation will surely be processed => set + # corresponding flag for later assertion + self.substituted_atleast_one = True + assert len(arguments) == len(rule.arguments) abm = self.array_base_map @@ -1006,6 +1011,10 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, instructions=added_compute_insns + kernel.instructions) kernel = rule_mapping_context.finish_kernel(kernel) + if not invr.substituted_atleast_one: + raise ExpressionToAffineConversionError("All invocations of '{}'" + " contained non-affine arguments.".format(subst_name)) + # }}} # {{{ add dependencies to compute insn -- GitLab From d3d0c6c22ebbb816d04e21bb7cbcfd6666735a20 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 14 Jan 2020 23:06:48 -0600 Subject: [PATCH 14/21] gets rid of some warnings --- test/test_transform.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/test_transform.py b/test/test_transform.py index 5bac662c9..50f0d7848 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -689,7 +689,7 @@ def test_precompute_with_same_offsets(ctx_factory): y[n] = sum((i, j), subst(map[n]+i)*subst(map[n]+1+j)) """, [lp.GlobalArg('map', dtype=np.int32, shape=(100,)), '...']) - knl = lp.precompute(knl, 'subst', sweep_inames='i') + knl = lp.precompute(knl, 'subst', sweep_inames='i', default_tag=None) assert 'subst' not in knl.substitutions print(knl) @@ -705,7 +705,7 @@ def test_precompute_with_variable_offsets(ctx_factory): y[n] = sum((i, j), subst(map1[n]+i)*subst(map2[n]+1+j)) """, [lp.GlobalArg('map1, map2', dtype=np.int32, shape=(100,)), '...']) - knl = lp.precompute(knl, 'subst', sweep_inames='i') + knl = lp.precompute(knl, 'subst', sweep_inames='i', default_tag=None) assert 'subst' in knl.substitutions print(knl) @@ -720,7 +720,8 @@ def test_precompute_with_variable_offsets_included_in_sweep(ctx_factory): """, [lp.GlobalArg('map1, map2', dtype=np.int32, shape=(100,)), '...']) with pytest.raises(lp.LoopyError): - knl = lp.precompute(knl, 'subst', sweep_inames='i,j') + knl = lp.precompute(knl, 'subst', sweep_inames='i,j', + default_tag=None) if __name__ == "__main__": -- GitLab From e6b8b988ca2e18734923669aec677ebbfec3ef7d Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 14 Jan 2020 23:28:50 -0600 Subject: [PATCH 15/21] fix a bug in checking which accesses are affine --- loopy/transform/precompute.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 4218da074..5379cfe4b 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -759,13 +759,14 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, from loopy.diagnostic import ExpressionToAffineConversionError aff_access_descs = [] for accdesc in access_descriptors: - for saxis in accdesc.storage_axis_exprs: - try: - guarded_aff_from_expr(isl.Space.create_from_names(kernel.isl_context, - set=sweep_inames_set), saxis) - aff_access_descs.append(accdesc) - except ExpressionToAffineConversionError: - pass + try: + for saxis in accdesc.storage_axis_exprs: + guarded_aff_from_expr(isl.Space.create_from_names( + kernel.isl_context, set=sweep_inames_set), saxis) + # all storage axes exprs are affine terms of sweep inames + aff_access_descs.append(accdesc) + except ExpressionToAffineConversionError: + pass if aff_access_descs == []: raise ExpressionToAffineConversionError("None of the invocations of" -- GitLab From a8c04a3d83e35afab5bc7e1a607ee1738754d9eb Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 15 Jan 2020 00:16:19 -0600 Subject: [PATCH 16/21] comments; docs --- loopy/transform/precompute.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 5379cfe4b..581d27e5c 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -206,6 +206,7 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): process_me = self.array_base_map.is_access_descriptor_in_footprint( accdesc) except ExpressionToAffineConversionError: + # accdesc has non-affine storage axes exprs=>cannot be processed process_me = False if not process_me: @@ -291,6 +292,9 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): class SweepInamesCoefficientCollector(CoefficientCollector): + """ + Coefficient Collector which casts non-linear terms as constants. + """ def map_constant(self, expr): return super(SweepInamesCoefficientCollector, self).map_constant(expr) @@ -698,8 +702,9 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, # }}} - # {{{ figure out the offsets + # {{{ realize non-sweep offset terms + # 'sweep_dependency[i]' is True for a storage axes #i if axes #i is being swept. sweep_dependency = [False for _ in access_descriptors[0].storage_axis_exprs] sweep_inames_as_vars = [Variable(s) for s in sweep_inames_set] for accdesc in access_descriptors: @@ -717,6 +722,10 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, for i, sax in enumerate(accdesc.storage_axis_exprs): coeffs = SweepInamesCoefficientCollector(sweep_inames_set)(sax) offst = coeffs.get(1, 0) + + # {{{ ISL works fine with integral constants => do not include them + # in offsets + if isinstance(offst, Sum): try: cnst, = [child for child in offst.children if @@ -730,6 +739,8 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, offst = distribute(offst-cnst) + # }}} + if (sweep_dependency[i] and (sweep_inames_as_vars & six.viewkeys(coeffs))) or not sweep_dependency[i]: if offsets[i] is None: -- GitLab From e2e9ae9a6825509fe7976ed27a8959aaee1f74b9 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 23 Jan 2020 12:44:32 -0600 Subject: [PATCH 17/21] reverts "flake8:ignore N815" --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index ab5bd8916..eec3dfd1f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [flake8] -ignore = E126,E127,E128,E123,E226,E241,E242,E265,N802,W503,E402,N814,N815,W504 +ignore = E126,E127,E128,E123,E226,E241,E242,E265,N802,W503,E402,N814,W504 max-line-length=85 exclude= loopy/target/c/compyte/ndarray, -- GitLab From 6cb0362dd3a93ce134e400c2d27d2c54cf0e43c9 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 23 Jan 2020 12:54:00 -0600 Subject: [PATCH 18/21] do not override lang_version at the kernel creation level --- test/test_transform.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/test/test_transform.py b/test/test_transform.py index 50f0d7848..d94c38f37 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -596,8 +596,7 @@ def test_extract_subst_with_iname_deps_in_templ(ctx_factory): """ y[i, j, k] = x[i, j, k] """, - [lp.GlobalArg('x,y', shape=lp.auto, dtype=float)], - lang_version=(2018, 2)) + [lp.GlobalArg('x,y', shape=lp.auto, dtype=float)]) knl = lp.extract_subst(knl, 'rule1', 'x[i, arg1, arg2]', parameters=('arg1', 'arg2')) @@ -628,7 +627,6 @@ def test_precompute_with_aff_and_constant_wrt_sweep_terms(ctx_factory): lp.TemporaryVariable('y', dtype=np.float64, shape=(32,)), '...'], seq_dependencies=True, - lang_version=(2018, 2) ) ref_knl = knl @@ -666,7 +664,6 @@ def test_precompute_with_nonaff_and_constant_wrt_sweep_terms(ctx_factory): lp.TemporaryVariable('y', dtype=np.float64, shape=(32,)), '...'], seq_dependencies=True, - lang_version=(2018, 2) ) ref_knl = knl -- GitLab From 3aeca60e0be9610537d8013145f327f106e38937 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 23 Jan 2020 13:35:34 -0600 Subject: [PATCH 19/21] review: remove code bloat by pytest parametrization --- test/test_transform.py | 47 +++++++----------------------------------- 1 file changed, 7 insertions(+), 40 deletions(-) diff --git a/test/test_transform.py b/test/test_transform.py index d94c38f37..f60f6cbe4 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -604,7 +604,11 @@ def test_extract_subst_with_iname_deps_in_templ(ctx_factory): lp.auto_test_vs_ref(knl, ctx_factory(), knl) -def test_precompute_with_aff_and_constant_wrt_sweep_terms(ctx_factory): +@pytest.mark.parametrize("expr", [ + "map[i]", + "map[i]*map[i]" +]) +def test_precompute_with_constant_wrt_sweep_terms(ctx_factory, expr): ctx = ctx_factory() knl = lp.make_kernel( "{[i,j,k]: 0<=i<100 and 0<=j<32 and 0<=k<8}", @@ -614,49 +618,12 @@ def test_precompute_with_aff_and_constant_wrt_sweep_terms(ctx_factory): x[j] = 3.14 * (i + j) y[j] = 0 for k - y[j] = y[j] + L[map[i], k, j] *x[j] + y[j] = y[j] + L[{expr}, k, j] *x[j] end out[i, j] = y[j] end end - """, - [ - lp.GlobalArg('L', dtype=np.float64, shape=(1, 8, 32)), - lp.GlobalArg('map', dtype=np.int32, shape=lp.auto), - lp.TemporaryVariable('x', dtype=np.float64, shape=(32,)), - lp.TemporaryVariable('y', dtype=np.float64, shape=(32,)), - '...'], - seq_dependencies=True, - ) - - ref_knl = knl - - knl = lp.add_prefetch(knl, "L", sweep_inames=["j", "k"], - temporary_scope=lp.AddressSpace.LOCAL, fetch_outer_inames="i", - default_tag=None, dim_arg_names=["iprftch0", "iprftch1", "iprftch2"]) - - knl = lp.tag_inames(knl, "i:g.0, j:l.0, iprftch2:l.0") - - lp.auto_test_vs_ref(ref_knl, ctx, ref_knl, parameters={"map": - np.zeros(100, dtype=np.int32)}) - - -def test_precompute_with_nonaff_and_constant_wrt_sweep_terms(ctx_factory): - ctx = ctx_factory() - knl = lp.make_kernel( - "{[i,j,k]: 0<=i<100 and 0<=j<32 and 0<=k<8}", - """ - for i - for j - x[j] = 3.14 * (i + j) - y[j] = 0 - for k - y[j] = y[j] + L[map[i]*map[i], k, j] *x[j] - end - out[i, j] = y[j] - end - end - """, + """.format(expr=expr), [ lp.GlobalArg('L', dtype=np.float64, shape=(1, 8, 32)), lp.GlobalArg('map', dtype=np.int32, shape=lp.auto), -- GitLab From fe5dbe0021212e431db28dab19c31adf84f9bea3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= Date: Mon, 27 Jan 2020 18:28:46 +0100 Subject: [PATCH 20/21] Revert use of guarded_aff_from_expr in simplify_via_aff --- loopy/isl_helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py index 446302197..1b2a83aad 100644 --- a/loopy/isl_helpers.py +++ b/loopy/isl_helpers.py @@ -430,9 +430,9 @@ def boxify(cache_manager, domain, box_inames, context): def simplify_via_aff(expr): - from loopy.symbolic import guarded_aff_from_expr, aff_to_expr, get_dependencies + from loopy.symbolic import aff_from_expr, aff_to_expr, get_dependencies deps = get_dependencies(expr) - return aff_to_expr(guarded_aff_from_expr( + return aff_to_expr(aff_from_expr( isl.Space.create_from_names(isl.DEFAULT_CONTEXT, list(deps)), expr)) -- GitLab From ee54f7998f174816e9a0d816312f055cc20093f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= Date: Tue, 28 Jan 2020 00:35:34 +0100 Subject: [PATCH 21/21] Minor style tweak in RuleInvocationGatherer --- loopy/transform/precompute.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 581d27e5c..57955acd3 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -88,10 +88,12 @@ class RuleInvocationGatherer(RuleAwareIdentityMapper): the sweep. """ from loopy.kernel.instruction import MultiAssignmentBase - return self.kernel.all_variable_names() - frozenset().union(*( - frozenset(insn.assignees) for insn in self.kernel.instructions if - isinstance(insn, MultiAssignmentBase) and insn.within_inames & - self.sweep_inames)) + return self.kernel.all_variable_names() - frozenset().union( + *( + frozenset(insn.assignees) + for insn in self.kernel.instructions + if isinstance(insn, MultiAssignmentBase) + and insn.within_inames & self.sweep_inames)) def map_substitution(self, name, tag, arguments, expn_state): process_me = name == self.subst_name -- GitLab