From 655b07d24f6f4fffde31f7206fe30445e46d8823 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 26 Mar 2012 01:43:46 -0400 Subject: [PATCH] Improve the user interface of precompute(), by unifying subst_name and footprint_generators. --- loopy/__init__.py | 13 +++-- loopy/compiled.py | 6 ++- loopy/cse.py | 115 ++++++++++++++++++++++++++++------------ test/test_linalg.py | 7 +-- test/test_sem_reagan.py | 32 +++-------- 5 files changed, 100 insertions(+), 73 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 913479d78..a35060141 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -22,7 +22,7 @@ class LoopyAdvisory(UserWarning): from loopy.kernel import ScalarArg, ArrayArg, ConstantArrayArg, ImageArg -from loopy.kernel import AutoFitLocalIndexTag, get_dot_dependency_graph +from loopy.kernel import AutoFitLocalIndexTag, get_dot_dependency_graph, LoopKernel from loopy.subst import extract_subst, expand_subst from loopy.cse import precompute from loopy.preprocess import preprocess_kernel, realize_reduction @@ -52,7 +52,6 @@ def make_kernel(*args, **kwargs): and temporary variable declaration received as part of string instructions. """ - from loopy.kernel import LoopKernel knl = LoopKernel(*args, **kwargs) knl = tag_dimensions( @@ -503,7 +502,6 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, kernel = extract_subst(kernel, rule_name, uni_template, parameters) - footprint_generators = None if footprint_subscripts is not None: if not isinstance(footprint_subscripts, (list, tuple)): @@ -530,11 +528,12 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, footprint_subscripts = [standardize_footprint_indices(si) for si in footprint_subscripts] from pymbolic.primitives import Variable - footprint_generators = [ - Variable(var_name)(*si) for si in footprint_subscripts] + subst_use = [ + Variable(rule_name)(*si) for si in footprint_subscripts] + else: + subst_use = rule_name - new_kernel = precompute(kernel, rule_name, arg.dtype, sweep_inames, - footprint_generators=footprint_generators, + new_kernel = precompute(kernel, subst_use, arg.dtype, sweep_inames, new_storage_axis_names=dim_arg_names, default_tag=default_tag) diff --git a/loopy/compiled.py b/loopy/compiled.py index 926d7a9d6..cc78d2b24 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -287,8 +287,10 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count, op_label, parameters, print_ref_code=False, print_code=True, warmup_rounds=2, edit_code=False, dump_binary=False, with_annotation=False, fills_entire_output=True, check_result=None): - """ - :arg check_result: a callable with :cls:`numpy.ndarray` arguments + """Compare results of `ref_knl` to the kernels generated by the generator + `kernel_gen`. + + :arg check_result: a callable with :class:`numpy.ndarray` arguments *(result, reference_result)* returning a a tuple (class:`bool`, message) indicating correctness/acceptability of the result """ diff --git a/loopy/cse.py b/loopy/cse.py index 398a80897..d46c5f7fe 100644 --- a/loopy/cse.py +++ b/loopy/cse.py @@ -305,22 +305,40 @@ def simplify_via_aff(expr): -def precompute(kernel, subst_name, dtype, sweep_inames=[], - footprint_generators=None, +def precompute(kernel, subst_use, dtype, sweep_inames=[], storage_axes=None, new_storage_axis_names=None, storage_axis_to_tag={}, default_tag="l.auto"): - """Precompute the expression described in the substitution rule *subst_name* - and store it in a temporary array. A precomputation needs two things to operate, - a list of *sweep_inames* (order irrelevant) and an ordered list of *storage_axes* - (whose order will describe the axis ordering of the temporary array). + """Precompute the expression described in the substitution rule determined by + *subst_use* and store it in a temporary array. A precomputation needs two + things to operate, a list of *sweep_inames* (order irrelevant) and an + ordered list of *storage_axes* (whose order will describe the axis ordering + of the temporary array). - *subst_name* may contain a period (".") to filter out a subset of the - usage sites of the substitution rule. (Namely those usage sites that - use the same dotted name.) + :arg subst_use: Describes what to prefetch. - This function will then examine the *footprint_generators* (or all usage - sites of the substitution rule if not specified) and determine what the - storage footprint of that sweep is. + The following objects may be given for *subst_use*: + + * The name of the substitution rule. + + * The tagged name ("name$tag") of the substitution rule. + + * A list of invocations of the substitution rule. + This list of invocations, when swept across *sweep_inames*, then serves + to define the footprint of the precomputation. + + Invocations may be tagged ("name$tag") to filter out a subset of the + usage sites of the substitution rule. (Namely those usage sites that + use the same tagged name.) + + Invocations may be given as a string or as a + :class:`pymbolic.primitives.Expression` object. + + If only one invocation is to be given, then the only entry of the list + may be given directly. + + If the list of invocations generating the footprint is not given, + all (tag-matching, if desired) usage sites of the substitution rule + are used to determine the footprint. The following cases can arise for each sweep axis: @@ -343,42 +361,65 @@ def precompute(kernel, subst_name, dtype, sweep_inames=[], eliminated. """ - # {{{ check arguments + # {{{ check, standardize arguments for iname in sweep_inames: if iname not in kernel.all_inames(): raise RuntimeError("sweep iname '%s' is not a known iname" % iname) - if footprint_generators is not None: - if isinstance(footprint_generators, str): - footprint_generators = [footprint_generators] + if isinstance(storage_axes, str): + raise TypeError("storage_axes may not be a string--likely a leftover " + "footprint_generators argument") - # }}} + if isinstance(subst_use, str): + subst_use = [subst_use] - from loopy.symbolic import SubstitutionCallbackMapper + footprint_generators = None - c_subst_name = subst_name.replace(".", "_") - subst_name, subst_tag = SubstitutionCallbackMapper.parse_filter(subst_name) + subst_name = None + subst_tag = None - from loopy.kernel import parse_tag - default_tag = parse_tag(default_tag) + from pymbolic.primitives import Variable, Call + from loopy.symbolic import parse, TaggedVariable - subst = kernel.substitutions[subst_name] - arg_names = subst.arguments + for use in subst_use: + if isinstance(use, str): + use = parse(use) - # {{{ create list of invocation descriptors + if isinstance(use, Call): + if footprint_generators is None: + footprint_generators = [] - invocation_descriptors = [] + footprint_generators.append(use) + subst_name_as_expr = use.function + else: + subst_name_as_expr = use + + if isinstance(subst_name_as_expr, Variable): + new_subst_name = subst_name_as_expr.name + new_subst_tag = None + elif isinstance(subst_name_as_expr, TaggedVariable): + new_subst_name = subst_name_as_expr.name + new_subst_tag = subst_name_as_expr.tag + else: + raise ValueError("unexpected type of subst_name") - # {{{ process invocations in footprint generators + if (subst_name, subst_tag) == (None, None): + subst_name, subst_tag = new_subst_name, new_subst_tag + else: + if (subst_name, subst_tag) != (new_subst_name, new_subst_tag): + raise ValueError("not all uses in subst_use agree " + "on rule name and tag") + + # }}} + + # {{{ process invocations in footprint generators, start invocation_descriptors + + invocation_descriptors = [] if footprint_generators: for fpg in footprint_generators: - if isinstance(fpg, str): - from loopy.symbolic import parse - fpg = parse(fpg) - from pymbolic.primitives import Variable, Call if isinstance(fpg, Variable): args = () @@ -395,7 +436,15 @@ def precompute(kernel, subst_name, dtype, sweep_inames=[], # }}} - # {{{ gather up invocations in kernel code + c_subst_name = subst_name.replace(".", "_") + + from loopy.kernel import parse_tag + default_tag = parse_tag(default_tag) + + subst = kernel.substitutions[subst_name] + arg_names = subst.arguments + + # {{{ gather up invocations in kernel code, finish invocation_descriptors current_subst_rule_stack = [] @@ -459,8 +508,6 @@ def precompute(kernel, subst_name, dtype, sweep_inames=[], # }}} - # }}} - sweep_inames = list(sweep_inames) # {{{ see if we need extra storage dimensions diff --git a/test/test_linalg.py b/test/test_linalg.py index 45112958f..a833a83fc 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -318,9 +318,6 @@ def test_rank_one(ctx_factory): return knl def variant_3(knl): - # Throws an error--doesn't use all hardware axes. - # Probably the right thing to do. - knl = lp.split_dimension(knl, "i", 16, outer_tag="g.0", inner_tag="l.0") knl = lp.split_dimension(knl, "j", 16, @@ -352,8 +349,8 @@ def test_rank_one(ctx_factory): seq_knl = knl - #for variant in [variant_1, variant_2, variant_4]: - for variant in [variant_2, variant_4]: + for variant in [variant_1, variant_2, variant_3, variant_4]: + #for variant in [variant_4]: kernel_gen = lp.generate_loop_schedules(variant(knl)) kernel_gen = lp.check_kernels(kernel_gen, dict(n=n)) diff --git a/test/test_sem_reagan.py b/test/test_sem_reagan.py index bdccb112f..c147c20b9 100644 --- a/test/test_sem_reagan.py +++ b/test/test_sem_reagan.py @@ -31,8 +31,8 @@ def test_tim2d(ctx_factory): #"Gu(mat_entry,a,b) := G[mat_entry,e,m,j]*ur(m,j)", - "Gux(a,b) := G[0,e,a,b]*ur(a,b)+G[1,e,a,b]*us(a,b)", - "Guy(a,b) := G[1,e,a,b]*ur(a,b)+G[2,e,a,b]*us(a,b)", + "Gux(a,b) := G$x[0,e,a,b]*ur(a,b)+G$x[1,e,a,b]*us(a,b)", + "Guy(a,b) := G$y[1,e,a,b]*ur(a,b)+G$y[2,e,a,b]*us(a,b)", "lap[e,i,j] = " " sum_float32(m, D[m,i]*Gux(m,j))" "+ sum_float32(m, D[m,j]*Guy(i,m))" @@ -57,24 +57,19 @@ def test_tim2d(ctx_factory): knl = lp.add_prefetch(knl, "D", ["m", "j", "i","o"]) knl = lp.add_prefetch(knl, "u", ["i", "j", "o"]) - knl = lp.precompute(knl, "ur", np.float32, ["m", "j"], "ur(m,j)") - knl = lp.precompute(knl, "us", np.float32, ["i", "m"], "us(i,m)") + knl = lp.precompute(knl, "ur(m,j)", np.float32, ["m", "j"]) + knl = lp.precompute(knl, "us(i,m)", np.float32, ["i", "m"]) - knl = lp.add_prefetch(knl, "G") + knl = lp.precompute(knl, "Gux(m,j)", np.float32, ["m", "j"]) + knl = lp.precompute(knl, "Guy(i,m)", np.float32, ["i", "m"]) - knl = lp.precompute(knl, "Gux", np.float32, ["m", "j"], "Gux(m,j)") - knl = lp.precompute(knl, "Guy", np.float32, ["i", "m"], "Gux(i,m)") + knl = lp.add_prefetch(knl, "G$x") knl = lp.tag_dimensions(knl, dict(o="unr")) knl = lp.tag_dimensions(knl, dict(m="unr")) return knl - def variant_prefetch(knl): - knl = lp.precompute(knl, "ur", np.float32, ["a", "b"]) - knl = lp.precompute(knl, "us", np.float32, ["a", "b"]) - return knl - def variant_1(knl): # BUG? why can't the prefetch be in the j loop??! print knl @@ -85,19 +80,6 @@ def test_tim2d(ctx_factory): #knl = lp.precompute(knl, "us", np.float32, ["a"]) return knl - def variant_g_prefetch(knl): - knl = lp.precompute(knl, "ur", np.float32, ["a"]) - knl = lp.precompute(knl, "us", np.float32, ["a"]) - knl = lp.add_prefetch(knl, "G", per_access=True) # IMPLEMENT! - return knl - - def variant_gu_precomp(knl): - knl = lp.precompute(knl, "ur", np.float32, ["a"]) - knl = lp.precompute(knl, "us", np.float32, ["a"]) - knl = lp.precompute(knl, "Gux", np.float32, ["a", "b"]) - knl = lp.precompute(knl, "Guy", np.float32, ["a", "b"]) - return knl - for variant in [variant_orig]: #for variant in [variant_1]: kernel_gen = lp.generate_loop_schedules(variant(knl)) -- GitLab