From 655b07d24f6f4fffde31f7206fe30445e46d8823 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 26 Mar 2012 01:43:46 -0400
Subject: [PATCH] Improve the user interface of precompute() by unifying
 subst_name and footprint_generators into subst_use.

---
 loopy/__init__.py       |  13 +++--
 loopy/compiled.py       |   6 ++-
 loopy/cse.py            | 115 ++++++++++++++++++++++++++++------------
 test/test_linalg.py     |   7 +--
 test/test_sem_reagan.py |  32 +++--------
 5 files changed, 100 insertions(+), 73 deletions(-)

diff --git a/loopy/__init__.py b/loopy/__init__.py
index 913479d78..a35060141 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -22,7 +22,7 @@ class LoopyAdvisory(UserWarning):
 
 from loopy.kernel import ScalarArg, ArrayArg, ConstantArrayArg, ImageArg
 
-from loopy.kernel import AutoFitLocalIndexTag, get_dot_dependency_graph
+from loopy.kernel import AutoFitLocalIndexTag, get_dot_dependency_graph, LoopKernel
 from loopy.subst import extract_subst, expand_subst
 from loopy.cse import precompute
 from loopy.preprocess import preprocess_kernel, realize_reduction
@@ -52,7 +52,6 @@ def make_kernel(*args, **kwargs):
     and temporary variable declaration received as part of string instructions.
     """
 
-    from loopy.kernel import LoopKernel
     knl = LoopKernel(*args, **kwargs)
 
     knl = tag_dimensions(
@@ -503,7 +502,6 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None,
 
     kernel = extract_subst(kernel, rule_name, uni_template, parameters)
 
-    footprint_generators = None
 
     if footprint_subscripts is not None:
         if not isinstance(footprint_subscripts, (list, tuple)):
@@ -530,11 +528,12 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None,
         footprint_subscripts = [standardize_footprint_indices(si) for si in footprint_subscripts]
 
         from pymbolic.primitives import Variable
-        footprint_generators = [
-                Variable(var_name)(*si) for si in footprint_subscripts]
+        subst_use = [
+                Variable(rule_name)(*si) for si in footprint_subscripts]
+    else:
+        subst_use = rule_name
 
-    new_kernel = precompute(kernel, rule_name, arg.dtype, sweep_inames,
-            footprint_generators=footprint_generators,
+    new_kernel = precompute(kernel, subst_use, arg.dtype, sweep_inames,
             new_storage_axis_names=dim_arg_names,
             default_tag=default_tag)
 
diff --git a/loopy/compiled.py b/loopy/compiled.py
index 926d7a9d6..cc78d2b24 100644
--- a/loopy/compiled.py
+++ b/loopy/compiled.py
@@ -287,8 +287,10 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count, op_label, parameters,
         print_ref_code=False, print_code=True, warmup_rounds=2,
         edit_code=False, dump_binary=False, with_annotation=False,
         fills_entire_output=True, check_result=None):
-    """
-    :arg check_result: a callable with :cls:`numpy.ndarray` arguments
+    """Compare results of `ref_knl` to the kernels generated by the generator
+    `kernel_gen`.
+
+    :arg check_result: a callable with :class:`numpy.ndarray` arguments
         *(result, reference_result)* returning a a tuple (class:`bool`, message)
         indicating correctness/acceptability of the result
     """
diff --git a/loopy/cse.py b/loopy/cse.py
index 398a80897..d46c5f7fe 100644
--- a/loopy/cse.py
+++ b/loopy/cse.py
@@ -305,22 +305,40 @@ def simplify_via_aff(expr):
 
 
 
-def precompute(kernel, subst_name, dtype, sweep_inames=[],
-        footprint_generators=None,
+def precompute(kernel, subst_use, dtype, sweep_inames=[],
         storage_axes=None, new_storage_axis_names=None, storage_axis_to_tag={},
         default_tag="l.auto"):
-    """Precompute the expression described in the substitution rule *subst_name*
-    and store it in a temporary array. A precomputation needs two things to operate,
-    a list of *sweep_inames* (order irrelevant) and an ordered list of *storage_axes*
-    (whose order will describe the axis ordering of the temporary array).
+    """Precompute the expression described in the substitution rule determined by
+    *subst_use* and store it in a temporary array. A precomputation needs two
+    things to operate, a list of *sweep_inames* (order irrelevant) and an
+    ordered list of *storage_axes* (whose order will describe the axis ordering
+    of the temporary array).
 
-    *subst_name* may contain a period (".") to filter out a subset of the
-    usage sites of the substitution rule. (Namely those usage sites that
-    use the same dotted name.)
+    :arg subst_use: Describes what to precompute.
 
-    This function will then examine the *footprint_generators* (or all usage
-    sites of the substitution rule if not specified) and determine what the
-    storage footprint of that sweep is.
+    The following objects may be given for *subst_use*:
+
+    * The name of the substitution rule.
+
+    * The tagged name ("name$tag") of the substitution rule.
+
+    * A list of invocations of the substitution rule.
+      This list of invocations, when swept across *sweep_inames*, then serves
+      to define the footprint of the precomputation.
+
+      Invocations may be tagged ("name$tag") to filter out a subset of the
+      usage sites of the substitution rule. (Namely those usage sites that
+      use the same tagged name.)
+
+      Invocations may be given as a string or as a
+      :class:`pymbolic.primitives.Expression` object.
+
+      A single invocation given as a string may be passed directly
+      instead of being wrapped in a one-element list.
+
+    If the list of invocations generating the footprint is not given,
+    all (tag-matching, if desired) usage sites of the substitution rule
+    are used to determine the footprint.
 
     The following cases can arise for each sweep axis:
 
@@ -343,42 +361,65 @@ def precompute(kernel, subst_name, dtype, sweep_inames=[],
     eliminated.
     """
 
-    # {{{ check arguments
+    # {{{ check, standardize arguments
 
     for iname in sweep_inames:
         if iname not in kernel.all_inames():
             raise RuntimeError("sweep iname '%s' is not a known iname"
                     % iname)
 
-    if footprint_generators is not None:
-        if isinstance(footprint_generators, str):
-            footprint_generators = [footprint_generators]
+    if isinstance(storage_axes, str):
+        raise TypeError("storage_axes may not be a string--likely a leftover "
+                "footprint_generators argument")
 
-    # }}}
+    if isinstance(subst_use, str):
+        subst_use = [subst_use]
 
-    from loopy.symbolic import SubstitutionCallbackMapper
+    footprint_generators = None
 
-    c_subst_name = subst_name.replace(".", "_")
-    subst_name, subst_tag = SubstitutionCallbackMapper.parse_filter(subst_name)
+    subst_name = None
+    subst_tag = None
 
-    from loopy.kernel import parse_tag
-    default_tag = parse_tag(default_tag)
+    from pymbolic.primitives import Variable, Call
+    from loopy.symbolic import parse, TaggedVariable
 
-    subst = kernel.substitutions[subst_name]
-    arg_names = subst.arguments
+    for use in subst_use:
+        if isinstance(use, str):
+            use = parse(use)
 
-    # {{{ create list of invocation descriptors
+        if isinstance(use, Call):
+            if footprint_generators is None:
+                footprint_generators = []
 
-    invocation_descriptors = []
+            footprint_generators.append(use)
+            subst_name_as_expr = use.function
+        else:
+            subst_name_as_expr = use
+
+        if isinstance(subst_name_as_expr, Variable):
+            new_subst_name = subst_name_as_expr.name
+            new_subst_tag = None
+        elif isinstance(subst_name_as_expr, TaggedVariable):
+            new_subst_name = subst_name_as_expr.name
+            new_subst_tag = subst_name_as_expr.tag
+        else:
+            raise ValueError("unexpected type of subst_name")
 
-    # {{{ process invocations in footprint generators
+        if (subst_name, subst_tag) == (None, None):
+            subst_name, subst_tag = new_subst_name, new_subst_tag
+        else:
+            if (subst_name, subst_tag) != (new_subst_name, new_subst_tag):
+                raise ValueError("not all uses in subst_use agree "
+                        "on rule name and tag")
+
+    # }}}
+
+    # {{{ process invocations in footprint generators, start invocation_descriptors
+
+    invocation_descriptors = []
 
     if footprint_generators:
         for fpg in footprint_generators:
-            if isinstance(fpg, str):
-                from loopy.symbolic import parse
-                fpg = parse(fpg)
-
             from pymbolic.primitives import Variable, Call
             if isinstance(fpg, Variable):
                 args = ()
@@ -395,7 +436,15 @@ def precompute(kernel, subst_name, dtype, sweep_inames=[],
 
     # }}}
 
-    # {{{ gather up invocations in kernel code
+    c_subst_name = subst_name.replace(".", "_")
+
+    from loopy.kernel import parse_tag
+    default_tag = parse_tag(default_tag)
+
+    subst = kernel.substitutions[subst_name]
+    arg_names = subst.arguments
+
+    # {{{ gather up invocations in kernel code, finish invocation_descriptors
 
     current_subst_rule_stack = []
 
@@ -459,8 +508,6 @@ def precompute(kernel, subst_name, dtype, sweep_inames=[],
 
     # }}}
 
-    # }}}
-
     sweep_inames = list(sweep_inames)
 
     # {{{ see if we need extra storage dimensions
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 45112958f..a833a83fc 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -318,9 +318,6 @@ def test_rank_one(ctx_factory):
         return knl
 
     def variant_3(knl):
-        # Throws an error--doesn't use all hardware axes.
-        # Probably the right thing to do.
-
         knl = lp.split_dimension(knl, "i", 16,
                 outer_tag="g.0", inner_tag="l.0")
         knl = lp.split_dimension(knl, "j", 16,
@@ -352,8 +349,8 @@ def test_rank_one(ctx_factory):
 
     seq_knl = knl
 
-    #for variant in [variant_1, variant_2, variant_4]:
-    for variant in [variant_2, variant_4]:
+    for variant in [variant_1, variant_2, variant_3, variant_4]:
+    #for variant in [variant_4]:
         kernel_gen = lp.generate_loop_schedules(variant(knl))
         kernel_gen = lp.check_kernels(kernel_gen, dict(n=n))
 
diff --git a/test/test_sem_reagan.py b/test/test_sem_reagan.py
index bdccb112f..c147c20b9 100644
--- a/test/test_sem_reagan.py
+++ b/test/test_sem_reagan.py
@@ -31,8 +31,8 @@ def test_tim2d(ctx_factory):
 
             #"Gu(mat_entry,a,b) := G[mat_entry,e,m,j]*ur(m,j)",
 
-            "Gux(a,b) := G[0,e,a,b]*ur(a,b)+G[1,e,a,b]*us(a,b)",
-            "Guy(a,b) := G[1,e,a,b]*ur(a,b)+G[2,e,a,b]*us(a,b)",
+            "Gux(a,b) := G$x[0,e,a,b]*ur(a,b)+G$x[1,e,a,b]*us(a,b)",
+            "Guy(a,b) := G$y[1,e,a,b]*ur(a,b)+G$y[2,e,a,b]*us(a,b)",
             "lap[e,i,j]  = "
             "  sum_float32(m, D[m,i]*Gux(m,j))"
             "+ sum_float32(m, D[m,j]*Guy(i,m))"
@@ -57,24 +57,19 @@ def test_tim2d(ctx_factory):
         knl = lp.add_prefetch(knl, "D", ["m", "j", "i","o"])
         knl = lp.add_prefetch(knl, "u", ["i", "j",  "o"])
 
-        knl = lp.precompute(knl, "ur", np.float32, ["m", "j"], "ur(m,j)")
-        knl = lp.precompute(knl, "us", np.float32, ["i", "m"], "us(i,m)")
+        knl = lp.precompute(knl, "ur(m,j)", np.float32, ["m", "j"])
+        knl = lp.precompute(knl, "us(i,m)", np.float32, ["i", "m"])
 
-        knl = lp.add_prefetch(knl, "G")
+        knl = lp.precompute(knl, "Gux(m,j)", np.float32, ["m", "j"])
+        knl = lp.precompute(knl, "Guy(i,m)", np.float32, ["i", "m"])
 
-        knl = lp.precompute(knl, "Gux", np.float32, ["m", "j"], "Gux(m,j)")
-        knl = lp.precompute(knl, "Guy", np.float32, ["i", "m"], "Gux(i,m)")
+        knl = lp.add_prefetch(knl, "G$x")
 
         knl = lp.tag_dimensions(knl, dict(o="unr"))
         knl = lp.tag_dimensions(knl, dict(m="unr"))
 
         return knl
 
-    def variant_prefetch(knl):
-        knl = lp.precompute(knl, "ur", np.float32, ["a", "b"])
-        knl = lp.precompute(knl, "us", np.float32, ["a", "b"])
-        return knl
-
     def variant_1(knl):
         # BUG? why can't the prefetch be in the j loop??!
         print knl
@@ -85,19 +80,6 @@ def test_tim2d(ctx_factory):
         #knl = lp.precompute(knl, "us", np.float32, ["a"])
         return knl
 
-    def variant_g_prefetch(knl):
-        knl = lp.precompute(knl, "ur", np.float32, ["a"])
-        knl = lp.precompute(knl, "us", np.float32, ["a"])
-        knl = lp.add_prefetch(knl, "G", per_access=True) # IMPLEMENT!
-        return knl
-
-    def variant_gu_precomp(knl):
-        knl = lp.precompute(knl, "ur", np.float32, ["a"])
-        knl = lp.precompute(knl, "us", np.float32, ["a"])
-        knl = lp.precompute(knl, "Gux", np.float32, ["a", "b"])
-        knl = lp.precompute(knl, "Guy", np.float32, ["a", "b"])
-        return knl
-
     for variant in [variant_orig]:
     #for variant in [variant_1]:
         kernel_gen = lp.generate_loop_schedules(variant(knl))
-- 
GitLab