From ded9342c7b96bf5bf6e28c7b8f0bbe4eb11ac00b Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Sun, 5 Jun 2016 01:43:33 -0500
Subject: [PATCH] Deprecate iname inference. Do iname inference once up front
 and never again, fix transformations to work without it, and complain if
 iname inference actually does anything

---
 loopy/kernel/__init__.py      | 14 +++++++-------
 loopy/kernel/creation.py      | 27 +++++++++++++++++++++++++++
 loopy/kernel/data.py          | 18 ++----------------
 loopy/kernel/tools.py         | 26 ++++++++++++++++++--------
 loopy/preprocess.py           | 12 ++----------
 loopy/transform/batch.py      | 10 +++++++++-
 loopy/transform/buffer.py     |  9 +++++++--
 loopy/transform/diff.py       |  4 +++-
 loopy/transform/iname.py      | 25 ++++++++++++++++++++++---
 loopy/transform/precompute.py | 17 ++++++++++++-----
 loopy/version.py              |  2 +-
 11 files changed, 110 insertions(+), 54 deletions(-)

diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
index f9049ee43..3d64d227e 100644
--- a/loopy/kernel/__init__.py
+++ b/loopy/kernel/__init__.py
@@ -669,9 +669,11 @@ class LoopKernel(RecordWithoutPickling):
         """Return a mapping from instruction ids to inames inside which
         they should be run.
         """
+        result = {}
+        for insn in self.instructions:
+            result[insn.id] = insn.forced_iname_deps
 
-        from loopy.kernel.tools import find_all_insn_inames
-        return find_all_insn_inames(self)
+        return result
 
     @memoize_method
     def all_referenced_inames(self):
@@ -681,11 +683,9 @@ class LoopKernel(RecordWithoutPickling):
         return result
 
     def insn_inames(self, insn):
-        from loopy.kernel.data import InstructionBase
-        if isinstance(insn, InstructionBase):
-            return self.all_insn_inames()[insn.id]
-        else:
-            return self.all_insn_inames()[insn]
+        if isinstance(insn, str):
+            insn = self.id_to_insn[insn]
+        return insn.forced_iname_deps
 
     @memoize_method
     def iname_to_insns(self):
diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py
index 1c988e34d..ec3b8b6ef 100644
--- a/loopy/kernel/creation.py
+++ b/loopy/kernel/creation.py
@@ -1084,6 +1084,19 @@ def resolve_wildcard_deps(knl):
 # }}}
 
 
+# {{{ add inferred iname deps
+
+def add_inferred_inames(knl):
+    from loopy.kernel.tools import find_all_insn_inames
+    insn_inames = find_all_insn_inames(knl)
+
+    return knl.copy(instructions=[
+            insn.copy(forced_iname_deps=insn_inames[insn.id])
+            for insn in knl.instructions])
+
+# }}}
+
+
 # {{{ kernel creation top-level
 
 def make_kernel(domains, instructions, kernel_data=["..."], **kwargs):
@@ -1302,6 +1315,20 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs):
     check_for_nonexistent_iname_deps(knl)
 
     knl = create_temporaries(knl, default_order)
+    # -------------------------------------------------------------------------
+    # Ordering dependency:
+    # -------------------------------------------------------------------------
+    # Must create temporaries before inferring inames (because those temporaries
+    # mediate dependencies that are then used for iname propagation.)
+    # -------------------------------------------------------------------------
+    # NOTE: add_inferred_inames is deprecated; it emits a warning whenever it
+    # actually adds an iname to an instruction.
+    knl = add_inferred_inames(knl)
+    # -------------------------------------------------------------------------
+    # Ordering dependency:
+    # -------------------------------------------------------------------------
+    # Must infer inames before determining shapes.
+    # -------------------------------------------------------------------------
     knl = determine_shapes_of_temporaries(knl)
     knl = expand_defines_in_shapes(knl, defines)
     knl = guess_arg_shape_if_requested(knl, default_order)
diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index 3ba3f1918..44a5618e7 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -561,15 +561,14 @@ class InstructionBase(Record):
             "groups conflicts_with_groups "
             "no_sync_with "
             "predicates "
-            "forced_iname_deps_is_final stop_iname_dep_propagation "
-            "stop_iname_dep_propagation "
+            "forced_iname_deps_is_final forced_iname_deps "
             "priority boostable boostable_into".split())
 
     def __init__(self, id, depends_on, depends_on_is_final,
             groups, conflicts_with_groups,
             no_sync_with,
             forced_iname_deps_is_final, forced_iname_deps,
-            stop_iname_dep_propagation, priority,
+            priority,
             boostable, boostable_into, predicates, tags,
             insn_deps=None, insn_deps_is_final=None):
 
@@ -598,9 +597,6 @@ class InstructionBase(Record):
         if forced_iname_deps_is_final is None:
             forced_iname_deps_is_final = False
 
-        if stop_iname_dep_propagation is None:
-            stop_iname_dep_propagation = frozenset()
-
         if depends_on_is_final is None:
             depends_on_is_final = False
 
@@ -623,7 +619,6 @@ class InstructionBase(Record):
         # assert all(is_interned(pred) for pred in predicates)
 
         assert isinstance(forced_iname_deps, frozenset)
-        assert isinstance(stop_iname_dep_propagation, frozenset)
         assert isinstance(depends_on, frozenset) or depends_on is None
         assert isinstance(groups, frozenset)
         assert isinstance(conflicts_with_groups, frozenset)
@@ -636,7 +631,6 @@ class InstructionBase(Record):
                 groups=groups, conflicts_with_groups=conflicts_with_groups,
                 forced_iname_deps_is_final=forced_iname_deps_is_final,
                 forced_iname_deps=forced_iname_deps,
-                stop_iname_dep_propagation=stop_iname_dep_propagation,
                 priority=priority,
                 boostable=boostable,
                 boostable_into=boostable_into,
@@ -808,8 +802,6 @@ class InstructionBase(Record):
                 intern_frozenset_of_ids(self.forced_iname_deps))
         self.predicates = (
                 intern_frozenset_of_ids(self.predicates))
-        self.stop_iname_dep_propagation = (
-                intern_frozenset_of_ids(self.stop_iname_dep_propagation))
 
 # }}}
 
@@ -1101,7 +1093,6 @@ class Assignment(MultiAssignmentBase):
             no_sync_with=None,
             forced_iname_deps_is_final=None,
             forced_iname_deps=frozenset(),
-            stop_iname_dep_propagation=None,
             boostable=None, boostable_into=None, tags=None,
             temp_var_type=None, atomicity=(),
             priority=0, predicates=frozenset(),
@@ -1116,7 +1107,6 @@ class Assignment(MultiAssignmentBase):
                 no_sync_with=no_sync_with,
                 forced_iname_deps_is_final=forced_iname_deps_is_final,
                 forced_iname_deps=forced_iname_deps,
-                stop_iname_dep_propagation=stop_iname_dep_propagation,
                 boostable=boostable,
                 boostable_into=boostable_into,
                 priority=priority,
@@ -1245,7 +1235,6 @@ class CallInstruction(MultiAssignmentBase):
             no_sync_with=None,
             forced_iname_deps_is_final=None,
             forced_iname_deps=frozenset(),
-            stop_iname_dep_propagation=None,
             boostable=None, boostable_into=None, tags=None,
             temp_var_types=None,
             priority=0, predicates=frozenset(),
@@ -1260,7 +1249,6 @@ class CallInstruction(MultiAssignmentBase):
                 no_sync_with=no_sync_with,
                 forced_iname_deps_is_final=forced_iname_deps_is_final,
                 forced_iname_deps=forced_iname_deps,
-                stop_iname_dep_propagation=stop_iname_dep_propagation,
                 boostable=boostable,
                 boostable_into=boostable_into,
                 priority=priority,
@@ -1421,7 +1409,6 @@ class CInstruction(InstructionBase):
             groups=None, conflicts_with_groups=None,
             no_sync_with=None,
             forced_iname_deps_is_final=None, forced_iname_deps=frozenset(),
-            stop_iname_dep_propagation=None,
             priority=0, boostable=None, boostable_into=None,
             predicates=frozenset(), tags=None,
             insn_deps=None, insn_deps_is_final=None):
@@ -1443,7 +1430,6 @@ class CInstruction(InstructionBase):
                 no_sync_with=no_sync_with,
                 forced_iname_deps_is_final=forced_iname_deps_is_final,
                 forced_iname_deps=forced_iname_deps,
-                stop_iname_dep_propagation=stop_iname_dep_propagation,
                 boostable=boostable,
                 boostable_into=boostable_into,
                 priority=priority, predicates=predicates, tags=tags,
diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py
index 5a100d712..1775032cb 100644
--- a/loopy/kernel/tools.py
+++ b/loopy/kernel/tools.py
@@ -30,7 +30,7 @@ from six.moves import intern
 import numpy as np
 import islpy as isl
 from islpy import dim_type
-from loopy.diagnostic import LoopyError
+from loopy.diagnostic import LoopyError, warn_with_kernel
 
 import logging
 logger = logging.getLogger(__name__)
@@ -164,9 +164,7 @@ def find_all_insn_inames(kernel):
                     ))
 
         insn_id_to_inames[insn.id] = iname_deps
-        insn_assignee_inames[insn.id] = (
-                write_deps & kernel.all_inames()
-                | insn.stop_iname_dep_propagation)
+        insn_assignee_inames[insn.id] = write_deps & kernel.all_inames()
 
     written_vars = kernel.get_written_variables()
 
@@ -216,8 +214,14 @@ def find_all_insn_inames(kernel):
 
                 if inames_new != inames_old:
                     did_something = True
-                    logger.debug("%s: find_all_insn_inames: %s -> %s (dep-based)" % (
-                        kernel.name, insn.id, ", ".join(sorted(inames_new))))
+
+                    warn_with_kernel(kernel, "inferred_iname",
+                            "The iname(s) '%s' on instruction '%s' was "
+                            "automatically added. "
+                            "This is deprecated. Please add the iname "
+                            "to the instruction "
+                            "explicitly, e.g. by adding '{inames=...}'."
+                            % (inames_new-inames_old, insn.id))
 
             # }}}
 
@@ -247,8 +251,14 @@ def find_all_insn_inames(kernel):
             if inames_new != inames_old:
                 did_something = True
                 insn_id_to_inames[insn.id] = frozenset(inames_new)
-                logger.debug("%s: find_all_insn_inames: %s -> %s (domain-based)" % (
-                    kernel.name, insn.id, ", ".join(sorted(inames_new))))
+
+                warn_with_kernel(kernel, "inferred_iname",
+                        "The iname(s) '%s' on instruction '%s' was "
+                        "automatically added. "
+                        "This is deprecated. Please add the iname "
+                        "to the instruction "
+                        "explicitly, e.g. by adding '{inames=...}'."
+                        % (inames_new-inames_old, insn.id))
 
             # }}}
 
diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index 5690218e6..d333cdf18 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -544,8 +544,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True):
                     expr.expr, expr.inames),
                 depends_on=frozenset([init_insn.id]) | insn.depends_on,
                 forced_iname_deps=update_insn_iname_deps,
-                forced_iname_deps_is_final=insn.forced_iname_deps_is_final,
-                stop_iname_dep_propagation=frozenset(expr.inames))
+                forced_iname_deps_is_final=insn.forced_iname_deps_is_final)
 
         generated_insns.append(reduction_insn)
 
@@ -716,8 +715,6 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True):
         new_insn_add_depends_on.add(prev_id)
         new_insn_add_no_sync_with.add(prev_id)
         new_insn_add_forced_iname_deps.add(stage_exec_iname or base_exec_iname)
-        new_insn_add_stop_iname_dep_propagation.add(
-                stage_exec_iname or base_exec_iname)
 
         if nresults == 1:
             assert len(acc_vars) == 1
@@ -828,7 +825,6 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True):
         new_insn_add_depends_on = set()
         new_insn_add_no_sync_with = set()
         new_insn_add_forced_iname_deps = set()
-        new_insn_add_stop_iname_dep_propagation = set()
 
         generated_insns = []
 
@@ -859,11 +855,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True):
                     | frozenset(new_insn_add_no_sync_with),
                     forced_iname_deps=(
                         temp_kernel.insn_inames(insn)
-                        | new_insn_add_forced_iname_deps),
-                    stop_iname_dep_propagation=(
-                        insn.stop_iname_dep_propagation
-                        | new_insn_add_stop_iname_dep_propagation),
-                    )
+                        | new_insn_add_forced_iname_deps))
 
             kwargs.pop("id")
             kwargs.pop("expression")
diff --git a/loopy/transform/batch.py b/loopy/transform/batch.py
index 967e14de6..97c15c6f2 100644
--- a/loopy/transform/batch.py
+++ b/loopy/transform/batch.py
@@ -143,9 +143,17 @@ def to_batched(knl, nbatches, batch_varying_args, batch_iname_prefix="ibatch",
     bvc = _BatchVariableChanger(rule_mapping_context,
             knl, batch_varying_args, batch_iname_expr,
             sequential=sequential)
-    return rule_mapping_context.finish_kernel(
+    kernel = rule_mapping_context.finish_kernel(
             bvc.map_kernel(knl))
 
+    batch_iname_set = frozenset([batch_iname])
+    kernel = kernel.copy(
+            instructions=[
+                insn.copy(forced_iname_deps=insn.forced_iname_deps | batch_iname_set)
+                for insn in kernel.instructions])
+
+    return kernel
+
 # }}}
 
 # vim: foldmethod=marker
diff --git a/loopy/transform/buffer.py b/loopy/transform/buffer.py
index fb32b3ce8..0c25b6393 100644
--- a/loopy/transform/buffer.py
+++ b/loopy/transform/buffer.py
@@ -400,7 +400,9 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None,
     init_instruction = Assignment(id=init_insn_id,
                 assignee=buf_var_init,
                 expression=init_expression,
-                forced_iname_deps=frozenset(within_inames),
+                forced_iname_deps=(
+                    frozenset(within_inames)
+                    | frozenset(non1_init_inames)),
                 depends_on=frozenset(),
                 depends_on_is_final=True)
 
@@ -475,9 +477,12 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None,
         store_instruction = Assignment(
                     id=kernel.make_unique_instruction_id(based_on="store_"+var_name),
                     depends_on=frozenset(aar.modified_insn_ids),
+                    no_sync_with=frozenset([init_insn_id]),
                     assignee=store_target,
                     expression=store_expression,
-                    forced_iname_deps=frozenset(within_inames))
+                    forced_iname_deps=(
+                        frozenset(within_inames)
+                        | frozenset(non1_store_inames)))
     else:
         did_write = False
 
diff --git a/loopy/transform/diff.py b/loopy/transform/diff.py
index 1f75a60b8..a8c3b5849 100644
--- a/loopy/transform/diff.py
+++ b/loopy/transform/diff.py
@@ -310,7 +310,9 @@ class DifferentiationContext(object):
                 id=new_insn_id,
                 assignee=var(new_var_name)[
                     lhs_ind + diff_iname_exprs],
-                expression=diff_expr)
+                expression=diff_expr,
+                forced_iname_deps=(
+                    orig_writer_insn.forced_iname_deps | frozenset(diff_inames)))
 
         self.new_instructions.append(insn)
 
diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py
index 12749a303..4c3cd0a69 100644
--- a/loopy/transform/iname.py
+++ b/loopy/transform/iname.py
@@ -1142,8 +1142,8 @@ def affine_map_inames(kernel, old_inames, new_inames, equations):
 
     # {{{ change domains
 
-    new_inames_set = set(new_inames)
-    old_inames_set = set(old_inames)
+    new_inames_set = frozenset(new_inames)
+    old_inames_set = frozenset(old_inames)
 
     new_domains = []
     for idom, dom in enumerate(kernel.domains):
@@ -1229,7 +1229,26 @@ def affine_map_inames(kernel, old_inames, new_inames, equations):
 
     # }}}
 
-    return kernel.copy(domains=new_domains)
+    # {{{ switch iname refs in instructions
+
+    def fix_iname_set(insn_id, inames):
+        if old_inames_set <= inames:
+            return (inames - old_inames_set) | new_inames_set
+        elif old_inames_set & inames:
+            raise LoopyError("instruction '%s' uses only a part (%s), not all, "
+                    "of the old inames"
+                    % (insn_id, ", ".join(old_inames_set & inames)))
+        else:
+            return inames
+
+    new_instructions = [
+            insn.copy(forced_iname_deps=fix_iname_set(
+                insn.id, insn.forced_iname_deps))
+            for insn in kernel.instructions]
+
+    # }}}
+
+    return kernel.copy(domains=new_domains, instructions=new_instructions)
 
 # }}}
 
diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py
index fd6f33efc..f9d71b9f1 100644
--- a/loopy/transform/precompute.py
+++ b/loopy/transform/precompute.py
@@ -480,8 +480,9 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
     from loopy.symbolic import SubstitutionRuleExpander
     submap = SubstitutionRuleExpander(kernel.substitutions)
 
-    value_inames = get_dependencies(
-            submap(subst.expression)
+    value_inames = (
+            get_dependencies(submap(subst.expression))
+            - frozenset(subst.arguments)
             ) & kernel.all_inames()
     if value_inames - expanding_usage_arg_deps < extra_storage_axes:
         raise RuntimeError("unreferenced sweep inames specified: "
@@ -728,8 +729,8 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
     assignee = var(temporary_name)
 
     if non1_storage_axis_names:
-        assignee = assignee.index(
-                tuple(var(iname) for iname in non1_storage_axis_names))
+        assignee = assignee[
+                tuple(var(iname) for iname in non1_storage_axis_names)]
 
     # {{{ process substitutions on compute instruction
 
@@ -764,7 +765,13 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
     compute_insn = Assignment(
             id=compute_insn_id,
             assignee=assignee,
-            expression=compute_expression)
+            expression=compute_expression,
+            forced_iname_deps=(
+                frozenset(non1_storage_axis_names)
+                | frozenset(
+                    (expanding_usage_arg_deps | value_inames)
+                    - sweep_inames_set))
+            )
 
     # }}}
 
diff --git a/loopy/version.py b/loopy/version.py
index bb1b123cb..0fcb9c04d 100644
--- a/loopy/version.py
+++ b/loopy/version.py
@@ -32,4 +32,4 @@ except ImportError:
 else:
     _islpy_version = islpy.version.VERSION_TEXT
 
-DATA_MODEL_VERSION = "v35-islpy%s" % _islpy_version
+DATA_MODEL_VERSION = "v36-islpy%s" % _islpy_version
-- 
GitLab