From 871cc0a5b82c227d948dbe3da7be531dba3cb86c Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Fri, 26 Jul 2013 13:14:00 -0400
Subject: [PATCH] Automatic dependencies (dumb) -> default dependencies
 (better)

---
 doc/reference.rst        |  11 ++--
 loopy/check.py           |   2 +-
 loopy/kernel/creation.py |   5 +-
 loopy/kernel/data.py     |  10 ++--
 loopy/preprocess.py      | 111 +++++++++++++++++++++++++--------------
 test/test_sem_reagan.py  |   1 +
 6 files changed, 91 insertions(+), 49 deletions(-)

diff --git a/doc/reference.rst b/doc/reference.rst
index a1820e219..251d86eb4 100644
--- a/doc/reference.rst
+++ b/doc/reference.rst
@@ -183,9 +183,10 @@ These are usually key-value pairs. The following attributes are recognized:
 
   .. note::
 
-      Loopy will automatically add depdencies of reading instructions
-      on writing instructions *if and only if* there is exactly one writing
-      instruction for the written variable (temporary or argument).
+      If this is not specified, :mod:`loopy` will automatically add
+      depdencies of reading instructions on writing instructions *if and
+      only if* there is exactly one writing instruction for the written
+      variable (temporary or argument).
 
 * ``priority=integer`` sets the instructions priority to the value
   ``integer``. Instructions with higher priority will be scheduled sooner,
@@ -193,6 +194,10 @@ These are usually key-value pairs. The following attributes are recognized:
   instruction ahead of a higher-priority one if loop orders or dependencies
   require it.
 
+* ``if=variable1:variable2`` Only execute this instruction if all condition
+  variables (which must be scalar variables) evaluate to ``true`` (as
+  defined by C).
+
 .. autoclass:: ExpressionInstruction
 
 C Block Instructions
diff --git a/loopy/check.py b/loopy/check.py
index de89beac0..e39b28f99 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -44,7 +44,7 @@ def check_insn_attributes(kernel):
                     % (insn.id, ", ".join(
                         insn.forced_iname_deps - kernel.all_inames())))
 
-        if not insn.insn_deps <= all_insn_ids:
+        if insn.insn_deps is not None and not insn.insn_deps <= all_insn_ids:
             raise LoopyError("insn '%s' has unknown instruction "
                     "dependencies: %s"
                     % (insn.id, ", ".join(
diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py
index a3a35b082..d0a71dae6 100644
--- a/loopy/kernel/creation.py
+++ b/loopy/kernel/creation.py
@@ -177,7 +177,7 @@ def parse_insn(insn):
         raise
 
     if insn_match is not None:
-        insn_deps = set()
+        insn_deps = None
         insn_id = None
         priority = 0
         forced_iname_deps = frozenset()
@@ -204,7 +204,8 @@ def parse_insn(insn):
                 elif opt_key == "priority":
                     priority = int(opt_value)
                 elif opt_key == "dep":
-                    insn_deps = set(opt_value.split(":"))
+                    insn_deps = frozenset(dep.strip() for dep in opt_value.split(":")
+                            if dep.strip())
                 elif opt_key == "inames":
                     forced_iname_deps = frozenset(opt_value.split(":"))
                 elif opt_key == "if":
diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index 72e774c5e..df9b7f3ad 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -349,6 +349,8 @@ class InstructionBase(Record):
         *must* be executed before this one. Note that loop scheduling augments this
         by adding dependencies on any writes to temporaries read by this instruction.
 
+        May be *None* to invoke the default.
+
     .. attribute:: predicates
 
         a :class:`frozenset` of variable names whose truth values (as defined
@@ -384,7 +386,7 @@ class InstructionBase(Record):
             boostable, boostable_into, predicates):
 
         assert isinstance(forced_iname_deps, frozenset)
-        assert isinstance(insn_deps, set)
+        assert isinstance(insn_deps, frozenset) or insn_deps is None
 
         Record.__init__(self,
                 id=id,
@@ -500,8 +502,8 @@ class ExpressionInstruction(InstructionBase):
 
     def __init__(self,
             assignee, expression,
-            id=None, forced_iname_deps=frozenset(), insn_deps=set(), boostable=None,
-            boostable_into=None,
+            id=None, forced_iname_deps=frozenset(), insn_deps=None,
+            boostable=None, boostable_into=None,
             temp_var_type=None, priority=0, predicates=frozenset()):
 
         InstructionBase.__init__(self,
@@ -640,7 +642,7 @@ class CInstruction(InstructionBase):
     def __init__(self,
             iname_exprs, code,
             read_variables=frozenset(), assignees=frozenset(),
-            id=None, insn_deps=set(), forced_iname_deps=frozenset(), priority=0,
+            id=None, insn_deps=None, forced_iname_deps=frozenset(), priority=0,
             boostable=None, boostable_into=None, predicates=frozenset()):
         """
         :arg iname_exprs: Like :attr:`iname_exprs`, but instead of tuples,
diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index b7b3a916d..a656ce6fe 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -288,6 +288,52 @@ def mark_local_temporaries(kernel):
 # }}}
 
 
+# {{{ default dependencies
+
+def add_default_dependencies(kernel):
+    logger.debug("%s: default deps" % kernel.name)
+
+    writer_map = kernel.writer_map()
+
+    arg_names = set(arg.name for arg in kernel.args)
+
+    var_names = arg_names | set(kernel.temporary_variables.iterkeys())
+
+    dep_map = dict(
+            (insn.id, insn.read_dependency_names() & var_names)
+            for insn in kernel.instructions)
+
+    new_insns = []
+    for insn in kernel.instructions:
+        if insn.insn_deps is None:
+            auto_deps = set()
+
+            # {{{ add automatic dependencies
+
+            all_my_var_writers = set()
+            for var in dep_map[insn.id]:
+                var_writers = writer_map.get(var, set())
+                all_my_var_writers |= var_writers
+
+                if not var_writers and var not in arg_names:
+                    warn(kernel, "read_no_write(%s)" % var,
+                            "temporary variable '%s' is read, but never written."
+                            % var)
+
+                if len(var_writers) == 1:
+                    auto_deps.update(var_writers - set([insn.id]))
+
+            # }}}
+
+            insn = insn.copy(insn_deps=frozenset(auto_deps))
+
+        new_insns.append(insn)
+
+    return kernel.copy(instructions=new_insns)
+
+# }}}
+
+
 # {{{ rewrite reduction to imperative form
 
 def realize_reduction(kernel, insn_id_filter=None):
@@ -333,33 +379,34 @@ def realize_reduction(kernel, insn_id_filter=None):
                 is_local=False)
 
         outer_insn_inames = temp_kernel.insn_inames(insn)
-        bad_inames = set(expr.inames) & outer_insn_inames
+        bad_inames = frozenset(expr.inames) & outer_insn_inames
         if bad_inames:
             raise LoopyError("reduction used within loop(s) that it was "
                     "supposed to reduce over: " + ", ".join(bad_inames))
 
-        new_id = temp_kernel.make_unique_instruction_id(
+        init_id = temp_kernel.make_unique_instruction_id(
                 based_on="%s_%s_init" % (insn.id, "_".join(expr.inames)),
                 extra_used_ids=set(i.id for i in generated_insns))
 
         init_insn = ExpressionInstruction(
-                id=new_id,
+                id=init_id,
                 assignee=target_var,
-                forced_iname_deps=outer_insn_inames - set(expr.inames),
+                forced_iname_deps=outer_insn_inames - frozenset(expr.inames),
+                insn_deps=frozenset(),
                 expression=expr.operation.neutral_element(arg_dtype, expr.inames))
 
         generated_insns.append(init_insn)
 
-        new_id = temp_kernel.make_unique_instruction_id(
+        update_id = temp_kernel.make_unique_instruction_id(
                 based_on="%s_%s_update" % (insn.id, "_".join(expr.inames)),
                 extra_used_ids=set(i.id for i in generated_insns))
 
         reduction_insn = ExpressionInstruction(
-                id=new_id,
+                id=update_id,
                 assignee=target_var,
                 expression=expr.operation(
                     arg_dtype, target_var, expr.expr, expr.inames),
-                insn_deps=set([init_insn.id]) | insn.insn_deps,
+                insn_deps=frozenset([init_insn.id]) | insn.insn_deps,
                 forced_iname_deps=temp_kernel.insn_inames(insn) | set(expr.inames))
 
         generated_insns.append(reduction_insn)
@@ -397,7 +444,7 @@ def realize_reduction(kernel, insn_id_filter=None):
             insn = insn.copy(
                         expression=new_expression,
                         insn_deps=insn.insn_deps
-                            | new_insn_insn_deps,
+                            | frozenset(new_insn_insn_deps),
                         forced_iname_deps=temp_kernel.insn_inames(insn))
 
             insn_queue = generated_insns + [insn] + insn_queue
@@ -544,10 +591,10 @@ def duplicate_private_temporaries_for_ilp(kernel):
 # }}}
 
 
-# {{{ automatic dependencies, find boostability of instructions
+# {{{ find boostability of instructions
 
-def add_boostability_and_automatic_dependencies(kernel):
-    logger.debug("%s: automatic deps, boostability" % kernel.name)
+def find_boostability(kernel):
+    logger.debug("%s: boostability" % kernel.name)
 
     writer_map = kernel.writer_map()
 
@@ -563,31 +610,11 @@ def add_boostability_and_automatic_dependencies(kernel):
 
     new_insns = []
     for insn in kernel.instructions:
-        auto_deps = set()
-
-        # {{{ add automatic dependencies
-
         all_my_var_writers = set()
         for var in dep_map[insn.id]:
             var_writers = writer_map.get(var, set())
             all_my_var_writers |= var_writers
 
-            if not var_writers and var not in arg_names:
-                warn(kernel, "read_no_write(%s)" % var,
-                        "temporary variable '%s' is read, but never written." % var)
-
-            if len(var_writers) > 1 and not var_writers & set(insn.insn_deps):
-                warn(kernel, "read_without_dep(%s,%s)" % (var, insn.id),
-                        "'%s' is written from more than one place, "
-                        "but instruction '%s' (which reads this variable) "
-                        "does not specify a dependency on any of the writers."
-                        % (var, insn.id))
-
-            if len(var_writers) == 1:
-                auto_deps.update(var_writers - set([insn.id]))
-
-        # }}}
-
         # {{{ find dependency loops, flag boostability
 
         while True:
@@ -609,10 +636,9 @@ def add_boostability_and_automatic_dependencies(kernel):
             non_boostable_vars.update(
                     var_name for var_name, _ in insn.assignees_and_indices())
 
-        new_insns.append(
-                insn.copy(
-                    insn_deps=insn.insn_deps | auto_deps,
-                    boostable=boostable))
+        insn = insn.copy(boostable=boostable)
+
+        new_insns.append(insn)
 
     # {{{ remove boostability from isns that access non-boostable vars
 
@@ -1029,9 +1055,16 @@ def preprocess_kernel(kernel):
 
     kernel = infer_unknown_types(kernel, expect_completion=False)
 
-    # Ordering restriction:
-    # realize_reduction must happen after type inference because it needs
-    # to be able to determine the types of the reduced expressions.
+    kernel = add_default_dependencies(kernel)
+
+    # Ordering restrictions:
+    #
+    # - realize_reduction must happen after type inference because it needs
+    #   to be able to determine the types of the reduced expressions.
+    #
+    # - realize_reduction must happen after default dependencies are added
+    #   because it manipulates the insn_deps field, which could prevent
+    #   defaults from being applied.
 
     kernel = realize_reduction(kernel)
 
@@ -1042,7 +1075,7 @@ def preprocess_kernel(kernel):
     kernel = duplicate_private_temporaries_for_ilp(kernel)
     kernel = mark_local_temporaries(kernel)
     kernel = assign_automatic_axes(kernel)
-    kernel = add_boostability_and_automatic_dependencies(kernel)
+    kernel = find_boostability(kernel)
     kernel = limit_boostability(kernel)
     kernel = adjust_local_temp_var_storage(kernel)
 
diff --git a/test/test_sem_reagan.py b/test/test_sem_reagan.py
index 819de845d..2de1db43b 100644
--- a/test/test_sem_reagan.py
+++ b/test/test_sem_reagan.py
@@ -24,6 +24,7 @@ THE SOFTWARE.
 
 
 import numpy as np
+import pyopencl as cl  # noqa
 import loopy as lp
 
 from pyopencl.tools import (  # noqa
-- 
GitLab