From 577db396135869838cc4b699d72609b3a2156471 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Sun, 8 May 2016 21:00:41 -0500
Subject: [PATCH] Add Instruction.no_sync_with

---
 doc/ref_kernel.rst       | 15 +++++++++---
 loopy/kernel/creation.py | 49 ++++++++++++++++++++++++++++------------
 loopy/kernel/data.py     | 36 +++++++++++++++++++++++++----
 loopy/preprocess.py      |  4 +++-
 loopy/schedule.py        |  3 +++
 loopy/version.py         |  2 +-
 6 files changed, 86 insertions(+), 23 deletions(-)

diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst
index d151a2128..a323fff52 100644
--- a/doc/ref_kernel.rst
+++ b/doc/ref_kernel.rst
@@ -217,9 +217,9 @@ These are usually key-value pairs. The following attributes are recognized:
   dependency is that the code generated for this instruction is required to
   appear textually after all of these dependees' generated code.
 
-  Identifiers here are allowed to be wildcards as defined by
-  the Python module :mod:`fnmatchcase`. This is helpful in conjunction
-  with ``id_prefix``.
+  Identifiers here are allowed to be wildcards as defined by the Python
+  function :func:`fnmatch.fnmatchcase`. This is helpful in conjunction with
+  ``id_prefix``.
 
   .. note::
 
@@ -242,6 +242,15 @@ These are usually key-value pairs. The following attributes are recognized:
       heuristic and indicate that the specified list of dependencies is
       exhaustive.
 
+* ``nosync=id1:id2`` prescribes that no barrier synchronization is necessary
+  between this instruction and the instructions with identifiers ``id1`` and
+  ``id2``, even if a dependency chain exists and variables are accessed in
+  an apparently racy way.
+
+  Identifiers here are allowed to be wildcards as defined by the Python
+  function :func:`fnmatch.fnmatchcase`. This is helpful in conjunction with
+  ``id_prefix``.
+
 * ``priority=integer`` sets the instructions priority to the value
   ``integer``. Instructions with higher priority will be scheduled sooner,
   if possible. Note that the scheduler may still schedule a lower-priority
diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py
index aedc1edc6..034c9dd82 100644
--- a/loopy/kernel/creation.py
+++ b/loopy/kernel/creation.py
@@ -207,6 +207,7 @@ def parse_insn(insn):
     if insn_match is not None:
         depends_on = None
         depends_on_is_final = False
+        no_sync_with = None
         insn_groups = None
         conflicts_with_groups = None
         insn_id = None
@@ -256,6 +257,11 @@ def parse_insn(insn):
                             intern(dep.strip()) for dep in opt_value.split(":")
                             if dep.strip())
 
+                elif opt_key == "nosync" and opt_value is not None:
+                    no_sync_with = frozenset(
+                            intern(dep.strip()) for dep in opt_value.split(":")
+                            if dep.strip())
+
                 elif opt_key == "groups" and opt_value is not None:
                     insn_groups = frozenset(
                             intern(grp.strip()) for grp in opt_value.split(":")
@@ -319,6 +325,7 @@ def parse_insn(insn):
                         else insn_id),
                     depends_on=depends_on,
                     depends_on_is_final=depends_on_is_final,
+                    no_sync_with=no_sync_with,
                     groups=insn_groups,
                     conflicts_with_groups=conflicts_with_groups,
                     forced_iname_deps_is_final=forced_iname_deps_is_final,
@@ -1021,25 +1028,39 @@ def apply_default_order_to_args(kernel, default_order):
 
 # {{{ resolve wildcard insn dependencies
 
+def find_matching_insn_ids(knl, dep):
+    from fnmatch import fnmatchcase
+    # Ids of all instructions in *knl* whose id matches the wildcard *dep*.
+    return [
+        other_insn.id
+        for other_insn in knl.instructions
+        if fnmatchcase(other_insn.id, dep)]
+
+
+def resove_wildcard_insn_ids(knl, deps):
+    new_deps = []
+    for dep in deps:
+        matches = find_matching_insn_ids(knl, dep)
+
+        if matches:
+            new_deps.extend(matches)
+        else:
+            # No instruction id matches this pattern: keep it unchanged.
+            new_deps.append(dep)
+    # Ids matched by more than one pattern collapse in the frozenset.
+    return frozenset(new_deps)
+
+
 def resolve_wildcard_deps(knl):
     new_insns = []
 
-    from fnmatch import fnmatchcase
     for insn in knl.instructions:
         if insn.depends_on is not None:
-            new_deps = set()
-            for dep in insn.depends_on:
-                match_count = 0
-                for other_insn in knl.instructions:
-                    if fnmatchcase(other_insn.id, dep):
-                        new_deps.add(other_insn.id)
-                        match_count += 1
-
-                if match_count == 0:
-                    # Uh, best we can do
-                    new_deps.add(dep)
-
-            insn = insn.copy(depends_on=frozenset(new_deps))
+            insn = insn.copy(
+                    depends_on=resove_wildcard_insn_ids(knl, insn.depends_on),
+                    no_sync_with=resove_wildcard_insn_ids(
+                        knl, insn.no_sync_with),
+                    )
 
         new_insns.append(insn)
 
diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index 0e0638491..c4433d5e3 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -430,6 +430,8 @@ class InstructionBase(Record):
         An (otherwise meaningless) identifier that is unique within
         a :class:`loopy.kernel.LoopKernel`.
 
+    .. rubric:: Instruction ordering
+
     .. attribute:: depends_on
 
         a :class:`frozenset` of :attr:`id` values of :class:`Instruction` instances
@@ -460,6 +462,21 @@ class InstructionBase(Record):
         (see :class:`InstructionBase.groups`) may not be active when this
         instruction is scheduled.
 
+    .. attribute:: priority
+
+        Scheduling priority, an integer. Higher means 'execute sooner'.
+        Default 0.
+
+    .. rubric:: Synchronization
+
+    .. attribute:: no_sync_with
+
+        a :class:`frozenset` of :attr:`id` values of :class:`Instruction` instances
+        with which no barrier synchronization is necessary, even given the existence
+        of a dependency chain and apparently conflicting writes.
+
+    .. rubric:: Conditionals
+
     .. attribute:: predicates
 
         a :class:`frozenset` of variable names the conjunction (logical and) of
@@ -467,6 +484,8 @@ class InstructionBase(Record):
         should be run. Each variable name may, optionally, be preceded by
         an exclamation point, indicating negation.
 
+    .. rubric:: Iname dependencies
+
     .. attribute:: forced_iname_deps_is_final
 
         A :class:`bool` determining whether :attr:`forced_iname_deps` constitutes
@@ -478,10 +497,7 @@ class InstructionBase(Record):
         dependencies *or* constitute the entire list of iname dependencies,
         depending on the value of :attr:`forced_iname_deps_is_final`.
 
-    .. attribute:: priority
-
-        Scheduling priority, an integer. Higher means 'execute sooner'.
-        Default 0.
+    .. rubric:: Boostability
 
     .. attribute:: boostable
 
@@ -495,6 +511,8 @@ class InstructionBase(Record):
         may need to be boosted, as a heuristic help for the scheduler.
         Also allowed to be *None*.
 
+    .. rubric:: Tagging
+
     .. attribute:: tags
 
         A tuple of string identifiers that can be used to identify groups
@@ -512,12 +530,14 @@ class InstructionBase(Record):
 
     fields = set("id depends_on depends_on_is_final "
             "groups conflicts_with_groups "
+            "no_sync_with "
             "predicates "
             "forced_iname_deps_is_final forced_iname_deps "
             "priority boostable boostable_into".split())
 
     def __init__(self, id, depends_on, depends_on_is_final,
             groups, conflicts_with_groups,
+            no_sync_with,
             forced_iname_deps_is_final, forced_iname_deps, priority,
             boostable, boostable_into, predicates, tags,
             insn_deps=None, insn_deps_is_final=None):
@@ -541,6 +561,9 @@ class InstructionBase(Record):
         if conflicts_with_groups is None:
             conflicts_with_groups = frozenset()
 
+        if no_sync_with is None:
+            no_sync_with = frozenset()
+
         if forced_iname_deps_is_final is None:
             forced_iname_deps_is_final = False
 
@@ -574,6 +597,7 @@ class InstructionBase(Record):
                 id=id,
                 depends_on=depends_on,
                 depends_on_is_final=depends_on_is_final,
+                no_sync_with=no_sync_with,
                 groups=groups, conflicts_with_groups=conflicts_with_groups,
                 forced_iname_deps_is_final=forced_iname_deps_is_final,
                 forced_iname_deps=forced_iname_deps,
@@ -967,6 +991,7 @@ class Assignment(InstructionBase):
             depends_on_is_final=None,
             groups=None,
             conflicts_with_groups=None,
+            no_sync_with=None,
             forced_iname_deps_is_final=None,
             forced_iname_deps=frozenset(),
             boostable=None, boostable_into=None, tags=None,
@@ -980,6 +1005,7 @@ class Assignment(InstructionBase):
                 depends_on_is_final=depends_on_is_final,
                 groups=groups,
                 conflicts_with_groups=conflicts_with_groups,
+                no_sync_with=no_sync_with,
                 forced_iname_deps_is_final=forced_iname_deps_is_final,
                 forced_iname_deps=forced_iname_deps,
                 boostable=boostable,
@@ -1134,6 +1160,7 @@ class CInstruction(InstructionBase):
             read_variables=frozenset(), assignees=frozenset(),
             id=None, depends_on=None, depends_on_is_final=None,
             groups=None, conflicts_with_groups=None,
+            no_sync_with=None,
             forced_iname_deps_is_final=None, forced_iname_deps=frozenset(),
             priority=0, boostable=None, boostable_into=None,
             predicates=frozenset(), tags=None,
@@ -1153,6 +1180,7 @@ class CInstruction(InstructionBase):
                 depends_on=depends_on,
                 depends_on_is_final=depends_on_is_final,
                 groups=groups, conflicts_with_groups=conflicts_with_groups,
+                no_sync_with=no_sync_with,
                 forced_iname_deps_is_final=forced_iname_deps_is_final,
                 forced_iname_deps=forced_iname_deps,
                 boostable=boostable,
diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index e30d3bcb3..93e898068 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -400,7 +400,9 @@ def add_default_dependencies(kernel):
                             % var)
 
                 if len(var_writers) == 1:
-                    auto_deps.update(var_writers - set([insn.id]))
+                    auto_deps.update(
+                            var_writers
+                            - set([insn.id]))
 
             # }}}
 
diff --git a/loopy/schedule.py b/loopy/schedule.py
index 4bacc43b9..b606ba360 100644
--- a/loopy/schedule.py
+++ b/loopy/schedule.py
@@ -1043,6 +1043,9 @@ def get_barrier_needing_dependency(kernel, target, source, reverse, var_kind):
     if reverse:
         source, target = target, source
 
+    if source.id in target.no_sync_with:
+        return None
+
     # {{{ check that a dependency exists
 
     dep_descr = None
diff --git a/loopy/version.py b/loopy/version.py
index 9fa881d02..cd9f45ac3 100644
--- a/loopy/version.py
+++ b/loopy/version.py
@@ -32,4 +32,4 @@ except ImportError:
 else:
     _islpy_version = islpy.version.VERSION_TEXT
 
-DATA_MODEL_VERSION = "v24-islpy%s" % _islpy_version
+DATA_MODEL_VERSION = "v25-islpy%s" % _islpy_version
-- 
GitLab