From fd69a7fc8bf31aebee9d61ddc95986f2db43e013 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 7 Apr 2017 00:56:30 -0500
Subject: [PATCH] Add tools for finding the global barrier order to the kernel.

---
 loopy/kernel/__init__.py | 132 +++++++++++++++++++++++++++++++++++++++
 test/test_loopy.py       |  44 +++++++++++++
 2 files changed, 176 insertions(+)

diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
index 793d31791..ed01a3294 100644
--- a/loopy/kernel/__init__.py
+++ b/loopy/kernel/__init__.py
@@ -823,6 +823,138 @@ class LoopKernel(ImmutableRecordWithoutPickling):
 
         return result
 
+    @property
+    @memoize_method
+    def global_barrier_order(self):
+        """Return a :class:`tuple` listing the ids of global barrier instructions
+        in the order in which they occur in the kernel.
+
+        See also :class:`loopy.kernel.instruction.BarrierInstruction`.
+        """
+        barriers = []
+        visiting = set()
+        visited = set()
+
+        unvisited = set(insn.id for insn in self.instructions)
+
+        while unvisited:
+            stack = [unvisited.pop()]
+
+            while stack:
+                top = stack[-1]
+
+                if top in visiting:
+                    visiting.remove(top)
+
+                    from loopy.kernel.instruction import BarrierInstruction
+                    insn = self.id_to_insn[top]
+                    if isinstance(insn, BarrierInstruction):
+                        if insn.kind == "global":
+                            barriers.append(top)
+
+                if top in visited:
+                    stack.pop()
+                    continue
+
+                visited.add(top)
+                visiting.add(top)
+
+                for child in self.id_to_insn[top].depends_on:
+                    # Check for no cycles.
+                    assert child not in visiting
+                    stack.append(child)
+
+        # Ensure this is the only possible order.
+        for prev_barrier, barrier in zip(barriers, barriers[1:]):
+            if prev_barrier not in self.recursive_insn_dep_map()[barrier]:
+                raise LoopyError(
+                        "Unordered global barriers detected: '%s', '%s'"
+                        % (barrier, prev_barrier))
+
+        return tuple(barriers)
+
+    @memoize_method
+    def find_most_recent_global_barrier(self, insn_id):
+        """Return the id of the latest occurring global barrier which the
+        given instruction (indirectly or directly) depends on, or *None* if this
+        instruction does not depend on a global barrier.
+
+        The return value is guaranteed to be unique because global barriers are
+        totally ordered within the kernel.
+        """
+
+        if len(self.global_barrier_order) == 0:
+            return None
+
+        insn = self.id_to_insn[insn_id]
+
+        if len(insn.depends_on) == 0:
+            return None
+
+        def is_barrier(my_insn_id):
+            insn = self.id_to_insn[my_insn_id]
+            from loopy.kernel.instruction import BarrierInstruction
+            return isinstance(insn, BarrierInstruction) and insn.kind == "global"
+
+        global_barrier_to_ordinal = dict(
+            (b, i) for i, b in enumerate(self.global_barrier_order))
+
+        def get_barrier_ordinal(barrier_id):
+            return (global_barrier_to_ordinal[barrier_id]
+                    if barrier_id is not None
+                    else -1)
+
+        direct_barrier_dependencies = set(
+                dep for dep in insn.depends_on if is_barrier(dep))
+
+        if len(direct_barrier_dependencies) > 0:
+            return max(direct_barrier_dependencies, key=get_barrier_ordinal)
+        else:
+            return max((self.find_most_recent_global_barrier(dep)
+                        for dep in insn.depends_on),
+                    key=get_barrier_ordinal)
+
+    @property
+    @memoize_method
+    def subkernels(self):
+        if self.state != kernel_state.SCHEDULED:
+            raise LoopyError("Kernel must be scheduled")
+
+        from loopy.schedule import CallKernel
+
+        return tuple(sched_item.kernel_name
+                for sched_item in self.schedule
+                if isinstance(sched_item, CallKernel))
+
+    @property
+    @memoize_method
+    def subkernel_to_insn_ids(self):
+        if self.state != kernel_state.SCHEDULED:
+            raise LoopyError("Kernel must be scheduled")
+
+        from loopy.schedule import (
+                sched_item_to_insn_id, CallKernel, ReturnFromKernel)
+
+        subkernel = None
+        result = {}
+
+        for sched_item in self.schedule:
+            if isinstance(sched_item, CallKernel):
+                subkernel = sched_item.kernel_name
+                result[subkernel] = set()
+
+            if isinstance(sched_item, ReturnFromKernel):
+                subkernel = None
+
+            if subkernel is not None:
+                for insn_id in sched_item_to_insn_id(sched_item):
+                    result[subkernel].add(insn_id)
+
+        for subkernel in result:
+            result[subkernel] = frozenset(result[subkernel])
+
+        return result
+
     # }}}
 
     # {{{ argument wrangling
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 851a7f076..ec6dd5d3f 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2108,6 +2108,50 @@ def test_barrier_insertion_near_bottom_of_loop():
     assert_barrier_between(knl, "ainit", "aupdate", ignore_barriers_in_levels=[1])
 
 
+def test_global_barrier_order_finding():
+    knl = lp.make_kernel(
+            "{[i,itrip]: 0<=i<n and 0<=itrip<ntrips}",
+            """
+            for i
+                for itrip
+                    ... gbarrier {id=top}
+                    <> z[i] = z[i+1] + z[i]  {id=wr_z,dep=top}
+                    <> v[i] = 11  {id=wr_v,dep=top}
+                    ... gbarrier {dep=wr_z:wr_v,id=yoink}
+                    z[i] = z[i] - z[i+1] + v[i] {id=iupd, dep=yoink}
+                end
+                ... nop {id=nop}
+                ... gbarrier {dep=iupd,id=postloop}
+                z[i] = z[i] - z[i+1] + v[i]  {id=zzzv,dep=postloop}
+            end
+            """)
+
+    assert knl.global_barrier_order == ("top", "yoink", "postloop")
+
+    for insn, barrier in (
+            ("nop", None),
+            ("top", None),
+            ("wr_z", "top"),
+            ("wr_v", "top"),
+            ("yoink", "top"),
+            ("postloop", "yoink"),
+            ("zzzv", "postloop")):
+        assert knl.find_most_recent_global_barrier(insn) == barrier
+
+
+def test_global_barrier_error_if_unordered():
+    # FIXME: Should be illegal to declare this
+    knl = lp.make_kernel("{[i]: 0 <= i < 10}",
+            """
+            ... gbarrier
+            ... gbarrier
+            """)
+
+    from loopy.diagnostic import LoopyError
+    with pytest.raises(LoopyError):
+        knl.global_barrier_order
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
-- 
GitLab