From fd69a7fc8bf31aebee9d61ddc95986f2db43e013 Mon Sep 17 00:00:00 2001
From: Matt Wala <wala1@illinois.edu>
Date: Fri, 7 Apr 2017 00:56:30 -0500
Subject: [PATCH] Add tools for finding the global barrier order to the kernel.

---
 loopy/kernel/__init__.py | 162 +++++++++++++++++++++++++++++++++++++++
 test/test_loopy.py       |  46 +++++++++++
 2 files changed, 208 insertions(+)

diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
index 793d31791..ed01a3294 100644
--- a/loopy/kernel/__init__.py
+++ b/loopy/kernel/__init__.py
@@ -823,6 +823,168 @@ class LoopKernel(ImmutableRecordWithoutPickling):

         return result

+    @property
+    @memoize_method
+    def global_barrier_order(self):
+        """Return a :class:`tuple` listing the ids of the global barrier
+        instructions in the order in which they appear in the kernel.
+
+        The order is obtained from a depth-first traversal of the instruction
+        dependency graph, so a barrier is listed after every barrier it
+        depends on.
+
+        :raises LoopyError: if a dependency cycle is encountered, or if two
+            consecutive barriers in the result are not related by a (possibly
+            indirect) dependency, i.e. if the order is not unique.
+
+        See also :class:`loopy.instruction.BarrierInstruction`.
+        """
+        from loopy.kernel.instruction import BarrierInstruction
+
+        barriers = []
+        visiting = set()
+        visited = set()
+
+        unvisited = set(insn.id for insn in self.instructions)
+
+        while unvisited:
+            stack = [unvisited.pop()]
+
+            while stack:
+                top = stack[-1]
+
+                if top in visiting:
+                    # All dependencies of 'top' have been processed, so
+                    # 'top' can be emitted now (post-order).
+                    visiting.remove(top)
+
+                    insn = self.id_to_insn[top]
+                    if isinstance(insn, BarrierInstruction):
+                        if insn.kind == "global":
+                            barriers.append(top)
+
+                if top in visited:
+                    stack.pop()
+                    continue
+
+                visited.add(top)
+                visiting.add(top)
+
+                for child in self.id_to_insn[top].depends_on:
+                    # Check for no cycles. This is an explicit check rather
+                    # than an assert so that it survives 'python -O'.
+                    if child in visiting:
+                        raise LoopyError(
+                                "Dependency cycle detected at instruction '%s'"
+                                % child)
+                    stack.append(child)
+
+        # Ensure this is the only possible order.
+        for prev_barrier, barrier in zip(barriers, barriers[1:]):
+            if prev_barrier not in self.recursive_insn_dep_map()[barrier]:
+                raise LoopyError(
+                        "Unordered global barriers detected: '%s', '%s'"
+                        % (barrier, prev_barrier))
+
+        return tuple(barriers)
+
+    @memoize_method
+    def find_most_recent_global_barrier(self, insn_id):
+        """Return the id of the latest occurring global barrier which the
+        given instruction (indirectly or directly) depends on, or *None* if this
+        instruction does not depend on a global barrier.
+
+        The return value is guaranteed to be unique because global barriers are
+        totally ordered within the kernel.
+        """
+
+        if len(self.global_barrier_order) == 0:
+            return None
+
+        insn = self.id_to_insn[insn_id]
+
+        if len(insn.depends_on) == 0:
+            return None
+
+        from loopy.kernel.instruction import BarrierInstruction
+
+        def is_barrier(my_insn_id):
+            insn = self.id_to_insn[my_insn_id]
+            return isinstance(insn, BarrierInstruction) and insn.kind == "global"
+
+        global_barrier_to_ordinal = dict(
+                (b, i) for i, b in enumerate(self.global_barrier_order))
+
+        def get_barrier_ordinal(barrier_id):
+            return (global_barrier_to_ordinal[barrier_id]
+                    if barrier_id is not None
+                    else -1)
+
+        # Every dependency contributes one candidate: the dependency itself if
+        # it is a global barrier (it is ordered after every barrier it depends
+        # on), otherwise the most recent barrier reachable through it. All
+        # candidates have to be compared, because a non-barrier dependency may
+        # lead to a later barrier than a direct barrier dependency does.
+        return max(
+                (dep if is_barrier(dep)
+                    else self.find_most_recent_global_barrier(dep)
+                    for dep in insn.depends_on),
+                key=get_barrier_ordinal)
+
+    @property
+    @memoize_method
+    def subkernels(self):
+        """Return a :class:`tuple` containing the *kernel_name* of every
+        :class:`loopy.schedule.CallKernel` item in the schedule, in schedule
+        order.
+
+        :raises LoopyError: if the kernel is not in the scheduled state.
+        """
+        if self.state != kernel_state.SCHEDULED:
+            raise LoopyError("Kernel must be scheduled")
+
+        from loopy.schedule import CallKernel
+
+        return tuple(sched_item.kernel_name
+                for sched_item in self.schedule
+                if isinstance(sched_item, CallKernel))
+
+    @property
+    @memoize_method
+    def subkernel_to_insn_ids(self):
+        """Return a :class:`dict` mapping each subkernel name to a
+        :class:`frozenset` of the instruction ids found in the schedule items
+        from that subkernel's :class:`loopy.schedule.CallKernel` up to the
+        next :class:`loopy.schedule.ReturnFromKernel`.
+
+        :raises LoopyError: if the kernel is not in the scheduled state.
+        """
+        if self.state != kernel_state.SCHEDULED:
+            raise LoopyError("Kernel must be scheduled")
+
+        from loopy.schedule import (
+                sched_item_to_insn_id, CallKernel, ReturnFromKernel)
+
+        subkernel = None
+        result = {}
+
+        for sched_item in self.schedule:
+            if isinstance(sched_item, CallKernel):
+                subkernel = sched_item.kernel_name
+                result[subkernel] = set()
+
+            if isinstance(sched_item, ReturnFromKernel):
+                subkernel = None
+
+            if subkernel is not None:
+                for insn_id in sched_item_to_insn_id(sched_item):
+                    result[subkernel].add(insn_id)
+
+        for subkernel in result:
+            result[subkernel] = frozenset(result[subkernel])
+
+        return result
+
     # }}}

     # {{{ argument wrangling
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 851a7f076..ec6dd5d3f 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2108,6 +2108,52 @@ def test_barrier_insertion_near_bottom_of_loop():
     assert_barrier_between(knl, "ainit", "aupdate", ignore_barriers_in_levels=[1])


+def test_global_barrier_order_finding():
+    knl = lp.make_kernel(
+            "{[i,itrip]: 0<=i<n and 0<=itrip<ntrips}",
+            """
+            for i
+                for itrip
+                    ... gbarrier {id=top}
+                    <> z[i] = z[i+1] + z[i] {id=wr_z,dep=top}
+                    <> v[i] = 11 {id=wr_v,dep=top}
+                    ... gbarrier {dep=wr_z:wr_v,id=yoink}
+                    z[i] = z[i] - z[i+1] + v[i] {id=iupd, dep=yoink}
+                end
+                ... nop {id=nop}
+                ... nop {id=mixed,dep=top:iupd}
+                ... gbarrier {dep=iupd,id=postloop}
+                z[i] = z[i] - z[i+1] + v[i] {id=zzzv,dep=postloop}
+            end
+            """)
+
+    assert knl.global_barrier_order == ("top", "yoink", "postloop")
+
+    for insn, barrier in (
+            ("nop", None),
+            ("top", None),
+            ("wr_z", "top"),
+            ("wr_v", "top"),
+            ("yoink", "top"),
+            ("mixed", "yoink"),
+            ("postloop", "yoink"),
+            ("zzzv", "postloop")):
+        assert knl.find_most_recent_global_barrier(insn) == barrier
+
+
+def test_global_barrier_error_if_unordered():
+    # FIXME: Should be illegal to declare this
+    knl = lp.make_kernel("{[i]: 0 <= i < 10}",
+            """
+            ... gbarrier
+            ... gbarrier
+            """)
+
+    from loopy.diagnostic import LoopyError
+    with pytest.raises(LoopyError):
+        knl.global_barrier_order
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])
-- 
GitLab