From fd69a7fc8bf31aebee9d61ddc95986f2db43e013 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 00:56:30 -0500 Subject: [PATCH 01/11] Add tools for finding the global barrier order to the kernel. --- loopy/kernel/__init__.py | 132 +++++++++++++++++++++++++++++++++++++++ test/test_loopy.py | 44 +++++++++++++ 2 files changed, 176 insertions(+) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 793d31791..ed01a3294 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -823,6 +823,138 @@ class LoopKernel(ImmutableRecordWithoutPickling): return result + @property + @memoize_method + def global_barrier_order(self): + """Return a :class:`tuple` of the listing the ids of global barrier instructions + as they appear in order in the kernel. + + See also :class:`loopy.instruction.BarrierInstruction`. + """ + barriers = [] + visiting = set() + visited = set() + + unvisited = set(insn.id for insn in self.instructions) + + while unvisited: + stack = [unvisited.pop()] + + while stack: + top = stack[-1] + + if top in visiting: + visiting.remove(top) + + from loopy.kernel.instruction import BarrierInstruction + insn = self.id_to_insn[top] + if isinstance(insn, BarrierInstruction): + if insn.kind == "global": + barriers.append(top) + + if top in visited: + stack.pop() + continue + + visited.add(top) + visiting.add(top) + + for child in self.id_to_insn[top].depends_on: + # Check for no cycles. + assert child not in visiting + stack.append(child) + + # Ensure this is the only possible order. + for prev_barrier, barrier in zip(barriers, barriers[1:]): + if prev_barrier not in self.recursive_insn_dep_map()[barrier]: + raise LoopyError( + "Unordered global barriers detected: '%s', '%s'" + % (barrier, prev_barrier)) + + return tuple(barriers) + + @memoize_method + def find_most_recent_global_barrier(self, insn_id): + """Return the id of the latest occuring global barrier which the + given instruction (indirectly or directly) depends on, or *None* if this + instruction does not depend on a global barrier. + + The return value is guaranteed to be unique because global barriers are + totally ordered within the kernel. + """ + + if len(self.global_barrier_order) == 0: + return None + + insn = self.id_to_insn[insn_id] + + if len(insn.depends_on) == 0: + return None + + def is_barrier(my_insn_id): + insn = self.id_to_insn[my_insn_id] + from loopy.kernel.instruction import BarrierInstruction + return isinstance(insn, BarrierInstruction) and insn.kind == "global" + + global_barrier_to_ordinal = dict( + (b, i) for i, b in enumerate(self.global_barrier_order)) + + def get_barrier_ordinal(barrier_id): + return (global_barrier_to_ordinal[barrier_id] + if barrier_id is not None + else -1) + + direct_barrier_dependencies = set( + dep for dep in insn.depends_on if is_barrier(dep)) + + if len(direct_barrier_dependencies) > 0: + return max(direct_barrier_dependencies, key=get_barrier_ordinal) + else: + return max((self.find_most_recent_global_barrier(dep) + for dep in insn.depends_on), + key=get_barrier_ordinal) + + @property + @memoize_method + def subkernels(self): + if self.state != kernel_state.SCHEDULED: + raise LoopyError("Kernel must be scheduled") + + from loopy.schedule import CallKernel + + return tuple(sched_item.kernel_name + for sched_item in self.schedule + if isinstance(sched_item, CallKernel)) + + @property + @memoize_method + def subkernel_to_insn_ids(self): + if self.state != kernel_state.SCHEDULED: + raise LoopyError("Kernel must be scheduled") + + from loopy.schedule import ( + sched_item_to_insn_id, CallKernel, ReturnFromKernel) + + subkernel = None + result = {} + + for sched_item in self.schedule: + if isinstance(sched_item, CallKernel): + subkernel = sched_item.kernel_name + result[subkernel] = set() + + if isinstance(sched_item, ReturnFromKernel): + subkernel = None + + if subkernel is not None: + for insn_id in sched_item_to_insn_id(sched_item): + result[subkernel].add(insn_id) + + for subkernel in result: + result[subkernel] = frozenset(result[subkernel]) + + return result + # }}} # {{{ argument wrangling diff --git a/test/test_loopy.py b/test/test_loopy.py index 851a7f076..ec6dd5d3f 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2108,6 +2108,50 @@ def test_barrier_insertion_near_bottom_of_loop(): assert_barrier_between(knl, "ainit", "aupdate", ignore_barriers_in_levels=[1]) +def test_global_barrier_order_finding(): + knl = lp.make_kernel( + "{[i,itrip]: 0<=i z[i] = z[i+1] + z[i] {id=wr_z,dep=top} + <> v[i] = 11 {id=wr_v,dep=top} + ... gbarrier {dep=wr_z:wr_v,id=yoink} + z[i] = z[i] - z[i+1] + v[i] {id=iupd, dep=yoink} + end + ... nop {id=nop} + ... gbarrier {dep=iupd,id=postloop} + z[i] = z[i] - z[i+1] + v[i] {id=zzzv,dep=postloop} + end + """) + + assert knl.global_barrier_order == ("top", "yoink", "postloop") + + for insn, barrier in ( + ("nop", None), + ("top", None), + ("wr_z", "top"), + ("wr_v", "top"), + ("yoink", "top"), + ("postloop", "yoink"), + ("zzzv", "postloop")): + assert knl.find_most_recent_global_barrier(insn) == barrier + + +def test_global_barrier_error_if_unordered(): + # FIXME: Should be illegal to declare this + knl = lp.make_kernel("{[i]: 0 <= i < 10}", + """ + ... gbarrier + ... gbarrier + """) + + from loopy.diagnostic import LoopyError + with pytest.raises(LoopyError): + knl.global_barrier_order + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From de137d544d2bb94e76a7196e4850a74c9a732ca8 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 00:58:25 -0500 Subject: [PATCH 02/11] Add a return statement to reader_map(). --- loopy/kernel/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index ed01a3294..37aefec10 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -786,6 +786,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): for var_name in insn.read_dependency_names() & admissible_vars: result.setdefault(var_name, set()).add(insn.id) + return result + @memoize_method def writer_map(self): """ -- GitLab From 1a838063f2c43d0f9ba5bf82e0abd7307f87e6ee Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 01:06:56 -0500 Subject: [PATCH 03/11] Whitespace fix. --- loopy/kernel/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 37aefec10..a7b358698 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -899,7 +899,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): return isinstance(insn, BarrierInstruction) and insn.kind == "global" global_barrier_to_ordinal = dict( - (b, i) for i, b in enumerate(self.global_barrier_order)) + (b, i) for i, b in enumerate(self.global_barrier_order)) def get_barrier_ordinal(barrier_id): return (global_barrier_to_ordinal[barrier_id] -- GitLab From 967414c781b3d761d0e68d095cc07719f249deaa Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 01:19:45 -0500 Subject: [PATCH 04/11] find_most_recent_global_barrier(): Simplify by using recursive_insn_dep_map(). --- loopy/kernel/__init__.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index a7b358698..9c4f86fb5 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -901,20 +901,14 @@ class LoopKernel(ImmutableRecordWithoutPickling): global_barrier_to_ordinal = dict( (b, i) for i, b in enumerate(self.global_barrier_order)) - def get_barrier_ordinal(barrier_id): - return (global_barrier_to_ordinal[barrier_id] - if barrier_id is not None - else -1) + barriers = set(dep + for dep in self.recursive_insn_dep_map()[insn_id] + if is_barrier(dep)) - direct_barrier_dependencies = set( - dep for dep in insn.depends_on if is_barrier(dep)) - - if len(direct_barrier_dependencies) > 0: - return max(direct_barrier_dependencies, key=get_barrier_ordinal) + if len(barriers) > 0: + return max(barriers, key=lambda b: global_barrier_to_ordinal[b]) else: - return max((self.find_most_recent_global_barrier(dep) - for dep in insn.depends_on), - key=get_barrier_ordinal) + return None @property @memoize_method -- GitLab From 98c27e6b9c9dfcfd97996f36f1329b02b44af88f Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 01:29:04 -0500 Subject: [PATCH 05/11] Revert "find_most_recent_global_barrier(): Simplify by using" This reverts commit 967414c781b3d761d0e68d095cc07719f249deaa. --- loopy/kernel/__init__.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 9c4f86fb5..a7b358698 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -901,14 +901,20 @@ class LoopKernel(ImmutableRecordWithoutPickling): global_barrier_to_ordinal = dict( (b, i) for i, b in enumerate(self.global_barrier_order)) - barriers = set(dep - for dep in self.recursive_insn_dep_map()[insn_id] - if is_barrier(dep)) + def get_barrier_ordinal(barrier_id): + return (global_barrier_to_ordinal[barrier_id] + if barrier_id is not None + else -1) - if len(barriers) > 0: - return max(barriers, key=lambda b: global_barrier_to_ordinal[b]) + direct_barrier_dependencies = set( + dep for dep in insn.depends_on if is_barrier(dep)) + + if len(direct_barrier_dependencies) > 0: + return max(direct_barrier_dependencies, key=get_barrier_ordinal) else: - return None + return max((self.find_most_recent_global_barrier(dep) + for dep in insn.depends_on), + key=get_barrier_ordinal) @property @memoize_method -- GitLab From b02329060fde472312b2ac0c38e78409cb7d1822 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 01:44:55 -0500 Subject: [PATCH 06/11] global_barrier_order: Try hard to avoid using recursive_insn_dep_map(). --- loopy/kernel/__init__.py | 59 ++++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 11 deletions(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index a7b358698..895e32daa 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -839,6 +839,11 @@ class LoopKernel(ImmutableRecordWithoutPickling): unvisited = set(insn.id for insn in self.instructions) + def is_barrier(my_insn_id): + insn = self.id_to_insn[my_insn_id] + from loopy.kernel.instruction import BarrierInstruction + return isinstance(insn, BarrierInstruction) and insn.kind == "global" + while unvisited: stack = [unvisited.pop()] @@ -848,16 +853,13 @@ class LoopKernel(ImmutableRecordWithoutPickling): if top in visiting: visiting.remove(top) - from loopy.kernel.instruction import BarrierInstruction - insn = self.id_to_insn[top] - if isinstance(insn, BarrierInstruction): - if insn.kind == "global": - barriers.append(top) - if top in visited: stack.pop() continue + if is_barrier(top): + barriers.append(top) + visited.add(top) visiting.add(top) @@ -866,12 +868,47 @@ class LoopKernel(ImmutableRecordWithoutPickling): assert child not in visiting stack.append(child) + if len(barriers) == 0: + return () + # Ensure this is the only possible order. - for prev_barrier, barrier in zip(barriers, barriers[1:]): - if prev_barrier not in self.recursive_insn_dep_map()[barrier]: - raise LoopyError( - "Unordered global barriers detected: '%s', '%s'" - % (barrier, prev_barrier)) + # + # This is done by traversing back up the dependency chain starting with + # the last barrier. If we don't see all the barriers, we know there must + # be a break in the order. + + stack = [barriers[-1]] + visiting.clear() + visited.clear() + + seen_barriers = set() + + while stack: + top = stack[-1] + + if top in visiting: + visiting.remove(top) + + if top in visited: + stack.pop() + continue + + if is_barrier(top): + seen_barriers.add(top) + if len(seen_barriers) == len(barriers): + break + + visited.add(top) + visiting.add(top) + + for child in self.id_to_insn[top].depends_on: + # Check for no cycles. + stack.append(child) + + if len(seen_barriers) < len(barriers): + raise LoopyError( + "Unordered global barrier sets detected: '%s', '%s'" + % (seen_barriers, set(barriers) - seen_barriers)) return tuple(barriers) -- GitLab From d07368b8d32f5e4d24bc502b78dba3134b4486a8 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 01:51:45 -0500 Subject: [PATCH 07/11] Revert "global_barrier_order: Try hard to avoid using recursive_insn_dep_map()." This reverts commit b02329060fde472312b2ac0c38e78409cb7d1822. --- loopy/kernel/__init__.py | 59 ++++++++-------------------------------- 1 file changed, 11 insertions(+), 48 deletions(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 895e32daa..a7b358698 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -839,11 +839,6 @@ class LoopKernel(ImmutableRecordWithoutPickling): unvisited = set(insn.id for insn in self.instructions) - def is_barrier(my_insn_id): - insn = self.id_to_insn[my_insn_id] - from loopy.kernel.instruction import BarrierInstruction - return isinstance(insn, BarrierInstruction) and insn.kind == "global" - while unvisited: stack = [unvisited.pop()] @@ -853,13 +848,16 @@ class LoopKernel(ImmutableRecordWithoutPickling): if top in visiting: visiting.remove(top) + from loopy.kernel.instruction import BarrierInstruction + insn = self.id_to_insn[top] + if isinstance(insn, BarrierInstruction): + if insn.kind == "global": + barriers.append(top) + if top in visited: stack.pop() continue - if is_barrier(top): - barriers.append(top) - visited.add(top) visiting.add(top) @@ -868,47 +866,12 @@ class LoopKernel(ImmutableRecordWithoutPickling): assert child not in visiting stack.append(child) - if len(barriers) == 0: - return () - # Ensure this is the only possible order. - # - # This is done by traversing back up the dependency chain starting with - # the last barrier. If we don't see all the barriers, we know there must - # be a break in the order. - - stack = [barriers[-1]] - visiting.clear() - visited.clear() - - seen_barriers = set() - - while stack: - top = stack[-1] - - if top in visiting: - visiting.remove(top) - - if top in visited: - stack.pop() - continue - - if is_barrier(top): - seen_barriers.add(top) - if len(seen_barriers) == len(barriers): - break - - visited.add(top) - visiting.add(top) - - for child in self.id_to_insn[top].depends_on: - # Check for no cycles. - stack.append(child) - - if len(seen_barriers) < len(barriers): - raise LoopyError( - "Unordered global barrier sets detected: '%s', '%s'" - % (seen_barriers, set(barriers) - seen_barriers)) + for prev_barrier, barrier in zip(barriers, barriers[1:]): + if prev_barrier not in self.recursive_insn_dep_map()[barrier]: + raise LoopyError( + "Unordered global barriers detected: '%s', '%s'" + % (barrier, prev_barrier)) return tuple(barriers) -- GitLab From b67689d5a7c5dd6e62106c55e12ccbeb46c94076 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Fri, 7 Apr 2017 02:35:39 -0500 Subject: [PATCH 08/11] Actually get rid of the need for recursive_insn_dep_map(). --- loopy/kernel/__init__.py | 52 ++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index a7b358698..15084df7e 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -839,6 +839,11 @@ class LoopKernel(ImmutableRecordWithoutPickling): unvisited = set(insn.id for insn in self.instructions) + def is_barrier(my_insn_id): + insn = self.id_to_insn[my_insn_id] + from loopy.kernel.instruction import BarrierInstruction + return isinstance(insn, BarrierInstruction) and insn.kind == "global" + while unvisited: stack = [unvisited.pop()] @@ -847,12 +852,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): if top in visiting: visiting.remove(top) - - from loopy.kernel.instruction import BarrierInstruction - insn = self.id_to_insn[top] - if isinstance(insn, BarrierInstruction): - if insn.kind == "global": - barriers.append(top) + if is_barrier(top): + barriers.append(top) if top in visited: stack.pop() @@ -867,11 +868,42 @@ class LoopKernel(ImmutableRecordWithoutPickling): stack.append(child) # Ensure this is the only possible order. + # + # We do this by looking at the barriers in order. + # We check for each adjacent pair (a,b) in the order if a < b, + # i.e. if a is reachable by a chain of dependencies from b. + + visiting.clear() + visited.clear() + for prev_barrier, barrier in zip(barriers, barriers[1:]): - if prev_barrier not in self.recursive_insn_dep_map()[barrier]: - raise LoopyError( - "Unordered global barriers detected: '%s', '%s'" - % (barrier, prev_barrier)) + # Check if prev_barrier is reachable from barrier. + stack = [barrier] + visited.discard(prev_barrier) + + while stack: + top = stack[-1] + + if top in visiting: + visiting.remove(top) + + if top in visited: + stack.pop() + continue + + visited.add(top) + visiting.add(top) + + if top == prev_barrier: + visiting.clear() + break + + for child in self.id_to_insn[top].depends_on: + stack.append(child) + else: + # Search exhausted and we did not find prev_barrier. + raise LoopyError("barriers '%s' and '%s' are not ordered" + % (prev_barrier, barrier)) return tuple(barriers) -- GitLab From 76b7ffd53620e599ef0689f5afef8a8a7b5fc811 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 8 Apr 2017 00:44:50 -0500 Subject: [PATCH 09/11] Move barrier functions to loopy.kernel.tools. --- loopy/__init__.py | 10 ++- loopy/kernel/__init__.py | 166 ----------------------------------- loopy/kernel/tools.py | 184 +++++++++++++++++++++++++++++++++++++++ test/test_loopy.py | 6 +- 4 files changed, 196 insertions(+), 170 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 6cbb3362e..fa18446c7 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -54,7 +54,11 @@ from loopy.kernel.tools import ( get_dot_dependency_graph, show_dependency_graph, add_dtypes, - add_and_infer_dtypes) + add_and_infer_dtypes, + get_global_barrier_order, + find_most_recent_global_barrier, + get_subkernels, + get_subkernel_to_insn_id_map) from loopy.kernel.creation import make_kernel, UniqueName from loopy.library.reduction import register_reduction_parser @@ -215,6 +219,10 @@ __all__ = [ "show_dependency_graph", "add_dtypes", "add_and_infer_dtypes", + "get_global_barrier_order", + "find_most_recent_global_barrier", + "get_subkernels", + "get_subkernel_to_insn_id_map", "infer_unknown_types", diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 15084df7e..134ea2b8b 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -825,172 +825,6 @@ class LoopKernel(ImmutableRecordWithoutPickling): return result - @property - @memoize_method - def global_barrier_order(self): - """Return a :class:`tuple` of the listing the ids of global barrier instructions - as they appear in order in the kernel. - - See also :class:`loopy.instruction.BarrierInstruction`. - """ - barriers = [] - visiting = set() - visited = set() - - unvisited = set(insn.id for insn in self.instructions) - - def is_barrier(my_insn_id): - insn = self.id_to_insn[my_insn_id] - from loopy.kernel.instruction import BarrierInstruction - return isinstance(insn, BarrierInstruction) and insn.kind == "global" - - while unvisited: - stack = [unvisited.pop()] - - while stack: - top = stack[-1] - - if top in visiting: - visiting.remove(top) - if is_barrier(top): - barriers.append(top) - - if top in visited: - stack.pop() - continue - - visited.add(top) - visiting.add(top) - - for child in self.id_to_insn[top].depends_on: - # Check for no cycles. - assert child not in visiting - stack.append(child) - - # Ensure this is the only possible order. - # - # We do this by looking at the barriers in order. - # We check for each adjacent pair (a,b) in the order if a < b, - # i.e. if a is reachable by a chain of dependencies from b. - - visiting.clear() - visited.clear() - - for prev_barrier, barrier in zip(barriers, barriers[1:]): - # Check if prev_barrier is reachable from barrier. - stack = [barrier] - visited.discard(prev_barrier) - - while stack: - top = stack[-1] - - if top in visiting: - visiting.remove(top) - - if top in visited: - stack.pop() - continue - - visited.add(top) - visiting.add(top) - - if top == prev_barrier: - visiting.clear() - break - - for child in self.id_to_insn[top].depends_on: - stack.append(child) - else: - # Search exhausted and we did not find prev_barrier. - raise LoopyError("barriers '%s' and '%s' are not ordered" - % (prev_barrier, barrier)) - - return tuple(barriers) - - @memoize_method - def find_most_recent_global_barrier(self, insn_id): - """Return the id of the latest occuring global barrier which the - given instruction (indirectly or directly) depends on, or *None* if this - instruction does not depend on a global barrier. - - The return value is guaranteed to be unique because global barriers are - totally ordered within the kernel. - """ - - if len(self.global_barrier_order) == 0: - return None - - insn = self.id_to_insn[insn_id] - - if len(insn.depends_on) == 0: - return None - - def is_barrier(my_insn_id): - insn = self.id_to_insn[my_insn_id] - from loopy.kernel.instruction import BarrierInstruction - return isinstance(insn, BarrierInstruction) and insn.kind == "global" - - global_barrier_to_ordinal = dict( - (b, i) for i, b in enumerate(self.global_barrier_order)) - - def get_barrier_ordinal(barrier_id): - return (global_barrier_to_ordinal[barrier_id] - if barrier_id is not None - else -1) - - direct_barrier_dependencies = set( - dep for dep in insn.depends_on if is_barrier(dep)) - - if len(direct_barrier_dependencies) > 0: - return max(direct_barrier_dependencies, key=get_barrier_ordinal) - else: - return max((self.find_most_recent_global_barrier(dep) - for dep in insn.depends_on), - key=get_barrier_ordinal) - - @property - @memoize_method - def subkernels(self): - if self.state != kernel_state.SCHEDULED: - raise LoopyError("Kernel must be scheduled") - - from loopy.schedule import CallKernel - - return tuple(sched_item.kernel_name - for sched_item in self.schedule - if isinstance(sched_item, CallKernel)) - - @property - @memoize_method - def subkernel_to_insn_ids(self): - if self.state != kernel_state.SCHEDULED: - raise LoopyError("Kernel must be scheduled") - - from loopy.schedule import ( - sched_item_to_insn_id, CallKernel, ReturnFromKernel) - - subkernel = None - result = {} - - for sched_item in self.schedule: - if isinstance(sched_item, CallKernel): - subkernel = sched_item.kernel_name - result[subkernel] = set() - - if isinstance(sched_item, ReturnFromKernel): - subkernel = None - - if subkernel is not None: - for insn_id in sched_item_to_insn_id(sched_item): - result[subkernel].add(insn_id) - - for subkernel in result: - result[subkernel] = frozenset(result[subkernel]) - - return result - - # }}} - # {{{ argument wrangling @property diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 203342523..32b589e5a 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -34,6 +34,8 @@ import numpy as np import islpy as isl from islpy import dim_type from loopy.diagnostic import LoopyError, warn_with_kernel +from pytools import memoize_on_first_arg + import logging logger = logging.getLogger(__name__) @@ -1367,4 +1369,186 @@ def draw_dependencies_as_unicode_arrows( # }}} + +# {{{ global barrier order finding + +@memoize_on_first_arg +def get_global_barrier_order(kernel): + """Return a :class:`tuple` of the listing the ids of global barrier instructions + as they appear in order in the kernel. + + See also :class:`loopy.instruction.BarrierInstruction`. + """ + barriers = [] + visiting = set() + visited = set() + + unvisited = set(insn.id for insn in kernel.instructions) + + def is_barrier(my_insn_id): + insn = kernel.id_to_insn[my_insn_id] + from loopy.kernel.instruction import BarrierInstruction + return isinstance(insn, BarrierInstruction) and insn.kind == "global" + + while unvisited: + stack = [unvisited.pop()] + + while stack: + top = stack[-1] + + if top in visiting: + visiting.remove(top) + if is_barrier(top): + barriers.append(top) + + if top in visited: + stack.pop() + continue + + visited.add(top) + visiting.add(top) + + for child in kernel.id_to_insn[top].depends_on: + # Check for no cycles. + assert child not in visiting + stack.append(child) + + # Ensure this is the only possible order. + # + # We do this by looking at the barriers in order. + # We check for each adjacent pair (a,b) in the order if a < b, + # i.e. if a is reachable by a chain of dependencies from b. + + visiting.clear() + visited.clear() + + for prev_barrier, barrier in zip(barriers, barriers[1:]): + # Check if prev_barrier is reachable from barrier. + stack = [barrier] + visited.discard(prev_barrier) + + while stack: + top = stack[-1] + + if top in visiting: + visiting.remove(top) + + if top in visited: + stack.pop() + continue + + visited.add(top) + visiting.add(top) + + if top == prev_barrier: + visiting.clear() + break + + for child in kernel.id_to_insn[top].depends_on: + stack.append(child) + else: + # Search exhausted and we did not find prev_barrier. + raise LoopyError("barriers '%s' and '%s' are not ordered" + % (prev_barrier, barrier)) + + return tuple(barriers) + +# }}} + + +# {{{ find most recent global barrier + +@memoize_on_first_arg +def find_most_recent_global_barrier(kernel, insn_id): + """Return the id of the latest occuring global barrier which the + given instruction (indirectly or directly) depends on, or *None* if this + instruction does not depend on a global barrier. + + The return value is guaranteed to be unique because global barriers are + totally ordered within the kernel. + """ + + global_barrier_order = get_global_barrier_order(kernel) + + if len(global_barrier_order) == 0: + return None + + insn = kernel.id_to_insn[insn_id] + + if len(insn.depends_on) == 0: + return None + + def is_barrier(my_insn_id): + insn = kernel.id_to_insn[my_insn_id] + from loopy.kernel.instruction import BarrierInstruction + return isinstance(insn, BarrierInstruction) and insn.kind == "global" + + global_barrier_to_ordinal = dict( + (b, i) for i, b in enumerate(global_barrier_order)) + + def get_barrier_ordinal(barrier_id): + return (global_barrier_to_ordinal[barrier_id] + if barrier_id is not None + else -1) + + direct_barrier_dependencies = set( + dep for dep in insn.depends_on if is_barrier(dep)) + + if len(direct_barrier_dependencies) > 0: + return max(direct_barrier_dependencies, key=get_barrier_ordinal) + else: + return max((find_most_recent_global_barrier(kernel, dep) + for dep in insn.depends_on), + key=get_barrier_ordinal) + +# }}} + + +# {{{ subkernel tools + +@memoize_on_first_arg +def get_subkernels(kernel): + from loopy.kernel import kernel_state + if kernel.state != kernel_state.SCHEDULED: + raise LoopyError("Kernel must be scheduled") + + from loopy.schedule import CallKernel + + return tuple(sched_item.kernel_name + for sched_item in kernel.schedule + if isinstance(sched_item, CallKernel)) + + +@memoize_on_first_arg +def get_subkernel_to_insn_id_map(kernel): + from loopy.kernel import kernel_state + if kernel.state != kernel_state.SCHEDULED: + raise LoopyError("Kernel must be scheduled") + + from loopy.schedule import ( + sched_item_to_insn_id, CallKernel, ReturnFromKernel) + + subkernel = None + result = {} + + for sched_item in kernel.schedule: + if isinstance(sched_item, CallKernel): + subkernel = sched_item.kernel_name + result[subkernel] = set() + + if isinstance(sched_item, ReturnFromKernel): + subkernel = None + + if subkernel is not None: + for insn_id in sched_item_to_insn_id(sched_item): + result[subkernel].add(insn_id) + + for subkernel in result: + result[subkernel] = frozenset(result[subkernel]) + + return result + +# }}} + + # vim: foldmethod=marker diff --git a/test/test_loopy.py b/test/test_loopy.py index 82994c386..1218847a7 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2126,7 +2126,7 @@ def test_global_barrier_order_finding(): end """) - assert knl.global_barrier_order == ("top", "yoink", "postloop") + assert lp.get_global_barrier_order(knl) == ("top", "yoink", "postloop") for insn, barrier in ( ("nop", None), @@ -2136,7 +2136,7 @@ def test_global_barrier_order_finding(): ("yoink", "top"), ("postloop", "yoink"), ("zzzv", "postloop")): - assert knl.find_most_recent_global_barrier(insn) == barrier + assert lp.find_most_recent_global_barrier(knl, insn) == barrier def test_global_barrier_error_if_unordered(): @@ -2149,7 +2149,7 @@ def test_global_barrier_error_if_unordered(): from loopy.diagnostic import LoopyError with pytest.raises(LoopyError): - knl.global_barrier_order + lp.get_global_barrier_order(knl) def test_struct_assignment(ctx_factory): -- GitLab From 343372fec4dfe3e54d6a6093faf77afe7761870d Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 8 Apr 2017 00:50:12 -0500 Subject: [PATCH 10/11] Document subkernel helpers. --- loopy/kernel/tools.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 32b589e5a..df0f3c931 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1508,6 +1508,11 @@ def find_most_recent_global_barrier(kernel, insn_id): @memoize_on_first_arg def get_subkernels(kernel): + """Return a :class:`tuple` of the names of the subkernels in the kernel. The + kernel must be scheduled. + + See also :class:`loopy.schedule.CallKernel`. + """ from loopy.kernel import kernel_state if kernel.state != kernel_state.SCHEDULED: raise LoopyError("Kernel must be scheduled") @@ -1521,6 +1526,10 @@ def get_subkernels(kernel): @memoize_on_first_arg def get_subkernel_to_insn_id_map(kernel): + """Return a :class:`dict` mapping subkernel names to a :class:`frozenset` + consisting of the instruction ids scheduled within the subkernel. The + kernel must be scheduled. + """ from loopy.kernel import kernel_state if kernel.state != kernel_state.SCHEDULED: raise LoopyError("Kernel must be scheduled") -- GitLab From 77c2170fb1aae0093c4a9a2d7795c0aef2b9c015 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 8 Apr 2017 00:56:36 -0500 Subject: [PATCH 11/11] Fix deleted fold. --- loopy/kernel/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 134ea2b8b..324f7da1a 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -825,6 +825,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): return result + # }}} + # {{{ argument wrangling @property -- GitLab