From 76b7ffd53620e599ef0689f5afef8a8a7b5fc811 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 8 Apr 2017 00:44:50 -0500 Subject: [PATCH] Move barrier functions to loopy.kernel.tools. --- loopy/__init__.py | 10 ++- loopy/kernel/__init__.py | 166 ----------------------------------- loopy/kernel/tools.py | 184 +++++++++++++++++++++++++++++++++++++++ test/test_loopy.py | 6 +- 4 files changed, 196 insertions(+), 170 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 6cbb3362..fa18446c 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -54,7 +54,11 @@ from loopy.kernel.tools import ( get_dot_dependency_graph, show_dependency_graph, add_dtypes, - add_and_infer_dtypes) + add_and_infer_dtypes, + get_global_barrier_order, + find_most_recent_global_barrier, + get_subkernels, + get_subkernel_to_insn_id_map) from loopy.kernel.creation import make_kernel, UniqueName from loopy.library.reduction import register_reduction_parser @@ -215,6 +219,10 @@ __all__ = [ "show_dependency_graph", "add_dtypes", "add_and_infer_dtypes", + "get_global_barrier_order", + "find_most_recent_global_barrier", + "get_subkernels", + "get_subkernel_to_insn_id_map", "infer_unknown_types", diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 15084df7..134ea2b8 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -825,172 +825,6 @@ class LoopKernel(ImmutableRecordWithoutPickling): return result - @property - @memoize_method - def global_barrier_order(self): - """Return a :class:`tuple` of the listing the ids of global barrier instructions - as they appear in order in the kernel. - - See also :class:`loopy.instruction.BarrierInstruction`. 
- """ - barriers = [] - visiting = set() - visited = set() - - unvisited = set(insn.id for insn in self.instructions) - - def is_barrier(my_insn_id): - insn = self.id_to_insn[my_insn_id] - from loopy.kernel.instruction import BarrierInstruction - return isinstance(insn, BarrierInstruction) and insn.kind == "global" - - while unvisited: - stack = [unvisited.pop()] - - while stack: - top = stack[-1] - - if top in visiting: - visiting.remove(top) - if is_barrier(top): - barriers.append(top) - - if top in visited: - stack.pop() - continue - - visited.add(top) - visiting.add(top) - - for child in self.id_to_insn[top].depends_on: - # Check for no cycles. - assert child not in visiting - stack.append(child) - - # Ensure this is the only possible order. - # - # We do this by looking at the barriers in order. - # We check for each adjacent pair (a,b) in the order if a < b, - # i.e. if a is reachable by a chain of dependencies from b. - - visiting.clear() - visited.clear() - - for prev_barrier, barrier in zip(barriers, barriers[1:]): - # Check if prev_barrier is reachable from barrier. - stack = [barrier] - visited.discard(prev_barrier) - - while stack: - top = stack[-1] - - if top in visiting: - visiting.remove(top) - - if top in visited: - stack.pop() - continue - - visited.add(top) - visiting.add(top) - - if top == prev_barrier: - visiting.clear() - break - - for child in self.id_to_insn[top].depends_on: - stack.append(child) - else: - # Search exhausted and we did not find prev_barrier. - raise LoopyError("barriers '%s' and '%s' are not ordered" - % (prev_barrier, barrier)) - - return tuple(barriers) - - @memoize_method - def find_most_recent_global_barrier(self, insn_id): - """Return the id of the latest occuring global barrier which the - given instruction (indirectly or directly) depends on, or *None* if this - instruction does not depend on a global barrier. 
- - The return value is guaranteed to be unique because global barriers are - totally ordered within the kernel. - """ - - if len(self.global_barrier_order) == 0: - return None - - insn = self.id_to_insn[insn_id] - - if len(insn.depends_on) == 0: - return None - - def is_barrier(my_insn_id): - insn = self.id_to_insn[my_insn_id] - from loopy.kernel.instruction import BarrierInstruction - return isinstance(insn, BarrierInstruction) and insn.kind == "global" - - global_barrier_to_ordinal = dict( - (b, i) for i, b in enumerate(self.global_barrier_order)) - - def get_barrier_ordinal(barrier_id): - return (global_barrier_to_ordinal[barrier_id] - if barrier_id is not None - else -1) - - direct_barrier_dependencies = set( - dep for dep in insn.depends_on if is_barrier(dep)) - - if len(direct_barrier_dependencies) > 0: - return max(direct_barrier_dependencies, key=get_barrier_ordinal) - else: - return max((self.find_most_recent_global_barrier(dep) - for dep in insn.depends_on), - key=get_barrier_ordinal) - - @property - @memoize_method - def subkernels(self): - if self.state != kernel_state.SCHEDULED: - raise LoopyError("Kernel must be scheduled") - - from loopy.schedule import CallKernel - - return tuple(sched_item.kernel_name - for sched_item in self.schedule - if isinstance(sched_item, CallKernel)) - - @property - @memoize_method - def subkernel_to_insn_ids(self): - if self.state != kernel_state.SCHEDULED: - raise LoopyError("Kernel must be scheduled") - - from loopy.schedule import ( - sched_item_to_insn_id, CallKernel, ReturnFromKernel) - - subkernel = None - result = {} - - for sched_item in self.schedule: - if isinstance(sched_item, CallKernel): - subkernel = sched_item.kernel_name - result[subkernel] = set() - - if isinstance(sched_item, ReturnFromKernel): - subkernel = None - - if subkernel is not None: - for insn_id in sched_item_to_insn_id(sched_item): - result[subkernel].add(insn_id) - - for subkernel in result: - result[subkernel] = frozenset(result[subkernel]) 
- - return result - - # }}} - # {{{ argument wrangling @property diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 20334252..32b589e5 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -34,6 +34,8 @@ import numpy as np import islpy as isl from islpy import dim_type from loopy.diagnostic import LoopyError, warn_with_kernel +from pytools import memoize_on_first_arg + import logging logger = logging.getLogger(__name__) @@ -1367,4 +1369,186 @@ def draw_dependencies_as_unicode_arrows( # }}} + +# {{{ global barrier order finding + +@memoize_on_first_arg +def get_global_barrier_order(kernel): + """Return a :class:`tuple` listing the ids of global barrier instructions + as they appear in order in the kernel. + + See also :class:`loopy.kernel.instruction.BarrierInstruction`. + """ + barriers = [] + visiting = set() + visited = set() + + unvisited = set(insn.id for insn in kernel.instructions) + + def is_barrier(my_insn_id): + insn = kernel.id_to_insn[my_insn_id] + from loopy.kernel.instruction import BarrierInstruction + return isinstance(insn, BarrierInstruction) and insn.kind == "global" + + while unvisited: + stack = [unvisited.pop()] + + while stack: + top = stack[-1] + + if top in visiting: + visiting.remove(top) + if is_barrier(top): + barriers.append(top) + + if top in visited: + stack.pop() + continue + + visited.add(top) + visiting.add(top) + + for child in kernel.id_to_insn[top].depends_on: + # Check for no cycles. + assert child not in visiting + stack.append(child) + + # Ensure this is the only possible order. + # + # We do this by looking at the barriers in order. + # We check for each adjacent pair (a,b) in the order if a < b, + # i.e. if a is reachable by a chain of dependencies from b. + + visiting.clear() + visited.clear() + + for prev_barrier, barrier in zip(barriers, barriers[1:]): + # Check if prev_barrier is reachable from barrier. 
+ stack = [barrier] + visited.discard(prev_barrier) + + while stack: + top = stack[-1] + + if top in visiting: + visiting.remove(top) + + if top in visited: + stack.pop() + continue + + visited.add(top) + visiting.add(top) + + if top == prev_barrier: + visiting.clear() + break + + for child in kernel.id_to_insn[top].depends_on: + stack.append(child) + else: + # Search exhausted and we did not find prev_barrier. + raise LoopyError("barriers '%s' and '%s' are not ordered" + % (prev_barrier, barrier)) + + return tuple(barriers) + +# }}} + + +# {{{ find most recent global barrier + +@memoize_on_first_arg +def find_most_recent_global_barrier(kernel, insn_id): + """Return the id of the latest occurring global barrier which the + given instruction (indirectly or directly) depends on, or *None* if this + instruction does not depend on a global barrier. + + The return value is guaranteed to be unique because global barriers are + totally ordered within the kernel. + """ + + global_barrier_order = get_global_barrier_order(kernel) + + if len(global_barrier_order) == 0: + return None + + insn = kernel.id_to_insn[insn_id] + + if len(insn.depends_on) == 0: + return None + + def is_barrier(my_insn_id): + insn = kernel.id_to_insn[my_insn_id] + from loopy.kernel.instruction import BarrierInstruction + return isinstance(insn, BarrierInstruction) and insn.kind == "global" + + global_barrier_to_ordinal = dict( + (b, i) for i, b in enumerate(global_barrier_order)) + + def get_barrier_ordinal(barrier_id): + return (global_barrier_to_ordinal[barrier_id] + if barrier_id is not None + else -1) + + direct_barrier_dependencies = set( + dep for dep in insn.depends_on if is_barrier(dep)) + + if len(direct_barrier_dependencies) > 0: + return max(direct_barrier_dependencies, key=get_barrier_ordinal) + else: + return max((find_most_recent_global_barrier(kernel, dep) + for dep in insn.depends_on), + key=get_barrier_ordinal) + +# }}} + + +# {{{ subkernel tools + +@memoize_on_first_arg +def 
get_subkernels(kernel): + from loopy.kernel import kernel_state + if kernel.state != kernel_state.SCHEDULED: + raise LoopyError("Kernel must be scheduled") + + from loopy.schedule import CallKernel + + return tuple(sched_item.kernel_name + for sched_item in kernel.schedule + if isinstance(sched_item, CallKernel)) + + +@memoize_on_first_arg +def get_subkernel_to_insn_id_map(kernel): + from loopy.kernel import kernel_state + if kernel.state != kernel_state.SCHEDULED: + raise LoopyError("Kernel must be scheduled") + + from loopy.schedule import ( + sched_item_to_insn_id, CallKernel, ReturnFromKernel) + + subkernel = None + result = {} + + for sched_item in kernel.schedule: + if isinstance(sched_item, CallKernel): + subkernel = sched_item.kernel_name + result[subkernel] = set() + + if isinstance(sched_item, ReturnFromKernel): + subkernel = None + + if subkernel is not None: + for insn_id in sched_item_to_insn_id(sched_item): + result[subkernel].add(insn_id) + + for subkernel in result: + result[subkernel] = frozenset(result[subkernel]) + + return result + +# }}} + + # vim: foldmethod=marker diff --git a/test/test_loopy.py b/test/test_loopy.py index 82994c38..1218847a 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2126,7 +2126,7 @@ def test_global_barrier_order_finding(): end """) - assert knl.global_barrier_order == ("top", "yoink", "postloop") + assert lp.get_global_barrier_order(knl) == ("top", "yoink", "postloop") for insn, barrier in ( ("nop", None), @@ -2136,7 +2136,7 @@ def test_global_barrier_order_finding(): ("yoink", "top"), ("postloop", "yoink"), ("zzzv", "postloop")): - assert knl.find_most_recent_global_barrier(insn) == barrier + assert lp.find_most_recent_global_barrier(knl, insn) == barrier def test_global_barrier_error_if_unordered(): @@ -2149,7 +2149,7 @@ def test_global_barrier_error_if_unordered(): from loopy.diagnostic import LoopyError with pytest.raises(LoopyError): - knl.global_barrier_order + lp.get_global_barrier_order(knl) def 
test_struct_assignment(ctx_factory): -- GitLab