diff --git a/loopy/__init__.py b/loopy/__init__.py
index 6cbb3362ef91b27c3b7b1cf6a591f7f9a20c2f7a..fa18446c7b789181e890dfeb1e955b42f553088d 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -54,7 +54,11 @@ from loopy.kernel.tools import (
         get_dot_dependency_graph,
         show_dependency_graph,
         add_dtypes,
-        add_and_infer_dtypes)
+        add_and_infer_dtypes,
+        get_global_barrier_order,
+        find_most_recent_global_barrier,
+        get_subkernels,
+        get_subkernel_to_insn_id_map)
 
 from loopy.kernel.creation import make_kernel, UniqueName
 from loopy.library.reduction import register_reduction_parser
@@ -215,6 +219,10 @@ __all__ = [
         "show_dependency_graph",
         "add_dtypes",
         "add_and_infer_dtypes",
+        "get_global_barrier_order",
+        "find_most_recent_global_barrier",
+        "get_subkernels",
+        "get_subkernel_to_insn_id_map",
 
         "infer_unknown_types",
 
diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
index 793d31791a3295ef1d7c03132f43489ab828f089..324f7da1a21de0115ea060ff7ef55e52ab0913d4 100644
--- a/loopy/kernel/__init__.py
+++ b/loopy/kernel/__init__.py
@@ -786,6 +786,8 @@ class LoopKernel(ImmutableRecordWithoutPickling):
             for var_name in insn.read_dependency_names() & admissible_vars:
                 result.setdefault(var_name, set()).add(insn.id)
 
+        return result
+
     @memoize_method
     def writer_map(self):
         """
diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py
index 2033425236836ecf000d6c341c46dcb8b087a29a..df0f3c931b46c32d0091a452855c27642a7bd269 100644
--- a/loopy/kernel/tools.py
+++ b/loopy/kernel/tools.py
@@ -34,6 +34,8 @@ import numpy as np
 import islpy as isl
 from islpy import dim_type
 from loopy.diagnostic import LoopyError, warn_with_kernel
+from pytools import memoize_on_first_arg
+
 import logging
 logger = logging.getLogger(__name__)
 
@@ -1367,4 +1369,195 @@ def draw_dependencies_as_unicode_arrows(
 
 # }}}
 
+
+# {{{ global barrier order finding
+
+@memoize_on_first_arg
+def get_global_barrier_order(kernel):
+    """Return a :class:`tuple` listing the ids of the global barrier instructions
+    in the order in which they appear in the kernel.
+
+    See also :class:`loopy.kernel.instruction.BarrierInstruction`.
+    """
+    barriers = []
+    visiting = set()
+    visited = set()
+
+    unvisited = set(insn.id for insn in kernel.instructions)
+
+    def is_barrier(my_insn_id):
+        insn = kernel.id_to_insn[my_insn_id]
+        from loopy.kernel.instruction import BarrierInstruction
+        return isinstance(insn, BarrierInstruction) and insn.kind == "global"
+
+    while unvisited:
+        stack = [unvisited.pop()]
+
+        while stack:
+            top = stack[-1]
+
+            if top in visiting:
+                visiting.remove(top)
+                if is_barrier(top):
+                    barriers.append(top)
+
+            if top in visited:
+                stack.pop()
+                continue
+
+            visited.add(top)
+            visiting.add(top)
+
+            for child in kernel.id_to_insn[top].depends_on:
+                # Check for no cycles.
+                assert child not in visiting
+                stack.append(child)
+
+    # Ensure this is the only possible order.
+    #
+    # We do this by looking at the barriers in order.
+    # We check for each adjacent pair (a,b) in the order if a < b,
+    # i.e. if a is reachable by a chain of dependencies from b.
+
+    visiting.clear()
+    visited.clear()
+
+    for prev_barrier, barrier in zip(barriers, barriers[1:]):
+        # Check if prev_barrier is reachable from barrier.
+        stack = [barrier]
+        visited.discard(prev_barrier)
+
+        while stack:
+            top = stack[-1]
+
+            if top in visiting:
+                visiting.remove(top)
+
+            if top in visited:
+                stack.pop()
+                continue
+
+            visited.add(top)
+            visiting.add(top)
+
+            if top == prev_barrier:
+                visiting.clear()
+                break
+
+            for child in kernel.id_to_insn[top].depends_on:
+                stack.append(child)
+        else:
+            # Search exhausted and we did not find prev_barrier.
+            raise LoopyError("barriers '%s' and '%s' are not ordered"
+                    % (prev_barrier, barrier))
+
+    return tuple(barriers)
+
+# }}}
+
+
+# {{{ find most recent global barrier
+
+@memoize_on_first_arg
+def find_most_recent_global_barrier(kernel, insn_id):
+    """Return the id of the latest occurring global barrier which the
+    given instruction (indirectly or directly) depends on, or *None* if this
+    instruction does not depend on a global barrier.
+
+    The return value is guaranteed to be unique because global barriers are
+    totally ordered within the kernel.
+    """
+
+    global_barrier_order = get_global_barrier_order(kernel)
+
+    if not global_barrier_order:
+        return None
+
+    insn = kernel.id_to_insn[insn_id]
+
+    if not insn.depends_on:
+        return None
+
+    def is_barrier(my_insn_id):
+        insn = kernel.id_to_insn[my_insn_id]
+        from loopy.kernel.instruction import BarrierInstruction
+        return isinstance(insn, BarrierInstruction) and insn.kind == "global"
+
+    global_barrier_to_ordinal = dict(
+            (b, i) for i, b in enumerate(global_barrier_order))
+
+    def get_barrier_ordinal(barrier_id):
+        return (global_barrier_to_ordinal[barrier_id]
+                if barrier_id is not None
+                else -1)
+
+    # Every dependency contributes one candidate: a global barrier stands
+    # for itself, anything else for the latest barrier reachable through it.
+    # All of them must be compared, since an indirect dependency may lead to
+    # a later barrier than a direct barrier dependency does.
+    candidates = [
+            dep if is_barrier(dep)
+            else find_most_recent_global_barrier(kernel, dep)
+            for dep in insn.depends_on]
+    return max(candidates, key=get_barrier_ordinal)
+
+# }}}
+
+
+# {{{ subkernel tools
+
+@memoize_on_first_arg
+def get_subkernels(kernel):
+    """Return a :class:`tuple` of the names of the subkernels in the kernel. The
+    kernel must be scheduled.
+
+    See also :class:`loopy.schedule.CallKernel`.
+    """
+    from loopy.kernel import kernel_state
+    if kernel.state != kernel_state.SCHEDULED:
+        raise LoopyError("Kernel must be scheduled")
+
+    from loopy.schedule import CallKernel
+
+    return tuple(sched_item.kernel_name
+            for sched_item in kernel.schedule
+            if isinstance(sched_item, CallKernel))
+
+
+@memoize_on_first_arg
+def get_subkernel_to_insn_id_map(kernel):
+    """Return a :class:`dict` mapping subkernel names to a :class:`frozenset`
+    consisting of the instruction ids scheduled within the subkernel. The
+    kernel must be scheduled.
+    """
+    from loopy.kernel import kernel_state
+    if kernel.state != kernel_state.SCHEDULED:
+        raise LoopyError("Kernel must be scheduled")
+
+    from loopy.schedule import (
+            sched_item_to_insn_id, CallKernel, ReturnFromKernel)
+
+    subkernel = None
+    result = {}
+
+    for sched_item in kernel.schedule:
+        if isinstance(sched_item, CallKernel):
+            subkernel = sched_item.kernel_name
+            result[subkernel] = set()
+
+        if isinstance(sched_item, ReturnFromKernel):
+            subkernel = None
+
+        if subkernel is not None:
+            for insn_id in sched_item_to_insn_id(sched_item):
+                result[subkernel].add(insn_id)
+
+    for subkernel in result:
+        result[subkernel] = frozenset(result[subkernel])
+
+    return result
+
+# }}}
+
+
 # vim: foldmethod=marker
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 94cdb499c096337db0657267d5afd472eef80a9c..1218847a7c42bd420a993d86a7534f066c2ab20e 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2108,6 +2108,50 @@ def test_barrier_insertion_near_bottom_of_loop():
     assert_barrier_between(knl, "ainit", "aupdate", ignore_barriers_in_levels=[1])
 
 
+def test_global_barrier_order_finding():
+    knl = lp.make_kernel(
+            "{[i,itrip]: 0<=i<10 and 0<=itrip<10}",
+            """
+            for i
+                for itrip
+                    ... gbarrier {id=top}
+                    <> z[i] = z[i+1] + z[i] {id=wr_z,dep=top}
+                    <> v[i] = 11 {id=wr_v,dep=top}
+                    ... gbarrier {dep=wr_z:wr_v,id=yoink}
+                    z[i] = z[i] - z[i+1] + v[i] {id=iupd, dep=yoink}
+                end
+                ... nop {id=nop}
+                ... gbarrier {dep=iupd,id=postloop}
+                z[i] = z[i] - z[i+1] + v[i] {id=zzzv,dep=postloop}
+            end
+            """)
+
+    assert lp.get_global_barrier_order(knl) == ("top", "yoink", "postloop")
+
+    for insn, barrier in (
+            ("nop", None),
+            ("top", None),
+            ("wr_z", "top"),
+            ("wr_v", "top"),
+            ("yoink", "top"),
+            ("postloop", "yoink"),
+            ("zzzv", "postloop")):
+        assert lp.find_most_recent_global_barrier(knl, insn) == barrier
+
+
+def test_global_barrier_error_if_unordered():
+    # FIXME: Should be illegal to declare this
+    knl = lp.make_kernel("{[i]: 0 <= i < 10}",
+            """
+            ... gbarrier
+            ... gbarrier
+            """)
+
+    from loopy.diagnostic import LoopyError
+    with pytest.raises(LoopyError):
+        lp.get_global_barrier_order(knl)
+
+
 def test_struct_assignment(ctx_factory):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)