diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index ac1e00bae4c16a66ac2eedd3c6fdc618894eb027..e33d260fba4f3f4122f35e033ecc573b41999d5d 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1427,6 +1427,8 @@ def stringify_instruction_list(kernel): def insert_insn_into_order(insn): if insn.id in printed_insn_ids: + # Note: dependency cycles are deliberately ignored so that printing + # succeeds. return printed_insn_ids.add(insn.id) @@ -1582,6 +1584,13 @@ def stringify_instruction_list(kernel): # {{{ global barrier order finding +def _is_global_barrier(kernel, insn_id): + insn = kernel.id_to_insn[insn_id] + from loopy.kernel.instruction import BarrierInstruction + return isinstance(insn, BarrierInstruction) and \ + insn.synchronization_kind == "global" + + @memoize_on_first_arg def get_global_barrier_order(kernel): """Return a :class:`tuple` of the listing the ids of global barrier instructions @@ -1589,49 +1598,27 @@ def get_global_barrier_order(kernel): See also :class:`loopy.instruction.BarrierInstruction`. """ - barriers = [] - visiting = set() - visited = set() - - unvisited = set(insn.id for insn in kernel.instructions) - - def is_barrier(my_insn_id): - insn = kernel.id_to_insn[my_insn_id] - from loopy.kernel.instruction import BarrierInstruction - return isinstance(insn, BarrierInstruction) and \ - insn.synchronization_kind == "global" - - while unvisited: - stack = [unvisited.pop()] - - while stack: - top = stack[-1] - - if top in visiting: - visiting.remove(top) - if is_barrier(top): - barriers.append(top) + dep_graph = {insn.id: set() for insn in kernel.instructions} + for insn in kernel.instructions: + for dep in insn.depends_on: + dep_graph[dep].add(insn.id) - if top in visited: - stack.pop() - continue + from pytools.graph import compute_topological_order + order = compute_topological_order(dep_graph) - visited.add(top) - visiting.add(top) + barriers = [ + insn_id for insn_id in order + if _is_global_barrier(kernel, insn_id)] - for child in kernel.id_to_insn[top].depends_on: - # Check for no cycles. - assert child not in visiting - stack.append(child) + del order # Ensure this is the only possible order. # # We do this by looking at the barriers in order. # We check for each adjacent pair (a,b) in the order if a < b, # i.e. if a is reachable by a chain of dependencies from b. - - visiting.clear() - visited.clear() + visited = set() + visiting = set() for prev_barrier, barrier in zip(barriers, barriers[1:]): # Check if prev_barrier is reachable from barrier. @@ -1689,12 +1676,6 @@ def find_most_recent_global_barrier(kernel, insn_id): if len(insn.depends_on) == 0: return None - def is_barrier(my_insn_id): - insn = kernel.id_to_insn[my_insn_id] - from loopy.kernel.instruction import BarrierInstruction - return isinstance(insn, BarrierInstruction) and \ - insn.synchronization_kind == "global" - global_barrier_to_ordinal = dict( (b, i) for i, b in enumerate(global_barrier_order)) @@ -1704,7 +1685,7 @@ def find_most_recent_global_barrier(kernel, insn_id): else -1) direct_barrier_dependencies = set( - dep for dep in insn.depends_on if is_barrier(dep)) + dep for dep in insn.depends_on if _is_global_barrier(kernel, dep)) if len(direct_barrier_dependencies) > 0: return max(direct_barrier_dependencies, key=get_barrier_ordinal)