From 4c6c3c86eaee0b80f16203638fd84c1e96d8fb3d Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 18 May 2020 00:56:28 -0500 Subject: [PATCH 01/20] graph traversal without memoization; saves memory but increases time complexity by a lot --- loopy/check.py | 283 +++++++++++++++++++++++++++---------------------- 1 file changed, 159 insertions(+), 124 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index da49c1d11..f9fc1024a 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -449,45 +449,9 @@ def check_has_schedulable_iname_nesting(kernel): # {{{ check_variable_access_ordered -class IndirectDependencyEdgeFinder(object): - def __init__(self, kernel): - self.kernel = kernel - self.dep_edge_cache = {} - - def __call__(self, depender_id, dependee_id): - cache_key = (depender_id, dependee_id) - - try: - result = self.dep_edge_cache[cache_key] - except KeyError: - pass - else: - if result is None: - from loopy.diagnostic import DependencyCycleFound - raise DependencyCycleFound("when " - "checking for dependency edge between " - "depender '%s' and dependee '%s'" - % (depender_id, dependee_id)) - else: - return result - - depender = self.kernel.id_to_insn[depender_id] - - if dependee_id in depender.depends_on: - self.dep_edge_cache[cache_key] = True - return True - - self.dep_edge_cache[cache_key] = None - for dep in depender.depends_on: - if self(dep, dependee_id): - self.dep_edge_cache[cache_key] = True - return True - - self.dep_edge_cache[cache_key] = False - return False - - def declares_nosync_with(kernel, var_address_space, dep_a, dep_b): + dep_a = kernel.id_to_insn[dep_a] + dep_b = kernel.id_to_insn[dep_b] from loopy.kernel.data import AddressSpace if var_address_space == AddressSpace.GLOBAL: search_scopes = ["global", "any"] @@ -510,116 +474,187 @@ def declares_nosync_with(kernel, var_address_space, dep_a, dep_b): return ab_nosync and ba_nosync +class DependencyTraversingMapper(object): + def __init__(self, insn_id_to_dep_reqs, depends_on): + self.insn_id_to_dep_reqs = insn_id_to_dep_reqs + self.depends_on = depends_on + + def move(self, insn_id, rev_deps): + if insn_id in rev_deps: + from loopy.diagnostic import DependencyCycleFound + raise DependencyCycleFound("Dependency cycle found:" + " '{}'.".format(rev_deps)) + + self.insn_id_to_dep_reqs[insn_id] -= rev_deps + new_rev_deps = rev_deps | set([insn_id]) + for depender in self.depends_on[insn_id]: + self.move(depender, new_rev_deps) + + return + + +class ReverseDependencyTraversingMapper(object): + def __init__(self, insn_id_to_dep_reqs, rev_depends): + self.insn_id_to_dep_reqs = insn_id_to_dep_reqs + self.rev_depends = rev_depends + + def move(self, insn_id, deps): + if insn_id in deps: + from loopy.diagnostic import DependencyCycleFound + raise DependencyCycleFound("Dependency cycle found:" + " '{}'.".format(deps)) + + self.insn_id_to_dep_reqs[insn_id] -= deps + new_deps = deps | set([insn_id]) + for depender in self.rev_depends[insn_id]: + self.move(depender, new_deps) + + return + + +def _get_address_space(kernel, var): + from loopy.kernel.data import ValueArg, AddressSpace, ArrayArg + if var in kernel.temporary_variables: + address_space = kernel.temporary_variables[var].address_space + else: + arg = kernel.arg_dict[var] + if isinstance(arg, ArrayArg): + address_space = arg.address_space + elif isinstance(arg, ValueArg): + address_space = AddressSpace.PRIVATE + else: + # No need to consider ConstantArg and ImageArg (for now) + # because those won't be written. + raise ValueError("could not determine address_space of '%s'" % var) + return address_space + + def _check_variable_access_ordered_inner(kernel): + from loopy.kernel.tools import find_aliasing_equivalence_classes + from loopy.symbolic import AccessRangeOverlapChecker + overlap_checker = AccessRangeOverlapChecker(kernel) + aliasing_equiv_classes = find_aliasing_equivalence_classes(kernel) + logger.debug("%s: check_variable_access_ordered: start" % kernel.name) - checked_variables = kernel.get_written_variables() & ( - set(kernel.temporary_variables) | set(arg for arg in kernel.arg_dict)) + # insn_id_to_dep_reqs: A mapping from insn_id to set of insn_ids between + # whom dependency requirements should be proved in order to assert + # variable access ordering. + insn_id_to_dep_reqs = dict((insn.id, set()) for insn in + kernel.instructions) wmap = kernel.writer_map() rmap = kernel.reader_map() - from loopy.kernel.data import ValueArg, AddressSpace, ArrayArg - from loopy.kernel.tools import find_aliasing_equivalence_classes - - depfind = IndirectDependencyEdgeFinder(kernel) - aliasing_equiv_classes = find_aliasing_equivalence_classes(kernel) + # {{{ populate 'insn_id_to_dep_reqs' - for name in checked_variables: - # This is a tad redundant in that this could probably be restructured - # to iterate only over equivalence classes and not individual variables. - # But then the access-range overlap check below would have to be smarter. - eq_class = aliasing_equiv_classes[name] + for var in kernel.get_written_variables(): + address_space = _get_address_space(kernel, var) + eq_class = aliasing_equiv_classes[var] readers = set.union( *[rmap.get(eq_name, set()) for eq_name in eq_class]) writers = set.union( *[wmap.get(eq_name, set()) for eq_name in eq_class]) - unaliased_readers = rmap.get(name, set()) - unaliased_writers = wmap.get(name, set()) - if not writers: - continue + for writer in writers: + required_deps = (readers | writers) - set([writer]) + required_deps = set([req_dep for req_dep in required_deps if not + declares_nosync_with(kernel, address_space, writer, + req_dep)]) - if name in kernel.temporary_variables: - address_space = kernel.temporary_variables[name].address_space - else: - arg = kernel.arg_dict[name] - if isinstance(arg, ArrayArg): - address_space = arg.address_space - elif isinstance(arg, ValueArg): - address_space = AddressSpace.PRIVATE - else: - # No need to consider ConstantArg and ImageArg (for now) - # because those won't be written. - raise ValueError("could not determine address_space of '%s'" % name) + insn_id_to_dep_reqs[writer] |= required_deps - # Check even for PRIVATE address space, to ensure intentional program order. + # }}} - from loopy.symbolic import AccessRangeOverlapChecker - overlap_checker = AccessRangeOverlapChecker(kernel) + # depends_on: mapping from insn_ids to their dependencies + depends_on = dict((insn.id, set()) for insn in + kernel.instructions) + # rev_depends: mapping from insn_ids to their reverse deps. + rev_depends = dict((insn.id, set()) for insn in + kernel.instructions) - for writer_id in writers: - for other_id in readers | writers: - if writer_id == other_id: - continue + # {{{ populate rev_depends, depends_on - writer = kernel.id_to_insn[writer_id] - other = kernel.id_to_insn[other_id] + for insn in kernel.instructions: + depends_on[insn.id].update(insn.depends_on) + for dep in insn.depends_on: + rev_depends[dep].add(insn.id) + # }}} - has_dependency_relationship = ( - declares_nosync_with(kernel, address_space, other, writer) - or - depfind(writer_id, other_id) - or - depfind(other_id, writer_id) - ) + forward_dep_traverser = DependencyTraversingMapper(insn_id_to_dep_reqs, + depends_on) + reverse_dep_traverser = ReverseDependencyTraversingMapper(insn_id_to_dep_reqs, + rev_depends) - if has_dependency_relationship: - continue + for insn_id, rev_deps in six.iteritems(rev_depends): + if not rev_deps: + forward_dep_traverser.move(insn_id, set()) - is_relationship_by_aliasing = not ( - writer_id in unaliased_writers - and (other_id in unaliased_writers - or other_id in unaliased_readers)) + for insn_id, deps in six.iteritems(depends_on): + if not deps: + reverse_dep_traverser.move(insn_id, set()) - # Do not enforce ordering for disjoint access ranges - if (not is_relationship_by_aliasing and not - overlap_checker.do_access_ranges_overlap_conservative( - writer_id, "w", other_id, "any", name)): - continue + for insn_id, dep_ids in six.iteritems(insn_id_to_dep_reqs): + insn = kernel.id_to_insn[insn_id] + vars_written_by_insn = insn.write_dependency_names() & ( + kernel.get_written_variables()) + for dep_id in dep_ids: + dep = kernel.id_to_insn[dep_id] + vars_accessed_by_dep = dep.dependency_names() & ( + kernel.get_written_variables() | ( + kernel.get_read_variables())) + eq_classes_accessed_by_dep = set().union( + *(aliasing_equiv_classes[_] for _ in vars_accessed_by_dep)) + + for var_written_by_insn in vars_written_by_insn: + eq_class = aliasing_equiv_classes[var_written_by_insn] + if eq_class & eq_classes_accessed_by_dep: + unaliased_readers = rmap.get(var_written_by_insn, set()) + unaliased_writers = wmap.get(var_written_by_insn, set()) + is_relationship_by_aliasing = not ( + insn_id in unaliased_writers + and (dep_id in unaliased_writers + or dep_id in unaliased_readers)) + + # Do not enforce ordering for disjoint access ranges + if (not is_relationship_by_aliasing and not + overlap_checker.do_access_ranges_overlap_conservative( + insn_id, "w", dep_id, "any", + var_written_by_insn)): + continue - # Do not enforce ordering for aliasing-based relationships - # in different groups. - if (is_relationship_by_aliasing and ( - bool(writer.groups & other.conflicts_with_groups) - or - bool(other.groups & writer.conflicts_with_groups))): - continue + # Do not enforce ordering for aliasing-based relationships + # in different groups. + if (is_relationship_by_aliasing and ( + bool(writer.groups & dep_id.conflicts_with_groups) + or + bool(dep_id.groups & writer.conflicts_with_groups))): + continue - msg = ("No dependency relationship found between " - "'{writer_id}' which writes {var} and " - "'{other_id}' which also accesses {var}. " - "Either add a (possibly indirect) dependency " - "between the two, or add them to each others' nosync " - "set to indicate that no ordering is intended, or " - "turn off this check by setting the " - "'enforce_variable_access_ordered' option " - "(more issues of this type may exist--only reporting " - "the first one)" - .format( - writer_id=writer_id, - other_id=other_id, - var=( - "the variable '%s'" % name - if len(eq_class) == 1 - else ( - "the aliasing equivalence class '%s'" - % ", ".join(eq_class)) - ))) - - from loopy.diagnostic import VariableAccessNotOrdered - raise VariableAccessNotOrdered(msg) + msg = ("No dependency relationship found between " + "'{writer_id}' which writes {var} and " + "'{other_id}' which also accesses {var}. " + "Either add a (possibly indirect) dependency " + "between the two, or add them to each others' nosync " + "set to indicate that no ordering is intended, or " + "turn off this check by setting the " + "'enforce_variable_access_ordered' option " + "(more issues of this type may exist--only reporting " + "the first one)" + .format( + writer_id=insn_id, + other_id=dep_id, + var=( + "the variable '%s'" % var_written_by_insn + if len(eq_class) == 1 + else ( + "the aliasing equivalence class '%s'" + % ", ".join(eq_class)) + ))) + + from loopy.diagnostic import VariableAccessNotOrdered + raise VariableAccessNotOrdered(msg) logger.debug("%s: check_variable_access_ordered: done" % kernel.name) @@ -628,7 +663,7 @@ def check_variable_access_ordered(kernel): """Checks that between each write to a variable and all other accesses to the variable there is either: - * an (at least indirect) depdendency edge, or + * a direct/indirect depdendency edge, or * an explicit statement that no ordering is necessary (expressed through a bi-directional :attr:`loopy.Instruction.no_sync_with`) """ -- GitLab From a6fc3f7d2cd0498fbe104dc55532ac18cd4a794f Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 18 May 2020 04:33:25 -0500 Subject: [PATCH 02/20] bug fixes in the graph traversal --- loopy/check.py | 62 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 12 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index f9fc1024a..0a010a229 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -475,9 +475,15 @@ def declares_nosync_with(kernel, var_address_space, dep_a, dep_b): class DependencyTraversingMapper(object): - def __init__(self, insn_id_to_dep_reqs, depends_on): + """ + Helper class to traverse the dependency graph. + """ + def __init__(self, insn_id_to_dep_reqs, depends_on, rev_depends): self.insn_id_to_dep_reqs = insn_id_to_dep_reqs self.depends_on = depends_on + self.rev_depends = rev_depends + self.visited = set() + self.memoized_rev_deps = {} def move(self, insn_id, rev_deps): if insn_id in rev_deps: @@ -485,18 +491,34 @@ class DependencyTraversingMapper(object): raise DependencyCycleFound("Dependency cycle found:" " '{}'.".format(rev_deps)) - self.insn_id_to_dep_reqs[insn_id] -= rev_deps - new_rev_deps = rev_deps | set([insn_id]) - for depender in self.depends_on[insn_id]: - self.move(depender, new_rev_deps) + if insn_id in self.visited: + return + + if self.rev_depends[insn_id] <= self.visited: + new_rev_deps = rev_deps | self.memoized_rev_deps.pop(insn_id, + set()) | set([insn_id]) + + self.insn_id_to_dep_reqs[insn_id] -= new_rev_deps + self.visited.add(insn_id) + for dep in self.depends_on[insn_id]: + self.move(dep, new_rev_deps) + else: + memoized_rev_deps = self.memoized_rev_deps.get(insn_id, set()) + memoized_rev_deps.update(rev_deps) return class ReverseDependencyTraversingMapper(object): - def __init__(self, insn_id_to_dep_reqs, rev_depends): + """ + Helper class to traverse the reverse dependency graph. + """ + def __init__(self, insn_id_to_dep_reqs, rev_depends, depends_on): self.insn_id_to_dep_reqs = insn_id_to_dep_reqs self.rev_depends = rev_depends + self.depends_on = depends_on + self.visited = set() + self.memoized_deps = {} def move(self, insn_id, deps): if insn_id in deps: @@ -504,10 +526,20 @@ class ReverseDependencyTraversingMapper(object): raise DependencyCycleFound("Dependency cycle found:" " '{}'.".format(deps)) - self.insn_id_to_dep_reqs[insn_id] -= deps - new_deps = deps | set([insn_id]) - for depender in self.rev_depends[insn_id]: - self.move(depender, new_deps) + if insn_id in self.visited: + return + + if self.depends_on[insn_id] <= self.visited: + new_deps = deps | self.memoized_deps.pop(insn_id, + set()) | set([insn_id]) + + self.insn_id_to_dep_reqs[insn_id] -= new_deps + self.visited.add(insn_id) + for rev_dep in self.rev_depends[insn_id]: + self.move(rev_dep, new_deps) + else: + memoized_deps = self.memoized_deps.get(insn_id, set()) + memoized_deps.update(deps) return @@ -583,18 +615,24 @@ def _check_variable_access_ordered_inner(kernel): # }}} forward_dep_traverser = DependencyTraversingMapper(insn_id_to_dep_reqs, - depends_on) + depends_on, rev_depends) reverse_dep_traverser = ReverseDependencyTraversingMapper(insn_id_to_dep_reqs, - rev_depends) + rev_depends, depends_on) for insn_id, rev_deps in six.iteritems(rev_depends): if not rev_deps: forward_dep_traverser.move(insn_id, set()) + assert set([insn.id for insn in kernel.instructions]) == ( + forward_dep_traverser.visited) + for insn_id, deps in six.iteritems(depends_on): if not deps: reverse_dep_traverser.move(insn_id, set()) + assert set([insn.id for insn in kernel.instructions]) == ( + reverse_dep_traverser.visited) + for insn_id, dep_ids in six.iteritems(insn_id_to_dep_reqs): insn = kernel.id_to_insn[insn_id] vars_written_by_insn = insn.write_dependency_names() & ( -- GitLab From a97a7b4237d120a7fb1107f3b9f6f418c178d8f1 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 19 May 2020 02:22:35 -0500 Subject: [PATCH 03/20] fix bugs in graph traversal --- loopy/check.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 0a010a229..854b81ab1 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -476,7 +476,7 @@ def declares_nosync_with(kernel, var_address_space, dep_a, dep_b): class DependencyTraversingMapper(object): """ - Helper class to traverse the dependency graph. + Helper class to traverse the dependency graph in a postorder fashion. """ def __init__(self, insn_id_to_dep_reqs, depends_on, rev_depends): self.insn_id_to_dep_reqs = insn_id_to_dep_reqs @@ -503,7 +503,7 @@ class DependencyTraversingMapper(object): for dep in self.depends_on[insn_id]: self.move(dep, new_rev_deps) else: - memoized_rev_deps = self.memoized_rev_deps.get(insn_id, set()) + memoized_rev_deps = self.memoized_rev_deps.setdefault(insn_id, set()) memoized_rev_deps.update(rev_deps) return @@ -511,7 +511,8 @@ class DependencyTraversingMapper(object): class ReverseDependencyTraversingMapper(object): """ - Helper class to traverse the reverse dependency graph. + Helper class to traverse the reverse dependency graph in a postorder + fashion. """ def __init__(self, insn_id_to_dep_reqs, rev_depends, depends_on): self.insn_id_to_dep_reqs = insn_id_to_dep_reqs @@ -538,7 +539,7 @@ class ReverseDependencyTraversingMapper(object): for rev_dep in self.rev_depends[insn_id]: self.move(rev_dep, new_deps) else: - memoized_deps = self.memoized_deps.get(insn_id, set()) + memoized_deps = self.memoized_deps.setdefault(insn_id, set()) memoized_deps.update(deps) return @@ -665,9 +666,9 @@ def _check_variable_access_ordered_inner(kernel): # Do not enforce ordering for aliasing-based relationships # in different groups. if (is_relationship_by_aliasing and ( - bool(writer.groups & dep_id.conflicts_with_groups) + bool(insn.groups & dep.conflicts_with_groups) or - bool(dep_id.groups & writer.conflicts_with_groups))): + bool(dep.groups & insn.conflicts_with_groups))): continue msg = ("No dependency relationship found between " -- GitLab From f291a51b8cb942a7f6d85db9fe4dcb12b8188870 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 19 May 2020 02:45:48 -0500 Subject: [PATCH 04/20] convert assertion to dep cycle error --- loopy/check.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 854b81ab1..a4ba9ff2c 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -624,15 +624,25 @@ def _check_variable_access_ordered_inner(kernel): if not rev_deps: forward_dep_traverser.move(insn_id, set()) - assert set([insn.id for insn in kernel.instructions]) == ( + if set([insn.id for insn in kernel.instructions]) != ( + forward_dep_traverser.visited): + not_visited_insns = set([insn.id for insn in kernel.instructions]) - ( forward_dep_traverser.visited) + from loopy.diagnostic import DependencyCycleFound + raise DependencyCycleFound("Dependency cycle found in:" + " '{}'.".format(not_visited_insns)) for insn_id, deps in six.iteritems(depends_on): if not deps: reverse_dep_traverser.move(insn_id, set()) - assert set([insn.id for insn in kernel.instructions]) == ( + if set([insn.id for insn in kernel.instructions]) != ( + reverse_dep_traverser.visited): + not_visited_insns = set([insn.id for insn in kernel.instructions]) - ( reverse_dep_traverser.visited) + from loopy.diagnostic import DependencyCycleFound + raise DependencyCycleFound("Dependency cycle found in:" + " '{}'.".format(not_visited_insns)) for insn_id, dep_ids in six.iteritems(insn_id_to_dep_reqs): insn = kernel.id_to_insn[insn_id] -- GitLab From f374e0199a452b144bd98a22886ec780826ce89b Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 21 May 2020 14:11:25 -0500 Subject: [PATCH 05/20] adds a FIXME --- loopy/check.py | 69 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 57 insertions(+), 12 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index a4ba9ff2c..999a412fa 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -478,14 +478,16 @@ class DependencyTraversingMapper(object): """ Helper class to traverse the dependency graph in a postorder fashion. """ - def __init__(self, insn_id_to_dep_reqs, depends_on, rev_depends): + def __init__(self, insn_id_to_dep_reqs, depends_on, rev_depends, + topological_order): self.insn_id_to_dep_reqs = insn_id_to_dep_reqs self.depends_on = depends_on self.rev_depends = rev_depends + self.topological_order = topological_order self.visited = set() self.memoized_rev_deps = {} - def move(self, insn_id, rev_deps): + def rec(self, insn_id, rev_deps): if insn_id in rev_deps: from loopy.diagnostic import DependencyCycleFound raise DependencyCycleFound("Dependency cycle found:" @@ -500,28 +502,37 @@ class DependencyTraversingMapper(object): self.insn_id_to_dep_reqs[insn_id] -= new_rev_deps self.visited.add(insn_id) - for dep in self.depends_on[insn_id]: - self.move(dep, new_rev_deps) + + deps = self.depends_on[insn_id] + # FIXME: Adding the line below emits segfault for huge kernels + # deps = sorted(list(deps), key=lambda elem: + # (-self.topological_order[elem])) + for dep in deps: + self.rec(dep, new_rev_deps) else: memoized_rev_deps = self.memoized_rev_deps.setdefault(insn_id, set()) memoized_rev_deps.update(rev_deps) return + __call__ = rec + class ReverseDependencyTraversingMapper(object): """ Helper class to traverse the reverse dependency graph in a postorder fashion. """ - def __init__(self, insn_id_to_dep_reqs, rev_depends, depends_on): + def __init__(self, insn_id_to_dep_reqs, rev_depends, depends_on, + topological_order): self.insn_id_to_dep_reqs = insn_id_to_dep_reqs self.rev_depends = rev_depends self.depends_on = depends_on + self.topological_order = topological_order self.visited = set() self.memoized_deps = {} - def move(self, insn_id, deps): + def rec(self, insn_id, deps): if insn_id in deps: from loopy.diagnostic import DependencyCycleFound raise DependencyCycleFound("Dependency cycle found:" @@ -536,14 +547,21 @@ class ReverseDependencyTraversingMapper(object): self.insn_id_to_dep_reqs[insn_id] -= new_deps self.visited.add(insn_id) - for rev_dep in self.rev_depends[insn_id]: - self.move(rev_dep, new_deps) + rev_deps = self.rev_depends[insn_id] + # FIXME: Adding the line below emits segfault for huge kernels + # rev_deps = sorted(list(rev_deps), key=lambda elem: + # (-self.topological_order[elem])) + + for rev_dep in rev_deps: + self.rec(rev_dep, new_deps) else: memoized_deps = self.memoized_deps.setdefault(insn_id, set()) memoized_deps.update(deps) return + __call__ = rec + def _get_address_space(kernel, var): from loopy.kernel.data import ValueArg, AddressSpace, ArrayArg @@ -562,6 +580,32 @@ def _get_address_space(kernel, var): return address_space +def _get_topological_order(kernel): + from pytools import natsorted + + # {{{ topological sort + + visited_insn_ids = set() + insn_order = [] + + def insert_insn_into_order(insn): + if insn.id in visited_insn_ids: + return + visited_insn_ids.add(insn.id) + + for dep_id in natsorted(insn.depends_on): + insert_insn_into_order(kernel.id_to_insn[dep_id]) + + insn_order.append(insn) + + for insn in kernel.instructions: + insert_insn_into_order(insn) + + # }}} + + return dict((insn.id, i) for i, insn in enumerate(insn_order)) + + def _check_variable_access_ordered_inner(kernel): from loopy.kernel.tools import find_aliasing_equivalence_classes from loopy.symbolic import AccessRangeOverlapChecker @@ -615,14 +659,15 @@ def _check_variable_access_ordered_inner(kernel): rev_depends[dep].add(insn.id) # }}} + topological_order = _get_topological_order(kernel) forward_dep_traverser = DependencyTraversingMapper(insn_id_to_dep_reqs, - depends_on, rev_depends) + depends_on, rev_depends, topological_order) reverse_dep_traverser = ReverseDependencyTraversingMapper(insn_id_to_dep_reqs, - rev_depends, depends_on) + rev_depends, depends_on, topological_order) for insn_id, rev_deps in six.iteritems(rev_depends): if not rev_deps: - forward_dep_traverser.move(insn_id, set()) + forward_dep_traverser(insn_id, set()) if set([insn.id for insn in kernel.instructions]) != ( forward_dep_traverser.visited): @@ -634,7 +679,7 @@ def _check_variable_access_ordered_inner(kernel): for insn_id, deps in six.iteritems(depends_on): if not deps: - reverse_dep_traverser.move(insn_id, set()) + reverse_dep_traverser(insn_id, set()) if set([insn.id for insn in kernel.instructions]) != ( reverse_dep_traverser.visited): -- GitLab From 828faa6985b04a933d6bab071258d1c5854266aa Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 23 May 2020 01:56:47 -0500 Subject: [PATCH 06/20] do a simple topological sort order traversal instead of recursion --- loopy/check.py | 163 ++++++++++++++----------------------------------- 1 file changed, 45 insertions(+), 118 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 999a412fa..f1f889a6e 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -474,95 +474,6 @@ def declares_nosync_with(kernel, var_address_space, dep_a, dep_b): return ab_nosync and ba_nosync -class DependencyTraversingMapper(object): - """ - Helper class to traverse the dependency graph in a postorder fashion. - """ - def __init__(self, insn_id_to_dep_reqs, depends_on, rev_depends, - topological_order): - self.insn_id_to_dep_reqs = insn_id_to_dep_reqs - self.depends_on = depends_on - self.rev_depends = rev_depends - self.topological_order = topological_order - self.visited = set() - self.memoized_rev_deps = {} - - def rec(self, insn_id, rev_deps): - if insn_id in rev_deps: - from loopy.diagnostic import DependencyCycleFound - raise DependencyCycleFound("Dependency cycle found:" - " '{}'.".format(rev_deps)) - - if insn_id in self.visited: - return - - if self.rev_depends[insn_id] <= self.visited: - new_rev_deps = rev_deps | self.memoized_rev_deps.pop(insn_id, - set()) | set([insn_id]) - - self.insn_id_to_dep_reqs[insn_id] -= new_rev_deps - self.visited.add(insn_id) - - deps = self.depends_on[insn_id] - # FIXME: Adding the line below emits segfault for huge kernels - # deps = sorted(list(deps), key=lambda elem: - # (-self.topological_order[elem])) - for dep in deps: - self.rec(dep, new_rev_deps) - else: - memoized_rev_deps = self.memoized_rev_deps.setdefault(insn_id, set()) - memoized_rev_deps.update(rev_deps) - - return - - __call__ = rec - - -class ReverseDependencyTraversingMapper(object): - """ - Helper class to traverse the reverse dependency graph in a postorder - fashion. - """ - def __init__(self, insn_id_to_dep_reqs, rev_depends, depends_on, - topological_order): - self.insn_id_to_dep_reqs = insn_id_to_dep_reqs - self.rev_depends = rev_depends - self.depends_on = depends_on - self.topological_order = topological_order - self.visited = set() - self.memoized_deps = {} - - def rec(self, insn_id, deps): - if insn_id in deps: - from loopy.diagnostic import DependencyCycleFound - raise DependencyCycleFound("Dependency cycle found:" - " '{}'.".format(deps)) - - if insn_id in self.visited: - return - - if self.depends_on[insn_id] <= self.visited: - new_deps = deps | self.memoized_deps.pop(insn_id, - set()) | set([insn_id]) - - self.insn_id_to_dep_reqs[insn_id] -= new_deps - self.visited.add(insn_id) - rev_deps = self.rev_depends[insn_id] - # FIXME: Adding the line below emits segfault for huge kernels - # rev_deps = sorted(list(rev_deps), key=lambda elem: - # (-self.topological_order[elem])) - - for rev_dep in rev_deps: - self.rec(rev_dep, new_deps) - else: - memoized_deps = self.memoized_deps.setdefault(insn_id, set()) - memoized_deps.update(deps) - - return - - __call__ = rec - - def _get_address_space(kernel, var): from loopy.kernel.data import ValueArg, AddressSpace, ArrayArg if var in kernel.temporary_variables: @@ -603,7 +514,7 @@ def _get_topological_order(kernel): # }}} - return dict((insn.id, i) for i, insn in enumerate(insn_order)) + return [insn.id for insn in insn_order] def _check_variable_access_ordered_inner(kernel): @@ -660,34 +571,50 @@ def _check_variable_access_ordered_inner(kernel): # }}} topological_order = _get_topological_order(kernel) - forward_dep_traverser = DependencyTraversingMapper(insn_id_to_dep_reqs, - depends_on, rev_depends, topological_order) - reverse_dep_traverser = ReverseDependencyTraversingMapper(insn_id_to_dep_reqs, - rev_depends, depends_on, topological_order) - - for insn_id, rev_deps in six.iteritems(rev_depends): - if not rev_deps: - forward_dep_traverser(insn_id, set()) - - if set([insn.id for insn in kernel.instructions]) != ( - forward_dep_traverser.visited): - not_visited_insns = set([insn.id for insn in kernel.instructions]) - ( - forward_dep_traverser.visited) - from loopy.diagnostic import DependencyCycleFound - raise DependencyCycleFound("Dependency cycle found in:" - " '{}'.".format(not_visited_insns)) - - for insn_id, deps in six.iteritems(depends_on): - if not deps: - reverse_dep_traverser(insn_id, set()) - - if set([insn.id for insn in kernel.instructions]) != ( - reverse_dep_traverser.visited): - not_visited_insns = set([insn.id for insn in kernel.instructions]) - ( - reverse_dep_traverser.visited) - from loopy.diagnostic import DependencyCycleFound - raise DependencyCycleFound("Dependency cycle found in:" - " '{}'.".format(not_visited_insns)) + + # {{{ forward dep. traversal + + # memoized_predecessors: mapping from insn_id to its direct/indirect predecessors + memoized_predecessors = {} + + for insn_id in topological_order[::-1]: + # accumulated_predecessors:insn_id's direct+indirect predecessors + accumulated_predecessors = memoized_predecessors.pop(insn_id, set()) + + if insn_id in accumulated_predecessors: + from loopy.diagnostic import DependencyCycleFound + raise DependencyCycleFound("Dependency cycle found:" + " '{}'.".format(accumulated_predecessors)) + + insn_id_to_dep_reqs[insn_id] -= accumulated_predecessors + + for dep in depends_on[insn_id]: + memoized_predecessors.setdefault(dep, set()).update( + accumulated_predecessors | set([insn_id])) + + # }}} + + # {{{ reverse dep. traversal + + # memoized_successors: mapping from insn_id to its direct/indirect successors + memoized_successors = {} + + for insn_id in topological_order: + # accumulated_predecessors:insn_id's direct+indirect predecessors + accumulated_successors = memoized_successors.pop(insn_id, set()) + + if insn_id in accumulated_successors: + from loopy.diagnostic import DependencyCycleFound + raise DependencyCycleFound("Dependency cycle found:" + " '{}'.".format(accumulated_predecessors)) + + insn_id_to_dep_reqs[insn_id] -= accumulated_successors + + for rev_dep in rev_depends[insn_id]: + memoized_successors.setdefault(rev_dep, set()).update( + accumulated_successors | set([insn_id])) + + # }}} for insn_id, dep_ids in six.iteritems(insn_id_to_dep_reqs): insn = kernel.id_to_insn[insn_id] -- GitLab From 3d0b39105238a897c6723fd95cb3d2f5ba87a6eb Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 23 May 2020 11:29:14 -0500 Subject: [PATCH 07/20] fills some voids in checking dep cycles --- loopy/check.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/loopy/check.py b/loopy/check.py index f1f889a6e..82d9e44a6 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -577,6 +577,7 @@ def _check_variable_access_ordered_inner(kernel): # memoized_predecessors: mapping from insn_id to its direct/indirect predecessors memoized_predecessors = {} + # reverse postorder traversal of dependency graph for insn_id in topological_order[::-1]: # accumulated_predecessors:insn_id's direct+indirect predecessors accumulated_predecessors = memoized_predecessors.pop(insn_id, set()) @@ -594,11 +595,17 @@ def _check_variable_access_ordered_inner(kernel): # }}} + if memoized_predecessors: + from loopy.diagnostic import DependencyCycleFound + raise DependencyCycleFound("Dependency cycle found:" + " '{}'.".format(next(iter(six.viewvalues(memoized_predecessors))))) + # {{{ reverse dep. traversal # memoized_successors: mapping from insn_id to its direct/indirect successors memoized_successors = {} + # postorder traversal of dependency graph for insn_id in topological_order: # accumulated_predecessors:insn_id's direct+indirect predecessors accumulated_successors = memoized_successors.pop(insn_id, set()) @@ -616,6 +623,11 @@ def _check_variable_access_ordered_inner(kernel): # }}} + if memoized_successors: + from loopy.diagnostic import DependencyCycleFound + raise DependencyCycleFound("Dependency cycle found:" + " '{}'.".format(next(iter(six.viewvalues(memoized_successors))))) + for insn_id, dep_ids in six.iteritems(insn_id_to_dep_reqs): insn = kernel.id_to_insn[insn_id] vars_written_by_insn = insn.write_dependency_names() & ( -- GitLab From c2e5a4beb8cf50a2f9991c1cd2e5329eb2361bf9 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 1 Jun 2020 18:49:27 -0500 Subject: [PATCH 08/20] uses pytools.compute_topological_order for performing topological sorting --- loopy/check.py | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 82d9e44a6..aae119af9 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -492,29 +492,14 @@ def _get_address_space(kernel, var): def _get_topological_order(kernel): - from pytools import natsorted - - # {{{ topological sort - - visited_insn_ids = set() - insn_order = [] - - def insert_insn_into_order(insn): - if insn.id in visited_insn_ids: - return - visited_insn_ids.add(insn.id) - - for dep_id in natsorted(insn.depends_on): - insert_insn_into_order(kernel.id_to_insn[dep_id]) - - insn_order.append(insn) + from pytools.graph import compute_topological_order + rev_dep_map = {insn.id: set() for insn in kernel.instructions} for insn in kernel.instructions: - insert_insn_into_order(insn) - - # }}} + for dep in insn.depends_on: + rev_dep_map[dep].add(insn.id) - return [insn.id for insn in insn_order] + return compute_topological_order(rev_dep_map) def _check_variable_access_ordered_inner(kernel): -- GitLab From c0b814ef658307ddc75d0fb8d97efb82edd61def Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 2 Jun 2020 01:09:03 -0500 Subject: [PATCH 09/20] catch CycleError and emit DependencyCycleFound --- loopy/check.py | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index aae119af9..81e0fb9c9 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -492,14 +492,20 @@ def _get_address_space(kernel, var): def _get_topological_order(kernel): - from pytools.graph import compute_topological_order + from pytools.graph import compute_topological_order, CycleError rev_dep_map = {insn.id: set() for insn in kernel.instructions} for insn in kernel.instructions: for dep in insn.depends_on: rev_dep_map[dep].add(insn.id) - return compute_topological_order(rev_dep_map) + try: + order = compute_topological_order(rev_dep_map) + except CycleError as e: + from loopy.diagnostic import DependencyCycleFound + raise DependencyCycleFound(str(e)) + + return order def _check_variable_access_ordered_inner(kernel): @@ -567,11 +573,6 @@ def _check_variable_access_ordered_inner(kernel): # accumulated_predecessors:insn_id's direct+indirect predecessors accumulated_predecessors = memoized_predecessors.pop(insn_id, set()) - if insn_id in accumulated_predecessors: - from loopy.diagnostic import DependencyCycleFound - raise DependencyCycleFound("Dependency cycle found:" - " '{}'.".format(accumulated_predecessors)) - insn_id_to_dep_reqs[insn_id] -= accumulated_predecessors for dep in depends_on[insn_id]: @@ -580,11 +581,6 @@ def _check_variable_access_ordered_inner(kernel): # }}} - if memoized_predecessors: - from loopy.diagnostic import DependencyCycleFound - raise DependencyCycleFound("Dependency cycle found:" - " '{}'.".format(next(iter(six.viewvalues(memoized_predecessors))))) - # {{{ reverse dep. traversal # memoized_successors: mapping from insn_id to its direct/indirect successors @@ -595,11 +591,6 @@ def _check_variable_access_ordered_inner(kernel): # accumulated_predecessors:insn_id's direct+indirect predecessors accumulated_successors = memoized_successors.pop(insn_id, set()) - if insn_id in accumulated_successors: - from loopy.diagnostic import DependencyCycleFound - raise DependencyCycleFound("Dependency cycle found:" - " '{}'.".format(accumulated_predecessors)) - insn_id_to_dep_reqs[insn_id] -= accumulated_successors for rev_dep in rev_depends[insn_id]: @@ -608,11 +599,6 @@ def _check_variable_access_ordered_inner(kernel): # }}} - if memoized_successors: - from loopy.diagnostic import DependencyCycleFound - raise DependencyCycleFound("Dependency cycle found:" - " '{}'.".format(next(iter(six.viewvalues(memoized_successors))))) - for insn_id, dep_ids in six.iteritems(insn_id_to_dep_reqs): insn = kernel.id_to_insn[insn_id] vars_written_by_insn = insn.write_dependency_names() & ( -- GitLab From fd437c2526cd1c3ab93d0832e04baa14c9907e19 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 2 Jun 2020 16:46:03 -0500 Subject: [PATCH 10/20] better dep cycle error msg --- loopy/check.py | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 81e0fb9c9..2262aed1b 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -493,6 +493,7 @@ def _get_address_space(kernel, var): def _get_topological_order(kernel): from pytools.graph import compute_topological_order, CycleError + from loopy.diagnostic import DependencyCycleFound rev_dep_map = {insn.id: set() for insn in kernel.instructions} for insn in kernel.instructions: @@ -502,8 +503,40 @@ def _get_topological_order(kernel): try: order = compute_topological_order(rev_dep_map) except CycleError as e: - from loopy.diagnostic import DependencyCycleFound - raise DependencyCycleFound(str(e)) + root = e.node + + visited = set() + visiting = [root] + stack = [(root, iter(rev_dep_map[root]))] + + # Depth first traversal from root + while stack: + node, children = stack.pop() + + for child in children: + # note: each iteration removes child from children + if child in visiting: + cycle = visiting[visiting.index(child):]+[child, ] + raise DependencyCycleFound('->'.join(cycle)) + + if child in visited: + continue + + visiting.append(child) + + # put (node, children) back on stack + stack.append((node, children)) + + # put (child, grandchildren) on stack + stack.append((child, iter(rev_dep_map.get(child, ())))) + break + else: + # loop did not break, + # so either this is a leaf or all children have been visited + assert node == visiting.pop() + visited.add(node) + + assert False return order -- GitLab From c379457ef9e78a8b98eec0de89ade77fca56b95b Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 2 Jun 2020 20:49:37 -0500 Subject: [PATCH 11/20] use functions for traversal --- loopy/check.py | 64 +++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 35 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 2262aed1b..f6f35776c 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -596,41 +596,35 @@ def _check_variable_access_ordered_inner(kernel): topological_order = _get_topological_order(kernel) - # {{{ forward dep. traversal - - # memoized_predecessors: mapping from insn_id to its direct/indirect predecessors - memoized_predecessors = {} - - # reverse postorder traversal of dependency graph - for insn_id in topological_order[::-1]: - # accumulated_predecessors:insn_id's direct+indirect predecessors - accumulated_predecessors = memoized_predecessors.pop(insn_id, set()) - - insn_id_to_dep_reqs[insn_id] -= accumulated_predecessors - - for dep in depends_on[insn_id]: - memoized_predecessors.setdefault(dep, set()).update( - accumulated_predecessors | set([insn_id])) - - # }}} - - # {{{ reverse dep. traversal - - # memoized_successors: mapping from insn_id to its direct/indirect successors - memoized_successors = {} - - # postorder traversal of dependency graph - for insn_id in topological_order: - # accumulated_predecessors:insn_id's direct+indirect predecessors - accumulated_successors = memoized_successors.pop(insn_id, set()) - - insn_id_to_dep_reqs[insn_id] -= accumulated_successors - - for rev_dep in rev_depends[insn_id]: - memoized_successors.setdefault(rev_dep, set()).update( - accumulated_successors | set([insn_id])) - - # }}} + def discard_dep_reqs_in_order(insn_id_to_dep_reqs, edges, order): + """ + Subtracts dependency requirements of insn_ids by all direct/indirect + predecessors of a directed graph of insn_ids as nodes and *edges* as + the connectivity. + + :arg order: An instance of :class:`list` of instruction ids in which the + *edges* graph is to be traversed. + """ + # memoized_predecessors: mapping from insn_id to its direct/indirect + # predecessors + memoized_predecessors = {} + + # reverse postorder traversal of dependency graph + for insn_id in order: + # accumulated_predecessors:insn_id's direct+indirect predecessors + accumulated_predecessors = memoized_predecessors.pop(insn_id, set()) + + insn_id_to_dep_reqs[insn_id] -= accumulated_predecessors + + for successor in edges[insn_id]: + memoized_predecessors.setdefault(successor, set()).update( + accumulated_predecessors | set([insn_id])) + + # forward dep. graph traversal in reverse topological sort order + discard_dep_reqs_in_order(insn_id_to_dep_reqs, depends_on, + topological_order[::-1]) + # reverse dep. graph traversal in topological sort order + discard_dep_reqs_in_order(insn_id_to_dep_reqs, rev_depends, topological_order) for insn_id, dep_ids in six.iteritems(insn_id_to_dep_reqs): insn = kernel.id_to_insn[insn_id] -- GitLab From 0c8d4ca031ff662a79ab54d74c53fcfcfa0f0d81 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 3 Jun 2020 12:20:08 -0500 Subject: [PATCH 12/20] better naming --- loopy/check.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/loopy/check.py b/loopy/check.py index f6f35776c..07029713f 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -626,6 +626,8 @@ def _check_variable_access_ordered_inner(kernel): # reverse dep. graph traversal in topological sort order discard_dep_reqs_in_order(insn_id_to_dep_reqs, rev_depends, topological_order) + # {{{ handle dependency requirements that weren't satisfied + for insn_id, dep_ids in six.iteritems(insn_id_to_dep_reqs): insn = kernel.id_to_insn[insn_id] vars_written_by_insn = insn.write_dependency_names() & ( @@ -636,7 +638,7 @@ def _check_variable_access_ordered_inner(kernel): kernel.get_written_variables() | ( kernel.get_read_variables())) eq_classes_accessed_by_dep = set().union( - *(aliasing_equiv_classes[_] for _ in vars_accessed_by_dep)) + *(aliasing_equiv_classes[var] for var in vars_accessed_by_dep)) for var_written_by_insn in vars_written_by_insn: eq_class = aliasing_equiv_classes[var_written_by_insn] @@ -687,6 +689,8 @@ def _check_variable_access_ordered_inner(kernel): from loopy.diagnostic import VariableAccessNotOrdered raise VariableAccessNotOrdered(msg) + # }}} + logger.debug("%s: check_variable_access_ordered: done" % kernel.name) -- GitLab From 9166b8b21edab9231a0a374075acbc50b2180f50 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 3 Jun 2020 14:56:28 -0500 Subject: [PATCH 13/20] adds an assert and some comments to clarify check_variable_access_ordered --- loopy/check.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/loopy/check.py b/loopy/check.py index 07029713f..0b42772d3 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -632,7 +632,7 @@ def _check_variable_access_ordered_inner(kernel): insn = kernel.id_to_insn[insn_id] vars_written_by_insn = insn.write_dependency_names() & ( kernel.get_written_variables()) - for dep_id in dep_ids: + for dep_id in dep_ids: # iterate through deps reqs. which weren't satisfied dep = kernel.id_to_insn[dep_id] vars_accessed_by_dep = dep.dependency_names() & ( kernel.get_written_variables() | ( @@ -640,9 +640,14 @@ def _check_variable_access_ordered_inner(kernel): eq_classes_accessed_by_dep = set().union( *(aliasing_equiv_classes[var] for var in vars_accessed_by_dep)) + found_var_responsible_for_dep_req = False + + # iterate through all the variables written by 'insn' to find + # which was responsible for the dependency requirement for var_written_by_insn in vars_written_by_insn: eq_class = aliasing_equiv_classes[var_written_by_insn] if eq_class & eq_classes_accessed_by_dep: + found_var_responsible_for_dep_req = True unaliased_readers = rmap.get(var_written_by_insn, set()) unaliased_writers = wmap.get(var_written_by_insn, set()) is_relationship_by_aliasing = not ( @@ -689,6 +694,8 @@ def _check_variable_access_ordered_inner(kernel): from loopy.diagnostic import VariableAccessNotOrdered raise VariableAccessNotOrdered(msg) + assert found_var_responsible_for_dep_req + # }}} logger.debug("%s: check_variable_access_ordered: done" % kernel.name) -- GitLab From 57ab08b5940782294c6d790ede7564952d80716e Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 3 Jun 2020 16:17:31 -0500 Subject: [PATCH 14/20] use compute_sccs to compute topological order and detect cycles at once --- loopy/check.py | 49 ++++++++----------------------------------------- 1 file changed, 8 insertions(+), 41 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 0b42772d3..493e41e1f 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -492,51 +492,18 @@ def _get_address_space(kernel, var): def _get_topological_order(kernel): - from pytools.graph import compute_topological_order, CycleError + from pytools.graph import compute_sccs from loopy.diagnostic import DependencyCycleFound - rev_dep_map = {insn.id: set() for insn in kernel.instructions} - for insn in kernel.instructions: - for dep in insn.depends_on: - rev_dep_map[dep].add(insn.id) - - try: - order = compute_topological_order(rev_dep_map) - except CycleError as e: - root = e.node - - visited = set() - visiting = [root] - stack = [(root, iter(rev_dep_map[root]))] - - # Depth first traversal from root - while stack: - node, children = stack.pop() - - for child in children: - # note: each iteration removes child from children - if child in visiting: - cycle = visiting[visiting.index(child):]+[child, ] - raise DependencyCycleFound('->'.join(cycle)) + dep_map = {insn.id: insn.depends_on for insn in kernel.instructions} - if child in visited: - continue - - visiting.append(child) - - # put (node, children) back on stack - stack.append((node, children)) - - # put (child, grandchildren) on stack - stack.append((child, iter(rev_dep_map.get(child, ())))) - break - else: - # loop did not break, - # so either this is a leaf or all children have been visited - assert node == visiting.pop() - visited.add(node) + sccs = compute_sccs(dep_map) + order = [] - assert False + for scc in sccs: + if len(scc) != 1: + raise DependencyCycleFound(', '.join(scc)) + order.append(scc[0]) return order -- GitLab From adf60d6ce7518275389ed76b22438a5fcc936c75 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 19 Jun 2020 16:53:58 -0500 Subject: [PATCH 15/20] corrects the vars written by insn --- loopy/check.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 493e41e1f..cb4fab44a 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -597,8 +597,7 @@ def _check_variable_access_ordered_inner(kernel): for insn_id, dep_ids in six.iteritems(insn_id_to_dep_reqs): insn = kernel.id_to_insn[insn_id] - vars_written_by_insn = insn.write_dependency_names() & ( - kernel.get_written_variables()) + vars_written_by_insn = set(insn.assignee_var_names()) for dep_id in dep_ids: # iterate through deps reqs. which weren't satisfied dep = kernel.id_to_insn[dep_id] vars_accessed_by_dep = dep.dependency_names() & ( -- GitLab From b24e01b87e2d5a7383963d4ff72e29b61c36522d Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 20 Jun 2020 18:49:12 -0500 Subject: [PATCH 16/20] change the way how dep reqs are stored --- loopy/check.py | 136 ++++++++++++++++++++++--------------------------- 1 file changed, 61 insertions(+), 75 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index cb4fab44a..fdc8874a6 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -516,16 +516,15 @@ def _check_variable_access_ordered_inner(kernel): logger.debug("%s: check_variable_access_ordered: start" % kernel.name) - # insn_id_to_dep_reqs: A mapping from insn_id to set of insn_ids between - # whom dependency requirements should be proved in order to assert - # variable access ordering. - insn_id_to_dep_reqs = dict((insn.id, set()) for insn in - kernel.instructions) + # dep_reqs_to_vars: A mapping from (writer_id, dep_req_id) between whom + # dependency must be established to the variables which prompted the + # dependency requirement. + dep_reqs_to_vars = {} wmap = kernel.writer_map() rmap = kernel.reader_map() - # {{{ populate 'insn_id_to_dep_reqs' + # {{{ populate 'dep_reqs_to_vars' for var in kernel.get_written_variables(): address_space = _get_address_space(kernel, var) @@ -542,7 +541,8 @@ def _check_variable_access_ordered_inner(kernel): declares_nosync_with(kernel, address_space, writer, req_dep)]) - insn_id_to_dep_reqs[writer] |= required_deps + for req_dep in required_deps: + dep_reqs_to_vars.setdefault((writer, req_dep), set()).add(var) # }}} @@ -563,7 +563,7 @@ def _check_variable_access_ordered_inner(kernel): topological_order = _get_topological_order(kernel) - def discard_dep_reqs_in_order(insn_id_to_dep_reqs, edges, order): + def discard_dep_reqs_in_order(dep_reqs_to_vars, edges, order): """ Subtracts dependency requirements of insn_ids by all direct/indirect predecessors of a directed graph of insn_ids as nodes and *edges* as @@ -581,86 +581,72 @@ def _check_variable_access_ordered_inner(kernel): # accumulated_predecessors:insn_id's direct+indirect predecessors accumulated_predecessors = memoized_predecessors.pop(insn_id, set()) - insn_id_to_dep_reqs[insn_id] -= accumulated_predecessors + for pred in accumulated_predecessors: + dep_reqs_to_vars.pop((insn_id, pred), None) for successor in edges[insn_id]: memoized_predecessors.setdefault(successor, set()).update( accumulated_predecessors | set([insn_id])) # forward dep. graph traversal in reverse topological sort order - discard_dep_reqs_in_order(insn_id_to_dep_reqs, depends_on, + discard_dep_reqs_in_order(dep_reqs_to_vars, depends_on, topological_order[::-1]) # reverse dep. graph traversal in topological sort order - discard_dep_reqs_in_order(insn_id_to_dep_reqs, rev_depends, topological_order) + discard_dep_reqs_in_order(dep_reqs_to_vars, rev_depends, topological_order) # {{{ handle dependency requirements that weren't satisfied - for insn_id, dep_ids in six.iteritems(insn_id_to_dep_reqs): - insn = kernel.id_to_insn[insn_id] - vars_written_by_insn = set(insn.assignee_var_names()) - for dep_id in dep_ids: # iterate through deps reqs. which weren't satisfied - dep = kernel.id_to_insn[dep_id] - vars_accessed_by_dep = dep.dependency_names() & ( - kernel.get_written_variables() | ( - kernel.get_read_variables())) - eq_classes_accessed_by_dep = set().union( - *(aliasing_equiv_classes[var] for var in vars_accessed_by_dep)) - - found_var_responsible_for_dep_req = False - - # iterate through all the variables written by 'insn' to find - # which was responsible for the dependency requirement - for var_written_by_insn in vars_written_by_insn: - eq_class = aliasing_equiv_classes[var_written_by_insn] - if eq_class & eq_classes_accessed_by_dep: - found_var_responsible_for_dep_req = True - unaliased_readers = rmap.get(var_written_by_insn, set()) - unaliased_writers = wmap.get(var_written_by_insn, set()) - is_relationship_by_aliasing = not ( - insn_id in unaliased_writers - and (dep_id in unaliased_writers - or dep_id in unaliased_readers)) - - # Do not enforce ordering for disjoint access ranges - if (not is_relationship_by_aliasing and not - overlap_checker.do_access_ranges_overlap_conservative( - insn_id, "w", dep_id, "any", - var_written_by_insn)): - continue + for (writer_id, other_id), variables in six.iteritems(dep_reqs_to_vars): + writer = kernel.id_to_insn[writer_id] + other = kernel.id_to_insn[other_id] - # Do not enforce ordering for aliasing-based relationships - # in different groups. - if (is_relationship_by_aliasing and ( - bool(insn.groups & dep.conflicts_with_groups) - or - bool(dep.groups & insn.conflicts_with_groups))): - continue + for var in variables: + eq_class = aliasing_equiv_classes[var] + unaliased_readers = rmap.get(var, set()) + unaliased_writers = wmap.get(var, set()) + + is_relationship_by_aliasing = not ( + writer_id in unaliased_writers + and (writer_id in unaliased_writers + or other_id in unaliased_readers)) + + # Do not enforce ordering for disjoint access ranges + if (not is_relationship_by_aliasing and not + overlap_checker.do_access_ranges_overlap_conservative( + writer_id, "w", other_id, "any", var)): + continue + + # Do not enforce ordering for aliasing-based relationships + # in different groups. + if (is_relationship_by_aliasing and ( + bool(insn.groups & other.conflicts_with_groups) + or + bool(other.groups & writer.conflicts_with_groups))): + continue - msg = ("No dependency relationship found between " - "'{writer_id}' which writes {var} and " - "'{other_id}' which also accesses {var}. " - "Either add a (possibly indirect) dependency " - "between the two, or add them to each others' nosync " - "set to indicate that no ordering is intended, or " - "turn off this check by setting the " - "'enforce_variable_access_ordered' option " - "(more issues of this type may exist--only reporting " - "the first one)" - .format( - writer_id=insn_id, - other_id=dep_id, - var=( - "the variable '%s'" % var_written_by_insn - if len(eq_class) == 1 - else ( - "the aliasing equivalence class '%s'" - % ", ".join(eq_class)) - ))) - - from loopy.diagnostic import VariableAccessNotOrdered - raise VariableAccessNotOrdered(msg) - - assert found_var_responsible_for_dep_req + msg = ("No dependency relationship found between " + "'{writer_id}' which writes {var} and " + "'{other_id}' which also accesses {var}. " + "Either add a (possibly indirect) dependency " + "between the two, or add them to each others' nosync " + "set to indicate that no ordering is intended, or " + "turn off this check by setting the " + "'enforce_variable_access_ordered' option " + "(more issues of this type may exist--only reporting " + "the first one)" + .format( + writer_id=writer_id, + other_id=other_id, + var=( + "the variable '%s'" % var + if len(eq_class) == 1 + else ( + "the aliasing equivalence class '%s'" + % ", ".join(eq_class)) + ))) + + from loopy.diagnostic import VariableAccessNotOrdered + raise VariableAccessNotOrdered(msg) # }}} -- GitLab From 87c19b49c461adbb66dd8d19b33b3f0df5a91bff Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 20 Jun 2020 19:41:14 -0500 Subject: [PATCH 17/20] human error: indentation --- loopy/check.py | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index fdc8874a6..d1923144e 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -624,29 +624,29 @@ def _check_variable_access_ordered_inner(kernel): bool(other.groups & writer.conflicts_with_groups))): continue - msg = ("No dependency relationship found between " - "'{writer_id}' which writes {var} and " - "'{other_id}' which also accesses {var}. " - "Either add a (possibly indirect) dependency " - "between the two, or add them to each others' nosync " - "set to indicate that no ordering is intended, or " - "turn off this check by setting the " - "'enforce_variable_access_ordered' option " - "(more issues of this type may exist--only reporting " - "the first one)" - .format( - writer_id=writer_id, - other_id=other_id, - var=( - "the variable '%s'" % var - if len(eq_class) == 1 - else ( - "the aliasing equivalence class '%s'" - % ", ".join(eq_class)) - ))) - - from loopy.diagnostic import VariableAccessNotOrdered - raise VariableAccessNotOrdered(msg) + msg = ("No dependency relationship found between " + "'{writer_id}' which writes {var} and " + "'{other_id}' which also accesses {var}. " + "Either add a (possibly indirect) dependency " + "between the two, or add them to each others' nosync " + "set to indicate that no ordering is intended, or " + "turn off this check by setting the " + "'enforce_variable_access_ordered' option " + "(more issues of this type may exist--only reporting " + "the first one)" + .format( + writer_id=writer_id, + other_id=other_id, + var=( + "the variable '%s'" % var + if len(eq_class) == 1 + else ( + "the aliasing equivalence class '%s'" + % ", ".join(eq_class)) + ))) + + from loopy.diagnostic import VariableAccessNotOrdered + raise VariableAccessNotOrdered(msg) # }}} -- GitLab From bdd537d90760a7a1132e6e489d3b553457b90aaa Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 23 Jun 2020 17:45:51 -0500 Subject: [PATCH 18/20] bug fix: wrong variable name --- loopy/check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/check.py b/loopy/check.py index d1923144e..600f5670d 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -619,7 +619,7 @@ def _check_variable_access_ordered_inner(kernel): # Do not enforce ordering for aliasing-based relationships # in different groups. if (is_relationship_by_aliasing and ( - bool(insn.groups & other.conflicts_with_groups) + bool(writer.groups & other.conflicts_with_groups) or bool(other.groups & writer.conflicts_with_groups))): continue -- GitLab From be3738cbdec5bbf3439fe2f80c6cfd7eaf11a1d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= Date: Tue, 30 Jun 2020 03:34:11 +0200 Subject: [PATCH 19/20] Hack scalability sched and ordering check --- loopy/check.py | 67 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 600f5670d..c26412397 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -514,11 +514,11 @@ def _check_variable_access_ordered_inner(kernel): overlap_checker = AccessRangeOverlapChecker(kernel) aliasing_equiv_classes = find_aliasing_equivalence_classes(kernel) - logger.debug("%s: check_variable_access_ordered: start" % kernel.name) - - # dep_reqs_to_vars: A mapping from (writer_id, dep_req_id) between whom - # dependency must be established to the variables which prompted the - # dependency requirement. + # dep_reqs_to_vars: A mapping (writer_id, dep_req_id) -> set of variable names, + # where the tuple denotes a pair of instructions IDs, and the variable + # names are the ones that necessitate a dependency. + # + # Note: This can be worst-case O(n^2) in the number of instructions. dep_reqs_to_vars = {} wmap = kernel.writer_map() @@ -537,8 +537,9 @@ def _check_variable_access_ordered_inner(kernel): for writer in writers: required_deps = (readers | writers) - set([writer]) - required_deps = set([req_dep for req_dep in required_deps if not - declares_nosync_with(kernel, address_space, writer, + required_deps = set([req_dep + for req_dep in required_deps + if not declares_nosync_with(kernel, address_space, writer, req_dep)]) for req_dep in required_deps: @@ -559,8 +560,11 @@ def _check_variable_access_ordered_inner(kernel): depends_on[insn.id].update(insn.depends_on) for dep in insn.depends_on: rev_depends[dep].add(insn.id) + # }}} + # {{{ remove pairs from dep_reqs_to_vars for which dependencies exist + topological_order = _get_topological_order(kernel) def discard_dep_reqs_in_order(dep_reqs_to_vars, edges, order): @@ -572,28 +576,39 @@ def _check_variable_access_ordered_inner(kernel): :arg order: An instance of :class:`list` of instruction ids in which the *edges* graph is to be traversed. """ - # memoized_predecessors: mapping from insn_id to its direct/indirect + # predecessors: mapping from insn_id to its direct/indirect # predecessors - memoized_predecessors = {} + predecessors = {} - # reverse postorder traversal of dependency graph for insn_id in order: - # accumulated_predecessors:insn_id's direct+indirect predecessors - accumulated_predecessors = memoized_predecessors.pop(insn_id, set()) + # insn_predecessors:insn_id's direct+indirect predecessors - for pred in accumulated_predecessors: - dep_reqs_to_vars.pop((insn_id, pred), None) + # This set of predecessors is complete because we're + # traversing in topological order: No predecessor + # can occur after the instruction itself. + insn_predecessors = predecessors.pop(insn_id, set()) + + for pred in insn_predecessors: + dep_reqs_to_vars.pop( + (insn_id, pred), + # don't fail if pair doesn't exist + None) for successor in edges[insn_id]: - memoized_predecessors.setdefault(successor, set()).update( - accumulated_predecessors | set([insn_id])) + predecessors.setdefault(successor, set()).update( + insn_predecessors | set([insn_id])) # forward dep. graph traversal in reverse topological sort order + # (proceeds "end of program" -> "beginning of program") discard_dep_reqs_in_order(dep_reqs_to_vars, depends_on, topological_order[::-1]) + # reverse dep. graph traversal in topological sort order + # (proceeds "beginning of program" -> "end of program") discard_dep_reqs_in_order(dep_reqs_to_vars, rev_depends, topological_order) + # }}} + # {{{ handle dependency requirements that weren't satisfied for (writer_id, other_id), variables in six.iteritems(dep_reqs_to_vars): @@ -650,8 +665,6 @@ def _check_variable_access_ordered_inner(kernel): # }}} - logger.debug("%s: check_variable_access_ordered: done" % kernel.name) - def check_variable_access_ordered(kernel): """Checks that between each write to a variable and all other accesses to @@ -673,15 +686,17 @@ def check_variable_access_ordered(kernel): if kernel.options.enforce_variable_access_ordered == "no_check": return - if kernel.options.enforce_variable_access_ordered: - _check_variable_access_ordered_inner(kernel) - else: - from loopy.diagnostic import VariableAccessNotOrdered - try: + from pytools import ProcessLogger + with ProcessLogger(logger, "%s: check variable access ordered" % kernel.name): + if kernel.options.enforce_variable_access_ordered: _check_variable_access_ordered_inner(kernel) - except VariableAccessNotOrdered as e: - from loopy.diagnostic import warn_with_kernel - warn_with_kernel(kernel, "variable_access_ordered", str(e)) + else: + from loopy.diagnostic import VariableAccessNotOrdered + try: + _check_variable_access_ordered_inner(kernel) + except VariableAccessNotOrdered as e: + from loopy.diagnostic import warn_with_kernel + warn_with_kernel(kernel, "variable_access_ordered", str(e)) # }}} -- GitLab From 35d2cd9abfb4ee54fcff05de05a486911945be5f Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 3 Jul 2020 01:27:05 -0500 Subject: [PATCH 20/20] explains why pytools.graph.compute_sccs is a better fit --- loopy/check.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/loopy/check.py b/loopy/check.py index c26412397..4588a59b4 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -492,11 +492,21 @@ def _get_address_space(kernel, var): def _get_topological_order(kernel): + """ + Returns a :class:`list` of insn ids of *kernel* in a topological sort + order. + + If there is a dependency cycle within the instructions of *kernel* raises a + :class:`loopy.diagnostic.DependencyCycleFound` exception. + """ from pytools.graph import compute_sccs from loopy.diagnostic import DependencyCycleFound dep_map = {insn.id: insn.depends_on for insn in kernel.instructions} + # pytools.graph.compute_sccs serves 2 purposes: + # 1. computes topological sort order of instructions. + # 2. provides info. about any cycles in the graph. sccs = compute_sccs(dep_map) order = [] -- GitLab