From d260f4e7fae51e739af4583110fe4581bcf320bd Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 11 Feb 2018 17:57:52 -0600 Subject: [PATCH] Barrier insertion: Only emit barriers for overlapping access patterns --- loopy/schedule/__init__.py | 114 +++++++++++++++++++++++++------------ test/test_loopy.py | 49 ++++++++++++---- 2 files changed, 116 insertions(+), 47 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 850f0a61f..b196b343e 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1427,8 +1427,8 @@ class DependencyTracker(object): raise ValueError("unknown 'var_kind': %s" % var_kind) from collections import defaultdict - self.writer_map = defaultdict(set) - self.reader_map = defaultdict(set) + self.base_writer_map = defaultdict(set) + self.base_access_map = defaultdict(set) self.temp_to_base_storage = kernel.get_temporary_to_base_storage_map() def map_to_base_storage(self, var_names): @@ -1442,23 +1442,27 @@ class DependencyTracker(object): return result def discard_all_sources(self): - self.writer_map.clear() - self.reader_map.clear() + self.base_writer_map.clear() + self.base_access_map.clear() + + # Anything with 'base' in the name in this class contains names normalized + # to their 'base_storage'. def add_source(self, source): """ - Specify that an instruction may be used as the source of a dependency edge. + Specify that an instruction used as the source (depended-upon + part) of a dependency edge is of interest to this tracker. """ # If source is an insn ID, look up the actual instruction. source = self.kernel.id_to_insn.get(source, source) for written in self.map_to_base_storage( set(source.assignee_var_names()) & self.relevant_vars): - self.writer_map[written].add(source.id) + self.base_writer_map[written].add(source.id) for read in self.map_to_base_storage( - source.read_dependency_names() & self.relevant_vars): - self.reader_map[read].add(source.id) + source.dependency_names() & self.relevant_vars): + self.base_access_map[read].add(source.id) def gen_dependencies_with_target_at(self, target): """ @@ -1471,51 +1475,87 @@ class DependencyTracker(object): # If target is an insn ID, look up the actual instruction. target = self.kernel.id_to_insn.get(target, target) - tgt_write = self.map_to_base_storage( - set(target.assignee_var_names()) & self.relevant_vars) - tgt_read = self.map_to_base_storage( - target.read_dependency_names() & self.relevant_vars) - - for (accessed_vars, accessor_map) in [ - (tgt_read, self.writer_map), - (tgt_write, self.reader_map), - (tgt_write, self.writer_map)]: + for ( + tgt_dir, src_dir, src_base_var_to_accessor_map + ) in [ + ("any", "w", self.base_writer_map), + ("w", "any", self.base_access_map), + ]: for dep in self.get_conflicting_accesses( - accessed_vars, accessor_map, target.id): + target, tgt_dir, src_dir, src_base_var_to_accessor_map): yield dep - def get_conflicting_accesses( - self, accessed_vars, var_to_accessor_map, target): + def get_conflicting_accesses(self, target, tgt_dir, src_dir, + src_base_var_to_accessor_map): + + def get_written_names(insn): + return set(insn.assignee_var_names()) & self.relevant_vars + + def get_accessed_names(insn): + return insn.dependency_names() & self.relevant_vars + + dir_to_getter = {"w": get_written_names, "any": get_accessed_names} + + def filter_var_set_for_base_storage(var_name_set, base_storage_name): + return set( + name + for name in var_name_set + if (self.temp_to_base_storage.get(name, name) + == base_storage_name)) + + tgt_accessed_vars = dir_to_getter[tgt_dir](target) + tgt_accessed_vars_base = self.map_to_base_storage(tgt_accessed_vars) + + for race_var_base in sorted(tgt_accessed_vars_base): + for source_id in sorted( + src_base_var_to_accessor_map[race_var_base]): - def determine_conflict_nature(source, target): - if (not self.reverse and source in - self.kernel.get_nosync_set(target, scope=self.var_kind)): - return None - if (self.reverse and target in - self.kernel.get_nosync_set(source, scope=self.var_kind)): - return None - return self.describe_dependency(source, target) + # {{{ no barrier if nosync - for var in sorted(accessed_vars): - for source in sorted(var_to_accessor_map[var]): - dep_descr = determine_conflict_nature(source, target) + if (not self.reverse and source_id in + self.kernel.get_nosync_set(target.id, scope=self.var_kind)): + continue + if (self.reverse and target.id in + self.kernel.get_nosync_set(source_id, scope=self.var_kind)): + continue + # }}} + + dep_descr = self.describe_dependency(source_id, target) if dep_descr is None: continue + source = self.kernel.id_to_insn[source_id] + src_race_vars = filter_var_set_for_base_storage( + dir_to_getter[src_dir](source), race_var_base) + tgt_race_vars = filter_var_set_for_base_storage( + tgt_accessed_vars, race_var_base) + + race_var = race_var_base + + # Only one (non-base_storage) race variable name: Data is not + # being passed between aliases, so we may look at indices. + if src_race_vars == tgt_race_vars and len(src_race_vars) == 1: + race_var, = src_race_vars + + from loopy.symbolic import do_access_ranges_overlap_conservative + if not do_access_ranges_overlap_conservative( + self.kernel, target.id, tgt_dir, + source_id, src_dir, race_var): + continue + yield DependencyRecord( - source=self.kernel.id_to_insn[source], - target=self.kernel.id_to_insn[target], + source=source, + target=target, dep_descr=dep_descr, - variable=var, + variable=race_var, var_kind=self.var_kind) - def describe_dependency(self, source, target): + def describe_dependency(self, source_id, target): dep_descr = None - source = self.kernel.id_to_insn[source] - target = self.kernel.id_to_insn[target] + source = self.kernel.id_to_insn[source_id] if self.reverse: source, target = target, source diff --git a/test/test_loopy.py b/test/test_loopy.py index 86c8c6e43..8581ae5b8 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2368,8 +2368,9 @@ def test_nosync_option_parsing(): assert "id=insn5, no_sync_with=insn1@any" in kernel_str -def assert_barrier_between(knl, id1, id2, ignore_barriers_in_levels=()): - from loopy.schedule import (RunInstruction, Barrier, EnterLoop, LeaveLoop) +def barrier_between(knl, id1, id2, ignore_barriers_in_levels=()): + from loopy.schedule import (RunInstruction, Barrier, EnterLoop, LeaveLoop, + CallKernel, ReturnFromKernel) watch_for_barrier = False seen_barrier = False loop_level = 0 @@ -2379,9 +2380,7 @@ def assert_barrier_between(knl, id1, id2, ignore_barriers_in_levels=()): if sched_item.insn_id == id1: watch_for_barrier = True elif sched_item.insn_id == id2: - assert watch_for_barrier - assert seen_barrier - return + return watch_for_barrier and seen_barrier elif isinstance(sched_item, Barrier): if watch_for_barrier and loop_level not in ignore_barriers_in_levels: seen_barrier = True @@ -2389,6 +2388,11 @@ def assert_barrier_between(knl, id1, id2, ignore_barriers_in_levels=()): loop_level += 1 elif isinstance(sched_item, LeaveLoop): loop_level -= 1 + elif isinstance(sched_item, (CallKernel, ReturnFromKernel)): + pass + else: + raise RuntimeError("schedule item type '%s' not understood" + % type(sched_item).__name__) raise RuntimeError("id2 was not seen") @@ -2415,9 +2419,9 @@ def test_barrier_insertion_near_top_of_loop(): print(knl) - assert_barrier_between(knl, "ainit", "tcomp") - assert_barrier_between(knl, "tcomp", "bcomp1") - assert_barrier_between(knl, "bcomp1", "bcomp2") + assert barrier_between(knl, "ainit", "tcomp") + assert barrier_between(knl, "tcomp", "bcomp1") + assert barrier_between(knl, "bcomp1", "bcomp2") def test_barrier_insertion_near_bottom_of_loop(): @@ -2442,8 +2446,8 @@ def test_barrier_insertion_near_bottom_of_loop(): print(knl) - assert_barrier_between(knl, "bcomp1", "bcomp2") - assert_barrier_between(knl, "ainit", "aupdate", ignore_barriers_in_levels=[1]) + assert barrier_between(knl, "bcomp1", "bcomp2") + assert barrier_between(knl, "ainit", "aupdate", ignore_barriers_in_levels=[1]) def test_barrier_in_overridden_get_grid_size_expanded_kernel(): @@ -2827,6 +2831,31 @@ def test_check_for_variable_access_ordering_with_aliasing(): lp.get_one_scheduled_kernel(knl) +@pytest.mark.parametrize(("second_index", "expect_barrier"), + [ + ("2*i", True), + ("2*i+1", False), + ]) +def test_no_barriers_for_nonoverlapping_access(second_index, expect_barrier): + knl = lp.make_kernel( + "{[i]: 0<=i<128}", + """ + a[2*i] = 12 {id=first} + a[%s] = 13 {id=second,dep=first} + """ % second_index, + [ + lp.TemporaryVariable("a", lp.auto, shape=(256,), + scope=lp.temp_var_scope.LOCAL), + ]) + + knl = lp.tag_inames(knl, "i:l.0") + + knl = lp.preprocess_kernel(knl) + knl = lp.get_one_scheduled_kernel(knl) + + assert barrier_between(knl, "first", "second") == expect_barrier + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab