diff --git a/loopy/transform/save.py b/loopy/transform/save.py
index a2e7a4d5b1257b3868ca7cea0f731e52750cc412..d3c4b9092badca4c9e8c42168e5d38130df811d6 100644
--- a/loopy/transform/save.py
+++ b/loopy/transform/save.py
@@ -25,6 +25,7 @@ THE SOFTWARE.
 
 from loopy.diagnostic import LoopyError
 import loopy as lp
+import six
 
 from loopy.kernel.data import auto, temp_var_scope
 from pytools import memoize_method, Record
@@ -267,6 +268,58 @@ class TemporarySaver(object):
                             arg.name for arg in kernel.args
                             if isinstance(arg, ValueArg)))))
 
+    def find_accessing_instructions_in_subkernel(self, temporary, subkernel):
+        """Return the instructions in *subkernel* that access *temporary*.
+
+        If *temporary* has base storage, instructions that access any
+        temporary sharing that base storage (i.e. aliasing memory) are
+        included as well.
+
+        :returns: a :class:`frozenset`
+        """
+        aliasing_names = set([temporary])
+        base_storage = self.kernel.temporary_variables[temporary].base_storage
+
+        if base_storage is not None:
+            aliasing_names |= self.base_storage_to_temporary_map[base_storage]
+
+        from loopy.kernel.tools import get_subkernel_to_insn_id_map
+        subkernel_insns = get_subkernel_to_insn_id_map(self.kernel)[subkernel]
+
+        # Fetch each map once instead of once per aliasing name.
+        reader_map = self.kernel.reader_map()
+        writer_map = self.kernel.writer_map()
+
+        accessing_insns_in_subkernel = set()
+        for name in aliasing_names:
+            # A name missing from a map contributes no instructions.
+            accessing_insns_in_subkernel |= (
+                    reader_map.get(name, frozenset()) & subkernel_insns)
+            accessing_insns_in_subkernel |= (
+                    writer_map.get(name, frozenset()) & subkernel_insns)
+
+        return frozenset(accessing_insns_in_subkernel)
+
+    @property
+    @memoize_method
+    def base_storage_to_temporary_map(self):
+        """Map each base storage to the set of names of the temporaries
+        that use it.
+
+        Temporaries whose ``base_storage`` is *None* are left out.
+        """
+        from collections import defaultdict
+
+        result = defaultdict(set)
+
+        for temporary in six.itervalues(self.kernel.temporary_variables):
+            if temporary.base_storage is None:
+                continue
+            result[temporary.base_storage].add(temporary.name)
+
+        return result
+
     @property
     @memoize_method
     def subkernel_to_slice_indices(self):
@@ -488,11 +541,8 @@ class TemporarySaver(object):
         if mode == "save":
             args = reversed(args)
 
-        from loopy.kernel.tools import get_subkernel_to_insn_id_map
-        accessing_insns_in_subkernel = (frozenset(
-            self.kernel.reader_map()[temporary]
-            | self.kernel.writer_map()[temporary])
-            & get_subkernel_to_insn_id_map(self.kernel)[subkernel])
+        accessing_insns_in_subkernel = self.find_accessing_instructions_in_subkernel(
+                temporary, subkernel)
 
         if mode == "save":
             depends_on = accessing_insns_in_subkernel
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 4bb6a27267bd7b1880265bdd5b47ee676a480fb3..e424e063f9e062ed68054e1effe91d66bc389d50 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -1146,7 +1146,7 @@ def save_and_reload_temporaries_test(queue, knl, out_expect, debug=False):
         1/0
 
     _, (out,) = knl(queue, out_host=True)
-    assert (out == out_expect).all()
+    assert (out == out_expect).all(), (out, out_expect)
 
 
 @pytest.mark.parametrize("hw_loop", [True, False])
@@ -1338,6 +1338,31 @@ def test_save_local_multidim_array(ctx_factory, debug=False):
     save_and_reload_temporaries_test(queue, knl, 1, debug)
 
 
+def test_save_with_base_storage(ctx_factory, debug=False):
+    ctx = ctx_factory()
+    queue = cl.CommandQueue(ctx)
+
+    knl = lp.make_kernel(
+            "{[i]: 0 <= i < 10}",
+            """
+            <>a[i] = 0
+            <>b[i] = i
+            ... gbarrier
+            out[i] = a[i]
+            """,
+            "...",
+            seq_dependencies=True)
+
+    knl = lp.tag_inames(knl, dict(i="l.0"))
+    knl = lp.set_temporary_scope(knl, "a", "local")
+    knl = lp.set_temporary_scope(knl, "b", "local")
+
+    knl = lp.alias_temporaries(knl, ["a", "b"],
+            synchronize_for_exclusive_use=False)
+
+    save_and_reload_temporaries_test(queue, knl, np.arange(10), debug)
+
+
 def test_missing_temporary_definition_detection():
     knl = lp.make_kernel(
             "{ [i]: 0<=i<10 }",