diff --git a/loopy/transform/save.py b/loopy/transform/save.py
index a2e7a4d5b1257b3868ca7cea0f731e52750cc412..d3c4b9092badca4c9e8c42168e5d38130df811d6 100644
--- a/loopy/transform/save.py
+++ b/loopy/transform/save.py
@@ -25,6 +25,7 @@ THE SOFTWARE.
 
 from loopy.diagnostic import LoopyError
 import loopy as lp
+import six
 
 from loopy.kernel.data import auto, temp_var_scope
 from pytools import memoize_method, Record
@@ -267,6 +268,49 @@ class TemporarySaver(object):
                             arg.name for arg in kernel.args
                             if isinstance(arg, ValueArg)))))
 
+    def find_accessing_instructions_in_subkernel(self, temporary, subkernel):
+        # Return a frozenset of the instructions in *subkernel* that appear in
+        # the kernel's reader or writer map for *temporary*. If base_storage is
+        # set, temporaries sharing that base_storage (aliases) count as well.
+        aliasing_names = set([temporary])
+        base_storage = self.kernel.temporary_variables[temporary].base_storage
+
+        if base_storage is not None:
+            aliasing_names |= self.base_storage_to_temporary_map[base_storage]
+
+        from loopy.kernel.tools import get_subkernel_to_insn_id_map
+        accessing_insns_in_subkernel = set()
+        subkernel_insns = get_subkernel_to_insn_id_map(self.kernel)[subkernel]
+
+        for name in aliasing_names:
+            try:
+                accessing_insns_in_subkernel |= (
+                        self.kernel.reader_map()[name] & subkernel_insns)
+            except KeyError:
+                pass  # name has no entry in the reader map: nothing to add
+
+            try:
+                accessing_insns_in_subkernel |= (
+                        self.kernel.writer_map()[name] & subkernel_insns)
+            except KeyError:
+                pass  # name has no entry in the writer map: nothing to add
+
+        return frozenset(accessing_insns_in_subkernel)
+
+    @property
+    @memoize_method
+    def base_storage_to_temporary_map(self):
+        from collections import defaultdict
+        # Map each base_storage to the set of names of the temporaries using it.
+        result = defaultdict(set)
+        # Temporaries whose base_storage is None do not appear in the map.
+        for temporary in six.itervalues(self.kernel.temporary_variables):
+            if temporary.base_storage is None:
+                continue
+            result[temporary.base_storage].add(temporary.name)
+        # NOTE(review): returns a defaultdict, so indexing an absent key adds it.
+        return result
+
     @property
     @memoize_method
     def subkernel_to_slice_indices(self):
@@ -488,11 +532,8 @@ class TemporarySaver(object):
         if mode == "save":
             args = reversed(args)
 
-        from loopy.kernel.tools import get_subkernel_to_insn_id_map
-        accessing_insns_in_subkernel = (frozenset(
-                self.kernel.reader_map()[temporary]
-                | self.kernel.writer_map()[temporary])
-            & get_subkernel_to_insn_id_map(self.kernel)[subkernel])
+        accessing_insns_in_subkernel = self.find_accessing_instructions_in_subkernel(
+                temporary, subkernel)
 
         if mode == "save":
             depends_on = accessing_insns_in_subkernel
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 4bb6a27267bd7b1880265bdd5b47ee676a480fb3..e424e063f9e062ed68054e1effe91d66bc389d50 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -1146,7 +1146,7 @@ def save_and_reload_temporaries_test(queue, knl, out_expect, debug=False):
         1/0
 
     _, (out,) = knl(queue, out_host=True)
-    assert (out == out_expect).all()
+    assert (out == out_expect).all(), (out, out_expect)
 
 
 @pytest.mark.parametrize("hw_loop", [True, False])
@@ -1338,6 +1338,31 @@ def test_save_local_multidim_array(ctx_factory, debug=False):
     save_and_reload_temporaries_test(queue, knl, 1, debug)
 
 
+def test_save_with_base_storage(ctx_factory, debug=False):
+    ctx = ctx_factory()
+    queue = cl.CommandQueue(ctx)
+    # Write a, then b, then read a back into out after a global barrier.
+    knl = lp.make_kernel(
+            "{[i]: 0 <= i < 10}",
+            """
+            <>a[i] = 0
+            <>b[i] = i
+            ... gbarrier
+            out[i] = a[i]
+            """,
+            "...",
+            seq_dependencies=True)
+
+    knl = lp.tag_inames(knl, dict(i="l.0"))
+    knl = lp.set_temporary_scope(knl, "a", "local")
+    knl = lp.set_temporary_scope(knl, "b", "local")
+    # Alias a and b: out is then expected to hold the values written to b.
+    knl = lp.alias_temporaries(knl, ["a", "b"],
+            synchronize_for_exclusive_use=False)
+
+    save_and_reload_temporaries_test(queue, knl, np.arange(10), debug)
+
+
 def test_missing_temporary_definition_detection():
     knl = lp.make_kernel(
             "{ [i]: 0<=i<10 }",