diff --git a/doc/ref_transform.rst b/doc/ref_transform.rst
index 0ce59b6300f3dcd2b198da1064b554bc4598b4fe..4a07b63330747aa69d7ed498e004d60b7c312a7b 100644
--- a/doc/ref_transform.rst
+++ b/doc/ref_transform.rst
@@ -106,8 +106,6 @@ Creating Batches of Operations
 Finishing up
 ------------
 
-.. automodule:: loopy.transform.save
-
 .. currentmodule:: loopy
 
 .. autofunction:: preprocess_kernel
@@ -116,6 +114,8 @@ Finishing up
 
 .. autofunction:: get_one_scheduled_kernel
 
+.. autofunction:: save_and_reload_temporaries
+
 .. autoclass:: GeneratedProgram
 .. autoclass:: CodeGenerationResult
 
diff --git a/loopy/__init__.py b/loopy/__init__.py
index 110652cf75d467ceb473d4997142f4dabe3e763b..6bd764f8df93f1b4b2ae5755c1c90ccddc654fe6 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -106,6 +106,7 @@ from loopy.transform.padding import (
 from loopy.transform.ilp import realize_ilp
 from loopy.transform.batch import to_batched
 from loopy.transform.parameter import assume, fix_parameters
+from loopy.transform.save import save_and_reload_temporaries
 
 # }}}
 
@@ -206,6 +207,8 @@ __all__ = [
 
         "assume", "fix_parameters",
 
+        "save_and_reload_temporaries",
+
         # }}}
 
         "get_dot_dependency_graph",
@@ -258,7 +261,6 @@ __all__ = [
         # }}}
         ]
 
-
 # }}}
 
 
diff --git a/loopy/transform/save.py b/loopy/transform/save.py
index 603f6c4c70dcabbd9598d8107eabc5d3aa49576b..8706bc4da70b94ad678f07158e0a0f648fdd0030 100644
--- a/loopy/transform/save.py
+++ b/loopy/transform/save.py
@@ -40,7 +40,9 @@ logger = logging.getLogger(__name__)
 
 
 __doc__ = """
-.. autofunction:: save_and_reload
+.. currentmodule:: loopy
+
+.. autofunction:: save_and_reload_temporaries
 """
 
 
@@ -517,7 +519,7 @@ class TemporarySaver(object):
 
 # {{{ auto save and reload across kernel calls
 
-def save_and_reload(knl):
+def save_and_reload_temporaries(knl):
     """
     Add instructions to save and reload temporary variables that are live
     across kernel calls.
@@ -538,7 +540,7 @@ def save_and_reload(knl):
 
     where `t_save_slot` is a newly-created global temporary variable.
 
-    :returns:
+    :returns: The kernel with save and reload instructions added.
     """
     liveness = LivenessAnalysis(knl)
     saver = TemporarySaver(knl)
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 0f280892d64edb41ede37ab3c4e4bf2112c1e0b8..af4269047539b800a5fd389f9293f11551c9a291 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -1105,15 +1105,15 @@ def test_kernel_splitting_with_loop(ctx_factory):
     lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5))
 
 
-def save_and_reload_test(queue, knl, out_expect, debug=False):
+def save_and_reload_temporaries_test(queue, knl, out_expect, debug=False):
     from loopy.preprocess import preprocess_kernel
     from loopy.schedule import get_one_scheduled_kernel
 
     knl = preprocess_kernel(knl)
     knl = get_one_scheduled_kernel(knl)
 
-    from loopy.transform.save import save_and_reload
-    knl = save_and_reload(knl)
+    from loopy.transform.save import save_and_reload_temporaries
+    knl = save_and_reload_temporaries(knl)
     knl = get_one_scheduled_kernel(knl)
 
     if debug:
@@ -1145,7 +1145,7 @@ def test_save_of_private_scalar(ctx_factory, hw_loop, debug=False):
     if hw_loop:
         knl = lp.tag_inames(knl, dict(i="g.0"))
 
-    save_and_reload_test(queue, knl, np.arange(8), debug)
+    save_and_reload_temporaries_test(queue, knl, np.arange(8), debug)
 
 
 def test_save_of_private_array(ctx_factory, debug=False):
@@ -1163,7 +1163,7 @@ def test_save_of_private_array(ctx_factory, debug=False):
         """, seq_dependencies=True)
 
     knl = lp.set_temporary_scope(knl, "t", "private")
-    save_and_reload_test(queue, knl, np.arange(8), debug)
+    save_and_reload_temporaries_test(queue, knl, np.arange(8), debug)
 
 
 def test_save_of_private_array_in_hw_loop(ctx_factory, debug=False):
@@ -1187,7 +1187,8 @@ def test_save_of_private_array_in_hw_loop(ctx_factory, debug=False):
     knl = lp.tag_inames(knl, dict(i="g.0"))
     knl = lp.set_temporary_scope(knl, "t", "private")
 
-    save_and_reload_test(queue, knl, np.vstack((8 * (np.arange(8),))), debug)
+    save_and_reload_temporaries_test(
+        queue, knl, np.vstack((8 * (np.arange(8),))), debug)
 
 
 def test_save_of_private_multidim_array(ctx_factory, debug=False):
@@ -1211,7 +1212,7 @@ def test_save_of_private_multidim_array(ctx_factory, debug=False):
     knl = lp.set_temporary_scope(knl, "t", "private")
 
     result = np.array([np.vstack((8 * (np.arange(8),))) for i in range(8)])
-    save_and_reload_test(queue, knl, result, debug)
+    save_and_reload_temporaries_test(queue, knl, result, debug)
 
 
 def test_save_of_private_multidim_array_in_hw_loop(ctx_factory, debug=False):
@@ -1236,7 +1237,7 @@ def test_save_of_private_multidim_array_in_hw_loop(ctx_factory, debug=False):
     knl = lp.tag_inames(knl, dict(i="g.0"))
 
     result = np.array([np.vstack((8 * (np.arange(8),))) for i in range(8)])
-    save_and_reload_test(queue, knl, result, debug)
+    save_and_reload_temporaries_test(queue, knl, result, debug)
 
 
 @pytest.mark.parametrize("hw_loop", [True, False])
@@ -1270,7 +1271,7 @@ def test_save_of_multiple_private_temporaries(ctx_factory, hw_loop, debug=False)
 
     result = np.array([1, 10, 10, 10, 10, 10, 10, 10, 10, 9])
 
-    save_and_reload_test(queue, knl, result, debug)
+    save_and_reload_temporaries_test(queue, knl, result, debug)
 
 
 def test_save_of_local_array(ctx_factory, debug=False):
@@ -1291,7 +1292,7 @@ def test_save_of_local_array(ctx_factory, debug=False):
     knl = lp.set_temporary_scope(knl, "t", "local")
     knl = lp.tag_inames(knl, dict(i="g.0", j="l.0"))
 
-    save_and_reload_test(queue, knl, np.arange(8), debug)
+    save_and_reload_temporaries_test(queue, knl, np.arange(8), debug)
 
 
 def test_save_local_multidim_array(ctx_factory, debug=False):
@@ -1312,7 +1313,7 @@ def test_save_local_multidim_array(ctx_factory, debug=False):
     knl = lp.set_temporary_scope(knl, "t_local", "local")
     knl = lp.tag_inames(knl, dict(j="l.0", i="g.0"))
 
-    save_and_reload_test(queue, knl, 1, debug)
+    save_and_reload_temporaries_test(queue, knl, 1, debug)
 
 
 def test_global_temporary(ctx_factory):