diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
index 550288642a053888cf5125129471c8bed9736cfa..72d3a7dba117e1f005a887d9a6c9a04bb7a37588 100644
--- a/loopy/kernel/__init__.py
+++ b/loopy/kernel/__init__.py
@@ -214,7 +214,8 @@ class LoopKernel(ImmutableRecordWithoutPickling):
             state=kernel_state.INITIAL,
             target=None,
 
-            overridden_get_grid_sizes_for_insn_ids=None):
+            overridden_get_grid_sizes_for_insn_ids=None,
+            _cached_written_variables=None):
         """
         :arg overridden_get_grid_sizes_for_insn_ids: A callable. When kernels get
             intersected in slab decomposition, their grid sizes shouldn't
@@ -290,7 +291,8 @@ class LoopKernel(ImmutableRecordWithoutPickling):
                 state=state,
                 target=target,
                 overridden_get_grid_sizes_for_insn_ids=(
-                    overridden_get_grid_sizes_for_insn_ids))
+                    overridden_get_grid_sizes_for_insn_ids),
+                _cached_written_variables=_cached_written_variables)
 
         self._kernel_executor_cache = {}
 
@@ -801,6 +803,9 @@ class LoopKernel(ImmutableRecordWithoutPickling):
 
     @memoize_method
     def get_written_variables(self):
+        if self._cached_written_variables is not None:
+            return self._cached_written_variables
+
         return frozenset(
                 var_name
                 for insn in self.instructions
@@ -1277,7 +1282,7 @@ class LoopKernel(ImmutableRecordWithoutPickling):
         result = dict(
                 (key, getattr(self, key))
                 for key in self.__class__.fields
-                if hasattr(self, key) and key != "instructions")
+                if hasattr(self, key))
 
         result.pop("cache_manager", None)
 
@@ -1292,6 +1297,11 @@ class LoopKernel(ImmutableRecordWithoutPickling):
                 eq_key_getter=_get_insn_eq_key,
                 persistent_hash_key_getter=_get_insn_hash_key)
 
+        # Cache written variables to avoid having to unpickle instructions in
+        # order to compute the written variables. This is needed on the
+        # cache-to-execution path.
+        result["_cached_written_variables"] = self.get_written_variables()
+
         # make sure that kernels are pickled with a cached hash key in place
         from loopy.tools import LoopyKeyBuilder
         LoopyKeyBuilder()(self)
diff --git a/loopy/tools.py b/loopy/tools.py
index d6952d54782f113685299641c828907fb7f32a46..288e0c3c4c6035612a3368a6348f624090ea9c16 100644
--- a/loopy/tools.py
+++ b/loopy/tools.py
@@ -25,6 +25,7 @@ THE SOFTWARE.
 
 import collections
 import numpy as np
+from pytools import memoize_method
 from pytools.persistent_dict import KeyBuilder as KeyBuilderBase
 from loopy.symbolic import WalkMapper as LoopyWalkMapper
 from pymbolic.mapper.persistent_hash import (
@@ -155,13 +156,23 @@ class LoopyEqKeyBuilder(object):
         self.field_dict[field_name] = str(value).encode("utf-8")
 
     def key(self):
+        """Return a (class name, field dict) tuple for equality comparison."""
         return (self.class_.__name__.encode("utf-8"), self.field_dict)
 
+    @memoize_method
     def hash_key(self):
-        """Similar to key(), but excludes field names for faster hashing.
+        """Return a memoized hash of the class name and field values.
         """
-        return (self.class_.__name__.encode("utf-8"),) + tuple(
-                self.field_dict[k] for k in sorted(self.field_dict.keys()))
+        # To speed up any calculations that repeatedly use the return value,
+        # this method returns a hash rather than the key tuple itself.
+
+        kb = LoopyKeyBuilder()
+        # Key holds values in sorted field-name order; names are left out.
+        key = (
+            (self.class_.__name__.encode("utf-8"),) +
+            tuple(self.field_dict[k] for k in sorted(self.field_dict.keys())))
+
+        return kb(key)
 
 # }}}