diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py
index 6f312ec798e13fa4b1d183c27578089857b13e3d..738def915b363a72dac07af717f092869ad74304 100644
--- a/loopy/codegen/__init__.py
+++ b/loopy/codegen/__init__.py
@@ -506,6 +506,12 @@ def generate_code_v2(kernel):
 
     # }}}
 
+    # implemented_domains is rarely needed, so wrap it to make unpickling faster.
+    from loopy.tools import LazilyUnpicklingDictionary
+    lazy_domains = LazilyUnpicklingDictionary(
+            codegen_result.implemented_domains)
+    codegen_result = codegen_result.copy(implemented_domains=lazy_domains)
+
     logger.info("%s: generate code: done" % kernel.name)
 
     if CACHING_ENABLED: