diff --git a/doc/source/misc.rst b/doc/source/misc.rst
index 903749b60afd9b96b94f42dedd3ee71d76f89899..c69f95ccdede76632397ff54bb1e4fb8a16b216c 100644
--- a/doc/source/misc.rst
+++ b/doc/source/misc.rst
@@ -75,6 +75,14 @@ Version 2011.2
 
 * Add :func:`pyopencl.enqueue_migrate_mem_object`.
 * Add :func:`pyopencl.image_from_array`.
+* IMPORTANT BUGFIX: Kernel caching was broken for all the 2011.1.x releases, with
+  severe consequences on the execution time of :class:`pyopencl.array.Array`
+  operations.
+  Henrik Andresen at a `PyOpenCL workshop at DTU <http://gpulab.imm.dtu.dk/courses.html>`_
+  first noticed the slow timings.
+* All comparable PyOpenCL objects are now also hashable.
+* Add :func:`pyopencl.tools.context_dependent_memoize` to the documented
+  functionality.
 
 Version 2011.1.2
 ----------------
diff --git a/doc/source/tools.rst b/doc/source/tools.rst
index 197e82891c5e8989daf564b536b517148b4a8e07..0af6ea8e53eeab6c97239792967fe5885faf7e30 100644
--- a/doc/source/tools.rst
+++ b/doc/source/tools.rst
@@ -73,3 +73,20 @@ the available memory.
         Implicitly calls :meth:`free_held`.
         This is useful as a cleanup action when a memory pool falls out
         of use.
+
+Kernel Caching
+--------------
+
+.. function:: context_dependent_memoize(func)
+
+    This decorator caches the result of the decorated function, *if* a
+    subsequent call occurs with the same :class:`pyopencl.Context`.  This is useful
+    for caching of kernels. Assumes that the first argument of the decorated
+    function is the :class:`pyopencl.Context`.
+
+.. function:: clear_context_caches()
+
+    Empties all context-dependent memoization caches. Also releases
+    all held references to contexts. If it is important to you that the
+    program detaches from its context, you might need to call this
+    function to free all remaining references to your context.
diff --git a/pyopencl/tools.py b/pyopencl/tools.py
index 1beb7017bef23020bf997add160390960e9fffef..fce7f7db12d0ebc8e253b7d42d7f3fbad4fb23df 100644
--- a/pyopencl/tools.py
+++ b/pyopencl/tools.py
@@ -43,29 +43,51 @@ MemoryPool = cl.MemoryPool
 
 
 
+context_dependent_memoized_functions = []
+
+
+
+
 @decorator
 def context_dependent_memoize(func, context, *args):
     """Provides memoization for things that get created inside
     a context, i.e. mainly programs and kernels. Assumes that
     the first argument of the decorated function is the context.
     """
-    dicname = "_ctx_memoize_dic_%s_%x" % (
-            func.__name__, hash(func))
-
     try:
-        return getattr(context, dicname)[args]
+        ctx_dict = func._pyopencl_ctx_dep_memoize_dic
     except AttributeError:
-        result = func(context, *args)
-        setattr(context, dicname, {args: result})
-        return result
+        # FIXME: This may keep contexts alive longer than desired.
+        # But I guess since the memory in them is freed, who cares.
+        ctx_dict = func._pyopencl_ctx_dep_memoize_dic = {}
+        context_dependent_memoized_functions.append(func)  # once per function
+
+    try:
+        return ctx_dict[context][args]
     except KeyError:
+        arg_dict = ctx_dict.setdefault(context, {})
         result = func(context, *args)
-        getattr(context,dicname)[args] = result
+        arg_dict[args] = result
         return result
 
 
 
 
+def clear_context_caches():
+    """Empty all context-dependent memoization caches, dropping the
+    context references they hold."""
+    for func in context_dependent_memoized_functions:
+        # Attribute name must match the one set in context_dependent_memoize.
+        ctx_dict = getattr(func, "_pyopencl_ctx_dep_memoize_dic", None)
+        if ctx_dict is not None:
+            ctx_dict.clear()
+
+import atexit
+atexit.register(clear_context_caches)
+
+
+
+
 def pytest_generate_tests_for_pyopencl(metafunc):
     class ContextFactory:
         def __init__(self, device):
@@ -75,6 +97,9 @@ def pytest_generate_tests_for_pyopencl(metafunc):
             # Get rid of leftovers from past tests.
             # CL implementations are surprisingly limited in how many
             # simultaneous contexts they allow...
+
+            clear_context_caches()
+
             from gc import collect
             collect()
 
diff --git a/test/test_wrapper.py b/test/test_wrapper.py
index a262137a9dfc1227892ba7781d8dfdc0e2e62164..44db7b58e0b9b38563a1ea6bd86e41c8d0bfefe3 100644
--- a/test/test_wrapper.py
+++ b/test/test_wrapper.py
@@ -353,6 +353,25 @@ class TestCL:
         cl.Program(context, kernel_src).build('-I.')
         cl.Program(context, kernel_src).build('-I.')
 
+    @pytools.test.mark_test.opencl
+    def test_context_dep_memoize(self, ctx_factory):
+        context = ctx_factory()
+        queue = cl.CommandQueue(context)
+
+        from pyopencl.tools import context_dependent_memoize
+        # One-element list: lets the nested function mutate the call count.
+        counter = [0]
+
+        @context_dependent_memoize
+        def do_something(ctx):
+            counter[0] += 1
+        # Second call with the same context must be served from the cache.
+        do_something(context)
+        do_something(context)
+
+        assert counter[0] == 1
+
+
 
 
 if __name__ == "__main__":