diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp
index 71e3d067971bdf843fa723f32cec77e9ec3e45a7..ec4854c03e4294d50cc1c4fa78fb1148df5c83b3 100644
--- a/src/wrap_cl.hpp
+++ b/src/wrap_cl.hpp
@@ -1560,6 +1560,14 @@ namespace pyopencl
         PYOPENCL_CALL_GUARDED_THREADED(clWaitForEvents, (1, &m_event));
       }
 
+      // Called from a destructor context below:
+      // - Should not release the GIL
+      // - Should fail gracefully in the face of errors
+      virtual void wait_during_cleanup_without_releasing_the_gil()
+      {
+        PYOPENCL_CALL_GUARDED_CLEANUP(clWaitForEvents, (1, &m_event));
+      }
+
 #if PYOPENCL_CL_VERSION >= 0x1010
     // {{{ set_callback, by way of a a thread-based construction
 
@@ -1688,7 +1696,11 @@ namespace pyopencl
       { }
 
       ~nanny_event()
-      { wait(); }
+      {
+        // It appears that Pybind can get very confused if we release the GIL here:
+        // https://github.com/inducer/pyopencl/issues/296
+        wait_during_cleanup_without_releasing_the_gil();
+      }
 
       py::object get_ward() const
       {
@@ -1705,6 +1717,12 @@ namespace pyopencl
         event::wait();
         m_ward.reset();
       }
+
+      virtual void wait_during_cleanup_without_releasing_the_gil()
+      {
+        event::wait_during_cleanup_without_releasing_the_gil();
+        m_ward.reset();
+      }
   };
 #else
   class nanny_event : public event
@@ -1726,7 +1744,11 @@ namespace pyopencl
       { }
 
       ~nanny_event()
-      { wait(); }
+      {
+        // It appears that Pybind can get very confused if we release the GIL here:
+        // https://github.com/inducer/pyopencl/issues/296
+        wait_during_cleanup_without_releasing_the_gil();
+      }
 
       py::object get_ward() const
       { return m_ward; }
@@ -1736,6 +1758,12 @@ namespace pyopencl
         event::wait();
         m_ward = py::none();
       }
+
+      virtual void wait_during_cleanup_without_releasing_the_gil()
+      {
+        event::wait_during_cleanup_without_releasing_the_gil();
+        m_ward = py::none();
+      }
   };
 #endif
 
diff --git a/test/test_wrapper.py b/test/test_wrapper.py
index e052295550f572234f8495206561528a2c6e6644..75b39b495feab12b18f89b99d3e85bd53ea08849 100644
--- a/test/test_wrapper.py
+++ b/test/test_wrapper.py
@@ -1112,6 +1112,33 @@ def test_copy_buffer_rect(ctx_factory):
             region=arr1.shape[::-1])
 
 
+def test_threaded_nanny_events(ctx_factory):
+    # https://github.com/inducer/pyopencl/issues/296
+
+    import gc
+    import threading
+
+    def create_arrays_thread(n1=10, n2=20):
+        ctx = ctx_factory()
+        queue = cl.CommandQueue(ctx)
+        for i1 in range(n2):
+            for i in range(n1):
+                acl = cl.array.zeros(queue, 10, dtype=np.float32)
+                acl.get()
+            # Garbage collection triggers the error
+            print("collected ", str(gc.collect()))
+            print("stats ", gc.get_stats())
+
+    t1 = threading.Thread(target=create_arrays_thread)
+    t2 = threading.Thread(target=create_arrays_thread)
+
+    t1.start()
+    t2.start()
+
+    t1.join()
+    t2.join()
+
+
 if __name__ == "__main__":
     # make sure that import failures get reported, instead of skipping the tests.
     import pyopencl  # noqa