From 7a8a23481c812059224b9333be9e3fb021dede93 Mon Sep 17 00:00:00 2001
From: Guillaume THOMAS-COLLIGNON <guillaumet@nvidia.com>
Date: Thu, 7 May 2020 15:00:25 -0500
Subject: [PATCH 1/7] Added function to retain/release primary context

---
 src/cpp/cuda.hpp             | 24 ++++++++++++++++++++++++
 src/wrapper/wrap_cudadrv.cpp |  6 ++++++
 2 files changed, 30 insertions(+)
diff --git a/src/cpp/cuda.hpp b/src/cpp/cuda.hpp
index da60bd57..3acab514 100644
--- a/src/cpp/cuda.hpp
+++ b/src/cpp/cuda.hpp
@@ -491,6 +491,10 @@ namespace pycuda
         return result;
       }
 #endif
+#if CUDAPP_CUDA_VERSION >= 7000
+      boost::shared_ptr<context> retain_primary_context();
+      void release_primary_context();
+#endif
 
   };
 
@@ -832,6 +836,26 @@ namespace pycuda
 
 
 
+#if CUDAPP_CUDA_VERSION >= 7000
+  inline boost::shared_ptr<context> device::retain_primary_context()
+  {
+    context::prepare_context_switch();
+
+    CUcontext ctx;
+    CUDAPP_CALL_GUARDED_THREADED(cuDevicePrimaryCtxRetain, (&ctx, m_device));
+    boost::shared_ptr<context> result(new context(ctx));
+    context_stack::get().push(result);
+    CUDAPP_CALL_GUARDED(cuCtxPushCurrent, (ctx));
+    return result;
+  }
+
+  inline void device::release_primary_context()
+  {
+    CUDAPP_CALL_GUARDED(cuDevicePrimaryCtxRelease, (m_device));
+  }
+#endif
+
+
 
 
 
diff --git a/src/wrapper/wrap_cudadrv.cpp b/src/wrapper/wrap_cudadrv.cpp
index dfa3d1cd..634e08b0 100644
--- a/src/wrapper/wrap_cudadrv.cpp
+++ b/src/wrapper/wrap_cudadrv.cpp
@@ -1099,6 +1099,12 @@ BOOST_PYTHON_MODULE(_driver)
           (py::args("self"), py::args("flags")=0))
 #if CUDAPP_CUDA_VERSION >= 4000
       .DEF_SIMPLE_METHOD(can_access_peer)
+#endif
+#if CUDAPP_CUDA_VERSION >= 7000
+      .def("retain_primary_context", &cl::retain_primary_context,
+          (py::args("self")))
+      .def("release_primary_context", &cl::release_primary_context,
+          (py::args("self")))
 #endif
       ;
   }
-- 
GitLab


From 2c8f20946fe38d0da4364f8c0aa4b73246457147 Mon Sep 17 00:00:00 2001
From: Guillaume THOMAS-COLLIGNON <guillaumet@nvidia.com>
Date: Tue, 26 May 2020 15:19:36 -0500
Subject: [PATCH 2/7] Switched primary_context to a subclass

---
 doc/source/driver.rst        |  10 ++++
 src/cpp/cuda.hpp             | 105 ++++++++++++++++++++++++++++-------
 src/wrapper/wrap_cudadrv.cpp |   4 +-
 3 files changed, 95 insertions(+), 24 deletions(-)

diff --git a/doc/source/driver.rst b/doc/source/driver.rst
index b371526e..9d1ae12c 100644
--- a/doc/source/driver.rst
+++ b/doc/source/driver.rst
@@ -657,6 +657,16 @@ Devices and Contexts
 
         Also make the newly-created context the current context.
 
+    .. method:: retain_primary_context(flags=ctx_flags.SCHED_AUTO)
+
+        Return the :class:`Context` obtained by retaining the device's
+        primary context, which is the one used by the CUDA runtime API,
+        and sets the context's flags using the :class:`ctx_flags` values.
+
+        Also make the newly-retained context the current context.
+
+        CUDA 7.0 and newer.
+
     .. method:: can_access_peer(dev)
 
         CUDA 4.0 and newer.
diff --git a/src/cpp/cuda.hpp b/src/cpp/cuda.hpp
index 3acab514..9f222a07 100644
--- a/src/cpp/cuda.hpp
+++ b/src/cpp/cuda.hpp
@@ -407,6 +407,7 @@ namespace pycuda
 
   // {{{ device
   class context;
+  class primaryContext;
 
   class device
   {
@@ -479,6 +480,9 @@ namespace pycuda
       }
 
       boost::shared_ptr<context> make_context(unsigned int flags);
+#if CUDAPP_CUDA_VERSION >= 7000
+      boost::shared_ptr<context> retain_primary_context(unsigned int flags);
+#endif
 
       CUdevice handle() const
       { return m_device; }
@@ -491,10 +495,6 @@ namespace pycuda
         return result;
       }
 #endif
-#if CUDAPP_CUDA_VERSION >= 7000
-      boost::shared_ptr<context> retain_primary_context();
-      void release_primary_context();
-#endif
 
   };
 
@@ -563,7 +563,14 @@ namespace pycuda
       { return m_stack.top(); }
 
       void pop()
-      { m_stack.pop(); }
+      {
+        if (m_stack.empty())
+        {
+          throw error("m_stack::pop", CUDA_ERROR_INVALID_CONTEXT,
+              "cannot pop context from empty stack");
+        }
+        m_stack.pop();
+      }
 
       void push(value_type v)
       { m_stack.push(v); }
@@ -579,7 +586,7 @@ namespace pycuda
 
   class context : boost::noncopyable
   {
-    private:
+    protected:
       CUcontext m_context;
       bool m_valid;
       unsigned m_use_count;
@@ -641,7 +648,7 @@ namespace pycuda
         return result;
       }
 
-      void detach()
+      virtual void detach()
       {
         if (m_valid)
         {
@@ -815,10 +822,76 @@ namespace pycuda
       friend void context_push(boost::shared_ptr<context> ctx);
       friend boost::shared_ptr<context>
           gl::make_gl_context(device const &dev, unsigned int flags);
+      friend class primaryContext;
   };
 
+  class primaryContext : public context
+  {
+   protected:
+      CUdevice m_device;
+
+    public:
+      primaryContext(CUcontext ctx, CUdevice dev)
+        : context (ctx), m_device(dev)
+      { }
+
+      ~primaryContext()
+      {
+        if (m_valid)
+        {
+          /* It's possible that we get here with a non-zero m_use_count. Since the context
+            * stack holds shared_ptrs, this must mean that the context stack itself is getting
+            * destroyed, which means it's ok for this context to sign off, too.
+            */
+          detach();
+        }
+      }
+
+      // Primary context was created with retainPrimaryContext.
+      void detach() {
+          if (m_valid)
+          {
+            bool active_before_destruction = current_context().get() == this;
+            if (active_before_destruction)
+            {
+              CUcontext below;
+              CUDAPP_CALL_GUARDED_CLEANUP(cuCtxPopCurrent, (&below));
+              CUDAPP_CALL_GUARDED_CLEANUP(cuDevicePrimaryCtxRelease, (m_device));
+            }
+            else
+            {
+              if (m_thread == boost::this_thread::get_id())
+              {
+                CUDAPP_CALL_GUARDED_CLEANUP(cuDevicePrimaryCtxRelease, (m_device));
+              }
+              else
+              {
+                // In all likelihood, this context's managing thread has exited, and
+                // therefore this context has already been deleted. No need to harp
+                // on the fact that we still thought there was cleanup to do.
+
+                // std::cerr << "PyCUDA WARNING: leaked out-of-thread context " << std::endl;
+              }
+            }
 
+            m_valid = false; // This will also avoid calling context.detach() in parent class
 
+            if (active_before_destruction)
+            {
+              boost::shared_ptr<context> new_active = current_context(this);
+              if (new_active.get())
+              {
+                CUDAPP_CALL_GUARDED(cuCtxPushCurrent, (new_active->m_context));
+              }
+            }
+          }
+          else
+            throw error("context::detach", CUDA_ERROR_INVALID_CONTEXT,
+                "cannot detach from invalid context");
+      }
+      friend class device;
+      friend void context_push(boost::shared_ptr<context> ctx);
+  };
 
   inline
   boost::shared_ptr<context> device::make_context(unsigned int flags)
@@ -833,32 +906,22 @@ namespace pycuda
   }
 
 
-
-
-
 #if CUDAPP_CUDA_VERSION >= 7000
-  inline boost::shared_ptr<context> device::retain_primary_context()
+  inline boost::shared_ptr<context> device::retain_primary_context(unsigned int flags)
   {
     context::prepare_context_switch();
 
     CUcontext ctx;
-    CUDAPP_CALL_GUARDED_THREADED(cuDevicePrimaryCtxRetain, (&ctx, m_device));
-    boost::shared_ptr<context> result(new context(ctx));
+    CUDAPP_CALL_GUARDED(cuDevicePrimaryCtxRetain, (&ctx, m_device));
+    CUDAPP_CALL_GUARDED(cuDevicePrimaryCtxSetFlags, (m_device, flags));
+    boost::shared_ptr<context> result(new primaryContext(ctx, m_device));
     context_stack::get().push(result);
     CUDAPP_CALL_GUARDED(cuCtxPushCurrent, (ctx));
     return result;
   }
-
-  inline void device::release_primary_context()
-  {
-    CUDAPP_CALL_GUARDED(cuDevicePrimaryCtxRelease, (m_device));
-  }
 #endif
 
 
-
-
-
 #if CUDAPP_CUDA_VERSION >= 2000
   inline
   void context_push(boost::shared_ptr<context> ctx)
diff --git a/src/wrapper/wrap_cudadrv.cpp b/src/wrapper/wrap_cudadrv.cpp
index 634e08b0..36be9fbc 100644
--- a/src/wrapper/wrap_cudadrv.cpp
+++ b/src/wrapper/wrap_cudadrv.cpp
@@ -1102,9 +1102,7 @@ BOOST_PYTHON_MODULE(_driver)
 #endif
 #if CUDAPP_CUDA_VERSION >= 7000
       .def("retain_primary_context", &cl::retain_primary_context,
-          (py::args("self")))
-      .def("release_primary_context", &cl::release_primary_context,
-          (py::args("self")))
+          (py::args("self"), py::args("flags")=0))
 #endif
       ;
   }
-- 
GitLab


From 40219864010fc3f66da9e14e698fc90ce8dff816 Mon Sep 17 00:00:00 2001
From: Guillaume THOMAS-COLLIGNON <guillaumet@nvidia.com>
Date: Fri, 31 Jul 2020 12:29:58 -0500
Subject: [PATCH 3/7] Removed flags for primary context, removed implicit push,
 cleaned up code

---
 doc/source/driver.rst        |  8 +++-----
 src/cpp/cuda.hpp             | 14 ++++----------
 src/wrapper/wrap_cudadrv.cpp |  2 +-
 3 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/doc/source/driver.rst b/doc/source/driver.rst
index 9d1ae12c..4ef8bafc 100644
--- a/doc/source/driver.rst
+++ b/doc/source/driver.rst
@@ -657,13 +657,11 @@ Devices and Contexts
 
         Also make the newly-created context the current context.
 
-    .. method:: retain_primary_context(flags=ctx_flags.SCHED_AUTO)
+    .. method:: retain_primary_context()
 
         Return the :class:`Context` obtained by retaining the device's
-        primary context, which is the one used by the CUDA runtime API,
-        and sets the context's flags using the :class:`ctx_flags` values.
-
-        Also make the newly-retained context the current context.
+        primary context, which is the one used by the CUDA runtime API.
+        Unlike :meth:`Device.make_context`, the retained context is not made current.
 
         CUDA 7.0 and newer.
 
diff --git a/src/cpp/cuda.hpp b/src/cpp/cuda.hpp
index 9f222a07..b513622c 100644
--- a/src/cpp/cuda.hpp
+++ b/src/cpp/cuda.hpp
@@ -407,7 +407,7 @@ namespace pycuda
 
   // {{{ device
   class context;
-  class primaryContext;
+  class primary_context;
 
   class device
   {
@@ -481,7 +481,7 @@ namespace pycuda
 
       boost::shared_ptr<context> make_context(unsigned int flags);
 #if CUDAPP_CUDA_VERSION >= 7000
-      boost::shared_ptr<context> retain_primary_context(unsigned int flags);
+      boost::shared_ptr<context> retain_primary_context();
 #endif
 
       CUdevice handle() const
@@ -889,8 +889,7 @@ namespace pycuda
             throw error("context::detach", CUDA_ERROR_INVALID_CONTEXT,
                 "cannot detach from invalid context");
       }
-      friend class device;
-      friend void context_push(boost::shared_ptr<context> ctx);
+      // friend void context_push(boost::shared_ptr<context> ctx);
   };
 
   inline
@@ -907,16 +906,11 @@ namespace pycuda
 
 
 #if CUDAPP_CUDA_VERSION >= 7000
-  inline boost::shared_ptr<context> device::retain_primary_context(unsigned int flags)
+  inline boost::shared_ptr<context> device::retain_primary_context()
   {
-    context::prepare_context_switch();
-
     CUcontext ctx;
     CUDAPP_CALL_GUARDED(cuDevicePrimaryCtxRetain, (&ctx, m_device));
-    CUDAPP_CALL_GUARDED(cuDevicePrimaryCtxSetFlags, (m_device, flags));
     boost::shared_ptr<context> result(new primaryContext(ctx, m_device));
-    context_stack::get().push(result);
-    CUDAPP_CALL_GUARDED(cuCtxPushCurrent, (ctx));
     return result;
   }
 #endif
diff --git a/src/wrapper/wrap_cudadrv.cpp b/src/wrapper/wrap_cudadrv.cpp
index 36be9fbc..a27b836e 100644
--- a/src/wrapper/wrap_cudadrv.cpp
+++ b/src/wrapper/wrap_cudadrv.cpp
@@ -1102,7 +1102,7 @@ BOOST_PYTHON_MODULE(_driver)
 #endif
 #if CUDAPP_CUDA_VERSION >= 7000
       .def("retain_primary_context", &cl::retain_primary_context,
-          (py::args("self"), py::args("flags")=0))
+          (py::args("self")))
 #endif
       ;
   }
-- 
GitLab


From 078276cb40ceb4237b6eb89daf146eb4e59c73e7 Mon Sep 17 00:00:00 2001
From: Guillaume THOMAS-COLLIGNON <guillaumet@nvidia.com>
Date: Fri, 31 Jul 2020 12:30:36 -0500
Subject: [PATCH 4/7] Added test for primary context

---
 test/test_driver.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/test/test_driver.py b/test/test_driver.py
index b440eff0..9ff010e6 100644
--- a/test/test_driver.py
+++ b/test/test_driver.py
@@ -644,6 +644,23 @@ class TestDriver:
         del mem_b
         ctx2.detach()
 
+    @mark_cuda_test
+    def test_additional_primary_context(self):
+        # Device.retain_primary_context only exists in builds against CUDA >= 7.0.
+        if drv.get_version() < (7, 0, 0):
+            return
+        if drv.get_version() < (8,):
+            if drv.Context.get_device().compute_mode == drv.compute_mode.EXCLUSIVE:
+                return
+
+        mem_a = drv.mem_alloc(50)
+        pctx = drv.Context.get_device().retain_primary_context()
+        pctx.push()
+        mem_b = drv.mem_alloc(60)
+
+        del mem_a, mem_b
+        pctx.detach()
+
     @mark_cuda_test
     def test_3d_texture(self):
         # adapted from code by Nicolas Pinto
-- 
GitLab


From 059eefb307b3a60dca98358f895306ca1d2978be Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Tue, 4 Aug 2020 15:51:31 -0500
Subject: [PATCH 5/7] primaryContext -> primary_context for style consistency

---
 src/cpp/cuda.hpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/cpp/cuda.hpp b/src/cpp/cuda.hpp
index b513622c..c57b54c0 100644
--- a/src/cpp/cuda.hpp
+++ b/src/cpp/cuda.hpp
@@ -822,20 +822,20 @@ namespace pycuda
       friend void context_push(boost::shared_ptr<context> ctx);
       friend boost::shared_ptr<context>
           gl::make_gl_context(device const &dev, unsigned int flags);
-      friend class primaryContext;
+      friend class primary_context;
   };
 
-  class primaryContext : public context
+  class primary_context : public context
   {
    protected:
       CUdevice m_device;
 
     public:
-      primaryContext(CUcontext ctx, CUdevice dev)
+      primary_context(CUcontext ctx, CUdevice dev)
         : context (ctx), m_device(dev)
       { }
 
-      ~primaryContext()
+      ~primary_context()
       {
         if (m_valid)
         {
@@ -910,7 +910,7 @@ namespace pycuda
   {
     CUcontext ctx;
     CUDAPP_CALL_GUARDED(cuDevicePrimaryCtxRetain, (&ctx, m_device));
-    boost::shared_ptr<context> result(new primaryContext(ctx, m_device));
+    boost::shared_ptr<context> result(new primary_context(ctx, m_device));
     return result;
   }
 #endif
-- 
GitLab


From 6f612c51395bb769f72e3dbaed8c17e052c0fa57 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Tue, 4 Aug 2020 16:03:24 -0500
Subject: [PATCH 6/7] Factor out context::detach_internal to avoid code
 duplication in primary_context

---
 src/cpp/cuda.hpp | 70 ++++++++++--------------------------------------
 1 file changed, 14 insertions(+), 56 deletions(-)

diff --git a/src/cpp/cuda.hpp b/src/cpp/cuda.hpp
index c57b54c0..78f49a7a 100644
--- a/src/cpp/cuda.hpp
+++ b/src/cpp/cuda.hpp
@@ -598,7 +598,7 @@ namespace pycuda
         m_thread(boost::this_thread::get_id())
       { }
 
-      ~context()
+      virtual ~context()
       {
         if (m_valid)
         {
@@ -648,6 +648,13 @@ namespace pycuda
         return result;
       }
 
+    protected:
+      virtual void detach_internal()
+      {
+        CUDAPP_CALL_GUARDED_CLEANUP(cuCtxDetach, (m_context));
+      }
+
+    public:
       virtual void detach()
       {
         if (m_valid)
@@ -655,14 +662,14 @@ namespace pycuda
           bool active_before_destruction = current_context().get() == this;
           if (active_before_destruction)
           {
-            CUDAPP_CALL_GUARDED_CLEANUP(cuCtxDetach, (m_context));
+            detach_internal();
           }
           else
           {
             if (m_thread == boost::this_thread::get_id())
             {
               CUDAPP_CALL_GUARDED_CLEANUP(cuCtxPushCurrent, (m_context));
-              CUDAPP_CALL_GUARDED_CLEANUP(cuCtxDetach, (m_context));
+              detach_internal();
               /* pop is implicit in detach */
             }
             else
@@ -835,61 +842,18 @@ namespace pycuda
         : context (ctx), m_device(dev)
       { }
 
-      ~primary_context()
+    protected:
+      virtual void detach_internal()
       {
-        if (m_valid)
-        {
-          /* It's possible that we get here with a non-zero m_use_count. Since the context
-            * stack holds shared_ptrs, this must mean that the context stack itself is getting
-            * destroyed, which means it's ok for this context to sign off, too.
-            */
-          detach();
-        }
-      }
-
-      // Primary context was created with retainPrimaryContext.
-      void detach() {
-          if (m_valid)
-          {
-            bool active_before_destruction = current_context().get() == this;
-            if (active_before_destruction)
-            {
-              CUcontext below;
-              CUDAPP_CALL_GUARDED_CLEANUP(cuCtxPopCurrent, (&below));
-              CUDAPP_CALL_GUARDED_CLEANUP(cuDevicePrimaryCtxRelease, (m_device));
-            }
-            else
-            {
-              if (m_thread == boost::this_thread::get_id())
-              {
-                CUDAPP_CALL_GUARDED_CLEANUP(cuDevicePrimaryCtxRelease, (m_device));
-              }
-              else
-              {
-                // In all likelihood, this context's managing thread has exited, and
-                // therefore this context has already been deleted. No need to harp
-                // on the fact that we still thought there was cleanup to do.
-
-                // std::cerr << "PyCUDA WARNING: leaked out-of-thread context " << std::endl;
-              }
-            }
-
-            m_valid = false; // This will also avoid calling context.detach() in parent class
-
-            if (active_before_destruction)
-            {
-              boost::shared_ptr<context> new_active = current_context(this);
-              if (new_active.get())
-              {
-                CUDAPP_CALL_GUARDED(cuCtxPushCurrent, (new_active->m_context));
-              }
-            }
-          }
-          else
-            throw error("context::detach", CUDA_ERROR_INVALID_CONTEXT,
-                "cannot detach from invalid context");
+        // The primary context was obtained via cuDevicePrimaryCtxRetain, so it
+        // is released here rather than detached. Unlike cuCtxDetach, the
+        // release never pops the context off the driver's stack, and
+        // context::detach() only calls this hook with the context on top
+        // (already current, or pushed just before), so pop it explicitly.
+        CUcontext popped;
+        CUDAPP_CALL_GUARDED_CLEANUP(cuCtxPopCurrent, (&popped));
+        CUDAPP_CALL_GUARDED_CLEANUP(cuDevicePrimaryCtxRelease, (m_device));
       }
-      // friend void context_push(boost::shared_ptr<context> ctx);
   };
 
   inline
-- 
GitLab


From 07067df9253564ae69ada7f90b7524d3afcec6b7 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Tue, 4 Aug 2020 16:04:54 -0500
Subject: [PATCH 7/7] Add versionadded to the docs for retain_primary_context

---
 doc/source/driver.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/source/driver.rst b/doc/source/driver.rst
index 4ef8bafc..3edbdc5a 100644
--- a/doc/source/driver.rst
+++ b/doc/source/driver.rst
@@ -665,6 +665,8 @@ Devices and Contexts
 
         CUDA 7.0 and newer.
 
+        .. versionadded:: 2020.1
+
     .. method:: can_access_peer(dev)
 
         CUDA 4.0 and newer.
-- 
GitLab