From b14ca7dcd756f6368a98284a70c92072b25ad403 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Fri, 5 Oct 2012 13:51:27 -0400
Subject: [PATCH] Introduce distinction between immediate and deferred
 allocators.

---
 doc/source/array.rst         |   7 +-
 doc/source/misc.rst          |   3 +
 doc/source/tools.rst         |  19 ++++-
 pyopencl/__init__.py         |   9 ++
 pyopencl/array.py            |  11 ++-
 src/wrapper/mempool.hpp      |  22 +++--
 src/wrapper/wrap_cl.hpp      |   9 ++
 src/wrapper/wrap_mempool.cpp | 155 ++++++++++++++++++++++++++++++-----
 8 files changed, 198 insertions(+), 37 deletions(-)

diff --git a/doc/source/array.rst b/doc/source/array.rst
index b8276109..752a1b8f 100644
--- a/doc/source/array.rst
+++ b/doc/source/array.rst
@@ -108,10 +108,6 @@ Under the hood, the complex types are simply `float2` and `double2`.
 The :class:`Array` Class
 ------------------------
 
-.. class:: DefaultAllocator(context, flags=pyopencl.mem_flags.READ_WRITE)
-
-    An alias for :class:`pyopencl.tools.CLAllocator`.
-
 .. class:: Array(cqa, shape, dtype, order="C", *, allocator=None, base=None, data=None)
 
     A :class:`numpy.ndarray` work-alike that stores its data and performs its
@@ -126,7 +122,8 @@ The :class:`Array` Class
 
     *allocator* may be `None` or a callable that, upon being called with an
     argument of the number of bytes to be allocated, returns an
-    :class:`pyopencl.Buffer` object.  (See :class:`DefaultAllocator`.)
+    :class:`pyopencl.Buffer` object. (A :class:`pyopencl.tools.MemoryPool`
+    instance is one useful example of an object to pass here.)
 
     .. versionchanged:: 2011.1
         Renamed *context* to *cqa*, made it general-purpose.
diff --git a/doc/source/misc.rst b/doc/source/misc.rst
index 0d4f937e..b0ba993c 100644
--- a/doc/source/misc.rst
+++ b/doc/source/misc.rst
@@ -91,6 +91,9 @@ Version 2012.2
   :func:`pyopencl.tools.get_or_register_dtype`.
 * Clean up the :class:`pyopencl.array.Array` constructor interface. Deprecate
   arrays with :attr:`pyopencl.array.Array.queue` equal to *None*.
+* Deprecate :class:`pyopencl.array.DefaultAllocator`.
+* Deprecate :class:`pyopencl.CLAllocator`.
+* Introduce :class:`pyopencl.DeferredAllocator`, :class:`pyopencl.ImmediateAllocator`.
 
 Version 2012.1
 --------------
diff --git a/doc/source/tools.rst b/doc/source/tools.rst
index 048d5c7d..f2f9b027 100644
--- a/doc/source/tools.rst
+++ b/doc/source/tools.rst
@@ -30,10 +30,25 @@ the available memory.
     memory is returned to the pool. This supports the same interface
     as :class:`pyopencl.Buffer`.
 
-.. class:: CLAllocator(context, mem_flags=pyopencl.mem_flags.READ_WRITE)
+.. class:: DeferredAllocator(context, mem_flags=pyopencl.mem_flags.READ_WRITE)
 
     *mem_flags* takes its values from :class:`pyopencl.mem_flags` and corresponds
-    to the *flags* argument of :class:`pyopencl.Buffer`.
+    to the *flags* argument of :class:`pyopencl.Buffer`. DeferredAllocator
+    has the same semantics as regular OpenCL buffer allocation, i.e. it may
+    promise memory to be available that later on (in any call to a buffer-using
+    CL function) turns out not to exist.
+
+    .. method:: __call__(size)
+
+        Allocate a :class:`pyopencl.Buffer` of the given *size*.
+
+.. class:: ImmediateAllocator(queue, mem_flags=pyopencl.mem_flags.READ_WRITE)
+
+    *mem_flags* takes its values from :class:`pyopencl.mem_flags` and corresponds
+    to the *flags* argument of :class:`pyopencl.Buffer`. ImmediateAllocator
+    is attached to the command queue *queue* and makes sure that each
+    allocation actually happens at the time of the call, so that out-of-memory
+    conditions are reported immediately rather than in a later CL call.
 
     .. method:: __call__(size)
 
diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index 57781304..cf56c2d3 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -37,6 +37,15 @@ def compiler_output(text):
 
 
 
+class CLAllocator(DeferredAllocator):
+    """Deprecated alias of DeferredAllocator; warns when instantiated."""
+    def __init__(self, *args, **kwargs):
+        from warnings import warn
+        warn("pyopencl.CLAllocator is deprecated. It will continue to exist "
+                "throughout the 2013.x versions of PyOpenCL. "
+                "Use {Deferred,Immediate}Allocator.", DeprecationWarning, 2)
+        DeferredAllocator.__init__(self, *args, **kwargs)
+
 # {{{ Kernel
 
 class Kernel(_cl._Kernel):
diff --git a/pyopencl/array.py b/pyopencl/array.py
index d03dd596..e4d46264 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -191,9 +191,14 @@ def elwise_kernel_runner(kernel_getter):
 
 
 
-
-
-DefaultAllocator = cl.CLAllocator
+class DefaultAllocator(cl.DeferredAllocator):
+    """Deprecated alias of cl.DeferredAllocator; warns when instantiated."""
+    def __init__(self, *args, **kwargs):
+        from warnings import warn
+        warn("pyopencl.array.DefaultAllocator is deprecated. "
+                "It will continue to exist throughout the 2013.x "
+                "versions of PyOpenCL.", DeprecationWarning, 2)
+        cl.DeferredAllocator.__init__(self, *args, **kwargs)
 
 # }}}
 
diff --git a/src/wrapper/mempool.hpp b/src/wrapper/mempool.hpp
index f1348343..bbf23275 100644
--- a/src/wrapper/mempool.hpp
+++ b/src/wrapper/mempool.hpp
@@ -58,7 +58,7 @@ namespace PYGPU_PACKAGE
       container_t m_container;
       typedef typename container_t::value_type bin_pair_t;
 
-      Allocator m_allocator;
+      std::auto_ptr<Allocator> m_allocator;
 
       // A held block is one that's been released by the application, but that
       // we are keeping around to dish out again.
@@ -71,8 +71,16 @@ namespace PYGPU_PACKAGE
 
     public:
       memory_pool(Allocator const &alloc=Allocator())
-        : m_allocator(alloc), m_held_blocks(0), m_active_blocks(0), m_stop_holding(false)
+        : m_allocator(alloc.copy()), // own whatever alloc.copy() returns
+        m_held_blocks(0), m_active_blocks(0), m_stop_holding(false)
       {
+        if (m_allocator->is_deferred()) // warn only; nothing is thrown here
+        {
+          PyErr_WarnEx(PyExc_UserWarning, "Memory pools expect non-deferred "
+              "semantics from their allocators. You passed a deferred "
+              "allocator, i.e. an allocator whose allocations can turn out to "
+              "be unavailable long after allocation.", 1);
+        } // NOTE(review): PyErr_WarnEx's return value is ignored -- confirm
       }
 
       ~memory_pool()
@@ -163,7 +171,7 @@ namespace PYGPU_PACKAGE
             throw;
         }
 
-        m_allocator.try_release_blocks();
+        m_allocator->try_release_blocks();
         if (bin.size())
           return pop_block_from_bin(bin, size);
 
@@ -198,7 +206,7 @@ namespace PYGPU_PACKAGE
           get_bin(bin_number(size)).push_back(p);
         }
         else
-          m_allocator.free(p);
+          m_allocator->free(p);
       }
 
       void free_held()
@@ -209,7 +217,7 @@ namespace PYGPU_PACKAGE
 
           while (bin.size())
           {
-            m_allocator.free(bin.back());
+            m_allocator->free(bin.back());
             bin.pop_back();
 
             dec_held_blocks();
@@ -241,7 +249,7 @@ namespace PYGPU_PACKAGE
 
           if (bin.size())
           {
-            m_allocator.free(bin.back());
+            m_allocator->free(bin.back());
             bin.pop_back();
 
             dec_held_blocks();
@@ -256,7 +264,7 @@ namespace PYGPU_PACKAGE
     private:
       pointer_type get_from_allocator(size_type alloc_sz)
       {
-        pointer_type result = m_allocator.allocate(alloc_sz);
+        pointer_type result = m_allocator->allocate(alloc_sz);
         ++m_active_blocks;
 
         return result;
diff --git a/src/wrapper/wrap_cl.hpp b/src/wrapper/wrap_cl.hpp
index c629d9d0..6c64ac97 100644
--- a/src/wrapper/wrap_cl.hpp
+++ b/src/wrapper/wrap_cl.hpp
@@ -1159,6 +1159,15 @@ namespace pyopencl
         }
       }
 
+      // Look up this queue's context (CL_QUEUE_CONTEXT) and wrap it, retained.
+      std::auto_ptr<context> get_context() const
+      {
+        cl_context ctx_id;
+        PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo,
+            (m_queue, CL_QUEUE_CONTEXT, sizeof(ctx_id), &ctx_id, 0));
+        return std::auto_ptr<context>(new context(ctx_id, /*retain*/ true));
+      }
+
 #if PYOPENCL_CL_VERSION < 0x1010
       cl_command_queue_properties set_property(
           cl_command_queue_properties prop,
diff --git a/src/wrapper/wrap_mempool.cpp b/src/wrapper/wrap_mempool.cpp
index af9df05d..411f9d0b 100644
--- a/src/wrapper/wrap_mempool.cpp
+++ b/src/wrapper/wrap_mempool.cpp
@@ -19,28 +19,35 @@ namespace py = boost::python;
 
 namespace
 {
-  class cl_allocator
+  class cl_allocator_base
   {
+    protected:
       boost::shared_ptr<pyopencl::context> m_context;
       cl_mem_flags m_flags;
 
     public:
-      cl_allocator(boost::shared_ptr<pyopencl::context> const &ctx,
+      cl_allocator_base(boost::shared_ptr<pyopencl::context> const &ctx,
           cl_mem_flags flags=CL_MEM_READ_WRITE)
         : m_context(ctx), m_flags(flags)
       {
         if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))
-          throw pyopencl::error("PoolAllocator", CL_INVALID_VALUE,
+          throw pyopencl::error("Allocator", CL_INVALID_VALUE,
               "cannot specify USE_HOST_PTR or COPY_HOST_PTR flags");
       }
 
+      cl_allocator_base(cl_allocator_base const &src)
+      : m_context(src.m_context), m_flags(src.m_flags)
+      { }
+
+      virtual ~cl_allocator_base()
+      { }
+
       typedef cl_mem pointer_type;
       typedef size_t size_type;
 
-      pointer_type allocate(size_type s)
-      {
-        return pyopencl::create_buffer(m_context->data(), m_flags, s, 0);
-      }
+      virtual cl_allocator_base *copy() const = 0;
+      virtual bool is_deferred() const = 0;
+      virtual pointer_type allocate(size_type s) = 0;
 
       void free(pointer_type p)
       {
@@ -53,13 +60,110 @@ namespace
       }
   };
 
+  // Allocator handing out buffers straight from create_buffer(); reports
+  // itself as deferred, see is_deferred().
+  class cl_deferred_allocator : public cl_allocator_base
+  {
+    public:
+      cl_deferred_allocator(boost::shared_ptr<pyopencl::context> const &ctx,
+          cl_mem_flags flags=CL_MEM_READ_WRITE)
+        : cl_allocator_base(ctx, flags)
+      { }
+
+      // Heap-allocated duplicate; ownership passes to the caller.
+      cl_allocator_base *copy() const
+      { return new cl_deferred_allocator(*this); }
+
+      // Tells callers (e.g. memory pools) that this allocator is deferred.
+      bool is_deferred() const
+      { return true; }
+
+      // Create a buffer of s bytes in the stored context with the stored flags.
+      pointer_type allocate(size_type s)
+      {
+        return pyopencl::create_buffer(m_context->data(), m_flags, s, 0);
+      }
+  };
+
+  const unsigned zero = 0;
+
+  // Allocator whose allocate() additionally enqueues a small write on its
+  // queue, so that out-of-memory conditions show up at allocation time.
+  class cl_immediate_allocator : public cl_allocator_base
+  {
+    private:
+      typedef cl_allocator_base super;
+      pyopencl::command_queue m_queue;
+
+    public:
+      cl_immediate_allocator(pyopencl::command_queue &queue,
+          cl_mem_flags flags=CL_MEM_READ_WRITE)
+        : super(boost::shared_ptr<pyopencl::context>(queue.get_context()), flags),
+        m_queue(queue.data(), /*retain*/ true)
+      { }
+
+      cl_immediate_allocator(cl_immediate_allocator const &src)
+        : super(src), m_queue(src.m_queue)
+      { }
+
+      cl_allocator_base *copy() const
+      { return new cl_immediate_allocator(*this); }
+
+      bool is_deferred() const
+      { return false; }
+
+      // Returns a buffer of s bytes; errors from creation or the write propagate.
+      pointer_type allocate(size_type s)
+      {
+        pointer_type ptr =  pyopencl::create_buffer(
+            m_context->data(), m_flags, s, 0);
+
+        // Force the allocation to happen right here and now. This is expensive,
+        // but memory pools (the main user) assume allocation is expensive anyway
+        // and rely on exact 'out-of-memory' information.
+        try
+        {
+          // Non-blocking: source must outlive this call, so use file-scope 'zero'.
+          PYOPENCL_CALL_GUARDED(clEnqueueWriteBuffer, (
+                m_queue.data(), ptr, /* is blocking */ CL_FALSE,
+                0, std::min(s, sizeof(zero)), &zero, 0, NULL, NULL));
+        }
+        catch (...)
+        {
+          clReleaseMemObject(ptr); // do not leak the buffer on failure
+          throw;
+        }
+        // No need to wait for completion: clWaitForEvents (e.g.) cannot return
+        // mem object allocation failures, so the buffer is faulted in on enqueue.
+        return ptr;
+      }
+  };
+
 
 
 
   inline
-  pyopencl::buffer *allocator_call(cl_allocator &alloc, size_t size)
+  pyopencl::buffer *allocator_call(cl_allocator_base &alloc, size_t size)
   {
-    cl_mem mem = alloc.allocate(size);
+    cl_mem mem;
+    int try_count = 0;
+    while (try_count < 2)
+    {
+      try
+      {
+        mem = alloc.allocate(size);
+        break;
+      }
+      catch (pyopencl::error &e)
+      {
+        if (!e.is_out_of_memory())
+          throw;
+        if (++try_count == 2)
+          throw;
+      }
+
+      alloc.try_release_blocks();
+    }
 
     try
     {
@@ -76,12 +180,12 @@ namespace
 
 
   class pooled_buffer
-    : public pyopencl::pooled_allocation<pyopencl::memory_pool<cl_allocator> >,
+    : public pyopencl::pooled_allocation<pyopencl::memory_pool<cl_allocator_base> >,
     public pyopencl::memory_object_holder
   {
     private:
       typedef
-        pyopencl::pooled_allocation<pyopencl::memory_pool<cl_allocator> >
+        pyopencl::pooled_allocation<pyopencl::memory_pool<cl_allocator_base> >
         super;
 
     public:
@@ -98,8 +202,8 @@ namespace
 
 
   pooled_buffer *device_pool_allocate(
-      boost::shared_ptr<pyopencl::memory_pool<cl_allocator> > pool,
-      pyopencl::memory_pool<cl_allocator>::size_type sz)
+      boost::shared_ptr<pyopencl::memory_pool<cl_allocator_base> > pool,
+      pyopencl::memory_pool<cl_allocator_base>::size_type sz)
   {
     return new pooled_buffer(pool, sz);
   }
@@ -132,11 +236,8 @@ void pyopencl_expose_mempool()
   py::def("bitlog2", pyopencl::bitlog2);
 
   {
-    typedef cl_allocator cls;
-    py::class_<cls> wrapper("CLAllocator",
-        py::init<
-          boost::shared_ptr<pyopencl::context> const &,
-          py::optional<cl_mem_flags> >());
+    typedef cl_allocator_base cls;
+    py::class_<cls, boost::noncopyable> wrapper("AllocatorBase", py::no_init);
     wrapper
       .def("__call__", allocator_call,
           py::return_value_policy<py::manage_new_object>())
@@ -145,12 +246,26 @@ void pyopencl_expose_mempool()
   }
 
   {
-    typedef pyopencl::memory_pool<cl_allocator> cl;
+    typedef cl_deferred_allocator cls;
+    py::class_<cls, py::bases<cl_allocator_base> > wrapper("DeferredAllocator",
+        py::init<
+          boost::shared_ptr<pyopencl::context> const &,
+          py::optional<cl_mem_flags> >());
+  }
+
+  {
+    typedef cl_immediate_allocator cls;
+    py::class_<cls, py::bases<cl_allocator_base> > wrapper("ImmediateAllocator",
+        py::init<pyopencl::command_queue &, py::optional<cl_mem_flags> >());
+  }
+
+  {
+    typedef pyopencl::memory_pool<cl_allocator_base> cl;
 
     py::class_<
       cl, boost::noncopyable,
       boost::shared_ptr<cl> > wrapper("MemoryPool",
-          py::init<cl_allocator const &>()
+          py::init<cl_allocator_base const &>()
           );
     wrapper
       .def("allocate", device_pool_allocate,
-- 
GitLab