From b14ca7dcd756f6368a98284a70c92072b25ad403 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Fri, 5 Oct 2012 13:51:27 -0400 Subject: [PATCH] Introduce distinction between immediate and deferred allocators. --- doc/source/array.rst | 7 +- doc/source/misc.rst | 3 + doc/source/tools.rst | 19 ++++- pyopencl/__init__.py | 9 ++ pyopencl/array.py | 11 ++- src/wrapper/mempool.hpp | 22 +++-- src/wrapper/wrap_cl.hpp | 9 ++ src/wrapper/wrap_mempool.cpp | 155 ++++++++++++++++++++++++++++++----- 8 files changed, 198 insertions(+), 37 deletions(-) diff --git a/doc/source/array.rst b/doc/source/array.rst index b8276109..752a1b8f 100644 --- a/doc/source/array.rst +++ b/doc/source/array.rst @@ -108,10 +108,6 @@ Under the hood, the complex types are simply `float2` and `double2`. The :class:`Array` Class ------------------------ -.. class:: DefaultAllocator(context, flags=pyopencl.mem_flags.READ_WRITE) - - An alias for :class:`pyopencl.tools.CLAllocator`. - .. class:: Array(cqa, shape, dtype, order="C", *, allocator=None, base=None, data=None) A :class:`numpy.ndarray` work-alike that stores its data and performs its @@ -126,7 +122,8 @@ The :class:`Array` Class *allocator* may be `None` or a callable that, upon being called with an argument of the number of bytes to be allocated, returns an - :class:`pyopencl.Buffer` object. (See :class:`DefaultAllocator`.) + :class:`pyopencl.Buffer` object. (A :class:`pyopencl.tools.MemoryPool` + instance is one useful example of an object to pass here.) .. versionchanged:: 2011.1 Renamed *context* to *cqa*, made it general-purpose. diff --git a/doc/source/misc.rst b/doc/source/misc.rst index 0d4f937e..b0ba993c 100644 --- a/doc/source/misc.rst +++ b/doc/source/misc.rst @@ -91,6 +91,9 @@ Version 2012.2 :func:`pyopencl.tools.get_or_register_dtype`. * Clean up the :class:`pyopencl.array.Array` constructor interface. Deprecate arrays with :attr:`pyopencl.array.Array.queue` equal to *None*. 
+* Deprecate :class:`pyopencl.array.DefaultAllocator`. +* Deprecate :class:`pyopencl.CLAllocator`. +* Introduce :class:`pyopencl.DeferredAllocator`, :class:`pyopencl.ImmediateAllocator`. Version 2012.1 -------------- diff --git a/doc/source/tools.rst b/doc/source/tools.rst index 048d5c7d..f2f9b027 100644 --- a/doc/source/tools.rst +++ b/doc/source/tools.rst @@ -30,10 +30,25 @@ the available memory. memory is returned to the pool. This supports the same interface as :class:`pyopencl.Buffer`. -.. class:: CLAllocator(context, mem_flags=pyopencl.mem_flags.READ_WRITE) +.. class:: DeferredAllocator(context, mem_flags=pyopencl.mem_flags.READ_WRITE) *mem_flags* takes its values from :class:`pyopencl.mem_flags` and corresponds - to the *flags* argument of :class:`pyopencl.Buffer`. + to the *flags* argument of :class:`pyopencl.Buffer`. DeferredAllocator + has the same semantics as regular OpenCL buffer allocation, i.e. it may + promise memory to be available that may (in any call to a buffer-using + CL function) turn out to not exist later on. + + .. method:: __call__(size) + + Allocate a :class:`pyopencl.Buffer` of the given *size*. + +.. class:: ImmediateAllocator(queue, mem_flags=pyopencl.mem_flags.READ_WRITE) + + *mem_flags* takes its values from :class:`pyopencl.mem_flags` and corresponds + to the *flags* argument of :class:`pyopencl.Buffer`. ImmediateAllocator + attempts to ensure at allocation time that the allocated memory is actually + available, so that an out-of-memory condition is reported right at + allocation time. .. method:: __call__(size) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 57781304..cf56c2d3 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -37,6 +37,15 @@ def compiler_output(text): +class CLAllocator(DeferredAllocator): + def __init__(self, *args, **kwargs): + from warnings import warn + warn("pyopencl.CLAllocator is deprecated. " + "It will be continue to exist throughout the 2013.x " + "versions of PyOpenCL. 
Use {Deferred,Immediate}Allocator.", + DeprecationWarning, 2) + DeferredAllocator.__init__(self, *args, **kwargs) + # {{{ Kernel class Kernel(_cl._Kernel): diff --git a/pyopencl/array.py b/pyopencl/array.py index d03dd596..e4d46264 100644 --- a/pyopencl/array.py +++ b/pyopencl/array.py @@ -191,9 +191,14 @@ def elwise_kernel_runner(kernel_getter): - - -DefaultAllocator = cl.CLAllocator +class DefaultAllocator(cl.DeferredAllocator): + def __init__(self, *args, **kwargs): + from warnings import warn + warn("pyopencl.array.DefaultAllocator is deprecated. " + "It will be continue to exist throughout the 2013.x " + "versions of PyOpenCL.", + DeprecationWarning, 2) + cl.DeferredAllocator.__init__(self, *args, **kwargs) # }}} diff --git a/src/wrapper/mempool.hpp b/src/wrapper/mempool.hpp index f1348343..bbf23275 100644 --- a/src/wrapper/mempool.hpp +++ b/src/wrapper/mempool.hpp @@ -58,7 +58,7 @@ namespace PYGPU_PACKAGE container_t m_container; typedef typename container_t::value_type bin_pair_t; - Allocator m_allocator; + std::auto_ptr<Allocator> m_allocator; // A held block is one that's been released by the application, but that // we are keeping around to dish out again. @@ -71,8 +71,16 @@ namespace PYGPU_PACKAGE public: memory_pool(Allocator const &alloc=Allocator()) - : m_allocator(alloc), m_held_blocks(0), m_active_blocks(0), m_stop_holding(false) + : m_allocator(alloc.copy()), + m_held_blocks(0), m_active_blocks(0), m_stop_holding(false) { + if (m_allocator->is_deferred()) + { + PyErr_WarnEx(PyExc_UserWarning, "Memory pools expect non-deferred " + "semantics from their allocators. You passed a deferred " + "allocator, i.e. 
an allocator whose allocations can turn out to " + "be unavailable long after allocation.", 1); + } } ~memory_pool() @@ -163,7 +171,7 @@ namespace PYGPU_PACKAGE throw; } - m_allocator.try_release_blocks(); + m_allocator->try_release_blocks(); if (bin.size()) return pop_block_from_bin(bin, size); @@ -198,7 +206,7 @@ namespace PYGPU_PACKAGE get_bin(bin_number(size)).push_back(p); } else - m_allocator.free(p); + m_allocator->free(p); } void free_held() @@ -209,7 +217,7 @@ namespace PYGPU_PACKAGE while (bin.size()) { - m_allocator.free(bin.back()); + m_allocator->free(bin.back()); bin.pop_back(); dec_held_blocks(); @@ -241,7 +249,7 @@ namespace PYGPU_PACKAGE if (bin.size()) { - m_allocator.free(bin.back()); + m_allocator->free(bin.back()); bin.pop_back(); dec_held_blocks(); @@ -256,7 +264,7 @@ namespace PYGPU_PACKAGE private: pointer_type get_from_allocator(size_type alloc_sz) { - pointer_type result = m_allocator.allocate(alloc_sz); + pointer_type result = m_allocator->allocate(alloc_sz); ++m_active_blocks; return result; diff --git a/src/wrapper/wrap_cl.hpp b/src/wrapper/wrap_cl.hpp index c629d9d0..6c64ac97 100644 --- a/src/wrapper/wrap_cl.hpp +++ b/src/wrapper/wrap_cl.hpp @@ -1159,6 +1159,15 @@ namespace pyopencl } } + std::auto_ptr<context> get_context() const + { + cl_context param_value; + PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo, + (m_queue, CL_QUEUE_CONTEXT, sizeof(param_value), ¶m_value, 0)); + return std::auto_ptr<context>( + new context(param_value, /*retain*/ true)); + } + #if PYOPENCL_CL_VERSION < 0x1010 cl_command_queue_properties set_property( cl_command_queue_properties prop, diff --git a/src/wrapper/wrap_mempool.cpp b/src/wrapper/wrap_mempool.cpp index af9df05d..411f9d0b 100644 --- a/src/wrapper/wrap_mempool.cpp +++ b/src/wrapper/wrap_mempool.cpp @@ -19,28 +19,35 @@ namespace py = boost::python; namespace { - class cl_allocator + class cl_allocator_base { + protected: boost::shared_ptr<pyopencl::context> m_context; cl_mem_flags m_flags; public: - 
cl_allocator(boost::shared_ptr<pyopencl::context> const &ctx, + cl_allocator_base(boost::shared_ptr<pyopencl::context> const &ctx, cl_mem_flags flags=CL_MEM_READ_WRITE) : m_context(ctx), m_flags(flags) { if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) - throw pyopencl::error("PoolAllocator", CL_INVALID_VALUE, + throw pyopencl::error("Allocator", CL_INVALID_VALUE, "cannot specify USE_HOST_PTR or COPY_HOST_PTR flags"); } + cl_allocator_base(cl_allocator_base const &src) + : m_context(src.m_context), m_flags(src.m_flags) + { } + + virtual ~cl_allocator_base() + { } + typedef cl_mem pointer_type; typedef size_t size_type; - pointer_type allocate(size_type s) - { - return pyopencl::create_buffer(m_context->data(), m_flags, s, 0); - } + virtual cl_allocator_base *copy() const = 0; + virtual bool is_deferred() const = 0; + virtual pointer_type allocate(size_type s) = 0; void free(pointer_type p) { @@ -53,13 +60,110 @@ namespace } }; + class cl_deferred_allocator : public cl_allocator_base + { + private: + typedef cl_allocator_base super; + + public: + cl_deferred_allocator(boost::shared_ptr<pyopencl::context> const &ctx, + cl_mem_flags flags=CL_MEM_READ_WRITE) + : super(ctx, flags) + { } + + cl_allocator_base *copy() const + { + return new cl_deferred_allocator(*this); + } + + bool is_deferred() const + { return true; } + + pointer_type allocate(size_type s) + { + return pyopencl::create_buffer(m_context->data(), m_flags, s, 0); + } + }; + + const unsigned zero = 0; + + class cl_immediate_allocator : public cl_allocator_base + { + private: + typedef cl_allocator_base super; + pyopencl::command_queue m_queue; + + public: + cl_immediate_allocator(pyopencl::command_queue &queue, + cl_mem_flags flags=CL_MEM_READ_WRITE) + : super(boost::shared_ptr<pyopencl::context>(queue.get_context()), flags), + m_queue(queue.data(), /*retain*/ true) + { } + + cl_immediate_allocator(cl_immediate_allocator const &src) + : super(src), m_queue(src.m_queue) + { } + + cl_allocator_base 
*copy() const + { + return new cl_immediate_allocator(*this); + } + + bool is_deferred() const + { return false; } + + pointer_type allocate(size_type s) + { + pointer_type ptr = pyopencl::create_buffer( + m_context->data(), m_flags, s, 0); + + // Make sure the buffer gets allocated right here and right now. + // This looks (and is) expensive. But immediate allocators + // have their main use in memory pools, whose basic assumption + // is that allocation is too expensive anyway--but they rely + // on exact 'out-of-memory' information. + unsigned zero = 0; + PYOPENCL_CALL_GUARDED(clEnqueueWriteBuffer, ( + m_queue.data(), + ptr, + /* is blocking */ CL_FALSE, + 0, std::min(s, sizeof(zero)), &zero, + 0, NULL, NULL + )); + + // No need to wait for completion here. clWaitForEvents (e.g.) + // cannot return mem object allocation failures. This implies that + // the buffer is faulted onto the device on enqueue. + + return ptr; + } + }; + inline - pyopencl::buffer *allocator_call(cl_allocator &alloc, size_t size) + pyopencl::buffer *allocator_call(cl_allocator_base &alloc, size_t size) { - cl_mem mem = alloc.allocate(size); + cl_mem mem; + int try_count = 0; + while (try_count < 2) + { + try + { + mem = alloc.allocate(size); + break; + } + catch (pyopencl::error &e) + { + if (!e.is_out_of_memory()) + throw; + if (++try_count == 2) + throw; + } + + alloc.try_release_blocks(); + } try { @@ -76,12 +180,12 @@ namespace class pooled_buffer - : public pyopencl::pooled_allocation<pyopencl::memory_pool<cl_allocator> >, + : public pyopencl::pooled_allocation<pyopencl::memory_pool<cl_allocator_base> >, public pyopencl::memory_object_holder { private: typedef - pyopencl::pooled_allocation<pyopencl::memory_pool<cl_allocator> > + pyopencl::pooled_allocation<pyopencl::memory_pool<cl_allocator_base> > super; public: @@ -98,8 +202,8 @@ namespace pooled_buffer *device_pool_allocate( - boost::shared_ptr<pyopencl::memory_pool<cl_allocator> > pool, - 
pyopencl::memory_pool<cl_allocator>::size_type sz) + boost::shared_ptr<pyopencl::memory_pool<cl_allocator_base> > pool, + pyopencl::memory_pool<cl_allocator_base>::size_type sz) { return new pooled_buffer(pool, sz); } @@ -132,11 +236,8 @@ void pyopencl_expose_mempool() py::def("bitlog2", pyopencl::bitlog2); { - typedef cl_allocator cls; - py::class_<cls> wrapper("CLAllocator", - py::init< - boost::shared_ptr<pyopencl::context> const &, - py::optional<cl_mem_flags> >()); + typedef cl_allocator_base cls; + py::class_<cls, boost::noncopyable> wrapper("AllocatorBase", py::no_init); wrapper .def("__call__", allocator_call, py::return_value_policy<py::manage_new_object>()) @@ -145,12 +246,26 @@ void pyopencl_expose_mempool() } { - typedef pyopencl::memory_pool<cl_allocator> cl; + typedef cl_deferred_allocator cls; + py::class_<cls, py::bases<cl_allocator_base> > wrapper("DeferredAllocator", + py::init< + boost::shared_ptr<pyopencl::context> const &, + py::optional<cl_mem_flags> >()); + } + + { + typedef cl_immediate_allocator cls; + py::class_<cls, py::bases<cl_allocator_base> > wrapper("ImmediateAllocator", + py::init<pyopencl::command_queue &, py::optional<cl_mem_flags> >()); + } + + { + typedef pyopencl::memory_pool<cl_allocator_base> cl; py::class_< cl, boost::noncopyable, boost::shared_ptr<cl> > wrapper("MemoryPool", - py::init<cl_allocator const &>() + py::init<cl_allocator_base const &>() ); wrapper .def("allocate", device_pool_allocate, -- GitLab