// Wrap memory pool
//
// Copyright (C) 2009 Andreas Kloeckner
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.


// Gregor Thalhammer (on Apr 13, 2011) said it's necessary to import Python.h
// first to prevent OS X from overriding a bunch of macros. (e.g. isspace)
#include <Python.h>

#define NO_IMPORT_ARRAY
#define PY_ARRAY_UNIQUE_SYMBOL pyopencl_ARRAY_API

#include <memory>
#include <algorithm>

#include "wrap_helpers.hpp"
#include "wrap_cl.hpp"
#include "mempool.hpp"
#include "tools.hpp"


namespace
{
  class cl_allocator_base
  {
    protected:
      std::shared_ptr<pyopencl::context> m_context;
      cl_mem_flags m_flags;

    public:
      cl_allocator_base(std::shared_ptr<pyopencl::context> const &ctx,
          cl_mem_flags flags=CL_MEM_READ_WRITE)
        : m_context(ctx), m_flags(flags)
      {
        if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))
          throw pyopencl::error("Allocator", CL_INVALID_VALUE,
              "cannot specify USE_HOST_PTR or COPY_HOST_PTR flags");
      }

      cl_allocator_base(cl_allocator_base const &src)
        : m_context(src.m_context), m_flags(src.m_flags)
      { }

      virtual ~cl_allocator_base()
      { }

      typedef cl_mem pointer_type;
      typedef size_t size_type;

      virtual cl_allocator_base *copy() const = 0;
      virtual bool is_deferred() const = 0;
      virtual pointer_type allocate(size_type s) = 0;

      void free(pointer_type p)
      {
        PYOPENCL_CALL_GUARDED(clReleaseMemObject, (p));
      }

      void try_release_blocks()
      {
        pyopencl::run_python_gc();
      }
  };

  class cl_deferred_allocator : public cl_allocator_base
  {
    private:
      typedef cl_allocator_base super;

    public:
      cl_deferred_allocator(std::shared_ptr<pyopencl::context> const &ctx,
          cl_mem_flags flags=CL_MEM_READ_WRITE)
        : super(ctx, flags)
      { }

      cl_allocator_base *copy() const
      { return new cl_deferred_allocator(*this); }

      bool is_deferred() const
      { return true; }

      pointer_type allocate(size_type s)
      {
        if (s == 0)
          return nullptr;

        return pyopencl::create_buffer(m_context->data(), m_flags, s, 0);
      }
  };

  const unsigned zero = 0;

  class cl_immediate_allocator : public cl_allocator_base
  {
    private:
      typedef cl_allocator_base super;
      pyopencl::command_queue m_queue;

    public:
      cl_immediate_allocator(pyopencl::command_queue &queue,
          cl_mem_flags flags=CL_MEM_READ_WRITE)
        : super(std::shared_ptr<pyopencl::context>(queue.get_context()), flags),
        m_queue(queue.data(), /*retain*/ true)
      { }

      cl_immediate_allocator(cl_immediate_allocator const &src)
        : super(src), m_queue(src.m_queue)
      { }

      cl_allocator_base *copy() const
      { return new cl_immediate_allocator(*this); }

      bool is_deferred() const
      { return false; }

      pointer_type allocate(size_type s)
      {
        if (s == 0)
          return nullptr;

        pointer_type ptr = pyopencl::create_buffer(
            m_context->data(), m_flags, s, 0);

        // Make sure the buffer gets allocated right here and right now.
        // This looks (and is) expensive. But immediate allocators
        // have their main use in memory pools, whose basic assumption
        // is that allocation is too expensive anyway--but they rely
        // on 'out-of-memory' being reported on allocation. (If it is
        // reported in a deferred manner, the pool has no way to react
        // (e.g. by freeing unused memory) because it is not part of
        // the call stack.)
        unsigned zero = 0;
        PYOPENCL_CALL_GUARDED(clEnqueueWriteBuffer, (
              m_queue.data(),
              ptr,
              /* is blocking */ CL_FALSE,
              0, std::min(s, sizeof(zero)), &zero,
              0, NULL, NULL
              ));

        // No need to wait for completion here. clWaitForEvents (e.g.)
        // cannot return mem object allocation failures. This implies that
        // the buffer is faulted onto the device on enqueue.

        return ptr;
      }
  };

  inline
  pyopencl::buffer *allocator_call(cl_allocator_base &alloc, size_t size)
  {
    cl_mem mem;
    int try_count = 0;
    while (try_count < 2)
    {
      try
      {
        mem = alloc.allocate(size);
        break;
      }
      catch (pyopencl::error &e)
      {
        if (!e.is_out_of_memory())
          throw;
        if (++try_count == 2)
          throw;
      }

      // Out of memory: ask the Python GC to release held blocks, then retry once.
      alloc.try_release_blocks();
    }

    if (!mem)
    {
      if (size == 0)
        return nullptr;
      else
        throw pyopencl::error("Allocator", CL_INVALID_VALUE,
            "allocator succeeded but returned NULL cl_mem");
    }

    try
    {
      return new pyopencl::buffer(mem, false);
    }
    catch (...)
    {
      PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem));
      throw;
    }
  }

  class pooled_buffer
    : public pyopencl::pooled_allocation<pyopencl::memory_pool<cl_allocator_base> >,
      public pyopencl::memory_object_holder
  {
    private:
      typedef
        pyopencl::pooled_allocation<pyopencl::memory_pool<cl_allocator_base> >
        super;

    public:
      pooled_buffer(
          std::shared_ptr<super::pool_type> p, super::size_type s)
        : super(p, s)
      { }

      const super::pointer_type data() const
      { return ptr(); }
  };

  pooled_buffer *device_pool_allocate(
      std::shared_ptr<pyopencl::memory_pool<cl_allocator_base> > pool,
      pyopencl::memory_pool<cl_allocator_base>::size_type sz)
  {
    return new pooled_buffer(pool, sz);
  }

  template <class Wrapper>
  void expose_memory_pool(Wrapper &wrapper)
  {
    typedef typename Wrapper::type cls;
    wrapper
      .def_property_readonly("held_blocks", &cls::held_blocks)
      .def_property_readonly("active_blocks", &cls::active_blocks)
      .DEF_SIMPLE_METHOD(bin_number)
      .DEF_SIMPLE_METHOD(alloc_size)
      .DEF_SIMPLE_METHOD(free_held)
      .DEF_SIMPLE_METHOD(stop_holding)
      ;
  }
}


void pyopencl_expose_mempool(py::module &m)
{
  m.def("bitlog2", pyopencl::bitlog2);

  {
    typedef cl_allocator_base cls;
    py::class_<cls> wrapper(
        m, "_tools_AllocatorBase"/*, py::no_init */);
    wrapper
      .def("__call__", allocator_call)
      ;
  }

  {
    typedef cl_deferred_allocator cls;
    py::class_<cls, cl_allocator_base> wrapper(
        m, "_tools_DeferredAllocator");
    wrapper
      .def(py::init<
          std::shared_ptr<pyopencl::context> const &>())
      .def(py::init<
          std::shared_ptr<pyopencl::context> const &,
          cl_mem_flags>(),
          py::arg("context"), py::arg("mem_flags"))
      ;
  }

  {
    typedef cl_immediate_allocator cls;
    py::class_<cls, cl_allocator_base> wrapper(
        m, "_tools_ImmediateAllocator");
    wrapper
      .def(py::init<pyopencl::command_queue &>())
      .def(py::init<pyopencl::command_queue &, cl_mem_flags>(),
          py::arg("queue"), py::arg("mem_flags"))
      ;
  }

  {
    typedef pyopencl::memory_pool<cl_allocator_base> cls;

    py::class_<
      cls, /* boost::noncopyable, */
      std::shared_ptr<cls>> wrapper(
        m, "MemoryPool");
    wrapper
      .def(py::init<cl_allocator_base const &, unsigned>(),
          py::arg("allocator"),
          py::arg("leading_bits_in_bin_id")=4
          )
      .def("allocate", device_pool_allocate)
      .def("__call__", device_pool_allocate)
      // undoc for now
      .DEF_SIMPLE_METHOD(set_trace)
      ;

    expose_memory_pool(wrapper);
  }

  {
    typedef pooled_buffer cls;
    py::class_<cls, pyopencl::memory_object_holder>(
        m, "PooledBuffer"/* , py::no_init */)
      .def("release", &cls::free)
      ;
  }
}
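// Usage sketch (illustration only): how the pieces above fit together at the
// C++ level. The pyopencl::memory_pool constructor signature is inferred from
// the py::init<cl_allocator_base const &, unsigned> binding above, so treat
// this as a hypothetical outline rather than tested code; normal use is
// through the Python bindings registered in pyopencl_expose_mempool().
//
//   pyopencl::command_queue &queue = /* an existing queue */;
//   cl_immediate_allocator alloc(queue);   // out-of-memory surfaces at allocate()
//   auto pool = std::make_shared<
//       pyopencl::memory_pool<cl_allocator_base> >(alloc, 4);
//   pooled_buffer *buf = device_pool_allocate(pool, 4096);
//   // ... use buf->data() wherever a cl_mem is expected ...
//   buf->free();   // returns the block to the pool rather than to the driver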