From 2d8f2a51ce91d25292a7be3af17c230e549ca13d Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 10 Jan 2011 11:29:06 -0600 Subject: [PATCH] Initial cut of memory pool support. --- setup.py | 3 + src/wrapper/bitlog.hpp | 53 ++++++ src/wrapper/mempool.hpp | 326 +++++++++++++++++++++++++++++++++ src/wrapper/numpy_init.hpp | 1 + src/wrapper/tools.hpp | 43 +++++ src/wrapper/wrap_cl.cpp | 2 + src/wrapper/wrap_cl.hpp | 129 +++++++++---- src/wrapper/wrap_cl_part_1.cpp | 22 ++- src/wrapper/wrap_mempool.cpp | 143 +++++++++++++++ 9 files changed, 679 insertions(+), 43 deletions(-) create mode 100644 src/wrapper/bitlog.hpp create mode 100644 src/wrapper/mempool.hpp create mode 100644 src/wrapper/tools.hpp create mode 100644 src/wrapper/wrap_mempool.cpp diff --git a/setup.py b/setup.py index bcd5dcaa..80492f5e 100644 --- a/setup.py +++ b/setup.py @@ -59,6 +59,8 @@ def main(): EXTRA_LIBRARY_DIRS = [] EXTRA_LIBRARIES = [] + EXTRA_DEFINES["PYGPU_PACKAGE"] = "pyopencl"; + if conf["CL_TRACE"]: EXTRA_DEFINES["PYOPENCL_TRACE"] = 1 @@ -151,6 +153,7 @@ def main(): "src/wrapper/wrap_cl_part_1.cpp", "src/wrapper/wrap_cl_part_2.cpp", "src/wrapper/wrap_constants.cpp", + "src/wrapper/wrap_mempool.cpp", ]+EXTRA_OBJECTS, include_dirs=INCLUDE_DIRS + EXTRA_INCLUDE_DIRS, library_dirs=LIBRARY_DIRS + conf["CL_LIB_DIR"], diff --git a/src/wrapper/bitlog.hpp b/src/wrapper/bitlog.hpp new file mode 100644 index 00000000..405599e7 --- /dev/null +++ b/src/wrapper/bitlog.hpp @@ -0,0 +1,53 @@ +// Base-2 logarithm bithack. 
+ + + + +#ifndef _AFJDFJSDFSD_PYOPENCL_HEADER_SEEN_BITLOG_HPP +#define _AFJDFJSDFSD_PYOPENCL_HEADER_SEEN_BITLOG_HPP + + + + +#include +#include + + + + +namespace pyopencl +{ + extern const char log_table_8[]; + + inline unsigned bitlog2_16(boost::uint16_t v) + { + if (unsigned long t = v >> 8) + return 8+log_table_8[t]; + else + return log_table_8[v]; + } + + inline unsigned bitlog2_32(boost::uint32_t v) + { + if (boost::uint16_t t = v >> 16) + return 16+bitlog2_16(t); + else + return bitlog2_16(v); + } + + inline unsigned bitlog2(unsigned long v) + { +#if (ULONG_MAX != 4294967295) + if (boost::uint32_t t = v >> 32) + return 32+bitlog2_32(t); + else +#endif + return bitlog2_32(v); + } +} + + + + + +#endif diff --git a/src/wrapper/mempool.hpp b/src/wrapper/mempool.hpp new file mode 100644 index 00000000..e2f2314d --- /dev/null +++ b/src/wrapper/mempool.hpp @@ -0,0 +1,326 @@ +// Abstract memory pool implementation + + + + +#ifndef _AFJDFJSDFSD_PYGPU_HEADER_SEEN_MEMPOOL_HPP +#define _AFJDFJSDFSD_PYGPU_HEADER_SEEN_MEMPOOL_HPP + + + + +#include +#include +#include +#include "bitlog.hpp" + + + + +namespace PYGPU_PACKAGE +{ + template + inline T signed_left_shift(T x, signed shift_amount) + { + if (shift_amount < 0) + return x >> -shift_amount; + else + return x << shift_amount; + } + + + + + template + inline T signed_right_shift(T x, signed shift_amount) + { + if (shift_amount < 0) + return x << -shift_amount; + else + return x >> shift_amount; + } + + + + + template + class memory_pool + { + public: + typedef typename Allocator::pointer_type pointer_type; + typedef typename Allocator::size_type size_type; + + private: + typedef boost::uint32_t bin_nr_t; + typedef std::vector bin_t; + + typedef boost::ptr_map container_t; + container_t m_container; + typedef typename container_t::value_type bin_pair_t; + + Allocator m_allocator; + + // A held block is one that's been released by the application, but that + // we are keeping around to dish out again. 
+ unsigned m_held_blocks; + + // An active block is one that is in use by the application. + unsigned m_active_blocks; + + bool m_stop_holding; + + public: + memory_pool(Allocator const &alloc=Allocator()) + : m_allocator(alloc), m_held_blocks(0), m_active_blocks(0), m_stop_holding(false) + { + } + + ~memory_pool() + { free_held(); } + + static const unsigned mantissa_bits = 2; + static const unsigned mantissa_mask = (1 << mantissa_bits) - 1; + + static bin_nr_t bin_number(size_type size) + { + signed l = bitlog2(size); + size_type shifted = signed_right_shift(size, l-signed(mantissa_bits)); + if (size && (shifted & (1 << mantissa_bits)) == 0) + throw std::runtime_error("memory_pool::bin_number: bitlog2 fault"); + size_type chopped = shifted & mantissa_mask; + return l << mantissa_bits | chopped; + } + + static size_type alloc_size(bin_nr_t bin) + { + bin_nr_t exponent = bin >> mantissa_bits; + bin_nr_t mantissa = bin & mantissa_mask; + + size_type ones = signed_left_shift(1, + signed(exponent)-signed(mantissa_bits) + ); + if (ones) ones -= 1; + + size_type head = signed_left_shift( + (1<second; + } + + void inc_held_blocks() + { + if (m_held_blocks == 0) + start_holding_blocks(); + ++m_held_blocks; + } + + void dec_held_blocks() + { + --m_held_blocks; + if (m_held_blocks == 0) + stop_holding_blocks(); + } + + virtual void start_holding_blocks() + { } + + virtual void stop_holding_blocks() + { } + + public: + pointer_type allocate(size_type size) + { + bin_nr_t bin_nr = bin_number(size); + bin_t &bin = get_bin(bin_nr); + + if (bin.size()) + return pop_block_from_bin(bin, size); + + size_type alloc_sz = alloc_size(bin_nr); + + assert(bin_number(alloc_sz) == bin_nr); + + try { return get_from_allocator(alloc_sz); } + catch (PYGPU_PACKAGE::error &e) + { + if (!e.is_out_of_memory()) + throw; + } + + m_allocator.try_release_blocks(); + if (bin.size()) + return pop_block_from_bin(bin, size); + + while (try_to_free_memory()) + { + try { return 
get_from_allocator(alloc_sz); } + catch (PYGPU_PACKAGE::error &e) + { + if (!e.is_out_of_memory()) + throw; + } + } + + throw PYGPU_PACKAGE::error( + "memory_pool::allocate", + CL_MEM_OBJECT_ALLOCATION_FAILURE, + "failed to free memory for allocation"); + } + + void free(pointer_type p, size_type size) + { + --m_active_blocks; + + if (!m_stop_holding) + { + inc_held_blocks(); + get_bin(bin_number(size)).push_back(p); + } + else + m_allocator.free(p); + } + + void free_held() + { + BOOST_FOREACH(bin_pair_t bin_pair, m_container) + { + bin_t &bin = *bin_pair.second; + + while (bin.size()) + { + m_allocator.free(bin.back()); + bin.pop_back(); + + dec_held_blocks(); + } + } + + assert(m_held_blocks == 0); + } + + void stop_holding() + { + m_stop_holding = true; + free_held(); + } + + unsigned active_blocks() + { return m_active_blocks; } + + unsigned held_blocks() + { return m_held_blocks; } + + bool try_to_free_memory() + { + BOOST_FOREACH(bin_pair_t bin_pair, + // free largest stuff first + std::make_pair(m_container.rbegin(), m_container.rend())) + { + bin_t &bin = *bin_pair.second; + + if (bin.size()) + { + m_allocator.free(bin.back()); + bin.pop_back(); + + dec_held_blocks(); + + return true; + } + } + + return false; + } + + private: + pointer_type get_from_allocator(size_type alloc_sz) + { + pointer_type result = m_allocator.allocate(alloc_sz); + ++m_active_blocks; + + return result; + } + + pointer_type pop_block_from_bin(bin_t &bin, size_type size) + { + pointer_type result = bin.back(); + bin.pop_back(); + + dec_held_blocks(); + ++m_active_blocks; + + return result; + } + }; + + + + + + template + class pooled_allocation : public boost::noncopyable + { + public: + typedef Pool pool_type; + typedef typename Pool::pointer_type pointer_type; + typedef typename Pool::size_type size_type; + + private: + boost::shared_ptr m_pool; + + pointer_type m_ptr; + size_type m_size; + bool m_valid; + + public: + pooled_allocation(boost::shared_ptr p, size_type size) + : 
m_pool(p), m_ptr(p->allocate(size)), m_size(size), m_valid(true) + { } + + ~pooled_allocation() + { + if (m_valid) + free(); + } + + void free() + { + if (m_valid) + { + m_pool->free(m_ptr, m_size); + m_valid = false; + } + else + throw PYGPU_PACKAGE::error( + "pooled_allocation::free", + CL_INVALID_VALUE); + } + + pointer_type ptr() const + { return m_ptr; } + + size_type size() const + { return m_size; } + }; +} + + + + +#endif diff --git a/src/wrapper/numpy_init.hpp b/src/wrapper/numpy_init.hpp index bf62482c..54dc31e7 100644 --- a/src/wrapper/numpy_init.hpp +++ b/src/wrapper/numpy_init.hpp @@ -1,4 +1,5 @@ #ifndef _FAYHVVAAA_PYOPENCL_HEADER_SEEN_NUMPY_INIT_HPP +#define _FAYHVVAAA_PYOPENCL_HEADER_SEEN_NUMPY_INIT_HPP diff --git a/src/wrapper/tools.hpp b/src/wrapper/tools.hpp new file mode 100644 index 00000000..7254ace1 --- /dev/null +++ b/src/wrapper/tools.hpp @@ -0,0 +1,43 @@ +#ifndef _ASDFDAFVVAFF_PYCUDA_HEADER_SEEN_TOOLS_HPP +#define _ASDFDAFVVAFF_PYCUDA_HEADER_SEEN_TOOLS_HPP + + + + +#include +#include +#include "numpy_init.hpp" + + + + +namespace pyopencl +{ + inline + npy_intp size_from_dims(int ndim, const npy_intp *dims) + { + if (ndim != 0) + return std::accumulate(dims, dims+ndim, npy_intp(1), std::multiplies()); + else + return 1; + } + + + + + inline void run_python_gc() + { + namespace py = boost::python; + + py::object gc_mod( + py::handle<>( + PyImport_ImportModule("gc"))); + gc_mod.attr("collect")(); + } +} + + + + + +#endif diff --git a/src/wrapper/wrap_cl.cpp b/src/wrapper/wrap_cl.cpp index 903cc666..6ba82dfd 100644 --- a/src/wrapper/wrap_cl.cpp +++ b/src/wrapper/wrap_cl.cpp @@ -11,12 +11,14 @@ using namespace pyopencl; extern void pyopencl_expose_constants(); extern void pyopencl_expose_part_1(); extern void pyopencl_expose_part_2(); +extern void pyopencl_expose_mempool(); BOOST_PYTHON_MODULE(_cl) { pyopencl_expose_constants(); pyopencl_expose_part_1(); pyopencl_expose_part_2(); + pyopencl_expose_mempool(); } // vim: foldmethod=marker diff --git 
a/src/wrapper/wrap_cl.hpp b/src/wrapper/wrap_cl.hpp index 719655f9..9a33e6d9 100644 --- a/src/wrapper/wrap_cl.hpp +++ b/src/wrapper/wrap_cl.hpp @@ -43,16 +43,13 @@ #include #include "wrap_helpers.hpp" #include "numpy_init.hpp" +#include "tools.hpp" // }}} -// #define PYOPENCL_TRACE - - - // {{{ tools #if PY_VERSION_HEX >= 0x02050000 @@ -268,6 +265,13 @@ namespace pyopencl return m_code; } + bool is_out_of_memory() const + { + return (code() == CL_MEM_OBJECT_ALLOCATION_FAILURE + || code() == CL_OUT_OF_RESOURCES + || code() == CL_OUT_OF_HOST_MEMORY); + } + static const char *cl_error_to_str(cl_int e) { switch (e) @@ -1082,7 +1086,28 @@ namespace pyopencl // {{{ memory_object - class memory_object : boost::noncopyable + class memory_object_holder + { + public: + virtual const cl_mem data() const = 0; + + PYOPENCL_EQUALITY_TESTS(memory_object_holder); + + size_t size() const + { + size_t param_value; + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (data(), CL_MEM_SIZE, sizeof(param_value), &param_value, 0)); + return param_value; + } + + py::object get_info(cl_mem_info param_name) const; + }; + + + + + class memory_object : boost::noncopyable, public memory_object_holder { private: bool m_valid; @@ -1132,23 +1157,60 @@ namespace pyopencl return (npy_intp) data(); } - PYOPENCL_EQUALITY_TESTS(memory_object); - - size_t size() const - { - size_t param_value; - PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, - (m_mem, CL_MEM_SIZE, sizeof(param_value), &param_value, 0)); - return param_value; - } - - py::object get_info(cl_mem_info param_name) const; }; // }}} // {{{ buffer + inline cl_mem create_buffer( + cl_context ctx, + cl_mem_flags flags, + size_t size, + void *host_ptr) + { + cl_int status_code; + cl_mem mem = clCreateBuffer(ctx, flags, size, host_ptr, &status_code); + + PYOPENCL_PRINT_CALL_TRACE("clCreateBuffer"); + if (status_code != CL_SUCCESS) + throw pyopencl::error("create_buffer", status_code); + + return mem; + } + + + + + inline cl_mem create_buffer_gc( + cl_context ctx, + 
cl_mem_flags flags, + size_t size, + void *host_ptr) + { + try + { + return create_buffer(ctx, flags, size, host_ptr); + } + catch (pyopencl::error &e) + { + if (!e.is_out_of_memory()) + throw; + } + + // If we get here, we got an error from CL. + // We should run the Python GC to try and free up + // some memory references. + run_python_gc(); + + // Now retry the allocation. If it fails again, + // let it fail. + return create_buffer(ctx, flags, size, host_ptr); + } + + + + class buffer : public memory_object { public: @@ -1208,7 +1270,7 @@ namespace pyopencl // {{{ buffer creation inline - buffer *create_buffer( + buffer *create_buffer_py( context &ctx, cl_mem_flags flags, size_t size, @@ -1247,12 +1309,7 @@ namespace pyopencl size = len; } - cl_int status_code; - cl_mem mem = clCreateBuffer(ctx.data(), flags, size, buf, &status_code); - - PYOPENCL_PRINT_CALL_TRACE("clCreateBuffer"); - if (status_code != CL_SUCCESS) - throw pyopencl::error("create_host_buffer", status_code); + cl_mem mem = create_buffer_gc(ctx.data(), flags, size, buf); try { @@ -2473,9 +2530,9 @@ namespace pyopencl sizeof(cl_mem), &m)); } - void set_arg_mem(cl_uint arg_index, memory_object &mo) + void set_arg_mem(cl_uint arg_index, memory_object_holder &moh) { - cl_mem m = mo.data(); + cl_mem m = moh.data(); PYOPENCL_CALL_GUARDED(clSetKernelArg, (m_kernel, arg_index, sizeof(cl_mem), &m)); } @@ -2943,30 +3000,30 @@ namespace pyopencl // {{{ deferred implementation bits inline - py::object memory_object::get_info(cl_mem_info param_name) const + py::object memory_object_holder::get_info(cl_mem_info param_name) const { switch (param_name) { case CL_MEM_TYPE: - PYOPENCL_GET_INTEGRAL_INFO(MemObject, m_mem, param_name, + PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, cl_mem_object_type); case CL_MEM_FLAGS: - PYOPENCL_GET_INTEGRAL_INFO(MemObject, m_mem, param_name, + PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, cl_mem_flags); case CL_MEM_SIZE: - 
PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, size_t); case CL_MEM_HOST_PTR: throw pyopencl::error("MemoryObject.get_info", CL_INVALID_VALUE, "Use MemoryObject.get_host_array to get host pointer."); case CL_MEM_MAP_COUNT: - PYOPENCL_GET_INTEGRAL_INFO(MemObject, m_mem, param_name, + PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, cl_uint); case CL_MEM_REFERENCE_COUNT: - PYOPENCL_GET_INTEGRAL_INFO(MemObject, m_mem, param_name, + PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, cl_uint); case CL_MEM_CONTEXT: - PYOPENCL_GET_OPAQUE_INFO(MemObject, m_mem, param_name, + PYOPENCL_GET_OPAQUE_INFO(MemObject, data(), param_name, cl_context, context); #ifdef CL_VERSION_1_1 @@ -2974,7 +3031,7 @@ namespace pyopencl { cl_mem param_value; PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, \ - (m_mem, param_name, sizeof(param_value), &param_value, 0)); + (data(), param_name, sizeof(param_value), &param_value, 0)); if (param_value == 0) { // no associated memory object? no problem. 
@@ -2983,7 +3040,7 @@ namespace pyopencl cl_mem_object_type mem_obj_type; PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, \ - (m_mem, CL_MEM_TYPE, sizeof(mem_obj_type), &mem_obj_type, 0)); + (data(), CL_MEM_TYPE, sizeof(mem_obj_type), &mem_obj_type, 0)); switch (mem_obj_type) { @@ -3000,12 +3057,12 @@ namespace pyopencl } } case CL_MEM_OFFSET: - PYOPENCL_GET_INTEGRAL_INFO(MemObject, m_mem, param_name, + PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, size_t); #endif default: - throw error("MemoryObject.get_info", CL_INVALID_VALUE); + throw error("MemoryObjectHolder.get_info", CL_INVALID_VALUE); } } diff --git a/src/wrapper/wrap_cl_part_1.cpp b/src/wrapper/wrap_cl_part_1.cpp index 233f7db0..58fda5d8 100644 --- a/src/wrapper/wrap_cl_part_1.cpp +++ b/src/wrapper/wrap_cl_part_1.cpp @@ -46,7 +46,8 @@ void pyopencl_expose_part_1() { typedef context cls; - py::class_("Context", py::no_init) + py::class_ >("Context", py::no_init) .def("__init__", make_constructor(create_context, py::default_call_policies(), (py::arg("devices")=py::object(), @@ -117,18 +118,25 @@ void pyopencl_expose_part_1() // {{{ memory_object { - typedef memory_object cls; - py::class_("MemoryObject", py::no_init) + typedef memory_object_holder cls; + py::class_( + "MemoryObjectHolder", py::no_init) .DEF_SIMPLE_METHOD(get_info) - .DEF_SIMPLE_METHOD(release) .def("get_host_array", get_mem_obj_host_array, (py::arg("shape"), py::arg("dtype"), py::arg("order")="C")) - .add_property("obj_ptr", &cls::obj_ptr) - .add_property("hostbuf", &cls::hostbuf) .def(py::self == py::self) .def(py::self != py::self) ; } + { + typedef memory_object cls; + py::class_ >( + "MemoryObject", py::no_init) + .DEF_SIMPLE_METHOD(release) + .add_property("obj_ptr", &cls::obj_ptr) + .add_property("hostbuf", &cls::hostbuf) + ; + } // }}} @@ -137,7 +145,7 @@ void pyopencl_expose_part_1() typedef buffer cls; py::class_, boost::noncopyable>( "Buffer", py::no_init) - .def("__init__", make_constructor(create_buffer, + .def("__init__", 
make_constructor(create_buffer_py, py::default_call_policies(), (py::args("context", "flags"), py::arg("size")=0, diff --git a/src/wrapper/wrap_mempool.cpp b/src/wrapper/wrap_mempool.cpp new file mode 100644 index 00000000..6cfa84b8 --- /dev/null +++ b/src/wrapper/wrap_mempool.cpp @@ -0,0 +1,143 @@ +#include +#include "wrap_helpers.hpp" +#include "wrap_cl.hpp" +#include "mempool.hpp" +#include "tools.hpp" +#include + + + + +namespace py = boost::python; + + + + +namespace +{ + class cl_allocator + { + boost::shared_ptr m_context; + cl_mem_flags m_flags; + + public: + cl_allocator(boost::shared_ptr const &ctx, + cl_mem_flags flags) + : m_context(ctx), m_flags(flags) + { + if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) + throw pyopencl::error("PoolAllocator", CL_INVALID_VALUE, + "cannot specify USE_HOST_PTR or COPY_HOST_PTR flags"); + } + + typedef cl_mem pointer_type; + typedef size_t size_type; + + pointer_type allocate(size_type s) + { + return pyopencl::create_buffer(m_context->data(), m_flags, s, 0); + } + + void free(pointer_type p) + { + PYOPENCL_CALL_GUARDED(clReleaseMemObject, (p)); + } + + void try_release_blocks() + { + pyopencl::run_python_gc(); + } + }; + + + + + class pooled_buffer + : public pyopencl::pooled_allocation >, + public pyopencl::memory_object_holder + { + private: + typedef + pyopencl::pooled_allocation > + super; + + public: + pooled_buffer( + boost::shared_ptr p, super::size_type s) + : super(p, s) + { } + + const super::pointer_type data() const + { return ptr(); } + }; + + + + + pooled_buffer *device_pool_allocate( + boost::shared_ptr > pool, + pyopencl::memory_pool::size_type sz) + { + return new pooled_buffer(pool, sz); + } + + + + + template + void expose_memory_pool(Wrapper &wrapper) + { + typedef typename Wrapper::wrapped_type cls; + wrapper + .add_property("held_blocks", &cls::held_blocks) + .add_property("active_blocks", &cls::active_blocks) + .DEF_SIMPLE_METHOD(bin_number) + .DEF_SIMPLE_METHOD(alloc_size) + 
.DEF_SIMPLE_METHOD(free_held) + .DEF_SIMPLE_METHOD(stop_holding) + .staticmethod("bin_number") + .staticmethod("alloc_size") + ; + } +} + + + + +void pyopencl_expose_mempool() +{ + py::def("bitlog2", pyopencl::bitlog2); + + { + typedef cl_allocator cls; + py::class_ wrapper("CLAllocator", + py::init< + boost::shared_ptr const &, + cl_mem_flags>()); + } + + { + typedef pyopencl::memory_pool cl; + + py::class_< + cl, boost::noncopyable, + boost::shared_ptr > wrapper("MemoryPool", + py::init() + ); + wrapper + .def("allocate", device_pool_allocate, + py::return_value_policy()) + ; + + expose_memory_pool(wrapper); + } + + { + typedef pooled_buffer cls; + py::class_ >( + "PooledBuffer", py::no_init) + .def("release", &cls::free) + ; + } +} -- GitLab