diff --git a/src/mempool.hpp b/src/mempool.hpp
index 23e5758a9ea54117273810f74129e9d97be08c4b..44f0fd64398509132a1dfef917540a3f8fd6de77 100644
--- a/src/mempool.hpp
+++ b/src/mempool.hpp
@@ -34,12 +34,33 @@
 #include <memory>
 #include <ostream>
 #include <iostream>
-#include "wrap_cl.hpp"
+#include <stdexcept>
 #include "bitlog.hpp"
 
 
 namespace PYGPU_PACKAGE
 {
+  // Base class that deletes copy construction and copy assignment, making
+  // every class derived from it non-copyable.
+  // https://stackoverflow.com/a/44175911
+  class mp_noncopyable {
+  public:
+    mp_noncopyable() = default;
+    ~mp_noncopyable() = default;
+
+  private:
+    mp_noncopyable(const mp_noncopyable&) = delete;
+    mp_noncopyable& operator=(const mp_noncopyable&) = delete;
+  };
+
+// Shared-pointer template used for pool handles: boost::shared_ptr when
+// PYGPU_PYCUDA is defined, std::shared_ptr otherwise.
+#ifdef PYGPU_PYCUDA
+#define PYGPU_SHARED_PTR boost::shared_ptr
+#else
+#define PYGPU_SHARED_PTR std::shared_ptr
+#endif
+
   template <class T>
   inline T signed_left_shift(T x, signed shift_amount)
   {
@@ -64,8 +85,18 @@ namespace PYGPU_PACKAGE
 
 
 
+// Always-on assertion: throws std::logic_error naming the violated
+// condition. No trailing semicolon after 'while (false)', so each use
+// expands to exactly one statement and is safe in unbraced if/else.
+#define always_assert(cond) \
+  do { \
+    if (!(cond)) \
+      throw std::logic_error("mem pool assertion violated: " #cond); \
+  } while (false)
+
+
   template<class Allocator>
-  class memory_pool : noncopyable
+  class memory_pool : mp_noncopyable
   {
   public:
     typedef typename Allocator::pointer_type pointer_type;
@@ -151,13 +182,13 @@ namespace PYGPU_PACKAGE
       bin_nr_t exponent = bin >> m_leading_bits_in_bin_id;
       bin_nr_t mantissa = bin & mantissa_mask();
 
-      size_type ones = signed_left_shift(1,
+      size_type ones = signed_left_shift((size_type) 1,
           signed(exponent)-signed(m_leading_bits_in_bin_id)
           );
       if (ones) ones -= 1;
 
       size_type head = signed_left_shift(
-          (1<<m_leading_bits_in_bin_id) | mantissa,
+          (size_type) ((1<<m_leading_bits_in_bin_id) | mantissa),
           signed(exponent)-signed(m_leading_bits_in_bin_id));
       if (ones & head)
         throw std::runtime_error("memory_pool::alloc_size: bit-counting fault");
@@ -213,9 +244,10 @@ namespace PYGPU_PACKAGE
           return pop_block_from_bin(bin, size);
         }
 
-      size_type alloc_sz = alloc_size(bin_nr);
+      size_type alloc_sz = alloc_size(bin_nr);
 
-      assert(bin_number(alloc_sz) == bin_nr);
+      always_assert(bin_number(alloc_sz) == bin_nr);
+      always_assert(alloc_sz >= size);
 
       if (m_trace)
         std::cout << "[pool] allocation of size " << size << " required new memory" << std::endl;
@@ -321,8 +353,10 @@ namespace PYGPU_PACKAGE
     bool try_to_free_memory()
     {
       // free largest stuff first
-      for (bin_pair_t &bin_pair: reverse(m_container))
+      for (typename container_t::reverse_iterator it = m_container.rbegin();
+          it != m_container.rend(); ++it)
       {
+        bin_pair_t &bin_pair = *it;
         bin_t &bin = bin_pair.second;
 
         if (bin.size())
@@ -366,7 +400,7 @@ namespace PYGPU_PACKAGE
 
 
   template <class Pool>
-  class pooled_allocation : public noncopyable
+  class pooled_allocation : public mp_noncopyable
   {
   public:
     typedef Pool pool_type;
@@ -374,14 +408,14 @@ namespace PYGPU_PACKAGE
     typedef typename Pool::size_type size_type;
 
   private:
-    std::shared_ptr<pool_type> m_pool;
+    PYGPU_SHARED_PTR<pool_type> m_pool;
 
     pointer_type m_ptr;
     size_type m_size;
     bool m_valid;
 
   public:
-    pooled_allocation(std::shared_ptr<pool_type> p, size_type size)
+    pooled_allocation(PYGPU_SHARED_PTR<pool_type> p, size_type size)
       : m_pool(p), m_ptr(p->allocate(size)), m_size(size), m_valid(true)
     { }
 
diff --git a/src/wrap_mempool.cpp b/src/wrap_mempool.cpp
index 04027b014b3b8c82bc1e9433d4453a82b3e7a052..6b014ba5e19ab025993ebd3aaa4c9dc73f217878 100644
--- a/src/wrap_mempool.cpp
+++ b/src/wrap_mempool.cpp
@@ -42,6 +42,40 @@
 
 namespace
 {
+  // Dummy allocator that lets memory_pool be exercised without an OpenCL
+  // context: allocate() always returns a null pointer, while free() and
+  // try_release_blocks() do nothing.
+  class test_allocator
+  {
+  public:
+    typedef void *pointer_type;
+    typedef size_t size_type;
+
+    // The class has virtual members, so give it a virtual destructor too.
+    virtual ~test_allocator() = default;
+
+    virtual test_allocator *copy() const
+    {
+      return new test_allocator();
+    }
+
+    virtual bool is_deferred() const
+    {
+      return false;
+    }
+    virtual pointer_type allocate(size_type s)
+    {
+      return nullptr;
+    }
+
+    void free(pointer_type p)
+    { }
+
+    void try_release_blocks()
+    { }
+  };
+
+
   class cl_allocator_base
   {
     protected:
@@ -297,6 +331,27 @@ void pyopencl_expose_mempool(py::module &m)
 
   }
 
+  // Memory pool backed by the dummy test_allocator; exposed so the test
+  // suite can call allocate() without creating an OpenCL context.
+  {
+    typedef pyopencl::memory_pool<test_allocator> cls;
+
+    py::class_<cls, std::shared_ptr<cls>> wrapper( m, "_TestMemoryPool");
+    wrapper
+      .def(py::init([](unsigned leading_bits_in_bin_id)
+            { return new cls(test_allocator(), leading_bits_in_bin_id); }),
+          py::arg("leading_bits_in_bin_id")=4
+          )
+      .def("allocate", [](std::shared_ptr<cls> pool, cls::size_type sz)
+          {
+            pool->allocate(sz);
+            return py::none();
+          })
+      ;
+
+    expose_memory_pool(wrapper);
+  }
+
   {
     typedef cl_deferred_allocator cls;
     py::class_<cls, cl_allocator_base> wrapper(
diff --git a/test/test_wrapper.py b/test/test_wrapper.py
index debcb2b405c62f6828b8d5fa4efb35dbb3ddb865..a9863a40db25e243ad56cf83808b7e0e233fee30 100644
--- a/test/test_wrapper.py
+++ b/test/test_wrapper.py
@@ -571,8 +571,7 @@ def test_mempool_2(ctx_factory):
 
     pool = MemoryPool(ImmediateAllocator(queue))
 
-    for i in range(2000):
-        s = randrange(1 << 31) >> randrange(32)
+    for s in [randrange(1 << 31) >> randrange(32) for _ in range(2000)] + [2**30]:
         bin_nr = pool.bin_number(s)
         asize = pool.alloc_size(bin_nr)
 
@@ -581,6 +580,17 @@ def test_mempool_2(ctx_factory):
         assert asize < asize*(1+1/8)
 
 
+def test_mempool_32bit_issues():
+    """Allocate sizes near 2**30 .. 2**34 from a _TestMemoryPool."""
+    # https://github.com/inducer/pycuda/issues/282
+    from pyopencl._cl import _TestMemoryPool
+    pool = _TestMemoryPool()
+
+    for i in [30, 31, 32, 33, 34]:
+        for offs in range(-5, 5):
+            pool.allocate(2**i + offs)
+
+
 @pytest.mark.parametrize("allocator_cls", [ImmediateAllocator, DeferredAllocator])
 def test_allocator(ctx_factory, allocator_cls):
     context = ctx_factory()