// Abstract memory pool implementation #ifndef _AFJDFJSDFSD_PYCUDA_HEADER_SEEN_MEMPOOL_HPP #define _AFJDFJSDFSD_PYCUDA_HEADER_SEEN_MEMPOOL_HPP #include #include #include #include namespace pycuda { template inline T signed_left_shift(T x, signed shift_amount) { if (shift_amount < 0) return x >> -shift_amount; else return x << shift_amount; } template inline T signed_right_shift(T x, signed shift_amount) { if (shift_amount < 0) return x << -shift_amount; else return x >> shift_amount; } template class memory_pool { public: typedef typename Allocator::pointer_type pointer_type; typedef typename Allocator::size_type size_type; private: typedef boost::uint32_t bin_nr_t; typedef std::vector bin_t; typedef boost::ptr_map container_t; container_t m_container; typedef typename container_t::value_type bin_pair_t; Allocator m_allocator; // A held block is one that's been released by the application, but that // we are keeping around to dish out again. unsigned m_held_blocks; // An active block is one that is in use by the application. unsigned m_active_blocks; bool m_stop_holding; public: memory_pool() : m_held_blocks(0), m_active_blocks(0), m_stop_holding(false) { } ~memory_pool() { free_held(); } static const unsigned mantissa_bits = 2; static const unsigned mantissa_mask = (1 << mantissa_bits) - 1; static bin_nr_t bin_number(size_type size) { signed l = bitlog2(size); size_type shifted = signed_right_shift(size, l-signed(mantissa_bits)); if (size && (shifted & (1 << mantissa_bits)) == 0) throw std::runtime_error("memory_pool::bin_number: bitlog2 fault"); size_type chopped = shifted & mantissa_mask; return l << mantissa_bits | chopped; } static size_type alloc_size(bin_nr_t bin) { bin_nr_t exponent = bin >> mantissa_bits; bin_nr_t mantissa = bin & mantissa_mask; size_type ones = signed_left_shift(1, signed(exponent)-signed(mantissa_bits) ); if (ones) ones -= 1; size_type head = signed_left_shift( (1<second; } void inc_held_blocks() { if (m_held_blocks == 0) start_holding_blocks(); ++m_held_blocks; } void dec_held_blocks() { --m_held_blocks; if (m_held_blocks == 0) stop_holding_blocks(); } virtual void start_holding_blocks() { } virtual void stop_holding_blocks() { } public: pointer_type allocate(size_type size) { bin_nr_t bin_nr = bin_number(size); bin_t &bin = get_bin(bin_nr); if (bin.size()) return pop_block_from_bin(bin, size); size_type alloc_sz = alloc_size(bin_nr); assert(bin_number(alloc_sz) == bin_nr); try { return get_from_allocator(alloc_sz); } catch (cuda::error &e) { // Not OOM? Propagate. if (e.code() != CUDA_ERROR_OUT_OF_MEMORY) throw; } m_allocator.try_release_blocks(); if (bin.size()) return pop_block_from_bin(bin, size); while (try_to_free_memory()) { try { return get_from_allocator(alloc_sz); } catch (cuda::error &e) { // Not OOM? Propagate. if (e.code() != CUDA_ERROR_OUT_OF_MEMORY) throw; } } throw cuda::error( "memory_pool::allocate", CUDA_ERROR_OUT_OF_MEMORY, "failed to free memory for allocation"); } void free(pointer_type p, size_type size) { --m_active_blocks; if (!m_stop_holding) { inc_held_blocks(); get_bin(bin_number(size)).push_back(p); } else m_allocator.free(p); } void free_held() { BOOST_FOREACH(bin_pair_t bin_pair, m_container) { bin_t &bin = *bin_pair.second; while (bin.size()) { m_allocator.free(bin.back()); bin.pop_back(); dec_held_blocks(); } } assert(m_held_blocks == 0); } void stop_holding() { m_stop_holding = true; free_held(); } unsigned active_blocks() { return m_active_blocks; } unsigned held_blocks() { return m_held_blocks; } bool try_to_free_memory() { BOOST_FOREACH(bin_pair_t bin_pair, // free largest stuff first std::make_pair(m_container.rbegin(), m_container.rend())) { bin_t &bin = *bin_pair.second; if (bin.size()) { m_allocator.free(bin.back()); bin.pop_back(); dec_held_blocks(); return true; } } return false; } private: pointer_type get_from_allocator(size_type alloc_sz) { pointer_type result = m_allocator.allocate(alloc_sz); ++m_active_blocks; return result; } pointer_type pop_block_from_bin(bin_t &bin, size_type size) { pointer_type result = bin.back(); bin.pop_back(); dec_held_blocks(); ++m_active_blocks; return result; } }; template class pooled_allocation : public boost::noncopyable { public: typedef Pool pool_type; typedef typename Pool::pointer_type pointer_type; typedef typename Pool::size_type size_type; private: boost::shared_ptr m_pool; pointer_type m_ptr; size_type m_size; bool m_valid; public: pooled_allocation(boost::shared_ptr p, size_type size) : m_pool(p), m_ptr(p->allocate(size)), m_size(size), m_valid(true) { } ~pooled_allocation() { free(); } void free() { if (m_valid) { m_pool->free(m_ptr, m_size); m_valid = false; } else throw cuda::error("pooled_device_allocation::free", CUDA_ERROR_INVALID_HANDLE); } pointer_type ptr() const { return m_ptr; } size_type size() const { return m_size; } }; } #endif